Code:
/ Dotnetfx_Win7_3.5.1 / Dotnetfx_Win7_3.5.1 / 3.5.1 / DEVDIV / depot / DevDiv / releases / whidbey / NetFXspW7 / ndp / fx / src / Sys / System / IO / compression / HuffmanTree.cs / 1 / HuffmanTree.cs
namespace System.IO.Compression { using System; using System.Diagnostics; // Strictly speaking this class is not a HuffmanTree, this class is // a lookup table combined with a HuffmanTree. The idea is to speed up // the lookup for short symbols (they should appear more frequently ideally.) // However we don't want to create a huge table since it might take longer to // build the table than decoding (Deflate usually generates new tables frequently.) // // Jean-loup Gailly and Mark Adler gave a very good explanation about this. // The full text (algorithm.txt) can be found inside // ftp://ftp.uu.net/pub/archiving/zip/zlib/zlib.zip. // // Following paper explains decoding in details: // Hirschberg and Lelewer, "Efficient decoding of prefix codes," // Comm. ACM, 33,4, April 1990, pp. 449-459. // internal class HuffmanTree { internal const int MaxLiteralTreeElements = 288; internal const int MaxDistTreeElements = 32; internal const int EndOfBlockCode = 256; internal const int NumberOfCodeLengthTreeElements = 19; int tableBits; short[] table; short[] left; short[] right; byte[] codeLengthArray; #if DEBUG uint[] codeArrayDebug; #endif int tableMask; // huffman tree for static block static HuffmanTree staticLiteralLengthTree; static HuffmanTree staticDistanceTree; static HuffmanTree() { // construct the static literal tree and distance tree staticLiteralLengthTree = new HuffmanTree(GetStaticLiteralTreeLength()); staticDistanceTree = new HuffmanTree(GetStaticDistanceTreeLength()); } static public HuffmanTree StaticLiteralLengthTree { get { return staticLiteralLengthTree; } } static public HuffmanTree StaticDistanceTree { get { return staticDistanceTree; } } public HuffmanTree(byte[] codeLengths) { Debug.Assert( codeLengths.Length == MaxLiteralTreeElements || codeLengths.Length == MaxDistTreeElements || codeLengths.Length == NumberOfCodeLengthTreeElements, "we only expect three kinds of Length here"); codeLengthArray = codeLengths; if (codeLengthArray.Length == MaxLiteralTreeElements) { // bits for Literal/Length tree table tableBits = 9; } else { // bits for distance tree table and code length tree table tableBits = 7; } tableMask = (1 << tableBits) -1; CreateTable(); } // Generate the array contains huffman codes lengths for static huffman tree. // The data is in RFC 1951. static byte[] GetStaticLiteralTreeLength() { byte[] literalTreeLength = new byte[MaxLiteralTreeElements]; for (int i = 0; i <= 143; i++) literalTreeLength[i] = 8; for (int i = 144; i <= 255; i++) literalTreeLength[i] = 9; for (int i = 256; i <= 279; i++) literalTreeLength[i] = 7; for (int i = 280; i <= 287; i++) literalTreeLength[i] = 8; return literalTreeLength; } static byte[] GetStaticDistanceTreeLength() { byte[] staticDistanceTreeLength = new byte[MaxDistTreeElements]; for (int i = 0; i < MaxDistTreeElements; i++) { staticDistanceTreeLength[i] = 5; } return staticDistanceTreeLength; } // Calculate the huffman code for each character based on the code length for each character. // This algorithm is described in standard RFC 1951 uint[] CalculateHuffmanCode() { uint[] bitLengthCount = new uint[17]; foreach( int codeLength in codeLengthArray) { bitLengthCount[codeLength]++; } bitLengthCount[0] = 0; // clear count for length 0 uint[] nextCode = new uint[17]; uint tempCode = 0; for (int bits = 1; bits <= 16; bits++) { tempCode = (tempCode + bitLengthCount[bits-1]) << 1; nextCode[bits] = tempCode; } uint[] code = new uint[MaxLiteralTreeElements]; for (int i = 0; i < codeLengthArray.Length; i++) { int len = codeLengthArray[i]; if (len > 0) { code[i] = DecodeHelper.BitReverse(nextCode[len], len); nextCode[len]++; } } return code; } private void CreateTable() { uint[] codeArray = CalculateHuffmanCode(); table = new short[ 1 << tableBits]; #if DEBUG codeArrayDebug = codeArray; #endif // I need to find proof that left and right array will always be // enough. I think they are. left = new short[2* codeLengthArray.Length]; right = new short[2* codeLengthArray.Length]; short avail = (short)codeLengthArray.Length; for (int ch = 0; ch < codeLengthArray.Length; ch++) { // length of this code int len = codeLengthArray[ch]; if (len > 0) { // start value (bit reversed) int start = (int)codeArray[ch]; if (len <= tableBits) { // If a particular symbol is shorter than nine bits, // then that symbol's translation is duplicated // in all those entries that start with that symbol's bits. // For example, if the symbol is four bits, then it's duplicated // 32 times in a nine-bit table. If a symbol is nine bits long, // it appears in the table once. // // Make sure that in the loop below, code is always // less than table_size. // // On last iteration we store at array index: // initial_start_at + (locs-1)*increment // = initial_start_at + locs*increment - increment // = initial_start_at + (1 << tableBits) - increment // = initial_start_at + table_size - increment // // Therefore we must ensure: // initial_start_at + table_size - increment < table_size // or: initial_start_at < increment // int increment = 1 << len; if (start >= increment) { throw new InvalidDataException(SR.GetString(SR.InvalidHuffmanData)); } // Note the bits in the table are reverted. int locs = 1 << (tableBits - len); for (int j = 0; j < locs; j++) { table[start] = (short) ch; start += increment; } } else { // For any code which has length longer than num_elements, // build a binary tree. int overflowBits = len - tableBits; // the nodes we need to respent the data. int codeBitMask = 1 << tableBits; // mask to get current bit (the bits can't fit in the table) // the left, right table is used to repesent the // the rest bits. When we got the first part (number bits.) and look at // tbe table, we will need to follow the tree to find the real character. // This is in place to avoid bloating the table if there are // a few ones with long code. int index = start & ((1 << tableBits) -1); short[] array = table; do { short value = array[index]; if (value == 0) { // set up next pointer if this node is not used before. array[index] = (short)-avail; // use next available slot. value = (short)-avail; avail++; } Debug.Assert( value < 0, "Only negative number is used a tree pointer!"); if ((start & codeBitMask) == 0) { // if current bit is 0, go change the left array array = left; } else { // if current bit is 1, set value in the right array array = right; } index = -value; // go to next node codeBitMask <<= 1; overflowBits--; } while (overflowBits != 0); array[index] = (short) ch; } } } } // // This function will try to get enough bits from input and // try to decode the bits. // If there are no enought bits in the input, this function will return -1. // public int GetNextSymbol(InputBuffer input) { // Try to load 16 bits into input buffer if possible and get the bitBuffer value. // If there aren't 16 bits available we will return all we have in the // input buffer. uint bitBuffer = input.TryLoad16Bits(); if( input.AvailableBits == 0) { // running out of input. return -1; } // decode an element int symbol = table[bitBuffer & tableMask]; if( symbol < 0) { // this will be the start of the binary tree // navigate the tree uint mask = (uint)1 << tableBits; do { symbol = -symbol; if ((bitBuffer & mask) == 0) symbol = left[symbol]; else symbol = right[symbol]; mask <<= 1; } while (symbol < 0); } // // If this code is longer than the # bits we had in the bit buffer (i.e. // we read only part of the code), we can hit the entry in the table or the tree // for another symbol. However the length of another symbol will not match the // available bits count. if (codeLengthArray[symbol] > input.AvailableBits) { // We already tried to load 16 bits and maximum length is 15, // so this means we are running out of input. return -1; } input.SkipBits(codeLengthArray[symbol]); return symbol; } } } // File provided for Reference Use Only by Microsoft Corporation (c) 2007. // Copyright (c) Microsoft Corporation. All rights reserved. namespace System.IO.Compression { using System; using System.Diagnostics; // Strictly speaking this class is not a HuffmanTree, this class is // a lookup table combined with a HuffmanTree. The idea is to speed up // the lookup for short symbols (they should appear more frequently ideally.) // However we don't want to create a huge table since it might take longer to // build the table than decoding (Deflate usually generates new tables frequently.) // // Jean-loup Gailly and Mark Adler gave a very good explanation about this. // The full text (algorithm.txt) can be found inside // ftp://ftp.uu.net/pub/archiving/zip/zlib/zlib.zip. // // Following paper explains decoding in details: // Hirschberg and Lelewer, "Efficient decoding of prefix codes," // Comm. ACM, 33,4, April 1990, pp. 449-459. // internal class HuffmanTree { internal const int MaxLiteralTreeElements = 288; internal const int MaxDistTreeElements = 32; internal const int EndOfBlockCode = 256; internal const int NumberOfCodeLengthTreeElements = 19; int tableBits; short[] table; short[] left; short[] right; byte[] codeLengthArray; #if DEBUG uint[] codeArrayDebug; #endif int tableMask; // huffman tree for static block static HuffmanTree staticLiteralLengthTree; static HuffmanTree staticDistanceTree; static HuffmanTree() { // construct the static literal tree and distance tree staticLiteralLengthTree = new HuffmanTree(GetStaticLiteralTreeLength()); staticDistanceTree = new HuffmanTree(GetStaticDistanceTreeLength()); } static public HuffmanTree StaticLiteralLengthTree { get { return staticLiteralLengthTree; } } static public HuffmanTree StaticDistanceTree { get { return staticDistanceTree; } } public HuffmanTree(byte[] codeLengths) { Debug.Assert( codeLengths.Length == MaxLiteralTreeElements || codeLengths.Length == MaxDistTreeElements || codeLengths.Length == NumberOfCodeLengthTreeElements, "we only expect three kinds of Length here"); codeLengthArray = codeLengths; if (codeLengthArray.Length == MaxLiteralTreeElements) { // bits for Literal/Length tree table tableBits = 9; } else { // bits for distance tree table and code length tree table tableBits = 7; } tableMask = (1 << tableBits) -1; CreateTable(); } // Generate the array contains huffman codes lengths for static huffman tree. // The data is in RFC 1951. static byte[] GetStaticLiteralTreeLength() { byte[] literalTreeLength = new byte[MaxLiteralTreeElements]; for (int i = 0; i <= 143; i++) literalTreeLength[i] = 8; for (int i = 144; i <= 255; i++) literalTreeLength[i] = 9; for (int i = 256; i <= 279; i++) literalTreeLength[i] = 7; for (int i = 280; i <= 287; i++) literalTreeLength[i] = 8; return literalTreeLength; } static byte[] GetStaticDistanceTreeLength() { byte[] staticDistanceTreeLength = new byte[MaxDistTreeElements]; for (int i = 0; i < MaxDistTreeElements; i++) { staticDistanceTreeLength[i] = 5; } return staticDistanceTreeLength; } // Calculate the huffman code for each character based on the code length for each character. // This algorithm is described in standard RFC 1951 uint[] CalculateHuffmanCode() { uint[] bitLengthCount = new uint[17]; foreach( int codeLength in codeLengthArray) { bitLengthCount[codeLength]++; } bitLengthCount[0] = 0; // clear count for length 0 uint[] nextCode = new uint[17]; uint tempCode = 0; for (int bits = 1; bits <= 16; bits++) { tempCode = (tempCode + bitLengthCount[bits-1]) << 1; nextCode[bits] = tempCode; } uint[] code = new uint[MaxLiteralTreeElements]; for (int i = 0; i < codeLengthArray.Length; i++) { int len = codeLengthArray[i]; if (len > 0) { code[i] = DecodeHelper.BitReverse(nextCode[len], len); nextCode[len]++; } } return code; } private void CreateTable() { uint[] codeArray = CalculateHuffmanCode(); table = new short[ 1 << tableBits]; #if DEBUG codeArrayDebug = codeArray; #endif // I need to find proof that left and right array will always be // enough. I think they are. left = new short[2* codeLengthArray.Length]; right = new short[2* codeLengthArray.Length]; short avail = (short)codeLengthArray.Length; for (int ch = 0; ch < codeLengthArray.Length; ch++) { // length of this code int len = codeLengthArray[ch]; if (len > 0) { // start value (bit reversed) int start = (int)codeArray[ch]; if (len <= tableBits) { // If a particular symbol is shorter than nine bits, // then that symbol's translation is duplicated // in all those entries that start with that symbol's bits. // For example, if the symbol is four bits, then it's duplicated // 32 times in a nine-bit table. If a symbol is nine bits long, // it appears in the table once. // // Make sure that in the loop below, code is always // less than table_size. // // On last iteration we store at array index: // initial_start_at + (locs-1)*increment // = initial_start_at + locs*increment - increment // = initial_start_at + (1 << tableBits) - increment // = initial_start_at + table_size - increment // // Therefore we must ensure: // initial_start_at + table_size - increment < table_size // or: initial_start_at < increment // int increment = 1 << len; if (start >= increment) { throw new InvalidDataException(SR.GetString(SR.InvalidHuffmanData)); } // Note the bits in the table are reverted. int locs = 1 << (tableBits - len); for (int j = 0; j < locs; j++) { table[start] = (short) ch; start += increment; } } else { // For any code which has length longer than num_elements, // build a binary tree. int overflowBits = len - tableBits; // the nodes we need to respent the data. int codeBitMask = 1 << tableBits; // mask to get current bit (the bits can't fit in the table) // the left, right table is used to repesent the // the rest bits. When we got the first part (number bits.) and look at // tbe table, we will need to follow the tree to find the real character. // This is in place to avoid bloating the table if there are // a few ones with long code. int index = start & ((1 << tableBits) -1); short[] array = table; do { short value = array[index]; if (value == 0) { // set up next pointer if this node is not used before. array[index] = (short)-avail; // use next available slot. value = (short)-avail; avail++; } Debug.Assert( value < 0, "Only negative number is used a tree pointer!"); if ((start & codeBitMask) == 0) { // if current bit is 0, go change the left array array = left; } else { // if current bit is 1, set value in the right array array = right; } index = -value; // go to next node codeBitMask <<= 1; overflowBits--; } while (overflowBits != 0); array[index] = (short) ch; } } } } // // This function will try to get enough bits from input and // try to decode the bits. // If there are no enought bits in the input, this function will return -1. // public int GetNextSymbol(InputBuffer input) { // Try to load 16 bits into input buffer if possible and get the bitBuffer value. // If there aren't 16 bits available we will return all we have in the // input buffer. uint bitBuffer = input.TryLoad16Bits(); if( input.AvailableBits == 0) { // running out of input. return -1; } // decode an element int symbol = table[bitBuffer & tableMask]; if( symbol < 0) { // this will be the start of the binary tree // navigate the tree uint mask = (uint)1 << tableBits; do { symbol = -symbol; if ((bitBuffer & mask) == 0) symbol = left[symbol]; else symbol = right[symbol]; mask <<= 1; } while (symbol < 0); } // // If this code is longer than the # bits we had in the bit buffer (i.e. // we read only part of the code), we can hit the entry in the table or the tree // for another symbol. However the length of another symbol will not match the // available bits count. if (codeLengthArray[symbol] > input.AvailableBits) { // We already tried to load 16 bits and maximum length is 15, // so this means we are running out of input. return -1; } input.SkipBits(codeLengthArray[symbol]); return symbol; } } } // File provided for Reference Use Only by Microsoft Corporation (c) 2007. // Copyright (c) Microsoft Corporation. All rights reserved.
Link Menu

This book is available now!
Buy at Amazon US or
Buy at Amazon UK
- SamlAuthorizationDecisionClaimResource.cs
- LinqDataSourceInsertEventArgs.cs
- HelpProvider.cs
- PersonalizableTypeEntry.cs
- CrossSiteScriptingValidation.cs
- BasicCellRelation.cs
- SqlCacheDependency.cs
- QueuePathDialog.cs
- XmlObjectSerializerWriteContextComplexJson.cs
- AttachedAnnotationChangedEventArgs.cs
- ProfileEventArgs.cs
- TimeManager.cs
- UnsafeCollabNativeMethods.cs
- ModelPropertyImpl.cs
- RecognizedPhrase.cs
- LogLogRecord.cs
- Package.cs
- Rights.cs
- DecoderExceptionFallback.cs
- ReferenceEqualityComparer.cs
- Stylesheet.cs
- IsolatedStorageFilePermission.cs
- TextBox.cs
- HelpInfo.cs
- SQLBinary.cs
- Vector3DCollectionValueSerializer.cs
- MatrixCamera.cs
- NetworkInformationException.cs
- MembershipValidatePasswordEventArgs.cs
- SafeThreadHandle.cs
- SqlBulkCopyColumnMappingCollection.cs
- AnimationClock.cs
- PtsPage.cs
- XmlLinkedNode.cs
- GroupBoxAutomationPeer.cs
- Condition.cs
- RectangleHotSpot.cs
- SecurityUtils.cs
- XmlBinaryReader.cs
- WebHttpBindingCollectionElement.cs
- DataGridViewComboBoxCell.cs
- RemotingClientProxy.cs
- SimpleWebHandlerParser.cs
- wgx_commands.cs
- ClonableStack.cs
- MsmqProcessProtocolHandler.cs
- ButtonBaseAutomationPeer.cs
- StackBuilderSink.cs
- Sequence.cs
- MaterialCollection.cs
- OleDbSchemaGuid.cs
- DesignerOptionService.cs
- EmbossBitmapEffect.cs
- SqlBooleanizer.cs
- XmlDictionary.cs
- HostSecurityManager.cs
- AnnotationDocumentPaginator.cs
- CompositeControl.cs
- PeerInvitationResponse.cs
- TableLayoutPanelBehavior.cs
- CheckBoxPopupAdapter.cs
- UriSection.cs
- SerializationFieldInfo.cs
- FileFormatException.cs
- LogLogRecord.cs
- dbdatarecord.cs
- Point3DConverter.cs
- MsmqProcessProtocolHandler.cs
- cryptoapiTransform.cs
- SHA512CryptoServiceProvider.cs
- NextPreviousPagerField.cs
- DataGridTextBoxColumn.cs
- XmlReader.cs
- GenericPrincipal.cs
- ZipIOCentralDirectoryDigitalSignature.cs
- BaseUriHelper.cs
- HtmlTernaryTree.cs
- CheckBox.cs
- AnimationClock.cs
- TailPinnedEventArgs.cs
- AutoGeneratedFieldProperties.cs
- ProgressBarRenderer.cs
- QualifiedCellIdBoolean.cs
- SqlProcedureAttribute.cs
- RemoteWebConfigurationHost.cs
- ExpandSegment.cs
- TableStyle.cs
- AnimationClock.cs
- CqlLexer.cs
- StringDictionaryEditor.cs
- prompt.cs
- WeakHashtable.cs
- ControlCachePolicy.cs
- TextBoxView.cs
- WSTransactionSection.cs
- DynamicDataExtensions.cs
- InvalidPrinterException.cs
- WindowsTokenRoleProvider.cs
- PopOutPanel.cs
- SourceFileBuildProvider.cs