Code:
/ Dotnetfx_Vista_SP2 / Dotnetfx_Vista_SP2 / 8.0.50727.4016 / DEVDIV / depot / DevDiv / releases / whidbey / NetFxQFE / ndp / fx / src / Sys / System / IO / compression / HuffmanTree.cs / 1 / HuffmanTree.cs
namespace System.IO.Compression
{
using System;
using System.Diagnostics;
// Strictly speaking this class is not a HuffmanTree, this class is
// a lookup table combined with a HuffmanTree. The idea is to speed up
// the lookup for short symbols (they should appear more frequently ideally.)
// However we don't want to create a huge table since it might take longer to
// build the table than decoding (Deflate usually generates new tables frequently.)
//
// Jean-loup Gailly and Mark Adler gave a very good explanation about this.
// The full text (algorithm.txt) can be found inside
// ftp://ftp.uu.net/pub/archiving/zip/zlib/zlib.zip.
//
// Following paper explains decoding in details:
// Hirschberg and Lelewer, "Efficient decoding of prefix codes,"
// Comm. ACM, 33,4, April 1990, pp. 449-459.
//
internal class HuffmanTree {
internal const int MaxLiteralTreeElements = 288;
internal const int MaxDistTreeElements = 32;
internal const int EndOfBlockCode = 256;
internal const int NumberOfCodeLengthTreeElements = 19;
int tableBits;
short[] table;
short[] left;
short[] right;
byte[] codeLengthArray;
#if DEBUG
uint[] codeArrayDebug;
#endif
int tableMask;
// huffman tree for static block
static HuffmanTree staticLiteralLengthTree;
static HuffmanTree staticDistanceTree;
static HuffmanTree() {
// construct the static literal tree and distance tree
staticLiteralLengthTree = new HuffmanTree(GetStaticLiteralTreeLength());
staticDistanceTree = new HuffmanTree(GetStaticDistanceTreeLength());
}
static public HuffmanTree StaticLiteralLengthTree {
get {
return staticLiteralLengthTree;
}
}
static public HuffmanTree StaticDistanceTree {
get {
return staticDistanceTree;
}
}
public HuffmanTree(byte[] codeLengths) {
Debug.Assert( codeLengths.Length == MaxLiteralTreeElements
|| codeLengths.Length == MaxDistTreeElements
|| codeLengths.Length == NumberOfCodeLengthTreeElements,
"we only expect three kinds of Length here");
codeLengthArray = codeLengths;
if (codeLengthArray.Length == MaxLiteralTreeElements) { // bits for Literal/Length tree table
tableBits = 9;
}
else { // bits for distance tree table and code length tree table
tableBits = 7;
}
tableMask = (1 << tableBits) -1;
CreateTable();
}
// Generate the array contains huffman codes lengths for static huffman tree.
// The data is in RFC 1951.
static byte[] GetStaticLiteralTreeLength() {
byte[] literalTreeLength = new byte[MaxLiteralTreeElements];
for (int i = 0; i <= 143; i++)
literalTreeLength[i] = 8;
for (int i = 144; i <= 255; i++)
literalTreeLength[i] = 9;
for (int i = 256; i <= 279; i++)
literalTreeLength[i] = 7;
for (int i = 280; i <= 287; i++)
literalTreeLength[i] = 8;
return literalTreeLength;
}
static byte[] GetStaticDistanceTreeLength() {
byte[] staticDistanceTreeLength = new byte[MaxDistTreeElements];
for (int i = 0; i < MaxDistTreeElements; i++) {
staticDistanceTreeLength[i] = 5;
}
return staticDistanceTreeLength;
}
// Calculate the huffman code for each character based on the code length for each character.
// This algorithm is described in standard RFC 1951
uint[] CalculateHuffmanCode() {
uint[] bitLengthCount = new uint[17];
foreach( int codeLength in codeLengthArray) {
bitLengthCount[codeLength]++;
}
bitLengthCount[0] = 0; // clear count for length 0
uint[] nextCode = new uint[17];
uint tempCode = 0;
for (int bits = 1; bits <= 16; bits++) {
tempCode = (tempCode + bitLengthCount[bits-1]) << 1;
nextCode[bits] = tempCode;
}
uint[] code = new uint[MaxLiteralTreeElements];
for (int i = 0; i < codeLengthArray.Length; i++) {
int len = codeLengthArray[i];
if (len > 0) {
code[i] = DecodeHelper.BitReverse(nextCode[len], len);
nextCode[len]++;
}
}
return code;
}
private void CreateTable() {
uint[] codeArray = CalculateHuffmanCode();
table = new short[ 1 << tableBits];
#if DEBUG
codeArrayDebug = codeArray;
#endif
// I need to find proof that left and right array will always be
// enough. I think they are.
left = new short[2* codeLengthArray.Length];
right = new short[2* codeLengthArray.Length];
short avail = (short)codeLengthArray.Length;
for (int ch = 0; ch < codeLengthArray.Length; ch++) {
// length of this code
int len = codeLengthArray[ch];
if (len > 0) {
// start value (bit reversed)
int start = (int)codeArray[ch];
if (len <= tableBits) {
// If a particular symbol is shorter than nine bits,
// then that symbol's translation is duplicated
// in all those entries that start with that symbol's bits.
// For example, if the symbol is four bits, then it's duplicated
// 32 times in a nine-bit table. If a symbol is nine bits long,
// it appears in the table once.
//
// Make sure that in the loop below, code is always
// less than table_size.
//
// On last iteration we store at array index:
// initial_start_at + (locs-1)*increment
// = initial_start_at + locs*increment - increment
// = initial_start_at + (1 << tableBits) - increment
// = initial_start_at + table_size - increment
//
// Therefore we must ensure:
// initial_start_at + table_size - increment < table_size
// or: initial_start_at < increment
//
int increment = 1 << len;
if (start >= increment) {
throw new InvalidDataException(SR.GetString(SR.InvalidHuffmanData));
}
// Note the bits in the table are reverted.
int locs = 1 << (tableBits - len);
for (int j = 0; j < locs; j++) {
table[start] = (short) ch;
start += increment;
}
} else {
// For any code which has length longer than num_elements,
// build a binary tree.
int overflowBits = len - tableBits; // the nodes we need to respent the data.
int codeBitMask = 1 << tableBits; // mask to get current bit (the bits can't fit in the table)
// the left, right table is used to repesent the
// the rest bits. When we got the first part (number bits.) and look at
// tbe table, we will need to follow the tree to find the real character.
// This is in place to avoid bloating the table if there are
// a few ones with long code.
int index = start & ((1 << tableBits) -1);
short[] array = table;
do {
short value = array[index];
if (value == 0) { // set up next pointer if this node is not used before.
array[index] = (short)-avail; // use next available slot.
value = (short)-avail;
avail++;
}
Debug.Assert( value < 0, "Only negative number is used a tree pointer!");
if ((start & codeBitMask) == 0) { // if current bit is 0, go change the left array
array = left;
} else { // if current bit is 1, set value in the right array
array = right;
}
index = -value; // go to next node
codeBitMask <<= 1;
overflowBits--;
} while (overflowBits != 0);
array[index] = (short) ch;
}
}
}
}
//
// This function will try to get enough bits from input and
// try to decode the bits.
// If there are no enought bits in the input, this function will return -1.
//
public int GetNextSymbol(InputBuffer input) {
// Try to load 16 bits into input buffer if possible and get the bitBuffer value.
// If there aren't 16 bits available we will return all we have in the
// input buffer.
uint bitBuffer = input.TryLoad16Bits();
if( input.AvailableBits == 0) { // running out of input.
return -1;
}
// decode an element
int symbol = table[bitBuffer & tableMask];
if( symbol < 0) { // this will be the start of the binary tree
// navigate the tree
uint mask = (uint)1 << tableBits;
do
{
symbol = -symbol;
if ((bitBuffer & mask) == 0)
symbol = left[symbol];
else
symbol = right[symbol];
mask <<= 1;
} while (symbol < 0);
}
//
// If this code is longer than the # bits we had in the bit buffer (i.e.
// we read only part of the code), we can hit the entry in the table or the tree
// for another symbol. However the length of another symbol will not match the
// available bits count.
if (codeLengthArray[symbol] > input.AvailableBits) {
// We already tried to load 16 bits and maximum length is 15,
// so this means we are running out of input.
return -1;
}
input.SkipBits(codeLengthArray[symbol]);
return symbol;
}
}
}
// File provided for Reference Use Only by Microsoft Corporation (c) 2007.
// Copyright (c) Microsoft Corporation. All rights reserved.
namespace System.IO.Compression
{
using System;
using System.Diagnostics;
// Strictly speaking this class is not a HuffmanTree, this class is
// a lookup table combined with a HuffmanTree. The idea is to speed up
// the lookup for short symbols (they should appear more frequently ideally.)
// However we don't want to create a huge table since it might take longer to
// build the table than decoding (Deflate usually generates new tables frequently.)
//
// Jean-loup Gailly and Mark Adler gave a very good explanation about this.
// The full text (algorithm.txt) can be found inside
// ftp://ftp.uu.net/pub/archiving/zip/zlib/zlib.zip.
//
// Following paper explains decoding in details:
// Hirschberg and Lelewer, "Efficient decoding of prefix codes,"
// Comm. ACM, 33,4, April 1990, pp. 449-459.
//
internal class HuffmanTree {
internal const int MaxLiteralTreeElements = 288;
internal const int MaxDistTreeElements = 32;
internal const int EndOfBlockCode = 256;
internal const int NumberOfCodeLengthTreeElements = 19;
int tableBits;
short[] table;
short[] left;
short[] right;
byte[] codeLengthArray;
#if DEBUG
uint[] codeArrayDebug;
#endif
int tableMask;
// huffman tree for static block
static HuffmanTree staticLiteralLengthTree;
static HuffmanTree staticDistanceTree;
static HuffmanTree() {
// construct the static literal tree and distance tree
staticLiteralLengthTree = new HuffmanTree(GetStaticLiteralTreeLength());
staticDistanceTree = new HuffmanTree(GetStaticDistanceTreeLength());
}
static public HuffmanTree StaticLiteralLengthTree {
get {
return staticLiteralLengthTree;
}
}
static public HuffmanTree StaticDistanceTree {
get {
return staticDistanceTree;
}
}
public HuffmanTree(byte[] codeLengths) {
Debug.Assert( codeLengths.Length == MaxLiteralTreeElements
|| codeLengths.Length == MaxDistTreeElements
|| codeLengths.Length == NumberOfCodeLengthTreeElements,
"we only expect three kinds of Length here");
codeLengthArray = codeLengths;
if (codeLengthArray.Length == MaxLiteralTreeElements) { // bits for Literal/Length tree table
tableBits = 9;
}
else { // bits for distance tree table and code length tree table
tableBits = 7;
}
tableMask = (1 << tableBits) -1;
CreateTable();
}
// Generate the array contains huffman codes lengths for static huffman tree.
// The data is in RFC 1951.
static byte[] GetStaticLiteralTreeLength() {
byte[] literalTreeLength = new byte[MaxLiteralTreeElements];
for (int i = 0; i <= 143; i++)
literalTreeLength[i] = 8;
for (int i = 144; i <= 255; i++)
literalTreeLength[i] = 9;
for (int i = 256; i <= 279; i++)
literalTreeLength[i] = 7;
for (int i = 280; i <= 287; i++)
literalTreeLength[i] = 8;
return literalTreeLength;
}
static byte[] GetStaticDistanceTreeLength() {
byte[] staticDistanceTreeLength = new byte[MaxDistTreeElements];
for (int i = 0; i < MaxDistTreeElements; i++) {
staticDistanceTreeLength[i] = 5;
}
return staticDistanceTreeLength;
}
// Calculate the huffman code for each character based on the code length for each character.
// This algorithm is described in standard RFC 1951
uint[] CalculateHuffmanCode() {
uint[] bitLengthCount = new uint[17];
foreach( int codeLength in codeLengthArray) {
bitLengthCount[codeLength]++;
}
bitLengthCount[0] = 0; // clear count for length 0
uint[] nextCode = new uint[17];
uint tempCode = 0;
for (int bits = 1; bits <= 16; bits++) {
tempCode = (tempCode + bitLengthCount[bits-1]) << 1;
nextCode[bits] = tempCode;
}
uint[] code = new uint[MaxLiteralTreeElements];
for (int i = 0; i < codeLengthArray.Length; i++) {
int len = codeLengthArray[i];
if (len > 0) {
code[i] = DecodeHelper.BitReverse(nextCode[len], len);
nextCode[len]++;
}
}
return code;
}
private void CreateTable() {
uint[] codeArray = CalculateHuffmanCode();
table = new short[ 1 << tableBits];
#if DEBUG
codeArrayDebug = codeArray;
#endif
// I need to find proof that left and right array will always be
// enough. I think they are.
left = new short[2* codeLengthArray.Length];
right = new short[2* codeLengthArray.Length];
short avail = (short)codeLengthArray.Length;
for (int ch = 0; ch < codeLengthArray.Length; ch++) {
// length of this code
int len = codeLengthArray[ch];
if (len > 0) {
// start value (bit reversed)
int start = (int)codeArray[ch];
if (len <= tableBits) {
// If a particular symbol is shorter than nine bits,
// then that symbol's translation is duplicated
// in all those entries that start with that symbol's bits.
// For example, if the symbol is four bits, then it's duplicated
// 32 times in a nine-bit table. If a symbol is nine bits long,
// it appears in the table once.
//
// Make sure that in the loop below, code is always
// less than table_size.
//
// On last iteration we store at array index:
// initial_start_at + (locs-1)*increment
// = initial_start_at + locs*increment - increment
// = initial_start_at + (1 << tableBits) - increment
// = initial_start_at + table_size - increment
//
// Therefore we must ensure:
// initial_start_at + table_size - increment < table_size
// or: initial_start_at < increment
//
int increment = 1 << len;
if (start >= increment) {
throw new InvalidDataException(SR.GetString(SR.InvalidHuffmanData));
}
// Note the bits in the table are reverted.
int locs = 1 << (tableBits - len);
for (int j = 0; j < locs; j++) {
table[start] = (short) ch;
start += increment;
}
} else {
// For any code which has length longer than num_elements,
// build a binary tree.
int overflowBits = len - tableBits; // the nodes we need to respent the data.
int codeBitMask = 1 << tableBits; // mask to get current bit (the bits can't fit in the table)
// the left, right table is used to repesent the
// the rest bits. When we got the first part (number bits.) and look at
// tbe table, we will need to follow the tree to find the real character.
// This is in place to avoid bloating the table if there are
// a few ones with long code.
int index = start & ((1 << tableBits) -1);
short[] array = table;
do {
short value = array[index];
if (value == 0) { // set up next pointer if this node is not used before.
array[index] = (short)-avail; // use next available slot.
value = (short)-avail;
avail++;
}
Debug.Assert( value < 0, "Only negative number is used a tree pointer!");
if ((start & codeBitMask) == 0) { // if current bit is 0, go change the left array
array = left;
} else { // if current bit is 1, set value in the right array
array = right;
}
index = -value; // go to next node
codeBitMask <<= 1;
overflowBits--;
} while (overflowBits != 0);
array[index] = (short) ch;
}
}
}
}
//
// This function will try to get enough bits from input and
// try to decode the bits.
// If there are no enought bits in the input, this function will return -1.
//
public int GetNextSymbol(InputBuffer input) {
// Try to load 16 bits into input buffer if possible and get the bitBuffer value.
// If there aren't 16 bits available we will return all we have in the
// input buffer.
uint bitBuffer = input.TryLoad16Bits();
if( input.AvailableBits == 0) { // running out of input.
return -1;
}
// decode an element
int symbol = table[bitBuffer & tableMask];
if( symbol < 0) { // this will be the start of the binary tree
// navigate the tree
uint mask = (uint)1 << tableBits;
do
{
symbol = -symbol;
if ((bitBuffer & mask) == 0)
symbol = left[symbol];
else
symbol = right[symbol];
mask <<= 1;
} while (symbol < 0);
}
//
// If this code is longer than the # bits we had in the bit buffer (i.e.
// we read only part of the code), we can hit the entry in the table or the tree
// for another symbol. However the length of another symbol will not match the
// available bits count.
if (codeLengthArray[symbol] > input.AvailableBits) {
// We already tried to load 16 bits and maximum length is 15,
// so this means we are running out of input.
return -1;
}
input.SkipBits(codeLengthArray[symbol]);
return symbol;
}
}
}
// File provided for Reference Use Only by Microsoft Corporation (c) 2007.
// Copyright (c) Microsoft Corporation. All rights reserved.
Link Menu

This book is available now!
Buy at Amazon US or
Buy at Amazon UK
- HttpCachePolicy.cs
- CodeTryCatchFinallyStatement.cs
- FormsAuthenticationCredentials.cs
- InvokeMethodActivityDesigner.cs
- SelectingProviderEventArgs.cs
- NativeWrapper.cs
- TypedDataSetSchemaImporterExtensionFx35.cs
- PropertyInformation.cs
- DataTableTypeConverter.cs
- MethodExpr.cs
- IndexedGlyphRun.cs
- XslAstAnalyzer.cs
- ClientApiGenerator.cs
- DataConnectionHelper.cs
- SerializationUtilities.cs
- CodeGen.cs
- PropertiesTab.cs
- WebPartManagerInternals.cs
- PrivilegeNotHeldException.cs
- DataGridHeadersVisibilityToVisibilityConverter.cs
- TreeNodeCollection.cs
- CompositeFontFamily.cs
- ActivityUtilities.cs
- TreeNodeBindingCollection.cs
- MissingMethodException.cs
- SyndicationDeserializer.cs
- pingexception.cs
- PromptEventArgs.cs
- ToolStripItemRenderEventArgs.cs
- DownloadProgressEventArgs.cs
- RadioButtonAutomationPeer.cs
- WriterOutput.cs
- ACL.cs
- StringReader.cs
- StrokeNode.cs
- HtmlLink.cs
- AuthStoreRoleProvider.cs
- InstancePersistenceContext.cs
- TreeViewEvent.cs
- precedingquery.cs
- SecurityRuntime.cs
- WindowsSspiNegotiation.cs
- SkipStoryboardToFill.cs
- MasterPageBuildProvider.cs
- OutputCacheSettingsSection.cs
- ZipIOBlockManager.cs
- Int32Rect.cs
- XmlQualifiedName.cs
- InstanceLockQueryResult.cs
- OrderingExpression.cs
- CodeFieldReferenceExpression.cs
- ReceiveContextCollection.cs
- SqlWebEventProvider.cs
- TreeView.cs
- UnitySerializationHolder.cs
- VisualProxy.cs
- ErrorFormatterPage.cs
- ListBindingConverter.cs
- connectionpool.cs
- EnumerableRowCollectionExtensions.cs
- HtmlPanelAdapter.cs
- SqlDesignerDataSourceView.cs
- GestureRecognitionResult.cs
- Int32Storage.cs
- SelectionBorderGlyph.cs
- ResolvedKeyFrameEntry.cs
- SignerInfo.cs
- TemplateXamlParser.cs
- JobInputBins.cs
- GeneralTransform3DTo2DTo3D.cs
- DataKeyArray.cs
- RegionIterator.cs
- X509InitiatorCertificateServiceElement.cs
- FilterFactory.cs
- BamlBinaryWriter.cs
- SolidBrush.cs
- NominalTypeEliminator.cs
- InteropAutomationProvider.cs
- TraceUtility.cs
- GreenMethods.cs
- SurrogateSelector.cs
- TaiwanCalendar.cs
- IPGlobalProperties.cs
- WebPartZoneCollection.cs
- OleDbConnectionPoolGroupProviderInfo.cs
- Quad.cs
- HotSpotCollection.cs
- ByteStack.cs
- PropertyExpression.cs
- SizeAnimationUsingKeyFrames.cs
- ConstantSlot.cs
- EdmItemCollection.cs
- MobileUserControl.cs
- ImageButton.cs
- DataControlImageButton.cs
- CharacterString.cs
- ResumeStoryboard.cs
- WebConfigurationManager.cs
- XmlResolver.cs
- SchemaImporterExtensionsSection.cs