531 lines
18 KiB
ObjectPascal

(* ***** BEGIN LICENSE BLOCK *****
* Version: MPL 1.1
*
* The contents of this file are subject to the Mozilla Public License Version
* 1.1 (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
* http://www.mozilla.org/MPL/
*
* Software distributed under the License is distributed on an "AS IS" basis,
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
* for the specific language governing rights and limitations under the
* License.
*
* The Original Code is TurboPower Abbrevia
*
* The Initial Developer of the Original Code is
* TurboPower Software
*
* Portions created by the Initial Developer are Copyright (C) 1997-2002
* the Initial Developer. All Rights Reserved.
*
* Contributor(s):
*
* ***** END LICENSE BLOCK ***** *)
{*********************************************************}
{* ABBREVIA: AbDfHufD.pas *}
{*********************************************************}
{* Deflate Huffman tree for decoder *}
{*********************************************************}
unit AbDfHufD;
{$I AbDefine.inc}
{Activate this compiler define and rebuild if you want the complete
huffman tree output to print to the current log. The output is
voluminous to say the least...}
{$IFDEF UseLogging}
{.$DEFINE EnableMegaLog}
{$ENDIF}
{Notes:
The object of this class is to build a decoder array, not to build a
Huffman tree particularly. We don't want to decode huffman strings bit
by bit. moving down the Huffman tree sometimes left, sometimes right.
Instead we want to grab a set of bits and look them up in an array.
Sometimes we'll grab too many bits, sure, but we can deal with that
later. So, the object of the exercise is to calculate the code for a
symbol, reverse it ('cos that's how the input bit stream will present
it to us) and set that element of the array to the decoded symbol
value (plus some extra information: bit lengths).
If the alphabet size were 19 (the codelengths huffman tree) and the
maximum code length 5, for example, the decoder array would be 2^5
elements long, much larger than the alphabet size. The user of this
class will be presenting sets of 5 bits for us to decode. We would
like to look up these 5 bits in the array (as an index) and have the
symbol returned. Now, since the alphabet size is much less than the
number of elements in the decoder array, we must set the other
elements in the array as well. Consider a symbol that has a code of
110 in this scenario. The reversed code is 011, or 3, so we'd be
setting element 3. However we should also be setting elements 01011,
10011, and 11011 to this symbol information as well, since the lookup
will be 5 bits long.
Because the code is a huffman code from a prefix tree, we won't get
any index clashes between actual codes by this "filling in" process.
For the codelength Huffman tree, the maximum code length is at most 7.
This equates to a 128 element array. For the literal and distance
trees, the max code length is at most 15. This equates to a 32768
element array.
For a given lookup value the decoder will return a 32-bit value. The
lower 16 bits is the decoded symbol, the next 8 bits is the code
length for that symbol, the last 8 bits (the most significant) are the
number of extra bits that must be extracted from the input bit stream.
}
interface
uses
AbDfBase;
type
TAbDfHuffmanUsage = ( {usage of a huffman decoder..}
huEncoding, {..encoding}
huDecoding, {..decoding}
huBoth); {..both (used for static trees)}
TAbDfDecodeHuffmanTree = class
private
FAlphaSize : integer;
FDecodes : PAbDfLongintList;
FDefMaxCodeLen : integer;
FEncodes : PAbDfLongintList;
{$IFOPT C+}
FMask : integer;
{$ENDIF}
FMaxCodeLen : integer;
FUsage : TAbDfHuffmanUsage;
protected
public
constructor Create(aAlphabetSize : integer;
aDefMaxCodeLen: integer;
aUsage : TAbDfHuffmanUsage);
destructor Destroy; override;
procedure Build(const aCodeLengths : array of integer;
aStartInx : integer;
aCount : integer;
const aExtraBits : array of byte;
aExtraOffset : integer);
function Decode(aLookupBits : integer) : longint;
function Encode(aSymbol : integer) : longint;
{$IFDEF UseLogging}
procedure DebugPrint(aLog : TAbLogger);
{$ENDIF}
property LookupBitLength : integer read FMaxCodeLen;
property Decodes : PAbDfLongintList read FDecodes;
property Encodes : PAbDfLongintList read FEncodes;
end;
var
AbStaticLiteralTree : TAbDfDecodeHuffmanTree;
AbStaticDistanceTree : TAbDfDecodeHuffmanTree;
implementation
uses
SysUtils;
const
PowerOfTwo : array [0..dfc_MaxCodeLength] of integer =
(1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 1024, 2048,
4096, 8192, 16384, 32768);
{===Debug helper routine=============================================}
{$IFDEF EnableMegaLog}
function CodeToStr(aCode : longint; aLen : integer) : string;
var
i : integer;
begin
if (aLen = 0) then
Result := 'no code'
else begin
SetLength(Result, 32);
FillChar(Result[1], 32, ' ');
for i := 32 downto (33-aLen) do begin
if Odd(aCode) then
Result[i] := '1'
else
Result[i] := '0';
aCode := aCode shr 1;
end;
end;
end;
{$ENDIF}
{====================================================================}
{===TAbDfDecodeHuffmanTree===========================================}
constructor TAbDfDecodeHuffmanTree.Create(
aAlphabetSize : integer;
aDefMaxCodeLen: integer;
aUsage : TAbDfHuffmanUsage);
begin
{protect against dumb programming mistakes}
Assert(aAlphabetSize >= 2,
'TAbDfDecodeHuffmanTree.Create: a huffman tree must be for at least two symbols');
{let the ancestor initialize}
inherited Create;
{save the alphabet size, etc}
FAlphaSize := aAlphabetSize;
FDefMaxCodeLen := aDefMaxCodeLen;
FUsage := aUsage;
{allocate the encoder array (needs to be initialized to zeros)}
if (aUsage <> huDecoding) then
FEncodes := AllocMem(FAlphaSize * sizeof(longint));
end;
{--------}
destructor TAbDfDecodeHuffmanTree.Destroy;
begin
{destroy the codes arrays}
if (FDecodes <> nil) then
FreeMem(FDecodes);
if (FEncodes <> nil) then
FreeMem(FEncodes);
{let the ancestor die}
inherited Destroy;
end;
{--------}
procedure TAbDfDecodeHuffmanTree.Build(
const aCodeLengths : array of integer;
aStartInx : integer;
aCount : integer;
const aExtraBits : array of byte;
aExtraOffset : integer);
const
ByteRevTable : array [0..255] of byte = (
$00, $80, $40, $C0, $20, $A0, $60, $E0, $10, $90, $50, $D0,
$30, $B0, $70, $F0, $08, $88, $48, $C8, $28, $A8, $68, $E8,
$18, $98, $58, $D8, $38, $B8, $78, $F8, $04, $84, $44, $C4,
$24, $A4, $64, $E4, $14, $94, $54, $D4, $34, $B4, $74, $F4,
$0C, $8C, $4C, $CC, $2C, $AC, $6C, $EC, $1C, $9C, $5C, $DC,
$3C, $BC, $7C, $FC, $02, $82, $42, $C2, $22, $A2, $62, $E2,
$12, $92, $52, $D2, $32, $B2, $72, $F2, $0A, $8A, $4A, $CA,
$2A, $AA, $6A, $EA, $1A, $9A, $5A, $DA, $3A, $BA, $7A, $FA,
$06, $86, $46, $C6, $26, $A6, $66, $E6, $16, $96, $56, $D6,
$36, $B6, $76, $F6, $0E, $8E, $4E, $CE, $2E, $AE, $6E, $EE,
$1E, $9E, $5E, $DE, $3E, $BE, $7E, $FE, $01, $81, $41, $C1,
$21, $A1, $61, $E1, $11, $91, $51, $D1, $31, $B1, $71, $F1,
$09, $89, $49, $C9, $29, $A9, $69, $E9, $19, $99, $59, $D9,
$39, $B9, $79, $F9, $05, $85, $45, $C5, $25, $A5, $65, $E5,
$15, $95, $55, $D5, $35, $B5, $75, $F5, $0D, $8D, $4D, $CD,
$2D, $AD, $6D, $ED, $1D, $9D, $5D, $DD, $3D, $BD, $7D, $FD,
$03, $83, $43, $C3, $23, $A3, $63, $E3, $13, $93, $53, $D3,
$33, $B3, $73, $F3, $0B, $8B, $4B, $CB, $2B, $AB, $6B, $EB,
$1B, $9B, $5B, $DB, $3B, $BB, $7B, $FB, $07, $87, $47, $C7,
$27, $A7, $67, $E7, $17, $97, $57, $D7, $37, $B7, $77, $F7,
$0F, $8F, $4F, $CF, $2F, $AF, $6F, $EF, $1F, $9F, $5F, $DF,
$3F, $BF, $7F, $FF);
var
i : integer;
Symbol : integer;
LengthCount : array [0..dfc_MaxCodeLength] of integer;
NextCode : array [0..dfc_MaxCodeLength] of integer;
Code : longint;
CodeLen : integer;
CodeData : longint;
DecoderLen : integer;
CodeIncr : integer;
Decodes : PAbDfLongintList;
Encodes : PAbDfLongintList;
{$IFDEF CPU386}
DecodesEnd : pointer;
{$ENDIF}
TablePtr : pointer;
begin
{count the number of instances of each code length and calculate the
maximum code length at the same time}
FillChar(LengthCount, sizeof(LengthCount), 0);
FMaxCodeLen := 0;
for i := 0 to pred(aCount) do begin
CodeLen := aCodeLengths[i + aStartInx];
Assert((CodeLen <= FDefMaxCodeLen),
Format('TAbDfDecodeHuffmanTree.Build: a code length is greater than %d',
[FDefMaxCodeLen]));
if (CodeLen > FMaxCodeLen) then
FMaxCodeLen := CodeLen;
inc(LengthCount[CodeLen]);
end;
{now we know the maximum code length we can allocate our decoder
array}
{$IFNDEF CPU386}
DecoderLen := 0;
{$ENDIF}
if (FUsage <> huEncoding) then begin
DecoderLen := PowerOfTwo[FMaxCodeLen];
GetMem(FDecodes, DecoderLen * sizeof(longint));
{$IFDEF CPU386}
DecodesEnd := PAnsiChar(FDecodes) + (DecoderLen * sizeof(longint));
{$ENDIF}
{$IFOPT C+}
FillChar(FDecodes^, DecoderLen * sizeof(longint), $FF);
FMask := not (DecoderLen - 1);
{$ENDIF}
end;
{calculate the start codes for each code length}
Code := 0;
LengthCount[0] := 0;
for i := 1 to FDefMaxCodeLen do begin
Code := (Code + LengthCount[i-1]) shl 1;
NextCode[i] := Code;
end;
{for speed and convenience}
Decodes := FDecodes;
Encodes := FEncodes;
TablePtr := @ByteRevTable;
{for each symbol...}
for Symbol := 0 to pred(aCount) do begin
{calculate the code length}
CodeLen := aCodeLengths[Symbol + aStartInx];
{if the code length were zero, just set the relevant entry in the
encoder array; the decoder array doesn't need anything}
if (CodeLen = 0) then begin
if (FUsage <> huDecoding) then
Encodes^[Symbol] := -1
end
{otherwise we need to fill elements in both the encoder and
decoder arrays}
else begin
{calculate *reversed* code}
Code := NextCode[CodeLen];
{$IFDEF CPU386}
asm
push esi
mov eax, Code
mov esi, TablePtr
xor ecx, ecx
xor edx, edx
mov cl, ah
mov dl, al
mov al, [esi+ecx]
mov ah, [esi+edx]
mov ecx, 16
pop esi
sub ecx, CodeLen
shr eax, cl
mov Code, eax
end;
{$ELSE}
CodeData:= Code;
LongRec(Code).Bytes[1]:= ByteRevTable[LongRec(CodeData).Bytes[0]];
LongRec(Code).Bytes[0]:= ByteRevTable[LongRec(CodeData).Bytes[1]];
Code:= Code shr (16-CodeLen);
{$ENDIF}
{set the code data (bit count, extra bits required, symbol),
everywhere the reversed code would appear in the decoder array;
set the code data in the encoder array as well}
if (Symbol >= aExtraOffset) then begin
if (FUsage <> huEncoding) then
CodeData := Symbol + { symbol}
(CodeLen shl 16) + { code length}
(aExtraBits[Symbol-aExtraOffset] shl 24);
{ extra bits required}
if (FUsage <> huDecoding) then
Encodes^[Symbol] := Code + { code}
(CodeLen shl 16) + { code length}
(aExtraBits[Symbol-aExtraOffset] shl 24)
{ extra bits required}
end
else begin
if (FUsage <> huEncoding) then
CodeData := Symbol + { symbol}
(CodeLen shl 16); { code length}
if (FUsage <> huDecoding) then
Encodes^[Symbol] := Code + { code}
(CodeLen shl 16); { code length}
end;
{OPTIMIZATION NOTE: the following code
CodeIncr := PowerOfTwo[CodeLen];
while Code < DecoderLen do begin
Decodes^[Code] := CodeData;
inc(Code, CodeIncr);
end;
was replaced by the asm code below to improve the speed. The
code in the loop is the big time sink in this routine so it was
best to replace it.}
if (FUsage <> huEncoding) then begin
{$IFDEF CPU386}
CodeIncr := PowerOfTwo[CodeLen] * sizeof(longint);
asm
push edi { save edi}
mov eax, Decodes { get the Decodes array}
mov edi, DecodesEnd { get the end of the Decodes array}
mov edx, Code { get Code and..}
shl edx, 1 { ..multiply by 4}
shl edx, 1
add eax, edx { eax => first element to be set}
mov edx, CodeData { get the CodeData}
mov ecx, CodeIncr { get the increment per loop}
@@1:
mov [eax], edx { set the element}
add eax, ecx { move to the next element}
cmp eax, edi { if we haven't gone past the end..}
jl @@1 { ..go back for the next one}
pop edi { retrieve edi}
end;
{$ELSE}
CodeIncr := PowerOfTwo[CodeLen];
while Code < DecoderLen do begin
Decodes^[Code] := CodeData;
inc(Code, CodeIncr);
end;
{$ENDIF}
end;
{we've used this code up for this symbol, so increment for the
next symbol at this code length}
inc(NextCode[CodeLen]);
end;
end;
end;
{--------}
{$IFDEF UseLogging}
procedure TAbDfDecodeHuffmanTree.DebugPrint(aLog : TAbLogger);
{$IFDEF EnableMegaLog}
var
i : integer;
Code : longint;
{$ENDIF}
begin
{to print the huffman tree, we must have a logger...}
Assert(aLog <> nil,
'TAbDfDecodeHuffmanTree.DebugPrint needs a logger object to which to print');
if (FUsage <> huEncoding) then begin
aLog.WriteLine('Huffman decoder array');
aLog.WriteLine(Format('Alphabet size: %d', [FAlphaSize]));
aLog.WriteLine(Format('Max codelength: %d', [FMaxCodeLen]));
{$IFDEF EnableMegaLog}
aLog.WriteLine('Index Len Xtra Symbol Reversed Code');
for i := 0 to pred(PowerOfTwo[FMaxCodeLen]) do begin
Code := FDecodes^[i];
if (Code = -1) then
aLog.WriteLine(Format('%5d%49s', [i, 'no code']))
else
aLog.WriteLine(Format('%5d%4d%5d%7d%33s',
[i,
((Code shr 16) and $FF),
((Code shr 24) and $FF),
(Code and $FFFF),
CodeToStr(i, ((Code shr 16) and $FF))]));
end;
aLog.WriteLine('---end decoder array---');
{$ENDIF}
end;
if (FUsage <> huDecoding) then begin
aLog.WriteLine('Huffman encoder array');
aLog.WriteLine(Format('Alphabet size: %d', [FAlphaSize]));
{$IFDEF EnableMegaLog}
aLog.WriteLine('Symbol Len Xtra Reversed Code');
for i := 0 to pred(FAlphaSize) do begin
Code := FEncodes^[i];
if (Code = -1) then
aLog.WriteLine(Format('%6d%42s', [i, 'no code']))
else
aLog.WriteLine(Format('%6d%4d%5d%33s',
[i,
((Code shr 16) and $FF),
((Code shr 24) and $FF),
CodeToStr((Code and $FFFF), ((Code shr 16) and $FF))]));
end;
aLog.WriteLine('---end encoder array---');
{$ENDIF}
end;
end;
{$ENDIF}
{--------}
function TAbDfDecodeHuffmanTree.Decode(aLookupBits : integer) : longint;
begin
{protect against dumb programming mistakes (note: FMask only exists
if assertions are on)}
{$IFOPT C+}
Assert((aLookupBits and FMask) = 0,
'TAbDfDecodeHuffmanTree.Decode: trying to decode too many bits, use LookupBitLength property');
{$ENDIF}
{return the code data}
Result := FDecodes^[aLookupBits];
end;
{--------}
function TAbDfDecodeHuffmanTree.Encode(aSymbol : integer) : longint;
begin
{protect against dumb programming mistakes}
Assert((0 <= aSymbol) and (aSymbol < FAlphaSize),
'TAbDfDecodeHuffmanTree.Encode: trying to encode a symbol that is not in the alphabet');
{return the code data}
Result := FEncodes^[aSymbol];
{if the result is -1, it's another programming mistake: the user is
attempting to get a code for a symbol that wasn't being used}
Assert(Result <> -1,
'TAbDfDecodeHuffmanTree.Encode: trying to encode a symbol that was not used');
end;
{====================================================================}
{===BuildStaticTrees=================================================}
procedure BuildStaticTrees;
var
i : integer;
CodeLens : array [0..287] of integer;
begin
{this routine builds the static huffman trees, those whose code
lengths are determined by the deflate spec}
{the static literal tree first}
for i := 0 to 143 do
CodeLens[i] := 8;
for i := 144 to 255 do
CodeLens[i] := 9;
for i := 256 to 279 do
CodeLens[i] := 7;
for i := 280 to 287 do
CodeLens[i] := 8;
AbStaticLiteralTree := TAbDfDecodeHuffmanTree.Create(288, 15, huBoth);
AbStaticLiteralTree.Build(CodeLens, 0, 288,
dfc_LitExtraBits, dfc_LitExtraOffset);
{the static distance tree afterwards}
for i := 0 to 31 do
CodeLens[i] := 5;
AbStaticDistanceTree := TAbDfDecodeHuffmanTree.Create(32, 15, huBoth);
AbStaticDistanceTree.Build(CodeLens, 0, 32,
dfc_DistExtraBits, dfc_DistExtraOffset);
end;
{====================================================================}
initialization
BuildStaticTrees;
finalization
AbStaticLiteralTree.Free;
AbStaticDistanceTree.Free;
end.