531 lines
18 KiB
ObjectPascal
531 lines
18 KiB
ObjectPascal
(* ***** BEGIN LICENSE BLOCK *****
|
|
* Version: MPL 1.1
|
|
*
|
|
* The contents of this file are subject to the Mozilla Public License Version
|
|
* 1.1 (the "License"); you may not use this file except in compliance with
|
|
* the License. You may obtain a copy of the License at
|
|
* http://www.mozilla.org/MPL/
|
|
*
|
|
* Software distributed under the License is distributed on an "AS IS" basis,
|
|
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
|
|
* for the specific language governing rights and limitations under the
|
|
* License.
|
|
*
|
|
* The Original Code is TurboPower Abbrevia
|
|
*
|
|
* The Initial Developer of the Original Code is
|
|
* TurboPower Software
|
|
*
|
|
* Portions created by the Initial Developer are Copyright (C) 1997-2002
|
|
* the Initial Developer. All Rights Reserved.
|
|
*
|
|
* Contributor(s):
|
|
*
|
|
* ***** END LICENSE BLOCK ***** *)
|
|
|
|
{*********************************************************}
|
|
{* ABBREVIA: AbDfHufD.pas *}
|
|
{*********************************************************}
|
|
{* Deflate Huffman tree for decoder *}
|
|
{*********************************************************}
|
|
|
|
unit AbDfHufD;
|
|
|
|
{$I AbDefine.inc}
|
|
|
|
{Activate this compiler define and rebuild if you want the complete
|
|
huffman tree output to print to the current log. The output is
|
|
voluminous to say the least...}
|
|
{$IFDEF UseLogging}
|
|
{.$DEFINE EnableMegaLog}
|
|
{$ENDIF}
|
|
|
|
{Notes:
|
|
|
|
The object of this class is to build a decoder array, not to build a
|
|
Huffman tree particularly. We don't want to decode huffman strings bit
|
|
by bit. moving down the Huffman tree sometimes left, sometimes right.
|
|
Instead we want to grab a set of bits and look them up in an array.
|
|
Sometimes we'll grab too many bits, sure, but we can deal with that
|
|
later. So, the object of the exercise is to calculate the code for a
|
|
symbol, reverse it ('cos that's how the input bit stream will present
|
|
it to us) and set that element of the array to the decoded symbol
|
|
value (plus some extra information: bit lengths).
|
|
|
|
If the alphabet size were 19 (the codelengths huffman tree) and the
|
|
maximum code length 5, for example, the decoder array would be 2^5
|
|
elements long, much larger than the alphabet size. The user of this
|
|
class will be presenting sets of 5 bits for us to decode. We would
|
|
like to look up these 5 bits in the array (as an index) and have the
|
|
symbol returned. Now, since the alphabet size is much less than the
|
|
number of elements in the decoder array, we must set the other
|
|
elements in the array as well. Consider a symbol that has a code of
|
|
110 in this scenario. The reversed code is 011, or 3, so we'd be
|
|
setting element 3. However we should also be setting elements 01011,
|
|
10011, and 11011 to this symbol information as well, since the lookup
|
|
will be 5 bits long.
|
|
|
|
Because the code is a huffman code from a prefix tree, we won't get
|
|
any index clashes between actual codes by this "filling in" process.
|
|
|
|
For the codelength Huffman tree, the maximum code length is at most 7.
|
|
This equates to a 128 element array. For the literal and distance
|
|
trees, the max code length is at most 15. This equates to a 32768
|
|
element array.
|
|
|
|
For a given lookup value the decoder will return a 32-bit value. The
|
|
lower 16 bits is the decoded symbol, the next 8 bits is the code
|
|
length for that symbol, the last 8 bits (the most significant) are the
|
|
number of extra bits that must be extracted from the input bit stream.
|
|
}
|
|
|
|
interface
|
|
|
|
uses
|
|
AbDfBase;
|
|
|
|
type
|
|
TAbDfHuffmanUsage = ( {usage of a huffman decoder..}
|
|
huEncoding, {..encoding}
|
|
huDecoding, {..decoding}
|
|
huBoth); {..both (used for static trees)}
|
|
|
|
TAbDfDecodeHuffmanTree = class
|
|
private
|
|
FAlphaSize : integer;
|
|
FDecodes : PAbDfLongintList;
|
|
FDefMaxCodeLen : integer;
|
|
FEncodes : PAbDfLongintList;
|
|
{$IFOPT C+}
|
|
FMask : integer;
|
|
{$ENDIF}
|
|
FMaxCodeLen : integer;
|
|
FUsage : TAbDfHuffmanUsage;
|
|
protected
|
|
public
|
|
constructor Create(aAlphabetSize : integer;
|
|
aDefMaxCodeLen: integer;
|
|
aUsage : TAbDfHuffmanUsage);
|
|
destructor Destroy; override;
|
|
|
|
procedure Build(const aCodeLengths : array of integer;
|
|
aStartInx : integer;
|
|
aCount : integer;
|
|
const aExtraBits : array of byte;
|
|
aExtraOffset : integer);
|
|
function Decode(aLookupBits : integer) : longint;
|
|
function Encode(aSymbol : integer) : longint;
|
|
|
|
{$IFDEF UseLogging}
|
|
procedure DebugPrint(aLog : TAbLogger);
|
|
{$ENDIF}
|
|
|
|
property LookupBitLength : integer read FMaxCodeLen;
|
|
property Decodes : PAbDfLongintList read FDecodes;
|
|
property Encodes : PAbDfLongintList read FEncodes;
|
|
end;
|
|
|
|
var
|
|
AbStaticLiteralTree : TAbDfDecodeHuffmanTree;
|
|
AbStaticDistanceTree : TAbDfDecodeHuffmanTree;
|
|
|
|
implementation
|
|
|
|
uses
|
|
SysUtils;
|
|
|
|
const
|
|
PowerOfTwo : array [0..dfc_MaxCodeLength] of integer =
|
|
(1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 1024, 2048,
|
|
4096, 8192, 16384, 32768);
|
|
|
|
{===Debug helper routine=============================================}
|
|
{$IFDEF EnableMegaLog}
|
|
function CodeToStr(aCode : longint; aLen : integer) : string;
|
|
var
|
|
i : integer;
|
|
begin
|
|
if (aLen = 0) then
|
|
Result := 'no code'
|
|
else begin
|
|
SetLength(Result, 32);
|
|
FillChar(Result[1], 32, ' ');
|
|
for i := 32 downto (33-aLen) do begin
|
|
if Odd(aCode) then
|
|
Result[i] := '1'
|
|
else
|
|
Result[i] := '0';
|
|
aCode := aCode shr 1;
|
|
end;
|
|
end;
|
|
end;
|
|
{$ENDIF}
|
|
{====================================================================}
|
|
|
|
|
|
{===TAbDfDecodeHuffmanTree===========================================}
|
|
constructor TAbDfDecodeHuffmanTree.Create(
|
|
aAlphabetSize : integer;
|
|
aDefMaxCodeLen: integer;
|
|
aUsage : TAbDfHuffmanUsage);
|
|
begin
|
|
{protect against dumb programming mistakes}
|
|
Assert(aAlphabetSize >= 2,
|
|
'TAbDfDecodeHuffmanTree.Create: a huffman tree must be for at least two symbols');
|
|
|
|
{let the ancestor initialize}
|
|
inherited Create;
|
|
|
|
{save the alphabet size, etc}
|
|
FAlphaSize := aAlphabetSize;
|
|
FDefMaxCodeLen := aDefMaxCodeLen;
|
|
FUsage := aUsage;
|
|
|
|
{allocate the encoder array (needs to be initialized to zeros)}
|
|
if (aUsage <> huDecoding) then
|
|
FEncodes := AllocMem(FAlphaSize * sizeof(longint));
|
|
end;
|
|
{--------}
|
|
destructor TAbDfDecodeHuffmanTree.Destroy;
|
|
begin
|
|
{destroy the codes arrays}
|
|
if (FDecodes <> nil) then
|
|
FreeMem(FDecodes);
|
|
if (FEncodes <> nil) then
|
|
FreeMem(FEncodes);
|
|
|
|
{let the ancestor die}
|
|
inherited Destroy;
|
|
end;
|
|
{--------}
|
|
procedure TAbDfDecodeHuffmanTree.Build(
|
|
const aCodeLengths : array of integer;
|
|
aStartInx : integer;
|
|
aCount : integer;
|
|
const aExtraBits : array of byte;
|
|
aExtraOffset : integer);
|
|
const
|
|
ByteRevTable : array [0..255] of byte = (
|
|
$00, $80, $40, $C0, $20, $A0, $60, $E0, $10, $90, $50, $D0,
|
|
$30, $B0, $70, $F0, $08, $88, $48, $C8, $28, $A8, $68, $E8,
|
|
$18, $98, $58, $D8, $38, $B8, $78, $F8, $04, $84, $44, $C4,
|
|
$24, $A4, $64, $E4, $14, $94, $54, $D4, $34, $B4, $74, $F4,
|
|
$0C, $8C, $4C, $CC, $2C, $AC, $6C, $EC, $1C, $9C, $5C, $DC,
|
|
$3C, $BC, $7C, $FC, $02, $82, $42, $C2, $22, $A2, $62, $E2,
|
|
$12, $92, $52, $D2, $32, $B2, $72, $F2, $0A, $8A, $4A, $CA,
|
|
$2A, $AA, $6A, $EA, $1A, $9A, $5A, $DA, $3A, $BA, $7A, $FA,
|
|
$06, $86, $46, $C6, $26, $A6, $66, $E6, $16, $96, $56, $D6,
|
|
$36, $B6, $76, $F6, $0E, $8E, $4E, $CE, $2E, $AE, $6E, $EE,
|
|
$1E, $9E, $5E, $DE, $3E, $BE, $7E, $FE, $01, $81, $41, $C1,
|
|
$21, $A1, $61, $E1, $11, $91, $51, $D1, $31, $B1, $71, $F1,
|
|
$09, $89, $49, $C9, $29, $A9, $69, $E9, $19, $99, $59, $D9,
|
|
$39, $B9, $79, $F9, $05, $85, $45, $C5, $25, $A5, $65, $E5,
|
|
$15, $95, $55, $D5, $35, $B5, $75, $F5, $0D, $8D, $4D, $CD,
|
|
$2D, $AD, $6D, $ED, $1D, $9D, $5D, $DD, $3D, $BD, $7D, $FD,
|
|
$03, $83, $43, $C3, $23, $A3, $63, $E3, $13, $93, $53, $D3,
|
|
$33, $B3, $73, $F3, $0B, $8B, $4B, $CB, $2B, $AB, $6B, $EB,
|
|
$1B, $9B, $5B, $DB, $3B, $BB, $7B, $FB, $07, $87, $47, $C7,
|
|
$27, $A7, $67, $E7, $17, $97, $57, $D7, $37, $B7, $77, $F7,
|
|
$0F, $8F, $4F, $CF, $2F, $AF, $6F, $EF, $1F, $9F, $5F, $DF,
|
|
$3F, $BF, $7F, $FF);
|
|
var
|
|
i : integer;
|
|
Symbol : integer;
|
|
LengthCount : array [0..dfc_MaxCodeLength] of integer;
|
|
NextCode : array [0..dfc_MaxCodeLength] of integer;
|
|
Code : longint;
|
|
CodeLen : integer;
|
|
CodeData : longint;
|
|
DecoderLen : integer;
|
|
CodeIncr : integer;
|
|
Decodes : PAbDfLongintList;
|
|
Encodes : PAbDfLongintList;
|
|
{$IFDEF CPU386}
|
|
DecodesEnd : pointer;
|
|
{$ENDIF}
|
|
TablePtr : pointer;
|
|
begin
|
|
{count the number of instances of each code length and calculate the
|
|
maximum code length at the same time}
|
|
FillChar(LengthCount, sizeof(LengthCount), 0);
|
|
FMaxCodeLen := 0;
|
|
for i := 0 to pred(aCount) do begin
|
|
CodeLen := aCodeLengths[i + aStartInx];
|
|
Assert((CodeLen <= FDefMaxCodeLen),
|
|
Format('TAbDfDecodeHuffmanTree.Build: a code length is greater than %d',
|
|
[FDefMaxCodeLen]));
|
|
if (CodeLen > FMaxCodeLen) then
|
|
FMaxCodeLen := CodeLen;
|
|
inc(LengthCount[CodeLen]);
|
|
end;
|
|
|
|
{now we know the maximum code length we can allocate our decoder
|
|
array}
|
|
{$IFNDEF CPU386}
|
|
DecoderLen := 0;
|
|
{$ENDIF}
|
|
if (FUsage <> huEncoding) then begin
|
|
DecoderLen := PowerOfTwo[FMaxCodeLen];
|
|
GetMem(FDecodes, DecoderLen * sizeof(longint));
|
|
{$IFDEF CPU386}
|
|
DecodesEnd := PAnsiChar(FDecodes) + (DecoderLen * sizeof(longint));
|
|
{$ENDIF}
|
|
{$IFOPT C+}
|
|
FillChar(FDecodes^, DecoderLen * sizeof(longint), $FF);
|
|
FMask := not (DecoderLen - 1);
|
|
{$ENDIF}
|
|
end;
|
|
|
|
{calculate the start codes for each code length}
|
|
Code := 0;
|
|
LengthCount[0] := 0;
|
|
for i := 1 to FDefMaxCodeLen do begin
|
|
Code := (Code + LengthCount[i-1]) shl 1;
|
|
NextCode[i] := Code;
|
|
end;
|
|
|
|
{for speed and convenience}
|
|
Decodes := FDecodes;
|
|
Encodes := FEncodes;
|
|
TablePtr := @ByteRevTable;
|
|
|
|
{for each symbol...}
|
|
for Symbol := 0 to pred(aCount) do begin
|
|
{calculate the code length}
|
|
CodeLen := aCodeLengths[Symbol + aStartInx];
|
|
|
|
{if the code length were zero, just set the relevant entry in the
|
|
encoder array; the decoder array doesn't need anything}
|
|
if (CodeLen = 0) then begin
|
|
if (FUsage <> huDecoding) then
|
|
Encodes^[Symbol] := -1
|
|
end
|
|
|
|
{otherwise we need to fill elements in both the encoder and
|
|
decoder arrays}
|
|
else begin
|
|
{calculate *reversed* code}
|
|
Code := NextCode[CodeLen];
|
|
{$IFDEF CPU386}
|
|
asm
|
|
push esi
|
|
mov eax, Code
|
|
mov esi, TablePtr
|
|
xor ecx, ecx
|
|
xor edx, edx
|
|
mov cl, ah
|
|
mov dl, al
|
|
mov al, [esi+ecx]
|
|
mov ah, [esi+edx]
|
|
mov ecx, 16
|
|
pop esi
|
|
sub ecx, CodeLen
|
|
shr eax, cl
|
|
mov Code, eax
|
|
end;
|
|
{$ELSE}
|
|
CodeData:= Code;
|
|
LongRec(Code).Bytes[1]:= ByteRevTable[LongRec(CodeData).Bytes[0]];
|
|
LongRec(Code).Bytes[0]:= ByteRevTable[LongRec(CodeData).Bytes[1]];
|
|
Code:= Code shr (16-CodeLen);
|
|
{$ENDIF}
|
|
|
|
{set the code data (bit count, extra bits required, symbol),
|
|
everywhere the reversed code would appear in the decoder array;
|
|
set the code data in the encoder array as well}
|
|
if (Symbol >= aExtraOffset) then begin
|
|
if (FUsage <> huEncoding) then
|
|
CodeData := Symbol + { symbol}
|
|
(CodeLen shl 16) + { code length}
|
|
(aExtraBits[Symbol-aExtraOffset] shl 24);
|
|
{ extra bits required}
|
|
if (FUsage <> huDecoding) then
|
|
Encodes^[Symbol] := Code + { code}
|
|
(CodeLen shl 16) + { code length}
|
|
(aExtraBits[Symbol-aExtraOffset] shl 24)
|
|
{ extra bits required}
|
|
end
|
|
else begin
|
|
if (FUsage <> huEncoding) then
|
|
CodeData := Symbol + { symbol}
|
|
(CodeLen shl 16); { code length}
|
|
if (FUsage <> huDecoding) then
|
|
Encodes^[Symbol] := Code + { code}
|
|
(CodeLen shl 16); { code length}
|
|
end;
|
|
|
|
{OPTIMIZATION NOTE: the following code
|
|
|
|
CodeIncr := PowerOfTwo[CodeLen];
|
|
while Code < DecoderLen do begin
|
|
Decodes^[Code] := CodeData;
|
|
inc(Code, CodeIncr);
|
|
end;
|
|
|
|
was replaced by the asm code below to improve the speed. The
|
|
code in the loop is the big time sink in this routine so it was
|
|
best to replace it.}
|
|
if (FUsage <> huEncoding) then begin
|
|
{$IFDEF CPU386}
|
|
CodeIncr := PowerOfTwo[CodeLen] * sizeof(longint);
|
|
asm
|
|
push edi { save edi}
|
|
mov eax, Decodes { get the Decodes array}
|
|
mov edi, DecodesEnd { get the end of the Decodes array}
|
|
mov edx, Code { get Code and..}
|
|
shl edx, 1 { ..multiply by 4}
|
|
shl edx, 1
|
|
add eax, edx { eax => first element to be set}
|
|
mov edx, CodeData { get the CodeData}
|
|
mov ecx, CodeIncr { get the increment per loop}
|
|
@@1:
|
|
mov [eax], edx { set the element}
|
|
add eax, ecx { move to the next element}
|
|
cmp eax, edi { if we haven't gone past the end..}
|
|
jl @@1 { ..go back for the next one}
|
|
pop edi { retrieve edi}
|
|
end;
|
|
{$ELSE}
|
|
CodeIncr := PowerOfTwo[CodeLen];
|
|
while Code < DecoderLen do begin
|
|
Decodes^[Code] := CodeData;
|
|
inc(Code, CodeIncr);
|
|
end;
|
|
{$ENDIF}
|
|
end;
|
|
|
|
{we've used this code up for this symbol, so increment for the
|
|
next symbol at this code length}
|
|
inc(NextCode[CodeLen]);
|
|
end;
|
|
end;
|
|
end;
|
|
{--------}
|
|
{$IFDEF UseLogging}
|
|
procedure TAbDfDecodeHuffmanTree.DebugPrint(aLog : TAbLogger);
|
|
{$IFDEF EnableMegaLog}
|
|
var
|
|
i : integer;
|
|
Code : longint;
|
|
{$ENDIF}
|
|
begin
|
|
{to print the huffman tree, we must have a logger...}
|
|
Assert(aLog <> nil,
|
|
'TAbDfDecodeHuffmanTree.DebugPrint needs a logger object to which to print');
|
|
|
|
if (FUsage <> huEncoding) then begin
|
|
aLog.WriteLine('Huffman decoder array');
|
|
aLog.WriteLine(Format('Alphabet size: %d', [FAlphaSize]));
|
|
aLog.WriteLine(Format('Max codelength: %d', [FMaxCodeLen]));
|
|
|
|
{$IFDEF EnableMegaLog}
|
|
aLog.WriteLine('Index Len Xtra Symbol Reversed Code');
|
|
for i := 0 to pred(PowerOfTwo[FMaxCodeLen]) do begin
|
|
Code := FDecodes^[i];
|
|
if (Code = -1) then
|
|
aLog.WriteLine(Format('%5d%49s', [i, 'no code']))
|
|
else
|
|
aLog.WriteLine(Format('%5d%4d%5d%7d%33s',
|
|
[i,
|
|
((Code shr 16) and $FF),
|
|
((Code shr 24) and $FF),
|
|
(Code and $FFFF),
|
|
CodeToStr(i, ((Code shr 16) and $FF))]));
|
|
end;
|
|
aLog.WriteLine('---end decoder array---');
|
|
{$ENDIF}
|
|
end;
|
|
|
|
if (FUsage <> huDecoding) then begin
|
|
aLog.WriteLine('Huffman encoder array');
|
|
aLog.WriteLine(Format('Alphabet size: %d', [FAlphaSize]));
|
|
|
|
{$IFDEF EnableMegaLog}
|
|
aLog.WriteLine('Symbol Len Xtra Reversed Code');
|
|
for i := 0 to pred(FAlphaSize) do begin
|
|
Code := FEncodes^[i];
|
|
if (Code = -1) then
|
|
aLog.WriteLine(Format('%6d%42s', [i, 'no code']))
|
|
else
|
|
aLog.WriteLine(Format('%6d%4d%5d%33s',
|
|
[i,
|
|
((Code shr 16) and $FF),
|
|
((Code shr 24) and $FF),
|
|
CodeToStr((Code and $FFFF), ((Code shr 16) and $FF))]));
|
|
end;
|
|
aLog.WriteLine('---end encoder array---');
|
|
{$ENDIF}
|
|
end;
|
|
end;
|
|
{$ENDIF}
|
|
{--------}
|
|
function TAbDfDecodeHuffmanTree.Decode(aLookupBits : integer) : longint;
|
|
begin
|
|
{protect against dumb programming mistakes (note: FMask only exists
|
|
if assertions are on)}
|
|
{$IFOPT C+}
|
|
Assert((aLookupBits and FMask) = 0,
|
|
'TAbDfDecodeHuffmanTree.Decode: trying to decode too many bits, use LookupBitLength property');
|
|
{$ENDIF}
|
|
|
|
{return the code data}
|
|
Result := FDecodes^[aLookupBits];
|
|
end;
|
|
{--------}
|
|
function TAbDfDecodeHuffmanTree.Encode(aSymbol : integer) : longint;
|
|
begin
|
|
{protect against dumb programming mistakes}
|
|
Assert((0 <= aSymbol) and (aSymbol < FAlphaSize),
|
|
'TAbDfDecodeHuffmanTree.Encode: trying to encode a symbol that is not in the alphabet');
|
|
|
|
{return the code data}
|
|
Result := FEncodes^[aSymbol];
|
|
|
|
{if the result is -1, it's another programming mistake: the user is
|
|
attempting to get a code for a symbol that wasn't being used}
|
|
Assert(Result <> -1,
|
|
'TAbDfDecodeHuffmanTree.Encode: trying to encode a symbol that was not used');
|
|
end;
|
|
{====================================================================}
|
|
|
|
|
|
{===BuildStaticTrees=================================================}
|
|
procedure BuildStaticTrees;
|
|
var
|
|
i : integer;
|
|
CodeLens : array [0..287] of integer;
|
|
begin
|
|
{this routine builds the static huffman trees, those whose code
|
|
lengths are determined by the deflate spec}
|
|
|
|
{the static literal tree first}
|
|
for i := 0 to 143 do
|
|
CodeLens[i] := 8;
|
|
for i := 144 to 255 do
|
|
CodeLens[i] := 9;
|
|
for i := 256 to 279 do
|
|
CodeLens[i] := 7;
|
|
for i := 280 to 287 do
|
|
CodeLens[i] := 8;
|
|
AbStaticLiteralTree := TAbDfDecodeHuffmanTree.Create(288, 15, huBoth);
|
|
AbStaticLiteralTree.Build(CodeLens, 0, 288,
|
|
dfc_LitExtraBits, dfc_LitExtraOffset);
|
|
|
|
{the static distance tree afterwards}
|
|
for i := 0 to 31 do
|
|
CodeLens[i] := 5;
|
|
AbStaticDistanceTree := TAbDfDecodeHuffmanTree.Create(32, 15, huBoth);
|
|
AbStaticDistanceTree.Build(CodeLens, 0, 32,
|
|
dfc_DistExtraBits, dfc_DistExtraOffset);
|
|
end;
|
|
{====================================================================}
|
|
|
|
initialization
|
|
BuildStaticTrees;
|
|
|
|
finalization
|
|
AbStaticLiteralTree.Free;
|
|
AbStaticDistanceTree.Free;
|
|
|
|
end.
|