[alt.sources] LZRW1/KH

GHGAEA8@cc1.kuleuven.ac.be (05/15/91)

---------------------------------------------------------------
    This posting includes the sources for the Turbo Pascal
version of the LZRW1/KH compression algoritm.
---------------------------------------------------------------
File #1 : The LZRW1KH unit
--------------------------

{    ###################################################################   }
{    ##                                                               ##   }
{    ##      ##    ##### #####  ##   ##  ##      ## ##  ## ##  ##     ##   }
{    ##      ##      ### ##  ## ## # ## ###     ##  ## ##  ##  ##     ##   }
{    ##      ##     ###  #####  #######  ##    ##   ####   ######     ##   }
{    ##      ##    ###   ##  ## ### ###  ##   ##    ## ##  ##  ##     ##   }
{    ##      ##### ##### ##  ## ##   ## #### ##     ##  ## ##  ##     ##   }
{    ##                                                               ##   }
{    ##   EXTREMELY FAST AND EASY TO UNDERSTAND COMPRESSION ALGORITM  ##   }
{    ##                                                               ##   }
{    ###################################################################   }
{    ##                                                               ##   }
{    ##   This unit implements the updated LZRW1/KH algoritm which    ##   }
{    ##   also implements  some RLE coding  which is usefull  when    ##   }
{    ##   compress files  containing  a lot  of consecutive  bytes    ##   }
{    ##   having the same value.   The algoritm is not as good  as    ##   }
{    ##   LZH, but can compete with Lempel-Ziff.   It's the fasted    ##   }
{    ##   one I've encountered upto now.                              ##   }
{    ##                                                               ##   }
{    ##                                                               ##   }
{    ##                                                               ##   }
{    ##                                                Kurt HAENEN    ##   }
{    ##                                                               ##   }
{    ###################################################################   }

UNIT LZRW1KH;

INTERFACE

CONST     BufferMaxSize  = 32768;
          BufferMax      = BufferMaxSize-1;
          FLAG_Copied    = $80;
          FLAG_Compress  = $40;

TYPE BufferIndex    = 0..BufferMax;
     BufferSize     = 0..BufferMaxSize;
     BufferArray    = ARRAY [BufferIndex] OF BYTE;
     BufferPtr      = ^BufferArray;

FUNCTION  Compression    (    Source,Dest    : BufferPtr;
                              SourceSize     : BufferSize   )    : BufferSize;

FUNCTION  Decompression  (    Source,Dest    : BufferPtr;
                              SourceSize     : BufferSize   )    : BufferSize;

IMPLEMENTATION

TYPE HashTable      = ARRAY [0..4095] OF INTEGER;

FUNCTION  GetMatch       (    Source         : BufferPtr;
                              X              : BufferIndex;
                              SourceSize     : BufferSize;
                          VAR Hash           : HashTable;
                          VAR Size           : WORD;
                          VAR Pos            : BufferIndex  )    : BOOLEAN;
VAR  HashValue      : WORD;
BEGIN
     HashValue := (40543*((((Source^[X] SHL 4) XOR Source^[X+1]) SHL 4) XOR
                 Source^[X+2]) SHR 4) AND $0FFF;
     GetMatch := FALSE;
     IF (Hash[HashValue] <> -1) AND (X-Hash[HashValue] < 4096) THEN
          BEGIN
               Pos := Hash[HashValue];
               Size := 0;
               WHILE ((Size < 18) AND (Source^[X+Size] = Source^[Pos+Size])
                    AND (X+Size < SourceSize)) DO
                    INC(Size);
               GetMatch := (Size >= 3)
          END;
     Hash[HashValue] := X
END;


FUNCTION  Compression    (    Source,Dest    : BufferPtr;
                              SourceSize     : BufferSize   )    : BufferSize;
VAR  Hash                     : HashTable;
     Key,Bit,Command,Size     : WORD;
     X,Y,Z,Pos                : BufferIndex;
BEGIN
     FOR Key := 0 TO 4095 DO Hash[Key] := -1;
     Dest^[0] := FLAG_Compress;
     X := 0;
     Y := 3;
     Z := 1;
     Bit := 0;
     Command := 0;
     WHILE (X < SourceSize) AND (Y <= SourceSize) DO
          BEGIN
               IF (Bit > 15) THEN
                    BEGIN
                         Dest^[Z] := HI(Command);
                         Dest^[Z+1] := LO(Command);
                         Z := Y;
                         Bit := 0;
                         INC(Y,2)
                    END;
               Size := 1;
               WHILE ((Source^[X] = Source^[X+Size]) AND (Size < $FFF)
                    AND (X+Size < SourceSize)) DO
                         INC(Size);
               IF (Size >= 16)
                    THEN BEGIN
                              Dest^[Y] := 0;
                              Dest^[Y+1] := HI(Size-16);
                              Dest^[Y+2] := LO(Size-16);
                              Dest^[Y+3] := Source^[X];
                              INC(Y,4);
                              INC(X,Size);
                              Command := (Command SHL 1) + 1
                         END
                    ELSE IF (GetMatch(Source,X,SourceSize,Hash,Size,Pos))
                              THEN BEGIN
                                        Key := ((X-Pos) SHL 4) + (Size-3);
                                        Dest^[Y] := HI(Key);
                                        Dest^[Y+1] := LO(Key);
                                        INC(Y,2);
                                        INC(X,Size);
                                        Command := (Command SHL 1) + 1
                                   END
                              ELSE BEGIN
                                        Dest^[Y] := Source^[X];
                                        INC(Y);
                                        INC(X);
                                        Command := Command SHL 1
                                   END;
               INC(Bit);
          END;
     Command := Command SHL (16-Bit);
     Dest^[Z] := HI(Command);
     Dest^[Z+1] := LO(Command);
     IF (Y > SourceSize) THEN
          BEGIN
               MOVE(Source^[0],Dest^[1],SourceSize);
               Dest^[0] := FLAG_Copied;
               Y := SUCC(SourceSize)
          END;
     Compression := Y
END;

FUNCTION  Decompression  (    Source,Dest    : BufferPtr;
                              SourceSize     : BufferSize   )    : BufferSize;
VAR  X,Y,Pos                  : BufferIndex;
     Command,Size,K           : WORD;
     Bit                      : BYTE;
BEGIN
     IF (Source^[0] = FLAG_Copied)
          THEN FOR Y := 1 TO PRED(SourceSize) DO
                    Dest^[PRED(Y)] := Source^[Y]
          ELSE BEGIN
                    Y := 0;
                    X := 3;
                    Command := (Source^[1] SHL 8)+Source^[2];
                    Bit := 16;
                    WHILE (X < SourceSize) DO
                         BEGIN
                              IF (Bit = 0) THEN
                                   BEGIN
                                        Command := (Source^[X] SHL 8)
                                                      +Source^[X+1];
                                        Bit := 16;
                                        INC(X,2)
                                   END;
                              IF ((Command AND $8000) = 0)
                                   THEN BEGIN
                                             Dest^[Y] := Source^[X];
                                             INC(X);
                                             INC(Y)
                                        END
                                   ELSE BEGIN
                                             Pos := ((Source^[X] SHL 4)
                                                    +(Source^[X+1] SHR 4));
                                             IF (Pos = 0)
                                                  THEN BEGIN
{----------------------------------------------------------------}
     Size := (Source^[X+1] SHL 8) + Source^[X+2] + 15;
     FOR K := 0 TO Size DO
          Dest^[Y+K] := Source^[X+3];
     INC(X,4);
     INC(Y,Size+1)
{----------------------------------------------------------------}
                                                       END
                                                  ELSE BEGIN
{----------------------------------------------------------------}
     Size := (Source^[X+1] AND $0F)+2;
     FOR K := 0 TO Size DO
          Dest^[Y+K] := Dest^[Y-Pos+K];
     INC(X,2);
     INC(Y,Size+1)
{----------------------------------------------------------------}
                                                       END;
                                        END;
                              Command := Command SHL 1;
                              DEC(Bit)
                         END
               END;
     Decompression := Y
END;

END.

-------------------------------------------------------------------
End of File # 1



File #2 : A small demonstration
-------------------------------


{    ###################################################################   }
{    ##                                                               ##   }
{    ##      ##    ##### #####  ##   ##  ##      ## ##  ## ##  ##     ##   }
{    ##      ##      ### ##  ## ## # ## ###     ##  ## ##  ##  ##     ##   }
{    ##      ##     ###  #####  #######  ##    ##   ####   ######     ##   }
{    ##      ##    ###   ##  ## ### ###  ##   ##    ## ##  ##  ##     ##   }
{    ##      ##### ##### ##  ## ##   ## #### ##     ##  ## ##  ##     ##   }
{    ##                                                               ##   }
{    ##   EXTREMELY FAST AND EASY TO UNDERSTAND COMPRESSION ALGORITM  ##   }
{    ##                                                               ##   }
{    ###################################################################   }
{    ##                                                               ##   }
{    ##   In an earlier posting I've already presented a 680x0        ##   }
{    ##   assembler routine to implement optimized LZRW1 compression. ##   }
{    ##   I've chosen then name LZRW1/KH for this optimized           ##   }
{    ##   algoritm, to distinguish it from the original one.  The     ##   }
{    ##   changes can be found in the maximum length for a match,     ##   }
{    ##   which is 16 in the original algoritm, but 18 in the         ##   }
{    ##   optimized one.  This is not a big change, but nevertheless  ##   }
{    ##   it can increase the compression by 1/8.  Another thing      ##   }
{    ##   I've tried to do is to make this program easy to            ##   }
{    ##   understand.  Although I have some knowledge of C, I always  ##   }
{    ##   find it difficult to understand someone else his programs.  ##   }
{    ##   Especially if they depend on the SHORT CIRCUIT BOOLEAN      ##   }
{    ##   evaluation of C : Test || Test || Test will only be         ##   }
{    ##   executed fully if Test was TRUE the first three times ...   ##   }
{    ##   Took me awhile to figure it out, although it seems quite    ##   }
{    ##   natural now ... Enough of this, let's see some code ...     ##   }
{    ##                                                               ##   }
{    ##   Sorry, no list of people to thank this time ... It hasn't   ##   }
{    ##   changed since my last posting.                              ##   }
{    ##                                                Kurt Haenen    ##   }
{    ##                                                               ##   }
{    ###################################################################   }

PROGRAM   CompressionDemo(input,output);

USES LZRW1KH;

CONST     CompIdentifier : LONGINT      = (((((((ORD('L') SHL 8)+ORD('Z'))
                                   SHL 8)+ORD('R')) SHL 8)+ORD('W')) SHL 8);

VAR  SRCFP,DSTFP         : FILE;
     SRCBuf,DSTBuf       : ARRAY [0..16390] OF BYTE;
     SRCSize,DSTSize     : WORD;
     Tmp                 : WORD;
     Identifier          : LONGINT;
     InSize,OutSize      : LONGINT;

BEGIN
     IF ((PARAMCOUNT <> 2) AND ((PARAMCOUNT <> 3) OR (PARAMSTR(1) <> '-D')))
             THEN
          BEGIN
               WRITELN;
               WRITELN('USAGE : COMPRESS [-D] <Source File> <Dest File>');
               WRITELN('        (The -D option is case sensitive !)');
               WRITELN;
               HALT
          END;
     IF (PARAMCOUNT = 2)
          THEN BEGIN
                    WRITELN('TRYING TO COMPRESS ',PARAMSTR(1),' TO ',
                                    PARAMSTR(2),'.');
                    ASSIGN(SRCFP,PARAMSTR(1));
                    ASSIGN(DSTFP,PARAMSTR(2));
                    RESET(SRCFP,1);
                    IF (IOResult <> 0) THEN
                         BEGIN
                              WRITELN;
                              WRITELN('FILE ',PARAMSTR(1),' NOT FOUND !');
                              WRITELN;
                              HALT
                         END;
                    REWRITE(DSTFP,1);
                    IF (IOResult <> 0) THEN
                         BEGIN
                              CLOSE(SRCFP);
                              WRITELN;
                              WRITELN('FILE ',PARAMSTR(2),' COULD NOT '+
                                                   'BE OPENED !');
                              WRITELN;
                              HALT
                         END;
                    BLOCKWRITE(DSTFP,CompIdentifier,SIZEOF(LONGINT),Tmp);
                    IF (Tmp <> SIZEOF(LONGINT)) THEN
                         BEGIN
                              CLOSE(SRCFP);
                              CLOSE(DSTFP);
                              ERASE(DSTFP);
                              WRITELN;
                              WRITELN('ERROR WRITING TO ',PARAMSTR(2),' !');
                              WRITELN;
                              HALT
                         END;
                    SRCSize := 16384;
                    InSize := 0;
                    OutSize := SIZEOF(LONGINT);
                    WHILE (SRCSize = 16384) DO
                         BEGIN
                              BLOCKREAD(SRCFP,SRCBuf[0],16384,SRCSize);
                              INC(InSize,SRCSize);
                              WRITE('READ : ',InSize,'  WRITTEN : ',
                                            OutSize,#13);
                              DSTSize := Compression(ADDR(SRCBuf[0]),
                                              ADDR(DSTBuf[0]),SRCSize);
                              BLOCKWRITE(DSTFP,DSTSize,SIZEOF(WORD),Tmp);
                              INC(OutSize,Tmp);
                              WRITE('READ : ',InSize,'  WRITTEN : ',
                                             OutSize,#13);
                              IF (Tmp <> SIZEOF(WORD)) THEN
                                   BEGIN
                                        CLOSE(SRCFP);
                                        CLOSE(DSTFP);
                                        ERASE(DSTFP);
                                        WRITELN;
                                        WRITELN('ERROR WRITING TO ',
                                               PARAMSTR(2),' !');
                                        WRITELN;
                                        HALT
                                   END;
                              BLOCKWRITE(DSTFP,DSTBuf[0],DSTSize,Tmp);
                              INC(OutSize,Tmp);
                              WRITE('READ : ',InSize,'  WRITTEN : ',
                                   OutSize,#13);
                              IF (Tmp <> DSTSize) THEN
                                   BEGIN
                                        CLOSE(SRCFP);
                                        CLOSE(DSTFP);
                                        ERASE(DSTFP);
                                        WRITELN;
                                        WRITELN('ERROR WRITING TO ',
                                                   PARAMSTR(2),' !');
                                        WRITELN;
                                        HALT
                                   END;
                         END;
                    WRITELN;
                    WRITELN('FILE SUCCESFULLY COMPRESSED !');
                    CLOSE(SRCFP);
                    CLOSE(DSTFP)
               END
          ELSE BEGIN
                    WRITELN('TRYING TO DECOMPRESS ',PARAMSTR(2),' TO ',
                                       PARAMSTR(3),'.');
                    ASSIGN(SRCFP,PARAMSTR(2));
                    ASSIGN(DSTFP,PARAMSTR(3));
                    RESET(SRCFP,1);
                    IF (IOResult <> 0) THEN
                         BEGIN
                              WRITELN;
                              WRITELN('FILE ',PARAMSTR(2),' NOT FOUND !');
                              WRITELN;
                              HALT
                         END;
                    REWRITE(DSTFP,1);
                    IF (IOResult <> 0) THEN
                         BEGIN
                              CLOSE(SRCFP);
                              WRITELN;
                              WRITELN('FILE ',PARAMSTR(3),' COULD NOT '+
                                                      'BE OPENED !');
                              WRITELN;
                              HALT
                         END;
                    BLOCKREAD(SRCFP,Identifier,SIZEOF(LONGINT),Tmp);
                    IF (Tmp <> SIZEOF(LONGINT)) THEN
                         BEGIN
                              CLOSE(SRCFP);
                              CLOSE(DSTFP);
                              ERASE(DSTFP);
                              WRITELN;
                              WRITELN('ERROR READING FROM ',PARAMSTR(2),' !');
                              WRITELN;
                              HALT
                         END;
                    IF (Identifier <> CompIdentifier) THEN
                         BEGIN
                              CLOSE(SRCFP);
                              CLOSE(DSTFP);
                              ERASE(DSTFP);
                              WRITELN;
                              WRITELN('FILE ',PARAMSTR(2),
                                            ' IS NOT A COMPRESSED FILE !');
                              WRITELN;
                              HALT
                         END;
                    DSTSize := 16384;
                    InSize := SIZEOF(LONGINT);
                    OutSize := 0;
                    WHILE (DSTSize = 16384) DO
                         BEGIN
                              BLOCKREAD(SRCFP,SRCSize,SIZEOF(WORD),Tmp);
                              IF (Tmp <> SIZEOF(WORD)) THEN
                                   BEGIN
                                        CLOSE(SRCFP);
                                        CLOSE(DSTFP);
                                        ERASE(DSTFP);
                                        WRITELN;
                                        WRITELN('ERROR READING FROM ',
                                              PARAMSTR(2),' !');
                                        WRITELN;
                                        HALT
                                   END;
                              BLOCKREAD(SRCFP,SRCBuf[0],SRCSize,Tmp);
                              INC(InSize,Tmp+SIZEOF(WORD));
                              WRITE('READ : ',InSize,'  WRITTEN : ',
                                        OutSize,#13);
                              IF (Tmp <> SRCSize) THEN
                                   BEGIN
                                        CLOSE(SRCFP);
                                        CLOSE(DSTFP);
                                        ERASE(DSTFP);
                                        WRITELN;
                                        WRITELN('ERROR READING FROM ',
                                                  PARAMSTR(2),' !');
                                        WRITELN;
                                        HALT
                                   END;
                              DSTSize := Decompression(ADDR(SRCBuf[0]),
                                               ADDR(DstBuf[0]),SRCSize);
                              BLOCKWRITE(DSTFP,DSTBuf[0],DSTSize,Tmp);
                              INC(OutSize,Tmp);
                              WRITE('READ : ',InSize,'  WRITTEN : ',
                                            OutSize,#13);
                              IF (Tmp <> DSTSize) THEN
                                   BEGIN
                                        CLOSE(SRCFP);
                                        CLOSE(DSTFP);
                                        ERASE(DSTFP);
                                        WRITELN;
                                        WRITELN('ERROR WRITING TO ',
                                                   PARAMSTR(3),' !');
                                        WRITELN;
                                        HALT
                                   END;
                         END;
                    WRITELN;
                    WRITELN('FILE SUCCESFULLY DECOMPRESSED !');
                    CLOSE(SRCFP);
                    CLOSE(DSTFP)
               END
END.

-------------------------------------------------------------------
End of file # 2


Ok, that was the listing ...   I hope everything is ok.  Some of
you may get some word-wraps etc. because of the long lines I used
in the source.  I tried to make them as readable as possible, but
sometime I had to include some backwards indented blocks, which
are separated from the rest of the program by {----}.  I hope this
still remains readable ... Hope to hear from you all soon ...

                                                    Kurt Haenen