[comp.compression] C source for LZRW1/KH

GHGAEA8@cc1.kuleuven.ac.be (05/14/91)

   The source code for the C version consists of two files :

   LZRW1KH.h  is a header file containing the actual compression
              routines.
   COMPRESS.c is the source for a small demonstration program.

   I'll post them here separated by a line of underscore characters.

_____________________________________________________________________

LZRW1KH.H    Header file containing the compression routines
_____________________________________________________________________


/*   ###################################################################
     ##                                                               ##
     ##      ##    ##### #####  ##   ##  ##      ## ##  ## ##  ##     ##
     ##      ##      ### ##  ## ## # ## ###     ##  ## ##  ##  ##     ##
     ##      ##     ###  #####  #######  ##    ##   ####   ######     ##
     ##      ##    ###   ##  ## ### ###  ##   ##    ## ##  ##  ##     ##
     ##      ##### ##### ##  ## ##   ## #### ##     ##  ## ##  ##     ##
     ##                                                               ##
     ##   EXTREMELY FAST AND EASY TO UNDERSTAND COMPRESSION ALGORITM  ##
     ##                                                               ##
     ###################################################################
     ##                                                               ##
     ##   This header file implements the  LZRW1/KH algoritm which    ##
     ##   also implements  some RLE coding  which is usefull  when    ##
     ##   compressing files containing a lot of consecutive  bytes    ##
     ##   having the same value.   The algoritm is not as good  as    ##
     ##   LZH, but can compete with Lempel-Ziff.   It's the fasted    ##
     ##   one I've encountered upto now.                              ##
     ##                                                               ##
     ##                                                Kurt HAENEN    ##
     ##                                                               ##
     ###################################################################   */

#define   FLAG_Copied         0x80
#define   FLAG_Compress       0x40
#define   TRUE                1
#define   FALSE               0
typedef   signed char         byte;
typedef   signed short        word;
typedef   unsigned char       ubyte;
typedef   unsigned short      uword;
typedef   unsigned long       ulong;
typedef   unsigned char       bool;

bool      GetMatch(Source,X,SourceSize,Hash,Size,Pos)
ubyte     *Source;
uword     X;
uword     SourceSize;
word      *Hash;
uword     *Size;
word      *Pos;
{    uword     HashValue      = (40543L*((((Source[X] << 4) ^
                                   Source[X+1]) << 4) ^ Source[X+2]) >> 4) &
0xfff;
     *Pos = Hash[HashValue];
     Hash[HashValue] = X;
     if ((*Pos != -1) && ((X-*Pos) < 4096))
          {    for (*Size = 0; ((*Size < 18)
                         && (Source[X+*Size] == Source[*Pos+*Size])
                         && ((X+*Size) < SourceSize)); (*Size)++);
               return(*Size >= 3);
          }
     return(FALSE);
}

uword     Compression(Source,Dest,SourceSize)
ubyte     *Source,*Dest;
uword     SourceSize;
{    word      Hash[4096];
     uword     Key,Size,Pos;
     ubyte     Bit                      = 0;
     uword     Command;
     uword     X                        = 0;
     uword     Y                        = 3;
     uword     Z                        = 1;
     for (Key = 0; Key < 4096; Key++)
          Hash[Key] = -1;
     Dest[0] = FLAG_Compress;
     for (; (X < SourceSize) && (Y <= SourceSize);)
         {     printf("X = %i Y = %i Z = %i",X,Y,Z);
               if (Bit > 15)
                    {    Dest[Z++] = (Command >> 8) & 0x00ff;
                         Dest[Z] = Command & 0x00ff;
                         Z = Y;
                         Bit = 0;
                         Y += 2;
                    }
               for (Size = 1; (Source[X] == Source[X+Size]) && (Size < 0x0fff)
                                   && (X+Size < SourceSize); Size++);
               if (Size >= 16)
                    {    Dest[Y++] = 0;
                         Dest[Y++] = ((Size - 16) >> 8) & 0x00ff;
                         Dest[Y++] = (Size - 16) & 0x00ff;
                         Dest[Y++] = Source[X];
                         X += Size;
                         Command = (Command << 1) + 1;
                    }
               else if (GetMatch(Source,X,SourceSize,Hash,&Size,&Pos))
                         {    Key = ((X-Pos) << 4) + (Size - 3);
                              Dest[Y++] = (Key >> 8) & 0x00ff;
                              Dest[Y++] = Key & 0x00ff;
                              X += Size;
                              Command = (Command << 1) + 1;
                         }
                    else {    Dest[Y++] = Source[X++];
                              Command = (Command << 1);
                         }
               Bit++;
          }
     Command <<= (16-Bit);
     Dest[Z++] = (Command >> 8) & 0x00ff;
     Dest[Z] = Command & 0x00ff;
     if (Y > SourceSize)
          {    for(Y = 0; Y < SourceSize; Dest[Y+1] = Source[Y++]);
               Dest[0] = FLAG_Copied;
               return(SourceSize+1);
          }
     return(Y);
}

uword     Decompression(Source,Dest,SourceSize)
ubyte     *Source,*Dest;
uword     SourceSize;
{    uword     X                        = 3;
     uword     Y                        = 0;
     uword     Pos,Size,K;
     uword     Command                  = (Source[1] << 8) + Source[2];
     ubyte     Bit                      = 16;
     if (Source[0] == FLAG_Copied)
          {    for (Y = 1; Y < SourceSize; Dest[Y-1] = Source[Y++]);
               return(SourceSize-1);
          }
     for (; X < SourceSize;)
          {    if (Bit == 0)
                    {    Command = (Source[X++] << 8);
                         Command += Source[X++];
                         Bit = 16;
                    }
               if (Command & 0x8000)
                    {    Pos = (Source[X++] << 4);
                         Pos += (Source[X] >> 4);
                         if (Pos)
                              {    Size = (Source[X++] & 0x0f)+3;
                                   for (K = 0; K < Size; K++)
                                        Dest[Y+K] = Dest[Y-Pos+K];
                                   Y += Size;
                              }
                         else {    Size = (Source[X++] << 8);
                                   Size += Source[X++] + 16;
                                   for (K = 0; K < Size; Dest[Y+K++] =
Source[X]);
                                   X++;
                                   Y += Size;
                              }
                    }
               else Dest[Y++] = Source[X++];
               Command <<= 1;
               Bit--;
          }
     return(Y);
}

_____________________________________________________________________

COMPRESS.C   A small demonstration program
_____________________________________________________________________

/***********************************************************************
 **                                                                   **
 **       COMPRESS.C               A fast compression utility         **
 **                                                                   **
 ***********************************************************************/

#include  <stdio.h>
#include  "lzrw1kh.h"

#define   OurIdentifier  0xa5b6c7d8

#define   Header         "\
\n\
   This program is only purpose is to demonstrate the working of the\n\
   LZRW1KH (de)compression routines which can be found in the header\n\
   file LZRW1KH.h !  This  program  simply  takes  two  parameters :\n\
   the name of a source file and the name  of the destination  file.\n\
   If the source file has  already been compressed,  it will be  de-\n\
   compressed and stored into the  destination file.  Otherwise  the\n\
   process will compress the file !\n\
\n\
   Usage :  COMPRESS <Source> <Destination>\n\
\n\
   Program and compression routines written by Kurt Haenen.\n\
\n"

          main(ac,av)
int       ac;
char      *av[];
{    FILE      *FP1,*FP2;
     ulong     Identifier;
     ubyte     Source[16388],Dest[16388];
     uword     SourceSize,DestSize;
     printf(Header);
     if (ac != 3)
          {    printf("Illegal number of parameters !\n");
               exit(1);
          }
     if ((FP1 = fopen(av[1],"rb")) == NULL)
          {    printf("File %s not found !\n",av[1]);
               exit(1);
          }
     if ((FP2 = fopen(av[2],"wb")) == NULL)
          {    fclose(FP1);
               printf("Couldn't open %s for output !\n",av[2]);
               exit(1);
          }
     if (((SourceSize = fread(&Identifier,sizeof(ulong),1,FP1)) != 1)
               || (Identifier != OurIdentifier))
          {    Identifier = OurIdentifier;
               printf("Trying to compress %s to %s !\n",av[1],av[2]);
               puts("writing filesize");
               if (fwrite(&Identifier,sizeof(ulong),1,FP2) != 1)
                    {    fclose(FP1);
                         fclose(FP2);
                         printf("Couldn't write to %s !\n",av[2]);
                         exit(1);
                    }
               if (fseek(FP1,0L,SEEK_SET) != 0)
                    {    fclose(FP1);
                         fclose(FP2);
                         printf("Couldn't seek in %s !\n",av[1]);
                         exit(1);
                    }
               for (; (SourceSize = fread(Source,1,16384,FP1)) != 0;)
                    {    DestSize = Compression(Source,Dest,SourceSize);
                         puts("writing block to disk");
                         if ((fwrite(&DestSize,sizeof(uword),1,FP2)  != 1)
                                   || (fwrite(Dest,1,DestSize,FP2) != DestSize))
                              {    fclose(FP1);
                                   fclose(FP2);
                                   printf("Couldn't write to %s !\n",av[2]);
                                   exit(1);
                              }
                    }
          }
     else {    printf("Trying to decompress %s to %s !\n",av[1],av[2]);
               for (; (fread(&SourceSize,sizeof(uword),1,FP1) == 1);)
                    {    if (fread(Source,1,SourceSize,FP1) != SourceSize)
                              {    fclose(FP1);
                                   fclose(FP2);
                                   printf("Unexspected EOF in file %s
!\n",av[1]);
                                   exit(1);
                              }
                         DestSize = Decompression(Source,Dest,SourceSize);
                         if (fwrite(Dest,1,DestSize,FP2) != DestSize)
                              {    fclose(FP1);
                                   fclose(FP2);
                                   printf("Couldn't write to %s !\n",av[2]);
                                   exit(1);
                              }
                    }
          }
     fclose(FP1);
     fclose(FP2);
}

_____________________________________________________________________

Ok, that's it for C !  Bug reports are always welcome at :
GHGAEA8 @ BLEKUL11.  I'm trying to post the new version for
Turbo Pascal, but VM doesn't appreciate the TABS I used, so
that may take a while ...  Oh, of course : other comments are
also welcome at the address listed above.  I'll try to answer
all the mails, although the tests are coming up ...


                                            Kurt Haenen