GHGAEA8@cc1.kuleuven.ac.be (05/14/91)
The source code for the C version consists of two files : LZRW1KH.h is a header file containing the actual compression routines. COMPRESS.c is the source for a small demonstration program. I'll post them here separated by a line of underscore characters. _____________________________________________________________________ LZRW1KH.H Header file containing the compression routines _____________________________________________________________________ /* ################################################################### ## ## ## ## ##### ##### ## ## ## ## ## ## ## ## ## ## ## ### ## ## ## # ## ### ## ## ## ## ## ## ## ## ### ##### ####### ## ## #### ###### ## ## ## ### ## ## ### ### ## ## ## ## ## ## ## ## ##### ##### ## ## ## ## #### ## ## ## ## ## ## ## ## ## EXTREMELY FAST AND EASY TO UNDERSTAND COMPRESSION ALGORITM ## ## ## ################################################################### ## ## ## This header file implements the LZRW1/KH algoritm which ## ## also implements some RLE coding which is usefull when ## ## compressing files containing a lot of consecutive bytes ## ## having the same value. The algoritm is not as good as ## ## LZH, but can compete with Lempel-Ziff. It's the fasted ## ## one I've encountered upto now. ## ## ## ## Kurt HAENEN ## ## ## ################################################################### */ #define FLAG_Copied 0x80 #define FLAG_Compress 0x40 #define TRUE 1 #define FALSE 0 typedef signed char byte; typedef signed short word; typedef unsigned char ubyte; typedef unsigned short uword; typedef unsigned long ulong; typedef unsigned char bool; bool GetMatch(Source,X,SourceSize,Hash,Size,Pos) ubyte *Source; uword X; uword SourceSize; word *Hash; uword *Size; word *Pos; { uword HashValue = (40543L*((((Source[X] << 4) ^ Source[X+1]) << 4) ^ Source[X+2]) >> 4) & 0xfff; *Pos = Hash[HashValue]; Hash[HashValue] = X; if ((*Pos != -1) && ((X-*Pos) < 4096)) { for (*Size = 0; ((*Size < 18) && (Source[X+*Size] == Source[*Pos+*Size]) && ((X+*Size) < SourceSize)); (*Size)++); return(*Size >= 3); } return(FALSE); } uword Compression(Source,Dest,SourceSize) ubyte *Source,*Dest; uword SourceSize; { word Hash[4096]; uword Key,Size,Pos; ubyte Bit = 0; uword Command; uword X = 0; uword Y = 3; uword Z = 1; for (Key = 0; Key < 4096; Key++) Hash[Key] = -1; Dest[0] = FLAG_Compress; for (; (X < SourceSize) && (Y <= SourceSize);) { printf("X = %i Y = %i Z = %i",X,Y,Z); if (Bit > 15) { Dest[Z++] = (Command >> 8) & 0x00ff; Dest[Z] = Command & 0x00ff; Z = Y; Bit = 0; Y += 2; } for (Size = 1; (Source[X] == Source[X+Size]) && (Size < 0x0fff) && (X+Size < SourceSize); Size++); if (Size >= 16) { Dest[Y++] = 0; Dest[Y++] = ((Size - 16) >> 8) & 0x00ff; Dest[Y++] = (Size - 16) & 0x00ff; Dest[Y++] = Source[X]; X += Size; Command = (Command << 1) + 1; } else if (GetMatch(Source,X,SourceSize,Hash,&Size,&Pos)) { Key = ((X-Pos) << 4) + (Size - 3); Dest[Y++] = (Key >> 8) & 0x00ff; Dest[Y++] = Key & 0x00ff; X += Size; Command = (Command << 1) + 1; } else { Dest[Y++] = Source[X++]; Command = (Command << 1); } Bit++; } Command <<= (16-Bit); Dest[Z++] = (Command >> 8) & 0x00ff; Dest[Z] = Command & 0x00ff; if (Y > SourceSize) { for(Y = 0; Y < SourceSize; Dest[Y+1] = Source[Y++]); Dest[0] = FLAG_Copied; return(SourceSize+1); } return(Y); } uword Decompression(Source,Dest,SourceSize) ubyte *Source,*Dest; uword SourceSize; { uword X = 3; uword Y = 0; uword Pos,Size,K; uword Command = (Source[1] << 8) + Source[2]; ubyte Bit = 16; if (Source[0] == FLAG_Copied) { for (Y = 1; Y < SourceSize; Dest[Y-1] = Source[Y++]); return(SourceSize-1); } for (; X < SourceSize;) { if (Bit == 0) { Command = (Source[X++] << 8); Command += Source[X++]; Bit = 16; } if (Command & 0x8000) { Pos = (Source[X++] << 4); Pos += (Source[X] >> 4); if (Pos) { Size = (Source[X++] & 0x0f)+3; for (K = 0; K < Size; K++) Dest[Y+K] = Dest[Y-Pos+K]; Y += Size; } else { Size = (Source[X++] << 8); Size += Source[X++] + 16; for (K = 0; K < Size; Dest[Y+K++] = Source[X]); X++; Y += Size; } } else Dest[Y++] = Source[X++]; Command <<= 1; Bit--; } return(Y); } _____________________________________________________________________ COMPRESS.C A small demonstration program _____________________________________________________________________ /*********************************************************************** ** ** ** COMPRESS.C A fast compression utility ** ** ** ***********************************************************************/ #include <stdio.h> #include "lzrw1kh.h" #define OurIdentifier 0xa5b6c7d8 #define Header "\ \n\ This program is only purpose is to demonstrate the working of the\n\ LZRW1KH (de)compression routines which can be found in the header\n\ file LZRW1KH.h ! This program simply takes two parameters :\n\ the name of a source file and the name of the destination file.\n\ If the source file has already been compressed, it will be de-\n\ compressed and stored into the destination file. Otherwise the\n\ process will compress the file !\n\ \n\ Usage : COMPRESS <Source> <Destination>\n\ \n\ Program and compression routines written by Kurt Haenen.\n\ \n" main(ac,av) int ac; char *av[]; { FILE *FP1,*FP2; ulong Identifier; ubyte Source[16388],Dest[16388]; uword SourceSize,DestSize; printf(Header); if (ac != 3) { printf("Illegal number of parameters !\n"); exit(1); } if ((FP1 = fopen(av[1],"rb")) == NULL) { printf("File %s not found !\n",av[1]); exit(1); } if ((FP2 = fopen(av[2],"wb")) == NULL) { fclose(FP1); printf("Couldn't open %s for output !\n",av[2]); exit(1); } if (((SourceSize = fread(&Identifier,sizeof(ulong),1,FP1)) != 1) || (Identifier != OurIdentifier)) { Identifier = OurIdentifier; printf("Trying to compress %s to %s !\n",av[1],av[2]); puts("writing filesize"); if (fwrite(&Identifier,sizeof(ulong),1,FP2) != 1) { fclose(FP1); fclose(FP2); printf("Couldn't write to %s !\n",av[2]); exit(1); } if (fseek(FP1,0L,SEEK_SET) != 0) { fclose(FP1); fclose(FP2); printf("Couldn't seek in %s !\n",av[1]); exit(1); } for (; (SourceSize = fread(Source,1,16384,FP1)) != 0;) { DestSize = Compression(Source,Dest,SourceSize); puts("writing block to disk"); if ((fwrite(&DestSize,sizeof(uword),1,FP2) != 1) || (fwrite(Dest,1,DestSize,FP2) != DestSize)) { fclose(FP1); fclose(FP2); printf("Couldn't write to %s !\n",av[2]); exit(1); } } } else { printf("Trying to decompress %s to %s !\n",av[1],av[2]); for (; (fread(&SourceSize,sizeof(uword),1,FP1) == 1);) { if (fread(Source,1,SourceSize,FP1) != SourceSize) { fclose(FP1); fclose(FP2); printf("Unexspected EOF in file %s !\n",av[1]); exit(1); } DestSize = Decompression(Source,Dest,SourceSize); if (fwrite(Dest,1,DestSize,FP2) != DestSize) { fclose(FP1); fclose(FP2); printf("Couldn't write to %s !\n",av[2]); exit(1); } } } fclose(FP1); fclose(FP2); } _____________________________________________________________________ Ok, that's it for C ! Bug reports are always welcome at : GHGAEA8 @ BLEKUL11. I'm trying to post the new version for Turbo Pascal, but VM doesn't appreciate the TABS I used, so that may take a while ... Oh, of course : other comments are also welcome at the address listed above. I'll try to answer all the mails, although the tests are coming up ... Kurt Haenen