GHGAEA8@cc1.kuleuven.ac.be (05/14/91)
The source code for the C version consists of two files :
LZRW1KH.h is a header file containing the actual compression
routines.
COMPRESS.c is the source for a small demonstration program.
I'll post them here separated by a line of underscore characters.
_____________________________________________________________________
LZRW1KH.H Header file containing the compression routines
_____________________________________________________________________
/* ###################################################################
## ##
## ## ##### ##### ## ## ## ## ## ## ## ## ##
## ## ### ## ## ## # ## ### ## ## ## ## ## ##
## ## ### ##### ####### ## ## #### ###### ##
## ## ### ## ## ### ### ## ## ## ## ## ## ##
## ##### ##### ## ## ## ## #### ## ## ## ## ## ##
## ##
## EXTREMELY FAST AND EASY TO UNDERSTAND COMPRESSION ALGORITM ##
## ##
###################################################################
## ##
## This header file implements the LZRW1/KH algoritm which ##
## also implements some RLE coding which is usefull when ##
## compressing files containing a lot of consecutive bytes ##
## having the same value. The algoritm is not as good as ##
## LZH, but can compete with Lempel-Ziff. It's the fasted ##
## one I've encountered upto now. ##
## ##
## Kurt HAENEN ##
## ##
################################################################### */
#define FLAG_Copied 0x80
#define FLAG_Compress 0x40
#define TRUE 1
#define FALSE 0
typedef signed char byte;
typedef signed short word;
typedef unsigned char ubyte;
typedef unsigned short uword;
typedef unsigned long ulong;
typedef unsigned char bool;
bool GetMatch(Source,X,SourceSize,Hash,Size,Pos)
ubyte *Source;
uword X;
uword SourceSize;
word *Hash;
uword *Size;
word *Pos;
{ uword HashValue = (40543L*((((Source[X] << 4) ^
Source[X+1]) << 4) ^ Source[X+2]) >> 4) &
0xfff;
*Pos = Hash[HashValue];
Hash[HashValue] = X;
if ((*Pos != -1) && ((X-*Pos) < 4096))
{ for (*Size = 0; ((*Size < 18)
&& (Source[X+*Size] == Source[*Pos+*Size])
&& ((X+*Size) < SourceSize)); (*Size)++);
return(*Size >= 3);
}
return(FALSE);
}
uword Compression(Source,Dest,SourceSize)
ubyte *Source,*Dest;
uword SourceSize;
{ word Hash[4096];
uword Key,Size,Pos;
ubyte Bit = 0;
uword Command;
uword X = 0;
uword Y = 3;
uword Z = 1;
for (Key = 0; Key < 4096; Key++)
Hash[Key] = -1;
Dest[0] = FLAG_Compress;
for (; (X < SourceSize) && (Y <= SourceSize);)
{ printf("X = %i Y = %i Z = %i",X,Y,Z);
if (Bit > 15)
{ Dest[Z++] = (Command >> 8) & 0x00ff;
Dest[Z] = Command & 0x00ff;
Z = Y;
Bit = 0;
Y += 2;
}
for (Size = 1; (Source[X] == Source[X+Size]) && (Size < 0x0fff)
&& (X+Size < SourceSize); Size++);
if (Size >= 16)
{ Dest[Y++] = 0;
Dest[Y++] = ((Size - 16) >> 8) & 0x00ff;
Dest[Y++] = (Size - 16) & 0x00ff;
Dest[Y++] = Source[X];
X += Size;
Command = (Command << 1) + 1;
}
else if (GetMatch(Source,X,SourceSize,Hash,&Size,&Pos))
{ Key = ((X-Pos) << 4) + (Size - 3);
Dest[Y++] = (Key >> 8) & 0x00ff;
Dest[Y++] = Key & 0x00ff;
X += Size;
Command = (Command << 1) + 1;
}
else { Dest[Y++] = Source[X++];
Command = (Command << 1);
}
Bit++;
}
Command <<= (16-Bit);
Dest[Z++] = (Command >> 8) & 0x00ff;
Dest[Z] = Command & 0x00ff;
if (Y > SourceSize)
{ for(Y = 0; Y < SourceSize; Dest[Y+1] = Source[Y++]);
Dest[0] = FLAG_Copied;
return(SourceSize+1);
}
return(Y);
}
uword Decompression(Source,Dest,SourceSize)
ubyte *Source,*Dest;
uword SourceSize;
{ uword X = 3;
uword Y = 0;
uword Pos,Size,K;
uword Command = (Source[1] << 8) + Source[2];
ubyte Bit = 16;
if (Source[0] == FLAG_Copied)
{ for (Y = 1; Y < SourceSize; Dest[Y-1] = Source[Y++]);
return(SourceSize-1);
}
for (; X < SourceSize;)
{ if (Bit == 0)
{ Command = (Source[X++] << 8);
Command += Source[X++];
Bit = 16;
}
if (Command & 0x8000)
{ Pos = (Source[X++] << 4);
Pos += (Source[X] >> 4);
if (Pos)
{ Size = (Source[X++] & 0x0f)+3;
for (K = 0; K < Size; K++)
Dest[Y+K] = Dest[Y-Pos+K];
Y += Size;
}
else { Size = (Source[X++] << 8);
Size += Source[X++] + 16;
for (K = 0; K < Size; Dest[Y+K++] =
Source[X]);
X++;
Y += Size;
}
}
else Dest[Y++] = Source[X++];
Command <<= 1;
Bit--;
}
return(Y);
}
_____________________________________________________________________
COMPRESS.C A small demonstration program
_____________________________________________________________________
/***********************************************************************
** **
** COMPRESS.C A fast compression utility **
** **
***********************************************************************/
#include <stdio.h>
#include "lzrw1kh.h"
#define OurIdentifier 0xa5b6c7d8
#define Header "\
\n\
This program is only purpose is to demonstrate the working of the\n\
LZRW1KH (de)compression routines which can be found in the header\n\
file LZRW1KH.h ! This program simply takes two parameters :\n\
the name of a source file and the name of the destination file.\n\
If the source file has already been compressed, it will be de-\n\
compressed and stored into the destination file. Otherwise the\n\
process will compress the file !\n\
\n\
Usage : COMPRESS <Source> <Destination>\n\
\n\
Program and compression routines written by Kurt Haenen.\n\
\n"
main(ac,av)
int ac;
char *av[];
{ FILE *FP1,*FP2;
ulong Identifier;
ubyte Source[16388],Dest[16388];
uword SourceSize,DestSize;
printf(Header);
if (ac != 3)
{ printf("Illegal number of parameters !\n");
exit(1);
}
if ((FP1 = fopen(av[1],"rb")) == NULL)
{ printf("File %s not found !\n",av[1]);
exit(1);
}
if ((FP2 = fopen(av[2],"wb")) == NULL)
{ fclose(FP1);
printf("Couldn't open %s for output !\n",av[2]);
exit(1);
}
if (((SourceSize = fread(&Identifier,sizeof(ulong),1,FP1)) != 1)
|| (Identifier != OurIdentifier))
{ Identifier = OurIdentifier;
printf("Trying to compress %s to %s !\n",av[1],av[2]);
puts("writing filesize");
if (fwrite(&Identifier,sizeof(ulong),1,FP2) != 1)
{ fclose(FP1);
fclose(FP2);
printf("Couldn't write to %s !\n",av[2]);
exit(1);
}
if (fseek(FP1,0L,SEEK_SET) != 0)
{ fclose(FP1);
fclose(FP2);
printf("Couldn't seek in %s !\n",av[1]);
exit(1);
}
for (; (SourceSize = fread(Source,1,16384,FP1)) != 0;)
{ DestSize = Compression(Source,Dest,SourceSize);
puts("writing block to disk");
if ((fwrite(&DestSize,sizeof(uword),1,FP2) != 1)
|| (fwrite(Dest,1,DestSize,FP2) != DestSize))
{ fclose(FP1);
fclose(FP2);
printf("Couldn't write to %s !\n",av[2]);
exit(1);
}
}
}
else { printf("Trying to decompress %s to %s !\n",av[1],av[2]);
for (; (fread(&SourceSize,sizeof(uword),1,FP1) == 1);)
{ if (fread(Source,1,SourceSize,FP1) != SourceSize)
{ fclose(FP1);
fclose(FP2);
printf("Unexspected EOF in file %s
!\n",av[1]);
exit(1);
}
DestSize = Decompression(Source,Dest,SourceSize);
if (fwrite(Dest,1,DestSize,FP2) != DestSize)
{ fclose(FP1);
fclose(FP2);
printf("Couldn't write to %s !\n",av[2]);
exit(1);
}
}
}
fclose(FP1);
fclose(FP2);
}
_____________________________________________________________________
Ok, that's it for C ! Bug reports are always welcome at :
GHGAEA8 @ BLEKUL11. I'm trying to post the new version for
Turbo Pascal, but VM doesn't appreciate the TABS I used, so
that may take a while ... Oh, of course : other comments are
also welcome at the address listed above. I'll try to answer
all the mails, although the tests are coming up ...
Kurt Haenen