[comp.binaries.ibm.pc] v02i098: flzh_rn.c, faster lzhuf

nelson@sun.soe.clarkson.edu (Russ Nelson) (05/23/89)

Checksum: 2850493429  (Verify with "brik -cv")
Posting-number: Volume 02, Issue 098
Submitted-by: Russ Nelson <nelson@sun.soe.clarkson.edu>
Archive-name: flzh/flzh_rn.c

/*
Here is another text posting.  It speaks for itself.  Just in case
others come up with further revisions, I have named this one
"flzh_rn.c" after "Faster LZHuf by Russ Nelson".   Note that I have
added the copyright statement as requested by Kenji Rikitake in an
earlier Usenet article.  There seems to be a compiler dependency here,
because it won't execute correctly if compiled with Turbo C 1.0, but
Russ Nelson tells me it does work with later versions of Turbo C,
1.5 or 2, but I don't remember which.  -- R.D
*/

#ifdef USE_ASM
#pragma inline
#endif

/*
LZHUF.C (c)1989 by Haruyasu Yoshizaki, Haruhiko Okumura, and Kenji Rikitake.
All rights reserved. Permission granted for non-commercial use.
*/

/*
 * LZHUF.C English version 1.0
 * Based on Japanese version 29-NOV-1988
 * LZSS coded by Haruhiko OKUMURA
 * Adaptive Huffman Coding coded by Haruyasu YOSHIZAKI
 * Edited and translated to English by Kenji RIKITAKE
 * Assembly language added by Russell Nelson (nelson@clutx.clarkson.edu)
 *   Makes it 1.56 times faster in compression,
 *   and 1.53 times faster in decompression.
 * Warning!  If you change anything, verify that the register use doesn't
 * change.
 * Some C optimization added by Russell Nelson.
 */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>

/* These values are Turbo-C dependent;
   EXIT_SUCCESS, EXIT_FAILURE
   renamed by Kenji */

/*
 * Process exit statuses used by main() and Error().
 * EXIT_FAILED is parenthesized so the negative literal always expands as a
 * single operand, whatever expression the macro is used in.
 */
#define EXIT_OK 0
#define EXIT_FAILED (-1)

/* Input and output streams; main() opens them before any coding starts. */
FILE  *infile, *outfile;

/*
 * textsize   - bytes of original (uncompressed) text handled so far
 * codesize   - bytes of compressed output written by the encoder
 * printcount - next byte count at which a progress line is printed
 */
unsigned long int  textsize = 0, codesize = 0, printcount = 0;

/*
 * Print a fatal error message on standard output (preceded and followed by
 * a newline) and terminate the program with status EXIT_FAILED.
 *
 * message: NUL-terminated text to print; it is only read, never modified,
 *          so string literals may be passed safely.
 * Does not return.
 */
void Error(const char *message)
{
	printf("\n%s\n", message);
	exit(EXIT_FAILED);
}

/* LZSS Parameters */

#define N		4096	/* Size of string (ring) buffer; indices wrap with & (N - 1) */
#define F		60	/* Size of look-ahead buffer; also the longest match length */
#define THRESHOLD	2	/* A match is coded as (length, position) only if longer than this */
#define NIL		N	/* End of tree's node ("no node" marker) */

/*
 * text_buf is the ring buffer.  The extra F - 1 bytes mirror the first
 * F - 1 bytes (see Encode) so a string comparison starting near the end
 * of the ring never has to wrap.
 */
unsigned char
		text_buf[N + F - 1];
/*
 * match_position / match_length are set by InsertNode.
 * lson[], rson[] and dad[] hold the left child, right child and parent of
 * each node.  rson[N + 1 .. N + 256] are the roots of 256 trees, one per
 * first byte (see InitTree and InsertNode).
 */
int		match_position, match_length,
		lson[N + 1], rson[N + 257], dad[N + 1];

/*
 * Reset the search trees to the empty state: every one of the 256 root
 * slots (rson[N + 1] .. rson[N + 256]) has no child, and every ring-buffer
 * position 0 .. N - 1 is marked as not being in any tree.
 */
void InitTree(void)
{
	int  node;

	/* empty roots */
	node = N + 1;
	while (node <= N + 256)
		rson[node++] = NIL;

	/* unregistered nodes */
	node = N;
	while (node-- > 0)
		dad[node] = NIL;
}

/*
 * Insert the string that starts at text_buf[r] into one of the 256 binary
 * search trees; the tree is chosen by the first byte of the string
 * (root slot N + 1 + text_buf[r]).
 *
 * While walking down the tree the best match seen so far is recorded in
 * the globals match_length and match_position (stored as
 * ((r - p) & (N - 1)) - 1).  Only matches longer than THRESHOLD are
 * recorded, and among matches of equal length the smaller position value
 * is kept.
 *
 * If no node matches all F bytes, r is hung below the last node visited and
 * the function returns from inside the loop.  If a node p matches all F
 * bytes, the loop is left with break and r takes over p's place in the
 * tree; p is then marked unregistered (dad[p] = NIL).
 */
void InsertNode(int r)  /* Inserting node to the tree */
{
	int  i, p, cmp;
	unsigned char  *key, keychar;
	unsigned c;

	cmp = 1;			/* first step always goes to the right child */
	key = &text_buf[r];
	keychar = key[1];		/* second byte; only read by the asm version */
	p = N + 1 + key[0];		/* root slot for this first byte */
	rson[r] = lson[r] = NIL;
	match_length = 0;

#ifdef USE_ASM

/* for speed's sake, we use a bunch of hacks.  If you change this code, be
 * sure to tcc -S it before you attempt to run it.  If you can't figure
 * out what something's doing, look at the standard C version of it in the
 * #else clause.
 */

/* NOTE(review): after lahf the sign flag is bit 7 of AH, i.e. 0x8000 in AX;
 * 0x1000 is the auxiliary-carry bit.  Confirm against the Intel LAHF
 * description whether this mask is really the one intended.
 */
#define SF 0x1000			/* 8086 sign flag */

/* We're going to hold p in _SI.  Turbo C would ordinarily put it in a
 * register for us, but it refuses to do so if it sees any mention of
 * the register, either in a 'asm' statement or the _SI pseudovariable.
 * When we actually use _SI, we push it first.
 *
 * Similarly for r in _DI.  Note that the algorithm doesn't change r.
 */

	_SI = p;
#define p _SI
	_DI = r;
#define r _DI

	_ES = _DS;			/* we're going to use cmpsb */
	asm cld

/* many times the initial characters don't match, so we spend a fair amount
 * of time in the following unstructured code.
 */

	for ( ; ; ) {
		if ((cmp & SF) == 0) {
right:
			/* bx + si == 2 * p: byte offset of the 16-bit entry rson[p] */
			asm mov bx,si
			asm mov ax,rson[bx+si]
			if (_AX != NIL) {
				asm mov si,ax;
				/* compare the second bytes before paying for cmpsb */
				asm mov al,text_buf[si+1];
				asm cmp keychar,al;
				asm jg right;
				asm jl left;
			} else {
				rson[p] = r;
				dad[r] = p;
				return;
			}
		} else {
left:
			/* same as above, following the left child */
			asm mov bx,si
			asm mov ax,lson[bx+si]
			if (_AX != NIL) {
				asm mov si,ax;
				asm mov al,text_buf[si+1];
				asm cmp keychar,al;
				asm jg right;
				asm jl left;
			} else {
				lson[p] = r;
				dad[r] = p;
				return;
			}
		}
equal:
		/* save p and r; _SI and _DI are needed as the cmpsb operands */
		asm push si
		asm push di
		_DI = (unsigned) &text_buf[p+1];
		_SI = (unsigned) &key[1];
		_CX = F - 1;
/* The semantics of cmpsb are not well understood.  Every comparison decrements
 * _CX and bumps _SI and _DI.  If the values compared are equal and _CX <> 0
 * then the cmpsb repeats.  Otherwise the flags are set to the result of the
 * comparison.  The consequence of this is that the only way to determine
 * whether the entire string was equal is to check the flags.  If the two
 * strings are identical up to the last character, _CX will be zero
 * whether or not the last characters match.
 *
 * The Microsoft Macro Assembler 5.0 Reference Booklet gets it wrong, even
 * though Intel documents it very precisely and accurately.  Boo!  Hiss!
 *
 * If _CX is zero before the cmpsb, the flags are unchanged.  This affects
 * the interpretation of zero length strings.  Are they equal or different?
 * If you wish them to be equal, you can "or cx,cx".  If you wish them to
 * be different you can "or sp,sp".  In a subroutine, sp is guaranteed to
 * be nonzero.
 */
		asm	repe cmpsb	/* 7% of runtime is spent here! */
/* remember the sign flag to see if it was larger or smaller */
		asm lahf
		cmp = _AX;
/* if it matched, we want _CX to be zero */
		asm je matched;
		_CX++;
matched:
		i = F - _CX;		/* number of leading bytes that matched */
		asm pop di;
		asm pop si;
		if (i > THRESHOLD) {
			if (i > match_length) {
				match_position = ((r - p) & (N - 1)) - 1;
				if ((match_length = i) >= F)
					break;	/* full-length match: replace p by r below */
			}
			if (i == match_length) {
				/* relies on the compiler leaving the compared value in _AX */
				if (((r - p) & (N - 1)) - 1 < match_position) {
					match_position = _AX;
				}
			}
		}
	}
#else
	for ( ; ; ) {
		/* descend: right when the key compared greater or equal, else left */
		if (cmp >= 0) {
			if (rson[p] != NIL)
				p = rson[p];
			else {
				rson[p] = r;
				dad[r] = p;
				return;
			}
		} else {
			if (lson[p] != NIL)
				p = lson[p];
			else {
				lson[p] = r;
				dad[r] = p;
				return;
			}
		}
		/* the first bytes are equal (same tree), so start comparing at 1 */
		for (i = 1; i < F; i++)
			if ((cmp = key[i] - text_buf[p + i]) != 0)
				break;
		if (i > THRESHOLD) {
			if (i > match_length) {
				match_position = ((r - p) & (N - 1)) - 1;
				if ((match_length = i) >= F)
					break;	/* full-length match: replace p by r below */
			}
			if (i == match_length) {
				/* equal length: keep the smaller position value */
				if ((c = ((r - p) & (N - 1)) - 1) < match_position) {
					match_position = c;
				}
			}
		}
	}
#endif
	/* r takes over p's links, then p is dropped from the tree */
	dad[r] = dad[p];
	lson[r] = lson[p];
	rson[r] = rson[p];
	dad[lson[p]] = r;
	dad[rson[p]] = r;
	if (rson[dad[p]] == p)
		rson[dad[p]] = r;
	else
		lson[dad[p]] = r;
	dad[p] = NIL;  /* remove p */
#undef p
#undef r
}

/*
 * Remove node p from its search tree.  Nothing happens when p is not
 * registered (dad[p] == NIL).
 *
 * A node with at most one child is replaced by that child (possibly NIL).
 * A node with two children is replaced by the rightmost node of its left
 * subtree, which is first unlinked from its old position.
 */
void DeleteNode(int p)
{
	int  repl;		/* node that takes p's place */
	int  parent;

	if (dad[p] == NIL)
		return;			/* unregistered */

	if (rson[p] == NIL) {
		repl = lson[p];
	} else if (lson[p] == NIL) {
		repl = rson[p];
	} else {
		repl = lson[p];
		if (rson[repl] != NIL) {
			/* walk to the rightmost node of the left subtree */
			while (rson[repl] != NIL)
				repl = rson[repl];
			/* detach it; its left child moves up into its place */
			rson[dad[repl]] = lson[repl];
			dad[lson[repl]] = dad[repl];
			/* and let it adopt p's left subtree */
			lson[repl] = lson[p];
			dad[lson[p]] = repl;
		}
		rson[repl] = rson[p];
		dad[rson[p]] = repl;
	}

	/* hook the replacement under p's parent, on the side p was on */
	parent = dad[p];
	dad[repl] = parent;
	if (rson[parent] == p)
		rson[parent] = repl;
	else
		lson[parent] = repl;
	dad[p] = NIL;
}

/* Huffman coding parameters */

/*
 * Number of symbols in the Huffman alphabet: codes below 256 are literal
 * bytes, the remaining codes stand for match lengths (Encode emits
 * 255 - THRESHOLD + match_length).
 */
#define N_CHAR  	(256 - THRESHOLD + F)
				/* character code (= 0..N_CHAR-1) */
#define T 		(N_CHAR * 2 - 1)	/* Size of table */
#define R 		(T - 1)			/* root position */
#define MAX_FREQ	0x8000
					/* the tree is rebuilt (reconst) when the */
					/* root's cumulative frequency reaches this value */

/* Shorthand for a byte-sized unsigned value. */
typedef unsigned char uchar;

/*
 * Tables for encoding/decoding upper 6 bits of
 * sliding dictionary pointer
 */
/* encoder table */
/*
 * p_len[i] is the number of bits EncodePosition passes to Putcode for the
 * prefix code of upper-6-bit position value i.
 */
uchar p_len[64] = {
	0x03, 0x04, 0x04, 0x04, 0x05, 0x05, 0x05, 0x05,
	0x05, 0x05, 0x05, 0x05, 0x06, 0x06, 0x06, 0x06,
	0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06,
	0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07,
	0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07,
	0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07,
	0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08,
	0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08
};

/*
 * p_code[i] holds the prefix code for upper-6-bit position value i,
 * left-aligned in a byte; EncodePosition shifts it into the top byte of a
 * 16-bit word and emits its first p_len[i] bits.
 */
uchar p_code[64] = {
	0x00, 0x20, 0x30, 0x40, 0x50, 0x58, 0x60, 0x68,
	0x70, 0x78, 0x80, 0x88, 0x90, 0x94, 0x98, 0x9C,
	0xA0, 0xA4, 0xA8, 0xAC, 0xB0, 0xB4, 0xB8, 0xBC,
	0xC0, 0xC2, 0xC4, 0xC6, 0xC8, 0xCA, 0xCC, 0xCE,
	0xD0, 0xD2, 0xD4, 0xD6, 0xD8, 0xDA, 0xDC, 0xDE,
	0xE0, 0xE2, 0xE4, 0xE6, 0xE8, 0xEA, 0xEC, 0xEE,
	0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7,
	0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xFF
};

/* decoder table */
/*
 * d_code[b] is the upper-6-bit position value selected by the next eight
 * input bits b; DecodePosition shifts it left by 6 to form the high part
 * of the position.
 */
uchar d_code[256] = {
	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
	0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
	0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
	0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
	0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
	0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03,
	0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03,
	0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04,
	0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05,
	0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06,
	0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07,
	0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08,
	0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09,
	0x0A, 0x0A, 0x0A, 0x0A, 0x0A, 0x0A, 0x0A, 0x0A,
	0x0B, 0x0B, 0x0B, 0x0B, 0x0B, 0x0B, 0x0B, 0x0B,
	0x0C, 0x0C, 0x0C, 0x0C, 0x0D, 0x0D, 0x0D, 0x0D,
	0x0E, 0x0E, 0x0E, 0x0E, 0x0F, 0x0F, 0x0F, 0x0F,
	0x10, 0x10, 0x10, 0x10, 0x11, 0x11, 0x11, 0x11,
	0x12, 0x12, 0x12, 0x12, 0x13, 0x13, 0x13, 0x13,
	0x14, 0x14, 0x14, 0x14, 0x15, 0x15, 0x15, 0x15,
	0x16, 0x16, 0x16, 0x16, 0x17, 0x17, 0x17, 0x17,
	0x18, 0x18, 0x19, 0x19, 0x1A, 0x1A, 0x1B, 0x1B,
	0x1C, 0x1C, 0x1D, 0x1D, 0x1E, 0x1E, 0x1F, 0x1F,
	0x20, 0x20, 0x21, 0x21, 0x22, 0x22, 0x23, 0x23,
	0x24, 0x24, 0x25, 0x25, 0x26, 0x26, 0x27, 0x27,
	0x28, 0x28, 0x29, 0x29, 0x2A, 0x2A, 0x2B, 0x2B,
	0x2C, 0x2C, 0x2D, 0x2D, 0x2E, 0x2E, 0x2F, 0x2F,
	0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
	0x38, 0x39, 0x3A, 0x3B, 0x3C, 0x3D, 0x3E, 0x3F,
};

/*
 * d_len[b] is the length in bits of the prefix code that starts the eight
 * input bits b.  DecodePosition reads d_len[b] - 2 further bits after the
 * initial byte, so a position consumes d_len[b] + 6 bits in total.
 */
uchar d_len[256] = {
	0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03,
	0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03,
	0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03,
	0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03,
	0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04,
	0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04,
	0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04,
	0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04,
	0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04,
	0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04,
	0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05,
	0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05,
	0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05,
	0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05,
	0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05,
	0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05,
	0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05,
	0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05,
	0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06,
	0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06,
	0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06,
	0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06,
	0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06,
	0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06,
	0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07,
	0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07,
	0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07,
	0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07,
	0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07,
	0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07,
	0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08,
	0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08,
};

/*
 * Cumulative frequency of every tree node.  The extra entry freq[T] is a
 * sentinel set to 0xffff by StartHuff so the search in update() stops.
 */
unsigned freq[T + 1];	/* cumulative freq table */

/*
 * pointing parent nodes.
 * area [T..(T + N_CHAR - 1)] are pointers for leaves
 */
int prnt[T + N_CHAR];

/*
 * pointing children nodes (son[], son[] + 1).
 * A value >= T marks a leaf; the symbol is that value minus T.
 */
int son[T];

/*
 * Bit-reader state shared by GetBit, GetByte, DecodeChar and
 * DecodePosition: getbuf holds the pending input bits, next bit in
 * bit 15, and getlen counts how many of them are valid.
 */
unsigned getbuf = 0;
uchar getlen = 0;

/*
 * Read one bit from infile through the getbuf/getlen bit buffer.
 *
 * The buffer is refilled a byte at a time while it holds eight bits or
 * fewer; once the input is exhausted, zero bytes are supplied instead.
 * The next bit is always bit 15 of getbuf.  It is picked out with an
 * explicit mask and getbuf is kept to a 16-bit window, so the result does
 * not depend on the width of int.
 *
 * Returns 1 or 0.
 */
int GetBit(void)	/* get one bit */
{
	int ch;
	int bit;

	while (getlen <= 8) {
		ch = getc(infile);
		if (ch == EOF)
			ch = 0;		/* pad with zero bits past end of input */
		/* shift as unsigned: a byte moved into bit 15 must not overflow int */
		getbuf |= (unsigned)ch << (8 - getlen);
		getlen += 8;
	}
	bit = (getbuf & 0x8000u) != 0;
	getbuf = (getbuf << 1) & 0xFFFFu;
	getlen--;
	return bit;
}

/*
 * Read the next eight bits from infile through the getbuf/getlen bit
 * buffer and return them as a value 0..255.
 *
 * The getc() result is kept in an int so that EOF can actually be
 * detected; past the end of input, zero bytes are supplied, matching
 * GetBit.  The portable path masks explicitly so that the result and
 * getbuf stay within 8 and 16 bits whatever the width of unsigned.
 */
int GetByte(void)	/* get a byte */
{
	int c;

	while (getlen <= 8) {
		c = getc(infile);
		if (c == EOF)
			c = 0;		/* pad with zero bits past end of input */
		getbuf |= (unsigned)c << (8 - getlen);
		getlen += 8;
	}
#ifdef USE_ASM
	/* take the high byte of getbuf, then move the low byte up */
	_AX = *(((unsigned char *)&getbuf)+1);
	_BX = getbuf;
	_BH = _BL;
	_BL = 0;
	asm mov getbuf,bx;
	getlen -= 8;
	return _AX;
#else
	c = (int)((getbuf >> 8) & 0xFFu);
	getbuf = (getbuf << 8) & 0xFFFFu;
	getlen -= 8;
	return c;
#endif
}

/*
 * Bit-writer state: putbuf collects pending output bits from bit 15
 * downward, putlen is the number of pending bits.
 */
unsigned putbuf = 0;
uchar putlen = 0;

/*
 * Append the top l bits of c to the output bit stream.
 *
 * l: number of bits to output
 * c: the bits, left-aligned at bit 15
 *
 * Complete bytes are written to outfile and counted in codesize; any
 * remaining bits stay in putbuf for the next call.
 */
void Putcode(int l, unsigned c)
{
	putbuf |= c >> putlen;
	putlen += l;
	if (putlen < 8)
		return;			/* still less than one byte pending */

	putc(putbuf >> 8, outfile);
	putlen -= 8;
	if (putlen < 8) {
		/* exactly one byte went out */
		putbuf <<= 8;
		codesize++;
		return;
	}

	/* a second full byte is ready as well */
	putc(putbuf, outfile);
	codesize += 2;
	putlen -= 8;
	/* reload the bits of c that did not fit into the two bytes */
	putbuf = c << (l - putlen);
}


/* initialize freq tree */

/*
 * Build the initial Huffman tree: every symbol gets frequency 1, and the
 * internal nodes are created by pairing consecutive nodes from the bottom
 * up.  freq[T] is set to 0xffff as a sentinel and the root's parent is 0.
 */
void StartHuff()
{
	int leaf, child, node;

	/* leaves: node n carries symbol n; prnt[] beyond T maps symbol -> node */
	for (leaf = 0; leaf < N_CHAR; leaf++) {
		freq[leaf] = 1;
		son[leaf] = leaf + T;
		prnt[leaf + T] = leaf;
	}

	/* internal nodes: each joins the next two not yet parented nodes */
	for (child = 0, node = N_CHAR; node <= R; child += 2, node++) {
		freq[node] = freq[child] + freq[child + 1];
		son[node] = child;
		prnt[child] = prnt[child + 1] = node;
	}

	freq[T] = 0xffff;
	prnt[R] = 0;
}


/* reconstruct freq tree */

/*
 * Rebuild the Huffman tree from scratch with halved leaf frequencies.
 * update() calls this when the root frequency has reached MAX_FREQ.
 *
 * Step 1 packs the leaves at the front of freq[]/son[] with their
 *        frequencies halved (rounded up).
 * Step 2 recreates the internal nodes, inserting each one at the place
 *        that keeps freq[] ordered.
 * Step 3 recomputes prnt[] from son[].
 *
 * The amount of data shifted in step 2 is computed from the element
 * sizes of freq[] and son[] rather than assuming two-byte elements.
 */
void reconst()
{
	int i, j, k;
	unsigned f;
	size_t n;

	/* halve cumulative freq for leaf nodes */
	j = 0;
	for (i = 0; i < T; i++) {
		if (son[i] >= T) {
			freq[j] = (freq[i] + 1) / 2;
			son[j] = son[i];
			j++;
		}
	}
	/* make a tree : first, connect children nodes */
	for (i = 0, j = N_CHAR; j < T; i += 2, j++) {
		k = i + 1;
		f = freq[j] = freq[i] + freq[k];
		/* find the slot k at which f keeps freq[] ordered */
		for (k = j - 1; f < freq[k]; k--);
		k++;
		/* open a gap at k by moving entries k..j-1 up by one */
		n = (size_t)(j - k);
		(void)memmove(&freq[k + 1], &freq[k], n * sizeof freq[0]);
		freq[k] = f;
		(void)memmove(&son[k + 1], &son[k], n * sizeof son[0]);
		son[k] = i;
	}
	/* connect parent nodes */
	for (i = 0; i < T; i++) {
		if ((k = son[i]) >= T) {
			prnt[k] = i;
		} else {
			prnt[k] = prnt[k + 1] = i;
		}
	}
}


/* update freq tree */

/*
 * Count one more occurrence of symbol c.
 *
 * Starting at the leaf for c (prnt[c + T]) and following prnt[] until it
 * yields 0, each node's frequency is incremented.  Whenever the new
 * frequency exceeds that of the next node, the node is swapped with the
 * last node whose frequency is still smaller, and the son[]/prnt[] links
 * of both are fixed up, so the table stays freq-ordered.
 *
 * If the root frequency has reached MAX_FREQ the tree is rebuilt with
 * reconst() first.
 */
void update(int c)
{
	register int k, l;
	int i, j;

	if (freq[R] == MAX_FREQ) {
		reconst();
	}
#ifdef USE_ASM
#define k _DX				/* _DX is safe to use. */
	_SI = prnt[c + T];
#define	c _SI
	do {
	more_k:
		k = ++freq[c];
		/* NOTE(review): relies on the compiler leaving the byte offset
		 * of freq[c] in bx after the increment above -- verify with
		 * tcc -S before changing anything here.
		 */
		asm	cmp	dx,word ptr DGROUP:_freq+2[bx];
		asm	ja	start;
		asm	mov	si,word ptr DGROUP:_prnt[bx];
		asm	or	si,si;
		asm	jne	more_k;
		break;
	start:
		_BX = (unsigned)&freq[c+1];
	again:
		/* scan freq[] two entries per pass for the first one >= k */
		asm cmp dx,[bx]
		asm jbe done
		_BX += 4;
		asm cmp dx,[bx-2]
		asm ja again
		_BX -= 2;
	done:
		/* turn the pointer back into an index into freq[] */
		_BX -= (unsigned) &freq;
		l = _BX >> 1;
#else
	c = prnt[c + T];
	do {
		/* keep the outer loop together so stupid compilers
		 * can optimize.
		 */
		do {
			k = ++freq[c];
			/* swap nodes to keep the tree freq-ordered */
			if (k > freq[c + 1]) goto start;
		} while ((c = prnt[c]) != 0);
		break;
	start:
		l = c + 1;
		/* this is the inner loop -- unroll it a few times */
		while (k > freq[++l] &&
		       k > freq[++l] &&
		       k > freq[++l]);
#endif
		/* step back to the last node whose frequency is below k */
		l--;
		freq[c] = freq[l];
		freq[l] = k;

		/* move c's children under l ... */
		i = son[c];
		prnt[i] = l;
		if (i < T) prnt[i + 1] = l;

		j = son[l];
		son[l] = i;

		/* ... and l's former children under c */
		prnt[j] = c;
		if (j < T) prnt[j + 1] = c;
		son[c] = j;

		c = l;
	} while ((c = prnt[c]) != 0);	/* do it until reaching the root */
#undef k
#undef c
}

/* Code bits and code length of the symbol most recently encoded. */
unsigned code, len;

/*
 * Emit the Huffman code of symbol c and then update the adaptive tree.
 *
 * The code is collected by climbing from the leaf towards the root: each
 * step shifts the bits gathered so far right by one and sets bit 15 when
 * the current node index is odd.  The result is left-aligned, as Putcode
 * expects.
 */
void EncodeChar(unsigned c)
{
	unsigned bits = 0;
	int nbits = 0;
	int node = prnt[c + T];

	/* climb from the leaf until the root is reached */
	for (;;) {
		bits >>= 1;
		/* odd node index -> 1 bit, even -> 0 bit */
		if (node & 1)
			bits += 0x8000;
		nbits++;
		node = prnt[node];
		if (node == R)
			break;
	}

	Putcode(nbits, bits);
	code = bits;
	len = nbits;
	update(c);
}

/*
 * Emit a match position c: the upper six bits go out as a table-driven
 * prefix code (p_code/p_len), the lower six bits are written verbatim.
 */
void EncodePosition(unsigned c)
{
	unsigned hi = c >> 6;
	unsigned lo = c & 0x3f;

	/* prefix code, moved into the top byte of the 16-bit word */
	Putcode(p_len[hi], (unsigned)p_code[hi] << 8);

	/* six raw bits, left-aligned */
	Putcode(6, lo << 10);
}

/*
 * Flush the bit writer: if any bits are still pending, write them out as
 * one final byte and count it in codesize.
 */
void EncodeEnd()
{
	if (putlen == 0)
		return;

	putc(putbuf >> 8, outfile);
	codesize++;
}

/*
 * Decode one Huffman symbol from the input and update the adaptive tree.
 *
 * Returns the symbol, 0 .. N_CHAR - 1.
 *
 * For speed, bits are taken straight from getbuf while it still holds
 * valid bits; GetBit() is called only when a refill is needed.  The
 * portable path tests bit 15 with a mask: getbuf is unsigned, so a
 * "< 0" comparison could never be true.
 */
int DecodeChar()
{
	unsigned c;
	c = son[R];

	/*
	 * start searching tree from the root to leaves.
	 * choose node #(son[]) if input bit == 0
	 * else choose #(son[]+1) (input bit == 1)
	 */
	while (c < T) {
		if(getlen){
			getlen--;
#ifdef USE_ASM
			/* the bit shifted out of getbuf lands in the carry flag */
			getbuf<<=1;
			asm jnc zerobit;
			c++;
		zerobit:;
#else
			if (getbuf & 0x8000u)
				c++;
			getbuf = (getbuf << 1) & 0xFFFFu;
#endif
		} else
			c += GetBit();
		c = son[c];
	}
	c -= T;		/* leaf value -> symbol */
	update(c);
	return c;
}

/*
 * Decode a match position written by EncodePosition.
 *
 * The next eight input bits select, via d_code/d_len, the upper six bits
 * of the position and the length of the prefix code that produced them.
 * Then d_len - 2 more bits are shifted in, after which the low six bits
 * of the accumulator are the position's lower six bits.
 *
 * The portable path tests bit 15 of getbuf with a mask: getbuf is
 * unsigned, so a "< 0" comparison could never be true.
 */
int DecodePosition()
{
	unsigned i, j, c;

	/* decode upper 6 bits from given table */
	i = GetByte();
	c = (unsigned)d_code[i] << 6;
	j = d_len[i];

	/* input lower 6 bits directly */
	j -= 2;
	while (j--) {
		i <<= 1;
		if(getlen){
			getlen--;
#ifdef USE_ASM
			/* the bit shifted out of getbuf lands in the carry flag */
			getbuf<<=1;
			asm jnc zerobit;
			i++;
		zerobit:;
#else
			if (getbuf & 0x8000u)
				i++;
			getbuf = (getbuf << 1) & 0xFFFFu;
#endif
		} else
			i += GetBit();
	}
	return c | (i & 0x3f);
}

/* Compression */

void Encode(void)  /* Encoding/Compressing */
{
	int  i, c, len, r, s, last_match_length;

	fseek(infile, 0L, 2);
	textsize = ftell(infile);
	if (fwrite(&textsize, sizeof textsize, 1, outfile) < 1)
		Error("Unable to write");	/* write size of original text */
	if (textsize == 0)
		return;
	rewind(infile);
	textsize = 0;			/* rewind and rescan */
	StartHuff();
	InitTree();
	s = 0;
	r = N - F;
	for (i = s; i < r; i++)
		text_buf[i] = ' ';
	for (len = 0; len < F && (c = getc(infile)) != EOF; len++)
		text_buf[r + len] = c;
	textsize = len;
	for (i = 1; i <= F; i++)
		InsertNode(r - i);
	InsertNode(r);
	do {
		if (match_length > len)
			match_length = len;
		if (match_length <= THRESHOLD) {
			match_length = 1;
			EncodeChar(text_buf[r]);
		} else {
			EncodeChar(255 - THRESHOLD + match_length);
			EncodePosition(match_position);
		}
		last_match_length = match_length;
		for (i = 0; i < last_match_length &&
				(c = getc(infile)) != EOF; i++) {
			DeleteNode(s);
			text_buf[s] = c;
			if (s < F - 1)
				text_buf[s + N] = c;
			s = (s + 1) & (N - 1);
			r = (r + 1) & (N - 1);
			InsertNode(r);
		}
		if ((textsize += i) > printcount) {
			printf("%12ld\r", textsize);
			printcount += 1024;
		}
		while (i++ < last_match_length) {
			DeleteNode(s);
			s = (s + 1) & (N - 1);
			r = (r + 1) & (N - 1);
			if (--len) InsertNode(r);
		}
	} while (len > 0);
	EncodeEnd();
	printf("input: %ld bytes\n", textsize);
	printf("output: %ld bytes\n", codesize);
	printf("output/input: %.3f\n", (double)codesize / textsize);
}

/*
 * Decompress infile to outfile.
 *
 * Reads the original size written by Encode(), then decodes symbols until
 * that many bytes have been produced.  Symbols below 256 are literal
 * bytes; larger symbols carry a match length and are followed by a
 * position in the ring buffer.
 *
 * Progress is printed about every 4096 output bytes.  A missing size
 * field ends the program through Error().
 */
void Decode(void)  /* Decoding/Uncompressing */
{
	int  i, j, k, r, c;
	unsigned long int  count;

	if (fread(&textsize, sizeof textsize, 1, infile) < 1)
		Error("Unable to read");  /* read size of original text */
	if (textsize == 0)
		return;
	StartHuff();
	/* same initial ring contents as the encoder */
	for (i = 0; i < N - F; i++)
		text_buf[i] = ' ';
	r = N - F;
	for (count = 0; count < textsize; ) {
		c = DecodeChar();
		if (c < 256) {
			putc(c, outfile);
			text_buf[r++] = c;
			r &= (N - 1);
			count++;
		} else {
			i = (r - DecodePosition() - 1) & (N - 1);
			j = c - 255 + THRESHOLD;
			/*
			 * Fast path: neither range wraps around the ring and
			 * the copy never reads a byte that it has itself
			 * written, so the match can be moved as one block.
			 * The ranges may still overlap when i >= r, hence
			 * memmove rather than memcpy.
			 */
			if (r + j < N
			 && i + j < N
			 && (i + j <= r || i >= r)
#ifdef __TURBOC__
			 && outfile->level < -j){
				/* Turbo C only: copy straight into the stdio buffer */
				memcpy(outfile->curp,
				    memmove(&text_buf[r],&text_buf[i], j),
				    j);
				outfile->curp += j;
				outfile->level += j;
#else
			 ){
				fwrite(memmove(&text_buf[r], &text_buf[i], j),
				    1, j, outfile);
#endif
				r += j;
				count += j;
			} else {
				/* general case: byte by byte, wrapping as needed */
				for (k = i, j += i; k < j; k++) {
					c = text_buf[k & (N - 1)];
					putc(c, outfile);
					text_buf[r++] = c;
					r &= (N - 1);
					count++;
				}
			}
		}
		if (count > printcount) {
			printf("%12lu\r", count);
			printcount += 4096;
		}
	}
	printf("%12lu\n", count);
}

int main(int argc, char *argv[])
{
	char  *s;

	if (argc != 4) {
		printf("Usage:lzhuf e(compression)|d(uncompression)"
			" infile outfile\n");
		return EXIT_FAILED;
	}
	if ((s = argv[1], s[1] || strpbrk(s, "DEde") == NULL)
	 || (s = argv[2], (infile  = fopen(s, "rb")) == NULL)
	 || (s = argv[3], (outfile = fopen(s, "wb")) == NULL)) {
		printf("Trouble with arg %s\n", s);
		return EXIT_FAILED;
	}
	setvbuf(outfile, NULL, _IOFBF, 1<<12);
	setvbuf(infile, NULL, _IOFBF, 1<<12);
	if (toupper(*argv[1]) == 'E')
		Encode();
	else
		Decode();
	fclose(infile);
	fclose(outfile);
	return EXIT_OK;
}