[comp.os.minix] huffman encode/decode

hall@cod.NOSC.MIL (Robert R. Hall) (09/18/89)

This is posted on behave of William Rose


The files sharred in this message are a toolkit I built for experiments in 
file compression.  A word of warning - they were put together fast under 
MSDOS; when I moved them to MINIX I found my version of atof() was broken.  
They compile ok, but I haven't actually run them under MINIX.  I'm posting 
them now because I'm out of time, and must turn my attention to 'The 
Contributions of Administrative Behaviour to Strategic Management' and other 
worthy subjects.

The reference I used was 'Data Compression (2nd. ed)' by Gilbert Held, 
published by Wiley and Sons in 1987.  The book is a fairly effective overview, 
with the notable omission of the LZW algorithm.  There's some code in BASIC 
and Fortran, that looks a bit limited at first glance.

I tried two methods of encoding, the Huffman and the Shannon-Fano.  Both are 
statistical methods which a bit-stream of near-optimal compression, but 
their algorithms are slightly different.  The Huffman code produced the 
shorter weighted average code length, so that was the only one I wrote a 
code/decode routine for.  Since the aim was optimal compression of C code, I 
added a 'strip' program that converts C keywords into single bytes in the 
range 0-32 ASCII; the resulting file can be encoded by the Huffman process 
in the normal way.  Typical results are:

                      File size in bytes.        Percent reduction. 
Initial:                    15488
Post-strip:                 13976                     9.76
Huffman encoded:             9300                    39.95
Endcoded w/o strip:          9568                    38.22

PKPAK 3.61:                  7181                    53.63

As far as I know Pkpak uses the LZW algorithm - it looks as if that's the way 
to go.  However, the 'count' program gave 'space' as the most common character
at 16%, and 'tab' as the next most common at 7.5%.  If 'strip' converted tabs
to spaces, and then encoded runs of spaces in the two-byte form esc/runlength,
considerable savings might be made. 

The enclosed files are:

strip - removes ASCII codes 0-32 w/e tab, cr, lf and replaces the standard C 
keywords by bytes in this range. -u option reverses the operation.

count - collects statistics on files for use by the 'build' program. The -s 
option gives a sorted output in decreasing frequency of ocurrence.  (I think 
the sort routine came from Software Tools, or K&R).

bldhuff - builds a Huffman code from the output of count, producing a file 
used by 'enc' and 'dec'.

bldfano - builds a Shannon-Fano code from the output of count.  Since the 
weighted average code length is greater than that produced by Huffman, no 
encode/decode routines have been written.

enc - encode using Huffman coding.

dec - decode using Huffman coding.

For all programs, the -d option gives more information for eg. debugging.

These programs could easily be improved in both speed and style, but I'm not 
sure that it's worth it; unless I've made some egregious error, the LZW or 
some other recent advance is the way to go.

Good luck - Will

----------- In case of fire, cut along the dotted line and jump ------------
echo x - Makefile
sed '/^X/s///' > Makefile << '/'
X#
X#      makefile for Huffman encode/decode project
X#
Xl=/usr/lib
XCFLAGS=	-f -Di8088
X
Xstrip:	strip.c
X	cc -Di8088 strip.c -o strip
X
Xcount:	count.c
X	cc $(CFLAGS) count.c -o count
X
Xbldhuff:	bldhuff.c
X		cc $(CFLAGS) bldhuff.c -o bldhuff
X
Xbldfano:	bldfano.c
X		cc $(CFLAGS) bldfano.c -o bldfano
X 
Xenc:	enc.c
X	cc $(CFLAGS) enc.c -o enc
X	
Xdec:	dec.c
X	cc $(CFLAGS) dec.c -o dec
X
/
echo x - bldfano.c
sed '/^X/s///' > bldfano.c << '/'
X/* Build Shannon-Fano code, given count of characters - version 0.3 of 12 Sep 89.
X * Part of the Huffman encode/decode project.
X */
X
X#include <stdio.h>
X#include <math.h>
X#include <float.h>
X
X#define MAXLEN 32
X#define FALSE 0
X#define TRUE 1
X				/* #define TEST	insert test code */
Xint Debug ;
X
Xint Charlst[150] ;
Xdouble Charprob[150] ;
Xchar Charcode[150][MAXLEN] ;
Xint Charlen[150] ;
X
X
Xmain(argc, argv)
Xint argc ; char *argv[] ;
X{
X	char c ;
X	char buff[100], s1[20], s2[20], s3[20] ;
X
X	int j, k, m ;
X	int posn, maxposn, maxpath ;
X
X	double tot, avpath ;
X
X	/* initialise variables */
X
X	Debug = 0 ;
X
X	for (j = 0 ; j < 150 ; j++) {
X		Charlst[j] = 0 ;
X		Charprob[j] = 0 ;
X		for (k = 0 ; k < MAXLEN ; k++)
X			Charcode[j][k] = '\0' ; 
X		Charlen[j] = 0 ;
X	}
X
X	/* sort out arguments */
X
X	while (--argc) {
X		if (**(++argv) == '-' && *(*argv + 1) != '\0')
X			switch(*++*argv) {
X				case 'd':
X				case 'D':
X					++Debug ;
X					if (Debug == 2)
X						for (j = 0 ; j < argc ; j++)
X							printf("%s\n", argv[j]) ;
X					break;
X
X				default:
X					error("buildfano [-d]\n", TRUE) ;
X					break ;
X		 	}
X	}
X
X
X 	/* The input file format is: description of character, ASCII value,
X	 * number found, percentage occurrence. Each set takes up one line.
X 	 */
X
X	j = 0 ;
X	while (gets(buff)) {
X		sscanf(buff, "%s %s %s %s", s1, s2, s1, s3) ;
X		Charlst[j] = atoi(s2) ;
X		Charprob[j] = atof(s3) ;
X		j++ ;
X	}
X
X	maxposn = j - 1  ;
X
X	codebit(0, maxposn, 0) ;
X
X	maxpath = avpath = 0 ;
X	for (j = 0 ; j < maxposn + 1 ; j++) {
X		printf("%3d %7.3f %-32s %3d\n",
X			j, Charprob[j], Charcode[j], Charlen[j]) ;
X		if (Charlen[j] > maxpath) maxpath = Charlen[j] ;
X		avpath += Charprob[j] * Charlen[j] ;  
X	}
X	if (Debug > 0)
X		fprintf(stderr, "\nWav. pathlength %7.3f.  Max. pathlength %3d.\n",
X			avpath/maxposn, maxpath) ;			
X
X	exit(0) ;
X}
X
X
X
Xint codebit(start, stop, level)
Xint start, stop, level ; 
X{
X	char c ;
X
X	int j, k, middle ;
X
X	double total, sum ;
X
X	if (level > (MAXLEN - 2))
X		return ;
X
X	/* find the total probability of the current range */
X	total = 0 ;
X	for (j = start ; j <= stop ; j++)
X		total += Charprob[j] ; 
X
X	/* find the half-way point */
X	total = total/2 ;
X	sum = 0 ;
X	for (j = start ; j <= stop ; j++)
X		if ((sum += Charprob[j]) > total)
X			break ;
X	if (j == start)
X		middle = j ;
X	else
X		middle = j - 1 ;
X
X	if (Debug > 1)
Xprintf("Start %3d. Middle %3d. Stop %3d. Level %3d. Sum %7.3f. Total %7.3f\n",
X		 start, middle, stop, level, sum, total) ;
X
X	/* build the code accordingly */
X	for (j = start ; j <= middle ; j++) {
X		Charcode[j][level] = '1' ;
X		Charlen[j]++ ;
X	}
X	for (j = middle + 1 ; j <= stop ; j++) {
X		Charcode[j][level] = '0' ;
X		Charlen[j]++ ;
X	}
X
X	/* sort out the sub-sequences */
X
X	if (start < middle)
X		codebit(start, middle, level + 1) ;
X	if ((middle + 1) < stop)
X		codebit(middle + 1, stop, level + 1) ;
X
X	return ;
X} 
X
X
Xint error(s, f)
Xchar *s ; int f ;
X{
X	fputs(s, stderr) ;
X	if (f)
X		exit (1) ;
X}
/
echo x - bldhuff.c
sed '/^X/s///' > bldhuff.c << '/'
X/* Build Huffman code, given count of characters - version 0.3 of 12 Sep 89.
X * Part of the Huffman encode/decode project.
X */
X
X#include <stdio.h>
X#include <math.h>
X#include <float.h>
X
X#define FALSE 0
X#define TRUE 1
X				/* #define TEST	insert test code */
Xint Debug ;
Xdouble Avpath ;			/* Weighted average path (code) length */
Xint Maxpath ;			/* Maximum path (code) length */
X
Xstruct tnode {
X	int id ;		/* node reference number */
X	int val ;		/* node value */
X	int level ;		/* node priority */
X	double prob ;		/* probability of value */
X	unsigned long path ;	/* path to root node (Huffman code bits) */
X	int pathlen ;		/* length of path in bits */
X	struct tnode *tptr ;	/* pointer to upper (left) node */
X	struct tnode *bptr ;	/* pointer to lower (right) node */
X} ;
X
Xstruct tnode * talloc()
X{
X	return ((struct tnode *) malloc(sizeof(struct tnode))) ;
X}
X
X
Xmain(argc, argv)
Xint argc ; char *argv[] ;
X{
X	char c ;
X	char buff[100], s1[20], s2[20], s3[20], s4[20] ;
X
X	int j, k, m ;
X	int posn, id, qty ;
X
X	struct tnode *freelist[300] ;
X
X	struct tnode *pn, *pn1, *pn2 ;
X
X	/* initialise variables */
X
X	Debug = Avpath = Maxpath = 0 ;
X
X	/* sort out arguments */
X
X	while (--argc) {
X		if (**(++argv) == '-' && *(*argv + 1) != '\0')
X			switch(*++*argv) {
X				case 'd':
X				case 'D':
X					++Debug ;
X					if (Debug == 2)
X						for (j = 0 ; j < argc ; j++)
X							printf("%s\n", argv[j]) ;
X					break;
X
X				default:
X					error("buildhuff [-d]\n", TRUE) ;
X					break ;
X		 	}
X	}
X
X
X 	/* The input file format is: description of character, ASCII value,
X	 * number found, percentage occurrence. Each set takes up one line.
X 	 */
X
X	posn = 0 ;
X	while (gets(buff)) {
X		sscanf(buff, "%s %s %s %s", s1, s2, s3, s4) ;
X		freelist[posn] = talloc() ;
X		freelist[posn]->id = posn ;  
X		freelist[posn]->val = atoi(s2) ;
X		freelist[posn]->level = posn ;
X		freelist[posn]->prob = (double) atof(s4) ;
X		freelist[posn]->path = 0L ;
X		freelist[posn]->pathlen = 0 ;
X		freelist[posn]->tptr = NULL ;
X		freelist[posn]->bptr = NULL ;
X		posn++ ;
X
X		if (Debug > 1)
X			printf("(1) %s (2) %s (3) %s (4) %s (F) %7.3f\n",
X			s1, s2, s3, s4, freelist[posn-1]->prob) ;
X	}
X	id = qty = posn ;
X
X	while (posn > 1) { /* posn points to next free space */
X
X		pn = talloc() ; /* get space for new node */
X
X		/* combine the two lowest probabilities */
X
X		/* get the lowest */
X
X		m = 0 ;
X		for (j = 0 ; j < posn ; j++) {
X			if (freelist[j]->prob < freelist[m]->prob)
X				m = j ;
X		}
X		pn1 = freelist[m] ;
X
X		/* remove the pointer from the freelist */
X
X		for (j = m ; j < posn - 1 ; j++ )
X			freelist[j] = freelist[j+1] ;
X		freelist[--posn] = NULL ;
X 
X		/* get the next lowest */
X
X		m = 0 ;
X		for (j = 0 ; j < posn ; j++) {
X			if (freelist[j]->prob < freelist[m]->prob)
X				m = j ;
X		}
X		pn2 = freelist[m] ;
X
X		/* remove the pointer from the freelist */
X
X		for (j = m ; j < posn - 1 ; j++ )
X			freelist[j] = freelist[j+1] ;
X		freelist[--posn] = NULL ;
X
X		/* build a neat tree - *** new algorithm needed */ 
X
X		if (pn2->level > pn1->level) {
X			pn->tptr = pn1 ;
X			pn->bptr = pn2 ;
X		}
X		else {
X			pn->tptr = pn2 ;
X			pn->bptr = pn1 ;
X		}
X 
X		/* find new probability, and add new node to list */
X
X		pn->id = id++ ;
X		pn->val = -1 ;
X		pn->level = (pn1->level + pn2->level)/2 ;
X		pn->prob = pn->tptr->prob + pn->bptr->prob ;
X		pn->path = 0L ;
X		pn->pathlen = 0 ;
X		freelist[posn++] = pn ;
X	}
X
X	/* print out tree */
X
X	treeprint(freelist[0], 0) ;
X
X	if (Debug > 0)
X		fprintf(stderr,"Wav. length %7.3f  Max. length %3d.\n",
X			Avpath/((double) qty), Maxpath) ;
X
X	exit(0) ;
X}
X
X
Xint treeprint(tpn, depth)
Xstruct tnode *tpn ; int depth ;
X{
X	int j, k, m ;
X
X	if (tpn != NULL) {
X		if (tpn->tptr != NULL) tpn->tptr->path = tpn->path ;
X		if (tpn->bptr != NULL) tpn->bptr->path = tpn->path + (1L<<depth) ;
X
X		treeprint(tpn->tptr, (depth + 1)) ;
X
X		j = k = m = -1 ;
X		if (tpn->tptr != NULL) j = tpn->tptr->id ;
X 		if (tpn->bptr != NULL) k = tpn->bptr->id ;
X		tpn->pathlen = depth ;
X
X		if (tpn->val != -1) {
X			Avpath = Avpath + (tpn->prob * tpn->pathlen) ;
X			if (Maxpath < tpn->pathlen) Maxpath = tpn->pathlen ;
X		}
X
X		printf(
X"ID: %3d Value: %3d Prob: %7.3f Path: %8lu Length: %3d Tnode: %3d Bnode: %3d\n",
X			tpn->id,tpn->val,tpn->prob,tpn->path,tpn->pathlen,j,k) ;
X			
X		treeprint(tpn->bptr, (depth + 1)) ;
X	}
X}
X
X
X
Xint error(s, f)
Xchar *s ; int f ;
X{
X	fputs(s, stderr) ;
X	if (f)
X		exit (1) ;
X}
X
/
echo x - count.c
sed '/^X/s///' > count.c << '/'
X/* Count of all ASCII characters - version 0.2 of 9 Sep 89.
X * Part of the Huffman encode/decode project.
X */
X
X#include <stdio.h>
X#include <math.h>
X#include <float.h>
X
X#define FALSE 0
X#define TRUE 1
X#define MAXLEN 40
X
Xint Debug ;
X
X
Xmain(argc, argv)
Xint argc ; char **argv ;
X{
X	char c ;
X	char *list[35] ;
X	char label[3] ;
X	char *outlst[128] ;
X
X	int j, k, m, posn, sortflg ;
X	long int total ;
X	long int count[128] ;
X
X	float q, tq ;
X 
X	FILE *fileptr ;
X
X	int numcomp() ;			/* comparison function */
X	void swap() ;			/* exchange function */
X
X	/* initialise variables */
X
X	list[0] = "^@" ; list[1] = "^A" ; list[2] = "^B" ; list[3] = "^C" ; 
X	list[4] = "^D" ; list[5] = "^E" ; list[6] = "^F" ; list[7] = "^G" ; 
X	list[8] = "^H" ; list[9] = "^I" ; list[10] = "^J" ; list[11] = "^K" ; 
X	list[12] = "^L" ; list[13] = "^M" ; list[14] = "^N" ; list[15] = "^O" ; 
X	list[16] = "^P" ; list[17] = "^Q" ; list[18] = "^R" ; list[19] = "^S" ; 
X	list[20] = "^T" ; list[21] = "^U" ; list[22] = "^V" ; list[23] = "^W" ; 
X	list[24] = "^X" ; list[25] = "^Y" ; list[26] = "^Z" ; list[27] = "^[" ;
X	list[28] = "^\\" ; list[29] = "^]" ; list[30] = "^^" ; list[31] = "^_" ;
X	list[32] = "^ " ; list[33] = "^~" ;
X
X	for (j = 0 ; j < 128 ; j++ ) {
X		count[j] = 0 ;
X		outlst[j] = (char *) malloc(40 * sizeof(char)) ;
X		*outlst[j] = '\0' ;
X	}
X
X	total = 0 ;
X	q = tq = 0 ;
X
X	Debug = sortflg = 0 ;
X
X	/* accumulate data */
X
X#ifdef MSDOS
X	wildexp(&argc, &argv, 1) ;	/* expand the filenames given */
X#endif	
X
X	while (--argc) {
X		if (**(++argv) == '-' && *(*argv + 1) != '\0') {
X			switch(*++*argv) {
X				case 'd':
X				case 'D':
X					++Debug ;
X					if (Debug == 2)
X						for (j = 0 ; j < argc ; j++)
X							printf("%s\n", argv[j]) ;
X					break;
X
X				case 's':
X				case 'S':
X					sortflg++ ;
X					break ;
X
X				default:
X					error("count [-ds]\n", TRUE) ;
X					break ;
X		 	}
X		}
X#ifdef MSDOS
X		else {	/* process wildcard argument under MSDOS */
X			if (Debug == 2)
X				printf("%s\n", *argv) ;
X			if ( !(fileptr = fopen(*argv, "r")) )
X				error("file open failure\n", TRUE) ;
X			while (j = getc(fileptr)) {
X				if (j == EOF) break ;
X				++count[j] ;
X				++total ;
X			}
X			fclose(fileptr) ;
X		}
X#endif
X	}
X
X#ifndef MSDOS
X	while ( (j = getchar()) != EOF) { /* the UNIX shell does it for us */
X		++count[j] ;
X		++total ;
X	} 
X#endif
X
X	/* develop output */
X
X	for (j = 0 ; j < 128 ; j++) {
X		if (j < 33)
X			strcpy(label, list[j]) ;
X		else if (j == 127)
X			strcpy(label, list[33]) ;
X		else {
X			label[0] = ' ' ;
X			label[1] = j & 0x7f ;
X			label[2] = '\0' ;
X		}
X		q = 100*(float)count[j]/(float)total ;
X		tq += q ;
X
X		sprintf(outlst[j], "%3s  %3d  %5ld  %7.3f",
X				label, j, count[j], q) ;
X	}
X
X	/* sort output */
X
X	if (sortflg != 0)
X		sort(outlst, 128, numcomp, swap, 16, 0) ;
X
X	/* display data */
X
X	if (Debug == 1)
X		puts("Chr. Value Count  Percent\n") ;
X
X	for (j = 0 ; j <128 ; j++)
X		puts(outlst[j]) ;
X
X	if (Debug == 1)
X	    printf("\nTotal characters %ld.\nTotal percentage %.3f.\n",total, tq) ;
X
X	exit(0) ;
X}
X
X
Xint sort(v, n, comp, exch, pos1, pos2) /* sort strings v[0] ... v[n]. */
Xchar *v[] ; int n, pos1, pos2 ; int (*comp)() ; void (*exch)() ;
X/* *** why must this be int and not void */ 
X{
X	int gap, i, j ;
X
X	for (gap = n/2 ; gap > 0 ; gap /= 2)
X		for (i = gap ; i < n ; i++)
X			for (j = i-gap ; j >= 0 ; j -= gap) {
X				if ((*comp)(v[j], v[j+gap], pos1, pos2) <= 0)
X					break ;
X				(*exch)(&v[j], &v[j+gap]) ;
X			}
X}
X
X
Xint numcomp(s1, s2, pos1, pos2) /* compare s1 and s2 numerically.  */
Xchar *s1, *s2 ; int pos1, pos2 ;
X{
X	float v1, v2 ;
X	char str[MAXLEN] ;
X
X	substr(s1, pos1, pos2, str, MAXLEN) ;
X	v1 = atof(str) ;
X	substr(s2, pos1, pos2, str, MAXLEN) ;
X	v2 = atof(str) ;
X
X	if (v1 > v2)
X		return (-1) ;
X	else if (v1 < v2)
X		return (1) ;
X	else 
X		return (0) ;
X}
X
X
Xint substr(s, pos1, pos2, str, maxlen) /* get substring from s.  */
Xchar s[], str[] ;
Xint pos1, pos2, maxlen ;
X{
X	int i, j, len ;
X
X	len = strlen(s) ;
X	if (pos2 == 0)			/* end pos not specified ? */
X		pos2 = len ;
X	if (len < pos1 || len < pos2)
X		error("substr: string too short") ;
X	if (pos2 - pos1 > maxlen)
X		error("substr: string too long") ;
X	for (j = 0 , i = pos1 ; i < pos2 ; i++, j++)
X		str[j] = s[i] ;
X	str[j] = '\0' ;
X}
X
X
Xvoid swap(px, py) /* interchange *px and *py  */
Xchar *px[], *py[] ;
X{
X	char *tmp ;
X
X	tmp = *px ;
X	*px = *py ;
X	*py = tmp ;
X}
X
X
Xint error(s, f)
Xchar *s ; int f ;
X{
X	fputs(s, stderr) ;
X	if (f)
X		exit (1) ;
X}
/
echo x - dec.c
sed '/^X/s///' > dec.c << '/'
X /* Decode file with Huffman coding - version 0.3 of 12 Sep 89.
X  * Part of the Huffman encode/decode project.
X  */
X
X#include <stdio.h>
X
X#define FALSE 0
X#define TRUE 1
X
Xint Debug ;
X
Xstruct tnode {
X	int id ;		/* node id */
X	int val ;		/* node value */
X	struct tnode *tptr ;	/* pointer to upper (left) node */
X	struct tnode *bptr ;	/* pointer to lower (right) node */
X} ;
X
Xstruct tnode *Freelist[300] ;
Xstruct tnode *Rootptr ;
Xstruct tnode *Pn ;
X
Xstruct tnode *talloc()
X{
X	return ((struct tnode *) malloc(sizeof(struct tnode))) ;
X}
X
X
Xmain(argc, argv)
Xint argc ; char *argv[] ;
X{
X	char c, d ;
X	char buff[100], s[20] ;
X
X	int j, k , m, codefile ;
X
X	FILE *fileptr ;
X
X	/* initialise variables */
X
X	Debug = codefile = 0 ;
X	Rootptr = Pn = NULL ;
X	for (j = 0 ; j < 256 ; j++) {
X		Freelist[j] = talloc() ;
X		Freelist[j]->val = 0 ;
X		Freelist[j]->tptr = Freelist[j]->bptr = NULL ;
X	}
X
X	/* sort out arguments */
X
X	while (--argc) {
X		if (**(++argv) == '-' && *(*argv + 1) != '\0' ) {
X			
X			switch (*++*argv) { /* process dash options */
X				case 'D':
X				case 'd':
X					++Debug ;
X					if (Debug == 2)
X						for (j = 0 ; j < argc ; j++)
X							printf("%s\n", argv[j]) ;
X					break;
X
X				case 'F':
X 				case 'f':
X					codefile = 1 ;
X					 break;
X
X				default:
X					error("dec [-d] -f codefile\n", TRUE) ;
X					break ;
X			}
X		}
X		else if (codefile) { /* next file contains the Huffman code */
X			if (!(fileptr = fopen(*argv, "r")) ) 
X				error("Huffman code file not found\n", TRUE) ;
X		/*
X		 * The file format is: node id, ASCII value, probability of
X		 * ocurrence, path to root of tree (code value), code length,
X		 * node reached by top pointer, node reached by bottom pointer.
X		 * All values are integers bar probability, which is floating
X		 * point.  A null top or bottom node pointer is shown by -1,
X		 * as is a 'val' field in a non-leaf node.
X		 */
X			while (fgets(buff, 100, fileptr) != NULL) {
X
X				sscanf(buff,
X				    "%s %d %s %d %s %s %s %s %s %s %s %s %s %s",
X				    s, &j, s, s, s, s, s, s, s, s, s, s, s, s) ;
X
X				Pn = Freelist[j] ;
X				Pn->id = j ;
X 
X				sscanf(buff,
X				"%s %s %s %d %s %s %s %s %s %d %s %d %s %d",
X				s, s, s, &Pn->val, s, s, s, s,
X				s, &m, s, &j, s, &k) ;
X
X				Pn->tptr = Pn->bptr = NULL ;
X				if (j != -1) Pn->tptr = Freelist[j] ;
X				if (k != -1) Pn->bptr = Freelist[k] ;
X				if (m == 0) Rootptr = Pn ;
X			}
X			fclose(fileptr) ;
X		}
X	}
X
X	if (Debug > 0)
X		treeshow(Rootptr) ;
X
X	/* decode stdio and put output on stdout */
X
X	Pn = Rootptr ;
X	while ((k = getchar()) != EOF) {
X		for (j = 8 ; j > 0 ; j--) {
X			m = k & 0x80 ;
X			if (m != 0)
X				Pn = Pn->bptr ;
X			else
X				Pn = Pn->tptr ;
X			if (Pn->val != -1) {
X				putchar((char) Pn->val) ;
X				Pn = Rootptr ;
X			}
X			k = (k << 1) ;
X		}
X	}
X
X	/* tidy up */
X
X	exit(0) ;
X}
X
X
Xint treeshow(tpn)
Xstruct tnode *tpn ;
X{
X	int j, k ;
X 
X	if (tpn != NULL) {
X		treeshow(tpn->tptr) ;
X
X		j = k = -1 ;
X		if (tpn->tptr != NULL) j = tpn->tptr->id ;
X		if (tpn->bptr != NULL) k = tpn->bptr->id ;
X
X		printf("ID: %3d  Value: %3d  Tnode: %3d  Bnode: %3d\n",
X			tpn->id, tpn->val, j, k) ;
X
X		treeshow(tpn->bptr) ;
X	}
X}
X
X
Xint pbin(j,p)	/* print 8-bit value as binary string */ 
Xint j, p ;	/* if p ==0 print all bits, else print pth bit */
X{
X	int k, m ;
X
X	m = 0x80 ;
X	for (k = 8 ; k ; k--) {
X		if (p == 0 || k == p)
X			putchar((j & m) ? '1' : '0') ;
X		m = m/2 ;
X	}
X}
X
X
Xint error(s, f)
Xchar *s ; int f ;
X{
X	fputs(s, stderr) ;
X	if (f)
X		exit (1) ;
X}
/
echo x - enc.c
sed '/^X/s///' > enc.c << '/'
X/* Encode file with Huffman coding - version 0.3 of 12 Sep 89.
X * Part of the Huffman encode/decode project.
X */
X
X#include <stdio.h>
X#include <math.h>
X#include <float.h>
X
X#define FALSE 0
X#define TRUE 1
X
Xint Debug ;
X
Xunsigned long Huffcode[300] ;
Xint Hufflen[300] ;
X
X
Xmain(argc, argv)
Xint argc ; char *argv[] ;
X{
X	char c, d ;
X	char buff[100], s[20] ;
X
X	int j, k , m, codefile, inbit, outbit ;
X
X	FILE *fileptr ;
X
X	Debug = codefile = inbit = outbit = 0 ;
X
X	for (j = 0 ; j < 300 ; j++) {
X		Huffcode[j] = 0L ;
X		Hufflen[j] = 0 ;
X	}
X
X	while (--argc) {
X		if (**(++argv) == '-' && *(*argv + 1) != '\0' ) {
X			
X			switch (*++*argv) { /* process dash options */
X				case 'D':
X				case 'd':
X					++Debug ;
X					if (Debug == 2)
X						for (j = 0 ; j < argc ; j++)
X							printf("%s\n", argv[j]) ;
X					break;
X
X				case 'F':
X 				case 'f':
X					codefile = 1 ;
X					break;
X
X				default:
X					error("enc [-d] -f codefile\n", TRUE) ;
X					break ;
X			}
X		}
X		else if (codefile) { /* next file contains the Huffman code */
X			if (!(fileptr = fopen(*argv, "r")))
X				error("Huffman code file not found\n", TRUE) ;
X		/*
X		 * The file format is: node id, ASCII value, probability of
X		 * ocurrence, path to root of tree (code value), code length,
X		 * node reached by top pointer, node reached by bottom pointer.
X		 * All values are integers bar probability, which is floating
X		 * point.  A null top or bottom node pointer is shown by -1.
X		 */
X			while (fgets(buff, 100, fileptr) != NULL) {
X				/* one pass won't hack it */
X				m = sscanf(buff,
X				"%s %s %s %d %s %s %s %s %s %s %s %s %s %s",
X				s, s, s, &j, s, s, s, s, s, s, s, s, s, s) ;
X
X				if (j != -1)
X				    m = sscanf(buff,
X				    "%s %s %s %s %s %s %s %lu %s %d %s %s %s %s",
X				    s, s, s, s, s, s, s, &Huffcode[j],
X				    s, &Hufflen[j], s, s, s, s) ; 
X			}
X
X			fclose(fileptr) ;
X		}
X	}
X
X	if (Debug > 1)
X		for (j = 0 ; j < 128 ; j++)
X			printf("Val: %3d  Code: %8lu  Length: %3d\n",
X						j, Huffcode[j], Hufflen[j]) ;  
X
X	/* encode stdio and put output on stdout */
X
X	outbit = 7 ; 
X	d = '\0' ;
X	while ((k = getchar()) != EOF) {
X		for (inbit = 0 ; inbit < Hufflen[k] ; inbit++) {
X			/* an array of masks would be quicker than shifting */
X			if ( (Huffcode[k] & (1L<<inbit)) != 0L )
X				d = d + (1<<outbit) ;
X			if (outbit == 0) {
X				if (Debug == 0)
X					putchar(d) ;
X				else {
X					pbin(d, 0) ;
X				}
X				outbit = 7 ;
X				d = '\0' ;
X			}
X			else
X				outbit-- ;
X		}
X	}
X
X	/* tidy up - trailing bits are an (unsolved) problem */
X
X	if (outbit != 7) {
X		if (Debug == 0)
X			putchar(d) ;
X		else
X			pbin(d, 0) ;
X	}
X
X	exit(0) ;
X}
X
X
Xint pbin(j,p)	/* print 8-bit value as binary string */ 
Xint j, p ;	/* if p ==0 print all bits, else print pth bit */
X{
X	int k, m ;
X
X	m = 0x80 ;
X	for (k = 8 ; k ; k--) {
X		if (p == 0 || k == p)
X			putchar((j & m) ? '1' : '0') ;
X		m = m/2 ;
X	}
X}
X
X
Xint error(s, f)
Xchar *s ; int f ;
X{
X	fputs(s, stderr) ;
X	if (f)
X		exit (1) ;
X}
/
echo x - strip.c
sed '/^X/s///' > strip.c << '/'
X/* Convert C keywords to ASCII characters - vn 0.2 of 12 Sep 89 */
X/* Part of the Huffman code/decode project */
X
X#include <stdio.h>
X#include <ctype.h>
X
X#define FALSE 0
X#define TRUE 1
X
X#define MAXWORD	20
X#define NKEYS	22
X#define INWORD  'a'
X
Xint Debug ;
X
Xstruct key {
X	char *keyword ;
X	char keychar ;
X} ;
Xstruct key keytab[NKEYS] ;
X
X
Xmain(argc, argv)
Xint argc ; char **argv ;
X{
X	int j, n, t, unstrip ;
X	char word[MAXWORD] ;
X
X	/* initialise variables */
X
X	Debug = unstrip = 0 ;
X
X	keytab[0].keyword = "begin" ;    keytab[0].keychar = 0 ;
X	keytab[1].keyword = "break" ;    keytab[1].keychar = 1 ;
X	keytab[2].keyword = "case"  ;    keytab[2].keychar = 2 ;
X	keytab[3].keyword = "char"  ;    keytab[3].keychar = 3 ;
X	keytab[4].keyword = "continue" ; keytab[4].keychar = 4 ;
X	keytab[5].keyword = "default" ;  keytab[5].keychar = 5 ;
X	keytab[6].keyword = "do" ;       keytab[6].keychar = 6 ;
X	keytab[7].keyword = "else" ;     keytab[7].keychar = 7 ;
X	keytab[8].keyword = "end" ;      keytab[8].keychar = 8 ;
X	keytab[9].keyword = "for" ;      keytab[9].keychar = 9 ;
X	keytab[10].keyword = "goto" ;    keytab[10].keychar = 10 ;
X	keytab[11].keyword = "if" ;      keytab[11].keychar = 11 ;
X	keytab[12].keyword = "int" ;     keytab[12].keychar = 12 ;
X	keytab[13].keyword = "register" ;keytab[13].keychar = 13 ;
X	keytab[14].keyword = "return" ;  keytab[14].keychar = 14 ;
X	keytab[15].keyword = "sizeof" ;  keytab[15].keychar = 15 ;
X	keytab[16].keyword = "struct" ;  keytab[16].keychar = 16 ;
X	keytab[17].keyword = "switch" ;  keytab[17].keychar = 17 ;
X	keytab[18].keyword = "union" ;   keytab[18].keychar = 18 ;
X	keytab[19].keyword = "unsigned" ;keytab[19].keychar = 19 ;
X	keytab[20].keyword = "void" ;    keytab[20].keychar = 20 ;
X	keytab[21].keyword = "while" ;   keytab[21].keychar = 21 ;
X
X	/* sort out arguments */
X
X	while (--argc) {
X		if (**(++argv) == '-' && *(*argv + 1) != '\0')
X			switch(*++*argv) {
X				case 'd':
X				case 'D':
X					++Debug ;
X					if (Debug == 2)
X						for (j = 0 ; j < argc ; j++)
X							printf("%s\n", argv[j]) ;
X					break;
X
X				case 'u':
X				case 'U':
X					unstrip++ ;
X					break ;
X
X				default:
X					error("strip [-u]\n", TRUE) ;
X					break ;
X		 	}
X	}
X
X	if (unstrip == 0) {
X		while ((t = getword(word, MAXWORD)) != EOF) {
X			if (t == INWORD) {
X				if ((n = binary(word, keytab, NKEYS)) >= 0)
X					putchar(keytab[n].keychar) ;
X				else
X					putword(word, MAXWORD) ;
X			}
X			else if (t == 9)
X				putchar(23) ;
X			else if (t == 10)
X				putchar(24) ;
X			else if (t == 13)
X				putchar(25) ; 
X			else
X				putchar(t) ;
X		}
X	}
X	else {
X		while ((t = getchar()) != EOF) {
X			if (t < 22)
X				putword(keytab[t].keyword, MAXWORD) ; 
X			else if (t == 23)
X				putchar(9) ;
X			else if (t == 24)
X				putchar(10) ;
X			else if (t == 25)
X				putchar(13) ;
X			else
X				putchar(t) ;
X		}
X	}
X	exit (0) ;
X}
X
Xint getword(w, lim) /* get word from stdin */
Xchar *w ;int lim ;
X{
X	int c ;
X
X	while ((c = *w++ = getchar()) < 22
X			&& c != 0 && c != 9 && c != 10 && c != 13 && c != EOF)
X		;
X	if (!isalpha(c)) {
X		*w = '\0' ;
X		return (c) ;
X	}
X	while (--lim > 0) {
X		c = *w++ = getchar() ;
X		if (!isalnum(c)) {
X			ungetc(c, stdin) ;
X			break ;
X		}
X	}
X	*(w-1) = '\0' ;
X
X	return (INWORD) ;
X}
X
Xint putword(w, lim) /* write word to stdout */
Xchar *w ; int lim ;
X{
X	int j ;
X
X	for (j = 0 ; j < lim ; j++) {
X		if (w[j] != '\0')
X			putchar(w[j]) ;
X		else
X			break ;
X	}
X
X	return (j) ; 
X}
X
Xint binary(word, tab, n) /* find word in tab[0] ... tab[n-1] */
Xchar *word ; struct key tab[] ;int n ;
X{
X	int low, high, mid, cond ;
X
X	low = 0 ;
X	high = n-1 ;
X	while (low <= high) {
X		mid = (low+high)/2 ;
X		if ((cond = strcmp(word, tab[mid].keyword)) < 0)
X			high = mid-1 ;
X		else if (cond > 0)
X			low = mid+1 ;
X		else 
X			return(mid) ;
X	}
X	return (-1) ;
X}
X
X
Xerror(s, f)
Xchar *s ; int f ;
X{
X	fputs(s, stderr) ;
X	if (f)
X		exit (1) ;
X}
/
------------------------------- end of file ---------------------------