[net.sources] CP/M compatiable file compressers

sch@linus.UUCP (Stephen C. Hemminger) (08/05/83)

These three programs xusq, xsq, and xtype compress/expand files
using the same method as the CP/M User's Group programs SQ/USQ and XTYPE.
Typical usage is to squeeze a file prior to up/down load from/to
a Un*x system.

The programs can be extracted by the shell.  Originally from
MIT-MC archives.

-------------------------------------
: Run this shell script with "sh" not "csh"
PATH=:/bin:/usr/bin:/usr/ucb
export PATH
all=FALSE
if [ $1x = -ax ]; then
	all=TRUE
fi
/bin/echo 'Extracting xsq.c'
sed 's/^X//' <<'//go.sysin dd *' >xsq.c
static char *sccsid = "@(#)sq.c         1.7u (UCF) 82/12/15";
X/*
 *   sq.c - CP/M compatible file squeezer utility
 *
 *   compile as follows:
 *   cc [-DVAX] -O sq.c -o sq
 *       (define VAX only if running on VAX)
 */

#include <stdio.h>
#include <signal.h>

#define TRUE 1
#define FALSE 0
#define ERROR (-1)
#define PATHLEN	312	/* Number of characters allowed in pathname */
#define ALTNAME "sq.out"

X/* Definitions and external declarations */

#define RECOGNIZE 0xFF76	/* unlikely pattern */

X/* *** Stuff for first translation module *** */

#define DLE 0x90


X/* *** Stuff for second translation module *** */

#define SPEOF 256	/* special endfile token */
#define NUMVALS 257	/* 256 data values plus SPEOF*/


#ifdef VAX   /*  we only want 16 bit integers  */

typedef short INT;
typedef unsigned short UNSIGNED;

#else   /*  PDP-11 and other 16-bit machines  */

typedef int INT;
typedef unsigned UNSIGNED;

#endif


X/* Definitions and external declarations */

INT Usestd;	/* Use stdout for squeezed output */
UNSIGNED crc;	/* error check code */

X/* *** Stuff for first translation module *** */

INT likect;	/*count of consecutive identical chars */
INT lastchar, newchar;
char state;

X/* states */

#define NOHIST	0 	/*don't consider previous input*/
#define SENTCHAR 1 	/*lastchar set, no lookahead yet */
#define SENDNEWC 2 	/*newchar set, previous sequence done */
#define SENDCNT 3 	/*newchar set, DLE sent, send count next */

X/* *** Stuff for second translation module *** */

#define NOCHILD -1	/* indicates end of path through tree */
#define NUMNODES (NUMVALS + NUMVALS - 1)	/* nbr of nodes */

#define MAXCOUNT (UNSIGNED) 65535	/* biggest UNSIGNED integer */

X/* The following array of structures are the nodes of the
 * binary trees. The first NUMVALS nodes becomethe leaves of the
 * final tree and represent the values of the data bytes being
 * encoded and the special endfile, SPEOF.
 * The remaining nodes become the internal nodes of the final tree.
 */

struct	nd {
	UNSIGNED weight;	/* number of appearances */
	INT tdepth;		/* length on longest path in tre*/
	INT lchild, rchild;	/* indexes to next level */
} node[NUMNODES];

INT dctreehd;	/*index to head node of final tree */

X/* This is the encoding table:
 * The bit strings have first bit in  low bit.
 * Note that counts were scaled so code fits UNSIGNED integer
 */

INT codelen[NUMVALS];		/* number of bits in code */
UNSIGNED code[NUMVALS];		/* code itself, right adjusted */
UNSIGNED tcode;			/* temporary code value */


X/* Variables used by encoding process */

INT curin;	/* Value currently being encoded */
INT cbitsrem;	/* Number of code string bits remaining */
UNSIGNED ccode;	/* Current code shifted so next code bit is at right */

X/* This program compresses a file without losing information.
 * The usq.com program is required to unsqueeze the file
 * before it can be used.
 *
 * Typical compression rates are:
 *	.COM	6%	(Don't bother)
 *	.ASM	33%	(using full ASCII set)
 *	.DIC	46%	(using only uppercase and a few others)
 * Squeezing a really big file takes a few minutes.
 *
 * Useage:
 *	sq file ...
 *
 *
 * The squeezed file name is formed by changing the second from last
 * letter of the file type to Q. If there is no file type,
 * the squeezed file type is QQQ. If the name exists it is
 * overwritten!
 *
 * The transformations compress strings of identical bytes and
 * then encode each resulting byte value and EOF as bit strings
 * having lengths in inverse proportion to their frequency of
 * occurrence in the intermediate input stream. The latter uses
 * the Huffman algorithm. Decoding information is included in
 * the squeezed file, so squeezing short files or files with
 * uniformly distributed byte values will actually increase size.
 */

X/* CHANGE HISTORY:
 * 1.5u **nix version - means output to stdout.
 *  (stdin not allowed becuase sq needs to rewind input, which
 *  won't work with pipes.)
 * Filename generation changed to suit **nix and stdio.
 * 1.6u machine independent output for file compatibility with
 *	original CP/M version SQ, when running on machine with
 *	IBM byte ordering such as Z8000 and 68000.
 * 1.7u machine independence was still lacking for 32-bit machines
 *      like the VAX-11/780, so a typedef was added.  No action
 *      need be taken if running on a 16-bit machine, but if
 *      running on a VAX, define VAX either on the cc line or
 *      in the program preamble.   Ben Goldfarb 12/13/82
 */


#define VERSION "1.7u   12-13-82"

INT inbackground = 0;

INT buildenc(), gethuff(), getc_crc();

main(argc, argv)
INT argc;
char *argv[];
{
	register INT i,c;


	if (signal(SIGINT, SIG_IGN)==SIG_IGN)
		inbackground++;
	else
		signal(SIGINT, SIG_DFL);
	signal(SIGHUP, SIG_IGN);

	/* Process the parameters in order */
	for(i = 1; i < argc; ++i) {
		if(strcmp(argv[i], "-")==0)
			Usestd = TRUE;
	}
	for(i = 1; i < argc; ++i) {
		if(strcmp(argv[i], "-")!=0)
			obey(argv[i]);
	}

	if(argc < 2) {
		fprintf(stderr,"File squeezer version %s by\n\tRichard Greenlaw\n\t251 Colony Ct.\n\tGahanna, Ohio 43230\n", VERSION);
		fprintf(stderr, "Usage: sq [-] pathname ...\n");
		fprintf(stderr, "\t- squeezed output to stdout\n");
		exit(1);
	}
	exit(0);
}

obey(p)
char *p;
{
	char *w,*q;
	char outfile[PATHLEN+2];	/* output file spec. */

	/* First build output file name */

	strcpy(outfile, p);
	/* Find and change output file type */
	for(w=q = outfile; *q != '\0'; ++q)	/* skip leading /'s */
		if( *q == '/')
			w = q+1;
	for(q = w; *q != '\0'; ++q)
		if(*q == '.')
			if(*(q + 1) == '\0')
				*q = '\0';	/* kill trailing dot */
			else
				switch(*(q+2)) {
				case 'q':
				case 'Q':
					fprintf(stderr, "sq: %s ignored ( already squeezed?)", p);
					return;
				case '\0':
					*(q+3) = '\0';
					/* fall thru */
				default:
					*(q + 2) = 'Q';
					goto named;
				}
	/* No file type */
	strcat(outfile, ".QQQ");
named:
	if(strlen(w)>14)
		strcpy(outfile, ALTNAME);	/* check for too long name */
	squeeze(p, outfile);
}

squeeze(infile, outfile)
char *infile, *outfile;
{
	register INT i, c;
	FILE *inbuff, *outbuff;	/* file buffers */

	if (!inbackground)
		fprintf(stderr, "\n%s -> %s: ", infile, outfile);

	if((inbuff=fopen(infile, "r")) == NULL) {
		fprintf(stderr, "sq: can't open %s\n", infile);
		return;
	}
	if(Usestd)
		outbuff=stdout;
	else if((outbuff=fopen(outfile, "w")) == NULL) {
		fprintf(stderr, "sq: can't create %s\n", outfile);
		fclose(inbuff);
		return;
	}

	/* First pass - get properties of file */
	crc = 0;	/* initialize checksum */
	if (!inbackground)
		fprintf(stderr, "analyzing, ");
	init_ncr();
	init_huff(inbuff);   
	rewind(inbuff);

	/* Write output file header with decoding info */
	wrt_head(outbuff, infile);

	/* Second pass - encode the file */
	if (!inbackground)
		fprintf(stderr, "squeezing, ");

	init_ncr();	/* For second pass */

	/* Translate the input file into the output file */
	while((c = gethuff(inbuff)) != EOF)
		if(putc(c, outbuff) == ERROR && ferror(outbuff)) {
			fprintf(stderr, "sq: write error\n");
			goto closeall;
		}
	if (!inbackground)
		fprintf(stderr, " done.\n");
closeall:
	fclose(inbuff);
closeout:
	fflush(outbuff);
	fclose(outbuff);
}


X/* First translation - encoding of repeated characters
 * The code is byte for byte pass through except that
 * DLE is encoded as DLE, zero and repeated byte values
 * are encoded as value, DLE, count for count >= 3.
 */

init_ncr()	/*initialize getcnr() */
{
	state = NOHIST;
}

INT
getcnr(iob)
FILE *iob;
{
	switch(state) {
	case NOHIST:
		/* No relevant history */
		state = SENTCHAR;
		return lastchar = getc_crc(iob);   
	case SENTCHAR:
		/* Lastchar is set, need lookahead */
		switch(lastchar) {
		case DLE:
			state = NOHIST;
			return 0;	/* indicates DLE was the data */
		case EOF:
			return EOF;
		default:
			for(likect = 1; (newchar = getc_crc(iob)) == lastchar && likect < 255; ++likect)
				;
			switch(likect) {
			case 1:
				return lastchar = newchar;
			case 2:
				/* just pass through */
				state = SENDNEWC;
				return lastchar;
			default:
				state = SENDCNT;
				return DLE;
			}
		}
	case SENDNEWC:
		/* Previous sequence complete, newchar set */
		state = SENTCHAR;
		return lastchar = newchar;
	case SENDCNT:
		/* Sent DLE for repeat sequence, send count */
		state = SENDNEWC;
		return likect;
	default:
		fprintf(stderr,"sq: Bug - bad state\n");
		exit(1);
		/* NOTREACHED */
	}
}


X/******** Second translation - bytes to variable length bit strings *********/


X/* This translation uses the Huffman algorithm to develop a
 * binary tree representing the decoding information for
 * a variable length bit string code for each input value.
 * Each string's length is in inverse proportion to its
 * frequency of appearance in the incoming data stream.
 * The encoding table is derived from the decoding table.
 *
 * The range of valid values into the Huffman algorithm are
 * the values of a byte stored in an integer plus the special
 * endfile value chosen to be an adjacent value. Overall, 0-SPEOF.
 *
 * The "node" array of structures contains the nodes of the
 * binary tree. The first NUMVALS nodes are the leaves of the
 * tree and represent the values of the data bytes being
 * encoded and the special endfile, SPEOF.
 * The remaining nodes become the internal nodes of the tree.
 *
 * In the original design it was believed that
 * a Huffman code would fit in the same number of
 * bits that will hold the sum of all the counts.
 * That was disproven by a user's file and was a rare but
 * infamous bug. This version attempts to choose among equally
 * weighted subtrees according to their maximum depths to avoid
 * unnecessarily long codes. In case that is not sufficient
 * to guarantee codes <= 16 bits long, we initially scale
 * the counts so the total fits in an unsigned integer, but
 * if codes longer than 16 bits are generated the counts are
 * rescaled to a lower ceiling and code generation is retried.
 */

X/* Initialize the Huffman translation. This requires reading
 * the input file through any preceding translation functions
 * to get the frequency distribution of the various values.
 */

init_huff(ib)          
FILE *ib;
{
	register INT c, i;
	INT btlist[NUMVALS];	/* list of intermediate binary trees */
	INT listlen;		/* length of btlist */
	UNSIGNED *wp;		/* simplifies weight counting */
	UNSIGNED ceiling;	/* limit for scaling */

	/* Initialize tree nodes to no weight, no children */
	init_tree();

	/* Build frequency info in tree */
	do {
		c = getcnr(ib);        
		if(c == EOF)
			c = SPEOF;
		if(*(wp = &node[c].weight) !=  MAXCOUNT)
			++(*wp);
	} 
	while(c != SPEOF);

	ceiling = MAXCOUNT;

	do {	/* Keep trying to scale and encode */
		if(ceiling != MAXCOUNT)
			fprintf(stderr, "sq: *** rescaling ***, ");
		scale(ceiling);
		ceiling /= 2;	/* in case we rescale */

		/* Build list of single node binary trees having
				 * leaves for the input values with non-zero counts
				 */
		for(i = listlen = 0; i < NUMVALS; ++i)
			if(node[i].weight != 0) {
				node[i].tdepth = 0;
				btlist[listlen++] = i;
			}

		/* Arrange list of trees into a heap with the entry
				 * indexing the node with the least weight at the top.
				 */
		heap(btlist, listlen);

		/* Convert the list of trees to a single decoding tree */
		bld_tree(btlist, listlen);

		/* Initialize the encoding table */
		init_enc();

		/* Try to build encoding table.
				 * Fail if any code is > 16 bits long.
				 */
	} 
	while(buildenc(0, dctreehd) == ERROR);

	/* Initialize encoding variables */
	cbitsrem = 0;	/*force initial read */
	curin = 0;	/*anything but endfile*/
}

X/* The count of number of occurrances of each input value
 * have already been prevented from exceeding MAXCOUNT.
 * Now we must scale them so that their sum doesn't exceed
 * ceiling and yet no non-zero count can become zero.
 * This scaling prevents errors in the weights of the
 * interior nodes of the Huffman tree and also ensures that
 * the codes will fit in an unsigned integer. Rescaling is
 * used if necessary to limit the code length.
 */

scale(ceil)
UNSIGNED ceil;	/* upper limit on total weight */
{
	register INT i,c;
	INT ovflw, divisor;
	UNSIGNED w, sum;
	char increased;		/* flag */

	do {
		for(i = sum = ovflw = 0; i < NUMVALS; ++i) {
			if(node[i].weight > (ceil - sum))
				++ovflw;
			sum += node[i].weight;
		}

		divisor = ovflw + 1;

		/* Ensure no non-zero values are lost */
		increased = FALSE;
		for(i = 0; i < NUMVALS; ++i) {
			w = node[i].weight;
			if (w < divisor && w != 0) {
				/* Don't fail to provide a code if it's used at all */
				node[i].weight = divisor;
				increased = TRUE;
			}
		}
	} 
	while(increased);

	/* Scaling factor choosen, now scale */
	if(divisor > 1)
		for(i = 0; i < NUMVALS; ++i)
			node[i].weight /= divisor;
}

X/* heap() and adjust() maintain a list of binary trees as a
 * heap with the top indexing the binary tree on the list
 * which has the least weight or, in case of equal weights,
 * least depth in its longest path. The depth part is not
 * strictly necessary, but tends to avoid long codes which
 * might provoke rescaling.
 */

heap(list, length)
INT list[], length;
{
	register INT i;

	for(i = (length - 2) / 2; i >= 0; --i)
		adjust(list, i, length - 1);
}

X/* Make a heap from a heap with a new top */

adjust(list, top, bottom)
INT list[], top, bottom;
{
	register INT k, temp;

	k = 2 * top + 1;	/* left child of top */
	temp = list[top];	/* remember root node of top tree */
	if( k <= bottom) {
		if( k < bottom && cmptrees(list[k], list[k + 1]))
			++k;

		/* k indexes "smaller" child (in heap of trees) of top */
		/* now make top index "smaller" of old top and smallest child */
		if(cmptrees(temp, list[k])) {
			list[top] = list[k];
			list[k] = temp;
			/* Make the changed list a heap */
			adjust(list, k, bottom); /*recursive*/
		}
	}
}

X/* Compare two trees, if a > b return true, else return false
 * note comparison rules in previous comments.
 */

cmptrees(a, b)
INT a, b;	/* root nodes of trees */
{
	if(node[a].weight > node[b].weight)
		return TRUE;
	if(node[a].weight == node[b].weight)
		if(node[a].tdepth > node[b].tdepth)
			return TRUE;
	return FALSE;
}

X/* HUFFMAN ALGORITHM: develops the single element trees
 * into a single binary tree by forming subtrees rooted in
 * interior nodes having weights equal to the sum of weights of all
 * their descendents and having depth counts indicating the
 * depth of their longest paths.
 *
 * When all trees have been formed into a single tree satisfying
 * the heap property (on weight, with depth as a tie breaker)
 * then the binary code assigned to a leaf (value to be encoded)
 * is then the series of left (0) and right (1)
 * paths leading from the root to the leaf.
 * Note that trees are removed from the heaped list by
 * moving the last element over the top element and
 * reheaping the shorter list.
 */

bld_tree(list, len)
INT list[];
INT len;
{
	register INT freenode;		/* next free node in tree */
	register struct nd *frnp;	/* free node pointer */
	INT lch, rch;		/* temporaries for left, right children */
	INT i;

	/* Initialize index to next available (non-leaf) node.
		 * Lower numbered nodes correspond to leaves (data values).
		 */
	freenode = NUMVALS;

	while(len > 1) {
		/* Take from list two btrees with least weight
				 * and build an interior node pointing to them.
				 * This forms a new tree.
				 */
		lch = list[0];	/* This one will be left child */

		/* delete top (least) tree from the list of trees */
		list[0] = list[--len];
		adjust(list, 0, len - 1);

		/* Take new top (least) tree. Reuse list slot later */
		rch = list[0];	/* This one will be right child */

		/* Form new tree from the two least trees using
				 * a free node as root. Put the new tree in the list.
				 */
		frnp = &node[freenode];	/* address of next free node */
		list[0] = freenode++;	/* put at top for now */
		frnp->lchild = lch;
		frnp->rchild = rch;
		frnp->weight = node[lch].weight + node[rch].weight;
		frnp->tdepth = 1 + maxchar(node[lch].tdepth, node[rch].tdepth);
		/* reheap list  to get least tree at top*/
		adjust(list, 0, len - 1);
	}
	dctreehd = list[0];	/*head of final tree */
}

X/* ???????????? */
maxchar(a, b)
{
	return a > b ? a : b;
}
X/* Initialize all nodes to single element binary trees
 * with zero weight and depth.
 */

init_tree()
{
	register INT i;

	for(i = 0; i < NUMNODES; ++i) {
		node[i].weight = 0;
		node[i].tdepth = 0;
		node[i].lchild = NOCHILD;
		node[i].rchild = NOCHILD;
	}
}

init_enc()
{
	register INT i;

	/* Initialize encoding table */
	for(i = 0; i < NUMVALS; ++i) {
		codelen[i] = 0;
	}
}

X/* Recursive routine to walk the indicated subtree and level
 * and maintain the current path code in bstree. When a leaf
 * is found the entire code string and length are put into
 * the encoding table entry for the leaf's data value .
 *
 * Returns ERROR if codes are too long.
 */

INT		/* returns ERROR or NULL */
buildenc(level, root)
INT level;/* level of tree being examined, from zero */
INT root; /* root of subtree is also data value if leaf */
{
	register INT l, r;

	l = node[root].lchild;
	r = node[root].rchild;

	if( l == NOCHILD && r == NOCHILD) {
		/* Leaf. Previous path determines bit string
				 * code of length level (bits 0 to level - 1).
				 * Ensures unused code bits are zero.
				 */
		codelen[root] = level;
		code[root] = tcode & (((UNSIGNED)~0) >> (16 - level));
		return (level > 16) ? ERROR : NULL;
	} 
	else {
		if( l != NOCHILD) {
			/* Clear path bit and continue deeper */
			tcode &= ~(1 << level);
			/* NOTE RECURSION */
			if(buildenc(level + 1, l) == ERROR)
				return ERROR;
		}
		if(r != NOCHILD) {
			/* Set path bit and continue deeper */
			tcode |= 1 << level;
			/* NOTE RECURSION */
			if(buildenc(level + 1, r) == ERROR)
				return ERROR;
		}
	}
	return NULL;	/* if we got here we're ok so far */
}

X/* Write out the header of the compressed file */

wrt_head(ob, infile)
FILE *ob;
char *infile;	/* input file name (w/ or w/o drive) */
{
	register INT l,r;
	INT i, k;
	INT numnodes;		/* nbr of nodes in simplified tree */

	putwe(RECOGNIZE, ob);	/* identifies as compressed */
	putwe(crc, ob);		/* unsigned sum of original data */

	/* Record the original file name w/o drive */
	if(*(infile + 1) == ':')
		infile += 2;	/* skip drive */

	do {
		putce(*infile, ob);
	} 
	while(*(infile++) != '\0');


	/* Write out a simplified decoding tree. Only the interior
		 * nodes are written. When a child is a leaf index
		 * (representing a data value) it is recoded as
		 * -(index + 1) to distinguish it from interior indexes
		 * which are recoded as positive indexes in the new tree.
		 * Note that this tree will be empty for an empty file.
		 */

	numnodes = dctreehd < NUMVALS ? 0 : dctreehd - (NUMVALS -1);
	putwe(numnodes, ob);

	for(k = 0, i = dctreehd; k < numnodes; ++k, --i) {
		l = node[i].lchild;
		r = node[i].rchild;
		l = l < NUMVALS ? -(l + 1) : dctreehd - l;
		r = r < NUMVALS ? -(r + 1) : dctreehd - r;
		putwe(l, ob);	/* left child */
		putwe(r, ob);	/* right child */
	}
}

X/* Get an encoded byte or EOF. Reads from specified stream AS NEEDED.
 *
 * There are two unsynchronized bit-byte relationships here.
 * The input stream bytes are converted to bit strings of
 * various lengths via the static variables named c...
 * These bit strings are concatenated without padding to
 * become the stream of encoded result bytes, which this
 * function returns one at a time. The EOF (end of file) is
 * converted to SPEOF for convenience and encoded like any
 * other input value. True EOF is returned after that.
 *
 * The original gethuff() called a seperate function,
 * getbit(), but that more readable version was too slow.
 */

INT		/*  Returns byte values except for EOF */
gethuff(ib)
FILE *ib;
{
	INT rbyte;	/* Result byte value */
	INT need, take;	/* numbers of bits */

	rbyte = 0;
	need = 8;	/* build one byte per call */

	/* Loop to build a byte of encoded data
		 * Initialization forces read the first time
		 */

loop:
	if(cbitsrem >= need) {
		/* Current code fullfills our needs */
		if(need == 0)
			return rbyte;
		/* Take what we need */
		rbyte |= ccode << (8 - need);
		/* And leave the rest */
		ccode >>= need;
		cbitsrem -= need;
		return rbyte;
	}

	/* We need more than current code */
	if(cbitsrem > 0) {
		/* Take what there is */
		rbyte |= ccode << (8 - need);
		need -= cbitsrem;
	}
	/* No more bits in current code string */
	if(curin == SPEOF) {
		/* The end of file token has been encoded. If
				 * result byte has data return it and do EOF next time
				 */
		cbitsrem = 0;

		/*NOTE: +0 is to fight compiler bug? */
		return (need == 8) ? EOF : rbyte + 0;
	}

	/* Get an input byte */
	if((curin = getcnr(ib)) == EOF)
		curin = SPEOF;	/* convenient for encoding */

	/* Get the new byte's code */
	ccode = code[curin];
	cbitsrem = codelen[curin];

	goto loop;
}


X/* Get next byte from file and update checksum */

INT
getc_crc(ib)
FILE *ib;
{
	register INT c;

	c = getc(ib);
	if(c != EOF)
		crc += c;	/* checksum */
	return c;
}

X/* Output functions with error reporting */

putce(c, iob)
INT c;
FILE *iob;
{
	if(putc(c, iob) == ERROR && ferror(iob)) {
		fprintf(stderr, "sq: write error\n");
		exit(1);
	}
}

X/*
 * machine independent put-word that writes low order byte first
 *  (compatible with CP/M original) regardless of host cpu.
 */
putwe(w, iob)
INT w;
FILE *iob;
{
	putc(w, iob);
	putc(w>>8, iob);
	if (ferror(iob)) {
		fprintf(stderr, "sq: write error\n");
		exit(1);
	}
}
//go.sysin dd *
made=TRUE
if [ $made = TRUE ]; then
	/bin/chmod 664 xsq.c
	/bin/echo -n '	'; /bin/ls -ld xsq.c
fi
/bin/echo 'Extracting xtype.c'
sed 's/^X//' <<'//go.sysin dd *' >xtype.c
static char *sccsid = "@(#)type.c       1.7u (UCF) 82/12/15";
X/*
 * 	TYPE - types regular or squeezed files
 *
 * 	Compile as follows:
 *      cc [-DVAX] -O type.c -o type
 *         (define VAX only if running on VAX)
 *
  Derived from cnode 'cat', typesq, and others. W. Earnest 5/28/82
  Credit to the following and others for parts of the software:
	Dick Greenlaw	(usq)
	Bob Mathias	(typesq)
	Steve Passe	(cnode)
	Joe Shannon	(cnode)
	Chuck Forsberg (Unix version)

Type recognizes CPMEOF (0x1A) on all files.  A header line is printed
for each file.  If the last text character of a file is not linefeed,
one is output before the header line for the next file.  If the file is
squeezed, the original filename is printed.  Quit causes type to
advance to the next file.

*/

#include <stdio.h>
#include <signal.h>
#define ERROR (-1)
#define OK 0
#define CPMEOF 0x1A
#define PATHLEN 128

#define RECOGNIZE 0xFF76	/* unlikely pattern */
#define DLE 0x90		/* repeat byte flag */
#define SPEOF 256		/* special endfile token */
#define NUMVALS 257		/* 256 data values plus SPEOF*/
#define LARGE 30000

#ifdef VAX	/*  simulate 16-bit integers and define type for uniformity */
typedef short INT;
#else		/*  if PDP-11 (or whatever), they were already 16 bits  */
typedef int INT;
#endif

struct _sqleaf {		/* Decoding tree */
	INT _children[2];	/* left, right */
};
struct _sqleaf Dnode[NUMVALS - 1];

INT early_exit = 0;
INT lastchar = '\n';	/* controls whether we need an extra nl after file */
FILE *infile;
char stobuf[BUFSIZ];	/* use buffered output for efficiency */

INT Bpos;		/* last bit position read */
INT Curin;		/* last byte value read */
INT Repct;		/* Number of times to retirn value */
INT Value;		/* current byte value or EOF */

INT getcr(), getuhuff();

snextfil()
{
	signal(SIGQUIT, snextfil);
	++early_exit;
}

main(p_argc, p_argv)
INT p_argc;
char **p_argv;
{
	char file[PATHLEN];
	register INT c, d, x;


	if (signal(SIGQUIT, snextfil)==SIG_IGN)
		signal(SIGQUIT, SIG_IGN);
	setbuf(stdout, stobuf);
	setbuf(stderr, 0);

	if (p_argc < 2) {
		fprintf(stderr, "Usage: type file ...\n");
		exit(1);
	}

	for (x = 1;x < p_argc;++x) {
		if(early_exit || lastchar != '\n')
			printf("\n");
		d= 39 - strlen(p_argv[x]);
		for (c=d; --c>=0 ;)
			putchar('=');
		printf(" %s ",p_argv[x]);
		for (c=d; --c>=0 ;)
			putchar('=');
		putchar('\n');
		fflush(stdout);
		early_exit = 0;
		switch (catvalid(file, p_argv[x])) {
		case 'q':
			switch (qsend(file)) {
			case ERROR:
				break;
			case 'a':
				send_text(file);
			}
			break;
		case 'a':
			send_text(file); break;
		case 'x':
			break;
		}
	}
	return (OK);
}

send_text(file)
char *file;
{
	register INT c, lc;

	while( ((c = getc(infile)) != EOF) && c != CPMEOF && (!early_exit) )
		putchar(lc=c);
	lastchar=c;
	fclose(infile);
	fflush(stdout);
	return (OK);
}

catvalid(pname, name)
char *pname;
char *name;
{
	if((infile=fopen(name, "r"))==NULL) {
		fprintf(stderr, "type: Can't open %s\n", name);
		return ERROR;
	}
	strcpy(pname, name);
	return ('q');
}

X/*
	The following code is primarily from typesq.c and utr.c.  Typesq
is a modification of USQ by Dick Greenlaw.  Those modifications (usq
to typesq) were made by Bob Mathias, I am responsible for the butchery
done to make it work with cat.

*/

qsend(fname)
char *fname;
{
	register INT i, c, lc;
	register char *p;
	register INT numnodes;			/* size of decoding tree */
	char origname[PATHLEN];		/* Original file name */

	init_cr(); init_huff();

	if(portgetw(infile) != RECOGNIZE) {	/* Process header */
		rewind(infile);
		return 'a';			/* not squeezed after all */
	}
	portgetw(infile);			/* discard checksum */
	p = origname;				/* Get original file name */
	do {					/* send it to array */
		*p = getc(infile);
	} while(*p++ != '\0');

	numnodes = portgetw(infile);
	if(numnodes < 0 || numnodes >= NUMVALS) {
		fprintf(stderr, "%s has invalid decode tree size\n", fname);
		fclose(infile);
		return ERROR;
	}
	/* Initialize for possible empty tree (SPEOF only) */
	Dnode[0]._children[0] = -(SPEOF + 1);
	Dnode[0]._children[1] = -(SPEOF + 1);

	for(i = 0; i < numnodes; ++i) {	/* Get decoding tree from file */
		Dnode[i]._children[0] = portgetw(infile);
		Dnode[i]._children[1] = portgetw(infile);
	}
	/* Get translated output bytes and write file */
	printf("\n%s -> %s\n\n",fname,origname);
	while( ((c = getcr(infile)) != EOF) && c != CPMEOF && (!early_exit) )
		putchar(lc=c);
	lastchar=lc;
	fclose(infile);
	fflush(stdout);
	return OK;
}
X/*** from utr.c - */
X/* initialize decoding functions */

init_cr()
{
	Repct = 0;
}

init_huff()
{
	Bpos = 99;	/* force initial read */
}

X/* Get bytes with decoding - this decodes repetition,
 * calls getuhuff to decode file stream into byte
 * level code with only repetition encoding.
 *
 * The code is simple passing through of bytes except
 * that DLE is encoded as DLE-zero and other values
 * repeated more than twice are encoded as value-DLE-count.
 */

INT
getcr()
{
	register INT c;

	if(Repct > 0) {
		/* Expanding a repeated char */
		--Repct;
		return Value;
	} else {
		/* Nothing unusual */
		if((c = getuhuff()) != DLE) {
			/* It's not the special delimiter */
			Value = c;
			if(Value == EOF)
				Repct = LARGE;
			return Value;
		} else {
			/* Special token */
			if((Repct = getuhuff()) == 0)
				/* DLE, zero represents DLE */
				return DLE;
			else {
				/* Begin expanding repetition */
				Repct -= 2;	/* 2nd time */
				return Value;
			}
		}
	}
}
X/* Decode file stream INTo a byte level code with only
 * repetition encoding remaining.
 */

INT
getuhuff()
{
	register INT i;
	register INT bitval;

	/* Follow bit stream in tree to a leaf*/
	i = 0;	/* Start at root of tree */
	do {
		if(++Bpos > 7) {
			if((Curin = getc(infile)) == ERROR)
				return ERROR;
			Bpos = 0;
			/* move a level deeper in tree */
			i = Dnode[i]._children[1 & Curin];
		} else
			i = Dnode[i]._children[1 & (Curin >>= 1)];
	} while(i >= 0);

	/* Decode fake node index to original data value */
	i = -(i + 1);
	/* Decode special endfile token to normal EOF */
	i = (i == SPEOF) ? EOF : i;
	return i;
}
X/*
 * Machine independent getw which always gets bytes in the same order
 *  as the CP/M version of SQ wrote them
 */
portgetw(f)
FILE *f;
{
	register INT c;

	c = getc(f)&0377;
	return c + (getc(f)<<8);
}
//go.sysin dd *
made=TRUE
if [ $made = TRUE ]; then
	/bin/chmod 664 xtype.c
	/bin/echo -n '	'; /bin/ls -ld xtype.c
fi
/bin/echo 'Extracting xusq.c'
sed 's/^X//' <<'//go.sysin dd *' >xusq.c
static char *sccsid = "@(#)usq.c        1.7u (UCF) 82/12/15";
X/*
 * 	usq.c - CP/M compatible file unsqueezer utility
 *
 *	compile as follows:
 *	cc [-DVAX] -O usq.c -o usq
 *	   (define VAX only if running on VAX)
 */

#include <stdio.h>
#include <signal.h>
#include <ctype.h>

#define TRUE 1
#define FALSE 0
#define ERROR (-1)
#define PATHLEN	312	/* Number of characters allowed in pathname */
#define OK 0

#define RECOGNIZE 0xFF76	/* unlikely pattern */
#define DLE 0x90		/* repeat byte flag */
#define SPEOF 256		/* special endfile token */
#define NUMVALS 257		/* 256 data values plus SPEOF*/
#define LARGE 30000

#ifdef VAX   /* then we don't want 32 bit integers */

typedef short INT;
typedef unsigned short UNSIGNED;

#else   /*  16 bit machines  */

typedef int INT;
typedef unsigned UNSIGNED;

#endif

struct _sqleaf {		/* Decoding tree */
	INT _children[2];	/* left, right */
};
struct _sqleaf Dnode[NUMVALS - 1];


INT Bpos;		/* last bit position read */
INT Curin;		/* last byte value read */
INT Repct;		/* Number of times to return value */
INT Value;		/* current byte value or EOF */

INT MakeLCPathname=TRUE;	/* translate pathname to lc if all caps */
INT Nlmode=FALSE;		/* zap cr's if true */
INT Inbackground = FALSE;

INT getcr(), getuhuff(), portgetw();

main(argc, argv)
char *argv[];
{
	register char *cp;
	register INT npats=0;
	char **patts;
	INT n, errorstat;

	if (signal(SIGINT, SIG_IGN)==SIG_IGN)
		Inbackground++;
	else
		signal(SIGINT, SIG_DFL);
	signal(SIGHUP, SIG_IGN);
	errorstat=0;
	if(argc<2)
		goto usage;
	while (--argc) {
		cp = *++argv;
		if(*cp == '-') {
			while( *++cp) {
				switch(*cp) {
				case 'n':
					Nlmode=TRUE; break;
				case 'u':
					MakeLCPathname=FALSE; break;
				default:
					goto usage;
				}
			}
		}
		else if( !npats && argc>0) {
			if(argv[0][0]) {
				npats=argc;
				patts=argv;
			}
		}
	}
	if(npats < 1) {
usage:
		fprintf(stderr,"Usage: usq [-nu] file ...\n");
		fprintf(stderr,"\t-n Nlmode: remove carriage returns\n");
		fprintf(stderr,"\t-u preserve Uppercase pathnames\n");
		exit(1);
	}
	for(n=0; n<npats; ++n)
		errorstat |= squeeze(patts[n]);
	exit(errorstat != 0);
}

X/*
	The following code is primarily from typesq.c and utr.c.  Typesq
is a modification of USQ by Dick Greenlaw.  Those modifications (usq
to typesq) were made by Bob Mathias, I am responsible for the butchery
done to make it work with cat.

*/

FILE *in, *out;
squeeze(fname)
char *fname;
{
	register INT i, c;
	register char *p;
	register INT numnodes;			/* size of decoding tree */
	register UNSIGNED crc;
	UNSIGNED filecrc;
	char origname[PATHLEN];		/* Original file name without drive */

	init_cr(); init_huff(); crc=0;

	if((in=fopen( fname, "r"))==NULL) {
		fprintf(stderr, "usq: can't open %s\n", fname);
		return ERROR;
	}
	if(portgetw(in) != (INT) RECOGNIZE) {/* Process header */
		fprintf(stderr, "usq: %s is not a SQueezed file\n", fname);
		return(ERROR);
	}
	filecrc = (UNSIGNED) portgetw(in);	/* checksum */
	p = origname;				/* Get original file name */
	do {					/* send it to array */
		*p = getc(in);
	} while(*p++ != '\0');

	numnodes = portgetw(in);
	if(numnodes < 0 || numnodes >= NUMVALS) {
		fprintf(stderr, "usq: %s has invalid decode tree\n", fname);
		fclose(in);
		return(ERROR);
	}
	/* Initialize for possible empty tree (SPEOF only) */
	Dnode[0]._children[0] = -(SPEOF + 1);
	Dnode[0]._children[1] = -(SPEOF + 1);

	for(i = 0; i < numnodes; ++i) {	/* Get decoding tree from file */
		Dnode[i]._children[0] = portgetw(in);
		Dnode[i]._children[1] = portgetw(in);
	}
	/* Get translated output bytes and write file */
	if(MakeLCPathname && !IsAnyLower(origname))
		uncaps(origname);
	for(p=origname; *p; ++p)		/* change / to _ */
		if( *p == '/')
			*p = '_';
	if (!Inbackground)
		fprintf(stderr, "usq: %s -> %s\n",fname,origname);
	if((out=fopen(origname, "w"))==NULL) {
		fprintf(stderr, "usq: can't create %s\n", origname);
	}
	while ((c = getcr()) != EOF) {
		crc += (UNSIGNED) c;
		if ( c == '\r' && Nlmode)
			continue;
		putc(c, out);
	}
	fclose(in);
	fflush(out);
	fclose(out);
	if( crc != filecrc ) {
		fprintf(stderr, "usq: bad checksum in %s\n", fname);
		fflush(stdout);
		return(ERROR);
	}
	return(OK);
}
X/*** from utr.c - */
X/* initialize decoding functions */

init_cr()
{
	Repct = 0;
}

init_huff()
{
	Bpos = 99;	/* force initial read */
}

X/* Get bytes with decoding - this decodes repetition,
 * calls getuhuff to decode file stream into byte
 * level code with only repetition encoding.
 *
 * The code is simple passing through of bytes except
 * that DLE is encoded as DLE-zero and other values
 * repeated more than twice are encoded as value-DLE-count.
 */

INT
getcr()
{
	register INT c;

	if(Repct > 0) {
		/* Expanding a repeated char */
		--Repct;
		return(Value);
	} else {
		/* Nothing unusual */
		if((c = getuhuff()) != DLE) {
			/* It's not the special delimiter */
			Value = c;
			if(Value == EOF)
				Repct = LARGE;
			return(Value);
		} else {
			/* Special token */
			if((Repct = getuhuff()) == 0)
				/* DLE, zero represents DLE */
				return(DLE);
			else {
				/* Begin expanding repetition */
				Repct -= 2;	/* 2nd time */
				return(Value);
			}
		}
	}
}
X/* Decode file stream into a byte level code with only
 * repetition encoding remaining.
 */

INT
getuhuff()
{
	register INT i;

	/* Follow bit stream in tree to a leaf*/
	i = 0;	/* Start at root of tree */
	do {
		if(++Bpos > 7) {
			if((Curin = getc(in)) == ERROR)
				return(ERROR);
			Bpos = 0;
			/* move a level deeper in tree */
			i = Dnode[i]._children[1 & Curin];
		} else
			i = Dnode[i]._children[1 & (Curin >>= 1)];
	} while(i >= 0);

	/* Decode fake node index to original data value */
	i = -(i + 1);
	/* Decode special endfile token to normal EOF */
	i = (i == SPEOF) ? EOF : i;
	return(i);
}
X/*
 * Machine independent getw which always gets bytes in the same order
 *  as the CP/M version of SQ wrote them
 */
INT
portgetw(f)
FILE *f;
{
	register INT c;

	c = getc(f) & 0377;
	return(c | (getc(f) << 8));
}


X/* make string s lower case */
uncaps(s)
char *s;
{
	for( ; *s; ++s)
		if(isupper(*s))
			*s = tolower(*s);
}


X/*
 * IsAnyLower returns TRUE if string s has lower case letters.
 */
IsAnyLower(s)
char *s;
{
	for( ; *s; ++s)
		if (islower(*s))
			return(TRUE);
	return(FALSE);
}
//go.sysin dd *
made=TRUE
if [ $made = TRUE ]; then
	/bin/chmod 664 xusq.c
	/bin/echo -n '	'; /bin/ls -ld xusq.c
fi

-- 
Stephen Hemminger,  Mitre Corp. Bedford MA 
	{allegra,genrad,ihnp4, utzoo}!linus!sch	(UUCP)
	linus!sch@mitre-bedford			(ARPA)