[comp.sources.misc] v05i074: u16 1 of 2 - 16 bit uncompress for IBM PC

tom@SSD.HARRIS.COM (Tom Horsley) (12/08/88)

Posting-number: Volume 5, Issue 74
Submitted-by: "Tom Horsley" <tom@SSD.HARRIS.COM>
Archive-name: u16.pc/part01

u16 is a 16 bit LZW uncompress program for the IBM PC. It can
decompress any files compressed with the net "compress" utility.
--------------------------cut here-----------------------------
#! /bin/sh
# This is a shell archive.  Remove anything before this line, then unpack
# it by saving it into a file and typing "sh file".  To overwrite existing
# files, type "sh file -c".  You can also feed this as standard input via
# unshar, or by typing "sh <file", e.g..  If this archive is complete, you
# will see the following message at the end:
#		"End of archive 1 (of 2)."
# Contents:  MANIFEST Makefile u16.c
# Wrapped by tom@hcx2 on Tue Dec  6 10:58:18 1988
PATH=/bin:/usr/bin:/usr/ucb ; export PATH
if test -f 'MANIFEST' -a "${1}" != "-c" ; then 
  echo shar: Will not clobber existing file \"'MANIFEST'\"
else
echo shar: Extracting \"'MANIFEST'\" \(238 characters\)
sed "s/^X//" >'MANIFEST' <<'END_OF_FILE'
X   File Name		Archive #	Description
X-----------------------------------------------------------
X MANIFEST                   1	This shipping list
X Makefile                   1	
X u16.c                      1	
X xcode.asm                  2	
END_OF_FILE
if test 238 -ne `wc -c <'MANIFEST'`; then
    echo shar: \"'MANIFEST'\" unpacked with wrong size!
fi
# end of 'MANIFEST'
fi
if test -f 'Makefile' -a "${1}" != "-c" ; then 
  echo shar: Will not clobber existing file \"'Makefile'\"
else
echo shar: Extracting \"'Makefile'\" \(302 characters\)
sed "s/^X//" >'Makefile' <<'END_OF_FILE'
XDESTDIR=c:/progs/unix
X
Xu16.exe: xcode.obj u16.obj
X	link /NOI u16.obj+xcode.obj ;
X
Xxcode.obj: xcode.asm
X	masm /ML xcode ;
X
Xu16.obj: u16.c
X	cl -c -Ox u16.c
X
Xinstall: u16.exe
X        rm -f $(DESTDIR)/u16.exe
X	exepack u16.exe pu16.exe
X        mv -f pu16.exe $(DESTDIR)/u16.exe
X
Xclean:
X	rm -f *.obj u16.exe
END_OF_FILE
if test 302 -ne `wc -c <'Makefile'`; then
    echo shar: \"'Makefile'\" unpacked with wrong size!
fi
# end of 'Makefile'
fi
if test -f 'u16.c' -a "${1}" != "-c" ; then 
  echo shar: Will not clobber existing file \"'u16.c'\"
else
echo shar: Extracting \"'u16.c'\" \(12424 characters\)
sed "s/^X//" >'u16.c' <<'END_OF_FILE'
X/* This is an adaptation of the decompress part of the widespread net
X * "compress" program. It is specifically designed for the IBM PC and
X * clones and probably has to be compiled with the Microsoft C
X * compiler (quick C won't do, it doesn't support the "huge" model).
X *
X * Parts written (other parts plagiarized) by Tom Horsley
X *   (tahorsley@ssd.harris.com)
X *   Dec 1988.
X */
X#include <stdio.h>
X#include <fcntl.h>
X#include <malloc.h>
X
X/* Magic number stored in first two bytes.
X */
Xunsigned char magic_header[] = { "\037\235" }; /* 1F 9D */
X
X/* Defines for third byte of header */
X#define BIT_MASK     0x1f    /* Max number of bits in codes */
X#define BLOCK_MASK   0x80    /* This bit set if should recognize CLEAR code */
X
X/* Space to use for input file buffer.
X */
X#define MAXBUF 4096
X
X/* a codebuf struct is used to interface with the xcode() routine.
X *
X * codep is filled in by the initN() routines and is compared against
X * vartab[].origp in getcode() to tell when the unpacker is back in the
X * state it had right after the last initN() call.
X *
X * bufp is the current read position inside the input buffer; it is
X * compared against endbuf to decide when more data must be read.
X */
Xstruct codebuf {
X   void		 (*codep)();
X   unsigned char * bufp;
X} cb;
X
X/* a codesize struct is used for advancing from one size code to
X * the next.
X *
X * initp   - routine getcode() calls to start reading codes of this size.
X * n_bits  - width of the codes in bits.
X * maxcode - copied into the global maxcode when this size is selected;
X *           getcode() moves on once free_ent grows past it.
X * origp   - value of cb.codep captured right after initp was called
X *           (filled in at run time, so it starts out as 0).
X */
Xstruct codesize {
X   void		 (*initp)(struct codebuf *);
X   int		 n_bits;
X   long int	 maxcode;
X   void		 (*origp)();
X};
X
Xextern void init9(struct codebuf *);
Xextern void init10(struct codebuf *);
Xextern void init11(struct codebuf *);
Xextern void init12(struct codebuf *);
Xextern void init13(struct codebuf *);
Xextern void init14(struct codebuf *);
Xextern void init15(struct codebuf *);
Xextern void init16(struct codebuf *);
Xextern unsigned int xcode(struct codebuf *);
X
X/* vartab tracks the variable size codes. For each size code the
X * initialization routine, code size, largest code, and assembly state
X * information is recorded.
X *
X * To advance to next sized code, read codes at current size while not
X * at the original state, then call init routine for next size (and
X * record initial state info).
X *
X * The last column (origp) is only a placeholder here; it is overwritten
X * with cb.codep each time the matching init routine is called.
X *
X * The 16 bit row uses 0x10000 rather than 0xffff on purpose:
X * decompress() stops adding table entries once free_ent reaches
X * maxmaxcode (65536), so "free_ent > maxcode" can never become true
X * while reading 16 bit codes.
X */
Xstruct codesize vartab [] = {
X   { init9,  9,	 0x1ffL,  0 },
X   { init10, 10, 0x3ffL,  0 },
X   { init11, 11, 0x7ffL,  0 },
X   { init12, 12, 0xfffL,  0 },
X   { init13, 13, 0x1fffL, 0 },
X   { init14, 14, 0x3fffL, 0 },
X   { init15, 15, 0x7fffL, 0 },
X   { init16, 16, 0x10000L, 0 }
X};
X
X/* Record current entry in vartab.
X */
Xint curvartab = 0;
X
X#ifdef DEBUG
Xlong bytes_out = 0;
X#endif
X
X
X/* buf is the input file buffer. Also used to store the initial help
X * message you get with the -H option.
X */
Xunsigned char buf[MAXBUF] = "\
Xu16 - 16 bit LZW uncompress for the IBM PC\n\
Xu16 [-H] [files...]\n\
X\n\
X-H\tPrint this message and exit.\n\
X\n\
XUncompresses each input file and writes result to stdout.  With no\n\
Xinput file specified, reads stdin.  Probably requires 270-280K of free\n\
Xmemory to run.\n\
X\n\
XWritten for the IBM PC by tahorsley@ssd.harris.com (Tom Horsley).\n\
X\n\
XNOTE: this is kind of like zcat, but it does not try to stick any .Z's\n\
Xon the ends of file names.\n"
X;
X
X/* Number of bytes of file data resident in buf.
X */
Xint	      bufsize = 0;
X
X/* Address of first byte in buffer past end of file
X * (only set when the last buffer is read).
X */
Xchar *	      eofmark = NULL;
X
X/* Address of byte near end of buffer (used to determine
X * when to read additional data).
X */
Xchar *	      endbuf;
X
X
X/* Flag data read from file.
X */
Xint block_compress;
Xint maxbits;
X
X/* State variables controlling decompression
X */
X#define FIRST  257   /* first free entry */
X
X#define CLEAR  256   /* table clear output code */
X
Xint clear_flg = 0;
X
Xlong free_ent = 0;
X
Xlong maxcode;
X
X#define FAR far
X
Xchar FAR * de_stack;
X
X/* tabprefix is the only real fly in the ointment, it needs to be a
X * huge array, but could probably be changed to a couple of far arrays
X * with the resulting additional complications in the tab_prefixof()
X * macro.
X */
Xunsigned int huge * tabprefix;
X
Xunsigned char FAR * tabsuffix;
X
X#define tab_prefixof(_i) tabprefix[_i]
X
X#define tab_suffixof(_i) tabsuffix[_i]
X
Xlong maxmaxcode = 65536L;
X
X/* ReadBuf appends input to whatever is already held in buf, reading
X * from stdin until either the buffer holds MAXBUF bytes or a read
X * returns 0 bytes (in which case eofmark records where the data ends).
X * A failed read reports the error and terminates the program.
X *
X * On return endbuf is the end of file mark if one has been seen,
X * otherwise a position 32 bytes short of the end of the buffered data.
X */
Xvoid
XReadBuf()
X{
X   int		   nread;
X   int		   room;
X
X   for (;;) {
X      /* Stop once end of file is known or the buffer is full.
X       */
X      if (eofmark != NULL) break;
X      room = MAXBUF - bufsize;
X      if (room <= 0) break;
X
X      nread = read(fileno(stdin), &buf[bufsize], room);
X      if (nread < 0) {
X	 perror("u16");
X	 exit(1);
X      }
X      if (nread == 0) {
X	 eofmark = &buf[bufsize];
X      } else {
X	 bufsize += nread;
X      }
X   }
X
X   if (eofmark != NULL) {
X      endbuf = eofmark;
X   } else {
X      endbuf = &buf[bufsize] - 32;
X   }
X}
X
X/* getcode deals with buffer filling, switching code size, and calling
X * the assembler unpacking routines.
X *
X * Returns the next code read from the input (0..65535) as a long, or
X * -1L once the end of the input has been reached.
X */
Xlong int
Xgetcode()
X{
X   int		   leftover;
X
X   if (cb.bufp >= endbuf) {
X      if (eofmark != NULL) {
X	 return(-1L);
X      } else {
X	 /* move the un-read data to the top of the buffer, then read
X	  * some additional data.
X	  */
X	 leftover = &buf[bufsize] - cb.bufp;
X	 memmove(&buf[0], cb.bufp, leftover);
X	 cb.bufp = &buf[0];
X	 bufsize = leftover;
X	 ReadBuf();
X      }
X   }
X
X   /* A full table only forces a wider code while the current width is
X    * still below the maxbits value from the file header.  A file
X    * written with fewer than 16 bits keeps using its widest code once
X    * the table has filled up, so the width must not be bumped then.
X    */
X   if (clear_flg > 0 ||
X       (free_ent > maxcode && vartab[curvartab].n_bits < maxbits)) {
X      /* If the next entry will be too big for the current code, or we
X       * have received a clear code then flush the current size code
X       * and advance to next size.
X       */
X      while (cb.codep != vartab[curvartab].origp) xcode(&cb);
X
X      /* Running past endbuf only means end of input when ReadBuf has
X       * really seen the end of the file.  Otherwise endbuf sits 32
X       * bytes before the end of the buffered data, so there is still
X       * data to decode and the next call refills the buffer.
X       * NOTE(review): assumes the 32 byte slack covers the flush plus
X       * the next xcode() read -- confirm against xcode.asm.
X       */
X      if (cb.bufp >= endbuf && eofmark != NULL) return(-1L);
X      if (clear_flg > 0) {
X	 curvartab = 0;
X	 clear_flg = 0;
X      } else {
X	 ++curvartab;
X	 if (curvartab > (16 - 9)) {
X#ifdef DEBUG
X	    fputs("Attempt to overflow 16 bit codes.\n",stderr);
X#endif
X	    curvartab = 16 - 9;
X	 }
X      }
X
X      /* Start the unpacker at the new size and remember its initial
X       * state so the next flush knows when to stop.
X       */
X      (*vartab[curvartab].initp)(&cb);
X      vartab[curvartab].origp = cb.codep;
X      maxcode = vartab[curvartab].maxcode;
X#ifdef DEBUG
X      fprintf(stderr,
X	 "switching to %d bit codes, bytes_out = %ld, free_ent = %ld\n",
X	 vartab[curvartab].n_bits,bytes_out, free_ent);
X#endif
X   }
X   return (long)(xcode(&cb));
X}
X
X/* Decompress stdin to stdout.  This routine adapts to the codes in
X * the file building the "string" table on-the-fly; requiring no table
X * to be stored in the compressed file.
X *
X * Returns 0 on success (including a file that holds nothing but the
X * header) and 1 if the header is missing or unsupported or stdout
X * reports a write error at the end.  An output error on the very
X * first byte terminates the program instead.
X *
X * All per-file input state is reset on entry, so the routine may be
X * called once for every input file.
X *
X * This routine taken practically verbatim from the net compress
X * program:
X *
X * $Header: compress.c,v 4.0 85/07/30 12:50:00 joe Release $
X *
X * compress.c - File compression ala IEEE Computer, June 1984.
X *
X * Authors:
X *    Spencer W. Thomas (decvax!harpo!utah-cs!utah-gr!thomas)
X *    Jim McKie		(decvax!mcvax!jim)
X *    Steve Davies	(decvax!vax135!petsd!peora!srd)
X *    Ken Turkowski	(decvax!decwrl!turtlevax!ken)
X *    James A. Woods	(decvax!ihnp4!ames!jaw)
X *    Joe Orost		(decvax!vax135!petsd!joe)
X */
Xint
Xdecompress() {
X   register unsigned char FAR * stackp;
X   register int finchar;
X   register long code, oldcode, incode;
X#ifdef DEBUG
X   long stacksize = 0;
X#endif
X
X   /* No buffering on stdin, we do all our own buffering.
X    */
X   setvbuf(stdin, NULL, _IONBF, 0);
X
X   /* Operate in the binary file domain, don't want DOS screwing
X    * around with '\r''s.
X    */
X   setmode(fileno(stdin), O_BINARY);
X   setmode(fileno(stdout), O_BINARY);
X
X   /* Forget whatever a previously decompressed file left behind.
X    * Without this ReadBuf() still sees the old end of file mark, reads
X    * nothing from the new input and the stale buffer is decoded again.
X    */
X   bufsize = 0;
X   eofmark = NULL;
X   clear_flg = 0;
X
X   /* Read the initial buffer worth of data and check magic numbers
X    * and flags.
X    */
X   ReadBuf();
X   if (bufsize < 3) {
X      fputs("u16: Missing file header.\n",stderr);
X      return 1;
X   }
X   if (memcmp(buf,magic_header,2) != 0) {
X      fputs("u16: Bad magic number.\n",stderr);
X      return 1;
X   }
X   block_compress = buf[2] & BLOCK_MASK;
X   maxbits = buf[2] & BIT_MASK;
X   if (maxbits > 16) {
X      fputs("u16: Cannot decompress, compressed with more than 16 bits.\n",
X	 stderr);
X      return 1;
X   }
X
X   /* Initialize the xcode routine to start reading 9 bit codes at the
X    * third byte of the initial buffer.
X    */
X   cb.bufp = &buf[3];
X   init9(&cb);
X   vartab[0].origp = cb.codep;
X   curvartab = 0;
X
X   /*
X    * initialize the first 256 entries in the table.
X    */
X   maxcode = vartab[0].maxcode;
X   for ( code = 255; code >= 0; code-- ) {
X      tab_prefixof(code) = 0;
X      tab_suffixof(code) = (unsigned char)code;
X   }
X   free_ent = ((block_compress) ? FIRST : 256 );
X
X   finchar = oldcode = getcode();
X   if(oldcode == -1)	     /* EOF already? */
X      return 0;		     /* Nothing to output, not an error */
X   putchar((char)finchar );  /* first code must be 8 bits = char */
X#ifdef DEBUG
X   ++bytes_out;
X#endif
X   if(ferror(stdout)) {	     /* Crash if can't write */
X      perror("u16");
X      exit(1);
X   }
X   stackp = de_stack;
X
X   while ( (code = getcode()) > -1 ) {
X
X      if ( (code == CLEAR) && block_compress ) {
X#ifdef DEBUG
X	 fprintf(stderr,
X	    "Input CLEAR code bytes_out = %ld, free_ent = %ld\n",
X	    bytes_out, free_ent);
X#endif
X	 for ( code = 255; code >= 0; code-- )
X	    tab_prefixof(code) = 0;
X	 /* Tell getcode() to drop back to the first code size; the
X	  * table add at the bottom of the loop brings free_ent back
X	  * up to FIRST.
X	  */
X	 clear_flg = 1;
X	 free_ent = FIRST - 1;
X	 if ( (code = getcode ()) == -1 )   /* O, untimely death! */
X	    break;
X      }
X      incode = code;
X
X      /* Special case for KwKwK string.
X       */
X      if ( code >= free_ent ) {
X#ifdef DEBUG
X	 ++stacksize;
X	 if (stacksize >= 65536L) {
X	    fputs("stacksize overflow.\n",stderr);
X	    exit(1);
X	 }
X#endif
X	 *stackp++ = finchar;
X	 code = oldcode;
X      }
X
X      /* Generate output characters in reverse order
X       */
X      while ( code >= 256 ) {
X#ifdef DEBUG
X	 ++stacksize;
X	 if (stacksize >= 65536L) {
X	    fputs("stacksize overflow.\n",stderr);
X	    exit(1);
X	 }
X	 if ((code < 0) || (code >= 65536L)) {
X	    fprintf(stderr,"bad subscript, code = %ld\n",code);
X	 }
X#endif
X	 *stackp++ = tab_suffixof(code);
X	 code = tab_prefixof(code);
X      }
X#ifdef DEBUG
X      ++stacksize;
X      if (stacksize >= 65536L) {
X	 fputs("stacksize overflow.\n",stderr);
X	 exit(1);
X      }
X      if ((code < 0) || (code >= 65536L)) {
X	 fprintf(stderr,"bad subscript, code = %ld\n",code);
X      }
X#endif
X      *stackp++ = finchar = tab_suffixof(code);
X#ifdef DEBUG
X      if (stacksize > 65536L) {
X	 fprintf(stderr,"stacksize reached %ld\n",stacksize);
X      }
X#endif
X
X      /* And put them out in forward order
X       */
X      do {
X	 putchar ( *--stackp );
X#ifdef DEBUG
X	 ++bytes_out;
X	 --stacksize;
X#endif
X      } while ( stackp > de_stack );
X
X#ifdef DEBUG
X      if (stacksize != 0) {
X	 fprintf(stderr,"stacksize = %ld, not empty!\n",stacksize);
X      }
X#endif
X
X      /* Generate the new entry.
X       */
X      if ( (code=free_ent) < maxmaxcode ) {
X#ifdef DEBUG
X	 if ((code < 0) || (code >= 65536L)) {
X	    fprintf(stderr,"bad subscript, code = %ld\n",code);
X	 }
X#endif
X	 tab_prefixof(code) = (unsigned short)oldcode;
X	 tab_suffixof(code) = finchar;
X	 free_ent = code+1;
X      } 
X
X      /* Remember previous code.
X       */
X      oldcode = incode;
X   }
X   fflush( stdout );
X   if(ferror(stdout)) {
X      perror("u16");
X      return 1;
X   }
X   return 0;
X}
X
X/* 16 bit uncompress optimized for 8086 architecture.  The code
X * unpacking routines (xcode and the initN family) are external.
X *
X * Usage: u16 [-H] [files...]
X *
X * With no file arguments stdin is decompressed; otherwise each named
X * file is reopened as stdin and decompressed in turn.  The exit status
X * is the number of inputs that could not be read or decompressed.
X */
Xvoid
Xmain(argc, argv)
X   int		   argc;
X   char *	   argv[];
X{
X   int		   errors = 0;
X
X   /* Step over the program name.
X    */
X   --argc;
X   ++argv;
X
X   /* Only -H is supported.  Any argument starting with '-' either
X    * prints the help text or a usage error, and both paths exit.
X    */
X   if ((argc > 0) && (argv[0][0] == '-')) {
X      if (argv[0][1] != 'H') {
X	 fputs("u16: unrecognized option ",stderr);
X	 fputs(argv[0],stderr);
X	 fputs("\n",stderr);
X	 fputs("usage: u16 [-H] [files...]\n",stderr);
X	 exit(1);
X      }
X      fputs(buf,stderr);
X      exit(0);
X   }
X
X   /* Give stdout a large buffer (speeds up the program by a fair
X    * percentage).
X    */
X   setvbuf(stdout, NULL, _IOFBF, MAXBUF);
X
X   /* Grab the decode stack and the prefix/suffix tables up front.
X    */
X   de_stack = (unsigned char FAR *)halloc(65536L, sizeof(unsigned char));
X   tabprefix = (unsigned int huge *)halloc(65536L, sizeof(unsigned int));
X   tabsuffix = (unsigned char FAR *)halloc(65536L, sizeof(unsigned char));
X   if (!((de_stack != NULL) && (tabprefix != NULL) && (tabsuffix != NULL))) {
X      fputs("u16: out of memory.\n",stderr);
X      exit(1);
X   }
X
X   if (argc == 0) {
X      /* No files named: decompress stdin as it stands.
X       */
X      if (decompress() != 0) {
X	 ++errors;
X	 fputs("u16: error decompressing stdin.\n",stderr);
X      }
X   }
X
X   /* One pass per named file (does nothing when none were given).
X    */
X   for ( ; argc > 0; --argc, ++argv) {
X      if (freopen(argv[0], "r", stdin) == NULL) {
X	 fputs("u16: cannot read ",stderr);
X	 fputs(argv[0],stderr);
X	 fputs("\n",stderr);
X	 ++errors;
X	 continue;
X      }
X      if (decompress() != 0) {
X	 fputs("u16: error in ",stderr);
X	 fputs(argv[0],stderr);
X	 fputs("\n",stderr);
X	 ++errors;
X      }
X      fclose(stdin);
X   }
X#ifdef DEBUG
X   fprintf(stderr,"Total bytes out = %ld\n",bytes_out);
X#endif
X   exit(errors);
X}
END_OF_FILE
if test 12424 -ne `wc -c <'u16.c'`; then
    echo shar: \"'u16.c'\" unpacked with wrong size!
fi
# end of 'u16.c'
fi
echo shar: End of archive 1 \(of 2\).
cp /dev/null ark1isdone
MISSING=""
for I in 1 2 ; do
    if test ! -f ark${I}isdone ; then
	MISSING="${MISSING} ${I}"
    fi
done
if test "${MISSING}" = "" ; then
    echo You have unpacked both archives.
    rm -f ark[1-9]isdone
else
    echo You still need to unpack the following archives:
    echo "        " ${MISSING}
fi
##  End of shell archive.
exit 0
=====================================================================
    usenet: tahorsley@ssd.harris.com  USMail: Tom Horsley
compuserve: 76505,364                         511 Kingbird Circle
     genie: T.HORSLEY                         Delray Beach, FL  33444
======================== Aging: Just say no! ========================