[alt.sources] code for reading from .Z files

gtoal@tharr.UUCP (Graham Toal) (11/15/90)

Archive-name: zlib.shr

This posting consists of a set of routines which roughly simulate fopen,
fgetc, fgets, and fclose.  The difference between these and the originals is
that these will read data from a .Z compressed file, decompressing it on the
fly.  It does *not* use pipes, processes, or intermediate files.  This makes
it useful to add to any programs which read large text files sequentially.

An example of this might be a version of LaTeX which read its .sty files in
compressed form -- it satisfies the following criteria: 1) the files are read
sequentially; 2) the files are read from *much* more often than they are
written to.

I passed this code around a couple of years back, and forgot about it since. 
I recently had to resurrect it, and have taken the chance to fix a couple of
bugs which had surfaced in the mean time, and to port it to MSDOS. (Of course
it still works on Unix or any standard ANSI C system)

I include as a test program a simple version of zcat; someone was asking
recently for a small uncompress program; well this is it.

The source is heavily based on the original compress.  I've removed as much
unnecessary code as I could get away with, and simplified many expressions to
get them through the dismal MSDOS compilers.

Any comments/bug reports to me; Graham Toal <gtoal@ed.ac.uk>

#!/bin/sh-----cut here-----cut here-----cut here-----cut here-----
# shar:	Shell Archiver
#	Run the following text with /bin/sh to create:
#	zcat.c 
#	zlib.h 
#	zlib.c 
cat - << \SHAR_EOF > zcat.c
#include <stdio.h>
#include "zlib.h"
/*#include "zlib.c"*/   /* Written so it can be either included or linked in */

/* This part is optional... you probably wouldn't do this in real life */
#define FILE ZFILE
#define fgetc(in) zfgetc(in)
#define fopen(f, m) zfopen(f, m)
#define fclose(f) zfclose(f)

/*
 * Minimal zcat: copy the (decompressed) contents of every file named on
 * the command line to stdout, or of stdin when no file is named.
 *
 * FILE, fgetc, fopen and fclose are the z-versions here because of the
 * #defines above.
 *
 * Returns 0 if every input could be opened, 1 otherwise.
 */
#ifndef __STDC__
int main(argc, argv)
int argc;
char **argv;
#else
int main(int argc, char **argv)
#endif
{
  FILE *in;
  int i, c;
  int status = 0;       /* becomes 1 as soon as any input cannot be opened */

  if (argc == 1) {
    in = zfilter(stdin);
    if (in == NULL) {
      /* zfilter() reports failure by returning NULL; do not read from it */
      fprintf(stderr, "%s: cannot read standard input\n", argv[0]);
      return(1);
    }
    for (c = fgetc(in); c != EOF; putchar(c), c = fgetc(in)) ;
    fclose(in);
  } else if (argc > 1) {
    for (i = 1; i < argc; i++) {
      in = fopen(argv[i], "r");
      if (in != NULL) {
        for (c = fgetc(in); c != EOF; putchar(c), c = fgetc(in)) ;
        fclose(in);
      } else {
        fprintf(stderr, "%s: cannot open %s\n", argv[0], argv[i]);
        status = 1;     /* keep going, but remember the failure */
      }
    }
  }
  return(status);
}
SHAR_EOF
cat - << \SHAR_EOF > zlib.h
#ifndef _ZLIB_H
#define _ZLIB_H 1

#ifdef MSDOS
#define PC_HUGE huge  /* Microsoft C and contemptibles */ 
#else
#define PC_HUGE
#endif


#define ZEXT ".Z"

#ifdef __arm
#undef ZEXT
#define ZEXT "-z"
#endif

/*
 * State of one z-stream.  Everything the decompressor needs between calls
 * is kept here, so that decoding can stop when the output buffer fills
 * and pick up again on the next read.
 */
typedef struct zfiletype {
#define Z_BITS 16
#define Z_MAXBUF 256
  FILE *file;                            /* underlying stdio stream */
  int flags;                             /* bit flags; zlib.c defines NOT_COMPRESSED and ALLOCATED */
  int n_bits;                            /* number of bits/code */
  int maxbits;                           /* user settable max # bits/code */
  long maxcode;                           /* maximum code, given n_bits */
  long free_ent;                          /* first unused entry */
  int block_compress;                    /* header byte masked with BLOCK_MASK; non-zero enables the CLEAR code */
  int clear_flg;                         /* set when a CLEAR code is seen; getcode() then resets n_bits */

  long stackp;                           /* index into tab_suffixof, which doubles as the decode stack */
  long finchar;                          /* decoder state kept across decompress_more() calls */
  long code, oldcode, incode;            /* decoder state kept across decompress_more() calls */
  int offset, size;                      /* bit position in buf, and bit limit for whole codes in buf */
  unsigned char buf[Z_BITS]; /* Passed to getcode */
  unsigned char PC_HUGE *tab_suffixof;   /* There is a flag bit to say whether */
  long PC_HUGE *tab_prefixof;    /* these have been allocated.         */
  int init;                              /* non-zero once decompress_more() has done its set-up */

  int bufput, bufget, bufend;            /* buff: next write index, next read index, fill limit */
  unsigned char buff[Z_MAXBUF];          /* decompressed bytes waiting to be returned by zfgetc() */
  int c1, c2;                            /* bytes read by zfilter()'s magic check, replayed by zfgetc() (EOF = none) */
  int zeof;                              /* non-zero once zfgetc() has run out of decompressed data */
} ZFILE;

/*
 * Public interface (K&R declarations first, ANSI prototypes second).
 *
 *   zfopen   open fileptr (for reading, fileptr with ZEXT appended is tried
 *            as a fallback); returns NULL if nothing could be opened.
 *   zfclose  release a ZFILE; a NULL argument is ignored.
 *   zfilter  wrap an already-open stdio stream in a ZFILE.
 *   zfgetc   return the next byte of the stream, or EOF.
 *   zfeof    non-zero once the end of the stream has been reached.
 *   zfgets   read one line (newline kept) into line; NULL at end of input.
 */
#ifndef __STDC__
ZFILE *zfopen(/* char *fileptr, char *how */);
void zfclose(/* ZFILE *z */);
ZFILE *zfilter(/* FILE *f */);
int zfgetc(/* ZFILE *z */);
int zfeof(/* ZFILE *z */);
char *zfgets(/* char *line, int len, ZFILE *zfp */);
#else
ZFILE *zfopen(char *fileptr, char *how);
void zfclose(ZFILE *z);
ZFILE *zfilter(FILE *f);
int zfgetc(ZFILE *z);
int zfeof(ZFILE *z);
char *zfgets(char *line, int len, ZFILE *zfp);
#endif /* Not __STDC__ */
#endif
SHAR_EOF
cat - << \SHAR_EOF > zlib.c
/*#define MAIN*/
/*int debug = 1;*/
/*#define DEBUG 1*/

/* These wondrous debugging macros helped me find the nasty bug which
   only manifested itself on msdos -- stackp has to be a long on msdos
   because the array it is indexing is 'huge' ... */
#ifdef DEBUG
/*
 * With DEBUG defined, each macro prints to stderr when lev <= debug; an int
 * named debug must be in scope (see the commented-out declaration above).
 * NOTE(review): the expansions are bare "if (...) ...;" statements, not
 * wrapped in do { } while (0), so take care inside an unbraced if/else.
 */
/* TRACT: print the line number and the text of stmnt; stmnt is NOT executed */
#define TRACT(lev, stmnt) \
  if (lev <= debug) fprintf(stderr, "%d: %s\n", __LINE__, #stmnt);
/* TRACE: print the line number and text of stmnt, then execute stmnt */
#define TRACE(lev, stmnt) \
  if (lev <= debug) fprintf(stderr, "%d: %s\n", __LINE__, #stmnt); stmnt
/* TRACA: execute stmnt first, then print its line number and text */
#define TRACA(lev, stmnt) \
  stmnt; if (lev <= debug) fprintf(stderr, "%d: %s\n", __LINE__, #stmnt);
/* TRACL: print the name of var and its value, formatted with %ld */
#define TRACL(lev, var) \
  if (lev <= debug) fprintf(stderr, "%d: %s <- %ld\n", __LINE__, #var, var);
#else
/* Without DEBUG: TRACT and TRACL vanish, TRACE and TRACA reduce to stmnt */
#define TRACT(lev, stmnt)
#define TRACE(lev, stmnt) stmnt
#define TRACA(lev, stmnt) stmnt
#define TRACL(lev, var)
#endif
/* 
 *
 * Originally:
 *
 * compress.c - File compression ala IEEE Computer, June 1984.
 *
 * Authors:    Spencer W. Thomas       (decvax!harpo!utah-cs!utah-gr!thomas)
 *             Jim McKie               (decvax!mcvax!jim)
 *             Steve Davies            (decvax!vax135!petsd!peora!srd)
 *             Ken Turkowski           (decvax!decwrl!turtlevax!ken)
 *             James A. Woods          (decvax!ihnp4!ames!jaw)
 *             Joe Orost               (decvax!vax135!petsd!joe)
 *
 * $Header: zlib.c,v 4.1 90/11/12 14:52:24 gtoal Release $
 *
 * Graham Toal, 3rd September 1988.  My changes released to public domain.
 *                                   Updated Nov 90.
 *
 * The original decompress has been restructured so that data can be
 * fetched on demand a byte at a time.  This lets it be used as a filter
 * for programs which read large data files - you do not need the disk
 * space to decompress the input files first.
 *
 * (Incidentally, programs reading data off floppies will be speeded up
 *  because decompression is always faster than the equivalent amount
 *  of disk I/O).
 *
 * This implementation supplies 'z' versions of fopen, fgetc, fgets, feof and fclose
 * to be used as direct substitutes for the originals; it would be cleaner
 * and more transparent if the decompress filter were hidden under the
 * real stdio procedures.  An extra call zfilter() is supplied to convert
 * an already-opened stream into a z-stream: see the example at the end
 * of this file.
 *
 * If a file opened by zfopen() was not compressed, the files contents are
 * still recovered correctly at the low expense of an extra procedure call
 * per byte.  This makes the routines more generally usable - they can be
 * left in production programs which can be speeded up in the field by
 * compressing selected input files(*); also, files can be compressed or
 * not selectively depending on whether the compression makes them
 * smaller or not - code accessing the files does not need to know.
 *
 * [(*) reading from a compressed file off floppy disk is faster than
 * reading from an uncompressed file. This probably isn't true of
 * hard disks though.]
 *
 * BUGS: Opening a file "r" will not do CR/LF processing on computers with
 *       this file structure.
 */

#include <stdio.h>
#include <string.h>
#ifdef __STDC__
#include <stdlib.h>
#else
#define size_t int
#endif
#include <ctype.h>

#ifdef MSDOS
#include <malloc.h>
#endif

#ifndef min
#define min(a,b)        ((a>b) ? b : a)
#endif
#define HSIZE           69001L  /* 95% occupancy */

/*
 * the next two codes should not be changed lightly, as they must not
 * lie within the contiguous general code space.
 */

#define FIRST  257L     /* first free entry */
#define CLEAR  256L     /* table clear output code */

#define BIT_MASK        0x1f
#define BLOCK_MASK      0x80
#define INIT_BITS       9       /* initial number of bits/code */

#define CHECK_GAP 10000L/* ratio check interval */

#include "zlib.h"
#define NOT_COMPRESSED 1
#define ALLOCATED 2

#ifndef __STDC__
static void decompress_more( /* register ZFILE *z */ );
static long getcode( /* register ZFILE *z */ );
#else
static void decompress_more(register ZFILE *z);
static long getcode(register ZFILE *z);
#endif

#ifndef __STDC__
ZFILE *zfopen(fileptr, how)
   char *fileptr;
   char *how;
#else
ZFILE *zfopen(char *fileptr, char *how)
#endif
{
register ZFILE *z;

   z = (ZFILE *) malloc(sizeof(ZFILE));
   z->flags = 0;
   z->maxbits = Z_BITS;         /* user settable max # bits/code */
   z->free_ent = 0;             /* first unused entry */
   z->block_compress = BLOCK_MASK;
   z->clear_flg = 0;
   z->init = 0;

   z->zeof = (0 != 0);
   z->c1 = EOF;
   z->c2 = EOF;
   z->bufput = 0;
   z->bufget = 0;
   z->bufend = Z_MAXBUF - 1;

   z->maxbits = Z_BITS;         /* user settable max # bits/code */

   /* Open input file */
   if (*how == 'r') {
      z->file = fopen(fileptr, "rb");
      if (z->file == NULL) {
char tempfname[256];

         strcpy(tempfname, fileptr);
         strcat(tempfname, ZEXT);
         z->file = fopen(tempfname, "rb");
      }
   } else {
      /* No compressed output yet, if ever...                  */
      /* Compress the file explicitly once it has been written */
      z->file = fopen(fileptr, how);
      z->flags |= NOT_COMPRESSED;
   }
   if (z->file == NULL) {
      free(z);
      z = NULL;
   }
   /* Check the magic number */
   if ((z != NULL) 
       && ((fgetc(z->file) != 0x1F) || (fgetc(z->file) != 0x9D))) {
      z->flags |= NOT_COMPRESSED;
      fclose(z->file);
      z->file = fopen(fileptr, how);
      if (z->file == NULL) {
         free(z);
         z = NULL;
      }
   }
   if ((z == NULL) || ((z->flags & NOT_COMPRESSED) != 0))
      return (z);
   z->maxbits = fgetc(z->file); /* set -b from file */
   z->block_compress = z->maxbits & BLOCK_MASK;
   z->maxbits &= BIT_MASK;
   if (z->maxbits > Z_BITS) {
      fprintf(stderr,
        "%s: compressed with %d bits; decompress can only handle %d bits\n",
              fileptr, z->maxbits, Z_BITS);
      exit(0);
   }
   return (z);
}

#ifndef __STDC__
ZFILE *zfilter(f)
   FILE *f;
#else
ZFILE *zfilter(FILE *f)
#endif
{
register ZFILE *z;

   z = (ZFILE *) malloc(sizeof(ZFILE));
   z->flags = 0;
   z->maxbits = Z_BITS;         /* user settable max # bits/code */
   z->free_ent = 0;             /* first unused entry */
   z->block_compress = BLOCK_MASK;
   z->clear_flg = 0;
   z->init = 0;

   z->zeof = (0 != 0);
   z->c1 = EOF;
   z->c2 = EOF;
   z->bufput = 0;
   z->bufget = 0;
   z->bufend = Z_MAXBUF - 1;

   z->maxbits = Z_BITS;         /* user settable max # bits/code */

   /* Open input file */
   z->file = f;
   if (z->file == NULL) {
      free(z);
      z = NULL;
   }
   /* Check the magic number */
   if (z != NULL) {
      z->c1 = fgetc(z->file);
      z->c2 = fgetc(z->file);
      if ((z->c1 != 0x1F) || (z->c2 != 0x9D)) {
         z->flags |= NOT_COMPRESSED;
      }
   }
   if ((z == NULL) || ((z->flags & NOT_COMPRESSED) != 0))
      return (z);
   z->maxbits = fgetc(z->file); /* set -b from file */
   z->block_compress = z->maxbits & BLOCK_MASK;
   z->maxbits &= BIT_MASK;
   if (z->maxbits > Z_BITS) {
      fprintf(stderr,
      "stdin compressed with %d bits; decompress can only handle %d bits\n",
              z->maxbits, Z_BITS);
      exit(0);
   }
   return (z);
}

/*
 * zfgetc - return the next byte of a z-stream, or EOF.
 *
 * Uncompressed streams first hand back any byte held in c1/c2 and then
 * read straight from the underlying file.  Compressed streams are served
 * from z->buff, which decompress_more() refills whenever it runs dry.
 * On reaching the end of a compressed stream the code tables are freed.
 */
#ifndef __STDC__
int zfgetc(z)
   ZFILE *z;
#else
int zfgetc(ZFILE *z)
#endif
{
int next;

   if (z->flags & NOT_COMPRESSED) {
      next = z->c1;
      if (next < 0)             /* no held-back byte: read the file */
         return (fgetc(z->file));
      z->c1 = z->c2;            /* shift the second held byte forward */
      z->c2 = EOF;
      return (next);
   }

   /* Refill only when the buffer is drained and the end was not seen */
   if (!z->zeof && z->bufput == z->bufget)
      decompress_more(z);

   if (z->bufget != z->bufput) {
      z->zeof = 0;
      next = z->buff[z->bufget];
      z->bufget += 1;
      return (next);
   }

   /* Nothing was produced: end of data */
   z->zeof = 1;
   if (z->flags & ALLOCATED) {
#ifdef MSDOS
      hfree(z->tab_suffixof);
      hfree(z->tab_prefixof);
#else
      free(z->tab_suffixof);
      free(z->tab_prefixof);
#endif
      z->flags &= (~ALLOCATED);
   }
   return (EOF);
}

/*
 * zfeof - feof() for z-streams.
 *
 * Compressed streams report the zeof flag maintained by zfgetc().
 * Uncompressed streams are not at end-of-file while a held-back byte is
 * still pending in c1; after that the answer comes from feof().
 */
#ifndef __STDC__
int zfeof(z)
   ZFILE *z;
#else
int zfeof(ZFILE *z)
#endif
{
   if ((z->flags & NOT_COMPRESSED) == 0)
      return (z->zeof);

   if (z->c1 != EOF)
      return (0);

   return (feof(z->file));
}

/*
 * zfclose - release a ZFILE obtained from zfopen() or zfilter().
 *
 * z  stream to release; NULL is ignored.
 *
 * Frees the code tables whenever they are still allocated (also when the
 * stream is closed before its end was reached) and then the ZFILE itself.
 *
 * NOTE(review): the underlying FILE is not closed here.  For compressed
 * streams getcode() closes it on reaching end of input; in the other
 * cases it stays open.  Closing it here needs a way to tell whether
 * getcode() already did so -- confirm before changing.
 */
#ifndef __STDC__
void zfclose(z)
   ZFILE *z;
#else
void zfclose(ZFILE *z)
#endif
{
   if (z == NULL)
      return;
   /* The ALLOCATED flag, not zeof, says whether the tables exist */
   if ((z->flags & ALLOCATED) != 0) {
#ifdef MSDOS
      hfree(z->tab_suffixof);
      hfree(z->tab_prefixof);
#else
      free(z->tab_suffixof);
      free(z->tab_prefixof);
#endif
      z->flags &= (~ALLOCATED);
   }
   free(z);
}

/*
 * zfgets - fgets() for z-streams.
 *
 * line  destination buffer
 * len   size of the buffer in bytes, including room for the final '\0'
 * zfp   stream to read from
 *
 * Reads at most len-1 bytes, stopping after a newline (which is stored)
 * or at end of input, and always NUL-terminates what was read.  Bytes
 * that do not fit stay in the stream for the next call.
 *
 * Returns line, or NULL if len <= 0, line is NULL, or end of input was
 * reached before any byte could be read.  A last line without a newline
 * is therefore returned rather than dropped.
 */
#ifndef __STDC__
char *zfgets(line, len, zfp)
   char *line;
   int len;
   ZFILE *zfp;
#else
char *zfgets(char *line, int len, ZFILE *zfp)
#endif
{
int c = 0, pos = 0;

   if (line == NULL || len <= 0)
      return (NULL);

   /* Stop one short of len so the terminator never overwrites data */
   while (pos < len - 1) {
      c = zfgetc(zfp);
      if (c == EOF)
         break;
      line[pos++] = (char) c;
      if (c == '\n')
         break;
   }
   line[pos] = '\0';

   if (c == EOF && pos == 0)
      return (NULL);            /* nothing left to read */
   return (line);
}

#ifndef __STDC__
static void decompress_more(z)
   register ZFILE *z;
#else
static void decompress_more(register ZFILE *z)
#endif
{
   z->bufput = 0;
   z->bufget = 0;

   if (z->init != 0)
      goto resume;
   z->init = 1;

   z->offset = 0;
   z->size = 0;
#ifdef MSDOS
   z->tab_suffixof =
     (unsigned char PC_HUGE *) halloc(HSIZE, sizeof(unsigned char));
   z->tab_prefixof =
     (long PC_HUGE *) halloc(HSIZE, sizeof(long));
#else
   z->tab_suffixof =
    (unsigned char *) malloc((size_t) HSIZE * sizeof(unsigned char));
   z->tab_prefixof = (long *) malloc((size_t) HSIZE * sizeof(long));
#endif
   z->flags |= ALLOCATED;

   z->n_bits = INIT_BITS;
   z->maxcode = ((1L << (z->n_bits)) - 1L);
   for (z->code = 255L; z->code >= 0L; z->code--) {
      z->tab_prefixof[z->code] = 0L;
      z->tab_suffixof[z->code] = (unsigned char) z->code;
   }
   z->free_ent = ((z->block_compress) ? FIRST : 256L);

   z->finchar = z->oldcode = getcode(z);
   if (z->oldcode == -1L)
      return;                   /* EOF already? */
   if (z->finchar < 0L || z->finchar >= 256L)
      fprintf(stderr, "****\n");
   z->buff[z->bufput] = (char) (z->finchar & 0xff);
   z->bufput++;

   z->stackp = 1L << Z_BITS;    /* The 1L is for DOS huge arrays */

   while ((z->code = getcode(z)) != EOF) {
      if ((z->code == CLEAR) && z->block_compress) {
         for (z->code = 255; z->code >= 0; z->code--)
            z->tab_prefixof[z->code] = 0;
         z->clear_flg = 1;
         z->free_ent = FIRST - 1;
         if ((z->code = getcode(z)) == EOF)
            break;              /* O, untimely death! */
      }                         /* if */
      z->incode = z->code;
      if (z->code >= z->free_ent) {
         z->tab_suffixof[z->stackp] = (unsigned char) z->finchar;
         z->stackp += 1L;
         z->code = z->oldcode;
      }
      while (z->code >= 256L) {
         z->tab_suffixof[z->stackp] = z->tab_suffixof[z->code];
         z->stackp += 1L;
         z->code = z->tab_prefixof[z->code];
      }
      z->finchar = z->tab_suffixof[z->code];
      z->tab_suffixof[z->stackp] = (unsigned char) z->finchar;
      z->stackp += 1L;
      do {
long tmp;

         z->stackp -= 1L;
         tmp = z->tab_suffixof[z->stackp];
         z->buff[z->bufput++] = (unsigned char) (tmp & 255L);
         if (z->bufput == z->bufend) {
            return;             /* Logically a setjmp/longjump, but this is
                                   more portable */
      resume:;                  /* jumped to here -- is jumping into a loop
                                   safe? */
            /* - or should I use jumps for the loop too?      */
         }                      /* if */
      } while (z->stackp > (1L << Z_BITS));
      /* ^ This is why I changed stackp from a pointer. */
      /* Pointer comparisons can be dubious...          */
      if ((z->code = z->free_ent) < (1L << z->maxbits)) {
         z->tab_prefixof[z->code] = z->oldcode;
         z->tab_suffixof[z->code] = (unsigned char) z->finchar;
         z->free_ent = z->code + 1;
      }
      z->oldcode = z->incode;
   }                            /* while */
}                       /* decompress more */

static unsigned char rmask[9] =
{0x00, 0x01, 0x03, 0x07, 0x0f, 0x1f, 0x3f, 0x7f, 0xff};

/*
 * getcode - return the next n_bits-wide code from the compressed file.
 *
 * Codes are read in groups of n_bits bytes into z->buf and then extracted
 * at bit position z->offset.  A new group is read when the current one is
 * used up, when the code width has to grow (free_ent > maxcode), or after
 * a CLEAR code (clear_flg), which resets the width to INIT_BITS.
 *
 * Returns the code, or EOF at end of input.  On end of input the file is
 * closed and z->file is set to NULL, so every later call also returns EOF
 * instead of touching the closed stream.  A final fragment too short to
 * hold one whole code is treated as end of input.
 */
#ifndef __STDC__
static long getcode(z)
   register ZFILE *z;
#else
static long getcode(register ZFILE *z)
#endif
{                       /* Should be int!!! */
register long code;
register long r_off, bits;
register int bp;

   if (z->file == NULL)
      return (EOF);             /* already closed by an earlier call */

   bp = 0;
   if (z->clear_flg != 0 ||
       z->offset >= z->size ||
       z->free_ent > z->maxcode) {
      if (z->free_ent > z->maxcode) {
         z->n_bits++;
         if (z->n_bits == z->maxbits) {
            z->maxcode = (1L << z->maxbits);    /* won't get any bigger now */
         } else {
            z->maxcode = ((1L << (z->n_bits)) - 1L);
         }
      }
      if (z->clear_flg != 0) {
         z->n_bits = INIT_BITS;
         z->maxcode = ((1L << (z->n_bits)) - 1L);
         z->clear_flg = 0;
      }
      z->size = (int) fread(z->buf, 1, (size_t) z->n_bits, z->file);
      z->offset = 0;
      if (z->size > 0) {
         /* bit limit: only codes that lie wholly inside the data */
         z->size = (z->size << 3) - (z->n_bits - 1);
      }
      if (z->size <= 0) {
         /* nothing read, or fewer bits than one code */
         z->size = 0;
         fclose(z->file);
         z->file = NULL;
         return (EOF);          /* end of file */
      }
   }
   r_off = z->offset;
   bits = z->n_bits;
   bp = bp + ((int) r_off >> 3);
   r_off = r_off & 7;
   /* low-order bits come from the first byte */
   code = ((long) z->buf[bp++] >> r_off);
   bits = bits - 8 + r_off;
   r_off = 8 - r_off;           /* now, offset into code word */
   /* a whole middle byte, if the code spans three bytes */
   if (bits >= 8) {
      code = code | ((long) z->buf[bp++] << r_off);
      r_off = r_off + 8;
      bits = bits - 8;
   }
   /* High-order bits.  Skipped when none remain: the result is the same
      (rmask[0] is 0) and buf is not read one element past its end. */
   if (bits > 0) {
      code = code
         | ((long) ((long) (z->buf[bp]) & (long) rmask[bits]) << (long) r_off);
   }
   z->offset = z->offset + z->n_bits;
   return (code);
}

#ifdef MAIN

/* This part is optional... */
#define FILE ZFILE
#define fgetc(in) zfgetc(in)
#define fopen(f, m) zfopen(f, m)
#define fclose(f) zfclose(f)

/*
 * Example driver, compiled only when MAIN is defined: decompress every
 * file named on the command line (or stdin when none is named) and write
 * the result to stderr.
 *
 * FILE, fgetc, fopen and fclose are the z-versions here because of the
 * #defines above.
 *
 * Returns 0 if every input could be opened, 1 otherwise.
 */
#ifndef __STDC__
int main(argc, argv)
   int argc;
   char **argv;
#else
int main(int argc, char **argv)
#endif
{
FILE *in;
int i, c;
int status = 0;         /* becomes 1 as soon as any input cannot be opened */

   if (argc == 1) {
      in = zfilter(stdin);
      if (in == NULL) {
         /* zfilter() reports failure by returning NULL */
         fprintf(stderr, "%s: cannot read standard input\n", argv[0]);
         return (1);
      }
      for (c = fgetc(in); c != EOF; fputc(c, stderr), c = fgetc(in));
      zfclose(in);
   } else if (argc > 1) {
      for (i = 1; i < argc; i++) {
         in = fopen(argv[i], "r");
         if (in != NULL) {
            for (c = fgetc(in); c != EOF; fputc(c, stderr), c = fgetc(in));
            fclose(in);
         } else {
            fprintf(stderr, "%s: cannot open %s\n", argv[0], argv[i]);
            status = 1;         /* keep going, but remember the failure */
         }
      }
   }
   return (status);
}

#endif
SHAR_EOF

-- 
(* Posted from tharr.uucp - Public Access Unix - +44 (234) 261804 *)