gtoal@tharr.UUCP (Graham Toal) (11/15/90)
Archive-name: zlib.shr This posting consists of a set of routines which roughly simulate fopen, fgetc, fgets, and fclose. The difference between these and the originals is that these will read data from a .Z compressed file, decompressing it on the fly. It does *not* use pipes, processes, or intermediate files. This makes it useful to add to any programs which read large text files sequentially. An example of this might be a version of LaTeX which reads its .sty files in compressed form -- it satisfies the following criteria: 1) the files are read sequentially; 2) the files are read from *much* more often than they are written to. I passed this code around a couple of years back, and forgot about it since. I recently had to resurrect it, and have taken the chance to fix a couple of bugs which had surfaced in the meantime, and to port it to MSDOS. (Of course it still works on Unix or any standard ANSI C system) I include as a test program a simple version of zcat; someone was asking recently for a small uncompress program; well this is it. The source is heavily based on the original compress. I've removed as much unnecessary code as I could get away with, and simplified many expressions to get them through the dismal MSDOS compilers. Any comments/bug reports to me; Graham Toal <gtoal@ed.ac.uk> #!/bin/sh-----cut here-----cut here-----cut here-----cut here----- # shar: Shell Archiver # Run the following text with /bin/sh to create: # zcat.c # zlib.h # zlib.c cat - << \SHAR_EOF > zcat.c #include <stdio.h> #include "zlib.h" /*#include "zlib.c"*/ /* Written so it can be either included or linked in */ /* This part is optional... 
you probably wouldn't do this in real life */ #define FILE ZFILE #define fgetc(in) zfgetc(in) #define fopen(f, m) zfopen(f, m) #define fclose(f) zfclose(f) #ifndef __STDC__ int main(argc, argv) int argc; char **argv; #else int main(int argc, char **argv) #endif { FILE *in; int i, c; if (argc == 1) { in = zfilter(stdin); for (c = fgetc(in); c != EOF; putchar(c), c = fgetc(in)) ; fclose(in); } else if (argc > 1) { for (i = 1; i < argc; i++) { in = fopen(argv[i], "r"); if (in != NULL) { for (c = fgetc(in); c != EOF; putchar(c), c = fgetc(in)) ; fclose(in); } else { fprintf(stderr, "%s: cannot open %s\n", argv[0], argv[i]); } } } return(0); } SHAR_EOF cat - << \SHAR_EOF > zlib.h #ifndef _ZLIB_H #define _ZLIB_H 1 #ifdef MSDOS #define PC_HUGE huge /* Microsoft C and contemptibles */ #else #define PC_HUGE #endif #define ZEXT ".Z" #ifdef __arm #undef ZEXT #define ZEXT "-z" #endif typedef struct zfiletype { #define Z_BITS 16 #define Z_MAXBUF 256 FILE *file; int flags; int n_bits; /* number of bits/code */ int maxbits; /* user settable max # bits/code */ long maxcode; /* maximum code, given n_bits */ long free_ent; /* first unused entry */ int block_compress; int clear_flg; long stackp; long finchar; long code, oldcode, incode; int offset, size; unsigned char buf[Z_BITS]; /* Passed to getcode */ unsigned char PC_HUGE *tab_suffixof; /* There is a flag bit to say whether */ long PC_HUGE *tab_prefixof; /* these have been allocated. 
*/ int init; int bufput, bufget, bufend; unsigned char buff[Z_MAXBUF]; int c1, c2; int zeof; } ZFILE; #ifndef __STDC__ ZFILE *zfopen(/* char *fileptr, char *how */); void zfclose(/* ZFILE *z */); ZFILE *zfilter(/* FILE *f */); int zfgetc(/* ZFILE *z */); int zfeof(/* ZFILE *z */); char *zfgets(/* char *line, int len, ZFILE *zfp */); #else ZFILE *zfopen(char *fileptr, char *how); void zfclose(ZFILE *z); ZFILE *zfilter(FILE *f); int zfgetc(ZFILE *z); int zfeof(ZFILE *z); char *zfgets(char *line, int len, ZFILE *zfp); #endif /* Not __STDC__ */ #endif SHAR_EOF cat - << \SHAR_EOF > zlib.c /*#define MAIN*/ /*int debug = 1;*/ /*#define DEBUG 1*/ /* These wondrous debugging macros helped me find the nasty bug which only manifested itself on msdos -- stackp has to be a long on msdos because the array it is indexing is 'huge' ... */ #ifdef DEBUG #define TRACT(lev, stmnt) \ if (lev <= debug) fprintf(stderr, "%d: %s\n", __LINE__, #stmnt); #define TRACE(lev, stmnt) \ if (lev <= debug) fprintf(stderr, "%d: %s\n", __LINE__, #stmnt); stmnt #define TRACA(lev, stmnt) \ stmnt; if (lev <= debug) fprintf(stderr, "%d: %s\n", __LINE__, #stmnt); #define TRACL(lev, var) \ if (lev <= debug) fprintf(stderr, "%d: %s <- %ld\n", __LINE__, #var, var); #else #define TRACT(lev, stmnt) #define TRACE(lev, stmnt) stmnt #define TRACA(lev, stmnt) stmnt #define TRACL(lev, var) #endif /* * * Originally: * * compress.c - File compression ala IEEE Computer, June 1984. * * Authors: Spencer W. Thomas (decvax!harpo!utah-cs!utah-gr!thomas) * Jim McKie (decvax!mcvax!jim) * Steve Davies (decvax!vax135!petsd!peora!srd) * Ken Turkowski (decvax!decwrl!turtlevax!ken) * James A. Woods (decvax!ihnp4!ames!jaw) * Joe Orost (decvax!vax135!petsd!joe) * * $Header: zlib.c,v 4.1 90/11/12 14:52:24 gtoal Release $ * * Graham Toal, 3rd September 1988. My changes released to public domain. * Updated Nov 90. * * The original decompress has been restructured so that data can be * fetched on demand a byte at a time. 
This lets it be used as a filter * for programs which read large data files - you do not need the disk * space to decompress the input files first. * * (Incidentally, programs reading data off floppies will be speeded up * because decompression is always faster than the equivalent amount * of disk I/O). * * This implementation supplies 'z' versions of fopen, fgetc, fgets, feof and * fclose to be used as direct substitutes for the originals; it would be cleaner * and more transparent if the decompress filter were hidden under the * real stdio procedures. An extra call zfilter() is supplied to convert * an already-opened stream into a z-stream: see the example at the end * of this file. * * If a file opened by zfopen() was not compressed, the file's contents are * still recovered correctly at the low expense of an extra procedure call * per byte. This makes the routines more generally usable - they can be * left in production programs which can be speeded up in the field by * compressing selected input files(*); also, files can be compressed or * not selectively depending on whether the compression makes them * smaller or not - code accessing the files does not need to know. * * [(*) reading from a compressed file off floppy disk is faster than * reading from an uncompressed file. This probably isn't true of * hard disks though.] * * BUGS: Opening a file "r" will not do CR/LF processing on computers with * this file structure. */ #include <stdio.h> #include <string.h> #ifdef __STDC__ #include <stdlib.h> #else #define size_t int #endif #include <ctype.h> #ifdef MSDOS #include <malloc.h> #endif #ifndef min #define min(a,b) ((a>b) ? b : a) #endif #define HSIZE 69001L /* 95% occupancy */ /* * the next two codes should not be changed lightly, as they must not * lie within the contiguous general code space. 
*/ #define FIRST 257L /* first free entry */ #define CLEAR 256L /* table clear output code */ #define BIT_MASK 0x1f #define BLOCK_MASK 0x80 #define INIT_BITS 9 /* initial number of bits/code */ #define CHECK_GAP 10000L/* ratio check interval */ #include "zlib.h" #define NOT_COMPRESSED 1 #define ALLOCATED 2 #ifndef __STDC__ static void decompress_more( /* register ZFILE *z */ ); static long getcode( /* register ZFILE *z */ ); #else static void decompress_more(register ZFILE *z); static long getcode(register ZFILE *z); #endif #ifndef __STDC__ ZFILE *zfopen(fileptr, how) char *fileptr; char *how; #else ZFILE *zfopen(char *fileptr, char *how) #endif { register ZFILE *z; z = (ZFILE *) malloc(sizeof(ZFILE)); z->flags = 0; z->maxbits = Z_BITS; /* user settable max # bits/code */ z->free_ent = 0; /* first unused entry */ z->block_compress = BLOCK_MASK; z->clear_flg = 0; z->init = 0; z->zeof = (0 != 0); z->c1 = EOF; z->c2 = EOF; z->bufput = 0; z->bufget = 0; z->bufend = Z_MAXBUF - 1; z->maxbits = Z_BITS; /* user settable max # bits/code */ /* Open input file */ if (*how == 'r') { z->file = fopen(fileptr, "rb"); if (z->file == NULL) { char tempfname[256]; strcpy(tempfname, fileptr); strcat(tempfname, ZEXT); z->file = fopen(tempfname, "rb"); } } else { /* No compressed output yet, if ever... 
*/ /* Compress the file explicitly once it has been written */ z->file = fopen(fileptr, how); z->flags |= NOT_COMPRESSED; } if (z->file == NULL) { free(z); z = NULL; } /* Check the magic number */ if ((z != NULL) && ((fgetc(z->file) != 0x1F) || (fgetc(z->file) != 0x9D))) { z->flags |= NOT_COMPRESSED; fclose(z->file); z->file = fopen(fileptr, how); if (z->file == NULL) { free(z); z = NULL; } } if ((z == NULL) || ((z->flags & NOT_COMPRESSED) != 0)) return (z); z->maxbits = fgetc(z->file); /* set -b from file */ z->block_compress = z->maxbits & BLOCK_MASK; z->maxbits &= BIT_MASK; if (z->maxbits > Z_BITS) { fprintf(stderr, "%s: compressed with %d bits; decompress can only handle %d bits\n", fileptr, z->maxbits, Z_BITS); exit(0); } return (z); } #ifndef __STDC__ ZFILE *zfilter(f) FILE *f; #else ZFILE *zfilter(FILE *f) #endif { register ZFILE *z; z = (ZFILE *) malloc(sizeof(ZFILE)); z->flags = 0; z->maxbits = Z_BITS; /* user settable max # bits/code */ z->free_ent = 0; /* first unused entry */ z->block_compress = BLOCK_MASK; z->clear_flg = 0; z->init = 0; z->zeof = (0 != 0); z->c1 = EOF; z->c2 = EOF; z->bufput = 0; z->bufget = 0; z->bufend = Z_MAXBUF - 1; z->maxbits = Z_BITS; /* user settable max # bits/code */ /* Open input file */ z->file = f; if (z->file == NULL) { free(z); z = NULL; } /* Check the magic number */ if (z != NULL) { z->c1 = fgetc(z->file); z->c2 = fgetc(z->file); if ((z->c1 != 0x1F) || (z->c2 != 0x9D)) { z->flags |= NOT_COMPRESSED; } } if ((z == NULL) || ((z->flags & NOT_COMPRESSED) != 0)) return (z); z->maxbits = fgetc(z->file); /* set -b from file */ z->block_compress = z->maxbits & BLOCK_MASK; z->maxbits &= BIT_MASK; if (z->maxbits > Z_BITS) { fprintf(stderr, "stdin compressed with %d bits; decompress can only handle %d bits\n", z->maxbits, Z_BITS); exit(0); } return (z); } #ifndef __STDC__ int zfgetc(z) ZFILE *z; #else int zfgetc(ZFILE *z) #endif { int c; /* If buffer empty, and not end-of-file, call decompress_more(); return next in buffer. 
*/ if ((z->flags & NOT_COMPRESSED) != 0) { if ((c = z->c1) >= 0) { z->c1 = z->c2; z->c2 = EOF; return (c); } return (fgetc(z->file)); } if ((z->bufget == z->bufput) && (!z->zeof)) { decompress_more(z); } z->zeof = (z->bufput == z->bufget); if (z->zeof) { if ((z->flags & ALLOCATED) != 0) { #ifdef MSDOS hfree(z->tab_suffixof); hfree(z->tab_prefixof); #else free(z->tab_suffixof); free(z->tab_prefixof); #endif z->flags &= (~ALLOCATED); } return (EOF); } c = z->buff[z->bufget]; z->bufget++; return (c); } #ifndef __STDC__ int zfeof(z) ZFILE *z; #else int zfeof(ZFILE *z) #endif { if ((z->flags & NOT_COMPRESSED) != 0) { if (z->c1 != EOF) { return (0 != 0); } return (feof(z->file)); } return (z->zeof); } #ifndef __STDC__ void zfclose(z) ZFILE *z; #else void zfclose(ZFILE *z) #endif { if (z == 0) return; if (z->zeof) { if ((z->flags & ALLOCATED) != 0) { #ifdef MSDOS hfree(z->tab_suffixof); hfree(z->tab_prefixof); #else free(z->tab_suffixof); free(z->tab_prefixof); #endif z->flags &= (~ALLOCATED); } } free(z); } #ifndef __STDC__ char *zfgets(line, len, zfp) char *line; int len; ZFILE *zfp; #else char *zfgets(char *line, int len, ZFILE *zfp) #endif { /* I *hope* this is what fgets does - I only added it here when I came across a program that needed it; I'm including the '\n' in the string. */ int c, pos = 0; for (;;) { c = zfgetc(zfp); if (c == EOF) return (NULL); c &= 255; line[pos] = (char) c; if (pos + 1 == len) /* Too long! 
*/ break; pos++; if (c == '\n') break; } line[pos] = '\0'; return (line); } #ifndef __STDC__ static void decompress_more(z) register ZFILE *z; #else static void decompress_more(register ZFILE *z) #endif { z->bufput = 0; z->bufget = 0; if (z->init != 0) goto resume; z->init = 1; z->offset = 0; z->size = 0; #ifdef MSDOS z->tab_suffixof = (unsigned char PC_HUGE *) halloc(HSIZE, sizeof(unsigned char)); z->tab_prefixof = (long PC_HUGE *) halloc(HSIZE, sizeof(long)); #else z->tab_suffixof = (unsigned char *) malloc((size_t) HSIZE * sizeof(unsigned char)); z->tab_prefixof = (long *) malloc((size_t) HSIZE * sizeof(long)); #endif z->flags |= ALLOCATED; z->n_bits = INIT_BITS; z->maxcode = ((1L << (z->n_bits)) - 1L); for (z->code = 255L; z->code >= 0L; z->code--) { z->tab_prefixof[z->code] = 0L; z->tab_suffixof[z->code] = (unsigned char) z->code; } z->free_ent = ((z->block_compress) ? FIRST : 256L); z->finchar = z->oldcode = getcode(z); if (z->oldcode == -1L) return; /* EOF already? */ if (z->finchar < 0L || z->finchar >= 256L) fprintf(stderr, "****\n"); z->buff[z->bufput] = (char) (z->finchar & 0xff); z->bufput++; z->stackp = 1L << Z_BITS; /* The 1L is for DOS huge arrays */ while ((z->code = getcode(z)) != EOF) { if ((z->code == CLEAR) && z->block_compress) { for (z->code = 255; z->code >= 0; z->code--) z->tab_prefixof[z->code] = 0; z->clear_flg = 1; z->free_ent = FIRST - 1; if ((z->code = getcode(z)) == EOF) break; /* O, untimely death! 
*/ } /* if */ z->incode = z->code; if (z->code >= z->free_ent) { z->tab_suffixof[z->stackp] = (unsigned char) z->finchar; z->stackp += 1L; z->code = z->oldcode; } while (z->code >= 256L) { z->tab_suffixof[z->stackp] = z->tab_suffixof[z->code]; z->stackp += 1L; z->code = z->tab_prefixof[z->code]; } z->finchar = z->tab_suffixof[z->code]; z->tab_suffixof[z->stackp] = (unsigned char) z->finchar; z->stackp += 1L; do { long tmp; z->stackp -= 1L; tmp = z->tab_suffixof[z->stackp]; z->buff[z->bufput++] = (unsigned char) (tmp & 255L); if (z->bufput == z->bufend) { return; /* Logically a setjmp/longjump, but this is more portable */ resume:; /* jumped to here -- is jumping into a loop safe? */ /* - or should I use jumps for the loop too? */ } /* if */ } while (z->stackp > (1L << Z_BITS)); /* ^ This is why I changed stackp from a pointer. */ /* Pointer comparisons can be dubious... */ if ((z->code = z->free_ent) < (1L << z->maxbits)) { z->tab_prefixof[z->code] = z->oldcode; z->tab_suffixof[z->code] = (unsigned char) z->finchar; z->free_ent = z->code + 1; } z->oldcode = z->incode; } /* while */ } /* decompress more */ static unsigned char rmask[9] = {0x00, 0x01, 0x03, 0x07, 0x0f, 0x1f, 0x3f, 0x7f, 0xff}; #ifndef __STDC__ static long getcode(z) register ZFILE *z; #else static long getcode(register ZFILE *z) #endif { /* Should be int!!! 
*/ register long code; register long r_off, bits; register int bp; bp = 0; if (z->clear_flg != 0 || z->offset >= z->size || z->free_ent > z->maxcode) { if (z->free_ent > z->maxcode) { z->n_bits++; if (z->n_bits == z->maxbits) { z->maxcode = (1L << z->maxbits); /* won't get any bigger now */ } else { z->maxcode = ((1L << (z->n_bits)) - 1L); } } if (z->clear_flg != 0) { z->n_bits = INIT_BITS; z->maxcode = ((1L << (z->n_bits)) - 1L); z->clear_flg = 0; } z->size = fread(z->buf, 1, (size_t) z->n_bits, z->file); if (z->size <= 0) { fclose(z->file); return (EOF); /* end of file */ } z->offset = 0; z->size = (z->size << 3) - (z->n_bits - 1); } r_off = z->offset; bits = z->n_bits; bp = bp + ((int) r_off >> 3); r_off = r_off & 7; code = ((long) z->buf[bp++] >> r_off); bits = bits - 8 + r_off; r_off = 8 - r_off; /* now, offset into code word */ if (bits >= 8) { code = code | ((long) z->buf[bp++] << r_off); r_off = r_off + 8; bits = bits - 8; } code = code | ((long) ((long) (z->buf[bp]) & (long) rmask[bits]) << (long) r_off); z->offset = z->offset + z->n_bits; return (code); } #ifdef MAIN /* This part is optional... */ #define FILE ZFILE #define fgetc(in) zfgetc(in) #define fopen(f, m) zfopen(f, m) #define fclose(f) zfclose(f) #ifndef __STDC__ int main(argc, argv) int argc; char **argv; #else int main(int argc, char **argv) #endif { FILE *in; int i, c; if (argc == 1) { in = zfilter(stdin); for (c = fgetc(in); c != EOF; fputc(c, stderr), c = fgetc(in)); zfclose(in); } else if (argc > 1) { for (i = 1; i < argc; i++) { in = fopen(argv[i], "r"); if (in != NULL) { for (c = fgetc(in); c != EOF; fputc(c, stderr), c = fgetc(in)); fclose(in); } else { fprintf(stderr, "%s: cannot open %s\n", argv[0], argv[i]); } } } return (0); } #endif SHAR_EOF -- (* Posted from tharr.uucp - Public Access Unix - +44 (234) 261804 *)