ewilts%Ins.MRC.AdhocNet.CA%Stasis.MRC.AdhocNet.CA%UNCAEDU.@CORNELLC.CCS.CORNELL.EDU.BITNET (Ed Wilts) (06/24/88)
$Part13:
$ File_is="ARCS.C"
$ Check_Sum_is=290919933
$ Copy SYS$Input VMS_SHAR_DUMMY.DUMMY
Xstatic char *RCSid = "$Header: arcsq.c,v 1.2 86/07/15 07:54:05 turner Exp $";
X
X/*
X * $Log:`009arcsq.c,v $
X * Hack-attack 1.3 86/12/20 01:23:45 wilhite@usceast.uucp
X * `009Bludgeoned into submission for VAX 11/780 BSD4.2
X *`009(ugly code, but fewer core dumps)
X *
X * Revision 1.2 86/07/15 07:54:05 turner
X *
X *
X * Revision 1.1 86/06/26 15:00:48 turner
X * initial version
X *
X *
X */
X
X/* ARC - Archive utility - ARCSQ
X
X$define(tag,$$segment(@1,$$index(@1,=)+1))#
X$define(version,Version $tag(
XTED_VERSION DB =3.10), created on $tag(
XTED_DATE DB =01/30/86) at $tag(
XTED_TIME DB =20:10:46))#
X$undefine(tag)#
X $version
X
X(C) COPYRIGHT 1985 by System Enhancement Associates; ALL RIGHTS RESERVED
X
X By: Thom Henderson
X
X Description:
X This file contains the routines used to squeeze a file
X when placing it in an archive.
X
X Language:
X Computer Innovations Optimizing C86
X
X Programming notes:
X Most of the routines used for the Huffman squeezing algorithm
X were lifted from the SQ program by Dick Greenlaw, as adapted
X to CI-C86 by Robert J. Beilstein.
X*/
X#include <stdio.h>
X#include "arc.h"
X
X/* stuff for Huffman squeezing */
X
X#define TRUE 1
X#define FALSE 0
X#define ERROR (-1)
X#define SPEOF 256 /* special endfile token */
X#define NOCHILD (-1) /* marks end of path through tree */
X#define NUMVALS 257 /* 256 data values plus SPEOF*/
X#define NUMNODES (NUMVALS+NUMVALS-1) /* number of nodes */
X#define MAXCOUNT (unsigned INT) 65535 /* biggest unsigned integer */
X
X/* The following array of structures are the nodes of the
X binary trees. The first NUMVALS nodes become the leaves of the
X final tree and represent the values of the data bytes being
X encoded and the special endfile, SPEOF.
X The remaining nodes become the internal nodes of the final tree.
X*/
X
Xstruct nd /* shared by unsqueezer */
X{ unsigned INT weight; /* number of appearances */
X INT tdepth; /* length on longest path in tree */
X INT lchild, rchild; /* indices to next level */
X} node[NUMNODES]; /* use large buffer */
X
Xstatic INT dctreehd; /* index to head of final tree */
X
X/* This is the encoding table:
X The bit strings have first bit in low bit.
X Note that counts were scaled so code fits unsigned integer.
X*/
X
Xstatic INT codelen[NUMVALS]; /* number of bits in code */
Xstatic unsigned INT code[NUMVALS]; /* code itself, right adjusted */
Xstatic unsigned INT tcode; /* temporary code value */
Xstatic long valcount[NUMVALS]; /* actual count of times seen */
X
X/* Variables used by encoding process */
X
Xstatic INT curin; /* value currently being encoded */
Xstatic INT cbitsrem; /* # of code string bits left */
Xstatic unsigned INT ccode; /* current code right justified */
X
XINT init_sq() /* prepare for scanning pass */
X{
X INT i; /* node index */
X
X /* Initialize all nodes to single element binary trees
X with zero weight and depth.
X */
X
X for(i=0; i<NUMNODES; ++i)
X { node[i].weight = 0;
X node[i].tdepth = 0;
X node[i].lchild = NOCHILD;
X node[i].rchild = NOCHILD;
X }
X
X for(i=0; i<NUMVALS; i++)
X valcount[i] = 0;
X}
X
XINT scan_sq(c) /* add a byte to the tables */
XINT c; /* byte to add */
X{
X unsigned INT *wp; /* speeds up weight counting */
X
X /* Build frequency info in tree */
X
X if(c == EOF) /* it's traditional */
X c = SPEOF; /* dumb, but traditional */
X
X if(*(wp = &node[c].weight) != MAXCOUNT)
X ++(*wp); /* bump weight counter */
X
X valcount[c]++; /* bump byte counter */
X}
X
Xlong pred_sq() /* predict size of squeezed file */
X{
X INT i;
X INT btlist[NUMVALS]; /* list of intermediate b-trees */
X INT listlen; /* length of btlist */
X unsigned INT ceiling; /* limit for scaling */
X long size = 0; /* predicted size */
X INT numnodes; /* # of nodes in simplified tree */
X INT scale();
X INT heap();
X INT bld_tree();
X INT buildenc();
X INT init_enc();
X
X scan_sq(EOF); /* signal end of input */
X
X ceiling = MAXCOUNT;
X
X /* Keep trying to scale and encode */
X
X do
X { scale(ceiling);
X ceiling /= 2; /* in case we rescale */
X
X /* Build list of single node binary trees having
X leaves for the input values with non-zero counts
X */
X
X for(i=listlen=0; i<NUMVALS; ++i)
X { if(node[i].weight != 0)
X { node[i].tdepth = 0;
X btlist[listlen++] = i;
X }
X }
X
X /* Arrange list of trees into a heap with the entry
X indexing the node with the least weight at the top.
X */
X
X heap(btlist,listlen);
X
X /* Convert the list of trees to a single decoding tree */
X
X bld_tree(btlist,listlen);
X
X /* Initialize the encoding table */
X
X init_enc();
X
X /* Try to build encoding table.
X Fail if any code is > 16 bits long.
X */
X } while(buildenc(0,dctreehd) == ERROR);
X
X /* Initialize encoding variables */
X
X cbitsrem = 0; /* force initial read */
X curin = 0; /* anything but endfile */
X
X for(i=0; i<NUMVALS; i++) /* add bits for each code */
X size += valcount[i] * codelen[i];
X
X size = (size+7)/8; /* reduce to number of bytes */
X
X numnodes = dctreehd<NUMVALS ? 0 : dctreehd-(NUMVALS-1);
X
X size += sizeof(INT) + 2*numnodes*sizeof(INT);
X
X return size;
X}
X
X/* The count of number of occurrances of each input value
X have already been prevented from exceeding MAXCOUNT.
X Now we must scale them so that their sum doesn't exceed
X ceiling and yet no non-zero count can become zero.
X This scaling prevents errors in the weights of the
X interior nodes of the Huffman tree and also ensures that
X the codes will fit in an unsigned integer. Rescaling is
X used if necessary to limit the code length.
X*/
X
Xstatic INT scale(ceil)
Xunsigned INT ceil; /* upper limit on total weight */
X{
X register INT i,c;
X INT ovflw, divisor;
X unsigned INT w, sum;
X unsigned char increased; /* flag */
X
X do
X { for(i=sum=ovflw=0; i<NUMVALS; ++i)
X { if(node[i].weight > (ceil-sum))
X ++ovflw;
X sum += node[i].weight;
X }
X
X divisor = ovflw + 1;
X
X /* Ensure no non-zero values are lost */
X
X increased = FALSE;
X for(i=0; i<NUMVALS; ++i)
X { w = node[i].weight;
X if(w<divisor && w!=0)
X { /* Don't fail to provide a code if it's used at all */
X
X node[i].weight = divisor;
X increased = TRUE;
X }
X }
X } while(increased);
X
X /* Scaling factor choosen, now scale */
X
X if(divisor>1)
X for(i=0; i<NUMVALS; ++i)
X node[i].weight /= divisor;
X}
X
X/* heap() and adjust() maintain a list of binary trees as a
X heap with the top indexing the binary tree on the list
X which has the least weight or, in case of equal weights,
X least depth in its longest path. The depth part is not
X strictly necessary, but tends to avoid long codes which
X might provoke rescaling.
X*/
X
Xstatic INT heap(list,length)
XINT list[], length;
X{
X register INT i;
X INT adjust();
X
X for(i=(length-2)/2; i>=0; --i)
X adjust(list,i,length-1);
X}
X
X/* Make a heap from a heap with a new top */
X
Xstatic INT adjust(list,top,bottom)
XINT list[], top, bottom;
X{
X register INT k, temp;
X INT cmptrees();
X
X k = 2 * top + 1; /* left child of top */
X temp = list[top]; /* remember root node of top tree */
X
X if(k<=bottom)
X { if(k<bottom && cmptrees(list[k],list[k+1]))
X ++k;
X