ewilts%Ins.MRC.AdhocNet.CA%Stasis.MRC.AdhocNet.CA%UNCAEDU.@CORNELLC.CCS.CORNELL.EDU.BITNET (Ed Wilts) (06/24/88)
$Part15: $ File_is="ARCSQ.C" $ Check_Sum_is=290919933 $ Copy SYS$Input VMS_SHAR_DUMMY.DUMMY Xstatic char *RCSid = "$Header: arcsq.c,v 1.2 86/07/15 07:54:05 turner Exp $"; X X/* X * $Log:`009arcsq.c,v $ X * Hack-attack 1.3 86/12/20 01:23:45 wilhite@usceast.uucp X * `009Bludgeoned into submission for VAX 11/780 BSD4.2 X *`009(ugly code, but fewer core dumps) X * X * Revision 1.2 86/07/15 07:54:05 turner X * X * X * Revision 1.1 86/06/26 15:00:48 turner X * initial version X * X * X */ X X/* ARC - Archive utility - ARCSQ X X$define(tag,$$segment(@1,$$index(@1,=)+1))# X$define(version,Version $tag( XTED_VERSION DB =3.10), created on $tag( XTED_DATE DB =01/30/86) at $tag( XTED_TIME DB =20:10:46))# X$undefine(tag)# X $version X X(C) COPYRIGHT 1985 by System Enhancement Associates; ALL RIGHTS RESERVED X X By: Thom Henderson X X Description: X This file contains the routines used to squeeze a file X when placing it in an archive. X X Language: X Computer Innovations Optimizing C86 X X Programming notes: X Most of the routines used for the Huffman squeezing algorithm X were lifted from the SQ program by Dick Greenlaw, as adapted X to CI-C86 by Robert J. Beilstein. X*/ X#include <stdio.h> X#include "arc.h" X X/* stuff for Huffman squeezing */ X X#define TRUE 1 X#define FALSE 0 X#define ERROR (-1) X#define SPEOF 256 /* special endfile token */ X#define NOCHILD (-1) /* marks end of path through tree */ X#define NUMVALS 257 /* 256 data values plus SPEOF*/ X#define NUMNODES (NUMVALS+NUMVALS-1) /* number of nodes */ X#define MAXCOUNT (unsigned INT) 65535 /* biggest unsigned integer */ X X/* The following array of structures are the nodes of the X binary trees. The first NUMVALS nodes become the leaves of the X final tree and represent the values of the data bytes being X encoded and the special endfile, SPEOF. X The remaining nodes become the internal nodes of the final tree. X*/ X Xstruct nd /* shared by unsqueezer */ X{ unsigned INT weight; /* number of appearances */ X INT tdepth; /* length on longest path in tree */ X INT lchild, rchild; /* indices to next level */ X} node[NUMNODES]; /* use large buffer */ X Xstatic INT dctreehd; /* index to head of final tree */ X X/* This is the encoding table: X The bit strings have first bit in low bit. X Note that counts were scaled so code fits unsigned integer. X*/ X Xstatic INT codelen[NUMVALS]; /* number of bits in code */ Xstatic unsigned INT code[NUMVALS]; /* code itself, right adjusted */ Xstatic unsigned INT tcode; /* temporary code value */ Xstatic long valcount[NUMVALS]; /* actual count of times seen */ X X/* Variables used by encoding process */ X Xstatic INT curin; /* value currently being encoded */ Xstatic INT cbitsrem; /* # of code string bits left */ Xstatic unsigned INT ccode; /* current code right justified */ X XINT init_sq() /* prepare for scanning pass */ X{ X INT i; /* node index */ X X /* Initialize all nodes to single element binary trees X with zero weight and depth. X */ X X for(i=0; i<NUMNODES; ++i) X { node[i].weight = 0; X node[i].tdepth = 0; X node[i].lchild = NOCHILD; X node[i].rchild = NOCHILD; X } X X for(i=0; i<NUMVALS; i++) X valcount[i] = 0; X} X XINT scan_sq(c) /* add a byte to the tables */ XINT c; /* byte to add */ X{ X unsigned INT *wp; /* speeds up weight counting */ X X /* Build frequency info in tree */ X X if(c == EOF) /* it's traditional */ X c = SPEOF; /* dumb, but traditional */ X X if(*(wp = &node[c].weight) != MAXCOUNT) X ++(*wp); /* bump weight counter */ X X valcount[c]++; /* bump byte counter */ X} X Xlong pred_sq() /* predict size of squeezed file */ X{ X INT i; X INT btlist[NUMVALS]; /* list of intermediate b-trees */ X INT listlen; /* length of btlist */ X unsigned INT ceiling; /* limit for scaling */ X long size = 0; /* predicted size */ X INT numnodes; /* # of nodes in simplified tree */ X INT scale(); X INT heap(); X INT bld_tree(); X INT buildenc(); X INT init_enc(); X X scan_sq(EOF); /* signal end of input */ X X ceiling = MAXCOUNT; X X /* Keep trying to scale and encode */ X X do X { scale(ceiling); X ceiling /= 2; /* in case we rescale */ X X /* Build list of single node binary trees having X leaves for the input values with non-zero counts X */ X X for(i=listlen=0; i<NUMVALS; ++i) X { if(node[i].weight != 0) X { node[i].tdepth = 0; X btlist[listlen++] = i; X } X } X X /* Arrange list of trees into a heap with the entry X indexing the node with the least weight at the top. X */ X X heap(btlist,listlen); X X /* Convert the list of trees to a single decoding tree */ X X bld_tree(btlist,listlen); X X /* Initialize the encoding table */ X X init_enc(); X X /* Try to build encoding table. X Fail if any code is > 16 bits long. X */ X } while(buildenc(0,dctreehd) == ERROR); X X /* Initialize encoding variables */ X X cbitsrem = 0; /* force initial read */ X curin = 0; /* anything but endfile */ X X for(i=0; i<NUMVALS; i++) /* add bits for each code */ X size += valcount[i] * codelen[i]; X X size = (size+7)/8; /* reduce to number of bytes */ X X numnodes = dctreehd<NUMVALS ? 0 : dctreehd-(NUMVALS-1); X X size += sizeof(INT) + 2*numnodes*sizeof(INT); X X return size; X} X X/* The count of number of occurrances of each input value X have already been prevented from exceeding MAXCOUNT. X Now we must scale them so that their sum doesn't exceed X ceiling and yet no non-zero count can become zero. X This scaling prevents errors in the weights of the X interior nodes of the Huffman tree and also ensures that X the codes will fit in an unsigned integer. Rescaling is X used if necessary to limit the code length. X*/ X Xstatic INT scale(ceil) Xunsigned INT ceil; /* upper limit on total weight */ X{ X register INT i,c; X INT ovflw, divisor; X unsigned INT w, sum; X unsigned char increased; /* flag */ X X do X { for(i=sum=ovflw=0; i<NUMVALS; ++i) X { if(node[i].weight > (ceil-sum)) X ++ovflw; X sum += node[i].weight; X } X X divisor = ovflw + 1; X X /* Ensure no non-zero values are lost */ X X increased = FALSE; X for(i=0; i<NUMVALS; ++i) X { w = node[i].weight; X if(w<divisor && w!=0) X { /* Don't fail to provide a code if it's used at all */ X X node[i].weight = divisor; X increased = TRUE; X } X } X } while(increased); X X /* Scaling factor choosen, now scale */ X X if(divisor>1) X for(i=0; i<NUMVALS; ++i) X node[i].weight /= divisor; X} X X/* heap() and adjust() maintain a list of binary trees as a X heap with the top indexing the binary tree on the list X which has the least weight or, in case of equal weights, X least depth in its longest path. The depth part is not X strictly necessary, but tends to avoid long codes which X might provoke rescaling. X*/ X Xstatic INT heap(list,length) XINT list[], length; X{ X register INT i; X INT adjust(); X X for(i=(length-2)/2; i>=0; --i) X adjust(list,i,length-1); X} X X/* Make a heap from a heap with a new top */ X Xstatic INT adjust(list,top,bottom) XINT list[], top, bottom; X{ X register INT k, temp; X INT cmptrees(); X X k = 2 * top + 1; /* left child of top */ X temp = list[top]; /* remember root node of top tree */ X X if(k<=bottom) X { if(k<bottom && cmptrees(list[k],list[k+1])) X ++k; X