wilhite@usceast.UUCP (Robert Wilhite) (01/14/87)
["Shar and enjoy"] Here's the Really Working version of BSD ARC (Part 2 of 3), which was forced into submission on our VAX. -------------- cut here -------------- : This is a shar archive. Extract with sh, not csh. echo x - arcext.c cat > arcext.c << '!Funky!Stuff!' /* $define(arc,$ifdef(xarc,off,on))# macro switch for ARC only code $define(xarc,$ifdef(xarc,on,off))# macro switch for XARC only code */ /* ARC - Archive utility - ARCEXT $define(tag,$$segment(@1,$$index(@1,=)+1))# $define(version,Version $tag( TED_VERSION DB =2.18), created on $tag( TED_DATE DB =02/03/86) at $tag( TED_TIME DB =22:55:19))# $undefine(tag)# $version (C) COPYRIGHT 1985 by System Enhancement Associates; ALL RIGHTS RESERVED By: Thom Henderson Description: This file contains the routines used to extract files from an archive. Language: Computer Innovations Optimizing C86 */ #include <stdio.h> #include "arc.h" #if ARC /* $emit($arc)# */ INT extarc(num,arg,prt) /* extract files from archive */ INT num; /* number of arguments */ char *arg[]; /* pointers to arguments */ INT prt; /* true if printing */ #endif /* $emit($xarc)# */ #if XARC INT extarc() /* extract files from archive */ #endif /* $emit(on)# */ { struct heads hdr; /* file header */ #if ARC /* $emit($arc)# */ INT save; /* true to save current file */ INT did[MAXARG]; /* true when argument was used */ char *i, *rindex(); /* string index */ char **name, *malloc(); /* name pointer list, allocator */ INT n; /* index */ INT extfile(); #if MSDOS name = malloc(num*sizeof(char *)); /* get storage for name pointers */ #endif #if BSD name = (char **)malloc(num*sizeof(char *)); /* get storage for name pointers */ #endif for(n=0; n<num; n++) /* for each argument */ { did[n] = 0; /* reset usage flag */ if(!(i=rindex(arg[n],'\\'))) /* find start of name */ if(!(i=rindex(arg[n],'/'))) if(!(i=rindex(arg[n],':'))) i = arg[n]-1; name[n] = i+1; } #endif /* $emit(on)# */ openarc(0); /* open archive for reading */ #if ARC /* $emit($arc)# */ if(num) /* if files were named */ { while(readhdr(&hdr,arc)) /* while more files to check */ { save = 0; /* reset save flag */ for(n=0; n<num; n++) /* for each template given */ { if(match(hdr.name,name[n])) { save = 1; /* turn on save flag */ did[n] = 1; /* turn on usage flag */ break; /* stop looking */ } } if(save) /* extract if desired, else skip */ extfile(&hdr,arg[n],prt); else fseek(arc,hdr.size,1); } } else while(readhdr(&hdr,arc)) /* else extract all files */ extfile(&hdr,"",prt); #endif /* $emit($xarc)# */ #if XARC while(readhdr(&hdr,arc)) /* extract all files */ extfile(&hdr); #endif /* $emit(on)# */ closearc(0); /* close archive after reading */ #if ARC /* $emit($arc)# */ if(note) { for(n=0; n<num; n++) /* report unused args */ { if(!did[n]) { printf("File not found: %s\n",arg[n]); nerrs++; } } } free(name); #endif /* $emit(on)# */ } #if ARC /* $emit($arc)# */ static INT extfile(hdr,path,prt) /* extract a file */ struct heads *hdr; /* pointer to header data */ char *path; /* pointer to path name */ INT prt; /* true if printing */ #endif /* $emit($xarc)# */ #if XARC static INT extfile(hdr) /* extract a file */ #endif /* $emit(on)# */ /* $define(use,$ife($arc,on,fix,hdr->name))# */ #if ARC #define USE fix #else #define USE hdr->name #endif { FILE *f, *fopen(); /* extracted file, opener */ char buf[STRLEN]; /* input buffer */ #if ARC /* $emit($arc)# */ char fix[STRLEN]; /* fixed name buffer */ char *i, *rindex(); /* string index */ if(prt) /* printing is much easier */ { unpack(arc,stdout,hdr); /* unpack file from archive */ printf("\f"); /* eject the form */ return; /* see? I told you! */ } strcpy(fix,path); /* note path name template */ if(!(i=rindex(fix,'\\'))) /* find start of name */ if(!(i=rindex(fix,'/'))) if(!(i=rindex(fix,':'))) i = fix-1; strcpy(i+1,hdr->name); /* replace template with name */ #endif /* $emit(on)# */ if(note) printf("Extracting file: %s\n",USE); if(warn) { if(f=fopen(USE,"r")) /* see if it exists */ { fclose(f); printf("WARNING: File %s already exists!",USE); while(1) { printf(" Overwrite it (y/n)? "); fgets(buf,STRLEN,stdin); *buf = toupper(*buf); if(*buf=='Y' || *buf=='N') break; } if(*buf=='N') { printf("%s not extracted.\n",USE); fseek(arc,hdr->size,1); return; } } } if(!(f=fopen(USE,"w"))) { if(warn) { printf("Cannot create %s\n",USE); nerrs++; } fseek(arc,hdr->size,1); return; } /* now unpack the file */ unpack(arc,f,hdr); /* unpack file from archive */ setstamp(f,hdr->date,hdr->time); /* set the proper date/time stamp */ fclose(f); /* all done writing to file */ } !Funky!Stuff! echo x - arcio.c cat > arcio.c << '!Funky!Stuff!' static char *RCSid = "$Header: arcio.c,v 1.2 86/07/15 07:53:11 turner Exp $"; /* * $Log: arcio.c,v $ * Hack-attack 1.3 86/12/20 01:23:45 wilhite@usceast.uucp * Bludgeoned into submission for VAX 11/780 BSD4.2 * (ugly code, but fewer core dumps) * * Revision 1.2 86/07/15 07:53:11 turner * * * Revision 1.1 86/06/26 15:00:21 turner * initial version * * */ /* ARC - Archive utility - ARCIO $define(tag,$$segment(@1,$$index(@1,=)+1))# $define(version,Version $tag( TED_VERSION DB =2.30), created on $tag( TED_DATE DB =02/03/86) at $tag( TED_TIME DB =22:56:00))# $undefine(tag)# $version (C) COPYRIGHT 1985 by System Enhancement Associates; ALL RIGHTS RESERVED By: Thom Henderson Description: This file contains the file I/O routines used to manipulate an archive. Language: Computer Innovations Optimizing C86 */ #include <stdio.h> #include "arc.h" INT readhdr(hdr,f) /* read a header from an archive */ struct heads *hdr; /* storage for header */ FILE *f; /* archive to read header from */ { #if BSD | ST unsigned char dummy[28]; INT i,j,k; #endif char name[FNLEN]; /* filename buffer */ INT try = 0; /* retry counter */ static INT first = 1; /* true only on first read */ if(!f) /* if archive didn't open */ return 0; /* then pretend it's the end */ if(feof(f)) /* if no more data */ return 0; /* then signal end of archive */ if(fgetc(f)!=ARCMARK) /* check archive validity */ { if(warn) { printf("An entry in %s has a bad header.\n",arcname); nerrs++; } while(!feof(f)) { try++; if(fgetc(f)==ARCMARK) { ungetc(hdrver=fgetc(f),f); if(hdrver>=0 && hdrver<=ARCVER) break; } } if(feof(f) && first) abort("%s is not an archive",arcname); if(warn) printf(" %d bytes skipped.\n",try); if(feof(f)) return 0; } hdrver = fgetc(f); /* get header version */ if(hdrver<0) abort("Invalid header in archive %s",arcname); if(hdrver==0) return 0; /* note our end of archive marker */ if(hdrver>ARCVER) { fread(name,sizeof(char),FNLEN,f); printf("I don't know how to handle file %s in archive %s\n", name,arcname); printf("I think you need a newer version of ARC.\n"); exit(1); } /* amount to read depends on header type */ if(hdrver==1) /* old style is shorter */ { fread(hdr,sizeof(struct heads)-sizeof(long),1,f); hdrver = 2; /* convert header to new format */ hdr->length = hdr->size; /* size is same when not packed */ } else { #if MSDOS fread(hdr,sizeof(struct heads),1,f); #endif #if BSD | ST fread(dummy,27,1,f); for(i=0;i<13;hdr->name[i]=dummy[i],i++); hdr->size = (long)((dummy[16]<<24) + (dummy[15]<<16) + (dummy[14]<<8) + dummy[13]); hdr->date = (short)((dummy[18]<<8) + dummy[17]); hdr->time = (short)((dummy[20]<<8) + dummy[19]); hdr->crc = (short)((dummy[22]<<8) + dummy[21]); hdr->length = (long)((dummy[26]<<24) + (dummy[25]<<16) + (dummy[24]<<8) + dummy[23]); #endif } first = 0; return 1; /* we read something */ } INT writehdr(hdr,f) /* write a header to an archive */ struct heads *hdr; /* header to write */ FILE *f; /* archive to write to */ { unsigned char dummy[28]; fputc(ARCMARK,f); /* write out the mark of ARC */ fputc(hdrver,f); /* write out the header version */ if(!hdrver) /* if that's the end */ return; /* then write no more */ #if MSDOS fwrite(hdr,sizeof(struct heads),1,f); #endif #if BSD | ST /* * put out the hdr in the brain damaged unaligned half back *sswards * way HAL does it */ fwrite(hdr->name,1,13,f); fwrite(&hdr->size,sizeof(long),1,f); fwrite(&hdr->date,sizeof(INT),1,f); fwrite(&hdr->time,sizeof(INT),1,f); fwrite(&hdr->crc ,sizeof(INT),1,f); fwrite(&hdr->length,sizeof(long),1,f); #endif /* note the newest file for updating the archive timestamp */ if(hdr->date>arcdate ||(hdr->date==arcdate && hdr->time>arctime)) { arcdate = hdr->date; arctime = hdr->time; } } INT filecopy(f,t,size) /* bulk file copier */ FILE *f, *t; /* from, to */ long size; /* number of bytes */ { INT len; /* length of a given copy */ INT putc_tst(); while(size--) /* while more bytes to move */ putc_tst(fgetc(f),t); } INT putc_tst(c,t) /* put a character, with tests */ char c; /* character to output */ FILE *t; /* file to write to */ { if(t) #if MSODS | ST if(fputc(c,t)==EOF) abort("Write fail (disk full?)"); #endif #if BSD /* * for reasons beyond me BSD unix returns EOF */ fputc(c,t); #endif } !Funky!Stuff! echo x - arclst.c cat > arclst.c << '!Funky!Stuff!' static char *RCSid = "$Header: arclst.c,v 1.2 86/07/15 07:53:15 turner Exp $"; /* * $Log: arclst.c,v $ * Hack-attack 1.3 86/12/20 01:23:45 wilhite@usceast.uucp * Bludgeoned into submission for VAX 11/780 BSD4.2 * (ugly code, but fewer core dumps) * * Revision 1.2 86/07/15 07:53:15 turner * * * Revision 1.1 86/06/26 15:00:23 turner * initial version * * */ /* ARC - Archive utility - ARCLST $define(tag,$$segment(@1,$$index(@1,=)+1))# $define(version,Version $tag( TED_VERSION DB =2.34), created on $tag( TED_DATE DB =02/03/86) at $tag( TED_TIME DB =22:56:57))# $undefine(tag)# $version (C) COPYRIGHT 1985 by System Enhancement Associates; ALL RIGHTS RESERVED By: Thom Henderson Description: This file contains the routines used to list the contents of an archive. Language: Computer Innovations Optimizing C86 */ #include <stdio.h> #include "arc.h" INT lstarc(num,arg) /* list files in archive */ INT num; /* number of arguments */ char *arg[]; /* pointers to arguments */ { struct heads hdr; /* header data */ INT list; /* true to list a file */ INT did[MAXARG]; /* true when argument was used */ long tnum, tlen, tsize; /* totals */ INT n; /* index */ INT lstfile(); tnum = tlen = tsize = 0; /* reset totals */ for(n=0; n<num; n++) /* for each argument */ did[n] = 0; /* reset usage flag */ rempath(num,arg); /* strip off paths */ if(!kludge) { printf("Name Length "); if(bose) printf(" Stowage SF Size now"); printf(" Date "); if(bose) printf(" Time CRC"); printf("\n"); printf("============ ========"); if(bose) printf(" ======== ==== ========"); printf(" ========="); if(bose) printf(" ====== ===="); printf("\n"); } openarc(0); /* open archive for reading */ if(num) /* if files were named */ { while(readhdr(&hdr,arc)) /* process all archive files */ { list = 0; /* reset list flag */ for(n=0; n<num; n++) /* for each template given */ { if(match(hdr.name,arg[n])) { list = 1; /* turn on list flag */ did[n] = 1; /* turn on usage flag */ break; /* stop looking */ } } if(list) /* if this file is wanted */ { if(!kludge) lstfile(&hdr); /* then tell about it */ tnum++; /* update totals */ tlen += hdr.length; tsize += hdr.size; } fseek(arc,hdr.size,1); /* move to next header */ } } else while(readhdr(&hdr,arc)) /* else report on all files */ { if(!kludge) lstfile(&hdr); tnum++; /* update totals */ tlen += hdr.length; tsize += hdr.size; fseek(arc,hdr.size,1); /* skip to next header */ } closearc(0); /* close archive after reading */ if(!kludge) { printf(" ==== ========"); if(bose) printf(" ==== ========"); printf("\n"); } printf("Total %6ld %8ld ",tnum,tlen); if(bose) printf(" %3ld%% %8ld ",100L - (100L*tsize)/tlen,tsize); printf("\n"); if(note) { for(n=0; n<num; n++) /* report unused args */ { if(!did[n]) { printf("File not found: %s\n",arg[n]); nerrs++; } } } } static INT lstfile(hdr) /* tell about a file */ struct heads *hdr; /* pointer to header data */ { INT yr, mo, dy; /* parts of a date */ INT hh, mm, ss; /* parts of a time */ static char *mon[] = /* month abbreviations */ { "Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec" }; yr = (hdr->date >> 9) & 0x7f; /* dissect the date */ mo = (hdr->date >> 5) & 0x0f; dy = hdr->date & 0x1f; hh = (hdr->time >> 11) & 0x1f; /* dissect the time */ mm = (hdr->time >> 5) & 0x3f; ss = (hdr->time & 0x1f) * 2; printf("%-12s %8ld ",hdr->name,hdr->length); if(bose) { switch(hdrver) { case 1: case 2: printf(" -- "); break; case 3: printf(" Packed "); break; case 4: printf("Squeezed"); break; case 5: case 6: case 7: printf("crunched"); break; case 8: printf("Crunched"); break; default: printf("Unknown!"); } printf(" %3d%%",100L - (100L*hdr->size)/hdr->length); printf(" %8ld ",hdr->size); } printf("%2d %3s %02d", dy, mon[mo-1], (yr+80)%100); if(bose) printf(" %2d:%02d%c %04x", (hh>12?hh-12:hh), mm, (hh>12?'p':'a'), (unsigned INT)hdr->crc); printf("\n"); } !Funky!Stuff! echo x - arclzw.c cat > arclzw.c << '!Funky!Stuff!' static char *RCSid = "$Header: arclzw.c,v 1.2 86/07/15 07:53:20 turner Exp $"; /* * $Log: arclzw.c,v $ * Hack-attack 1.3 86/12/20 01:23:45 wilhite@usceast.uucp * Bludgeoned into submission for VAX 11/780 BSD4.2 * (ugly code, but fewer core dumps) * * Revision 1.2 86/07/15 07:53:20 turner * * * Revision 1.1 86/06/26 15:00:26 turner * initial version * * */ /* ARC - Archive utility - ARCLZW $define(tag,$$segment(@1,$$index(@1,=)+1))# $define(version,Version $tag( TED_VERSION DB =1.88), created on $tag( TED_DATE DB =01/20/86) at $tag( TED_TIME DB =16:47:04))# $undefine(tag)# $version (C) COPYRIGHT 1985 by System Enhancement Associates; ALL RIGHTS RESERVED By: Thom Henderson Description: This file contains the routines used to implement Lempel-Zev data compression, which calls for building a coding table on the fly. This form of compression is especially good for encoding files which contain repeated strings, and can often give dramatic improvements over traditional Huffman SQueezing. Language: Computer Innovations Optimizing C86 Programming notes: In this section I am drawing heavily on the COMPRESS program from UNIX. The basic method is taken from "A Technique for High Performance Data Compression", Terry A. Welch, IEEE Computer Vol 17, No 6 (June 1984), pp 8-19. Also see "Knuth's Fundamental Algorithms", Donald Knuth, Vol 3, Section 6.4. As best as I can tell, this method works by tracing down a hash table of code strings where each entry has the property: if <string> <char> is in the table then <string> is in the table. */ #include <stdio.h> #include "arc.h" /* definitions for older style crunching */ #define FALSE 0 #define TRUE !FALSE #define TABSIZE 4096 #define NO_PRED 0xFFFF #define EMPTY 0xFFFF #define NOT_FND 0xFFFF static unsigned INT inbuf; /* partial input code storage */ static INT sp; /* current stack pointer */ static struct entry /* string table entry format */ { char used; /* true when this entry is in use */ unsigned INT next; /* ptr to next in collision list */ unsigned INT predecessor; /* code for preceeding string */ unsigned char follower; /* char following string */ } string_tab[TABSIZE]; /* the code string table */ /* definitions for the new dynamic Lempel-Zev crunching */ #define BITS 12 /* maximum bits per code */ #define HSIZE 5003 /* 80% occupancy */ #define INIT_BITS 9 /* initial number of bits/code */ static INT n_bits; /* number of bits/code */ static INT maxcode; /* maximum code, given n_bits */ #define MAXCODE(n) ((1<<(n)) - 1) /* maximum code calculation */ static INT maxcodemax = 1 << BITS; /* largest possible code (+1) */ static unsigned char buf[BITS]; /* input/output buffer */ static unsigned char lmask[9] = /* left side masks */ { 0xff, 0xfe, 0xfc, 0xf8, 0xf0, 0xe0, 0xc0, 0x80, 0x00 }; static unsigned char rmask[9] = /* right side masks */ { 0x00, 0x01, 0x03, 0x07, 0x0f, 0x1f, 0x3f, 0x7f, 0xff}; static INT offset; /* byte offset for code output */ static long in_count; /* length of input */ static long bytes_out; /* length of compressed output */ static unsigned INT ent; /* To save much memory (which we badly need at this point), we overlay * the table used by the previous version of Lempel-Zev with those used * by the new version. Since no two of these routines will be used * together, we can safely do this. Note that the tables used for Huffman * squeezing may NOT overlay these, since squeezing and crunching are done * in parallel. */ #if MSODS static long *htab = string_tab; /* hash code table (crunch) */ #endif #if BSD | ST static long htab[HSIZE]; /* hash code table (crunch) */ #endif static unsigned INT codetab[HSIZE]; /* string code table (crunch) */ static unsigned INT *prefix = codetab; /* prefix code table (uncrunch) */ #if MSDOS static unsigned char *suffix = string_tab; /* suffix table (uncrunch) */ #endif #if BSD | ST static unsigned char suffix[HSIZE]; /* suffix table (uncrunch) */ #endif static INT free_ent; /* first unused entry */ static INT firstcmp; /* true at start of compression */ static unsigned char stack[HSIZE]; /* local push/pop stack */ /* * block compression parameters -- after all codes are used up, * and compression rate changes, start over. */ static INT clear_flg; static long ratio; #define CHECK_GAP 10000 /* ratio check interval */ static long checkpoint; /* * the next two codes should not be changed lightly, as they must not * lie within the contiguous general code space. */ #define FIRST 257 /* first free entry */ #define CLEAR 256 /* table clear output code */ static INT cl_block(t) /* table clear for block compress */ FILE *t; /* our output file */ { long rat; INT putcode(); checkpoint = in_count + CHECK_GAP; if(in_count > 0x007fffff) /* shift will overflow */ { rat = bytes_out >> 8; if(rat == 0) /* Don't divide by zero */ rat = 0x7fffffff; else rat = in_count / rat; } else rat = (in_count<<8)/bytes_out;/* 8 fractional bits */ if(rat > ratio) ratio = rat; else { ratio = 0; setmem (htab,HSIZE*sizeof(long),0xff); free_ent = FIRST; clear_flg = 1; putcode(CLEAR,t); } } /***************************************************************** * * Output a given code. * Inputs: * code: A n_bits-bit integer. If == -1, then EOF. This assumes * that n_bits =< (long)wordsize - 1. * Outputs: * Outputs code to the file. * Assumptions: * Chars are 8 bits long. * Algorithm: * Maintain a BITS character long buffer (so that 8 codes will * fit in it exactly). When the buffer fills up empty it and start over. */ static INT putcode(code,t) /* output a code */ INT code; /* code to output */ FILE *t; /* where to put it */ { INT r_off = offset; /* right offset */ INT bits = n_bits; /* bits to go */ unsigned char *bp = buf; /* buffer pointer */ INT n; /* index */ if(code >= 0) /* if a real code */ { /* * Get to the first byte. */ bp += (r_off >> 3); r_off &= 7; /* * Since code is always >= 8 bits, only need to mask the first * hunk on the left. */ *bp = (*bp&rmask[r_off]) | (code<<r_off) & lmask[r_off]; bp++; bits -= (8 - r_off); code >>= (8 - r_off); /* Get any 8 bit parts in the middle (<=1 for up to 16 bits). */ if(bits >= 8) { *bp++ = code; code >>= 8; bits -= 8; } /* Last bits. */ if(bits) *bp = code; offset += n_bits; if(offset == (n_bits << 3)) { bp = buf; bits = n_bits; bytes_out += bits; do putc_pak(*bp++,t); while(--bits); offset = 0; } /* * If the next entry is going to be too big for the code size, * then increase it, if possible. */ if(free_ent>maxcode || clear_flg>0) { /* * Write the whole buffer, because the input side won't * discover the size increase until after it has read it. */ if(offset > 0) { bp = buf; /* reset pointer for writing */ bytes_out += n = n_bits; while(n--) putc_pak(*bp++,t); } offset = 0; if(clear_flg) /* reset if clearing */ { maxcode = MAXCODE(n_bits = INIT_BITS); clear_flg = 0; } else /* else use more bits */ { n_bits++; if(n_bits == BITS) maxcode = maxcodemax; else maxcode = MAXCODE(n_bits); } } } else /* dump the buffer on EOF */ { bytes_out += n = (offset+7) / 8; if(offset > 0) while(n--) putc_pak(*bp++,t); offset = 0; } } /***************************************************************** * * Read one code from the standard input. If EOF, return -1. * Inputs: * cmpin * Outputs: * code or -1 is returned. */ static INT getcode(f) /* get a code */ FILE *f; /* file to get from */ { INT code; static INT offset = 0, size = 0; INT r_off, bits; unsigned char *bp = buf; if(clear_flg > 0 || offset >= size || free_ent > maxcode) { /* * If the next entry will be too big for the current code * size, then we must increase the size. This implies reading * a new buffer full, too. */ if(free_ent > maxcode) { n_bits++; if(n_bits == BITS) maxcode = maxcodemax; /* won't get any bigger now */ else maxcode = MAXCODE(n_bits); } if(clear_flg > 0) { maxcode = MAXCODE(n_bits = INIT_BITS); clear_flg = 0; } for(size=0; size<n_bits; size++) { if((code=getc_unp(f))==EOF) break; else buf[size] = code; } if(size <= 0) return -1; /* end of file */ offset = 0; /* Round size down to integral number of codes */ size = (size << 3)-(n_bits - 1); } r_off = offset; bits = n_bits; /* * Get to the first byte. */ bp +=(r_off >> 3); r_off &= 7; /* Get first part (low order bits) */ code = (*bp++ >> r_off); bits -= 8 - r_off; r_off = 8 - r_off; /* now, offset into code word */ /* Get any 8 bit parts in the middle (<=1 for up to 16 bits). */ if(bits >= 8) { code |= *bp++ << r_off; r_off += 8; bits -= 8; } /* high order bits. */ code |= (*bp & rmask[bits]) << r_off; offset += n_bits; return code; } /* * compress a file * * Algorithm: use open addressing double hashing (no chaining) on the * prefix code / next character combination. We do a variant of Knuth's * algorithm D (vol. 3, sec. 6.4) along with G. Knott's relatively-prime * secondary probe. Here, the modular division first probe is gives way * to a faster exclusive-or manipulation. Also do block compression with * an adaptive reset, where the code table is cleared when the compression * ratio decreases, but after the table fills. The variable-length output * codes are re-sized at this point, and a special CLEAR code is generated * for the decompressor. */ INT init_cm(f,t) /* initialize for compression */ FILE *f; /* file we will be compressing */ FILE *t; /* where we will put it */ { offset = 0; bytes_out = 1; clear_flg = 0; ratio = 0; in_count = 1; checkpoint = CHECK_GAP; maxcode = MAXCODE(n_bits = INIT_BITS); free_ent = FIRST; setmem(htab,HSIZE*sizeof(long),0xff); n_bits = INIT_BITS; /* set starting code size */ putc_pak(BITS,t); /* note our max code length */ firstcmp = 1; /* next byte will be first */ } INT putc_cm(c,t) /* compress a character */ unsigned char c; /* character to compress */ FILE *t; /* where to put it */ { static long fcode; static INT hshift; INT i; INT disp; if(firstcmp) /* special case for first byte */ { ent = c; /* remember first byte */ hshift = 0; for(fcode=(long)HSIZE; fcode<65536L; fcode*=2L) hshift++; hshift = 8 - hshift; /* set hash code range bund */ firstcmp = 0; /* no longer first */ return; } in_count++; fcode =(long)(((long)c << BITS)+ent); i = (c<<hshift)^ent; /* xor hashing */ if(htab[i]==fcode) { ent = codetab[i]; return; } else if(htab[i]<0) /* empty slot */ goto nomatch; disp = HSIZE - i; /* secondary hash (after G.Knott) */ if(i == 0) disp = 1; probe: if((i -= disp) < 0) i += HSIZE; if(htab[i] == fcode) { ent = codetab[i]; return; } if(htab[i] > 0) goto probe; nomatch: putcode(ent,t); ent = c; if(free_ent < maxcodemax) { codetab[i] = free_ent++; /* code -> hashtable */ htab[i] = fcode; } else if((long)in_count >= checkpoint) cl_block(t); } long pred_cm(t) /* finish compressing a file */ FILE *t; /* where to put it */ { putcode(ent,t); /* put out the final code */ putcode(-1,t); /* tell output we are done */ return bytes_out; /* say how big it got */ } /* * Decompress a file. This routine adapts to the codes in the file * building the string table on-the-fly; requiring no table to be stored * in the compressed file. The tables used herein are shared with those of * the compress() routine. See the definitions above. */ INT decomp(f,t) /* decompress a file */ FILE *f; /* file to read codes from */ FILE *t; /* file to write text to */ { unsigned char *stackp; INT finchar; INT code, oldcode, incode; if((code=getc_unp(f))!=BITS) abort("File packed with %d bits, I can only handle %d",code,BITS); n_bits = INIT_BITS; /* set starting code size */ clear_flg = 0; /* * As above, initialize the first 256 entries in the table. */ maxcode = MAXCODE(n_bits=INIT_BITS); for(code = 255; code >= 0; code--) { prefix[code] = 0; suffix[code] = (unsigned char)code; } free_ent = FIRST; finchar = oldcode = getcode(f); if(oldcode == -1) /* EOF already? */ return; /* Get out of here */ putc_ncr((char)finchar,t); /* first code must be 8 bits=char */ stackp = stack; while((code = getcode(f))> -1) { if(code==CLEAR) { for(code = 255; code >= 0; code--) prefix[code] = 0; clear_flg = 1; free_ent = FIRST - 1; if((code=getcode(f))==-1)/* O, untimely death! */ break; } incode = code; /* * Special case for KwKwK string. */ if(code >= free_ent) { *stackp++ = finchar; code = oldcode; } /* * Generate output characters in reverse order */ while(code >= 256) { *stackp++ = suffix[code]; code = prefix[code]; } *stackp++ = finchar = suffix[code]; /* * And put them out in forward order */ do putc_ncr(*--stackp,t); while(stackp > stack); /* * Generate the new entry. */ if((code=free_ent) < maxcodemax) { prefix[code] = (unsigned short)oldcode; suffix[code] = finchar; free_ent = code+1; } /* * Remember previous code. */ oldcode = incode; } } /************************************************************************* * Please note how much trouble it can be to maintain upwards * * compatibility. All that follows is for the sole purpose of unpacking * * files which were packed using an older method. * *************************************************************************/ /* The h() pointer points to the routine to use for calculating a hash value. It is set in the init routines to point to either of oldh() or newh(). oldh() calculates a hash value by taking the middle twelve bits of the square of the key. newh() works somewhat differently, and was tried because it makes ARC about 23% faster. This approach was abandoned because dynamic Lempel-Zev (above) works as well, and packs smaller also. However, inadvertent release of a developmental copy forces us to leave this in. */ static unsigned INT (*h)(); /* pointer to hash function */ static unsigned INT oldh(pred,foll) /* old hash function */ unsigned INT pred; /* code for preceeding string */ unsigned char foll; /* value of following char */ { long local; /* local hash value */ local = (pred + foll) | 0x0800; /* create the hash key */ local *= local; /* square it */ return (local >> 6) & 0x0FFF; /* return the middle 12 bits */ } static unsigned INT newh(pred,foll) /* new hash function */ unsigned INT pred; /* code for preceeding string */ unsigned char foll; /* value of following char */ { return ((pred+foll)*15073)&0xFFF; /* faster hash */ } /* The eolist() function is used to trace down a list of entries with duplicate keys until the last duplicate is found. */ static unsigned INT eolist(index) /* find last duplicate */ unsigned INT index; { INT temp; while(temp=string_tab[index].next) /* while more duplicates */ index = temp; return index; } /* The hash() routine is used to find a spot in the hash table for a new entry. It performs a "hash and linear probe" lookup, using h() to calculate the starting hash value and eolist() to perform the linear probe. This routine DOES NOT detect a table full condition. That MUST be checked for elsewhere. */ static unsigned INT hash(pred,foll) /* find spot in the string table */ unsigned INT pred; /* code for preceeding string */ unsigned char foll; /* char following string */ { unsigned INT local, tempnext; /* scratch storage */ struct entry *ep; /* allows faster table handling */ local = (*h)(pred,foll); /* get initial hash value */ if(!string_tab[local].used) /* if that spot is free */ return local; /* then that's all we need */ else /* else a collision has occured */ { local = eolist(local); /* move to last duplicate */ /* We must find an empty spot. We start looking 101 places down the table from the last duplicate. */ tempnext = (local+101) & 0x0FFF; ep = &string_tab[tempnext]; /* initialize pointer */ while(ep->used) /* while empty spot not found */ { if(++tempnext==TABSIZE) /* if we are at the end */ { tempnext = 0; /* wrap to beginning of table*/ ep = string_tab; } else ++ep; /* point to next element in table */ } /* local still has the pointer to the last duplicate, while tempnext has the pointer to the spot we found. We use this to maintain the chain of pointers to duplicates. */ string_tab[local].next = tempnext; return tempnext; } } /* The unhash() function is used to search the hash table for a given key. Like hash(), it performs a hash and linear probe search. It returns either the number of the entry (if found) or NOT_FND (if not found). */ static unsigned INT unhash(pred,foll) /* search string table for a key */ unsigned INT pred; /* code of preceeding string */ unsigned char foll; /* character following string */ { unsigned INT local, offset; /* scratch storage */ struct entry *ep; /* this speeds up access */ local = (*h)(pred,foll); /* initial hash */ while(1) { ep = &string_tab[local]; /* speed up table access */ if((ep->predecessor==pred) && (ep->follower==foll)) return local; /* we have a match */ if(!ep->next) /* if no more duplicates */ return NOT_FND; /* then key is not listed */ local = ep->next; /* move on to next duplicate */ } } /* The init_tab() routine is used to initialize our hash table. You realize, of course, that "initialize" is a complete misnomer. */ static INT init_tab() /* set ground state in hash table */ { unsigned INT i; /* table index */ INT upd_tab(); setmem((char *)string_tab,sizeof(string_tab),0); for(i=0; i<256; i++) /* list all single byte strings */ upd_tab(NO_PRED,i); inbuf = EMPTY; /* nothing is in our buffer */ } /* The upd_tab routine is used to add a new entry to the string table. As previously stated, no checks are made to ensure that the table has any room. This must be done elsewhere. */ INT upd_tab(pred,foll) /* add an entry to the table */ unsigned INT pred; /* code for preceeding string */ unsigned INT foll; /* character which follows string */ { struct entry *ep; /* pointer to current entry */ /* calculate offset just once */ ep = &string_tab[hash(pred,foll)]; ep->used = TRUE; /* this spot is now in use */ ep->next = 0; /* no duplicates after this yet */ ep->predecessor = pred; /* note code of preceeding string */ ep->follower = foll; /* note char after string */ } /* This algorithm encoded a file into twelve bit strings (three nybbles). The gocode() routine is used to read these strings a byte (or two) at a time. */ static INT gocode(fd) /* read in a twelve bit code */ FILE *fd; /* file to get code from */ { unsigned INT localbuf, returnval; if(inbuf==EMPTY) /* if on a code boundary */ { if((localbuf=getc_unp(fd))==EOF) /* get start of next code */ return EOF; /* pass back end of file status */ localbuf &= 0xFF; /* mask down to true byte value */ if((inbuf=getc_unp(fd))==EOF) /* get end of code, start of next */ return EOF; /* this should never happen */ inbuf &= 0xFF; /* mask down to true byte value */ returnval = ((localbuf<<4)&0xFF0) + ((inbuf>>4)&0x00F); inbuf &= 0x000F; /* leave partial code pending */ } else /* buffer contains first nybble */ { if((localbuf=getc_unp(fd))==EOF) return EOF; localbuf &= 0xFF; returnval = localbuf + ((inbuf<<8)&0xF00); inbuf = EMPTY; /* note no hanging nybbles */ } return returnval; /* pass back assembled code */ } static INT push(c) /* push char onto stack */ INT c; /* character to push */ { stack[sp] = ((char) c); /* coerce integer into a char */ if(++sp >= TABSIZE) abort("Stack overflow\n"); } static INT pop() /* pop character from stack */ { if(sp>0) return ((INT) stack[--sp]); /* leave ptr at next empty slot */ else return EMPTY; } /***** LEMPEL-ZEV DECOMPRESSION *****/ static INT code_count; /* needed to detect table full */ static unsigned INT code; /* where we are so far */ static INT firstc; /* true only on first character */ INT init_ucr(new) /* get set for uncrunching */ INT new; /* true to use new hash function */ { if(new) /* set proper hash function */ h = newh; else h = oldh; sp = 0; /* clear out the stack */ init_tab(); /* set up atomic code definitions */ code_count = TABSIZE - 256; /* note space left in table */ firstc = 1; /* true only on first code */ } INT getc_ucr(f) /* get next uncrunched byte */ FILE *f; /* file containing crunched data */ { unsigned INT c; /* a character of input */ INT code, newcode; static INT oldcode, finchar; struct entry *ep; /* allows faster table handling */ if(firstc) /* first code is always known */ { firstc = FALSE; /* but next will not be first */ oldcode = gocode(f); return finchar = string_tab[oldcode].follower; } if(!sp) /* if stack is empty */ { if((code=newcode=gocode(f))==EOF) return EOF; ep = &string_tab[code]; /* initialize pointer */ if(!ep->used) /* if code isn't known */ { code = oldcode; ep = &string_tab[code]; /* re-initialize pointer */ push(finchar); } while(ep->predecessor!=NO_PRED) { push(ep->follower); /* decode string backwards */ code = ep->predecessor; ep = &string_tab[code]; } push(finchar=ep->follower); /* save first character also */ /* The above loop will terminate, one way or another, with string_tab[code].follower equal to the first character in the string. */ if(code_count) /* if room left in string table */ { upd_tab(oldcode,finchar); --code_count; } oldcode = newcode; } return pop(); /* return saved character */ } !Funky!Stuff! echo x - arcm.h cat > arcm.h << '!Funky!Stuff!' /* * $Header: arcm.h,v 1.2 86/07/15 07:53:40 turner Exp $ */ /* * $Log: arcm.h,v $ * Revision 1.2 86/07/15 07:53:40 turner * * * Revision 1.1 86/06/26 15:01:25 turner * initial version * * */ /* * * ARC archive utility - standard MACRO insert file. * * parameters: * */ #define ARCMARK 26 /* special archive marker */ #define ARCVER 8 /* archive header version code */ #define STRLEN 100 /* system standard string length */ #define FNLEN 13 /* file name length */ #define MAXARG 25 /* maximum number of arguments */ #define ARC 1 #define XARC 0 !Funky!Stuff! echo x - arcmatch.c cat > arcmatch.c << '!Funky!Stuff!' static char *RCSid = "$Header: arcmatch.c,v 1.2 86/07/15 07:53:42 turner Exp $"; /* * $Log: arcmatch.c,v $ * Hack-attack 1.3 86/12/20 01:23:45 wilhite@usceast.uucp * Bludgeoned into submission for VAX 11/780 BSD4.2 * (ugly code, but fewer core dumps) * * Revision 1.2 86/07/15 07:53:42 turner * * * Revision 1.1 86/06/26 15:00:34 turner * initial version * * */ /* ARC - Archive utility - ARCMATCH $define(tag,$$segment(@1,$$index(@1,=)+1))# $define(version,Version $tag( TED_VERSION DB =2.17), created on $tag( TED_DATE DB =12/17/85) at $tag( TED_TIME DB =20:32:18))# $undefine(tag)# $version (C) COPYRIGHT 1985 by System Enhancement Associates; ALL RIGHTS RESERVED By: Thom Henderson Description: This file contains service routines needed to maintain an archive. Language: Computer Innovations Optimizing C86 */ #include <stdio.h> #include "arc.h" INT match(n,t) /* test name against template */ char *n; /* name to test */ char *t; /* template to test against */ { #if MSDOS upper(n); upper(t); /* avoid case problems */ #endif /* first match name part */ while((*n && *n!='.') || (*t && *t!='.')) { if(*n!=*t && *t!='?') /* match fail? */ { if(*t!='*') /* wildcard fail? */ return 0; /* then no match */ else /* else jump over wildcard */ { while(*n && *n!='.') n++; while(*t && *t!='.') t++; break; /* name part matches wildcard */ } } else /* match good for this char */ { n++; /* advance to next char */ t++; } } if(*n && *n=='.') n++; /* skip extension delimiters */ if(*t && *t=='.') t++; /* now match name part */ while(*n || *t) { if(*n!=*t && *t!='?') /* match fail? */ { if(*t!='*') /* wildcard fail? */ return 0; /* then no match */ else return 1; /* else good enough */ } else /* match good for this char */ { n++; /* advance to next char */ t++; } } return 1; /* match worked */ } INT rempath(nargs,arg) /* remove paths from filenames */ INT nargs; /* number of names */ char *arg[]; /* pointers to names */ { char *i, *rindex(); /* string index, reverse indexer */ INT n; /* index */ for(n=0; n<nargs; n++) /* for each supplied name */ { if(!(i=rindex(arg[n],'\\'))) /* search for end of path */ if(!(i=rindex(arg[n],'/'))) i=rindex(arg[n],':'); if(i) /* if path was found */ arg[n] = i+1; /* then skip it */ } } !Funky!Stuff! echo x - arcmisc.c cat > arcmisc.c << '!Funky!Stuff!' #include <stdio.h> #include "arc.h" /* split up a file name (subroutine for makefnam) * * Hack-attack 1.3 86/12/20 01:23:45 wilhite@usceast.uucp * Bludgeoned into submission for VAX 11/780 BSD4.2 * (ugly code, but fewer core dumps) */ static INT _makefn(source,dest) unsigned char *source; unsigned char *dest; { INT j; setmem (dest, 17, 0); /* clear result field */ if (strlen (source) > 1 && source[1] == ':') for (j = 0; j < 2;) dest[j++] = *source++; for (j = 3; *source && *source != '.'; ++source) if (j < 11) dest[j++] = *source; for (j = 12; *source; ++source) if (j < 16) dest[j++] = *source; } /* make a file name using a template */ char *makefnam(rawfn,template,result) unsigned char *rawfn; /* the original file name */ unsigned char *template; /* the template data */ unsigned char *result; /* where to place the result */ { unsigned char et[17],er[17]; _makefn(template,et); _makefn(rawfn,er); *result=0; /* assure no data */ strcat(result,er[0]?er:et); strcat(result,er[3]?er+3:et+3); strcat(result,er[12]?er+12:et+12); return ((char *)&result[0]); } INT freedir(dirs) register struct direct **dirs; { register INT ii; if(dirs == (struct direct **)0) return(-1); for(ii = 0; dirs[ii] != (struct direct *)0; ii++) free(dirs[ii]); free(dirs); return(0); } #if MSDOS #include <dir.h> INT alphasort(dirptr1, dirptr2) struct direct **dirptr1, **dirptr2; { return(strcmp((*dirptr1)->d_name, (*dirptr2)->d_name)); } #endif !Funky!Stuff! exit 0 -- --------------------- Robert Wilhite akgua!usceast!wilhite