mrr@softie.UUCP (09/26/87)
# This is a shell archive. Remove anything before this line,
# then unpack it by saving it in a file and typing "sh file"
# Created Fri Sep 25 07:25:39 1987
#
# This archive contains:
# About.c
# Backup.c
# Compress.c
echo "Creating About.c"
cat > About.c <<"***EOF About.c***"
/* About.c: Tell the user a little about MRBackup. */
static char *abouts[] = {
"This is MRBackup, a hard disk backup utility written by Mark Rinfret.\n",
"This program allows you to perform complete or incremental backups.\n",
"Data compression / decompression are provided to economize on floppies.\n",
"If you find this program useful or if you have any suggestions,\n",
"Send the author mail at the following addresses:\n",
" Mark R. Rinfret\n",
" 348 Indian Avenue\n",
" Portsmouth, Rhode Island 02871\n\n",
" or, UseNet: mark@unisec.USI.COM\n",
(char *) 0L};
About()
{
int i;
for (i = 0; i < 5; ++i) WriteConsole("\n");
for (i = 0;abouts[i];++i) {
TypeAndSpeak(abouts[i]);
if (!do_speech) Delay(30L); /* little over 1/2 second */
}
}
***EOF About.c***
echo "Creating Backup.c"
cat > Backup.c <<"***EOF Backup.c***"
/* MRBackup: Backup Routines
* Filename: Backup.c
* Author: Mark R. Rinfret
* Date: 09/04/87
*
* History: (most recent change first)
*
* 09/04/87 -MRR- This package was (finally) extracted from Main.c.
*/
#define MAIN
#include "MRBackup.h"
/* Add a file/directory name to the list.
* Called with:
* name: filename string
* FIB: pointer to FileInfoBlock for this file or NULL.
* If NULL, file is starting directory.
* list: address of list header
* Returns:
* 0 => success, failure status otherwise
* Notes:
* The filename string MUST NOT contain the volume component,
* since it is used to build both source and destination
* pathnames. Also note that this routine inserts the name
* into the list in sorted order (case sensitive). Though this
* changes the "natural order" of the files, it makes them
* easier to locate on the backup disks.
*/
int
AddFile(name, FIB, list)
char *name; struct FileInfoBlock *FIB; T_FILE_LIST *list;
{
USHORT blks;
T_FILE *fnode, *tnode;
/* See if the exclude list exists, and if it does, see if
* the name can be matched by one of the patterns.
*/
if (excludelist) {
if (CheckExclude(name)) {
sprintf(conmsg,"Excluding %s\n", name);
WriteConsole(conmsg);
return 0;
}
}
if (!(fnode = (T_FILE *) calloc(1,sizeof(T_FILE)))) {
nomem:
TypeAndSpeak("AddFile: Out of memory!\n");
return ERROR_NO_FREE_STORE;
}
if (!(fnode->filename = calloc(1, strlen(name)+1)))
goto nomem;
strcpy(fnode->filename, name); /* copy the filename */
if (!FIB) {
blks = 1;
fnode->is_dir = true;
}
else if (fnode->is_dir = (FIB->fib_DirEntryType >= 0) )
blks = 1; /* assume 1 block for directory */
else {
blks = ((FIB->fib_Size/488)+2); /* compute file size in blocks */
blks += (blks/70);
}
fnode->blocks = blks;
if (!list->first_file) { /* file list is empty? */
list->first_file = fnode; /* this is the new head */
}
else {
/* Find the insertion point for this file. */
for (tnode = list->first_file; tnode; tnode = tnode->next) {
if (strcmp(fnode->filename, tnode->filename) <= 0) {
fnode->next = tnode; /* insert it here */
if (tnode->previous)
tnode->previous->next = fnode;
fnode->previous = tnode->previous;
tnode->previous = fnode;
if (list->first_file == tnode)
list->first_file = fnode;
return 0;
}
}
/* append to the end of the list */
fnode->previous = list->last_file;
list->last_file->next = fnode;
}
list->last_file = fnode;
return 0;
}
/* Main backup routine. */
Backup()
{
int compare_flag, status = 0;
Speak("O K, I'm ready. Lets back up your disk!");
DateStamp(now);
main_list.first_file = main_list.last_file = current_dir = NULL;
if (do_listing)
if ( OpenList(listpath) ) return;
/* Get the file comparison date. Only files created on or after
* this date are backed up.
*/
do {
*since = *now;
since->ds_Days -= 1; /* default interval is 1 day */
since->ds_Minute = 0;
since->ds_Tick = 0;
Speak("Enter the date since your last backup.");
DateRequest(mywindow, "Backup files since:",since, since);
if ( (compare_flag = CompareDS(since, now) ) >= 0)
DisplayBeep(NULL);
} while (compare_flag >= 0);
BreakPath(homepath, srcvol, srcpath);
BreakPath(backpath, destdrive, destpath);
#ifdef DEBUG
sprintf(debugmsg, "destdrive = %s, destpath = %s\n",destdrive,destpath);
DebugWrite(debugmsg);
sprintf(debugmsg, "srcvol = %s, srcpath = %s\n", srcvol,srcpath);
DebugWrite(debugmsg);
#endif
if (*destpath) {
TypeAndSpeak(
"On a backup, the backup path must only be a device name");
status = ERR_ABORT;
goto done;
}
strcat(destdrive,":");
if (*excludepath)
if (GetExcludes()) goto done;
level = 0;
back = 0;
size = 0;
if (*srcpath) { /* starting path is a directory */
status = AddFile(srcpath, NULL, &main_list);
}
else /* starting path is a device */
status = CollectFiles(srcpath,&main_list);
if (!status) {
while (main_list.first_file) { /* while something in the list */
if (status = BackupFiles(&main_list)) break;
if (status = BackupDirs(&main_list)) break;
}
}
done:
if (status == 0) {
TypeAndSpeak("That was a piece of cake.\n");
}
else {
sprintf(conmsg,"Oops! Backup terminated with error %d.\n",status);
TypeAndSpeak(conmsg);
}
}
/* Process the next directory in the file list.
* This routine is recursive.
* Called with:
* list: file list to be processed
* Returns:
* status code (0 => success)
*/
int
BackupDirs(list)
T_FILE_LIST *list;
{
T_FILE *dirnode;
T_FILE *saved_current_dir;
int status = 0;
T_FILE_LIST sublist; /* subdirectory list header */
sublist.first_file = sublist.last_file = NULL;
saved_current_dir = current_dir; /* remember current context */
/* There are a couple of things to note here. The first is that once
* we have reached here, there should be NO simple file nodes in "list".
* That currently is not handled as an error, but probably should be.
* Second, since this scan modifies the list, a removal of a directory
* node starts the scan at the beginning of the list since we shouldn't
* reference the links in the node we're removing. Since we should be
* removing the first node in the list anyway, who cares?
*/
for (dirnode = list->first_file; dirnode; ) {
if (dirnode->is_dir) { /* found one */
current_dir = dirnode; /* set current directory */
RemFile(dirnode, list);
if (status = NewDir(current_dir->filename)) break;
if (status = CollectFiles(current_dir->filename,&sublist))
break;
if (status = BackupFiles(&sublist)) break;
if (status = BackupDirs(&sublist)) break;
dirnode = list->first_file;
}
else /* should never get here !!! */
dirnode = dirnode->next;
}
current_dir = saved_current_dir;
return status;
}
/* Backup all simple files in the current list.
* Called with:
* list: file list header
* Returns:
* status code (0 => success)
*/
int
BackupFiles(list)
T_FILE_LIST *list;
{
T_FILE *primary, *secondary;
int status = 0;
/* The following loop continually scans the file list (from the front),
* looking for more file entries to process. If the primary choice
* fails, an attempt is made to find a file which will fit in the
* space remaining. If that attempt fails, then a new disk is formatted.
*/
while (primary = FindFile(list,false)) {/* find next file to process */
if (primary->blocks >= size) { /* file doesn't fit */
if (!(secondary = FindFile(list,true))) {
/* At this point, we know that there's at least one
* file to back up, but none that fit. Start a new
* disk.
*/
if (status = NewDisk(true)) return status;
continue; /* try that file again */
}
primary = secondary; /* use second choice */
}
if (status = DoFile(primary)) return status;
RemFile(primary,list); /* delete the node */
}
if (current_dir) { /* forget current directory */
FreeFile(current_dir);
current_dir = NULL;
}
return status;
}
/* Check the file name about to be added against the exclude patterns.
* Called with:
* name: pathname to be checked
* Returns:
* 0 => no match
* 1 => name was matched, ignore it.
*/
int
CheckExclude(name)
char *name;
{
int match = 0;
T_PATTERN *p;
for (p = excludelist; p; p = p->next_pattern) {
if (match = wildcmp(p->pattern, name)) break;
}
return match;
}
/* Check the current number of disk blocks (size) available. If
* less than 1, it's time to format a new disk.
* Called with:
* create_dir: true => OK to create continuation directory
* Returns:
* 0 => success
* 1 => failure
*/
int
CheckSize(create_dir)
int create_dir;
{
if (size > 0) return 0;
return NewDisk(create_dir);
}
/* Collect file names from a starting path.
* Called with:
* name: starting pathname
* Note that name may be a null string when copying
* the entire home device.
* list: pointer to file list header
* Returns:
* status code (0 => success)
* Notes:
* CollectFiles attempts to collect all file and directory names
* for a given level, starting with "name". If a simple filename
* is given as a starting path, only that name will be collected.
* If a directory name is given, then all pathnames contained
* within that directory (only) will be collected. For each
* directory name collected, CollectFiles will be called again to
* collect files for that particular directory. This iterative
* approach (vs. recursive) saves memory and allows us to maintain
* order at each directory level.
*/
int
CollectFiles(name, list)
char *name; T_FILE_LIST *list;
{
int status = 0;
struct FileInfoBlock *FIB = NULL;
T_FILE *fnode; /* file descriptor node */
struct Lock *lock = NULL;
char path[PATH_MAX+1];
USHORT top_level;
#ifdef DEBUG
sprintf(debugmsg,"Collecting files from %s\n",name);
DebugWrite(debugmsg);
#endif
top_level = (*name == '\0'); /* empty name implies top level */
if (!(FIB =
AllocMem((long)sizeof(struct FileInfoBlock),
MEMF_CHIP|MEMF_CLEAR))) {
TypeAndSpeak("CollectFiles: Can not allocate memory for FIB\n");
return ERROR_NO_FREE_STORE;
}
strcpy(path,srcvol); /* rebuild current home path */
strcat(path,":");
strcat(path, name);
if (!(lock=Lock(path,SHARED_LOCK))) {
status = IoErr();
sprintf(conmsg,"CollectFiles can not lock %s; error %d\n",
path, status);
TypeAndSpeak(conmsg);
goto out2;
}
if ((Examine(lock,FIB))==0){
status = IoErr();
sprintf(conmsg,"CollectFiles can not examine %s; error: %d\n",
name, status);
TypeAndSpeak(conmsg);
goto out2;
}
if (FIB->fib_DirEntryType > 0){ /* "name" is a directory */
while(!status && ExNext(lock,FIB)) {
if (FIB->fib_DirEntryType < 0) {
if (CompareDS(&FIB->fib_Date, since) < 0) {
#ifdef DEBUG
sprintf(debugmsg,"Skipping %s\n",
&FIB->fib_FileName[0]);
DebugWrite(debugmsg);
#endif
continue;
}
}
if (top_level)
*path = '\0';
else {
strcpy(path,name);
strcat(path,"/");
}
strcat(path,&FIB->fib_FileName[0]);
status = AddFile(path, FIB, list);
}
/* !!! Need check here for ERROR_NO_MORE_ENTRIES */
}
else {
if ( CompareDS(&FIB->fib_Date, since ) >= 0 )
status = AddFile(name, FIB, list);
}
out2:
UnLock(lock);
out1:
FreeMem(FIB, (long)sizeof(struct FileInfoBlock) );
/* Don't give up if somebody else is using the file - just
* ignore the error. The user can cancel us if there's a
* problem.
*/
if (status == ERROR_OBJECT_IN_USE)
status = 0;
return status;
}
/* Process one file.
* Called with:
* fnode: node describing file
* Returns:
* 0 => success
* 1 => failure
*/
int
DoFile(fnode)
T_FILE *fnode;
{
#define MAX_RETRY 3
int status = ERR_NONE;
char destname[PATH_MAX+1];
int oldsize;
USHORT retries = 0;
char srcname[PATH_MAX+1];
oldsize = size;
size -= fnode->blocks; /* deduct blocks for file */
if (CheckSize(true)) /* check disk space */
return ERR_ABORT;
if (size > oldsize) /* we just formatted */
oldsize = size;
/*#define NOCOPY*/ /* for fast debugging */
do {
if (status = CheckStop()) /* does user want out? */
return status;
sprintf(srcname,"%s:%s",srcvol,fnode->filename);
sprintf(conmsg,"Blocks left: %5d ",oldsize);
WriteConsole(conmsg);
if ( !do_compress || IsCompressed(srcname) || fnode->blocks < 4){
sprintf(destname,"%s%s",destvol,fnode->filename);
sprintf(conmsg,"Copying %s\n",srcname);
WriteConsole(conmsg);
#ifndef NOCOPY
status = CopyFile(srcname,destname);
#endif
}
else {
sprintf(destname,"%s%s.Z",destvol,fnode->filename);
sprintf(conmsg,"Compressing %s\n",srcname);
WriteConsole(conmsg);
#ifndef NOCOPY
if (!(status = compress(srcname,destname)))
status = CopyFileDate(srcname,destname);
#endif
}
if (status) {
/*
status = GetErrOpt("I/O error during copy or compress",
ERR_ABORT | ERR_IGNORE |
ERR_RETRY_FILE | ERR_RESTART_VOLUME);
*/
sprintf(conmsg,"!!!I/O error %d on file %s",status,destname);
ListLine(conmsg);
unlink(destname);
if (++retries <= MAX_RETRY) {
strcat(conmsg," - retrying this file.\n");
if (retries == MAX_RETRY) size = 0; /* try new output disk */
status = ERR_RETRY_FILE;
}
else {
strcat(conmsg," - aborting this file.\n");
/* Note that for now, we will proceed by brute force. A later revision
* of this program will provide context-saving so we can restart the
* series of files on this floppy.
*/
status = ERR_IGNORE;
}
WriteConsole(conmsg);
}
else
ListLine(destname);
} while (status == ERR_RETRY_FILE && (retries < MAX_RETRY));
if (status == ERR_IGNORE)
status = ERR_NONE;
#ifndef NOCOPY
if (!status){
if ((size = DiskBlocks(destvol)) < 0) /* update blocks left */
++status;
}
#endif
return status;
}
/* Attempt to find a file node in the file list. If the check_size
* parameter is true, only look at files which will fit in the space
* remaining on the disk.
* Called with:
* list: pointer to file list to be searched
* check_size: false => find first file in the list
* true => test space available
* Returns:
* file node or NULL (not found)
*/
T_FILE *FindFile(list,check_size)
T_FILE_LIST *list; int check_size;
{
T_FILE *fnode;
for (fnode = list->first_file; fnode; fnode = fnode->next) {
if (!fnode->is_dir) { /* don't consider directory nodes */
if (!check_size) break; /* take this one */
if (fnode->blocks < size) break;
}
}
return fnode;
}
/* Free memory allocated to a file node.
* Called with:
* node: file node
*/
FreeFile(node)
T_FILE *node;
{
free(node->filename);
free(node);
}
/* An exclude file pathname has been specified. Get the patterns it
* contains.
* Returns:
* status: 0 => success, failure otherwise
*/
int
GetExcludes()
{
USHORT i, length, nonwild;
T_PATTERN *p;
int status = 0;
char str[PATH_MAX+1];
FILE *xcld;
if (! exclude_has_changed)
return 0;
if (!(xcld = fopen(excludepath, "r"))) {
sprintf(conmsg,
"I couldn't open the exclude pattern file, error %d.", errno);
TypeAndSpeak(conmsg);
return errno;
}
/* Release any previous exclude list. */
for (p = excludelist; p; p = p->next_pattern) {
free(p->pattern);
free(p);
}
excludelist = lastexclude = NULL;
while (fgets(str, PATH_MAX, xcld)) {
if (length = strlen(str)) {
--length;
str[length] = '\0';
}
if (length && *str != '#') { /* ignore blank lines and comments */
nonwild = 0;
for (i = 0; i < length; ++i) {
if (str[i] != '*' && str[i] != '?' && str[i] != '/')
++nonwild;
}
if (! nonwild ) {
sprintf(conmsg,
"Very funny! %s will exclude everything! Ha ha ha!\n",
str);
TypeAndSpeak(conmsg);
status = 1;
goto done;
}
p = (T_PATTERN *) calloc(sizeof(T_PATTERN), 1);
p->pattern = calloc(length+1, 1);
strcpy(p->pattern, str);
if ( ! excludelist )
excludelist = p;
else
lastexclude->next_pattern = p;
lastexclude = p;
}
}
done:
fclose(xcld);
if (! status )
exclude_has_changed = false;
return status;
}
/* Format a new diskette.
* Called with:
* create_dir: true => create continuation directory, if necessary
* Returns:
* false => success
* true => failure
*/
int
NewDisk(create_dir)
int create_dir;
{
char datestr[20];
int status = 0;
if (back) /* not first disk? */
Delay(TICKS_PER_SECOND * 5L); /* let disk buffers flush */
++back; /* Increment the volume ID */
Speak("Attention! I need a disk for formatting.");
do {
if (!RequestDisk(mywindow, destdrive))
return ERR_ABORT;
/* Don't put the colon in the volume name -
* FormatDisk will happily use it as part of
* the name rather than as a delimiter.
*/
DS2Str(datestr, "%02m-%02d-%02y", now);
sprintf(destvol,"Backup %s.%d", datestr, back);
Inhibit(destdrive,1);
if (status = FormatDisk(destdrive,destvol)) {
TypeAndSpeak("I failed to format the disk. Sorry.\n");
}
} while (status);
strcat(destvol, ":"); /* add colon */
if ((size = DiskBlocks(destvol)) < 0)
status = -size;
else
if (create_dir && (current_dir != NULL) )
status = NewDir(current_dir->filename);
return status;
}
/* Remove a file node from the list.
* Called with:
* node: file node pointer
* list: file list header
* Returns:
* nothing
*/
RemFile(node,list)
T_FILE *node; T_FILE_LIST *list;
{
if (node->previous)
node->previous->next = node->next;
if (node->next)
node->next->previous = node->previous;
if (node == list->first_file)
list->first_file = node->next;
if (node == list->last_file)
list->last_file = node->previous;
if (!node->is_dir) FreeFile(node);
}
***EOF Backup.c***
echo "Creating Compress.c"
cat > Compress.c <<"***EOF Compress.c***"
/* MRBackup: File Compression/Decompression Routines
* Filename: Compress.c
* History: (most recent change first)
*
* 09/19/87 -MRR- Fixed bugs in decompression routine, introduced by
* new buffering scheme.
*
* 09/04/87 -MRR- This package now uses MRBackup's buffer to hold the
* compressed data and uses AmigaDOS I/O.
*/
/*
* Compress.c - data compression/decompression routines.
* This is an adaptation of the public domain Un*x compress v4.0.
*/
#include "MRBackup.h"
#define min(a,b) ((a>b) ? b : a)
#define INBUFSIZE 4L*1024L /* input buffer size */
#ifdef DEBUG
#define DBG(x) x
#else
#define DBG(x)
#endif
extern int errno;
/*
* Set USERMEM to the maximum amount of physical user memory available
* in bytes. USERMEM is used to determine the maximum BITS that can be used
* for compression.
*
* SACREDMEM is the amount of physical memory saved for others; compress
* will hog the rest.
*/
#ifndef SACREDMEM
#define SACREDMEM 0
#endif
#ifndef USERMEM
# define USERMEM 450000 /* default user memory */
#endif
# define MAXFILES 100 /* arbitrary maximum - see note below */
# define BITS 12 /* > 12 crashes system (?) */
# undef USERMEM
long filesize();
char *scdir();
#ifdef USERMEM
# if USERMEM >= (433484+SACREDMEM)
# define PBITS 16
# else
# if USERMEM >= (229600+SACREDMEM)
# define PBITS 15
# else
# if USERMEM >= (127536+SACREDMEM)
# define PBITS 14
# else
# if USERMEM >= (73464+SACREDMEM)
# define PBITS 13
# else
# define PBITS 12
# endif
# endif
# endif
# endif
# undef USERMEM
#endif /* USERMEM */
#ifdef PBITS /* Preferred BITS for this memory size */
# ifndef BITS
# define BITS PBITS
# endif BITS
#endif /* PBITS */
#if BITS == 16
# define HSIZE 69001L /* 95% occupancy */
#endif
#if BITS == 15
# define HSIZE 35023L /* 94% occupancy */
#endif
#if BITS == 14
# define HSIZE 18013L /* 91% occupancy */
#endif
#if BITS == 13
# define HSIZE 9001L /* 91% occupancy */
#endif
#if BITS <= 12
# define HSIZE 5003L /* 80% occupancy */
#endif
/*
* a code_int must be able to hold 2**BITS values of type int, and also -1
*/
#if BITS > 15
typedef long int code_int;
#else
typedef int code_int;
#endif
#ifdef SIGNED_COMPARE_SLOW
typedef unsigned long int count_int;
typedef unsigned short int count_short;
#else
typedef long int count_int;
#endif
#ifdef NO_UCHAR
typedef char char_type;
#else
typedef unsigned char char_type;
#endif /* UCHAR */
char_type magic_header[]= {
"\037\235"
}; /* 1F 9D */
/* Defines for third byte of header */
#define BIT_MASK 0x1f
#define BLOCK_MASK 0x80
/* Masks 0x40 and 0x20 are free. I think 0x20 should mean that there is
a fourth header byte (for expansion).
*/
#define INIT_BITS 9 /* initial number of bits/code */
/*
* compress.c - File compression ala IEEE Computer, June 1984.
*
* Authors:
* Spencer W. Thomas (decvax!harpo!utah-cs!utah-gr!thomas)
* Jim McKie (decvax!mcvax!jim)
* Steve Davies (decvax!vax135!petsd!peora!srd)
* Ken Turkowski (decvax!decwrl!turtlevax!ken)
* James A. Woods (decvax!ihnp4!ames!jaw)
* Joe Orost (decvax!vax135!petsd!joe)
* Mark R. Rinfret (mods) (mark@unisec.USI.COM)
*/
static unsigned endInput; /* true => input file exhausted */
static int n_bits; /* number of bits/code */
static int maxbits = BITS; /* user settable max # bits/code */
static code_int maxcode; /* maximum code, given n_bits */
static code_int maxmaxcode = 1 << BITS; /* NEVER generate this code */
#define MAXCODE(n_bits) ((1 << (n_bits)) - 1)
static count_int htab [HSIZE];
static USHORT codetab [HSIZE];
#define htabof(i) htab[i]
#define codetabof(i) codetab[i]
static code_int hsize = HSIZE; /* for dynamic table sizing */
static count_int fsize;
static struct FileHandle *infile, *outfile;
static char iname[256], oname[256];
static UBYTE inbuf[INBUFSIZE], *inbufptr;
static LONG inbufsize;
static UBYTE *outbufptr;
/*
* To save much memory, we overlay the table used by compress() with those
* used by decompress(). The tab_prefix table is the same size and type
* as the codetab. The tab_suffix table needs 2**BITS characters. We
* get this from the beginning of htab. The output stack uses the rest
* of htab, and contains characters. There is plenty of room for any
* possible stack (stack used to be 8000 characters).
*/
#define tab_prefixof(i) codetabof(i)
#define tab_suffixof(i) ((char_type *)(htab))[i]
#define de_stack ((char_type *)&tab_suffixof(1<<BITS))
static code_int free_ent = 0; /* first unused entry */
static int status = 0;
code_int getcode();
static int nomagic = 0; /* Use a 3-byte magic number header,
unless old file */
static int zcat_flg = 0; /* Write output on stdout, suppress
messages */
static int quiet = 1; /* don't tell me about compression */
/*
* block compression parameters -- after all codes are used up,
* and compression rate changes, start over.
*/
static int block_compress = BLOCK_MASK;
static int clear_flg = 0;
static long int ratio = 0;
#define CHECK_GAP 10000 /* ratio check interval */
static count_int checkpoint = CHECK_GAP;
/*
* the next two codes should not be changed lightly, as they must not
* lie within the contiguous general code space.
*/
#define FIRST 257 /* first free entry */
#define CLEAR 256 /* table clear output code */
static int force = 0;
static char ofname [100];
#ifdef DEBUG
static int verbose = 0;
#endif /* DEBUG */
int (*bgnd_flag)();
static int do_decomp = 0;
static int offset;
static LONG in_count; /* length of input */
static LONG bytes_out; /* length of compressed output */
static LONG out_count; /* # of codes output (for debugging) */
static LONG outbufcount; /* # bytes in output buffer */
/*****************************************************************
*
* Algorithm from "A Technique for High Performance Data Compression",
* Terry A. Welch, IEEE Computer Vol 17, No 6 (June 1984), pp 8-19.
*
* Algorithm:
* Modified Lempel-Ziv method (LZW). Basically finds common
* substrings and replaces them with a variable size code. This is
* deterministic, and can be done on the fly. Thus, the decompression
* procedure needs no input table, but tracks the way the table was built.
*/
static void
comp_init()
{
if(maxbits < INIT_BITS)
maxbits = INIT_BITS;
if (maxbits > BITS)
maxbits = BITS;
maxmaxcode = 1 << maxbits;
outbufcount = 0;
outbufptr = buffer;
}
/*
* compress "from" to "to"
*
* Algorithm: use open addressing double hashing (no chaining) on the
* prefix code / next character combination. We do a variant of Knuth's
* algorithm D (vol. 3, sec. 6.4) along with G. Knott's relatively-prime
* secondary probe. Here, the modular division first probe is gives way
* to a faster exclusive-or manipulation. Also do block compression with
* an adaptive reset, whereby the code table is cleared when the compression
* ratio decreases, but after the table fills. The variable-length output
* codes are re-sized at this point, and a special CLEAR code is generated
* for the decompressor. Late addition: construct the table according to
* file size for noticeable speed improvement on small files. Please direct
* questions about this implementation to ames!jaw.
*/
int
compress(from, to)
char *from, *to;
{
register long fcode;
register code_int i = 0;
register int c;
register code_int ent;
register int disp;
register code_int hsize_reg;
register int hshift;
char *s;
char *err_msg[256];
comp_init();
inbufsize = 0;
endInput = false;
/*
* tune hash table size for small files -- ad hoc,
* but the sizes match earlier #defines, which
* serve as upper bounds on the number of output codes.
*/
hsize = HSIZE;
if (fsize < (1L << 12))
hsize = min (5003L,HSIZE );
else
if (fsize < (1L << 13))
hsize = min (9001L,HSIZE );
else
if (fsize < (1L << 14))
hsize = min (18013L,HSIZE );
else
if (fsize < (1L << 15))
hsize = min (35023L,HSIZE );
else
if (fsize < 47000L )
hsize = min (50021L,HSIZE );
if (!(infile = Open(from, MODE_OLDFILE))) {
return IoErr();
}
if (!(outfile = Open(to, MODE_NEWFILE))) {
status = IoErr();
Close(infile);
return status;
}
if (nomagic == 0){
putcbuf(magic_header[0]);
putcbuf(magic_header[1]);
putcbuf((char)(maxbits | block_compress));
if(status) {
cleanup:
Close(infile);
Close(outfile);
return status;
}
}
offset = 0;
bytes_out = 3; /* includes 3-byte header mojo */
out_count = 0;
clear_flg = 0;
ratio = 0L;
in_count = 1;
checkpoint = CHECK_GAP;
maxcode = MAXCODE(n_bits = INIT_BITS);
free_ent = ((block_compress)?FIRST :256 );
ent = ReadChar();
hshift = 0;
for (fcode = (long) hsize; fcode < 65536L; fcode *= 2L )
hshift++;
hshift = 8 - hshift; /* set hash code range bound */
hsize_reg = hsize;
cl_hash((count_int)hsize_reg); /* clear hash table */
/*while ((c = getc(infile))!= EOF ){*/
while ( ( c = ReadChar() ) != EOF) {
in_count++;
fcode = (long)(((long)c << maxbits)+ ent);
i = ((c << hshift)^ ent); /* xor hashing */
if (htabof (i)== fcode ){
ent = codetabof (i);
continue;
}
else
if ((long)htabof (i)< 0 )/* empty slot */
goto nomatch;
disp = hsize_reg - i; /* secondary hash (after G. Knott) */
if (i == 0 )
disp = 1;
probe:
if ((i -= disp)< 0 )
i += hsize_reg;
if (htabof (i)== fcode ){
ent = codetabof (i);
continue;
}
if ((long)htabof (i)> 0 )
goto probe;
nomatch:
output ((code_int)ent );
out_count++;
ent = c;
if (free_ent < maxmaxcode ){
codetabof (i)= free_ent++;/* code -> hashtable */
htabof (i)= fcode;
}
else
if ((count_int)in_count >= checkpoint && block_compress )
cl_block ();
}
/*
* Put out the final code.
*/
output((code_int)ent );
out_count++;
output((code_int)-1 );
/*
* Print out stats on stderr
*/
if(zcat_flg == 0 && !quiet){
#ifdef DEBUG
fprintf(stderr,
"%ld chars in, %ld codes (%ld bytes) out, compression factor: ",
in_count,out_count,bytes_out );
prratio(stderr,in_count,bytes_out );
fprintf(stderr,"\n");
fprintf(stderr,"\tCompression as in compact: " );
prratio(stderr,in_count-bytes_out,in_count );
fprintf(stderr,"\n");
fprintf(stderr,"\tLargest code (of last block) was %d (%d bits)\n",
free_ent - 1,n_bits );
#endif /* DEBUG */
}
flushbuf(); /* dump the last block */
if(bytes_out > in_count) /* exit(2) if no savings */
status = 2;
goto cleanup;
}
/*****************************************************************
* TAG( output )
*
* Output the given code.
* Inputs:
* code: A n_bits-bit integer. If == -1, then EOF. This assumes
* that n_bits =< (long)wordsize - 1.
* Outputs:
* Outputs code to the file.
* Assumptions:
* Chars are 8 bits long.
* Algorithm:
* Maintain a BITS character long buffer (so that 8 codes will
* fit in it exactly). Use the VAX insv instruction to insert each
* code in turn. When the buffer fills up empty it and start over.
*/
static char buf[BITS];
char_type lmask[9]= {
0xff,0xfe,0xfc,0xf8,0xf0,0xe0,0xc0,0x80,0x00
};
char_type rmask[9]= {
0x00,0x01,0x03,0x07,0x0f,0x1f,0x3f,0x7f,0xff
};
output(code)
code_int code;
{
#ifdef DEBUG
static int col = 0;
#endif /* DEBUG */
register int r_off = offset, bits = n_bits;
register char * bp = buf;
int count;
#ifdef DEBUG
if (verbose )
fprintf(stderr,"%5d%c",code,
(col+=6)>= 74 ?(col = 0,'\n'):' ' );
#endif /* DEBUG */
if (code >= 0 ){
/*
* byte/bit numbering on the VAX is simulated by the following code
*/
/*
* Get to the first byte.
*/
bp += (r_off >> 3);
r_off &= 7;
/*
* Since code is always >= 8 bits, only need to mask the first
* hunk on the left.
*/
*bp = (*bp & rmask[r_off])| (code << r_off)& lmask[r_off];
bp++;
bits -= (8 - r_off);
code >>= 8 - r_off;
/* Get any 8 bit parts in the middle (<=1 for up to 16 bits). */
if (bits >= 8 ){
*bp++= code;
code >>= 8;
bits -= 8;
}
/* Last bits. */
if(bits)
*bp = code;
offset += n_bits;
if (offset == (n_bits << 3)){
bp = buf;
bits = n_bits;
bytes_out += bits;
do
putcbuf(*bp++);
while(--bits);
offset = 0;
}
/*
* If the next entry is going to be too big for the code size,
* then increase it, if possible.
*/
if (free_ent > maxcode || (clear_flg > 0)){
/*
* Write the whole buffer, because the input side won't
* discover the size increase until after it has read it.
*/
if (offset > 0 ){
putmcbuf(buf, n_bits);
if (status) return 1;
bytes_out += n_bits;
}
offset = 0;
if (clear_flg ){
maxcode = MAXCODE (n_bits = INIT_BITS);
clear_flg = 0;
}
else {
n_bits++;
if (n_bits == maxbits )
maxcode = maxmaxcode;
else
maxcode = MAXCODE(n_bits);
}
#ifdef DEBUG
if (debug ){
fprintf(stderr,"\nChange to %d bits\n",n_bits );
col = 0;
}
#endif /* DEBUG */
}
}
else {
/*
* At EOF, write the rest of the buffer.
*/
coun= MAXCODE (n_bits = INIT_BITS);
clear_flg = 0;
}
size = 0;
while (size < n_bits) {
if ((c = ReadChar()) == EOF || status ) break;
buf[size++] = c;
}
if (size == 0 || status) {
return EOF; /* end of file */
}
offset = 0;
/* Round size down to integral number of codes */
size = (size << 3)- (n_bits - 1);
}
r_off = offset;
bits = n_bits;
/*
* Get to the first byte.
*/
bp += (r_off >> 3);
r_off &= 7;
/* Get first part (low order bits) */
#ifdef NO_UCHAR
code = ((*bp++ >> r_off)& rmask[8 - r_off]) & 0xff;
#else
code = (*bp++ >> r_off);
#endif /* NO_UCHAR */
bits -= (8 - r_off);
r_off = 8 - r_off; /* now, offset into code word */
/* Get any 8 bit parts in the middle (<=1 for up to 16 bits). */
if (bits >= 8 ){
#ifdef NO_UCHAR
code |= (*bp++ & 0xff) << r_off;
#else
code |= *bp++ << r_off;
#endif /* NO_UCHAR */
r_off += 8;
bits -= 8;
}
/* high order bits. */
code |= (*bp & rmask[bits]) << r_off;
offset += n_bits;
return code;
}
#ifndef AZTEC_C
char *
rindex(s,c)
register char *s,c;
{
char *p;
for (p = NULL;*s;s++)
if (*s == c)
p = s;
return(p);
}
#endif
/*
* This routine returns 1 if we are running in the foreground and stderr
* is a tty.
*/
foreground()
{
/* I don't know how to test for background, yet. */
return (isatty(2));
}
onintr()
{
unlink ( ofname );
exit (1 );
}
/* wild pointer -- assume bad input */
oops ()
{
if ( do_decomp == 1 )
fprintf (stderr,"uncompress: corrupt input\n" );
unlink ( ofname );
exit ( 1 );
}
/* table clear for block compress */
cl_block ()
{
register long int rat;
checkpoint = in_count + CHECK_GAP;
#ifdef DEBUG
if (debug ){
fprintf (stderr,"count: %ld, ratio: ",in_count );
prratio (stderr,in_count,bytes_out );
fprintf (stderr,"\n");
}
#endif /* DEBUG */
if(in_count > 0x007fffff){ /* shift will overflow */
rat = bytes_out >> 8;
if(rat == 0){ /* Don't divide by zero */
rat = 0x7fffffff;
}
else {
rat = in_count / rat;
}
}
else {
rat = (in_count << 8) / bytes_out; /* 8 fractional bits */
}
if (rat > ratio ){
ratio = rat;
}
else {
ratio = 0;
#ifdef DEBUG
if(verbose)
dump_tab(); /* dump string table */
#endif
cl_hash ((count_int)hsize );
free_ent = FIRST;
clear_flg = 1;
output ((code_int)CLEAR );
#ifdef DEBUG
if(debug)
fprintf (stderr,"clear\n" );
#endif /* DEBUG */
}
}
cl_hash(hsize) /* reset code table */
register count_int hsize;
{
register count_int *htab_p = htab+hsize;
register long i;
register long m1 = -1;
i = hsize - 16;
do { /* might use Sys V memset(3) here */
*(htab_p-16)= m1;
*(htab_p-15)= m1;
*(htab_p-14)= m1;
*(htab_p-13)= m1;
*(htab_p-12)= m1;
*(htab_p-11)= m1;
*(htab_p-10)= m1;
*(htab_p-9)= m1;
*(htab_p-8)= m1;
*(htab_p-7)= m1;
*(htab_p-6)= m1;
*(htab_p-5)= m1;
*(htab_p-4)= m1;
*(htab_p-3)= m1;
*(htab_p-2)= m1;
*(htab_p-1)= m1;
htab_p -= 16;
} while ((i -= 16) >= 0);
for (i += 16; i > 0; i--)
*--htab_p = m1;
}
#ifdef DEBUG
prratio(stream,num,den)
FILE *stream;
long int num,den;
{
register int q; /* Doesn't need to be long */
if(num > 214748L){ /* 2147483647/10000 */
q = num / (den / 10000L);
}
else {
q = (10000L*num)/ den; /* Long calculations, though */
}
if (q < 0){
putc('-',stream);
q = -q;
}
fprintf(stream,"%d.%02d%%",q / 100,q % 100);
}
#endif
/* Flush the output buffer. */
flushbuf()
{
if (Write(outfile, buffer, outbufcount) != outbufcount) {
status = IoErr();
}
outbufcount = 0;
outbufptr = buffer;
}
/* Put a character into the output buffer. If the buffer is full,
* output the buffer, then reset its count to zero.
* Called with:
* c: character to be output
* Returns:
* status code (0 => OK)
*/
int
putcbuf(c)
{
if (outbufcount >= bufsize) flushbuf();
*outbufptr++ = c;
++outbufcount;
return status;
}
/* Put multiple characters in the output buffer.
* Called with:
* buf: address of character array
* count: number of characters to output
* Returns:
* status
*/
putmcbuf(buf, count)
char *buf; int count;
{
for (; count; --count) if ( putcbuf( *buf++ ) ) break;
return status;
}
/* Read 1 character from the input file.
* Returns:
* character code or -1 (EOF)
*/
int
ReadChar()
{
again:
if (inbufsize == 0) {
if (endInput)
return EOF;
if ((inbufsize = Read(infile, inbuf, INBUFSIZE)) == -1L) {
status = IoErr();
return EOF;
}
inbufptr = inbuf; /* reset buffer pointer */
if (inbufsize < INBUFSIZE) {
endInput = true;
goto again;
}
}
++in_count;
--inbufsize;
return (*inbufptr++);
}
***EOF Compress.c***
--
< Mark R. Rinfret, mrr@softie, ..rayssd!unisec!softie!mrr >
< SofTech, Inc. Home: 401-846-7639 >
< 1 Silva Lane, Work: 401-849-4174 >
< Middletown, RI 02840 "The name has changed but I'm still guilty." >