[comp.sys.m6809] AR archive program for OS9

megabyte@chinet.UUCP (01/09/87)

This is the 'ar' archive program written by Carl Kreider
and now being used to pack the user group library on CIS and
on DELPHI.  It provides a means to pack several files into one
file while at the same time compressing them.

This is being posted to the net with Carl's Permission.

--------------------------------------------------------------
#! /bin/sh
# This is a shell archive, meaning:
# 1. Remove everything above the #! /bin/sh line.
# 2. Save the resulting text in a file.
# 3. Execute the file with /bin/sh (not csh) to create:
#	ar.c
#	ar.doc
#	ar.h
#	ar.man
#	arsup.c
#	delz1.c
#	dir.c
#	lz1.c
#	lz1.h
#	lz1glob.c
#	lz1init.c
#	makefile.09
#	makefile.68
# This archive created: Thu Jan  8 19:39:49 1987
# By:	Dr. Megabyte ()
export PATH; PATH=/bin:/usr/bin:$PATH
if test -f 'ar.c'
then
	echo shar: "will not over-write existing file 'ar.c'"
else
cat << \SHAR_EOF > 'ar.c'
/*
** file archive utility loosely modeled after Kernighan & Plauger
*/

#include <stdio.h>
#include <ctype.h>
#include "ar.h"


/* head of the linked list of target names built by stash_name() */
FN    *fnhead = (FN *)NULL;
/* header id string and archive file suffix (see ar.h) */
char  *hid = HID,
      *suf = SUF;
char  *mod,                               /* pointer to module name */
      *archfile;                            /* name of archive file */
int   all = FALSE,                   /* true to access old versions */
      supflag = FALSE,              /* true to suppress compression */
      zflag = FALSE;               /* true if names come from stdin */

/* returns char *, so it must be declared before first use */
char  *emalloc();


/*
** entry point
**  usage: ar -<cmd>[modifiers] archive [names ...]
**  parses the command letter and modifiers, appends the archive
**  suffix when it is missing, collects the target names, opens the
**  archive and hands over to proc_cmd()
*/

main(argc, argv)
int   argc;
char  **argv;
   {
   char  command, *p;
   int   n;
   FILE  *afp;

   pflinit();
   mod = *argv++;                              /* save program name */

   if ((argc < 3) || (*(p = *argv++) != '-'))
      help();

   if (*++p == '\0')              /* a lone '-' carries no command */
      help();
   command = tolower(*p);

   proc_opt(p + 1);                    /* process command modifiers */

   archfile = *argv++;
   n = strlen(archfile);
   /* only inspect the tail when the name can actually hold a suffix */
   if (n < SUFSIZ || (strcmp(archfile + n - SUFSIZ, suf)) != 0)
      archfile = strcat(strcpy(emalloc(n + SUFSIZ + 1), archfile), suf);

   if (get_names(argc -= 3, argv, command == 'u') == 0)
      if (command == 'u')
         fatal(0, "none of the targets found\n");

   if (command == 'u')
      {
      if ((afp = fopen(archfile, "r+")) == NULL)   /* try old first */
         if ((afp = fopen(archfile, "w+")) == NULL)   /* else create */
            fatal(errno, "can't create %s\n", archfile);
      }
   else
      {
      /* read-only open for every command other than update */
      if ((afp = fopen(archfile, "r")) == NULL)
         fatal(errno, "can't find %s\n", archfile);
      }

   proc_cmd(command, afp);                     /* process a command */
   exit(0);                       /* flush and report success */
   }
/*page*/
/*
** walk the modifier letters that follow the command letter
**  each recognised letter (either case) raises its global flag;
**  anything else ends up in help()
*/

proc_opt(p)
char   *p;
   {
   int   opt;

   for ( ; *p != '\0'; ++p)
      {
      opt = tolower(*p);
      if (opt == 'a')
         all = TRUE;                     /* include old versions too */
      else if (opt == 's')
         supflag = TRUE;                 /* store without compression */
      else if (opt == 'z')
         zflag = TRUE;                   /* names arrive on std in */
      else
         help();
      }
   }


/*
** dispatch on the command letter
**  't' lists, 'u' updates, 'p' prints to std out, 'x' extracts to
**  files; 'd' is accepted but does nothing; any other letter gets
**  the help text
*/

proc_cmd(command, afp)
char   command;
FILE  *afp;
   {
   if (command == 't')
      table(afp);                              /* table of contents */
   else if (command == 'u')
      update(afp);                                 /* update or add */
   else if (command == 'p')
      extract(afp, 0);                           /* print member(s) */
   else if (command == 'x')
      extract(afp, 1);                         /* extract member(s) */
   else if (command != 'd')
      help();
   }
/*page*/
/*
** extract file(s) from the archive
**  copy a file from the archive and restore its original attrs
**
**  afp   archive stream positioned at the first header
**  flag  0 = copy members to std out, 1 = write them to files
**
**  a member is selected with the same rule table() uses: its name
**  must match one of the targets and it must be the current
**  version, unless the 'all' modifier was given
*/

extract(afp, flag)
FILE  *afp;
int   flag;                     /* 0 = listing, 1 = writing to file */
   {
   FILE     *ofp;
   HEADER   header;
   FN       *fnp;
   FILE     *spl_open();

   if (fnhead == (FN *) NULL)
      stash_name("*");                     /* fake for special case */

   while ((get_header(afp, &header)) != EOF)
      {
      fnp = (FN *) NULL;
      if (header.a_stat == 0 || all == TRUE)
         for (fnp = fnhead; fnp; fnp = fnp->fn_link)
            if (patmatch(fnp->fn_name, header.a_name, TRUE) == TRUE)
               break;

      if (fnp == (FN *) NULL)
         fseek(afp, header.a_size, 1);        /* not wanted, skip it */
      else if (!flag)
         copy_from(afp, stdout, &header);
      else
         {
         printf("extracting <%s>\n", header.a_name);
         ofp = spl_open(&header);
         copy_from(afp, ofp, &header);
         set_fstat(ofp->_fd, &header.a_attr);   /* put attrs back */
         fclose(ofp);
         }
      }
   }
/*page*/
/*
** list a table of contents for the archive file
**  only show files matching the search mask which are current
**  unless the all flag is set, whereupon we will show old ones too
**
**  each member is listed at most once and its body is skipped
**  exactly once, however many target names were given
*/

table(fp)
FILE  *fp;
   {
   HEADER   header;
   FN       *fnp;
   long     c4tol();

   if (fnhead == (FN *) NULL)
      stash_name("*");                     /* fake for special case */

   printf("                                                    file   stored\n");
   printf("  file name                   ver    file date      size    size\n");
   printf("----------------------------- --- --------------   -----   -----\n");
   while ((get_header(fp, &header)) != EOF)
      {
      if (header.a_stat == 0 || all == TRUE)
         for (fnp = fnhead; fnp; fnp = fnp->fn_link)
            if (patmatch(fnp->fn_name, header.a_name, TRUE) == TRUE)
               {
               printf("%-29s %2d  %02d/%02d/%02d %02d:%02d %7ld %7ld\n",
                  header.a_name, header.a_stat, header.a_attr.fd_date[0],
                  header.a_attr.fd_date[1], header.a_attr.fd_date[2],
                  header.a_attr.fd_date[3], header.a_attr.fd_date[4],
                  c4tol(header.a_attr.fd_fsize), header.a_size);
               break;                     /* one line per member */
               }
      fseek(fp, header.a_size, 1);     /* step over the stored body */
      }
   }
/*page*/
/*
** add new files or replace existing files
**  pass 1 walks the archive and bumps the version number of every
**  member whose name matches a target, rewriting its header in place
**  pass 2 appends each target as a new version 0 member: a
**  placeholder header is written, the body is copied, and the header
**  is rewritten once the stored size is known
*/

update(afp)
FILE  *afp;
   {
   FILE     *ifp;
   HEADER   header;
   FN       *fnp;
   int      synch;
   long     bytes, head_pos, tail_pos, copy_to(), c4tol();

   while ((get_header(afp, &header)) != EOF)
      {
      for (fnp = fnhead; fnp; fnp = fnp->fn_link)
         if (patmatch(fnp->fn_name, header.a_name, TRUE) == TRUE)
            {
            ++header.a_stat;                       /* mark it older */
            /* widen before negating - sizeof yields an unsigned value */
            fseek(afp, -(long) sizeof(HEADER), 1);
            if ((fwrite(&header, sizeof(HEADER), 1, afp)) != 1)
               fatal(errno, "write failure on delete\n");
            break;                     /* age a member only once */
            }
      fseek(afp, header.a_size, 1);
      }

   /* if every target below is skipped the archive keeps this size */
   tail_pos = ftell(afp);

   for (fnp = fnhead; fnp; fnp = fnp->fn_link)
      {
      if (strlen(fnp->fn_name) > FNSIZ)    /* must fit header.a_name */
         fatal(1, "file name too long <%s>\n", fnp->fn_name);
      if ((ifp = fopen(fnp->fn_name, "r")) == NULL)
         {
         if (errno == 214)
            continue;                    /* a directory, we presume */
         fatal(errno, "can't find %s\n", fnp->fn_name);
         }
      printf("archiving <%s>\n", fnp->fn_name);
      /* files starting with the module sync word are stored as is */
      if (supflag || ((synch = getw(ifp)) == MSYNC))
         header.a_type = PLAIN;
      else
         header.a_type = COMP1;
      strcpy(header.a_hid, hid);
      setmem(header.a_name, FNSIZ + 1, ' ');
      strcpy(header.a_name, fnp->fn_name);
      get_fstat(ifp->_fd, &header.a_attr);
      header.a_stat = '\0';
      rewind(ifp);
      head_pos = ftell(afp);                     /* save for update */
      fwrite(&header, sizeof(HEADER), 1, afp);        /* skip ahead */
      /* pre-extend the archive to the uncompressed worst case */
      bytes = head_pos + c4tol(header.a_attr.fd_fsize) + sizeof(HEADER);
      set_fsize(afp->_fd, bytes);
      header.a_size = copy_to(afp, ifp, &header);
      tail_pos = ftell(afp);
      fclose(ifp);
      fseek(afp, head_pos, 0);             /* back up to header pos */
      if ((fwrite(&header, sizeof(HEADER), 1, afp)) != 1)
         fatal(errno, "write error on header for %s\n", fnp->fn_name);
      fseek(afp, tail_pos, 0);                 /* go to end of file */
      }
   set_fsize(afp->_fd, tail_pos);         /* now set real file size */
   }
/*page*/
/*
** gather file names from command line or std in
**  use linked list to avoid finite limit on number of names
**
**  ac, av    the arguments that follow the archive name
**  updating  TRUE if command is update; only then are names holding
**            '?' or '*' expanded against the directory here,
**            otherwise they are stashed as patterns
**  returns the number of names stashed
*/

get_names(ac, av, updating)
int   ac;
char  **av;
int   updating;                        /* TRUE if command is update */
   {
   char     *p, *q, *r, buf[80], *strhcpy();
   int      dirfd, len, found = 0;
   DIRENT   *dirp, *nextdir();

   while (ac--)
      if (!updating || !(index(*av, '?') || index(*av, '*')))
         found += stash_name(*av++);
      else
         {
         *(p = buf) = '\0';
         if (q = rindex((r = *av++), '/'))
            {
            /*
            ** buf must hold the whole argument now and, later, the
            ** directory part + '/' + one entry name + NUL.  The entry
            ** name is taken to be at most one char longer than
            ** dir_name (see DIRENT) - TODO confirm against strhcpy()
            */
            if ((int) strlen(r) >= (int) sizeof(buf)
               || (int) (q - r) + (int) sizeof(dirp->dir_name) + 3
                     > (int) sizeof(buf))
               fatal(1, "path too long <%s>\n", r);
            strcpy(p, r);                     /* copy all to buffer */
            *(r = rindex(p, '/')) = '\0';           /* break in two */
            ++q;                              /* pointer to pattern */
            }
         else
            {
            q = r;
            r = p;                                 /* swap pointers */
            }
         /* NOTE(review): dirfd is never checked or closed here */
         dirfd = opndir(*p ? p : ".");
         if (*p)
            *r++ = '/';                        /* set up for append */
         while ((dirp = nextdir(dirfd)) != -1)
            if (patmatch(q, strhcpy(r, dirp->dir_name), TRUE))
               if ((strucmp(p, archfile)) != 0)         /* not self */
                  found += stash_name(p);
         }

   if (zflag)
      /* bounded read; a line longer than buf arrives in pieces */
      while (fgets(buf, sizeof(buf), stdin))
         {
         len = strlen(buf);
         if (len > 0 && buf[len - 1] == '\n')
            buf[--len] = '\0';               /* drop the line end */
         if (len > 0)
            found += stash_name(buf);
         }
   return (found);
   }


/*page*/
/*
** append one name to the linked list of targets
**  absolute paths are refused; the node is allocated with room for
**  the name behind it (fn_name[1] already covers the terminator)
**  always returns 1 so callers can count the names they saved
*/

stash_name(p)
char  *p;
   {
   static   FN    *tail;                /* last node linked so far */
   FN             *node;

   if (*p == '/')
      fatal(1, "absolute path illegal <%s>\n", p);

   node = (FN *) emalloc(sizeof(FN) + strlen(p));
   strcpy(node->fn_name, p);
   node->fn_link = (FN *)NULL;

   if (fnhead == (FN *)NULL)
      fnhead = node;                           /* first one ever */
   else
      tail->fn_link = node;                    /* hang on the end */
   tail = node;
   return (1);
   }
/*page*/
/*
** get the next header from the file
**  returns EOF when a whole header could not be read, 0 otherwise;
**  a header that does not start with the id string is fatal
**  the caller is responsible for stepping over the member body
*/

get_header(fp, hp)
FILE     *fp;
HEADER   *hp;
   {
   /* fread returns an item count, not a pointer */
   if ((fread(hp, sizeof(HEADER), 1, fp)) != 1)
      return (EOF);
   if (strncmp(hp->a_hid, hid, HIDSIZ) != 0)
      fatal(1, "file not archive or damaged\n");
   return (0);
   }


/*
** here we will recreate a tree that was collapsed into the archive file
**  every directory named in the member's path is created if it is
**  missing, then the output file is opened for writing and extended
**  to the size recorded in the header
**  old versions get ".<version>" appended to keep the name unique
**  returns the open stream; any failure is fatal
*/

FILE  *
spl_open(hp)
HEADER   *hp;
   {
   /* name + '.' + a signed char printed in decimal + NUL */
   char  buf[FNSIZ+8];
   FILE  *ofp;
   char  *p;
   long  c4tol();

   p = hp->a_name;
   while (p = index(p, '/'))
      {
      *p = '\0';                            /* truncate temporarily */
      if (access(hp->a_name, DIRCHK) == -1)
         if (mknod(hp->a_name, DIRMAK) == -1)     /* try to make it */
            fatal(errno, "can't make <%s>\n", hp->a_name);
      *p++ = '/';                             /* put back the delim */
      }

   strcpy(buf, hp->a_name);
   if (hp->a_stat)
      sprintf(&buf[strlen(buf)], ".%d", hp->a_stat); /* make  unique */
   if ((ofp = fopen(buf, "w")) == NULL)
      fatal(errno, "create failure on %s\n", buf);
   set_fsize(ofp->_fd, c4tol(hp->a_attr.fd_fsize));
   return (ofp);
   }
/*page*/
/*
** copy an archived file from an archive
*/

copy_from(ifp, ofp, hp)
FILE     *ifp, *ofp;
HEADER   *hp;
   {
   long  bytes = hp->a_size;
   int   byt;

   switch (hp->a_type)
      {
      case PLAIN :
         while (bytes--)
            {
            if ((byt = getc(ifp)) == ERROR)
               fatal(errno, "read error while copying\n");
            if (putc(byt, ofp) == ERROR)
               fatal(errno, "write error while copying\n");
            }
         break;

      case COMP1 :
         if ((byt = de_LZ_1(ifp, ofp, bytes)) == -2)
            fatal(1, "archive damaged or file not compressed\n");
         break;

      default :
         fatal(1, "unknown compression algo\n");
      }
   }
/*page*/
/*
** copy an file to an archive
*/

long
copy_to(ofp, ifp, hp)
FILE     *ofp, *ifp;
HEADER   *hp;
   {
   long  bytes = 0;
   int   byt;

   switch (hp->a_type)
      {
      case PLAIN :
         while ((byt = getc(ifp)) != ERROR)
            if (putc(byt, ofp) == ERROR)
               fatal(errno, "write error while copying\n");
            else
               ++bytes;
         if (ferror(ifp))
            fatal(errno, "read error while copying\n");
         break;

      case COMP1 :
         if ((byt = LZ_1(ifp, ofp, &bytes)) == -3)
            fatal(1, "string table overflow on compression\n");
         break;

      default :
         fatal(1, "unknown compression algo\n");
      }
   return (bytes);
   }
/*page*/
/*
** malloc() that never hands back NULL
**  a failed request ends the program through fatal()
*/

char  *
emalloc(n)
int   n;
   {
   char  *mem;

   mem = malloc(n);
   if (mem == NULL)
      fatal(errno, "Can't get memory\n");
   return (mem);
   }


/*
** help for the user
**  prints the usage summary on std err and exits with status 1
**  the usage line names the archive argument, which main() requires
*/

help()
   {
   fprintf(stderr, "Ar V1.2 - archive file manager\n");
   fprintf(stderr, "Usage:  Ar -<cmd>[<modifier>] <archive> [file .. ]\n");
   fprintf(stderr, "      <cmd> is one of the following:\n");
   fprintf(stderr, "         t  show table of contents for archive\n");
   fprintf(stderr, "         u  update/add file(s) to the archive\n");
   fprintf(stderr, "         p  print file(s) from the archive\n");
   fprintf(stderr, "         x  extract file(s) from the archive\n");
   fprintf(stderr, "      <modifier> is one of the following:\n");
   fprintf(stderr, "         a  all versions (for extract)\n");
   fprintf(stderr, "         s  supress file compression\n");
   fprintf(stderr, "         z  read names for <cmd> from std in\n");
   fprintf(stderr, "\n      File names can include the meta chars ");
   fprintf(stderr, "* and ?, or path lists.\n");
   exit (1);
   }


/*
** print a fatal error message and exit
**  code        exit status handed to exit()
**  msg         printf style format for the message
**  arg1, arg2  values for the conversions in msg
**  the message is written to std err behind the program name
**  NOTE(review): callers pass string pointers in arg1, which only
**  works where a pointer travels like an int - confirm on any new
**  target
*/

fatal(code, msg, arg1, arg2)
int   code;
char  *msg;
int   arg1, arg2;
   {
   fprintf(stderr, "%s: ", mod);
   fprintf(stderr, msg, arg1, arg2);
   exit (code);
   }

SHAR_EOF
fi
if test -f 'ar.doc'
then
	echo shar: "will not over-write existing file 'ar.doc'"
else
cat << \SHAR_EOF > 'ar.doc'

                            AR Version 1.2

Ar is a utility modeled loosely after the archive utility in the 
Kernighan & Plauger book, Software Tools. Its purpose is to gather 
together files into a common file, in order to save space, to keep 
related files together, to ease the transmission of files by 
telephone, and other similar uses.  It uses Lempel-Ziv compression on 
text files, so substantial space savings can result.  Several versions 
of the same file can be kept in one archive without file name 
contention.  This version compiles and runs on OS9 LI and LII, and on 
OSK.  

The command line syntax is: 

  Ar <-command>[options] <archive_name> [file_specification(s)] 

The items enclosed in <> are required and those in [] are optional.  
The commands available operate on the archive in some way: 

  -p print a file(s) to standard output 
  -t print a table of contents for the archive 
  -u update/add file(s) to the archive 
  -x extract file(s) from the archive 

The options modify the behavior of a command: 

  -a causes all old files to be included in the command 
  -s suppresses compression on storage, no effect on retrieval 
  -z names of files to process are read on standard input 

Most of these commands and modifiers are obvious, but not so the 
detail operation.  A version number is kept as part of the header 
record associated with each file in the archive.  The current file has 
a version number of '0'. Older versions of files with the same names 
have successively greater numbers.  Commands normally operate only on 
current copies, or those with a version of '0'. The 'a' option will 
cause all to be selected.  To avoid name conflicts on extraction, the 
version number for all but the current version will be appended to the 
name of the created file.  

A file spec can include the meta characters '?' (matches any 
character) and '*' (matches any string of characters).  Thus the 
following examples are valid file specifications: 

   *.c matches ar.c, dir.c, but not dir.h or dir.cc 
   ar.?  matches ar.c, ar.h, but not ar.ch 
   a*z matches az, abz, abcz, abcdz, etc.  
   *.* matches anything with a dot in it 
   * matches anything and everything 

Since the OS9 (6809) shell does not expand any meta characters, I have 
provided internal expansion of same.  When run on OSK this is 
transparent for update since 'ar' also accepts a list of files on the 
command line.  In the case of extraction and printing, we do not want 
the shell generating filenames from the current directory.  In this 
case, any file specification containing a meta character must be 
quoted to protect it from the shell.  

Meta characters can be used for files both bound for the archive and 
for files to be extracted.  The p, t, and x commands default to '*' if 
no explicit file list is given.  The u command MUST have a file spec.  
The file specification can include a path list if it is not absolute.  
That is, '/d0/work/*.c' will not be accepted, but 'work/*.c' will be.  
During extraction, if the directory in the path list of the archived 
file cannot be found, it will be created.  Here are some sample 
command lines: 

   ar -pa archfile print all files, even old versions 
   ar -p archfile print all current files 
   ar -p archfile *.c print all current files with '.c' suffix 
   ar -p archfile file.c print only 'file.c' 

The x and t commands would work the same way, but the u command is 
slightly different.  Sample command lines follow: 

   ar -us archfile *.c add all files with '.c' suffix without 
compression 
   ar -u archfile * add ALL files using compression unless binary 
   ar -u archfile file.c add only 'file.c' 

If a file with the same name as the one to be added exists in the 
archive already, the version number of the existing one is incremented 
and the new version is added to the end of the archive.  The attribute 
section of the file descriptor is saved in the archive and restored to 
the file (as much as is possible) on extraction.  OS9 limits this to 
owner, creation date, and last modified date.  Size of course will be 
the same, but the file permissions will default to some value.  It is 
possible (I do it in 'arc') to preserve permissions, but that requires 
that the disk be opened raw and written raw.  I chose not to do that 
here.  

Pre-extension is used during extraction to avoid segment table 
overflow problems, and will be added to update as soon as the 
mechanism becomes clear.  

There is no way in this version of 'ar' to recover space of obsoleted 
or updated files, or to delete files.  This does not seem important, 
given the design goal.  This can be simulated with a burst/re-archive 
operation, which is, in principle, how 'ar' will have to do it.  

The concatenation of two '.ar' files is still an archive, with certain 
limitations.  If a duplicate file with duplicate version appears in 
both archives prior to concatenation, only the last one physically in 
the archive file will be saved on extraction.  One could, of course, 
double the size of 'ar' with all manner of prompting, perusing, and 
choosing in cases like that but I prefer to rely on the innate ability 
of the user to deal with such problems outside of 'ar'.  

The archive file is opened in update only if necessary, so the last 
modified date of the archive is not disturbed by extraction or 
perusal.  This should go a long way toward making rational backup of 
archives possible.  

I would like to maintain control of the evolution of this utility, 
primarily since it is used to generate and burst most of the files in 
the new User Group Data Base on CIS.  It is important to maintain 
backwards compatibility.  I will be open to suggestions for 
improvements, additions, and (of course) bug fixes.  The first likely 
addition will be the newer LZ compression to improve the compaction.  

                        Carl Kreider  71076,76


SHAR_EOF
fi
if test -f 'ar.h'
then
	echo shar: "will not over-write existing file 'ar.h'"
else
cat << \SHAR_EOF > 'ar.h'
#include <modes.h>

/* mode bits for creating / probing a directory; parenthesised so the
   sums survive being used inside a larger expression */
#define  DIRMAK   (S_IFDIR+S_IOEXEC+S_IOWRITE+S_IOREAD+S_IEXEC+S_IWRITE+S_IREAD)
#define  DIRCHK   (S_IFDIR+S_IREAD)

/* first word of a file that update() stores without compression */
#ifdef   OSK
#define  MSYNC    0x4AFC
#else
#define  MSYNC    0x87CD
#endif

#define  FALSE    (0)
#define  TRUE     (!FALSE)
#define  ERROR    (-1)
#define  HID      "+AR0.0+"                     /* header id string */
#define  HIDSIZ   7                               /* length of HID */
#define  SUF      ".ar"                      /* archive file suffix */
#define  SUFSIZ   3                               /* length of SUF */
#define  FNSIZ    65                /* longest member name accepted */
#define  MAXLINE  256

#define  PLAIN    0                         /* plain text or object */
#define  SQ       1                      /* old fashion CPM squeeze */
#define  COMP1    2                        /* LZ compression type 1 */
#define  COMP2    3                        /* LZ compression type 2 */



/*
** one directory entry as handed back by nextdir()
**  NOTE(review): the layout appears to mirror the on-disk entry, with
**  the first byte of dir_addr doubling as the name terminator -
**  confirm against dir.c
*/
typedef struct {
      char     dir_name[28];
      long     dir_addr;          /* assumes that 29th char is null */
      } DIRENT;


/*
** file attributes as moved by get_fstat() / set_fstat() and stored
** in every archive header
*/
typedef struct {                             /* obvious definitions */
      char     fd_att;        /* presumably attribute bits - confirm */
      char     fd_own[2];           /* presumably owner id - confirm */
      char     fd_date[5];  /* table() prints it as xx/xx/xx xx:xx */
      char     fd_link;           /* presumably link count - confirm */
      char     fd_fsize[4];     /* file size, decode with c4tol() */
      char     fd_dcr[3];      /* presumably creation date - confirm */
      } FILDES;


/*
** the record that precedes every member body in the archive; it is
** read and written as a raw block of sizeof(HEADER) bytes
*/
/* NOTE that a_size is on an even byte boundary */
/* (8 bytes of a_hid plus 66 bytes of a_name come before it) */
typedef struct {
      char     a_hid[HIDSIZ+1];                 /* header id string */
      char     a_name[FNSIZ+1];        /* name of the archived file */
      long     a_size;         /* size of archive (not virgin) file */
      char     a_type;          /* archive type - virg, packed, etc */
      char     a_stat;        /* status of file - good, deleted, .. */
                           /* used as version: 0 current, >0 older */
      FILDES   a_attr;           /* attributes of the archived file */
      } HEADER;


/*
** node of the singly linked list of target names
**  allocated as sizeof(FN) + strlen(name), so fn_name really extends
**  past its declared single byte
*/
typedef struct fn {
      struct fn *fn_link;                 /* link to next file name */
      char     fn_name[1];                       /* the name itself */
      } FN;

SHAR_EOF
fi
if test -f 'ar.man'
then
	echo shar: "will not over-write existing file 'ar.man'"
else
cat << \SHAR_EOF > 'ar.man'

                            AR Version 1.2

Ar is a utility modeled loosely after the archive utility in the 
Kernighan & Plauger book, Software Tools. Its purpose is to gather 
together files into a common file, in order to save space, to keep 
related files together, to ease the transmission of files by 
telephone, and other similar uses.  It uses Lempel-Ziv compression on 
text files, so substantial space savings can result.  Several versions 
of the same file can be kept in one archive without file name 
contention.  This version compiles and runs on OS9 LI and LII, and on 
OSK.  

The command line syntax is: 

  Ar <-command>[options] <archive_name> [file_specification(s)] 

The items enclosed in <> are required and those in [] are optional.  
The commands available operate on the archive in some way: 

  -p print a file(s) to standard output 
  -t print a table of contents for the archive 
  -u update/add file(s) to the archive 
  -x extract file(s) from the archive 

The options modify the behavior of a command: 

  -a causes all old files to be included in the command 
  -s suppresses compression on storage, no effect on retrieval 
  -z names of files to process are read on standard input 

Most of these commands and modifiers are obvious, but not so the 
detail operation.  A version number is kept as part of the header 
record associated with each file in the archive.  The current file has 
a version number of '0'. Older versions of files with the same names 
have successively greater numbers.  Commands normally operate only on 
current copies, or those with a version of '0'. The 'a' option will 
cause all to be selected.  To avoid name conflicts on extraction, the 
version number for all but the current version will be appended to the 
name of the created file.  

A file spec can include the meta characters '?' (matches any 
character) and '*' (matches any string of characters).  Thus the 
following examples are valid file specifications: 

   *.c matches ar.c, dir.c, but not dir.h or dir.cc 
   ar.?  matches ar.c, ar.h, but not ar.ch 
   a*z matches az, abz, abcz, abcdz, etc.  
   *.* matches anything with a dot in it 
   * matches anything and everything 

Since the OS9 (6809) shell does not expand any meta characters, I have 
provided internal expansion of same.  When run on OSK this is 
transparent for update since 'ar' also accepts a list of files on the 
command line.  In the case of extraction and printing, we do not want 
the shell generating filenames from the current directory.  In this 
case, any file specification containing a meta character must be 
quoted to protect it from the shell.  

Meta characters can be used for files both bound for the archive and 
for files to be extracted.  The p, t, and x commands default to '*' if 
no explicit file list is given.  The u command MUST have a file spec.  
The file specification can include a path list if it is not absolute.  
That is, '/d0/work/*.c' will not be accepted, but 'work/*.c' will be.  
During extraction, if the directory in the path list of the archived 
file cannot be found, it will be created.  Here are some sample 
command lines: 

   ar -pa archfile print all files, even old versions 
   ar -p archfile print all current files 
   ar -p archfile *.c print all current files with '.c' suffix 
   ar -p archfile file.c print only 'file.c' 

The x and t commands would work the same way, but the u command is 
slightly different.  Sample command lines follow: 

   ar -us archfile *.c add all files with '.c' suffix without 
compression 
   ar -u archfile * add ALL files using compression unless binary 
   ar -u archfile file.c add only 'file.c' 

If a file with the same name as the one to be added exists in the 
archive already, the version number of the existing one is incremented 
and the new version is added to the end of the archive.  The attribute 
section of the file descriptor is saved in the archive and restored to 
the file (as much as is possible) on extraction.  OS9 limits this to 
owner, creation date, and last modified date.  Size of course will be 
the same, but the file permissions will default to some value.  It is 
possible (I do it in 'arc') to preserve permissions, but that requires 
that the disk be opened raw and written raw.  I chose not to do that 
here.  

Pre-extension is used during extraction to avoid segment table 
overflow problems, and will be added to update as soon as the 
mechanism becomes clear.  

There is no way in this version of 'ar' to recover space of obsoleted 
or updated files, or to delete files.  This does not seem important, 
given the design goal.  This can be simulated with a burst/re-archive 
operation, which is, in principle, how 'ar' will have to do it.  

The concatenation of two '.ar' files is still an archive, with certain 
limitations.  If a duplicate file with duplicate version appears in 
both archives prior to concatenation, only the last one physically in 
the archive file will be saved on extraction.  One could, of course, 
double the size of 'ar' with all manner of prompting, perusing, and 
choosing in cases like that but I prefer to rely on the innate ability 
of the user to deal with such problems outside of 'ar'.  

The archive file is opened in update only if necessary, so the last 
modified date of the archive is not disturbed by extraction or 
perusal.  This should go a long way toward making rational backup of 
archives possible.  

I would like to maintain control of the evolution of this utility, 
primarily since it is used to generate and burst most of the files in 
the new User Group Data Base on CIS.  It is important to maintain 
backwards compatibility.  I will be open to suggestions for 
improvements, additions, and (of course) bug fixes.  The first likely 
addition will be the newer LZ compression to improve the compaction.  

                        Carl Kreider  71076,76

SHAR_EOF
fi
if test -f 'arsup.c'
then
	echo shar: "will not over-write existing file 'arsup.c'"
else
cat << \SHAR_EOF > 'arsup.c'
/*
** some lowlevel routines that are also in my library.
**  this file contains the C versions for those who don't
**  have access to my library
*/

#ifndef  OSK
#include <os9.h>
#endif
#include "ar.h"
#include <stdio.h>
#include <ctype.h>

/*
** get file stats using _os9 for portability
**  pn  path number of the open file
**  fs  receives sizeof(FILDES) bytes of file descriptor data
**  OSK builds call _gs_gfd(); otherwise an I_GETSTT request with
**  function code SS_FD is issued through _os9()
**  NOTE(review): the result of the system call is not checked
*/

get_fstat(pn, fs)
int      pn;
FILDES   *fs;
   {
#ifdef   OSK
   _gs_gfd(pn, fs, sizeof(FILDES));
#else

   struct registers regs;

   regs.rg_a = pn;                                   /* path number */
   regs.rg_b = SS_FD;                              /* function code */
   regs.rg_x = fs;                                /* buffer address */
   regs.rg_y = sizeof(FILDES);                      /* buffer size */
   _os9(I_GETSTT, &regs);
#endif
   }


/*
** set file attributes
**  pn  path number of the open file
**  fs  sizeof(FILDES) bytes of file descriptor data to apply
**  OSK builds call _ss_pfd(); otherwise an I_SETSTT request with
**  function code SS_FD is issued through _os9()
**  NOTE(review): the result of the system call is not checked
*/

set_fstat(pn, fs)
int      pn;
FILDES   *fs;
   {
#ifdef   OSK
   _ss_pfd(pn, fs, sizeof(FILDES));
#else

   struct registers regs;

   regs.rg_a = pn;                                   /* path number */
   regs.rg_b = SS_FD;                              /* function code */
   regs.rg_x = fs;                                /* buffer address */
   regs.rg_y = sizeof(FILDES);                      /* buffer size */
   _os9(I_SETSTT, &regs);
#endif
   }
/*page*/
/*
** get the file size
*/

long
get_fsize(pn)
int   pn;
   {
   long  size;

#ifdef   OSK
   size = _gs_size(pn);
#else
   getstat(SS_SIZE, pn, &size);
#endif
   return (size);
   }


/*
** change the file size
**  pn    path number of the open file
**  size  new size in bytes
**  used both to pre-extend a file and to trim it to its final length
**  NOTE(review): the result of the system call is not checked
*/

set_fsize(pn, size)
int   pn;
long  size;
   {
#ifdef   OSK
   _ss_size(pn, size);
#else
   setstat(SS_SIZE, pn, size);
#endif
   }


/*
**  rebuild a long from four bytes stored most significant first
**   each byte is masked so a signed char cannot smear its sign
*/

long
c4tol(s)
char  *s;
   {
   long  value;
   int   i;

   value = 0;
   for (i = 0; i < 4; i++)
      value = (value << 8) + (s[i] & 0xff);
   return (value);
   }
/*page*/

/*
**      Returns true if string s matches pattern p.
**       '*' matches any run of characters (including none) and
**       '?' matches exactly one character
**       f non-zero folds both sides to upper case before comparing
**      Characters are fetched into an int and masked before they
**      reach toupper(), so neither a negative char nor a macro
**      version of toupper() can misbehave.
*/

int
patmatch(p, s, f)
char           *p;                                       /* pattern */
register char  *s;                               /* string to match */
char           f;                            /* flag for case force */
   {
   int   pc;                     /* a single character from pattern */
   int   sc;                      /* a single character from string */

   while ((pc = *p++ & 0xff) != 0)
      {
      if (f)
         pc = toupper(pc);

      if (pc == '*')
         {
         do {                    /* look for match till s exhausted */
            if (patmatch (p, s, f))
               return (1);
            } while (*s++);
         return (0);
         }

      if (*s == 0)
         return (0);                          /* s exhausted, p not */

      sc = *s++ & 0xff;
      if (pc == '?')
         continue;                        /* matches all, just bump */
      if (f)
         sc = toupper(sc);
      if (pc != sc)
         return (0);
      }
   return (!*s);            /* p exhausted, ret true if s exhausted */
   }


/*
** initialize memory (variables) of string type
**  stores v into the first q bytes at p
**  a zero or negative count stores nothing
*/

int
setmem(p, q, v)
char  *p;                                                 /* string */
int   q;                                                   /* count */
char  v;                                                   /* value */
   {
   while (q-- > 0)
      *p++ = v;
   return (0);
   }


#include <ctype.h>
/*
** special strcmp to ignore case
**  returns 0 when the strings are equal apart from case, otherwise
**  the difference of the first pair of folded characters that differ
**  characters are masked to 0..255 before they reach toupper()
*/

int
strucmp(s1, s2)
char          *s1;
register char *s2;
   {
   int   c1, c2;

   for (;;)
      {
      c1 = toupper(*s1++ & 0xff);
      c2 = toupper(*s2++ & 0xff);
      if (c1 != c2)
         return (c1 - c2);
      if (c1 == 0)
         return (0);                        /* both ended together */
      }
   }
SHAR_EOF
fi
if test -f 'delz1.c'
then
	echo shar: "will not over-write existing file 'delz1.c'"
else
cat << \SHAR_EOF > 'delz1.c'
/*
 * Decompress the input file.
 */

#include <stdio.h>
#include "lz1.h"

/*
 * de_LZ_1 -- expand one LZ-compressed stream.
 *
 *    infile   stream positioned at the TAG word that starts the data
 *    outfile  stream that receives the expanded bytes
 *    bytes    length of the compressed data in bytes, TAG included
 *
 * Returns -2 if the TAG word is missing or wrong, or if a code chain
 * is too long for the output stack (which a stream written by LZ_1()
 * cannot cause, so it means damaged input).  Otherwise returns the
 * last value delivered by getcode(), i.e. -1 at end of data.
 */
WORD
de_LZ_1(infile, outfile, bytes)
FILE     *infile, *outfile;
long     bytes;
   {
   WORD  tag, finchar, code, oldcode, incode, stack_top = MAXSTACK;
   WORD  getcode();
   char  stack[MAXSTACK];

   if (fread(&tag, sizeof(tag), 1, infile) < 1 || tag != TAG)
      return (-2);

   lz1_init();
   lz_bytes = bytes - sizeof(tag);

   /* the first 256 codes stand for the single characters themselves */
   /* (table links are integer indexes, so clear them with 0)        */
   for (free_ent = 0; free_ent < 256; free_ent++)
      {
      Code[free_ent].next = Code[free_ent].chain = 0;
      Code[free_ent].prefix = 0;
      Code[free_ent].suffix = free_ent;
      }

   /*
    * LZ_1() writes nothing but the tag for an empty input, so there
    * may be no first code at all.  Don't emit a byte for the -1.
    */
   if ((code = getcode(infile)) == -1)
      return (code);
   finchar = oldcode = code;
   putc((char)finchar, outfile);       /* first code must be 8 bits */

   while ((code = getcode(infile)) != -1)
      {
      incode = code;
      if (code >= free_ent)       /* Special case for KwKwK string. */
         {
         stack[--stack_top] = finchar;
         code = oldcode;
         }

      while (code >= 256)   /* Generate characters in reverse order */
         {
         if (stack_top <= 1)       /* keep a slot for the last char */
            return (-2);
         stack[--stack_top] = Code[code].suffix;
         code = Code[code].prefix;
         }
      stack[--stack_top] = finchar = Code[code].suffix;

      /* And put them out in forward order */
      fwrite(&stack[stack_top], 1, MAXSTACK - stack_top, outfile);
      stack_top = MAXSTACK;

      if ((code = free_ent) < maxmaxcode)     /* Generate new entry */
         {
         Code[code].prefix = oldcode;
         Code[code].suffix = finchar;
         free_ent++;
         }
      oldcode = incode;                  /* Remember previous code. */
      }
   return (code);
   }
/*page*/
/*
 * Read one code of n_bits bits from the input file.  If EOF, return -1.
 *
 * Codes are unpacked from buf, which is refilled with up to n_bits
 * bytes whenever it holds no further complete code.  When the table
 * has outgrown the current code size (free_ent > maxcode) the rest of
 * the buffer is abandoned, n_bits is raised and a fresh buffer is read.
 *
 * On EOF the buffer is marked empty (size and offset both 0).  Without
 * that, a stream ending just as the code size grows would leave
 * offset < size behind, and the first call for the next stream would
 * decode stale buffer contents instead of reading the file.
 */

WORD
getcode(infile)
FILE    *infile;
   {
   WORD    code;
   static WORD size = 0;      /* usable bits in buf, see rounding below */

   if (offset >= size || free_ent > maxcode)
      {
      if (free_ent > maxcode)
         {
         n_bits++;              /* new entry too big, increase size */
         maxcode = (n_bits == maxbits) ? maxmaxcode : (1 << n_bits) - 1;
         }
      if (lz_bytes <= 0)
         {
         size = offset = 0;             /* leave the buffer "empty" */
         return (-1);                                      /* "eof" */
         }
      size = (lz_bytes > (long) n_bits) ? n_bits : (int) lz_bytes;
      size = fread(buf, 1, size, infile);    /* read new buffer too */
      if (size <= 0)
         {
         size = offset = 0;             /* leave the buffer "empty" */
         return (-1);
         }
      lz_bytes -= size;
      offset = 0;
      /*
       * Convert size to bits, and round down so that stray bits
       * at the end aren't treated as data
       */
      size = BytesToBits(size) - (n_bits - 1);
      }
   code = fetch();
   offset += n_bits;

   return(code);
   }


fetch()
   {
   WORD    w_offset, word, size2;

   word = offset >> LOG2WSIZE;
   w_offset = LowOrder(LOG2WSIZE) & offset;

   if (w_offset + n_bits <= WSIZE)
      {
      return((buf[word] >> ((WSIZE - n_bits) - w_offset))
         & LowOrder(n_bits));
      }
   else
      {
      size2 = n_bits - (WSIZE - w_offset);
      return(((buf[word] << size2) |
            (buf[word + 1] >> (WSIZE - size2)))
            & LowOrder(n_bits));
      }
   }
SHAR_EOF
fi
if test -f 'dir.c'
then
	echo shar: "will not over-write existing file 'dir.c'"
else
cat << \SHAR_EOF > 'dir.c'

#include "ar.h"

/*
** Open the directory named by s and hand back what open() returned:
** a file descriptor, or -1 if the open failed.
** NOTE(review): mode 0x81 is presumably OS-9's "read" + "directory"
** access bits -- confirm against the OS-9 open() documentation.
*/

int
opndir(s)
char  *s;
   {
   int   fd;

   fd = open(s, 0x81);
   return (fd);
   }


/*
** return a pointer to the next directory entry
*/

static   DIRENT   dent;

DIRENT *
nextdir(fd)
int   fd;
   {
   char  c;

   do {
      if (read(fd, &dent, sizeof(dent)) <= 0)
         return (-1);
      } while ((c = dent.dir_name[0]) == 0 || (c & 0x7f) == '.');
   return (&dent);
   }
SHAR_EOF
fi
if test -f 'lz1.c'
then
	echo shar: "will not over-write existing file 'lz1.c'"
else
cat << \SHAR_EOF > 'lz1.c'
/*
 *      Writes compressed file to outfile.
 */

#include <stdio.h>
#include "lz1.h"

/*
 * LZ_1 -- compress everything readable from infile onto outfile.
 *
 * The TAG word is written first, then the packed codes.  On return
 * *bytes holds the number of bytes written, tag included.  Always
 * returns 0; a full code table (non-zero result from addentry()) is
 * deliberately ignored and compression carries on with the entries
 * already present.
 */
int
LZ_1(infile, outfile, bytes)
FILE     *infile, *outfile;
long     *bytes;
   {
   WORD  c, ent, n_ent, tag = TAG;

   /* codes 0..255 are the single characters, with no successors yet */
   free_ent = 0;
   while (free_ent < 256)
      {
      Code[free_ent].next = Code[free_ent].chain = NULL;
      Code[free_ent].suffix = free_ent;
      free_ent++;
      }

   lz1_init();
   lz_bytes = sizeof(tag);
   fwrite(&tag, 1, sizeof(tag), outfile);             /* mark as LZ */

   ent = getc(infile);
   while (!feof(infile) && (c = getc(infile)) != EOF)
      {
      /*
      ** Walk the successors of the current entry looking for the one
      ** whose suffix is c.  The chain is kept sorted by suffix, so
      ** the walk can stop at the first suffix that is not below c.
      */
      n_ent = Code[ent].chain;
      while (n_ent && Code[n_ent].suffix < c)
         n_ent = Code[n_ent].next;

      if (n_ent && Code[n_ent].suffix == c)
         ent = n_ent;                    /* string extended by c */
      else
         {
         /* unknown string: emit the prefix, remember prefix + c */
         output(ent, outfile);
         addentry(c, ent);
         ent = c;
         }
      }

   output(ent, outfile);                      /* put out final code */
   output(-1, outfile);               /* and -1 to flush and finish */
   *bytes = lz_bytes;
   return (0);
   }
/*page*/


/*
 * Add the string "entry ent followed by character c" to the code
 * table as code free_ent, linking it into ent's successor chain at
 * the position that keeps the chain ordered by suffix.
 * Returns 0 on success, or -3 (table untouched) when the table is full.
 */
int
addentry(c, ent)
WORD    c, ent;
   {
   register WORD   prev, cur;

   /* if the table is full, there's nothing we can do. */
   if (free_ent >= maxmaxcode)
      return (-3);

   Code[free_ent].suffix = c;
   Code[free_ent].chain = NULL;

   cur = Code[ent].chain;
   if (cur == 0 || c < Code[cur].suffix)
      {
      /* empty chain, or c sorts first: new entry becomes the head */
      Code[free_ent].next = cur;
      Code[ent].chain = free_ent;
      }
   else
      {
      /* step along until the following entry's suffix exceeds c */
      do {
         prev = cur;
         cur = Code[prev].next;
         } while (cur != 0 && c >= Code[cur].suffix);
      Code[free_ent].next = cur;
      Code[prev].next = free_ent;
      }

   free_ent++;
   return (0);
   }
/*page*/
/*
 * Output the given code.
 *
 * A non-negative code is packed into buf at the current bit offset
 * using n_bits bits; buf is written out each time it fills up (eight
 * codes, n_bits bytes).  A negative code means end of input: whatever
 * is pending in buf is written, rounded up to whole bytes, and ofp is
 * flushed.  lz_bytes is advanced by every byte written.
 *
 * Always returns 0.
 */

int
output(code, ofp)
WORD     code;
FILE     *ofp;
   {
   WORD  n;

   if (code < 0)
      {
      /* at EOF--flush buffers and pack up */
      if (offset > 0)
         {
         n = (offset + 7) / 8;
         fwrite(buf, 1, n, ofp);
         lz_bytes += n;
         offset = 0;
         }
      fflush(ofp);
      return (0);
      }

   insert_bit(code);

   if ((offset += n_bits) == BytesToBits(n_bits))
      {
      fwrite(buf, 1, n_bits, ofp);
      lz_bytes += n_bits;
      offset = 0;
      }

   /*
    * If the next entry is going to be too big for the code size,
    * then increase it, if possible.
    */
   if (free_ent <= maxcode)
      return (0);

   /*
    * Write the whole buffer, because the input side won't
    * discover the size increase until after it has read it.
    */
   if (offset > 0)
      {
      fwrite(buf, 1, n_bits, ofp);
      lz_bytes += n_bits;
      offset = 0;
      }

   n_bits++;
   if (n_bits == maxbits)
      maxcode = maxmaxcode;
   else
      maxcode = (1 << n_bits) - 1;

   return (0);
   }


/*
 * insert a value of "n_bits" bits at "offset" bits into buf
 *
 * Bit positions are counted from the high-order end of each WSIZE-bit
 * word of buf.  Only the n_bits-wide field is rewritten; the other
 * bits of the word(s) touched are preserved.  Neither offset nor
 * n_bits is changed here -- the caller advances offset.
 * NOTE(review): the two-word case relies on the elements of buf being
 * exactly WSIZE bits wide, so that bits shifted past the top are lost
 * -- confirm when porting.
 */

insert_bit(value)
WORD    value;
   {
   WORD    w_offset, word, shift, size1, size2;

   word = offset >> LOG2WSIZE;               /* index of word in buf */
   w_offset = LowOrder(LOG2WSIZE) & offset;   /* bit position in word */

   if (w_offset + n_bits <= WSIZE)
      {
      /* field fits in one word: "shift" bits remain to its right */
      shift = (WSIZE - n_bits) - w_offset;
      /* clear the field, then drop the value into it */
      buf[word] = (buf[word] & ~(LowOrder(n_bits) << shift)) |
         (value  << shift);
      }
   else
      {
      /* field straddles two words */
      size1 = WSIZE - w_offset;          /* bits that go in this word */
      size2 = n_bits - size1;         /* bits that go in the next one */
      /* low size1 bits of this word get the high-order part of value */
      buf[word] = (buf[word] & HighOrder(size1)) |
            ((unsigned) value >> size2);
      shift = WSIZE - size2;
      /* top size2 bits of the next word get the low-order part */
      buf[word + 1] = (buf[word + 1] & ~(LowOrder(size2) << shift)) |
              (value << shift);
      }
   }
SHAR_EOF
fi
if test -f 'lz1.h'
then
	echo shar: "will not over-write existing file 'lz1.h'"
else
cat << \SHAR_EOF > 'lz1.h'
/*
 * header file for LZ compression routines
 *
 * Declares the constants, bit-twiddling macros, code table type and
 * shared globals used by the compressor (lz1.c), the expander
 * (delz1.c) and lz1init.c.  The globals are defined in lz1glob.c.
 */

/*
 * WORD / UWORD: signed and unsigned integer types used for codes,
 * table links and the bit buffer -- short under OSK, int otherwise.
 * Define WORD before including this file to override both.
 */
#ifndef WORD
# ifdef  OSK
#  define WORD short
#  define UWORD unsigned short
#else
#  define WORD int
#  define UWORD unsigned int
# endif
#endif

#define BITS            11           /* maximum number of bits/code */
#define INIT_BITS       9            /* initial number of bits/code */
/*
 * One code can represent 1 << BITS characters, but to get a code of
 * length N requires an input string of at least N * (N - 1) / 2
 * characters. To overflow the decompress stack, an input file would
 * have to have at least MAXSTACK * (MAXSTACK - 1) / 2 consecutive
 * occurrences of a particular character, which is unlikely for the
 * value used here. (Do keep the initial advice in mind, though.)
 */
#define MAXSTACK        2000                /* size of output stack */

/* marker word written ahead of the codes and checked when expanding */
#define TAG             2995               /* suggested by M. Meyer */

/*
 * The following should be changed to fit your machine and the type
 * you choose for the elements of the array buf.
 * (If you avoid insert_bit() and fetch(), never mind.)
 * Both sizes are in bits.
 */
#define LOG2WSIZE       4         /* log2(size of base type of buf) */
#define WSIZE           16              /* size of base type of buf */

#define BytesToBits(b)  ((b) << 3)               /* b * 8 */
/* LowOrder(n): mask with only the n low-order bits set */
#define LowOrder(n)     (~(~0 << (n)))           /* thanks to K & R */
/* HighOrder(n): mask with every bit set except the n low-order ones */
#define HighOrder(n)    (~0 << (n))

/* one string-table entry; all four members are table indexes/codes */
typedef struct {
     unsigned WORD   next,     /* chain of entries with same prefix */
                     chain,       /* chain prefixed with this entry */
                     prefix,          /* prefix code for this entry */
                     suffix;             /* last char in this entry */
        } CodeTable;

extern WORD       maxbits,         /* user settable max # bits/code */
                  n_bits,            /* current number of bits/code */
                  maxmaxcode,      /* max permissible maxcode value */
                                            /* (i.e. 2 ** BITS - 1) */
                  maxcode,                       /* 2 ** n_bits - 1 */
                  free_ent,                   /* first unused entry */
                  offset,        /* cursor into buf (units of bits) */
                  stats_flg;         /* should we print statistics? */

extern WORD       verbose,
                  debug;

/* bytes written so far (compressing) or still to read (expanding) */
extern long       lz_bytes;
/* bit-packing buffer: codes are packed into / unpacked from here */
extern UWORD      buf[BITS];
/* the string table, indexed by code */
extern CodeTable  Code[1 << BITS];
SHAR_EOF
fi
if test -f 'lz1glob.c'
then
	echo shar: "will not over-write existing file 'lz1glob.c'"
else
cat << \SHAR_EOF > 'lz1glob.c'
/* compressglob -- globals for compress */

#include "lz1.h"

/*
 * Definitions of the state shared by the LZ routines; the matching
 * extern declarations are in lz1.h.
 */
WORD        maxbits;                /* largest code size allowed    */
WORD        n_bits;                 /* code size currently in use   */
WORD        maxmaxcode;             /* largest code at maxbits      */
WORD        maxcode;                /* largest code at n_bits       */
WORD        free_ent;               /* first unused table entry     */
WORD        offset;                 /* bit cursor into buf          */
WORD        stats_flg;
WORD        verbose;
WORD        debug;

long        lz_bytes;               /* compressed-byte counter      */

UWORD       buf[BITS];              /* bit-packing buffer           */

CodeTable   Code[1 << BITS];        /* string table, one per code   */
SHAR_EOF
fi
if test -f 'lz1init.c'
then
	echo shar: "will not over-write existing file 'lz1init.c'"
else
cat << \SHAR_EOF > 'lz1init.c'
/*
** routines to initialize the Lempel-Zev version one routines
*/

#include "lz1.h"

/*
** Put the code-size state back to its starting values: codes are
** INIT_BITS wide to begin with and may grow to BITS.  maxcode and
** maxmaxcode are the largest code values at those two widths.
** The code table, offset and lz_bytes are not touched here.
*/
int
lz1_init()
   {
   n_bits  = INIT_BITS;
   maxbits = BITS;

   maxmaxcode = (1 << BITS) - 1;
   maxcode    = (1 << INIT_BITS) - 1;
   }
SHAR_EOF
fi
if test -f 'makefile.09'
then
	echo shar: "will not over-write existing file 'makefile.09'"
else
cat << \SHAR_EOF > 'makefile.09'
/d0/cmds/ar: ar.r arsup.r dir.r lz1.r delz1.r lz1glob.r lz1init.r
=d;cc -m=8 -f=Ar -MS ar.r arsup.r dir.r  lz1.r delz1.r lz1glob.r lz1init.r >map

lz1init.r: lz1init.c lz1.h
=cc -r lz1init.c

lz1glob.r: lz1glob.c lz1.h
=cc -r lz1glob.c

delz1.r: delz1.c lz1glob.c lz1.h
=cc -r delz1.c

lz1.r: lz1.c lz1glob.c lz1.h
=cc -r lz1.c

dir.r: dir.c ar.h
=cc -r dir.c

arsup.r: arsup.c ar.h
=cc -r arsup.c

ar.r: ar.c ar.h
=cc -r ar.c
SHAR_EOF
fi
if test -f 'makefile.68'
then
	echo shar: "will not over-write existing file 'makefile.68'"
else
cat << \SHAR_EOF > 'makefile.68'
/d0/cmds/ar: ar.r arsup.r dir.r lz1.r delz1.r lz1glob.r lz1init.r
  cc -m=8 -f=Ar ar.r arsup.r dir.r  lz1.r delz1.r lz1glob.r lz1init.r
lz1.r delz1.r lz1init.r lz1glob.r: lz1.h
ar.r arsup.r dir.r: ar.h
SHAR_EOF
fi
exit 0
#	End of shell archive
-- 
UUCP:	(1) seismo!why_not!scsnet!sunder		Mark E. Sunderlin
	(2) ihnp4!chinet!megabyte			aka Dr. Megabyte
CIS:	74026,3235					(202) 634-2529
Mail:	IRS  PM:PFR:D:NO  1111 Constitution Ave. NW  Washington,DC 20224  

blarson@castor.usc.edu (Bob Larson) (01/12/87)

The archiver Ar posted here has a number of minor problems.  

Ar.doc and ar.man are identical except for the number of blank lines
at the end, so one may safely be deleted. 

The makefile.68 contains some machine dependencies.  The /d0 needs to
be changed to /h0 or wherever you keep your cmds directory, and
adding the line "CFLAGS = -t=/r0" speeds up compilation.  The -i flag
may be added to the linking line if disk space, memory space, and
loading time are more of a consideration than running time.  Of
course, comments and blank lines will increase the readability.

Compiling it gives numerous error messages, mostly dealing with
possible portability problems.  There really is no reason for not doing
the code as portably as possible, so the context diffs are below.

*** ar.c.orig
--- ar.c
**************
*** 5,11
  #include <ctype.h>
  #include "ar.h"
  
  
  FN    *fnhead = (FN *)NULL;
  char  *hid = HID,
--- 6,12 -----
  #include <ctype.h>
  #include "ar.h"
  
+ extern char *index(), *rindex(), *malloc(), *strcat(), *strcpy();
  
  
  FN    *fnhead = (FN *)NULL;
**************
*** 6,12
  #include "ar.h"
  
  
  FN    *fnhead = (FN *)NULL;
  char  *hid = HID,
        *suf = SUF;
--- 8,14 -----
  
  extern char *index(), *rindex(), *malloc(), *strcat(), *strcpy();
  
+ 
  FN    *fnhead = (FN *)NULL;
  char  *hid = HID,
        *suf = SUF;
**************
*** 289,295
           dirfd = opndir(*p ? p : ".");
           if (*p)
              *r++ = '/';                        /* set up for append */
!          while ((dirp = nextdir(dirfd)) != -1)
              if (patmatch(q, strhcpy(r, dirp->dir_name), TRUE))
                 if ((strucmp(p, archfile)) != 0)         /* not self */
                    found += stash_name(p);
--- 291,297 -----
           dirfd = opndir(*p ? p : ".");
           if (*p)
              *r++ = '/';                        /* set up for append */
!          while ((dirp = nextdir(dirfd)) != (DIRENT *)NULL)
              if (patmatch(q, strhcpy(r, dirp->dir_name), TRUE))
                 if ((strucmp(p, archfile)) != 0)         /* not self */
                    found += stash_name(p);
**************
*** 311,317
  char  *p;
     {
     static   FN    *fnp;
!    char           *q;
  
     if (*p == '/')
        fatal(1, "absolute path illegal <%s>\n", p);
--- 313,319 -----
  char  *p;
     {
     static   FN    *fnp;
!    FN             *q;
  
     if (*p == '/')
        fatal(1, "absolute path illegal <%s>\n", p);
**************
*** 315,321
  
     if (*p == '/')
        fatal(1, "absolute path illegal <%s>\n", p);
!    q = emalloc(sizeof(FN) + strlen(p));
     if (fnhead == (FN *)NULL)
        fnhead = fnp = q;
     else
--- 317,323 -----
  
     if (*p == '/')
        fatal(1, "absolute path illegal <%s>\n", p);
!    q = (FN *)emalloc(sizeof(FN) + strlen(p));
     if (fnhead == (FN *)NULL)
        fnhead = fnp = q;
     else
*** arsup.c.orig
--- arsup.c
**************
*** 158,165
        *p++ = v;
     }
  
- 
- #include <ctype.h>
  /*
  ** special strcmp to ignore case
  */
--- 158,163 -----
        *p++ = v;
     }
  
  /*
  ** special strcmp to ignore case
  */
*** dir.c.orig
--- dir.c
**************
*** 2,8
  #include "ar.h"
  
  /*
! ** open a directory, returning an fd or -1
  */
  
  int
--- 2,8 -----
  #include "ar.h"
  
  /*
! ** open a directory, returning an fd or NULL
  */
  
  int
**************
*** 27,33
  
     do {
        if (read(fd, &dent, sizeof(dent)) <= 0)
!          return (-1);
        } while ((c = dent.dir_name[0]) == 0 || (c & 0x7f) == '.');
     return (&dent);
     }
--- 27,33 -----
  
     do {
        if (read(fd, &dent, sizeof(dent)) <= 0)
!          return (DIRENT *)0;
        } while ((c = dent.dir_name[0]) == 0 || (c & 0x7f) == '.');
     return (&dent);
     }
-- 
Bob Larson
Arpa: Blarson@Usc-Eclb.Arpa
Uucp: (several backbone sites)!sdcrdcf!usc-oberon!castor.usc.edu!blarson
			seismo!cit-vax!usc-oberon!castor.usc.edu!blarson