[net.sources] packmail and unshar: make and extract shar files

mlm@cmu-cs-cad.ARPA (Michael Mauldin) (01/30/85)

#!/bin/sh
#
# This is a shell archive.  Remove any mail headers and run the rest
# of this file through the 'sh' command.  Or use the 'unshar' command.
# 
# --------------------------------------------------------------------
# 
# This message contains a pair of useful commands for manipulating
# shell archive files:
# 
#     packmail:	Creates multi-part shell archives using FFD
# 		bin packing to group files.
# 
#     unshar:	Unpackages one or more shell archives.  Automatically
# 		strips mail headers and other leading garbage.
# 
# Michael Mauldin (Fuzzy)
# Department of Computer Science
# Carnegie-Mellon University
# Pittsburgh, PA  15213
# (412) 578-3065,  mauldin@cmu-cs-a.arpa
#
echo 'Start of packmail and unshar, part 01 of 01:'
echo 'x - Makefile'
sed 's/^X//' > Makefile << '/'
XCFLAGS= -g
X
X# Default target: build both programs.
Xall: unshar packmail
X
Xunshar: unshar.c
X	cc $(CFLAGS) -o unshar unshar.c
X
Xpackmail: packmail.c
X	cc $(CFLAGS) -o packmail packmail.c
X
X# Package the sources into the shell archive 'dist'.  The freshly built
X# ./packmail is run by path so that '.' does not have to be in PATH.
Xdist: unshar.1 unshar.c packmail.1 packmail.c Makefile packmail header
X	./packmail -s48000 -t'packmail and unshar' -o'dist' -h'header' \
X		Makefile header *.[c1]
X	ls -l dist
/
echo 'x - header'
sed 's/^X//' > header << '/'
XThis is a shell archive.  Remove any mail headers and run the rest
Xof this file through the 'sh' command.  Or use the 'unshar' command.
X
X--------------------------------------------------------------------
X
XThis message contains a pair of useful commands for manipulating
Xshell archive files:
X
X    packmail:	Creates multi-part shell archives using FFD
X		bin packing to group files.
X
X    unshar:	Unpackages one or more shell archives.  Automatically
X		strips mail headers and other leading garbage.
X
XMichael Mauldin (Fuzzy)
XDepartment of Computer Science
XCarnegie-Mellon University
XPittsburgh, PA  15213
X(412) 578-3065,  mauldin@cmu-cs-a.arpa
/
echo 'x - packmail.1'
sed 's/^X//' > packmail.1 << '/'
X.TH PACKMAIL 1 01/29/85
X.UC 4
X.SH NAME
Xpackmail \- Package a set of files for easy mailing
X.SH SYNOPSIS
Xpackmail [-nq] [-{host}<value>] 
X.I files
X.SH DESCRIPTION
X.PP
X.I Packmail
Xpackages a set of files into a smaller number of larger files,
Xeach smaller than a given size. The resulting files are shell scripts
X(for the
X.I sh
Xcommand) which when run through the shell create the same files (with
Xdefault permission bits).  
XOptions allow the user to set
X.I maxsize
Xwhich determines the file length and
X.I outputname
Xwhich is the name of the output shell scripts.
X.PP
XIf 
X.I maxchars
Xis larger than the total size of the files, then only one file,
X.I outputname
Xis created, otherwise, several files,
X.I outputname.01, 
X.I outputname.02, ...
X.I outputname.nn 
Xare created.  Since First Fit Decreasing (FFD) bin packing is used,
Xthe number of mail files used is within 22% of the theoretical limit
X(Garey and Johnson, 1979).
X.PP
XEach file is self contained, 
X.I i.e. 
Xno source file is split across mail files.  Files larger than 
X.I maxchars
Xare copied into their own mail files.
XNo mapping of control characters is done (see uuencode(1)), but an 'X'
Xis prepended to each file, and the shell script removes these at the
Xreceiving end.  This is to prevent troubles with dots in column one
X(for example,
X.I Troff 
Xfiles have dots in column one) which can confuse some mail servers.
X.SH OPTIONS
X.TP
X.B -n
X.I check
Xoption, doesn't create the output files.  Just gives information on how
Xmany files the packaging will require and what the output file sizes
Xwill be. The default is to create the output.
X.TP
X.B -q
X.I quick
Xoption, specifies that approximate file sizes are to be used.  
XSince the packaging overhead for a file is dependent on the
Xnumber of lines in that file, the file is read once to determine its
Xline count and (if output is generated) once again to do the actual
Xpackaging.  The -q option ignores the packaging overhead and saves one of
Xthese two file read operations.  The resulting files will be somewhat
Xlarger than the limit specified (by 2 to 3 percent). The default is to
Xuse accurate file sizes.
X.TP
X.B -h
X.I header
Xoption, specifies a file containing header text to be inserted at the
Xfront of each distribution file. For example:
X.sp
X	packmail -h'head.txt' f1 f2 f3 f4
X.sp
Xspecifies that the file
X.I head.txt
Xcontains header information for packaging the four files listed.
XThe default is no header information.
X.TP
X.B -o
X.I outputname
Xoption, specifies the name of the output file(s).  For example
X.sp
X	packmail -o'dist' f1 f2 f3 f4
X.sp
Xspecifies that the files are to be packaged in shell archives called
X.I
Xdist.01, dist.02, ...
X.sp
XThe default is "pack.out".
X.TP
X.B -s
X.I maxsize
Xoption, specifies the maximum size of each archive file.
XFor example:
X.sp
X	packmail -s10000 f1 f2 f3 f4
X.sp
Xspecifies that files f1 f2 f3 and f4 are to be packed into files not
Xmore than 10,000 characters each (although no file will be split in
Xhalf, so if f1 were larger than 10,000 characters long it would be put
Xin one file by itself).  The default is 48,000 characters, since some
Xlocal networks have limits of 50,000 bytes, and we want to leave 2000
Xbytes for mail headers.  Remember that some computers have 16 bit
Xwords, so files longer than 65,536 bytes will break many sites' mailers.
X.TP
X.B -t
X.I title
Xoption, specifies a title which will be echoed when each file is
Xrun through the 'sh' command at the receiving end.  For example:
X.sp
X	packmail -t'Junk file mailing' f1 f2 f3 f4
X.sp
XWould give the user at the other end the following output:
X.sp
X	Start of Junk file mailing, part XX of XX:
X.br
X	...
X.br
X	Part XX of Junk file mailing complete.
X.PP
X.SH SEE ALSO
Xunshar(1)
X.SH BUGS
X.PP
XThe number of input files is limited by maximum size of exec args.  There
Xis also a compile time limit of 512 files.  No escaping of control
Xcharacters is performed, and the 'X's are put in column 1 whether they
Xare needed or not. Files not ending with newlines have trailing
Xnewlines added.
X.PP
XAlso, since only two digits are used to number the files, there is a
Xlimit of 100 output files.
X.SH HISTORY
X.TP
X29-Jan-85  Michael Mauldin (mlm) at CMU
XAdded header and title options.
X.TP
X27-Jun-83 Michael Mauldin (mlm) at CMU
XModified to use FFD bin packing algorithm.
X.TP
X20-Jun-83 Michael Mauldin (mlm) at CMU
XCreated.
/
echo 'x - packmail.c'
sed 's/^X//' > packmail.c << '/'
X/* 
X * packmail.c:  Package a set of files for mailing.
X *
X * Usage: packmail [-nq] [-{host}<value>] files
X *
X * HISTORY:
X * 29-Jan-85  Michael Mauldin (mlm) at Carnegie-Mellon University
X *	Added header file option
X *
X * 25-Jun-83  Michael Mauldin (mlm) at Carnegie-Mellon University
X *	Changed first fit bin packing to first fit decreasing.
X *
X * 19-Jun-83  Michael Mauldin (mlm) at Carnegie-Mellon University
X *	Created.
X */
X
X# include <stdio.h>
X# include <sys/types.h>
X# include <sys/stat.h>
X
X# define MAXFILE	512
X# define SKIPARG	while (*++(*argv)); --(*argv)
X# define max(A,B)	((A)>(B)?(A):(B))
X
X/* Magic numbers for fixed and marginal overhead in bytes */
X# define FOVRHD		77
X# define HOVRHD		4
X# define MOVRHD		37
X
Xstruct filestruct { int size, bin; char *name; } pfile[MAXFILE];
Xint nf = 0;
X
X/*
X * cmpsize: qsort comparison routine that orders file entries from
X * the largest size to the smallest.  The result is positive when
X * 'b' is larger than 'a', negative when it is smaller, and zero
X * when the two sizes are equal.
X */
X
Xint cmpsize (a, b)
Xstruct filestruct *a, *b;
X{ register int lhs = a->size;
X  register int rhs = b->size;
X
X  return (rhs - lhs);
X}
X
X/*
X * cmpbin: qsort comparison routine that orders file entries by
X * increasing bin number; entries in the same bin are ordered by
X * strcmp of their names.
X */
X
Xint cmpbin (a, b)
Xstruct filestruct *a, *b;
X{ register int delta = a->bin - b->bin;
X
X  /* Different bins: the bin number alone decides the order */
X  if (delta != 0)
X    return (delta);
X
X  /* Same bin: fall back to the file names */
X  return (strcmp (a->name, b->name));
X}
X
X/*
X * main: Parse the options, measure every input file, assign the files
X * to output archives by first fit decreasing bin packing, and write
X * each archive as a shell script.
X *
X * Options: -n (check only), -q (quick sizing), -h<file> (header text),
X * -o<name> (output name), -s<bytes> (maximum archive size) and
X * -t<title>.  Exits with status 0 on success and 1 on any error.
X */
X
Xmain (argc, argv)
Xint argc;
Xchar *argv[];
X{ int maxsize=48000, count, bol, total=0, check=0, quick=0;
X  int i, assigned, b, maxb=1, ch, headsize = 0, overhead = 0;
X  char *outnam = "pack.out", *headnam = NULL, *title = NULL, *filnam;
X  char fname[128];
X  FILE *infile = NULL, *outfil = NULL, *headfil = NULL;
X
X  /*
X   * Get the arguments.  A valued option takes the rest of its word;
X   * SKIPARG then leaves *argv on the last character of the word so
X   * that the inner loop stops.
X   */
X  while (--argc > 0 && (*++argv)[0] == '-')
X  { while (*++(*argv))
X    { switch (**argv)
X      { case 'n': check++; break;
X        case 'q': quick++; break;
X        case 'h': headnam = *argv+1; SKIPARG; break;
X        case 'o': outnam = *argv+1; SKIPARG; break;
X        case 's': maxsize = atoi (*argv+1); SKIPARG; break;
X        case 't': title = *argv+1; SKIPARG; break;
X        default:  fprintf (stderr,
X                        "Usage: packmail [-nq] [-{host}<value>] files\n");
X                  exit (1);
X      }
X    }
X  }
X
X  /* The title defaults to the output name */
X  if (!title) title = outnam;
X
X  /* fname[] must hold the output name plus a ".nn" suffix */
X  if (strlen (outnam) + 16 > sizeof (fname))
X  { fprintf (stderr, "packmail: output name '%s' is too long\n", outnam);
X    exit (1);
X  }
X
X  /* pfile[] has room for only MAXFILE entries */
X  if (argc > MAXFILE)
X  { fprintf (stderr, "packmail: too many files (limit is %d)\n", MAXFILE);
X    exit (1);
X  }
X
X  /*
X   * Open the headerfile (if there is one).  Each header line gets a
X   * two character "# " prefix, hence the 'add' value of 2.
X   */
X
X  if (headnam && *headnam)
X  { headsize = HOVRHD + filesize (headnam) + cnt_extra (headnam, 2);
X    total += headsize;
X    if ((headfil = fopen (headnam, "r")) == NULL)
X    { perror (headnam); exit (1); }
X  }
X
X  /*
X   * Locate all files and determine their sizes. Assign each to bin 0,
X   * and set 'total' to the total size of all files. The size includes
X   * any overhead required by the output packaging, except in quick
X   * mode the extra characters prepended to each line are not counted.
X   */
X
X  for (i = 0; i < argc; i++, nf++)
X  { pfile[nf].size = filesize (pfile[nf].name = argv[i]);
X    pfile[nf].size += MOVRHD + 2 * strlen (pfile[nf].name);
X    if (!quick) pfile[nf].size += cnt_extra (pfile[nf].name, 1);
X    total += pfile[nf].size;
X    pfile[nf].bin = 0;
X  }
X
X# ifdef DEBUG
X  printf ("Header: %7d '%s'\n\n", headsize, headnam ? headnam : "");
X
X  for (i=0; i<nf; i++)
X    printf ("%6d: %7d '%s'\n", i, pfile[i].size, pfile[i].name);
X# endif
X
X  /*
X   * If the total size is larger than maxsize, do bin packing and
X   * assign each file to a bin from 1 to n.
X   */
X
X  overhead = FOVRHD + 2 * strlen (title) + headsize;
X
X  if (total > maxsize)
X  { /* Sort into decreasing order of size. */
X    qsort (pfile, nf, sizeof (*pfile), cmpsize);
X
X    /* Loop through bins, assigning each file that still fits */
X    b=0; assigned=0;
X    while (assigned < nf)
X    { count=overhead; b++; maxb=b;
X      for (i=0; i<nf; i++)
X      { if (pfile[i].bin==0)
X        { /* An empty bin accepts any file, even an oversized one */
X          if (count==overhead || pfile[i].size+count <= maxsize)
X          { pfile[i].bin = b;
X            count += pfile[i].size;
X            assigned++;
X          }
X        }
X      }
X      /* Bin 'b' is now full, indicate its size */
X      printf ("File %s.%02d size %d.\n", outnam, b, count);
X    }
X
X    /* Now sort files into increasing order of <bin,name> */
X    qsort (pfile, nf, sizeof (*pfile), cmpbin);
X  }
X
X# ifdef DEBUG
X  printf ("\nAfter sorting:\n\n");
X  for (i=0; i<nf; i++)
X    printf ("[%02d] %7d '%s'\n", pfile[i].bin, pfile[i].size, pfile[i].name);
X# endif
X
X  /* In check mode report the archive count and write nothing */
X  if (check)
X  { fprintf (stderr, "Packaging would require %d file%s.\n",
X             maxb, (maxb != 1) ? "s" : "");
X    exit (0);
X  }
X
X  /*
X   * Now loop through each file and copy it into the correct bin. Since
X   * the files are sorted by bin, we only open each output file once.
X   * Bin 0 is used if all files fit into one bin, otherwise we use bins 1
X   * to 'n'. 'b' is the current bin, -1 indicates no output file yet.
X   */
X
X  for (b= -1, i=0; i<nf; i++)
X  { filnam = pfile[i].name;
X
X    /* Open the next input file for reading */
X    if ((infile = fopen (filnam, "r")) == NULL)
X    { perror (filnam); exit (1); }
X
X    /* If file.bin is not the current bin, open a new output file */
X    if (b != pfile[i].bin)
X    { /* Close the old file */
X      if (outfil)
X      { fprintf (outfil, "echo 'Part %02d of %s complete.'\n",
X                 max (b,1), title);
X        fprintf (outfil, "exit\n");
X        fclose (outfil);
X      }
X
X      /*
X       * Build the output name, bin 0 is just the output file name, and
X       * bin k, k>0, is outputname.kk.
X       */
X
X      if ((b = pfile[i].bin) > 0)
X        sprintf (fname, "%s.%02d", outnam, b);
X      else
X        strcpy (fname, outnam);
X
X      /* Report a failure against the name we actually tried to open */
X      if ((outfil = fopen (fname, "w")) == NULL)
X      { perror (fname); exit (1); }
X
X      fprintf (outfil, "#!/bin/sh\n");
X
X      /* Test the stream, not the name: -h with no value opens nothing */
X      if (headfil)
X      { fprintf (outfil, "#\n");
X
X        /* Now copy the header file out with '#' in front */
X        rewind (headfil);
X        bol = 1;
X        while ((ch=getc (headfil)) != EOF)
X        { if (bol) { fprintf (outfil, "# "); bol = 0; }
X          putc (ch, outfil);
X          if (ch == '\n') bol = 1;
X        }
X
X        /* Terminate a final header line that lacks a newline */
X        if (!bol) putc ('\n', outfil);
X
X        fprintf (outfil, "#\n");
X      }
X
X      fprintf (outfil,
X               "echo 'Start of %s, part %02d of %02d:'\n",
X               title, max (b,1), maxb);
X    }
X
X    /*
X     * Now copy the input file to the current bin; create a shell script
X     * which will restore the original file with the original name (but
X     * with default permission bits, group, and owner).
X     */
X
X    printf ("%s: added %s\n", fname, filnam);
X    fprintf (outfil, "echo 'x - %s'\n", filnam);
X    fprintf (outfil, "sed 's/^X//' > %s << '/'\n", filnam);
X
X    /* Now copy the file out with 'X' in front */
X    bol = 1;
X    while ((ch=getc (infile)) != EOF)
X    { if (bol) { fprintf (outfil, "X"); bol = 0; }
X      putc (ch, outfil);
X      if (ch == '\n') bol = 1;
X    }
X
X    /* Terminate a final line that lacks a newline */
X    if (!bol) putc ('\n', outfil);
X
X    /* Add the trailing EOF marker */
X    fprintf (outfil, "/\n");
X
X    /* Close the input file */
X    fclose (infile);
X  }
X
X  /* Close the remaining bin's file */
X  if (outfil)
X  { fprintf (outfil, "echo 'Part %02d of %s complete.'\n",
X             max (b,1), title);
X    fprintf (outfil, "exit\n");
X    fclose (outfil);
X  }
X
X  if (headfil) fclose (headfil);
X
X  /* Tell the user how many files the packaging took */
X  fprintf (stderr, "Packaging required %d file%s.\n",
X           maxb, (maxb > 1) ? "s" : "");
X
X  /* Give the caller a defined exit status */
X  exit (0);
X}
X
X/****************************************************************
X * filesize: Return the st_size that stat reports for 'fname'.
X * If stat fails, print the reason with perror and exit with
X * status 1.
X ****************************************************************/
X
Xfilesize (fname)
Xchar *fname;
X{ struct stat info;
X
X  if (stat (fname, &info) == 0)
X    return (info.st_size);
X
X  perror (fname);
X  exit (1);
X}
X
X/*
X * cnt_extra: Count the number of extra characters needed to
X * pack file 'fname'.  That is 'add' characters added to each
X * line, plus 1 if the last line is missing a trailing newline.
X *
X * Returns 0 if the file cannot be opened or is empty.
X */
X
Xcnt_extra (fname, add)
Xchar *fname;
Xint add;
X{ register int count = 0;
X  register FILE *cfil;
X  /*
X   * getc returns an int so that EOF can be told apart from every
X   * byte value; storing it in a char breaks that comparison.
X   * lastc starts as a newline so that an empty file, which has no
X   * unterminated last line, is charged nothing.
X   */
X  int c, lastc = '\n';
X
X  if (cfil = fopen (fname, "r"))
X  { while ((c = getc (cfil)) != EOF)
X    { lastc = c; if (c == '\n') count += add; }
X
X    /* Unterminated last line: its prefix plus the added newline */
X    if (lastc != '\n') count += (add+1);
X    fclose (cfil);
X  }
X
X  return (count);
X}
/
echo 'x - unshar.1'
sed 's/^X//' > unshar.1 << '/'
X.TH UNSHAR 1 01/29/85
X.UC 4
X.SH NAME
Xunshar \- Unpackage one or more shell archive files
X.SH SYNOPSIS
Xunshar [
X.I files
X]
X.SH DESCRIPTION
X.PP
X.I Unshar
Xunpackages shell archive files by first removing any leading mail
Xheaders or other junk and then piping the rest of the file through
Xthe
X.I sh
Xcommand.  Trailing junk is not removed and may cause anomalous
Xbehavior unless the shell archive file has an
X.I
Xexit
Xcommand at the end.
X.PP
X.I Unshar
Xunderstands phrases like
X.I cut here
Xand also knows about shell command characters (#) and (:) and
Xthe Unix commands
X.I echo,
X.I cat,
Xand
X.I sed.
X.PP
XThe 
X.I unshar
Xcommand is not perfect, but it will correctly unpackage output from
Xmost shell archive generators.
X.SH OPTIONS
X.PP
XNone.
X.SH SEE ALSO
X.PP
Xpackmail(1)
X.SH BUGS
XNone known.
X.SH HISTORY
X.TP
X29-Jan-85  Michael Mauldin (mlm) at CMU
XCreated.
/
echo 'x - unshar.c'
sed 's/^X//' > unshar.c << '/'
X/****************************************************************
X * unshar.c: Unpackage one or more shell archive files
X *
X * Usage:	unshar [ files ]
X *
X * Description:	unshar is a filter which removes the front part
X *		of a file and passes the rest to the 'sh' command.
X *		It understands phrases like "cut here", and also
X *		knows about shell comment characters and the Unix
X *		commands "echo", "cat", and "sed".
X *
X * HISTORY
X * 29-Jan-85  Michael Mauldin (mlm) at Carnegie-Mellon University
X *	Created.
X ****************************************************************/
X
X# include <stdio.h>
X# define EOL '\n'
X
Xmain (argc, argv)
Xint argc;
Xchar *argv[];
X{ int i, ch;
X  FILE *in, *shpr, *popen ();
X
X  for (i=1; i<argc; i++)
X  { if ((in = fopen (argv[i], "r")) == NULL)
X    { fprintf (stderr, "unshar: file '%s' not found\n", argv[i]);
X      exit (1);
X    }
X
X    if (position (argv[i], in))
X    { printf ("%s:\n", argv[i]);
X      if ((shpr = popen ("sh", "w")) == NULL)
X	quit (1, "unshar: cannot open 'sh' process\n");
X
X	while ((ch = fgetc (in)) != EOF)
X	  fputc (ch, shpr);
X
X	pclose (shpr);
X	fclose (in);
X    }
X  }
X}
X
X/****************************************************************
X * position: scan 'fil' from its beginning for the first line of
X * the shell command portion of a shell archive and leave the
X * file positioned at the start of that line.
X *
X * 'fn' is the file name, used only in error messages.  Returns 1
X * when a starting line was found and 0 (after printing a message
X * on stderr) when it was not.
X ****************************************************************/
X
Xposition (fn, fil)
Xchar *fn;
XFILE *fil;
X{ char line[BUFSIZ];
X  long mark, ftell ();
X  register char **pp;
X
X  /* Buffers that smatch fills with the text matched by each star */
X  static char m1[BUFSIZ], m2[BUFSIZ], m3[BUFSIZ], m4[BUFSIZ];
X  static char *matched[] = { m1, m2, m3, m4 };
X
X  /* Line prefixes that mean the file is raw C source */
X  static char *cprefix[] =
X  { "#include", "# include", "#define", "# define",
X    "#ifdef", "# ifdef", "#ifndef", "# ifndef", "/*", 0 };
X
X  /* Line prefixes that start a shell command or comment */
X  static char *shprefix[] = { "#", ":", "echo ", "sed ", "cat ", 0 };
X
X  /* Patterns for a "cut here" style separator line */
X  static char *cutpat[] =
X  { "*CUT*HERE*", "*cut*here*", "*TEAR*HERE*", "*tear*here*",
X    "*CUT*CUT*", "*cut*cut*", 0 };
X
X  rewind (fil);
X
X  for (;;)
X  { /* Remember where this line begins so we can seek back to it */
X    mark = ftell (fil);
X
X    /* Out of input without finding anything: give up */
X    if (fgets (line, BUFSIZ, fil) == NULL)
X    { fprintf (stderr, "unshar: found no shell commands in %s\n", fn);
X      return (0);
X    }
X
X    /* C preprocessor lines or C comments mean this is not an archive */
X    for (pp = cprefix; *pp; pp++)
X      if (stlmatch (line, *pp)) break;
X
X    if (*pp)
X    { fprintf (stderr,
X	       "unshar: %s looks like raw C code, not a shell archive\n", fn);
X      return (0);
X    }
X
X    /* A shell command or comment: the archive starts on this line */
X    for (pp = shprefix; *pp; pp++)
X      if (stlmatch (line, *pp)) break;
X
X    if (*pp)
X    { fseek (fil, mark, 0);
X      return (1);
X    }
X
X    /* Anything other than a "cut here" line is skipped */
X    for (pp = cutpat; *pp; pp++)
X      if (smatch (line, *pp, matched)) break;
X
X    if (*pp == 0) continue;
X
X    /* Take the first non-blank line that follows the cut line */
X    do
X    { mark = ftell (fil);
X
X      if (fgets (line, BUFSIZ, fil) == NULL)
X      { fprintf (stderr,
X		"unshar: found no shell commands after 'cut' in %s\n", fn);
X	return (0);
X      }
X    }
X    while (*line == '\n');
X
X    /* Accept a comment character or a lower case letter */
X    if (*line == '#' || *line == ':' || (*line >= 'a' && *line <= 'z'))
X    { fseek (fil, mark, 0);
X      return (1);
X    }
X
X    /* The cut line was not followed by anything shell-like */
X    fprintf (stderr, "unshar: %s is probably not a shell archive,\n", fn);
X    fprintf (stderr, "        the 'cut' line was followed by: %s", line);
X    return (0);
X  }
X}
X
X/*****************************************************************
X * stlmatch  --  test whether one string is a prefix of another
X *
X * Usage:  i = stlmatch (big,small)
X *	int i;
X *	char *small, *big;
X *
X * Returns 1 when every character of small equals the character
X * at the same offset in big (so an empty small always matches),
X * and 0 at the first difference.
X *
X * HISTORY
X * 18-May-82 Michael Mauldin (mlm) at Carnegie-Mellon University
X *      Ripped out of CMU lib for Rog-O-Matic portability
X * 20-Nov-79  Steven Shafer (sas) at Carnegie-Mellon University
X *	Rewritten for VAX from Ken Greer's routine.
X *
X *  Originally from klg (Ken Greer) on IUS/SUS UNIX
X *****************************************************************/
X
Xint   stlmatch (big, small)
Xchar *small, *big;
X{ register char *want, *have;
X
X  /* Walk both strings until small runs out or a character differs */
X  for (want = small, have = big; *want != '\0'; want++, have++)
X  { if (*want != *have)
X      return (0);
X  }
X
X  return (1);
X}
X
X/*****************************************************************
X * smatch: Match the data string 'dat' against the pattern 'pat',
X * in which each star (*) stands for any run of characters.
X * Returns 1 if the whole of 'dat' matches and 0 otherwise.  The
X * text consumed by the i'th star is left, null terminated, in
X * res[i]; the caller supplies one buffer per star.
X *****************************************************************/
X
Xsmatch (dat, pat, res)
Xregister char *dat, *pat, **res;
X{ register char *anchor = 0;	/* Pattern just after the latest star */
X  register char *resume;	/* Next data char the star may absorb */
X  register char *out;		/* Write position in the star's result */
X  int used = 0;			/* Number of result buffers handed out */
X
X  for (;;)
X  { /* A star: note where to restart and begin an empty result */
X    if (*pat == '*')
X    { pat++;
X      anchor = pat;
X      resume = dat;
X      out = res[used];
X      used++;
X      *out = '\0';
X      continue;
X    }
X
X    /* Mismatch: fail, or let the latest star absorb one more char */
X    if (*dat != *pat)
X    { if (*dat == '\0' || anchor == 0)
X	return (0);
X
X      *out = *resume;
X      out++;
X      *out = '\0';
X      resume++;
X      dat = resume;
X      pat = anchor;
X      continue;
X    }
X
X    /* Characters agree; both strings ending together is success */
X    if (*pat == '\0')
X      return (1);
X
X    pat++;
X    dat++;
X  }
X}
/
echo 'Part 01 of packmail and unshar complete.'
exit