[net.sources] unshar

aeb@turing.UUCP (02/10/85)

Below an improved version of unshar.c. I have been too lazy to also
update unshar.1. Main differences with previous versions are:
- The lseek() call has been removed, so that unshar still works when
  standard input is a pipe.
- Options -o and -O have been added. -o will save the text before the
  cut line in a file something.hdr; -O will save not only the text, but
  also the mail headers.
- The heuristics have been improved a little. Unshar now 'recognizes'
  C and PASCAL program text, and troff input. Some shell archivers
  produce lines like   # This is a shell archive   before the cut line,
  and unshar now handles this situation. Unshar no longer sees 'cut' in
  'execution' or 'here' in 'whereabouts'.
This version of unshar will correctly handle all files we currently have
in the net.sources spool directory, but clearly the program is still very
primitive.
Just before posting this I saw Stephen C. Woods posting. I also put in
his changes, but haven't really looked at them.
People who want to do some more work on unshar might think about the
following:
- Stop when sh reports errors.
- Do something about security; it is potentially dangerous to execute
  a shell file that some unknown person has posted to the net.

-----------------------Cut Here-----------------------------------
# This is a shell archive.  Remove anything before this line, then
# unpack it by saving it in a file and typing "sh file".  (Files
# unpacked will be owned by you and have default permissions.)
#
# This archive contains:
# unshar.c

echo x - unshar.c
cat > "unshar.c" << '//E*O*F unshar.c//'
/****************************************************************
 * unshar.c: Unpackage one or more shell archive files
 *
 * Usage:	unshar [-c] [-o] [ -d directory ] [ file ] ...
 *
 * Description:	unshar is a filter which removes the front part
 *		of a file and passes the rest to the 'sh' command.
 *		It understands phrases like "cut here", and also
 *		knows about shell comment characters and the Unix
 *		commands "echo", "cat", and "sed".
 *		The -o flag causes it to preserve the header.
 *		With -O also the mail headers are preserved.
 *
 * HISTORY
 *  9-Feb-85  Andries Brouwer (aeb@mcvax) at CWI, Amsterdam
 *	Fixed filter mode; added -o and -O options; improved heuristics.
 *  6-Feb-85  Stephen C. Woods (scw@cepu) Los Angeles CA
 *	fixed '#' for v7 flavor systems (sh dosen't know about # comments)
 *	added -c flag to allow echo of skipped #comments
 *  1-Feb-85  Guido van Rossum (guido@mcvax) at CWI, Amsterdam
 *	Added missing 'quit' routine;
 *	added -d flag to change to directory first;
 *	added filter mode (read stdin when no arguments);
 *	added 'getopt' to get flags (makes it self-contained).
 * 29-Jan-85  Michael Mauldin (mlm) at Carnegie-Mellon University
 *	Created.
 ****************************************************************/

# include <stdio.h>
# define EOL '\n'

extern char *rindex ();		/* for SYSV: change to strrchr() */
extern char *optarg;
extern int  optind;
extern char *position ();

int     outheader = 0;
int     outall = 0;
#ifdef V7
int     cflag = 0;
#endif V7

main (argc, argv)
int     argc;
char   *argv[];
{
    register int    i,
                    ch;
    FILE * in;

 /* Process options */

    while ((ch = getopt (argc, argv, "cd:oO")) != EOF) {
	switch (ch) {
	    case 'd': 
		if (chdir (optarg) == -1) {
		    fprintf (stderr, "unshar: cannot chdir to '%s'\n", optarg);
		    exit (2);
		}
		break;
	    case 'O': 
		outall++;
	    /* fall into next case */
	    case 'o': 
		outheader++;
		break;
#ifdef V7
	    case 'c': 
		cflag++;
		if (cflag == 1)
		    break;
	    /* fall into next case */
	    default: 
		quit (2, "Usage: unshar [-c] [-o or -O] [-d directory] [input files]\n");
#else V7
	    default: 
		quit (2, "Usage: unshar [-o or -O] [-d directory] [input files]\n");
#endif V7
	}
    }

    if (optind < argc) {
	for (i = optind; i < argc; ++i) {
	    if ((in = fopen (argv[i], "r")) == NULL) {
		fprintf (stderr, "unshar: file '%s' not found\n", argv[i]);
		exit (1);
	    }
	    process (argv[i], in);
	    fclose (in);
	}
    }
    else
	process ((char *) 0, stdin);

    exit (0);
}


process (name, in)
char   *name;
FILE * in;
{
    char    ch;
    register char  *firstline;
    FILE * shpr, *popen ();

    if ((firstline = position (name, in)) != NULL) {
	printf ("%s:\n", name ? name : "standard input");
#ifdef V7
	{
	    int     inline,
	            comment;

	    ch = fgetc (in);

	    if ((inline = comment = (ch == '#'))) {
		fprintf (stderr, " skipping comments\n");
		if (cflag)
		    fputc (ch, stderr);
		do {
		    ch = fgetc (in);
		    if (ch == EOF) {
			fprintf (stderr, "file was all comments, nothing extracted\n");
			return;
		    }
		    if (inline) {
			inline = ch != '\n';
		    }
		    else {
			inline = comment = (ch == '#');
		    }
		    if (comment && cflag)
			fputc (ch, stderr);
		} while (comment);
	    }
	    ungetc (ch, in);
	}
#endif V7
	if ((shpr = popen ("sh", "w")) == NULL)
	    quit (1, "unshar: cannot open 'sh' process\n");
	fputs (firstline, shpr);
	while ((ch = fgetc (in)) != EOF)
	    fputc (ch, shpr);

	pclose (shpr);
    }
}

/****************************************************************
 * position: position 'fil' at the start of the shell command
 * portion of a shell archive file.
 ****************************************************************/

char   *
        position (fn, fil)
char   *fn;
FILE * fil;
{
    char    buf[BUFSIZ],
            bufl[BUFSIZ];
    FILE * hdr;
    register char  *xfn = fn ? fn : "the standard input";
    short   inmailheader;

    if (outheader) {
	char    hdrnam[15];
	if (fn) {
	    register char  *base = rindex (fn, '/');

	    strncpy (hdrnam, base ? base + 1 : fn, 14);
	    hdrnam[10] = 0;
	    strcat (hdrnam, ".hdr");
	}
	else {
	    printf ("Sending header to unshar.hdr\n");
	    strcpy (hdrnam, "unshar.hdr");
	}
	if ((hdr = fopen (hdrnam, "a")) == NULL) {
	    fprintf (stderr, "unshar: cannot open %s\n", hdrnam);
	    return (0);
	}
	inmailheader = 1;
    }

    while (1) {
    /* Read next line, fail if no more */
	if (fgets (buf, BUFSIZ, fil) == NULL) {
	    fprintf (stderr, "unshar: found no shell commands in %s\n", xfn);
	    return (0);
	}

    /* Bail out if we see C preprocessor commands or C comments */
	if (lookslikeC (buf)) {
	    fprintf (stderr,
		    "unshar: %s looks like raw C code, not a shell archive\n",
		    xfn);
	    return (0);
	}

	if (lookslikePASCAL (buf)) {
	    fprintf (stderr,
		    "unshar: %s looks like raw PASCAL code, not a shell archive\n",
		    xfn);
	    return (0);
	}

	if (lookslikeTROFF (buf)) {
	    fprintf (stderr,
		    "unshar: %s looks like raw TROFF input, not a shell archive\n",
		    xfn);
	    return (0);
	}

    /* Does this line start with a shell command or comment */
	if ((stlmatch (buf, "#") && !textline (buf)) || stlmatch (buf, ":") ||
		stlmatch (buf, "echo ") || stlmatch (buf, "sed ") ||
		stlmatch (buf, "cat ")) {
	    if (outheader)
		(void) fclose (hdr);
	    return (buf);
	}

    /* Does this line say "Cut here" */
    /* Of course we have to be a little careful: for example it says in a
       recent distribution: ... this program is cute, but there are ... */
	tolowercase (buf, bufl);
	if (stlmatch (buf, "--------") ||
		contains (bufl, "cut", "here") ||
		contains (bufl, "cut", "cut") ||
		contains (bufl, "tear", "here")) {
	/* Read next line after "cut here", skipping blank lines */
	    while (1) {
		if (fgets (buf, BUFSIZ, fil) == NULL) {
		    fprintf (stderr,
			    "unshar: found no shell commands after 'cut' in %s\n",
			    fn ? fn : "the standard input");
		    return (0);
		}

		if (*buf != '\n')
		    break;
	    }

	/* Win if line starts with a comment character of lower case
	   letter */
	    if (*buf == '#' || *buf == ':' || (('a' <= *buf) && ('z' >= *buf)))
		return (buf);

	/* Cut here message lied to us */
	    fprintf (stderr, "unshar: %s is probably not a shell archive,\n", fn);
	    fprintf (stderr, "        the 'cut' line was followed by: %s", buf);
	    return (0);
	}

	if (outheader) {
	    if (inmailheader && !mailheaderline (buf))
		inmailheader = 0;
	    if (!inmailheader || outall)
		fputs (buf, hdr);
	}
    }
}

/*****************************************************************
 * stlmatch  --  match leftmost part of string
 *
 * Usage:  i = stlmatch (big,small)
 *	int i;
 *	char *small, *big;
 *
 * Returns 1 iff initial characters of big match small exactly;
 * else 0.
 *
 * HISTORY
 * 18-May-82 Michael Mauldin (mlm) at Carnegie-Mellon University
 *      Ripped out of CMU lib for Rog-O-Matic portability
 * 20-Nov-79  Steven Shafer (sas) at Carnegie-Mellon University
 *	Rewritten for VAX from Ken Greer's routine.
 *
 *  Originally from klg (Ken Greer) on IUS/SUS UNIX
 *****************************************************************/

int     stlmatch (big, small)
char   *small,
       *big;
{
    register char  *s,
                   *b;
    s = small;
    b = big;
    do {
	if (*s == '\0')
	    return (1);
    }
    while (*s++ == *b++);
    return (0);
}

/*****************************************************************
 * smatch: Given a data string and a pattern containing one or
 * more embedded stars (*) (which match any number of characters)
 * return true if the match succeeds, and set res[i] to the
 * characters matched by the 'i'th *.
 *****************************************************************/

smatch (dat, pat, res)
register char  *dat,
               *pat,
              **res;
{
    register char  *star = 0,
                   *starend,
                   *resp;
    int     nres = 0;

    while (1) {
	if (*pat == '*') {
	    star = ++pat;	/* Pattern after * */
	    starend = dat;	/* Data after * match */
	    resp = res[nres++];	/* Result string */
	    *resp = '\0';	/* Initially null */
	}
	else
	    if (*dat == *pat) {	/* Characters match */
		if (*pat == '\0')/* Pattern matches */
		    return (1);
		pat++;		/* Try next position */
		dat++;
	    }
	    else {
		if (*dat == '\0')/* Pattern fails - no more */
		    return (0);	/* data */
		if (star == 0)	/* Pattern fails - no * to */
		    return (0);	/* adjust */
		pat = star;	/* Restart pattern after * */
		*resp++ = *starend;/* Copy character to result */
		*resp = '\0';	/* null terminate */
		dat = ++starend;/* Rescan after copied char */
	    }
    }
}

/*****************************************************************
 * Addendum: quit subroutine (print a message and exit)
 *****************************************************************/

quit (status, message)
int     status;
char   *message;
{
    fprintf (stderr, message);
    exit (status);
}

/*****************************************************************
 * Public Domain getopt routine
 *****************************************************************/

/*
 * get option letter from argument vector
 */
int     optind = 1,		/* index into parent argv vector */
        optopt;			/* character checked for validity */
char   *optarg;			/* argument associated with option */

#define BADCH	(int)'?'
#define EMSG	""
#define tell(s)	fputs(*nargv,stderr);fputs(s,stderr); \
fputc (optopt, stderr); \
fputc ('\n', stderr); \
return (BADCH);

getopt (nargc, nargv, ostr)
int     nargc;
char  **nargv,
       *ostr;
{
    static char *place = EMSG;	/* option letter processing */
    register char  *oli;	/* option letter list index */
    char   *index ();

    if (!*place) {		/* update scanning pointer */
	if (optind >= nargc || *(place = nargv[optind]) != '-' || !*++place)
	    return (EOF);
	if (*place == '-') {	/* found "--" */
	    ++optind;
	    return (EOF);
	}
    }				/* option letter okay? */
    if ((optopt = (int) * place++) == (int) ':' ||
	    !(oli = index (ostr, optopt))) {
	if (!*place)
	    ++optind;
	tell (": illegal option -- ");
    }
    if (*++oli != ':') {	/* don't need argument */
	optarg = NULL;
	if (!*place)
	    ++optind;
    }
    else {			/* need an argument */
	if (*place)
	    optarg = place;	/* no white space */
	else
	    if (nargc <= ++optind) {/* no arg */
		place = EMSG;
		tell (": option requires an argument -- ");
	    }
	    else
		optarg = nargv[optind];/* white space */
	place = EMSG;
	++optind;
    }
    return (optopt);		/* dump back option letter */
}

/********************************************************************
 *
 * Differentiate mail headers from the rest of the article.
 *
 ********************************************************************/

mailheaderline (buf)
register char  *buf;
{
    register int    cnt = 0;

    if (!*buf || *buf == '\n')
	return (0);
    if (*buf == ' ' || *buf == '\t')
	return (1);
    while (letdig (*buf)) {
	buf++;
	cnt++;
    }
    return (cnt && *buf == ':');
}

letdig (c)			/* Symbols found in field tags, like 
				   Article-I.D.: */
char    c;
{
    return (c == '-' || c == '_' || c == '.' || digit (c) || letter (c));
}

/*
 * Some shar's produce output like:
 *	#	This is a shell archive.
 *	#	...
 *	#	---- cut here ----
 *	#!/bin/sh
 *
 * so if we haven't seen the 'cut' message yet, we have to skip #text lines.
 */
textline (buf)
register char  *buf;
{
    if (*buf == '#')
	buf++;
    while (textsym (*buf))
	buf++;
    return (!*buf);
}

textsym (c)
char    c;
{
    return (letter (c) ||
	    c == ' ' || c == '\t' || c == '\n' || c == ',' || c == '.');
}

/****************************************************************
 *
 * Recognize various types of files.
 *
 ****************************************************************/

lookslikeC (buf)
register char  *buf;
{
    return (stlmatch (buf, "#include") || stlmatch (buf, "# include") ||
	    stlmatch (buf, "#define") || stlmatch (buf, "# define") ||
	    stlmatch (buf, "#ifdef") || stlmatch (buf, "# ifdef") ||
	    stlmatch (buf, "#ifndef") || stlmatch (buf, "# ifndef") ||
	    stlmatch (buf, "/*")
	);
}

lookslikePASCAL (buf)
register char  *buf;
{
    return (stlmatch (buf, "(*"));
}

lookslikeTROFF (buf)
register char  *buf;
{
    return (*buf == '.' &&
	    letter (buf[1]) && letter (buf[2]) && !letter (buf[3]));
}

/************************************************************
 *
 * Some miscellaneous goodies
 *
 ************************************************************/

letter (c)
char    c;
{
    return (('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z'));
}

digit (c)
char    c;
{
    return ('0' <= c && c <= '9');
}

tolowercase (buf, bufl)
register char  *buf,
               *bufl;
{
    while (*buf) {
	if ('A' <= *buf && *buf <= 'Z')
	    *bufl++ = *buf++ + ('a' - 'A');
	else
	    *bufl++ = *buf++;
    }
    *bufl = 0;
}

/* check whether buf contains wd1 and wd2 as words
   (i.e., no preceding or following letters) */
contains (buf, wd1, wd2)
register char  *buf, *wd1, *wd2;
{
    register char  *wd = wd1;
    int     first = 1;

again: 
    while (*buf) {
	if (!letter (*buf)) {
	    buf++;
	    continue;
	}
	while (*buf++ == *wd++) {
	    if (!*wd) {
		if (!letter (*buf)) {
		    if (!first)
			return (1);
		    first = 0;
		    wd = wd2;
		    goto again;
		}
		break;
	    }
	}
	while (letter (*buf))
	    buf++;
	wd = first ? wd1 : wd2;
    }
    return (0);
}
//E*O*F unshar.c//

exit 0

-- 
      Andries Brouwer -- CWI, Amsterdam -- {philabs,decvax}!mcvax!aeb

ken@boring.UUCP (07/15/85)

In article <12600026@uiucdcs> liberte@uiucdcs.Uiuc.ARPA writes:
#! /bin/sh
# unshar - pipe shar part of input through sh
# Ignore lines before first "#" comment starting in first column.
# Input is either $1 or stdin.
sed -n '/^#/,$ p' ${1-} | sh

Good stuff. This is even better.
sed -n '/^#/,$ p' ${1-} | exec sh

	Ken
-- 
UUCP: ..!{seismo,okstate,garfield,decvax,philabs}!mcvax!ken Voice: Ken!
Mail: Centrum voor Wiskunde en Informatica, Kruislaan 413, 1098 SJ, Amsterdam.

cagordon@watnot.UUCP (Chris A. Gordon) (07/18/85)

In article <6511@boring.UUCP> ken@mcvax.UUCP (Ken Yap) writes:
># unshar - pipe shar part of input through sh
># Ignore lines before first "#" comment starting in first column.
># Input is either $1 or stdin.
>sed -n '/^#/,$ p' ${1-} | sh
>
>Good stuff. This is even better.
>sed -n '/^#/,$ p' ${1-} | exec sh

Or, if you are using /bin/csh, and wish to save a small amount of filespace:
alias unshar "sed -n '/^#/,$ p' \!* | sh"  (or  | exec sh  whatever you wish)
placed in the login file will set up an alias which works just as well.