[alt.sources] Program to strip out non-data lines from uuencoded files

maman@uranie.inria.fr (Nathan Maman) (01/25/91)

I like the way Joe Peterson defines his valid_line() test.  So here is
a new uud.c using his technique. I don't remember if I said it before:
you have to  provide the files  in order and  you can provide multiple
files at the same time.
Compilation: cc [-O] -o myuud myuud.c
I call it:   myuud -v file|-

For this purpose, I also wrote a little shell script that sorts all the
articles by subject.  Each article has to be in a separate file, and its
Subject line must of course contain some relevant information.

The script uses the exported variables UUDCOMMAND and UUDOPTIONS.  When
they are unset, it falls back to `myuud' and `-v'.

Call:	     sortuud file1 file2 file3 ...

The shar file follows below the signature.

-- 
				Nat.

+--------------------------------------------------------------------------+
| Address: M. Nathan MAMAN, INRIA Sophia-Antipolis, 06560 Valbonne, FRANCE |
| E-mail: maman@mirsa.inria.fr, Phone: 33-93.65.77.95, FAX: 33-93.65.78.58 |
+--------------------------------------------------------------------------+

#! /bin/sh
# This is a shell archive.  Remove anything before this line, then unpack
# it by saving it into a file and typing "sh file".  To overwrite existing
# files, type "sh file -c".  You can also feed this as standard input via
# unshar, or by typing "sh <file", e.g..  If this archive is complete, you
# will see the following message at the end:
#		"End of shell archive."
# Contents:  myuud.c sortuud
# Wrapped by maman@uranie on Thu Jan 24 18:24:16 1991
PATH=/bin:/usr/bin:/usr/ucb ; export PATH
if test -f 'myuud.c' -a "${1}" != "-c" ; then 
  echo shar: Will not clobber existing file \"'myuud.c'\"
else
echo shar: Extracting \"'myuud.c'\" \(14887 characters\)
sed "s/^X//" >'myuud.c' <<'END_OF_FILE'
X/*
X * Uud -- decode a uuencoded file back to binary form.
X *
X * From the Berkeley original, modified by MSD, RDR, JPHD & WLS.
X * The Atari GEMDOS version compiled with MWC 2.x.
X * The MSDOS version with TurboC.
X * The Unix version with cc.
X * this version is made: 25 Nov 1988.
X * Mods :
X * ------ 24 Jan 1991 ------ maman@cosinus.inria.fr ------
X *    No use of ceil. (cdlen[] is already there).
X *    valid_char, valid_length and valid_chars used.
X *    verify flag added for more tests and more verbose,
X *    exactly the way I like it ;-)
X * ------ 23 Jan 1991 ------ maman@cosinus.inria.fr ------
X *    I've taken this from ftp at sics.se:/mailserver/uudecode.shar
X *    It was said Wrapped by Edwin Kremer <edwin@zlotty> on \
X *    Wed Dec 20 17:06:37 1989
X *
X *    Checks the declared length and the effective length
X *    in order to skip the header/signature of different
X *    files and test the range validity of one character
X *    in the middle of the buffer.
X *    Works almost all the time.
X *    Tolerate one more byte at the end for another count
X *    (should be given as a parameter ?).
X *    UNIX compilation: cc [-O] -o uud uud.c
X */
X
X/*
X * Be sure to have the proper symbol at this point. (GEMDOS, MSDOS, UNIX...)
X * Only the UNIX selection below is active; the GEMDOS and MSDOS ones are
X * kept inside comments.
X */
X/*
X#ifndef GEMDOS
X#define GEMDOS 1
X#endif
X */
X#ifndef UNIX
X#define UNIX 1
X#endif
X/*
X#ifndef MSDOS
X#define MSDOS 1
X#endif
X */
X
X/*
X * Force the UNIX configuration even when the compiler predefines one of
X * the other system symbols.  Both lines must go if GEMDOS or MSDOS is
X * selected above.
X */
X#undef GEMDOS
X#undef MSDOS
X
X/*
X * Per-system settings:
X *   SYSNAME      name printed in the usage banner.
X *   SMALL        parse the begin line with getnword() instead of sscanf().
X *   format       on UNIX this is simply printf; the other systems use the
X *                private format() defined at the end of the file.
X *   TEST_LENGTH  make decode() skip lines whose length does not match
X *                their count character instead of aborting on them.
X */
X#ifdef GEMDOS
X#define SYSNAME "gemdos"
X#define SMALL 1
X#endif
X#ifdef MSDOS
X#define SYSNAME "msdos"
X#define SMALL 1
X#endif
X#ifdef UNIX
X#define SYSNAME "unix"
X#define format printf
X#define TEST_LENGTH
X#endif
X
X#include <stdio.h>
X
X/*
X * Error(n) ends the program with exit status n; the GEMDOS variant calls
X * Bconin(2) first (presumably to wait for a key press -- confirm against
X * osbind.h).  WRITE is the fopen() mode used for the decoded output file.
X */
X#ifdef GEMDOS
X#include <osbind.h>
X#define Error(n)  { Bconin(2); exit(n); }
X#define WRITE	  "wb"
X#else
X#define Error(n)  exit(n)
X#define WRITE	  "w"
X#endif
X
X/* Endless loop; left with break, return or exit. */
X#define loop	while (1)
X
X/* Old-style declarations of the library routines that return pointers. */
Xextern FILE *fopen();
Xextern char *strcpy();
Xextern char *strcat();
X
Xchar *getnword();
X
X/*
X * MAXCHAR: number of entries in the character translation table.
X * LINELEN: size of every line buffer.
X * FILELEN: size of the input and output file name buffers.
X * NORMLEN: largest number of decoded bytes per line covered by cdlen[].
X */
X#define MAXCHAR 256
X#define LINELEN 256
X#define FILELEN 64
X#define NORMLEN 60	/* allows for 80 encoded chars per line */
X
X/*
X * Optional per-line sequence character: decode() expects it to run from
X * SEQMAX down to SEQMIN and then wrap around.
X */
X#define SEQMAX 'z'
X#define SEQMIN 'a'
X/* seqc: sequence character expected on the next line. */
Xchar seqc;
X/*
X * first, secnd: set while the first / second data line of a section is
X *               still to come (used to detect the sequence character).
X * check:        non-zero while sequence checking is active.
X * numl:         number of lines read so far from the current input.
X */
Xint first, secnd, check, numl;
X#ifdef TEST_LENGTH
X/* numlbad: line where the current run of rejected lines began, -1 if none. */
Xint numlbad = -1;
X#endif
X
X/* in: stream being decoded; out: file currently being written. */
XFILE *in, *out;
X/* pos: scratch pointer to the last word located by getnword(). */
Xchar *pos;
X/* Names of the current input file and of the current output file. */
Xchar ifname[FILELEN], ofname[FILELEN];
X/* Optional prefixes given with -s and -t; NULL when not given. */
Xchar *source = NULL, *target = NULL;
X/*
X * blank: character standing for the value 0, used to pad short lines.
X * part:  part letter read from the begin line of an included file.
X */
Xchar blank, part = '\0';
X/* partn: part letter last expected; lens: length of the last header line. */
Xint partn, lens;
X/*
X * debug, verify: set by -d and -v.  nochk: set by -n.
X * onedone: set once at least one section has been decoded.
X */
Xint debug = 0, nochk = 0, onedone = 0, verify = 0;
X/*
X * chtbl[c]: 6-bit value encoded by character c, or -1 if c is not valid.
X * cdlen[n]: length of an encoded line (count character included) that
X *           carries n decoded bytes; filled in by main() for 0..NORMLEN.
X */
Xint chtbl[MAXCHAR], cdlen[NORMLEN + 3];
X
Xmain(argc, argv) int argc; char *argv[];
X{
X	int mode;
X	register int i, j;
X	char *curarg;
X	char dest[FILELEN], buf[LINELEN];
X
X	if (argc < 2) {
X		format("Almost foolproof uudecode v3.4 (%s) 25-Nov-88\n",
X			SYSNAME);
X		format("\n");
X		format("Usage: uud [-n] [-d] [-v] [-s dir] [-t dir] input-file\n");
X		format("\n");
X		format("Option: -n -> No line sequence check\n");
X		format("Option: -d -> Debug/verbose mode\n");
X		format("Option: -v -> Verify all chars\n");
X		format("Option: -s + Source directory for all input files\n");
X		format("  (MUST be terminated by directory separator)\n");
X		format("Option: -t + Target directory for all output files\n");
X		format("  (MUST be terminated by directory separator)\n");
X		format("If input-file is - then stdin is used as input-file\n");
X		Error(1);
X	}
X
X	curarg = argv[1];
X	
X	while (curarg[0] == '-') {
X		if (((curarg[1] == 'd') || (curarg[1] == 'D')) &&
X		    (curarg[2] == '\0')) {
X			debug = 1;
X		} else if (((curarg[1] == 'v') || (curarg[1] == 'V')) &&
X			   (curarg[2] == '\0')) {
X			verify = 1;
X		} else if (((curarg[1] == 'n') || (curarg[1] == 'N')) &&
X			   (curarg[2] == '\0')) {
X			nochk = 1;
X		} else if (((curarg[1] == 't') || (curarg[1] == 'T')) &&
X			   (curarg[2] == '\0')) {
X			argv++;
X			argc--;
X			if (argc < 2) {
X				format("uud: Missing target directory.\n");
X				Error(15);
X			}
X			target = argv[1];
X			if (debug||verify)
X				format("Target dir = %s\n",target);
X		} else if (((curarg[1] == 's') || (curarg[1] == 'S')) &&
X			   (curarg[2] == '\0')) {
X			argv++;
X			argc--;
X			if (argc < 2) {
X				format("uud: Missing source directory.\n");
X				Error(15);
X			}
X			source = argv[1];
X			if (debug||verify)
X				format("Source dir = %s\n",source);
X		} else if (curarg[1] != '\0') {
X			format("uud: Unknown option <%s>\n", curarg);
X			Error(15);
X		} else
X			break;
X		argv++;
X		argc--;
X		if (argc < 2) {
X			format("uud: Missing file name.\n");
X			Error(15);
X		}
X		curarg = argv[1];
X	}
X
X	if ((curarg[0] == '-') && (curarg[1] == '\0')) {
X		in = stdin;
X		strcpy(ifname, "<stdin>");
X	} else {
X		if (source != NULL) {
X			strcpy(ifname, source);
X			strcat(ifname, curarg);
X		} else
X			strcpy(ifname, curarg);
X		if ((in = fopen(ifname, "r")) == NULL) {
X			format("uud: Can't open %s\n", ifname);
X			Error(2);
X		}
X		numl = 0;
X	}
X
X/*
X * Set up the default translation table.
X */
X	for (i = 0; i < ' '; i++) chtbl[i] = -1;
X	for (i = ' ', j = 0; i < ' ' + 64; i++, j++) chtbl[i] = j;
X	for (i = ' ' + 64; i < MAXCHAR; i++) chtbl[i] = -1;
X	chtbl['`'] = chtbl[' '];	/* common mutation */
X	chtbl['~'] = chtbl['^'];	/* an other common mutation */
X	blank = ' ';
X/*
X * set up the line length table, to avoid computing lotsa * and / ...
X */
X	cdlen[0] = 1;
X	for (i = 1, j = 5; i <= NORMLEN; i += 3, j += 4)
X		cdlen[i] = (cdlen[i + 1] = (cdlen[i + 2] = j));
X/*
X * search for header or translation table line.
X */
X	loop {	/* master loop for multiple decodes in one file */
X		partn = 'a';
X		loop {
X			if (fgets(buf, sizeof buf, in) == NULL) {
X				if (onedone) {
X					if (debug||verify)
X					   format("End of file.\n");
X					exit(0);
X				} else {
X					format("uud: No begin line.\n");
X					Error(3);
X				}
X			}
X			numl++;
X			if (strncmp(buf, "table", 5) == 0) {
X				gettable();
X				continue;
X			}
X			if (strncmp(buf, "begin", 5) == 0) {
X				break;
X			}
X		}
X		lens = strlen(buf);
X		if (lens) buf[--lens] = '\0';
X#ifdef SMALL
X		if ((pos = getnword(buf, 3))) {
X			strcpy(dest, pos);
X		} else
X#else
X		if(sscanf(buf,"begin%o%s", &mode, dest) != 2)
X#endif
X		{
X			format("uud: Missing filename in begin line.\n");
X			Error(10);
X		}
X
X		if (target != NULL) {
X			strcpy(ofname, target);
X			strcat(ofname, dest);
X		} else
X			strcpy(ofname, dest);
X
X		if((out = fopen(ofname, WRITE)) == NULL) {
X			format("uud: Cannot open output file: %s\n", ofname);
X			Error(4);
X		}
X		if (debug||verify) format("Begin uudecoding: %s\n", ofname);
X		seqc = SEQMAX;
X		check = nochk ? 0 : 1;
X		first = 1;
X		secnd = 0;
X		decode();
X		fclose(out);
X#ifdef UNIX
X		chmod(ofname, mode);
X#endif
X		onedone = 1;
X		if (debug||verify) format("End uudecoding: %s\n", ofname);
X	}	/* master loop for multiple decodes in one file */
X}
X
X/*
X * Return a pointer to the first character of the nth blank- or
X * tab-separated word of str (n counts from 1), or NULL when the string
X * ends before that word starts.
X */
Xchar *getnword(str, n) register char *str; register int n;
X{
X	register char *p = str;
X
X	for (;;) {
X		/* skip the blanks in front of the current word */
X		while (*p == ' ' || *p == '\t')
X			p++;
X		if (*p == '\0')
X			return NULL;
X		if (--n == 0)
X			return p;
X		/* step over the word itself */
X		while (*p != '\0' && *p != ' ' && *p != '\t')
X			p++;
X		if (*p == '\0')
X			return NULL;
X	}
X}
X
X/*
X * Install the table in memory for later use.
X *
X * Called after a "table" line has been read.  Clears chtbl[], then reads
X * further lines from `in` and gives each character met the next value
X * 0, 1, 2, ... until 64 characters have been seen.  The character that
X * receives value 0 also becomes `blank`.  Trailing blanks of a line are
X * not part of the table.
X *
X * Ends the program through Error() on end of file (5), on a "begin" line
X * met before the table is complete (6) and on a repeated character (7).
X */
Xgettable()
X{
X	char buf[LINELEN];
X	register int c, n = 0;
X	register int len;
X	register char *cpt;
X
X	if (debug) format("Getting new table.\n");
X
X	/* chtbl[] has MAXCHAR entries, numbered 0 .. MAXCHAR - 1 */
X	for (c = 0; c < MAXCHAR; c++) chtbl[c] = -1;
X
X	for (;;) {
X		if (fgets(buf, sizeof buf, in) == NULL) {
X			format("uud: EOF while in translation table.\n");
X			Error(5);
X		}
X		numl++;
X		if (strncmp(buf, "begin", 5) == 0) {
X			format("uud: Incomplete translation table.\n");
X			Error(6);
X		}
X		/*
X		 * Drop the newline, then the trailing blanks; len keeps the
X		 * scan inside buf even when the whole line is blank.
X		 */
X		len = strlen(buf);
X		if (len > 0 && buf[len - 1] == '\n') buf[--len] = '\0';
X		while (len > 0 && buf[len - 1] == ' ') buf[--len] = '\0';
X
X		for (cpt = buf; *cpt != '\0'; cpt++) {
X			/* mask: a char may be signed, a subscript may not */
X			c = *cpt & 0xff;
X			if (chtbl[c] != -1) {
X				format("uud: Duplicate char in translation table.\n");
X				Error(7);
X			}
X			if (n == 0) blank = c;
X			chtbl[c] = n++;
X			if (n >= 64)
X			{
X			    if(debug) format("End of getting table.\n");
X			    return;
X			}
X		}
X	}
X}
X
X/*
X * Testing functions
X */
X
X#ifdef TEST_LENGTH
Xvalid_length(l,n)
X     int l,n;
X{
X    int k, resp;
X    k = cdlen[n];
X    /*    k = n % 3;
X     *    k = ( n + (k ? 3-k : 0) )*4/3;
X     */
X    resp = ( (l-k >= 0) && (l-k <= 1) );
X    if(debug && !resp && !verify)
X    format("Bad length at line %d\n", numl);
X    return(resp);
X}
X
Xvalid_char(c)
X     char c;
X{
X    int resp;
X    /*
X     * DON'T FORGET TO MODIFY THE TEST IN valid_chars TOO !
X     */
X    resp = ( (chtbl[c]<0) ? 0 : 1 );
X    if(debug && !resp && !verify)
X    format("Bad char <%c> at line %d\n", c, numl);
X    return(resp);
X}
X#endif
X
Xvalid_chars(s,l)
X     char *s;
X     int l;
X{
X    int resp, k = 0;
X    /*
X     * DON'T FORGET TO MODIFY THE TEST IN valid_char TOO !
X     */
X    while( (k<l) && (chtbl[s[k]]>=0) ) k++;
X    resp = (k==l);
X    if(debug && !resp) format("%d char bad <%c> at line %d\n",
X			      k, s[k], numl);
X    return( resp );
X}
X
X/*
X * copy from in to out, decoding as you go along.
X *
X * Reads lines from `in` until a line starting with "end" is met, and
X * writes the decoded bytes to `out`.  A line starting with "include"
X * switches the input through getfile().  Empty lines are ignored.
X *
X * With TEST_LENGTH defined, a line is decoded only if its count character
X * is valid and its length matches that count; any other line is skipped,
X * and runs of skipped lines are reported in debug/verify mode.  Without
X * TEST_LENGTH an invalid count character ends the program (status 11).
X *
X * Also ends the program on end of file before "end" (8), on a sequence
X * character mismatch (18) and on a write error (18).
X */
X
Xdecode()
X{
X	char buf[LINELEN], outl[LINELEN];
X	register char *bp, *ut;
X	register int *trtbl = chtbl;
X	register int n, c, rlen;
X	register unsigned int len;
X#ifdef TEST_LENGTH
X	int efflen;
X#endif
X
X	loop {
X		if (fgets(buf, sizeof buf, in) == NULL) {
X			format("uud: EOF before end.\n");
X			fclose(out);
X			Error(8);
X		}
X		numl++;
X		len = strlen(buf);
X		if (len) buf[--len] = '\0';
X/*
X * Is it an unprotected empty line before the end line ?
X */
X		if (len == 0) continue;
X/*
X * Get the binary line length.  Every table subscript in this function is
X * masked with 0xff: a plain char may be signed, and a character with the
X * high bit set would otherwise index in front of the table.
X */
X		n = trtbl[buf[0] & 0xff];
X/*
X * end of uuencoded file ?
X */
X		if (strncmp(buf, "end", 3) == 0) return;
X/*
X * end of current file ? : get next one.
X */
X		if (strncmp(buf, "include", 7) == 0) {
X			getfile(buf);
X			continue;
X		}
X#ifdef TEST_LENGTH
X/*
X * good length ?
X */
X		efflen = len;
X
X		/* Suppress bad chars at the end */
X		while(   (n >= 0)
X		      && (efflen > 0)
X		      && ! valid_char(buf[efflen-1])
X		      ) efflen--;
X
X		/* counts above NORMLEN have no entry in cdlen[] */
X		if(   (n >= 0)
X		   && (n <= NORMLEN)
X		   && (efflen > 0)
X		   && valid_length(efflen,n)
X		   && ((debug||verify)
X		       ? valid_chars(buf,efflen)
X		       : valid_char(buf[efflen/2]))
X		   )
X		{
X		    if (debug||verify) {
X			if((numlbad!=-1) && (numlbad<numl-1))
X			{
X			    format("Bad lines %d--%d\n",
X				   numlbad, numl-1);
X			}
X		    }
X		    numlbad = -1;
X		    goto decod;
X		}
X
X		/* rejected line: remember where the run of bad lines began */
X		if(numlbad<0) {
X		    numlbad = numl;
X		    if (debug)
X		    {
X			format("Bad line %d =%s\n", numl, buf);
X			format("n=%d --> %d, len=%d\n", n,
X			       ((n >= 0) && (n <= NORMLEN)) ? cdlen[n] : -1,
X			       len);
X		    }
X		}
X		continue;
X#else /* !TEST_LENGTH */
X		if ((n >= 0) && (n <= NORMLEN)) goto decod;
X		format("uud: Bad prefix line %d in file: %s\n",numl, ifname);
X		if (debug) format("Bad line =%s\n",buf);
X		Error(11);
X#endif /* TEST_LENGTH */
X/*
X * Expected length of an encoded line carrying n bytes.
X */
Xdecod:		rlen = cdlen[n];
X/*
X * Is it the empty line before the end line ?
X */
X		if (n == 0) continue;
X/*
X * Pad with blanks.
X */
X		for (bp = &buf[c = len];
X			c < rlen; c++, bp++) *bp = blank;
X
X#if  !defined(TEST_LENGTH)
X/*
X * Verify if asked for.
X */
X		if (debug||verify) {
X			for (len = 0, bp = buf; len < rlen; len++) {
X				if (trtbl[*bp & 0xff] < 0) {
X					format(
X	"Non uuencoded char <%c>, line %d in file: %s\n", *bp, numl, ifname);
X					format("Bad line =%s\n",buf);
X					Error(16);
X				}
X				bp++;
X			}
X		}
X#endif
X
X/*
X * All this just to check for uuencodes that append a 'z' to each line....
X * The check stays on only if the first line ends with SEQMAX and the
X * second one does not.
X */
X		if (secnd && check) {
X			secnd = 0;
X			if (buf[rlen] == SEQMAX) {
X				check = 0;
X				if (debug) format("Sequence check turned off (2).\n");
X			} else
X				if (debug) format("Sequence check on (2).\n");
X		} else if (first && check) {
X			first = 0;
X			secnd = 1;
X			if (buf[rlen] != SEQMAX) {
X				check = 0;
X				if (debug) format("No sequence check (1).\n");
X			} else
X				if (debug) format("Sequence check on (1).\n");
X		}
X/*
X * There we check.
X */
X		if (check) {
X			if (buf[rlen] != seqc) {
X				format("uud: Wrong sequence line %d in %s\n",
X					numl, ifname);
X				if (debug)
X					format(
X	"Sequence char is <%c> instead of <%c>.\n", buf[rlen], seqc);
X				Error(18);
X			}
X			seqc--;
X			if (seqc < SEQMIN) seqc = SEQMAX;
X		}
X/*
X * Turn each group of 4 input characters, pointed to by bp, into 3 bytes
X * stored in outl.  n counts the bytes still to produce, so that the last
X * group of a line may yield only one or two of them.
X */
X		ut = outl;
X		len = n;
X		bp = &buf[1];
X		while (n > 0) {
X			*(ut++) = trtbl[bp[0] & 0xff] << 2 |
X				  trtbl[bp[1] & 0xff] >> 4;
X			n--;
X			if (n) {
X				*(ut++) = (trtbl[bp[1] & 0xff] << 4) |
X					  (trtbl[bp[2] & 0xff] >> 2);
X				n--;
X			}
X			if (n) {
X				*(ut++) = trtbl[bp[2] & 0xff] << 6 |
X					  trtbl[bp[3] & 0xff];
X				n--;
X			}
X			bp += 4;
X		}
X		/* anything short of len bytes written is a failure */
X		if (fwrite(outl, 1, len, out) != len) {
X			format("uud: Error on writing decoded file.\n");
X			Error(18);
X		}
X	}
X}
X
X/*
X * Find the next needed file, if existing, otherwise try further
X * on next file.
X *
X * buf holds the "include" line on entry (newline already removed) and is
X * reused as line buffer, so it must offer LINELEN bytes.  The second word
X * of that line is the file name, prefixed with the -s directory if any.
X * When the file is not readable, reading goes on in the current input;
X * otherwise `in` is reopened on it and numl restarts at 0.  Lines are then
X * skipped up to the next "begin" line ("table" lines are honoured), whose
X * third word must start with the expected part letter: partn is advanced
X * by one, wrapping from 'z' to 'a', and compared with it.
X *
X * Ends the program through Error() with status 17 (no or too long file
X * name), 9 (freopen failed), 12 (no begin line), 13 (no part name) or
X * 14 (wrong part letter).
X *
X * NOTE(review): the file name comes straight from the input data and is
X * opened as given; only its length is checked here.
X */
Xgetfile(buf) register char *buf;
X{
X	if ((pos = getnword(buf, 2)) == NULL) {
X		format("uud: Missing include file name.\n");
X		Error(17);
X	}
X	/* ifname[] holds FILELEN bytes: refuse names that would overrun it */
X	if ((source != NULL ? strlen(source) : 0) + strlen(pos) >= FILELEN) {
X		format("uud: Include file name too long.\n");
X		Error(17);
X	}
X	if (source != NULL) {
X		strcpy(ifname, source);
X		strcat(ifname, pos);
X	} else
X		strcpy(ifname, pos);
X#ifdef GEMDOS
X	if (Fattrib(ifname, 0, 0) < 0)
X#else
X	if (access(ifname, 04))
X#endif
X	{
X		if (debug) {
X			format("Cant find: %s\n", ifname);
X			format("Continuing to read same file.\n");
X		}
X	}
X	else {
X		if (freopen(ifname, "r", in) == in) {
X			numl = 0;
X			if (debug||verify) 
X				format("Reading next section from: %s\n", ifname);
X		} else {
X			format("uud: Freopen abort: %s\n", ifname);
X			Error(9);
X		}
X	}
X	loop {
X		if (fgets(buf, LINELEN, in) == NULL) {
X			format("uud: No begin line after include: %s\n", ifname);
X			Error(12);
X		}
X		numl++;
X		if (strncmp(buf, "table", 5) == 0) {
X			gettable();
X			continue;
X		}
X		if (strncmp(buf, "begin", 5) == 0) break;
X	}
X	lens = strlen(buf);
X	if (lens) buf[--lens] = '\0';
X/*
X * Check the part suffix.
X */
X	if ((pos = getnword(buf, 3)) == NULL ) {
X		format("uud: Missing part name, in included file: %s\n", ifname);
X		Error(13);
X	} else {
X		part = *pos;
X		partn++;
X		if (partn > 'z') partn = 'a';
X		if (part != partn) {
X			format("uud: Part suffix mismatch: <%c> instead of <%c>.\n",
X				part, partn);
X			Error(14);
X		}
X		if (debug||verify) format("Reading part %c\n", *pos);
X	}
X}
X
X#ifndef UNIX
X/*
X * Printf style formatting. (Borrowed from MicroEmacs by Dave Conroy.)
X * A lot smaller than the full fledged printf.
X *
X * fp is the format string.  The values to print are not named: doprnt()
X * is handed the address of the first one (args) and steps through memory
X * from there.
X * NOTE(review): this assumes that the arguments of a call lie one after
X * the other in memory, in call order -- presumably true for the GEMDOS and
X * MSDOS compilers this was written for; confirm before using it elsewhere.
X * Only compiled when UNIX is not defined; on UNIX format is printf.
X */
X/* VARARGS1 */
Xformat(fp, args) char *fp;
X{
X	doprnt(fp, (char *)&args);
X}
X
Xdoprnt(fp, ap)
Xregister char	*fp;
Xregister char	*ap;
X{
X	register int	c, k;
X	register char	*s;
X
X	while ((c = *fp++) != '\0') {
X		if (c != '%')
X			outc(c);
X		else {
X			c = *fp++;
X			switch (c) {
X			case 'd':
X				puti(*(int *)ap, 10);
X				ap += sizeof(int);
X				break;
X
X			case 's':
X				s = *(char **)ap;
X				while ((k = *s++) != '\0')
X					outc(k);
X				ap += sizeof(char *);
X				break;
X
X			case 'c':
X				outc(*(int *)ap);
X				ap += sizeof(int);
X				break;
X
X			default:
X				outc(c);
X			}
X		}
X	}
X}
X
X/*
X * Print the unsigned value i in radix r through outc(), most significant
X * digit first.  Digits above 9 are printed as 'A', 'B', ...
X */
Xputi(i, r)
Xregister unsigned int	i;
Xregister unsigned int	r;
X{
X	register unsigned int	digit;
X
X	/* the leading digits, if any, go out first */
X	if (i >= r)
X		puti(i / r, r);
X	digit = i % r;
X	if (digit > 9)
X		outc(digit - 10 + 'A');
X	else
X		outc(digit + '0');
X}
Xoutc(c) register char c;
X{
X#ifdef GEMDOS
X	if (c == '\n') Bconout(2, '\r');
X	Bconout(2, c);
X#else
X	putchar(c);
X#endif
X}
X
X#endif
END_OF_FILE
if test 14887 -ne `wc -c <'myuud.c'`; then
    echo shar: \"'myuud.c'\" unpacked with wrong size!
fi
# end of 'myuud.c'
fi
if test -f 'sortuud' -a "${1}" != "-c" ; then 
  echo shar: Will not clobber existing file \"'sortuud'\"
else
echo shar: Extracting \"'sortuud'\" \(515 characters\)
sed "s/^X//" >'sortuud' <<'END_OF_FILE'
X#!/bin/sh
X#
X# Concatenate article files, ordered by Subject, into the uudecoder.
X# Usage: sortuud file1 file2 file3 ...
X# UUDCOMMAND names the decoder (default: myuud).
X# UUDOPTIONS holds its options (default: -v).
X# /dev/null makes grep print the file name even for a single file.
X
Xcat `
X  grep '^Subject: ' ${*} /dev/null |
X  sed -e '
X    s/ * /_/g
X    s/_/ /
X    s/^\([^ ]* \)_*Subject_*_/\1/
X    s/ _*Re:_*_//g
X    s/:/ /g
X    ' |
X  sort -f -k 2 |
X  sed -e 's/ .*$//'
X` |
X${UUDCOMMAND-myuud} ${UUDOPTIONS--v} -
END_OF_FILE
if test 515 -ne `wc -c <'sortuud'`; then
    echo shar: \"'sortuud'\" unpacked with wrong size!
fi
chmod +x 'sortuud'
# end of 'sortuud'
fi
echo shar: End of shell archive.
echo "Good luck !"
exit 0