[comp.sys.atari.st] uuencoded problem stats

silvert@dalcs.UUCP (01/25/87)
Given the number of complaints about defective uuencoded files that
appear in this newsgroup, I ran a small program I wrote called wstat to
see what uuencoded files look like.  When wstat is called with the -U
option it reports the line width statistics for all uuencoded lines,
i.e., it reads through a file and checks all lines between the begin and
end lines (it also checks for Royar's include lines).  In a good file
with end check byte all lines are 62 bytes long.  I'm not a uuencode
guru, but I think that with the possible exception of blank lines and
the short end line generated by some uuencodes, all lines should be the
same length.  Here is what all of the uuencoded files in our ST news
directory looked like when I just ran this.  And, the ones that look bad
(a distribution of line lengths!) are bad -- I found out the hard way,
which is why I wrote wstat (the source code is appended to the stats
data).  I'm posting this so that the posters can see what happened to
their postings, and perhaps the pattern will help someone improve the
situation.  We are losing a large fraction, close to 50%, of the
uuencoded postings (the 50% that are OK mostly come from Moshe Braner!).

The format is two lines that identify the posting (a grep of the begin
line), followed by the length distribution, with totally blank lines of
length 0 deleted.  Thus NITE.PRG had 14 uuencoded lines out of 51, 11 of
which were of length 61 bytes, one of length 21, and one of length just one
(presumably a single blank on the line before the end).

I haven't checked all of these files, but I've marked those which I know
were bad when uudecoded.
			************************
716:begin 644 NITE.PRG		--- SEEMS TO WORK OK ---
wstat stats for file 716:
     1	     1
    21	     1
    61	    11
------	------
	    51 lines with 2116 characters

733:begin 644 dash.arc		*** BAD ***
wstat stats for file 733:
    41	     1
    60	     9
    61	   170
------	------
	   223 lines with 12568 characters

734:begin 644 football.arc		--- SEEMS TO WORK OK ---
wstat stats for file 734:
     1	     1
    53	     1
    61	   642
------	------
	   661 lines with 40402 characters

737:begin 644 flexcopy.arc
wstat stats for file 737:
     1	     1
     9	     1
    61	    87
------	------
	   113 lines with 6151 characters

754:begin 644 spacewar.arc		--- SEEMS TO WORK OK ---
wstat stats for file 754:
     1	     1
    33	     1
    61	   776
------	------
	   862 lines with 50808 characters

758:begin 644 dtr.arc
wstat stats for file 758:
     1	     1
    41	     1
    61	   157
------	------
	   181 lines with 10596 characters

778:begin 644 disasmbl.arc		*** BAD ***
wstat stats for file 778:
    59	     2
    60	     7
    61	   321
------	------
	   354 lines with 21279 characters

823:begin 644 barrel.arc		--- SEEMS TO WORK OK ---
wstat stats for file 823:
     1	     1
    61	   244
------	------
	   279 lines with 16496 characters

832:begin 777 NIGHT.ARC		*** BAD ***
wstat stats for file 832:
     2	     1
    42	     1
    59	     4
    60	     5
    61	    30
    62	    77
------	------
	   161 lines with 8972 characters

835:begin 777 dircnt.arc
wstat stats for file 835:
     2	     1
    10	     1
    62	   242
------	------
	   266 lines with 16132 characters

836:begin 644 print.ttp		--- SEEMS TO WORK OK ---
wstat stats for file 836:
     1	     1
    13	     1
    61	    36
------	------
	    73 lines with 3455 characters

#### Here is the program that collects the stats with the -U option:

/* wstat -- compile statistics on the width distribution of lines in files */
#include <stdio.h>
#include <ctype.h>
/* OPT selects the getopt()-based option parsing in main() and compiles in help() */
#define OPT
/* version identification string; not referenced elsewhere in this listing */
static char SCCSID[] = "@(#)wstat.c	Ver. 1.4, 87/01/22 15:05:30";
/*
 * progname: argv[0], used as a prefix in messages.
 * filename: name of the file currently being processed ("" for stdin).
 */
char *progname, *filename;
/*
 * uflag: 0 gives plain width statistics; each -u adds 1 and each -U adds 2.
 *        Any non-zero value enables uuencode tracking in process(); a value
 *        of 2 or more makes process() print only the uuencoded-line counts.
 * uu:    bumped by process() at each "begin" line and at each "end" or
 *        "include" line seen while inside a section, so an odd value means
 *        "currently between begin and end".  It is not reset between files.
 */
int uflag=0, uu=0;	/* turn on uuencode check */

/*
 * main -- entry point of wstat.
 *
 * When built with OPT the -U and -u options are read with getopt(); any
 * other option prints the help text and exits with status 1.  After the
 * options, process() is run once per file named on the command line (each
 * preceded by a "stats for file" header), or once on standard input when
 * no file is named.  If the shared uuencode state is still "inside a
 * section" afterwards, a warning is printed.  Exit status is 0.
 *
 * NOTE(review): efopen() and efclose() are not defined in this listing;
 * presumably they are error-checking wrappers around fopen()/fclose() --
 * confirm against the rest of the program.
 */
main(argc,argv)
int argc;
char *argv[];
{
	FILE *efopen(), *fp;
#ifdef OPT
	int opt, getopt();
	extern int optind;
	extern char *optarg;

	progname = argv[0];

	/* -U counts double, so uflag >= 2 means "uuencoded lines only" */
	while((opt = getopt(argc, argv, "Uu")) != EOF) {
		if(opt == 'U')
			uflag += 2;
		else if(opt == 'u')
			uflag += 1;
		else {
			help(opt);
			exit(1);
		}
	}
#else
	int optind = 1;
	progname = argv[0];
#endif
	if(argc - optind == 0) {
		/* no file operands: read standard input, no header line */
		filename = "";
		process(stdin);
	} else {
		while(optind < argc) {
			filename = argv[optind];
			printf("\n%s stats for file %s:\n", progname, filename);
			fp = efopen(filename, "r");
			process(fp);
			efclose(fp);
			optind++;
		}
	}

	/* an odd uu means a begin line was never matched by end/include */
	if(uu % 2 != 0)
		printf("\n%s: Not a complete set of uuencoded files!\007\n",
			progname);
	exit(0);
}

#ifdef OPT
/*
 * help -- write the option summary to stderr.
 *
 * c is the option character handed over by the caller; it is accepted
 * but not used.  No value is returned.
 */
help(c)
char c;
{
	static char *usage[] = {
		"\t-U\tOnly count uuencoded lines\n",
		"\t-u\tCount uuencoded lines\n"
	};
	int k;

	fprintf(stderr, "%s: Options are:\n", progname);
	for(k = 0; k < (int)(sizeof usage / sizeof usage[0]); k++)
		fputs(usage[k], stderr);
}
#endif

#define MAXWIDTH 256	/* number of width buckets: widths 0 .. MAXWIDTH-1 */
#define HEADLEN 7	/* length of "include", the longest keyword tested */

/*
 * uuprefix -- return 1 when the NUL-terminated string s starts with
 * prefix, 0 otherwise.
 */
static int
uuprefix(s, prefix)
char *s, *prefix;
{
	while(*prefix)
		if(*s++ != *prefix++)
			return 0;
	return 1;
}

/*
 * process -- tally the line-width distribution of the stream fp and
 * print it on stdout.
 *
 * The width of a line is its number of characters, not counting the
 * terminating newline.  Lines are read one character at a time, so a
 * line of any length is counted exactly once, a final line without a
 * newline gets its true width, and NUL bytes inside a line are counted
 * like any other character.  Lines of MAXWIDTH or more characters are
 * all tallied in the last bucket (MAXWIDTH-1).
 *
 * When the global uflag is non-zero, lines lying between a "begin" line
 * and the following "end" or "include" line are tallied separately; the
 * begin/end/include lines themselves are not part of that tally.  The
 * global uu holds the inside/outside state (odd = inside) and is left
 * as is on return, so a section may continue in the next file.
 *
 * Output: one row per non-empty bucket (uflag < 2: width, total count
 * and, if any, uuencoded count; uflag >= 2: width and uuencoded count),
 * a separator, the line and character totals, and a warning naming the
 * global filename if the stream ended inside a section.
 *
 * Returns 0.
 */
int
process(fp)
FILE *fp;
{
	int count[MAXWIDTH], uucount[MAXWIDTH];
	char head[HEADLEN + 1];	/* first HEADLEN characters of the line */
	int c, i, width, slot, bytes, lines;

	for(i = 0; i < MAXWIDTH; i++) {
		count[i] = 0;
		uucount[i] = 0;
	}
	bytes = 0;
	lines = 0;

	while((c = getc(fp)) != EOF) {
		/* consume one line, remembering only its leading characters */
		width = 0;
		while(c != EOF && c != '\n') {
			if(width < HEADLEN)
				head[width] = c;
			width++;
			c = getc(fp);
		}
		head[width < HEADLEN ? width : HEADLEN] = '\0';

		bytes += width;
		if(c == '\n')
			bytes++;	/* the newline counts as a character */
		slot = (width < MAXWIDTH) ? width : MAXWIDTH - 1;
		count[slot]++;
		lines++;

		/* here is the code for use with uuencoded files */
		if(uflag) {
			/* close the section first so "end" itself is not tallied */
			if((uuprefix(head, "end") || uuprefix(head, "include"))
			    && uu % 2)
				uu++;
			if(uu % 2)
				uucount[slot]++;
			/* open the section last so "begin" itself is not tallied */
			if(uuprefix(head, "begin")) {
				/*
				 * NOTE(review): \077 is '?'.  main's warning
				 * uses \007 (bell); confirm which was meant.
				 */
				if(uu % 2)
				    printf("\nDuplicate begin line in %s\077\n",
								filename);
				else
				    uu++;
			}
		}
	}

	for(i = 0; i < MAXWIDTH; i++)
		if((uflag < 2) ? count[i] : uucount[i]) {
			printf("%6d", i);
			if(uflag < 2)
				printf("\t%6d", count[i]);
			if(uflag && uucount[i])
				printf("\t%6d\n", uucount[i]);
			else
				putchar('\n');
		}
	printf("------\t------\n");
	printf("\t%6d lines with %d characters\n", lines, bytes);
	if(uu % 2)
		printf("*** %s is not a complete uuencoded file ***\n",
				filename);
	return 0;
}