silvert@dalcs.UUCP (01/25/87)
Given the number of complaints about defective uuencoded files that appear in this newsgroup, I ran a small program I wrote called wstat to see what uuencoded files look like. When wstat is called with the -U option it reports the line width statistics for all uuencoded lines, i.e., it reads through a file and checks all lines between the begin and end lines (it also checks for Royar's include lines). In a good file with end check byte all lines are 62 bytes long. I'm not a uuencode guru, but I think that with the possible exception of blank lines and the short end line generated by some uuencodes, all lines should be the same length. Here is what all of the uuencoded files in our ST news directory looked like when I just ran this. And, the ones that look bad (a distribution of line lengths!) are bad -- I found out the hard way, which is why I wrote wstat (the source code is appended to the stats data). I'm posting this so that the posters can see what happened to their postings, and perhaps the pattern will help someone improve the situation. We are losing a large fraction, close to 50%, of the uuencoded postings (the 50% that are OK mostly come from Moshe Braner!). The format is two lines that identify the posting (a grep of the begin line), followed by the length distribution, with totally blank lines of length 0 deleted. Thus NITE.PRG had 14 uuencoded lines out of 51, 11 of which were of length 61 bytes, one of length 21, and one of length one (presumably a single blank on the line before the end). I haven't checked all of these files, but I've marked those which I know were bad when uudecoded. 
************************ 716:begin 644 NITE.PRG --- SEEMS TO WORK OK --- wstat stats for file 716: 1 1 21 1 61 11 ------ ------ 51 lines with 2116 characters 733:begin 644 dash.arc *** BAD *** wstat stats for file 733: 41 1 60 9 61 170 ------ ------ 223 lines with 12568 characters 734:begin 644 football.arc --- SEEMS TO WORK OK --- wstat stats for file 734: 1 1 53 1 61 642 ------ ------ 661 lines with 40402 characters 737:begin 644 flexcopy.arc wstat stats for file 737: 1 1 9 1 61 87 ------ ------ 113 lines with 6151 characters 754:begin 644 spacewar.arc --- SEEMS TO WORK OK --- wstat stats for file 754: 1 1 33 1 61 776 ------ ------ 862 lines with 50808 characters 758:begin 644 dtr.arc wstat stats for file 758: 1 1 41 1 61 157 ------ ------ 181 lines with 10596 characters 778:begin 644 disasmbl.arc *** BAD *** wstat stats for file 778: 59 2 60 7 61 321 ------ ------ 354 lines with 21279 characters 823:begin 644 barrel.arc --- SEEMS TO WORK OK --- wstat stats for file 823: 1 1 61 244 ------ ------ 279 lines with 16496 characters 832:begin 777 NIGHT.ARC *** BAD *** wstat stats for file 832: 2 1 42 1 59 4 60 5 61 30 62 77 ------ ------ 161 lines with 8972 characters 835:begin 777 dircnt.arc wstat stats for file 835: 2 1 10 1 62 242 ------ ------ 266 lines with 16132 characters 836:begin 644 print.ttp --- SEEMS TO WORK OK --- wstat stats for file 836: 1 1 13 1 61 36 ------ ------ 73 lines with 3455 characters #### Here is the program that collects the stats with the -U options: /* wstat -- compile statistics on the width distribution of lines in files */ #include <stdio.h> #include <ctype.h> #define OPT static char SCCSID[] = "@(#)wstat.c Ver. 
1.4, 87/01/22 15:05:30"; char *progname, *filename; int uflag=0, uu=0; /* turn on uuencode check */ main(argc,argv) int argc; char *argv[]; { FILE *efopen(), *fp; #ifdef OPT int c, getopt(); extern int optind; extern char *optarg; progname = *argv; while((c = getopt(argc, argv, "Uu")) != EOF) switch(c) { case 'U': uflag+=2; break; case 'u': uflag++; break; default: help(c); exit(1); } #else int optind = 1; progname = *argv; #endif switch(argc - optind) { case 0: filename = ""; process(stdin); break; default: for(; optind<argc; optind++) { filename = argv[optind]; printf("\n%s stats for file %s:\n", progname, filename); fp = efopen(filename, "r"); process(fp); efclose(fp); } } if(uu % 2) printf("\n%s: Not a complete set of uuencoded files!\007\n", progname); exit(0); } #ifdef OPT help(c) char c; { fprintf(stderr, "%s: Options are:\n", progname); fprintf(stderr, "\t-U\tOnly count uuencoded lines\n"); fprintf(stderr, "\t-u\tCount uuencoded lines\n"); } #endif #define MAXWIDTH 256 process(fp) FILE *fp; { int count[MAXWIDTH], i, strlen(), bytes, lines; int uucount[MAXWIDTH]; char *buffer[MAXWIDTH]; for(i=0; i< MAXWIDTH; i++) { count[i] = 0; uucount[i] = 0; } bytes = 0; lines = 0; while(fgets(buffer, MAXWIDTH, fp)) { i = strlen(buffer); bytes += i--; count[i]++; lines ++; /* here is the code for use with uuencoded files */ if(uflag) { if((!strncmp(buffer, "end", 3) || !strncmp(buffer, "include", 7)) && uu % 2 ) uu++; if( uu % 2 ) uucount[i]++; if(!strncmp(buffer, "begin", 5)) { if( uu % 2 ) printf("\nDuplicate begin line in %s\077\n", filename); else uu++; } } } for(i=0; i< MAXWIDTH; i++) if( (uflag < 2) ? count[i] : uucount[i] ) { printf("%6d", i); if(uflag < 2) printf("\t%6d", count[i]); if(uflag && uucount[i]) printf("\t%6d\n", uucount[i]); else putchar('\n'); } printf("------\t------\n"); printf("\t%6d lines with %d characters\n", lines, bytes); if( uu % 2 ) printf("*** %s is not a complete uuencoded file ***\n", filename); }