silvert@dalcs.UUCP (01/25/87)
Given the number of complaints about defective uuencoded files that
appear in this newsgroup, I ran a small program I wrote called wstat to
see what uuencoded files look like. When wstat is called with the -U
option it reports the line width statistics for all uuencoded lines,
i.e., it reads through a file and checks all lines between the begin and
end lines (it also checks for Royar's include lines). In a good file
with end check byte all lines are 62 bytes long. I'm not a uuencode
guru, but I think that with the possible exception of blank lines and
the short end line generated by some uuencodes, all lines should be the
same length. Here is what all of the uuencoded files in our ST news
directory looked like when I just ran this. And, the ones that look bad
(a distribution of line lengths!) are bad -- I found out the hard way,
which is why I wrote wstat (the source code is appended to the stats
data). I'm posting this so that the posters can see what happened to
their postings, and perhaps the pattern will help someone improve the
situation. We are losing a large fraction, close to 50%, of the
uuencoded postings (the 50% that are OK mostly come from Moshe Braner!).
The format is two lines that identify the posting (a grep of the begin
line), followed by the length distribution, with totally blank lines of
length 0 deleted. Thus NITE.PRG had 14 uuencoded lines out of 51, 11 of
which were of length 61 bytes, one of length 21, and one of just one byte
(presumably a single blank on the line before the end).
I haven't checked all of these files, but I've marked those which I know
were bad when uudecoded.
************************
716:begin 644 NITE.PRG --- SEEMS TO WORK OK ---
wstat stats for file 716:
1 1
21 1
61 11
------ ------
51 lines with 2116 characters
733:begin 644 dash.arc *** BAD ***
wstat stats for file 733:
41 1
60 9
61 170
------ ------
223 lines with 12568 characters
734:begin 644 football.arc --- SEEMS TO WORK OK ---
wstat stats for file 734:
1 1
53 1
61 642
------ ------
661 lines with 40402 characters
737:begin 644 flexcopy.arc
wstat stats for file 737:
1 1
9 1
61 87
------ ------
113 lines with 6151 characters
754:begin 644 spacewar.arc --- SEEMS TO WORK OK ---
wstat stats for file 754:
1 1
33 1
61 776
------ ------
862 lines with 50808 characters
758:begin 644 dtr.arc
wstat stats for file 758:
1 1
41 1
61 157
------ ------
181 lines with 10596 characters
778:begin 644 disasmbl.arc *** BAD ***
wstat stats for file 778:
59 2
60 7
61 321
------ ------
354 lines with 21279 characters
823:begin 644 barrel.arc --- SEEMS TO WORK OK ---
wstat stats for file 823:
1 1
61 244
------ ------
279 lines with 16496 characters
832:begin 777 NIGHT.ARC *** BAD ***
wstat stats for file 832:
2 1
42 1
59 4
60 5
61 30
62 77
------ ------
161 lines with 8972 characters
835:begin 777 dircnt.arc
wstat stats for file 835:
2 1
10 1
62 242
------ ------
266 lines with 16132 characters
836:begin 644 print.ttp --- SEEMS TO WORK OK ---
wstat stats for file 836:
1 1
13 1
61 36
------ ------
73 lines with 3455 characters
#### Here is the program that collects the stats with the -U option:
/* wstat -- compile statistics on the width distribution of lines in files */
#include <ctype.h>
#include <stdio.h>
#include <string.h>
/* When OPT is defined, main() parses its options with getopt(). */
#define OPT

/* SCCS "what" identification string for this source file. */
static char SCCSID[] = "@(#)wstat.c Ver. 1.4, 87/01/22 15:05:30";

/* argv[0]; used as the prefix of the messages the program prints. */
char *progname;

/* Name of the file currently being processed ("" when reading stdin). */
char *filename;

/* uuencode check level: each -u adds 1, each -U adds 2; 0 means off. */
int uflag = 0;

/* Incremented at every begin and end/include line; odd while inside one. */
int uu = 0;
/*
 * main -- parse the options, then report line-width statistics.
 *
 * With OPT defined, -u and -U are read with getopt(): each -u adds 1 to
 * uflag and each -U adds 2; any other option prints the help text and
 * ends the program with status 1.
 *
 * With no file arguments, stdin is processed (filename is set to "").
 * Otherwise every named file is announced with a header line, opened,
 * processed and closed in turn.  After the last input a warning is printed
 * if uu is still odd, i.e. a begin line was never matched by an end or
 * include line.
 *
 * NOTE(review): efopen() and efclose() are not defined in this part of the
 * file; the code assumes efopen() never returns NULL (presumably it reports
 * the failure and exits) -- confirm against its definition.
 *
 * Returns 0 on normal completion, 1 on an unrecognised option.
 */
int
main(argc, argv)
int argc;
char *argv[];
{
	FILE *efopen(), *fp;
#ifdef OPT
	int c, getopt();
	extern int optind;

	progname = *argv;
	/* POSIX getopt() reports the end of the options with -1, not EOF. */
	while ((c = getopt(argc, argv, "Uu")) != -1) {
		switch (c) {
		case 'U':
			uflag += 2;
			break;
		case 'u':
			uflag++;
			break;
		default:
			help(c);
			return 1;
		}
	}
#else
	int optind = 1;

	progname = *argv;
#endif

	if (argc - optind == 0) {
		/* No file operands: read standard input, without a header. */
		filename = "";
		process(stdin);
	} else {
		for (; optind < argc; optind++) {
			filename = argv[optind];
			printf("\n%s stats for file %s:\n", progname, filename);
			fp = efopen(filename, "r");
			process(fp);
			efclose(fp);
		}
	}

	/* uu is shared across files, so a begin in one file may be closed
	 * by an end/include in a later one; only the final parity matters. */
	if (uu % 2)
		printf("\n%s: Not a complete set of uuencoded files!\007\n",
		    progname);
	return 0;
}
#ifdef OPT
/*
 * help -- write the option summary to stderr.
 *
 * c is the option character handed over by the caller; it is not used.
 * Always returns 0 (the return type was previously implicit and no value
 * was returned); the caller in main() ignores the result.
 */
int
help(c)
char c;
{
	fprintf(stderr, "%s: Options are:\n", progname);
	fputs("\t-U\tOnly count uuencoded lines\n", stderr);
	fputs("\t-u\tCount uuencoded lines\n", stderr);
	return 0;
}
#endif
#define MAXWIDTH 256
/*
 * process -- tally and print the line-width distribution of one stream.
 *
 * Reads fp line by line with fgets() and histograms the width of every
 * line, where the width is the number of characters not counting the
 * trailing newline.  When the global uflag is nonzero a second histogram
 * is kept for the lines lying between a "begin" line and the next "end"
 * or "include" line (the delimiter lines themselves are not counted).
 * The global uu is odd while inside such a section; it is deliberately
 * not reset here, so the state carries over to the next stream.
 *
 * The report goes to stdout: one row per width that occurred -- the
 * all-lines column when uflag < 2, the uuencoded column when uflag is
 * nonzero and that count is nonzero -- followed by the total number of
 * lines and characters, and a warning if the stream ended inside a
 * begin...end section.
 *
 * Lines longer than MAXWIDTH-1 characters are read, and counted, in pieces.
 *
 * fp must be an open, readable stream.  Always returns 0; the callers in
 * main() ignore the value.
 */
int
process(fp)
FILE *fp;
{
	int count[MAXWIDTH];	/* all lines, indexed by width */
	int uucount[MAXWIDTH];	/* lines inside a begin...end section */
	char buffer[MAXWIDTH];	/* was an array of pointers by mistake */
	int i, width, lines;
	long bytes;		/* long: byte totals outgrow an int first */

	for (i = 0; i < MAXWIDTH; i++) {
		count[i] = 0;
		uucount[i] = 0;
	}
	bytes = 0;
	lines = 0;

	while (fgets(buffer, MAXWIDTH, fp) != NULL) {
		width = (int)strlen(buffer);
		bytes += width;
		/*
		 * Leave the newline out of the width, but only when it is
		 * really there: the last line of a file, or a piece of an
		 * over-long line, has none, and a line starting with a NUL
		 * byte yields an empty string (which used to index
		 * count[-1]).  width stays within 0..MAXWIDTH-1.
		 */
		if (width > 0 && buffer[width - 1] == '\n')
			width--;
		count[width]++;
		lines++;

		if (!uflag)
			continue;

		/* here is the code for use with uuencoded files */

		/* "end" or "include" closes an open section before the
		 * tally below, so the closing line is not counted. */
		if (uu % 2 && (strncmp(buffer, "end", 3) == 0 ||
		    strncmp(buffer, "include", 7) == 0))
			uu++;
		if (uu % 2)
			uucount[width]++;
		/* "begin" opens a section after the tally, so the begin
		 * line is not counted either.  (\077 is octal for '?'.) */
		if (strncmp(buffer, "begin", 5) == 0) {
			if (uu % 2)
				printf("\nDuplicate begin line in %s\077\n",
				    filename);
			else
				uu++;
		}
	}

	for (i = 0; i < MAXWIDTH; i++) {
		/* With -U only widths seen on uuencoded lines get a row. */
		if ((uflag < 2 ? count[i] : uucount[i]) == 0)
			continue;
		printf("%6d", i);
		if (uflag < 2)
			printf("\t%6d", count[i]);
		if (uflag && uucount[i])
			printf("\t%6d\n", uucount[i]);
		else
			putchar('\n');
	}
	printf("------\t------\n");
	printf("\t%6d lines with %ld characters\n", lines, bytes);
	if (uu % 2)
		printf("*** %s is not a complete uuencoded file ***\n",
		    filename);
	return 0;
}