rob@mtdiablo.Concord.CA.US (Rob Bernardo) (03/18/91)
I've also had some difficulty lately with 'out of sync' unbatching problems. Unfortunately, Eamonn McManus's patchbatch didn't work. Below is a shell archive for a more robust program to fix batches with bad article character counts. Accompanying it are a makefile and a wrapper shell script, both of which you may need to tailor to your systems. The main significant difference between my batch fixer and E.M.'s is that mine does not presume that the correct article size is close to the article size given in the bad news batch file. Mine goes through the batch finding each article and recalculating its size from scratch. The syntax of the command line is dorebatch [-v] -i input_file -o output_file where the input file is a compressed newsbatch file (e.g. as found in your in.coming/bad directory) and where the output file is not compressed. The wrapper shell script, rebatch, will take care of compressing the output file (so that you can just wait till cron next runs input/newsrun). I wrote this real quick this morning, and it worked for me. Please let me know of any problems or suggestions.
#!/bin/sh # shar: Shell Archiver (v1.22) # Packed Sun Mar 17 18:30:16 PST 1991 by mtdiablo!rob # from directory /usr/local/src/rebatch # # Run the following text with /bin/sh to create: # Makefile # rebatch # dorebatch.c # if test -f Makefile; then echo "File Makefile exists"; else echo "x - extracting Makefile (Text)" sed 's/^X//' << 'SHAR_EOF' > Makefile && XNEWSOWNER=bin XNEWSGRP=bin XMODE=755 XNEWSLIB=/usr/local/lib X Xall: dorebatch X Xinstall: dorebatch X install -g ${NEWSGRP} -o ${NEWSOWNER} -m ${MODE} rebatch ${NEWSLIB}/newsbin/batch X install -g ${NEWSGRP} -o ${NEWSOWNER} -m ${MODE} dorebatch ${NEWSLIB}/newsbin/batch SHAR_EOF chmod 0644 Makefile || echo "restore of Makefile fails" set `wc -c Makefile`;Sum=$1 if test "$Sum" != "262" then echo original size 262, current size $Sum;fi fi if test -f rebatch; then echo "File rebatch exists"; else echo "x - extracting rebatch (Text)" sed 's/^X//' << 'SHAR_EOF' > rebatch && X#! /bin/sh X# =()<. ${NEWSCONFIG-@<NEWSCONFIG>@}>()= X. ${NEWSCONFIG-/usr/local/lib/news/bin/config} Xumask $NEWSUMASK X XPATH=$NEWSCTL/bin:$NEWSBIN/batch:$NEWSBIN:$NEWSPATH ; export PATH Xfor file in ${NEWSARTS}/in.coming/bad/* Xdo X barename=`basename $file` X newbatch=${NEWSARTS}/in.coming/$barename X echo "rebatching $barename" X dorebatch -i $file -o $newbatch && compress $newbatch && mv $newbatch.Z $newbatch && rm $file Xdone SHAR_EOF chmod 0644 rebatch || echo "restore of rebatch fails" set `wc -c rebatch`;Sum=$1 if test "$Sum" != "437" then echo original size 437, current size $Sum;fi fi if test -f dorebatch.c; then echo "File dorebatch.c exists"; else echo "x - extracting dorebatch.c (Text)" sed 's/^X//' << 'SHAR_EOF' > dorebatch.c && X#define ZCAT "zcat" X#define RNEWSSTRING "#! rnews" X X#include <stdio.h> X Xint verbose; Xextern int errno; Xextern int sys_nerr; Xextern char *sys_errlist[]; X X#define ERRNO_MSG (errno < sys_nerr ? 
sys_errlist[errno] : "unknown errno") X Xmain(argc, argv) Xint argc; Xchar **argv; X{ X char zfile[BUFSIZ], command[BUFSIZ], dataline[BUFSIZ]; X char *infile = NULL, *outfile = NULL, *arttempfile; X FILE *infp, *outfp, *arttempfp; X int eofreached, optret, rnewslen, charcnt, artcnt, argerr; X extern int optind; X extern char *optarg; X X /* Process command line */ X argerr = 0; X while ((optret = getopt(argc, argv, "vi:o:")) != -1) { X switch (optret) { X case 'v': X verbose = 1; X break; X case 'i': X infile = optarg; X break; X X case 'o': X outfile = optarg; X break; X X default: X argerr++; X } X } X X if(!infile || !outfile || argerr) { X fprintf(stderr, "%s: usage: %s -i [infile] -o [outfile]\n", X argv[0], argv[0]); X exit(-1); X } X X /* Open data stream of infile uncompressed */ X strcpy(zfile, infile); X strcat(zfile, ".Z"); X if(link(infile, zfile) == 1) { X fprintf(stderr, "%s: link(%s, %s): %s\n", argv[0], X infile, zfile, ERRNO_MSG); X exit(errno); X } X sprintf(command, "%s %s\n", ZCAT, zfile); X if((infp = popen(command, "r")) == NULL) { X fprintf(stderr, "%s: popen(%s, \"r\") failed\n", argv[0], command); X unlink(zfile); X exit(errno?errno:-1); X } X X /* Open output file */ X if((outfp = fopen(outfile, "w+")) == NULL) { X fprintf(stderr, "%s: fopen(%s, \"w+\") failed\n", argv[0], outfile); X pclose(infp); X unlink(zfile); X exit(errno?errno:-1); X } X X /* Get file name for temp file to hold each article */ X arttempfile = tmpnam(NULL); X X /* Verify first line is rnews */ X if(fgets(dataline, BUFSIZ, infp) == NULL) { X fprintf(stderr, "%s: premature end of file %s\n", argv[0], infile); X pclose(infp); X unlink(zfile); X exit(errno?errno:-1); X } X rnewslen = strlen(RNEWSSTRING); X if(strncmp(dataline, RNEWSSTRING, rnewslen)) { X fprintf(stderr, "%s: file %s not a compressed news archive\n", X argv[0], infile); X pclose(infp); X unlink(zfile); X exit(errno?errno:-1); X } X eofreached = 0; X artcnt = 0; X X if(verbose) X printf("input = %s output 
= %s\n", infile, outfile); X X /* Process each article in batch */ X while(!eofreached) { X X /* Open temp file for storing next article */ X if((arttempfp = fopen(arttempfile, "w+")) == NULL) { X fprintf(stderr, "%s: fopen(%s, \"w+\") failed\n", argv[0], X arttempfile); X pclose(infp); X unlink(zfile); X exit(errno?errno:-1); X } X X /* Read a line from pipe. If oef or end of article, X * write rnews line with count on output file, X * copy article (in tempfile) to output file and quite loop. X * Otherwise append this next line of article to end X * of temp file and continue with next line from pipe. X */ X while (1) { X if(fgets(dataline, BUFSIZ, infp) == NULL) X eofreached = 1; X X if(eofreached||(!strncmp(dataline, RNEWSSTRING, rnewslen))) { X X charcnt = ftell(arttempfp); X rewind(arttempfp); X fprintf(outfp, "%s %d\n", RNEWSSTRING, charcnt); X if(verbose) X printf("article %d charcnt %d\n", artcnt, charcnt); X X while(fgets(dataline, BUFSIZ, arttempfp)) { X if(fputs(dataline, outfp) == EOF) { X fprintf(stderr, "%s: fputs to %s failed\n", X argv[0], outfile); X pclose(infp); X unlink(zfile); X fclose(arttempfp); X unlink(arttempfile); X fclose(outfp); X exit(errno?errno:-1); X } X charcnt -= strlen(dataline); X } X if(charcnt) X fprintf(stderr, "%s: error in count by %d\n", argv[0], X charcnt); X X fclose(arttempfp); X artcnt++; X break; X X } else X fputs(dataline, arttempfp); X } X } X X pclose(infp); X unlink(zfile); X unlink(arttempfile); X fclose(outfp); X exit(0); X} SHAR_EOF chmod 0644 dorebatch.c || echo "restore of dorebatch.c fails" set `wc -c dorebatch.c`;Sum=$1 if test "$Sum" != "3789" then echo original size 3789, current size $Sum;fi fi exit 0 -- Rob Bernardo Mt. Diablo Software Solutions email: rob@mtdiablo.Concord.CA.US phone: (415) 827-4301
em@dce.ie (Eamonn McManus) (03/26/91)
rob@mtdiablo.Concord.CA.US (Rob Bernardo) writes: >I've also had some difficulty lately with 'out of sync' unbatching >problems. Unfortunately, Eamonn McManus's patchbatch didn't work. >Below is a shell archive for a more robust program to fix batches >with bad article character counts. There are advantages and disadvantages to each of our programs. Patchbatch is designed to be run automatically on all incoming batches, whereas Rob's program (rebatch) is to be run by hand on known bad batches. Running automatically from newsrun means that the fixer doesn't have to worry about decompression and the like. The reason I wrote patchbatch to fish around in the vicinity of the supposed article end, rather than scanning through every line as rebatch does, was that it provides a greater degree of transparency. If an article happens to contain the string "#! rnews" at the beginning of a line, rebatch will assume it ends there. Patchbatch is only susceptible to problems if an article contains such a string very near the end. Also, if an article is truncated in the middle of a line, so that the "#! rnews" of the following article is not preceded by a newline, rebatch will not find that article. Of course if it were changed to look for "#! rnews" anywhere in a line it would go ape on articles like this one. There is a problem with hacks like these, of striking a balance between fixing corrupt batches and leaving alone correct ones. Patchbatch stays closer to the latter at the expense of sometimes failing to do the former. However, I think people should try increasing the value of FUDGE before resorting to a more promiscuous program like rebatch. You might also need to change the size of the buf[] array when doing this; I can't remember if the version I posted had a magic constant 64 as the size (ugh). Another noteworthy difference between the programs is that patchbatch modifies the batch in place rather than creating a replacement. This means that it is much faster. 
In particular, if you only occasionally get corrupt batches you can afford to run patchbatch over every incoming batch, since there is very little overhead in checking through a correct batch. There is a theoretical problem, in that the size of an article may change from an n-digit number to an (n+1)-digit number, in which case patchbatch will fail. I never saw this happen in practice. , Eamonn