[news.software.b] unbatcher out of sync?

mcormier@micor.OCUnix.On.Ca (Michel B. Cormier) (01/11/91)

Hello networld:

   I remember that a few months back someone asked the same question and Henry
had a very good answer as to why we get an 'unbatcher out of sync' error
message on CNEWS.  However, I forgot the answer, and one of the sites I am
feeding is getting this error now!  Can someone remind me of the answer,
please?

Thanks

MBC
-- 
Michel B. Cormier              | UUCP: mcormier@micor.uucp
Computer Consultant            | Internet: mcormier@micor.OCUnix.On.Ca
DOS/Unix/MPE V/MPE XL          | BBS:   (613) 237-5077
=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=+=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=

henry@zoo.toronto.edu (Henry Spencer) (01/12/91)

In article <1991Jan11.001342.6553@micor.OCUnix.On.Ca> mcormier@micor.OCUnix.On.Ca (Michel B. Cormier) writes:
>... answer to why to we get a 'unbatcher out of sync' error 
>message on CNEWS.  However, I forgot the answer and one of the site I am 
>feeding is getting this error now!  Can someone remind me ...

It means "something's wrong with your batch":  relaynews did not find a
"#! rnews nnnnn" line where one should have been.  Typically this means
garbling during preparation or transmission.  One notorious trouble spot
is that the batch format cannot tolerate transformations of newlines to
CR-LF pairs; the byte counts in the "#! rnews" lines must be spot-on.
-- 
If the Space Shuttle was the answer,   | Henry Spencer at U of Toronto Zoology
what was the question?                 |  henry@zoo.toronto.edu   utzoo!henry

em@dce.ie (Eamonn McManus) (01/16/91)

henry@zoo.toronto.edu (Henry Spencer) writes:
>It means "something's wrong with your batch":  relaynews did not find a
>"#! rnews nnnnn" line where one should have been.  Typically this means
>garbling during preparation or transmission.  One notorious trouble spot
>is that the batch format cannot tolerate transformations of newlines to
>CR-LF pairs; the byte counts in the "#! rnews" lines must be spot-on.

We had `unbatcher out of sync' problems at a site I was involved in, which
was fed its news by mail from a VMS site (ugh).  The VMS mailer (PMDF) got
confused when lines exceeded 256 characters, as References lines often do,
and would make a total hash of the header when this happened.  As a result,
the "#! rnews" count would always be off by a small amount for the affected
article.  C News resyncs at the next "#! rnews" line, but if the count is
too long for the actual article contents it will have missed the start of
the article following the garbled one.

To kludge around this problem I wrote a program `patchbatch' which zips
through a news batch looking for "#! rnews" lines with incorrect counts.
If it finds one, it hunts back and forth a small amount for the next "#!
rnews" line and adjusts the incorrect one to point to it.  This was
surprisingly effective: while it was running I believe it never failed to
correct a munged batch.

I'm including the source of patchbatch in case it is of use to the original
poster, or anyone else.

,
Eamonn

/* patchbatch.c - patch a news batch. */

/* By Eamonn McManus <emcmanus@cs.tcd.ie>, February 1990.
 * This program is not copyrighted.
 *
 * Blast through a news batch checking the offsets after `#! rnews'.
 * If we find that the offset does not lead to another `#! rnews' line
 * or EOF, we search around for the line somewhere in the vicinity.  If
 * it is found, we go back and patch the original offset to point to the
 * correct place.  This is useful for example on systems where long lines
 * get truncated or split in transmission, since in this case the stated
 * offset will be wrong.
 *
 * This is the hackiest program I have written in a long time.
 */

#include <stdio.h>
#include <stdlib.h>	/* For exit(), strtol(). */
#include <string.h>
#include <sys/fcntl.h>	/* For O_RDWR. */
#include <sys/types.h>
#include <sys/stat.h>
#include <unistd.h>	/* For getopt(), read(), write(), lseek(), close(). */

/* Explicit prototype for strtol(), so that calls are type-checked; it is
 * compatible with the declaration in <stdlib.h>.
 */
extern long strtol(const char *, char **, int);

char verbose;		/* Set by -v: report every count that gets patched. */
extern int optind;	/* From getopt(): index of the first non-option argument. */


int patchbatch(char *name);	/* Defined later in this file. */

/* patchbatch [-v] file [...]
 *
 * Parse the command line, then run patchbatch() on each file named on it.
 * The -v option sets the global `verbose' flag.
 *
 * Exit status: 0 if patchbatch() succeeded for every file, 1 if it
 * failed for at least one of them, 2 on a usage error (unknown option
 * or no file arguments).
 */
int main(int argc, char **argv)
{
	int opt, i;
	int status = 0;

	while ((opt = getopt(argc, argv, "v")) != -1) {
		switch (opt) {
		case 'v':
			verbose = 1;
			break;
		default:
			goto usage;
		}
	}
	if (optind == argc)	/* No file arguments at all. */
		goto usage;

	/* Keep going after a failure so every file gets its chance. */
	for (i = optind; i < argc; i++)
		if (patchbatch(argv[i]) < 0)
			status = 1;
	return status;

usage:
	fprintf(stderr, "Usage: patchbatch [-v] file [...]\n");
	return 2;
}


static char lead[] = "#! rnews ";
#define LEADLEN (sizeof lead - 1)
#define FUDGE (2 * sizeof lead)
#define WINDOW 63	/* Bytes fetched per read: room for FUDGE plus a header. */

/* Tentative redeclaration of the -v flag, which is also defined near the
 * top of the file, so that this section can be compiled on its own.
 */
char verbose;

/* Read exactly WINDOW bytes starting at file offset pos into buf and
 * NUL-terminate whatever was read.  buf must hold WINDOW + 1 bytes.
 * Returns 1 on a full read, 0 if end of file came first, and -1 on an
 * I/O error (errno is left set by lseek() or read()).
 */
static int readwindow(int fd, long pos, char *buf)
{
	size_t have = 0;
	ssize_t got;

	if (lseek(fd, (off_t) pos, SEEK_SET) < 0)
		return -1;
	while (have < WINDOW) {
		got = read(fd, buf + have, WINDOW - have);
		if (got < 0)
			return -1;
		if (got == 0)
			break;
		have += (size_t) got;
	}
	buf[have] = '\0';
	return have == WINDOW;
}

/* Return a pointer to the first occurrence of `lead' within the first
 * len bytes of buf, or NULL if there is none.  Bytes are compared with
 * memcmp() so a stray NUL in the batch does not end the search early.
 */
static char *findlead(char *buf, size_t len)
{
	size_t i;

	for (i = 0; i + LEADLEN <= len; i++)
		if (memcmp(buf + i, lead, LEADLEN) == 0)
			return buf + i;
	return NULL;
}

/* Overwrite the numsize-character count field of the header at file
 * offset `here' with count, padded on the left with zeros so that the
 * field keeps its width and no other byte of the batch moves.  If count
 * needs more than numsize characters the file is left alone and a
 * complaint is printed.  Returns 0 in both of those cases and -1 if the
 * seek or the write failed.
 */
static int fixcount(int fd, char *name, long here, int numsize, long count)
{
	char digits[WINDOW + 1];
	int len;

	len = snprintf(digits, sizeof digits, "%0*ld", numsize, count);
	if (len != numsize) {	/* Can only mean "too long". */
		fprintf(stderr, "%s: no room to change article "
			"length to %ld, file offset %ld\n", name, count, here);
		return 0;
	}
	if (lseek(fd, (off_t) (here + (long) LEADLEN), SEEK_SET) < 0
	    || write(fd, digits, (size_t) numsize) != (ssize_t) numsize) {
		perror(name);
		return -1;
	}
	if (verbose)
		fprintf(stderr, "%s: changed article length to "
			"%ld, file offset %ld\n", name, count, here);
	return 0;
}

/* Check, and where necessary patch in place, the byte counts in the
 * "#! rnews" headers of the news batch in file `name'.
 *
 * Each header's count is followed to where the next header (or end of
 * file) should be.  If a header is exactly there, nothing is changed.
 * Otherwise a WINDOW-byte stretch starting up to FUDGE bytes before the
 * expected spot is searched for the next header and the count is
 * rewritten to lead to it; a count that runs past end of file, or that
 * lands too close to end of file for a full window to be read, is
 * rewritten to reach exactly to end of file.
 *
 * Returns 0 if the whole batch was walked, -1 on any error (a message
 * has then been written to stderr).  The file is closed on every path.
 */
int patchbatch(char *name)
{
	char buf[WINDOW + 1];
	struct stat st;
	long here, size;
	int fd, got, status;

	if ((fd = open(name, O_RDWR)) < 0) {
		perror(name);
		return -1;
	}
	status = -1;
	if (fstat(fd, &st) < 0) {
		perror(name);
		goto done;
	}
	size = (long) st.st_size;

	if ((got = readwindow(fd, 0L, buf)) <= 0) {
		if (got < 0)
			perror(name);
		else
			fprintf(stderr, "%s: too short for a news batch\n",
				name);
		goto done;
	}
	if (strncmp(buf, lead, LEADLEN) != 0) {
		fprintf(stderr, "%s: not a news batch (should start with %s)\n",
			name, lead);
		goto done;
	}

	here = 0;
	for (;;) {
		/* Invariant: buf starts with the header at file offset here. */
		char *p;
		int numsize;
		long offset, artstart, remaining, newpos, back, winpos;

		offset = strtol(buf + LEADLEN, &p, 10);
		numsize = (int) (p - (buf + LEADLEN));
		artstart = here + (long) LEADLEN + numsize + 1/*\n*/;
		remaining = size - artstart;
		/* A zero or negative count would stop us moving forward;
		 * remaining <= 0 means the header runs into end of file.
		 */
		if (offset <= 0 || remaining <= 0) {
			fprintf(stderr,
				"%s: bad value after %s, file offset %ld\n",
				name, lead, here);
			goto done;
		}
		if (offset == remaining) {	/* Last article, count right. */
			status = 0;
			goto done;
		}

		if (offset < remaining) {
			newpos = artstart + offset;
			/* Look back FUDGE bytes, but never as far as the
			 * start of this article, so that any header found
			 * lies strictly ahead of the current one.
			 */
			back = offset - 1 < (long) FUDGE
				? offset - 1 : (long) FUDGE;
			winpos = newpos - back;
			if ((got = readwindow(fd, winpos, buf)) < 0) {
				perror(name);
				goto done;
			}
			if (got > 0
			    && memcmp(buf + back, lead, LEADLEN) == 0) {
				/* Count was right.  The regions overlap,
				 * hence memmove() rather than strcpy().
				 */
				memmove(buf, buf + back,
					(size_t) (WINDOW - back) + 1);
				here = newpos;
				continue;
			}
			if (got > 0) {
				if ((p = findlead(buf, WINDOW)) == NULL) {
					fprintf(stderr, "%s: can't find next "
						"article with offset %ld from "
						"file pos %ld\n",
						name, offset, here);
					goto done;
				}
				offset = winpos + (long) (p - buf) - artstart;
			} else
				offset = remaining;	/* Too near EOF. */
		} else
			offset = remaining;	/* Count ran past EOF. */

		/* The stated count was wrong; offset is now the right one. */
		if (fixcount(fd, name, here, numsize, offset) < 0)
			goto done;
		newpos = artstart + offset;
		if (newpos >= size) {
			status = 0;
			goto done;
		}
		if ((got = readwindow(fd, newpos, buf)) <= 0) {
			if (got < 0)
				perror(name);
			else
				fprintf(stderr,
					"%s: last article too short\n", name);
			goto done;
		}
		here = newpos;
	}

done:
	if (close(fd) < 0 && status == 0) {
		perror(name);
		status = -1;
	}
	return status;
}

em@dce.ie (Eamonn McManus) (01/18/91)

I wrote:
>The VMS mailer (PMDF) got confused when lines exceeded 256 characters...

I've been informed that PMDF does not have problems with line lengths,
except for the generic SMTP 1024-character limit.  The problem was
presumably elsewhere in the software, perhaps in the News system itself.

,
Eamonn