[net.sources] History File Rebuilder

kiessig (09/09/82)

	Here is the program to rebuild a history file, along with its
header file.   Good luck with it.  If someone would care to take the
time and chop this program down to a reasonable size, I would appreciate
it.

Rick Kiessig
{sri-unix,randvax,dsd}!fortune!kiessig

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

header.h

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
/*
 * header.h - Article header format
 *
 * Modified to not require defs.h
 *
 * Rick Kiessig
 * Fortune Systems Corporation
 * August 18, 1982
 */

/*
 * Version identification string.
 * NOTE(review): the "@(#)" prefix looks like the marker that what(1)
 * searches for -- confirm before relying on it.
 */
static char *Header = "@(#) header.h    2.6     8/18/82";

/* Buffer and field sizes, in bytes, each including the terminating NUL. */
#define	BUFLEN	128	/* standard buffer size				*/
#define LBUFLEN 1024	/* big buffer size				*/ 
#define PATHLEN 512	/* length of longest source string		*/
#define	DATELEN	35	/* length of longest allowed date string	*/
#define	NAMELEN	15	/* length of longest possible file name		*/
/* Number of unrecognized header lines that are kept per article. */
#define NUNREC 10

/*
 * article header
 *
 * One parsed article header.  Every character member is a fixed-size
 * buffer holding a NUL-terminated string; an empty string means the
 * field was not present.
 *
 * This header includes nothing itself, so whatever declares time_t
 * (e.g. <sys/types.h>) must be included before it.
 */
struct	hbuf {
	char	path[PATHLEN];		/* source string	*/
	char	nbuf[LBUFLEN];		/* newsgroup line	*/
	char	title[BUFLEN];		/* title		*/
	char	ident[BUFLEN];		/* article I.D.		*/
	char	replyto[BUFLEN];	/* reply address	*/
	char	followid[BUFLEN];	/* artid in followup to	*/
	char	subdate[DATELEN];	/* submittal date	*/
	time_t	subtime;		/* subdate in secs	*/
	char	recdate[DATELEN];	/* receival date	*/
	time_t	rectime;		/* recdate in secs	*/
	char	expdate[DATELEN];	/* expiration date	*/
	time_t	exptime;		/* expdate in secs	*/
	char	ctlmsg[PATHLEN];	/* control message	*/
	char	unrec[NUNREC][BUFLEN];	/* unrecognized lines	*/
};

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

hisfix.c

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
/*
 * Create a history file based on existing news items.
 * This is useful when you've trashed your history file
 * for some reason, or when you're converting to one
 * of the newer versions.  Note that the history file
 * normally contains entries for all items received by a
 * site.  This program will condense the file to its
 * smallest useful size.
 *
 * Much code taken from news 2.8.  Could be made lots
 * smaller with some effort.
 *
 * Rick Kiessig
 * Fortune Systems Corp.
 * August 18, 1982
 */

#include <stdio.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/dir.h>
#include <ctype.h>
#include "header.h"

/*
 * Name of the local system.  hread() removes it, together with the
 * network punctuation character that follows it, from the front of an
 * article's path.
 */
#define SYSNAME "fortune"       /* not used in an important way here */

/*
 * fullpath accumulates the directory names cd() has descended through;
 * fpp always points at its terminating NUL.
 */
char    fullpath[256], *fpp;    /* Keep full path here for error printouts */
/* Declared by hand: this file includes no string header. */
char *index();

/*
 * A directory entry as handed around inside this program: the same
 * information getdirent() reads from a struct direct, but with one
 * extra byte so that the name can always be NUL-terminated.
 */
struct  xdirect {
	ino_t   xd_ino;
	char    xd_name[DIRSIZ + 1];
};

/* Number of files hread() rejected; main() uses it as the exit status. */
int     baditems = 0;
/*
 * Shared line buffer: hread() and frmread() read header lines into it,
 * and type() / getfield() examine the line it currently holds.
 */
char	bfr[LBUFLEN];			/* general-use scratch area */

#define NETCHRS	"!:.@^"	/* Punct. chars used for various networks	*/
#define	NGDELIM	','	/* delimit character in news group line		*/
#define	TRUE	1	/* boolean true					*/
#define	FALSE	0	/* boolean false				*/
/* First character of the first line of an old-format article. */
#define	PROTO	'A'	/* old protocol name				*/

/*
 * Header line classes returned by type().  All are greater than zero;
 * type() returns FALSE (0) for a line that is not a header line.
 */
#define FROM 		1
#define NEWSGROUP 	2
#define TITLE 		3
#define SUBMIT		4
#define RECEIVE		5
#define EXPIRE		6
#define ARTICLEID	7
#define REPLYTO		8
#define FOLLOWID	9
#define CONTROL		10
#define OTHER		99

/*
 * main - walk every directory named on the command line, writing one
 * history line per valid news item to the standard output.
 *
 * With no arguments a usage message is printed and the exit status
 * is 1.  Otherwise the exit status is the number of files that were
 * not valid news items, capped at 255: only the low eight bits of an
 * exit status reach the parent, so an uncapped count of exactly 256
 * would be reported as success.
 */
int
main (argc, argv)
int argc;
char *argv[];
{
	register int i;

	if (argc < 2) {
		fprintf (stderr, "usage: cd $(SPOOLDIR) ; %s * > $(LIBDIR)/history\n", argv[0]);
		exit(1);
	}
	/* Start with an empty path; cd() extends and restores it. */
	*(fpp = fullpath) = '\0';
	for (i = 1; i < argc; i++)
		cd (argv[i]);
	exit (baditems > 255 ? 255 : baditems);
}

/*
 * cd - examine one directory, recursing into its subdirectories.
 *
 * 'name' is opened relative to the current directory.  For the
 * duration of the call it is appended, followed by a '/', to the
 * global fullpath (which is only used in error messages), and the
 * process changes into the directory so that its entries can be
 * reached by their bare names.  Every regular file is passed to cf()
 * and every subdirectory is passed back to cd().
 *
 * The directory is read as a plain file of struct direct records (see
 * getdirent()); its first two entries must be "." and "..".
 *
 * Any failure is reported on stderr and the directory is skipped.  On
 * each of those paths the open stream is closed and, if the process
 * had already changed into the directory, it changes back out, so the
 * caller's walk continues from the directory it was in.  The one
 * fatal error is a failing chdir(".."), after which the current
 * directory is unknown.
 */
int
cd(name)                                /* examine a directory */
char *name;
{
	register FILE *fd;
	register char *cp;
	struct stat s;
	struct xdirect direntry;
	register char *ofpp;

	ofpp = fpp;
	/* Append name, always keeping room for a '/' and the NUL. */
	for (cp = name; *cp; cp++) {
		if (fpp >= fullpath + sizeof fullpath - 2) {
			*fpp = '\0';
			fflush(stdout);
			fprintf(stderr, "Path too long: %s...\n", fullpath);
			goto ret;
		}
		*fpp++ = *cp;
	}
	*fpp = '\0';
	if (strcmp(name, "/")) {
		*fpp++ = '/';  *fpp = 0;
	}
	if ((fd = fopen(name, "r")) == NULL) {
		fflush(stdout);
		fprintf(stderr, "Can't open directory: ");
		perror(fullpath);
		goto ret;
	}
	if (chdir(name) == -1) {
		fflush(stdout);
		fprintf(stderr, "chdir(%s) FAILED\n", fullpath);
		fclose(fd);		/* not inside: nothing to back out of */
		goto ret;
	}
	if (getdirent (fd, &direntry) != 1 ||
	    strcmp(direntry.xd_name, ".")) {
		fflush(stdout);
		fprintf(stderr, "First entry not '.': ");
		perror(fullpath);
		goto leave;
	}
	if (getdirent (fd, &direntry) != 1 ||
	    strcmp(direntry.xd_name, "..")) {
		fflush(stdout);
		fprintf(stderr, "Second entry not '..': ");
		perror(fullpath);
		goto leave;
	}
	while (getdirent (fd, &direntry)) {
		/* A zero inode number marks an unused slot. */
		if (direntry.xd_ino && stat(direntry.xd_name, &s) != -1) {
			switch (s.st_mode & S_IFMT) {
			case S_IFREG:           /* just a file */
				cf(direntry.xd_name, name);
				break;
			case S_IFDIR:           /* directory */
				cd(direntry.xd_name);
				break;
			}
		}
	}
 leave:
	fclose(fd);
	if (chdir("..") == -1) {
		fflush(stdout);
		fprintf(stderr, "chdir .. FAILED: ");
		perror(fullpath);
		exit(-1);
	}
 ret:
	/* Cut fullpath back to what it was on entry. */
	*(fpp = ofpp) = '\0';
}

/*
 * getdirent - read the next directory record from 'file' into *dir.
 *
 * One struct direct is read from the stream; its inode number is
 * copied and its name is copied and NUL-terminated by dirstrcpy().
 * Returns 1 if a whole record was read.  Returns 0 otherwise (end of
 * file or read error), in which case *dir is left untouched.
 */
int
getdirent (file, dir)
FILE *file;
struct xdirect *dir;
{
	struct direct raw;

	if (fread((char *) &raw, sizeof raw, 1, file) == 1) {
		dir->xd_ino = raw.d_ino;
		dirstrcpy (dir->xd_name, raw.d_name);
		return 1;
	}
	return 0;
}

/*
 * dirstrcpy - copy a directory entry name and NUL-terminate it.
 *
 * Characters are copied from source until a NUL is seen or DIRSIZ
 * characters have been copied, whichever comes first, so source need
 * not be terminated when it is exactly DIRSIZ characters long.  dest
 * must have room for DIRSIZ + 1 characters.
 */
int
dirstrcpy (dest, source)
register char *dest;
register char *source;
{
	register int left;

	for (left = DIRSIZ; *source != '\0'; ) {
		*dest++ = *source++;
		if (--left == 0)
			break;
	}
	*dest = '\0';
}

/*
 * cf - examine one file and, if it is a news item, print its history
 * line on the standard output:
 *
 *	article-id <TAB> receival-date <TAB> dirn/name
 *
 * 'name' is opened relative to the current directory; 'dirn' is only
 * used for the printed path and for error messages.
 *
 * The date column is hb.recdate, which is filled in only from a
 * "Received: " header line, so it is empty for items that have none.
 *
 * Returns 0 if a history line was written, 1 if the file is not a
 * valid news item (baditems is incremented), and -1 if the file could
 * not be opened.  Pending standard output is flushed before anything
 * is written to stderr, as cd() does, so that the two streams stay in
 * order when they go to the same place.
 */
int
cf(name, dirn)
char *name, *dirn;
{
	struct hbuf hb;
	FILE *fp, *hread();

	if ((fp = fopen (name, "r")) == NULL) {
		fflush (stdout);
		fprintf (stderr, "%s/", dirn);
		perror (name);
		return -1;
	}
	if (hread (&hb, fp) == NULL) {
		baditems++;
		fflush (stdout);
		fprintf (stderr, "%s/%s: not a valid news item\n", dirn, name);
		fflush (stderr);
		fclose (fp);
		return 1;
	}
	fclose (fp);
	printf ("%s\t%s\t%s/%s\n", hb.ident, hb.recdate, dirn, name);
	return 0;
}

/*
 * Read an article header from the open stream fp into *hp.
 * Return fp if the header is okay, else NULL.
 *
 * Two formats are understood.  If the first line starts with a capital
 * letter and contains a ':', or starts with "From ", the mail-style
 * header is parsed by frmread().  Failing that, the old format is
 * tried: a first line of PROTO followed by the article id, then one
 * line each for the newsgroup list, source path, date and title; every
 * one of those lines must be complete (end in a newline).
 *
 * On success the local system name and the network punctuation that
 * follows it are removed from the front of the path, and the newsgroup
 * list is folded to lower case.  *hp is cleared first, so fields that
 * are not present are left as empty strings.
 */
FILE *
hread(hp, fp)
register struct hbuf *hp;
FILE *fp;
{
	register int len;

	bclear((char *) hp, sizeof (*hp));
	/*
	 * An empty file is never a news item.  Bail out here: bfr is
	 * shared, so after a failed read it still holds a line from
	 * whatever file was examined last.
	 */
	if (fgets(bfr, PATHLEN, fp) == NULL)
		return(NULL);
	if ((*bfr >= 'A' && *bfr <= 'Z' && index(bfr, ':')) ||
	    !strncmp(bfr, "From ", 5))
		if (frmread(fp, hp))
			goto strip;
	if (*bfr != PROTO)
		return(NULL);
	/* hp->ident was zeroed above, so it stays NUL-terminated. */
	strncpy(hp->ident, &(bfr[1]), NAMELEN);	/* file name */
	if (!nstrip(hp->ident))
		return(NULL);
	if (fgets(hp->nbuf, BUFLEN, fp) == NULL ||	/* newsgroup list */
	    !nstrip(hp->nbuf))
		return(NULL);
	ngcat(hp->nbuf);
	if (fgets(hp->path, BUFLEN, fp) == NULL ||	/* source path */
	    !nstrip(hp->path))
		return(NULL);
	if (fgets(hp->subdate, DATELEN, fp) == NULL ||	/* date */
	    !nstrip(hp->subdate))
		return(NULL);
	if (fgets(hp->title, BUFLEN, fp) == NULL ||	/* title */
	    !nstrip(hp->title))
		return(NULL);
strip:	/* strip off sys! from front of path. */
	len = strlen(SYSNAME);
	/*
	 * The character after the name has to be tested against '\0'
	 * separately: index() also matches the terminator of NETCHRS.
	 */
	if (strncmp(SYSNAME, hp->path, len) == 0 && hp->path[len] != '\0' &&
	    index(NETCHRS, hp->path[len])) {
		register char *dst, *src;

		/* The two regions overlap, so strcpy() must not be used. */
		dst = hp->path;
		src = &(hp->path[len+1]);
		while ((*dst++ = *src++) != '\0')
			;
	}
	lcase(hp->nbuf);
	return(fp);
}

/*
 * Zero the nc bytes that begin at cp.
 * Nothing is written when nc is 0.
 */
int
bclear(cp, nc)
register char *cp;
register int nc;
{
	for (; nc != 0; nc--)
		*cp++ = '\0';
}

/*
 * Strip trailing newlines, blanks, and tabs from 's'.
 * Return TRUE (1) if a newline was found anywhere in 's', else
 * FALSE (0).
 *
 * An empty string, or one made up entirely of those characters, ends
 * up empty.  The backward scan compares against 's' before it steps,
 * so it never forms a pointer in front of the start of the string.
 */
int
nstrip(s)
register char *s;
{
	register char *p;
	register int rc;

	rc = 0;
	for (p = s; *p; p++)
		if (*p == '\n')
			rc = 1;
	/* p is at the NUL; back it up over the trailing white space. */
	while (p > s && (p[-1] == '\n' || p[-1] == ' ' || p[-1] == '\t'))
		p--;
	*p = '\0';
	return(rc);
}

/*
 * Make sure the string ends in NGDELIM.
 *
 * If the last character of 's' is already NGDELIM nothing is changed;
 * otherwise one is appended, so an empty string becomes just the
 * delimiter.  The caller must provide room for the extra character.
 */
int
ngcat(s)
register char *s;
{
	register char *end;

	for (end = s; *end != '\0'; end++)
		;
	if (end == s || end[-1] != NGDELIM) {
		end[0] = NGDELIM;
		end[1] = '\0';
	}
}

/*
 * Fold the upper case letters of 's' to lower case, in place.
 *
 * Characters are converted to unsigned char before being handed to
 * the <ctype.h> routines, which are only defined for values in that
 * range (and EOF); a plain char with the top bit set may be negative.
 */
int
lcase(s)
register char *s;
{
	register char *ptr;

	for (ptr = s; *ptr; ptr++)
		if (isupper((unsigned char) *ptr))
			*ptr = tolower((unsigned char) *ptr);
}

/*
 * Copy the value of the header line in bfr -- the text after the first
 * ':' and the white space that follows it -- into dest, which is size
 * bytes long, and strip its trailing white space.  A value that does
 * not fit is truncated rather than written past the end of dest.
 * *flag is set to TRUE if the value is not empty.
 */
static int
hfield(flag, dest, size)
int *flag;
register char *dest;
int size;
{
	register char *src;
	register int n;

	if ((src = index(bfr, ':')) == NULL)
		return(0);
	while (isspace((unsigned char) *++src))
		;
	if (*src != '\0')
		*flag = TRUE;
	for (n = 0; n < size - 1 && src[n] != '\0'; n++)
		dest[n] = src[n];
	dest[n] = '\0';
	nstrip(dest);
	return(0);
}

/* Store the current header line in a member of struct hbuf. */
#define HFIELD(flag, field)	hfield(&(flag), (field), (int) sizeof (field))

/*
 * Get header info from mail-format file.
 * Return non-zero on success.
 *
 * On entry bfr holds the first line of the file, already read by the
 * caller.  Lines are classified by type() and stored in *hp until a
 * line that is not a header line, or the end of the file, is reached.
 * Only the first non-empty occurrence of each kind of line is kept,
 * and at most NUNREC unrecognized lines are saved.  Unless the line
 * that ended the header is blank, the stream is moved back to the
 * start of it.
 *
 * The header is accepted (TRUE) when a sender/path line, a "Posted: "
 * line and an "Article-I.D.: " line were all found.
 */
int
frmread(fp, hp)
register FILE *fp;
register struct hbuf *hp;
{
	int fromflag = FALSE, groupflag = FALSE, subflag = FALSE;
	int titleflag = FALSE, fileflag = FALSE, recflag = FALSE, i;
	int exprflag = FALSE, replyflag = FALSE, followflag = FALSE;
	int ctlflag = FALSE;
	int unreccnt = 0;
	long curpos;
#ifdef ATSIGN
	char wordfrom[100], uname[100], at[100], site[100];
#endif

	i = type(bfr);
	for (;;) {
		/* Start of the line after the one in bfr. */
		curpos = ftell(fp);
		switch (i) {
		case FROM:
			if (!fromflag) {
#ifdef ATSIGN
				/*
				 * This old code understood the "user at site"
				 * notation but threw away all but the first
				 * word of names (like your full name) so has
				 * been taken out.
				 */
				uname[0] = at[0] = site[0] = '\0';
				sscanf(bfr, "%99s %99s %99s %99s",
					wordfrom, uname, at, site);
				if (isat(at))
					sprintf(hp->path, "%s@%s",
						uname, site);
				else
					strcpy(hp->path, uname);
#else
				HFIELD(fromflag, hp->path);
#endif
				fromflag = TRUE;
			}
			break;
		case NEWSGROUP:
			if (!groupflag)
				HFIELD(groupflag, hp->nbuf);
			break;
		case TITLE:
			if (!titleflag)
				HFIELD(titleflag, hp->title);
			break;
		case SUBMIT:
			if (!subflag)
				HFIELD(subflag, hp->subdate);
			break;
		case RECEIVE:
			if (!recflag)
				HFIELD(recflag, hp->recdate);
			break;
		case EXPIRE:
			if (!exprflag)
				HFIELD(exprflag, hp->expdate);
			break;
		case ARTICLEID:
			if (!fileflag)
				HFIELD(fileflag, hp->ident);
			break;
		case REPLYTO:
			if (!replyflag)
				HFIELD(replyflag, hp->replyto);
			break;
		case FOLLOWID:
			if (!followflag)
				HFIELD(followflag, hp->followid);
			break;
		case CONTROL:
			if (!ctlflag)
				HFIELD(ctlflag, hp->ctlmsg);
			break;
		case OTHER:
			if (unreccnt < NUNREC) {
				/* The first line may be longer than BUFLEN. */
				strncpy(hp->unrec[unreccnt], bfr, BUFLEN - 1);
				hp->unrec[unreccnt][BUFLEN - 1] = '\0';
				unreccnt++;
			}
			break;
		}
		/*
		 * A header that runs to the end of the file, with no
		 * blank line or body after it, ends here: fgets()
		 * returns NULL and there is no line to classify.
		 */
		if (fgets(bfr, BUFLEN, fp) == NULL)
			break;
		if ((i = type(bfr)) <= 0)
			break;
	}

	if (*bfr != '\n')
		fseek(fp, curpos, 0);
	if (fromflag && subflag && fileflag)
		return TRUE;
	return FALSE;
}

/*
 * Return TRUE if str is one of the spellings of "at" that may separate
 * a user name from a site name -- "@", "at" or "AT" -- else FALSE.
 * The comparison is exact; no other mix of case is accepted.
 */
int
isat(str)
char *str;
{
	if (strcmp(str, "@") == 0 ||
	    strcmp(str, "at") == 0 ||
	    strcmp(str, "AT") == 0)
		return TRUE;
	return FALSE;
}

/*
 * Store the value of the header line held in bfr in hpfield.
 *
 * The value is the text that follows the first ':' and the white
 * space after it; its trailing white space is stripped.  *flag is set
 * to TRUE if the value is not empty, and is left alone otherwise.  If
 * bfr contains no ':' at all, neither *flag nor hpfield is touched.
 *
 * The copy is not bounded: hpfield must be large enough to hold the
 * rest of the line.
 */
int
getfield(flag, hpfield)
int *flag;
char *hpfield;
{
	char *ptr;

	ptr = index(bfr, ':');
	if (ptr != NULL) {
		/* <ctype.h> routines need an unsigned char value. */
		while (isspace((unsigned char) *++ptr))
			;
		if (*ptr != '\0')
			*flag = TRUE;
		strcpy(hpfield, ptr);
		nstrip(hpfield);
	}
}

/*
 * Classify a header line.
 *
 * Returns FALSE (0) if ptr is NULL -- so that the result of a failed
 * fgets() can be passed straight in -- or if the line is not a header
 * line.  A header line starts with a letter, and its first ':' is
 * immediately followed by its first blank.  Otherwise the result is
 * the class of the line: FROM, NEWSGROUP, TITLE, SUBMIT, RECEIVE,
 * EXPIRE, ARTICLEID, REPLYTO, FOLLOWID or CONTROL for the keywords
 * listed below, and OTHER for any other keyword.
 */
int
type(ptr)
char *ptr;
{
	char *colon, *space;

	if (ptr == NULL || !isalpha((unsigned char) *ptr))
		return FALSE;
	colon = index(ptr, ':');
	space = index(ptr, ' ');
	if (!colon || colon + 1 != space)
		return FALSE;
	/*
	 * A line starting "From " has its first blank right after the
	 * 'm', so it can never get past the test above; that comparison
	 * is kept only to match the keyword list.
	 */
	if (!strncmp(ptr, "From ", 5) || !strncmp(ptr, "From: ", 6) || !strncmp(ptr, "Path: ", 6))
		return FROM;
	if (!strncmp(ptr, "To: ", 4) || !strncmp(ptr, "Newsgroups: ", 12))
		return NEWSGROUP;
	if (!strncmp(ptr, "Subject: ", 9) || !strncmp(ptr, "Title: ", 7))
		return TITLE;
	if (!strncmp(ptr, "Posted: ", 8))
		return SUBMIT;
	if (!strncmp(ptr, "Received: ", 10))
		return RECEIVE;
	if (!strncmp(ptr, "Expires: ", 9))
		return EXPIRE;
	if (!strncmp(ptr, "Article-I.D.: ", 14))
		return ARTICLEID;
	if (!strncmp(ptr, "Reply-To: ", 10))
		return REPLYTO;
	if (!strncmp(ptr, "References: ", 12))
		return FOLLOWID;
	if (!strncmp(ptr, "Control: ", 9))
		return CONTROL;
	return OTHER;
}