[net.sources] dejunk - automatic handling of junked news

jf@sal.UUCP (Johan Finnved) (01/25/85)

At our site we had a somewhat outdated active file so we got a
lot of articles in newsgroup junk.
I wrote a program to move articles from junk to their proper
newsgroups, as though the newsgroups had been there when the articles
were received.
Ambition level:
	- If an article was already received into *some* of its
	  newsgroups, duplicates are avoided.
	- If newsgroups are missing, a question is asked *once* for
	  each missing newsgroup: do you want to create it? If the
	  answer is 'no', you don't get repeated questions even
	  if there are a lot of articles for the unwanted newsgroup.
	- Xref: header lines are fixed to reflect the new situation.
	- If there is no problem the articles are unlinked from
	  the junk directory.

Does anybody recognize the problem?
Perhaps someone has already written such a program?

My program (entered from scratch today, so there may be some bugs left)
has been posted to net.sources.

		Johan Finnved
		jf@sal.UUCP
		...!decvax!mcvax!enea!sal!jf

------- cut here to get dejunk.c ------
/* 
 * De-junker
 * dejunk version 1.0	18-Jan-85	Johan Finnved
 * 
 * Program to move articles from junk to their proper
 * newsgroups, as though the newsgroups had been there when the articles
 * were received.
 * Ambition level:
 * 	- If the articles were received to *some* newsgroups
 * 	  duplicates are avoided.
 * 	- If newsgroups are missing a question is asked *once* for
 * 	  each missing newsgroup if you want to create it. If the
 * 	  answer is 'no' you don't get repeated questions even
 * 	  if there is a lot of articles for the unwanted newsgroup.
 * 	- Xref: header lines are fixed to reflect the new situation.
 * 	- If there is no problem the articles are unlinked from
 * 	  the junk directory.
 * 
 * 
 * The program is tested only on our site running V7 and news version 2.10.1
 * 
 * On our site it is sufficient to have dejunk setuid news
 * 	our kernel allows setuid(geteuid())
 * 
 * Possible porting problems:
 * 	The program assumes that the d_ino fields in the spool
 * 	directories uniquely identify the articles.
 * 	That is, if an article appears in several places, the copies
 * 	are *hard links* to the same article file.
 * 	(This is a problem with Eunice, isn't it?)
 * 
 * 	Program assumes index() and rindex()
 * 
 *	Program relies on the relatively simple headers generated by inews
 *	(no continuation lines etc...)
 *
 * 	Almost all data areas are static; you may want to
 * 	have a smarter malloc scheme.
 *	*/

#include	<ctype.h>
#include	<whoami.h>
#include	<stdio.h>
#include	<sys/types.h>
#include	<sys/dir.h>
#include	<sys/stat.h>

/*
 * Compile-time limits.  All tables below are statically sized from
 * these values.
 */
#define	MAXNG		1000	/* Max number of newsgroups	*/
#define	ARTNGMAX	40	/* Max number of newsgroups in one article */
#define	HDRMAX		3000	/* Max size (in bytes) of header */
#define	HLINEMAX	50	/* Max header lines	*/

/* Site-specific locations of the active file and the news spool tree. */
char	ACTIVE[]=	"/usr/lib/news/active" ;
char	SPOOLDIR[]=	"/usr/spool/news" ;
/* Name of the scratch file used while rewriting an article; NULL until
 * main() first needs it.  done() unlinks it on exit when it is set. */
char	* tmpname ;
/* Old-style declarations of the library routines that return pointers. */
char	*strcpy(), *strcat(), *index(), *rindex(), *mktemp() ;

/*
 * Directory-reading emulation, compiled only when READDIR is not defined.
 * The opendir()/readdir()/closedir() routines that follow read the
 * directory file itself through stdio, one struct direct at a time.
 */
#ifndef	READDIR
#define N_D 3			/* Number of directory slots in dirsimtab */
struct dirsim {
	FILE * D_fp ;		/* Open stream on the directory; NULL = slot free */
	struct direct D_entry ;	/* Buffer for the entry most recently read */
} dirsimtab[N_D] ;

typedef struct dirsim DIR ;

/*
 * Claim the first free slot of dirsimtab and open the directory file
 * `name' for reading through stdio.
 * Returns the slot, or NULL when the fopen fails.  When every slot is
 * already in use a message is printed and the program aborts.
 */
DIR *
opendir(name)
char *name ;
{
	register int slot ;

	for(slot = 0 ; slot < N_D ; slot++) {
		if(dirsimtab[slot].D_fp != NULL)
			continue ;		/* Slot is busy	*/
		dirsimtab[slot].D_fp = fopen(name,"r") ;
		if(dirsimtab[slot].D_fp == NULL)
			return(NULL) ;
		return(&dirsimtab[slot]) ;
	}
	fprintf(stderr,"diropen out of slots\n") ;
	abort() ;
}

/*
 * Give back a slot obtained from opendir(): close its stream and mark
 * the slot free (a NULL D_fp is what opendir() looks for).
 */
closedir(dp)
register DIR *dp ;
{
	fclose(dp->D_fp) ;
	dp->D_fp = NULL ;		/* Slot may be reused now	*/
}

struct direct *
readdir(dp)
register DIR *dp ;
{
	while(fread(&dp->D_entry,sizeof(struct direct),1,dp->D_fp) == 1) {
		if(dp->D_entry.d_ino == 0)
			continue ;
		return(&dp->D_entry) ;
	}
	return(NULL) ;
}
#endif

/* Forward declarations of the local routines that return pointers. */
DIR * dirallo() ;	/* Open a directory, creating it if it is missing */
char * ngtodir() ;	/* Newsgroup name -> spool directory path */

/*
 * One entry per known newsgroup.  The first ini_ng entries come from the
 * active file and are sorted by name; entries added later by lookng()
 * follow them unsorted.
 */
struct act {
	char *ac_name ;		/* Name of newsgroup	*/
	long ac_rnd ;		/* Byte offset in the active file of
				 * this group's sequence number field */
	long ac_seq ;		/* Local sequence number, or
				 * -1 if the newsgroup shouldn't be
				 * created	*/
} acttab[MAXNG] ;

int ini_ng ;			/* Initial number of newsgroups	(those read
				 * from the active file at startup) */
int tot_ng ;			/* Total number of names in acttab */

/* Mode, owner and group of the junk directory as stat'ed in main();
 * dirallo() applies them to every directory it creates. */
int new_mod, new_uid, new_gid ;	/* Protection info for news directory */

FILE * actfp ;			/* Active file, open for update	*/
FILE * artfp ;			/* Article currently being processed */

/* Per-article state, filled in by hread() and main() for each article. */
int nang ;			/* Number of newsgroups in this article */
int nhlines ;			/* Number of header lines	*/
long artxref[ARTNGMAX] ;	/* Xref numbers found in article */
long newxref[ARTNGMAX] ;	/* Xref numbers that are found by searching */
int artng[ARTNGMAX] ;		/* Newsgroups in article (index in acttab) */
char *ng, *xref, *title ;	/* Interesting header pointers: the values of
				 * Newsgroups:, Xref: and Subject:, each
				 * pointing into hdrbuf or NULL if absent */

char hdrbuf[HDRMAX] ;		/* Buffer to store header	*/
char * hdrlines[HLINEMAX] ;	/* Array of line starts	*/

/* Local host name as it appears in Xref: lines.  sysname is presumably
 * supplied by <whoami.h> -- TODO confirm for your site. */
char myname[] = sysname ;
int mynamez ;			/* strlen(myname), set in main() */

/*
 * qsort() comparison routine for acttab: orders two entries by
 * newsgroup name, using strcmp() ordering.
 */
namecmp(ap1,ap2)
struct act *ap1, *ap2 ;
{
	register char *left, *right ;

	left = ap1->ac_name ;
	right = ap2->ac_name ;
	return(strcmp(left,right)) ;
}

main()	
{
	register char *cp, *cp1, *cpe ;
	register i, j ;
	char line[100] ;
	char junkname[100] ;
	DIR * junkdp ;
	DIR * chkdp ;
	FILE * tfp ;
	struct stat sbuf ;
	struct direct * dirp ;
	ino_t artino ;
	int goodart, badart ;
	register struct act * ap ;

	setgid(getegid()) ; setuid(geteuid()) ;	/* If the system allows it */
	mynamez = strlen(myname) ;
	if((actfp = fopen(ACTIVE,"r+w")) == NULL) {
		fprintf(stderr,"Unable to open active file\n") ;
		done(1) ;
	}
	if(fstat(fileno(actfp),&sbuf) < 0) {
		fprintf(stderr,"Unable to fstat active file\n") ;
		done(1) ;
	}
	if((cp1 = cp = (char *) malloc((int) sbuf.st_size)) == NULL) {
		fprintf(stderr,"Unable to allocate in-core active file copy") ;
		done(1) ;
	}
	cpe = cp + sbuf.st_size ;
	if(fread(cp,1,cpe-cp,actfp) != cpe-cp) {
		fprintf(stderr,"Unable to read active file\n") ;
		done(1) ;
	}
	while(cp < cpe) {
		acttab[ini_ng].ac_name = cp ;
		while(*cp++ != ' ' && cp < cpe)
			;
		cp[-1] = '\0' ;
		acttab[ini_ng].ac_rnd = cp - cp1 ;	/* rnd adr of seq */
		acttab[ini_ng].ac_seq = atol(cp) ;
		if(cp[5] != '\n') {
			fprintf(stderr,"Strange line in active ng=%s\n",
				acttab[ini_ng].ac_name) ;
			done(1) ;
		}
		cp += 6 ;
		if(ini_ng++ >= MAXNG) {
			fprintf(stderr,"Too many newsgroups\n") ;
			done(1) ;
		}
	}
	qsort(acttab,tot_ng=ini_ng,sizeof(struct act),namecmp) ;
	sprintf(line,"%s/junk",SPOOLDIR) ;
	if(stat(line,&sbuf) < 0) {
		fprintf(stderr,"Unable to stat junk directory") ;
		done(1) ;
	}
	new_mod = sbuf.st_mode & 0777 ;
	new_uid = sbuf.st_uid ; new_gid = sbuf.st_gid ;
	if((junkdp = opendir(line)) == NULL) {
		fprintf(stderr,"Unable to open %s directory\n",line) ;
		done(1) ;
	}
	while((dirp = readdir(junkdp)) != NULL) {
		if(!islegal(dirp->d_name))
			continue ;
		artino = dirp->d_ino ;
		sprintf(junkname,"%s/junk/%s",SPOOLDIR,dirp->d_name) ;
		if(stat(junkname,&sbuf) < 0
		   || (sbuf.st_mode & S_IFMT) != S_IFREG)
			continue ;
		if((artfp = fopen(junkname,"r")) == NULL)
			continue ;
		if(hread()== NULL) {	/* Get the article	*/
			fprintf(stderr,"%s garbled\n",junkname) ;
			fclose(artfp) ;
			continue ;
		}
		for(nang = 0 , cp = strcpy(line,ng); *cp ;) {
			cp1 = cp ;
			artxref[nang] = 0 ;
			newxref[nang] = 0 ;
			while(*cp && *cp !=',') cp++ ;
			if (*cp == ',') *cp++ = '\0' ;
			artng[nang++] = lookng(cp1,1) ;
		}
		if(xref != NULL && !strncmp(xref,myname,mynamez)) {
			for(cp = strcpy(line,xref+mynamez+1) ; *cp ;) {
				if((cp1 = index(cp,':')) == NULL)
					break ;
				*cp1++ = '\0' ;
				if((j = lookng(cp,0)) >= 0)
				    for(i = 0 ; i < nang ; i++)
					if(artng[i] == j) {
						artxref[i] = atol(cp1) ;
						break ;
					}
				for(cp=cp1 ; *cp && *cp++ != ' ' ; )
					;
			}
		}
		printf("%s: %s\n",dirp->d_name,title) ;
		j = 0 ;
		goodart = 0 ; badart = 0 ;
		for(i = 0 ; i < nang ; i++) {
			ap = &acttab[artng[i]] ;
			if(++j > 2) {
				j = 1 ;
				printf("\n") ;
			}
			printf("\t%s:",ap->ac_name) ;
			if(ap->ac_seq < 0l) {	/* Inactive newsgroup */
				printf("Skipped") ;
				/* Note that badart
				 * is not incremented
				 * since we don't want this newsgroup */
				continue ;
			}
			if((chkdp = dirallo(ngtodir(ap->ac_name))) == NULL){
				printf("no directory") ;
				badart++ ;
				continue ;
			}
			while((dirp = readdir(chkdp)) != NULL) {
				if(!islegal(dirp->d_name))
					continue ;
				if(dirp->d_ino == artino) {
					printf("Ok(%s)",dirp->d_name) ;
					newxref[i] = atol(dirp->d_name) ;
					goodart++ ;
					goto nextgrp ;
				}
			}
				/* Install missing news	*/
			if(install(ap,junkname) < 0) {
				printf("Missing") ;
				badart++ ;
			} else {
				newxref[i] = ap->ac_seq ;
				printf("Installed(%ld)",ap->ac_seq) ;
				goodart++ ;
			}
nextgrp:
			closedir(chkdp) ;
		}

				/* Check Xrefs	*/
		for(i = 0 ; i < nang ; i++)
			if(artxref[i] != ((goodart>1) ? newxref[i] : 0l))
				break ;
		if(i < nang) {
			line[0] = '\0' ;
			for(i = 0 ; i < nang ; i++)
				if(newxref[i] > 0l) {
					if(line[0] == '\0')
						sprintf(line,
							"Xref: %s",myname) ;
					sprintf(line+strlen(line)," %s:%ld",
						acttab[artng[i]].ac_name,
						newxref[i]) ;
				}
			printf("\nModified->\t%s",line) ;
			if(tmpname == NULL)
				tmpname = mktemp("/tmp/dejunkXXXXXX") ;
			if((tfp = fopen(tmpname,"w+r")) == NULL) {
				fprintf(stderr,"Unable to make tmp copy") ;
				done(1) ;
			}
			for(i = 0 ; i < nhlines ; i++)
				if(strncmp(cp=hdrlines[i],"Xref:",5)!=0)
					fprintf(tfp,"%s\n",cp) ;
			if(line[0] != '\0')
				fprintf(tfp,"%s\n",line) ;
			putc('\n',tfp) ;
			while(fgets(line,sizeof line,artfp))
				fprintf(tfp,"%s",line) ;
			fclose(artfp) ;
			fflush(tfp) ;
			if(ferror(tfp)) {
				fprintf(stderr,"Error writing temp article") ;
				done(1) ;
			}
			if((artfp = fopen(junkname,"w")) == NULL) {
				fprintf(stderr,
				    "Unable to reopen article for write\n") ;
				done(1) ;
			}
			rewind(tfp) ;
			while(fgets(line,sizeof line,tfp))
				fprintf(artfp,"%s",line) ;
			fclose(tfp) ;
		}
		printf("\n") ;
		fclose(artfp) ;
		if(badart == 0) {
			unlink(junkname) ;
		}
	}
	done(0) ;
}

/*
 * Common exit path: remove the temporary article copy if one was ever
 * named, then terminate the program with status `rt'.
 */
done(rt)
{
	char *scratch = tmpname ;

	if(scratch != NULL) {
		unlink(scratch) ;
	}
	exit(rt) ;
}

/*
 * Install article file `name' in the newsgroup described by `ap'.
 *
 * The group's sequence number in the active file is first read back and
 * compared with the in-core copy; if they agree the number is
 * incremented on disk and the article is hard-linked into the group's
 * spool directory under the new number.
 *
 * Returns 0 on success (ap->ac_seq is then the article's number) and
 * -1 when the active file does not check out, when the five digit
 * sequence field would overflow, or when the link cannot be made.
 * A failure to write the active file is fatal (done(1)).
 */
int
install(ap,name)
register struct act *ap ;
char *name ;
{
	register char *cp ;
	long newseq ;
	char destname[120] ;	/* ngtodir() path + '/' + 5 digits */
	char numbuf[10] ;

	newseq = ap->ac_seq+1 ;
	/* The field is exactly five digits wide; a sixth digit would
	 * overwrite the newline that ends the active file line. */
	if(newseq > 99999l) {
		fprintf(stderr,"Sequence number overflow in %s\n",
			ap->ac_name) ;
		return(-1) ;
	}
	sprintf(destname,"%s/%ld",ngtodir(ap->ac_name),newseq) ;

	/* Check-read the sequence number currently in the file */
	cp = NULL ;
	if(fseek(actfp,ap->ac_rnd,0) >= 0
	    && fgets(numbuf,sizeof numbuf,actfp) != NULL)
		cp = index(numbuf,'\n') ;
	if(cp != NULL)
		*cp = '\0' ;
	if(cp == NULL
	    || ftell(actfp) != ap->ac_rnd + 6
	    || (!islegal(numbuf))
	    || ap->ac_seq != atol(numbuf)) {
		fprintf(stderr,"Something wrong checkreading active\n") ;
		return(-1) ;
	}

	/* Update the number in place */
	if(fseek(actfp,ap->ac_rnd,0) < 0) {
		fprintf(stderr,"Problem writing active file\n") ;
		done(1) ;
	}
	fprintf(actfp,"%05ld",newseq) ;
	fflush(actfp) ;
	if(ferror(actfp)) {
		fprintf(stderr,"Problem writing active file\n") ;
		done(1) ;
	}
	/* The number is used up on disk from here on, whether or not the
	 * link succeeds.  Keep the in-core copy in step, otherwise every
	 * later check-read for this group would fail. */
	ap->ac_seq = newseq ;
	if(link(name,destname) < 0) {
		perror("making link") ;
		return(-1) ;
	}
	return(0) ;
}

/*
 * Is `name' a plausible article file name / sequence number?
 * Returns 1 when the string consists of at most five characters, all
 * of them ASCII decimal digits, and 0 otherwise.  Note that the empty
 * string is accepted.
 */
int
islegal(name)
char *name ;
{
	register int n ;

	for(n = 0 ; n < 5 && name[n] != '\0' ; n++) {
		/* '0'..'9' is exactly "ASCII and a digit" */
		if(name[n] < '0' || name[n] > '9')
			return(0) ;
	}
	return(name[n] == '\0') ;	/* Nothing may follow	*/
}

char sysline[100] ;		/* Command line handed to system()	*/

/*
 * Open directory `name', creating it (and, recursively, any missing
 * parent directories) when it does not exist.
 *
 * Directories are created with the mkdir command through system() and
 * then given the mode, owner and group remembered in new_mod, new_uid
 * and new_gid; the result is verified with stat().
 *
 * Because the path ends up in a shell command and is derived from
 * article headers, it is only accepted when it fits in sysline and
 * consists of letters, digits and the characters "/._-+".
 *
 * `name' is modified temporarily while the parent is handled and is
 * restored before return.
 *
 * Returns the open directory, or NULL when the name is refused.
 * Failure to create or protect a directory is fatal (done(1)).
 */
DIR * dirallo(name)
char * name ;
{
	register char *cp ;
	register int i = 0 ;
	DIR * dp ;
	struct stat sbuf ;

	for(;;) {
		if((dp = opendir(name)) != NULL)
			return(dp) ;
		if(i) {
			fprintf(stderr,"Unable to create %s\n",name) ;
			done(1) ;
		}

				/* Never hand the shell anything odd	*/
		if(strlen(name) + sizeof "mkdir " > sizeof sysline) {
			fprintf(stderr,"Directory name too long: %s\n",name) ;
			return(NULL) ;
		}
		for(cp = name ; *cp ; cp++) {
			if(isascii(*cp) && isalnum(*cp))
				continue ;
			if(index("/._-+",*cp) != NULL)
				continue ;
			fprintf(stderr,"Unsafe directory name: %s\n",name) ;
			return(NULL) ;
		}

				/* Make sure the parent exists.  A name
				 * without a parent component (no '/', or
				 * only the leading one) has nothing to
				 * create above it. */
		cp = rindex(name,'/') ;
		if(cp != NULL && cp != name) {
			*cp = '\0' ;
			if((dp = dirallo(name)) != NULL)
				closedir(dp) ;
			*cp = '/' ;
		}
		sprintf(sysline,"mkdir %s",name) ;
		i = system(sysline) ;
		printf("'%s' returns %d\n",sysline,i) ;
		chmod(name,new_mod) ;
		chown(name,new_uid,new_gid) ;

				/* Check that directory is correctly allocated */
		if( stat(name,&sbuf) < 0
				/* If uid is not correct - complain
				 * only if modes are different for owner
				 * and others	*/ 
		   || (sbuf.st_uid != new_uid
			&& ((new_mod & 0700) >> 6) != (new_mod & 07))
				/* If gid is not correct - complain
				 * only if modes are different for group
				 * and others	*/ 
		   || (sbuf.st_gid != new_gid
			&& ((new_mod & 070) >> 3) != (new_mod & 07))
		   || (sbuf.st_mode & 0777) != new_mod ) {
			fprintf(stderr,"Directory allocation failed\n") ;
			done(1) ;
		}
		i = 1 ;		/* No more retries	*/
	}
}

/*
 * Translate newsgroup name `ng' into the path of its spool directory:
 * SPOOLDIR, a slash, and the name with every '.' replaced by '/'.
 *
 * Returns a pointer to a static buffer that is overwritten by the next
 * call.  A name too long for that buffer is a fatal error (done(1))
 * instead of being allowed to overrun it.
 */
char *
ngtodir(ng)
register char *ng ;
{
	static char line[100] ;
	register char *cp ;

	if(strlen(SPOOLDIR) + 1 + strlen(ng) >= sizeof line) {
		fprintf(stderr,"Newsgroup name too long: %s\n",ng) ;
		done(1) ;
	}
	strcpy(line,SPOOLDIR) ;
	cp = line + strlen(line) ;
	*cp++ = '/' ;
	while((*cp = *ng++) != '\0')
		if(*cp++ == '.')
			cp[-1] = '/' ;
	return(line) ;
}

/*
 * Find newsgroup `cp' in acttab and return its index.
 *
 * The first ini_ng entries (from the active file) are sorted and are
 * searched by bisection; entries added during this run follow them and
 * are searched linearly.
 *
 * When the name is unknown and `doalloc' is zero, -1 is returned.
 * When `doalloc' is non-zero a new entry is appended, marked "not
 * wanted" (ac_seq == -1), and the user is asked once whether the group
 * should be created:
 *	y - append "name 00000" to the active file, make the entry
 *	    active and create the spool directory
 *	q - quit the program
 *	anything else (including end of file) - leave the group unwanted
 * A name that is empty or contains a blank or tab cannot be stored in
 * the active file (blank separates its fields); such a name is left
 * unwanted without asking.
 *
 * With `doalloc' set the return value is always a valid index; running
 * out of table space or memory is fatal (done(1)).
 */
int
lookng(cp,doalloc)
register char *cp ;
int doalloc ;
{
	register int k, l, r, i ;
	register char *cp1 ;
	int c ;
	DIR * dp ;
	char line[100] ;
				/* Binary search initial ng table */
	l = 0 ; r = ini_ng -1 ;
	while(l <= r) {
		k = (l + r) >> 1 ;
		i = strcmp(cp,acttab[k].ac_name) ;
		if (i <= 0) r = k - 1 ;
		if (i >= 0) l = k + 1 ;
	}
	/* On a match both bounds moved past k, leaving a gap of two */
	if(l - r >= 2)
		return(k) ;	/* Found in binary search	*/

				/* Linear search in additional ng table */
	for(k = ini_ng ; k < tot_ng ; k++) {
		if(!strcmp(cp,acttab[k].ac_name))
			return(k) ;
	}

				/* Not found - possibly insert	*/
	if(!doalloc)
		return(-1) ;
	if(k >= MAXNG) {
		fprintf(stderr,"Too many newsgroups\n") ;
		done(1) ;
	}
	if((cp1 = (char *)malloc(strlen(cp)+1)) == NULL) {
		fprintf(stderr,"Out of memory for newsgroup names\n") ;
		done(1) ;
	}
	tot_ng = k+1 ;
	acttab[k].ac_name = strcpy(cp1,cp) ;
	acttab[k].ac_seq = (long) (-1) ;
	if(*cp == '\0' || index(cp,' ') != NULL || index(cp,'\t') != NULL) {
		fprintf(stderr,"Ignoring malformed newsgroup name '%s'\n",cp) ;
		return(k) ;
	}
	fprintf(stderr,"Do you wish to add newsgroup %s [yn]",cp) ;
	line[0] = 0 ;
	/* Bounded read; like gets() it consumes the whole answer line so
	 * that leftovers are not taken as the answer to the next question */
	if(fgets(line,sizeof line,stdin) == NULL)
		line[0] = 0 ;
	else if(index(line,'\n') == NULL)
		while((c = getchar()) != EOF && c != '\n')
			;
	switch(line[0]) {
	case 'y':
	case 'Y':
		if(fseek(actfp,0l,2) < 0
		    || fprintf(actfp,"%s 00000\n",cp) < 0
		    || fflush(actfp) < 0
		    || ferror(actfp)) {
			fprintf(stderr,"Unable to append to active file\n") ;
			break ;
		}
		acttab[k].ac_seq = 0 ;
		/* Sequence field = the last six bytes just written */
		acttab[k].ac_rnd = ftell(actfp) - 6l ;
		if((dp = dirallo(ngtodir(cp))) != NULL)	/* create dirs */
			closedir(dp) ;
		break ;
	case 'n':
	case 'N':
		break ;
	case 'q':
	case 'Q':
		done(1) ;
		break ;
	}
	return(k) ;
}

/* Get article header (We know inews puts one header on each line */

hread()
{
	register char *cp, *linep ;
	register i ;

	ng = NULL ; xref = NULL ; title = NULL ;
	linep = hdrbuf ;
	nhlines = 0 ;
	for(;;) {
		if(fgets(linep, &hdrbuf[HDRMAX] - linep, artfp)==NULL)
			return(NULL) ;
		cp = linep + strlen(linep) ;
		if(cp[-1] != '\n')
			return(NULL) ;	/* Too big header	*/
		cp[-1] = '\0' ;	/* Clobber newline	*/
		if(cp == linep+1)
			break ;	/* Empty line - end of header	*/
		hdrlines[nhlines++] = linep ;
		if(ng == NULL && strncmp(linep,"Newsgroups: ",12)==0)
			ng = linep+12 ;
		if(xref == NULL && strncmp(linep,"Xref: ",6) == 0)
			xref = linep+6 ;
		if(title == NULL && strncmp(linep,"Subject: ",9) == 0)
			title = linep+9 ;
		linep = cp ;
	}
	return(nhlines) ;
}