jf@sal.UUCP (Johan Finnved) (01/25/85)
At our site we had a somewhat outdated active file, so we got a lot of articles in newsgroup junk. I wrote a program to move articles from junk to their proper newsgroups, as though the newsgroups had been there when the articles were received.

Ambition level:
- If the articles were received into *some* newsgroups, duplicates are avoided.
- If newsgroups are missing, a question is asked *once* for each missing newsgroup, asking whether you want to create it. If the answer is 'no' you don't get repeated questions even if there are a lot of articles for the unwanted newsgroup.
- Xref: header lines are fixed to reflect the new situation.
- If there is no problem, the articles are unlinked from the junk directory.

Does anybody recognize the problem? Perhaps someone has already made such a program?
My program (entered from scratch today, so there may be some bugs left) is posted to net.sources.

Johan Finnved jf@sal.UUCP ...!decvax!mcvax!enea!sal!jf

------- cut here to get dejunk.c ------
/*
 *	De-junker
 *	dejunk version 1.0 18-Jan-85 Johan Finnved
 *
 *	Program to move articles from junk to their proper
 *	newsgroups, as though the newsgroups had been there when the
 *	articles were received.
 *	Ambition level:
 *	- If the articles were received into *some* newsgroups,
 *	  duplicates are avoided.
 *	- If newsgroups are missing, a question is asked *once* for
 *	  each missing newsgroup, asking whether you want to create it.
 *	  If the answer is 'no' you don't get repeated questions even
 *	  if there are a lot of articles for the unwanted newsgroup.
 *	- Xref: header lines are fixed to reflect the new situation.
 *	- If there is no problem, the articles are unlinked from
 *	  the junk directory.
 *
 *
 *	The program is tested only on our site running V7 and news version 2.10.1
 *
 *	On our site it is sufficient to have dejunk setuid news;
 *	our kernel allows setuid(geteuid())
 *
 *	Possible porting problems:
 *	The program assumes that the d_ino fields in the spool
 *	directories uniquely identify the articles.
* That is is an article appears in several places they * are *hard-linked* to the same article. * (This is a problem with eunice isn't it?) * * Program assumes index() and rindex() * * Program relies on relatively easy headers generated by inews * (no contiuation lines etc...) * * Almost all data areas are static, you may want to * have a smarter malloc sceme. * */ #include <ctype.h> #include <whoami.h> #include <stdio.h> #include <sys/types.h> #include <sys/dir.h> #include <sys/stat.h> #define MAXNG 1000 /* Max number of newsgroups */ #define ARTNGMAX 40 /* Max number of newsgroups in one article */ #define HDRMAX 3000 /* Max size (in bytes) of header */ #define HLINEMAX 50 /* Max header lines */ char ACTIVE[]= "/usr/lib/news/active" ; char SPOOLDIR[]= "/usr/spool/news" ; char * tmpname ; char *strcpy(), *strcat(), *index(), *rindex(), *mktemp() ; #ifndef READDIR #define N_D 3 struct dirsim { FILE * D_fp ; struct direct D_entry ; } dirsimtab[N_D] ; typedef struct dirsim DIR ; DIR * opendir(name) char *name ; { register DIR * dp ; for(dp = dirsimtab ; dp < &dirsimtab[N_D] ; dp++) { if(dp->D_fp == NULL) { dp->D_fp = fopen(name,"r") ; if(dp->D_fp == NULL) return(NULL) ; return(dp) ; } } fprintf(stderr,"diropen out of slots\n") ; abort() ; } closedir(dp) register DIR *dp ; { fclose(dp->D_fp) ; dp->D_fp = NULL ; } struct direct * readdir(dp) register DIR *dp ; { while(fread(&dp->D_entry,sizeof(struct direct),1,dp->D_fp) == 1) { if(dp->D_entry.d_ino == 0) continue ; return(&dp->D_entry) ; } return(NULL) ; } #endif DIR * dirallo() ; char * ngtodir() ; struct act { char *ac_name ; /* Name of newsgroup */ long ac_rnd ; /* Random address of seq */ long ac_seq ; /* Local sequence number or * -1 is newsgroup shouldn't be * created */ } acttab[MAXNG] ; int ini_ng ; /* Initial number of newsgroups */ int tot_ng ; /* Total number of names in acttab */ int new_mod, new_uid, new_gid ; /* Protection info for news directory */ FILE * actfp ; FILE * artfp ; int nang ; /* Number of 
newsgroups in this article */ int nhlines ; /* Number of header lines */ long artxref[ARTNGMAX] ; /* Xref numbers found in article */ long newxref[ARTNGMAX] ; /* Xref numbers that are found by searching */ int artng[ARTNGMAX] ; /* Newsgroups in article (index in acttab) */ char *ng, *xref, *title ; /* Interesting header pointers */ char hdrbuf[HDRMAX] ; /* Buffer to store header */ char * hdrlines[HLINEMAX] ; /* Array of line starts */ char myname[] = sysname ; int mynamez ; namecmp(ap1,ap2) struct act *ap1, *ap2 ; { return(strcmp(ap1->ac_name,ap2->ac_name)) ; } main() { register char *cp, *cp1, *cpe ; register i, j ; char line[100] ; char junkname[100] ; DIR * junkdp ; DIR * chkdp ; FILE * tfp ; struct stat sbuf ; struct direct * dirp ; ino_t artino ; int goodart, badart ; register struct act * ap ; setgid(getegid()) ; setuid(geteuid()) ; /* If the system allows it */ mynamez = strlen(myname) ; if((actfp = fopen(ACTIVE,"r+w")) == NULL) { fprintf(stderr,"Unable to open active file\n") ; done(1) ; } if(fstat(fileno(actfp),&sbuf) < 0) { fprintf(stderr,"Unable to fstat active file\n") ; done(1) ; } if((cp1 = cp = (char *) malloc((int) sbuf.st_size)) == NULL) { fprintf(stderr,"Unable to allocate in-core active file copy") ; done(1) ; } cpe = cp + sbuf.st_size ; if(fread(cp,1,cpe-cp,actfp) != cpe-cp) { fprintf(stderr,"Unable to read active file\n") ; done(1) ; } while(cp < cpe) { acttab[ini_ng].ac_name = cp ; while(*cp++ != ' ' && cp < cpe) ; cp[-1] = '\0' ; acttab[ini_ng].ac_rnd = cp - cp1 ; /* rnd adr of seq */ acttab[ini_ng].ac_seq = atol(cp) ; if(cp[5] != '\n') { fprintf(stderr,"Strange line in active ng=%s\n", acttab[ini_ng].ac_name) ; done(1) ; } cp += 6 ; if(ini_ng++ >= MAXNG) { fprintf(stderr,"Too many newsgroups\n") ; done(1) ; } } qsort(acttab,tot_ng=ini_ng,sizeof(struct act),namecmp) ; sprintf(line,"%s/junk",SPOOLDIR) ; if(stat(line,&sbuf) < 0) { fprintf(stderr,"Unable to stat junk directory") ; done(1) ; } new_mod = sbuf.st_mode & 0777 ; new_uid = 
sbuf.st_uid ; new_gid = sbuf.st_gid ; if((junkdp = opendir(line)) == NULL) { fprintf(stderr,"Unable to open %s directory\n",line) ; done(1) ; } while((dirp = readdir(junkdp)) != NULL) { if(!islegal(dirp->d_name)) continue ; artino = dirp->d_ino ; sprintf(junkname,"%s/junk/%s",SPOOLDIR,dirp->d_name) ; if(stat(junkname,&sbuf) < 0 || (sbuf.st_mode & S_IFMT) != S_IFREG) continue ; if((artfp = fopen(junkname,"r")) == NULL) continue ; if(hread()== NULL) { /* Get the article */ fprintf(stderr,"%s garbled\n",junkname) ; fclose(artfp) ; continue ; } for(nang = 0 , cp = strcpy(line,ng); *cp ;) { cp1 = cp ; artxref[nang] = 0 ; newxref[nang] = 0 ; while(*cp && *cp !=',') cp++ ; if (*cp == ',') *cp++ = '\0' ; artng[nang++] = lookng(cp1,1) ; } if(xref != NULL && !strncmp(xref,myname,mynamez)) { for(cp = strcpy(line,xref+mynamez+1) ; *cp ;) { if((cp1 = index(cp,':')) == NULL) break ; *cp1++ = '\0' ; if((j = lookng(cp,0)) >= 0) for(i = 0 ; i < nang ; i++) if(artng[i] == j) { artxref[i] = atol(cp1) ; break ; } for(cp=cp1 ; *cp && *cp++ != ' ' ; ) ; } } printf("%s: %s\n",dirp->d_name,title) ; j = 0 ; goodart = 0 ; badart = 0 ; for(i = 0 ; i < nang ; i++) { ap = &acttab[artng[i]] ; if(++j > 2) { j = 1 ; printf("\n") ; } printf("\t%s:",ap->ac_name) ; if(ap->ac_seq < 0l) { /* Inactive newsgroup */ printf("Skipped") ; /* Note that badart * is not incremented * since we don't want this newsgroup */ continue ; } if((chkdp = dirallo(ngtodir(ap->ac_name))) == NULL){ printf("no directory") ; badart++ ; continue ; } while((dirp = readdir(chkdp)) != NULL) { if(!islegal(dirp->d_name)) continue ; if(dirp->d_ino == artino) { printf("Ok(%s)",dirp->d_name) ; newxref[i] = atol(dirp->d_name) ; goodart++ ; goto nextgrp ; } } /* Install missing news */ if(install(ap,junkname) < 0) { printf("Missing") ; badart++ ; } else { newxref[i] = ap->ac_seq ; printf("Installed(%ld)",ap->ac_seq) ; goodart++ ; } nextgrp: closedir(chkdp) ; } /* Check Xrefs */ for(i = 0 ; i < nang ; i++) if(artxref[i] != ((goodart>1) 
? newxref[i] : 0l)) break ; if(i < nang) { line[0] = '\0' ; for(i = 0 ; i < nang ; i++) if(newxref[i] > 0l) { if(line[0] == '\0') sprintf(line, "Xref: %s",myname) ; sprintf(line+strlen(line)," %s:%ld", acttab[artng[i]].ac_name, newxref[i]) ; } printf("\nModified->\t%s",line) ; if(tmpname == NULL) tmpname = mktemp("/tmp/dejunkXXXXXX") ; if((tfp = fopen(tmpname,"w+r")) == NULL) { fprintf(stderr,"Unable to make tmp copy") ; done(1) ; } for(i = 0 ; i < nhlines ; i++) if(strncmp(cp=hdrlines[i],"Xref:",5)!=0) fprintf(tfp,"%s\n",cp) ; if(line[0] != '\0') fprintf(tfp,"%s\n",line) ; putc('\n',tfp) ; while(fgets(line,sizeof line,artfp)) fprintf(tfp,"%s",line) ; fclose(artfp) ; fflush(tfp) ; if(ferror(tfp)) { fprintf(stderr,"Error writing temp article") ; done(1) ; } if((artfp = fopen(junkname,"w")) == NULL) { fprintf(stderr, "Unable to reopen article for write\n") ; done(1) ; } rewind(tfp) ; while(fgets(line,sizeof line,tfp)) fprintf(artfp,"%s",line) ; fclose(tfp) ; } printf("\n") ; fclose(artfp) ; if(badart == 0) { unlink(junkname) ; } } done(0) ; } done(rt) { if(tmpname) unlink(tmpname) ; exit(rt) ; } install(ap,name) register struct act *ap ; char *name ; { register char *cp ; long newseq ; char destname[100] ; char numbuf[10] ; newseq = ap->ac_seq+1 ; sprintf(destname,"%s/%ld",ngtodir(ap->ac_name),newseq) ; fseek(actfp,ap->ac_rnd,0) ; if(fgets(numbuf,sizeof numbuf,actfp) == 0 || (cp = index(numbuf,'\n')) == NULL) goto rdfault ; *cp = '\0' ; if(ftell(actfp) != ap->ac_rnd + 6 || (!islegal(numbuf)) || ap->ac_seq != atol(numbuf)) { rdfault: fprintf(stderr,"Something wrong checkreading active\n") ; return(-1) ; } fseek(actfp,ap->ac_rnd,0) ; fprintf(actfp,"%05ld",newseq) ; fflush(actfp) ; if(ferror(actfp)) { fprintf(stderr,"Problem writing active file\n") ; done(1) ; } if(link(name,destname) < 0) { perror("making link") ; return(-1) ; } ap->ac_seq = newseq ; /* Committed to new seq number */ return(0) ; } islegal(name) char *name ; { register char *cp ; for(cp = name ; *cp && 
cp < name+5 ; cp++) if(!isascii(*cp) || !isdigit(*cp)) return(0) ; return(*cp == '\0') ; } char sysline[100] ; DIR * dirallo(name) char * name ; { register char *cp ; register i = 0 ; DIR * dp ; struct stat sbuf ; for(;;) { if((dp = opendir(name)) != NULL) return(dp) ; if(i) { fprintf(stderr,"Unable to create %s\n",name) ; done(1) ; } cp = rindex(name,'/') ; *cp = '\0' ; if((dp = dirallo(name)) != NULL) closedir(dp) ; *cp = '/' ; sprintf(sysline,"mkdir %s",name) ; i = system(sysline) ; printf("'%s' returns %d\n",sysline,i) ; chmod(name,new_mod) ; chown(name,new_uid,new_gid) ; /* Check that directory is correctly allocat */ if( stat(name,&sbuf) < 0 /* If uid is not correct - complain * only if modes are different for owner * and others */ || (sbuf.st_uid != new_uid && ((new_mod & 0700) >> 6) != (new_mod & 07)) /* If gid is not correct - complain * only if modes are different for group * and others */ || (sbuf.st_gid != new_gid && ((new_mod & 070) >> 3) != (new_mod & 07)) || (sbuf.st_mode & 0777) != new_mod ) { fprintf(stderr,"Directory allocation failed\n") ; done(1) ; } i = 1 ; /* No more retries */ } } char * ngtodir(ng) register char *ng ; { static char line[100] ; register char *cp ; strcpy(line,SPOOLDIR) ; cp = line + strlen(line) ; *cp++ = '/' ; while(*cp = *ng++) if(*cp++ == '.') cp[-1] = '/' ; return(line) ; } lookng(cp,doalloc) register char *cp ; { register k, l, r, i ; register char *cp1 ; DIR * dp ; char line[100] ; /* Binary search initial ng table */ l = 0 ; r = ini_ng -1 ; while(l <= r) { k = (l + r) >> 1 ; i = strcmp(cp,acttab[k].ac_name) ; if (i <= 0) r = k - 1 ; if (i >= 0) l = k + 1 ; } if(l - r >= 2) return(k) ; /* Found in binary search */ /* Linear search in additional ng table */ for(k = ini_ng ; k < tot_ng ; k++) { if(!strcmp(cp,acttab[k].ac_name)) return(k) ; } /* Not found - possibly insert */ if(!doalloc) return(-1) ; if(k >= MAXNG) { merr: fprintf(stderr,"Too many newsgroups\n") ; done(1) ; } if((cp1 = (char *)malloc(strlen(cp)+1)) == 
NULL) { goto merr ; } tot_ng = k+1 ; acttab[k].ac_name = strcpy(cp1,cp) ; acttab[k].ac_seq = (long) (-1) ; fprintf(stderr,"Do you wish to add newsgroup %s [yn]",cp) ; line[0] = 0 ; gets(line) ; switch(line[0]) { case 'y': case 'Y': if(fseek(actfp,0l,2) < 0 || fprintf(actfp,"%s 00000\n",cp) < 0 || fflush(actfp) < 0 || ferror(actfp)) { fprintf(stderr,"Unable to append to active file\n") ; break ; } acttab[k].ac_seq = 0 ; acttab[k].ac_rnd = ftell(actfp) - 6l ; if((dp = dirallo(ngtodir(cp))) != NULL) /* create dirs */ closedir(dp) ; break ; case 'n': case 'N': break ; case 'q': case 'Q': done(1) ; break ; } return(k) ; } /* Get article header (We know inews puts one header on each line */ hread() { register char *cp, *linep ; register i ; ng = NULL ; xref = NULL ; title = NULL ; linep = hdrbuf ; nhlines = 0 ; for(;;) { if(fgets(linep, &hdrbuf[HDRMAX] - linep, artfp)==NULL) return(NULL) ; cp = linep + strlen(linep) ; if(cp[-1] != '\n') return(NULL) ; /* Too big header */ cp[-1] = '\0' ; /* Clobber newline */ if(cp == linep+1) break ; /* Empty line - end of header */ hdrlines[nhlines++] = linep ; if(ng == NULL && strncmp(linep,"Newsgroups: ",12)==0) ng = linep+12 ; if(xref == NULL && strncmp(linep,"Xref: ",6) == 0) xref = linep+6 ; if(title == NULL && strncmp(linep,"Subject: ",9) == 0) title = linep+9 ; linep = cp ; } return(nhlines) ; }