tom (01/10/83)
Well, here is the news secretary program I promised you. I hope a few of you will find it useful. If you are running an old version of news, you might want to double check that the maximum length of a .newsrc line is LBUFLEN and that LBUFLEN is defined to be something reasonable (like 1024). The larger it is, the more files this program can exclude for you. This program was written under 4.0 BSD, but I don't think I used anything that wasn't standard version 7. If you enjoy it, hate it, or plan to redistribute it, please let me know. Till then I remain. Tom Neuendorffer decvax!idis!tom or mcnc!idis!tom or floyd!idis!tom P.S. The manual page follows the source. /* newsec.c News secretary program by Tom Neuendorffer University of Pittsburgh decvax \ mcnc !idis!tom floyd / This program reads unread netnews articles and searches for phrases in an exclude file. If any are found, it modifys .newsrc so that these articles are not displayed by readnews(1). Options: -t process title only -h process header up through title -b process the first BUFFSZ characters -n <newsgroup> process only the named newsgroup -f <file> read the excluded phrases from named file -v verbose option -c compact option, removes newsgroup lines from .newsrc if they have no corresponding directory in NEWSDIR The exclude file should have its phrases listed one per line with significant blanks only. compile 'cc -O -o newsec newsec.c' */ /* The following may be changed according to your system and situation. Note that some changes may require changes to the manual page. */ #include <sys/types.h> #include <sys/stat.h> #include <sys/dir.h> #include <stdio.h> #include <ctype.h> #include "/usr/src/local/news/src/defs.h" /* the readnews defs.h file. If the defs file is not available, comment out this ^ line and take care in defining LBUFLEN and NEWSRC below. */ #ifndef LBUFLEN #define LBUFLEN 1024 /* should be the same as LBUFLEN in ?/news/src/defs.h ie. the maximum length of a .newsrc line */ #endif #ifndef NEWSRC #define NEWSRC ".newsrc" /* name of .newsrc file in home directory */ #endif #define NEWSDIR "/usr/spool/news" /* the netnews spool directory */ #define WARN 1 /* if defined, then a warning if printed if a .newsrc line becomes to long to add any more exceptions too. If not defined,warnings are printed only with the verbose option Under no circumstance is the .newsrc made too long for readnews. */ #define BUFFSZ BUFSIZ /* the maximum number of characters to read with the -b option */ /* The following files default to the $HOME directory */ #define NEWSTMP ".newsrc.tmp" /* name of temporary file. Must be on same device as $HOME */ #define NEWSOLD ".newsrc.old" /* name to be given to old .newsrc file. May be undefined if no backup is desired */ #define NEWSEXC ".newsexc" /* name of the default exclude file */ #define MAXEX 1024 /* maximum number of excluded strings */ #define TDEFAULT 't' /* the default search type (see below) */ #define MAXART 350 /* the maximum number of articles that one might exclude from a given newsgroup */ /* End of user definable definitions */ #define start(C) ((int)(C- 'a') * nument / 25) #define MIN(A,B) (A>B)? B:A char *cpt[MAXEX]; struct direct dirr; struct stat sta; int verb,nument; char nsin[LBUFLEN],nsout[LBUFLEN]; char *midnsout,*endnsout; main(argc,argv) int argc; char *argv[]; { int lo,sknum,flag,minus(),i,lnum[MAXART],cnum; unsigned siz; char *eb,*gg,*og,*grp,*ed,*cp,*cen,*malloc(),*c,*getenv(); char linein[BUFFSZ]; FILE *f,*fo,*fx ,*dd, *fopen(); char *cf= NEWSEXC; char *ns= NEWSRC; char *nws= NEWSTMP; char type= TDEFAULT; int comp=0; grp = NULL; verb=0; eb= linein+BUFFSZ-1; endnsout=nsout + LBUFLEN - 7; for(i=1 ; i < argc ; i++){ /* process arguments */ if(*argv[i] != '-') { fprintf(stderr,"bad argument %s",argv[i]); exit(-3); } switch(*(argv[i]+1)){ case 't': /* process title only */ case 'h': /* process header up through title */ case 'b': /* process the first BUFFSZ characters */ type = *(argv[i]+1); break; case 'n': /* process only the newsgroup named in the next arg. */ grp = argv[++i]; break; case 'f': /* read the excluded phrases from the file named in the next arg. */ cf = argv[++i]; break; case 'v': /* verbose option */ verb++; break; case 'c': /* compact option */ comp++; break; default: fprintf(stderr,"bad argument %s",argv[i]); exit(-3); } } if(chdir(getenv("HOME")) == -1) fileerr("HOME"); if(stat(cf,&sta)== -1 ) fileerr(cf); siz=(unsigned)sta.st_size +1; if((cen=malloc(siz))== NULL) lerror("malloc failed"); if((f=fopen(cf,"r")) == NULL) fileerr(cf); nument=0; cp = cen; for(c=cen;(*c=fgetc(f)) != EOF;c++){ /* read in phrases to exclude */ if(isupper(*c)) *c= tolower(*c); if(*c=='\n') { *c='\0'; if(c-cp > 3)cpt[nument++]= cp; cp = c+1; } } sort(cpt,nument); fclose(f); if((f=fopen(ns,"r")) == NULL) fileerr(ns); if((fo=fopen(nws,"w"))== NULL) fileerr(nws); for(;fgets(nsin,LBUFLEN,f) != NULL;*midnsout++='\n',*midnsout='\0', fputs(nsout,fo)){ /* process .newsrc lines */ if(chdir(NEWSDIR)== -1) lerror("couldn't change to news directory"); sknum=0; midnsout=nsout; for(ed=nsin; *ed != '\0' && *ed != '\n';ed++) *midnsout++ = *ed; *midnsout='\0'; if(*ed == '\0') lerror(".newsrc line too long"); for(c=nsin;*c != ':' && *c != '\0';c++); if (*c== '\0') /* exclude unsubscribed to newsgroups */ continue; *c='\0'; if(grp != NULL && strcmp(grp,nsin) != 0) /* process only newsgroup specified (if any) */ continue; while(isdigit(*(--ed))); lnum[sknum++]=atoi(++ed); if((dd=fopen(nsin,"r") )== NULL) { if(verb) printf("notice, no newsgroup %s\n",nsin); if(comp){ if(verb) printf(" ...removing from .newsrc.\n"); *nsout='\0'; } continue; } if(chdir(nsin)== -1) lerror("couldn't change to newsgroup directory"); if(verb) printf("checking %s\n",nsin); fx = NULL; while(fread(&dirr,sizeof(struct direct),1,dd)){ /* read directory for newsgroup in .newsrc line */ cnum=atoi(dirr.d_name); if( dirr.d_ino < 1 || cnum <= lnum[0]) /* process only recent articles */ continue; if(fx != NULL)fclose(fx); if((fx=fopen(dirr.d_name,"r")) == NULL) fileerr(dirr.d_name); for(og=gg=linein;gg < eb && (*gg=fgetc(fx)) != EOF ; gg++){ if(isupper(*gg)) *gg=tolower(*gg); if( *gg == '\n'){ if(type != 'b' && strncmp(og,"title:",6) ==0) break; /* newlines are translated to spaces */ *gg = ' '; og = gg+1; } } *gg = '\0'; if(type == 't') gg = og+6; else gg = linein; if((lo=locate(gg)) ==-1) continue; if(verb){ printf("-- %s found in article %d\n",cpt[lo],cnum); } lnum[sknum++] = cnum; if(sknum == MAXART) { fprintf(stderr,"MAXART too small, redefine"); sknum--; } } if(fx != NULL)fclose(fx); fclose(dd); if(sknum ==1) { /* no articles found */ continue; } qsort(lnum,sknum,sizeof(int),minus); /* append .newsrc line */ flag=0; addbuf(',',lnum[1]) ; for(i=2 ; i < sknum;i++){ if(lnum[i-1] +1 == lnum[i]){ flag=1 ; continue; } if(flag==1) addbuf('-',lnum[i-1]); addbuf(',',lnum[i]); flag=0; } if(flag==1) addbuf('-',lnum[i-1]); } fclose(f); fclose(fo); if(chdir(getenv("HOME")) == -1) fileerr("HOME"); /* rename NEWSRC to NEWSOLD and NEWSTMP to NEWSRC */ #ifdef NEWSOLD unlink(NEWSOLD); if(link(ns,NEWSOLD) != 0) lerror("couldn't link old .newsrc"); #endif if(unlink(ns) != 0) lerror("couldn't unlink old .newsrc"); if(link(nws,ns) != 0) lerror("couldn't link new .newsrc"); unlink(nws); } addbuf(cc,ii) char cc; int ii; { /* appends the arguments to the new .newsrc output buffer if the line won't be made too long */ if(midnsout >= endnsout){ #ifndef WARN if(verb) #endif printf("Warning: can't add %c%d to %s line due to length\n" ,cc,ii,nsin); return; } sprintf(midnsout,"%c%d",cc,ii); while(*midnsout != '\0') midnsout++; } minus(a,b) int *a,*b; { return(*a - *b); } locate(c) char *c; { /* search the sorted exclude list for phrases found in the string pointed to by c. Returns the argument of the phrase found or -1 if none. A phrase is defined, in this case, to be series of letters (a-z) and non-letters (ascii) initiated by a letter and delineated by non-letter. All upper case is translated to lower case before comparison. */ register int i,j,k,l; while(*c != '\0'){ while(islower(*c) == 0 ){ if(*c++ == '\0') return(-1); } l=0; i=start(*c); for(i=MIN(nument-1,i);i < nument && i >= 0;){ j=strlen(cpt[i]); k=strncmp(c,cpt[i],j); if(k == 0){ if(isalpha(*(c+j)) == 0) return(i); k = 1; } if(k > 0 && l < 0) break; if(k < 0 && l > 0) break; l=k; i += (l > 0) ? 1:-1; } while(isalpha(*(++c))) ; } return(-1); } fileerr(c) char *c; { /* file error , report and exit */ fprintf(stderr,"couldn't open %s\n",c); exit(-1); } lerror(c) char *c; /* report error and exit */ { fputs(c,stderr); exit(-5); } sort(v,n) char *v[]; int n; { /* shell sort from Kernighan and Ritchie */ int gap,i,j; char *temp; for (gap=n/2;gap > 0;gap /=2) for(i=gap;i<n;i++) for(j=i-gap;j>=0;j -= gap){ if(strcmp(v[j],v[j+gap]) <=0) break; temp=v[j]; v[j]=v[j+gap]; v[j+gap]=temp; } } /* The manual page .TH NEWSEC 1 .UC 4 .SH NAME newsec \- news secretary .SH SYNOPSIS .B newsec [ .B \-thb ][ .B \-v ][ .B \-c ][ .B \-f <exclude file>][ .B \-n <newsgroup>] .br .SH DESCRIPTION .I Newsec reads a list of phrases from a file and checks them against unread netnews articles. Articles containing any of the phrases are noted in the .newsrc file so that, in subsequent calls to .I readnews (1), these articles are not displayed. .PP With the -t option (the default), only netnews titles are searched. The -h option causes the header up through the title to be searched; and the -b option searches all the words in the first buffer full of characters. The t,h, and b options are mutually exclusive. .PP The -v (verbose) option causes newsec to comment on the newsgroups it is searching and articles being deleted. Normally, output is only produced on error. .PP The -c option compacts .newsrc by removeing newsgroup lines for which there is no corresponding netnews directory. This is mainly useful for those who use the '-n all' option line. Only subscribed to newsgroups are removed. .PP The -f option specifies the file of phrases to be excluded. These phrases are expected to be listed 1 per line with significant spaces only. If no -f is specified, $HOME/.newsexc is the default. .PP The -n option specifies the newsgroup to act upon. The normal default is to act upon all the currently subscribed to newsgroups in .newsrc. If the specified newsgroup if not subscribed to, the command has no effect. .PP The word phrases searched are sets of letters (a-z) and non-letters that are initiated by a letter and delineated by non-letters. All letters are converted to lower case before comparison. New-lines are converted to spaces. .PP Upon exiting, newsec moves the old .newsrc file to .newsrc.old. Its effects may thus be canceled by restoring the old file. .SH FILES ~/.newsrc , ~/.newsexc .SH "SEE ALSO" readnews(1), newsrc(5) .SH BUGS Not tested on all flavors of netnews.