sources-request@mirror.UUCP (11/06/86)
Submitted by: seismo!rick (Rick Adams) Mod.sources: Volume 7, Issue 50 Archive-name: 2.11news/Part10 # To extract, sh this file # # news 2.11 miscellaneous File 2 of 2 # if test ! -d misc then mkdir misc fi echo x - misc/keepnews 1>&2 sed 's/.//' >misc/keepnews <<'*-*-END-of-misc/keepnews-*-*' -From chuqui@nsc.UUCP (Chuq Von Rospach) Thu Jun 6 20:36:39 1985 -Relay-Version: version B 2.10.3 4.3bsd-beta 6/6/85; site seismo.UUCP -Posting-Version: version B 2.10.2 9/17/84 chuqui version 1.7 9/23/84; site nsc.UUCP -Path: seismo!nsc!chuqui -From: chuqui@nsc.UUCP (Chuq Von Rospach) -Newsgroups: net.sources -Subject: YA News Archiver -Message-ID: <2806@nsc.UUCP> -Date: 7 Jun 85 00:36:39 GMT -Date-Received: 7 Jun 85 06:25:58 GMT -Distribution: net -Organization: The Blue Parrot -Lines: 566 - -Here is a netnews archiver similar to the recently posted keepnews but -designed to work with much larger archives where the wonderful quadratic -search time feature of the Unix (Unix is a trademark of AT&T Bell Labs, -quadratic search times are a feature of Unix) becomes a real problem. This -archive also knows how to walk through a directory tree so you can simply -set it on /usr/spool/oldnews and let it do its work. There are lots of -other nifty things I call features (and you might, too) that make it a lot -easier to use than anything else I've seen set up to work on archives. Mine -simply outgrew any capability to do anything with about the same time I got -a request for information out of it. I found out (the hard way) that -keepnews wasn't terribly reliable working under 2.10.2, so I finally -decided to hack together my own. - -Comments, enhancements, bug fixes, etc... are welcome, but I can only work -on them on a time available basis... - -chuq -------- -# This is a shell archive. -# Remove everything above and including the cut line. -# Then run the rest of the file through sh. 
-#-----cut here-----cut here-----cut here-----cut here----- -#!/bin/sh -# shar: Shell Archiver -# Run the following text with /bin/sh to create: -# README -# Makefile -# savenews.c -# This archive created: Thu Jun 6 17:28:50 1985 -# By: Chuq Von Rospach (The Blue Parrot) -cat << \SHAR_EOF > README -Savenews -- - -Savenews is a short program designed to make handling of usenet archives -generated by 'expire -a' easier, and to make it possible to find stuff in -the archive once it is there. - -It was created by me when I had to get something out of my archives and -realized that there was no way I was going to find anything in 70 megabytes -of random data. It keeps a set of logs of the Subject lines of the articles -and stores the articles themselves in a hashed subdirectory format designed -to minimize the quadratic lookup hassles of the unix directory system -(This, of course, is a feature). - -It has been put into the public domain by national semiconductor, and -neither myself or national guarantee that this code even exists, much -less that it does anything useful. This, BTW, is a disclaimer. - -chuq von rospach -national semiconductor -nsc!chuqui -SHAR_EOF -cat << \SHAR_EOF > Makefile -# -# Makefile for savenews -# -CFLAGS = -g - -savenews: savenews.c - ${CC} ${CFLAGS} savenews.c -o savenews - -clean: - rm -f savenews - -lint: - lint -hx savenews.c -SHAR_EOF -cat << \SHAR_EOF > savenews.c -/* - * savenews filename [filename ...] - * - * Savenews is a program designed to clean up and compact a - * usenet archive. It will take the filename(s) given to it as arguments - * and save them in a netnews archive (defined by SAVENEWS, default is - * /usr/spool/savenews). - * - * This program was set up to do two main things: - * - * 1) compact out the useless parts of the message, specifically the lines - * in the header that don't serve a useful purpose in an archive. 
This - * is done by removing all but the following header lines: From, Date, - * Newsgroups, Subject, and Message-ID, and seems to save an average of - * 500 bytes an article. - * - * 2) keep the quadratic nature of unix(TM AT&T Bell labs) directory searches - * from making your life miserable. Storing a raw archive of - * net.unix-wizards is a silly thing to do, for example. What I do is - * create a one level subdirectory set to keep any one directory from - * getting too large, but this program is currently set so that there - * are enough directories to keep the total number of files in any one - * directory below about 150 in the largest parts of my archive. The - * algorithm I use is abs(atoi(Message-ID)%HASHVAL) with HASHVAL being - * prime. This quick and dirty hash gives you directories with the - * numbers 0 to HASHVAL-1, and about the same number of files in each - * given a random distribution of Message-ID numbers (not bad, in - * reality) - * - * The program will add the name of the file and the subject line of the - * article in a logfile in subdirectory LOGS, the filename being the - * newsgroup. - * - * As currently written, an article will be saved only to the first - * newsgroup in the Newsgroups header line. This means that something - * posted to 'net.sources,net.flame' will end up in net.sources, but that - * something posted to 'net.flame,net.sources' will end up in net.flame. - * I consider this a feature. Others may disagree. - * - * If an article is saved that has a duplicate message-ID of one already - * in the archive, then it will be saved by adding the character '_' and - * some small integer needed to make the filename unique. You can then - * use ls or find to look for these and see if they are duplicates (and - * remove them) or if they are simply botches by some other site (it does - * happen, unfortunately). - * - * This program will do intelligent things if given a non-news article, - * such as nothing.
Don't push it, though -- I haven't tried it on - * special devices, symbolic links, and other weirdies and it is likely - * to throw up on some of them since I didn't feel like protecting someone - * from trying to archive /dev (if tar can consider this a feature, so can - * I...) - * - * This program uses the 4.2 Directory routines (libndir). If you don't - * run 4.2, get ahold of a copy of the compatibility library for your - * system and use it, or hack up do_dir and is_dir to get around it - * if you believe in messing around with primitive hacks (I LIKE libndir) - * - * General usage: every so often run the program with - * 'savenews /usr/spool/oldnews'. Look through /usr/spool/savenews - * for duplicated articles and remove them, and then copy all of the - * stuff to tape. Remove everything except the LOGS directory, so that - * people can use grep to look for things in the archive. It should be - * easy to get things back off of tape and make the archive useful this - * way. Thinking about it, if you can't use the archive, you might as well - * not have it, which is why this program got written (I needed something - * out of my archive, and it took me a week to find it). - * - * This program is designed to run under 2.10.2, but should work under any - * B news system. Anyone else is on their own. This is in - * the public domain by the kindness of my employer, national - * semiconductor, but neither I nor national make any guarantee that it - * will work, that we will support this program, or even admit that it - * exists. This is called a disclaimer, and means that if you use this - * program, you are on your own. It DOES, however, pass lint cleanly, which - * is more than I can say for most stuff posted to the net. Feel free to - * fix, break, enhance, change, or do anything to this program except - * claim it to be your own (unless, of course, you break it...). Passing - * enhancements back to me would be nice, too.
- * - * chuq von rospach, national semiconductor (nsc!chuqui) - * - */ - -#include <stdio.h> -#include <sys/types.h> -#include <sys/stat.h> -#include <sys/dir.h> -#include <ctype.h> - -#define FALSE 0 -#define TRUE 1 -#define HASHVAL 37 /* hash value for sub-dirs. Prime number! */ -#define NUMDIRS 1024 /* number of dirs that can be pushed */ -#define SAVENEWS "/usr/spool/savenews" /* home of the archive */ -#define LOGFILE "LOGS" /* subdir in SAVENEWS to save logs in */ -#define JOBLOG "joblog" /* where log of this job is put */ -#define DIRMODE 0755 /* mkdir with this mode */ -#define COPYBUF 8192 /* block read/write buffer size */ - -char *Progname; /* name of the program for Eprintf */ -char line[BUFSIZ]; /* general purpose line buffer */ - -#define NUM_HEADERS 5 /* number of headers we are saving */ -#define GROUP_HEADER 1 /* where Newsgroup will be found */ -#define SUBJECT_HEADER 2 /* where Subject will be found */ -#define MESSAGE_HEADER 3 /* where Message-ID will be found */ -char header_data[NUM_HEADERS][BUFSIZ]; -char *headers[NUM_HEADERS] = -{ - "From:", - "Newsgroups:", - "Subject:", - "Message-ID:", - "Date:" -}; - -long num_saved = 0; /* number of articles saved */ -FILE *logfp; /* file pointer to joblog file */ - -char *rindex(), *strcat(), *pop_dir(), *strcpy(), *strsave(), *index(); - -main(argc,argv) -int argc; -char *argv[]; -{ - register int i; - char joblogfile[BUFSIZ]; - char *dirname; - - /* - * This removes and preceeding pathname so that - * anything printed out by Eprintf has just the - * program name and not where it came from - */ - if ((Progname = rindex(argv[0],'/')) == NULL) - Progname = argv[0]; - else - Progname++; - - if (argc == 1) { - fprintf(stderr,"Usage: %s file [file ...]\n",Progname); - exit(1); - } - - sprintf(joblogfile,"%s/%s",SAVENEWS,JOBLOG); - if ((logfp = fopen(joblogfile,"w")) == NULL) - fprintf(stderr,"Can't open %s, logging suspended\n",joblogfile); - - for (i = 1 ; i < argc; i++) { /* process each parameter */ - 
register int rc; - if ((rc = is_dir(argv[i])) == -1) - continue; - else if (rc == TRUE) - do_dir(argv[i]); - else - save_file(argv[i]); - } - while((dirname = pop_dir()) != NULL) { - do_dir(dirname); /* process whatever is left on dirstack */ - } - printf("Total articles saved was %d\n",num_saved); - exit(0); -} - -do_dir(dname) /* process a directory, push other directories on stack */ - /* to be handled recursively later */ -char *dname; -{ - DIR *dirp; - struct direct *dp; - char fullname[BUFSIZ]; - - if ((dirp = opendir(dname)) == NULL) { - Eprintf("can't opendir %s\n",dname); - return; - } - - for (dp = readdir(dirp); dp != NULL; dp = readdir(dirp)) { - register int rc; - - if(dp->d_namlen == 2 && !strcmp(dp->d_name,"..") - || (dp->d_namlen == 1 && !strcmp(dp->d_name,"."))) - continue; /* skip . and .. */ - - sprintf(fullname,"%s/%s",dname,dp->d_name); - if((rc = is_dir(fullname)) == -1) - continue; - else if (rc == TRUE) - push_dir(fullname); - else - save_file(fullname); - } - closedir(dirp); -} - -is_dir(name) -char *name; -{ - struct stat sbuf; - - if (stat(name,&sbuf) == -1) { - Eprintf("can't stat '%s'\n",name); - return(-1); - } - return((sbuf.st_mode & S_IFDIR) ? TRUE : FALSE); -} - -/* VARARGS */ -Eprintf(s1,s2,s3,s4,s5,s6,s7,s8,s9) -char *s1,*s2,*s3,*s4,*s5,*s6,*s7,*s8,*s9; -{ - if (logfp == NULL) - return; - fprintf(logfp,"%s: ",Progname); - fprintf(logfp,s1,s2,s3,s4,s5,s6,s7,s8,s9); - fflush(logfp); -} - -/* - * quick and dirty stack routines. - * - * push_dir(name) char *name; - * stores the given string in the stack - * char *pop_dir() - * returns a string from the stack, or NULL if none. 
- */ - -static char *dirstack[NUMDIRS]; -static int lastdir = 0; -static char pop_name[BUFSIZ]; - -push_dir(name) -char *name; -{ - if (lastdir >= NUMDIRS) { - Eprintf("push_dir overflow!\n"); - return; - } - dirstack[lastdir] = strsave(name); - if (dirstack[lastdir] == NULL) - { - Eprintf("malloc failed!\n"); - return; - } - lastdir++; -} - -char *pop_dir() -{ - if(lastdir == 0) - return(NULL); - lastdir--; - strcpy(pop_name,dirstack[lastdir]); - dirstack[lastdir] = NULL; - free(dirstack[lastdir]); - return(pop_name); -} - -char *strsave(s) -char *s; -{ - char *p, *malloc(); - - if ((p = malloc((unsigned)strlen(s)+1)) != NULL) - strcpy(p,s); - return(p); -} - -save_file(name) /* save the article in the archive */ -char *name; -{ - FILE *fp, *ofp, *fopen(), *output_file(); - register int i, nc; - char diskbuf[COPYBUF]; - - Eprintf("saving '%s'\n",name); - if ((fp = fopen(name,"r")) == NULL) { - Eprintf("can't open\n"); - return; - } - - if ((fgets(line,BUFSIZ,fp) == NULL)) { - Eprintf("0 length file\n"); - fclose(fp); - return; - } - if (!start_header(line)) { - Eprintf("not a news article\n"); - fclose(fp); - return; - } - read_header(fp); - if ((ofp = output_file()) == NULL) { - Eprintf("Can't save\n"); - fclose(fp); - return; - } - - for (i = 0; i < NUM_HEADERS; i++) - fprintf(ofp,"%s\n",header_data[i]); - fputc('\n',ofp); - - while ((nc = fread(diskbuf,sizeof(char),COPYBUF,fp)) != 0) - fwrite(diskbuf,sizeof(char),nc,ofp); /* copy body of article */ - fclose(ofp); - fclose(fp); - num_saved++; - return; -} - -start_header(s) /* see if this is the start of a news article */ -char *s; -{ - /* - * If this is coming from B news, the first line will 'always' be - * Relay-Version (at least, on my system). Your mileage my vary. 
- */ - if (!strncmp(s,"Relay-Version:",14)) - return(TRUE); - /* - * If you are copying a section of archive already archived by - * sendnews, then the first line will be From (unless you changed - * the headers data structure, then its up to you...) - */ - if (!strncmp(s,"From:",5)) - return(TRUE); - return(FALSE); -} - -/* - * By the time we get here, the first line will already be read in and - * checked by start_header(). If we are re-copying a savenews archive - * (which happens when you decide to play with HASHVAL, trust me) then - * we need to save the From line, so we can't just throw it away. Hence - * the funky looking do-while setup instead of something a bit more - * straightforward - */ -read_header(fp) -FILE *fp; -{ - register int i; - - for (i = 0; i < NUM_HEADERS; i++) - header_data[i][0] = '\0'; /* remove last articles data */ - - do { - char *cp; - - if (line[0] == '\n') /* always be a blank line after the header */ - return; - - for (i = 0 ; i < NUM_HEADERS; i++) { - if (!strncmp(headers[i],line,strlen(headers[i]))) { - strcpy(header_data[i],line); - if (cp = index(header_data[i],'\n')) - *cp = '\0'; /* eat newlines */ - } - } - } while (fgets(line,BUFSIZ,fp) != NULL); -} - -FILE *output_file() /* generate the name in the archive */ -{ - int hashval, copy = 0; - FILE *fp, *fopen(); - char *p, newsgroup[BUFSIZ], message_id[BUFSIZ]; - char shortname[BUFSIZ], filename[BUFSIZ], filename2[BUFSIZ]; - - /* get the first newsgroup */ - p = index(header_data[GROUP_HEADER],':'); /* move past Newsgroups */ - if (!p) { - Eprintf("Invalid newsgroups\n"); - return(NULL); - } - p++; /* skip the colon */ - while (isspace(*p)) - p++; /* skip whitespace */ - strcpy(newsgroup,p); - if (p = index(newsgroup,',')) - *p= '\0'; /* newsgroup now only has one name in it */ - - /* get the message-id */ - p = index(header_data[MESSAGE_HEADER],':'); - if (!p) { - Eprintf("Invalid message-id\n"); - return(NULL); - } - p++; /* skip the colon */ - while (isspace(*p)) - p++; /* 
skip whitespace */ - if (*p == '<' || *p == '(') - p++; - if (*p == '-') /* make negative article id numbers positive (hack) */ - p++; - strcpy(message_id,p); - if (p = index(message_id,'.')) /* trim off the .UUCP if any */ - *p = '\0'; - else if (p = index(message_id,'>')) /* or get the closing bracket */ - *p = '\0'; - else if (p = index(message_id,')')) /* or get the closing paren */ - *p = '\0'; - if (p = index(message_id,'@')) /* change nnn@site */ - *p = '.'; /* to nnn.site */ - - /* generate the hash value for the subdirectory */ - hashval = atoi(message_id) % HASHVAL; - - /* setup the filename to save to */ - sprintf(shortname,"%s/%d/%s",newsgroup,hashval,message_id); - sprintf(filename,"%s/%s",SAVENEWS,shortname); - while (exists(filename)) { /* make it unique if neccessary */ - - sprintf(shortname,"%s/%d/%s_%d",newsgroup,hashval,message_id,++copy); - sprintf(filename,"%s/%s",SAVENEWS,shortname); - } - - strcpy(filename2,filename); /* must chop off the filename */ - if (p = rindex(filename2,'/')) /* since we don't want to */ - *p = '\0'; /* to makeparents */ - makeparents(filename2); - - if ((fp = fopen(filename,"w")) == NULL) { - Eprintf("Can't open %s for output\n",filename); - return(NULL); - } - log(newsgroup,shortname); - return(fp); -} - -exists(name) -char *name; -{ - struct stat sbuf; - - if (stat(name,&sbuf) == -1) { - return(FALSE); - } - return(TRUE); -} - -makeparents(name) /* recursively make parent directories */ -char *name; -{ - char *p, buf[BUFSIZ]; - - if (exists(name)) - return; - strcpy(buf,name); - if (!(p = rindex(buf,'/'))) { - Eprintf("makeparents failed!\n"); - return; - } - *p = '\0'; - makeparents(buf); - mkdir(name,DIRMODE); -} - -log(group,name) /* write to the logfile */ -char *group, *name; -{ - char *subject, logfile[BUFSIZ]; - FILE *ofp, *fopen(); - - /* get the subject */ - subject = index(header_data[SUBJECT_HEADER],':'); - if (!subject) { - Eprintf("Invalid subject, no log entry\n"); - return; - } - subject++; /* skip 
the colon */ - while (isspace(*subject)) - subject++; /* skip whitespace */ - - /* generate the place where it goes */ - sprintf(logfile,"%s/%s",SAVENEWS,LOGFILE); - makeparents(logfile); - strcat(logfile,"/"); - strcat(logfile,group); - - if ((ofp = fopen(logfile,"a")) == NULL) - { - Eprintf("open failed on %s\n",logfile); - return; - } - fprintf(ofp,"%s\t%s\n", name, subject); - fclose(ofp); -} - -SHAR_EOF -# End of shell archive -exit 0 --- -:From the misfiring synapses of: Chuq Von Rospach -{cbosgd,fortune,hplabs,ihnp4,seismo}!nsc!chuqui nsc!chuqui@decwrl.ARPA - -The offices were very nice, and the clients were only raping the land, and -then, of course, there was the money... - - *-*-END-of-misc/keepnews-*-* echo x - misc/report.awk 1>&2 sed 's/.//' >misc/report.awk <<'*-*-END-of-misc/report.awk-*-*' -From cbosgd!ucbvax!usenet Mon Oct 13 05:39:17 1986 -Received: by beno.CSS.GOV (5.54/5.17) - id AA01253; Mon, 13 Oct 86 05:39:12 EDT -Received: from cbosgd.UUCP by seismo.CSS.GOV (5.54/1.14) - id AA03513; Mon, 13 Oct 86 05:39:11 EDT -Received: by cbosgd.ATT.COM (4.12/UUCP-Project/rel-1.0/06-28-86) - id AA08778; Mon, 13 Oct 86 03:44:14 edt -Received: by ucbvax.Berkeley.EDU (5.53/1.17) - id AA15536; Sun, 12 Oct 86 23:51:25 PDT -Date: Sun, 12 Oct 86 23:51:25 PDT -From: ucbvax!usenet (USENET News Administration) -Message-Id: <8610130651.AA15536@ucbvax.Berkeley.EDU> -To: cbosgd!backbone -Subject: a handy awk script for netnews log reports -Status: R - -Since Mark didn't have a copy of this, I will assume that most of the -rest of you don't either, and send it along. The comments should be -explanation enough... If not, ask me. - - Erik E. Fair ucbvax!fair fair@ucbarpa.berkeley.edu -------------------------------------------------------------------------------- -# USAGE: awk -f report_awk /usr/lib/news/log -# -# AWK script which eats netnews log files and produces a summary of USENET -# traffic and errors over the period of time that the log was collected. 
-# -# August 31, 1986 -# -# Erik E. Fair <dual!fair> -# Original Author, May 22, 1984 -# -# Brad Eacker <onyx!brad> -# Modified to simplify the record processing and to sort the output. -# -# Erik E. Fair <dual!fair> -# Modified to provide information about control messages. -# -# Erik E. Fair <dual!fair> -# Bug in system name extraction fixed. It was assumed that the fourth field -# (system name) always had a dot. local is one that doesn't. Some others -# (including 2.9 sites) don't either. -# -# Earl Wallace <pesnta!earlw> -# The "sent" field was changed from $5 to $6 in 2.10.2 (beta) -# named "newstats" and called with no arguments. -# -# Erik E. Fair <dual!fair> -# Remove support for 2.10.1, revise for 2.10.2 to provide information -# about junked articles, garbled articles, and bad newsgroups -# -# Erik E. Fair <ucbvax!fair> -# Minor bug fix to bad newsgroup reporting, also now counting ``old'' -# articles as junked, with counter for number that are `old'. -# -# Erik E. Fair <ucbvax!fair> -# Fix up the domain & local hosts support -# -# Erik E. Fair <ucbvax!fair> -# Fix up the counting of gatewayed material, add counting of "linecount" -# problems. Additional cleanup to make things faster. -# -BEGIN{ -# -# this is the prefix that your site uses in hostnames to identify your -# hosts (e.g. ucbarpa, ucbvax, su-score, mit-mc, mit-ai) -# You will probably want to change (or add to) the following line -# - lprefix = "ucb"; - lplen = length(lprefix); -# -# If you do bi-directional USENET gatewaying (e.g. mailing list -# to newsgroup where the material flows both ways freely), this -# should be the name in the sys file that you use to mail stuff -# to the mailing lists.
-# - pseudo = "internet"; - rptname = "(GATEWAY)"; -# -# Top level domain names and what network they represent -# (for use in counting stuff that is gatewayed) -# - domains["ARPA"] = rptname; - domains["arpa"] = rptname; - domains["EDU"] = rptname; - domains["edu"] = rptname; - domains["GOV"] = rptname; - domains["gov"] = rptname; - domains["COM"] = rptname; - domains["com"] = rptname; - domains["MIL"] = rptname; - domains["mil"] = rptname; - domains["ORG"] = rptname; - domains["org"] = rptname; - domains["NET"] = rptname; - domains["net"] = rptname; - domains["UK"] = rptname; - domains["uk"] = rptname; - domains["DEC"] = rptname; - domains["dec"] = rptname; - domains["CSNET"] = rptname; - domains["csnet"] = rptname; - domains["BITNET"] = rptname; - domains["bitnet"] = rptname; - domains["MAILNET"] = rptname; - domains["mailnet"] = rptname; - domains["UUCP"] = rptname; - domains["uucp"] = rptname; - domains["OZ"] = rptname; - domains["oz"] = rptname; - domains["AU"] = rptname; - domains["au"] = rptname; -# -# tilde chosen because it is ASCII 126 (don't change this) -# - invalid = "~~~~~~"; -# - accept[invalid] = 0; - reject[invalid] = 0; - xmited[invalid] = 0; - control[invalid] = 0; - junked[invalid] = 0; - neighbor[invalid] = 0; - badgrp = 0; - garbled = 0; - lcount = 0; - canfail = 0; - candup = 0; - insfail = 0; - old = 0; -} -# -# Skip some things that we won't bother with -# -/^$/ { next } -$5 == "from" { next } -$5 == "make" { next } -$5 == "Cancelling" { next } -# -# Or that we just count -# -$5 == "Inbound" { garbled++; next } -$6 == "cancel" { canfail++; next } -$6 == "Cancelled" { candup++; next } -$6 == "install" { insfail++; next } -# -# Articles sent to remote systems (this is what 2.10.2 (beta) says) -# -$6 == "sent" { - for(j = 8; j <= NF; j++) { - comma = index( $(j), ","); - if (comma != 0) $(j) = substr( $(j), 1, (comma - 1)); - if ($(j) == pseudo) $(j) = rptname; - else neighbor[$(j)] = 1; - xmited[$(j)]++; - } - next; -} -# -# Articles sent to 
remote systems (this is what 2.11 says) -# -$5 == "sent" { - for(j = 7; j <= NF; j++) { - comma = index( $(j), ","); - if (comma != 0) $(j) = substr( $(j), 1, (comma - 1)); - if ($(j) == pseudo) $(j) = rptname; - else neighbor[$(j)] = 1; - xmited[$(j)]++; - } - next; -} -# -# Get the name of the system that did this, -# taking into account that not everyone believes in domains. -# -{ -# if we get a route addr (we shouldn't, but...), take the last one -# - nhosts = split($4, hosts, "@"); - hostname = hosts[nhosts]; -# -# get the root domain name, and the hostname -# - ndoms = split(hostname, doms, "."); - domain = doms[ndoms]; - sys = doms[1]; -# -# check for local system, and if not that, then internet sites. -# special case the network name replacement of specific host names, -# such that the network name is there only on a `local' posting -# (which is really gatewaying in disguise) -# - if ($5 == "posted") { - prefix = substr(sys, 1, lplen); - if (prefix == lprefix) { - sys = "local"; - } else { - dom = domains[domain]; - if (dom) sys = dom; - } - } -} -# -# Duplicates & receiveds/posted & control messages -# -$5 == "posted" || $5 == "received" { - accept[sys]++; - if ($5 == "received") neighbor[sys] = 1; - nng = split($8, ngl, ","); - for(i = 1; i <= nng; i++) { - dot = index(ngl[i], "."); - if (dot) ng = substr(ngl[i], 1, (dot - 1)); - else ng = ngl[i]; - if (ng) newsgcnt[ng]++; - } - next; -} -$5 == "Duplicate" { reject[hostname]++; next } -$6 == "valid" { junked[sys]++; next } -$6 == "too" { junked[sys]++; old++; next } -$5 == "Unknown" { - x = length($7) - 2; - ng = substr($7, 2, x); - badng[ng]++; - badgrp++; - next; -} -# -# articles who actual line count differs from the Line: header count -# -$5 == "linecount" { - expect = $7; -# awk does very strange things with non-numeric characters in numbers - comma = index(expect, ","); - if (comma != 0) expect = substr(expect, 1, (comma - 1)); - got = $9; - diff = got - expect; - lcount++; - alc_host[sys] = 1; - 
neighbor[sys] = 1; - if (diff < 0) { - diff = 0 - diff; - a_nshort[sys]++; - a_short[sys] += diff; - if (a_smax[sys] < diff) a_smax[sys] = diff; - } else { - a_nlong[sys]++; - a_long[sys] += diff; - if (a_lmax[sys] < diff) a_lmax[sys] = diff; - } - next; -} -# -# articles who actual line count is Zero -# -$7 == "linecount" { - lcount++; - a_zero[sys]++; - reject[sys]++; - next; -} -# -# Control messages -# -$5 == "Ctl" { - ctot++; - control[sys]++; - ctlcnt[$(10)]++; - next; -} -# -# Print anything we didn't recognize, it's probably an error message. -# For the submitted report to USENET, do sed -e '1,/^$/d' file | inews -# so that this cruft doesn't get out the door. -# -{ - print; -} -# -# Summarize and print the report -# -END{ -# special processing for Duplicates, because we can't tell if -# they came from a netnews neighbor or from the gatewaying -# activities until we have processed the entire log. -# - for( hostname in reject ) { -# -# get the root domain name, and the hostname -# - ndoms = split(hostname, doms, "."); - domain = doms[ndoms]; - sys = doms[1]; - if (! neighbor[sys]) { - prefix = substr(sys, 1, lplen); - if (prefix == lprefix) { - sys = "local"; - } else { - dom = domains[domain]; - if (dom) sys = dom; - } - } - i = reject[hostname]; - reject[hostname] = 0; - reject[sys] += i; - } - - rtot = 0; - for( i in reject ) { - if (reject[i] > 0) { - list[i] = 1; - rtot += reject[i]; - } - } - - atot = 0; - for( i in accept ) { - list[i] = 1; - atot += accept[i]; - } - - xtot = 0; - for( i in xmited ) { - list[i] = 1; - xtot += xmited[i]; - } - - ctot = 0; - for( i in control ) { - list[i] = 1; - ctot += control[i]; - } - - jtot = 0; - for( i in junked ) { - list[i] = 1; - jtot += junked[i]; - } -# -# ctot is part of rtot, so we don't add it in to the grand total. 
-# - totarticles = atot + rtot; - if (totarticles == 0) totarticles = 1; - - printf("\nSystem \tAccept\tReject\tJunked\tXmit to\tControl\t%% total\t%% rejct\n"); - for( ; ; ) { -# selection sort - i = invalid; - for( j in list ) { - if ( list[j] > 0 && j < i ) i = j; - } - if ( i == invalid ) break; - list[i] = 0; -# -# control & junked are counted under accept. -# - sitetot = accept[i] + reject[i]; - if (sitetot == 0) sitetot = 1; - articles[i] = sitetot; -# -# What an 'orrible printf spec -# - printf("%-14s\t%6d\t%6d\t%6d\t%7d\t%7d\t%6d%%\t%6d%%\n", i, accept[i], reject[i], junked[i], xmited[i], control[i], (sitetot * 100) / totarticles, (reject[i] * 100) / sitetot); -# - } - printf("\nTOTALS \t%6d\t%6d\t%6d\t%7d\t%7d\t%6d%%\t%6d%%\n", atot, rtot, jtot, xtot, ctot, 100, (rtot * 100) / totarticles); - printf("\nTotal Articles processed %d", totarticles); - if (old) printf(", old %d", old); - if (garbled) printf(", garbled %d", garbled); - if (insfail) printf(", uninstallable %d", insfail); - printf("\n"); - - if (ctot) { - printf("\nControl Invocations\n"); - for( i in ctlcnt ) { - if (i == "cancel") { - printf("%-12s %6d", i, ctlcnt[i]); - if (canfail) printf(", %d failed", canfail); - if (candup) printf(", %d duplicate", candup); - printf("\n"); - } else { - printf("%-12s %6d\n", i, ctlcnt[i]); - } - } - } - - if (lcount) { - printf("\nReceived Article Length Problems\n"); - printf("System Zero Short Smax Savg Long Lmax Lavg Total %% Tot\n"); - for( i in alc_host ) { - nlong = a_nlong[i]; - nshort = a_nshort[i]; - if (nlong == 0) nlong = 1; - if (nshort == 0) nshort = 1; - lavg = a_long[i] / nlong; - savg = a_short[i] / nshort; - sitetot = (a_zero[i] + a_nshort[i] + a_nlong[i]); - printf("%-14s %5d %5d %5d %5d %5d %5d %5d %5d %4d%%\n", i, a_zero[i], a_nshort[i], a_smax[i], savg, a_nlong[i], a_lmax[i], lavg, sitetot, (sitetot * 100) / articles[i]); - } - } - - if (atot) { - printf("\nNetnews Categories Received\n"); - l = 0; - for( i in newsgcnt ) { - if (l < 
length(i)) l = length(i); - } - fmt = sprintf("%%-%ds %%6d\n", l); - for( ; ; ) { -# selection sort - max = 0; - for( j in newsgcnt ) { - if (newsgcnt[j] > max) { - i = j; - max = newsgcnt[j]; - } - } - if (max == 0) break; - printf(fmt, i, newsgcnt[i]); - newsgcnt[i] = 0; - } - } - - if (badgrp) { - printf("\nBad Newsgroups Received\n"); - l = 0; - for( i in badng ) { - if (l < length(i)) l = length(i); - } - fmt = sprintf("%%-%ds %%5d\n", l); - for( ; ; ) { -# selection sort - i = invalid; - for( j in badng ) { - if (badng[j] > 0 && j < i) i = j; - } - if (i == invalid) break; - printf(fmt, i, badng[i]); - badng[i] = 0; - } - } -} - *-*-END-of-misc/report.awk-*-* echo x - misc/restore.active 1>&2 sed 's/.//' >misc/restore.active <<'*-*-END-of-misc/restore.active-*-*' -: recreate the active file from readers .newsrc files -: and from the existing articles - -lib=${1-/usr/lib/news} -tmp=/tmp/$$ -: Find the highest numbered articles from the .newsrcs -cat `sed 's/[^:]*:[^:]*:[^:]*:[^:]*:[^:]*:// -s/:.*//' /etc/passwd | sort -u | sed 's;$;/.newsrc;' ` 2>/dev/null | -sed '/:/!d -s/:.*[,-]/ / -s/: */ /' >$tmp -: in case there are groups no-one reads, look in the list of newsgroups -sed 's/[ ].*/ 1/' $lib/newsgroups >>$tmp -sort +0 -1 +1nr $tmp | sort -m +0u -1 | sed 's/$/ 00001 y/ -/^fa/s/y$/n/' >$lib/active -: finally, scan the spool directory and fix up the active file. 
-$lib/expire -u
-rm -f $tmp
*-*-END-of-misc/restore.active-*-*
echo x - misc/sendnewsmail 1>&2
sed 's/.//' >misc/sendnewsmail <<'*-*-END-of-misc/sendnewsmail-*-*'
-From cbosg!ucbvax!decvax!ittvax!swatt Thu Mar 25 07:06:53 1982
-Date: Wed Mar 24 20:29:56 1982
-From: cbosg!ucbvax!decvax!ittvax!swatt
-Subject: sendnewsmail script
-Via: cbosgd.uucp (V3.73 [1/5/82]); 25-Mar-82 07:06:53-EST (Thu)
-Mail-From: cbosg received by cbosgd at 25-Mar-82 07:06:51-EST (Thu)
-To: cbosgd!mark
-Status: R
-
-
-Mark:
-
-I find the following handy as a mail interface to news; you don't have
-to remake the aliases database every time a new newsgroup gets formed.
-
-	- Alan
-=======================================================================
-#! /bin/sh
-: '/*********************************************************************
-	program:	sendnewsmail
-	description:	Send news items from mail
-	programmer:	Alan S. Watt
-			(ittvax!swatt)
-
-	Sccsid=@W@
-
-	usage:
-		Not invoked by user: called as program mail alias
-		News item title and newsgroup(s) are specified on the
-		mail subject line by:
-
-		Subj: <news item title> : <newsgroup> ...
-
-		Several (blank separated) newsgroups may be specified;
-		the news article will be submitted to each. There is
-		no way to embed a colon character in the title, so there
-		can only be one colon on the subject line.
-
-	arguments:
-		None
-
-	notes:
-		To install this, put it someplace safe from system updates
-		(I use /usr/lib/news), and put an alias in the system
-		mail alias file (/usr/lib/aliases) that names this program
-		as the alias for the user "news":
-
-			news:"|/usr/lib/news/sendnewsmail"
-
-		The characters double-quote, dollar, backquote and backslash
-		are removed from the header before it is parsed, because the
-		parsed values are handed to eval.
-
-	history:
-		11/11/81	original version
-		11/19/81	fixed to properly handle default newsgroup
-		03/13/82	changes to work with "B" netnews
-		03/35/82	Modest documentation changes
- *********************************************************************/'
-
-PROGRAM_NAME='sendnewsmail'
-VERSION_NUMBER='@I@'
-TOOL_LOG='/usr/advanced/logs/testlog'
-USAGE='mail news'
-
-
-: 'mail alias program to send news items through mail(1)'
-: 'need to get newsgroup and title from subject line'
-
-tempf=/tmp/news$$.tmp
-errorf=/tmp/news$$.err
-headf=/tmp/news$$.hdr
-
-: 'remove the temporary files however the script ends'
-trap 'rm -f $tempf $errorf $headf' 0
-trap 'rm -f $tempf $errorf $headf; exit 1' 1 2 15
-
-: 'copy standard input to a temporary file'
-cat >$tempf
-
-: 'Keep only the header, which ends on the first blank line (/^$/).
-   The characters that stay special inside double quotes are deleted
-   so that no text taken from the message can run as a command when
-   the assignments below are passed to eval.'
-sed '/^$/q' $tempf | tr -d '"$`\\' >$headf
-
-: 'read the header and grab title and newsgroups from the
-   Subject line. Grab the sender from the From line.
-   '
-eval `sed -n '
-/^Subj/ {
-	s/^Subj[^ :]*[ :] *\([^:]*\):\(.*\)/title="\1";newsgroup="\2"/p
-	s/^Subj[^ :]*[ :] *\([^:]*\)$/title="\1"/p
-}
-/^From/ {
-	s/^[fF]rom[: ] *\([^ ]*\).*/sender="\1"/p
-}
-' $headf`
-
-: 'default newsgroup to "general" if unspecified'
-case $newsgroup in
-'')	newsgroup=general ;;
-esac
-
-: 'make up something if the title unspecified'
-case $title in
-'')	title="News from mail" ;;
-esac
-
-: 'Submit the article to news.
-   $newsgroup is left unquoted on purpose: it may hold several
-   blank separated newsgroups, one argument each.'
-if sed "1,/^$/d" $tempf | inews -t "$title" -n $newsgroup >$errorf 2>&1
-then
-	: 'OK exit, do nothing'
-else
-	: 'On errors, return article together with error messages to user'
-	: 'Change this line if your mailer does not have a -s flag'
-	mail -s 'Rejected News Article' "$sender" <<!EOF
-
-The news article you submitted could not be accepted for the reasons:
-`cat $errorf`
-
-The text of the article you submitted was:
-`cat $tempf`
-!EOF
-
-fi
-
-: 'clean up'
-rm -f $tempf $errorf $headf
-
-
-
-
-
*-*-END-of-misc/sendnewsmail-*-*
echo x - misc/shar 1>&2
sed 's/.//' >misc/shar <<'*-*-END-of-misc/shar-*-*'
-for i
-do
-	echo "echo x - $i"
-	echo "sed 's/^X//' >$i <<'*-*-END-of-$i-*-*'"
-	sed 's/^/X/' "$i"
-	echo "*-*-END-of-$i-*-*"
-done
-echo exit
*-*-END-of-misc/shar-*-*
echo x - misc/trimlib 1>&2
sed 's/.//' >misc/trimlib <<'*-*-END-of-misc/trimlib-*-*'
-: if this is run once per day, it will save the last
-: weeks worth of news log files. You can, of course, comment
-: out some of the lines to save less
-: stop if the directory is missing rather than rotate files elsewhere
-cd /usr/lib/news || exit 1
-mv log.5 log.6
-mv log.4 log.5
-mv log.3 log.4
-mv log.2 log.3
-mv log.1 log.2
-mv log.0 log.1
-mv log log.0
-cp /dev/null log
-/etc/chown news log* history*
*-*-END-of-misc/trimlib-*-*
echo x - misc/unshar 1>&2
sed 's/.//' >misc/unshar <<'*-*-END-of-misc/unshar-*-*'
-From lee@unmvax.UUCP Sun Oct 28 16:56:42 1984
-Relay-Version: version B 2.10.2 10/19/84; site seismo.UUCP
-Posting-Version: version B 2.10.2 9/5/84; site unmvax.UUCP
-Path: seismo!cmcl2!lanl!unm-cvax!unmvax!lee
-From: lee@unmvax.UUCP
-Newsgroups: net.sources
-Subject: Program to un-shar netmaps without using a shell..
-Message-ID: <473@unmvax.UUCP>
-Date: 28 Oct 84 21:56:42 GMT
-Date-Received: 29 Oct 84 11:14:42 GMT
-Distribution: net
-Organization: Univ. of New Mexico, Albuquerque
-Lines: 336
-
-#ifndef lint
-char *Rcsid = "$Header: getmaps.c,v 1.4 84/10/13 18:19:13 lee Exp $";
-#endif
-
-/*
- * getmaps
- *
- * Get the net maps from USENET as published by Karen and Mark Horton, in
- * "shar" format. Because of paranoia the sh is not used but instead a DFA
- * recognizing the appropriate commands.
- * - * lee Ward 10/13/84 - */ - -#include <stdio.h> -#include <ctype.h> -#include <sys/types.h> -#include <sys/stat.h> -#include <sys/dir.h> - -char *mapgrp = "/usr/spool/news/net/news/map"; -char *mapdir = "/usr/lee/netmap/maps"; -char *seqfil = "/usr/lee/netmap/.seq"; -char *logfil = "/usr/lee/netmap/log"; - -char *usestr = "[-l logfil] [-g group] [-s seqfile] [-a archiv-dir]"; - -FILE *logsd = NULL; - -void domaps(), myabort(), log(), mkmaps(), getwrd(), logtime(); - -main(argc, argv) - int argc; - char *argv[]; -{ - int x; - FILE *seqsd; - char seqbuf[BUFSIZ]; - - for (x = 1; x < argc; x++) { - if (*argv[x]++ != '-') { - fprintf(stderr, "Bad usage\n"); - fprintf(stderr, "Usage: %s %s\n", argv[0], usestr); - exit(-1); - } - switch (*argv[x]) { - - case 'l': - logfil = argv[++x]; - break; - case 'g': - mapgrp = argv[++x]; - break; - case 's': - seqfil = argv[++x]; - break; - case 'a': - mapdir = argv[++x]; - break; - default: - fprintf(stderr, "Bad switch\n"); - fprintf(stderr, "Usage: %s %s\n", argv[0], usestr); - exit(-1); - } - } - - logsd = fopen(logfil, "a"); - - logtime("Start"); - - if (chdir(mapdir) != 0) - myabort("Could not change directory to %s", mapdir); - - seqbuf[0] = NULL; - if ((seqsd = fopen(seqfil, "r")) != NULL) { - if ((x = fread(seqbuf, sizeof(char), sizeof(seqbuf), - seqsd)) != 0) - seqbuf[x - 1] = NULL; - (void )fclose(seqsd); - } - if ((seqsd = fopen(seqfil, "a")) == NULL) - myabort("Could not open seq file for writing"); - (void )fseek(seqsd, 0L, 0); - - domaps(mapgrp, seqbuf, seqsd); - (void )fclose(seqsd); - - logtime("End"); -} - -void -domaps(grp, seqbuf, seqsd) - char *grp, *seqbuf; - FILE *seqsd; -{ - char nbuf[BUFSIZ], *nptr, *tptr; - struct direct **filst; - int nfils, x; - struct stat stbuf; - extern int scandir(), alphasort(); - extern char *strcpy(), *strncat(); - - if ((nfils = scandir(grp, &filst, (int (*)())NULL, alphasort)) == -1) - myabort("scandir failed"); - - (void )strcpy(nbuf, grp); - nptr = nbuf + strlen(nbuf); - 
*nptr++ = '/'; - *nptr = NULL; - nbuf[BUFSIZ] = NULL; - - for (x = 0; x < nfils; x++) { - if (strcmp(".", filst[x]->d_name) == 0 || - strcmp("..", filst[x]->d_name) == 0) - continue; - tptr = filst[x]->d_name; - while(*tptr && isdigit(*tptr)) - tptr++; - if (*tptr != NULL) - continue; - *nptr = NULL; - (void )strncat(nptr, filst[x]->d_name, - BUFSIZ - (nptr - nbuf) - 1); - if (stat(nbuf, &stbuf) != 0) { - log("Could not stat %s", nbuf); - continue; - } - if ((stbuf.st_mode & S_IFMT) == S_IFDIR) - continue; - if (strcmp(seqbuf, filst[x]->d_name) >= 0) - continue; - - mkmaps(nbuf); - (void )fseek(seqsd, 0L, 0); - (void )fwrite(filst[x]->d_name, sizeof(char), - strlen(filst[x]->d_name), seqsd); - (void )fputc('\n', seqsd); - (void )fflush(seqsd); - } -} - -void -mkmaps(file) - char *file; -{ - char buf[BUFSIZ], tofil[BUFSIZ], delim[BUFSIZ]; - int state, sizdel; - FILE *isd, *osd; - extern FILE *fopen(); - -#define SEARCH 1 -#define INAMAP 2 -#define SKIPPING 3 - - if ((isd = fopen(file, "r")) == NULL) { - log("Could not open %s. 
Skipping...", file); - return; - } - log("Unarchive %s", file); - - state = SEARCH; - while (fgets(buf, sizeof(buf) - 1, isd) != NULL) { - buf[sizeof(buf)] = NULL; - if (state == SEARCH) { - if (gotcat(buf, tofil, BUFSIZ, delim, BUFSIZ)) { - state = INAMAP; - sizdel = strlen(delim); - if ((osd = fopen(tofil, "w")) == NULL) { - log("Could not open %s", tofil); - state = SKIPPING; - } - } - continue; - } - if (strncmp(buf, delim, sizdel) == 0) { - state = SEARCH; - if (osd != NULL) - (void )fclose(osd); - continue; - } - if (state == SKIPPING) - continue; - fputs(buf, osd); - } - if (state != SEARCH) - log("Read/sync error on %s", file); - (void )fclose(isd); - -#undef SEARCH -#undef INAMAP -#undef SKIPPING -} - -/* - * gotcat - * - * Use a DFA to recognize - * cat << DELIM > OUT - * or - * cat > OUT << DELIM - * - */ - -/* Transition table for the DFA */ -int ttbl[9][4] = { - 1,-1,-1,-1, - -1,6,2,-1, - -1,-1,-1,3, - -1,4,-1,-1, - -1,-1,-1,5, - -1,-1,-1,-1, - -1,-1,-1,7, - -1,-1,8,-1, - -1,-1,-1,5, - }; - -gotcat(buf, tofil, tofilln, delim, delimln) - char *buf, - *tofil, - *delim; - int tofilln, - delimln; -{ - int state; - char *ptr; - - state = 0; /* Start state */ - while (state != -1 && state != 5) { - /* Eat up white */ - while (*buf != '\n' && (*buf == ' ' || *buf == '\t')) - buf++; - if (*buf == '>') { - buf++; - state = ttbl[state][1]; - continue; - } - if (*buf == '<' && *(buf + 1) == '<') { - buf += 2; - state = ttbl[state][2]; - continue; - } - if (*buf == 'c' && *(buf + 1) == 'a' && *(buf + 2) == 't') { - buf += 3; - state = ttbl[state][0]; - continue; - } - ptr = buf; - while (*buf != '\n' && *buf != ' ' && *buf != '\t') - buf++; - if (state == 2 || state == 8) - getwrd(ptr, buf, delim, delimln); - else if (state == 6 || state == 4) - getwrd(ptr, buf, tofil, tofilln); - state = ttbl[state][3]; - } - - if (state == 5) - return(1); - return(0); -} - -void -getwrd(fc, lc, buf, maxlen) - char *fc, - *lc, - *buf; - int maxlen; -{ - char *ptr, *t1ptr, *t2ptr; 
- - maxlen--; - maxlen = lc - fc > maxlen ? maxlen : lc - fc; - ptr = buf; - t1ptr = fc; - while (maxlen-- != 0) - *ptr++ = *t1ptr++; - *ptr = NULL; - - /* Strip quotes */ - ptr = buf; - while (*ptr != NULL) { - if (*ptr == '\\' && (*(ptr + 1) == '\'' || *(ptr + 1) == '"')) - ptr += 2; - else if (*ptr == '\'' || *ptr == '"') { - t1ptr = ptr; - t2ptr = ptr + 1; - while ((*t1ptr++ = *t2ptr++) != NULL) - ; - } else - ptr++; - } -} -/*VARARGS1*/ -void -myabort(s, a, b, c, d, e, f, g, h, i, j, k, l) - char *s; -{ - - if (logsd != NULL) { - fputs("ABORT - ", logsd); - fprintf(logsd, s, a, b, c, d, e, f, g, h, i, j, k, l); - (void )fputc('\n', logsd); - logtime("End"); - } - exit(-1); -} - -/*VARARGS1*/ -void -log(s, a, b, c, d, e, f, g, h, i, j, k, l) - char *s; -{ - - if (logsd == NULL) - return; - fprintf(logsd, s, a, b, c, d, e, f, g, h, i, j, k, l); - (void )fputc('\n', logsd); - (void )fflush(logsd); -} - -void -logtime(s) - char *s; -{ - time_t clock; - extern char *ctime(); - - if (logsd == NULL) - return; - (void )time(&clock); - fprintf(logsd, "%s %s", s, ctime(&clock)); - (void )fflush(logsd); -} --- - --Lee (Ward) - {ucbvax,convex,gatech,pur-ee}!unmvax!lee - - *-*-END-of-misc/unshar-*-* exit