palkovic@linac.fnal.gov (John Palkovic) (03/01/91)
I came up with a shell script to remove new news articles with old
Date: headers. Neil Rickert made some useful modifications and this is
the result. It needs header.c, so that is included. I got header.c
from Chip Salzenberg's deliver package. You will have to compile and
link header and place it in the PATH in order for the script to work.

-John

#!/bin/sh
# This is a shell archive (shar 3.46)
# made 02/28/1991 23:22 UTC by palkovic@linac
# Source directory /home/palkovic/news
#
# existing files will NOT be overwritten unless -c is specified
#
# This shar contains:
# length  mode       name
# ------ ---------- ------------------------------------------
#    502 -rwxr-xr-x old.rm
#   4796 -rw-r--r-- header.c
#
# NOTE: the lengths above are those of the files as originally posted.
# The copies below have been re-flowed and revised, so their sizes differ
# and the post-extraction "wc -c" size comparison has been dropped; a
# failed extraction is still reported through the chmod check.
#
# Inside each here-document, lines that start with white space (or are
# empty) carry a leading "X", which the sed command strips on extraction.
#
# ============= old.rm ==============
if test -f 'old.rm' -a X"$1" != X"-c"; then
	echo 'x - skipping old.rm (File already exists)'
else
echo 'x - extracting old.rm (Text)'
sed 's/^X//' << 'SHAR_EOF' > 'old.rm' &&
#!/bin/sh
#
# Old.rm.  Trash new news articles with old Date: headers.
# J. Palkovic 2/28/91
#
# Needs the "header" program (built from header.c) and "getdate" on PATH.
#
# You may need to change this next line:
PATH=/usr/lib/newsbin:/usr/local/bin:/usr/bin
X
# Current time as a number, as printed by getdate.
TODAY="`date`"
TODAY=`getdate "$TODAY"`
X
# 1209600 = 14 * 24 * 60 * 60, i.e. 14 days if getdate prints seconds.
# Articles whose Date: header is earlier than LIMIT are removed.
LIMIT=`expr $TODAY - 1209600`
X
# Never run the find/rm below from the wrong directory.
cd /usr/spool/news || exit 1
X
# Look only at top-level entries whose names contain no dot, and within
# them only at numerically named plain files modified in the last 7 days.
find `ls |egrep -v '\.'` -mtime -7 -name '[0-9]*' -type f -print |(
X    while read f
X    do
X        DATE=`header -f Date "$f"`
X        THEN=`getdate "$DATE" 2>/dev/null`
X        # THEN is empty when the Date: header is missing or unparsable;
X        # such articles are left alone.
X        if test "$THEN"
X        then
X            test "$THEN" -lt "$LIMIT" && rm -f "$f"
X        fi
X    done )
SHAR_EOF
chmod 0755 old.rm ||
echo 'restore of old.rm failed'
fi
# ============= header.c ==============
if test -f 'header.c' -a X"$1" != X"-c"; then
	echo 'x - skipping header.c (File already exists)'
else
echo 'x - extracting header.c (Text)'
sed 's/^X//' << 'SHAR_EOF' > 'header.c' &&
/* $Header: header.c,v 2.2 89/06/09 13:08:07 network Exp $
X *
X * A program to parse RFC 822 mail/news headers.
X *
X * usage: header [-c] [-n] [-f field] ... files
X *
X * Default action is to print entire header.  If one or more -f options
X * are given, only the specified fields are printed.  The field names are
X * not printed unless -n is specified.  Field name comparisons are case
X * insensitive unless -c is specified.
X *
X * Output lines are preceded by the filename if more than one file is
X * specified.
X *
X * This program is intended for use in delivery files, to extract multi-
X * line header fields.
X *
X * $Log: header.c,v $
X * Revision 2.2  89/06/09  13:08:07  network
X * Adapt to BSD quirks.
X *
X * Revision 2.1  89/06/09  12:25:29  network
X * Update RCS revisions.
X *
X * Revision 1.5  89/06/09  12:23:51  network
X * Baseline for 2.0 release.
X *
X */
X
#include <stdio.h>
#include <ctype.h>
X
/*
X * Manifest constants
X */
X
#define TRUE 1
#define FALSE 0
X
/*
X * Other useful macros.
X */
X
/* Size handed to fgets() for a buffer declared as an array. */
#define GETSIZE(buf)    (sizeof(buf) - 1)
X
/* True if p starts with the five characters "From ". */
#define ISFROM(p) ((p)[0] == 'F' && (p)[1] == 'r' && (p)[2] == 'o' \
X        && (p)[3] == 'm' && (p)[4] == ' ')
X
/*
X * External data.
X */
X
/* Variables set by getopt() [blech] */
X
extern int optind, opterr;
extern char *optarg;
X
/*
X * Library functions.
X */
X
extern char *malloc();
extern char *realloc();
extern void free();
X
/*
X * Local functions.
X */
X
void usage();
void nomem();
void header();
int field();
int ci_strncmp();
static void rest_of_line();
X
/*
X * Global data
X */
X
int field_count = 0;            /* number of -f names collected */
int field_alloc = 0;            /* slots allocated in field_names */
char **field_names = NULL;      /* the -f names; they point into argv */
X
int nocasematch = TRUE;         /* ignore case in header matches */
int printnames = FALSE;         /* print field names with data */
X
/*----------------------------------------------------------------------
X * The Program.
X *
X * Parses the options, then prints the header of each named file, or of
X * standard input when no file is named.  Exit status is 1 if any file
X * could not be opened (or on a usage error), otherwise 0.
X */
X
int
main(argc, argv)
int argc;
char **argv;
{
X    int c, errors;
X
X    field_alloc = 8;
X    field_names = (char **) malloc(field_alloc * sizeof(char *));
X    if (field_names == NULL)
X        nomem();
X
X    errors = FALSE;
X    while ((c = getopt(argc, argv, "cnf:")) != EOF)
X    {
X        switch (c)
X        {
X        case 'c':
X            nocasematch = FALSE;
X            break;
X        case 'n':
X            printnames = TRUE;
X            break;
X        case 'f':
X            /* Double the table whenever it fills up. */
X            if (field_count >= field_alloc)
X            {
X                field_alloc *= 2;
X                field_names =
X                    (char **) realloc((char *)field_names,
X                                      field_alloc * sizeof(char *));
X                if (field_names == NULL)
X                    nomem();
X            }
X            field_names[field_count++] = optarg;
X            break;
X        default:
X            errors = TRUE;
X            break;
X        }
X    }
X
X    if (errors)
X        usage();
X
X    if (optind == argc)
X        header(stdin, (char *)NULL);
X    else
X    {
X        FILE *fp;
X        int a, filenames;
X
X        /* Prefix output with the file name only when there are several. */
X        filenames = ((argc - optind) > 1);
X        for (a = optind; a < argc; ++a)
X        {
X            if ((fp = fopen(argv[a], "r")) == NULL)
X            {
X                errors = TRUE;
X                perror(argv[a]);
X                continue;
X            }
X
X            header(fp, (filenames ? argv[a] : (char *)NULL));
X            fclose(fp);
X        }
X    }
X
X    exit(errors ? 1 : 0);
}
X
/*
X * Print a usage message and exit with status 1.
X */
X
void
usage()
{
X    fprintf(stderr, "usage: header [-c] [-n] [-f fieldname] ... files\n");
X    exit(1);
}
X
/*
X * Report an allocation failure and exit with status 1.
X */
X
void
nomem()
{
X    fprintf(stderr, "header: out of memory\n");
X    exit(1);
}
X
/*
X * Finish a physical line that did not fit in the buffer.
X *
X * buf holds the piece most recently read by fgets().  If that piece does
X * not end in a newline, keep reading (and, when print is true, writing to
X * stdout) further pieces until a newline or end of file is reached.
X * Without this, the tail of an over-long line would be mistaken for the
X * start of a new header line.
X */
X
static void
rest_of_line(fp, buf, size, print)
FILE *fp;
char *buf;
int size;
int print;
{
X    for (;;)
X    {
X        char *p;
X
X        for (p = buf; *p != '\0' && *p != '\n'; ++p)
X            ;
X        if (*p == '\n')
X            return;             /* the line is complete */
X        if (fgets(buf, size, fp) == NULL)
X            return;             /* end of file in mid-line */
X        if (print)
X            fputs(buf, stdout);
X    }
}
X
/*
X * Print the selected fields of the header read from fp.
X *
X * Leading "From " and ">" lines are skipped.  Processing stops at the
X * first empty line, at the first line that does not look like
X * "name:" (letters, digits and '-' followed by a colon), or at end of
X * file.  If filename is not NULL, every printed line is prefixed with
X * "filename:".  Continuation lines (starting with a space or tab) are
X * printed whenever the field they belong to is printed.
X */
X
void
header(fp, filename)
FILE *fp;
char *filename;
{
X    char buf[1024];
X
X    if (fgets(buf, GETSIZE(buf), fp) == NULL)
X        return;
X
X    /* Ignore From_ line(s). */
X
X    while (ISFROM(buf) || buf[0] == '>')
X    {
X        rest_of_line(fp, buf, (int) GETSIZE(buf), FALSE);
X        if (fgets(buf, GETSIZE(buf), fp) == NULL)
X            return;
X    }
X
X    while (buf[0] != '\n')
X    {
X        char *p;
X        int print_this;
X
X        /* Scan the field name; ctype needs unsigned char values. */
X        p = buf;
X        while (isupper((unsigned char) *p) || islower((unsigned char) *p)
X               || isdigit((unsigned char) *p) || *p == '-')
X            ++p;
X        if (p == buf || *p != ':')
X            break;
X        print_this = field(buf, (int) (p - buf));
X        if (print_this)
X        {
X            if (filename)
X            {
X                fputs(filename, stdout);
X                putc(':', stdout);
X            }
X            /* Step over the colon and one following blank, if any. */
X            ++p;
X            if (*p == ' ' || *p == '\t')
X                ++p;
X            if (field_count == 0 || printnames)
X                fputs(buf, stdout);
X            else
X                fputs(p, stdout);
X        }
X        rest_of_line(fp, buf, (int) GETSIZE(buf), print_this);
X
X        /* get the next input line */
X        if (fgets(buf, GETSIZE(buf), fp) == NULL)
X            break;
X
X        /* deal with continuation lines */
X        while (buf[0] == ' ' || buf[0] == '\t')
X        {
X            if (print_this)
X            {
X                if (filename)
X                {
X                    fputs(filename, stdout);
X                    putc(':', stdout);
X                }
X                fputs(buf, stdout);
X            }
X            rest_of_line(fp, buf, (int) GETSIZE(buf), print_this);
X
X            if (fgets(buf, GETSIZE(buf), fp) == NULL)
X            {
X                /* Fake an empty line so the outer loop stops too. */
X                buf[0] = '\n';
X                break;
X            }
X        }
X    }
}
X
/*
X * Decide whether the field whose name is the first n characters of s
X * should be printed.  Returns TRUE if no -f option was given or if the
X * name matches one of the -f names (ignoring case unless -c was given),
X * otherwise FALSE.
X */
X
int
field(s, n)
char *s;
int n;
{
X    int i;
X
X    if (field_count == 0)
X        return TRUE;
X
X    for (i = 0; i < field_count; ++i)
X    {
X        char *f = field_names[i];
X
X        /* Lengths must agree, so a prefix alone never matches. */
X        if ((int) strlen(f) == n)
X        {
X            if (nocasematch)
X            {
X                if (ci_strncmp(f, s, n) == 0)
X                    return TRUE;
X            }
X            else
X            {
X                if (strncmp(f, s, n) == 0)
X                    return TRUE;
X            }
X        }
X    }
X
X    return FALSE;
}
X
/*
X * Compare at most n characters of s and t, treating upper-case letters
X * as their lower-case equivalents.  Returns 1, -1 or 0 as s compares
X * greater than, less than or equal to t.
X */
X
int
ci_strncmp(s, t, n)
char *s, *t;
int n;
{
X    char c, d;
X
X    while (n-- > 0)
X    {
X        c = *s++;
X        d = *t++;
X        if ((c == 0) && (d == 0))
X            break;
X        if (isupper((unsigned char) c))
X            c = tolower((unsigned char) c);
X        if (isupper((unsigned char) d))
X            d = tolower((unsigned char) d);
X        if (c > d)
X            return 1;
X        if (c < d)
X            return -1;
X    }
X
X    return 0;
}
SHAR_EOF
chmod 0644 header.c ||
echo 'restore of header.c failed'
fi
exit 0
-- 
palkovic@linac.fnal.gov || {royko,tellab5,simon}!linac!palkovic
sow@cad.luth.se (Sven-Ove Westberg) (03/02/91)
In article <9Y`#-6-@linac.fnal.gov> palkovic@linac.fnal.gov (John Palkovic) writes:
|
|I came up with a shell script to remove new news articles with old
|Date: headers. Neil Rickert made some useful modifications and this is
|the result. It needs header.c, so that is included. I got header.c
|from Chip Salzenberg's deliver package. You will have to compile and
|link header and place it in the PATH in order for the script to work.

Hmmmmm.... This is from the manual page for expire.

    -p causes expire to use the date the article was posted, rather
    than the date it arrived at your machine, as the basis for
    expiration. Every now and then there is a time warp that causes a
    batch of very very old news to be dumped onto the network;
    judicious use of the -p option can eradicate it.

Sven-Ove