[news.software.b] Package to remove old news floods

palkovic@linac.fnal.gov (John Palkovic) (03/01/91)

I came up with a shell script to remove new news articles with old
Date: headers. Neil Rickert made some useful modifications and this is
the result. It needs header.c, so that is included. I got header.c
from Chip Salzenberg's deliver package. You will have to compile and
link header and place it in the PATH in order for the script to work.

-John


#!/bin/sh
# This is a shell archive (shar 3.46)
# made 02/28/1991 23:22 UTC by palkovic@linac
# Source directory /home/palkovic/news
#
# existing files will NOT be overwritten unless -c is specified
#
# This shar contains:
# length  mode       name
# ------ ---------- ------------------------------------------
#    502 -rwxr-xr-x old.rm
#   4796 -rw-r--r-- header.c
#
# ============= old.rm ==============
if test -f 'old.rm' -a X"$1" != X"-c"; then
	echo 'x - skipping old.rm (File already exists)'
else
echo 'x - extracting old.rm (Text)'
sed 's/^X//' << 'SHAR_EOF' > 'old.rm' &&
#!/bin/sh
#
# Old.rm. Trash new news articles with old Date: headers.
# J. Palkovic 2/28/91
#
# Looks at article files (numeric names) under the news spool that were
# modified within the last 7 days and removes every one whose Date:
# header lies more than 1209600 seconds (14 days) before the current
# time.  Needs "header" and "getdate" on the PATH; getdate is expected
# to print a date as a plain number of seconds.
#
# You may need to change this next line:
PATH=/usr/lib/newsbin:/usr/local/bin:/usr/bin
TODAY="`date`"
TODAY=`getdate "$TODAY"`
# Without a usable current time there is no cutoff to compare against.
test "$TODAY" || exit 1
LIMIT=`expr "$TODAY" - 1209600`
# Never let the find/rm pass below run from some other directory.
cd /usr/spool/news || exit 1
# Top-level entries containing a dot are skipped; the rest are searched.
find `ls |egrep -v '\.'` -mtime -7 -name '[0-9]*' -type f -print |(
 while read f
 do
   DATE=`header -f Date "$f"`
   THEN=`getdate "$DATE" 2>/dev/null`
   # An article whose date cannot be parsed is left alone.
   if test "$THEN"
   then
     test "$THEN" -lt "$LIMIT" && rm -f "$f"
   fi
 done
)
SHAR_EOF
chmod 0755 old.rm ||
echo 'restore of old.rm failed'
Wc_c="`wc -c < 'old.rm'`"
test 502 -eq "$Wc_c" ||
	echo 'old.rm: original size 502, current size' "$Wc_c"
fi
# ============= header.c ==============
if test -f 'header.c' -a X"$1" != X"-c"; then
	echo 'x - skipping header.c (File already exists)'
else
echo 'x - extracting header.c (Text)'
sed 's/^X//' << 'SHAR_EOF' > 'header.c' &&
/* $Header: header.c,v 2.2 89/06/09 13:08:07 network Exp $
X *
X * A program to parse RFC 822 mail/news headers.
X *
X * usage: header [-c] [-n] [-f field] ... files
X *
X * Default action is to print entire header.  If one or more -f options
X * are given, only the specified fields are printed.  The field names are
X * not printed unless -n is specified.  Field name comparisons are case
X * insensitive unless -c is specified.
X *
X * Output lines are preceded by the filename if more than one file is
X * specified.
X *
X * This program is intended for use in delivery files, to extract multi-
X * line header fields.
X *
X * $Log:	header.c,v $
X * Revision 2.2  89/06/09  13:08:07  network
X * Adapt to BSD quirks.
X * 
X * Revision 2.1  89/06/09  12:25:29  network
X * Update RCS revisions.
X * 
X * Revision 1.5  89/06/09  12:23:51  network
X * Baseline for 2.0 release.
X * 
X */
X
#include <stdio.h>
#include <ctype.h>
X
/*
X * Manifest constants
X */
X
#define TRUE 1
#define FALSE 0
X
/*
X * Other useful macros.
X */
X
/* Size passed to fgets() for an array buffer: one less than sizeof the array. */
#define GETSIZE(buf)    (sizeof(buf) - 1)
X
/* True when p starts with the five characters "From " (a From_ line). */
#define ISFROM(p) ((p)[0] == 'F' && (p)[1] == 'r' && (p)[2] == 'o' \
X		&& (p)[3] == 'm' && (p)[4] == ' ')
X
/*
X * External data.
X */
X
/* Variables set by getopt() [blech] */
X
extern  int     optind, opterr;
extern  char    *optarg;
X
/*
X * Library functions.
X */
X
extern  char    *malloc();
extern  char    *realloc();
extern  void    free();
X
/*
X * Global data
X */
X
/* Number of field names collected from -f options so far. */
int     field_count     = 0;
/* Number of entries currently allocated for field_names. */
int     field_alloc     = 0;
/* The -f field names; each entry points at a getopt() option argument. */
char    **field_names   = NULL;
X
int     nocasematch     = TRUE;         /* ignore case in header matches */
int     printnames      = FALSE;        /* print field names with data */
X
/*----------------------------------------------------------------------
X * The Program.
X */
X
main(argc, argv)
int     argc;
char    **argv;
{
X	int     c, errors;
X
X	field_alloc = 8;
X	field_names = (char **) malloc(field_alloc * sizeof(char **));
X	if (field_names == NULL)
X		nomem();
X
X	errors = FALSE;
X	while ((c = getopt(argc, argv, "cnf:")) != EOF)
X	{
X		switch (c)
X		{
X		case 'c':
X			nocasematch = FALSE;
X			break;
X		case 'n':
X			printnames = TRUE;
X			break;
X		case 'f':
X			if (field_count >= field_alloc)
X			{
X				field_alloc *= 2;
X				field_names =
X				    (char **) realloc((char *)field_names,
X					      field_alloc * sizeof(char **));
X				if (field_names == NULL)
X					nomem();
X			}
X			field_names[field_count++] = optarg;
X			break;
X		default:
X			errors = TRUE;
X			break;
X		}
X	}
X
X	if (errors)
X		usage();
X
X	if (optind == argc)
X		header(stdin, (char *)NULL);
X	else
X	{
X		FILE    *fp;
X		int     a, filenames;
X
X		filenames = ((argc - optind) > 1);
X		for (a = optind; a < argc; ++a)
X		{
X			if ((fp = fopen(argv[a], "r")) == NULL)
X			{
X				errors = TRUE;
X				perror(argv[a]);
X				continue;
X			}
X
X			header(fp, (filenames ? argv[a] : (char *)NULL));
X			fclose(fp);
X		}
X	}
X
X	exit(errors ? 1 : 0);
}
X
/*
 * Print the command synopsis on standard error and exit with status 1.
 * The synopsis lists every option main() accepts: -c, -n and -f.
 */
int
usage()
{
	fprintf(stderr, "usage: header [-c] [-n] [-f fieldname] ... files\n");
	exit(1);
}
X
/*
 * Report on standard error that memory ran out, then terminate the
 * program with exit status 1.  Control never returns to the caller.
 */
int
nomem()
{
	fputs("header: out of memory\n", stderr);
	exit(1);
}
X
header(fp, filename)
FILE    *fp;
char    *filename;
{
X	char    buf[1024];
X
X	if (fgets(buf, GETSIZE(buf), fp) == NULL)
X		return;
X
X	/* Ignore From_ line(s). */
X
X	while (ISFROM(buf) || buf[0] == '>')
X	{
X		if (fgets(buf, GETSIZE(buf), fp) == NULL)
X			return;
X	}
X
X	while (buf[0] != '\n')
X	{
X		char    *p;
X		int     print_this;
X
X		p = buf;
X		while (isupper(*p) || islower(*p) || isdigit(*p) || *p == '-')
X			++p;
X		if (p == buf || *p != ':')
X			break;
X		print_this = field(buf, p - buf);
X		if (print_this)
X		{
X			if (filename)
X			{
X				fputs(filename, stdout);
X				putc(':', stdout);
X			}
X			++p;
X			if (*p == ' ' || *p == '\t')
X				++p;
X			if (field_count == 0 || printnames)
X				fputs(buf, stdout);
X			else
X				fputs(p, stdout);
X		}
X
X		/* get the next input line */
X		if (fgets(buf, GETSIZE(buf), fp) == NULL)
X			break;
X
X		/* deal with continuation lines */
X		while (buf[0] == ' ' || buf[0] == '\t')
X		{
X			if (print_this)
X			{
X				if (filename)
X				{
X					fputs(filename, stdout);
X					putc(':', stdout);
X				}
X				fputs(buf, stdout);
X			}
X
X			if (fgets(buf, GETSIZE(buf), fp) == NULL)
X			{
X				buf[0] = '\n';
X				break;
X			}
X		}
X	}
}
X
int
field(s, n)
char    *s;
int     n;
{
X	int     i;
X
X	if (field_count == 0)
X		return TRUE;
X
X	for (i = 0; i < field_count; ++i)
X	{
X		char    *f = field_names[i];
X
X		if (strlen(f) == n)
X		{
X			if (nocasematch)
X			{
X				if (ci_strncmp(f, s, n) == 0)
X					return TRUE;
X			}
X			else
X			{
X				if (strncmp(f, s, n) == 0)
X					return TRUE;
X			}
X		}
X	}
X
X	return FALSE;
}
X
/*
 * Compare at most n characters of s and t, treating upper and lower
 * case letters as equal.
 *
 * Returns 0 when the compared characters all match (comparison stops
 * early where both strings end), 1 when s sorts after t, and -1 when
 * s sorts before t.
 *
 * Characters are handled as unsigned char.  This keeps the ctype calls
 * well defined for bytes with the high bit set, and it orders such
 * bytes the same way strncmp() does.
 */
int
ci_strncmp(s, t, n)
char    *s, *t;
int     n;
{
	while (n-- > 0)
	{
		unsigned char   c = (unsigned char) *s++;
		unsigned char   d = (unsigned char) *t++;

		if (c == 0 && d == 0)
			break;
		if (isupper(c))
			c = (unsigned char) tolower(c);
		if (isupper(d))
			d = (unsigned char) tolower(d);
		if (c != d)
			return (c > d) ? 1 : -1;
	}

	return 0;
}
SHAR_EOF
chmod 0644 header.c ||
echo 'restore of header.c failed'
Wc_c="`wc -c < 'header.c'`"
test 4796 -eq "$Wc_c" ||
	echo 'header.c: original size 4796, current size' "$Wc_c"
fi
exit 0


-- 
palkovic@linac.fnal.gov || {royko,tellab5,simon}!linac!palkovic

sow@cad.luth.se (Sven-Ove Westberg) (03/02/91)

In article <9Y`#-6-@linac.fnal.gov> palkovic@linac.fnal.gov (John Palkovic) writes:
|
|I came up with a shell script to remove new news articles with old
|Date: headers. Neil Rickert made some useful modifications and this is
|the result. It needs header.c, so that is included. I got header.c
|from Chip Salzenberg's deliver package. You will have to compile and
|link header and place it in the PATH in order for the script to work.

Hmmmmm.... This is from the manual page for expire.

     -p   causes expire to use the date the article  was  posted,
          rather than the date it arrived at your machine, as the
	  basis for expiration. Every now and  then  there  is  a
          time  warp that causes a batch of very very old news to
          be dumped onto the network; judicious  use  of  the  -p
          option can eradicate it.

Sven-Ove