[can.uucp] Program to remove articles that have been stuck in some backwater

mason@tmsoft.uucp (Dave Mason) (02/22/91)

# Apologies to news.software.b if this is the 27th program like this.

# This uses the libraries that come with C news, so B news sites may
# have to do a lot of hacking to make it work.

# There has recently (always?) been a problem of the odd site that has
# really bad news connections and eventually passes on news that is so
# old that the Message-ID's are gone from everyone's history file.

# This irritated me enough today that I did something about it.

# This program is to nuke those old articles.

# 	../Dave

#!/bin/sh
# This is a shell archive (produced by shar 3.49)
# To extract the files from this archive, save it to a file, remove
# everything above the "!/bin/sh" line above, and type "sh file_name".
#
# made 02/22/1991 00:52 UTC by mason@tmsoft
# Source directory /src/pub/news/c/expire
#
# existing files will NOT be overwritten unless -c is specified
#
# This shar contains:
# length  mode       name
# ------ ---------- ------------------------------------------
#   2525 -rw-r--r-- README.backwater
#    175 -rwxr-xr-x histfiles
#   2634 -rw-rw-r-- backwater.c
#   9816 -rw-rw-r-- Makefile
#
# ============= README.backwater ==============
# An existing README.backwater is left untouched unless the first argument
# given to this archive is -c.
if test -f 'README.backwater' -a X"$1" != X"-c"; then
	echo 'x - skipping README.backwater (File already exists)'
else
# Copy the lines up to SHAR_EOF into the file; sed removes the single
# leading X that blank and indented lines carry inside the archive.
echo 'x - extracting README.backwater (Text)'
sed 's/^X//' << 'SHAR_EOF' > 'README.backwater' &&
No Warranty on this code, use at your own risk.
Written 1991.02.21 Dave Mason <mason@tmsoft>
X
This uses the libraries that come with C news, so B news sites may
have to do a lot of hacking to make it work.
X
There has recently (always?) been a problem of the odd site that has
really bad news connections and eventually passes on news that is so
old that the Message-ID's are gone from everyone's history file.
X
This irritated me enough today that I did something about it.  On this
system it didn't make a HUGE change, but it removed about 600K from
the 25MB that was sitting in the news partition at the time.  I
suspect it would have more effect on systems that don't expire quite
so aggressively.
X
This program takes a list of file names on stdin, checks the Date:
field in each file, checks the date the file was created, and if the
Date is >10 days older than the modify time, determines that this file
has taken too long in transit & spits its name on stdout.  (If you
keep more than 10 days in your history file you can run the program
with the number of days as a parameter.)
X
Typical usage to clean out all those pesky junk files would be:
X	cd /usr/spool/news
X	histfiles /usr/lib/news/history |
X	backwater 10 |
X	xargs rm -f
X
I looked in expire and relaynews to see if I could get them to do the
work more easily, but the answer is no.  relaynews doesn't even look
at dates, and expire doesn't look at the files.
X
The right place to put this is in relaynews (so we don't create the
file in the first place), but that was a little more work and would
increase its size (since it would require getdate) though it wouldn't
slow it down (when you didn't want it) if it was made a switch (and
probably not TOO much even when you did want it).  Hence this quick & dirty.
X
histfiles just extracts filenames from the history file (just the
first name) replacing `.' with `/'.  Although it's an easy hack, it may
be of some small value on its own.
X
I put backwater.c in news/c/expire and modified the Makefile (included
here).  I might write a little program to allow you to run backwater
only on the files that have arrived since the last try (improving on
`histfiles'), but I may just make the change to relaynews & send it to
Geoff.
X
Final comment: I have not tested backwater with the parameter on the
command line.  The code looks right, but I've put more time into this
than I can afford already.  Buyer beware!
X
Oh, and thanks Henry & Geoff for making code that's nice & easy to hack.
X
Hope this helps someone.	../Dave
SHAR_EOF
chmod 0644 README.backwater ||
echo 'restore of README.backwater failed'
# The message above is printed when either the sed extraction or the chmod
# failed.  Next, compare the byte count of the extracted file with the
# expected 2525 and report a difference; the file is kept either way.
Wc_c="`wc -c < 'README.backwater'`"
test 2525 -eq "$Wc_c" ||
	echo 'README.backwater: original size 2525, current size' "$Wc_c"
fi
# ============= histfiles ==============
# NOTE: this member differs from the 175-byte histfiles originally posted:
# awk is now given a literal tab as its field separator (the same spelling
# the Makefile's regression test uses) because "-Ft" is taken as the letter
# t by some awks, and the awk program carries a comment.  The size check
# below matches this text.
#
# histfiles prints, for every history line with exactly three tab-separated
# fields, the first space-separated name of the third field with each "."
# replaced by "/".  It reads the file named by its first argument, or stdin.
if test -f 'histfiles' -a X"$1" != X"-c"; then
	echo 'x - skipping histfiles (File already exists)'
else
echo 'x - extracting histfiles (Text)'
sed 's/^X//' << 'SHAR_EOF' > 'histfiles' &&
awk -F'	' '
X	# History lines have three tab-separated fields; the third lists the
X	# article names.  Print the first name with each "." turned into "/".
X	NF==3 {i=index($3," ")
X		if (i) x=substr($3,1,i-1);else x=$3
X		i=index(x,".")
X		while (i) {x=substr(x,1,i-1) "/" substr(x,i+1);i=index(x,".")}
X		print x
X	}' ${1:-}
SHAR_EOF
chmod 0755 histfiles ||
echo 'restore of histfiles failed'
Wc_c="`wc -c < 'histfiles'`"
test 318 -eq "$Wc_c" ||
	echo 'histfiles: original size 318, current size' "$Wc_c"
fi
# ============= backwater.c ==============
# NOTE: this member differs from the 2634-byte backwater.c originally posted:
# a name that cannot be opened is now skipped (reported under -d) instead of
# being handed to efopen(), a name is only shortened when it really ends in
# a newline, two unused locals and the unused efopen() declaration are gone,
# and the header comments were expanded.  The size check below matches this
# text.
if test -f 'backwater.c' -a X"$1" != X"-c"; then
	echo 'x - skipping backwater.c (File already exists)'
else
echo 'x - extracting backwater.c (Text)'
sed 's/^X//' << 'SHAR_EOF' > 'backwater.c' &&
/*
X * backwater - print file names for articles that got stuck in a net backwater
X *
X * Reads article file names from stdin, one per line.  For each article
X * whose Date: header is more than `days' days (default 10) older than
X * the modification time of the file, the name is written to stdout.
X */
X
#include <stdio.h>
#include <sys/types.h>
#include <sys/timeb.h>
#include <sys/stat.h>		/* for modified time (date received) */
#include <string.h>
#include "config.h"
#include "fgetmfs.h"
#include "alloc.h"
#include "case.h"
X
#define STRLEN(s) (sizeof(s) - 1)	/* s must be a char array */
#define	DAYSECS	(60*60*24L)
X
long	dateoffset = DAYSECS*10;
char *progname;
int debug;
struct timeb ftnow;		/* ftime() result for getdate() */
X
char *spdir;
int spdirlen;
X
/*
X * main - parse arguments and handle options
X *
X * Takes -d (debug) and an optional number of days (at least 1), then
X * runs process() on each dot-free name from stdin that can be opened.
X */
main(argc, argv)
int argc;
char *argv[];
{
X	int c;
X	int errflg = 0;
X	FILE *in;
X	char *inname;
X	int len;
X	extern int optind;
X	extern char *optarg;
X
X	progname = argv[0];
X	ftime(&ftnow);
X	while ((c = getopt(argc, argv, "d")) != EOF)
X		switch (c) {
X		case 'd':
X			++debug;
X			break;
X		default:
X			errflg++;
X			break;
X		}
X	if (optind < argc) {
X		dateoffset=atoi(argv[optind++])*DAYSECS;
X		if (dateoffset<DAYSECS) ++errflg;
X	}
X	if (optind < argc || errflg) {
X		(void) fprintf(stderr, "usage: %s [-d] [days]\n", progname);
X		exit(2);
X	}
X
X	spdir = artfile((char *)NULL);
X	spdirlen = strlen(spdir);
X	
X	while ((inname = fgetms(stdin)) != NULL) {
X		len = strlen(inname);
X		if (len > 0 && inname[len-1] == '\n')
X			inname[len-1] = '\0';	/* kill newline */
X		if (strchr(inname, '.') == NULL) {	/* skip dot names */
X			/*
X			 * A listed article may already be gone (e.g. removed
X			 * by an earlier run); skip it instead of giving up.
X			 */
X			in = fopen(inname, "r");
X			if (in != NULL) {
X				process(in, inname);
X				(void) fclose(in);
X			} else if (debug)
X				(void) fprintf(stderr, "%s: can't open %s\n",
X				    progname, inname);
X		}
X		free(inname);
X	}
X	exit(0);
}
X
/*
X * process - process input file
X *
X * Scan the headers of the article open on `in' for a Date: line and
X * print `inname' on stdout when that date plus dateoffset is still
X * earlier than the modification time of the file.  Articles with no
X * Date: line are ignored; an unparsable date is reported on stderr.
X */
process(in, inname)
FILE *in;
char *inname;
{
X	char *line;
X	char *date;
X	time_t datercv,origdate;
X	struct stat statb;
X	static char datenm[] =    "Date: ";
X
X	date = NULL;
X
X	/* read until EOF or blank line (end of headers) */
X	while ((line = fgetms(in)) != NULL && strcmp(line, "\n") != 0) {
X		line[strlen(line)-1] = '\0';		/* trim newline */
X		if (CISTREQN(line, datenm, STRLEN(datenm))) {
X			if (date != NULL)
X				free(date);
X			date = strsave(line+STRLEN(datenm));
X		}
X		free(line);
X	}
X	if (line != NULL)
X		free(line);
X
X	if (date!=NULL) {
X		/* generate the date received */
X		(void) fstat(fileno(in), &statb);
X		datercv = statb.st_mtime;
X
X		/* find out when it was posted */
X		origdate = getdate(date, &ftnow);
X
X		if (origdate == -1)
X			fprintf(stderr,"%s: Invalid date in %s:`%s'\n",progname,inname,date);
X		else if (origdate+dateoffset<datercv) {
X			/* whomp out the file name */
X			(void) fputs(inname, stdout);
X			(void) putchar('\n');
X			(void) fflush(stdout);
X		  }
X		free(date);
X	}
}
X
/*
X * unprivileged - no-op to keep pathname stuff happy
X */
void
unprivileged(reason)
char *reason;
{
}
SHAR_EOF
chmod 0664 backwater.c ||
echo 'restore of backwater.c failed'
Wc_c="`wc -c < 'backwater.c'`"
test 3532 -eq "$Wc_c" ||
	echo 'backwater.c: original size 3532, current size' "$Wc_c"
fi
# ============= Makefile ==============
# An existing Makefile is left untouched unless the first argument given to
# this archive is -c.
if test -f 'Makefile' -a X"$1" != X"-c"; then
	echo 'x - skipping Makefile (File already exists)'
else
# Copy the lines up to SHAR_EOF into the file; sed removes the single
# leading X that blank and indented lines carry inside the archive, which
# restores the tabs that start the Makefile's command lines.
echo 'x - extracting Makefile (Text)'
sed 's/^X//' << 'SHAR_EOF' > 'Makefile' &&
PROF = 
COPTS = -O
CFLAGS = $(COPTS) -I../include $(PROF)
LINTFLAGS = -I../include
JUNKLINT = 'possible pointer align'
DBM =
LIBS= ../libcnews.a
THEM = expire histdups histinfo histslash mkdbm mkhistory \
X	upact doexpire mkadir recovact backwater
DTR = README Makefile dircheck doexpire expire.c histdups histinfo.c \
X	histslash.c mkdbm.c mkhistory pgood tgood upact \
X	mkadir updatemin.c recovact backwater.c
UPACT=upact
# =()<NEWSARTS = @<NEWSARTS>@>()=
NEWSARTS = /usr/spool/news
# =()<NEWSSPOOL = @<NEWSSPOOL>@>()=
NEWSSPOOL = /usr/lib/news
# =()<NEWSBIN = @<NEWSBIN>@>()=
NEWSBIN = /usr/lib/newsbin
# =()<NEWSCTL = @<NEWSCTL>@>()=
NEWSCTL = /usr/lib/news
# workaround for System V make bug
SHELL = /bin/sh
X
all:	$(THEM)
X	chmod +x $(THEM)
X
bininstall:	$(THEM)
X	-if test ! -d $(NEWSBIN)/expire ; then mkdir $(NEWSBIN)/expire; fi
X	cp $(THEM) $(NEWSBIN)/expire
X
explists:	explist.no explist.yes
X
cmp:	$(THEM)
X	for f in $(THEM) ; do cmp $(NEWSBIN)/expire/$$f $$f ; done
X
check:	$(THEM)
X	for f in $(THEM) ; do cmp $(NEWSBIN)/expire/$$f $$f || true ; done
X
newsinstall:	explist
X	-if test ! -r $(NEWSCTL)/explist ; then cp explist $(NEWSCTL)/explist ; fi
X
expire: expire.o $(LIBS)
X	$(CC) $(CFLAGS) $(LDFLAGS) expire.o $(PRE) $(DBM) $(LIBS) $(POST) -o $@
X
histinfo: histinfo.o $(LIBS)
X	$(CC) $(CFLAGS) $(LDFLAGS) histinfo.o $(PRE) $(LIBS) $(POST) -o $@
X
backwater: backwater.o $(LIBS)
X	$(CC) $(CFLAGS) $(LDFLAGS) backwater.o $(PRE) $(LIBS) $(POST) -o $@
X
updatemin:	updatemin.o $(LIBS)
X	$(CC) $(CFLAGS) $(LDFLAGS) updatemin.o $(PRE) $(LIBS) $(POST) -o $@
X
histslash:	histslash.o $(LIBS)
X	$(CC) $(CFLAGS) $(LDFLAGS) histslash.o $(PRE) $(LIBS) $(POST) -o $@
X
mkdbm:	mkdbm.o $(LIBS)
X	$(CC) $(CFLAGS) $(LDFLAGS) mkdbm.o $(PRE) $(LIBS) $(DBM) $(POST) -o $@
X
lint:	expire.c
X	lint -ha $(LINTFLAGS) expire.c 2>&1 | egrep -v $(JUNKLINT) | tee $@
X
explist.yes:
X	echo "# hold onto history lines 14 days, nobody gets >90 days" >$@
X	echo "/expired/			x	14	-" >>$@
X	echo "/bounds/			x	0-1-90	-" >>$@
X	echo >>$@
X	echo "# override later defaults for some groups of note" >>$@
X	echo "sci.space.shuttle,rec.birds	x	7	@" >>$@
X	echo >>$@
X	echo "# big non-tech groups held long enough for a long weekend" >>$@
X	echo "sci,rec,talk,soc,misc,alt	u	4	-" >>$@
X	echo >>$@
X	echo "# real noise gets thrown away fast" >>$@
X	echo "news.groups			x	2	-" >>$@
X	echo "junk,tor.news.stats		x	2	-" >>$@
X	echo >>$@
X	echo "# throw away some technical stuff not worth archiving" >>$@
X	echo "comp.os.vms,comp.mail.maps	x	7	-" >>$@
X	echo >>$@
X	echo "# default:  7 days and archive" >>$@
X	echo "all				x	7	@" >>$@
X
explist.no:	explist.yes
X	echo '# modified version, no archiving' >$@
X	echo '#' >>$@
X	sed 's/@/-/' explist.yes >>$@
X
explist:	explist.no
X	cp explist.no $@
X
# setup for regression test
setup:
X	rm -f explist history active active.after history.proto
X	rm -rf arts arch arch2 arch3 nbin
X	>history.pag
X	>history.dir
X	echo "/expired/	x	0.08	-" >>explist
X	echo '/bounds/	x	0.01-0.1-0.11	-' >>explist
X	echo "bar,!bar.ugh	x	0.05	`pwd`/arch2" >>explist
X	echo 'mod	m	0.1	@' >>explist
X	echo 'mod	u	0.05	-' >>explist
X	echo 'urp	x	0.01-0.05-0.1	-' >>explist
X	echo "bletch	x	0.1	=`pwd`/arch3/bletch" >>explist
X	echo 'all	x	0.1	@' >>explist
X	mkdir nbin nbin/expire
X	for f in $(THEM) ; do ln $$f nbin/expire/$$f ; done
X	echo 'ln $$* >/dev/null 2>/dev/null' >nbin/newslock
X	echo 'echo 10' >nbin/spacefor
X	chmod +x nbin/* nbin/expire/*
X	mkdir arts arts/foo arts/bar arts/bar/ugh arts/urp arch arch2
X	mkdir arts/mod arts/mod/mod arts/mod/unmod arch3 arch3/bletch
X	mkdir arts/bletch
X	echo 'foo 00099 00000 y' >>active
X	echo 'foo 00099 00001 y' >>active.after
X	echo 'bar 00099 00000 m' >>active
X	echo 'bar 00099 00100 m' >>active.after
X	echo 'bar.ugh 00099 00000 m' >>active
X	echo 'bar.ugh 00099 00099 m' >>active.after
X	echo 'urp 00099 00000 n' >>active
X	echo 'urp 00099 00007 n' >>active.after
X	echo 'mod.mod 00013 00000 m' >>active
X	echo 'mod.mod 00013 00013 m' >>active.after
X	echo 'mod.unmod 00016 00000 y' >>active
X	echo 'mod.unmod 00016 00016 y' >>active.after
X	echo 'bletch 00099 00000 y' >>active
X	echo 'bletch 00099 00100 y' >>active.after
X	echo '<wont1>	9999~-	foo/1' >>history.proto
X	echo :foo/1: >arts/foo/1
X	echo 'MeSsAge-ID: <wont1>' >>arts/foo/1
X	echo '<will2>	1000~-	foo/2' >>history.proto
X	echo foo/2 >arts/foo/2
X	echo '<will3>	1000~100	foo/3' >>history.proto
X	echo foo/3 >arts/foo/3
X	echo '<two4>	100	bar/4' >>history.proto
X	echo :bar/4: >arts/bar/4
X	echo 'SuBjeCt: yes' >>arts/bar/4
X	echo '<will5>	100~	bar.ugh/5' >>history.proto
X	echo :bar/ugh/5: >arts/bar/ugh/5
X	echo >>arts/bar/ugh/5
X	echo 'Subject: no' >>arts/bar/ugh/5
X	echo '<gone6>	100~-	urp/6' >>history.proto
X	echo urp/6 >arts/urp/6
X	echo '<wont7>	9999~9999~xx~a	urp/7' >>history.proto
X	echo :urp/7: >arts/urp/7
X	echo 'mEssaGe-id: <wont7>' >>arts/urp/7
X	echo 'exPiRes: 9999' >>arts/urp/7
X	echo '<gone8>	100~20000	urp/8' >>history.proto
X	echo urp/8 >arts/urp/8
X	echo '<gone9>	100~-	urp/9' >>history.proto
X	echo '<wont10>	1000~20000	foo/10' >>history.proto
X	echo :foo/10: >arts/foo/10
X	echo 'Message-ID: <wont10>' >>arts/foo/10
X	echo 'Expires: 20000' >>arts/foo/10
X	echo 'Supersedes: <wont1>' >>arts/foo/10
X	echo >>arts/foo/10
X	echo '<will11>	100~20000	foo/11' >>history.proto
X	echo foo/11 >arts/foo/11
X	echo '<will12>	100~-	mod.mod/12' >>history.proto
X	echo mod/mod/12 >arts/mod/mod/12
X	echo '<wont13>	2000	mod.mod/13' >>history.proto
X	echo :mod/mod/13: >arts/mod/mod/13
X	echo 'Message-ID: <wont13>' >>arts/mod/mod/13
X	echo '<gone14>	100~-	mod.unmod/14' >>history.proto
X	echo mod/unmod/14 >arts/mod/unmod/14
X	echo '<gone15>	2000	mod.unmod/15' >>history.proto
X	echo mod/unmod/15 >arts/mod/unmod/15
X	echo '<wont16>	9999~-	mod.unmod/16' >>history.proto
X	echo :mod/unmod/16: >arts/mod/unmod/16
X	echo 'Message-ID: <wont16>' >>arts/mod/unmod/16
X	echo '<three17>	100~-	bletch/17' >>history.proto
X	echo bletch/17 >arts/bletch/17
X	egrep wont history.proto >history.after
X	echo '<gone96>	5000~-	/' >>history.proto
X	echo '<gone96>	5000~-' >>history.after
X	echo '<gone97>	5000~-' >>history.proto
X	echo '<gone97>	5000~-' >>history.after
X	echo '<gone98>	5000~-	urp/98' >>history.proto
X	echo '<gone98>	5000~-' >>history.after
X	echo urp/98 >arts/urp/98
X	echo '<multi99>	5000~-	bar/99, bar.ugh/99  foo/99 urp/99' >>history.proto
X	echo '<multi99>	5000~-	bar.ugh/99 foo/99' >>history.after
X	echo :foo/99:bar/99:bar/ugh/99:urp/99: >arts/foo/99
X	echo 'Message-ID: <multi99>' >>arts/foo/99
X	ln arts/foo/99 arts/bar/99
X	ln arts/foo/99 arts/bar/ugh/99
X	ln arts/foo/99 arts/urp/99
X	cp history.proto history
X
# regression test machinery
D = NEWSARTS=`pwd`/arts NEWSCTL=`pwd` NEWSSPOOL=`pwd` NEWSBIN=`pwd`/nbin
RUN = $(D) ./expire -a `pwd`/arch -n 10000
FIXMIDFIELD = 's/	[0-9]*~*/	~/;s/~	/~-	/;s/~xx~a//'
FIXPATHS = "s;`pwd`;P;g"
goodt:	expire setup	# do NOT run this unless you know what you're doing
X	$(RUN) -t explist 2>&1 | sed $(FIXPATHS) >tgood
X	chmod -w tgood
goodp:	expire setup	# do NOT run this unless you know what you're doing
X	$(RUN) -p explist >pgood 2>&1
X	chmod -w pgood
doit:	expire		# for debugging
X	echo "$(RUN) -t explist" >$@
X	chmod +x $@
X
# the regression test proper
r:	$(THEM) $(UPACT) dircheck setup tgood pgood
X	chmod +x dircheck $(THEM)
X	$(RUN) -c explist
X	: "okay, it's sane"
X	$(RUN) -t explist >test.out 2>&1
X	sed $(FIXPATHS) test.out | cmp - tgood
X	: "okay, it seems to know what it's doing -- now, can it do it?"
X	$(RUN) -p explist >test.out 2>test.stderr
X	test ! -s test.stderr ;
X	cmp test.out pgood
X	test ! -f history.n ;
X	cmp history.proto history.o
X	egrep wont history.proto | ./dircheck arts
X	egrep 'will|two|gone|three' history.proto | ./dircheck -n arts
X	egrep will history.proto | ./dircheck arch
X	egrep 'wont|two|gone|three' history.proto | ./dircheck -n arch
X	egrep two history.proto | ./dircheck arch2
X	egrep 'will|wont|gone|three' history.proto | ./dircheck -n arch2
X	egrep three history.proto | ./dircheck arch3
X	egrep 'will|wont|gone|two' history.proto | ./dircheck -n arch3
X	test -f arts/foo/99 ;
X	test -f arts/bar/ugh/99 ;
X	test -f arch2/bar/99 ;
X	test ! -f arts/urp/99 ;
X	cmp history history.after
X	: "that's it for expire, on to upact and recovact"
X	$(D) ./$(UPACT)
X	cmp active active.after
X	$(D) ./$(UPACT)
X	cmp active active.after
X	$(D) ./recovact
X	cmp active active.after
X	sed '/^foo /s/99/09/' active.after >active
X	$(D) ./recovact
X	cmp active active.after
X	: "and for upact, on to mkhistory"
X	awk -F'	' 'NF > 2' history | sed $(FIXMIDFIELD) | sort >history.after
X	rm -f history.pag history.dir
X	$(D) ./mkhistory
X	sort history | sed $(FIXMIDFIELD) | cmp - history.after
X	test -r history.pag ;
X	test -r history.dir ;
X	: "success!"
X
# variant regression test for -r
rr:	$(THEM) $(UPACT) dircheck setup tgood pgood
X	chmod +x dircheck $(THEM)
X	$(RUN) -p -r explist >test.out 2>test.stderr
X	test ! -s test.stderr ;
X	cmp test.out pgood
X	cmp history.proto history
X	egrep wont history.proto | ./dircheck arts
X	egrep 'will|two|gone|three' history.proto | ./dircheck -n arts
X	egrep will history.proto | ./dircheck arch
X	egrep 'wont|two|gone|three' history.proto | ./dircheck -n arch
X	egrep two history.proto | ./dircheck arch2
X	egrep 'will|wont|gone|three' history.proto | ./dircheck -n arch2
X	egrep three history.proto | ./dircheck arch3
X	egrep 'will|wont|gone|two' history.proto | ./dircheck -n arch3
X	test -f arts/foo/99 ;
X	test -f arts/bar/ugh/99 ;
X	test -f arch2/bar/99 ;
X	test ! -f arts/urp/99 ;
X
tidy:
X	rm -f junk history history.pag history.dir history.o active active.tmp
X	rm -f history.n* *mon.out history.proto history.after test.out doit
X	rm -f active.old active.new explist lint active.after test.stderr
X	rm -rf arts arch arch2 arch3 nbin
X
clean:	tidy
X	rm -f *.o expire histslash mkdbm histinfo explist explist.*
X	rm -f dtr updatemin backwater
X
spotless:	clean	# don't run this unless you know what you're doing
X	rm -f pgood tgood
X
dtr:	$(DTR)
X	makedtr $(DTR) >$@
SHAR_EOF
chmod 0664 Makefile ||
echo 'restore of Makefile failed'
# The message above is printed when either the sed extraction or the chmod
# failed.  Next, compare the byte count of the extracted file with the
# expected 9816 and report a difference; the file is kept either way.
Wc_c="`wc -c < 'Makefile'`"
test 9816 -eq "$Wc_c" ||
	echo 'Makefile: original size 9816, current size' "$Wc_c"
fi
exit 0
-- 
This is a one line proof...if we start                            ../Dave Mason
sufficiently far to the left.                     <mason%tmsoft@cs.toronto.edu>
         -peter@cbmvax.cbm.commodore.com          <mason@tmsoft.uucp>