[news.admin] A Perl script to kill old articles

chip@tct.uucp (Chip Salzenberg) (03/02/91)

J. Palkovic recently posted a shell script that detects and removes
recently received articles with old dates.  That script works fine.
It even uses a program I wrote ("header").  But it's quite slow; it
invokes the "header" and "getdate" programs once for each article.

The Perl script below, "oldarts", finds the Date: header on its own,
and each invocation of "getdate" interprets 100 date strings.  It is
therefore fast enough that I don't mind running it daily.

Shar and enjoy.

#! /bin/sh
# This is a shell archive.  Remove anything before this line, then unpack
# it by saving it into a file and typing "sh file".  To overwrite existing
# files, type "sh file -c".  You can also feed this as standard input via
# unshar, or by typing "sh <file", e.g..  If this archive is complete, you
# will see the following message at the end:
#		"End of shell archive."
# Contents:  oldarts
# Wrapped by chip@tct on Fri Mar  1 11:50:13 1991
PATH=/bin:/usr/bin:/usr/ucb ; export PATH
if test -f 'oldarts' -a "${1}" != "-c" ; then 
  echo shar: Will not clobber existing file \"'oldarts'\"
else
echo shar: Extracting \"'oldarts'\" \(2339 characters\)
sed "s/^X//" >'oldarts' <<'END_OF_FILE'
Xeval 'exec /bin/perl -S $0 ${1+"$@"}'
X	if 0;
X
X# $Id: oldarts,v 1.1 1991/03/01 16:28:09 news Exp $
X#
X# Print on standard output the names of new news articles
X# with old Date: headers.
X#
X# Chip Salzenberg 3/1/91.
X# Based on an idea by J. Palkovic.
X#
X# $Log: oldarts,v $
X# Revision 1.1  1991/03/01  16:28:09  news
X# Initial revision
X#
X
X#----------------------------------------------------------------
X# Configuration -- edit these values.
X#
X
X$NEWSLIB = "/u/news/lib";
X$NEWSBIN = "/u/news/cbin";
X$NEWSSPOOL = "/u/news/spool";
X$ENV{"PATH"} = "$NEWSBIN:/u/local/bin:/usr/bin:/bin";
X$RECENT = 7;
X$OLD = 14;
X
X#----------------------------------------------------------------
X# Setup.
X#
X
X($ME = $0) =~ s#^.*/##;
X
Xchdir $NEWSSPOOL || die "$ME: $NEWSSPOOL: $!\n";
X
X$now = time;
X$limit = $now - &DAYS($OLD);
X
X#----------------------------------------------------------------
X# Generate names of all recent articles, that is,
X# articles that have arrived within the last week.
X#
X
Xopendir(NS, ".") || die "$ME: $NEWSSPOOL: can't read '.'\n";
X@hier = grep(!/\./, readdir(NS));
Xclosedir(NS);
Xopen(FIND, "find @hier -mtime -$RECENT -name '[0-9]*' -type f -print |");
Xwhile (<FIND>) {
X	chop;
X	push(@F, $_);
X	&check(*F) if @F >= 100;
X}
Xclose(FIND);
X&check(*F) if @F;
Xexit;
X
X#----------------------------------------------------------------
X# Check all filenames in the array with the given symbol table
X# entry.  Automatically truncates the array.
X#
X
Xsub check {
X	local(*f) = @_;
X	local(@d) = ();
X
X	# Generate an array @d parallel to @f, containing the
X	# date strings for the given files, or "missing" if none.
X
X	for $f (@f) {
X		$date = "missing";
X		if (open(f, $f)) {
X			while (<f>) {
X				last if /^$/;
X				($date = $1, last) if /^Date:\s+(.*)$/;
X			}
X			close(f);
X		}
X		push(@d, $date);
X	}
X
X	# Use the C News "getdate" program to translate date strings
X	# to numeric values.  Print files with old dates.
X
X	die "$ME: getdate: $!\n" unless defined($pid = open(GD, "-|"));
X	if ($pid == 0) {
X		open(STDERR, ">&STDOUT");
X		exec "getdate", @d;
X		print STDERR "$ME: getdate: $!\n";
X		exit 1;
X	}
X	while (<GD>) {
X		last unless defined($f = shift(@f));
X		if (/^(\d+)$/) {
X			print $f, "\n" if $_ < $limit;
X		}
X		else {
X			print STDERR "$ME: $f: ", $_;
X		}
X	}
X	close(GD);
X
X	# Truncate the caller's array, and return.
X
X	@f = ();
X	1;
X}
X
Xsub DAYS {
X	$_[0] * 86400;
X}
END_OF_FILE
if test 2339 -ne `wc -c <'oldarts'`; then
    echo shar: \"'oldarts'\" unpacked with wrong size!
fi
chmod +x 'oldarts'
# end of 'oldarts'
fi
echo shar: End of shell archive.
exit 0
-- 
Chip Salzenberg at Teltronics/TCT     <chip@tct.uucp>, <uunet!pdn!tct!chip>