[comp.archives.admin] Useful perl scripts: source

lmjm@doc.ic.ac.uk (Lee McLoughlin) (06/24/91)

Here are some highly hacked perl/shell scripts I use for helping me
maintain the UKUUG Software Distribution Service archive.  Till
a couple of weeks ago my machines were not on the internet so I had
to use other means of getting files back.  An ftam/ftp gateway was the
main one.  (Ftam is the ISO equivalent of ftp - more or less).  Hence
the -i flags on the "get" scripts below.

I can't say I'm proud of the code in these scripts - but they get the
job done.  They need to be sprused up a lot - especially norm.

What I'd really like to do is agree a common format between all the 
archive maintainers.  A common pathname policy would be a great boon
too!

Anyhow what I tend to do is get a directory listing back from a remote
site.  Run norm over it and then edit it down to just the bits
I want and then use the fget script to pull back these files.

Or I just use fget_ca!

-rw-r--r--  1 lmjm         1582 Jun 23 22:14 dateconv.pl
	A perl package to do useful things with dates.
	It is the first perl package I wrote so don't laugh.
	The library routines provided can convert "ls-lR" type time
	strings into time()  values.  Time values can also be
	converted into a "normalised" format:
		dd mon yy hh:mm
	eg:
		13 Jun 91 18:53
	This format has changed several times after feedback from
	users.  It is fixed format, easy to understand, not dependant
	on either US or UK ordering of dates.  I tried just using 
	the "ls" date string but found the yyyy vs hh:mm thing just a pain
	to convert back into a time() value.

-rwxr-xr-x  1 lmjm         1297 Jun 23 22:14 norm
	Normalise a directory listing.  A bit simplistic but does the
	job well enough.  Doesn't understand empty directories nor
	links.  Output can be restricted to only recent files with 
	a "-days" argument.
	For example:
		bash$ cd gnu
		bash$ ls -lRt gnu /usr/tmp | norm -15
   11097 23 Jun 91 01:35 gnu/ls-lR.Z
  288425 13 Jun 91 18:53 gnu/fileutils-2.1.tar.Z
 1087901 13 Jun 91 18:51 gnu/groff-1.02.tar.Z
  747311 13 Jun 91 18:45 gnu/perl-4.010.tar.Z
   93825 13 Jun 91 20:08 gnu/EmacsBits/auc-tex.3.0.tar.Z
   15912 13 Jun 91 20:17 gnu/EmacsBits/elisp-archive/as-is/c++-mode.el.Z
	This format is designed to be easy to read and to be
	parsable by a perl script.  Full paths are used as they
	make "diff"ing two directories a lot easier!

-rwxr-xr-x  1 lmjm         2898 Jun 23 22:14 fget
	Given a site name and a normalised directory listing pulls all the
	files back.  This script is a dumb as a post but does do the job.
	About the only smart thing it does is not retrieve files that
	already exists locally and are the right size.
		bash$ fget -i prep.ai.mit.edu get
	The -i flag is to use internet ftp rather than ftam (which was
	all I had till recently).
	A chat based script would be much better.
	
-rwxr-xr-x  1 lmjm         1223 Jun 23 22:14 fget_ca
	Read a comp.archives posting and try to suck back all the files
	listed in the verification section.
		bash$ fget -i /usr/spool/news/comp.archives/5681

-rwxr-xr-x  1 lmjm          720 Jun 23 22:14 fgetone
	Simple shell script to pull back either files or directory
	listings.
		bash$ fgetone -i -l prep.ai.mit.edu

-rwxr-xr-x  1 lmjm          963 Jun 23 22:31 dis/notgot
	Filter that checks that files in a normalised directory listing
	do not already exist in the filestore.
		bash$ notgot -r /public/gnu get > get2

#! /bin/sh
# This is a shell archive.  Remove anything before this line, then unpack
# it by saving it into a file and typing "sh file".  To overwrite existing
# files, type "sh file -c".  You can also feed this as standard input via
# unshar, or by typing "sh <file", e.g..  If this archive is complete, you
# will see the following message at the end:
#		"End of shell archive."
# Contents:  dateconv.pl fget fget_ca fgetone norm notgot
# Wrapped by lmjm@swan.doc.ic.ac.uk on Sun Jun 23 22:45:40 1991
PATH=/bin:/usr/bin:/usr/ucb ; export PATH
if test -f 'dateconv.pl' -a "${1}" != "-c" ; then 
  echo shar: Will not clobber existing file \"'dateconv.pl'\"
else
echo shar: Extracting \"'dateconv.pl'\" \(1582 characters\)
sed "s/^X//" >'dateconv.pl' <<'END_OF_FILE'
X# Convert a date into a time.
X
Xsub lstime_to_standard
X{
X	local( $ls ) = @_;
X
X	return &time_to_standard( &lstime_to_time( $ls ) );
X}
X
Xpackage dateconv;
X
Xrequire 'timelocal.pl';
X
X@months = ( "zero", "Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec" );
X
X$month_num{ "Jan" } = 0;
X$month_num{ "Feb" } = 1;
X$month_num{ "Mar" } = 2;
X$month_num{ "Apr" } = 3;
X$month_num{ "May" } = 4;
X$month_num{ "Jun" } = 5;
X$month_num{ "Jul" } = 6;
X$month_num{ "Aug" } = 7;
X$month_num{ "Sep" } = 8;
X$month_num{ "Oct" } = 9;
X$month_num{ "Nov" } = 10;
X$month_num{ "Dec" } = 11;
X
X( $mn, $yr ) = (localtime)[ 4, 5 ];
X
X# Convert an 'ls' type date string into a time
Xsub main'lstime_to_time
X{
X	package dateconv;
X
X	local( $date ) = @_;
X
X	local( $mon, $day, $hours, $mins, $month, $year );
X
X	if( $date =~ /^(\w\w\w)\s+(\d+)\s+((\d\d\d\d)|((\d+):(\d+)))$/ ){
X		($mon, $day, $year, $hours, $mins) = ($1, $2, $4, $6, $7);
X#print "(mon $mon, day $day, year $year, hours $hours, mins $mins)\n";
X	}
X	else {
X		printf STDERR "invalid date $date\n";
X		return time;
X	}
X	
X	$month = $month_num{ $mon };
X
X	if( $year !~ /\d\d\d\d/ ){
X		$year = $yr;
X		$year-- if( $month > $mn );
X	}
X	if( $year > 1900 ){
X		$year -= 1900;
X	}
X	 
X#print " &timegm( 0, $mins, $hours, $day, $month, $year );\n";
X	return &timegm( 0, $mins, $hours, $day, $month, $year );
X}
X
Xsub main'time_to_standard
X{
X	package dateconv;
X
X	local( $time ) = @_;
X
X	local( $sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst ) =
X		 gmtime( $time );
X	return sprintf( "%2d $months[ $mon + 1 ] %2d %02d:%02d", $mday, $year, $hour, $min );
X}
END_OF_FILE
if test 1582 -ne `wc -c <'dateconv.pl'`; then
    echo shar: \"'dateconv.pl'\" unpacked with wrong size!
fi
# end of 'dateconv.pl'
fi
if test -f 'fget' -a "${1}" != "-c" ; then 
  echo shar: Will not clobber existing file \"'fget'\"
else
echo shar: Extracting \"'fget'\" \(2898 characters\)
sed "s/^X//" >'fget' <<'END_OF_FILE'
X#!/usr/local/bin/perl -s
X$gateway = "ulcc";
X$gohome = 1; #go home before each cd
Xif( $g ){
X	$gohome = 0;
X}
X
X$#ARGV != 1 && die "Usage: $0 hostname xfer-details";
X
X$host = $ARGV[ 0 ];
X$xfer = $ARGV[ 1 ];
Xshift;shift;
X
Xopen( STDIN, $xfer ) || die "Cannot open $xfer";
X
Xif( ! $debug ){
X	if( $i ){
X		open( FTAM, "| ftp -v -i -n" ) || die "Cannot pipe to ftp";
X	}
X	else {
X		open( FTAM, "| ftam" ) || die "Cannot pipe to ftam";
X	}
X}
X
Xselect( STDOUT );
X$| = 1;
X
Xchop( $ftamhome = `pwd` );
Xprint "Currently in $ftamhome\n" if( $show );
X
Xif( $i ){
X	&fcmd( "open $host" );
X	&fcmd( "user anonymous ukuug-soft@doc.ic.ac.uk" );
X	&fcmd( "bin" );
X	&fcmd( "hash" );
X}
Xelse {
X	&fcmd( "set qua ftpstore" );
X	&fcmd( "open $gateway anonymous@$host" );
X	&fcmd( "set realstore unix" );
X	&fcmd( "set watch on" );
X}
X
X# Read the normalised directory list and send commands to ftam/ftp
X$currdir = "/";
X$slashes = 0;
Xwhile( <> ){
X	$ok = 0;
X	next if( /^#/ );
X	if( /,/ ){
X		print STDERR "Ignoring line as it contains a ,: $_";
X		next;
X	}
X	if( /^\s*(\d+)\s*([ \d]\d) (\w\w\w) (\d\d) (\d\d):(\d\d)\s*(.*)$/ ){
X		($size, $day, $mon, $year, $hour, $min, $file) =
X			($1,$2,$3,$4,$5,$6,$7);
X		$ok = 1;
X	}
X	elsif( /^\s*(\d+)\s+([^\s]+).*$/ ){
X		$size = $1;
X		$file = $2;
X		$ok = 1;
X	}
X	if( $ok ){
X		$dir = "";
X		
X		$n = rindex( $file, "/" );
X		if( $n != 0 && ($file =~ /^(.*)\/([^\/][^\/]*)$/) ){
X			$dir = $1;
X			$file = $2;
X		}
X			
X		if( $currdir ne ($dir . '/') ){
X			if( $gohome ){
X				&fcd( $dir );
X			}
X			else {
X				local( $up );
X				if( $slashes ){
X					$up = "../" x $slashes;
X				}
X				$slashes = ($dir =~ s,/,/,g) + 1;
X				&fcd( $up . $dir );
X			}
X		}
X		$currdir = "$dir/";
X
X		if( -e $file ){
X			if( ! &filesize( $file, $size ) ){
X				print "$file is wrong size, removing\n" if( $show );
X				unlink $file if( ! $debug );
X			}
X			else {
X				print "$file already exists with right size, skipping\n" if( $show );
X				next;
X			}
X		}
X		&fcmd( "get $file $file" );
X	}
X	else {
X		print STDERR "Unmatched line: $_";
X		last;
X	}
X}
X&fcmd( "quit" );
Xclose FTAM;
X
X
Xsub fcmd
X{
X	local( $cmd ) = @_;
X	
X	print "$cmd\n" if( $show );
X	if( !$debug ){
X		print FTAM "$cmd\n";
X	}
X}
X
Xsub filesize
X{
X	local( $file, $expected_size ) = @_;
X
X	($dev,$ino,$mode,$nlink,$uid,$gid,$rdev,$size,
X	 $atime,$mtime,$ctime,$blksize,$blocks)
X	 = stat( $file );
X	
X	return $size == $expected_size;
X}
X
Xsub fcd
X{
X	local( $dir ) = @_;
X	
X	if( $gohome ){
X		chdir( $ftamhome );
X		&mkdirs( $dir );
X		chdir( $dir );
X		&fcmd( "lcd $ftamhome/$dir" );
X		&fcmd( "cd /$dir" );
X		return;
X	}
X	
X	&mkdirs( $dir );
X	chdir( $dir );
X	&fcmd( "lcd $dir" );
X	&fcmd( "cd $dir" );
X}
X
Xsub mkdirs
X{
X	local( $dir, @dir, $d, $path ) = @_;
X	
X	# Make sure that the target directory exists
X	@dirs = split( '/', $dir );
X	$path = "";
X	foreach $d ( @dirs ){
X		$path = $path . $d;
X		if( ! -d $path ){
X			print "mkdir $path\n" if( $show );
X			mkdir( $path, 0755 );
X		}
X		$path .= "/";
X	}
X}
Xexit( 0 );
END_OF_FILE
if test 2898 -ne `wc -c <'fget'`; then
    echo shar: \"'fget'\" unpacked with wrong size!
fi
chmod +x 'fget'
# end of 'fget'
fi
if test -f 'fget_ca' -a "${1}" != "-c" ; then 
  echo shar: Will not clobber existing file \"'fget_ca'\"
else
echo shar: Extracting \"'fget_ca'\" \(1223 characters\)
sed "s/^X//" >'fget_ca' <<'END_OF_FILE'
X#!/usr/local/bin/perl -s
X# parse a comp.archive posting and convert it into
X# a "get" script
X
X$tmp = "/tmp/.fgca$$";
X$tmp2 = "/tmp/.fgca$$.2";
X
Xif( $d ){
X	$options .= " -debug";
X}
Xif( $i ){
X	$options .= " -i";
X}
X
Xopen( get, ">$tmp" ) || die "cannot create $tmp";
X
Xwhile( <> ){
X	if( /^Archive(-directory)?:\s*([^:]+).*\[(.*)\]\s*$/ ){
X		( $host, $ip ) = ($2, $3);
X	}
X	if( /^-- comp.archives file verification/ ||
X	    /^-- MSEN Archive Service file verification/ ){
X		local( $files );
X		while( <> ){
X			chop;
X			$site_name = $_;
X			last;
X		}
X		while( <> ){
X			$files .= $_;
X			last if( /^found/ );
X		}
X		while( <> ){
X			if( /^[^:]+:(.+)\/[^\/]*$/ ){
X				$dir = $1;
X				last;
X			}
X		}
X		print get "$dir:\n";
X		print get "$files\n";
X	}
X}
X
Xclose( get );
X
Xsystem( "norm $tmp > $tmp2" ); # || die "cannot run 'norm $tmp > $tmp2'";
Xunlink( $tmp );
X
Xprint "About to get from $host ($ip)\n";
Xsystem( "cat < $tmp2" );
Xopen( STDIN, "/dev/tty" );
X$prompt= "edit, quit, do [do] ";
Xprint $prompt;
Xwhile( <> ){
X	if( /^\s*$/ || /do/ ){
X		last;
X	}
X	if( /e/ ){
X		system( "vi $tmp2" );
X	}
X	elsif( /q/ ){
X		exit( 0 );
X	}
X	print $prompt;
X}
X
Xsystem( "fget $options $ip $tmp2" ); # || die "cannot run 'fget $options $ip $tmp2'";
Xunlink( $tmp2 );
END_OF_FILE
if test 1223 -ne `wc -c <'fget_ca'`; then
    echo shar: \"'fget_ca'\" unpacked with wrong size!
fi
chmod +x 'fget_ca'
# end of 'fget_ca'
fi
if test -f 'fgetone' -a "${1}" != "-c" ; then 
  echo shar: Will not clobber existing file \"'fgetone'\"
else
echo shar: Extracting \"'fgetone'\" \(720 characters\)
sed "s/^X//" >'fgetone' <<'END_OF_FILE'
X#!/bin/sh
Xcase "$1" in
X-i)	flag=-i; shift ;;
Xesac
X
Xcase "$1" in
X-l)	ls=yes ; shift ;;
Xesac
X
Xcase "$1" in
X"")
X	echo  Must give a hostname
X	exit
X	;;
Xesac
Xhost=$1
X
Xcase "$ls" in
X'')
X	case "$2" in
X	"")
X		echo Must give a filename
X		exit
X		;;
X	esac
X	file=$2
X	filehere=`basename $2`
X
X	if [ -r $filehere ]; then
X		echo $filehere already exists
X		exit
X	fi
Xesac
X
Xcase "$flag" in
X-i)
X	(
X	echo open $host
X	echo user anonymous ukuug-soft@doc.ic.ac.uk
X	echo binary
X	echo hash
X	case "$ls" in
X	yes) echo ls -lR ls-lR ;;
X	*)   echo get $file $filehere
X	esac
X	echo quit
X	) | ftp -v -n -i
X	;;
X*)
X	(
X	echo set qua ftpstore
X	echo open ulcc anonymous@$host
X	echo set realstore unix
X	echo get $file $filehere
X	echo quit
X	) | ftam -v
X	;;
Xesac
END_OF_FILE
if test 720 -ne `wc -c <'fgetone'`; then
    echo shar: \"'fgetone'\" unpacked with wrong size!
fi
chmod +x 'fgetone'
# end of 'fgetone'
fi
if test -f 'norm' -a "${1}" != "-c" ; then 
  echo shar: Will not clobber existing file \"'norm'\"
else
echo shar: Extracting \"'norm'\" \(1297 characters\)
sed "s/^X//" >'norm' <<'END_OF_FILE'
X#!/usr/local/bin/perl
X
Xif( $ARGV[ 0 ] =~ /^-help$/ ){
X	print <<EOF;
XUsage: norm [-days] [files]
XConverts an "ls -l" listing into a listing in a "standard" format.
XIf -days then only files <= that many days old will be output.
XEOF
X	exit( 1 );
X}
X
X# Add to the include path so the require will work.
Xpush( @INC, "/homes/lmjm/lib/perl" );
Xrequire 'dateconv.pl';
X
X$now = time;
Xif( $ARGV[ 0 ] =~ /^-(\d+)$/ ){
X	$recent_days = $1;
X	$recent_secs = 60 * 60 * 24 * $recent_days;
X	shift;
X}
X
X$currdir = "";
X
Xwhile( <> ){
X	# Junk everything up to the "size mon day..."
X	if( /^-.*\s(\d+)\s*(\w\w\w\s+\d+\s*(\d+:\d+|\d\d\d\d))\s+(.*)\n/ ){
X		local( $size, $lsdate, $file ) = ($1, $2, $4);
X		
X		$secs = &lstime_to_time( $lsdate );
X		
X		# Skip non recent files
X		next if( $recent_secs && (($now - $secs) > $recent_secs) );
X		
X		$date = &time_to_standard( $secs );
X
X		$file =~ s,^/$match,,;
X		printf " %7d $date $currdir$file\n", $size ;
X	}
X	elsif( /^[\.\/]*(.*):$/ ){
X		$match = $currdir = "$1/";
X		$match =~ s/[\+\(\[\*\?]/\\$1/g;
X	}
X	elsif( /^[ldcbsp].*[^:]$/ || /^\s*$/ || /^[Tt]otal.*/ || /[Uu]nreadable$/ ){
X		;
X	}
X	elsif( /^[\.\/]*([^\s]*)/ ){
X		# Just for the expo ls listing
X		$match = $currdir = "$1/";
X		$match =~ s/[\+\(\[\*\?]/\\$1/g;
X	}		
X	else {
X		printf( "Unmatched line: %s", $_ );
X	}
X}
Xexit( 0 );
END_OF_FILE
if test 1297 -ne `wc -c <'norm'`; then
    echo shar: \"'norm'\" unpacked with wrong size!
fi
chmod +x 'norm'
# end of 'norm'
fi
if test -f 'notgot' -a "${1}" != "-c" ; then 
  echo shar: Will not clobber existing file \"'notgot'\"
else
echo shar: Extracting \"'notgot'\" \(963 characters\)
sed "s/^X//" >'notgot' <<'END_OF_FILE'
X#!/usr/local/bin/perl
X
X# Presume the root to compare against starts here...
X$root = ".";
Xif( $#ARGV > 0 && $ARGV[ 0 ] eq "-r" ){
X	$root = $ARGV[ 1 ];
X	shift;
X	shift;
X}
X
X# Ignore file size
X$ignore = 0;
Xif( $#ARGV > 0 && $ARGV[ 0 ] eq "-i" ){
X	$ignore = 1;
X	shift;
X}
X
Xsub notgot
X{
X	local( $size, $file ) = @_;
X	local( $full ) = "$root/$file";
X	$full =~ s/^\.\///;
X	$full =~ s/^\.\///;
X	if( ! -e $full || (!$ignore && ! &filesize( $full, $size )) ){
X		print $line;
X	}
X}
X
Xwhile( <> ){
X	$line = $_;
X	next if( /^#/ );
X	if( /^\s*(\d+)\s+([ \d]\d \w\w\w \d\d \d\d:\d\d)\s+(.+)$/ ){
X		local( $size, $date, $file ) = ($1, $2, $3);
X		&notgot( $size, $file );
X		next;
X	}
X	if( /^\s*(\d+)\s+(.+)$/ ){
X		local( $size, $file ) = ($1, $2);
X		&notgot( $size, $file );
X		next;
X	}
X}
X
Xsub filesize
X{
X	local( $file, $expected_size ) = @_;
X
X	($dev,$ino,$mode,$nlink,$uid,$gid,$rdev,$size,
X	 $atime,$mtime,$ctime,$blksize,$blocks)
X	 = stat( $file );
X	
X	return $size == $expected_size;
X}
END_OF_FILE
if test 963 -ne `wc -c <'notgot'`; then
    echo shar: \"'notgot'\" unpacked with wrong size!
fi
chmod +x 'notgot'
# end of 'notgot'
fi
echo shar: End of shell archive.
exit 0
-- 
--
Lee McLoughlin		phone: 071 589 5111 X 5085  	fax: 071 581 8024
Department of Computing, Imperial College, 180 Queens Gate, London SW7 2BZ, UK
Janet: lmjm@uk.ac.ic.doc	Uucp:  lmjm@icdoc.UUCP (or ..!ukc!icdoc!lmjm)