rae%alias@csri.toronto.edu (Reid Ellis) (03/27/90)
A problem and a program. The problem - I have a program that archives all of the source groups for me at night [comp.sources.*] which uses an 'rc' file in the home directory. The program is called 'narc' and the rc file is named '.narcrc' and everytime I run narc, which is a perl script, I get 8k of NULs appended to the end of my '.narcrc' [which JUST SO HAPPENS to be == BUFSIZ].. The program - To help people find my problem :) here is my news archival program, written in Perl. #!/bin/sh # This is a shell archive, meaning: # 1. Remove everything above the #!/bin/sh line. # 2. Save the resulting text in a file. # 3. Execute the file with /bin/sh (not csh) to create the files: # narc.pl # This archive created: Mon Mar 26 15:46:44 1990 # By: Reid Ellis (Alias Research) export PATH; PATH=/bin:$PATH if test -f 'narc.pl' then echo shar: over-writing existing file "'narc.pl'" fi cat << \SHAR_EOF > 'narc.pl' #!/usr/local/bin/perl # # $Source: /repousr/u/rae/src/TEXT/perl/RCS/narc.pl,v $ # $Revision: 2.0 $ # $Author: rae $ # $Date: 90/03/25 15:43:00 $ # narc -- News ARChiver # to avoid the buffering problems # $| = 0; # Check for a NARCRC environment variable and use it if # it's there. # if($ENV{'NARCRC'}) { $narcrc = $ENV{'NARCRC'}; # if NARCRC doesn't start with a '/', prepend # $HOME to it # if($narcrc !~ /\/.*/) { $narcrc = $ENV{"HOME"} . "/" . $narcrc; } } else { # Default NARCRC: "$HOME/.narcrc" # $narcrc = $ENV{'HOME'} . "/.narcrc"; } # testing flag. Usually zero. # $verbose=0; $standalone = "stand"; # get the name of the news server. Can be replaced with # a hardcoded '$server=machine;' if you like # $server_file = "/usr/local/lib/news/server"; $server = `cat $server_file` || die "Can't open $server_file"; chop $server; # Where to find nntp # $nntp = "/usr/local/bin/nntp $server"; # open narcrc for input. The format is: # # /full/path/to/archive/directory # dirname newsgroup last-time-of-access # dirname newsgroup last-time-of-access # dirname newsgroup last-time-of-access # ... # # 'dirname' is appended to the initial path. The special # case dirname of 'default' will use the last element of # the newsgroup name. For example, # default comp.sources.unix 19900101 000000 # will use the dirname 'unix' open(NARCRC, "<$narcrc"); $SIG{'INT'} = 'handler'; $SIG{'QUIT'} = 'handler'; # The first line of the narcrc is where to store everythign # $repo = <NARCRC>; chop($repo); print "Saving files in $repo\n" if $verbose; # $narc_line[$narc_index] is where each new line of the narcrc is # stored . This only gets written out after a successful 'narc'. # $narc_index = 1; # for each line in the narcrc # chop it into separate bits # ask nntp for all new news for the group # for each new news item # read article into /tmp/narc$$ # scan for magic string 'Archive-name' # get the subdir and part-name from this line # if hit eof, then no magic string was found, do next news item # create all leading directories if they're not there already # move /tmp/narc$$ to full pathname of archived article while($narc_entry = <NARCRC>) { chop($narc_entry); # This is a software end-of-file because I am having trouble with # garbage showing up at the ends of file. With shar files it's # not such a big deal, since they usually have 'exit 0' at the end # of them, but with the .narcrc, it might be accidentally parsed. # last if($narc_entry eq "EOF"); ($real_dirname, $newsgroup, $lastdate, $lasttime) = split(' ', $narc_entry); # check for degenerate empty case # last if $newsgroup eq "" || $lastdate eq "" || $lasttime eq ""; # if "default", set dirname to last element # of the newsgroup name # if($real_dirname eq "default" && $newsgroup =~ /.*\.([^.]*)$/) { $dirname = $1; } else { $dirname = $real_dirname; } # tell the user what's going on # "$lastdate $lasttime" =~ /^(..)(..)(..) (..)(..)(..)$/ && printf("%s %s 19%s-%s-%s %s:%s\n", $newsgroup, "[$dirname]", $1, $2, $3, $4, $5); # open the stdout of the nntp command # open(MSGIDS, "$nntp newnews $newsgroup $lastdate $lasttime|") || die "Can't get message ID's for $newsgroup from $server"; # flag to see if we do anything $we_did_something = 0; # for each message ID.. # while($msgid = <MSGIDS>) { # set the we_did_something flag so we know to update # the narcrc # $we_did_something = 1; # Zap the trailing newline on $msgid # chop($msgid); print "\t$msgid\n" if $verbose; # Get the article and store it in /tmp so we don't # need to use multiple [slow] nntp's for the same article # system("$nntp article '$msgid' > /tmp/narc$$"); # Open the article and check for "Archive-name" # open(BODY, "</tmp/narc$$") || die "Can't read retrieved article $msgid in file /tmp/narc$$"; while(<BODY>) { if(/^Archive-name:/) { ($foo, $arc_name) = split; @tree = split('/', $arc_name); # Put it in the '$standalone' dir if it's a single shar unshift(tree, $standalone) if $#tree == 0; print "\t", join('/', @tree), "\n"; last; } #if Archive-name } # while BODY # if eof then "Archive-name" wasn't found # last if eof; # close the file # close(<BODY>); # Now build the full pathname of where the file is # supposed to live $fullpath = $repo . '/' . $dirname . '/' . join('/', @tree); $name = pop(@tree); $relpath = $dirname . '/' . join('/', @tree); # make sure the dirs are there # &mkpath($repo . '/' . $dirname . '/' . join('/', @tree)); if(-f $fullpath) { # A copy of the file already exists, so name this one the # same as the existing one, but with a number appended to # it to delineate its version. # for($i=2; -f $fullpath . $i; $i++) { } # tack the number onto the end of the file name # $fullpath .= $i; } # and finally, put it there system("mv /tmp/narc$$ $fullpath"); } # while MSGIDS close(<MSGIDS>); # only update the timestamp if we did anything # $timestamp = $we_did_something ? &now() : "$lastdate $lasttime"; # Save the narcrc lines in memory in order so we can save them in # their original order. Just using 'each' on %NARCRC currently # inverts their order [as of Perl 3.0] # $narc_line[$narc_index++] = sprintf("%s\t%s\t%s", $real_dirname, $newsgroup, $timestamp); } # while narc_entry close(<NARCRC>); # Save an updated narcrc # print "Saving $narcrc.."; open(NARCRC, ">$narcrc"); print NARCRC $repo, "\n"; for($i=1; $i < $narc_index; $i++) { print NARCRC $narc_line[$i], "\n"; } # software EOF print NARCRC "EOF\n"; close(<NARCRC>); print "\n"; # Subroutine to return the current date and time in # nntp format, which is "YYMMDD HHMMSS" # sub now { ($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst) = localtime(time()); $mon++; sprintf("%02d%02d%02d %02d%02d%02d", $year,$mon,$mday,$hour,$min,$sec); } sub handler { local($sig) = @_; print "\n\nCaught signal $sig -- exiting $ARGV[0]\n"; close(<ARTICLE>); close(<BODY>); close(<MSGIDS>); close(<NARCRC>); close(<SAVEFILE>); exit 0; } sub mkpath { local($fullpath) = @_; local(@tree) = split('/', $fullpath); local(@tmp) = (); if($tree[0] eq "") { push(@tmp, shift(@tree)); } foreach $dir (@tree) { push(@tmp, $dir); $path = join('/', @tmp); print "\t%% checking for '$path'\n" if $verbose; mkdir("$path", 0777) || die "Can't create directory $path" if ! -d "$path"; } } # Emacs cutsomisation # # Local Variables: # mode:fundamental # tab-width:4 # End: SHAR_EOF chmod +x 'narc.pl' # End of shell archive exit 0 Reid Ellis 264 Broadway Avenue, Toronto ON, M4P 1V9 Canada rae@gpu.utcs.toronto.edu || rae%alias@csri.toronto.edu || +1 416 487 1383