[comp.sources.misc] v06i092: Archiving news articles

allbery@uunet.UU.NET (Brandon S. Allbery - comp.sources.misc) (04/24/89)

Posting-number: Volume 6, Issue 92
Submitted-by: jv@mh.nl (Johan Vromans)
Archive-name: doarch.pl

[Ah, but can it handle multi-level by-name's?  (A few of the larger .misc
submissions have been split into multiple subdirectories; jetroff, for
example.)  ++bsa]

The following perl script has proven to be handy when archiving and
ordering lots of comp.sources articles.

It creates two links to each article, ordered by volume, one link by
name and the other link by issue. As an example, an article
containing

	Archive-name: gl_plot/part04
	Posting-number: Volume 18, Issue 24

will end up having links named

	":Volume18/:By-Issue/v18i024" and
	":Volume18/:By-Name/gl_plot/part04" .

Of course, the script can easily be modified to do other things ...

The script is documented. An example of how to use it is included.

--
Johan Vromans			 jv@mh.nl via european backbone (mcvax)
Multihouse Automatisering bv		uucp: ..!{mcvax,hp4nl}!mh.nl!jv
Doesburgweg 7					  phone: +31 1820 62944
2803 PL Gouda - The Netherlands			    fax: +31 1820 62500

#!/bin/sh
# This is a shell archive.  Remove anything before this line,
# then unpack it by saving it in a file and typing "sh file".
#
# Wrapped by Johan Vromans <jv@mhres> on Wed Apr 19 18:04:52 1989
#
# This archive contains:
#	doarch.pl	arch_all	
#

# Reset LANG for the commands that follow.
LANG=""; export LANG

# Unpack doarch.pl.  The here-document delimiter is quoted, so the text up
# to the closing @EOF line is copied verbatim, without shell expansion.
echo x - doarch.pl
cat >doarch.pl <<'@EOF'
#!/usr/bin/perl

# @(#)@ doarch	1.1
#
# Netnews source archiving
#
# This PERL program takes one input arg, the name of a Netnews
# article, scans it for archiving headers, and links the article to
# the appropriate places.
#
# Input syntax:
#
#	<standard headers or garbage>
#	<empty line>
#	<archiving headers or garbage>
#	<empty line>
#	<more garbage>
#
# The archiving headers recognized are:
#
#	Archive-name: NN
#	Posting-number: Volume VV, Issue II
#	Posting-number: Volume VV, Info AA
#	Posting-number: Volume VV, Administrivia AA
#	<garbage>: Volume VV, Issue II
#	etc ...
#
# Upon completion, NN and VV must be known, as well as one of II or
# AA. Multi-level directories are allowed in NN .
#
# The input article is linked to
#
#	./:VolumeVV/:By-Issue/vVVINFAA
# or
#	./:VolumeVV/:By-Name/NN
#	./:VolumeVV/:By-Issue/vVViIII
#
# One level of repost is handled.

# Main program.
#
# Scan the article named on the command line for its archiving headers
# and link it into the per-volume :By-Name / :By-Issue trees below the
# current directory.  Problems are reported on STDERR.

my $This_Article = $ARGV[0];

# Without an argument <> would silently read stdin and the link step
# could never succeed, so refuse to run at all.
unless ( defined $This_Article ) {
  printf STDERR "usage: %s article\n", $0;
  exit 1;
}

my $Archive_name = "";	# NN: value of the Archive-name header
my $Volume = 0;		# VV: volume number (0 = not seen yet)
my $Issue = 0;		# II: issue number (0 = not seen yet)
my $Admin = 0;		# AA: Info/Administrivia number (0 = not seen yet)

my $in_hdr = 2;		# looking for second section

while ( <> ) {

  # An empty line ends a section; stop once both the standard headers
  # and the archiving headers have been read.
  $in_hdr-- if /^\n$/;
  last if $in_hdr == 0;

  # look for archiving headers ...

  # Archive-name: funky-stuff/part02
  if ( /^Archive-name:\s*(\S+)/i ) {
    $Archive_name = $1;
    next;
  }

  # Posting-number: Volume 18, Issue 24
  if ( /^Posting-number:\s+Volume\s+(\d+),\s*Issue\s+(\d+)/i ) {
    ($Volume, $Issue) = ($1, $2);
    next;
  }

  # Posting-number: Volume 18, Info 4
  # Posting-number: Volume 18, Administrivia 4
  if ( /^Posting-number:\s+Volume\s+(\d+),\s*(?:Info|Administrivia)\s+(\d+)/i ) {
    ($Volume, $Admin) = ($1, $2);
    next;
  }

  # Fallback: as long as the numbers are still incomplete, accept the
  # "Volume VV, ..." phrase anywhere on a line, whatever the header name.
  if ($Volume == 0 || ($Issue == 0 && $Admin == 0)) {

    if ( /Volume\s+(\d+),\s*Issue\s+(\d+)/i ) {
      ($Volume, $Issue) = ($1, $2);
      next;
    }

    if ( /Volume\s+(\d+),\s*(?:Info|Administrivia)\s+(\d+)/i ) {
      ($Volume, $Admin) = ($1, $2);
      next;
    }
  }
}

# check for completeness
if ( ($Archive_name eq "") || ($Volume == 0) ||
    (($Issue == 0) && ($Admin == 0)) ) {
  printf STDERR "*> Error: incomplete entry %s: \"%s\" %d %d %d\n",
    $This_Article, $Archive_name, $Volume, $Issue, $Admin;
} elsif ($Admin == 0) {	# not Administrivia

  # The archive name comes straight from the article and becomes part of
  # a path; a ".." component would let it climb out of the archive tree.
  if ( $Archive_name =~ m{(?:^|/)\.\.(?:/|\z)} ) {
    printf STDERR "*> Error: unsafe Archive-name in %s: \"%s\"\n",
      $This_Article, $Archive_name;
  } else {
    do_link($This_Article,
	    sprintf(":Volume%02d/:By-Name/%s",
		    $Volume, $Archive_name));
    do_link($This_Article,
	    sprintf(":Volume%02d/:By-Issue/v%02di%03d",
		    $Volume, $Volume, $Issue));
  }
} else {
  # Info / Administrivia postings are filed by number only.
  do_link($This_Article,
	  sprintf(":Volume%02d/:By-Issue/v%02dINF%03d",
		  $Volume, $Volume, $Admin));
}

# do_link SOURCE, DEST
#
# Create DEST as a hard link to SOURCE, first creating any missing
# directories named in DEST (relative to the current directory).
# If DEST already exists the link is retried once under the name
# DEST-REPOST, so one level of repost is handled.
#
# Prints a "linked ..." line on success.
# Returns 0 on success and 1 on failure (failures are reported on STDERR
# by make_path or syserr).
sub do_link {
  my ($source, $dest) = @_;

  return 1
    if make_path(".", $dest) == 1;

  my $linked = link($source, $dest);

  # Only an already existing destination qualifies for the repost retry;
  # test errno by name, directly after the failed call.
  if ( !$linked && $!{EEXIST} ) {
    $dest .= "-REPOST";
    $linked = link($source, $dest);
  }

  unless ( $linked ) {
    syserr("link(\"" . $source . "\",\"" . $dest . "\")");
    return 1;
  }

  # Plain print: the names are data, not a format string.
  print "linked " . $source . " => " . $dest . "\n";
  return 0;
}

# make_path HERE, DEST
#
# Make sure every directory component of the relative path DEST exists
# below the directory HERE, creating the missing ones.  The last
# component of DEST is taken to be a file name and is not created.
#
# Returns 0 when all directories exist afterwards, 1 on failure (a
# component exists but is not a directory, or it could not be created).
sub make_path {
  my ($here, $dest) = @_;

  # Each pass peels the leading "dir/" off DEST; what is left when the
  # pattern no longer matches is the file name.
  while ( $dest =~ m{^([^/]+)/(.+)} ) {
    my ($dir, $rest) = ($here . "/" . $1, $2);

    unless ( -d $dir ) {

      if ( -e $dir ) {
	printf STDERR "=> make_path: %s exists but not directory\n", $dir;
	return 1;
      }

      # Use the builtin rather than a shell command: DEST is derived from
      # article headers and must never be interpreted by a shell.
      unless ( mkdir($dir, 0777) ) {
	my $errno = $! + 0;	# the -d test below may overwrite $!

	# Something else may have created the directory in the meantime;
	# that is fine, only a directory that is still missing is an error.
	unless ( -d $dir ) {
	  $! = $errno;
	  syserr("mkdir(" . $dir . ")");
	  printf STDERR "=> make_path: could not make path %s\n", $dest;
	  return 1;
	}
      }
    }

    ($here, $dest) = ($dir, $rest);
  }

  return 0;
}

# syserr WHAT
#
# Report a failed system call on STDERR as "WHAT: <text of $!>".
sub syserr {
	my ($what) = @_;
	printf STDERR "%s: %s\n", $what, $!;
}
@EOF

# Leave the extracted script read-only.  Mode 444 carries no execute bit,
# so it has to be run as "perl doarch.pl" or installed executable.
# NOTE(review): arch_all calls $HOME/arch/doarch -- presumably an installed,
# executable copy of this script; confirm.
chmod 444 doarch.pl

# Unpack arch_all verbatim (the delimiter is quoted, so nothing is expanded).
echo x - arch_all
cat >arch_all <<'@EOF'
#!/bin/sh
#
# arch_all - example driver for doarch.
#
# For each comp.sources group listed below, empty the archive directory
# $loc/:<group> and run doarch on every regular file found in the news
# and oldnews spool directories of that group.

loc=/usr/spool/oldnews/comp/sources

for this in unix misc games x
do
	# Give up if the archive directory is missing; the rm below must
	# never run anywhere else.
	cd "$loc/:$this" || exit 1
	# "./" keeps names starting with "-" from being taken as options.
	rm -fr ./*
	for arch in news oldnews
	do
		for i in "/usr/spool/$arch/comp/sources/$this"/*
		do
			# -f also skips the unexpanded pattern that is left
			# when the spool directory is empty.
			if [ -f "$i" ]
			then
				"$HOME/arch/doarch" "$i"
			fi
		done
	done
done
@EOF

# arch_all is meant to be run directly, so make it executable.
chmod 755 arch_all

# Stop here so that sh never tries to interpret the signature text that
# follows the archive.
exit 0
--
Johan Vromans			 jv@mh.nl via european backbone (mcvax)
Multihouse Automatisering bv		uucp: ..!{mcvax,hp4nl}!mh.nl!jv
Doesburgweg 7					  phone: +31 1820 62944
2803 PL Gouda - The Netherlands			    fax: +31 1820 62500


-- 
Johan Vromans			 jv@mh.nl via european backbone (mcvax)
Multihouse Automatisering bv		uucp: ..!{mcvax,hp4nl}!mh.nl!jv
Doesburgweg 7					  phone: +31 1820 62944
2803 PL Gouda - The Netherlands			    fax: +31 1820 62500