[comp.lang.perl] Problems with checkgroups with cnews

urlichs@smurf.sub.org (Matthias Urlichs) (09/27/90)

In news.software.b, article <1990Sep21.155243.6218@zoo.toronto.edu>,
  henry@zoo.toronto.edu (Henry Spencer) writes:
< 
< Sigh, I seem to end up saying this about once a week:  C News checkgroups
< has bugs.  This is partly because there is no specification for how
< checkgroups is supposed to work, so implementing it was guesswork.  We
< do hope to fix this eventually.

If you like it, you can use this Perl script...
It's probably as close to a specification as you can get. ;-)

NB: This article has been crossposted. Please followup appropriately.

General bug: There's no way to find out whether a checkgroups message is
authoritative for a hierarchy, or just happens to mention one or two groups
in that hierarchy.
General asset: There's no need to take text out of postings like "Alternate
newsgroup hierarchies".
The newsgroups file is made to keep any descriptions, even those of groups
you don't get. Whether this is a bug or a feature is not for me to
decide.

#!/usr/local/bin/perl
# checkgroups.pl V0.9

# Work in the news admin directory, where the "newsgroups" and
# "distributions" files are.  Both are opened by relative name further down
# (and "newsgroups" is rewritten), so give up right away if the directory
# cannot be entered instead of carrying on in the wrong place.
chdir("/usr/local/news/admin") || die "Can't chdir to /usr/local/news/admin: $!";

## this code performs the following, highly useful functions:
# Read the "distributions" file to find out what you want.
#  I didn't want to analyze the sys file; besides in the distributions file
#  you can put descriptions and comments.
# Read & analyze your newsgroups file.
# Read & analyze input files (extracting any newsgroups-style lines),
#  discarding anything unintelligible or excluded.
#  stdin is read if no files are specified.
#   newsgroups-style lines are defined as lines with a newsgroup name in front,
#   a sequence of tabs, and a description.
#   Optional: a ! in front means that the newsgroup (or distribution, if there
#    are no dots in the newsgroup name) is to be excluded, the trailer
#    " (Moderated)" means just that, and the tabs+description may be missing.
# Read your active file.
# Output a new newsgroups file, appropriately tabulated (per top-level),
#   sorted, which collects all definitions ever in case you need them again.
# Writes recommendations as to which groups to delete/add/(un)moderate.
#  -del means don't emit delgroup lines unless they seem to be definite.
# Never complain about hierarchies which aren't even mentioned in the
#  checkgroups message.
#
# Public Domain. -- Matthias Urlichs
#   Changes, additions, pretty-up-ifications, et al., welcome.

# All tables are keyed by group name, except %mentioned, which is keyed by
# the top-level (distribution) part of the name.

# -- what the input said
%desc      = ();  ## textual descriptions
%mod       = ();  ## 1: marked "(Moderated)", -1: not marked
%unmod     = ();  ## marked unmoderated? (not referenced again in the visible script)
%reject    = ();  ## !ed out (only tested, never filled in, in the visible script)
%mentioned = ();  ## distribution was present in input

# -- what should be done about it
%add     = ();  ## missing
%delete  = ();  ## present, but superfluous
%domod   = ();  ## change to moderated
%dounmod = ();  ## change to unmoderated
%there   = ();  ## x-ed out, but add it.
%inthere = ();  ## n-ed out, but add it.

# Read the "distributions" file: one name per line, optionally followed by
# whitespace and a description; "#" starts a comment.
# A leading "!" puts the name into %nondist (unwanted); any other name goes
# into %dist, whose values are only ever tested for truth later on.
open (DIST, "distributions") || die "No distrib file";

Config:
while(<DIST>) {
   s/\s*#.*//;              # drop all comments
   s/\s+$//;                # drop the newline and trailing blanks; a plain
                            #  chop would eat a real character off an
                            #  unterminated last line
   next Config unless /\S/; # and empty (or all-blank) lines
   $negated = s/^!//;
   ($dist, $desc) = split(/\s+/, $_, 2);
   if ($negated) {
      $nondist{$dist} = 1;
   } else {
      # A name listed without a description must still count as wanted,
      # so never store an empty (false) value: fall back to the name itself.
      $dist{$dist} = $desc || $dist;
   }
}
close(DIST);

# The existing newsgroups file has to be readable: it is read ahead of
# everything else.
unless (-r "newsgroups") {
   die "No Newsgroups file";
}

# An optional leading "-del" switch is remembered in $defdelete and removed
# from the argument list.
if ($ARGV[$[] eq '-del') {
   shift(@ARGV);
   $defdelete = 1;
}

# With no input files named, read stdin.  "-" has to be spelled out because
# "newsgroups" is put in front next, so @ARGV is never empty for <>.
@ARGV = ("-") if $#ARGV < $[;
unshift(@ARGV, "newsgroups");

# 0 while the old newsgroups file is being read, 1 for the real input.
$examine = 0;

# Read the old newsgroups file (it was put at the front of @ARGV), then the
# checkgroups input.  $examine is 0 while the old file is being read and is
# switched to 1 at its end; %add entries, and %mentioned for non-excluded
# lines, are only set once $examine is 1.
#
# <> silently skips an empty file, so the end-of-file test in the continue
# block would never fire for an empty newsgroups file, and the whole first
# input file would be mistaken for the old newsgroups file.  Start examining
# at once in that case.
$examine = 1 if -z "newsgroups";

Newsgroups:
while(<>) {
   s/\n$//;    # strip the newline only; chop would eat a real character
               #  off an unterminated last line
   s/\s*#.*//; # remove comments, as usual
   next Newsgroups if /^$/;
   $exclude = ($_ =~ s/^!//); # remember for later
   # name and (optional) description only: a third tab-separated field
   # means this is not a newsgroups-style line
   next Newsgroups unless (($group, $desc, $dummy) = split(/\t\s*/, $_, 3)) < 3;
   next Newsgroups unless $group =~ /^[a-z]/;
   next Newsgroups if $group =~ /\s/;
   ($dist) = split(/\./, $group, 2);
   next Newsgroups if $nondist{$dist} || $nondist{$group};
   if ($exclude) { 
      $nondist{$group} = 1;
      # excluding a single group still mentions its hierarchy; excluding a
      # whole distribution (a name without dots) does not
      $mentioned{$dist} = 1 unless $group eq $dist;
      next Newsgroups;
   }
   next Newsgroups unless $dist{$dist}; # not a distribution we want
   $mentioned{$dist} = 1 if $examine;
   if ($desc =~ /\s*\([Mm]oderated\)$/) {
      $mod{$group} = 1;
   } else {
      $mod{$group} = -1;
   }
   if (length($desc) > 5 && ($mod{$group} == -1 || length($desc) > 16)) {
      $desc{$group} = $desc; # replace old descriptions with new version,
          # taking Moderated flag and non-empty-text-requirement into account
   }
   # %reject is never filled in by the visible script, so this is true for
   # every accepted input line
   if ($examine && ($reject{$group} != 1)) {
      $add{$group} = 1;
   }
} continue {
   # runs after "next" as well; eof without arguments tests the file the
   # last line came from
   $examine = 1 if (eof);
}

# Compare the active file against what the input asked for.  Only groups in
# a mentioned hierarchy are looked at.  Each such group is classified by its
# active-file flag into %delete, %there, %inthere, %dounmod or %domod, and
# is then removed from %add, so that %add ends up holding only the groups
# that are missing from the active file.
open(ACT,"/usr/local/news/admin/active") || die "Couldn't open active file";
Active:
while(<ACT>) {
   chop;
   next Active if /^(#|$)/;   # comment or empty line
   ($group, $max, $min, $flag) = split;
   ($dist) = split(/\./, $group, 2);
   next Active unless $mentioned{$dist};

   # not asked for by the input, or explicitly excluded
   $surplus = $reject{$group}
           || ($add{$group} != 1)
           || $nondist{$group}
           || $nondist{$dist};

   if ($surplus) {
      # groups already flagged x are left alone
      unless ($flag =~ /^x/) {
         $delete{$group} = 1;
      }
   } elsif ($flag =~ /^x/) {
      $there{$group} = 1;
   } elsif ($flag =~ /^[n=]/) {
      $inthere{$group} = 1;
   } elsif ($flag =~ /^m/) {
      # moderated here, but the input did not mark it moderated
      if ($mod{$group} == -1) {
         $dounmod{$group} = 1;
      }
   } elsif ($flag =~ /^y/) {
      # unmoderated here, but the input marked it moderated
      if ($mod{$group} == 1) {
         $domod{$group} = 1;
      }
   }

   # Repeats the surplus case for m/y groups: a group not in %add was
   # already put into %delete above.
   if ($flag =~ /^[my]/ && !$add{$group}) {
      $delete{$group} = 1;
   }

   delete $add{$group};
}
close(ACT);

# Rewrite the newsgroups file from %desc: sorted by group name, with the
# descriptions of each top-level hierarchy lined up at a common tab stop.
# Opening for writing truncates the old file, so a failure here must not
# pass unnoticed.
open (NG, ">newsgroups") || die "Couldn't write newsgroups file: $!";

# pass 1: longest group name per top-level hierarchy
%maxlen = ();
foreach $group (keys %desc) {
   ($dist) = split(/\./, $group, 2);
   $maxlen{$dist} = length($group) if length($group) > $maxlen{$dist};
}

# pass 2: emit "group<tabs>description"
foreach $group (sort keys %desc) {
   ($dist) = split(/\./, $group, 2);
   # first multiple of 8 beyond the longest name in this hierarchy
   $column = (int($maxlen{$dist} / 8) + 1) * 8;
   # number of tabs that take this name up to that column
   $tabs = int(($column - length($group) - 1) / 8) + 1;
   print NG $group, "\t" x $tabs, $desc{$group}, "\n";
}
# buffered write errors only show up when the file is closed
close(NG) || die "Couldn't finish writing newsgroups file: $!";

# Everything printed from here on is mailed to "news".
open (STDOUT,"|mail -s Checkgroups news") || die "Couldn't start mail: $!";

# Report groups to remove.  A removal is "definite" when the group or its
# distribution was explicitly excluded (%nondist); those are starred and get
# an "addgroup ... x" line, the others a plain "delgroup" line.  With -del
# ($defdelete) only the definite ones are reported.  The header is printed
# only once there is something to put under it.
$head = 0;
Del: foreach $group (sort keys %delete) {
   ($dist) = split(/\./, $group, 2);
   next Del unless $mentioned{$dist};
   $definite = $nondist{$group} || $nondist{$dist};
   next Del if $defdelete && !$definite;
   print ("\n#** The following groups should (*:definitely) be removed:\n") unless $head;
   $head = 1;
   print (($definite ? "#*" : "# "), $group, "\t", $desc{$group}, "\n");
   print (($definite ? "addgroup $group x" : "delgroup $group"), "\n");
}

# Report the groups still left in %add.  Only groups of a wanted and
# mentioned distribution are listed; moderated ones are starred and created
# with flag "m", the others with flag "y".
$head = 0;
Add:
foreach $group (sort keys %add) {
   ($dist) = split(/\./, $group, 2);
   next Add unless $dist{$dist} && $mentioned{$dist};
   unless ($head) {
      print "\n#** The following groups should be created (*:moderated):\n";
      $head = 1;
   }
   if ($mod{$group} > 0) {
      print "#*", $group, "\t", $desc{$group}, "\n";
      print "addgroup $group m\n";
   } else {
      print "# ", $group, "\t", $desc{$group}, "\n";
      print "addgroup $group y\n";
   }
}

# Report the groups that are unmoderated in the active file but marked
# moderated in the input (%domod).
$head = 0; DoMod: foreach $group (sort keys %domod) {
   ($dist) = split(/\./, $group, 2);
   next DoMod unless $mentioned{$dist};
   # Sanity check, mirroring the unmoderated report: skip anything the input
   # did not mark moderated.  (Skipping when $mod > 0 would drop every entry,
   # because %domod is only ever set for groups with $mod == 1.)
   next DoMod unless $mod{$group} > 0;
   print ("\n#** The following groups should be made moderated:\n") unless $head;
   print "# ", $group,"\t",$desc{$group},"\n";
   print "addgroup $group m\n";
   $head = 1;
}

# Report the groups that are moderated in the active file but not marked
# moderated in the input (%dounmod).  Groups of unmentioned hierarchies, and
# any group the input did mark moderated, are skipped.
$head = 0;
DoUnmod:
foreach $group (sort keys %dounmod) {
   ($dist) = split(/\./, $group, 2);
   next DoUnmod if !$mentioned{$dist} || $mod{$group} == 1;
   unless ($head) {
      print "\n#** The following groups should be made unmoderated:\n";
      $head = 1;
   }
   print "#  $group\t$desc{$group}\n";
   print "addgroup $group y\n";
}

-- 
Matthias Urlichs -- urlichs@smurf.sub.org -- urlichs@smurf.ira.uka.de     /(o\
Humboldtstrasse 7 - 7500 Karlsruhe 1 - FRG -- +49+721+621127(0700-2330)   \o)/