urlichs@smurf.sub.org (Matthias Urlichs) (09/27/90)
In news.software.b, article <1990Sep21.155243.6218@zoo.toronto.edu>,
henry@zoo.toronto.edu (Henry Spencer) writes:
<
< Sigh, I seem to end up saying this about once a week: C News checkgroups
< has bugs. This is partly because there is no specification for how
< checkgroups is supposed to work, so implementing it was guesswork. We
< do hope to fix this eventually.

If you like it, you can use this Perl script... It's probably as close to a
specification as you can get. ;-)

NB: This article has been crossposted. Please followup appropriately.

General bug: There's no way to find out whether a checkgroups message is
authoritative for a hierarchy, or just happens to mention one or two groups in
that hierarchy.

General asset: There's no need to take text out of postings like "Alternate
newsgroup hierarchies". The Newsgroups file is set to contain any descriptions,
even those whose groups you don't get. Whether this is a bug or a feature is
not mine to decide.

#!/usr/local/bin/perl
# checkgroups.pl V0.9

## where "newsgroups" and "distributions" are.
## Every relative path below (including the rewrite of "newsgroups") depends
## on this chdir, so give up right away if it fails.
chdir("/usr/local/news/admin")
    || die "Can't chdir to /usr/local/news/admin: $!\n";

## this code performs the following, highly useful functions:
# Read the "distributions" file to find out what you want.
#   I didn't want to analyze the sys file; besides in the distributions file
#   you can put descriptions and comments.
# Read & analyze your newsgroups file.
# Read & analyze input files (extracting any newsgroups-style lines),
#   discarding anything unintelligible or excluded.
#   stdin is read if no files are specified.
#   newsgroups-style lines are defined as lines with a newsgroup name in front,
#   a sequence of tabs, and a description.
#   Optional: a ! in front means that the newsgroup (or distribution, if there
#   are no dots in the newsgroup name) is to be excluded, the trailer
#   " (Moderated)" means just that, and the tabs+description may be missing.
# Read your active file.
# Output a new newsgroups file, appropriately tabulated (per top-level),
#   sorted, which collects all definitions ever in case you need them again.
# Writes recommendations as to which groups to delete/add/(un)moderate.
#   -del means don't emit delgroup lines unless they seem to be definite.
#   Never complain about hierarchies which aren't even mentioned in the
#   checkgroups message.
#
# Public Domain. -- Matthias Urlichs
# Changes, additions, pretty-up-ifications, et al., welcome.

%desc = ();        ## textual descriptions
# state
%dist = ();        ## wanted distributions (name -> description)
%nondist = ();     ## excluded distributions and groups
%reject = ();      ## !ed out (declared and tested, but never filled in here)
%mod = ();         ## 1: marked moderated, -1: marked unmoderated
%unmod = ();       ## marked unmoderated? (not used below)
%mentioned = ();   ## distribution was present in input
# actions
%domod = ();       ## change to moderated
%dounmod = ();     ## change to unmoderated
%add = ();         ## missing
%delete = ();      ## present, but superfluous
%there = ();       ## x-ed out, but add it.  (collected only, not reported)
%inthere = ();     ## n-ed out, but add it.  (collected only, not reported)
# layout
%maxlen = ();      ## description column (multiple of 8) per top-level name

#
# Pass 1: the "distributions" file says which hierarchies are wanted
# ("name description") and which are excluded ("!name").
#
open(DIST, "distributions") || die "No distrib file";
Config: while (<DIST>) {
    s/\n$//;                 # like chop, but safe if the last line has no newline
    s/\s*#.*//;              # drop all comments
    next Config if /^$/;     # and empty lines
    $exclude = s/^!//;
    ($dist, $desc) = split(/\s+/, $_, 2);
    if ($exclude) {
        $nondist{$dist} = 1;
    } else {
        # The value doubles as the "wanted" flag tested further down, so a
        # distribution listed without a description falls back to its name
        # instead of storing an empty (false) value.
        $dist{$dist} = ($desc ne '') ? $desc : $dist;
    }
}
close(DIST);

#
# Command line: optional -del, then the checkgroups input files.
#
if ($#ARGV >= 0 && $ARGV[0] eq '-del') {
    $defdelete = 1;
    shift @ARGV;
}
unshift(@ARGV, "-") if $#ARGV < 0;    # read stdin if no files were given
die "No Newsgroups file" unless -r "newsgroups";
unshift(@ARGV, "newsgroups");         # our own file is always read first

#
# Pass 2: read the local newsgroups file, then the input files.
# $examine stays 0 while the local file is being read; it is set at the
# first end-of-file, so only lines from the input files mark hierarchies
# as mentioned and groups as wanted.
#
$examine = 0;
Newsgroups: while (<>) {
    s/\n$//;
    s/\s*#.*//;                        # remove comments, as usual
    next Newsgroups if /^$/;
    $exclude = ($_ =~ s/^!//);         # remember for later

    # "group<tabs>description"; a line with more tab-separated fields than
    # that is not a newsgroups-style line.
    next Newsgroups
        unless (($group, $desc, $dummy) = split(/\t\s*/, $_, 3)) < 3;
    next Newsgroups unless $group =~ /^[a-z]/;
    next Newsgroups if $group =~ /\s/;

    ($dist) = split(/\./, $group, 2);
    next Newsgroups if $nondist{$dist} || $nondist{$group};
    if ($exclude) {
        $nondist{$group} = 1;
        $mentioned{$dist} = 1 unless $group eq $dist;
        next Newsgroups;
    }
    next Newsgroups unless $dist{$dist};
    $mentioned{$dist} = 1 if $examine;

    $mod{$group} = ($desc =~ /\s*\([Mm]oderated\)$/) ? 1 : -1;

    # replace old descriptions with new version,
    # taking Moderated flag and non-empty-text-requirement into account
    if (length($desc) > 5 && ($mod{$group} == -1 || length($desc) > 16)) {
        $desc{$group} = $desc;
    }

    if ($examine && ($reject{$group} != 1)) {
        $add{$group} = 1;
    }
} continue {
    # plain "eof" (no empty parentheses): end of the file just read
    $examine = 1 if (eof);
}

#
# Pass 3: compare against the active file ("group max min flag").
# Groups that exist are removed from %add, so what is left afterwards
# is the list of groups still to be created.
#
open(ACT, "/usr/local/news/admin/active") || die "Couldn't open active file";
Active: while (<ACT>) {
    s/\n$//;
    next Active if /^#/;
    next Active if /^$/;
    ($group, $max, $min, $flag) = split;
    ($dist) = split(/\./, $group, 2);
    next Active unless $mentioned{$dist};

    if (   $reject{$group}
        || ($add{$group} != 1)
        || $nondist{$group}
        || $nondist{$dist}) {
        $delete{$group} = 1 unless $flag =~ /^x/;
    } elsif ($flag =~ /^x/) {
        $there{$group} = 1;
    } elsif ($flag =~ /^n/ || $flag =~ /^=/) {
        $inthere{$group} = 1;
    } elsif ($flag =~ /^m/) {
        $dounmod{$group} = 1 if $mod{$group} == -1;
    } elsif ($flag =~ /^y/) {
        $domod{$group} = 1 if $mod{$group} == 1;
    }
    if ($flag =~ /^m/ || $flag =~ /^y/) {
        $delete{$group} = 1 unless $add{$group};
    }
    delete $add{$group};
}
close(ACT);

#
# Pass 4: rewrite the newsgroups file, sorted, with the descriptions of
# each top-level hierarchy lined up on a common tab stop.
#
open(NG, ">newsgroups") || die "Can't rewrite newsgroups: $!\n";

# Longest group name per hierarchy.  This is kept as a real per-hierarchy
# maximum rather than detected from a change of hierarchy in the sorted
# list: "-" and "+" sort before ".", so the groups of one hierarchy are
# not necessarily adjacent after sorting.
foreach $group (keys %desc) {
    ($dist) = split(/\./, $group, 2);
    $maxlen{$dist} = length($group) if length($group) > $maxlen{$dist};
}
# Round up to the next tab stop strictly beyond the longest name.
foreach $dist (keys %maxlen) {
    $maxlen{$dist} = int(($maxlen{$dist} / 8) + 1) * 8;
}
foreach $group (sort keys %desc) {
    ($dist) = split(/\./, $group, 2);
    $tabs = int(($maxlen{$dist} - length($group) - 1) / 8) + 1;
    print NG $group, "\t" x $tabs, $desc{$group}, "\n";
}
close(NG) || die "Error writing newsgroups: $!\n";

#
# Pass 5: mail the recommendations.
#
open(STDOUT, "|mail -s Checkgroups news") || die "Can't start mail: $!\n";

# Deletions.  A deletion is "definite" when the group or its hierarchy was
# explicitly excluded; with -del only those are reported.
$head = 0;
Del: foreach $group (sort keys %delete) {
    ($dist) = split(/\./, $group, 2);
    next Del unless $mentioned{$dist};
    $definite = ($nondist{$group} || $nondist{$dist});
    next Del if $defdelete && !$definite;
    print "\n#** The following groups should (*:definitely) be removed:\n"
        unless $head;
    $head = 1;
    print(($definite ? "#*" : "# "), $group, "\t", $desc{$group}, "\n");
    print(($definite ? "addgroup $group x" : "delgroup $group"), "\n");
}

# Additions: whatever the active-file pass left in %add.
$head = 0;
Add: foreach $group (sort keys %add) {
    ($dist) = split(/\./, $group, 2);
    next Add unless $dist{$dist};
    next Add unless $mentioned{$dist};
    print "\n#** The following groups should be created (*:moderated):\n"
        unless $head;
    $head = 1;
    $moderated = ($mod{$group} > 0);
    print(($moderated ? "#*" : "# "), $group, "\t", $desc{$group}, "\n");
    print "addgroup $group ", ($moderated ? "m" : "y"), "\n";
}

# Groups that are unmoderated locally but marked moderated in the input.
$head = 0;
DoMod: foreach $group (sort keys %domod) {
    ($dist) = split(/\./, $group, 2);
    next DoMod unless $mentioned{$dist};
    next DoMod unless $mod{$group} > 0;    # only groups marked moderated
    print "\n#** The following groups should be made moderated:\n"
        unless $head;
    $head = 1;
    print "# ", $group, "\t", $desc{$group}, "\n";
    print "addgroup $group m\n";
}

# Groups that are moderated locally but marked unmoderated in the input.
$head = 0;
DoUnmod: foreach $group (sort keys %dounmod) {
    ($dist) = split(/\./, $group, 2);
    next DoUnmod unless $mentioned{$dist};
    next DoUnmod if $mod{$group} == 1;
    print "\n#** The following groups should be made unmoderated:\n"
        unless $head;
    $head = 1;
    print "# ", $group, "\t", $desc{$group}, "\n";
    print "addgroup $group y\n";
}

close(STDOUT);    # flush the report and wait for mail to finish

--
Matthias Urlichs -- urlichs@smurf.sub.org -- urlichs@smurf.ira.uka.de     /(o\
Humboldtstrasse 7 - 7500 Karlsruhe 1 - FRG -- +49+721+621127(0700-2330)   \o)/