[news.software.b] nstats version 1.2

denny@mcmi.uucp (Denny Page) (10/18/89)

Here is version 1.2 of nstats.  It contains a fix for a bug in article
totals (1.1 missed rejection totals), explanation of headers, and variable
name changes for running in Perl 3.0.

Thanks to Jim Budler (jim@eda.com) for the note on Perl 3.0.


#!/usr/bin/perl
#
# Nstats - Print C news statistics via Perl
#
# Version 1.2  (10/17/89)
#
#
#
# Author's notes:
#
# Constructive comments and enhancements are solicited (flames are not).
# Please send suggestions or enhancements to denny@mcmi.
#
# Larry Wall has done a very nice piece of work in Perl.  Many thanks to him.
#
# Denny Page, 1989
#
#
#
# Program notes:
#
# The simplest usage is 'perl nstats ~news/log'.  I leave you to find
# more complicated invocations.
#
# While a duplicate is actually a rejected message, it is treated
# separately here.  Rejected messages herein are messages that are not
# subscribed to in the sys file or are excluded in the active file.
#
# Junked messages are not displayed in the system summaries.  It's not
# your neighbor's fault that you are missing active file entries.  If
# you are concerned about receiving junk groups, exclude them in your
# sys or active file.  They will then be summarized :-).
#
# The reason for a newsgroup being bad is assigned only once.  If the
# reason changes later in the log (such as the sys file being modified
# such that a newsgroup is no longer rejected, but rather is filed in
# junk), no notice will be taken.
#
# Message-ids are handed to newshist in batches rather than one per
# call; a batch is flushed once it holds more than 25 ids.  This limit
# may need to be adjusted at some sites.
#
# Sitenames are truncated to 15 characters.  This could be done better.
#
#
# Output headers have the following meanings:
#
#   System	Name of the neighboring system.
#   Accept	Number of accepted articles from system.
#   Dup		Number of duplicate articles received from system.
#   Rej		Number of rejected articles from system.
#   Sent	Number of articles sent to system.
#   Sys%	Accepted (or duplicate or rejected) articles as a
#		percentage of total articles from that system.
#   Tot%	Accepted (or duplicate) articles as a percentage
#		of total accepted (or duplicate) articles.
#   Avl%	Number of articles sent as a percentage of total
#		available (accepted) articles.
#
############################################################
#
# Revision history:
#
# 09/24/89	dny	Initial version
# 09/28/89	dny	Added category totals
# 10/02/89	dny	Fixed link count bug in record_groups
# 10/03/89	dny	Cleaned up variable names
# 10/16/89	dny	Renamed variables - Perl 3.0
# 10/17/89	dny	Fixed bug in rejection counts
#
############################################################


################ ***** Change this ***** ###################
#
$newshist="/usr/local/lib/news/bin/maint/newshist";
#
############################################################


# Record the category of a list of message-ids
sub record_groups {
    open(newshist, "-|") || exec $newshist, '--', @_;

    while (<newshist>) {
	if (s/^.+\t.+\t(.+)\n$/$1/) {
	    foreach $link (split(/ /)) {
		$link =~ s/^([^\.\/]+).*/$1/;
		$category{$link}++;
	    }
	}
	else {
		$category{"*expired*"}++;
	}
    }
    close(newshist);
}

############################################################

$#id_cache = -1;

while (<>) {
    ($from, $action, $message_id, $text) =
        /^.+\s(\S+)\s(.)\s<(.+)>\s(.*)$/;
    $from = substr($from, 0, 15);

# Accepted message
    if ($action eq '+') {
	$accepted{$from}++;
        foreach $site (split(/ /, $text)) {
	    $site = substr($site, 0, 15);
	    $sent{$site}++;
	}

	$id_cache[++$#id_cache] = $message_id;
	unless ($#id_cache < 25) {
	    do record_groups(@id_cache);
	    $#id_cache = -1;
	}
	next;
    }
    elsif ($action eq '-') {
# Duplicate
	if ($text eq 'duplicate') {
	    $duplicates{$from}++;
	    next;
	}
	$rejected{$from}++;
# Group not in sys
	if ($text =~ s/no subscribed groups in `(.+)'/$1/) {
            foreach $group (split(/,/, $text)) {
		if ($badgroup{$group}++ == 0) {
		    $badgroup_reason{$group} = "not subscribed in sys";
		}
	    }
	    next;
	}
# Group excluded in active
	elsif ($text =~ s/all groups `(.+)' excluded in active/$1/) {
            foreach $group (split(/,/, $text)) {
		if ($badgroup{$group}++ == 0) {
		    $badgroup_reason{$group} = "excluded in active";
		}
	    }
	    next;
	}
    }
# Junked message
    elsif ($action eq 'j') {
	$junk{$from}++;
	if ($text =~ s/junked due to groups `(.+)'/$1/) {
            foreach $group (split(/,/, $text)) {
		if ($badgroup{$group}++ == 0) {
		    $badgroup_reason{$group} = "not in active (junked)";
		}
	    }
	    next;
	}
    }
# Ignore ihave/sendme messages
    elsif ($action eq 'i') {next;}
    elsif ($action eq 's') {next;}

# Unknown input line
    print $_;
}


if ($#id_cache >= 0) {
    do record_groups(@id_cache);
}


# Collect all sitenames and calc totals
foreach $system (keys(accepted)) {
    $systems{$system} = 1;
    $total_accepted += $accepted{$system};
}
foreach $system (keys(duplicates)) {
    $systems{$system} = 1;
    $total_duplicates += $duplicates{$system};
}
foreach $system (keys(rejected)) {
    $systems{$system} = 1;
    $total_rejected += $rejected{$system};
}
foreach $system (keys(sent)) {
    $systems{$system} = 1;
    $total_sent += $sent{$system};
}
$total_articles = $total_accepted + $total_duplicates + $total_rejected;



# Print system summaries
print "\nSystem             Accept sys% tot%    Dup sys% tot%    Rej sys%     Sent avl%\n";

foreach $system (sort keys(systems)) {
    $articles = $accepted{$system} + $duplicates{$system} + $rejected{$system};

    if ($accepted{$system} > 0) {
	$accepted_pct = ($accepted{$system} * 100) / $articles + 0.5;
	$accepted_totpct = ($accepted{$system} * 100) / $total_accepted + 0.5;
    }
    else {
	$accepted_pct = 0;
	$accepted_totpct = 0;
    }
    if ($duplicates{$system} > 0) {
	$duplicates_pct = ($duplicates{$system} * 100) / $articles + 0.5;
	$duplicates_totpct = ($duplicates{$system} * 100) / $total_duplicates + 0.5;
    }
    else {
	$duplicates_pct = 0;
	$duplicates_totpct = 0;
    }
    if ($rejected{$system} > 0) {
	$rejected_pct = ($rejected{$system} * 100) / $articles + 0.5;
    }
    else {
	$rejected_pct = 0;
    }
    if ($sent{$system} > 0) {
	$sent_pct = ($sent{$system} * 100) / $total_accepted + 0.5;
    }
    else {
	$sent_pct = 0;
    }

    printf "%-15s     %5d %3d%% %3d%%   %4d %3d%% %3d%%   %4d %3d%%    %5d %3d%%\n",
	$system,
	$accepted{$system}, $accepted_pct, $accepted_totpct,
	$duplicates{$system}, $duplicates_pct, $duplicates_totpct,
	$rejected{$system}, $rejected_pct,
	$sent{$system}, $sent_pct;
}


if ($total_accepted > 0) {
    $accepted_pct = ($total_accepted * 100) / $total_articles + 0.5;
}
else {
    $accepted_pct = 0;
}
if ($total_rejected > 0) {
    $rejected_pct = ($total_rejected * 100) / $total_articles + 0.5;
}
else {
    $rejected_pct = 0;
}
if ($total_duplicates > 0) {
    $duplicates_pct = ($total_duplicates * 100) / $total_articles + 0.5;
}
else {
    $duplicates_pct = 0;
}

printf "TOTALS              %5d %3d%%        %4d %3d%%        %4d %3d%%    %5d\n",
$total_accepted, $accepted_pct,
$total_duplicates, $duplicates_pct,
$total_rejected, $rejected_pct,
$total_sent;



# Display any bad newsgroups received
@keys = sort(keys(badgroup));
if ($#keys >= 0) {
    print "\n\nBad Newsgroups                    Articles    Reason\n";
    foreach $group (@keys) {
	printf "%-35s   %4d    %s\n",
	    $group, $badgroup{$group}, $badgroup_reason{$group};
    }
}


# Display news categories received
@keys = sort(keys(category));
if ($#keys >= 0) {
    print "\n\nCategories Received               Articles\n";
    foreach $group (@keys) {
	printf "%-35s   %4d\n",
	    $group, $category{$group};
    }
}
-- 
Good health is merely the slowest rate at which one can die.