[alt.sources] mapstats: uucp map statistics program

campbell@redsox.bsw.com (Larry Campbell) (08/06/89)

Here's a little ditty I just whipped up to print interesting statistics
about the uucp map.  You feed it pathalias output and it tells you, for
each of your neighbors, how many paths utilise that neighbor, and what
the average hop count is for paths through that neighbor.

It's simple, it's fast, and it lints.  Too small to warrant a makefile
or shar archive, here it is:

------------------------------cut here------------------------------
/*
 * mapstats
 *
 *	Compute and print statistics about uucp connectivity.
 *	Reads standard input, which should be in pathalias output format.
 *
 *	I did this in awk originally.  This runs about 100 times faster.
 *
 *	Copyright (c) 1989 by Larry Campbell (campbell@redsox.bsw.com)
 *	You may make any use of this code you wish, including selling
 *	it, as long as you credit the author.
 */

#include <stdio.h>
#include <string.h>

#define NEW(type) ( (type *) malloc(sizeof(type)) )
#define NIL(type) ( (type) 0 )

typedef struct neighbor
    {
    char *n_name;
    unsigned long n_hops;
    unsigned long n_paths;
    struct neighbor *n_next;
    } NEIGHBOR;

static NEIGHBOR *nroot;

static void do_record();
static void inc_neighbor_count();
static void print_stats();
static void xabort();

static unsigned long total_paths;
static unsigned long total_hops;

main()
{
char buf[BUFSIZ];
int n;

while (fgets(buf, BUFSIZ, stdin))
    {
    n = strlen(buf);
    if (buf[n-1] == '\n')
	buf[n-1] = '\0';
    do_record(buf);
    }
print_stats();
return 0;
}


static void do_record(buf)
char *buf;
{
char *p;
char *neighbor;
unsigned hop_count;

p = strchr(buf, '\t');
if (p == NIL(char *))
    xabort("missing tab: %s\n", buf);
neighbor = ++p;
for (hop_count = 0, p = strchr(p, '!'); p != NIL(char *); p = strchr(p, '!'))
    {
    hop_count++;
    *p++ = '\0';
    }
if (hop_count == 0)
    {
    if (strcmp(neighbor, "%s") == 0)
	return;
    xabort("bad hop count: %s\n", buf);
    }
inc_neighbor_count(neighbor, hop_count);
total_paths++;
total_hops += hop_count;
}


static void inc_neighbor_count(name, hop_count)
char *name;
unsigned hop_count;
{
NEIGHBOR *np;

/* Try to find neighbor struct */

for (np = nroot; np != NIL(NEIGHBOR *); np = np->n_next)
    {
    if (strcmp(name, np->n_name) == 0)
	break;
    }

/* Make new one if needed */

if (np == NIL(NEIGHBOR *))
    {
    NEIGHBOR *next, *prev;

    np = NEW(NEIGHBOR);
    np->n_name = strdup(name);
    np->n_paths = 0L;
    np->n_hops = 0L;
    np->n_next = NIL(NEIGHBOR *);

    /* Insert neighbor node in list lexically sorted by name */
    
    next = nroot;
    prev = NIL(NEIGHBOR *);
    while (next != NIL(NEIGHBOR *))
	{
	if (strcmp(name, next->n_name) < 0)
	    break;
	prev = next;
	next = next->n_next;
	}
    if (prev == NIL(NEIGHBOR *))
	nroot = np;
    else
	np->n_next = next, prev->n_next = np;	/* the infamous comma */
    }
np->n_paths++;
np->n_hops += hop_count;
}


static void print_stats()
{
NEIGHBOR *np;

(void) printf("Neighbor     Paths  Avg hops\n");
for (np = nroot; np != NIL(NEIGHBOR *); np = np->n_next)
    {
    (void) printf("%-12s %5ld       %3.1f\n",
		  np->n_name, np->n_paths,
		  np->n_paths == 0L
		      ? 0.0 : (double) np->n_hops / (double) np->n_paths);
    }
(void) printf("Total paths: %5ld\n", total_paths);
}


/* VARARGS1 */

static void xabort(fmt, a, b, c, d)
char *fmt;
{
(void) fprintf(stderr, fmt, a, b, c, d);
exit(1);
}
-- 
Larry Campbell                          The Boston Software Works, Inc.
campbell@bsw.com                        120 Fulton Street
wjh12!redsox!campbell                   Boston, MA 02146

loverso@Xylogics.COM (John Robert LoVerso) (08/11/89)

I couldn't resist posting this.  It computes the most common prefix
paths used in routes derived by pathalias.  I.e., this finds the places
to which you are really routing most of your traffic.  Simply feed the
output from pathalias (no problem if you use "pathalias -c" or not) into
the enclosed.  If you don't have "nawk", I think you can still use "awk".
Sample output:

--
Counts of common prefix route over entire database:

 7974 (42%) are through merk
 7966 (42%) are through merk(100%)uunet
 6868 (36%) are through spdcc
 6607 (35%) are through spdcc(96%)harvard
 3350 (18%) are through merk(42%)uunet(42%)mcvax
 2692 (14%) are through spdcc(39%)harvard(41%)ames
 2570 (14%) are through linus
 2222 (12%) are through linus(86%)att
 1789 ( 9%) are through spdcc(26%)harvard(27%)rutgers
 1091 ( 6%) are through merk(14%)uunet(14%)mcvax(33%)ukc
  934 ( 5%) are through spdcc(14%)harvard(14%)gatech
  868 ( 5%) are through merk(11%)uunet(11%)kddlab
  647 ( 3%) are through spdcc(9%)harvard(10%)rutgers(36%)utai
  595 ( 3%) are through buita
  571 ( 3%) are through encore
  555 ( 3%) are through buita(93%)bu-cs
  552 ( 3%) are through merk(7%)uunet(7%)mcvax(16%)hp4nl
  533 ( 3%) are through spdcc(8%)harvard(8%)ames(20%)lll-winken
  488 ( 3%) are through merk(6%)uunet(6%)mcvax(15%)sunic
  462 ( 2%) are through spdcc(7%)harvard(7%)ames(17%)sun
  447 ( 2%) are through buita(75%)bu-cs(81%)ll-xn
  417 ( 2%) are through buita(70%)bu-cs(75%)ll-xn(93%)ucsd
  289 ( 2%) are through buita(49%)bu-cs(52%)ll-xn(65%)ucsd(69%)nosc
  259 ( 1%) are through encore(45%)umn-d-ub
  253 ( 1%) are through encore(44%)umn-d-ub(98%)umn-cs
  212 ( 1%) are through encore(37%)bellcore
  170 ( 1%) are through sequent
  149 ( 1%) are through encore(26%)umn-d-ub(58%)umn-cs(59%)mmm
  133 ( 1%) are through buita(22%)bu-cs(24%)ll-xn(30%)ucsd(32%)nosc(46%)crash
  107 ( 1%) are through cloud9
    7 ( 0%) are through sunne
    4 ( 0%) are through %s
    1 ( 0%)  is through xylwest

Totals: 18867 known routes using 33 major hops
--
The first two lines show me that 42% of my paths are though my neighbor
"merk", but that 100% of those are just a way to get to "uunet".  The
output is:

	count (per%) ... host

		This means that `count' paths, `per' percentage of the
		total pathalias output, go to neighbor `host'.

	count (per%) ... host1(%1)host2(%2)...hostn

		This means that `count' paths, `per' percentage of the
		total pathalias output, are for site `hostn'.
		All the paths to `hostn' represent `%1' of the paths
		through `host1', `%2' of the paths through `host2', etc.

This may be a CPU hog.  If you have pathalias on an old '750, then
you probably don't want to run this.  With `new awk', ~2MIP CPUs,
and 18000+ paths, this tends to take about 8 CPU minutes to run
(which, on an Encore Multimax, means about 8 minutes realtime).

John

--
# This is a shell archive.  Remove anything before this line, then
# unpack it by saving it in a file and typing "sh file".  (Files
# unpacked will be owned by you and have default permissions.)
#
# This archive contains:
# prefix
#
echo x - prefix
sed -e 's/^X//' > "prefix" << '//E*O*F prefix//'
X#!/bin/sh
X#
X# Figure out statistics on path prefixes
X#
X# assumes last field of input is ([^!]*!)*%s
X# (suitable for any pathalias output)
X#
X# Complete rewrite of old ptm toy, 3/88 jlv
X#
X# last change: Wed Apr 26 11:46:40 EDT 1989 jlv
X#
X
Xecho Counts of common prefix route over entire database:
Xecho ""
X
Xnawk '
XBEGIN	{
X	SILLYMIN=100
X}
X{
X	count = split($NF, path, "!")
X	climb = path[1]
X	links[climb]++
X	for (i = 2; i < count; i++) {
X		climb = climb"!"path[i]
X		links[climb]++
X	}
X}
XEND	{
X	for (i in links) {
X		ilinks = links[i]
X		count = split(i, path, "!")
X		if (count != 1) {
X			if (ilinks < SILLYMIN || ilinks < (links[path[1]]/20))
X				continue
X			where = path[1]
X			build = path[1]
X			for (j = 2; j <= count ; j++) {
X				last = links[build]
X				where = sprintf("%s(%d%%)%s", \
X					where, \
X					(ilinks*100 + last/2)/last, \
X					path[j]);
X				build = build"!"path[j]
X			}
X		}
X		else
X			where = i
X		if (links[i] == 1)
X			areis=" is"
X		else
X			areis="are"
X		printf "%5d (%2d%%) %s through %s\n", \
X			links[i], (links[i]*100+NR/2)/NR, areis, where \
X			| "sort -rn"
X		hops++
X	}
X	printf "\nTotals: %d known routes using %d major hops\n", \
X		NR, hops
X} ' $*
//E*O*F prefix//

exit 0
-- 
John Robert LoVerso			Xylogics, Inc.  617/272-8140
loverso@Xylogics.COM			Annex Terminal Server Development Group
encore!xylogics!loverso			[formerly of Encore Computer Corp]