[net.sources] paths to sites

rwh@aesat.UUCP (Russell Herman) (08/16/84)

[keeping the elephants away]

This is a reposting of the UNETpaths package, which can be run at your site
to derive reasonably good paths to other sites by analysing traffic in
/usr/spool/news/net. DO NOT run this package during prime shift - it is a
voracious cycle-eater. This version fixes two problems of the previous
version:

	1) sed blows up when the input script is too long; bigsed fixes
	   this (anyone know of a fix, or precisely what sed's tensile
	   limit is in 4.1?)

	2) the previous version went berserk when a "Path: ..." entry
	   contained a null site (two consecutive !s). Input validation
	   problems strike again (but when you can't even trust a
	   machine ...)

-------------------------Cut here, use sh NOT csh--------------------
echo x - UNETpaths
cat >UNETpaths <<'!Funky!Stuff!'
#!/bin/sh
# UNETpaths - derive reasonably short paths to other sites from the
# "Path:" lines of the news articles found under /usr/spool/news/net.
# (Usage and error messages identify the program as "netpaths".)
#
# Usage: netpaths [-d searchdir] [-o result] [-r renams.sedin] [-t]
#   -d searchdir     search only /usr/spool/news/net/searchdir
#   -o result        path file that is merged with the newly derived paths
#                    and then rewritten (default: NETPATHS)
#   -r renams.sedin  sed script applied to the candidate paths
#   -t               test mode: the scratch files in /tmp are kept
#
# The site name is read from /etc/UNET/UNET.thishost.  The companion
# programs bigsed and minhops are run by name, so they must be on PATH.
# Exit status is 1 on a usage error, a missing site name or a bad search
# directory.
basedir=/usr/spool/news/net
oldpath=NETPATHS
mysite=$(cat /etc/UNET/UNET.thishost)
renams=
tst=n
# Kept in a variable and always echoed quoted: unquoted, the shell would
# treat "[-t]" as a glob and print the name of a matching file instead.
usage='Usage: netpaths [-d searchdir] [-o result] [-r renams.sedin] [-t]'
# Scratch files.  Every name matches the glob /tmp/$$? used for cleanup.
ta=/tmp/$$a
tb=/tmp/$$b
tc=/tmp/$$c
td=/tmp/$$d
te=/tmp/$$e
ty=/tmp/$$y
tz=/tmp/$$z
# Loop on the argument count rather than on "$1" being non-empty, so an
# empty argument is reported instead of silently ending option parsing.
while test $# -gt 0
do
	case $1 in
	-d|-o|-r)
		# these options take a value; refuse to run without one
		if test $# -lt 2; then
			echo "netpaths: $1 requires an argument" 1>&2
			echo "$usage" 1>&2
			exit 1
		fi
		case $1 in
		-d) basedir=/usr/spool/news/net/$2;;
		-o) oldpath=$2;;
		-r) renams=$2;;
		esac
		shift;;
	-t) tst=y;;
	 *) echo "$usage" 1>&2; exit 1;;
	esac
	shift
done
# Every pattern below embeds the site name; without it they are meaningless.
if test -z "$mysite"; then
	echo "netpaths: no site name in /etc/UNET/UNET.thishost" 1>&2; exit 1
fi
if test ! -d "$basedir"; then
	echo "netpaths: $basedir not directory" 1>&2; exit 1
fi
# Outside test mode, do not leave scratch files behind when interrupted.
if test "$tst" = n; then
	trap 'rm -f /tmp/$$?; exit 1' 1 2 3 15
fi
# Merge the existing derived paths with the new ones: put "!<mysite>!" in
# front of each stored path so old entries look like freshly derived ones.
if test -f "$oldpath"; then
	sed -e "s/ / !$mysite!/" "$oldpath" >$te
else
	: >$te
fi
# awk program (the here-document is unquoted, so $mysite is filled in now
# and the awk field references are escaped).  Input records are Path lines
# split on "!".  For every field j from the 2nd up to, but not including,
# the last one it prints
#	<field j> !<mysite>!<field 2>!...!<field j>
# Records with fewer than three fields are skipped, and a null or
# blank-containing field abandons the rest of that record.
cat >$ta <<!
BEGIN{FS="!";k=2}
{if (NF < k+1) next
for (j=k; j<NF; j++) {
	if (\$j=="" || 0!=index(\$j, " "))
		next;
	printf "%s ", \$j
	printf "!$mysite"
	for (i=k; i<=j; i++)
		printf "!%s",\$i
	printf "\n"
	}
}
!
# Collect raw data from net traffic: the first "Path: <mysite>!" line of
# each file.  awk is run once per file (\; rather than +) because it exits
# after the first match.
find "$basedir" -type f -exec awk "/^Path: $mysite!/{print \$0; exit}" {} \; |
	sort -T /tmp -u -o $tz
# Generate one copy of every extant path (old and new), optionally edited
# by the rename script, sorted and de-duplicated into $ty.  The list always
# goes through $ty so that test mode can inspect it; otherwise it is
# removed with the other scratch files.
if test -n "$renams"; then
	awk -f $ta $tz | cat $te - | bigsed -f "$renams" | sort -T /tmp -u -o $ty
else
	awk -f $ta $tz | cat $te - | sort -T /tmp -u -o $ty
fi
# minhops selects the path with the fewest hops to each destination and
# appends the hop count as a third field.
minhops <$ty >$tb
# Apply the minimum hop list to itself to shorten further: for each
# destination, longest paths first, emit a sed command that replaces
# everything from "!<mysite>!" through "!<dest>" with the chosen path to
# <dest>.  "-k 3,3nr" is the standard spelling of the obsolescent key
# "+2nr -3".
sort -T /tmp -k 3,3nr $tb|awk "{printf \"/!$mysite!.*!%s/s//%s/\n\",\$1,\$2}" >$tc
bigsed -f $tc $tb >$td
# Drop the hop count and the leading "!<mysite>!" and store the result.
awk '{print $1 " " $2}' $td|sed -e "s/ !$mysite!/ /" >"$oldpath"
if test "$tst" = "n"; then
	rm -f /tmp/$$?
fi
!Funky!Stuff!
echo x - UNETpaths.8
cat >UNETpaths.8 <<'!Funky!Stuff!'
.\" UNETpaths.8  1.3  84/08/14
.TH UNETpaths 8
.SH NAME
UNETpaths - derive routes to USENET sites
.SH SYNOPSIS
UNETpaths [-o result] [-r change_script] [-d basedir] [-t]
.SH DESCRIPTION
This script generates reasonably short (but not optimally short) paths from
your site to other sites on USENET. It does this by examining the paths
taken by those messages which have been received by your site.
.PP
Options are
.TP
.B \-o
This is the file that receives the results (default NETPATHS).
The paths in it are merged with those derived from the current net traffic,
thus providing a cumulative store of information. The syntax of each entry is

.ce 1
<dest_node> <path>

Entries may be added manually to this file if desired.

The first time this shell script is executed, it is recommended that this file
be initialized to contain entries for all your possible 1-hops if you
are a relay, or 2-hops if you are a leaf. Obviously, since this is a
primitive learning program, the more initial information given, the
better the results are likely to be.
.TP
.B \-r
This file is used when links or entire nodes are deleted from the network,
or when names of nodes are changed.
It is, in fact, a
.IR sed (1)
script. Two or three lines compose each change_script entry.
To delete the path A!B,
include the entries

.nf
			/A!B!/d
			/A!B$/d
.fi

To delete node A, use

.nf
			/!A!/d
			/!A$/d
			/ A!/d
.fi

To change node A to node B, use

.nf
			/!A!/s//!B!/
			/!A$/s//!B/
			/^A /s//B /
.fi

The default is not to use a change script. Notice that when a net change
occurs, it is not sufficient to apply the relevant changes for only one
.I UNETpaths
invocation. Until every item containing that path has been removed by
the expiry cleanup function, that path will be rederived. A period of
a couple of months should probably be allowed for the life of each change script
entry.
.TP
.B \-d
This restricts examination of messages to a subdirectory of /usr/spool/news/net.
E.g., "-d jokes/d" would look only at "net.jokes.d" for routes. Normally, the
entire contents of /usr/spool/news/net are examined.
.TP
.B \-t
This flag indicates test mode. Numerous scratch files are built in /tmp;
in test mode, they will not be removed on completion.
.SH FILES
/etc/UNET/UNET.thishost - your site id
.SH SEE ALSO
.SH AUTHOR
Russ Herman (aesat!rwh)
.SH DIAGNOSTICS
self-explanatory
.SH BUGS
None known.
.SH CHANGES
Changed -c parameter to -r to allow renaming nodes.
.br
Add "bigsed" to package to handle large sed scripts.
.br
Discard paths containing null or blank-containing site-names.
!Funky!Stuff!
echo x - bigsed
cat >bigsed <<'!Funky!Stuff!'
#!/bin/sh
# bigsed - run sed with a script file that may be too long for one sed.
#
# Usage: bigsed [sed-options] [-e script] [-f scriptfile] [file ...]
#
# Without -f, with an unreadable script file, or with a script file of
# 50 lines or fewer, this simply runs sed with the given arguments.
# Otherwise the script file is split into 50-line pieces and the input is
# passed through one sed per piece, in order, using scratch files in /tmp.
# The output of the last pass goes to standard output.
#
# Exit status: that of sed; when several passes are made, the highest
# status returned by any of them.  1 when interrupted.
#
# NOTE(review): the sed command lines are assembled as strings and run
# through eval, so arguments containing blanks or shell metacharacters are
# re-parsed by the shell.  Left as is to keep existing callers working.
files=
f=
params=
while test $# -gt 0
do
	case $1 in
	-) params="$params -";;
	-f) shift; f=$1;;
	-e) shift; params="$params -e $1";;
	-*) params="$params $1";;
	*) files="$files $1";;
	esac
	shift
done
# No script file at all: plain sed.
if test -z "$f"; then
	x="sed $params $files"
	eval $x; exit $?
fi
x="sed -f $f $params $files"
if test -r $f; then
	# unquoted on purpose: some wc implementations pad the count with blanks
	nlines=$(wc -l <$f)
	if test $nlines -lt 51; then
		# short enough for a single sed
		eval $x; exit $?
	fi
else
	# unreadable script: let sed itself report the problem
	eval $x; exit $?
fi
# Scratch names: /tmp/$$.aa, /tmp/$$.ab, ... hold the script pieces and
# /tmp/$$.1, /tmp/$$.2 hold the data between passes.
trap 'rm -f /tmp/$$.? /tmp/$$.??; exit 1' 1 2 3 15
rm -f /tmp/$$.? /tmp/$$.??
split -l 50 $f /tmp/$$.
# The glob expands in sorted order, which is the order split wrote the
# pieces in.  More than 50 lines means there are at least two pieces.
set /tmp/$$.??
# First pass reads the real input files (or standard input).
x="sed $params -f $1 $files >/tmp/$$.1"
eval $x; xsav=$?
shift
# Middle passes: every remaining piece except the last.
while test $# -gt 1
do
	x="sed $params -f $1 </tmp/$$.1 >/tmp/$$.2"
	eval $x; xtmp=$?
	if test $xtmp -gt $xsav; then
		xsav=$xtmp
	fi
	mv /tmp/$$.2 /tmp/$$.1
	shift
done
# Last pass writes to standard output.
x="sed $params -f $1 </tmp/$$.1"
eval $x; xtmp=$?
if test $xtmp -gt $xsav; then
	xsav=$xtmp
fi
rm -f /tmp/$$.? /tmp/$$.??
# Report the worst status seen in any pass, not merely the last one.
exit $xsav
!Funky!Stuff!
echo x - minhops.c
cat >minhops.c <<'!Funky!Stuff!'
/*$title minhops.c*/
/*M**************************************************************************
 ****************************************************************************


	Program name: minhops.c

	File name: minhops.c 

	Version: 1.3

	Date: 84/03/29     11:04:55

	Author: R. Herman

	Function:
		Used in the netpaths procedure.
		Takes a sorted list of records of the format

			<dest> <path>

		and outputs, for each <dest>, the record with the path
		containing the fewest hops in the format

			<dest> <path> <hop-count>

  
****************************************************************************
****************************************************************************/
static char sccs_id[] = {"@(#)minhops.c    	1.3  84/03/29"};



#include	<stdio.h>
#include	<string.h>

/*
 * One input record: a destination, the path to it, and the number of
 * '!' characters (hops) in that path.
 *
 * s[] is a two-slot buffer.  s[base] is the slot the next record is read
 * into; s[1-base] holds the best (fewest-hop) record seen so far for the
 * destination currently being processed.
 */
struct rec {
	int hopcnt;
	char dest[17];
	char path [513];
	} s[2];
int base = 0;

/*
 * Read the next "<dest> <path>" pair from standard input into *r and
 * count its hops.  The field widths in the format are one less than the
 * sizes of dest[] and path[], so an over-long token cannot overrun the
 * buffers (it is split instead).
 * Returns 1 when a complete record was read, 0 at end of input or when
 * only part of a record was available.
 */
static int getrec(struct rec *r)
{
	char *p;

	if (scanf("%16s %512s", r->dest, r->path) != 2)
		return 0;
	r->hopcnt = 0;
	for (p = r->path; *p != '\0'; p++)
		if (*p == '!')
			r->hopcnt++;
	return 1;
}

/* Write one record in the output format "<dest> <path> <hop-count>". */
static void putrec(const struct rec *r)
{
	printf("%s %s %d\n", r->dest, r->path, r->hopcnt);
}

/*$sttl main*/
/*
 * Filter: for each run of consecutive records with the same <dest>,
 * output the one whose path has the fewest hops (the first such record
 * when several tie).  Empty input produces no output.  Always returns 0.
 */
int main(void)
{
	/* prime the input buffer */
	if (!getrec(&s[1]))
		return 0;

	/* here to process next record */
	while (getrec(&s[base])) {
		if (strcmp(s[base].dest, s[1-base].dest) != 0) {
			/* new destination: flush the best of the previous one */
			putrec(&s[1-base]);
			base = 1 - base;
			}
		else
			/* same destination: keep the new record only if shorter */
			if (s[base].hopcnt < s[1-base].hopcnt)
				base = 1 - base;
		}
	putrec(&s[1-base]);
	return 0;
}
!Funky!Stuff!
-- 
  ______			Russ Herman
 /      \			{allegra,ihnp4,linus,decvax}!utzoo!aesat!rwh
@( ?  ? )@			
 (  ||  )			The opinions above are strictly personal, and 
 ( \__/ )			do not reflect those of my employer (or even
  \____/			possibly myself an hour from now.)