[alt.sources] 5400 performance

mcm@rti.UUCP (Mike Mitchell) (12/07/89)

Submitted-by: mcm@rti.UUCP (Mike Mitchell)
Posting-id: 891107.0724
Posting-number: Volume TEST, Number TEST
Archive-name: demonstrate ultrix fragmentation reassembly bug

[This is an experimental alt.sources re-posting from the
newsgroup(s) comp.sys.dec,comp.unix.ultrix.  
No attempt has been made to edit, clean, modify, or otherwise
change the contents of the original posting, or to contact the
author.  Please consider cross-posting all sources postings to
alt.sources as a matter of course.

Comments on this service to emv@math.lsa.umich.edu.]


We have been experiencing some problems with lost UDP packets on our DECstation
3100's, so I started to look into it.  It appears that the DECstations and
DECsystems do not handle fragmented IP packets properly.  Interestingly
enough, the packets are received and assembled properly; they just are not
delivered to the application in a timely manner.  The system seems to forget
that it received a packet until another packet (from any source) arrives.
When the second packet shows up, both the 'forgotten' packet and the new
packet are delivered properly.  The bug does not happen on every fragmented
packet, but often enough to be reproducible.  I think this bug is responsible
for the complaints about NFS performance on DECstations and DECsystems.  NFS
sends 8k packets, which are fragmented.  Occasionally a packet is 'forgotten',
and the requestor retries.  The retry will cause the 'forgotten' packet to
be remembered, and everything starts up again.

If you have a DECsystem or DECstation, please check the number of NFS
timeouts.  You can do this by typing the following commands:

    dbx -k /vmunix /dev/mem
    print rcstat
    print clstat
    quit

I am enclosing a program that also demonstrates the bug.  Compile it and
execute it without any arguments on a DECstation or DECsystem.  Once it is
running on a DECstation or DECsystem, run it on any other machine, using
the hostname of the DECstation or DECsystem as the first argument.  The
program sends 1000 8k packets to the specified host, and waits for a reply on
each packet.  If a packet times out, it sends a small packet to nudge the
DECstation.  It keeps track of the packet order and round-trip times, and
gives a report of the number of timeouts, the number of lost packets, and
the number of packets recovered after a timeout.  The bug shows up most
often when there is not much network traffic to the DECstation, so pick
a quiet time to run the bug test.  You might want to run the test several
times.  The bug does NOT show up using loopback, so you cannot run the
test using just one machine.  The program accepts 2 arguments, the first
is the host name, and the second is the packet size to send.  You might
want to try 10240 or 16000 byte packets.  The larger the packet is, the
more often the bug shows up.

If you have any questions or comments, contact me at 'mcm@rti.rti.org' or
 '{decvax,seismo,ihnp4,philabs}!mcnc!rti!mcm', or pick up the phone and dial
 +1 919 541-6098 (in the US).  Ask for Mike Mitchell.

Here is the program I promised - type 'n' now if you don't want to see it.

------------------------------------------------------------------------------
#include <sys/types.h>
#include <sys/socket.h>
#include <sys/time.h>
#include <sys/signal.h>
#include <netinet/in.h>
#include <netdb.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define		TSTPORT		3100		/* UDP port the reflector binds to and the sender targets */
#define		MAXCNT		1000		/* number of packets pingit() sends before reporting */
#define		TOUT		3		/* seconds to wait for a reply before counting a timeout */

/*
 * Shared packet buffer, used both for sending and for receiving.
 * sizeof(buf) is the upper bound on the packet size; because the element
 * type is long, that byte count depends on sizeof(long).  main() uses half
 * of it as the default packet size.  pingit() stores the send time in
 * buf[0] and buf[1] and the packet sequence number in buf[2].
 */
long buf[4096];					/* packet buffer */

/*
 * This program shows off a bug in the networking code for DECstations.
 * Run it with no arguments on a DECstation, then run another copy
 * on another machine, using the hostname of the DECstation as the first
 * argument.
 *
 *   prog               run as the packet reflector (does not return)
 *   prog host [size]   send test packets of `size' bytes to `host'
 *
 * Without a size argument the packet size is half of the packet buffer.
 * A given size is clamped to the range 64 .. sizeof(buf); a size that is
 * not a decimal number is rejected with the usage message.
 */

int reflect(), pingit(), usage();	/* defined later in this file */

int main(int argc, char *argv[])
{
    struct hostent *host;
    char *end;
    long slen;

    if (argc < 2)
    {
        printf("activating reflector\n");
        reflect();		/* exits the process itself */
        return 1;
    }

    host = gethostbyname(argv[1]);
    if (host == NULL)
    {
        fprintf(stderr, "host %s not found\n", argv[1]);
        usage(argv[0]);		/* exits the process itself */
        return 1;
    }

    slen = (long)(sizeof(buf) / 2);
    if (argc > 2)
    {
        slen = strtol(argv[2], &end, 10);
        if (end == argv[2] || *end != '\0')
        {
            fprintf(stderr, "bad packet size '%s'\n", argv[2]);
            usage(argv[0]);
            return 1;
        }

        /*
         * Compare as signed values: comparing against the unsigned
         * sizeof directly would turn a negative size into the maximum.
         */
        if (slen > (long)sizeof(buf))
            slen = (long)sizeof(buf);
        else if (slen < 64)
            slen = 64;
    }

    printf("sending %d byte packets to %s\n", (int)slen, argv[1]);
    pingit(host, (int)slen);
    return 0;
}


/*
 * This is a packet reflector.  It binds a UDP socket to TSTPORT on any
 * local address and sends the first 64 bytes of every incoming packet
 * (or the whole packet, if it is shorter) back to its source.
 *
 * It loops until recvfrom() or sendto() fails and then exits the process
 * with status 1; it never returns to its caller.
 */
int reflect(void)
{
    int fd, rc, rcvbuf;
    socklen_t alen;		/* in: size of saddr, out: size of sender address */
    struct sockaddr_in saddr;

    if ((fd = socket(AF_INET, SOCK_DGRAM, 0)) < 0)
    {
        perror("socket");
        exit(1);
    }

    /* Ask for enough receive space to hold one maximum-sized packet. */
    rcvbuf = sizeof(buf);
    if (setsockopt(fd, SOL_SOCKET, SO_RCVBUF, (char *)&rcvbuf,
                   sizeof(rcvbuf)) < 0)
        perror("setsockopt - couldn't get receive space");

    /* Clear the whole structure so the padding bytes are not garbage. */
    memset(&saddr, 0, sizeof(saddr));
    saddr.sin_family = AF_INET;
    saddr.sin_addr.s_addr = htonl(INADDR_ANY);
    saddr.sin_port = htons(TSTPORT);
    if (bind(fd, (struct sockaddr *)&saddr, sizeof(saddr)) < 0)
    {
        perror("bind");
        exit(1);
    }

    for (;;)
    {
        alen = sizeof(saddr);
        rc = recvfrom(fd, (char *)buf, sizeof(buf), 0,
                      (struct sockaddr *)&saddr, &alen);
        if (rc < 0)
        {
            perror("recvfrom");
            break;
        }

        /* The reply is deliberately small so that it is never fragmented. */
        if (rc > 64)
            rc = 64;

        if (sendto(fd, (char *)buf, rc, 0, (struct sockaddr *)&saddr,
                   alen) < 0)
        {
            perror("sendto");
            break;
        }
    }
    exit(1);
}

/*
 * This routine sends 1000 packets to the host specified, measuring the
 * round-trip time.  If a reply packet is not received in 3 seconds, it
 * switches to sending a small (64 byte) packet once, then back to the
 * specified packet size.
 */
/*
 * Statistics shared between pingit(), which updates them, and
 * printstats(), which reports them.
 */
struct timeval	roundmin = { 999999, 999999 };	/* round-trip minimum time; starts at a large sentinel value */
struct timeval	roundmax = {      0,      0 };	/* round-trip maximum time */
struct timeval	roundave = {      0,      0 };	/* running sum of round-trip times; printstats() divides it by rcvcnt to get the average */
int		sndcnt   = 0;			/* number of packets sent; also stored in each packet as its sequence number */
int		rcvcnt   = 0;			/* number of packets recv'd */
int		touts    = 0;			/* number of timeouts */
int		rectouts = 0;			/* no. of recovered timeouts: replies whose round trip took TOUT seconds or more */

int timeadd(), timesub(), printstats();	/* defined later in this file */

/*
 * SIGINT handler installed by pingit(): report the statistics gathered so
 * far.  printstats() exits the process, so control never returns to the
 * interrupted code.
 */
static void pingit_sigint(int signo)
{
    (void)signo;
    printstats();
}

/*
 * Send MAXCNT packets of slen bytes to UDP port TSTPORT on `host' and
 * time the replies.
 *
 * Each packet carries its send time (buf[0], buf[1], network byte order)
 * and its sequence number (buf[2]).  After each send the routine waits up
 * to TOUT seconds for a reply and then drains any further replies that are
 * already queued without waiting.  Every reply updates rcvcnt, roundmin,
 * roundmax and the running sum in roundave; a reply whose round trip took
 * TOUT seconds or more is also counted in rectouts.  If no reply arrives in
 * time, touts is incremented and the next packet sent is a small one
 * (64 bytes), after which the size goes back to slen.
 *
 * host  resolved destination; must carry an IPv4 address
 * slen  packet size in bytes
 *
 * Finishes by calling printstats(), which exits the process.
 */
int pingit(struct hostent *host, int slen)
{
    int fd, rc, rslen, sndbuf, nready, replied;
    long waitsecs;
    struct sockaddr_in saddr;
    struct timeval rcvtime;
    struct timeval sndtime;
    struct timeval tout;
    fd_set rfds;

    /* The address is copied into a sockaddr_in, so it has to fit one. */
    if (host->h_addrtype != AF_INET || host->h_length < 0 ||
        (size_t)host->h_length > sizeof(saddr.sin_addr))
    {
        fprintf(stderr, "host address is not an IPv4 address\n");
        exit(1);
    }

    if ((fd = socket(AF_INET, SOCK_DGRAM, 0)) < 0)
    {
        perror("socket");
        exit(1);
    }

    sndbuf = slen;
    if (setsockopt(fd, SOL_SOCKET, SO_SNDBUF, (char *)&sndbuf,
                   sizeof(sndbuf)) < 0)
        perror("setsockopt - couldn't get send space");

    /* Bind to any local address and let the system pick the port. */
    memset(&saddr, 0, sizeof(saddr));
    saddr.sin_family = AF_INET;
    saddr.sin_addr.s_addr = htonl(INADDR_ANY);
    saddr.sin_port = 0;
    if (bind(fd, (struct sockaddr *)&saddr, sizeof(saddr)) < 0)
    {
        perror("bind");
        exit(1);
    }

    /* From here on saddr is the destination address. */
    saddr.sin_family = AF_INET;
    memcpy(&saddr.sin_addr, host->h_addr_list[0], (size_t)host->h_length);
    saddr.sin_port = htons(TSTPORT);

    rslen = slen;

    signal(SIGINT, pingit_sigint);

    for (sndcnt = 0; sndcnt < MAXCNT; sndcnt++)
    {
        (void)gettimeofday(&sndtime, NULL);
        buf[0] = htonl(sndtime.tv_sec);
        buf[1] = htonl(sndtime.tv_usec);
        buf[2] = sndcnt;

        if (sendto(fd, (char *)buf, rslen, 0, (struct sockaddr *)&saddr,
                   sizeof(saddr)) < 0)
        {
            perror("sendto");
            break;
        }

        rslen = slen;		/* switch back to full-sized packets */

        /*
         * Wait up to TOUT seconds for the first reply, then poll for more.
         * The fd_set and the timeout are rebuilt before every call because
         * select() may modify both, and the timeout is tracked with an
         * explicit flag instead of being read back from `tout'.
         */
        replied = 0;
        waitsecs = TOUT;
        for (;;)
        {
            FD_ZERO(&rfds);
            FD_SET(fd, &rfds);
            tout.tv_sec = waitsecs;
            tout.tv_usec = 0;

            nready = select(fd + 1, &rfds, NULL, NULL, &tout);
            if (nready <= 0)
            {
                if (nready < 0)
                    perror("select");
                break;
            }

            if ((rc = recv(fd, (char *)buf, sizeof(buf), 0)) < 0)
            {
                perror("recv");
                break;
            }
            (void)gettimeofday(&rcvtime, NULL);

            if (buf[2] != sndcnt)
                printf("got packet %ld when expecting %d\n", buf[2], sndcnt);

            rcvcnt++;

            sndtime.tv_sec = ntohl(buf[0]);	/* calculate our round-trip */
            sndtime.tv_usec = ntohl(buf[1]);
            timesub(&rcvtime, &sndtime);	/* rcvtime is now the round trip */

            if (rcvtime.tv_sec >= TOUT)
                rectouts++;

            if ((rcvtime.tv_sec > roundmax.tv_sec) ||
                (rcvtime.tv_sec == roundmax.tv_sec &&
                 rcvtime.tv_usec > roundmax.tv_usec))
            {
                roundmax = rcvtime;
            }

            if ((rcvtime.tv_sec < roundmin.tv_sec) ||
                (rcvtime.tv_sec == roundmin.tv_sec &&
                 rcvtime.tv_usec < roundmin.tv_usec))
            {
                roundmin = rcvtime;
            }

            timeadd(&roundave, &rcvtime);	/* add in to average */

            replied = 1;
            waitsecs = 0;		/* switch to a polling select */
        }

        if (!replied)
        {
            touts++;
            printf("timeout on packet %d -- sending small packet\n", sndcnt);
            rslen = 64;		/* switch to a small packet size */
        }
    }

    printstats();
    return 0;
}

int tvdiv(), tvtoms();		/* defined later in this file */

/*
 * Print the statistics gathered by pingit() and exit with status 0.
 *
 * Reports the packets sent, the timeouts, the replies that arrived after
 * TOUT seconds or more, the lost packets (sent minus received) and the
 * minimum/average/maximum round-trip time in milliseconds.  roundave holds
 * the sum of all round-trip times and is divided in place by rcvcnt here.
 *
 * When no reply was received at all, the minimum is reported as 0 rather
 * than the initial sentinel value of roundmin.
 *
 * Never returns.
 */
int printstats(void)
{
    int minms, avgms, maxms;

    printf("\n%d packets transmitted\n", sndcnt);
    printf("%d timeouts\n", touts);
    printf("%d packets received after %d seconds\n", rectouts, TOUT);
    printf("%d lost packets\n", sndcnt - rcvcnt);

    tvdiv(&roundave, rcvcnt);	/* sum -> average; no-op when rcvcnt is 0 */

    minms = (rcvcnt > 0) ? tvtoms(&roundmin) : 0;
    avgms = tvtoms(&roundave);
    maxms = tvtoms(&roundmax);

    printf("roundtrip (ms)  min/avg/max = %d/%d/%d\n", minms, avgms, maxms);
    exit(0);
}

/*
 * Add the time in *tp2 to *tp1, leaving the sum in *tp1.
 *
 * A single carry from microseconds into seconds is applied, which is
 * sufficient when both operands have tv_usec below 1000000.
 * Always returns 0.
 */
int timeadd(struct timeval *tp1, const struct timeval *tp2)
{
    tp1->tv_sec += tp2->tv_sec;
    tp1->tv_usec += tp2->tv_usec;
    if (tp1->tv_usec >= 1000000)
    {
        tp1->tv_usec -= 1000000;
        tp1->tv_sec += 1;
    }
    return 0;
}

/*
 * Subtract the time in *tp2 from *tp1, leaving the difference in *tp1.
 *
 * A single borrow from seconds into microseconds is applied, which is
 * sufficient when both operands have tv_usec below 1000000.
 * Always returns 0.
 */
int timesub(struct timeval *tp1, const struct timeval *tp2)
{
    tp1->tv_sec -= tp2->tv_sec;
    tp1->tv_usec -= tp2->tv_usec;
    if (tp1->tv_usec < 0)
    {
        tp1->tv_usec += 1000000;
        tp1->tv_sec -= 1;
    }
    return 0;
}

/*
 * Convert *tp to whole milliseconds; any fraction of a millisecond is
 * dropped.  The result is narrowed to int, so the time has to be small
 * enough for its millisecond count to fit in one.
 */
int tvtoms(const struct timeval *tp)
{
    return (int)(tp->tv_sec * 1000 + tp->tv_usec / 1000);
}

/*
 * Divide the time in *tp by div, in place.
 *
 * The seconds are divided first and the remainder of that division is
 * carried into the microseconds, so no whole seconds are lost.  A div of 0
 * leaves *tp untouched.  Always returns 0.
 */
int tvdiv(struct timeval *tp, int div)
{
    long whole;		/* quotient of the seconds */
    long rem;		/* seconds left over, carried into tv_usec */

    if (div == 0)
        return 0;

    whole = tp->tv_sec / div;
    rem = tp->tv_sec - whole * div;

    tp->tv_usec = tp->tv_usec / div + (rem * 1000000) / div;
    tp->tv_sec = whole;
    return 0;
}

/*
 * Print the command-line synopsis to stderr and exit with status 1.
 *
 * cp  program name to show in the message
 *
 * Never returns.
 */
int usage(const char *cp)
{
    fprintf(stderr, "usage: %s host [size]\n", cp);
    exit(1);
}
-----------------------------------------------------------------
-- 
Mike Mitchell	{decvax,seismo,ihnp4,philabs}!mcnc!rti!mcm  mcm@rti.rti.org

"If you hear me talking on the wind, You've got
 to understand, We must remain perfect strangers"	    (919) 541-6098