dennis@rlgvax.UUCP (02/05/86)
"Fixtcp" is a shell script which is useful for getting rid of 4.2bsd
TCP connections hung in the FIN_WAIT_2 state.

Steps:
1. Save this file in some directory.
2. Remove the first lines from this file so that "#! /bin/sh" is the
   first line.
3. Type "sh file" where file is the name of this file.
4. cat fixtcp.mk, and follow those directions.

Enjoy,
-dennis

#--------------- CUT HERE ---------------
#! /bin/sh
# This is a shell archive, meaning:
# 1. Remove everything above the #! /bin/sh line.
# 2. Save the resulting text in a file.
# 3. Execute the file with /bin/sh (not csh) to create the files:
#	_get_tcp_.c
#	fixtcp
#	fixtcp.mk
# This archive created: Wed Feb 5 15:16:26 EST 1986
#
if test -f _get_tcp_.c
then
echo shar: will not over-write existing file '_get_tcp_.c'
else
echo x - _get_tcp_.c
# ............ F I L E B E G .......... _get_tcp_.c
cat << '\SHAR_EOF' > _get_tcp_.c
/*
 * dennis@rlgvax
 * prints offsets of fields in TCP connection control block.
 * called by fixtcp sh script
 *
 * usage: _get_tcp_ state|2msl|FIN_WAIT2|TIME_CLOSE
 *	state		offset of t_state within struct tcpcb
 *	2msl		offset of t_timer[TCPT_2MSL] within struct tcpcb
 *	FIN_WAIT2	value of TCPS_FIN_WAIT_2
 *	TIME_CLOSE	value of TCPS_TIME_WAIT
 * Every value is printed in hex with a leading "0x", which is the
 * form the fixtcp script pastes into its adb commands.
 * Exit status is 0 on success, 1 on a usage error.
 */

#include <stdio.h>
#include <sys/types.h>		/* u_char */
#include <netinet/tcp.h>	/* tcp_seq typedef */
#include <netinet/tcp_timer.h>	/* tcp timers */
#include <netinet/tcp_var.h>	/* tcp connection control block */
#include <netinet/tcp_fsm.h>	/* defines for tcp states */

/* use S3/S5 strrchr(), but on 4.x systems, remap to Berkeley rindex */
#ifdef BSD4
# define strrchr rindex
#endif

#define STR_SAME(a, b)	(strcmp((a), (b)) == 0)
#define STR_DIFF(a, b)	(strcmp((a), (b)) != 0)

/*
 * byte offset of a field within struct tcpcb.  The address of the
 * field in an imaginary structure at address 0 is the offset; the
 * cast makes the argument really match the %x format.
 */
#define OFFSET(field)	((unsigned)&((struct tcpcb *)0)->field)

/* fw non-int functions */
char *basename();

/* external non-int functions */
extern char *strrchr();

main(argc, argv)
	int argc;
	char **argv;
{
	char *cmd;

	cmd = basename(argv[0]);

	if (argc != 2) {
usage:
		fprintf(stderr, "usage: %s state|2msl|FIN_WAIT2|TIME_CLOSE\n", cmd);
		exit(1);
	}

	if (STR_SAME(argv[1], "state"))
		printf("0x%x\n", OFFSET(t_state));		/* state offset */
	else if (STR_SAME(argv[1], "2msl"))
		printf("0x%x\n", OFFSET(t_timer[TCPT_2MSL]));	/* timer offset */
	else if (STR_SAME(argv[1], "FIN_WAIT2"))
		printf("0x%x\n", TCPS_FIN_WAIT_2);		/* state value */
	else if (STR_SAME(argv[1], "TIME_CLOSE"))
		printf("0x%x\n", TCPS_TIME_WAIT);		/* state value */
	else
		goto usage;

	/* do not fall off the end of main with a garbage exit status */
	exit(0);
}

/*
 * return basename of full path name:
 * the part after the rightmost '/', or the whole path if it has none.
 * The result points into path; nothing is copied.
 */
char *
basename(path)
	char *path;
{
	char *cp;	/* general char pointer */

	if ((cp = strrchr(path, '/')) == NULL)	/* no rightmost slash */
		return path;
	else
		return cp + 1;	/* skip over the slash itself */
}
\SHAR_EOF
# ............ F I L E E N D .......... _get_tcp_.c
fi # end of overwriting check
if test -f fixtcp
then
echo shar: will not over-write existing file 'fixtcp'
else
echo x - fixtcp
# ............ F I L E B E G .......... fixtcp
cat << '\SHAR_EOF' > fixtcp
#! /bin/sh
# (the line above matters: csh runs a script that starts with a plain
# "#" comment as a csh script, and this one is Bourne shell)
#
# fixtcp
# dennis bednar jan 24 86 dennis@rlgvax.uucp
#
# Unhang tcp connections which are stuck in the FIN_WAIT2 state
# These connections can be seen by doing a 4.2 netstat -a command.
#
# Usage:
#	invoke as "fixtcp" to display kernel stuff for connections.
#	"fixtcp" by itself is HIGHLY RECOMMENDED for the first time!
#
#	invoke as "fixtcp fix" to patch kernel memory - you must be root.
#	Then do a netstat -a command, and it should have gone away.
#
# CCI only symptom:
#	A symptom of this problem is that "startoftp" goes wild restarting
#	the receive daemon, and you see a lot of rcvlog.pid files being
#	created in the oftp spool directory.
#
# Symptom for everybody else:
#	In general, a symptom of this problem is that a tcpopen passive
#	will fail with the errno UNIX reason being "Address Already In Use".
#
#
# To correct OFTP problem (CCI only):
#	su root
#	killoftp; fixtcp fix; startoftp
#
# Internals of how this script works:
#	Works by loading the 2 * msl timer (addr+16) in the Connection
#	Control Block with a 1, which means it will time out in 1/2 second
#	from now, and enter the CLOSE state, and then the CCB will be freed
#	(so you will not see it with netstat -a).
#	The proper offset for the 2 * msl timer can be seen by examining
#	/usr/include/netinet/tcp_var.h include file, plus other tcp*.h files
#	in the same directory.
#
# relies on
#	_get_tcp_	a.out file that returns the offset of various
#			fields in a connection control block.
#			There is a _get_tcp_.c file to create this.
#			This was created to avoid problems of offsets
#			being site-dependent, if your OS uses different
#			offsets.
#
#

# don't print full path name of command in error messages
cmd=`basename $0`

# name of state to look for in the netstat command
# state=ESTABLISHED	# debugging
state=FIN_WAIT_2	# really

# get the values of the offsets of the fields in the structure for adb
stateoff=`_get_tcp_ state`		# probably 0x8
timer2msloff=`_get_tcp_ 2msl`		# probably 0x10
FIN_WAIT2=`_get_tcp_ FIN_WAIT2`		# probably 9
FIN_CLOSE=`_get_tcp_ TIME_CLOSE`	# probably 10

# If _get_tcp_ could not be run (not built, or not in PATH) the values
# above are empty, and adb would be handed a malformed address.
# Refuse to go anywhere near kernel memory in that case.
if [ -z "$stateoff" -o -z "$timer2msloff" -o -z "$FIN_WAIT2" ]
then
	echo "$cmd: cannot get offsets from _get_tcp_ (is it built and in your PATH?)" 1>&2
	exit 1
fi

# remove temp file if SIGHUP, SIGINT, SIGTERM
trap "echo $cmd: interrupted; rm -f /tmp/fixtcp.$$; exit 1" 1 2 15

# get kernel address of TCP CCB's in FIN_WAIT2 and save in a temporary file
# (keep only the first column; no "p" flag, which without -n would print
# every address twice)
netstat -A | grep $state | sed 's/ .*//' >/tmp/fixtcp.$$

# check if we got any addresses
if [ ! -s /tmp/fixtcp.$$ ]
then
	# file doesn't exist or is zero in length, therefore no addresses
	echo "$cmd: Sorry, no tcp connections stuck in $state state."
	rm /tmp/fixtcp.$$
	exit 0
fi

echo "Before: only connections in state $state"
netstat -a | grep $state

# cat /tmp/fixtcp.$$	# debug

# see if we want to patch kernel memory or just display it
if [ "$1" = "fix" ]
then
	# patch by writing
	for addr in `cat /tmp/fixtcp.$$`
	do
		adb -w /vmunix /dev/kmem <<EOF
0x$addr+$timer2msloff/w 1
\$q
EOF
	done
	sleep 2		# wait for connection to clear

	# make sure it really got unstuck
	netstat -a | grep $state >/tmp/fixtcp.$$
	if [ -s /tmp/fixtcp.$$ ]	# file exists and size > 0
	then
		echo "$cmd: Sorry, TCP connections still hung!!"
		rm /tmp/fixtcp.$$
		exit 1
	else
		echo "$cmd: TCP connections in state $state have been unstuck."
	fi
else
	# just display the current state flag and current 2 * msl timer,
	# at the offsets reported by _get_tcp_ rather than hard-coded ones
	for addr in `cat /tmp/fixtcp.$$`
	do
		echo "The next two numbers displayed by adb should be $FIN_WAIT2 and 0."
		echo "The state flag value of $FIN_WAIT2 represents the FIN_WAIT_2 state."
		echo "The decimal 0 means the 2 * msl timer is off."
		adb /vmunix /dev/kmem <<EOF
0x$addr+$stateoff/d
0x$addr+$timer2msloff/d
\$q
EOF
	done
fi

# cleanup intermediate file
rm /tmp/fixtcp.$$

echo "After: only connections in state $state"
netstat -a | grep $state
exit 0
\SHAR_EOF
# ............ F I L E E N D .......... fixtcp
fi # end of overwriting check
if test -f fixtcp.mk
then
echo shar: will not over-write existing file 'fixtcp.mk'
else
echo x - fixtcp.mk
# ............ F I L E B E G .......... fixtcp.mk
cat << '\SHAR_EOF' > fixtcp.mk
#
# dennis@rlgvax 2/4/86
#
# fixtcp.mk	Makefile, this file
# fixtcp	shell script
# _get_tcp_.c	C program
# _get_tcp_	a.out program called by fixtcp
# .fixtcp.mail	header for mail
#
# directions, type
#	make -f fixtcp.mk		# to make necessary files
#	edit fixtcp.mk and change INSTALLDIR
#	make -f fixtcp.mk install
#	cd $INSTALLDIR			# directory where you really installed it
#	fixtcp				# to display tcp connections hung in finwait2
#
#	# don't do this if you have none to unstick
#	su root				# required for adb write mode
#	fixtcp fix			# to actually unstuck tcp connections
#
# fixtcp runs _get_tcp_ by name, so INSTALLDIR must be in the PATH of
# whoever runs fixtcp (including root for "fixtcp fix").
#
# If your C library has rindex() but no strrchr(), build with
#	make -f fixtcp.mk CFLAGS=-DBSD4

# change this at your site
INSTALLDIR = .

all: _get_tcp_

clean:
	rm -f _get_tcp_

# the leading "-" lets make carry on when INSTALLDIR is the current
# directory and cp refuses to copy a file onto itself
install: _get_tcp_
	-cp _get_tcp_ $(INSTALLDIR)
	-cp fixtcp $(INSTALLDIR)
	-chmod 755 $(INSTALLDIR)/fixtcp

# distribute the latest version to the world, private for dennis@rlgvax
dist:
	rm -rf /tmp/dpb
	mkdir /tmp/dpb
	cp fixtcp.mk /tmp/dpb
	cp _get_tcp_.c /tmp/dpb
	cp ../cmd/fixtcp /tmp/dpb
	cp .fixtcp.mail /tmp/dpb
	(cd /tmp/dpb; makeshar * >>.fixtcp.mail)
# please note that .fixtcp.mail was chosen so that makeshar *
# doesn't try to append to itself.
\SHAR_EOF
# ............ F I L E E N D .......... fixtcp.mk
fi # end of overwriting check
# end of shell archive
exit 0
-- 
-Dennis Bednar
{decvax,ihnp4,harpo,allegra}!seismo!rlgvax!dennis UUCP
steve@umcp-cs.UUCP (Steve D. Miller) (02/06/86)
Here's a better fix for the 4.2 FIN_WAIT_2 problem. I don't remember
where I got it, but it works; the basic problem is that the code to drop
the connection when nothing cares about it is there in vanilla 4.2BSD,
but is in the wrong place. This fix moves it to the right spot and
mungs a conditional a little bit...

The fix is to netinet/tcp_input.c; I think this is for vanilla 4.2, but
your line numbers may vary.

*** Vanilla (??) 4.2 tcp_input.c	Fri Jan 24 12:24:00 1986
--- Fixed 4.2 tcp_input.c	Fri Jan 24 12:24:03 1986
***************
*** 358,363 ****
--- 358,372 ----
  				goto dropafterack;
  			if (ti->ti_len > 0) {
  				m_adj(m, ti->ti_len);
+ 				/*
+ 				 * If data is received on a connection after the
+ 				 * user processes are gone, then RST the other end.
+ 				 */
+ 				if ((so->so_state & SS_NOFDREF)
+ 				    && tp->t_state > TCPS_CLOSE_WAIT) {
+ 					tp = tcp_close(tp);
+ 					goto dropwithreset;
+ 				}
  				ti->ti_len = 0;
  				ti->ti_flags &= ~(TH_PUSH|TH_FIN);
  			}
***************
*** 404,419 ****
  			ti->ti_len -= todrop;
  			ti->ti_flags &= ~(TH_PUSH|TH_FIN);
  		}
- 	}
- 
- 	/*
- 	 * If data is received on a connection after the
- 	 * user processes are gone, then RST the other end.
- 	 */
- 	if ((so->so_state & SS_NOFDREF) && tp->t_state > TCPS_CLOSE_WAIT &&
- 	    ti->ti_len) {
- 		tp = tcp_close(tp);
- 		goto dropwithreset;
  	}
  
  	/*
--- 413,418 ----
-- 
Spoken: Steve Miller	ARPA: steve@mimsy.umd.edu	Phone: +1-301-454-4251
CSNet: steve@umcp-cs	UUCP: {seismo,allegra}!umcp-cs!steve
USPS: Computer Science Dept., University of Maryland, College Park, MD 20742