[comp.unix.wizards] Sockets stuck in FIN_WAIT_2

pdb@sei.cmu.edu (Patrick Barron) (06/09/87)

If I shutdown sendmail on my Ultrix VAX and there is still a socket out there
bound to the local smtp service in FIN_WAIT_2 (waiting for a FIN from the
remote host), then I'll never be able to restart mail unless I reboot the
machine.  Does anyone have a program, or the proper "adb -k" incantations,
to force a stuck FIN_WAIT_2 socket into CLOSED state?

--Pat.

wesommer@bloom-beacon.UUCP (06/11/87)

In article <1556@aw.sei.cmu.edu> pdb@sei.cmu.edu (Patrick Barron) writes:
>If I shutdown sendmail on my Ultrix VAX and there is still a socket out there
>bound to the local smtp service in FIN_WAIT_2 (waiting for a FIN from the
>remote host), then I'll never be able to restart mail unless I reboot the
>machine.  Does anyone have a program, or the proper "adb -k" incantations,
>to force a stuck FIN_WAIT_2 socket into CLOSED state?

Our local network wizard, Jeff Schiller, wrote one once.  It works on
4.2 and 4.3 and Ultrix (several flavors); I'm posting it here because
it's so short.  Just compile it with
	cc -O -o fixfin2 fixfin2.c 
and run it as root.  Please, no flames about the style or lack of
comments (the author will ignore the flames anyway).

					Bill Sommerfeld
					wesommer@athena.mit.edu
				
#! /bin/sh
# This is a shell archive, meaning:
# 1. Remove everything above the #! /bin/sh line.
# 2. Save the resulting text in a file.
# 3. Execute the file with /bin/sh (not csh) to create:
#	fixfin2.c
# This archive created: Wed Jun 10 23:05:01 1987
#
# Reviewer notes on how the script below works:
#  - /bin and /usr/bin are placed at the front of PATH before anything runs.
#  - Extraction is skipped if fixfin2.c already exists in the current
#    directory; the existing file is never overwritten.
#  - The payload is a here-document; sed removes the leading tab-and-X guard
#    from each payload line and the result is written to fixfin2.c.
#  - After extraction the byte count of fixfin2.c is compared with 3047.
#    A mismatch only prints a message; the file is left in place and the
#    script still exits 0.  Because of this check, the payload cannot be
#    edited without also updating both occurrences of the 3047 count.
export PATH; PATH=/bin:/usr/bin:$PATH
echo shar: "extracting 'fixfin2.c'" '(3047 characters)'
# Only extract when no file named fixfin2.c is present.
if test -f 'fixfin2.c'
then
	echo shar: "will not over-write existing file 'fixfin2.c'"
else
# The backslash-quoted delimiter (\SHAR_EOF) stops the shell from expanding
# anything inside the payload; everything up to the SHAR_EOF line is passed
# to sed verbatim.
sed 's/^	X//' << \SHAR_EOF > 'fixfin2.c'
	X#include <stdio.h>
	X#include <sys/types.h>
	X#include <sys/file.h>
	X#include <sys/param.h>
	X#include <sys/socket.h>
	X#include <sys/socketvar.h>
	X#include <sys/mbuf.h>
	X#include <sys/protosw.h>
	X#include <netinet/in.h>
	X#include <netinet/in_systm.h>
	X#include <net/route.h>
	X#include <netinet/in_pcb.h>
	X#include <netinet/ip.h>
	X#include <netinet/tcp.h>
	X#include <netinet/tcp_fsm.h>
	X#include <netinet/tcp_timer.h>
	X#include <netinet/tcp_var.h>
	X#include <nlist.h>
	Xextern   int errno;
	Xstruct inpcb inpcb;
	Xstruct tcpcb tcpcb;
	Xint kmem;
	Xstruct nlist nl[] = {
	X#define N_TCB 0
	X  { "_tcb" }, 
	X  "",
	X};
	Xmain ()
	X{
	X  register int err;
	X  register struct inpcb *next, *prev;
	X  register int stateoff;
	X  struct inpcb *tcbaddr;
	X  int testfirst;
	X  short fstate;
	X  stateoff = 0;
	X  nlist("/vmunix", nl);
	X  if (nl[0].n_type == 0) {
	X    fprintf(stderr, "/vmunix: no namelist\n");
	X    exit (1);
	X  }
	X  tcbaddr = (struct inpcb *) nl[N_TCB].n_value;
	X  kmem = open ("/dev/kmem", O_RDWR, 0644);
	X  check_error(kmem, "Opening /dev/kmem", errno);
	X  (void) lseek (kmem, (off_t)tcbaddr, 0);
	X  check_error((errno != 0) ? -1 : 0, "seeking for tcbaddr", errno);
	X  err = read (kmem, (caddr_t)&inpcb, sizeof (struct inpcb));
	X  check_error(err, "reading tcb", errno);
	X  if (inpcb.inp_next == (struct inpcb *)tcbaddr) exit (3);
	X  prev = tcbaddr;
	X  while (inpcb.inp_next != (struct inpcb *)tcbaddr) {
	X    next = inpcb.inp_next;
	X    (void) lseek (kmem, (off_t)next, 0);
	X    check_error((errno != 0) ? -1 : 0, "seeking for next inpcb", errno);
	X    err = read(kmem, (char *)&inpcb, sizeof(inpcb));
	X    check_error(err, "reading next inpcb", errno);
	X    if (inpcb.inp_prev != prev) {
	X      fprintf(stderr,"Bad sample after %x\n", prev);
	X      exit (4);
	X    }
	X    (void) lseek(kmem, (off_t)inpcb.inp_ppcb, 0);
	X    check_error((errno != 0) ? -1 : 0, "Seeking tcpcb", errno);
	X    err = read (kmem, (char *)&tcpcb, sizeof(tcpcb));
	X    check_error(err, "Reading tcpcb", errno);
	X    if (stateoff == 0) {
	X      stateoff = (int) &tcpcb.t_state - (int) &tcpcb;
	X    }
	X    if (tcpcb.t_state == TCPS_FIN_WAIT_2) {
	X      testfirst = 1;
	X    doitagain:
	X      (void) lseek (kmem, (off_t) ((int) inpcb.inp_ppcb + stateoff), 0);
	X      check_error ((errno != 0) ? -1 : 0, "seeking to tcp state variable", errno);
	X      if (testfirst) {
	X	err = read (kmem, (char *)&fstate, sizeof (short));
	X      } else {
	X/*	fprintf(stderr, "Would write %d, &tcpcb = %X, stateoff= %d\n",
	X		fstate, inpcb.inp_ppcb, stateoff); */
	X	err = write (kmem, (char *)&fstate, sizeof (short));
	X      }
	X      check_error (err, testfirst ? "reading tcp state variable" :
	X		   "Writing tcp state variable", errno);
	X      if (testfirst) {
	X	if (fstate != TCPS_FIN_WAIT_2) {
	X	  fprintf(stderr, "Connection changed state!\n");
	X	} else {
	X	  testfirst = 0;
	X	  fstate = 0;
	X	}
	X	goto doitagain;
	X      }
	X    }
	X    prev = next;
	X  }
	X  (void) close (kmem);
	X  exit (0);
	X}
	X
	Xcheck_error(err, msg, code)
	Xint err;
	Xchar *msg;
	Xint code;
	X{
	X  if (err < 0) {
	X    fprintf (stderr, "finfix2: Error: %s\n", msg);
	X    errno = code;
	X    perror("fixfin2");
	X    exit (1);
	X  }
	X  return;
	X}
SHAR_EOF
# Transmission check: wc -c counts the bytes of the extracted file and the
# result is compared numerically with the size recorded when the archive
# was built.
if test 3047 -ne "`wc -c < 'fixfin2.c'`"
then
	echo shar: "error transmitting 'fixfin2.c'" '(should have been 3047 characters)'
fi
fi
exit 0
#	End of shell archive

mitchell@cadovax.UUCP (Mitchell Lerner) (06/13/87)

In article <1556@aw.sei.cmu.edu> pdb@sei.cmu.edu (Patrick Barron) writes:
>
>If I shutdown sendmail on my Ultrix VAX and there is still a socket out there
>bound to the local smtp service in FIN_WAIT_2 (waiting for a FIN from the
>remote host), then I'll never be able to restart mail unless I reboot the
>machine.  Does anyone have a program, or the proper "adb -k" incantations,
>to force a stuck FIN_WAIT_2 socket into CLOSED state?
>
>--Pat.

I seem to remember back when I worked on BSD type networking stuff that some
connections got stuck in a used or "zombie" state for long periods of time
after a close() and an unlink() had already been issued.

I don't know if this is directly related to what you are experiencing, but the
only thing that removed those zombie socket addresses from the address space
(besides a reboot) was to do an "ifconfig Device_name -trailers down" (not
sure about the args) and then ifconfig Device_name (I think).  Kind of a pain in
the a** but it worked for me.

	- Mitchell


-- 
Mitchell Lerner
#  {ucbvax,ihnp4,decvax}!trwrb!cadovax!mitchell
#  cadovax!mitchell@ucla-locus.arpa

ambar@bloom-beacon.UUCP (06/14/87)

In article <1556@aw.sei.cmu.edu> pdb@sei.cmu.edu (Patrick Barron) writes:
>
>If I shutdown sendmail on my Ultrix VAX and there is still a socket out there
>bound to the local smtp service in FIN_WAIT_2 (waiting for a FIN from the
>remote host), then I'll never be able to restart mail unless I reboot the
>machine.  Does anyone have a program, or the proper "adb -k" incantations,
>to force a stuck FIN_WAIT_2 socket into CLOSED state?

fixfin2 was written by our local network wizard, Jeff Schiller.  He
makes it available via anonymous ftp from bitsy.mit.edu, so posting it
isn't a problem.  It works on 4.[23]BSD, and Ultrix.  However, it
assumes that your header files match your kernel.  Be careful.  (and
don't forget to nuke the .signature at the end..)

#include <stdio.h>
#include "/sys/h/types.h"
#include "/sys/h/file.h"
#include "/sys/h/param.h"
#include "/sys/h/socket.h"
#include "/sys/h/socketvar.h"
#include "/sys/h/mbuf.h"
#include "/sys/h/protosw.h"
#include "/sys/netinet/in.h"
#include "/sys/netinet/in_systm.h"
#include "/sys/net/route.h"
#include "/sys/netinet/in_pcb.h"
#include "/sys/netinet/ip.h"
#include "/sys/netinet/tcp.h"
#include "/sys/netinet/tcp_fsm.h"
#include "/sys/netinet/tcp_timer.h"
#include "/sys/netinet/tcp_var.h"
#include <nlist.h>
/* Error number left behind by a failing system call. */
extern int errno;

/*
 * Local scratch copies of kernel structures.  main() fills these by
 * reading /dev/kmem; they are never pointers into kernel memory.
 */
struct inpcb inpcb;
struct tcpcb tcpcb;

/* Descriptor for /dev/kmem, opened read/write by main(). */
int kmem;

/* Index into nl[] of the "_tcb" symbol. */
#define N_TCB 0

/* Symbols handed to nlist(); the empty name ends the list. */
struct nlist nl[] = {
  { "_tcb" },
  { "" },
};
/*
 * kseek -- position the /dev/kmem descriptor at kernel address `where'.
 *
 * Success is judged from errno rather than from lseek()'s return value,
 * as the original code did (presumably because a kernel address cast to
 * off_t can look negative on the target machines -- confirm for your
 * platform).  errno is cleared first so that a stale value left behind by
 * some earlier call is not mistaken for a seek failure.  On failure the
 * message `what' is reported through check_error(), which exits.
 */
static void
kseek(where, what)
off_t where;
char *what;
{
  errno = 0;
  (void) lseek (kmem, where, 0);
  check_error((errno != 0) ? -1 : 0, what, errno);
}

/*
 * kread -- read exactly `len' bytes from the current /dev/kmem position
 * into `buf'.
 *
 * A failed read is reported through check_error(), which exits.  A short
 * read is also fatal: the caller is about to act on the structure it asked
 * for, and a partly filled one must not be used to decide where to write.
 */
static void
kread(buf, len, what)
char *buf;
int len;
char *what;
{
  register int got;

  got = read (kmem, buf, len);
  check_error(got, what, errno);
  if (got != len) {
    fprintf(stderr, "fixfin2: Error: %s (short read)\n", what);
    exit (1);
  }
}

/*
 * fixfin2 -- force TCP connections stuck in FIN_WAIT_2 out of that state.
 *
 * Looks up the kernel symbol "_tcb" in /vmunix, opens /dev/kmem read/write
 * and walks the circular, doubly linked list of inpcb structures that
 * starts there.  For every entry whose tcpcb has t_state equal to
 * TCPS_FIN_WAIT_2, the state word is re-read from the kernel and, if it is
 * still FIN_WAIT_2, overwritten with 0.
 *
 * Exit status: 0 after a complete walk, 1 on a namelist or I/O error
 * (see check_error), 3 if the list is empty, 4 if the list changed under
 * us while it was being walked.
 *
 * The structure layouts come from the headers this file is compiled with,
 * so those headers must match the running kernel.
 */
main ()
{
  register int err;
  register struct inpcb *next, *prev;
  register int stateoff;
  struct inpcb *tcbaddr;
  off_t stateaddr;
  short fstate;

  /* Byte offset of t_state within a tcpcb; the same for every entry. */
  stateoff = (char *) &tcpcb.t_state - (char *) &tcpcb;

  nlist("/vmunix", nl);
  if (nl[N_TCB].n_type == 0) {
    fprintf(stderr, "/vmunix: no namelist\n");
    exit (1);
  }
  tcbaddr = (struct inpcb *) nl[N_TCB].n_value;

  kmem = open ("/dev/kmem", O_RDWR, 0644);
  check_error(kmem, "Opening /dev/kmem", errno);

  /* Read the list head. */
  kseek((off_t) tcbaddr, "seeking for tcbaddr");
  kread((char *) &inpcb, (int) sizeof (struct inpcb), "reading tcb");

  /* A head whose forward link points back at itself means an empty list. */
  if (inpcb.inp_next == tcbaddr) exit (3);

  prev = tcbaddr;
  while (inpcb.inp_next != tcbaddr) {
    next = inpcb.inp_next;
    kseek((off_t) next, "seeking for next inpcb");
    kread((char *) &inpcb, (int) sizeof (inpcb), "reading next inpcb");

    /*
     * The entry just read must point back at the one we came from;
     * otherwise the kernel changed the list between our two reads.
     */
    if (inpcb.inp_prev != prev) {
      fprintf(stderr, "Bad sample after %lx\n", (unsigned long) prev);
      exit (4);
    }

    kseek((off_t) inpcb.inp_ppcb, "Seeking tcpcb");
    kread((char *) &tcpcb, (int) sizeof (tcpcb), "Reading tcpcb");

    if (tcpcb.t_state == TCPS_FIN_WAIT_2) {
      stateaddr = (off_t) inpcb.inp_ppcb + stateoff;

      /*
       * Re-read just the state word immediately before writing, to keep
       * the window in which the kernel could change it as small as
       * possible.  (The window cannot be closed from user space.)
       */
      kseek(stateaddr, "seeking to tcp state variable");
      kread((char *) &fstate, (int) sizeof (short),
	    "reading tcp state variable");

      if (fstate != TCPS_FIN_WAIT_2) {
	/* Leave this connection alone and move on to the next entry. */
	fprintf(stderr, "Connection changed state!\n");
      } else {
	/*
	 * 0 is the value the original program stores; the thread describes
	 * the result as the CLOSED state (TCPS_CLOSED in
	 * <netinet/tcp_fsm.h> -- confirm against your headers).
	 */
	fstate = 0;
	kseek(stateaddr, "seeking to tcp state variable");
	err = write (kmem, (char *) &fstate, sizeof (short));
	check_error(err, "Writing tcp state variable", errno);
	if (err != sizeof (short)) {
	  fprintf(stderr,
		  "fixfin2: Error: Writing tcp state variable (short write)\n");
	  exit (1);
	}
      }
    }
    prev = next;
  }
  (void) close (kmem);
  exit (0);
}

/*
 * check_error -- abort the program if a system call reported failure.
 *
 *   err   result of the call being checked; negative means it failed
 *   msg   short description of what was being attempted
 *   code  the errno value captured by the caller at the time of the call
 *
 * When err is negative, prints msg to stderr, then prints the system's
 * description of `code' via perror(), and exits with status 1.  errno is
 * reloaded from `code' just before perror() because the fprintf() in
 * between may have changed it.  When err is zero or positive the function
 * does nothing and returns 0.
 */
int
check_error(err, msg, code)
int err;
char *msg;
int code;
{
  if (err < 0) {
    fprintf (stderr, "fixfin2: Error: %s\n", msg);
    errno = code;
    perror("fixfin2");
    exit (1);
  }
  return 0;
}


				AMBAR
ARPA: ambar@eddie.mit.edu		UUCP: {backbones}!mit-eddie!ambar