[net.unix-wizards] 4.2bsd & nexus 0x6c on 11/780

rlb@Purdue.ARPA (03/08/84)

From:  Bob Brown <rlb@Purdue.ARPA>

The 4.2BSD kernel's memerr() does not properly clear soft ECC errors on
an 11/780 running the 64kb chip interleaved memory controller (nexus type
0x6c).  The reason is that the 0x6c controller has (at least) four
registers: the address/syndrome register exists twice, once for each side.
If you get an error on the second half (the "D" register), memerr() doesn't
clear it and the system hangs at high IPL.

Below are some context diffs that might give you an idea how to fix it.
New copies of vax/machdep.c and vax/mem.h can be had from me if the diffs
seem too much to handle.

Bob Brown
(415)965-5407
------------------------------------------------------------------------------
*** /sys/vax/machdep.c	Wed Mar  7 13:01:02 1984
--- /user/ftp/pub/machdep.c	Wed Mar  7 12:27:28 1984
***************
*** 1,4
! /* $Header: /usr/src/sys/vax/RCS/machdep.c,v 1.1 84/03/07 12:56:10 rlb Exp $ */
  /*	machdep.c	6.2	83/10/02	*/
  
  #include "../machine/reg.h"

--- 1,4 -----
! /* $Header: machdep.c,v 1.1 83/11/22 15:55:13 root Rel $ */
  /*	machdep.c	6.2	83/10/02	*/
  
  #include "../machine/reg.h"
***************
*** 26,31
  #include "../h/msgbuf.h"
  #include "../h/quota.h"
  
  #include "../vax/frame.h"
  #include "../vax/cons.h"
  #include "../vax/cpu.h"

--- 26,32 -----
  #include "../h/msgbuf.h"
  #include "../h/quota.h"
  
+ #include "../vax/nexus.h"
  #include "../vax/frame.h"
  #include "../vax/cons.h"
  #include "../vax/cpu.h"
***************
*** 428,434
  		switch (cpu) {
  #if VAX780
  		case VAX_780:
! 			M780_ENA(mcr);
  			break;
  #endif
  #if VAX750

--- 429,437 -----
  		switch (cpu) {
  #if VAX780
  		case VAX_780:
! 			M780_ENA(mcr,2);
! 			if ((mcr->mc_reg[0]&0xff)==NEX_MEM64I) 
! 				M780_ENA(mcr,3);
  			break;
  #endif
  #if VAX750
***************
*** 463,471
  		switch (cpu) {
  #if VAX780
  		case VAX_780:
! 			if (M780_ERR(mcr)) {
! 				printf("mcr%d: soft ecc addr %x syn %x\n",
! 				    m, M780_ADDR(mcr), M780_SYN(mcr));
  #ifdef TRENDATA
  				memlog(m, mcr);
  #endif

--- 466,474 -----
  		switch (cpu) {
  #if VAX780
  		case VAX_780:
! 			if (M780_ERR(mcr,2)) {
! 				printf("mcr%dc: soft ecc addr %x syn %x\n",
! 				    m, M780_ADDR(mcr,2), M780_SYN(mcr,2));
  #ifdef TRENDATA
  				memlog(m, mcr);
  #endif
***************
*** 469,475
  #ifdef TRENDATA
  				memlog(m, mcr);
  #endif
! 				M780_INH(mcr);
  			}
  			break;
  #endif

--- 472,478 -----
  #ifdef TRENDATA
  				memlog(m, mcr);
  #endif
! 				M780_INH(mcr,2);
  			}
  			if ((mcr->mc_reg[0]&0xff)==NEX_MEM64I && M780_ERR(mcr,3)) {
  				printf("mcr%dd: soft ecc addr %x syn %x\n",
***************
*** 471,476
  #endif
  				M780_INH(mcr);
  			}
  			break;
  #endif
  #if VAX750

--- 474,484 -----
  #endif
  				M780_INH(mcr,2);
  			}
+ 			if ((mcr->mc_reg[0]&0xff)==NEX_MEM64I && M780_ERR(mcr,3)) {
+ 				printf("mcr%dd: soft ecc addr %x syn %x\n",
+ 				    m, M780_ADDR(mcr,3), M780_SYN(mcr,3));
+ 				M780_INH(mcr,3);
+ 			}
  			break;
  #endif
  #if VAX750
***************
*** 543,549
  #if VAX780
  	case VAX_780:
  	for (i = 0; i < (sizeof (memlogtab) / sizeof (memlogtab[0])); i++)
! 		if ((u_char)(M780_SYN(mcr)) == memlogtab[i].m_syndrome) {
  			printf (
  	"mcr%d: replace %s chip in %s bank of memory board %d (0-15)\n",
  				m,

--- 551,557 -----
  #if VAX780
  	case VAX_780:
  	for (i = 0; i < (sizeof (memlogtab) / sizeof (memlogtab[0])); i++)
! 		if ((u_char)(M780_SYN(mcr,2)) == memlogtab[i].m_syndrome) {
  			printf (
  	"mcr%d: replace %s chip in %s bank of memory board %d (0-15)\n",
  				m,
***************
*** 548,555
  	"mcr%d: replace %s chip in %s bank of memory board %d (0-15)\n",
  				m,
  				memlogtab[i].m_chip,
! 				(M780_ADDR(mcr) & 0x8000) ? "upper" : "lower",
! 				(M780_ADDR(mcr) >> 16));
  			return;
  		}
  	printf ("mcr%d: multiple errors, not traceable\n", m);

--- 556,563 -----
  	"mcr%d: replace %s chip in %s bank of memory board %d (0-15)\n",
  				m,
  				memlogtab[i].m_chip,
! 				(M780_ADDR(mcr,2) & 0x8000) ? "upper" : "lower",
! 				(M780_ADDR(mcr,2) >> 16));
  			return;
  		}
  	printf ("mcr%d: multiple errors, not traceable\n", m);
*** /sys/vax/mem.h	Wed Mar  7 13:01:54 1984
--- /user/ftp/pub/mem.h	Wed Mar  7 12:27:28 1984
***************
*** 1,4
- /* $Header: /usr/src/sys/vax/RCS/mem.h,v 1.1 84/03/07 12:56:28 rlb Exp $ */
  /*	mem.h	6.1	83/07/29	*/
  
  /*

--- 1,3 -----
  /*	mem.h	6.1	83/07/29	*/
  
  /*
***************
*** 8,14
   * per cpu, so we define macros here to mask that.
   */
  struct	mcr {
! 	int	mc_reg[3];
  };
  
  /*

--- 7,13 -----
   * per cpu, so we define macros here to mask that.
   */
  struct	mcr {
! 	int	mc_reg[4];
  };
  
  /*
***************
*** 37,48
  /* register; bit 14 there is an error bit which we also clear */
  /* these bits are in the back of the ``red book'' (or in the VMS code) */
  
! #define	M780_INH(mcr)	\
! 	(((mcr)->mc_reg[2] = (M780_ICRD|M780_HIER|M780_ERLOG)), mtpr(SBIER, 0))
! #define	M780_ENA(mcr)	\
! 	(((mcr)->mc_reg[2] = (M780_HIER|M780_ERLOG)), mtpr(SBIER, 3<<14))
! #define	M780_ERR(mcr)	\
! 	((mcr)->mc_reg[2] & (M780_ERLOG))
  
  #define	M780_SYN(mcr)	((mcr)->mc_reg[2] & 0xff)
  #define	M780_ADDR(mcr)	(((mcr)->mc_reg[2] >> 8) & 0xfffff)

--- 36,47 -----
  /* register; bit 14 there is an error bit which we also clear */
  /* these bits are in the back of the ``red book'' (or in the VMS code) */
  
! #define	M780_INH(mcr,i)	\
! 	(((mcr)->mc_reg[i] = (M780_ICRD|M780_HIER|M780_ERLOG)), mtpr(SBIER, 0))
! #define	M780_ENA(mcr,i)	\
! 	(((mcr)->mc_reg[i] = (M780_HIER|M780_ERLOG)), mtpr(SBIER, 3<<14))
! #define	M780_ERR(mcr,i)	\
! 	((mcr)->mc_reg[i] & (M780_ERLOG))
  
  #define	M780_SYN(mcr,i)	((mcr)->mc_reg[i] & 0xff)
  #define	M780_ADDR(mcr,i)	(((mcr)->mc_reg[i] >> 8) & 0xfffff)
***************
*** 44,51
  #define	M780_ERR(mcr)	\
  	((mcr)->mc_reg[2] & (M780_ERLOG))
  
! #define	M780_SYN(mcr)	((mcr)->mc_reg[2] & 0xff)
! #define	M780_ADDR(mcr)	(((mcr)->mc_reg[2] >> 8) & 0xfffff)
  #endif
  
  #if VAX750

--- 43,50 -----
  #define	M780_ERR(mcr,i)	\
  	((mcr)->mc_reg[i] & (M780_ERLOG))
  
! #define	M780_SYN(mcr,i)	((mcr)->mc_reg[i] & 0xff)
! #define	M780_ADDR(mcr,i)	(((mcr)->mc_reg[i] >> 8) & 0xfffff)
  #endif
  
  #if VAX750

salkind%nyu@sri-unix.UUCP (03/09/84)

From:  Lou Salkind <salkind@nyu>

Although the fix handles the usual case (internally interleaved
controllers), it doesn't handle some "pathological" configurations
(for example, what happens if only the upper controller of the MS780E
is enabled?).

I also fixed the MS780E problem, but in a different and slightly more
general way.

Note that the 4.2BSD memory-handling code makes the following assumption:
	There is a one-to-one correspondence between CPU type and memory
	controller type.  (Note, for example, that the case tests in the
	memory controller routines are by CPU type.)
In my view, this is not a valid assumption.

My fix involves setting the memory controller type in autoconf.c, and
then changing the memory code to test by controller type, not cpu type.
The code works fine on our 780's.

	Lou