[comp.os.minix] bug causing panic in kernel

n62@nikhefh.nikhef.nl (Klamer Schutte) (10/05/89)

This is quite st specific because PC segments start at address 0.

I found one reason for the kernel to panic.
Run the file test.c on an original minix-st kernel and you will see ...
Included here is a patch (cdiff) to the file kernel/system.c

The patch is not entirely clean: It is still possible to write in your own
text segment. But repairing this will need a lot of changes, all through the
kernel. The comment at the start of the routine umap is now false:
every address from the start of the text segment to the end of the current
segment is now valid. ( I first tried it different, but well-behavind programs
as ls, cc etc, didn't work... ).

One more bug spotted: The value of errno is not consistent. It should in both
cases (write & fstat) return EFAULT (== -14 on my system). Anyone voluntering
to fix this?

Note on patching:
The context diff is made on my system.c. This one is very different from the
original system.c. Patching against the original system.c succeeded;
ignore the offset. (was -117 lines).

Klamer.
Schutte@nikhefh.nl
----------------------------- cut here ------------------------------------
#! /bin/sh
# This is a shell archive, meaning:
# 1. Remove everything above the #! /bin/sh line.
# 2. Save the resulting text in a file.
# 3. Execute the file with /bin/sh (not csh) to create the files:
#	readme
#	test.c
#	system.c.cdif
# This archive created: Thu Oct  5 11:54:45 1989
export PATH; PATH=/bin:$PATH
echo shar: extracting "'readme'" '(985 characters)'
if test -f 'readme'
then
	echo shar: will not over-write existing file "'readme'"
else
sed 's/^X//' << \SHAR_EOF > 'readme'
XI found one reason for the kernel to panic.
XRun the file test.c on an original minix-st kernel and you will see ...
XIncluded here is a patch (cdiff) to the file kernel/system.c
X
XThe patch is not entirely clean: It is still possible to write in your own
Xtext segment. But repairing this will need a lot of changes, all through the
Xkernel. The comment at the start of the routine umap is now false:
Xevery address from the start of the text segment to the end of the current
Xsegment is now valid. ( I first tried it different, but well-behavind programs
Xas ls, cc etc, didn't work... ).
X
XOne more bug spotted: The value of errno is not consistent. It should in both
Xcases (write & fstat) return EFAULT (== -14 on my system). Anyone voluntering
Xto fix this?
X
XNote on patching:
XThe context diff is made on my system.c. This one is very different from the
Xoriginal system.c. Patching against the original system.c succeeded;
Xignore the offset. (was -117 lines).
X
XKlamer.
XSchutte@nikhefh.nl
X
SHAR_EOF
if test 985 -ne "`wc -c < 'readme'`"
then
	echo shar: error transmitting "'readme'" '(should have been 985 characters)'
fi
fi # end of overwriting check
echo shar: extracting "'test.c'" '(1372 characters)'
if test -f 'test.c'
then
	echo shar: will not over-write existing file "'test.c'"
else
sed 's/^X//' << \SHAR_EOF > 'test.c'
X#include <sys/types.h>
X#include <stat.h>
X#include <stdio.h>
X
Xextern int errno;
X
Xmain()
X{
X	struct stat	st;
X
X	if (write( 2, "hello world!\n", 13 ) < 0)
X		printf("write hello fails; errno = %d\n", errno );
X	else
X		printf("write hello succeeds\n");
X	if (write( 2, main, 13 ) < 0)
X		printf("write text segment fails; errno = %d\n", errno );
X	else
X		printf("write text segment succeeds\n");
X	if (write( 2, 0L, 13 ) < 0)
X		printf("write 0L fails; errno = %d\n", errno );
X	else
X		printf("write 0L succeeds\n");
X	if (write( 2, 2L, 13 ) < 0)
X		printf("write 2L fails; errno = %d\n", errno );
X	else
X		printf("write 2L succeeds\n");
X	if (write( 2, 8000000L, 13 ) < 0)
X		printf("write 8000000L fails; errno = %d\n", errno );
X	else
X		printf("write 8000000L succeeds\n");
X	if (fstat( 2, &st ) < 0)
X		printf("fstat st fails; errno = %d\n", errno );
X	else
X		printf("fstat st succeeds\n");
X	if (fstat( 2, main ) < 0)
X		printf("fstat text segment fails; errno = %d\n", errno );
X	else
X		printf("fstat text segment succeeds\n");
X	if (fstat( 2, 0L ) < 0)
X		printf("fstat 0L fails; errno = %d\n", errno );
X	else
X		printf("fstat 0L succeeds\n");
X	if (fstat( 2, 2L ) < 0)
X		printf("fstat 2L fails; errno = %d\n", errno );
X	else
X		printf("fstat 2L succeeds\n");
X	if (fstat( 2, 8000000L ) < 0)
X		printf("fstat 8000000L fails; errno = %d\n", errno );
X	else
X		printf("fstat 8000000L succeeds\n");
X}
X
X
SHAR_EOF
if test 1372 -ne "`wc -c < 'test.c'`"
then
	echo shar: error transmitting "'test.c'" '(should have been 1372 characters)'
fi
fi # end of overwriting check
echo shar: extracting "'system.c.cdif'" '(2962 characters)'
if test -f 'system.c.cdif'
then
	echo shar: will not over-write existing file "'system.c.cdif'"
else
sed 's/^X//' << \SHAR_EOF > 'system.c.cdif'
X*** /usr/kernel/system.c~	Tue Jul 25 19:38:54 1989
X--- /usr/kernel/system.c	Tue Oct  3 10:40:27 1989
X***************
X*** 684,689 ****
X--- 684,695 ----
X  /*===========================================================================*
X   *				umap					     * 
X   *===========================================================================*/
X+ /* umap can fail (and will return (phys_bytes) 0) if:
X+  *	bytes <= 0
X+  *	start address < start segment
X+  *	start address + len > end segment
X+  * 	start & end should be the same segment!
X+  */
X  PUBLIC phys_bytes umap(rp, seg, vir_addr, bytes)
X  register struct proc *rp;	/* pointer to proc table entry for process */
X  int seg;			/* T, D, or S segment */
X***************
X*** 691,697 ****
X  vir_bytes bytes;		/* # of bytes to be copied */
X  {
X  /* Calculate the physical memory address for a given virtual address. */
X!   vir_clicks vc;		/* the virtual address in clicks */
X    phys_bytes pa;		/* intermediate as phys_bytes */
X  
X    /* If 'seg' is D it could really be S and vice versa.  T really means T.
X--- 697,703 ----
X  vir_bytes bytes;		/* # of bytes to be copied */
X  {
X  /* Calculate the physical memory address for a given virtual address. */
X!   vir_clicks svc, evc;		/* the virtual address in clicks (start/end) */
X    phys_bytes pa;		/* intermediate as phys_bytes */
X  
X    /* If 'seg' is D it could really be S and vice versa.  T really means T.
X***************
X*** 702,708 ****
X     * The Atari ST behaves like the 8088 in this respect.
X     */
X    if (bytes <= 0) return( (phys_bytes) 0);
X!   vc = (vir_addr + bytes - 1) >> CLICK_SHIFT;	/* last click of data */
X  
X  #ifdef i8088
X  #define	GAPinS
X--- 708,715 ----
X     * The Atari ST behaves like the 8088 in this respect.
X     */
X    if (bytes <= 0) return( (phys_bytes) 0);
X!   svc = vir_addr >> CLICK_SHIFT;		/* first click of data */
X!   evc = (vir_addr + bytes - 1) >> CLICK_SHIFT;	/* last click of data */
X  
X  #ifdef i8088
X  #define	GAPinS
X***************
X*** 713,728 ****
X  
X  #ifdef GAPinS
X    if (seg != T)
X! 	seg = (vc < rp->p_map[D].mem_vir + rp->p_map[D].mem_len ? D : S);
X! #else
X!   if (seg != T)
X! 	seg = (vc < rp->p_map[S].mem_vir ? D : S);
X  #endif
X  
X  #undef GAPinS
X  
X!   if((vir_addr>>CLICK_SHIFT) >= rp->p_map[seg].mem_vir + rp->p_map[seg].mem_len)
X! 	return( (phys_bytes) 0 );
X    pa = (phys_bytes) vir_addr;
X  #ifndef ATARI_ST
X    pa -= (phys_bytes)rp->p_map[seg].mem_vir << CLICK_SHIFT;
X--- 720,738 ----
X  
X  #ifdef GAPinS
X    if (seg != T)
X! 	seg = (svc < rp->p_map[D].mem_vir + rp->p_map[D].mem_len ? D : S);
X! #else
X!   if (seg != T)
X! 	seg = (svc < rp->p_map[S].mem_vir ? D : S);
X  #endif
X  
X  #undef GAPinS
X  
X!   if(evc >= rp->p_map[seg].mem_vir + rp->p_map[seg].mem_len)
X! 	return( (phys_bytes) 0 );
X!   if (svc < rp->p_map[T].mem_vir )	/* allow to read from text segment */
X! 	return( (phys_bytes) 0 );
X! 
X    pa = (phys_bytes) vir_addr;
X  #ifndef ATARI_ST
X    pa -= (phys_bytes)rp->p_map[seg].mem_vir << CLICK_SHIFT;
SHAR_EOF
if test 2962 -ne "`wc -c < 'system.c.cdif'`"
then
	echo shar: error transmitting "'system.c.cdif'" '(should have been 2962 characters)'
fi
fi # end of overwriting check
#	End of shell archive
exit 0
-- 
_____________________Yes, mail address changed again :-(________________________
Klamer Schutte        mcvax!nikhefh!{n62,Schutte}        {Schutte,n62}@nikhef.nl

evans@ditsyda.oz (Bruce Evans) (10/09/89)

In article <280@nikhefh.nikhef.nl> n62@nikhefh.nikhef.nl (Klamer Schutte) writes:
>This is quite st specific because PC segments start at address 0.

Not at all. Your test program exposed a bug in my version of TTY. The test

	if (write( 2, 8000000L, 13 ) < 0)

generates an error as in lines 3927-3930 of the book. The error case is
allowed to fall through the general case and only return an error code
much later. I missed checking parts of this labyrinth and did not reply
to TTY for the error case (much like the old driver does not reply
properly after an XOFF suspends output, so FS remains waiting on TTY).

This was with a 32-bit 386 Minix so the 80000000L is sort of OK. Did you
really mean 0x80000000L? That may expose other bugs.

>The patch is not entirely clean: It is still possible to write in your own
>text segment. But repairing this will need a lot of changes, all through the

For PC-Minix common I&D programs, and I think all ST-Minix programs, the
text segment is empty from the umap's point of view (== data seg from
another). Protected mode PC-Minix programs have a read-only text segment.
There is no choice - it can even be execute-only, but the protection is
completely lost with common I&D programs since text seg == data seg
physically. So don't use common I&D on the PC!

Making the text segment read-only in system calls is hardly worthwhile if
crazy programs can still corrupt their text segments and elsewhere using
ordinary pointers.

>One more bug spotted: The value of errno is not consistent. It should in both
>cases (write & fstat) return EFAULT (== -14 on my system). Anyone voluntering

The failed writes return E_BAD_ADDR (-10) which is identical with some other
standard error code and very confusing when printed by perror :-(.

---

Here's how I think umap() should be done. I was thinking about how to make
it faster by avoiding conversions between clicks and bytes. It takes 100+
instructions on a PC and is called 3+ times for every block of i/o, so I
think it is the worst (concentrated) bottleneck in the kernel.

Everything becomes simpler, so the bug found be Klamer is easy to avoid.
Memory *allocation* must be done in clicks, but the rest of the code is
better without them.

Caveat. The code has not been tested.

/* The mem_map structure (or part of it in the kernel) needs 3 new fields:
 *
 *	adjust:		offset to convert virtual address to physical
 *			could be made optional when it is always 0
 *	base:		minimum physical address in segment
 *	limit:		maximum physical address in segment
 *
 * Having these makes the calculations cleaner.  Magic click conversions and
 * the (lack of) distinction between data and stack can be hidden in the
 * initialization code.
 *
 * Phys_bytes and vir_bytes ought to unsigned long instead of long.  Assume
 * that here.  Watch out for overflow, however.  See your local malloc for
 * how not to check for overflow :-).
 */

PUBLIC phys_bytes umap(rp, seg, vir_addr, bytes)
register struct proc *rp;	/* pointer to proc table entry for process */
int seg;			/* T, D, or S segment */
vir_bytes vir_addr;		/* virtual address in bytes within the seg */
vir_bytes bytes;		/* # of bytes to be copied */
{
/* Calculate the physical memory address for a given virtual address. */

  phys_bytes high_phys;
  phys_bytes low_phys;

  low_phys = rp->p_map[seg].adjust + vir_addr;	/* return at once if trusted?*/
  high_phys = low_phys + bytes;

  /* Check base <= low < high < limit.  Preposterous (zero, huge or negative)
   * bytes will not pass, though 0 probably should be allowed.
   */
  if (low_phys < rp->p_map[seg].base || low_phys >= high_phys ||
      high_phys >= rp->p_map[seg].limit)
	return(0);
  return(low_phys);
}

The whole routine could be written in 1 return statement with no local
variables, but the instructions on the Cereal box say not to do that :-).
-- 
Bruce Evans		evans@ditsyda.oz.au