[comp.os.minix] Fast phys_copy

dal@syntel.UUCP (Dale Schumacher) (06/13/89)

Hopefully this will be a fun little modification for Minix-ST.

# Unpack "readme": overview of the patch set and the steps for applying it.
# Every payload line carries a leading X; gres is given the pattern '^X' and
# an empty replacement, presumably to strip that X again (confirm with gres(1)).
# The here-document delimiter '/' is quoted, so the payload is not expanded.
echo x - readme
gres '^X' '' > readme << '/'
XThe enclosed mods are a couple of performance improvements relating to
Xthe block memory copy function of Minix.
X
XThere are really two changes.  The first is a re-implementation of the
Xphys_copy() function in optimized 68000 assembly.  This version is
Xoptimized for the typical copy block sizes used most by Minix (determined
Xby actual profiling, thanks Todd!).  In addition, the #define HAVEDCOPY
Xin const.h can enable the filesystem to copy data directly to and from
Xthe user's process space.  This gives around 20-30% reductions in the
Xsystem time used to run a typical process.  Without this mod, when the
Xfilesystem wants to copy data (which it does all the time) it sends a
Xmessage to the kernel and the kernel copies the data on behalf of the
Xfilesystem, trusting that the filesystem has made a reasonable request.
XSince we have no MMU enforcing memory access restrictions, I simply
Xavoid this unneeded message passing and context switching.  The issues
Xinvolved in this tradeoff are discussed starting on p.184 of "the book".
XOf course, the new fast phys_copy() is used here as well.
X
XAn interesting side effect of the above changes occurred in copying
Xthe new kernel image to floppy (almost 200K).  This used to take around
X1:30 of wall-clock time under my older kernel (around 0:55 with a
XGCC-compiled kernel), but it only takes 0:12 seconds now!  I suspect
Xthat this has to do with writing a sector or two and not quite keeping
Xup with the disk rotation, thus causing lots of extra disk revs waiting
Xfor the correct sector to come around again.  The new change seems to
Xallow this process to keep up and write continuously.
X
X
XTo apply these changes...
X
X	1. If you DON'T want the FS code to do direct copying,
X	ignore all modifications to files in the fs/ directory
X	and skip step #2.
X
X	2. Add the contents of const.h.add to h/const.h.
X
X	3. Use patch to apply kmakef.cdiff to kernel/Makefile and
X	fsmakef.cdiff to fs/Makefile.
X
X	4. Put a copy of physcopy.s in kernel/ and fs/.
X
X	5. Apply system.c.cdiff to kernel/system.c and read.c.cdiff
X	to fs/read.c.
X
XI hope you'll find these mods as useful as I do.  Good luck & happy hacking.
/
# Unpack "const.h.add": the HAVEDCOPY switch and the derived HAVE_DCOPY macro
# (always 0 or 1); the readme says to add this text to h/const.h.
# gres is given '^X' and an empty replacement, presumably to strip the X that
# prefixes every payload line; the quoted '/' delimiter keeps the text literal.
echo x - const.h.add
gres '^X' '' > const.h.add << '/'
X/*
X   Define HAVEDCOPY to be 1 to get direct user/fs copying mods.
X*/
X
X#define	HAVEDCOPY	1
X
X/* fix up if undefined to give the value 0 or 1 */
X#if HAVEDCOPY 
X# define HAVE_DCOPY 1
X#else
X# define HAVE_DCOPY 0
X#endif
/
# Unpack "physcopy.s": the 68000 assembly phys_copy() (ACK assembler syntax).
# gres is given '^X' and an empty replacement, presumably to strip the X that
# prefixes every payload line; the quoted '/' delimiter keeps the text literal.
echo x - physcopy.s
gres '^X' '' > physcopy.s << '/'
X.sect .text;.sect .rom;.sect .data;.sect .bss
X.extern _phys_copy
X.sect .text
X!
X! phys_copy(src, dst, count) - copy count bytes from src to dst
X!
X!	 8(a6)	source address
X!	12(a6)	destination address
X!	16(a6)	byte count; nothing is copied when it is <= 0
X!
X! If the two addresses differ in their low bit, one of them is always odd
X! and the 68000 cannot use word or long accesses, so the whole block is
X! copied byte by byte.  Otherwise one leading byte is copied when both
X! addresses are odd, and the rest is moved in 256-byte and 64-byte movem
X! bursts, then longwords, then up to three trailing bytes.
X!
X! d0, d1, a0 and a1 are used as scratch.  d2-d7/a2-a5 are saved before the
X! movem bursts and restored on every path that leaves them.
X! The 256-byte burst counter is a 16-bit dbra count, so one call can move
X! just under 16 Mbyte on the aligned path.
X!
X_phys_copy:
X	link	a6,#-12
X	move.l	8(a6),a0		! load source pointer
X	move.l	12(a6),a1		! load destination pointer
X	move.l	a0,d0
X	move.l	a1,d1
X	eor.l	d1,d0
X	btst	#0,d0			! pointers mutually aligned?
X	beq	check1			! yes
X	move.l	16(a6),d0		! load count
X	ble	done			! count <= 0: nothing to copy
X	bra	copy1			! bytes only, full 32-bit count
Xcheck1:
X	move.l	16(a6),d0		! load count
X	ble	done			! count <= 0: do not touch the odd byte
X	btst	#0,d1			! pointers aligned, but odd?
X	beq	check64			! no
X	move.b	(a0)+,(a1)+		! copy odd byte
X	sub.l	#1,d0			! decrement count
Xcheck64:
X	move.l	d0,d1
X	and.l	#0xFFFFFFC0,d1		! count < 64
X	beq	copy4			! yes
X	movem.l	d2-d7/a2-a5,-(a7)	! save regs for movem use
X	move.l	d0,d1
X	and.l	#0xFF,d1		! count mod 256
X	lsr.l	#8,d0			! count div 256
X	bra	end256
Xloop256:
X	movem.l	(a0)+,d2-d7/a2-a5	! copy 10x4 bytes
X	movem.l	d2-d7/a2-a5,(a1)
X	lea	40(a1),a1
X	movem.l	(a0)+,d2-d7/a2-a5	! copy 10x4 bytes
X	movem.l	d2-d7/a2-a5,(a1)
X	lea	40(a1),a1
X	movem.l	(a0)+,d2-d7/a2-a5	! copy 10x4 bytes
X	movem.l	d2-d7/a2-a5,(a1)
X	lea	40(a1),a1
X	movem.l	(a0)+,d2-d7/a2-a5	! copy 10x4 bytes
X	movem.l	d2-d7/a2-a5,(a1)
X	lea	40(a1),a1
X	movem.l	(a0)+,d2-d7/a2-a5	! copy 10x4 bytes
X	movem.l	d2-d7/a2-a5,(a1)
X	lea	40(a1),a1
X	movem.l	(a0)+,d2-d7/a2-a5	! copy 10x4 bytes
X	movem.l	d2-d7/a2-a5,(a1)
X	lea	40(a1),a1
X	movem.l	(a0)+,d2-d5		! copy 4x4 bytes
X	movem.l	d2-d5,(a1)
X	lea	16(a1),a1
Xend256:
X	dbra	d0,loop256		! decrement count, test and loop
X	move.l	d1,d0			! remainder becomes new count
X	beq	restore			! nothing left: restore regs, then return
X	and.l	#0x3F,d1		! count mod 64
X	lsr.l	#6,d0			! count div 64
X	bra	end64
Xloop64:
X	movem.l	(a0)+,d2-d7/a4-a5	! copy 8x4 bytes
X	movem.l	d2-d7/a4-a5,(a1)
X	lea	32(a1),a1
X	movem.l	(a0)+,d2-d7/a4-a5	! copy 8x4 bytes
X	movem.l	d2-d7/a4-a5,(a1)
X	lea	32(a1),a1
Xend64:
X	dbra	d0,loop64		! decrement count, test and loop
X	movem.l	(a7)+,d2-d7/a2-a5	! restore regs for movem use
X	move.l	d1,d0			! remainder becomes new count
Xcopy4:
X	move.l	d0,d1			! set remainder (for branch in case)
X	and.l	#0x3,d1			! count mod 4
X	lsr.l	#2,d0			! count div 4
X	bra	end4
Xloop4:
X	move.l	(a0)+,(a1)+
Xend4:
X	dbra	d0,loop4		! decrement count, test and loop
X	move.l	d1,d0			! remainder becomes new count
X	bra	end1
Xloop1:
X	move.b	(a0)+,(a1)+
Xcopy1:
Xend1:
X	dbra	d0,loop1		! decrement low word of count, test and loop
X	sub.l	#0x10000,d0		! dbra is 16 bits wide: step the high word
X	bcc	loop1			! no borrow: another 64K bytes to copy
Xdone:
X	unlk	a6
X	rts
Xrestore:
X	movem.l	(a7)+,d2-d7/a2-a5	! restore regs for movem use
X	unlk	a6
X	rts
/
# Unpack "fsmakef.cdiff": context diff that adds physcopy.o to the OBJ list
# of fs/Makefile.
# gres is given '^X' and an empty replacement, presumably to strip the X that
# prefixes every payload line; the quoted '/' delimiter keeps the text literal.
echo x - fsmakef.cdiff
gres '^X' '' > fsmakef.cdiff << '/'
X*** org/fs/Makefile	Thu Jun 11 23:30:13 1989
X--- new/fs/Makefile	Thu Jun 11 23:32:04 1989
X***************
X*** 5,11 ****
X  
X  CFLAGS	= -O -DATARI_ST
X  
X! OBJ	= main.o open.o read.o write.o pipe.o device.o \
X  	  path.o mount.o link.o super.o inode.o cache.o filedes.o \
X  	  stadir.o protect.o time.o misc.o utility.o table.o putc.o
X  HDR	= ../h/callnr.h ../h/com.h ../h/const.h ../h/error.h \
X--- 5,11 ----
X  
X  CFLAGS	= -O -DATARI_ST
X  
X! OBJ	= main.o open.o read.o physcopy.o write.o pipe.o device.o \
X  	  path.o mount.o link.o super.o inode.o cache.o filedes.o \
X  	  stadir.o protect.o time.o misc.o utility.o table.o putc.o
X  HDR	= ../h/callnr.h ../h/com.h ../h/const.h ../h/error.h \
/
# Unpack "kmakef.cdiff": context diff that adds physcopy.o to the OBJ list
# of kernel/Makefile.
# gres is given '^X' and an empty replacement, presumably to strip the X that
# prefixes every payload line; the quoted '/' delimiter keeps the text literal.
echo x - kmakef.cdiff
gres '^X' '' > kmakef.cdiff << '/'
X*** org/kernel/Makefile	Thu Jun 11 23:30:13 1989
X--- new/kernel/Makefile	Thu Jun 11 23:32:04 1989
X***************
X*** 5,11 ****
X  
X  CFLAGS	= -O -DATARI_ST -DACK
X  
X! OBJ	= stmpx.o stmain.o proc.o system.o stshadow.o \
X  	  tty.o clock.o memory.o stdma.o stfloppy.o stwini.o \
X  	  stcon.o stkbd.o stvdu.o stfnt.o stprint.o \
X  	  table.o stdmp.o
X--- 5,11 ----
X  
X  CFLAGS	= -O -DATARI_ST -DACK
X  
X! OBJ	= stmpx.o stmain.o proc.o system.o physcopy.o stshadow.o \
X  	  tty.o clock.o memory.o stdma.o stfloppy.o stwini.o \
X  	  stcon.o stkbd.o stvdu.o stfnt.o stprint.o \
X  	  table.o stdmp.o
/
# Unpack "read.c.cdiff": context diff for fs/read.c.  When HAVE_DCOPY is
# nonzero, rw_user() calls phys_copy() directly; otherwise the version that
# goes through sys_copy() is kept.
# gres is given '^X' and an empty replacement, presumably to strip the X that
# prefixes every payload line; the quoted '/' delimiter keeps the text literal.
echo x - read.c.cdiff
gres '^X' '' > read.c.cdiff << '/'
X*** org/fs/read.c	Thu Jun 11 23:36:55 1989
X--- new/fs/read.c	Thu Jun 11 23:48:02 1989
X***************
X*** 294,299 ****
X--- 294,301 ----
X    return(b);
X  }
X  
X+ #ifdef ATARI_ST
X+ #if HAVE_DCOPY
X  /*===========================================================================*
X   *				rw_user					     *
X   *===========================================================================*/
X***************
X*** 308,317 ****
X  /* Transfer a block of data.  Two options exist, depending on 'direction':
X   *     TO_USER:     Move from FS space to user virtual space
X   *     FROM_USER:   Move from user virtual space to FS space
X!  */
X  
X    if (direction == TO_USER ) {
X  	/* Write from FS space to user space. */
X  	umess.SRC_SPACE  = D;
X  	umess.SRC_PROC_NR = FS_PROC_NR;
X  	umess.SRC_BUFFER = (long) buff;
X--- 310,356 ----
X  /* Transfer a block of data.  Two options exist, depending on 'direction':
X   *     TO_USER:     Move from FS space to user virtual space
X   *     FROM_USER:   Move from user virtual space to FS space
X!  *
X!  * This version of rw_user ignores the umap() problem and assumes that
X!  * virtual addresses are really absolute, as they are on the ST.  -Dal
X!  */
X!   phys_bytes src_phys, dst_phys, copy_length;
X  
X    if (direction == TO_USER ) {
X  	/* Write from FS space to user space. */
X+ 	src_phys = (phys_bytes) buff;
X+ 	dst_phys = (phys_bytes) vir;
X+   } else {
X+ 	/* Read from user space to FS space. */
X+ 	src_phys = (phys_bytes) vir;
X+ 	dst_phys = (phys_bytes) buff;
X+   }
X+ 
X+   copy_length = (phys_bytes) bytes;
X+   phys_copy(src_phys, dst_phys, copy_length);
X+ 
X+   return(OK);
X+ }
X+ 
X+ #else
X+ /*===========================================================================*
X+  *				rw_user					     *
X+  *===========================================================================*/
X+ PUBLIC int rw_user(s, u, vir, bytes, buff, direction)
X+ int s;				/* D or T space (stack is also D) */
X+ int u;				/* process number to r/w (usually = 'who') */
X+ vir_bytes vir;			/* virtual address to move to/from */
X+ vir_bytes bytes;		/* how many bytes to move */
X+ char *buff;			/* pointer to FS space */
X+ int direction;			/* TO_USER or FROM_USER */
X+ {
X+ /* Transfer a block of data.  Two options exist, depending on 'direction':
X+  *     TO_USER:     Move from FS space to user virtual space
X+  *     FROM_USER:   Move from user virtual space to FS space
X+  */
X+ 
X+   if (direction == TO_USER ) {
X+ 	/* Write from FS space to user space. */
X  	umess.SRC_SPACE  = D;
X  	umess.SRC_PROC_NR = FS_PROC_NR;
X  	umess.SRC_BUFFER = (long) buff;
X***************
X*** 332,338 ****
X    sys_copy(&umess);
X    return(umess.m_type);
X  }
X! 
X  
X  /*===========================================================================*
X   *				read_ahead				     *
X--- 371,378 ----
X    sys_copy(&umess);
X    return(umess.m_type);
X  }
X! #endif /* HAVE_DCOPY */
X! #endif /* ATARI_ST */
X  
X  /*===========================================================================*
X   *				read_ahead				     *
/
# Unpack "system.c.cdiff": context diff that wraps the C phys_copy() in
# kernel/system.c in "#if 0" ... "#endif", since physcopy.s replaces it.
# gres is given '^X' and an empty replacement, presumably to strip the X that
# prefixes every payload line; the quoted '/' delimiter keeps the text literal.
echo x - system.c.cdiff
gres '^X' '' > system.c.cdiff << '/'
X*** org/kernel/system.c	Thu Jun 11 23:37:09 1989
X--- new/kernel/system.c	Thu Jun 11 23:38:02 1989
X***************
X*** 621,626 ****
X--- 750,758 ----
X  }
X  
X  #ifdef ATARI_ST
X+ #if 0
X+ /* --- this routine is replaced by the one in physcopy.s --- Dal */
X+ 
X  /*===========================================================================*
X   *				phys_copy				     * 
X   *===========================================================================*/
X***************
X*** 649,654 ****
X--- 781,787 ----
X    while (--n >= 0)
X  	*((char *)d)++ = *((char *)s)++;
X  }
X+ #endif
X  
X  /*===========================================================================*
X   *				build_sig				     * 
/

--
      Dale Schumacher                         399 Beacon Ave.
      (alias: Dalnefre')                      St. Paul, MN  55104-3527
      ...bungia!midgard.mn.org!syntel!dal     United States of America
             "I may be competitive, but I'm never ruthless"