dal@syntel.UUCP (Dale Schumacher) (06/13/89)
Hopefully this will be a fun little modification for Minix-ST.
echo x - readme
gres '^X' '' > readme << '/'
XThe enclosed mods are a couple of performance improvements relating to
Xthe block memory copy function of Minix.
X
XThere are really two changes. The first is a re-implementation of the
Xphys_copy() function in optimized 68000 assembly. This version is
Xoptimized for the typical copy block sizes used most by Minix (determined
Xby actual profiling, thanks Todd!). In addition, the #define HAVEDCOPY
Xin const.h can enable the filesystem to copy data directly to and from
Xthe user's process space. This gives around a 20-30% reduction in the
Xsystem time used to run a typical process. Without this mod, when the
Xfilesystem wants to copy data (which it does all the time) it sends a
Xmessage to the kernel and the kernel copies the data on behalf of the
Xfilesystem, trusting that the filesystem has made a reasonable request.
XSince we have no MMU enforcing memory access restrictions, I simply
Xavoid this unneeded message passing and context switching. The issues
Xinvolved in this tradeoff are discussed starting on p.184 of "the book".
XOf course, the new fast phys_copy() is used here as well.
X
XAn interesting side effect of the above changes occurred in copying
Xthe new kernel image to floppy (almost 200K). This used to take around
X1:30 of wall-clock time under my older kernel (around 0:55 with a
XGCC-compiled kernel), but it only takes 0:12 now! I suspect
Xthat this has to do with writing a sector or two and not quite keeping
Xup with the disk rotation, thus causing lots of extra disk revs waiting
Xfor the correct sector to come around again. The new change seems to
Xallow this process to keep up and write continuously.
X
X
XTo apply these changes...
X
X  1. If you DON'T want the FS code to do direct copying,
X     ignore all modifications to files in the fs/ directory
X     and skip step #2.
X
X  2. Add the contents of const.h.add to h/const.h.
X
X  3. 
Use patch to apply kmakef.cdiff to kernel/Makefile and
X     fsmakef.cdiff to fs/Makefile.
X
X  4. Put a copy of physcopy.s in kernel/ and fs/.
X
X  5. Apply system.c.cdiff to kernel/system.c and read.c.cdiff
X     to fs/read.c.
X
XI hope you'll find these mods as useful as I do. Good luck & happy hacking.
/
echo x - const.h.add
gres '^X' '' > const.h.add << '/'
X/*
X  Define HAVEDCOPY to be 1 to get direct user/fs copying mods.
X*/
X
X#define HAVEDCOPY 1
X
X/* fix up if undefined to give the value 0 or 1 */
X#if HAVEDCOPY
X# define HAVE_DCOPY 1
X#else
X# define HAVE_DCOPY 0
X#endif
/
echo x - physcopy.s
gres '^X' '' > physcopy.s << '/'
X.sect .text;.sect .rom;.sect .data;.sect .bss
X.extern _phys_copy
X.sect .text
X!
X! phys_copy(source, destination, count)
X!
X!   Copies 'count' bytes from 'source' to 'destination'.  The three
X!   arguments are read as longs from 8(a6), 12(a6) and 16(a6).
X!
X!   Strategy:
X!     - If the two pointers differ in bit 0 they can never both be word
X!       aligned, so the whole copy is done one byte at a time.
X!     - Otherwise, if the pointers are odd, one byte is copied first to
X!       make them even.  Counts below 64 then go straight to the long
X!       loop; larger counts are moved in 256-byte chunks, then 64-byte
X!       chunks (both using movem.l), then longs, then single bytes.
X!
X!   d2-d7/a2-a5 are used as movem buffers.  They are pushed before the
X!   256-byte loop and popped again on every path that leaves the routine.
X!   d0, d1, a0 and a1 are used without being saved.
X!
X!   NOTE: the 256-byte loop counter is decremented with dbra, which only
X!   looks at the low 16 bits of (count div 256).
X!
X_phys_copy:
X        link    a6,#-12
X        move.l  8(a6),a0                ! load source pointer
X        move.l  12(a6),a1               ! load destination pointer
X        move.l  a0,d0
X        move.l  a1,d1
X        eor.l   d1,d0
X        btst    #0,d0                   ! pointers mutually aligned?
X        beq     check1                  ! yes
X        move.l  16(a6),d0               ! load count
X        bra     copy1
Xcheck1:
X        move.l  16(a6),d0               ! load count
X        beq     done                    ! count is 0: the odd-byte fixup below
X                                        ! would otherwise wrap it to -1
X        btst    #0,d1                   ! pointers aligned, but odd?
X        beq     check64                 ! no
X        move.b  (a0)+,(a1)+             ! copy odd byte
X        sub.l   #1,d0                   ! decrement count
Xcheck64:
X        move.l  d0,d1
X        and.l   #0xFFFFFFC0,d1          ! count < 64
X        beq     copy4                   ! yes
X        movem.l d2-d7/a2-a5,-(a7)       ! save regs for movem use
X        move.l  d0,d1
X        and.l   #0xFF,d1                ! count mod 256
X        lsr.l   #8,d0                   ! count div 256
X        bra     end256
Xloop256:
X        movem.l (a0)+,d2-d7/a2-a5       ! copy 10x4 bytes
X        movem.l d2-d7/a2-a5,(a1)
X        lea     40(a1),a1
X        movem.l (a0)+,d2-d7/a2-a5       ! copy 10x4 bytes
X        movem.l d2-d7/a2-a5,(a1)
X        lea     40(a1),a1
X        movem.l (a0)+,d2-d7/a2-a5       ! copy 10x4 bytes
X        movem.l d2-d7/a2-a5,(a1)
X        lea     40(a1),a1
X        movem.l (a0)+,d2-d7/a2-a5       ! copy 10x4 bytes
X        movem.l d2-d7/a2-a5,(a1)
X        lea     40(a1),a1
X        movem.l (a0)+,d2-d7/a2-a5       ! copy 10x4 bytes
X        movem.l d2-d7/a2-a5,(a1)
X        lea     40(a1),a1
X        movem.l (a0)+,d2-d7/a2-a5       ! copy 10x4 bytes
X        movem.l d2-d7/a2-a5,(a1)
X        lea     40(a1),a1
X        movem.l (a0)+,d2-d5             ! copy 4x4 bytes
X        movem.l d2-d5,(a1)
X        lea     16(a1),a1
Xend256:
X        dbra    d0,loop256              ! decrement count, test and loop
X        move.l  d1,d0                   ! remainder becomes new count
X        beq     restore                 ! more to copy? no! but the saved
X                                        ! registers must still be popped
X        and.l   #0x3F,d1                ! count mod 64
X        lsr.l   #6,d0                   ! count div 64
X        bra     end64
Xloop64:
X        movem.l (a0)+,d2-d7/a4-a5       ! copy 8x4 bytes
X        movem.l d2-d7/a4-a5,(a1)
X        lea     32(a1),a1
X        movem.l (a0)+,d2-d7/a4-a5       ! copy 8x4 bytes
X        movem.l d2-d7/a4-a5,(a1)
X        lea     32(a1),a1
Xend64:
X        dbra    d0,loop64               ! decrement count, test and loop
X        movem.l (a7)+,d2-d7/a2-a5       ! restore regs for movem use
X        move.l  d1,d0                   ! remainder becomes new count
Xcopy4:
X        move.l  d0,d1                   ! set remainder (for branch in case)
X        and.l   #0x3,d1                 ! count mod 4
X        lsr.l   #2,d0                   ! count div 4
X        bra     end4
Xloop4:
X        move.l  (a0)+,(a1)+
Xend4:
X        dbra    d0,loop4                ! decrement count, test and loop
X        move.l  d1,d0                   ! remainder becomes new count
X        bra     end1
Xloop1:
X        move.b  (a0)+,(a1)+
Xcopy1:
Xend1:
X        dbra    d0,loop1                ! decrement count, test and loop
X        sub.l   #0x10000,d0             ! dbra only counts the low word; borrow
X        bcc     loop1                   ! from the high word for counts >= 64K
Xdone:
X        unlk    a6
X        rts
Xrestore:
X        movem.l (a7)+,d2-d7/a2-a5       ! exit taken when nothing is left after
X        bra     done                    ! the 256-byte loop: pop the saved regs
/
echo x - fsmakef.cdiff
gres '^X' '' > fsmakef.cdiff << '/'
X*** org/fs/Makefile Thu Jun 11 23:30:13 1989
X--- new/fs/Makefile Thu Jun 11 23:32:04 1989
X***************
X*** 5,11 ****
X
X  CFLAGS = -O -DATARI_ST
X
X! OBJ = main.o open.o read.o write.o pipe.o device.o \
X        path.o mount.o link.o super.o inode.o cache.o filedes.o \
X        stadir.o protect.o time.o misc.o utility.o table.o putc.o
X  HDR = ../h/callnr.h ../h/com.h ../h/const.h ../h/error.h \
X--- 5,11 ----
X
X  CFLAGS = -O -DATARI_ST
X
X! OBJ = main.o open.o read.o physcopy.o write.o pipe.o device.o \
X        path.o mount.o link.o super.o inode.o cache.o filedes.o \
X        stadir.o protect.o time.o misc.o utility.o table.o putc.o
X  HDR = ../h/callnr.h ../h/com.h ../h/const.h ../h/error.h \
/
echo x - kmakef.cdiff
gres '^X' '' > kmakef.cdiff << '/'
X*** org/kernel/Makefile Thu Jun 11 23:30:13 1989
X--- new/kernel/Makefile Thu Jun 11 23:32:04 1989
X***************
X*** 5,11 ****
X
X  CFLAGS = -O -DATARI_ST -DACK
X
X! 
OBJ = stmpx.o stmain.o proc.o system.o stshadow.o \
X        tty.o clock.o memory.o stdma.o stfloppy.o stwini.o \
X        stcon.o stkbd.o stvdu.o stfnt.o stprint.o \
X        table.o stdmp.o
X--- 5,11 ----
X
X  CFLAGS = -O -DATARI_ST -DACK
X
X! OBJ = stmpx.o stmain.o proc.o system.o physcopy.o stshadow.o \
X        tty.o clock.o memory.o stdma.o stfloppy.o stwini.o \
X        stcon.o stkbd.o stvdu.o stfnt.o stprint.o \
X        table.o stdmp.o
/
# read.c.cdiff: context diff for fs/read.c.  Under ATARI_ST, when HAVE_DCOPY
# is non-zero, rw_user() calls phys_copy() directly on the addresses it is
# given; otherwise the original version that fills in umess and calls
# sys_copy() is compiled.
echo x - read.c.cdiff
gres '^X' '' > read.c.cdiff << '/'
X*** org/fs/read.c Thu Jun 11 23:36:55 1989
X--- new/fs/read.c Thu Jun 11 23:48:02 1989
X***************
X*** 294,299 ****
X--- 294,301 ----
X    return(b);
X  }
X
X+ #ifdef ATARI_ST
X+ #if HAVE_DCOPY
X  /*===========================================================================*
X   *                              rw_user                                      *
X   *===========================================================================*/
X***************
X*** 308,317 ****
X  /* Transfer a block of data. Two options exist, depending on 'direction':
X   * TO_USER: Move from FS space to user virtual space
X   * FROM_USER: Move from user virtual space to FS space
X!  */
X
X    if (direction == TO_USER ) {
X          /* Write from FS space to user space. */
X          umess.SRC_SPACE = D;
X          umess.SRC_PROC_NR = FS_PROC_NR;
X          umess.SRC_BUFFER = (long) buff;
X--- 310,356 ----
X  /* Transfer a block of data. Two options exist, depending on 'direction':
X   * TO_USER: Move from FS space to user virtual space
X   * FROM_USER: Move from user virtual space to FS space
X!  *
X!  * This version of rw_user ignores the umap() problem and assumes that
X!  * virtual addresses are really absolute, as they are on the ST. -Dal
X!  */
X!   phys_bytes src_phys, dst_phys, copy_length;
X
X    if (direction == TO_USER ) {
X          /* Write from FS space to user space. */
X+         src_phys = (phys_bytes) buff;
X+         dst_phys = (phys_bytes) vir;
X+   } else {
X+         /* Read from user space to FS space. */
X+         src_phys = (phys_bytes) vir;
X+         dst_phys = (phys_bytes) buff;
X+   }
X+
X+   copy_length = (phys_bytes) bytes;
X+   phys_copy(src_phys, dst_phys, copy_length);
X+
X+   return(OK);
X+ }
X+
X+ #else
X+ /*===========================================================================*
X+  *                              rw_user                                      *
X+  *===========================================================================*/
X+ PUBLIC int rw_user(s, u, vir, bytes, buff, direction)
X+ int s;                        /* D or T space (stack is also D) */
X+ int u;                        /* process number to r/w (usually = 'who') */
X+ vir_bytes vir;                /* virtual address to move to/from */
X+ vir_bytes bytes;              /* how many bytes to move */
X+ char *buff;                   /* pointer to FS space */
X+ int direction;                /* TO_USER or FROM_USER */
X+ {
X+ /* Transfer a block of data. Two options exist, depending on 'direction':
X+  * TO_USER: Move from FS space to user virtual space
X+  * FROM_USER: Move from user virtual space to FS space
X+  */
X+
X+   if (direction == TO_USER ) {
X+         /* Write from FS space to user space. */
X          umess.SRC_SPACE = D;
X          umess.SRC_PROC_NR = FS_PROC_NR;
X          umess.SRC_BUFFER = (long) buff;
X***************
X*** 332,338 ****
X    sys_copy(&umess);
X    return(umess.m_type);
X  }
X!
X
X  /*===========================================================================*
X   *                              read_ahead                                   *
X--- 371,378 ----
X    sys_copy(&umess);
X    return(umess.m_type);
X  }
X! #endif /* HAVE_DCOPY */
X! #endif /* ATARI_ST */
X
X  /*===========================================================================*
X   *                              read_ahead                                   *
/
# system.c.cdiff: context diff for kernel/system.c.  Wraps the C version of
# phys_copy() in "#if 0" so that the routine in physcopy.s is the only
# definition left.
echo x - system.c.cdiff
gres '^X' '' > system.c.cdiff << '/'
X*** org/kernel/system.c Thu Jun 11 23:37:09 1989
X--- new/kernel/system.c Thu Jun 11 23:38:02 1989
X***************
X*** 621,626 ****
X--- 750,758 ----
X  }
X
X  #ifdef ATARI_ST
X+ #if 0
X+ /* --- this routine is replaced by the one in physcopy.s --- Dal */
X+
X  /*===========================================================================*
X   *                              phys_copy                                    *
X   *===========================================================================*/
X***************
X*** 649,654 ****
X--- 781,787 ----
X    while (--n >= 0)
X          *((char *)d)++ = *((char *)s)++;
X  }
X+ #endif
X
X  /*===========================================================================*
X   *                              build_sig                                    *
/
--
Dale Schumacher                         399 Beacon Ave.
(alias: Dalnefre')                      St. Paul, MN  55104-3527
...bungia!midgard.mn.org!syntel!dal     United States of America
"I may be competitive, but I'm never ruthless"