n62@nikhefh.hep.nl (Klamer Schutte) (06/16/89)
(Sorry, can't reply to original article any more)
Hi,
I found a bug in the phys_copy routine posted by Dale Schumacher.
This bug made it impossible to boot the rebuilt kernel.
The bug is in the "beq done": the registers saved on the stack
are not restored. (How could this slip through?)
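In C terms the mistake looks roughly like the sketch below (the names are
mine, purely for illustration, not the kernel code): the fast path saves
state it is about to clobber, but one early exit returns without running
the restore that the normal exit performs.

/* Hedged C analogue of the bug; 'scratch' stands in for d2-d7/a2-a5. */
static int scratch;

void copy_fast(char *dst, const char *src, unsigned long count)
{
    int saved = scratch;                /* movem.l d2-d7/a2-a5,-(a7) */
    for (; count >= 4; count -= 4) {
        scratch = *(const int *)src;    /* the copy clobbers the scratch regs */
        *(int *)dst = scratch;
        dst += 4; src += 4;
    }
    if (count == 0)
        return;                         /* BUG: like "beq done", exits without */
                                        /* restoring what was saved above */
    while (count-- > 0)
        *dst++ = *src++;
    scratch = saved;                    /* movem.l (a7)+,d2-d7/a2-a5 */
}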
Here is a cdiff (actually, a diff -c) against the old phys_copy();
it only adds comments marking the problems, it is not a fix.
___________________________________________________________________
*** physcopy.s Fri Jun 16 16:09:27 1989
--- physcopy.s.ammended Fri Jun 16 16:11:53 1989
***************
*** 17,22 ****
--- 17,23 ----
          btst #0,d1 ! pointers aligned, but odd?
          beq check64 ! no
          move.b (a0)+,(a1)+ ! copy odd byte
+ ! YUK! not what you want when count == 0 ( but is count ever 0 ? )
          sub.l #1,d0 ! decrement count
  check64:
          move.l d0,d1
***************
*** 53,58 ****
--- 54,60 ----
          dbra d0,loop256 ! decrement count, test and loop
          move.l d1,d0 ! remainder becomes new count
          beq done ! more to copy? no!
+ ! big error: done does not restore the registers saved on the stack.
          and.l #0x3F,d1 ! count mod 64
          lsr.l #6,d0 ! count div 64
          bra end64
---------------------------------------------------------------------
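For reference, here is a minimal C sketch of what phys_copy(src, dst, count)
has to do; it mirrors the structure of the assembly (alignment test, the
count == 0 guard in front of the odd-byte copy, a long-word middle and a
byte tail). The function name is mine and this is an illustration only,
not the kernel routine itself.

void phys_copy_sketch(char *src, char *dst, unsigned long count)
{
    if (((unsigned long)src ^ (unsigned long)dst) & 1) {
        while (count-- > 0)             /* mutually misaligned: byte copy */
            *dst++ = *src++;
        return;
    }
    if (count == 0)                     /* guards the odd-byte copy below */
        return;
    if ((unsigned long)dst & 1) {
        *dst++ = *src++;                /* copy the odd leading byte */
        count--;
    }
    for (; count >= 4; count -= 4) {    /* the movem/long-word middle, simplified */
        *(long *)dst = *(long *)src;
        dst += 4; src += 4;
    }
    while (count-- > 0)                 /* byte tail */
        *dst++ = *src++;
}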
I also found some points where the assembly can be improved. These are:
1  use    movem   (a0)+,regs
          movem   regs,offset(a1)
   because this is both faster and smaller than using lea's.
2  use move.[wb] where possible instead of move.l.
   Yes, I know, there is no difference between .b and .w
   on a 68000, so every .b could just as well be a .w.
   But we (I) do not have a 68020, so use .[wb], not .l.
3  arrange the code so that no bra.w is needed, only bra.s;
   this does not improve readability ;-)
4  when the pointers are mutually misaligned, use a copy loop
   that moves 16 bytes at a time (a C sketch follows this list).
5  some trivial reordering of the bra's.
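As an illustration of point 4, a hedged C sketch of the unaligned copy (the
function name is mine; the real thing is the unrolled loop16 in the listing
below): copy 16 bytes per pass and finish the remainder byte by byte.

void byte_copy16(char *src, char *dst, unsigned long count)
{
    unsigned long passes = count >> 4;      /* count div 16 */
    unsigned long rest = count & 0x0F;      /* count mod 16 */
    int i;

    while (passes-- > 0)
        for (i = 0; i < 16; i++)            /* 16 consecutive move.b's */
            *dst++ = *src++;
    while (rest-- > 0)
        *dst++ = *src++;
}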
Altogether it looks different, but it is essentially the same.
Klamer.
(.signature at end).
Here comes my version of physcopy.s:
---------------------------------------------------------------------
.sect .text;.sect .rom;.sect .data;.sect .bss
.extern _phys_copy
.sect .text

loop256:
        movem.l (a0)+,d2-d7/a2-a5 ! copy 10x4 bytes
        movem.l d2-d7/a2-a5,(a1)
        movem.l (a0)+,d2-d7/a2-a5 ! copy 10x4 bytes
        movem.l d2-d7/a2-a5,40(a1)
        movem.l (a0)+,d2-d7/a2-a5 ! copy 10x4 bytes
        movem.l d2-d7/a2-a5,80(a1)
        movem.l (a0)+,d2-d7/a2-a5 ! copy 10x4 bytes
        movem.l d2-d7/a2-a5,120(a1)
        movem.l (a0)+,d2-d7/a2-a5 ! copy 10x4 bytes
        movem.l d2-d7/a2-a5,160(a1)
        movem.l (a0)+,d2-d7/a2-a5 ! copy 10x4 bytes
        movem.l d2-d7/a2-a5,200(a1)
        movem.l (a0)+,d2-d5 ! copy 4x4 bytes
        movem.l d2-d5,240(a1)
        lea 256(a1),a1
end256:
        dbra d0,loop256 ! decrement count, test and loop
        move.l d1,d0 ! remainder becomes new count
        beq done ! more to copy? no!
        and.b #0x3F,d1 ! + count mod 64
        lsr.b #6,d0 ! + count div 64
        bra end64

_phys_copy:
        move.l 4(a7),a0 ! + load source pointer
        move.l 8(a7),a1 ! + load destination pointer
        move.l a0,d0
        move.l a1,d1
        eor.b d1,d0
        btst #0,d0 ! pointers mutually aligned?
        bne copy1 ! +
        move.l 12(a7),d0 ! +
        beq end ! if cnt == 0 && pointers both odd ...
        btst #0,d1 ! pointers aligned, but odd?
        beq check64 ! no
        move.b (a0)+,(a1)+ ! copy odd byte
        sub.l #1,d0 ! decrement count
check64:
        move.l #63,d1 ! +
        cmp.l d1,d0 ! +
        ble copy4 ! + count < 64
        movem.l d2-d7/a2-a5,-(a7) ! save regs for movem use
        clr.l d1
        move.b d0,d1 ! count mod 256
        lsr.l #8,d0 ! count div 256
        bra end256
done:
        movem.l (a7)+,d2-d7/a2-a5 ! restore regs for movem use
end:
        rts

loop64:
        movem.l (a0)+,d2-d7/a4-a5 ! copy 8x4 bytes
        movem.l d2-d7/a4-a5,(a1)
        movem.l (a0)+,d2-d7/a4-a5 ! copy 8x4 bytes
        movem.l d2-d7/a4-a5,32(a1)
        lea 64(a1),a1
end64:
        dbra d0,loop64 ! decrement count, test and loop
        movem.l (a7)+,d2-d7/a2-a5 ! restore regs for movem use
        move.l d1,d0 ! remainder becomes new count
copy4:
        move.b d0,d1 ! +
        and.b #3,d1 ! +
        lsr.b #2,d0 ! +
        bra end4
loop4:
        move.l (a0)+,(a1)+
end4:
        dbra d0,loop4 ! decrement count, test and loop
        move.l d1,d0 ! remainder becomes new count
        bra end1
loop1:
        move.b (a0)+,(a1)+
end1:
        dbra d0,loop1 ! decrement count, test and loop
        rts

copy1:
        move.l 12(a7),d0
! count can be big; test on it !
        move.l #16,d1 ! == moveq; 4
        cmp.l d1,d0 ! 6
        blt end1
copy16:
        move.b d0,d1
        and.b #0x0F,d1
        lsr.l #4,d0
        bra end16
loop16:
        move.b (a0)+,(a1)+
        move.b (a0)+,(a1)+
        move.b (a0)+,(a1)+
        move.b (a0)+,(a1)+
        move.b (a0)+,(a1)+
        move.b (a0)+,(a1)+
        move.b (a0)+,(a1)+
        move.b (a0)+,(a1)+
        move.b (a0)+,(a1)+
        move.b (a0)+,(a1)+
        move.b (a0)+,(a1)+
        move.b (a0)+,(a1)+
        move.b (a0)+,(a1)+
        move.b (a0)+,(a1)+
        move.b (a0)+,(a1)+
        move.b (a0)+,(a1)+
end16:
        dbra d0,loop16
        move.l d1,d0
        bra end1
--
________________________________________________________________________________
Klamer Schutte mcvax!nikhefh!{n62,Schutte} {Schutte,n62}@nikhefh.hep.nl