n62@nikhefh.hep.nl (Klamer Schutte) (07/26/89)
Here is a fix for the file _mli.s sent by Ronald Lamprecht,
V61@DHDURZ1. To see who wrote what: the lines indented with a tab
are mine.
The code as sent was buggy for all but short * short multiplications.
I think everything is fixed now; I ran it with a fairly complete test program.
It behaved the same as the original _mli.s.
For those not wanting to install Ronald's kernel improvements (for which
this patch is necessary), place it in your libc.a, as it also saves time
in user programs. (The Dhrystone benchmark accelerated from 510 to 568
dhrystones / second with the same kernel.)
Klamer. (.signature at end).
-------------------- cut here ------------------
#ifdef ACK
! (ACK needs an #ifdef before the first comment !)
!****************************************************************************
!
! _ m l i . s M I N I X
!
! Basic fast signed multiplication routine
!****************************************************************************
!
!
!============================================================================
! Edition history
!
! # Date Comments By
! --- -------- ---------------------------------------------------- ---
! 1 15.06.89 totally rewritten RAL
! 2 26.07.89 bug fixes KS
!
!****************************************************************************
! Name the four sections once, in the order text, rom, data, bss, and then
! switch back to text for the code that follows.
! NOTE(review): presumably the usual ACK preamble that fixes the section
! order for the linker - confirm against the ACK assembler manual.
.sect .text
.sect .rom
.sect .data
.sect .bss
.sect .text
! define DEBUG for a debuggable version: call testmli( &buf, arg1, arg2 )
! from a C program; d0 will be in buf[0], d1 in buf[1]
! Without DEBUG only .mli is exported; with DEBUG only _testmli is exported
! and .mli stays local to this file.
#ifndef DEBUG
.define .mli
#else
.define _testmli
! Scratch storage for the harness: one longword each, initial value 1,
! overwritten on every call (so the harness is not reentrant).
! NOTE(review): both longwords are emitted in .sect .text - confirm that
! the text segment is writable on the target.
saveptr:
.data4 1
saveret:
.data4 1
_testmli:
lea saveret,a0
move.l (sp)+,(a0) ! pop the return address and remember it
lea saveptr,a0
move.l (sp)+,(a0) ! pop the buf pointer and remember it
! arg1 and arg2 are now on top of the stack, which is the layout .mli
! expects below its own return address; .mli pops both of them.
bsr .mli
move.l saveptr,a0
move.l d0,(a0)+ ! buf[0] = high order result
move.l d1,(a0)+ ! buf[1] = low order result
move.l saveret,a0
! sp is now 16 bytes above its value on entry (return address, buf, arg1
! and arg2 have all been popped). Back up 12 bytes so that only the return
! address counts as popped; presumably the C caller removes its three
! arguments itself - TODO confirm.
sub #12,sp ! not tested fix! -- KS
jmp (a0)
#endif
!****************************************************************************
!
! . m l i
!
! Fast signed LW multiplication routine
!****************************************************************************
!
! Multiplies two signed 32 bit operands and returns the signed 64 bit
! product. Both operands are first made non-negative, then multiplied
! unsigned using only as many 16 x 16 mulu partial products as the operand
! sizes require, and finally the 64 bit result is negated when exactly one
! of the operands was negative.
!
! Input: on stack:
! m2.l - multiplicand
! m1.l - multiplier
! (a7) ->(rts_ptr.l) - abs. return ptr.
! The return pointer and both operands are popped by this routine; it
! leaves through jmp (a0) with the stack already cleaned up.
!
! Output: d0.l - high order result
! d1.l - low order result
! d2,a0,a1 - * (d2 and a0 are overwritten below; a1 is not
! touched by the code in this file)
! d3,d4 - saved and restored
!
!****************************************************************************
.mli:
move.l (sp)+,a0 ! get return ptr.
clr.l d2 ! d2 = sign flag, 0 = result is positive
move.l (sp)+,d1 ! get multiplier (m1) and test it
bpl next_arg
not.l d2 ! if negative: store sign and neg
neg.l d1
next_arg:
move.l (sp)+,d0 ! get multiplicand (m2) and test it
bpl sav_reg
not.l d2 ! if negative: toggle stored sign and neg
neg.l d0
sav_reg: movem.l d3-d4,-(sp) ! save used registers
move.l d1,d4 ! d4 = m1, kept for the later partial products
move.l d1,d3
swap d3 ! d3.w = high m1
mulu d0,d1 ! d1 = low m2 x low m1
tst.w d3
bne long_mul ! high m1 <> 0 ->
swap d0 ! d0.w = high m2
tst.w d0 ! and test it
bne mid_mul ! high m2 <> 0: high m2 x low m1 is needed ->
! both high words are 0: the low x low product in d1 is the whole result
clr.l d0 ! high order result = 0
set_sign:
tst.l d2 ! bit 31 of d2 set = exactly one operand was negative
bpl end
neg.l d1 ! negate the 64 bit value d0:d1, low half first
negx.l d0 ! high half, including the borrow left in the x-bit
end:
movem.l (sp)+,d3-d4 ! restore used registers
jmp (a0)
! mid_mul is reached with high m1 == 0 and high m2 <> 0 (d0.w = high m2)
mid_mul:
mulu d0,d4 ! d4 = high m2 * low m1
! mid_mul2 is shared by both cases with exactly one nonzero cross product:
! d1 = low m2 x low m1, d4 = the cross product
mid_mul2:
clr.l d0 ! high x high term is 0 on both paths into here
swap d1 ! result = d1 + d4 << 16
add.w d4,d1 ! low word of d4 onto upper word of d1, carry to x-bit
swap d4 ! d4.w = upper word of the cross product
addx.w d4,d0 ! d0 = upper word of d4 + x-bit
swap d1 ! low order result back in place
bra set_sign
! long_mul is reached with high m1 <> 0 (d3.w = high m1, d0.w = low m2)
long_mul:
mulu d0,d3 ! d3 = low m2 x high m1
swap d0 ! d0.w = high m2
move.w d0,d2 ! preserve sign in highword of d2
bne really_long ! high m2 <> 0: all four partial products needed ->
move.l d3,d4 ! only one cross product: hand it over in d4
bra mid_mul2 ! identical as mid_mul
really_long:
swap d4 ! get high m1
mulu d4,d0 ! d0 = high m1 * high m2
swap d4 ! d4.w = low m1 again
mulu d2,d4 ! d4 = high m2 * low m1
swap d1 ! upper word of low order res.
add d3,d1 ! add low word of first cross product, carry to x-bit
clr d3 ! clr and swap leave the x-bit unchanged
swap d3 ! d3 = upper word of first cross product
addx.l d3,d0 ! add with x-bit of low order result
add d4,d1 ! add low word of second cross product, carry to x-bit
swap d1 ! low order result
clr d4
swap d4 ! d4 = upper word of second cross product
addx.l d4,d0 ! add with x-bit of low order result
bra set_sign
#endif
--
________________________________________________________________________________
Klamer Schutte mcvax!nikhefh!{n62,Schutte} {Schutte,n62}@nikhefh.hep.nl