[comp.os.minix] new _mli.s

n62@nikhefh.hep.nl (Klamer Schutte) (07/26/89)
Here is a fix for the file _mli.s sent by Ronald Lamprecht,
V61@DHDURZ1. To see who wrote what: the lines indented with a tab
are mine.
The code as sent was buggy for all but short * short multiplications.
I think everything is fixed now; I ran it with a fairly complete test program.
It behaved the same as the original _mli.s.
For those not wanting to install Ronald's kernel improvements (for which
this patch is necessary), place it in your libc.a anyway, as it also saves
time in user programs. (The Dhrystone benchmark went from 510 to 568
dhrystones / second with the same kernel.)

Klamer. (.signature at end).
-------------------- cut here ------------------
#ifdef ACK
! (ACK needs an #ifdef before the first comment !)

!****************************************************************************
!
!     _ m l i . s                                                M I N I X
!
!     Basic fast signed multiplication routine
!****************************************************************************
!
!
!============================================================================
! Edition history
!
!  #    Date                         Comments                       By
! --- -------- ---------------------------------------------------- --- 
!   1 15.06.89 totally rewritten                                    RAL
!   2 26.07.89 bug fixes                                            KS
!
!****************************************************************************

           ! Name all four sections once, in this order, before any code or
           ! data is emitted.  NOTE(review): presumably this fixes their
           ! relative order in the object file - confirm against the ACK
           ! assembler manual.
           .sect    .text
           .sect    .rom
           .sect    .data
           .sect    .bss


           ! Switch to the text section for the code that follows.
           .sect    .text

! Define DEBUG to build a C-callable test harness instead of exporting .mli:
!
!          testmli( &buf, arg1, arg2 )
!
! runs .mli on arg1 and arg2 and stores d0 (high order result) in buf[0]
! and d1 (low order result) in buf[1].  Without DEBUG only .mli is exported.
! The harness keeps its state in two static cells, so it is not reentrant.
#ifndef DEBUG
           .define  .mli
#else
	.define	_testmli

! The two scratch cells are written at run time, so they belong in the
! data section and not in between the instructions of the text section.
	.sect	.data
saveptr:
	.data4	0		! &buf, kept across the call to .mli
saveret:
	.data4	0		! return address into the C caller

	.sect	.text
_testmli:
	lea	saveret,a0
	move.l	(sp)+,(a0)	! pop our return address
	lea	saveptr,a0
	move.l	(sp)+,(a0)	! pop &buf: arg1 and arg2 are now on top
	bsr	.mli		! .mli pops its return ptr and both operands
	move.l	saveptr,a0
	move.l	d0,(a0)+	! buf[0] = high order result
	move.l	d1,(a0)+	! buf[1] = low order result
	move.l	saveret,a0
	! Stack accounting: on entry sp pointed at 16 bytes (return address,
	! &buf, arg1, arg2) and all 16 have been popped by now.  Returning
	! normally would have popped only the 4 byte return address, so give
	! back the 12 argument bytes.  NOTE(review): this assumes the C
	! caller removes its own arguments after the call - confirm.
	sub	#12,sp
	jmp	(a0)
#endif
	
!****************************************************************************
!
!          . m l i
!
!          Fast signed LW multiplication routine
!****************************************************************************
!
! Multiplies two signed long words and returns the signed product as a
! pair of long words.  Both operands are first made non-negative (the
! sign of the result is remembered in d2), then multiplied as unsigned
! 16 bit pieces with mulu, and the product is negated at the end if
! exactly one operand was negative.  Partial products whose high word
! operand is zero are skipped:
!
!          high m1 == 0, high m2 == 0  ->  1 mulu  (falls into set_sign)
!          exactly one high word == 0  ->  2 mulu  (mid_mul / mid_mul2)
!          both high words <> 0        ->  4 mulu  (really_long)
!
! Input:   on stack, all popped by this routine:
!          (a7)  -> rts_ptr.l  -  abs. return ptr. (return is jmp (a0))
!          4(a7) -> m1.l       -  multiplier
!          8(a7) -> m2.l       -  multiplicand
!
! Output:  d0.l  - high order result
!          d1.l  - low order result
!          d2,a0 - destroyed  (d3-d4 are used but saved and restored)
!          a1    - listed as destroyed in the original header, but not
!                  touched by the code below
!
!****************************************************************************

.mli:
           move.l   (sp)+,a0        ! get return ptr.
           clr.l    d2              ! d2 = sign flag, 0 = result not negative
           move.l   (sp)+,d1        ! get multiplier (m1) and test it
           bpl      next_arg
	not.l	d2              ! if negative: flip sign flag and negate
           neg.l    d1
next_arg:
           move.l   (sp)+,d0        ! get multiplicand (m2) and test it
           bpl      sav_reg
	not.l	d2              ! if negative: flip sign flag and negate
           neg.l    d0
sav_reg:   movem.l  d3-d4,-(sp)     ! save used registers

           move.l   d1,d4           ! d4 = copy of m1 (d4.w = low m1)
           move.l   d1,d3
           swap     d3              ! d3.w = high m1
           mulu     d0,d1           ! d1 = low m2 x low m1
	tst.w    d3              ! high m1 <> 0 -> need low m2 x high m1
           bne      long_mul
           swap     d0              ! d0.w = high m2
           tst.w    d0              !                and test it
           bne      mid_mul         ! high m2 <> 0 -> need high m2 x low m1
           clr.l    d0              ! high order result = 0
set_sign:
	tst.l	d2              ! bit 31 of d2 set -> result is negative
           bpl      end
           neg.l    d1              ! negate d0:d1 as one quantity: low part
           negx.l   d0              ! first, then high part with the borrow
end:
           movem.l  (sp)+,d3-d4     ! restore used registers
           jmp      (a0)            ! return: args and rts_ptr already popped


! Two ways in: at mid_mul with high m1 == 0 (d0.w = high m2, d4.w = low m1),
! or at mid_mul2 from long_mul with high m2 == 0 and d4 already holding the
! one cross product.  Either way high m1 * high m2 is zero.
mid_mul:
	mulu	d0,d4		! d4 = high m2 * low m1
mid_mul2:
	clr.l	d0		! no high * high term on either path, so
	swap	d1		! result = d1 + d4 << 16
	add.w	d4,d1		! low word of d4 into upper word of d1
	swap	d4		! d4.w = high word of the cross product
	addx.w	d4,d0		! plus the x-bit of the add.w above; relies
				! on swap leaving the x-bit unchanged
	swap	d1		! low order result back in normal word order
	bra	set_sign

! high m1 <> 0.  d0.w = low m2, d3.w = high m1, d4 = m1, d1 = low x low.
long_mul:
           mulu     d0,d3           ! d3 = low m2 x high m1
           swap     d0              ! d0.w = high m2
	move.w	d0,d2		! preserve sign in highword of d2
				! (d2.w = high m2, also sets Z for the bne)
	bne	really_long
	move.l	d3,d4		! high m2 == 0: d3 is the only cross product
	bra	mid_mul2	! same addition as after mid_mul

! Both high words are non-zero: all four partial products are needed.
!          d0:d1 = (high m1 * high m2) << 32
!                + (low m2 * high m1 + high m2 * low m1) << 16
!                +  low m2 * low m1
really_long:
	swap	d4		! get high m1
	mulu	d4,d0		! d0 = high m1 * high m2
	swap	d4		! d4.w = low m1 again
	mulu	d2,d4		! d4 = high m2 * low m1
           swap     d1              ! upper word of low order res.
           add      d3,d1           ! add low word of d3 (no size suffix:
                                    ! presumably word sized - confirm)
           clr      d3              ! keep only the high word of d3 ...
           swap     d3              ! ... and move it down to d3.w
           addx.l   d3,d0           ! add with x-bit of low order result
                                    ! (relies on clr and swap leaving the
                                    ! x-bit unchanged)
           add      d4,d1           ! same again for the second cross product
           swap     d1              ! low order result
           clr      d4
           swap     d4
           addx.l   d4,d0           ! add with x-bit of low order result
           bra      set_sign
#endif
-- 
________________________________________________________________________________
Klamer Schutte      mcvax!nikhefh!{n62,Schutte}     {Schutte,n62}@nikhefh.hep.nl