[comp.sources.amiga] v91i096: CpuBlit 1.0 - speeds up blitter on a 68020/30 Amiga, Part02/02

amiga-request@ab20.larc.nasa.gov (Amiga Sources/Binaries Moderator) (05/08/91)

Submitted-by: ecarroll%maths.tcd.ie@pucc.PRINCETON.EDU
Posting-number: Volume 91, Issue 096
Archive-name: utilities/cpublit-1.0/part02

#!/bin/sh
# This is a shell archive.  Remove anything before this line, then unpack
# it by saving it into a file and typing "sh file".  To overwrite existing
# files, type "sh file -c".  You can also feed this as standard input via
# unshar, or by typing "sh <file", e.g..  If this archive is complete, you
# will see the following message at the end:
#		"End of archive 2 (of 2)."
# Contents:  src/scroll.s
# Wrapped by tadguy@ab20 on Tue May  7 19:45:25 1991
PATH=/bin:/usr/bin:/usr/ucb ; export PATH
if test -f 'src/scroll.s' -a "${1}" != "-c" ; then 
  echo shar: Will not clobber existing file \"'src/scroll.s'\"
else
echo shar: Extracting \"'src/scroll.s'\" \(42602 characters\)
sed "s/^X//" >'src/scroll.s' <<'END_OF_FILE'
X***************************************************************** :ts=8 *****
X*
X*	SCROLL.S
X*
X*	(C) Copyright Eddy Carroll, January 1991.
X*
X*	Replaces BltBitMap with a routine that uses the CPU (preferably
X*	68030). This increases speed by a factor of about 2.8 on the A3000
X*	when the cache is enabled or 2.0 when the cache is disabled.
X*
X*****************************************************************************
X
X	include "exec/types.i"
X	include "exec/execbase.i"
X	include "exec/nodes.i"
X	include "graphics/gfx.i"
X
X	XDEF	_NewBltBitMap
X	XDEF	_BltBitMapAddress
X	XDEF	_OnlySingle
X	XDEF	_UsageCount
X	XDEF	_Broken
X	XDEF	_BlitFunc
X	XDEF	_MinTaskPri
X	XDEF	_StartBlit
X	XDEF	_ExitBlit
X	XDEF	_ShareBlit
X	XDEF	_Friend1
X	XDEF	_Friend2
X
X	XREF	_SysBase
X	XREF	_GfxBase
X	XREF	_LVOWait
X	XREF	_LVOWaitBlit
X
X	SECTION Scroll,CODE
X
X*****************************************************************************
X*
X*	NewBltBitMap()
X*
X*	Replacement BltBitMap which uses the 68030 instead of the blitter.
X*	The following conditions must hold for the CPU routine to be used:
X*
X*	    o	Bitmaps aligned on same longword bit offset
X*		(i.e. XSrc % 32 == XDest % 32)
X*
X*	    o	If source bitmap == destination bitmap, then YSrc != YDest
X*
X*	    o	Blitter minterm = $Cx (i.e. straight copy)
X*
X*	If any of these conditions doesn't hold, then the original BltBitMap
X*	is called instead.
X*
X*	Input:
X*		D0 - X Source
X*		D1 - Y Source
X*		D2 - X Dest
X*		D3 - Y Dest
X*		D4 - X Size
X*		D5 - Y Size
X*		D6 - Minterm
X*		D7 - Mask, indicating which planes are to be affected
X*		A0 - Pointer to source bitmap structure
X*		A1 - Pointer to destination bitmap structure
X*		A2 - Pointer to temporary bitmap structure (not used)
X*
X*	Output:
X*		D0 - Number of planes actually copied
X*
X*	The copy routine works as follows. Everything is done in longword
X*	units. If the bitmap being copied fits horizontally into a single
X*	longword, then the CopySingle() routine is used which copies a
X*	single column of longwords, masked out as appropriate. Otherwise,
X*	there are at least two longwords involved (the left and right edges
X*	of the bitmap), with possibly some longwords in between as well.
X*	CopyMultiple() is called to perform this copy; it uses two mask
X*	values to identify which bits in the left and right longwords should
X*	be copied. The longwords (if any) in between are copied verbatim.
X*
X*	Note that using longwords gives a big win on the A3000 since it can
X*	access CHIP ram via the 32 bit bus. This relies on the data being
X*	longword aligned of course. In the worst case (where a bitmap width
X*	is not a multiple of 4), one out of every two rows will be longword
X*	aligned, which is not too bad. In the more common case, every row
X*	is longword aligned. For overscan users, it's best to have your
X*	screen width a multiple of 32.
X*
X*****************************************************************************
X
X;
X;	Exit chain used whenever a blit is handed back to the original
X;	BltBitMap. Each entry point undoes one more piece of state than the
X;	one below it and then falls through:
X;
X;	    PreExit   - pops the D0 word that _Friend2 pushed
X;	    _ExitBlit - entered with the caller's A0 parked in D6
X;	    DoOldBlt  - gives back the usage count taken in _NewBltBitMap
X;	    oldblt2   - pops the caller's D6 that _NewBltBitMap pushed
X;	    oldblit   - jumps to the original routine
X;
XPreExit:
X	move.w	(sp)+,d0		; Restore D0 word pushed by _Friend2
X	exg	d6,a0			; A0 is already correct here, so swap
X					; once so the exg below cancels out
X_ExitBlit:
X	exg	d6,a0			; Restore original A0 register
XDoOldBlt:
X	subq.l	#1,_UsageCount		; Decrement number of callers in code
Xoldblt2:
X	move.l	(sp)+,d6		; Restore caller's D6 (the minterm)
Xoldblit:
X	jmp	dummy			; Filled in with correct address later
X
X;
X;	_BltBitMapAddress is the address of the operand field of the jmp
X;	above (opcode word + 2). NOTE(review): this assumes the assembler
X;	emits the absolute-long form of jmp; presumably the installer code
X;	stores the original BltBitMap vector here -- confirm at the caller.
X;
X_BltBitMapAddress equ oldblit+2
X
Xdummy:	rts				; Initial jmp target; also the exit
X					; used for zero-width blits
X
X;
X;	_NewBltBitMap - replacement entry point for BltBitMap(). Registers
X;	on entry are exactly those of BltBitMap (D0-D7, A0-A2). Decides
X;	whether the blit qualifies for the CPU copy:
X;
X;	    - zero width returns immediately (D0 is left untouched)
X;	    - zero or negative height is passed to the system routine: the
X;	      CPU row loops are DBF-counted from YSize-1, so a count of
X;	      zero would wrap round and copy 65536 rows
X;	    - same bitmap and same Y row (sideways blit): system routine
X;	    - different bitmaps while _OnlySingle is set: system routine
X;	    - high nibble of the minterm must be $C (plain copy)
X;	    - XSrc and XDest must have the same bit offset within a longword
X;	    - unless _Broken is set, bm_Flags and bm_Pad must be zero in
X;	      both bitmaps
X;
X;	When every test passes, the usage count is incremented and control
X;	goes to the routine whose address is in _BlitFunc, with the caller's
X;	A0 held in D6 and the caller's D6 on top of the stack.
X;
X_NewBltBitMap:
X	tst.w	d4			; Check if width is zero
X	beq.s	dummy			; If it is, don't do anything
X	tst.w	d5			; Any rows to copy?
X	ble.s	oldblit			; If not, leave it to the system blit
X	cmp.l	a0,a1			; Copying within the same bitmap?
X	bne.s	nb1			; If not, go check _OnlySingle instead
X	cmp.w	d1,d3			; Same bitmap: if Y row is the same it's
X	beq.s	oldblit			; a sideways blit so use system routine
X	bra.s	nb2			; Else skip to next check
Xnb1:
X	tst.l	_OnlySingle		; Should we handle different src/dest
X	bne.s	oldblit			; If not, use standard system blit
Xnb2:
X	move.l	d6,-(sp)		; Save current minterm register
X	and.b	#$f0,d6			; Mask out low bits
X	cmp.b	#$c0,d6			; Is it standard COPY minterm?
X	bne.s	oldblt2			; If not, exit
X	move.l	d0,d6			; See if XSrc % 32 == XDest % 32
X	eor.l	d2,d6			; Low 5 bits should be zero if equal
X	and.b	#$1f,d6			;
X	bne.s	oldblt2			; If not, then have to do normal blit
X	tst.l	_Broken			; Are we accommodating broken s/w?
X	bne.s	nb3			; If so, skip checks
X	tst.b	bm_Flags(a0)		; Is source standard Amiga bitmap?
X	bne.s	oldblt2			; If not, use system blit routine
X	tst.w	bm_Pad(a0)		; Pad word must be clear as well
X	bne.s	oldblt2			;
X	tst.b	bm_Flags(a1)		; How about destination?
X	bne.s	oldblt2			; If it isn't, use system blit
X	tst.w	bm_Pad(a1)		; Pad word must be clear as well
X	bne.s	oldblt2			;
Xnb3:
X	addq.l	#1,_UsageCount		; Increment usage count
X	exg	d6,a0			; Save current A0
X	move.l	_BlitFunc,a0		; Get pointer to appropriate test func
X	jmp	(a0)			; And branch to it
X;
X;	_ShareBlit
X;
X;	Checks the usage count for the blitter code, to see if anyone else
X;	is currently executing it. If so, use the blitter instead (hence
X;	CPU does one blit while blitter does the other blit; multiprocessing!)
X;
X;	Entered with the caller's A0 in D6 and the caller's D6 on the stack,
X;	so D6 is free to use as scratch once A0 has been swapped back.
X;
X;	NOTE(review): _NewBltBitMap increments _UsageCount before jumping
X;	here, so the test below only falls through to the CPU path if the
X;	counter rests at -1 while idle -- confirm where _UsageCount is
X;	defined and initialised.
X;
X_ShareBlit:
X	exg	d6,a0			; Restore old A0
X	move.l	_UsageCount,d6		; Check if someone already in code
X	bne	DoOldBlt		; If there is, use blitter instead
X	bra.s	sblit2			; Else skip to use CPU
X;
X;	_Friend2
X;
X;	Checks to see if there is more than one task ready to run. If so,
X;	use the blitter, else use the CPU. Note that for the most common case
X;	of scrolling (in a CLI/console window), the task outputting the text
X;	that causes the scroll will be "Ready to Run" since it is pre-empted
X;	by the console device before it has a chance to go into a Wait
X;	condition.
X;
X;	If there is more than one task ready to run, but the second task
X;	in the queue has priority < MinTaskPri, then we can use the CPU
X;	anyway (since the second task is a background task that can be
X;	ignored).
X;
X;	Entered with the caller's A0 in D6 and the caller's D6 on the stack.
X;	D0 is borrowed as scratch; its low word is saved on the stack and
X;	popped again on both exits (F2Okay here, PreExit for the blitter).
X;	NOTE(review): the ready list is walked without any visible
X;	Forbid/Disable protection -- confirm this is acceptable.
X;
X_Friend2:
X	move.l	_SysBase,a0		; Get pointer to ExecBase
X	lea.l	TaskReady(a0),a0	; Get ptr to TaskReady list
X	cmp.l	8(a0),a0		; Empty list? (offset 8 points back
X					; at the list header itself)
X	beq.s	_StartBlit		; If yes, do blit
X	move.w	d0,-(sp)		; Grab a register temporarily
X	move.l	(a0),a0			; Get pointer to first waiting task
X	move.l	(a0),a0			; Get pointer to second task
X	move.b	LN_PRI(a0),d0		; Get its priority (if it exists)
X	move.l	(a0),a0			; And final link ptr (NULL if at end)
X	exg	d6,a0			; Restore previous A0; D6 = link ptr
X	tst.l	d6			; More than 1 task?
X	beq.s	F2Okay			; If no, we can use the CPU anyway
X	cmp.b	_MinTaskPri,d0		; Should we make way for waiting task?
X	bge	PreExit			; If so, use blitter instead
XF2Okay:
X	move.w	(sp)+,d0		; Else restore D0
X	bra.s	sblit2			; And skip to start blit
X;
X;	_Friend1
X;
X;	Checks to see if there are _any_ other tasks ready to run. If there
X;	are and their task priority is >= MinTaskPri, then uses system blit
X;	instead of CPU.
X;
X;	Only the first node on the ready list is examined (presumably the
X;	highest priority one -- confirm list ordering). Entered with the
X;	caller's A0 in D6 and the caller's D6 on the stack.
X;
X_Friend1:
X	move.l	_SysBase,a0		; Get pointer to ExecBase
X	lea.l	TaskReady(a0),a0	; Get ptr to TaskReady list, head node
X	cmp.l	8(a0),a0		; Empty list?
X	beq.s	_StartBlit		; If yes, we can safely blit
X	move.l	(a0),a0			; Get pointer to first task
X	move.w	LN_TYPE(a0),a0		; Read ln_Type and ln_Pri as one word
X					; (ln_Pri ends up in the low byte)
X	exg	d6,a0			; Restore a0; D6 low byte = priority
X	cmp.b	_MinTaskPri,d6		; Should we ignore this task?
X	bge	DoOldBlt		; If not, then use blitter instead
X	bra.s	sblit2			; Else skip to use CPU
X
X;----------------------------------------------------------------------------
X;	Where the action starts. Initialises everything and then performs
X;	the blits using the CPU. At this stage, all registers are exactly
X;	as they were on entry to BltBitMap, except for D6 and A0, and these
X;	two are restored to the correct values immediately on entry.
X;----------------------------------------------------------------------------
X_StartBlit:
X	exg	d6,a0			; Restore A0
Xsblit2:					; Alternative entry point (A0 already
X					; restored by the caller)
X;
X;	Now we need to determine the masks to be used for clipping, along
X;	with the start address in memory of the section of the bit and
X;	the modulo of each bitplane (the amount added onto the end of each
X;	copied row address to get to the start of the next one). Then loop
X;	over all the selected bitplanes, copying those requested.
X;
X;	After the movem below the stack holds, from SP upwards: D1-D5, D7,
X;	A0-A6 (13 longwords), then the caller's D6 pushed by _NewBltBitMap,
X;	then the return address. The saved D7 is therefore at 20(sp).
X;
X	movem.l	d1-d5/d7/a0-a6,-(sp)	; Save rest of the registers
X;
X;	Next, we need to make sure that the blitter is free. This is because
X;	some other blitter operation that operates on the bitmaps we've been
X;	passed may have started but not yet finished. Operations that
X;	depend on the blitter are guaranteed to occur in the right order
X;	(since the blitter can't multitask with itself) but when we start
X;	doing some of them with the CPU, we need to be a bit more careful.
X;
X;	Note: Since we are now "in" graphics.library, a6 holds GfxBase.
X;	      WaitBlit() is documented as preserving all registers.
X;
X	jsr	_LVOWaitBlit(a6)	; Wait for blitter to become free
X	ext.l	d0			; Convert all parameters to long
X	ext.l	d1			; (sign-extending the low word)
X	ext.l	d2			;
X	ext.l	d3			;
X	ext.l	d4			;
X	ext.l	d5			;
X
X	cmp	d1,d3			; See if we are scrolling up or down
X	bhi	bltdown			; YDest > YSrc (unsigned word compare)
X;
X;	Since YDest < YSrc, we are copying the bitmap upwards in memory
X;	therefore start at the beginning and work down. (This is only
X;	important if the source and destination bitmaps are the same, but
X;	it doesn't do any harm to check when they are different also.)
X;
Xbltup:
X	move.w	bm_BytesPerRow(a0),d6	; Get width of source bitmap
X	ext.l	d6			; Extend to full integer
X	move.l	d6,a2			; Initialise modulo for source bitmap
X	muls	d6,d1			; Row offset = BytesPerRow * YSrc
X	move.l	d0,d6			; Get XSrc
X	lsr.l	#3,d6			; Get #bytes offset of XSrc
X	and.b	#$fc,d6			; Adjust to longword boundary
X	add.l	d6,d1			; Add on x offset to get bitmap offset
X	move.l	d1,a4			; Now A4 = offset into source bitmap
X;
X;	Repeat for dest bitmap (A3 = row pitch, A5 = start offset)
X;
X	move.w	bm_BytesPerRow(a1),d6	; Get width of dest bitmap
X	ext.l	d6			; Extend to full integer
X	move.l	d6,a3			; Initialise modulo for dest bitmap
X	muls	d6,d3			; Row offset = BytesPerRow * YDest
X	move.l	d2,d6			; Get XDest
X	lsr.l	#3,d6			; (Converted to longword aligned
X	and.b	#$fc,d6			; byteoffset)
X	add.l	d6,d3			; Add on xoffset to get bitmap offset
X	move.l	d3,a5			; Now A5 = offset into dest bitmap
X	bra.s	contblit		; Skip to rest of blitcopy
X;
X;	If we get here, YDest > YSrc, so we are copying the bitmap downwards
X;	which means we need to start from the end and work back. We also
X;	need to initialise the modulo to -BytesPerRow instead of BytesPerRow.
X;
Xbltdown:
X	add.l	d5,d1			; Add YSize+YSrc to get last row addr
X	subq.l	#1,d1			; Adjust (so we don't have last_row+1)
X	move.w	bm_BytesPerRow(a0),d6	; Get width of source bitmap
X	ext.l	d6			; Extend to full longword
X	muls	d6,d1			; Row offset of the last source row
X	neg.l	d6			; Negate mod. since copying backwards
X	move.l	d6,a2			; Initialise modulo for source bitmap
X	move.l	d0,d6			; Get XSrc
X	lsr.l	#3,d6			; Get #bytes offset of XSrc
X	and.b	#$fc,d6			; Adjust to longword boundary
X	add.l	d6,d1			; Add on x offset to get bitmap offset
X	move.l	d1,a4			; Now A4 = offset into source bitmap
X;
X;	Do same calculations for dest bitmap
X;
X	add.l	d5,d3			; Add YSize+YDest to get last row addr
X	subq.l	#1,d3			; Adjust (so we don't have last_row+1)
X	move.w	bm_BytesPerRow(a1),d6	; Get width of dest bitmap
X	ext.l	d6			; Extend to full longword
X	muls	d6,d3			; Row offset of the last dest row
X	neg.l	d6			; Negate, since copying backwards
X	move.l	d6,a3			; Initialise modulo for dest bitmap
X	move.l	d2,d6			; Get XDest
X	lsr.l	#3,d6			; (Converted to longword aligned
X	and.b	#$fc,d6			; byteoffset)
X	add.l	d6,d3			; Add on xoffset to get bitmap offset
X	move.l	d3,a5			; Now A5 = offset into dest bitmap
X;
X;	Now calculate the mask values
X;
Xcontblit:
X	and.w	#$1f,d0			; Calculate XSrc longword bit offset
X	add.l	d0,d4			; D4 = bit offset + XSize, i.e. right
X					; edge measured from first longword
X	move.l	d4,d1			; Calculate longword bit offset
X	and.w	#$1f,d1			; of the right edge
X	lsr.l	#5,d4			; Calc # of longwords needed for copy
X	add.l	d1,d1			; Scale XWidth bits to longword index
X	add.l	d1,d1			; into the bitmask array
X	bne.s	contb1			; If zero (right edge falls exactly on
X	subq.l	#1,d4			; a longword boundary), decrement count
Xcontb1:
X	lea	RightMask(PC),a6	; Get address of right mask table
X	move.l	0(a6,d1.w),d2		; Get right bitmask
X	add.l	d0,d0			; Scale XSrc bits to longword index
X	add.l	d0,d0			; And again
Xcontb2:
X	lea	LeftMask(PC),a6		; Get address of left mask table
X	move.l	0(a6,d0.w),d1		; Get left bitmask
X;
X;	Calculate minimum number of bitplanes to copy
X;
X	moveq.l	#0,d6			; Zero out high bits of D6
X	move.b	bm_Depth(a0),d6		; Get depth of source bitmap
X	cmp.b	bm_Depth(a1),d6		; If source depth is below dest depth
X	blo.s	contb3			; keep the source depth
X	move.b	bm_Depth(a1),d6		; Use dest bitmap depth instead
Xcontb3:
X	subq.l	#1,d6			; Adjust depth to 0-based, not 1-based
X	move.l	d4,d0			; Copy longword count
X	addq.l	#1,d0			; Adjust positively
X	add.l	d0,d0			; Convert longword count to byte count
X	add.l	d0,d0			; (bytes touched in each row)
X	sub.l	d0,a2			; Calculate correct modulo for source
X	sub.l	d0,a3			; Calculate correct modulo for dest.
X	exg	a2,a4			; Setup A2/A3 = bitmap offsets
X	exg	a3,a5			;  and  A4/A5 = bitmap modulos
X
X	subq.l	#1,d4			; Adjust longword count to zero-based
X	move.l	d4,d3			; D3 = longwords between the two edges
X					; (negative if only one longword wide)
X	move.l	d5,d0			; Copy YSize to right place also
X	lea.l	bm_Planes(a0),a0	; Get pointer to source bitplane array
X	lea.l	bm_Planes(a1),a1	; Get pointer to dest bitplane array
X	move.l	a0,d4			; Stash bitplane pointers here
X	move.l	a1,d5			;
X	move.l	20(sp),d7		; Read plane mask value from stack
X					; (the D7 saved by the entry movem)
X;
X;	Now build a list of bitmaps to be copied on the stack. To this end,
X;	we reserve 8 * 8 = 64 bytes of stack for source/destination bitmap
X;	pointers.
X;
X	lea	-64(sp),sp		; Reserve space for bitmap ptrs
X	move.l	sp,a6			; And point to it using A6
X;
X;	Loop through bitmaps building bitmap list for bitmaps specified in
X;	the copy mask. Bitplanes which have source and/or destination bitmaps
X;	set to NULL or -1 get handled immediately (new for WB 2.0). All others
X;	get stored on the stack.
X;
X;	Inside the loop D4/D5 walk the source/dest plane pointer arrays,
X;	D6 counts remaining planes (zero-based, for dbf), D7 counts planes
X;	selected by the mask and the word at (sp) is the mask being shifted
X;	out one bit per plane. A2/A3 hold the byte offsets added to each
X;	plane pointer.
X;
X	move.w	d7,-(sp)		; Save plane mask as temporary value
X	moveq.l	#0,d7			; Clear bitmap plane count
Xcmultlp:
X	lsr.w	(sp)			; See if need to copy this bitplane
X	bcc.s	cmultx			; If not, skip over code
X	addq	#1,d7			; Increment number of bitmaps copied
X	move.l	d4,a0			; Get pointer to source bitplane ptr
X	move.l	d5,a1			; And destination bitplane ptr
X	move.l	(a0),d4			; Read pointers to bitplanes
X	move.l	(a1),d5			;
X	not.l	d5			; Check if dest is -1
X	beq	skipfill		; If so, don't copy anything
X	not.l	d5			; Check if dest is zero
X	beq	skipfill		; If so, don't copy anything
X	not.l	d4			; Check if source is -1
X	beq	fillones		; If so, fill dest with 1's
X	not.l	d4			; Check if source is 0
X	beq	fillzeros		; If so, fill dest with 0's
X	exg	d4,a0			; Put registers back in right place
X	exg	d5,a1			; (D4/D5 = array ptrs, A0/A1 = planes)
X	add.l	a2,a0			; Add in correct offset for src ptr
X	add.l	a3,a1			; Add in correct offset for dest ptr
X	move.l	a0,(a6)+		; Store bitmap pointers on the stack
X	move.l	a1,(a6)+		; as a source/dest pair
Xcmultx:
X	addq.l	#4,d4			; Bump bitplane pointers
X	addq.l	#4,d5			;
X	dbf	d6,cmultlp		; Repeat for remaining bitplanes
X	addq.l	#2,sp			; Pop plane mask from stack
X;
X;	Now copy all the bitmaps we accumulated on the stack. There will be
X;	between 1 and 8 of them. We copy them in groups of 1 to 4, so two
X;	operations may be required.
X;
X;	A quick recap on what the various registers contain:
X;
X;	    D0 - Number of rows to copy
X;	    D1 - Mask for left edge of bitmap
X;	    D2 - Mask for right edge of bitmap
X;	    D3 - Number of longwords _between_ left edge and right edge
X;	    D7 - Total number of bitplanes copied (including 0 & -1 ptrs)
X;	    A4 - Modulo of source bitplanes
X;	    A5 - Modulo of dest bitplanes
X;	    A6 - Points to end of source/dest bitplane pointers
X;	    SP - Points to start of source/dest bitplane pointers
X;
X	sub.l	sp,a6			; Bytes of plane pointers stored
X	move.l	a6,d6			; (8 per source/dest pair); halving
X	lsr.l	#1,d6			; gives 4 per pair, a range of 0-32
X	subq	#4,d6			; Now a FuncTab byte offset, -4 to 28
X	bpl.s	cmultx2			; If negative, no bitplanes to copy
X	lea	64(sp),sp		; so pop bitplane pointers from stack
X	bra	doneblt			; and exit without doing any work
Xcmultx2:
X	cmpi.w	#12,d6			; More than 4 bitplanes to copy?
X	bhi.s	cmult_db		; If so, skip to do in two goes
X	move.l	d3,d3			; Set flags from D3: does the bitmap
X					; fit in one longword (D3 negative)?
X	bpl.s	cmult_mm		; If not, skip to multiple longwords
X;
X;	We have between 1 and 4 bitplanes to copy, each a single
X;	longword wide.
X;
X	and.l	d2,d1			; Create composite mask
X	addq	#8,d6			; Adjust to index CopySingle() entries
X	addq	#8,d6			; (+16 in total) and then fall through.
X;
X;	We have between 1 and 4 bitplanes to copy, each at least two
X;	longwords wide.
X;
Xcmult_mm:
X	move.l	FuncTab(pc,d6),a6	; Else call appropriate routine
X	jsr	(a6)			;
X	lea	64(sp),sp		; Pop everything off the stack
X	bra	doneblt			; And skip to end of blit
X
Xcmult_db:
X	move.l	d3,d3			; Does bitplane fit in one longword?
X	bpl.s	cmult_dbm		; If not, skip to multiple copy
X;
X;	We have between 5 and 8 bitplanes to copy, each just one
X;	longword wide. Note that when we exit, we branch into the code to
X;	copy the remaining bitmaps, but with the function index pointing
X;	into the CopySingle() entries rather than CopyMultiple(). This works
X;	because D6 is 16-28 here and is deliberately NOT reduced by 16.
X;
X	and.l	d2,d1			; Create composite mask
X	bsr	Copy4Single		; Copy first four bitplanes
X	bra.s	cmult_dbm2		; Skip to exit with correct fn index
X;
X;	We have between 5 and 8 bitplanes to copy, each at least two
X;	longwords wide.
X;
Xcmult_dbm:
X	bsr	Copy4Multiple		; Copy first four bitmaps in one gulp
X	subi.w	#16,d6			; Adjust bitmap count (now 0-12)
Xcmult_dbm2:
X	lea	32(sp),sp		; Pop first four bitmaps off stack
X	move.l	FuncTab(pc,d6),a6	; Copy remaining bitmaps
X	jsr	(a6)			;
X	lea	32(sp),sp		; Pop remaining bitmaps
X	bra	doneblt			; And skip to end of blit
X
X;
X;	Index to table of functions for copying from 1 to 4 multiple and
X;	single longword bitmaps.
X;
X;
X;	Indexed by the byte offset in D6: 0-12 select the CopyMultiple()
X;	routines for 1-4 planes, 16-28 the CopySingle() routines for 1-4
X;	planes. The dispatch code relies on this exact order and on the
X;	Single entries starting 16 bytes after the Multiple ones.
X;
XFuncTab:
X	dc.l	Copy1Multiple,Copy2Multiple,Copy3Multiple,Copy4Multiple
X	dc.l	Copy1Single,Copy2Single,Copy3Single,Copy4Single
X
X;
X;	Skip past current bitplane without doing anything to bitplane data
X;	(used when destination bitmap ptr is 0 or -1).
X;
Xskipfill:
X	exg	d4,a0			; Restore original pointers: D4/D5
X	exg	d5,a1			; point into the plane arrays again
X	bra	cmultx			; Skip back to do next bitplane
X;
X;	Fill bitplane with one's (source bitplane pointer is -1)
X;
Xfillones:
X	exg	d4,a0			; Restore register order
X	exg	d5,a1			; (A1 = dest plane, D4/D5 = array ptrs)
X	add.l	a3,a1			; Add in correct offset into bitplane
X	bsr	Fill_1s			; Fill the bitplane; presumably takes
X					; the dest in A1 and preserves the
X					; loop registers -- confirm at Fill_1s
X	bra	cmultx			; Skip back to do next bitplane
X;
X;	Fill bitplane with zero's (source bitplane pointer is NULL)
X;
Xfillzeros:
X	exg	d4,a0			; Restore register order
X	exg	d5,a1			; (A1 = dest plane, D4/D5 = array ptrs)
X	add.l	a3,a1			; Add in correct offset into bitplane
X	bsr	Fill_0s			; Fill the bitplane; presumably takes
X					; the dest in A1 and preserves the
X					; loop registers -- confirm at Fill_0s
X	bra	cmultx			; Skip back to do next bitplane
X;
X;	That's it -- we're done! Now just pop remaining values off the stack
X;	and return to the caller with d0 = number of bitplanes copied.
X;
Xdoneblt:
X	move.l	d7,d0			; Set return value = #bitplanes copied
X	subq.l	#1,_UsageCount		; Decrement number of callers in code
X	movem.l	(sp)+,d1-d5/d7/a0-a6	; Restore registers saved at sblit2
X	move.l	(sp)+,d6		; And the D6 pushed by _NewBltBitMap
X	rts				; Return to caller
X
X*****************************************************************************
X*
X*	CopyMultiple()
X*
X*	The following routines copy from 1 to 4 bitplanes which span more
X*	than one longword boundary horizontally (i.e. the start and finish
X*	bitplanes are in different longwords).
X*
X*	The routines are constructed mainly out of macros, to keep the source
X*	code down to size (and also more manageable). All routines take the
X*	following parameters:
X*
X*	Input:
X*		D0 - Number of rows to copy
X*		D1 - Mask for left edge of source	(000xxx)
X*		D2 - Mask for right edge of source	(xxx000)
X*		D3 - Number of longwords to copy
X*	        A4 - Modulo of source (positive or negative)
X*	        A5 - Modulo of destination (positive or negative)
X*
X*	In addition, pointers to the source/destination bitplanes are pushed
X*	onto the stack, such that 4(SP) = src bp1, 8(SP) = dest bp1,
X*	12(SP) = src bp2, 16(SP) = dest bp2 etc.
X*
X*	Output:
X*		None
X*
X*****************************************************************************
X
X*****************************************************************************
X*
X*	Macros used by the copy routines
X*
X*****************************************************************************
X
X;-----------------------------------------------------------------------------
X;	Init_Mult Label
X;
X;	This macro is the standard entry to each CopyMultiple() routine. It
X;	checks to see whether the bitplane being copied contains at least
X;	one full longword. If not, it branches to a separate routine
X;	(loop?edges) which is smaller; doing this at the start saves having
X;	to check for zero longwords each time through the main loop.
X;	Label is the name of the routine to perform the separate copy.
X;-----------------------------------------------------------------------------
X
XInit_Mult macro
X	subq.l	#1,d0			; Convert row count to zero-based
X	move.l	d1,d4			; Copy left source mask
X	not.l	d4			; And change it into destination mask
X	move.l	d2,d5			; Copy right source mask
X	not.l	d5			; Change into destination mask
X	subq.l	#1,d3			; Adjust longword count to zero based
X	bmi	\1			; If none to copy use separate routine
X	endm
X
X;-----------------------------------------------------------------------------
X;	Left_Mult   src,dest
X;
X;	Copies the left hand side of the bitplane from register src to the
X;	bitplane pointed to by dest, using the masks in d1/d4
X;-----------------------------------------------------------------------------
X
XLeft_Mult macro
X	move.l	(\2),d7			; Fetch dest longword under left edge
X	move.l	(\1)+,d6		; Fetch source longword, step source
X	and.l	d4,d7			; Dest keeps the bits outside the blit
X	and.l	d1,d6			; Source supplies the bits inside it
X	or.l	d6,d7			; Combine the two halves
X	move.l	d7,(\2)+		; Write result back, step dest
X	endm
X
X;-----------------------------------------------------------------------------
X;	Copy_Mult src,dest
X;
X;	Copies all the full longwords between the left and right extremities
X;	of the bitplane row from src to dest. Note that for 68010 upwards, it
X;	is faster to copy using MOVE.L/DBF than to play tricks with MOVEM;
X;	since this program will only be of use to systems with fast CPU's
X;	anyway, this is the route we take.
X;-----------------------------------------------------------------------------
X
XCopy_Mult macro
X	move.l	d3,d6			; Copy longword count into scratch reg
X					; (zero-based, so D3+1 are copied)
Xloop_m\@:
X	move.l	(\1)+,(\2)+		; Copy longwords
X	dbf	d6,loop_m\@		; dbf counts on the low word of D6
X	endm
X
X;-----------------------------------------------------------------------------
X;	Right_Mult   src,dest
X;
X;	Copies the right hand side of the bitplane from register src to the
X;	bitplane pointed to by dest, using the masks in d2/d5
X;-----------------------------------------------------------------------------
X
XRight_Mult macro
X	move.l	(\2),d7			; Fetch dest longword under right edge
X	move.l	(\1)+,d6		; Fetch source longword, step source
X	and.l	d5,d7			; Dest keeps the bits outside the blit
X	and.l	d2,d6			; Source supplies the bits inside it
X	or.l	d6,d7			; Combine the two halves
X	move.l	d7,(\2)+		; Write result back, step dest
X	endm
X
X;-----------------------------------------------------------------------------
X;	Advance src,dest
X;
X;	This macro advances the source and destination pointers to point to
X;	the next row in the bitplane.
X;-----------------------------------------------------------------------------
X
XAdvance macro
X	add.l	a4,\1			; Source ptr += source modulo (A4)
X	add.l	a5,\2			; Dest ptr += dest modulo (A5)
X	endm
X
X;-----------------------------------------------------------------------------
X;	Copy_Quick src,dest
X;
X;	This macro copies the left and right edges in one go, when there
X;	are no complete longwords in between. It's quicker than having to
X;	check for zero longwords each time through the main loop. The masks
X;	used are d1/d4 for the left edge of the bitplane, d2/d5 for the
X;	right edge.
X;-----------------------------------------------------------------------------
X
XCopy_Quick macro
X	move.l	(\2),d7			; Fetch dest longword under left edge
X	move.l	(\1)+,d6		; Fetch source longword, step source
X	and.l	d4,d7			; Dest keeps the bits outside the blit
X	and.l	d1,d6			; Source supplies the bits inside it
X	or.l	d6,d7			; Combine the two halves
X	move.l	d7,(\2)+		; Write left edge back, step dest
X;
X;	Same again for the right hand edge, using masks d2/d5
X;
X	move.l	(\2),d7			; Fetch dest longword under right edge
X	move.l	(\1)+,d6		; Fetch source longword, step source
X	and.l	d5,d7			; Dest keeps the bits outside the blit
X	and.l	d2,d6			; Source supplies the bits inside it
X	or.l	d6,d7			; Combine the two halves
X	move.l	d7,(\2)+		; Write right edge back, step dest
X	endm
X
X*****************************************************************************
X*
X*	The actual copy routines, Copy1Multiple() ... Copy4Multiple()
X*
X*****************************************************************************
X
X;-----------------------------------------------------------------------------
X;
X;	Copies a single bitplane
X;
X;-----------------------------------------------------------------------------
X
XCopy1Multiple:
X	movem.l	d0-d7/a0-a1/a6,-(sp)	; Preserve all registers used (44 bytes)
X	movem.l	48(sp),a0-a1		; A0/A1 = source/dest plane pointers,
X					; found just above the return address
X	Init_Mult	Copy1Quick	; Build dest masks, pick loop variant
Xc1m_row:
X	Left_Mult	a0,a1		; Masked left edge
X	Copy_Mult	a0,a1		; Whole longwords in the middle
X	Right_Mult	a0,a1		; Masked right edge
X	Advance		a0,a1		; Step both pointers to the next row
X	dbra		d0,c1m_row	; One pass per row
Xc1m_exit:
X	movem.l	(sp)+,d0-d7/a0-a1/a6	; Put every register back
X	rts				; Done
X;
X;	Variant for rows made of just a left and a right edge longword
X;
XCopy1Quick:
X	Copy_Quick	a0,a1		; Both masked edges in one go
X	Advance		a0,a1		; Step both pointers to the next row
X	dbra		d0,Copy1Quick	; One pass per row
X	bra.s		c1m_exit	; Leave through the common exit
X
X;-----------------------------------------------------------------------------
X;
X;	Copies 2 bitplanes simultaneously
X;
X;-----------------------------------------------------------------------------
X
XCopy2Multiple:
X	movem.l	d0-d7/a0-a3/a6,-(sp)	; Preserve all registers used (52 bytes)
X	movem.l	56(sp),a0-a3		; A0/A1 = plane 1 src/dest, A2/A3 =
X					; plane 2 src/dest, from the caller's
X					; list above the return address
X	Init_Mult	Copy2Quick	; Build dest masks, pick loop variant
Xc2m_row:
X	Left_Mult	a0,a1		; Masked left edge, plane 1
X	Left_Mult	a2,a3		; Masked left edge, plane 2
X	Copy_Mult	a0,a1		; Whole longwords, plane 1
X	Copy_Mult	a2,a3		; Whole longwords, plane 2
X	Right_Mult	a0,a1		; Masked right edge, plane 1
X	Right_Mult	a2,a3		; Masked right edge, plane 2
X	Advance		a0,a1		; Next row, plane 1
X	Advance		a2,a3		; Next row, plane 2
X	dbra		d0,c2m_row	; One pass per row
Xc2m_exit:
X	movem.l	(sp)+,d0-d7/a0-a3/a6	; Put every register back
X	rts				; Done
X;
X;	Variant for rows made of just a left and a right edge longword
X;
XCopy2Quick:
X	Copy_Quick	a0,a1		; Both masked edges, plane 1
X	Copy_Quick	a2,a3		; Both masked edges, plane 2
X	Advance		a0,a1		; Next row, plane 1
X	Advance		a2,a3		; Next row, plane 2
X	dbra		d0,Copy2Quick	; One pass per row
X	bra.s		c2m_exit	; Leave through the common exit
X
X;-----------------------------------------------------------------------------
X;
X;	Copies 3 bitplanes simultaneously
X;
X;-----------------------------------------------------------------------------
X
X;
X;	Planes 1 and 2 live in A0-A3 for the whole routine. Plane 3 has no
X;	registers of its own: on every row its pointers are loaded from the
X;	caller's pointer list (via A6) into A2/A3, used, and written back
X;	advanced, while the plane 2 pointers wait on the stack.
X;
XCopy3Multiple:
X	movem.l	a0-a3/a6/d0-d7,-(sp)	; Save registers
X	lea.l		56(sp),a6	; Get pointer to bitplanes
X	movem.l		(a6)+,a0-a3	; Load bitplane ptrs 1 & 2 off stack
X					; (A6 now points at plane 3's pair)
X	Init_Mult	Copy3Quick	; Setup registers
Xc3m_loop:
X	Left_Mult	a0,a1		; Copy left edge of bitplane 1
X	Left_Mult	a2,a3		; Copy left edge of bitplane 2
X	Copy_Mult	a0,a1		; Copy middle of bitplane 1
X	Copy_Mult	a2,a3		; Copy middle of bitplane 2
X	Right_Mult	a0,a1		; Copy right edge of bitplane 1
X	Right_Mult	a2,a3		; Copy right edge of bitplane 2
X	Advance		a0,a1		; Increment bitplane 1 ptrs
X	Advance		a2,a3		; Increment bitplane 2 ptrs
X	move.l		a3,-(sp)	; Save bitplane 2 ptrs
X	move.l		a2,-(sp)	;
X	move.l		(a6)+,a2	; Load bitplane 3 ptrs
X	move.l		(a6),a3		; (A6 left pointing at the dest slot)
X	Left_Mult	a2,a3		; Copy left edge of bitplane 3
X	Copy_Mult	a2,a3		; Copy middle of bitplane 3
X	Right_Mult	a2,a3		; Copy right edge of bitplane 3
X	Advance		a2,a3		; Increment bitplane 3 ptrs
X	move.l		a3,(a6)		; Save bitplane 3 ptrs
X	move.l		a2,-(a6)	; (A6 back at the source slot)
X	move.l		(sp)+,a2	; Restore bitplane 2 ptrs
X	move.l		(sp)+,a3	;
X	dbf		d0,c3m_loop	; Repeat for remaining rows
X	movem.l	(sp)+,a0-a3/a6/d0-d7	; Restore registers
X	rts				; Return to caller
X;
X;	Handle inner longword count of zero
X;
XCopy3Quick:
X	Copy_Quick	a0,a1		; Copy left/right edge of bitplane 1
X	Copy_Quick	a2,a3		; Copy left/right edge of bitplane 2
X	Advance		a0,a1		; Increment bitplane 1 ptrs
X	Advance		a2,a3		; Increment bitplane 2 ptrs
X	move.l		a3,-(sp)	; Save bitplane 2 ptrs
X	move.l		a2,-(sp)	;
X	move.l		(a6)+,a2	; Load bitplane 3 ptrs
X	move.l		(a6),a3		; 
X	Copy_Quick	a2,a3		; Copy left/right edge of bitplane 3
X	Advance		a2,a3		; Increment bitplane 3 ptrs
X	move.l		a3,(a6)		; Save bitplane 3 ptrs
X	move.l		a2,-(a6)	;
X	move.l		(sp)+,a2	; Restore bitplane 2 ptrs
X	move.l		(sp)+,a3	;
X	dbra		d0,Copy3Quick	; Repeat for all rows
X	movem.l	(sp)+,a0-a3/a6/d0-d7	; Restore registers
X	rts				; Return to caller
X
X;-----------------------------------------------------------------------------
X;
X;	Copies 4 bitplanes simultaneously
X;
X;-----------------------------------------------------------------------------
X
X;
X;	Planes 1 and 2 live in A0-A3. On every row they are parked on the
X;	stack while the pointers for planes 3 and 4 are loaded from the
X;	caller's pointer list (via A6) into the same registers, used, and
X;	written back advanced.
X;
XCopy4Multiple:
X	movem.l	a0-a3/a6/d0-d7,-(sp)	; Save registers
X	lea.l		56(sp),a6	; Get pointer to bitplanes
X	movem.l		(a6)+,a0-a3	; Load bitplane ptrs 1 & 2 off stack
X					; (A6 now points at plane 3's pair)
X	Init_Mult	Copy4Quick	; Setup registers
Xc4m_loop:
X	Left_Mult	a0,a1		; Copy left edge of bitplane 1
X	Left_Mult	a2,a3		; Copy left edge of bitplane 2
X	Copy_Mult	a0,a1		; Copy middle of bitplane 1
X	Copy_Mult	a2,a3		; Copy middle of bitplane 2
X	Right_Mult	a0,a1		; Copy right edge of bitplane 1
X	Right_Mult	a2,a3		; Copy right edge of bitplane 2
X	Advance		a0,a1		; Increment bitplane 1 ptrs
X	Advance		a2,a3		; Increment bitplane 2 ptrs
X	movem.l		a0-a3,-(sp)	; Save bitplane 1,2 ptrs
X	movem.l		(a6),a0-a3	; Load bitplane 3,4 ptrs
X	Left_Mult	a0,a1		; Copy left edge of bitplane 3
X	Left_Mult	a2,a3		; Copy left edge of bitplane 4
X	Copy_Mult	a0,a1		; Copy middle of bitplane 3
X	Copy_Mult	a2,a3		; Copy middle of bitplane 4
X	Right_Mult	a0,a1		; Copy right edge of bitplane 3
X	Right_Mult	a2,a3		; Copy right edge of bitplane 4
X	Advance		a0,a1		; Increment bitplane 3 ptrs
X	Advance		a2,a3		; Increment bitplane 4 ptrs
X	movem.l		a0-a3,(a6)	; Save bitplane 3,4 ptrs
X	movem.l		(sp)+,a0-a3	; Restore bitplane 1,2 ptrs
X	dbf		d0,c4m_loop	; Repeat for remaining rows
X	movem.l	(sp)+,a0-a3/a6/d0-d7	; Restore registers
X	rts				; Return to caller
X;
X;	Handle inner longword count of zero
X;
XCopy4Quick:
X	Copy_Quick	a0,a1		; Copy left/right edge of bitplane 1
X	Copy_Quick	a2,a3		; Copy left/right edge of bitplane 2
X	Advance		a0,a1		; Increment bitplane 1 ptrs
X	Advance		a2,a3		; Increment bitplane 2 ptrs
X	movem.l		a0-a3,-(sp)	; Save bitplane 1,2 ptrs
X	movem.l		(a6),a0-a3	; Load bitplane 3,4 ptrs
X	Copy_Quick	a0,a1		; Copy left/right edge of bitplane 3
X	Copy_Quick	a2,a3		; Copy left/right edge of bitplane 4
X	Advance		a0,a1		; Increment bitplane 3 ptrs
X	Advance		a2,a3		; Increment bitplane 4 ptrs
X	movem.l		a0-a3,(a6)	; Save bitplane 3,4 ptrs
X	movem.l		(sp)+,a0-a3	; Restore bitplane 1,2 ptrs
X	dbra		d0,Copy4Quick	; Repeat for all rows
X	movem.l	(sp)+,a0-a3/a6/d0-d7	; Restore registers
X	rts				; Return to caller
X
X*****************************************************************************
X*
X*	CopySingle()
X*
X*	The following routines copy from 1 to 4 bitplanes that start and end
X*	(horizontally) within a single longword. CopyMultiple can't be used
X*	for such cases, since it always copies at least two longwords (one
X*	for the left edge and one for the right).
X*
X*	Input:
X*		D0 - Number of rows to copy
X*		D1 - Mask of bits to be copied from source  (000xxx000)
X*		A4 - Modulo of source bitplane
X*		A5 - Modulo of dest bitplane
X*
X*	In addition, pointers to the source/destination bitplanes are pushed
X*	onto the stack, such that 4(SP) = src bp1, 8(SP) = dest bp1,
X*	12(SP) = src bp2, 16(SP) = dest bp2 etc.
X*
X*	Output:
X*		None
X*
X*****************************************************************************
X
X*****************************************************************************
X*
X*	Macros used by the copy routines
X*
X*****************************************************************************
X
X;-----------------------------------------------------------------------------
X;	Init_Sing
X;	Standard entry sequence for each CopySingle() routine.
X;	In:  D0 = number of rows, D1 = mask of source bits to copy
X;	Out: D0 = rows - 1 (zero-based loop count), D2 = NOT D1, the mask of
X;	     destination bits to keep. D1 itself is left unchanged.
X;-----------------------------------------------------------------------------
X
XInit_Sing macro
X	subq.l	#1,d0			; Adjust row count to zero-based
X	move.l	d1,d2			; Copy source mask
X	not.l	d2			; Complement it: mask for dest bitplane
X	endm
X
X;-----------------------------------------------------------------------------
X;	Copy_Dual src,dest
X;	Merges one longword from (src) into (dest): bits set in D1 are taken
X;	from the source, bits set in D2 are kept from the destination.
X;	D3 and D4 are used as scratch; both pointers are advanced by 4.
X;-----------------------------------------------------------------------------
X
XCopy_Dual macro
X	move.l	(\1)+,d3		; Read src longword, step src ptr
X	and.l	d1,d3			; Mask out unwanted bits
X	move.l	(\2),d4			; Read dest longword
X	and.l	d2,d4			; Mask out bits to be replaced
X	or.l	d3,d4			; Combine src and dest bits
X	move.l	d4,(\2)+		; Write result back, step dest ptr
X	endm
X
X*****************************************************************************
X*
X*	The actual copy routines, Copy1Single() ... Copy4Single()
X*
X*****************************************************************************
X
X;-----------------------------------------------------------------------------
X;	Copy1Single
X;	Copies a single bitplane one longword wide
X;	(src/dest ptrs are read from the first two stack argument slots)
X;-----------------------------------------------------------------------------
X
XCopy1Single:
X	movem.l	a0-a1/a6/d0-d4,-(sp)	; Save registers (8 regs, 32 bytes)
X	lea		36(sp),a6	; Skip saved regs + return address
X	move.l		(a6)+,a0	; a0 = source bitplane ptr
X	move.l		(a6),a1		; a1 = dest bitplane ptr
X	Init_Sing			; Initialise masks etc.
Xcopy1slp:
X	Copy_Dual	a0,a1		; Copy longword
X	Advance		a0,a1		; Advance bitplane ptrs
X	dbra		d0,copy1slp	; Repeat for all rows
X	movem.l	(sp)+,a0-a1/a6/d0-d4	; Restore registers
X	rts				; Return to caller
X
X;-----------------------------------------------------------------------------
X;	Copy2Single
X;	Copies two bitplanes, each one longword wide
X;	(all four src/dest ptrs are loaded from the stack into a0-a3)
X;-----------------------------------------------------------------------------
X
XCopy2Single:
X	movem.l	a0-a3/a6/d0-d4,-(sp)	; Save registers (10 regs, 40 bytes)
X	lea		44(sp),a6	; Skip saved regs + return address
X	movem.l		(a6)+,a0-a3	; Load bitplane 1,2 ptrs
X	Init_Sing			; Initialise masks etc.
Xcopy2slp:
X	Copy_Dual	a0,a1		; Copy longword for bitplane 1
X	Copy_Dual	a2,a3		; Copy longword for bitplane 2
X	Advance		a0,a1		; Advance bitplane 1 ptrs
X	Advance		a2,a3		; Advance bitplane 2 ptrs
X	dbra		d0,copy2slp	; Repeat for all rows
X	movem.l	(sp)+,a0-a3/a6/d0-d4	; Restore registers
X	rts				; Return to caller
X
X;-----------------------------------------------------------------------------
X;	Copy3Single
X;	Copies three bitplanes, each one longword wide
X;	Plane 3 ptrs stay in their stack slots (updated via a6) and borrow a0/a1
X;-----------------------------------------------------------------------------
X
XCopy3Single:
X	movem.l	a0-a3/a6/d0-d4,-(sp)	; Save registers (10 regs, 40 bytes)
X	lea		44(sp),a6	; Skip saved regs + return address
X	movem.l		(a6)+,a0-a3	; Load bitplane 1,2 ptrs; a6 -> plane 3
X	Init_Sing			; Initialise masks etc.
Xcopy3slp:
X	Copy_Dual	a0,a1		; Copy longword for bitplane 1
X	Copy_Dual	a2,a3		; Copy longword for bitplane 2
X	Advance		a0,a1		; Advance bitplane 1 ptrs
X	Advance		a2,a3		; Advance bitplane 2 ptrs
X	move.l		a1,-(sp)	; Save bitplane 1 ptrs (a0/a1 reused)
X	move.l		a0,-(sp)	;
X	move.l		(a6)+,a0	; Load bitplane 3 ptrs
X	move.l		(a6),a1		; (a6 now points at the dest slot)
X	Copy_Dual	a0,a1		; Copy longword for bitplane 3
X	Advance		a0,a1		; Advance bitplane 3 ptrs
X	move.l		a1,(a6)		; Save bitplane 3 ptrs in their slots
X	move.l		a0,-(a6)	; (a6 is back at the src slot)
X	move.l		(sp)+,a0	; Restore bitplane 1 ptrs
X	move.l		(sp)+,a1	;
X	dbra		d0,copy3slp	; Repeat for all rows
X	movem.l	(sp)+,a0-a3/a6/d0-d4	; Restore registers
X	rts				; Return to caller
X
X;-----------------------------------------------------------------------------
X;	Copy4Single
X;	Copies four bitplanes, each one longword wide
X;	Plane 3,4 ptrs stay in their stack slots and are updated there via a6
X;-----------------------------------------------------------------------------
X
XCopy4Single:
X	movem.l	a0-a3/a6/d0-d4,-(sp)	; Save registers (10 regs, 40 bytes)
X	lea		44(sp),a6	; Skip saved regs + return address
X	movem.l		(a6)+,a0-a3	; Load bitplane 1,2 ptrs; a6 -> plane 3
X	Init_Sing			; Initialise masks etc.
Xcopy4slp:
X	Copy_Dual	a0,a1		; Copy longword for bitplane 1
X	Copy_Dual	a2,a3		; Copy longword for bitplane 2
X	Advance		a0,a1		; Advance bitplane 1 ptrs
X	Advance		a2,a3		; Advance bitplane 2 ptrs
X	movem.l		a0-a3,-(sp)	; Save bitplane 1 and 2 ptrs on stack
X	movem.l		(a6),a0-a3	; Read bitplane 3 and 4 ptrs
X	Copy_Dual	a0,a1		; Copy longword for bitplane 3
X	Copy_Dual	a2,a3		; Copy longword for bitplane 4
X	Advance		a0,a1		; Advance bitplane 3 ptrs
X	Advance		a2,a3		; Advance bitplane 4 ptrs
X	movem.l		a0-a3,(a6)	; Save bitplane 3 and 4 ptrs in slots
X	movem.l		(sp)+,a0-a3	; Restore bitplane 1 and 2 ptrs
X	dbra		d0,copy4slp	; Repeat for all rows
X	movem.l	(sp)+,a0-a3/a6/d0-d4	; Restore registers
X	rts				; Return to caller
X
X*****************************************************************************
X*
X*	Fill_1s(), Fill_0s
X*
X*	Handles the case, new for Workbench 2.0, where the source bitplane
X*	pointer is a special value standing for a plane of all ones
X*	(ptr = $FFFFFFFF) or all zeros (ptr = $00000000).
X*
X*	    Input:
X*		D0 - Number of rows to copy
X*		D1 - Mask for left edge of source	(000xxx)
X*		D2 - Mask for right edge of source	(xxx000)
X*		D3 - Number of complete longwords between edges (<0 = single column)
X*		A1 - Pointer to dest bitplane
X*		A5 - Modulo of dest bitplane
X*
X*	    Output:
X*		None
X*
X*****************************************************************************
X
XFill_1s:
X	movem.l	d0/d3/d6-d7/a1,-(sp)	; Preserve every register we alter
X	subq.l	#1,d0			; Rows - 1, the form dbf wants
X	moveq	#-1,d7			; d7 = all ones, the fill pattern
X	tst.l	d3			; Is the longword count negative?
X	bmi.s	Fill_1single		; Yes: each row is one longword
X	subq.w	#1,d3			; Inner longwords - 1 for dbf
X	bmi.s	Fill_1quick		; No inner longwords: edges only
X;
X;	General case: left edge, the complete inner longwords, right edge.
X;
Xfill_1lp1:
X	or.l	d1,(a1)+		; Left edge: set only the masked bits
X	move.l	d3,d6			; Reload inner loop counter
Xfill_1lp2:
X	move.l	d7,(a1)+		; Inner longwords become all ones
X	dbf	d6,fill_1lp2		;
X	or.l	d2,(a1)+		; Right edge: set only the masked bits
X	adda.l	a5,a1			; Add dest modulo to reach next row
X	dbf	d0,fill_1lp1		; Loop until every row is done
X	movem.l	(sp)+,d0/d3/d6-d7/a1	; Put registers back
X	rts				; Done
X;
X;	Each row is exactly two longwords: the left and right edges.
X;
XFill_1quick:
X	or.l	d1,(a1)+		; Left edge: set only the masked bits
X	or.l	d2,(a1)+		; Right edge: set only the masked bits
X	adda.l	a5,a1			; Add dest modulo to reach next row
X	dbf	d0,Fill_1quick		; Loop until every row is done
X	movem.l	(sp)+,d0/d3/d6-d7/a1	; Put registers back
X	rts				; Done
X;
X;	Each row lies within one longword: use the bits common to both masks.
X;
XFill_1single:
X	move.l	d2,d6			; Start from the right edge mask
X	and.l	d1,d6			; and keep only bits also in the left
XFill_1s2:
X	or.l	d6,(a1)+		; Set the selected bits in this row
X	adda.l	a5,a1			; Add dest modulo to reach next row
X	dbf	d0,Fill_1s2		; Loop until every row is done
X	movem.l	(sp)+,d0/d3/d6-d7/a1	; Put registers back
X	rts				; Done
X
X;-----------------------------------------------------------------------------
X;	Fill_0s: clear the selected area of a bitplane to zeros
X;-----------------------------------------------------------------------------
X
XFill_0s:
X	movem.l	d0-d3/d6-d7/a1,-(sp)	; Preserve every register we alter
X	subq.l	#1,d0			; Rows - 1, the form dbf wants
X	moveq	#0,d7			; d7 = all zeros, the fill pattern
X	not.l	d2			; Turn both edge masks into keep-masks
X	not.l	d1			; so they can be ANDed into the dest
X	tst.l	d3			; Is the longword count negative?
X	bmi.s	Fill_0single		; Yes: each row is one longword
X	subq.w	#1,d3			; Inner longwords - 1 for dbf
X	bmi.s	Fill_0quick		; No inner longwords: edges only
X;
X;	General case: left edge, the complete inner longwords, right edge.
X;
Xfill_0lp1:
X	and.l	d1,(a1)+		; Left edge: clear only the masked bits
X	move.l	d3,d6			; Reload inner loop counter
Xfill_0lp2:
X	move.l	d7,(a1)+		; Inner longwords become all zeros
X	dbf	d6,fill_0lp2		;
X	and.l	d2,(a1)+		; Right edge: clear only the masked bits
X	adda.l	a5,a1			; Add dest modulo to reach next row
X	dbf	d0,fill_0lp1		; Loop until every row is done
X	movem.l	(sp)+,d0-d3/d6-d7/a1	; Put registers back
X	rts				; Done
X;
X;	Each row is exactly two longwords: the left and right edges.
X;
XFill_0quick:
X	and.l	d1,(a1)+		; Left edge: clear only the masked bits
X	and.l	d2,(a1)+		; Right edge: clear only the masked bits
X	adda.l	a5,a1			; Add dest modulo to reach next row
X	dbf	d0,Fill_0quick		; Loop until every row is done
X	movem.l	(sp)+,d0-d3/d6-d7/a1	; Put registers back
X	rts				; Done
X;
X;	Each row lies within one longword: keep bits outside either edge mask.
X;
XFill_0single:
X	move.l	d2,d6			; Start from inverted right edge mask
X	or.l	d1,d6			; and add the inverted left edge mask
XFill_0s2:
X	and.l	d6,(a1)+		; Clear the selected bits in this row
X	adda.l	a5,a1			; Add dest modulo to reach next row
X	dbf	d0,Fill_0s2		; Loop until every row is done
X	movem.l	(sp)+,d0-d3/d6-d7/a1	; Put registers back
X	rts				; Done
X
X*****************************************************************************
X*
X*	These two tables give the mask values used when copying the
X*	bits at the edge of each bitplane row. Note that a right edge
X*	of zero bits in width is handled as a special case in the code
X*	(it gets converted to a bitmap which is one longword narrower
X*	but has a right edge 32 bits wide).
X*
X*****************************************************************************
X
XLeftMask:				; Entry n (0-31): top n bits clear
X	dc.l	$ffffffff,$7fffffff,$3fffffff,$1fffffff	; n =  0- 3
X	dc.l	$0fffffff,$07ffffff,$03ffffff,$01ffffff	; n =  4- 7
X	dc.l	$00ffffff,$007fffff,$003fffff,$001fffff	; n =  8-11
X	dc.l	$000fffff,$0007ffff,$0003ffff,$0001ffff	; n = 12-15
X	dc.l	$0000ffff,$00007fff,$00003fff,$00001fff	; n = 16-19
X	dc.l	$00000fff,$000007ff,$000003ff,$000001ff	; n = 20-23
X	dc.l	$000000ff,$0000007f,$0000003f,$0000001f	; n = 24-27
X	dc.l	$0000000f,$00000007,$00000003,$00000001	; n = 28-31
X
XRightMask:				; Entry n (1-31): top n bits set
X	dc.l	$ffffffff,$80000000,$c0000000,$e0000000	; n =  0- 3 (0 = all ones)
X	dc.l	$f0000000,$f8000000,$fc000000,$fe000000	; n =  4- 7
X	dc.l	$ff000000,$ff800000,$ffc00000,$ffe00000	; n =  8-11
X	dc.l	$fff00000,$fff80000,$fffc0000,$fffe0000	; n = 12-15
X	dc.l	$ffff0000,$ffff8000,$ffffc000,$ffffe000	; n = 16-19
X	dc.l	$fffff000,$fffff800,$fffffc00,$fffffe00	; n = 20-23
X	dc.l	$ffffff00,$ffffff80,$ffffffc0,$ffffffe0	; n = 24-27
X	dc.l	$fffffff0,$fffffff8,$fffffffc,$fffffffe	; n = 28-31
X
X
X*****************************************************************************
X*
X*	Variables used by the code. _UsageCount is only ever updated
X*	atomically (since the replacement code must be re-entrant), and
X*	_BlitFunc is initialised by the startup code.
X*
X*****************************************************************************
X
X	SECTION Scroll,DATA
X
X	cnop	0,4			; Align variables to a longword boundary
X
X_UsageCount:	dc.l	-1		; Callers currently in code (starts at -1)
X_BlitFunc:	dc.l	_StartBlit	; Address of function for blitter test
X_OnlySingle:	dc.l	0		; Only use CPU when src bm == dest bm?
X_Broken:	dc.l	0		; Accommodate broken software?
X_MinTaskPri:	dc.b	0		; Ignore tasks with pri <= this
XPad		dc.b	0,0,0		; Padding to round to LW boundary
X
X	END
X
END_OF_FILE
if test 42602 -ne `wc -c <'src/scroll.s'`; then
    echo shar: \"'src/scroll.s'\" unpacked with wrong size!
fi
# end of 'src/scroll.s'
fi
echo shar: End of archive 2 \(of 2\).
# Leave an empty marker file recording that part 2 has been unpacked.
cp /dev/null ark2isdone
# Collect the numbers of any parts whose marker file is absent.
MISSING=""
for I in 1 2 ; do
    if test ! -f ark${I}isdone ; then
	MISSING="${MISSING} ${I}"
    fi
done
# If every part's marker exists, say so and remove the markers;
# otherwise list the parts that still have to be unpacked.
if test "${MISSING}" = "" ; then
    echo You have unpacked both archives.
    rm -f ark[1-9]isdone
else
    echo You still need to unpack the following archives:
    echo "        " ${MISSING}
fi
##  End of shell archive.
exit 0
-- 
Mail submissions (sources or binaries) to <amiga@uunet.uu.net>.
Mail comments to the moderator at <amiga-request@uunet.uu.net>.
Post requests for sources, and general discussion to comp.sys.amiga.misc.