[comp.sys.amiga.tech] Complete Source Code for Processor Polygon Filler

jcs@crash.cts.com (John Schultz) (06/09/90)

  To borrow a phrase from Young MC, You want it? You got it. Here are my 
complete processor polygon fill routines. Now you can find out how much
faster the processor is first hand. I made this code as generic as
possible, so Macs and other 68000-based systems should be able to use this
code (although register convention may force some changes). 
  This code also points out a bug in Lattice 5.04, read on for more info.

  This *source* code may be used for whatever purpose you wish, but cannot
be published or sold without permission of author. You can use the source
without restriction in your own programs.

  If you can make this code go faster, by all means, let me know...


  Have fun, enjoy, etc,


  John Schultz
  jcs@crash.cts.com


#! /bin/sh
# This is a shell archive, meaning:
# 1. Remove everything above the #! /bin/sh line.
# 2. Save the resulting text in a file.
# 3. Execute the file with /bin/sh (not csh) to create the files:
#	fill.c
#	scanconvert.a
# This archive created: Fri Jun  8 21:06:48 1990
export PATH; PATH=/bin:$PATH
if test -f 'fill.c'
then
	echo shar: will not over-write existing file "'fill.c'"
else
cat << \SHAR_EOF > 'fill.c'
/* fill.c, a processor polygon filler */
/* Copyright (c) 1990 John Schultz, All Rights Reserved */

/* This is the C interface to the assembly code that does most of the   */
/* work. I haven't seen an algorithm of this type for filling polygons  */
/* elsewhere, so I'd like to think my implementation of this table fill */
/* algorithm is unique :-). The algorithm works by first finding out the */
/* orientation of the polygon, then filling the tables from miny to    */
/* maxy. This allows the table to be updated unconditionally, without */
/* having to read the current values from the tables, compare, then  */
/* write back to memory.                                            */
/* The polygons are an array of drawpoints, and you don't need to  */
/* close the polygons: a triangle will only have three points.    */

/* This code has been optimized for 320x200x4 bitmaps, which have */
/* been allocated linearly (One 32,000 byte allocation as opposed */
/* to four 8,000 byte allocations).                               */ 

/* To compile this code use:                                      */
/*   lc -O -cuf -v fill.c                                         */
/*   asm scanconvert.a                                            */
/* Link to your own code as appropriate.                          */

/* I am using Lattice 5.04, and get a CXERR: 26, Line: 0, if the  */
/* optimizer is not used!                                         */

/* Look ma, no includes. */

/* One polygon vertex: dx is the x coordinate, dy the y coordinate. */
typedef struct drawpoint {short dx,dy;} drawpoint;

/* Length of the scanconverter tables below. The header comment describes */
/* a 320x200 bitmap, i.e. rows 0..199, so 201 entries leaves one spare.   */
#define MAXY 201                /* Number of entries per fill table */

/* Per-scanline x extents, indexed by y. scanconvpix() reads both tables; */
/* presumably fillline68k() is what writes them (its code is not in this  */
/* file section) - verify against scanconvert.a.                          */
short xmin[MAXY], xmax[MAXY]; /* Scanconverter tables */

/* xtable[0] is xmin and xtable[1] is xmax, so drawpoly() can switch the  */
/* destination table by toggling an index between 0 and 1.                */
short * xtable[] = {xmin,xmax}; /* For toggling between fill tables */

/* Prototypes */

/* Draw a line from (x,y) to (x2,y2) in the given color. Exported by     */
/* scanconvert.a as _drawline68k; arguments are passed in the registers  */
/* named below.                                                          */
extern void __asm drawline68k(register __a1 char * p, /* 1st Plane Ptr */
                              register __d0 short x,
                              register __d1 short y,
                              register __d2 short x2,
                              register __d3 short y2,
                              register __d4 short color);

/* Scan-convert the edge (x,y)-(x2,y2) into the given fill table.        */
/* Exported by scanconvert.a as _fillline68k. drawpoly() always passes   */
/* either xmin or xmax as the table.                                     */
extern void __asm fillline68k(register __d0 short x,
                              register __d1 short y,
                              register __d2 short x2,
                              register __d3 short y2,
                              register __a0 short * table);

/* Fill scanlines miny..maxy between minx[y] and maxx[y] in the given    */
/* color. Exported by scanconvert.a as _scanconvpix.                     */
extern void __asm scanconvpix(register __a0 char * p, /* 1st Plane Ptr */
                              register __a1 short * minx,
                              register __a2 short * maxx,
                              register __d0 short miny,
                              register __d1 short maxy,
                              register __d2 short color);

/* C entry point: fill the polygon described by dl[0..cl-1]. */
void drawpoly(drawpoint * dl, /* An array of drawpoints */
              char * p,      /* 1st Plane Ptr          */
              short cl,     /* Number of points       */
              short color);

/* Code */

/* drawpoly: fill a polygon, or draw a line when the polygon has no area. */
/*                                                                        */
/*   dl    - array of cl vertices; the polygon is not closed explicitly   */
/*           (a triangle has three points)                                */
/*   p     - pointer to the first bitplane                                */
/*   cl    - number of vertices; nothing is drawn when cl <= 0            */
/*   color - color handed on to the assembly routines                     */
/*                                                                        */
/* The sign of the Newell sum gives the winding direction, which selects  */
/* the table (xmin or xmax) that receives the first run of edges. Edges   */
/* are walked from the topmost vertex; the table is toggled at the        */
/* bottommost vertex so that both runs are filled from top to bottom.     */
/* A zero sum means there is no area: the vertices are then treated as    */
/* collinear and a single line is drawn between the two extreme ones.     */
void drawpoly(drawpoint * dl, /* An array of drawpoints */
              char * p,      /* 1st Plane Ptr          */
              short cl,     /* Number of points       */
              short color){
short miny,maxy;     /* Indices of the topmost / bottommost vertex */
short i,inext,index;
short tminy,tmaxy;   /* y values of those two vertices */
short j;
short lo,hi;         /* Indices of the extreme vertices of a line */
long orient=0;

  if (cl <= 0) return; /* No vertices: nothing to draw, nothing to read */

/* Find out if polygon is clockwise or counterclockwise */

  for (i=0; i < cl; i++) {        /* Use the Newell method */
    j = (i == (cl-1)) ? 0 : i+1;  /* Successor of i, wrapping to 0 */
    /* Widen before multiplying so a 16 bit int cannot overflow */
    orient += (long)(dl[i].dx - dl[j].dx)*(long)(dl[i].dy + dl[j].dy);
  } /* for i */

  if (orient != 0) {    /* Check to see if we have a line or polygon */
    index = (orient < 0) ? 0 : 1;

/* Find miny,maxy. Seed from vertex 0 so that both indices are always */
/* assigned; ties keep the first vertex found, as before.            */
    miny = maxy = 0;
    tminy = tmaxy = dl[0].dy;
    for (i=1; i < cl; i++) {
      if (dl[i].dy < tminy) {miny = i; tminy = dl[i].dy;}
      if (dl[i].dy > tmaxy) {maxy = i; tmaxy = dl[i].dy;}
    } /* for i */

/* Fill tables */
    i = miny; /* Start at top, fill to bottom, filling lines from top */
    do {     /* to bottom for efficient assembly implementation.     */
      inext = i + 1;
      if (inext >= cl) inext = 0; /* Wrap around */
      fillline68k(dl[i].dx,dl[i].dy,dl[inext].dx,dl[inext].dy,xtable[index]);
      if (inext == maxy) index ^= 1; /* Toggle min/max to keep lines    */
      i++;                          /* being filled from top to bottom */
      if (i >= cl) i = 0; /* Wrap around */
    } while (inext != miny); /* Come full circle, done */

/* Draw polygon */
    scanconvpix(p,xmin,xmax,tminy,tmaxy,color);

  } else { /* Draw a line */

/* For collinear vertices the covering segment runs between the smallest */
/* and the largest vertex in (x, then y) order. This never reads past    */
/* dl[cl-1] and never collapses to a point when two vertices coincide.   */
    lo = hi = 0;
    for (i=1; i < cl; i++) {
      if ((dl[i].dx < dl[lo].dx) ||
          ((dl[i].dx == dl[lo].dx) && (dl[i].dy < dl[lo].dy))) lo = i;
      if ((dl[i].dx > dl[hi].dx) ||
          ((dl[i].dx == dl[hi].dx) && (dl[i].dy > dl[hi].dy))) hi = i;
    } /* for i */
    drawline68k(p,dl[lo].dx,dl[lo].dy,dl[hi].dx,dl[hi].dy,color);

  } /* if orient */

} /* end drawpoly */

/* end fill.c */
SHAR_EOF
fi # end of overwriting check
if test -f 'scanconvert.a'
then
	echo shar: will not over-write existing file "'scanconvert.a'"
else
cat << \SHAR_EOF > 'scanconvert.a'
; scanconvert.a
; Copyright (c) 1990 John Schultz, All Rights Reserved

; Created 14-March-90
; Modified 17-May-90 
;   Re-written with long word aligned writes: now up to twice as fast
;   as using bfset/bfclr (bitfield instructions can hit up to 5 bytes,
;   not long word aligned).
;   Code is now 68000 compatible.
; Modified 8-June-90
;   Included fillline68k, drawline68k in this file for Fill example.
;   Assembles to 3840 bytes using the Devpac 2.0 assembler.

	section	scanconvert,code

	xdef	_scanconvpix
	xdef	_fillline68k
	xdef	_drawline68k

; This code has been optimized for 320x200x4 bitmaps, which have
; been allocated linearly (One 32,000 byte allocation as opposed
; to four 8,000 byte allocations). 



PLANESIZE	equ	8000	; 320x200 bitplanes
BYTESROW	equ	40	; "               "

;extern void __asm scanconvpix(register __a0 PLANEPTR p,
;                              register __a1 short * minx,
;                              register __a2 short * maxx,
;                              register __d0 short miny,
;                              register __d1 short maxy,
;                              register __d2 short color);

; _scanconvpix: common entry point for the per-color scanline fillers.
; In:  a0 = first plane pointer, a1 = minx table, a2 = maxx table,
;      d0 = miny, d1 = maxy, d2 = color.
; Sets up a4/a5 (table pointers at entry miny), a0 and d6 (address of the
; first scanline in plane 0), d5 (BYTESROW) and d7 (maxy - miny), then
; jumps through the colorplanes table to the routine for the color.
; The registers pushed here are popped by that routine, which also
; executes the rts.
; NOTE(review): color is used as a table index without a range check.
; The colorplanes table is outside this part of the file - presumably it
; has 16 entries (colors 0-15); confirm.
_scanconvpix:
	movem.l	d2-d7/a2-a5,-(sp)

	move.w	d1,d7	; get maxy. d0 = miny.
	sub.w	d0,d7	; compute yheight. +1 not necessary: using dbra.

	movea.l	a1,a4	; copy xminptr
	movea.l	a2,a5	; copy xmaxptr

	move.w	d0,d6	; copy miny
	add.w	d6,d6	; y index is 2 bytes, so *2 offset
	adda.w	d6,a4	; get to start position minx
	adda.w	d6,a5	; get to start position maxx

; miny * BYTESROW, computed as miny*8 + miny*32
	move.w	d0,d6	; copy miny
	lsl.w	#3,d6	; *8
	lsl.w	#5,d0	; *32
	add.w	d6,d0	; *40

	adda.w	d0,a0	; go to first scanline in bitmap
	move.l	a0,d6	; setup for first pass (d6 += 40 each pass)

	moveq.l	#BYTESROW,d5	; bytes per row, to be added each scanline

	lea	colorplanes(pc),a2	; get colorplane table
	add.w	d2,d2
	add.w	d2,d2			; entries are 4 bytes long
	movea.l	0(a2,d2.w),a2		; get color routine address

	jmp	(a2)			; do specific plane writes/clears

; end of scanconvpix

; Specific fill cases for 0-15 colors. Speed is of the essence, not
; code size :-). Each case is specifically optimized. Could be made
; shorter, but slower.

; COLOR0: scanline fill loop for color 0 (all four planes cleared).
; Expects the register setup made by _scanconvpix:
;   a0 = d6 = address of the current scanline in plane 0
;   a4/a5   = current minx/maxx table entries
;   d5      = BYTESROW, d7 = maxy - miny (dbra count)
; Planes 1-3 are addressed at PLANESIZE steps above plane 0.
; Each span is written as long words: a masked left long word, whole
; middle long words, and a masked right long word.
COLOR0	macro

color0:

scanloop4\@
	move.w	(a4)+,d0	; get minx
	move.w	(a5)+,d1	; get maxx

	cmp.w	d0,d1		; error check: skip the scanline if maxx < minx
	bmi.b	finished4\@

	move.w	d0,d4		; copy minx
	lsr.w	#5,d4		; get first long word
	lsl.w	#2,d4		; *4 = number of bytes
	adda.w	d4,a0		; add x offset to scanline

; a1-a3 = the same long word in planes 1-3
	lea	PLANESIZE(a0),a1
	lea	PLANESIZE(a1),a2
	lea	PLANESIZE(a2),a3

	move.w	d0,d4		; copy minx
	moveq.l	#-1,d2		; start mask, $ffffffff
	andi.w	#$1f,d4		; minx mod 32 = shift count
	lsr.l	d4,d2		; left mask: clear the bits of pixels before minx

	move.w	d1,d4		; copy maxx
	moveq.l	#-1,d3		; start mask, $ffffffff
	andi.w	#$1f,d4		; maxx mod 32
	neg.w	d4		; - d4
	add.w	#31,d4		; d4 = 31 - d4 = shift count
	lsl.l	d4,d3		; right mask: clear the bits of pixels after maxx

	lsr.w	#5,d0		; get start long word
	lsr.w	#5,d1		; get end   long word
	sub.w	d0,d1		; get width in long words (end - start)
	bne.b	doublewrite4\@	; check for single write
; single write: the whole span lies inside one long word
	and.l	d2,d3		; combine left and right masks
	not.l	d3		; invert
	and.l	d3,(a0)		; clear
	and.l	d3,(a1)		; clear
	and.l	d3,(a2)		; clear
	and.l	d3,(a3)		; clear
	bra.b	finished4\@
doublewrite4\@
	not.l	d2		; invert
	and.l	d2,(a0)+	; write left mask
	and.l	d2,(a1)+	; write left mask
	and.l	d2,(a2)+	; write left mask
	and.l	d2,(a3)+	; write left mask
	subq.w	#2,d1		; d1 = middle long words - 1 (dbra count)
	bmi.b	writeright4\@	; don't do a multiwrite
; multiwrite
	moveq.l	#0,d2		; zeroes
multiloop4\@
	move.l	d2,(a0)+	; write middle 0's
	move.l	d2,(a1)+	; write middle 0's
	move.l	d2,(a2)+	; write middle 0's
	move.l	d2,(a3)+	; write middle 0's
	dbra	d1,multiloop4\@
writeright4\@
	not.l	d3
	and.l	d3,(a0)		; write right mask
	and.l	d3,(a1)		; write right mask
	and.l	d3,(a2)		; write right mask
	and.l	d3,(a3)		; write right mask
finished4\@
	add.l	d5,d6		; go to next scanline
	movea.l	d6,a0		; copy ptr to modify
	dbra	d7,scanloop4\@	; d7 = yheight-1.

; restore the registers saved by _scanconvpix and return to its caller
	movem.l	(sp)+,d2-d7/a2-a5
	rts

	endm

; COLOR1: scanline fill loop for color 1 (plane 0 set; planes 1-3 cleared).
; Expects the register setup made by _scanconvpix:
;   a0 = d6 = address of the current scanline in plane 0
;   a4/a5   = current minx/maxx table entries
;   d5      = BYTESROW, d7 = maxy - miny (dbra count)
; Planes 1-3 are addressed at PLANESIZE steps above plane 0.
; Each span is written as long words: a masked left long word, whole
; middle long words, and a masked right long word.
COLOR1	macro

color1:

scanloop4\@
	move.w	(a4)+,d0	; get minx
	move.w	(a5)+,d1	; get maxx

	cmp.w	d0,d1		; error check: skip the scanline if maxx < minx
	bmi.b	finished4\@

	move.w	d0,d4		; copy minx
	lsr.w	#5,d4		; get first long word
	lsl.w	#2,d4		; *4 = number of bytes
	adda.w	d4,a0		; add x offset to scanline

; a1-a3 = the same long word in planes 1-3
	lea	PLANESIZE(a0),a1
	lea	PLANESIZE(a1),a2
	lea	PLANESIZE(a2),a3

	move.w	d0,d4		; copy minx
	moveq.l	#-1,d2		; start mask, $ffffffff
	andi.w	#$1f,d4		; minx mod 32 = shift count
	lsr.l	d4,d2		; left mask: clear the bits of pixels before minx

	move.w	d1,d4		; copy maxx
	moveq.l	#-1,d3		; start mask, $ffffffff
	andi.w	#$1f,d4		; maxx mod 32
	neg.w	d4		; - d4
	add.w	#31,d4		; d4 = 31 - d4 = shift count
	lsl.l	d4,d3		; right mask: clear the bits of pixels after maxx

	lsr.w	#5,d0		; get start long word
	lsr.w	#5,d1		; get end   long word
	sub.w	d0,d1		; get width in long words (end - start)
	bne.b	doublewrite4\@	; check for single write
; single write: the whole span lies inside one long word
	and.l	d2,d3		; combine left and right masks
	or.l	d3,(a0)		; set
	not.l	d3		; invert
	and.l	d3,(a1)		; clear
	and.l	d3,(a2)		; clear
	and.l	d3,(a3)		; clear
; the next instruction restores d3, but d3 is rebuilt for every scanline,
; so the restored value is never used
	not.l	d3
	bra.b	finished4\@
doublewrite4\@
	or.l	d2,(a0)+	; set
	not.l	d2
	and.l	d2,(a1)+	; clear
	and.l	d2,(a2)+	; clear
	and.l	d2,(a3)+	; clear
	subq.w	#2,d1		; d1 = middle long words - 1 (dbra count)
	bmi.b	writeright4\@	; don't do a multiwrite
; multiwrite
	moveq.l	#-1,d2		; $ffffffff
	moveq.l	#0,d4		; zeroes
multiloop4\@
	move.l	d2,(a0)+	; set
	move.l	d4,(a1)+	; clear
	move.l	d4,(a2)+	; clear
	move.l	d4,(a3)+	; clear
	dbra	d1,multiloop4\@
writeright4\@
	or.l	d3,(a0)		; set
	not.l	d3
	and.l	d3,(a1)		; clear
	and.l	d3,(a2)		; clear
	and.l	d3,(a3)		; clear
finished4\@
	add.l	d5,d6		; go to next scanline
	movea.l	d6,a0		; copy ptr to modify
	dbra	d7,scanloop4\@	; d7 = yheight-1.

; restore the registers saved by _scanconvpix and return to its caller
	movem.l	(sp)+,d2-d7/a2-a5
	rts

	endm

; COLOR2: scanline fill loop for color 2 (plane 1 set; planes 0, 2, 3
; cleared).
; Expects the register setup made by _scanconvpix:
;   a0 = d6 = address of the current scanline in plane 0
;   a4/a5   = current minx/maxx table entries
;   d5      = BYTESROW, d7 = maxy - miny (dbra count)
; Planes 1-3 are addressed at PLANESIZE steps above plane 0.
; Each span is written as long words: a masked left long word, whole
; middle long words, and a masked right long word.
COLOR2	macro

color2:

scanloop4\@
	move.w	(a4)+,d0	; get minx
	move.w	(a5)+,d1	; get maxx

	cmp.w	d0,d1		; error check: skip the scanline if maxx < minx
	bmi.b	finished4\@

	move.w	d0,d4		; copy minx
	lsr.w	#5,d4		; get first long word
	lsl.w	#2,d4		; *4 = number of bytes
	adda.w	d4,a0		; add x offset to scanline

; a1-a3 = the same long word in planes 1-3
	lea	PLANESIZE(a0),a1
	lea	PLANESIZE(a1),a2
	lea	PLANESIZE(a2),a3

	move.w	d0,d4		; copy minx
	moveq.l	#-1,d2		; start mask, $ffffffff
	andi.w	#$1f,d4		; minx mod 32 = shift count
	lsr.l	d4,d2		; left mask: clear the bits of pixels before minx

	move.w	d1,d4		; copy maxx
	moveq.l	#-1,d3		; start mask, $ffffffff
	andi.w	#$1f,d4		; maxx mod 32
	neg.w	d4		; - d4
	add.w	#31,d4		; d4 = 31 - d4 = shift count
	lsl.l	d4,d3		; right mask: clear the bits of pixels after maxx

	lsr.w	#5,d0		; get start long word
	lsr.w	#5,d1		; get end   long word
	sub.w	d0,d1		; get width in long words (end - start)
	bne.b	doublewrite4\@	; check for single write
; single write: the whole span lies inside one long word
	and.l	d2,d3		; combine left and right masks
	or.l	d3,(a1)		; set
	not.l	d3		; invert
	and.l	d3,(a0)		; clear
	and.l	d3,(a2)		; clear
	and.l	d3,(a3)		; clear
	bra.b	finished4\@
doublewrite4\@
	or.l	d2,(a1)+	; set
	not.l	d2
	and.l	d2,(a0)+	; clear
	and.l	d2,(a2)+	; clear
	and.l	d2,(a3)+	; clear
	subq.w	#2,d1		; d1 = middle long words - 1 (dbra count)
	bmi.b	writeright4\@	; don't do a multiwrite
; multiwrite
	moveq.l	#-1,d2		; $ffffffff
	moveq.l	#0,d4		; zeroes
multiloop4\@
	move.l	d2,(a1)+	; set
	move.l	d4,(a0)+	; clear
	move.l	d4,(a2)+	; clear
	move.l	d4,(a3)+	; clear
	dbra	d1,multiloop4\@
writeright4\@
	or.l	d3,(a1)		; set
	not.l	d3		; invert
	and.l	d3,(a0)		; clear
	and.l	d3,(a2)		; clear
	and.l	d3,(a3)		; clear
finished4\@
	add.l	d5,d6		; go to next scanline
	movea.l	d6,a0		; copy ptr to modify
	dbra	d7,scanloop4\@	; d7 = yheight-1.

; restore the registers saved by _scanconvpix and return to its caller
	movem.l	(sp)+,d2-d7/a2-a5
	rts

	endm

; COLOR3: scanline fill loop for color 3 (planes 0, 1 set; planes 2, 3
; cleared).
; Expects the register setup made by _scanconvpix:
;   a0 = d6 = address of the current scanline in plane 0
;   a4/a5   = current minx/maxx table entries
;   d5      = BYTESROW, d7 = maxy - miny (dbra count)
; Planes 1-3 are addressed at PLANESIZE steps above plane 0.
; Each span is written as long words: a masked left long word, whole
; middle long words, and a masked right long word.
COLOR3	macro

color3:

scanloop4\@
	move.w	(a4)+,d0	; get minx
	move.w	(a5)+,d1	; get maxx

	cmp.w	d0,d1		; error check: skip the scanline if maxx < minx
	bmi.b	finished4\@

	move.w	d0,d4		; copy minx
	lsr.w	#5,d4		; get first long word
	lsl.w	#2,d4		; *4 = number of bytes
	adda.w	d4,a0		; add x offset to scanline

; a1-a3 = the same long word in planes 1-3
	lea	PLANESIZE(a0),a1
	lea	PLANESIZE(a1),a2
	lea	PLANESIZE(a2),a3

	move.w	d0,d4		; copy minx
	moveq.l	#-1,d2		; start mask, $ffffffff
	andi.w	#$1f,d4		; minx mod 32 = shift count
	lsr.l	d4,d2		; left mask: clear the bits of pixels before minx

	move.w	d1,d4		; copy maxx
	moveq.l	#-1,d3		; start mask, $ffffffff
	andi.w	#$1f,d4		; maxx mod 32
	neg.w	d4		; - d4
	add.w	#31,d4		; d4 = 31 - d4 = shift count
	lsl.l	d4,d3		; right mask: clear the bits of pixels after maxx

	lsr.w	#5,d0		; get start long word
	lsr.w	#5,d1		; get end   long word
	sub.w	d0,d1		; get width in long words (end - start)
	bne.b	doublewrite4\@	; check for single write
; single write: the whole span lies inside one long word
	and.l	d2,d3		; combine left and right masks
	or.l	d3,(a0)		; set
	or.l	d3,(a1)		; set
	not.l	d3		; invert
	and.l	d3,(a2)		; clear
	and.l	d3,(a3)		; clear
	bra.b	finished4\@
doublewrite4\@
	or.l	d2,(a0)+	; set
	or.l	d2,(a1)+	; set
	not.l	d2
	and.l	d2,(a2)+	; clear
	and.l	d2,(a3)+	; clear
	subq.w	#2,d1		; d1 = middle long words - 1 (dbra count)
	bmi.b	writeright4\@	; don't do a multiwrite
; multiwrite
	moveq.l	#-1,d2		; $ffffffff
	moveq.l	#0,d4		; zeroes
multiloop4\@
	move.l	d2,(a0)+	; set
	move.l	d2,(a1)+	; set
	move.l	d4,(a2)+	; clear
	move.l	d4,(a3)+	; clear
	dbra	d1,multiloop4\@
writeright4\@
	or.l	d3,(a0)		; set
	or.l	d3,(a1)		; set
	not.l	d3
	and.l	d3,(a2)		; clear
	and.l	d3,(a3)		; clear
finished4\@
	add.l	d5,d6		; go to next scanline
	movea.l	d6,a0		; copy ptr to modify
	dbra	d7,scanloop4\@	; d7 = yheight-1.

; restore the registers saved by _scanconvpix and return to its caller
	movem.l	(sp)+,d2-d7/a2-a5
	rts

	endm

; COLOR4: scanline fill loop for color 4 (plane 2 set; planes 0, 1, 3
; cleared).
; Expects the register setup made by _scanconvpix:
;   a0 = d6 = address of the current scanline in plane 0
;   a4/a5   = current minx/maxx table entries
;   d5      = BYTESROW, d7 = maxy - miny (dbra count)
; Planes 1-3 are addressed at PLANESIZE steps above plane 0.
; Each span is written as long words: a masked left long word, whole
; middle long words, and a masked right long word.
COLOR4	macro

color4:

scanloop4\@
	move.w	(a4)+,d0	; get minx
	move.w	(a5)+,d1	; get maxx

	cmp.w	d0,d1		; error check: skip the scanline if maxx < minx
	bmi.b	finished4\@

	move.w	d0,d4		; copy minx
	lsr.w	#5,d4		; get first long word
	lsl.w	#2,d4		; *4 = number of bytes
	adda.w	d4,a0		; add x offset to scanline

; a1-a3 = the same long word in planes 1-3
	lea	PLANESIZE(a0),a1
	lea	PLANESIZE(a1),a2
	lea	PLANESIZE(a2),a3

	move.w	d0,d4		; copy minx
	moveq.l	#-1,d2		; start mask, $ffffffff
	andi.w	#$1f,d4		; minx mod 32 = shift count
	lsr.l	d4,d2		; left mask: clear the bits of pixels before minx

	move.w	d1,d4		; copy maxx
	moveq.l	#-1,d3		; start mask, $ffffffff
	andi.w	#$1f,d4		; maxx mod 32
	neg.w	d4		; - d4
	add.w	#31,d4		; d4 = 31 - d4 = shift count
	lsl.l	d4,d3		; right mask: clear the bits of pixels after maxx

	lsr.w	#5,d0		; get start long word
	lsr.w	#5,d1		; get end   long word
	sub.w	d0,d1		; get width in long words (end - start)
	bne.b	doublewrite4\@	; check for single write
; single write: the whole span lies inside one long word
	and.l	d2,d3		; combine left and right masks
	or.l	d3,(a2)		; set
	not.l	d3		; invert
	and.l	d3,(a0)		; clear
	and.l	d3,(a1)		; clear
	and.l	d3,(a3)		; clear
	bra.b	finished4\@
doublewrite4\@
	or.l	d2,(a2)+	; set
	not.l	d2
	and.l	d2,(a0)+	; clear
	and.l	d2,(a1)+	; clear
	and.l	d2,(a3)+	; clear
	subq.w	#2,d1		; d1 = middle long words - 1 (dbra count)
	bmi.b	writeright4\@	; don't do a multiwrite
; multiwrite
	moveq.l	#-1,d2		; $ffffffff
	moveq.l	#0,d4		; zeroes
multiloop4\@
	move.l	d2,(a2)+	; set
	move.l	d4,(a0)+	; clear
	move.l	d4,(a1)+	; clear
	move.l	d4,(a3)+	; clear
	dbra	d1,multiloop4\@
writeright4\@
	or.l	d3,(a2)		; set
	not.l	d3		; invert
	and.l	d3,(a0)		; clear
	and.l	d3,(a1)		; clear
	and.l	d3,(a3)		; clear
finished4\@
	add.l	d5,d6		; go to next scanline
	movea.l	d6,a0		; copy ptr to modify
	dbra	d7,scanloop4\@	; d7 = yheight-1.

; restore the registers saved by _scanconvpix and return to its caller
	movem.l	(sp)+,d2-d7/a2-a5
	rts

	endm

; COLOR5: scanline fill loop for color 5 (planes 0, 2 set; planes 1, 3
; cleared).
; Expects the register setup made by _scanconvpix:
;   a0 = d6 = address of the current scanline in plane 0
;   a4/a5   = current minx/maxx table entries
;   d5      = BYTESROW, d7 = maxy - miny (dbra count)
; Planes 1-3 are addressed at PLANESIZE steps above plane 0.
; Each span is written as long words: a masked left long word, whole
; middle long words, and a masked right long word.
COLOR5	macro

color5:

scanloop4\@
	move.w	(a4)+,d0	; get minx
	move.w	(a5)+,d1	; get maxx

	cmp.w	d0,d1		; error check: skip the scanline if maxx < minx
	bmi.b	finished4\@

	move.w	d0,d4		; copy minx
	lsr.w	#5,d4		; get first long word
	lsl.w	#2,d4		; *4 = number of bytes
	adda.w	d4,a0		; add x offset to scanline

; a1-a3 = the same long word in planes 1-3
	lea	PLANESIZE(a0),a1
	lea	PLANESIZE(a1),a2
	lea	PLANESIZE(a2),a3

	move.w	d0,d4		; copy minx
	moveq.l	#-1,d2		; start mask, $ffffffff
	andi.w	#$1f,d4		; minx mod 32 = shift count
	lsr.l	d4,d2		; left mask: clear the bits of pixels before minx

	move.w	d1,d4		; copy maxx
	moveq.l	#-1,d3		; start mask, $ffffffff
	andi.w	#$1f,d4		; maxx mod 32
	neg.w	d4		; - d4
	add.w	#31,d4		; d4 = 31 - d4 = shift count
	lsl.l	d4,d3		; right mask: clear the bits of pixels after maxx

	lsr.w	#5,d0		; get start long word
	lsr.w	#5,d1		; get end   long word
	sub.w	d0,d1		; get width in long words (end - start)
	bne.b	doublewrite4\@	; check for single write
; single write: the whole span lies inside one long word
	and.l	d2,d3		; combine left and right masks
	or.l	d3,(a0)		; set
	or.l	d3,(a2)		; set
	not.l	d3		; invert
	and.l	d3,(a1)		; clear
	and.l	d3,(a3)		; clear
	bra.b	finished4\@
doublewrite4\@
	or.l	d2,(a0)+	; set
	or.l	d2,(a2)+	; set
	not.l	d2
	and.l	d2,(a1)+	; clear
	and.l	d2,(a3)+	; clear
	subq.w	#2,d1		; d1 = middle long words - 1 (dbra count)
	bmi.b	writeright4\@	; don't do a multiwrite
; multiwrite
	moveq.l	#-1,d2		; $ffffffff
	moveq.l	#0,d4		; zeroes
multiloop4\@
	move.l	d2,(a0)+	; set
	move.l	d2,(a2)+	; set
	move.l	d4,(a1)+	; clear
	move.l	d4,(a3)+	; clear
	dbra	d1,multiloop4\@
writeright4\@
	or.l	d3,(a0)		; set
	or.l	d3,(a2)		; set
	not.l	d3		; invert
	and.l	d3,(a1)		; clear
	and.l	d3,(a3)		; clear
finished4\@
	add.l	d5,d6		; go to next scanline
	movea.l	d6,a0		; copy ptr to modify
	dbra	d7,scanloop4\@	; d7 = yheight-1.

; restore the registers saved by _scanconvpix and return to its caller
	movem.l	(sp)+,d2-d7/a2-a5
	rts

	endm

; COLOR6: scanline fill loop for color 6 (planes 1, 2 set; planes 0, 3
; cleared).
; Expects the register setup made by _scanconvpix:
;   a0 = d6 = address of the current scanline in plane 0
;   a4/a5   = current minx/maxx table entries
;   d5      = BYTESROW, d7 = maxy - miny (dbra count)
; Planes 1-3 are addressed at PLANESIZE steps above plane 0.
; Each span is written as long words: a masked left long word, whole
; middle long words, and a masked right long word.
COLOR6	macro

color6:

scanloop4\@
	move.w	(a4)+,d0	; get minx
	move.w	(a5)+,d1	; get maxx

	cmp.w	d0,d1		; error check: skip the scanline if maxx < minx
	bmi.b	finished4\@

	move.w	d0,d4		; copy minx
	lsr.w	#5,d4		; get first long word
	lsl.w	#2,d4		; *4 = number of bytes
	adda.w	d4,a0		; add x offset to scanline

; a1-a3 = the same long word in planes 1-3
	lea	PLANESIZE(a0),a1
	lea	PLANESIZE(a1),a2
	lea	PLANESIZE(a2),a3

	move.w	d0,d4		; copy minx
	moveq.l	#-1,d2		; start mask, $ffffffff
	andi.w	#$1f,d4		; minx mod 32 = shift count
	lsr.l	d4,d2		; left mask: clear the bits of pixels before minx

	move.w	d1,d4		; copy maxx
	moveq.l	#-1,d3		; start mask, $ffffffff
	andi.w	#$1f,d4		; maxx mod 32
	neg.w	d4		; - d4
	add.w	#31,d4		; d4 = 31 - d4 = shift count
	lsl.l	d4,d3		; right mask: clear the bits of pixels after maxx

	lsr.w	#5,d0		; get start long word
	lsr.w	#5,d1		; get end   long word
	sub.w	d0,d1		; get width in long words (end - start)
	bne.b	doublewrite4\@	; check for single write
; single write: the whole span lies inside one long word
	and.l	d2,d3		; combine left and right masks
	or.l	d3,(a1)		; set
	or.l	d3,(a2)		; set
	not.l	d3		; invert
	and.l	d3,(a0)		; clear
	and.l	d3,(a3)		; clear
	bra.b	finished4\@
doublewrite4\@
	or.l	d2,(a1)+	; set
	or.l	d2,(a2)+	; set
	not.l	d2
	and.l	d2,(a0)+	; clear
	and.l	d2,(a3)+	; clear
	subq.w	#2,d1		; d1 = middle long words - 1 (dbra count)
	bmi.b	writeright4\@	; don't do a multiwrite
; multiwrite
	moveq.l	#-1,d2		; $ffffffff
	moveq.l	#0,d4		; zeroes
multiloop4\@
	move.l	d2,(a1)+	; set
	move.l	d2,(a2)+	; set
	move.l	d4,(a0)+	; clear
	move.l	d4,(a3)+	; clear
	dbra	d1,multiloop4\@
writeright4\@
	or.l	d3,(a1)		; set
	or.l	d3,(a2)		; set
	not.l	d3		; invert
	and.l	d3,(a0)		; clear
	and.l	d3,(a3)		; clear
finished4\@
	add.l	d5,d6		; go to next scanline
	movea.l	d6,a0		; copy ptr to modify
	dbra	d7,scanloop4\@	; d7 = yheight-1.

; restore the registers saved by _scanconvpix and return to its caller
	movem.l	(sp)+,d2-d7/a2-a5
	rts

	endm

; COLOR7: scanline fill loop for color 7 (planes 0, 1, 2 set; plane 3
; cleared).
; Expects the register setup made by _scanconvpix:
;   a0 = d6 = address of the current scanline in plane 0
;   a4/a5   = current minx/maxx table entries
;   d5      = BYTESROW, d7 = maxy - miny (dbra count)
; Planes 1-3 are addressed at PLANESIZE steps above plane 0.
; Each span is written as long words: a masked left long word, whole
; middle long words, and a masked right long word.
COLOR7	macro

color7:

scanloop4\@
	move.w	(a4)+,d0	; get minx
	move.w	(a5)+,d1	; get maxx

	cmp.w	d0,d1		; error check: skip the scanline if maxx < minx
	bmi.b	finished4\@

	move.w	d0,d4		; copy minx
	lsr.w	#5,d4		; get first long word
	lsl.w	#2,d4		; *4 = number of bytes
	adda.w	d4,a0		; add x offset to scanline

; a1-a3 = the same long word in planes 1-3
	lea	PLANESIZE(a0),a1
	lea	PLANESIZE(a1),a2
	lea	PLANESIZE(a2),a3

	move.w	d0,d4		; copy minx
	moveq.l	#-1,d2		; start mask, $ffffffff
	andi.w	#$1f,d4		; minx mod 32 = shift count
	lsr.l	d4,d2		; left mask: clear the bits of pixels before minx

	move.w	d1,d4		; copy maxx
	moveq.l	#-1,d3		; start mask, $ffffffff
	andi.w	#$1f,d4		; maxx mod 32
	neg.w	d4		; - d4
	add.w	#31,d4		; d4 = 31 - d4 = shift count
	lsl.l	d4,d3		; right mask: clear the bits of pixels after maxx

	lsr.w	#5,d0		; get start long word
	lsr.w	#5,d1		; get end   long word
	sub.w	d0,d1		; get width in long words (end - start)
	bne.b	doublewrite4\@	; check for single write
; single write: the whole span lies inside one long word
	and.l	d2,d3		; combine left and right masks
	or.l	d3,(a0)		; set
	or.l	d3,(a1)		; set
	or.l	d3,(a2)		; set
	not.l	d3		; invert
	and.l	d3,(a3)		; clear
	bra.b	finished4\@
doublewrite4\@
	or.l	d2,(a0)+	; set
	or.l	d2,(a1)+	; set
	or.l	d2,(a2)+	; set
	not.l	d2
	and.l	d2,(a3)+	; clear
	subq.w	#2,d1		; d1 = middle long words - 1 (dbra count)
	bmi.b	writeright4\@	; don't do a multiwrite
; multiwrite
	moveq.l	#-1,d2		; $ffffffff
	moveq.l	#0,d4		; zeroes
multiloop4\@
	move.l	d2,(a0)+	; set
	move.l	d2,(a1)+	; set
	move.l	d2,(a2)+	; set
	move.l	d4,(a3)+	; clear
	dbra	d1,multiloop4\@
writeright4\@
	or.l	d3,(a0)		; set
	or.l	d3,(a1)		; set
	or.l	d3,(a2)		; set
	not.l	d3		; invert
	and.l	d3,(a3)		; clear
finished4\@
	add.l	d5,d6		; go to next scanline
	movea.l	d6,a0		; copy ptr to modify
	dbra	d7,scanloop4\@	; d7 = yheight-1.

; restore the registers saved by _scanconvpix and return to its caller
	movem.l	(sp)+,d2-d7/a2-a5
	rts

	endm

; COLOR8: scanline fill loop for color 8 (plane 3 set; planes 0, 1, 2
; cleared).
; Expects the register setup made by _scanconvpix:
;   a0 = d6 = address of the current scanline in plane 0
;   a4/a5   = current minx/maxx table entries
;   d5      = BYTESROW, d7 = maxy - miny (dbra count)
; Planes 1-3 are addressed at PLANESIZE steps above plane 0.
; Each span is written as long words: a masked left long word, whole
; middle long words, and a masked right long word.
COLOR8	macro

color8:

scanloop4\@
	move.w	(a4)+,d0	; get minx
	move.w	(a5)+,d1	; get maxx

	cmp.w	d0,d1		; error check: skip the scanline if maxx < minx
	bmi.b	finished4\@

	move.w	d0,d4		; copy minx
	lsr.w	#5,d4		; get first long word
	lsl.w	#2,d4		; *4 = number of bytes
	adda.w	d4,a0		; add x offset to scanline

; a1-a3 = the same long word in planes 1-3
	lea	PLANESIZE(a0),a1
	lea	PLANESIZE(a1),a2
	lea	PLANESIZE(a2),a3

	move.w	d0,d4		; copy minx
	moveq.l	#-1,d2		; start mask, $ffffffff
	andi.w	#$1f,d4		; minx mod 32 = shift count
	lsr.l	d4,d2		; left mask: clear the bits of pixels before minx

	move.w	d1,d4		; copy maxx
	moveq.l	#-1,d3		; start mask, $ffffffff
	andi.w	#$1f,d4		; maxx mod 32
	neg.w	d4		; - d4
	add.w	#31,d4		; d4 = 31 - d4 = shift count
	lsl.l	d4,d3		; right mask: clear the bits of pixels after maxx

	lsr.w	#5,d0		; get start long word
	lsr.w	#5,d1		; get end   long word
	sub.w	d0,d1		; get width in long words (end - start)
	bne.b	doublewrite4\@	; check for single write
; single write: the whole span lies inside one long word
	and.l	d2,d3		; combine left and right masks
	or.l	d3,(a3)		; set
	not.l	d3		; invert
	and.l	d3,(a0)		; clear
	and.l	d3,(a1)		; clear
	and.l	d3,(a2)		; clear
	bra.b	finished4\@
doublewrite4\@
	or.l	d2,(a3)+	; set
	not.l	d2
	and.l	d2,(a0)+	; clear
	and.l	d2,(a1)+	; clear
	and.l	d2,(a2)+	; clear
	subq.w	#2,d1		; d1 = middle long words - 1 (dbra count)
	bmi.b	writeright4\@	; don't do a multiwrite
; multiwrite
	moveq.l	#-1,d2		; $ffffffff
	moveq.l	#0,d4		; zeroes
multiloop4\@
	move.l	d2,(a3)+	; set
	move.l	d4,(a0)+	; clear
	move.l	d4,(a1)+	; clear
	move.l	d4,(a2)+	; clear
	dbra	d1,multiloop4\@
writeright4\@
	or.l	d3,(a3)		; set
	not.l	d3		; invert
	and.l	d3,(a0)		; clear
	and.l	d3,(a1)		; clear
	and.l	d3,(a2)		; clear
finished4\@
	add.l	d5,d6		; go to next scanline
	movea.l	d6,a0		; copy ptr to modify
	dbra	d7,scanloop4\@	; d7 = yheight-1.

; restore the registers saved by _scanconvpix and return to its caller
	movem.l	(sp)+,d2-d7/a2-a5
	rts

	endm

; COLOR9: scanline fill loop for color 9 (planes 0, 3 set; planes 1, 2
; cleared).
; Expects the register setup made by _scanconvpix:
;   a0 = d6 = address of the current scanline in plane 0
;   a4/a5   = current minx/maxx table entries
;   d5      = BYTESROW, d7 = maxy - miny (dbra count)
; Planes 1-3 are addressed at PLANESIZE steps above plane 0.
; Each span is written as long words: a masked left long word, whole
; middle long words, and a masked right long word.
COLOR9	macro

color9:

scanloop4\@
	move.w	(a4)+,d0	; get minx
	move.w	(a5)+,d1	; get maxx

	cmp.w	d0,d1		; error check: skip the scanline if maxx < minx
	bmi.b	finished4\@

	move.w	d0,d4		; copy minx
	lsr.w	#5,d4		; get first long word
	lsl.w	#2,d4		; *4 = number of bytes
	adda.w	d4,a0		; add x offset to scanline

; a1-a3 = the same long word in planes 1-3
	lea	PLANESIZE(a0),a1
	lea	PLANESIZE(a1),a2
	lea	PLANESIZE(a2),a3

	move.w	d0,d4		; copy minx
	moveq.l	#-1,d2		; start mask, $ffffffff
	andi.w	#$1f,d4		; minx mod 32 = shift count
	lsr.l	d4,d2		; left mask: clear the bits of pixels before minx

	move.w	d1,d4		; copy maxx
	moveq.l	#-1,d3		; start mask, $ffffffff
	andi.w	#$1f,d4		; maxx mod 32
	neg.w	d4		; - d4
	add.w	#31,d4		; d4 = 31 - d4 = shift count
	lsl.l	d4,d3		; right mask: clear the bits of pixels after maxx

	lsr.w	#5,d0		; get start long word
	lsr.w	#5,d1		; get end   long word
	sub.w	d0,d1		; get width in long words (end - start)
	bne.b	doublewrite4\@	; check for single write
; single write: the whole span lies inside one long word
	and.l	d2,d3		; combine left and right masks
	or.l	d3,(a0)		; set
	or.l	d3,(a3)		; set
	not.l	d3		; invert
	and.l	d3,(a1)		; clear
	and.l	d3,(a2)		; clear
	bra.b	finished4\@
doublewrite4\@
	or.l	d2,(a0)+	; set
	or.l	d2,(a3)+	; set
	not.l	d2
	and.l	d2,(a1)+	; clear
	and.l	d2,(a2)+	; clear
	subq.w	#2,d1		; d1 = middle long words - 1 (dbra count)
	bmi.b	writeright4\@	; don't do a multiwrite
; multiwrite
	moveq.l	#-1,d2		; $ffffffff
	moveq.l	#0,d4		; zeroes
multiloop4\@
	move.l	d2,(a0)+	; set
	move.l	d2,(a3)+	; set
	move.l	d4,(a1)+	; clear
	move.l	d4,(a2)+	; clear
	dbra	d1,multiloop4\@
writeright4\@
	or.l	d3,(a0)		; set
	or.l	d3,(a3)		; set
	not.l	d3		; invert
	and.l	d3,(a1)		; clear
	and.l	d3,(a2)		; clear
finished4\@
	add.l	d5,d6		; go to next scanline
	movea.l	d6,a0		; copy ptr to modify
	dbra	d7,scanloop4\@	; d7 = yheight-1.

; restore the registers saved by _scanconvpix and return to its caller
	movem.l	(sp)+,d2-d7/a2-a5
	rts

	endm

; COLOR10: scanline fill loop for color 10 (planes 1, 3 set; planes 0, 2
; cleared).
; Expects the register setup made by _scanconvpix:
;   a0 = d6 = address of the current scanline in plane 0
;   a4/a5   = current minx/maxx table entries
;   d5      = BYTESROW, d7 = maxy - miny (dbra count)
; Planes 1-3 are addressed at PLANESIZE steps above plane 0.
; Each span is written as long words: a masked left long word, whole
; middle long words, and a masked right long word.
COLOR10	macro

color10:

scanloop4\@
	move.w	(a4)+,d0	; get minx
	move.w	(a5)+,d1	; get maxx

	cmp.w	d0,d1		; error check: skip the scanline if maxx < minx
	bmi.b	finished4\@

	move.w	d0,d4		; copy minx
	lsr.w	#5,d4		; get first long word
	lsl.w	#2,d4		; *4 = number of bytes
	adda.w	d4,a0		; add x offset to scanline

; a1-a3 = the same long word in planes 1-3
	lea	PLANESIZE(a0),a1
	lea	PLANESIZE(a1),a2
	lea	PLANESIZE(a2),a3

	move.w	d0,d4		; copy minx
	moveq.l	#-1,d2		; start mask, $ffffffff
	andi.w	#$1f,d4		; minx mod 32 = shift count
	lsr.l	d4,d2		; left mask: clear the bits of pixels before minx

	move.w	d1,d4		; copy maxx
	moveq.l	#-1,d3		; start mask, $ffffffff
	andi.w	#$1f,d4		; maxx mod 32
	neg.w	d4		; - d4
	add.w	#31,d4		; d4 = 31 - d4 = shift count
	lsl.l	d4,d3		; right mask: clear the bits of pixels after maxx

	lsr.w	#5,d0		; get start long word
	lsr.w	#5,d1		; get end   long word
	sub.w	d0,d1		; get width in long words (end - start)
	bne.b	doublewrite4\@	; check for single write
; single write: the whole span lies inside one long word
	and.l	d2,d3		; combine left and right masks
	or.l	d3,(a1)		; set
	or.l	d3,(a3)		; set
	not.l	d3		; invert
	and.l	d3,(a0)		; clear
	and.l	d3,(a2)		; clear
	bra.b	finished4\@
doublewrite4\@
	or.l	d2,(a1)+	; set
	or.l	d2,(a3)+	; set
	not.l	d2
	and.l	d2,(a0)+	; clear
	and.l	d2,(a2)+	; clear
	subq.w	#2,d1		; d1 = middle long words - 1 (dbra count)
	bmi.b	writeright4\@	; don't do a multiwrite
; multiwrite
	moveq.l	#-1,d2		; $ffffffff
	moveq.l	#0,d4		; zeroes
multiloop4\@
	move.l	d2,(a1)+	; set
	move.l	d2,(a3)+	; set
	move.l	d4,(a0)+	; clear
	move.l	d4,(a2)+	; clear
	dbra	d1,multiloop4\@
writeright4\@
	or.l	d3,(a1)		; set
	or.l	d3,(a3)		; set
	not.l	d3		; invert
	and.l	d3,(a0)		; clear
	and.l	d3,(a2)		; clear
finished4\@
	add.l	d5,d6		; go to next scanline
	movea.l	d6,a0		; copy ptr to modify
	dbra	d7,scanloop4\@	; d7 = yheight-1.

; restore the registers saved by _scanconvpix and return to its caller
	movem.l	(sp)+,d2-d7/a2-a5
	rts

	endm

; COLOR11: scanline fill loop for color 11 (planes 0, 1, 3 set; plane 2
; cleared).
; Expects the register setup made by _scanconvpix:
;   a0 = d6 = address of the current scanline in plane 0
;   a4/a5   = current minx/maxx table entries
;   d5      = BYTESROW, d7 = maxy - miny (dbra count)
; Planes 1-3 are addressed at PLANESIZE steps above plane 0.
; Each span is written as long words: a masked left long word, whole
; middle long words, and a masked right long word.
COLOR11	macro

color11:

scanloop4\@
	move.w	(a4)+,d0	; get minx
	move.w	(a5)+,d1	; get maxx

	cmp.w	d0,d1		; error check: skip the scanline if maxx < minx
	bmi.b	finished4\@

	move.w	d0,d4		; copy minx
	lsr.w	#5,d4		; get first long word
	lsl.w	#2,d4		; *4 = number of bytes
	adda.w	d4,a0		; add x offset to scanline

; a1-a3 = the same long word in planes 1-3
	lea	PLANESIZE(a0),a1
	lea	PLANESIZE(a1),a2
	lea	PLANESIZE(a2),a3

	move.w	d0,d4		; copy minx
	moveq.l	#-1,d2		; start mask, $ffffffff
	andi.w	#$1f,d4		; minx mod 32 = shift count
	lsr.l	d4,d2		; left mask: clear the bits of pixels before minx

	move.w	d1,d4		; copy maxx
	moveq.l	#-1,d3		; start mask, $ffffffff
	andi.w	#$1f,d4		; maxx mod 32
	neg.w	d4		; - d4
	add.w	#31,d4		; d4 = 31 - d4 = shift count
	lsl.l	d4,d3		; right mask: clear the bits of pixels after maxx

	lsr.w	#5,d0		; get start long word
	lsr.w	#5,d1		; get end   long word
	sub.w	d0,d1		; get width in long words (end - start)
	bne.b	doublewrite4\@	; check for single write
; single write: the whole span lies inside one long word
	and.l	d2,d3		; combine left and right masks
	or.l	d3,(a0)		; set
	or.l	d3,(a1)		; set
	or.l	d3,(a3)		; set
	not.l	d3		; invert
	and.l	d3,(a2)		; clear
	bra.b	finished4\@
doublewrite4\@
	or.l	d2,(a0)+	; set
	or.l	d2,(a1)+	; set
	or.l	d2,(a3)+	; set
	not.l	d2
	and.l	d2,(a2)+	; clear
	subq.w	#2,d1		; d1 = middle long words - 1 (dbra count)
	bmi.b	writeright4\@	; don't do a multiwrite
; multiwrite
	moveq.l	#-1,d2		; $ffffffff
	moveq.l	#0,d4		; zeroes
multiloop4\@
	move.l	d2,(a0)+	; set
	move.l	d2,(a1)+	; set
	move.l	d2,(a3)+	; set
	move.l	d4,(a2)+	; clear
	dbra	d1,multiloop4\@
writeright4\@
	or.l	d3,(a0)		; set
	or.l	d3,(a1)		; set
	or.l	d3,(a3)		; set
	not.l	d3		; invert
	and.l	d3,(a2)		; clear
finished4\@
	add.l	d5,d6		; go to next scanline
	movea.l	d6,a0		; copy ptr to modify
	dbra	d7,scanloop4\@	; d7 = yheight-1.

; restore the registers saved by _scanconvpix and return to its caller
	movem.l	(sp)+,d2-d7/a2-a5
	rts

	endm

; COLOR12: scanline fill loop for color 12 (planes 2, 3 set; planes 0, 1
; cleared).
; Expects the register setup made by _scanconvpix:
;   a0 = d6 = address of the current scanline in plane 0
;   a4/a5   = current minx/maxx table entries
;   d5      = BYTESROW, d7 = maxy - miny (dbra count)
; Planes 1-3 are addressed at PLANESIZE steps above plane 0.
; Each span is written as long words: a masked left long word, whole
; middle long words, and a masked right long word.
COLOR12	macro

color12:

scanloop4\@
	move.w	(a4)+,d0	; get minx
	move.w	(a5)+,d1	; get maxx

	cmp.w	d0,d1		; error check: skip the scanline if maxx < minx
	bmi.b	finished4\@

	move.w	d0,d4		; copy minx
	lsr.w	#5,d4		; get first long word
	lsl.w	#2,d4		; *4 = number of bytes
	adda.w	d4,a0		; add x offset to scanline

; a1-a3 = the same long word in planes 1-3
	lea	PLANESIZE(a0),a1
	lea	PLANESIZE(a1),a2
	lea	PLANESIZE(a2),a3

	move.w	d0,d4		; copy minx
	moveq.l	#-1,d2		; start mask, $ffffffff
	andi.w	#$1f,d4		; minx mod 32 = shift count
	lsr.l	d4,d2		; left mask: clear the bits of pixels before minx

	move.w	d1,d4		; copy maxx
	moveq.l	#-1,d3		; start mask, $ffffffff
	andi.w	#$1f,d4		; maxx mod 32
	neg.w	d4		; - d4
	add.w	#31,d4		; d4 = 31 - d4 = shift count
	lsl.l	d4,d3		; right mask: clear the bits of pixels after maxx

	lsr.w	#5,d0		; get start long word
	lsr.w	#5,d1		; get end   long word
	sub.w	d0,d1		; get width in long words (end - start)
	bne.b	doublewrite4\@	; check for single write
; single write: the whole span lies inside one long word
	and.l	d2,d3		; combine left and right masks
	or.l	d3,(a2)		; set
	or.l	d3,(a3)		; set
	not.l	d3		; invert
	and.l	d3,(a0)		; clear
	and.l	d3,(a1)		; clear
	bra.b	finished4\@
doublewrite4\@
	or.l	d2,(a2)+	; set
	or.l	d2,(a3)+	; set
	not.l	d2
	and.l	d2,(a0)+	; clear
	and.l	d2,(a1)+	; clear
	subq.w	#2,d1		; d1 = middle long words - 1 (dbra count)
	bmi.b	writeright4\@	; don't do a multiwrite
; multiwrite
	moveq.l	#-1,d2		; $ffffffff
	moveq.l	#0,d4		; zeroes
multiloop4\@
	move.l	d2,(a2)+	; set
	move.l	d2,(a3)+	; set
	move.l	d4,(a0)+	; clear
	move.l	d4,(a1)+	; clear
	dbra	d1,multiloop4\@
writeright4\@
	or.l	d3,(a2)		; set
	or.l	d3,(a3)		; set
	not.l	d3		; invert
	and.l	d3,(a0)		; clear
	and.l	d3,(a1)		; clear
finished4\@
	add.l	d5,d6		; go to next scanline
	movea.l	d6,a0		; copy ptr to modify
	dbra	d7,scanloop4\@	; d7 = yheight-1.

; restore the registers saved by _scanconvpix and return to its caller
	movem.l	(sp)+,d2-d7/a2-a5
	rts

	endm

; COLOR13: scanline fill loop for color 13 (planes 0, 2, 3 set; plane 1
; cleared).
; Expects the register setup made by _scanconvpix:
;   a0 = d6 = address of the current scanline in plane 0
;   a4/a5   = current minx/maxx table entries
;   d5      = BYTESROW, d7 = maxy - miny (dbra count)
; Planes 1-3 are addressed at PLANESIZE steps above plane 0.
; Each span is written as long words: a masked left long word, whole
; middle long words, and a masked right long word.
COLOR13	macro

color13:

scanloop4\@
	move.w	(a4)+,d0	; get minx
	move.w	(a5)+,d1	; get maxx

	cmp.w	d0,d1		; error check: skip the scanline if maxx < minx
	bmi.b	finished4\@

	move.w	d0,d4		; copy minx
	lsr.w	#5,d4		; get first long word
	lsl.w	#2,d4		; *4 = number of bytes
	adda.w	d4,a0		; add x offset to scanline

; a1-a3 = the same long word in planes 1-3
	lea	PLANESIZE(a0),a1
	lea	PLANESIZE(a1),a2
	lea	PLANESIZE(a2),a3

	move.w	d0,d4		; copy minx
	moveq.l	#-1,d2		; start mask, $ffffffff
	andi.w	#$1f,d4		; minx mod 32 = shift count
	lsr.l	d4,d2		; left mask: clear the bits of pixels before minx

	move.w	d1,d4		; copy maxx
	moveq.l	#-1,d3		; start mask, $ffffffff
	andi.w	#$1f,d4		; maxx mod 32
	neg.w	d4		; - d4
	add.w	#31,d4		; d4 = 31 - d4 = shift count
	lsl.l	d4,d3		; right mask: clear the bits of pixels after maxx

	lsr.w	#5,d0		; get start long word
	lsr.w	#5,d1		; get end   long word
	sub.w	d0,d1		; get width in long words (end - start)
	bne.b	doublewrite4\@	; check for single write
; single write: the whole span lies inside one long word
	and.l	d2,d3		; combine left and right masks
	or.l	d3,(a0)		; set
	or.l	d3,(a2)		; set
	or.l	d3,(a3)		; set
	not.l	d3		; invert
	and.l	d3,(a1)		; clear
	bra.b	finished4\@
doublewrite4\@
	or.l	d2,(a0)+	; set
	or.l	d2,(a2)+	; set
	or.l	d2,(a3)+	; set
	not.l	d2
	and.l	d2,(a1)+	; clear
	subq.w	#2,d1		; d1 = middle long words - 1 (dbra count)
	bmi.b	writeright4\@	; don't do a multiwrite
; multiwrite
	moveq.l	#-1,d2		; $ffffffff
	moveq.l	#0,d4		; zeroes
multiloop4\@
	move.l	d2,(a0)+	; set
	move.l	d2,(a2)+	; set
	move.l	d2,(a3)+	; set
	move.l	d4,(a1)+	; clear
	dbra	d1,multiloop4\@
writeright4\@
	or.l	d3,(a0)		; set
	or.l	d3,(a2)		; set
	or.l	d3,(a3)		; set
	not.l	d3		; invert
	and.l	d3,(a1)		; clear
finished4\@
	add.l	d5,d6		; go to next scanline
	movea.l	d6,a0		; copy ptr to modify
	dbra	d7,scanloop4\@	; d7 = yheight-1.

; restore the registers saved by _scanconvpix and return to its caller
	movem.l	(sp)+,d2-d7/a2-a5
	rts

	endm

; COLOR14 - span filler for colour 14 (%1110): for every scanline it sets
; the span in the planes addressed by a1, a2 and a3 and clears it in the
; plane addressed by a0 (planes are PLANESIZE bytes apart, starting at a0).
; Register use as read from the code below:
;   a4 / a5 = word tables of minx / maxx, one entry consumed per scanline
;   a0      = start of the current scanline in the first plane
;   d6 / d5 = scanline base address / amount added to it per scanline
;   d7      = scanline count - 1 (dbra counter)
;   d2 / d3 = left / right edge masks, d0 / d1 / d4 = scratch
; The routine ends by popping d2-d7/a2-a5 and returning, so it is presumably
; entered by a jump from code that pushed those registers - confirm at caller.
COLOR14	macro

color14:

scanloop4\@
	move.w	(a4)+,d0	; get minx
	move.w	(a5)+,d1	; get maxx

	cmp.w	d0,d1		; error check: skip the line if maxx < minx
	bmi.b	finished4\@

	move.w	d0,d4		; copy minx
	lsr.w	#5,d4		; get first long word
	lsl.w	#2,d4		; *4 = number of bytes
	adda.w	d4,a0		; add x offset to scanline

	lea	PLANESIZE(a0),a1	; a1..a3 = same position in the next planes
	lea	PLANESIZE(a1),a2
	lea	PLANESIZE(a2),a3

	move.w	d0,d4		; copy minx
	moveq.l	#-1,d2		; start mask, $ffffffff
	andi.w	#$1f,d4		; minx mod 32 = shift count
	lsr.l	d4,d2		; left mask: drop the pixels left of minx

	move.w	d1,d4		; copy maxx
	moveq.l	#-1,d3		; start mask, $ffffffff
	andi.w	#$1f,d4		; maxx mod 32
	neg.w	d4		; - d4
	add.w	#31,d4		; d4 = 31 - d4 = shift count
	lsl.l	d4,d3		; right mask: drop the pixels right of maxx

	lsr.w	#5,d0		; get start long word
	lsr.w	#5,d1		; get end   long word
	sub.w	d0,d1		; get width in long words
	bne.b	doublewrite4\@	; check for single write
; single write: the whole span lies inside one long word
	and.l	d2,d3		; combine left and right masks
	or.l	d3,(a1)		; set
	or.l	d3,(a2)		; set
	or.l	d3,(a3)		; set
	not.l	d3		; invert
	and.l	d3,(a0)		; clear
	bra.b	finished4\@
doublewrite4\@
	or.l	d2,(a1)+	; set
	or.l	d2,(a2)+	; set
	or.l	d2,(a3)+	; set
	not.l	d2
	and.l	d2,(a0)+	; clear
	subq.w	#2,d1		; subtract left and right writes
	bmi.b	writeright4\@	; don't do a multiwrite
; multiwrite\@: full long words between the two edge long words
	moveq.l	#-1,d2		; $ffffffff
	moveq.l	#0,d4		; zeroes
multiloop4\@
	move.l	d2,(a1)+	; set
	move.l	d2,(a2)+	; set
	move.l	d2,(a3)+	; set
	move.l	d4,(a0)+	; clear
	dbra	d1,multiloop4\@
writeright4\@
	or.l	d3,(a1)		; set
	or.l	d3,(a2)		; set
	or.l	d3,(a3)		; set
	not.l	d3		; invert
	and.l	d3,(a0)		; clear
finished4\@
	add.l	d5,d6		; go to next scanline
	movea.l	d6,a0		; copy ptr to modify
	dbra	d7,scanloop4\@	; d7 = yheight-1.

	movem.l	(sp)+,d2-d7/a2-a5
	rts

	endm

; COLOR15 - span filler for colour 15 (%1111): for every scanline it sets
; the span in all four planes addressed by a0, a1, a2 and a3 (planes are
; PLANESIZE bytes apart, starting at a0). Nothing is cleared.
; Register use as read from the code below:
;   a4 / a5 = word tables of minx / maxx, one entry consumed per scanline
;   a0      = start of the current scanline in the first plane
;   d6 / d5 = scanline base address / amount added to it per scanline
;   d7      = scanline count - 1 (dbra counter)
;   d2 / d3 = left / right edge masks, d0 / d1 / d4 = scratch
; The routine ends by popping d2-d7/a2-a5 and returning, so it is presumably
; entered by a jump from code that pushed those registers - confirm at caller.
COLOR15	macro

color15:

scanloop4\@
	move.w	(a4)+,d0	; get minx
	move.w	(a5)+,d1	; get maxx

	cmp.w	d0,d1		; error check: skip the line if maxx < minx
	bmi.b	finished4\@

	move.w	d0,d4		; copy minx
	lsr.w	#5,d4		; get first long word
	lsl.w	#2,d4		; *4 = number of bytes
	adda.w	d4,a0		; add x offset to scanline

	lea	PLANESIZE(a0),a1	; a1..a3 = same position in the next planes
	lea	PLANESIZE(a1),a2
	lea	PLANESIZE(a2),a3

	move.w	d0,d4		; copy minx
	moveq.l	#-1,d2		; start mask, $ffffffff
	andi.w	#$1f,d4		; minx mod 32 = shift count
	lsr.l	d4,d2		; left mask: drop the pixels left of minx

	move.w	d1,d4		; copy maxx
	moveq.l	#-1,d3		; start mask, $ffffffff
	andi.w	#$1f,d4		; maxx mod 32
	neg.w	d4		; - d4
	add.w	#31,d4		; d4 = 31 - d4 = shift count
	lsl.l	d4,d3		; right mask: drop the pixels right of maxx

	lsr.w	#5,d0		; get start long word
	lsr.w	#5,d1		; get end   long word
	sub.w	d0,d1		; get width in long words
	bne.b	doublewrite4\@	; check for single write
; single write: the whole span lies inside one long word
	and.l	d2,d3		; combine left and right masks
	or.l	d3,(a0)		; set
	or.l	d3,(a1)		; set
	or.l	d3,(a2)		; set
	or.l	d3,(a3)		; set
	bra.b	finished4\@
doublewrite4\@
	or.l	d2,(a0)+	; set
	or.l	d2,(a1)+	; set
	or.l	d2,(a2)+	; set
	or.l	d2,(a3)+	; set
	subq.w	#2,d1		; subtract left and right writes
	bmi.b	writeright4\@	; don't do a multiwrite
; multiwrite\@: full long words between the two edge long words
	moveq.l	#-1,d2		; $ffffffff
multiloop4\@
	move.l	d2,(a0)+	; set
	move.l	d2,(a1)+	; set
	move.l	d2,(a2)+	; set
	move.l	d2,(a3)+	; set
	dbra	d1,multiloop4\@
writeright4\@
	or.l	d3,(a0)		; set
	or.l	d3,(a1)		; set
	or.l	d3,(a2)		; set
	or.l	d3,(a3)		; set
finished4\@
	add.l	d5,d6		; go to next scanline
	movea.l	d6,a0		; copy ptr to modify
	dbra	d7,scanloop4\@	; d7 = yheight-1.

	movem.l	(sp)+,d2-d7/a2-a5
	rts

	endm

; Expand the macros...
; Each COLORn macro is invoked exactly once here; the expansion emits the
; code for that colour's span filler together with its colorN entry label,
; which the colorplanes table below refers to.

	COLOR0
	COLOR1
	COLOR2
	COLOR3
	COLOR4
	COLOR5
	COLOR6
	COLOR7
	COLOR8
	COLOR9
	COLOR10
	COLOR11
	COLOR12
	COLOR13
	COLOR14
	COLOR15

	CNOP	0,4	; longword align for 020/030 speed

; Table of the sixteen span-filler entry points, one long word per colour,
; in colour order (entry n = address of colorn). The code that indexes it is
; not part of this section; presumably it uses colour * 4 as the offset.
colorplanes:
	dc.l	color0
	dc.l	color1
	dc.l	color2
	dc.l	color3
	dc.l	color4
	dc.l	color5
	dc.l	color6
	dc.l	color7
	dc.l	color8
	dc.l	color9
	dc.l	color10
	dc.l	color11
	dc.l	color12
	dc.l	color13
	dc.l	color14
	dc.l	color15

; A 68000 line drawer, originally from 68000 Assembly Language, by
; Krantz and Stanley. Modified for the Amiga and the 320x200,
; four bitplane case.
;
; Draws a line from (x,y) towards (x2,y2) in the given colour by stepping
; both coordinates in 16.16 fixed point along the longer axis.
; In:  a1 = first bitplane (the other three follow at PLANESIZE intervals)
;      d0/d1 = x/y, d2/d3 = x2/y2, d4 = colour (only the low 4 bits are used)
; The plot loop runs once per unit of the longer axis, starting at (x,y),
; so the pixel at (x2,y2) itself is not plotted, and a line whose two end
; points coincide draws nothing.
; d2-d7/a2-a6 are saved and restored; d0, d1, a0 and a1 are overwritten.

;extern void __asm drawline68k(register __a1 PLANEPTR p,
;                              register __d0 short x,
;                              register __d1 short y,
;                              register __d2 short x2,
;                              register __d3 short y2,
;                              register __d4 short color);

_drawline68k
	movem.l	d2-d7/a2-a6,-(sp)

	movea.l	a1,a2		; a2..a5 = the four bitplane pointers
	lea	PLANESIZE(a2),a3
	lea	PLANESIZE(a3),a4
	lea	PLANESIZE(a4),a5

	andi.w	#15,d4		; 0..15 color
	add.w	d4,d4		;
	add.w	d4,d4		; (4 byte entries) get table index
	lea	linejmptable(pc),a6 ; table base
	movea.l	0(a6,d4.w),a6	; get jmp address


	move.w	d0,d4		; d4 = x as 16.16 fixed point
	swap	d4
	clr.w	d4
	move.w	d1,d5		; d5 = y as 16.16 fixed point
	swap	d5
	clr.w	d5

	sub.w	d0,d2		; d2 = dx (signed)
	move.w	d2,d0
	bpl.b	sk1_vecd
	neg.w	d0		; d0 = |dx|
sk1_vecd
	sub.w	d1,d3		; d3 = dy (signed)
	move.w	d3,d1
	bpl.b	sk2_vecd
	neg.w	d1		; d1 = |dy|
sk2_vecd
	cmp.w	d0,d1
	bgt.b	y_biggerd	; |dy| > |dx|: step along y instead
; x is the major axis: x moves by +/-1.0 per step, y by dy/|dx|
	tst.w	d2
	bmi.b	sk3_vecd
	move.l	#$10000,d6	; x step = +1.0
	bra.b	sk4_vecd
sk3_vecd
	move.l	#$ffff0000,d6	; x step = -1.0
sk4_vecd
	swap	d3		; dy as 16.16 ...
	clr.w	d3
	asr.l	#2,d3		; ... pre-divided by 4 so the quotient fits in 16 bits
	tst.w	d0
	beq	divtrapd	; avoid dividing by zero
	divs	d0,d3
	ext.l	d3		; keep the quotient, drop the remainder
	asl.l	#2,d3		; undo the pre-divide
	move.l	d3,d7		; d7 = y step per pixel
	move.w	d0,d2		; d2 = number of steps
	bra.b	sk7_vecd
y_biggerd
; y is the major axis: y moves by +/-1.0 per step, x by dx/|dy|
	tst.w	d3
	bmi.b	sk5_vecd
	move.l	#$10000,d7	; y step = +1.0
	bra.b	sk6_vecd
sk5_vecd
	move.l	#$ffff0000,d7	; y step = -1.0
sk6_vecd
	swap	d2		; dx as 16.16 ...
	clr.w	d2
	asr.l	#2,d2		; ... pre-divided by 4 so the quotient fits in 16 bits
	tst.w	d1
	beq	divtrapd	; avoid dividing by zero
	divs	d1,d2
	ext.l	d2		; keep the quotient, drop the remainder
	asl.l	#2,d2		; undo the pre-divide
	move.l	d2,d6		; d6 = x step per pixel
	move.w	d1,d2		; d2 = number of steps
sk7_vecd
sk8_vecd
	subq.w	#1,d2	; setup dbra

	move.l	#$8000,a0	; .5 to add to round up
	move.l	#$8000,a1	; "                   "

	jsr	(a6)	; draw line

divtrapd
	movem.l	(sp)+,d2-d7/a2-a6
	rts


; LINEPLOT op0,op1,op2,op3 - body of one line-plotting routine.
; \1..\4 are the bit instructions (bset or bclr) applied to the byte at the
; same offset from a2, a3, a4 and a5 respectively.
; In:  d4/d5 = current x/y in 16.16 fixed point, d6/d7 = x/y added per step,
;      d2 = number of pixels - 1, a0/a1 = amounts added to x/y before the
;      integer part is taken (_drawline68k loads $8000 = .5 into both).
; Scratch: d0, d1, d3. Ends with rts, so each expansion is a subroutine.
LINEPLOT	macro
lp0_vecd\@
	move.l	d4,d0	; fixed point arithmetic
	move.l	d5,d1
	add.l	a0,d0	; add the rounding amounts
	add.l	a1,d1
	swap	d0	; integer x now in the low word
	swap	d1	; integer y now in the low word

	move.w	d1,d3	; save y
	add.w	d1,d1	; *2
	add.w	d1,d1	; *4
	add.w	d1,d3	; *5
	lsl.w	#3,d3	; *40 (bytes per row)
	move.w	d0,d1	; save x
	lsr.w	#3,d0	; get x byte offset
	add.w	d0,d3	; x offset + y offset
	andi.w	#7,d1	; get x bit set
	not.b	d1	; invert bits (left to right)

        \1.b	d1,0(a2,d3.w)	
        \2.b	d1,0(a3,d3.w)	
        \3.b	d1,0(a4,d3.w)	
        \4.b	d1,0(a5,d3.w)	

	add.l	d6,d4	; step x
	add.l	d7,d5	; step y
	dbra	d2,lp0_vecd\@
	rts
	
	endm

; One plotting routine per colour. The four LINEPLOT arguments are the bit
; operations for the planes at a2, a3, a4 and a5 in that order: bset where
; the corresponding bit of the colour number is 1 (lowest bit first), bclr
; where it is 0.
linecolor0:
	LINEPLOT	bclr,bclr,bclr,bclr
linecolor1:
	LINEPLOT	bset,bclr,bclr,bclr
linecolor2:
	LINEPLOT	bclr,bset,bclr,bclr
linecolor3:
	LINEPLOT	bset,bset,bclr,bclr
linecolor4:
	LINEPLOT	bclr,bclr,bset,bclr
linecolor5:
	LINEPLOT	bset,bclr,bset,bclr
linecolor6:
	LINEPLOT	bclr,bset,bset,bclr
linecolor7:
	LINEPLOT	bset,bset,bset,bclr
linecolor8:
	LINEPLOT	bclr,bclr,bclr,bset
linecolor9:
	LINEPLOT	bset,bclr,bclr,bset
linecolor10:
	LINEPLOT	bclr,bset,bclr,bset
linecolor11:
	LINEPLOT	bset,bset,bclr,bset
linecolor12:
	LINEPLOT	bclr,bclr,bset,bset
linecolor13:
	LINEPLOT	bset,bclr,bset,bset
linecolor14:
	LINEPLOT	bclr,bset,bset,bset
linecolor15:
	LINEPLOT	bset,bset,bset,bset

; Addresses of the sixteen plotting routines, one long word per colour.
; _drawline68k indexes this table with colour * 4 and calls the entry.
linejmptable:
	dc.l	linecolor0
	dc.l	linecolor1
	dc.l	linecolor2
	dc.l	linecolor3
	dc.l	linecolor4
	dc.l	linecolor5
	dc.l	linecolor6
	dc.l	linecolor7
	dc.l	linecolor8
	dc.l	linecolor9
	dc.l	linecolor10
	dc.l	linecolor11
	dc.l	linecolor12
	dc.l	linecolor13
	dc.l	linecolor14
	dc.l	linecolor15

;extern void __asm fillline68k(register __d0 short x,
;                              register __d1 short y,
;                              register __d2 short x2,
;                              register __d3 short y2,
;                              register __a0 short * table);

; The line drawer modified...
;
; _fillline68k - record one polygon edge in a scan table.
; Walks the edge from its top end point to its bottom end point in 16.16
; fixed point and stores the rounded x each time a new scanline is reached.
; In:  d0/d1 = x/y, d2/d3 = x2/y2, a0 = table of words indexed by y.
; Out: table entries for the rows the edge covers; the entries for both end
;      rows are also stored directly before the walk starts.
; d2-d7 are saved and restored; d0, d1, a0 and a1 are overwritten.

_fillline68k:
	movem.l	d2-d7,-(sp)

	cmp.w	d1,d3		; always draw top->bottom
	bgt.b	oky
	exg	d1,d3		; swap y's
	exg	d0,d2		; swap x's
oky
	move.w	d3,d6
	add.w	d6,d6		; 2 byte entries
	move.w	d2,0(a0,d6.w)	; save x in scan table

	move.w	d1,d6
	add.w	d6,d6		; 2 byte entries
	move.w	d0,0(a0,d6.w)	; save x in scan table

	adda.w	d6,a0		; set up address to start of table

	move.w	d0,d4		; d4 = x as 16.16 fixed point
	swap	d4
	clr.w	d4
	move.w	d1,d5		; d5 = y as 16.16 fixed point
	swap	d5
	clr.w	d5

	sub.w	d0,d2		; d2 = dx (signed)
	move.w	d2,d0
	bpl.b	sk1_vec
	neg.w	d0		; d0 = |dx|
sk1_vec
	sub.w	d1,d3		; d3 = dy (signed)
	move.w	d3,d1
	bpl.b	sk2_vec
	neg.w	d1		; d1 = |dy|
sk2_vec
	cmp.w	d0,d1
	bgt.b	y_bigger	; |dy| > |dx|: step along y instead
; x is the major axis: x moves by +/-1.0 per step, y by dy/|dx|
	tst.w	d2
	bmi.b	sk3_vec
	move.l	#$10000,d6	; x step = +1.0
	bra.b	sk4_vec
sk3_vec
	move.l	#$ffff0000,d6	; x step = -1.0
sk4_vec
	swap	d3		; dy as 16.16 ...
	clr.w	d3
	asr.l	#2,d3		; ... pre-divided by 4 so the quotient fits in 16 bits
	tst.w	d0
	beq	divtrap		; avoid dividing by zero
	divs	d0,d3
	ext.l	d3		; keep the quotient, drop the remainder
	asl.l	#2,d3		; undo the pre-divide
	move.l	d3,d7		; d7 = y step
	move.w	d0,d2		; d2 = number of steps
	bra.b	sk7_vec
y_bigger
; y is the major axis: y moves by +/-1.0 per step, x by dx/|dy|
	tst.w	d3
	bmi.b	sk5_vec
	move.l	#$10000,d7	; y step = +1.0
	bra.b	sk6_vec
sk5_vec
	move.l	#$ffff0000,d7	; y step = -1.0
sk6_vec
	swap	d2		; dx as 16.16 ...
	clr.w	d2
	asr.l	#2,d2		; ... pre-divided by 4 so the quotient fits in 16 bits
	tst.w	d1
	beq	divtrap		; avoid dividing by zero
	divs	d1,d2
	ext.l	d2		; keep the quotient, drop the remainder
	asl.l	#2,d2		; undo the pre-divide
	move.l	d2,d6		; d6 = x step
	move.w	d1,d2		; d2 = number of steps
sk7_vec
; d2 is deliberately not decremented: dbra then runs steps+1 times, so the
; last point of the edge is visited as well.
	move.l	#$8000,d3	; .5 to add to round up

	move.l	d4,d0		; compute 1st x point
	add.l	d3,d0
	swap	d0
	move.l	d5,d1		; compute 1st y point
	add.l	d3,d1
	swap	d1

; Seed "old y" with a value that can never equal the first y, so the first
; row is always stored and a0 advances in step with y. (Seeding it from d1
; before d1 is reloaded used |dy|, which skipped the first store whenever
; |dy| happened to equal the starting y and shifted all later entries.)
	move.w	d1,a1
	subq.w	#1,a1		; old y = first y - 1
lp0_vec
	cmp.w	a1,d1		; newy # oldy?
	beq.b	ynochange	; no
	move.w	d0,(a0)+	; fastest possible write to memory.
ynochange			
	move.w	d1,a1		; old y = current y

	add.l	d6,d4		; step x
	add.l	d7,d5		; step y

	move.l	d4,d0		; fixed point arithmetic
	add.l	d3,d0
	swap	d0
	move.l	d5,d1
	add.l	d3,d1
	swap	d1
sk8_vec
	dbra	d2,lp0_vec
divtrap	
	movem.l	(sp)+,d2-d7
	rts

	END
SHAR_EOF
fi # end of overwriting check
#	End of shell archive
exit 0

jcs@crash.cts.com (John Schultz) (06/10/90)

  Here are a few subtle changes to improve the performance of the processor
polygon fill code I posted earlier. Both changes are for the file
scanconvert.a. Also, note that my polygon fill code only works on convex
polygons. If the screen width is changed from 320 pixels, the line drawer's
pixel offset calculations will have to be rewritten to handle the
non-40-bytes-per-row case (the 80 bytes per row (640 pixels) case is
trivial: just change the lsl.w #3,d3 to lsl.w #4,d3). The polygon routines
simply require a changed define, and could be generalized so that they
handle screen width changes dynamically.

  In _drawline68k, change the code between sk8_vecd and the second
add.l to the below:

; start _drawline68k changes

; Replacement excerpt for _drawline68k: a single rounding constant in a0 is
; now added to both x and y, which frees a1.
; NOTE(review): a1 is loaded with 7 here, but this excerpt does not show any
; use of it inside LINEPLOT - confirm what the rest of the macro expects.
sk8_vecd
	subq.w	#1,d2	; setup dbra

	move.l	#$8000,a0	; .5 to add to round up
	move.w	#7,a1

	jsr	(a6)	; draw line

divtrapd
	movem.l	(sp)+,d2-d7/a2-a6
	rts


LINEPLOT	macro
lp0_vecd\@
	move.l	d4,d0	; fixed point arithmetic
	move.l	d5,d1
	add.l	a0,d0	; a0 = .5, used for x ...
	add.l	a0,d1	; ... and for y

; end of _drawline68k changes

  In _fillline68k, change everything from sk7_vec to sk8_vec to:

; start _fillline68k changes

; Replacement excerpt for _fillline68k: the rounded x/y are now computed once
; at the top of the loop instead of before the loop and again at its end.
; NOTE(review): when this excerpt is entered, d1 holds |dy| from the slope
; setup in the posted routine, not the starting y, so a1 is seeded with
; |dy|. If |dy| equals the starting y, the first table write is skipped and
; the later writes land one entry early - consider seeding a1 with the
; starting y minus 1 instead.
sk7_vec
	move.l	#$8000,d3	; .5 to add to round up
	move.w	d1,a1		; seed old y (d1 = |dy| at this point, see note)
lp0_vec
	move.l	d4,d0		; fixed point arithmetic
	add.l	d3,d0
	swap	d0
	move.l	d5,d1
	add.l	d3,d1
	swap	d1

	cmp.w	a1,d1		; newy # oldy?
	beq.b	ynochange	; no
	move.w	d0,(a0)+	; fastest possible write to memory.
ynochange			
	move.w	d1,a1		; old y = current y

	add.l	d6,d4		; step x
	add.l	d7,d5		; step y

sk8_vec

; end of _fillline68k changes