[comp.sys.amiga.tech] Complete Source Code for Processor Polygon Filler

jcs@crash.cts.com (John Schultz) (06/09/90)

  To borrow a phrase from Young MC, You want it? You got it. Here are my 
complete processor polygon fill routines. Now you can find out how much
faster the processor is first hand. I made this code as generic as
possible, so Macs and other 68000-based systems should be able to use this
code (although register conventions may force some changes). 
  This code also points out a bug in Lattice 5.04, read on for more info.

  This *source* code may be used for whatever purpose you wish, but cannot
be published or sold without permission of author. You can use the source
without restriction in your own programs.

  If you can make this code go faster, by all means, let me know...

  Have fun, enjoy, etc,

  John Schultz

#! /bin/sh
# This is a shell archive, meaning:
# 1. Remove everything above the #! /bin/sh line.
# 2. Save the resulting text in a file.
# 3. Execute the file with /bin/sh (not csh) to create the files:
#	fill.c
#	scanconvert.a
# This archive created: Fri Jun  8 21:06:48 1990
export PATH; PATH=/bin:$PATH
# Extract fill.c unless a file of that name already exists; the here-document
# started by the cat below carries the file's contents.
if test -f 'fill.c'
then
	echo shar: will not over-write existing file "'fill.c'"
else
cat << \SHAR_EOF > 'fill.c'
/* fill.c, a processor polygon filler */
/* Copyright (c) 1990 John Schultz, All Rights Reserved */

/* This is the C interface to the assembly code that does most of the   */
/* work. I haven't seen an algorithm of this type for filling polygons  */
/* elsewhere, so I'd like to think my implementation of this table fill */
/* algorithm is unique :-).The algorithm works by first finding out the */
/* orientation of the polygon, then filling the tables from miny to    */
/* maxy. This allows the table to be updated unconditionally, without */
/* having to read the current values from the tables, compare, then  */
/* write back to memory.                                            */
/* The polygons are an array of drawpoints, and you don't need to  */
/* close the polygons: a triangle will only have three points.    */

/* This code has been optimized for 320x200x4 bitmaps, which have */
/* been allocated linearly (One 32,000 byte allocation as opposed */
/* to four 8,000 byte allocations).                               */ 

/* To compile this code use:                                      */
/*   lc -O -cuf -v fill.c                                         */
/*   asm scanconvert.a                                            */
/* Link to your own code as appropriate.                          */

/* I am using Lattice 5.04, and get a CXERR: 26, Line: 0, if the  */
/* optimizer is not used!                                         */

/* Look ma, no includes. */

/* One polygon vertex: an x/y pair of shorts. */
typedef struct drawpoint {
  short dx;
  short dy;
} drawpoint;

#define MAXY 201                /* Bitmap max y + 1 */

/* Scanconverter tables, one entry per scanline */
short xmin[MAXY];
short xmax[MAXY];

/* For toggling between fill tables: slot 0 is xmin, slot 1 is xmax */
short * xtable[] = {
  xmin,
  xmax
};

/* Prototypes */

/* The three routines below are exported by scanconvert.a. Each argument  */
/* is bound to the 68000 register named in its declaration.               */

/* Line drawer used by drawpoly for degenerate polygons: takes the first  */
/* plane pointer, two end points (x,y) and (x2,y2), and a color.          */
extern void __asm drawline68k(register __a1 char * p, /* 1st Plane Ptr */
                              register __d0 short x,
                              register __d1 short y,
                              register __d2 short x2,
                              register __d3 short y2,
                              register __d4 short color);

/* Takes one polygon edge (x,y)-(x2,y2) and a fill table (xmin or xmax).  */
/* NOTE(review): presumably writes the edge's x value for each scanline   */
/* into the table -- confirm against the routine in scanconvert.a.        */
extern void __asm fillline68k(register __d0 short x,
                              register __d1 short y,
                              register __d2 short x2,
                              register __d3 short y2,
                              register __a0 short * table);

/* Fills scanlines miny..maxy in the given color, reading the span of     */
/* each scanline from the minx/maxx tables.                               */
extern void __asm scanconvpix(register __a0 char * p, /* 1st Plane Ptr */
                              register __a1 short * minx,
                              register __a2 short * maxx,
                              register __d0 short miny,
                              register __d1 short maxy,
                              register __d2 short color);

/* C entry point: fills the polygon described by the cl points in dl.     */
void drawpoly(drawpoint * dl, /* An array of drawpoints */
              char * p,      /* 1st Plane Ptr          */
              short cl,     /* Number of points       */
              short color);

/* Code */

/* drawpoly: fill the polygon whose cl vertices are in dl with color,     */
/* into the bitmap whose first plane starts at p.                         */
/* The polygon does not need to be closed. If the vertices are collinear  */
/* (orientation sum is zero) a single line is drawn instead. Fewer than   */
/* two points draws nothing.                                              */
void drawpoly(drawpoint * dl, /* An array of drawpoints */
              char * p,      /* 1st Plane Ptr          */
              short cl,     /* Number of points       */
              short color){
short miny,maxy;        /* Indices of the topmost and bottommost vertex */
short i,inext,index;
short tminy,tmaxy;      /* y values of those two vertices */
short j;
long orient=0;

  if (cl < 2) return;   /* Nothing to draw; keeps dl[1] below in bounds */

/* Find out if polygon is clockwise or counterclockwise */

  for (i=0; i < cl; i++) {        /* Use the Newell method */
    if (i == (cl-1)) {
      j = 0;
    } else {
      j = i+1;
    }
    /* Accumulate in long so the product cannot overflow a 16 bit int */
    orient += (long)(dl[i].dx - dl[j].dx)*(dl[i].dy + dl[j].dy);
  } /* for i */ 

  if (orient != 0) {    /* Check to see if we have a line or polygon */
    index = (orient < 0) ? 0 : 1;

/* Find miny,maxy */
    /* Seed from vertex 0 so miny/maxy are always assigned; the strict */
    /* compares keep the first vertex that reaches each extreme.       */
    miny = maxy = 0;
    tminy = tmaxy = dl[0].dy;
    for (i=1; i < cl; i++) {
      if (dl[i].dy < tminy) {miny = i; tminy = dl[i].dy;}
      if (dl[i].dy > tmaxy) {maxy = i; tmaxy = dl[i].dy;}
    } /* for i */

/* Fill tables */
    i = miny; /* Start at top, fill to bottom, filling lines from top */
    do {     /* to bottom for efficient assembly implementation.     */
      inext = i + 1;
      if (inext >= cl) inext = 0; /* Wrap around */
      fillline68k(dl[i].dx,dl[i].dy,dl[inext].dx,dl[inext].dy,xtable[index]);
      if (inext == maxy) index ^= 1; /* Toggle min/max to keep lines    */
      i++;                          /* being filled from top to bottom */
      if (i >= cl) i = 0; /* Wrap around */
    } while (inext != miny); /* Come full circle, done */

/* Draw polygon */
    scanconvpix(p,xmin,xmax,tminy,tmaxy,color);

  } else { /* Draw a line */

/* This test really only works for triangles */
    /* If the first two points coincide, the third is the far end. */
    if ((cl > 2) && (dl[0].dx == dl[1].dx) && (dl[0].dy == dl[1].dy))
      drawline68k(p,dl[0].dx,dl[0].dy,dl[2].dx,dl[2].dy,color);
    else
      drawline68k(p,dl[0].dx,dl[0].dy,dl[1].dx,dl[1].dy,color);

  } /* if orient */

} /* end drawpoly */

/* end fill.c */
SHAR_EOF
fi # end of overwriting check
# Extract scanconvert.a unless a file of that name already exists.
if test -f 'scanconvert.a'
then
	echo shar: will not over-write existing file "'scanconvert.a'"
else
cat << \SHAR_EOF > 'scanconvert.a'
; scanconvert.a
; Copyright (c) 1990 John Schultz, All Rights Reserved

; Created 14-March-90
; Modified 17-May-90 
;   Re-written with long word aligned writes: now up to twice as fast
;   as using bfset/bfclr (bitfield instructions can hit up to 5 bytes,
;   not long word aligned).
;   Code is now 68000 compatible.
; Modified 8-June-90
;   Included fillline68k, drawline68k in this file for Fill example.
;   Assembles to 3840 bytes using the Devpac 2.0 assembler.

	section	scanconvert,code

	xdef	_scanconvpix
	xdef	_fillline68k
	xdef	_drawline68k

; This code has been optimized for 320x200x4 bitmaps, which have
; been allocated linearly (One 32,000 byte allocation as opposed
; to four 8,000 byte allocations). 

PLANESIZE	equ	8000	; 320x200 bitplanes
BYTESROW	equ	40	; "               "

;extern void __asm scanconvpix(register __a0 PLANEPTR p,
;                              register __a1 short * minx,
;                              register __a2 short * maxx,
;                              register __d0 short miny,
;                              register __d1 short maxy,
;                              register __d2 short color);
;
; Entry point for the scanline filler. Saves d2-d7/a2-a5, positions the
; table pointers and the bitmap pointer on scanline miny, then jumps to
; the fill routine selected by color through the colorplanes table.
; The jump target is expected to restore the saved registers and return.
;
; Registers handed to the color routine:
;   a0 = address of scanline miny in the first plane (copy kept in d6)
;   a4 = &minx[miny], a5 = &maxx[miny]
;   d5 = BYTESROW, d7 = maxy - miny (dbra loop count)

_scanconvpix:
	movem.l	d2-d7/a2-a5,-(sp)

	move.w	d1,d7	; get maxy. d0 = miny.
	sub.w	d0,d7	; compute yheight. +1 not necessary: using dbra.

	movea.l	a1,a4	; copy xminptr
	movea.l	a2,a5	; copy xmaxptr

	move.w	d0,d6	; copy miny
	add.w	d6,d6	; y index is 2 bytes, so *2 offset
	adda.w	d6,a4	; get to start position minx
	adda.w	d6,a5	; get to start position maxx

	move.w	d0,d6	; copy miny
	lsl.w	#3,d6	; *8
	lsl.w	#5,d0	; *32
	add.w	d6,d0	; *40 = miny * BYTESROW

	adda.w	d0,a0	; go to first scanline in bitmap
	move.l	a0,d6	; setup for first pass (d6 += 40 each pass)

	moveq.l	#BYTESROW,d5	; bytes per row, to be added each scanline

	lea	colorplanes(pc),a2	; get colorplane table
	add.w	d2,d2
	add.w	d2,d2			; entries are 4 bytes long
	movea.l	0(a2,d2.w),a2		; get address of the color routine

	jmp	(a2)			; do specific plane writes/clears

; end of scanconvpix

; Specific fill cases for 0-15 colors. Speed is of the essence, not
; code size :-). Each case is specifically optimized. Could be made
; shorter, but slower.

; COLOR0: scanline fill for colour 0 - clears the span minx..maxx in all
; four planes on every scanline.
; Expects the register setup made by scanconvpix: a0/d6 = current scanline,
; a4/a5 = minx/maxx table entries, d5 = BYTESROW, d7 = scanline count - 1.
COLOR0	macro

scanloop4\@:
	move.w	(a4)+,d0	; get minx
	move.w	(a5)+,d1	; get maxx

	cmp.w	d0,d1		; error check
	bmi.b	finished4\@	; maxx < minx: nothing to fill on this line

	move.w	d0,d4		; copy minx
	lsr.w	#5,d4		; get first long word
	lsl.w	#2,d4		; *4 = number of bytes
	adda.w	d4,a0		; add x offset to scanline

	lea	PLANESIZE(a0),a1	; plane 1
	lea	PLANESIZE(a1),a2	; plane 2
	lea	PLANESIZE(a2),a3	; plane 3

	move.w	d0,d4		; copy minx
	moveq.l	#-1,d2		; start mask, $ffffffff
	andi.w	#$1f,d4		; minx mod 32 = shift count
	lsr.l	d4,d2		; left mask: shift by d4 pixels

	move.w	d1,d4		; copy maxx
	moveq.l	#-1,d3		; start mask, $ffffffff
	andi.w	#$1f,d4		; maxx mod 32
	neg.w	d4		; - d4
	add.w	#31,d4		; d4 = 31 - d4 = shift count
	lsl.l	d4,d3		; right mask: shift by d4 pixels

	lsr.w	#5,d0		; get start long word
	lsr.w	#5,d1		; get end   long word
	sub.w	d0,d1		; get width in long words
	bne.b	doublewrite4\@	; check for single write
; single write: span starts and ends in the same long word
	and.l	d2,d3		; combine left and right masks
	not.l	d3		; invert
	and.l	d3,(a0)		; clear
	and.l	d3,(a1)		; clear
	and.l	d3,(a2)		; clear
	and.l	d3,(a3)		; clear
	bra.b	finished4\@
doublewrite4\@:
	not.l	d2		; invert
	and.l	d2,(a0)+	; write left mask
	and.l	d2,(a1)+	; write left mask
	and.l	d2,(a2)+	; write left mask
	and.l	d2,(a3)+	; write left mask
	subq.w	#2,d1		; subtract left and right writes
	bmi.b	writeright4\@	; don't do a multiwrite
; multiwrite: whole long words between the two edges
	moveq.l	#0,d2		; zeroes
multiloop4\@:
	move.l	d2,(a0)+	; write middle 0's
	move.l	d2,(a1)+	; write middle 0's
	move.l	d2,(a2)+	; write middle 0's
	move.l	d2,(a3)+	; write middle 0's
	dbra	d1,multiloop4\@
writeright4\@:
	not.l	d3
	and.l	d3,(a0)		; write right mask
	and.l	d3,(a1)		; write right mask
	and.l	d3,(a2)		; write right mask
	and.l	d3,(a3)		; write right mask
finished4\@:
	add.l	d5,d6		; go to next scanline
	movea.l	d6,a0		; copy ptr to modify
	dbra	d7,scanloop4\@	; d7 = yheight-1.

	movem.l	(sp)+,d2-d7/a2-a5	; restore registers saved on entry
	rts
	endm


; COLOR1: scanline fill for colour 1 - sets plane 0 and clears planes
; 1, 2 and 3 over the span minx..maxx on every scanline.
; Expects the register setup made by scanconvpix: a0/d6 = current scanline,
; a4/a5 = minx/maxx table entries, d5 = BYTESROW, d7 = scanline count - 1.
COLOR1	macro

scanloop4\@:
	move.w	(a4)+,d0	; get minx
	move.w	(a5)+,d1	; get maxx

	cmp.w	d0,d1		; error check
	bmi.b	finished4\@	; maxx < minx: nothing to fill on this line

	move.w	d0,d4		; copy minx
	lsr.w	#5,d4		; get first long word
	lsl.w	#2,d4		; *4 = number of bytes
	adda.w	d4,a0		; add x offset to scanline

	lea	PLANESIZE(a0),a1	; plane 1
	lea	PLANESIZE(a1),a2	; plane 2
	lea	PLANESIZE(a2),a3	; plane 3

	move.w	d0,d4		; copy minx
	moveq.l	#-1,d2		; start mask, $ffffffff
	andi.w	#$1f,d4		; minx mod 32 = shift count
	lsr.l	d4,d2		; left mask: shift by d4 pixels

	move.w	d1,d4		; copy maxx
	moveq.l	#-1,d3		; start mask, $ffffffff
	andi.w	#$1f,d4		; maxx mod 32
	neg.w	d4		; - d4
	add.w	#31,d4		; d4 = 31 - d4 = shift count
	lsl.l	d4,d3		; right mask: shift by d4 pixels

	lsr.w	#5,d0		; get start long word
	lsr.w	#5,d1		; get end   long word
	sub.w	d0,d1		; get width in long words
	bne.b	doublewrite4\@	; check for single write
; single write: span starts and ends in the same long word
	and.l	d2,d3		; combine left and right masks
	or.l	d3,(a0)		; set
	not.l	d3		; invert
	and.l	d3,(a1)		; clear
	and.l	d3,(a2)		; clear
	and.l	d3,(a3)		; clear
	not.l	d3		; undo invert (d3 is rebuilt on the next scanline)
	bra.b	finished4\@
doublewrite4\@:
	or.l	d2,(a0)+	; set
	not.l	d2		; invert
	and.l	d2,(a1)+	; clear
	and.l	d2,(a2)+	; clear
	and.l	d2,(a3)+	; clear
	subq.w	#2,d1		; subtract left and right writes
	bmi.b	writeright4\@	; don't do a multiwrite
; multiwrite: whole long words between the two edges
	moveq.l	#-1,d2		; $ffffffff
	moveq.l	#0,d4		; zeroes
multiloop4\@:
	move.l	d2,(a0)+	; set
	move.l	d4,(a1)+	; clear
	move.l	d4,(a2)+	; clear
	move.l	d4,(a3)+	; clear
	dbra	d1,multiloop4\@
writeright4\@:
	or.l	d3,(a0)		; set
	not.l	d3		; invert
	and.l	d3,(a1)		; clear
	and.l	d3,(a2)		; clear
	and.l	d3,(a3)		; clear
finished4\@:
	add.l	d5,d6		; go to next scanline
	movea.l	d6,a0		; copy ptr to modify
	dbra	d7,scanloop4\@	; d7 = yheight-1.

	movem.l	(sp)+,d2-d7/a2-a5	; restore registers saved on entry
	rts
	endm


; COLOR2: scanline fill for colour 2 - sets plane 1 and clears planes
; 0, 2 and 3 over the span minx..maxx on every scanline.
; Expects the register setup made by scanconvpix: a0/d6 = current scanline,
; a4/a5 = minx/maxx table entries, d5 = BYTESROW, d7 = scanline count - 1.
COLOR2	macro

scanloop4\@:
	move.w	(a4)+,d0	; get minx
	move.w	(a5)+,d1	; get maxx

	cmp.w	d0,d1		; error check
	bmi.b	finished4\@	; maxx < minx: nothing to fill on this line

	move.w	d0,d4		; copy minx
	lsr.w	#5,d4		; get first long word
	lsl.w	#2,d4		; *4 = number of bytes
	adda.w	d4,a0		; add x offset to scanline

	lea	PLANESIZE(a0),a1	; plane 1
	lea	PLANESIZE(a1),a2	; plane 2
	lea	PLANESIZE(a2),a3	; plane 3

	move.w	d0,d4		; copy minx
	moveq.l	#-1,d2		; start mask, $ffffffff
	andi.w	#$1f,d4		; minx mod 32 = shift count
	lsr.l	d4,d2		; left mask: shift by d4 pixels

	move.w	d1,d4		; copy maxx
	moveq.l	#-1,d3		; start mask, $ffffffff
	andi.w	#$1f,d4		; maxx mod 32
	neg.w	d4		; - d4
	add.w	#31,d4		; d4 = 31 - d4 = shift count
	lsl.l	d4,d3		; right mask: shift by d4 pixels

	lsr.w	#5,d0		; get start long word
	lsr.w	#5,d1		; get end   long word
	sub.w	d0,d1		; get width in long words
	bne.b	doublewrite4\@	; check for single write
; single write: span starts and ends in the same long word
	and.l	d2,d3		; combine left and right masks
	or.l	d3,(a1)		; set
	not.l	d3		; invert
	and.l	d3,(a0)		; clear
	and.l	d3,(a2)		; clear
	and.l	d3,(a3)		; clear
	bra.b	finished4\@
doublewrite4\@:
	or.l	d2,(a1)+	; set
	not.l	d2		; invert
	and.l	d2,(a0)+	; clear
	and.l	d2,(a2)+	; clear
	and.l	d2,(a3)+	; clear
	subq.w	#2,d1		; subtract left and right writes
	bmi.b	writeright4\@	; don't do a multiwrite
; multiwrite: whole long words between the two edges
	moveq.l	#-1,d2		; $ffffffff
	moveq.l	#0,d4		; zeroes
multiloop4\@:
	move.l	d2,(a1)+	; set
	move.l	d4,(a0)+	; clear
	move.l	d4,(a2)+	; clear
	move.l	d4,(a3)+	; clear
	dbra	d1,multiloop4\@
writeright4\@:
	or.l	d3,(a1)		; set
	not.l	d3		; invert
	and.l	d3,(a0)		; clear
	and.l	d3,(a2)		; clear
	and.l	d3,(a3)		; clear
finished4\@:
	add.l	d5,d6		; go to next scanline
	movea.l	d6,a0		; copy ptr to modify
	dbra	d7,scanloop4\@	; d7 = yheight-1.

	movem.l	(sp)+,d2-d7/a2-a5	; restore registers saved on entry
	rts
	endm


; COLOR3: scanline fill for colour 3 - sets planes 0 and 1 and clears
; planes 2 and 3 over the span minx..maxx on every scanline.
; Expects the register setup made by scanconvpix: a0/d6 = current scanline,
; a4/a5 = minx/maxx table entries, d5 = BYTESROW, d7 = scanline count - 1.
COLOR3	macro

scanloop4\@:
	move.w	(a4)+,d0	; get minx
	move.w	(a5)+,d1	; get maxx

	cmp.w	d0,d1		; error check
	bmi.b	finished4\@	; maxx < minx: nothing to fill on this line

	move.w	d0,d4		; copy minx
	lsr.w	#5,d4		; get first long word
	lsl.w	#2,d4		; *4 = number of bytes
	adda.w	d4,a0		; add x offset to scanline

	lea	PLANESIZE(a0),a1	; plane 1
	lea	PLANESIZE(a1),a2	; plane 2
	lea	PLANESIZE(a2),a3	; plane 3

	move.w	d0,d4		; copy minx
	moveq.l	#-1,d2		; start mask, $ffffffff
	andi.w	#$1f,d4		; minx mod 32 = shift count
	lsr.l	d4,d2		; left mask: shift by d4 pixels

	move.w	d1,d4		; copy maxx
	moveq.l	#-1,d3		; start mask, $ffffffff
	andi.w	#$1f,d4		; maxx mod 32
	neg.w	d4		; - d4
	add.w	#31,d4		; d4 = 31 - d4 = shift count
	lsl.l	d4,d3		; right mask: shift by d4 pixels

	lsr.w	#5,d0		; get start long word
	lsr.w	#5,d1		; get end   long word
	sub.w	d0,d1		; get width in long words
	bne.b	doublewrite4\@	; check for single write
; single write: span starts and ends in the same long word
	and.l	d2,d3		; combine left and right masks
	or.l	d3,(a0)		; set
	or.l	d3,(a1)		; set
	not.l	d3		; invert
	and.l	d3,(a2)		; clear
	and.l	d3,(a3)		; clear
	bra.b	finished4\@
doublewrite4\@:
	or.l	d2,(a0)+	; set
	or.l	d2,(a1)+	; set
	not.l	d2		; invert
	and.l	d2,(a2)+	; clear
	and.l	d2,(a3)+	; clear
	subq.w	#2,d1		; subtract left and right writes
	bmi.b	writeright4\@	; don't do a multiwrite
; multiwrite: whole long words between the two edges
	moveq.l	#-1,d2		; $ffffffff
	moveq.l	#0,d4		; zeroes
multiloop4\@:
	move.l	d2,(a0)+	; set
	move.l	d2,(a1)+	; set
	move.l	d4,(a2)+	; clear
	move.l	d4,(a3)+	; clear
	dbra	d1,multiloop4\@
writeright4\@:
	or.l	d3,(a0)		; set
	or.l	d3,(a1)		; set
	not.l	d3		; invert
	and.l	d3,(a2)		; clear
	and.l	d3,(a3)		; clear
finished4\@:
	add.l	d5,d6		; go to next scanline
	movea.l	d6,a0		; copy ptr to modify
	dbra	d7,scanloop4\@	; d7 = yheight-1.

	movem.l	(sp)+,d2-d7/a2-a5	; restore registers saved on entry
	rts
	endm


; COLOR4: scanline fill for colour 4 - sets plane 2 and clears planes
; 0, 1 and 3 over the span minx..maxx on every scanline.
; Expects the register setup made by scanconvpix: a0/d6 = current scanline,
; a4/a5 = minx/maxx table entries, d5 = BYTESROW, d7 = scanline count - 1.
COLOR4	macro

scanloop4\@:
	move.w	(a4)+,d0	; get minx
	move.w	(a5)+,d1	; get maxx

	cmp.w	d0,d1		; error check
	bmi.b	finished4\@	; maxx < minx: nothing to fill on this line

	move.w	d0,d4		; copy minx
	lsr.w	#5,d4		; get first long word
	lsl.w	#2,d4		; *4 = number of bytes
	adda.w	d4,a0		; add x offset to scanline

	lea	PLANESIZE(a0),a1	; plane 1
	lea	PLANESIZE(a1),a2	; plane 2
	lea	PLANESIZE(a2),a3	; plane 3

	move.w	d0,d4		; copy minx
	moveq.l	#-1,d2		; start mask, $ffffffff
	andi.w	#$1f,d4		; minx mod 32 = shift count
	lsr.l	d4,d2		; left mask: shift by d4 pixels

	move.w	d1,d4		; copy maxx
	moveq.l	#-1,d3		; start mask, $ffffffff
	andi.w	#$1f,d4		; maxx mod 32
	neg.w	d4		; - d4
	add.w	#31,d4		; d4 = 31 - d4 = shift count
	lsl.l	d4,d3		; right mask: shift by d4 pixels

	lsr.w	#5,d0		; get start long word
	lsr.w	#5,d1		; get end   long word
	sub.w	d0,d1		; get width in long words
	bne.b	doublewrite4\@	; check for single write
; single write: span starts and ends in the same long word
	and.l	d2,d3		; combine left and right masks
	or.l	d3,(a2)		; set
	not.l	d3		; invert
	and.l	d3,(a0)		; clear
	and.l	d3,(a1)		; clear
	and.l	d3,(a3)		; clear
	bra.b	finished4\@
doublewrite4\@:
	or.l	d2,(a2)+	; set
	not.l	d2		; invert
	and.l	d2,(a0)+	; clear
	and.l	d2,(a1)+	; clear
	and.l	d2,(a3)+	; clear
	subq.w	#2,d1		; subtract left and right writes
	bmi.b	writeright4\@	; don't do a multiwrite
; multiwrite: whole long words between the two edges
	moveq.l	#-1,d2		; $ffffffff
	moveq.l	#0,d4		; zeroes
multiloop4\@:
	move.l	d2,(a2)+	; set
	move.l	d4,(a0)+	; clear
	move.l	d4,(a1)+	; clear
	move.l	d4,(a3)+	; clear
	dbra	d1,multiloop4\@
writeright4\@:
	or.l	d3,(a2)		; set
	not.l	d3		; invert
	and.l	d3,(a0)		; clear
	and.l	d3,(a1)		; clear
	and.l	d3,(a3)		; clear
finished4\@:
	add.l	d5,d6		; go to next scanline
	movea.l	d6,a0		; copy ptr to modify
	dbra	d7,scanloop4\@	; d7 = yheight-1.

	movem.l	(sp)+,d2-d7/a2-a5	; restore registers saved on entry
	rts
	endm


; COLOR5: scanline fill for colour 5 - sets planes 0 and 2 and clears
; planes 1 and 3 over the span minx..maxx on every scanline.
; Expects the register setup made by scanconvpix: a0/d6 = current scanline,
; a4/a5 = minx/maxx table entries, d5 = BYTESROW, d7 = scanline count - 1.
COLOR5	macro

scanloop4\@:
	move.w	(a4)+,d0	; get minx
	move.w	(a5)+,d1	; get maxx

	cmp.w	d0,d1		; error check
	bmi.b	finished4\@	; maxx < minx: nothing to fill on this line

	move.w	d0,d4		; copy minx
	lsr.w	#5,d4		; get first long word
	lsl.w	#2,d4		; *4 = number of bytes
	adda.w	d4,a0		; add x offset to scanline

	lea	PLANESIZE(a0),a1	; plane 1
	lea	PLANESIZE(a1),a2	; plane 2
	lea	PLANESIZE(a2),a3	; plane 3

	move.w	d0,d4		; copy minx
	moveq.l	#-1,d2		; start mask, $ffffffff
	andi.w	#$1f,d4		; minx mod 32 = shift count
	lsr.l	d4,d2		; left mask: shift by d4 pixels

	move.w	d1,d4		; copy maxx
	moveq.l	#-1,d3		; start mask, $ffffffff
	andi.w	#$1f,d4		; maxx mod 32
	neg.w	d4		; - d4
	add.w	#31,d4		; d4 = 31 - d4 = shift count
	lsl.l	d4,d3		; right mask: shift by d4 pixels

	lsr.w	#5,d0		; get start long word
	lsr.w	#5,d1		; get end   long word
	sub.w	d0,d1		; get width in long words
	bne.b	doublewrite4\@	; check for single write
; single write: span starts and ends in the same long word
	and.l	d2,d3		; combine left and right masks
	or.l	d3,(a0)		; set
	or.l	d3,(a2)		; set
	not.l	d3		; invert
	and.l	d3,(a1)		; clear
	and.l	d3,(a3)		; clear
	bra.b	finished4\@
doublewrite4\@:
	or.l	d2,(a0)+	; set
	or.l	d2,(a2)+	; set
	not.l	d2		; invert
	and.l	d2,(a1)+	; clear
	and.l	d2,(a3)+	; clear
	subq.w	#2,d1		; subtract left and right writes
	bmi.b	writeright4\@	; don't do a multiwrite
; multiwrite: whole long words between the two edges
	moveq.l	#-1,d2		; $ffffffff
	moveq.l	#0,d4		; zeroes
multiloop4\@:
	move.l	d2,(a0)+	; set
	move.l	d2,(a2)+	; set
	move.l	d4,(a1)+	; clear
	move.l	d4,(a3)+	; clear
	dbra	d1,multiloop4\@
writeright4\@:
	or.l	d3,(a0)		; set
	or.l	d3,(a2)		; set
	not.l	d3		; invert
	and.l	d3,(a1)		; clear
	and.l	d3,(a3)		; clear
finished4\@:
	add.l	d5,d6		; go to next scanline
	movea.l	d6,a0		; copy ptr to modify
	dbra	d7,scanloop4\@	; d7 = yheight-1.

	movem.l	(sp)+,d2-d7/a2-a5	; restore registers saved on entry
	rts
	endm


; COLOR6: scanline fill for colour 6 - sets planes 1 and 2 and clears
; planes 0 and 3 over the span minx..maxx on every scanline.
; Expects the register setup made by scanconvpix: a0/d6 = current scanline,
; a4/a5 = minx/maxx table entries, d5 = BYTESROW, d7 = scanline count - 1.
COLOR6	macro

scanloop4\@:
	move.w	(a4)+,d0	; get minx
	move.w	(a5)+,d1	; get maxx

	cmp.w	d0,d1		; error check
	bmi.b	finished4\@	; maxx < minx: nothing to fill on this line

	move.w	d0,d4		; copy minx
	lsr.w	#5,d4		; get first long word
	lsl.w	#2,d4		; *4 = number of bytes
	adda.w	d4,a0		; add x offset to scanline

	lea	PLANESIZE(a0),a1	; plane 1
	lea	PLANESIZE(a1),a2	; plane 2
	lea	PLANESIZE(a2),a3	; plane 3

	move.w	d0,d4		; copy minx
	moveq.l	#-1,d2		; start mask, $ffffffff
	andi.w	#$1f,d4		; minx mod 32 = shift count
	lsr.l	d4,d2		; left mask: shift by d4 pixels

	move.w	d1,d4		; copy maxx
	moveq.l	#-1,d3		; start mask, $ffffffff
	andi.w	#$1f,d4		; maxx mod 32
	neg.w	d4		; - d4
	add.w	#31,d4		; d4 = 31 - d4 = shift count
	lsl.l	d4,d3		; right mask: shift by d4 pixels

	lsr.w	#5,d0		; get start long word
	lsr.w	#5,d1		; get end   long word
	sub.w	d0,d1		; get width in long words
	bne.b	doublewrite4\@	; check for single write
; single write: span starts and ends in the same long word
	and.l	d2,d3		; combine left and right masks
	or.l	d3,(a1)		; set
	or.l	d3,(a2)		; set
	not.l	d3		; invert
	and.l	d3,(a0)		; clear
	and.l	d3,(a3)		; clear
	bra.b	finished4\@
doublewrite4\@:
	or.l	d2,(a1)+	; set
	or.l	d2,(a2)+	; set
	not.l	d2		; invert
	and.l	d2,(a0)+	; clear
	and.l	d2,(a3)+	; clear
	subq.w	#2,d1		; subtract left and right writes
	bmi.b	writeright4\@	; don't do a multiwrite
; multiwrite: whole long words between the two edges
	moveq.l	#-1,d2		; $ffffffff
	moveq.l	#0,d4		; zeroes
multiloop4\@:
	move.l	d2,(a1)+	; set
	move.l	d2,(a2)+	; set
	move.l	d4,(a0)+	; clear
	move.l	d4,(a3)+	; clear
	dbra	d1,multiloop4\@
writeright4\@:
	or.l	d3,(a1)		; set
	or.l	d3,(a2)		; set
	not.l	d3		; invert
	and.l	d3,(a0)		; clear
	and.l	d3,(a3)		; clear
finished4\@:
	add.l	d5,d6		; go to next scanline
	movea.l	d6,a0		; copy ptr to modify
	dbra	d7,scanloop4\@	; d7 = yheight-1.

	movem.l	(sp)+,d2-d7/a2-a5	; restore registers saved on entry
	rts
	endm


; COLOR7: scanline fill for colour 7 - sets planes 0, 1 and 2 and clears
; plane 3 over the span minx..maxx on every scanline.
; Expects the register setup made by scanconvpix: a0/d6 = current scanline,
; a4/a5 = minx/maxx table entries, d5 = BYTESROW, d7 = scanline count - 1.
COLOR7	macro

scanloop4\@:
	move.w	(a4)+,d0	; get minx
	move.w	(a5)+,d1	; get maxx

	cmp.w	d0,d1		; error check
	bmi.b	finished4\@	; maxx < minx: nothing to fill on this line

	move.w	d0,d4		; copy minx
	lsr.w	#5,d4		; get first long word
	lsl.w	#2,d4		; *4 = number of bytes
	adda.w	d4,a0		; add x offset to scanline

	lea	PLANESIZE(a0),a1	; plane 1
	lea	PLANESIZE(a1),a2	; plane 2
	lea	PLANESIZE(a2),a3	; plane 3

	move.w	d0,d4		; copy minx
	moveq.l	#-1,d2		; start mask, $ffffffff
	andi.w	#$1f,d4		; minx mod 32 = shift count
	lsr.l	d4,d2		; left mask: shift by d4 pixels

	move.w	d1,d4		; copy maxx
	moveq.l	#-1,d3		; start mask, $ffffffff
	andi.w	#$1f,d4		; maxx mod 32
	neg.w	d4		; - d4
	add.w	#31,d4		; d4 = 31 - d4 = shift count
	lsl.l	d4,d3		; right mask: shift by d4 pixels

	lsr.w	#5,d0		; get start long word
	lsr.w	#5,d1		; get end   long word
	sub.w	d0,d1		; get width in long words
	bne.b	doublewrite4\@	; check for single write
; single write: span starts and ends in the same long word
	and.l	d2,d3		; combine left and right masks
	or.l	d3,(a0)		; set
	or.l	d3,(a1)		; set
	or.l	d3,(a2)		; set
	not.l	d3		; invert
	and.l	d3,(a3)		; clear
	bra.b	finished4\@
doublewrite4\@:
	or.l	d2,(a0)+	; set
	or.l	d2,(a1)+	; set
	or.l	d2,(a2)+	; set
	not.l	d2		; invert
	and.l	d2,(a3)+	; clear
	subq.w	#2,d1		; subtract left and right writes
	bmi.b	writeright4\@	; don't do a multiwrite
; multiwrite: whole long words between the two edges
	moveq.l	#-1,d2		; $ffffffff
	moveq.l	#0,d4		; zeroes
multiloop4\@:
	move.l	d2,(a0)+	; set
	move.l	d2,(a1)+	; set
	move.l	d2,(a2)+	; set
	move.l	d4,(a3)+	; clear
	dbra	d1,multiloop4\@
writeright4\@:
	or.l	d3,(a0)		; set
	or.l	d3,(a1)		; set
	or.l	d3,(a2)		; set
	not.l	d3		; invert
	and.l	d3,(a3)		; clear
finished4\@:
	add.l	d5,d6		; go to next scanline
	movea.l	d6,a0		; copy ptr to modify
	dbra	d7,scanloop4\@	; d7 = yheight-1.

	movem.l	(sp)+,d2-d7/a2-a5	; restore registers saved on entry
	rts
	endm


; COLOR8: scanline fill for colour 8 - sets plane 3 and clears planes
; 0, 1 and 2 over the span minx..maxx on every scanline.
; Expects the register setup made by scanconvpix: a0/d6 = current scanline,
; a4/a5 = minx/maxx table entries, d5 = BYTESROW, d7 = scanline count - 1.
COLOR8	macro

scanloop4\@:
	move.w	(a4)+,d0	; get minx
	move.w	(a5)+,d1	; get maxx

	cmp.w	d0,d1		; error check
	bmi.b	finished4\@	; maxx < minx: nothing to fill on this line

	move.w	d0,d4		; copy minx
	lsr.w	#5,d4		; get first long word
	lsl.w	#2,d4		; *4 = number of bytes
	adda.w	d4,a0		; add x offset to scanline

	lea	PLANESIZE(a0),a1	; plane 1
	lea	PLANESIZE(a1),a2	; plane 2
	lea	PLANESIZE(a2),a3	; plane 3

	move.w	d0,d4		; copy minx
	moveq.l	#-1,d2		; start mask, $ffffffff
	andi.w	#$1f,d4		; minx mod 32 = shift count
	lsr.l	d4,d2		; left mask: shift by d4 pixels

	move.w	d1,d4		; copy maxx
	moveq.l	#-1,d3		; start mask, $ffffffff
	andi.w	#$1f,d4		; maxx mod 32
	neg.w	d4		; - d4
	add.w	#31,d4		; d4 = 31 - d4 = shift count
	lsl.l	d4,d3		; right mask: shift by d4 pixels

	lsr.w	#5,d0		; get start long word
	lsr.w	#5,d1		; get end   long word
	sub.w	d0,d1		; get width in long words
	bne.b	doublewrite4\@	; check for single write
; single write: span starts and ends in the same long word
	and.l	d2,d3		; combine left and right masks
	or.l	d3,(a3)		; set
	not.l	d3		; invert
	and.l	d3,(a0)		; clear
	and.l	d3,(a1)		; clear
	and.l	d3,(a2)		; clear
	bra.b	finished4\@
doublewrite4\@:
	or.l	d2,(a3)+	; set
	not.l	d2		; invert
	and.l	d2,(a0)+	; clear
	and.l	d2,(a1)+	; clear
	and.l	d2,(a2)+	; clear
	subq.w	#2,d1		; subtract left and right writes
	bmi.b	writeright4\@	; don't do a multiwrite
; multiwrite: whole long words between the two edges
	moveq.l	#-1,d2		; $ffffffff
	moveq.l	#0,d4		; zeroes
multiloop4\@:
	move.l	d2,(a3)+	; set
	move.l	d4,(a0)+	; clear
	move.l	d4,(a1)+	; clear
	move.l	d4,(a2)+	; clear
	dbra	d1,multiloop4\@
writeright4\@:
	or.l	d3,(a3)		; set
	not.l	d3		; invert
	and.l	d3,(a0)		; clear
	and.l	d3,(a1)		; clear
	and.l	d3,(a2)		; clear
finished4\@:
	add.l	d5,d6		; go to next scanline
	movea.l	d6,a0		; copy ptr to modify
	dbra	d7,scanloop4\@	; d7 = yheight-1.

	movem.l	(sp)+,d2-d7/a2-a5	; restore registers saved on entry
	rts
	endm


; COLOR9: scanline fill for colour 9 - sets planes 0 and 3 and clears
; planes 1 and 2 over the span minx..maxx on every scanline.
; Expects the register setup made by scanconvpix: a0/d6 = current scanline,
; a4/a5 = minx/maxx table entries, d5 = BYTESROW, d7 = scanline count - 1.
COLOR9	macro

scanloop4\@:
	move.w	(a4)+,d0	; get minx
	move.w	(a5)+,d1	; get maxx

	cmp.w	d0,d1		; error check
	bmi.b	finished4\@	; maxx < minx: nothing to fill on this line

	move.w	d0,d4		; copy minx
	lsr.w	#5,d4		; get first long word
	lsl.w	#2,d4		; *4 = number of bytes
	adda.w	d4,a0		; add x offset to scanline

	lea	PLANESIZE(a0),a1	; plane 1
	lea	PLANESIZE(a1),a2	; plane 2
	lea	PLANESIZE(a2),a3	; plane 3

	move.w	d0,d4		; copy minx
	moveq.l	#-1,d2		; start mask, $ffffffff
	andi.w	#$1f,d4		; minx mod 32 = shift count
	lsr.l	d4,d2		; left mask: shift by d4 pixels

	move.w	d1,d4		; copy maxx
	moveq.l	#-1,d3		; start mask, $ffffffff
	andi.w	#$1f,d4		; maxx mod 32
	neg.w	d4		; - d4
	add.w	#31,d4		; d4 = 31 - d4 = shift count
	lsl.l	d4,d3		; right mask: shift by d4 pixels

	lsr.w	#5,d0		; get start long word
	lsr.w	#5,d1		; get end   long word
	sub.w	d0,d1		; get width in long words
	bne.b	doublewrite4\@	; check for single write
; single write: span starts and ends in the same long word
	and.l	d2,d3		; combine left and right masks
	or.l	d3,(a0)		; set
	or.l	d3,(a3)		; set
	not.l	d3		; invert
	and.l	d3,(a1)		; clear
	and.l	d3,(a2)		; clear
	bra.b	finished4\@
doublewrite4\@:
	or.l	d2,(a0)+	; set
	or.l	d2,(a3)+	; set
	not.l	d2		; invert
	and.l	d2,(a1)+	; clear
	and.l	d2,(a2)+	; clear
	subq.w	#2,d1		; subtract left and right writes
	bmi.b	writeright4\@	; don't do a multiwrite
; multiwrite: whole long words between the two edges
	moveq.l	#-1,d2		; $ffffffff
	moveq.l	#0,d4		; zeroes
multiloop4\@:
	move.l	d2,(a0)+	; set
	move.l	d2,(a3)+	; set
	move.l	d4,(a1)+	; clear
	move.l	d4,(a2)+	; clear
	dbra	d1,multiloop4\@
writeright4\@:
	or.l	d3,(a0)		; set
	or.l	d3,(a3)		; set
	not.l	d3		; invert
	and.l	d3,(a1)		; clear
	and.l	d3,(a2)		; clear
finished4\@:
	add.l	d5,d6		; go to next scanline
	movea.l	d6,a0		; copy ptr to modify
	dbra	d7,scanloop4\@	; d7 = yheight-1.

	movem.l	(sp)+,d2-d7/a2-a5	; restore registers saved on entry
	rts
	endm


; COLOR10: scanline fill for colour 10 - sets planes 1 and 3 and clears
; planes 0 and 2 over the span minx..maxx on every scanline.
; Expects the register setup made by scanconvpix: a0/d6 = current scanline,
; a4/a5 = minx/maxx table entries, d5 = BYTESROW, d7 = scanline count - 1.
COLOR10	macro

scanloop4\@:
	move.w	(a4)+,d0	; get minx
	move.w	(a5)+,d1	; get maxx

	cmp.w	d0,d1		; error check
	bmi.b	finished4\@	; maxx < minx: nothing to fill on this line

	move.w	d0,d4		; copy minx
	lsr.w	#5,d4		; get first long word
	lsl.w	#2,d4		; *4 = number of bytes
	adda.w	d4,a0		; add x offset to scanline

	lea	PLANESIZE(a0),a1	; plane 1
	lea	PLANESIZE(a1),a2	; plane 2
	lea	PLANESIZE(a2),a3	; plane 3

	move.w	d0,d4		; copy minx
	moveq.l	#-1,d2		; start mask, $ffffffff
	andi.w	#$1f,d4		; minx mod 32 = shift count
	lsr.l	d4,d2		; left mask: shift by d4 pixels

	move.w	d1,d4		; copy maxx
	moveq.l	#-1,d3		; start mask, $ffffffff
	andi.w	#$1f,d4		; maxx mod 32
	neg.w	d4		; - d4
	add.w	#31,d4		; d4 = 31 - d4 = shift count
	lsl.l	d4,d3		; right mask: shift by d4 pixels

	lsr.w	#5,d0		; get start long word
	lsr.w	#5,d1		; get end   long word
	sub.w	d0,d1		; get width in long words
	bne.b	doublewrite4\@	; check for single write
; single write: span starts and ends in the same long word
	and.l	d2,d3		; combine left and right masks
	or.l	d3,(a1)		; set
	or.l	d3,(a3)		; set
	not.l	d3		; invert
	and.l	d3,(a0)		; clear
	and.l	d3,(a2)		; clear
	bra.b	finished4\@
doublewrite4\@:
	or.l	d2,(a1)+	; set
	or.l	d2,(a3)+	; set
	not.l	d2		; invert
	and.l	d2,(a0)+	; clear
	and.l	d2,(a2)+	; clear
	subq.w	#2,d1		; subtract left and right writes
	bmi.b	writeright4\@	; don't do a multiwrite
; multiwrite: whole long words between the two edges
	moveq.l	#-1,d2		; $ffffffff
	moveq.l	#0,d4		; zeroes
multiloop4\@:
	move.l	d2,(a1)+	; set
	move.l	d2,(a3)+	; set
	move.l	d4,(a0)+	; clear
	move.l	d4,(a2)+	; clear
	dbra	d1,multiloop4\@
writeright4\@:
	or.l	d3,(a1)		; set
	or.l	d3,(a3)		; set
	not.l	d3		; invert
	and.l	d3,(a0)		; clear
	and.l	d3,(a2)		; clear
finished4\@:
	add.l	d5,d6		; go to next scanline
	movea.l	d6,a0		; copy ptr to modify
	dbra	d7,scanloop4\@	; d7 = yheight-1.

	movem.l	(sp)+,d2-d7/a2-a5	; restore registers saved on entry
	rts
	endm


; COLOR11: scanline fill for colour 11 - sets planes 0, 1 and 3 and clears
; plane 2 over the span minx..maxx on every scanline.
; Expects the register setup made by scanconvpix: a0/d6 = current scanline,
; a4/a5 = minx/maxx table entries, d5 = BYTESROW, d7 = scanline count - 1.
COLOR11	macro

scanloop4\@:
	move.w	(a4)+,d0	; get minx
	move.w	(a5)+,d1	; get maxx

	cmp.w	d0,d1		; error check
	bmi.b	finished4\@	; maxx < minx: nothing to fill on this line

	move.w	d0,d4		; copy minx
	lsr.w	#5,d4		; get first long word
	lsl.w	#2,d4		; *4 = number of bytes
	adda.w	d4,a0		; add x offset to scanline

	lea	PLANESIZE(a0),a1	; plane 1
	lea	PLANESIZE(a1),a2	; plane 2
	lea	PLANESIZE(a2),a3	; plane 3

	move.w	d0,d4		; copy minx
	moveq.l	#-1,d2		; start mask, $ffffffff
	andi.w	#$1f,d4		; minx mod 32 = shift count
	lsr.l	d4,d2		; left mask: shift by d4 pixels

	move.w	d1,d4		; copy maxx
	moveq.l	#-1,d3		; start mask, $ffffffff
	andi.w	#$1f,d4		; maxx mod 32
	neg.w	d4		; - d4
	add.w	#31,d4		; d4 = 31 - d4 = shift count
	lsl.l	d4,d3		; right mask: shift by d4 pixels

	lsr.w	#5,d0		; get start long word
	lsr.w	#5,d1		; get end   long word
	sub.w	d0,d1		; get width in long words
	bne.b	doublewrite4\@	; check for single write
; single write: span starts and ends in the same long word
	and.l	d2,d3		; combine left and right masks
	or.l	d3,(a0)		; set
	or.l	d3,(a1)		; set
	or.l	d3,(a3)		; set
	not.l	d3		; invert
	and.l	d3,(a2)		; clear
	bra.b	finished4\@
doublewrite4\@:
	or.l	d2,(a0)+	; set
	or.l	d2,(a1)+	; set
	or.l	d2,(a3)+	; set
	not.l	d2		; invert
	and.l	d2,(a2)+	; clear
	subq.w	#2,d1		; subtract left and right writes
	bmi.b	writeright4\@	; don't do a multiwrite
; multiwrite: whole long words between the two edges
	moveq.l	#-1,d2		; $ffffffff
	moveq.l	#0,d4		; zeroes
multiloop4\@:
	move.l	d2,(a0)+	; set
	move.l	d2,(a1)+	; set
	move.l	d2,(a3)+	; set
	move.l	d4,(a2)+	; clear
	dbra	d1,multiloop4\@
writeright4\@:
	or.l	d3,(a0)		; set
	or.l	d3,(a1)		; set
	or.l	d3,(a3)		; set
	not.l	d3		; invert
	and.l	d3,(a2)		; clear
finished4\@:
	add.l	d5,d6		; go to next scanline
	movea.l	d6,a0		; copy ptr to modify
	dbra	d7,scanloop4\@	; d7 = yheight-1.

	movem.l	(sp)+,d2-d7/a2-a5	; restore registers saved on entry
	rts
	endm


; COLOR12: scanline fill for colour 12 - sets planes 2 and 3 and clears
; planes 0 and 1 over the span minx..maxx on every scanline.
; Expects the register setup made by scanconvpix: a0/d6 = current scanline,
; a4/a5 = minx/maxx table entries, d5 = BYTESROW, d7 = scanline count - 1.
COLOR12	macro

scanloop4\@:
	move.w	(a4)+,d0	; get minx
	move.w	(a5)+,d1	; get maxx

	cmp.w	d0,d1		; error check
	bmi.b	finished4\@	; maxx < minx: nothing to fill on this line

	move.w	d0,d4		; copy minx
	lsr.w	#5,d4		; get first long word
	lsl.w	#2,d4		; *4 = number of bytes
	adda.w	d4,a0		; add x offset to scanline

	lea	PLANESIZE(a0),a1	; plane 1
	lea	PLANESIZE(a1),a2	; plane 2
	lea	PLANESIZE(a2),a3	; plane 3

	move.w	d0,d4		; copy minx
	moveq.l	#-1,d2		; start mask, $ffffffff
	andi.w	#$1f,d4		; minx mod 32 = shift count
	lsr.l	d4,d2		; left mask: shift by d4 pixels

	move.w	d1,d4		; copy maxx
	moveq.l	#-1,d3		; start mask, $ffffffff
	andi.w	#$1f,d4		; maxx mod 32
	neg.w	d4		; - d4
	add.w	#31,d4		; d4 = 31 - d4 = shift count
	lsl.l	d4,d3		; right mask: shift by d4 pixels

	lsr.w	#5,d0		; get start long word
	lsr.w	#5,d1		; get end   long word
	sub.w	d0,d1		; get width in long words
	bne.b	doublewrite4\@	; check for single write
; single write: span starts and ends in the same long word
	and.l	d2,d3		; combine left and right masks
	or.l	d3,(a2)		; set
	or.l	d3,(a3)		; set
	not.l	d3		; invert
	and.l	d3,(a0)		; clear
	and.l	d3,(a1)		; clear
	bra.b	finished4\@
doublewrite4\@:
	or.l	d2,(a2)+	; set
	or.l	d2,(a3)+	; set
	not.l	d2		; invert
	and.l	d2,(a0)+	; clear
	and.l	d2,(a1)+	; clear
	subq.w	#2,d1		; subtract left and right writes
	bmi.b	writeright4\@	; don't do a multiwrite
; multiwrite: whole long words between the two edges
	moveq.l	#-1,d2		; $ffffffff
	moveq.l	#0,d4		; zeroes
multiloop4\@:
	move.l	d2,(a2)+	; set
	move.l	d2,(a3)+	; set
	move.l	d4,(a0)+	; clear
	move.l	d4,(a1)+	; clear
	dbra	d1,multiloop4\@
writeright4\@:
	or.l	d3,(a2)		; set
	or.l	d3,(a3)		; set
	not.l	d3		; invert
	and.l	d3,(a0)		; clear
	and.l	d3,(a1)		; clear
finished4\@:
	add.l	d5,d6		; go to next scanline
	movea.l	d6,a0		; copy ptr to modify
	dbra	d7,scanloop4\@	; d7 = yheight-1.

	movem.l	(sp)+,d2-d7/a2-a5	; restore registers saved on entry
	rts
	endm


; COLOR13: scanline fill for colour 13 - sets planes 0, 2 and 3 and clears
; plane 1 over the span minx..maxx on every scanline.
; Expects the register setup made by scanconvpix: a0/d6 = current scanline,
; a4/a5 = minx/maxx table entries, d5 = BYTESROW, d7 = scanline count - 1.
COLOR13	macro

scanloop4\@:
	move.w	(a4)+,d0	; get minx
	move.w	(a5)+,d1	; get maxx

	cmp.w	d0,d1		; error check
	bmi.b	finished4\@	; maxx < minx: nothing to fill on this line

	move.w	d0,d4		; copy minx
	lsr.w	#5,d4		; get first long word
	lsl.w	#2,d4		; *4 = number of bytes
	adda.w	d4,a0		; add x offset to scanline

	lea	PLANESIZE(a0),a1	; plane 1
	lea	PLANESIZE(a1),a2	; plane 2
	lea	PLANESIZE(a2),a3	; plane 3

	move.w	d0,d4		; copy minx
	moveq.l	#-1,d2		; start mask, $ffffffff
	andi.w	#$1f,d4		; minx mod 32 = shift count
	lsr.l	d4,d2		; left mask: shift by d4 pixels

	move.w	d1,d4		; copy maxx
	moveq.l	#-1,d3		; start mask, $ffffffff
	andi.w	#$1f,d4		; maxx mod 32
	neg.w	d4		; - d4
	add.w	#31,d4		; d4 = 31 - d4 = shift count
	lsl.l	d4,d3		; right mask: shift by d4 pixels

	lsr.w	#5,d0		; get start long word
	lsr.w	#5,d1		; get end   long word
	sub.w	d0,d1		; get width in long words
	bne.b	doublewrite4\@	; check for single write
; single write: span starts and ends in the same long word
	and.l	d2,d3		; combine left and right masks
	or.l	d3,(a0)		; set
	or.l	d3,(a2)		; set
	or.l	d3,(a3)		; set
	not.l	d3		; invert
	and.l	d3,(a1)		; clear
	bra.b	finished4\@
doublewrite4\@:
	or.l	d2,(a0)+	; set
	or.l	d2,(a2)+	; set
	or.l	d2,(a3)+	; set
	not.l	d2		; invert
	and.l	d2,(a1)+	; clear
	subq.w	#2,d1		; subtract left and right writes
	bmi.b	writeright4\@	; don't do a multiwrite
; multiwrite: whole long words between the two edges
	moveq.l	#-1,d2		; $ffffffff
	moveq.l	#0,d4		; zeroes
multiloop4\@:
	move.l	d2,(a0)+	; set
	move.l	d2,(a2)+	; set
	move.l	d2,(a3)+	; set
	move.l	d4,(a1)+	; clear
	dbra	d1,multiloop4\@
writeright4\@:
	or.l	d3,(a0)		; set
	or.l	d3,(a2)		; set
	or.l	d3,(a3)		; set
	not.l	d3		; invert
	and.l	d3,(a1)		; clear
finished4\@:
	add.l	d5,d6		; go to next scanline
	movea.l	d6,a0		; copy ptr to modify
	dbra	d7,scanloop4\@	; d7 = yheight-1.

	movem.l	(sp)+,d2-d7/a2-a5	; restore registers saved on entry
	rts
	endm


; COLOR14: scanline fill for colour 14 - sets planes 1, 2 and 3 and clears
; plane 0 over the span minx..maxx on every scanline.
; Expects the register setup made by scanconvpix: a0/d6 = current scanline,
; a4/a5 = minx/maxx table entries, d5 = BYTESROW, d7 = scanline count - 1.
COLOR14	macro

scanloop4\@:
	move.w	(a4)+,d0	; get minx
	move.w	(a5)+,d1	; get maxx

	cmp.w	d0,d1		; error check
	bmi.b	finished4\@	; maxx < minx: nothing to fill on this line

	move.w	d0,d4		; copy minx
	lsr.w	#5,d4		; get first long word
	lsl.w	#2,d4		; *4 = number of bytes
	adda.w	d4,a0		; add x offset to scanline

	lea	PLANESIZE(a0),a1	; plane 1
	lea	PLANESIZE(a1),a2	; plane 2
	lea	PLANESIZE(a2),a3	; plane 3

	move.w	d0,d4		; copy minx
	moveq.l	#-1,d2		; start mask, $ffffffff
	andi.w	#$1f,d4		; minx mod 32 = shift count
	lsr.l	d4,d2		; left mask: shift by d4 pixels

	move.w	d1,d4		; copy maxx
	moveq.l	#-1,d3		; start mask, $ffffffff
	andi.w	#$1f,d4		; maxx mod 32
	neg.w	d4		; - d4
	add.w	#31,d4		; d4 = 31 - d4 = shift count
	lsl.l	d4,d3		; right mask: shift by d4 pixels

	lsr.w	#5,d0		; get start long word
	lsr.w	#5,d1		; get end   long word
	sub.w	d0,d1		; get width in long words
	bne.b	doublewrite4\@	; check for single write
; single write: span starts and ends in the same long word
	and.l	d2,d3		; combine left and right masks
	or.l	d3,(a1)		; set
	or.l	d3,(a2)		; set
	or.l	d3,(a3)		; set
	not.l	d3		; invert
	and.l	d3,(a0)		; clear
	bra.b	finished4\@
doublewrite4\@:
	or.l	d2,(a1)+	; set
	or.l	d2,(a2)+	; set
	or.l	d2,(a3)+	; set
	not.l	d2		; invert
	and.l	d2,(a0)+	; clear
	subq.w	#2,d1		; subtract left and right writes
	bmi.b	writeright4\@	; don't do a multiwrite
; multiwrite: whole long words between the two edges
	moveq.l	#-1,d2		; $ffffffff
	moveq.l	#0,d4		; zeroes
multiloop4\@:
	move.l	d2,(a1)+	; set
	move.l	d2,(a2)+	; set
	move.l	d2,(a3)+	; set
	move.l	d4,(a0)+	; clear
	dbra	d1,multiloop4\@
writeright4\@:
	or.l	d3,(a1)		; set
	or.l	d3,(a2)		; set
	or.l	d3,(a3)		; set
	not.l	d3		; invert
	and.l	d3,(a0)		; clear
finished4\@:
	add.l	d5,d6		; go to next scanline
	movea.l	d6,a0		; copy ptr to modify
	dbra	d7,scanloop4\@	; d7 = yheight-1.

	movem.l	(sp)+,d2-d7/a2-a5	; restore registers saved on entry
	rts
	endm


; COLOR15 -- span-fill loop for colour 15. For every scanline it reads the
; span limits minx/maxx and, from minx to maxx inclusive, sets the bits in
; all four planes (a0 and the three planes that follow it at PLANESIZE
; intervals: a1, a2, a3). No plane is cleared, which matches %1111.
;
; Registers, as used by the code below:
;   a0     current scanline in the first plane; reloaded from d6 per line
;   a4/a5  word tables of minx / maxx, read with postincrement
;   d5     amount added to d6 to reach the next scanline
;   d6     scanline address that is stepped by d5
;   d7     dbra counter (yheight-1)
;   d0-d4 and a1-a3 are overwritten.
;
; NOTE(review): scanloop4, doublewrite4, multiloop4, writeright4 and
; finished4 are branch targets here, but none of them is defined in this
; copy, and no rts/endm closes the macro; those lines look lost. The notes
; below say where each label presumably belongs -- confirm against a clean
; copy of scanconvert.a before assembling.
COLOR15	macro

; NOTE(review): scanloop4 (target of the dbra at the bottom) presumably
; labels the next instruction.

	move.w	(a4)+,d0	; get minx
	move.w	(a5)+,d1	; get maxx

	cmp.w	d0,d1		; error check
; maxx - minx negative: nothing to draw on this scanline
	bmi.b	finished4\@

	move.w	d0,d4		; copy minx
	lsr.w	#5,d4		; get first long word
	lsl.w	#2,d4		; *4 = number of bytes
	adda.w	d4,a0		; add x offset to scanline

; a1-a3 = the same position in the next three planes
	lea	PLANESIZE(a0),a1
	lea	PLANESIZE(a1),a2
	lea	PLANESIZE(a2),a3

; d2 = left-edge mask: the (minx mod 32) most significant bits cleared
	move.w	d0,d4		; copy minx
	moveq.l	#-1,d2		; start mask, $ffffffff
	andi.w	#$1f,d4		; minx mod 32 = shift count
	lsr.l	d4,d2		; shift mask by d4 pixels

; d3 = right-edge mask: the 31 - (maxx mod 32) least significant bits cleared
	move.w	d1,d4		; copy maxx
	moveq.l	#-1,d3		; start mask, $ffffffff
	andi.w	#$1f,d4		; maxx mod 32
	neg.w	d4		; - d4
	add.w	#31,d4		; d4 = 31 - d4 = shift count
	lsl.l	d4,d3		; shift mask by d4 pixels

	lsr.w	#5,d0		; get start long word
	lsr.w	#5,d1		; get end   long word
	sub.w	d0,d1		; get width in long words
	bne.b	doublewrite4\@	; check for single write
; single write: the span starts and ends in the same long word
	and.l	d2,d3		; combine left and right masks
	or.l	d3,(a0)		; set
	or.l	d3,(a1)		; set
	or.l	d3,(a2)		; set
	or.l	d3,(a3)		; set
	bra.b	finished4\@
; NOTE(review): doublewrite4 presumably labels the next instruction.
; Left edge: masked write, then step all four plane pointers on.
	or.l	d2,(a0)+	; set
	or.l	d2,(a1)+	; set
	or.l	d2,(a2)+	; set
	or.l	d2,(a3)+	; set
	subq.w	#2,d1		; subtract left and right writes
	bmi.b	writeright4\@	; don't do a multiwrite
; multiwrite\@
	moveq.l	#-1,d2		; $ffffffff
; NOTE(review): multiloop4 (target of the dbra below) presumably labels the
; next instruction, so the constant above is loaded once per span.
	move.l	d2,(a0)+	; set
	move.l	d2,(a1)+	; set
	move.l	d2,(a2)+	; set
	move.l	d2,(a3)+	; set
	dbra	d1,multiloop4\@
; NOTE(review): writeright4 presumably labels the next instruction.
; Right edge: masked write into the last long word of the span.
	or.l	d3,(a0)		; set
	or.l	d3,(a1)		; set
	or.l	d3,(a2)		; set
	or.l	d3,(a3)		; set
; NOTE(review): finished4 presumably labels the next instruction.
	add.l	d5,d6		; go to next scanline
	movea.l	d6,a0		; copy ptr to modify
	dbra	d7,scanloop4\@	; d7 = yheight-1.

; pop d2-d7/a2-a5; the matching push is not visible in this macro
	movem.l	(sp)+,d2-d7/a2-a5


; Expand the macros...
; NOTE(review): no macro invocations and no color0..color15 label
; definitions follow this comment in this copy; they appear to have been
; lost. Presumably each colorN label sat in front of the expansion of the
; corresponding COLORn macro -- confirm against a clean copy.

	CNOP	0,4	; longword align for 020/030 speed

; Sixteen long-word addresses, color0 through color15 in ascending order.
; NOTE(review): the label that names this table is not visible here; it is
; presumably indexed by colour * 4, the way drawline68k indexes
; linejmptable -- confirm.
	dc.l	color0
	dc.l	color1
	dc.l	color2
	dc.l	color3
	dc.l	color4
	dc.l	color5
	dc.l	color6
	dc.l	color7
	dc.l	color8
	dc.l	color9
	dc.l	color10
	dc.l	color11
	dc.l	color12
	dc.l	color13
	dc.l	color14
	dc.l	color15

; A 68000 line drawer, originally from 68000 Assembly Language, by
; Krantz and Stanley. Modified for the Amiga and the 320x200,
; four bitplane case.

;extern void __asm drawline68k(register __a1 PLANEPTR p,
;                              register __d0 short x,
;                              register __d1 short y,
;                              register __d2 short x2,
;                              register __d3 short y2,
;                              register __d4 short color);
;
; Sets up a 16.16 fixed-point walk from (x,y) towards (x2,y2) and calls the
; plot routine selected by colour (low four bits of d4) through linejmptable.
; Values handed to the plot routine:
;   a2-a5  the four plane pointers (p, then +PLANESIZE each)
;   d4/d5  current x / y, 16.16 fixed point
;   d6/d7  x / y step per pass, 16.16 fixed point
;   d2     pass count minus one (dbra counter)
;   a0/a1  $8000, i.e. 0.5, added before truncating to an integer
; d2-d7/a2-a6 are saved and restored; d0, d1, a0 and a1 are not.
;
; NOTE(review): the entry label and the branch targets sk1_vecd..sk7_vecd,
; y_biggerd and divtrapd are not defined in this copy, and no rts follows
; the final movem; those lines look lost. The notes below give the presumed
; label positions -- confirm against a clean copy before assembling.

	movem.l	d2-d7/a2-a6,-(sp)

; a2..a5 = the four bitplanes, PLANESIZE bytes apart, starting at p
	movea.l	a1,a2
	lea	PLANESIZE(a2),a3
	lea	PLANESIZE(a3),a4
	lea	PLANESIZE(a4),a5

; a6 = linejmptable[color & 15]
	andi.w	#15,d4		; 0..15 color
	add.w	d4,d4		;
	add.w	d4,d4		; (4 byte entries) get table index
	lea	linejmptable(pc),a6 ; table base
	movea.l	0(a6,d4.w),a6	; get jmp address

; d4 = x << 16 and d5 = y << 16: integer part in the high word, fraction 0
	move.w	d0,d4
	swap	d4
	clr.w	d4
	move.w	d1,d5
	swap	d5
	clr.w	d5

; d2 = dx = x2 - x, d0 = |dx| (sk1_vecd presumably follows the neg)
	sub.w	d0,d2
	move.w	d2,d0
	bpl.b	sk1_vecd
	neg.w	d0
; d3 = dy = y2 - y, d1 = |dy| (sk2_vecd presumably follows the neg)
	sub.w	d1,d3
	move.w	d3,d1
	bpl.b	sk2_vecd
	neg.w	d1
; |dy| > |dx|: step along y instead of x
	cmp.w	d0,d1
	bgt.b	y_biggerd
; x-major case: d6 = +1.0 or -1.0 according to the sign of dx
; (sk3_vecd presumably labels the negative constant, sk4_vecd the swap)
	tst.w	d2
	bmi.b	sk3_vecd
	move.l	#$10000,d6
	bra.b	sk4_vecd
	move.l	#$ffff0000,d6
; d7 = dy / |dx| in 16.16: dy is scaled by 2^14 before the 16-bit divide
; and the quotient shifted left by 2 afterwards. |dy| <= |dx| on this path,
; so the quotient magnitude is at most 2^14.
	swap	d3
	clr.w	d3
	asr.l	#2,d3
; |dx| = 0 here means both deltas are zero: skip the divide
	tst.w	d0
	beq	divtrapd
	divs	d0,d3
; keep the quotient (low word), drop the remainder
	ext.l	d3
	asl.l	#2,d3
	move.l	d3,d7
; pass count = |dx|
	move.w	d0,d2
	bra.b	sk7_vecd
; NOTE(review): y_biggerd presumably labels the next instruction.
; y-major case: d7 = +1.0 or -1.0 according to the sign of dy
; (sk5_vecd presumably labels the negative constant, sk6_vecd the swap)
	tst.w	d3
	bmi.b	sk5_vecd
	move.l	#$10000,d7
	bra.b	sk6_vecd
	move.l	#$ffff0000,d7
; d6 = dx / |dy| in 16.16, computed the same way as above
	swap	d2
	clr.w	d2
	asr.l	#2,d2
	tst.w	d1
	beq	divtrapd
	divs	d1,d2
	ext.l	d2
	asl.l	#2,d2
	move.l	d2,d6
; pass count = |dy|
	move.w	d1,d2
; NOTE(review): sk7_vecd presumably labels the next instruction, where the
; two cases join.
	subq.w	#1,d2	; setup dbra

	move.l	#$8000,a0	; .5 to add to round up
	move.l	#$8000,a1	; "                   "

	jsr	(a6)	; draw line

; NOTE(review): divtrapd presumably labels this restore, with an rts after it.
	movem.l	(sp)+,d2-d7/a2-a6

	; Plot loop: rounds the 16.16 position in d4/d5 to a pixel, applies
	; the four macro arguments (bit instructions) to that pixel in the
	; planes at a2, a3, a4 and a5, then advances by d6/d7. Repeats under
	; dbra d2. d0, d1 and d3 are scratch.
	; NOTE(review): this is presumably the body of the LINEPLOT macro that
	; is invoked sixteen times further down; its macro header line, the
	; loop label lp0_vecd and the closing rts/endm are not visible in
	; this copy -- confirm against a clean copy.
	move.l	d4,d0	; fixed point arithmetic
	move.l	d5,d1
	add.l	a0,d0
	add.l	a1,d1
	; integer parts of x and y into the low words
	swap	d0
	swap	d1

	; d3 = y * 40, the byte offset of the row (40 bytes per row)
	move.w	d1,d3	; save y
	add.w	d1,d1	; *2
	add.w	d1,d1	; *4
	add.w	d1,d3	; *5
	lsl.w	#3,d3	; *40
	move.w	d0,d1	; save x
	lsr.w	#3,d0	; get x byte offset
	add.w	d0,d3	; x offset + y offset
	andi.w	#7,d1	; get x bit set
	; A bit instruction on a memory byte uses the bit number modulo 8,
	; so the inverted value selects bit 7 - (x mod 8).
	not.b	d1	; invert bits (left to right)

        \1.b	d1,0(a2,d3.w)	; first argument: plane at a2
        \2.b	d1,0(a3,d3.w)	; second argument: plane at a3
        \3.b	d1,0(a4,d3.w)	; third argument: plane at a4
        \4.b	d1,0(a5,d3.w)	; fourth argument: plane at a5

	; step x and y, then loop while the counter in d2 lasts
	add.l	d6,d4
	add.l	d7,d5
	dbra	d2,lp0_vecd\@

	; Sixteen expansions of LINEPLOT, one per colour. The arguments are
	; the bit instructions for the planes at a2, a3, a4 and a5 in that
	; order (see the \1-\4 lines of the plot code above); bset means the
	; plane's bit is 1 in the colour number, first argument = bit 0.
	; NOTE(review): the labels linecolor0..linecolor15 listed in the table
	; below are not defined in this copy; presumably one preceded each
	; expansion, in this order -- confirm against a clean copy.
	LINEPLOT	bclr,bclr,bclr,bclr	; colour 0,  %0000
	LINEPLOT	bset,bclr,bclr,bclr	; colour 1,  %0001
	LINEPLOT	bclr,bset,bclr,bclr	; colour 2,  %0010
	LINEPLOT	bset,bset,bclr,bclr	; colour 3,  %0011
	LINEPLOT	bclr,bclr,bset,bclr	; colour 4,  %0100
	LINEPLOT	bset,bclr,bset,bclr	; colour 5,  %0101
	LINEPLOT	bclr,bset,bset,bclr	; colour 6,  %0110
	LINEPLOT	bset,bset,bset,bclr	; colour 7,  %0111
	LINEPLOT	bclr,bclr,bclr,bset	; colour 8,  %1000
	LINEPLOT	bset,bclr,bclr,bset	; colour 9,  %1001
	LINEPLOT	bclr,bset,bclr,bset	; colour 10, %1010
	LINEPLOT	bset,bset,bclr,bset	; colour 11, %1011
	LINEPLOT	bclr,bclr,bset,bset	; colour 12, %1100
	LINEPLOT	bset,bclr,bset,bset	; colour 13, %1101
	LINEPLOT	bclr,bset,bset,bset	; colour 14, %1110
	LINEPLOT	bset,bset,bset,bset	; colour 15, %1111

	; Sixteen long-word addresses, linecolor0 through linecolor15 in
	; ascending order.
	; NOTE(review): presumably this is the table drawline68k reads as
	; linejmptable, indexed by (color & 15) * 4; the label itself is not
	; visible in this copy -- confirm against a clean copy.
	dc.l	linecolor0
	dc.l	linecolor1
	dc.l	linecolor2
	dc.l	linecolor3
	dc.l	linecolor4
	dc.l	linecolor5
	dc.l	linecolor6
	dc.l	linecolor7
	dc.l	linecolor8
	dc.l	linecolor9
	dc.l	linecolor10
	dc.l	linecolor11
	dc.l	linecolor12
	dc.l	linecolor13
	dc.l	linecolor14
	dc.l	linecolor15

;extern void __asm fillline68k(register __d0 short x,
;                              register __d1 short y,
;                              register __d2 short x2,
;                              register __d3 short y2,
;                              register __a0 short * table);

; The line drawer modified...
;
; Walks the line from (x,y) to (x2,y2) in 16.16 fixed point and, instead of
; plotting, stores x values into the word table at a0, which is indexed by
; y (2 bytes per entry). Both endpoints are stored first; the loop then
; stores the rounded x each time the rounded y differs from the last y it
; stored for. d2-d7 are saved and restored; d0, d1, a0 and a1 are not.
;
; NOTE(review): the entry label and the branch targets oky,
; sk1_vec..sk7_vec, y_bigger, divtrap, lp0_vec and ynochange are not defined
; in this copy, and no rts follows the final movem; those lines look lost.
; The notes below give the presumed label positions -- confirm against a
; clean copy before assembling.

	movem.l	d2-d7,-(sp)

; Order the endpoints so that (d0,d1) is the upper one: they are swapped
; unless y2 > y (oky presumably labels the move.w d3,d6 below).
	cmp.w	d1,d3		; always draw top->bottom
	bgt.b	oky
	exg	d1,d3		; swap y's
	exg	d0,d2		; swap x's
; table[y2] = x2
	move.w	d3,d6
	add.w	d6,d6		; 2 byte entries
	move.w	d2,0(a0,d6.w)	; save x in scan table

; table[y] = x
	move.w	d1,d6
	add.w	d6,d6		; 2 byte entries
	move.w	d0,0(a0,d6.w)	; save x in scan table

; a0 = address of table[y], the first entry the loop writes
	adda.w	d6,a0		; set up address to start of table

; d4 = x << 16 and d5 = y << 16: integer part in the high word, fraction 0
	move.w	d0,d4
	swap	d4
	clr.w	d4
	move.w	d1,d5
	swap	d5
	clr.w	d5

; d2 = dx = x2 - x, d0 = |dx| (sk1_vec presumably follows the neg)
	sub.w	d0,d2
	move.w	d2,d0
	bpl.b	sk1_vec
	neg.w	d0
; d3 = dy = y2 - y, d1 = |dy| (sk2_vec presumably follows the neg)
	sub.w	d1,d3
	move.w	d3,d1
	bpl.b	sk2_vec
	neg.w	d1
; |dy| > |dx|: step along y instead of x
	cmp.w	d0,d1
	bgt.b	y_bigger
; x-major case: d6 = +1.0 or -1.0 according to the sign of dx
; (sk3_vec presumably labels the negative constant, sk4_vec the swap)
	tst.w	d2
	bmi.b	sk3_vec
	move.l	#$10000,d6
	bra.b	sk4_vec
	move.l	#$ffff0000,d6
; d7 = dy / |dx| in 16.16: dy is scaled by 2^14 before the 16-bit divide
; and the quotient shifted left by 2 afterwards
	swap	d3
	clr.w	d3
	asr.l	#2,d3
; |dx| = 0 here means both deltas are zero: skip the divide
	tst.w	d0
	beq	divtrap
	divs	d0,d3
; keep the quotient (low word), drop the remainder
	ext.l	d3
	asl.l	#2,d3
	move.l	d3,d7
; pass count = |dx|
	move.w	d0,d2
	bra.b	sk7_vec
; NOTE(review): y_bigger presumably labels the next instruction.
; y-major case: d7 = +1.0 or -1.0 according to the sign of dy
; (sk5_vec presumably labels the negative constant, sk6_vec the swap)
	tst.w	d3
	bmi.b	sk5_vec
	move.l	#$10000,d7
	bra.b	sk6_vec
	move.l	#$ffff0000,d7
; d6 = dx / |dy| in 16.16, computed the same way as above
	swap	d2
	clr.w	d2
	asr.l	#2,d2
	tst.w	d1
	beq	divtrap
	divs	d1,d2
	ext.l	d2
	asl.l	#2,d2
	move.l	d2,d6
; pass count = |dy|
	move.w	d1,d2
; NOTE(review): sk7_vec presumably labels this point, where the two cases
; join. With the decrement below disabled, a dbra on d2 makes one pass more
; than the count stored in d2.
;	subq.w	#1,d2		; set up dbra
	move.l	#$8000,d3	; .5 to add to round up

; NOTE(review): d1 was last written by the move.w d3,d1 / neg.w d1 pair
; above, so it holds |y2 - y| here, not a y coordinate as the comment says.
; If that value happens to equal the starting y, the first compare below
; would skip the first store and shift the later ones by one entry --
; confirm what was intended.
	move.w	d1,a1		; old y = current y

; d0/d1 = x/y rounded to the nearest integer (low words)
	move.l	d4,d0		; compute 2nd x point
	add.l	d3,d0
	swap	d0
	move.l	d5,d1		; compute 2nd y point
	add.l	d3,d1
	swap	d1
; NOTE(review): lp0_vec (target of the dbra at the bottom) presumably
; labels the compare below.
	cmp.w	a1,d1		; newy # oldy?
	beq.b	ynochange	; no
	move.w	d0,(a0)+	; fastest possible write to memory.
; remember the y this entry was written for
	move.w	d1,a1

; NOTE(review): ynochange presumably labels the next instruction.
; step x and y
	add.l	d6,d4
	add.l	d7,d5

	move.l	d4,d0		; fixed point arithmetic
	add.l	d3,d0
	swap	d0
	move.l	d5,d1
	add.l	d3,d1
	swap	d1
	dbra	d2,lp0_vec
; NOTE(review): divtrap presumably labels this restore, with an rts after it.
	movem.l	(sp)+,d2-d7

fi # end of overwriting check
#	End of shell archive
exit 0

jcs@crash.cts.com (John Schultz) (06/10/90)

  Here are a few subtle changes to improve the performance of the processor
polygon fill code I posted earlier. Both changes are for the file
scanconvert.a. Also, note that my polygon fill code only works on convex
polygons. If the screen width is changed from 320 pixels, the line drawer's
pixel offset calculations will have to be rewritten to handle row widths
other than 40 bytes (the 80-bytes-per-row (640 pixel) case is trivial: just
change the lsl.w #3,d3 to lsl.w #4,d3). The polygon routines simply
require a changed define, and could be generalized so that they
handle screen width changes dynamically.

  In _drawline68k, replace the code between sk8_vecd and the second
add.l with the following:

; start _drawline changes
; Replacement fragment: compared with the code posted earlier, a1 is loaded
; with 7 instead of a second $8000, and the y rounding adds a0 instead of a1.
; NOTE(review): this fragment runs from the end of the _drawline68k setup
; into the start of the plot code; any lines that sat between the movem and
; the first move.l (labels, rts, macro header) are not visible in this copy.

	subq.w	#1,d2	; setup dbra

	move.l	#$8000,a0	; .5 to add to round up
; NOTE(review): nothing in this fragment reads a1; its use is not shown here.
	move.w	#7,a1

	jsr	(a6)	; draw line

	movem.l	(sp)+,d2-d7/a2-a6

	move.l	d4,d0	; fixed point arithmetic
	move.l	d5,d1
; the same 0.5 in a0 now rounds both x and y
	add.l	a0,d0
	add.l	a0,d1

; end of _drawline68k changes

  In _fillline68k, replace everything from sk7_vec to sk8_vec with:

; start _fillline68k changes
; Replacement fragment: loads the rounding constant, seeds a1 from d1,
; rounds the current x/y into d0/d1, stores x into the table when y differs
; from a1, then steps the position.
; NOTE(review): the labels this fragment needs (the sk7_vec/sk8_vec
; boundaries named in the text, the loop target and ynochange) are not
; visible in this copy -- confirm their positions against a clean copy.

	move.l	#$8000,d3	; .5 to add to round up
; NOTE(review): a1 is seeded from d1 before d1 is recomputed below, so it
; receives whatever d1 held on entry to this fragment -- confirm that this
; is the intended "old y".
	move.w	d1,a1		; old y = current y
	move.l	d4,d0		; fixed point arithmetic
	add.l	d3,d0
	swap	d0
	move.l	d5,d1
	add.l	d3,d1
	swap	d1

	cmp.w	a1,d1		; newy # oldy?
	beq.b	ynochange	; no
	move.w	d0,(a0)+	; fastest possible write to memory.
; remember the y this entry was written for
	move.w	d1,a1

; step x and y
	add.l	d6,d4
	add.l	d7,d5


; end of _fillline68k changes