[comp.sys.amiga.programmer] fast line & polygon drawing...here's some routines...

rda184s@monu6.cc.monash.edu.au (Richard Jones) (05/07/91)

***********************************************************
A couple of people have asked me for all the line drawers I
have got, so here they are for all to get....
Please: anyone who knows _anything_ about a fast polygon
		drawing routine...*please* tell me!!!!!!!
Anyway....here they are.....(all original articles)
************************************************************
DesertRat...
rda184s@monu6.cc.monash.edu.au


------

Path: minyos.xx.rmit.oz.au!goanna!munnari.oz.au!samsung!zaphod.mps.ohio-state.edu!rpi!uupsi!sunic!cs.umu.se!dvljrt
From: dvljrt@cs.umu.se (Joakim Rosqvist)
Newsgroups: comp.sys.amiga.programmer
Subject: Re: 3D stuff & quick line drawing
Message-ID: <1991Apr22.122442.25505@cs.umu.se>
Date: 22 Apr 91 12:24:42 GMT
References: <1991Apr21.081925.4722@lynx.CS.ORST.EDU>
Sender: news@cs.umu.se (News Administrator)
Organization: Dep. of Info.Proc, Umea Univ., Sweden
Lines: 122

In article <1991Apr21.081925.4722@lynx.CS.ORST.EDU> divineg@prism.CS.ORST.EDU writes:
>
>	Here's my situation:
>
>I've coded and coded and coded and finally found a way to crudely rotate
>3 dimensional objects ... now all of you wizards out there, what's the
>quickest way to draw a line?  I'd like to do it with the Blitter (I can
>use the processer to calculate while I draw lines) or is it better to use
>the 68'?  By the way, I'm using C & asm (pretty original, huh?).
>	Please post your responses, I'm interested in what kind of
>discussion follows.....
>

Here is a part of the source i use when i want *fast* lines.
It uses the blitter and writes directly to the DFF regs
(no flames please, i *did* call Ownblitter first)
There are some things left out, like how to find the address of the
bitplane to draw in, and where the coordinates come from, but that
should be easy to fill in.
/$DR.HEX$


;Custom-chip register offsets. They are used below as displacements
;from a6 after "lea $dff000,a6".
DMACONR=2	;DMA control (read); the line drawer polls it for blitter busy
BLTCON0=$40	;blitter control register 0
BLTCON1=$42	;blitter control register 1
BLTAMOD=$64	;modulo, source A
BLTBMOD=$62	;modulo, source B
BLTCMOD=$60	;modulo, source C
BLTAPT=$50	;pointer, source A (long; low word is at BLTAPT+2)
BLTCPT=$48	;pointer, source C
BLTDPT=$54	;pointer, destination D
BLTADAT=$74	;data register, source A
BLTBDAT=$72	;data register, source B
BLTAFWM=$44	;first-word mask, source A
BLTSIZE=$58	;blit size; the code below writes it last ("GO!")

;Library vector offsets, called as offset(a6) with a6 = graphics base
OwnBlitter=-456
DisownBlitter=-462

width=80	;80 bytes = 640 pixels



;-----------------------------------------------------------------
; Draw_all_lines
; Takes ownership of the blitter, programs the blitter registers
; whose values are the same for every line, calls drawem to draw
; <numlines> lines, then releases the blitter again.
;
; The <...> operands are placeholders from the original posting and
; must be filled in before this will assemble.
;
; Uses: a0 (bitplane address), d7 (line counter), a6, plus whatever
;       drawem and the two library calls change.
;-----------------------------------------------------------------
Draw_all_lines:
	move.l	4.w,a6		;a6 = ExecBase (pointer stored at address 4)
	move.l	(a6),a6		;follow the link held in the first longword...
	move.l	(a6),a6	;get base of Gfxlib
;NOTE(review): the two loads above take graphics.library to be the
;node two links after exec in the system library list. That is an
;assumption about list order, not a documented guarantee - opening
;"graphics.library" with OpenLibrary is the supported way. TODO confirm.
	jsr	OwnBlitter(a6)
	
	lea	$dff000,a6	;a6 = custom chip register base from here on

;OwnBlitter only grants ownership; a blit started by the previous
;owner may still be running. Wait for the blitter to go idle before
;touching any of its registers.
initwait:
	btst	#6,DMACONR(a6)	;bit 6 of the high byte = blitter busy
	bne.s	initwait

	move	#$8000,BLTADAT(a6)
	move	#-1,BLTAFWM(a6)
	move	#width,BLTCMOD(a6)
	move	#$FFFF,BLTBDAT(a6)	;$FFFF=draw , $0000=clear
;These values are the same for all lines, no need to tell
;the blitter about them more than once
	
	move.l	<startaddress of bitplane>,a0
	moveq	#<numlines-1>,d7	;dbra counter, hence the -1
	bsr.s	drawem
	move.l	4.w,a6		;re-fetch the graphics base the same way as above
	move.l	(a6),a6
	move.l	(a6),a6
	jmp	DisownBlitter(a6)	;tail call: DisownBlitter returns to our caller
	

;-----------------------------------------------------------------
; drawem - draw d7+1 lines with the blitter in line mode.
; In:   a0 = start address of the bitplane
;       a6 = $dff000 (custom chip base)
;       d7 = number of lines - 1
;       <X0>,<Y0>,<X1>,<Y1> are placeholders for wherever the
;       endpoints of the current line come from.
; Uses: d0-d5 as scratch, d7 as loop counter.
; The constant registers (BLTADAT, BLTAFWM, BLTCMOD, BLTBDAT) are
; expected to have been set up already by the caller.
;-----------------------------------------------------------------
drawem:	move	<X0>,d0
	move	<Y0>,d1
	move	<X1>,d2
	move	<Y1>,d3
	
	moveq	#width,d4
	mulu	d1,d4		;d4 = width*Y0 = byte offset of the start row
;This could be sped up by reading a table with values
;0,width,width*2,width*3 etc.
	moveq	#-16,d5
	and	d0,d5		;d5 = X0 with the low 4 bits cleared
	lsr	#3,d5		;...as a byte offset (always even)
	add	d5,d4		;word add: row offset + column offset must fit 16 bits
	add.l	a0,d4		;d4 = address of the word holding the first pixel
	moveq	#0,d5		;d5 collects a 3-bit octant index, one bit per roxl
	sub	d1,d3		;d3 = Y1-Y0, X flag = borrow
	roxl.b	#1,d5		;shift that X flag into d5
	tst	d3
	bge.s	y2gy1
	neg	d3		;d3 = abs(Y1-Y0)
y2gy1:	sub	d0,d2		;d2 = X1-X0, X flag = borrow
	roxl.b	#1,d5		;second octant bit
	tst	d2
	bge.s	x2gx1
	neg	d2		;d2 = abs(X1-X0)
x2gx1:	move	d3,d1
	sub	d2,d1		;d1 = abs(dy)-abs(dx), X flag = borrow
	bge.s	dygdx
	exg	d2,d3		;make d3 the larger and d2 the smaller delta
dygdx:	roxl.b	#1,d5		;third bit: X from the sub above (bge/exg leave flags alone)
	move.b	table(pc,d5.w),d5	;translate octant index to the BLTCON1 value
	add	d2,d2		;d2 = 2*small
;NOTE(review): the Hardware Reference Manual gives the line-mode terms
;as 4*small, 4*small-2*large and 4*(small-large); the values used here
;are half of those. Confirm the halved terms draw identical lines.
wait:	btst	#6,DMACONR(a6)	;bit 6 of the high byte of DMACONR = blitter busy
	bne.s	wait		;spin until the previous line has finished
	move	d2,BLTBMOD(a6)	;2*small
	sub	d3,d2		;d2 = 2*small - large
	bge.s	signn1
	or.b	#$40,d5		;negative: also set bit 6 of the BLTCON1 value
signn1:	move	d2,BLTAPT+2(a6)	;low word of BLTAPT = 2*small - large
	sub	d3,d2
	move	d2,BLTAMOD(a6)	;2*small - 2*large
	and	#15,d0		;bit position of X0 inside its word
	ror	#4,d0		;moved into bits 15-12
	or	#$bca,d0	;combined with the constant low 12 bits $bca
	move	d0,BLTCON0(a6)
	move	d5,BLTCON1(a6)
	move.l	d4,BLTCPT(a6)	;source C and destination D both start
	move.l	d4,BLTDPT(a6)	;at the word containing the first pixel
	addq	#1,d3
	lsl	#6,d3
	addq	#2,d3		;d3 = (large+1)*64 + 2
	move	d3,BLTSIZE(a6)	;GO!
	dbra	d7,drawem	;do next line
	rts
	
;BLTCON1 values indexed by the 3-bit octant index built above
table:	dc.b 1,17,9,21,5,25,13,29	


 NEXT ARTICLE-----------------------------------
<><><><><><><><>  CUT HERE (as If you didn't realise...) <><><><>
	Lately there has been a few requests for a mythical very fast line
drawing routine, posted to a comp.sys.amiga.* news group some time in
the past, this is not it.
	This is, however, a good line drawing routine written by me, and
using Bresenham's line drawing algorithm taken from,
Computer Graphics,	Donald Hearn, M. Pauline Baker
published by Prentice-Hall International.
	It also produces lines identical to lines drawn by the blitter.

	Stuart Twyford
	Internet: int131d@monu3.cc.monash.edu.au

Bresenham_line:
INPUT:
	d0<31-16>	Y coordinate of start point
	d0<15-0>	X coordinate of start point
	d1<31-16>	Y coordinate of end point
	d1<15-0>	X coordinate of end point
	a0		pointer to bit plane

USES:
	d0-d7
	a0-a1

;-----------------------------------------------------------------
; Bresenham_line - plot a line into one bitplane with the CPU.
; In:   d0 = start point, Y in bits 31-16, X in bits 15-0
;       d1 = end point,   Y in bits 31-16, X in bits 15-0
;       a0 = pointer to the bitplane
; Uses: d0-d7, a0-a1
; Needs the symbol Width_in_bytes (bytes per bitplane row) to be
; defined by the surrounding source.
; Both endpoints are plotted. No clipping is performed.
;-----------------------------------------------------------------
Bresenham_line:
	move.w	#1,d6		; d6 = X step, +1 until proven otherwise
	sub.w	d0,d1		; d1.w = X1 - X0
	bge.s	.got_deltaX
	neg.w	d1		; d1.w = abs(deltaX)
	neg.w	d6		; stepping left: X step = -1
.got_deltaX:
	swap	d0		; d0.w = Y0 (X0 parked in the high word)
	move.w	d1,d2		; d2 = deltaX
	swap	d1		; d1.w = Y1 (deltaX parked in the high word)
	move.w	#Width_in_bytes,d7	; d7 = row step, +width until proven otherwise
	sub.w	d0,d1		; d1.w = Y1 - Y0
	bge.s	.got_deltaY
	neg.w	d1		; d1.w = abs(deltaY)
	neg.w	d7		; stepping up: row step = -width
.got_deltaY:
	clr.w	d5		; d5 = 0: X is the major axis
	cmp.w	d1,d2		; deltaX - deltaY
	bge.s	.got_L_and_Sdelta
	swap	d1		; Y is major: put deltaX in the low word instead
	not.w	d5		; d5 = $ffff: Y is the major axis
.got_L_and_Sdelta:
	; here d1.w = smaller delta, d1 high word = larger delta
	move.w	d0,d2		; d2 = Y0
	mulu	#Width_in_bytes,d2	; d2.l = byte offset of the start row
	adda.l	d2,a0		; long add: mulu gives a 32-bit result, and a word
				; add would sign-extend offsets of 32768 or more
	swap	d0		; d0.w = X0 again
	movea.w	d7,a1		; a1 = signed row step
	move.w	d1,d2		; d2 = smaller delta
	swap	d1		; d1.w = larger delta = number of steps to take
	add.w	d2,d2		; d2 = 2*small        (added when error < 0)
	move.w	d2,d3
	sub.w	d1,d3		; d3 = 2*small - large (initial error term)
	move.w	d3,d4
	sub.w	d1,d4		; d4 = 2*small - 2*large (added when error >= 0)
	tst.w	d5
	bne.s	.deltaY_greater
; X-major loop: X moves every step, the row moves when the error is >= 0
.next_X:
	move.w	d0,d7
	not.w	d7		; bset uses the bit number mod 8: 7 - (X and 7)
	move.w	d0,d5
	asr.w	#3,d5		; d5 = byte offset of X within the row
	bset	d7,(a0,d5.w)	; plot the pixel
	tst.w	d1
	beq.s	.line_done	; no steps left: the end point has been plotted
	subq.w	#1,d1
	add.w	d6,d0		; step X
	tst.w	d3
	bge.s	.add_d4_to_Y
	add.w	d2,d3		; error < 0: stay on this row
	bra.s	.next_X
.add_d4_to_Y:
	adda.w	a1,a0		; error >= 0: move one row
	add.w	d4,d3
	bra.s	.next_X
; Y-major loop: the row moves every step, X moves when the error is >= 0
.deltaY_greater:
.next_Y:
	move.w	d0,d7
	not.w	d7		; bit number within the byte, as above
	move.w	d0,d5
	asr.w	#3,d5		; byte offset of X within the row
	bset	d7,(a0,d5.w)	; plot the pixel
	tst.w	d1
	beq.s	.line_done
	subq.w	#1,d1
	adda.w	a1,a0		; step one row
	tst.w	d3
	bge.s	.add_d4_to_X
	add.w	d2,d3		; error < 0: keep the same X
	bra.s	.next_Y
.add_d4_to_X:
	add.w	d6,d0		; error >= 0: step X
	add.w	d4,d3
	bra.s	.next_Y
.line_done:
	rts


AND HERE'S THE LAST ONE-------------

Path: monu6!monu0.cc.monash.edu.au!bruce!munnari.oz.au!uunet!cbmvax!chrisg
From: chrisg@cbmvax.commodore.com (Chris Green)
Newsgroups: comp.sys.amiga.programmer
Subject: Re: 3D stuff & quick line drawing
Message-ID: <21106@cbmvax.commodore.com>
Date: 1 May 91 14:05:45 GMT
References: <1991Apr22.122442.25505@cs.umu.se> <20884@cbmvax.commodore.com> <00672624939@elgamy.RAIDERNET.COM> <1991Apr29.110534.10198@cs.umu.se>
Reply-To: chrisg@cbmvax.commodore.com (Chris Green)
Organization: Commodore, West Chester, PA
Lines: 41

In article <1991Apr29.110534.10198@cs.umu.se> dvljrt@cs.umu.se (Joakim Rosqvist) writes:
>Here is my inner loop:
>    BSET  d7,(a0)     ;Plot pixel.  12 cycles
>    ADD   d6,a0       ;Go to the next scanline. 8 cycles
>    SUB   d3,d5       ;This routine always goes one pixel down and sometimes
>		      ;(every second time for 292 deg) one pixels right.
>		      ;d3 is MIN(dx,dy) that is dx in this case.  4 cycles
>    BPL.S over        ;See if it is time to go right. 9 cycles
>		      ;(8 or 10 depending on wheter it branches)
>    ADD   d4,d5       ;d4 is MAX(dx,dy)=dy in this case.  4/2=2 cycles
>    SUBQ  #1,d7       ;One pixel right  4/2=2 cycles
>    BPL.S over        ;If 0<d7<6 do next pixel 10/2=5 cycles
>    MOVEQ #7,d7       ;restart from bit 7.  4/2/8=0.25 cycles
>    ADDQ  #1,a0       ;go to next byte.  8/2/8=0.5 cycles
>over:

	Another good optimization for CPU driven lines is to do further examination of
the slope. For instance, suppose you are drawing a near vertical line.
	If DX<DY/n, then there are at least n-1 vertical steps (I think I've got this
right..I don't have the code here) between each horizontal step. So, you can do, say
2 vertical steps at a time with no check in between:

	BSET d7,(a0)
	ADD d6,a0
	BSET d7,(a0)
	SUB	d3,d5			; d3 contains twice the dx!
	BPL.s	over

	The end conditions are a little hairy, but you can still win by doing this.

	For lines that are near horizontal or vertical, and are long, you win by
doing a divide (at least on the 80xxx, you do), and using quick horizontal or vertical
fills between each transition.

	-- 
*-------------------------------------------*---------------------------*
|Chris Green - Graphics Software Engineer   - chrisg@commodore.COM      f
|                  Commodore-Amiga          - uunet!cbmvax!chrisg       n
|My opinions are my own, and do not         - killyouridolssonicdeath   o
|necessarily represent those of my employer.- itstheendoftheworld       r
*-------------------------------------------*---------------------------d

jbickers@templar.actrix.gen.nz (John Bickers) (05/08/91)

Quoted from <1991May7.003110.1684@monu6.cc.monash.edu.au> by rda184s@monu6.cc.monash.edu.au (Richard Jones):

> Please: anyone who knows _anything_ about a fast polygon

    Ok, here's John Schultz' code, which was posted some months ago.
    There are two files, a C file and an assembler one. The assembler
    module has some changes appended to it, that I never applied. So
    you'll need to do some fiddling to make those active.

    ------------------------------ 8< ------------------------------
/* fill.c, a processor polygon filler */
/* Copyright (c) 1990 John Schultz, All Rights Reserved */

/* This is the C interface to the assembly code that does most of the   */
/* work. I haven't seen an algorithm of this type for filling polygons  */
/* elsewhere, so I'd like to think my implementation of this table fill */
/* algorithm is unique :-).The algorithm works by first finding out the */
/* orientation of the polygon, then filling the tables from miny to    */
/* maxy. This allows the table to be updated unconditionally, without */
/* having to read the current values from the tables, compare, then  */
/* write back to memory.                                            */
/* The polygons are an array of drawpoints, and you don't need to  */
/* close the polygons: a triangle will only have three points.    */

/* This code has been optimized for 320x200x4 bitmaps, which have */
/* been allocated linearly (One 32,000 byte allocation as opposed */
/* to four 8,000 byte allocations).                               */ 

/* To compile this code use:                                      */
/*   lc -O -cuf -v fill.c                                         */
/*   asm scanconvert.a                                            */
/* Link to your own code as appropriate.                          */

/* I am using Lattice 5.04, and get a CXERR: 26, Line: 0, if the  */
/* optimizer is not used!                                         */

/* Look ma, no includes. */

/* One polygon vertex: dx is the x coordinate, dy the y coordinate */
/* (drawpoly passes them as the x and y arguments of the line code). */
typedef struct drawpoint {short dx,dy;} drawpoint;

/* Number of entries in each fill table, one per scanline.          */
/* NOTE(review): drawpoly assumes y is in 0..199, which needs only  */
/* 200 entries; 201 leaves one spare slot - confirm the intent.     */
#define MAXY 201                /* Bitmap max y + 1 */

/* Per-scanline left (xmin) and right (xmax) edges of the polygon,  */
/* written by fillline68k and read back by scanconvpix.             */
short xmin[MAXY], xmax[MAXY]; /* Scanconverter tables */

/* Indexed with 0 or 1 by drawpoly to pick the table an edge fills. */
short * xtable[] = {xmin,xmax}; /* For toggling between fill tables */

/* Prototypes */

/* Assembly routine (register arguments): draws a line from (x,y)  */
/* to (x2,y2) in the given color. drawpoly uses it for degenerate  */
/* polygons.                                                       */
extern void __asm drawline68k(register __a1 char * p, /* 1st Plane Ptr */
                              register __d0 short x,
                              register __d1 short y,
                              register __d2 short x2,
                              register __d3 short y2,
                              register __d4 short color);

/* Assembly routine: records the edge (x,y)-(x2,y2) in the fill    */
/* table passed in a0 (drawpoly passes xmin or xmax).              */
extern void __asm fillline68k(register __d0 short x,
                              register __d1 short y,
                              register __d2 short x2,
                              register __d3 short y2,
                              register __a0 short * table);

/* Assembly routine: fills scanlines miny..maxy between minx[] and */
/* maxx[] in the given color.                                      */
extern void __asm scanconvpix(register __a0 char * p, /* 1st Plane Ptr */
                              register __a1 short * minx,
                              register __a2 short * maxx,
                              register __d0 short miny,
                              register __d1 short maxy,
                              register __d2 short color);

/* Fills the polygon described by the cl points in dl. */
void drawpoly(drawpoint * dl, /* An array of drawpoints */
              char * p,      /* 1st Plane Ptr          */
              short cl,     /* Number of points       */
              short color);

/* Code */

/* drawpoly: fill the polygon whose cl vertices are in dl[] with    */
/* the given color, drawing into the bitmap whose first plane is p. */
/* The polygon is not closed by the caller (a triangle has three    */
/* points). A polygon with zero signed area is drawn as a single    */
/* line between its two extreme points; cl <= 0 draws nothing.      */
/* Coordinates are not clipped here; they must already lie inside   */
/* the bitmap and the xmin/xmax tables.                             */
void drawpoly(drawpoint * dl, /* An array of drawpoints */
              char * p,      /* 1st Plane Ptr          */
              short cl,     /* Number of points       */
              short color){
short miny,maxy;       /* Indices of the top and bottom vertices */
short i,inext,index;
short tminy,tmaxy;     /* Smallest and largest y found           */
short lo,hi;           /* Extreme vertices of a degenerate poly  */
long orient=0;

  if (cl <= 0) return; /* Nothing to draw, and dl[0] may not exist */

/* Find out if polygon is clockwise or counterclockwise */

  for (i=0; i < cl; i++) {        /* Use the Newell method */
    inext = (i == (cl-1)) ? 0 : i+1;
    /* Force long arithmetic so the product cannot wrap when the */
    /* compiler is set to 16 bit ints.                           */
    orient += (long)(dl[i].dx - dl[inext].dx) *
              (long)(dl[i].dy + dl[inext].dy);
  } /* for i */

  if (orient != 0) {    /* Check to see if we have a line or polygon */
    index = (orient < 0) ? 0 : 1;

/* Find miny,maxy. Seed the search from the first vertex so that   */
/* miny and maxy are always assigned, whatever the coordinates.    */
    miny = maxy = 0;
    tminy = tmaxy = dl[0].dy;
    for (i=1; i < cl; i++) {
      if (dl[i].dy < tminy) {miny = i; tminy = dl[i].dy;}
      if (dl[i].dy > tmaxy) {maxy = i; tmaxy = dl[i].dy;}
    } /* for i */

/* Fill tables */
    i = miny; /* Start at top, fill to bottom, filling lines from top */
    do {     /* to bottom for efficient assembly implementation.     */
      inext = i + 1;
      if (inext >= cl) inext = 0; /* Wrap around */
      fillline68k(dl[i].dx,dl[i].dy,dl[inext].dx,dl[inext].dy,xtable[index]);
      if (inext == maxy) index ^= 1; /* Toggle min/max to keep lines    */
      i++;                          /* being filled from top to bottom */
      if (i >= cl) i = 0; /* Wrap around */
    } while (inext != miny); /* Come full circle, done */

/* Draw polygon */
   scanconvpix(p,xmin,xmax,tminy,tmaxy,color);

  } else { /* Draw a line */

/* Zero area: for collinear points the segment runs between the    */
/* smallest and largest vertex in (x, then y) order. This works    */
/* for any number of points and never reads past dl[cl-1].         */
    lo = hi = 0;
    for (i=1; i < cl; i++) {
      if ((dl[i].dx < dl[lo].dx) ||
          ((dl[i].dx == dl[lo].dx) && (dl[i].dy < dl[lo].dy))) lo = i;
      if ((dl[i].dx > dl[hi].dx) ||
          ((dl[i].dx == dl[hi].dx) && (dl[i].dy > dl[hi].dy))) hi = i;
    } /* for i */
    drawline68k(p,dl[lo].dx,dl[lo].dy,dl[hi].dx,dl[hi].dy,color);

  } /* if orient */

} /* end drawpoly */

/* end fill.c */
    ------------------------------ 8< ------------------------------
; scanconvert.a
; Copyright (c) 1990 John Schultz, All Rights Reserved

; Created 14-March-90
; Modified 17-May-90 
;   Re-written with long word aligned writes: now up to twice as fast
;   as using bfset/bfclr (bitfield instructions can hit up to 5 bytes,
;   not long word aligned).
;   Code is now 68000 compatible.
; Modified 8-June-90
;   Included filline68k, drawline68k in this file for Fill example.
;   Assembles to 3840 bytes using the Devpac 2.0 assembler.

	section	scanconvert,code

; Entry points exported to the C side (fill.c declares them without
; the leading underscore).
	xdef	_scanconvpix
	xdef	_fillline68k
	xdef	_drawline68k

; This code has been optimized for 320x200x4 bitmaps, which have
; been allocated linearly (One 32,000 byte allocation as opposed
; to four 8,000 byte allocations).



PLANESIZE	equ	8000	; 320x200 bitplanes: bytes from one plane to the next
BYTESROW	equ	40	; bytes per scanline (320 pixels / 8)

;extern void __asm scanconvpix(register __a0 PLANEPTR p,
;                              register __a1 short * minx,
;                              register __a2 short * maxx,
;                              register __d0 short miny,
;                              register __d1 short maxy,
;                              register __d2 short color);

; _scanconvpix - fill scanlines miny..maxy between the per-line x
; limits in the minx/maxx tables, in the requested color.
; In:   a0 = first plane of the bitmap
;       a1 = minx table, a2 = maxx table (one word per scanline)
;       d0 = miny, d1 = maxy, d2 = color
; Saves d2-d7/a2-a5 on the stack; the colorN routine that is jumped
; to restores them and returns to the caller.
; Hands over to the colorN routine:
;       a0 = d6 = address of scanline miny in the first plane
;       a4/a5  = table entries for scanline miny
;       d5 = BYTESROW, d7 = maxy - miny (dbra count)
_scanconvpix:
	movem.l	d2-d7/a2-a5,-(sp)

	move.w	d1,d7	; get maxy. d0 = miny.
	sub.w	d0,d7	; compute yheight. +1 not necessary: using dbra.

	movea.l	a1,a4	; copy xminptr
	movea.l	a2,a5	; copy xmaxptr

	move.w	d0,d6	; copy miny
	add.w	d6,d6	; y index is 2 bytes, so *2 offset
	adda.w	d6,a4	; get to start position minx
	adda.w	d6,a5	; get to start position maxx

	move.w	d0,d6	; copy miny
	lsl.w	#3,d6	; *8
	lsl.w	#5,d0	; *32
	add.w	d6,d0	; *40 (= BYTESROW; change both shifts if BYTESROW changes)

	adda.w	d0,a0	; go to first scanline in bitmap
	move.l	a0,d6	; setup for first pass (d6 += 40 each pass)

	moveq.l	#BYTESROW,d5	; bytes per row, to be added each scanline

	lea	colorplanes(pc),a2	; get colorplane table
	andi.w	#$000f,d2		; only colors 0-15 have a fill routine:
					; keep the index inside the table so a bad
					; color cannot jump to a random address
	add.w	d2,d2
	add.w	d2,d2			; entries are 4 bytes long
	movea.l	0(a2,d2.w),a2		; get color jsr address

	jmp	(a2)			; do specific plane writes/clears

; end of scanconvpix

; Specific fill cases for 0-15 colors. Speed is of the essence, not
; code size :-). Each case is specifically optimized. Could be made
; shorter, but slower.

; COLOR0 - macro emitting the span-fill loop labelled color0.
; For each scanline it reads minx from (a4)+ and maxx from (a5)+ and
; clears the bits for pixels minx..maxx (inclusive) in all four
; planes (a0 and a0 + 1,2,3 * PLANESIZE).
; Expects a0 = d6 = start of the current scanline, d5 = bytes per
; row, d7 = scanline count - 1. Finishes by restoring d2-d7/a2-a5
; from the stack and returning.
COLOR0	macro

color0:

scanloop4\@
	move.w	(a4)+,d0	; get minx
	move.w	(a5)+,d1	; get maxx

	cmp.w	d0,d1		; error check: maxx - minx
	bmi.b	finished4\@	; negative: skip this scanline

	move.w	d0,d4		; copy minx
	lsr.w	#5,d4		; get first long word
	lsl.w	#2,d4		; *4 = number of bytes
	adda.w	d4,a0		; add x offset to scanline

	lea	PLANESIZE(a0),a1	; a1-a3 = same position in the
	lea	PLANESIZE(a1),a2	; next three planes
	lea	PLANESIZE(a2),a3

	move.w	d0,d4		; copy minx
	moveq.l	#-1,d2		; start mask, $ffffffff
	andi.w	#$1f,d4		; minx mod 32 = shift count
	lsr.l	d4,d2		; d2 = left mask: bits from minx rightwards

	move.w	d1,d4		; copy maxx
	moveq.l	#-1,d3		; start mask, $ffffffff
	andi.w	#$1f,d4		; maxx mod 32
	neg.w	d4		; - d4
	add.w	#31,d4		; d4 = 31 - (maxx mod 32) = shift count
	lsl.l	d4,d3		; d3 = right mask: bits up to and including maxx

	lsr.w	#5,d0		; get start long word
	lsr.w	#5,d1		; get end   long word
	sub.w	d0,d1		; end - start = long words spanned - 1
	bne.b	doublewrite4\@	; check for single write
; single write: the whole span lies inside one long word
	and.l	d2,d3		; combine left and right masks
	not.l	d3		; invert
	and.l	d3,(a0)		; clear
	and.l	d3,(a1)		; clear
	and.l	d3,(a2)		; clear
	and.l	d3,(a3)		; clear
	bra.b	finished4\@
doublewrite4\@
	not.l	d2		; invert
	and.l	d2,(a0)+	; write left mask
	and.l	d2,(a1)+	; write left mask
	and.l	d2,(a2)+	; write left mask
	and.l	d2,(a3)+	; write left mask
	subq.w	#2,d1		; d1 = whole middle long words - 1
	bmi.b	writeright4\@	; don't do a multiwrite
; multiwrite
	moveq.l	#0,d2		; zeroes
multiloop4\@
	move.l	d2,(a0)+	; write middle 0's
	move.l	d2,(a1)+	; write middle 0's
	move.l	d2,(a2)+	; write middle 0's
	move.l	d2,(a3)+	; write middle 0's
	dbra	d1,multiloop4\@
writeright4\@
	not.l	d3		; invert
	and.l	d3,(a0)		; write right mask
	and.l	d3,(a1)		; write right mask
	and.l	d3,(a2)		; write right mask
	and.l	d3,(a3)		; write right mask
finished4\@
	add.l	d5,d6		; go to next scanline
	movea.l	d6,a0		; copy ptr to modify
	dbra	d7,scanloop4\@	; d7 = yheight-1.

	movem.l	(sp)+,d2-d7/a2-a5
	rts

	endm

; COLOR1 - macro emitting the span-fill loop labelled color1.
; For each scanline it reads minx from (a4)+ and maxx from (a5)+ and,
; for pixels minx..maxx (inclusive), sets the bits in the plane at a0
; and clears them in the planes at a0 + 1,2,3 * PLANESIZE.
; Expects a0 = d6 = start of the current scanline, d5 = bytes per
; row, d7 = scanline count - 1. Finishes by restoring d2-d7/a2-a5
; from the stack and returning.
COLOR1	macro

color1:

scanloop4\@
	move.w	(a4)+,d0	; get minx
	move.w	(a5)+,d1	; get maxx

	cmp.w	d0,d1		; error check: maxx - minx
	bmi.b	finished4\@	; negative: skip this scanline

	move.w	d0,d4		; copy minx
	lsr.w	#5,d4		; get first long word
	lsl.w	#2,d4		; *4 = number of bytes
	adda.w	d4,a0		; add x offset to scanline

	lea	PLANESIZE(a0),a1	; a1-a3 = same position in the
	lea	PLANESIZE(a1),a2	; next three planes
	lea	PLANESIZE(a2),a3

	move.w	d0,d4		; copy minx
	moveq.l	#-1,d2		; start mask, $ffffffff
	andi.w	#$1f,d4		; minx mod 32 = shift count
	lsr.l	d4,d2		; d2 = left mask: bits from minx rightwards

	move.w	d1,d4		; copy maxx
	moveq.l	#-1,d3		; start mask, $ffffffff
	andi.w	#$1f,d4		; maxx mod 32
	neg.w	d4		; - d4
	add.w	#31,d4		; d4 = 31 - (maxx mod 32) = shift count
	lsl.l	d4,d3		; d3 = right mask: bits up to and including maxx

	lsr.w	#5,d0		; get start long word
	lsr.w	#5,d1		; get end   long word
	sub.w	d0,d1		; end - start = long words spanned - 1
	bne.b	doublewrite4\@	; check for single write
; single write: the whole span lies inside one long word
	and.l	d2,d3		; combine left and right masks
	or.l	d3,(a0)		; set
	not.l	d3		; invert
	and.l	d3,(a1)		; clear
	and.l	d3,(a2)		; clear
	and.l	d3,(a3)		; clear
	not.l	d3		; no effect on the result: d3 is rebuilt per scanline
	bra.b	finished4\@
doublewrite4\@
	or.l	d2,(a0)+	; set
	not.l	d2		; invert
	and.l	d2,(a1)+	; clear
	and.l	d2,(a2)+	; clear
	and.l	d2,(a3)+	; clear
	subq.w	#2,d1		; d1 = whole middle long words - 1
	bmi.b	writeright4\@	; don't do a multiwrite
; multiwrite
	moveq.l	#-1,d2		; $ffffffff
	moveq.l	#0,d4		; zeroes
multiloop4\@
	move.l	d2,(a0)+	; set
	move.l	d4,(a1)+	; clear
	move.l	d4,(a2)+	; clear
	move.l	d4,(a3)+	; clear
	dbra	d1,multiloop4\@
writeright4\@
	or.l	d3,(a0)		; set
	not.l	d3		; invert
	and.l	d3,(a1)		; clear
	and.l	d3,(a2)		; clear
	and.l	d3,(a3)		; clear
finished4\@
	add.l	d5,d6		; go to next scanline
	movea.l	d6,a0		; copy ptr to modify
	dbra	d7,scanloop4\@	; d7 = yheight-1.

	movem.l	(sp)+,d2-d7/a2-a5
	rts

	endm

; COLOR2 - macro emitting the span-fill loop labelled color2.
; For each scanline it reads minx from (a4)+ and maxx from (a5)+ and,
; for pixels minx..maxx (inclusive), sets the bits in the plane at a1
; and clears them in the planes at a0, a2 and a3
; (a1-a3 = a0 + 1,2,3 * PLANESIZE).
; Expects a0 = d6 = start of the current scanline, d5 = bytes per
; row, d7 = scanline count - 1. Finishes by restoring d2-d7/a2-a5
; from the stack and returning.
COLOR2	macro

color2:

scanloop4\@
	move.w	(a4)+,d0	; get minx
	move.w	(a5)+,d1	; get maxx

	cmp.w	d0,d1		; error check: maxx - minx
	bmi.b	finished4\@	; negative: skip this scanline

	move.w	d0,d4		; copy minx
	lsr.w	#5,d4		; get first long word
	lsl.w	#2,d4		; *4 = number of bytes
	adda.w	d4,a0		; add x offset to scanline

	lea	PLANESIZE(a0),a1	; a1-a3 = same position in the
	lea	PLANESIZE(a1),a2	; next three planes
	lea	PLANESIZE(a2),a3

	move.w	d0,d4		; copy minx
	moveq.l	#-1,d2		; start mask, $ffffffff
	andi.w	#$1f,d4		; minx mod 32 = shift count
	lsr.l	d4,d2		; d2 = left mask: bits from minx rightwards

	move.w	d1,d4		; copy maxx
	moveq.l	#-1,d3		; start mask, $ffffffff
	andi.w	#$1f,d4		; maxx mod 32
	neg.w	d4		; - d4
	add.w	#31,d4		; d4 = 31 - (maxx mod 32) = shift count
	lsl.l	d4,d3		; d3 = right mask: bits up to and including maxx

	lsr.w	#5,d0		; get start long word
	lsr.w	#5,d1		; get end   long word
	sub.w	d0,d1		; end - start = long words spanned - 1
	bne.b	doublewrite4\@	; check for single write
; single write: the whole span lies inside one long word
	and.l	d2,d3		; combine left and right masks
	or.l	d3,(a1)		; set
	not.l	d3		; invert
	and.l	d3,(a0)		; clear
	and.l	d3,(a2)		; clear
	and.l	d3,(a3)		; clear
	bra.b	finished4\@
doublewrite4\@
	or.l	d2,(a1)+	; set
	not.l	d2		; invert
	and.l	d2,(a0)+	; clear
	and.l	d2,(a2)+	; clear
	and.l	d2,(a3)+	; clear
	subq.w	#2,d1		; d1 = whole middle long words - 1
	bmi.b	writeright4\@	; don't do a multiwrite
; multiwrite
	moveq.l	#-1,d2		; $ffffffff
	moveq.l	#0,d4		; zeroes
multiloop4\@
	move.l	d2,(a1)+	; set
	move.l	d4,(a0)+	; clear
	move.l	d4,(a2)+	; clear
	move.l	d4,(a3)+	; clear
	dbra	d1,multiloop4\@
writeright4\@
	or.l	d3,(a1)		; set
	not.l	d3		; invert
	and.l	d3,(a0)		; clear
	and.l	d3,(a2)		; clear
	and.l	d3,(a3)		; clear
finished4\@
	add.l	d5,d6		; go to next scanline
	movea.l	d6,a0		; copy ptr to modify
	dbra	d7,scanloop4\@	; d7 = yheight-1.

	movem.l	(sp)+,d2-d7/a2-a5
	rts

	endm

; COLOR3 - macro emitting the span-fill loop labelled color3.
; For each scanline it reads minx from (a4)+ and maxx from (a5)+ and,
; for pixels minx..maxx (inclusive), sets the bits in the planes at
; a0 and a1 and clears them in the planes at a2 and a3
; (a1-a3 = a0 + 1,2,3 * PLANESIZE).
; Expects a0 = d6 = start of the current scanline, d5 = bytes per
; row, d7 = scanline count - 1. Finishes by restoring d2-d7/a2-a5
; from the stack and returning.
COLOR3	macro

color3:

scanloop4\@
	move.w	(a4)+,d0	; get minx
	move.w	(a5)+,d1	; get maxx

	cmp.w	d0,d1		; error check: maxx - minx
	bmi.b	finished4\@	; negative: skip this scanline

	move.w	d0,d4		; copy minx
	lsr.w	#5,d4		; get first long word
	lsl.w	#2,d4		; *4 = number of bytes
	adda.w	d4,a0		; add x offset to scanline

	lea	PLANESIZE(a0),a1	; a1-a3 = same position in the
	lea	PLANESIZE(a1),a2	; next three planes
	lea	PLANESIZE(a2),a3

	move.w	d0,d4		; copy minx
	moveq.l	#-1,d2		; start mask, $ffffffff
	andi.w	#$1f,d4		; minx mod 32 = shift count
	lsr.l	d4,d2		; d2 = left mask: bits from minx rightwards

	move.w	d1,d4		; copy maxx
	moveq.l	#-1,d3		; start mask, $ffffffff
	andi.w	#$1f,d4		; maxx mod 32
	neg.w	d4		; - d4
	add.w	#31,d4		; d4 = 31 - (maxx mod 32) = shift count
	lsl.l	d4,d3		; d3 = right mask: bits up to and including maxx

	lsr.w	#5,d0		; get start long word
	lsr.w	#5,d1		; get end   long word
	sub.w	d0,d1		; end - start = long words spanned - 1
	bne.b	doublewrite4\@	; check for single write
; single write: the whole span lies inside one long word
	and.l	d2,d3		; combine left and right masks
	or.l	d3,(a0)		; set
	or.l	d3,(a1)		; set
	not.l	d3		; invert
	and.l	d3,(a2)		; clear
	and.l	d3,(a3)		; clear
	bra.b	finished4\@
doublewrite4\@
	or.l	d2,(a0)+	; set
	or.l	d2,(a1)+	; set
	not.l	d2		; invert
	and.l	d2,(a2)+	; clear
	and.l	d2,(a3)+	; clear
	subq.w	#2,d1		; d1 = whole middle long words - 1
	bmi.b	writeright4\@	; don't do a multiwrite
; multiwrite
	moveq.l	#-1,d2		; $ffffffff
	moveq.l	#0,d4		; zeroes
multiloop4\@
	move.l	d2,(a0)+	; set
	move.l	d2,(a1)+	; set
	move.l	d4,(a2)+	; clear
	move.l	d4,(a3)+	; clear
	dbra	d1,multiloop4\@
writeright4\@
	or.l	d3,(a0)		; set
	or.l	d3,(a1)		; set
	not.l	d3		; invert
	and.l	d3,(a2)		; clear
	and.l	d3,(a3)		; clear
finished4\@
	add.l	d5,d6		; go to next scanline
	movea.l	d6,a0		; copy ptr to modify
	dbra	d7,scanloop4\@	; d7 = yheight-1.

	movem.l	(sp)+,d2-d7/a2-a5
	rts

	endm

; COLOR4 - macro emitting the span-fill loop labelled color4.
; For each scanline it reads minx from (a4)+ and maxx from (a5)+ and,
; for pixels minx..maxx (inclusive), sets the bits in the plane at a2
; and clears them in the planes at a0, a1 and a3
; (a1-a3 = a0 + 1,2,3 * PLANESIZE).
; Expects a0 = d6 = start of the current scanline, d5 = bytes per
; row, d7 = scanline count - 1. Finishes by restoring d2-d7/a2-a5
; from the stack and returning.
COLOR4	macro

color4:

scanloop4\@
	move.w	(a4)+,d0	; get minx
	move.w	(a5)+,d1	; get maxx

	cmp.w	d0,d1		; error check: maxx - minx
	bmi.b	finished4\@	; negative: skip this scanline

	move.w	d0,d4		; copy minx
	lsr.w	#5,d4		; get first long word
	lsl.w	#2,d4		; *4 = number of bytes
	adda.w	d4,a0		; add x offset to scanline

	lea	PLANESIZE(a0),a1	; a1-a3 = same position in the
	lea	PLANESIZE(a1),a2	; next three planes
	lea	PLANESIZE(a2),a3

	move.w	d0,d4		; copy minx
	moveq.l	#-1,d2		; start mask, $ffffffff
	andi.w	#$1f,d4		; minx mod 32 = shift count
	lsr.l	d4,d2		; d2 = left mask: bits from minx rightwards

	move.w	d1,d4		; copy maxx
	moveq.l	#-1,d3		; start mask, $ffffffff
	andi.w	#$1f,d4		; maxx mod 32
	neg.w	d4		; - d4
	add.w	#31,d4		; d4 = 31 - (maxx mod 32) = shift count
	lsl.l	d4,d3		; d3 = right mask: bits up to and including maxx

	lsr.w	#5,d0		; get start long word
	lsr.w	#5,d1		; get end   long word
	sub.w	d0,d1		; end - start = long words spanned - 1
	bne.b	doublewrite4\@	; check for single write
; single write: the whole span lies inside one long word
	and.l	d2,d3		; combine left and right masks
	or.l	d3,(a2)		; set
	not.l	d3		; invert
	and.l	d3,(a0)		; clear
	and.l	d3,(a1)		; clear
	and.l	d3,(a3)		; clear
	bra.b	finished4\@
doublewrite4\@
	or.l	d2,(a2)+	; set
	not.l	d2		; invert
	and.l	d2,(a0)+	; clear
	and.l	d2,(a1)+	; clear
	and.l	d2,(a3)+	; clear
	subq.w	#2,d1		; d1 = whole middle long words - 1
	bmi.b	writeright4\@	; don't do a multiwrite
; multiwrite
	moveq.l	#-1,d2		; $ffffffff
	moveq.l	#0,d4		; zeroes
multiloop4\@
	move.l	d2,(a2)+	; set
	move.l	d4,(a0)+	; clear
	move.l	d4,(a1)+	; clear
	move.l	d4,(a3)+	; clear
	dbra	d1,multiloop4\@
writeright4\@
	or.l	d3,(a2)		; set
	not.l	d3		; invert
	and.l	d3,(a0)		; clear
	and.l	d3,(a1)		; clear
	and.l	d3,(a3)		; clear
finished4\@
	add.l	d5,d6		; go to next scanline
	movea.l	d6,a0		; copy ptr to modify
	dbra	d7,scanloop4\@	; d7 = yheight-1.

	movem.l	(sp)+,d2-d7/a2-a5
	rts

	endm

; COLOR5 - macro emitting the span-fill loop labelled color5.
; For each scanline it reads minx from (a4)+ and maxx from (a5)+ and,
; for pixels minx..maxx (inclusive), sets the bits in the planes at
; a0 and a2 and clears them in the planes at a1 and a3
; (a1-a3 = a0 + 1,2,3 * PLANESIZE).
; Expects a0 = d6 = start of the current scanline, d5 = bytes per
; row, d7 = scanline count - 1. Finishes by restoring d2-d7/a2-a5
; from the stack and returning.
COLOR5	macro

color5:

scanloop4\@
	move.w	(a4)+,d0	; get minx
	move.w	(a5)+,d1	; get maxx

	cmp.w	d0,d1		; error check: maxx - minx
	bmi.b	finished4\@	; negative: skip this scanline

	move.w	d0,d4		; copy minx
	lsr.w	#5,d4		; get first long word
	lsl.w	#2,d4		; *4 = number of bytes
	adda.w	d4,a0		; add x offset to scanline

	lea	PLANESIZE(a0),a1	; a1-a3 = same position in the
	lea	PLANESIZE(a1),a2	; next three planes
	lea	PLANESIZE(a2),a3

	move.w	d0,d4		; copy minx
	moveq.l	#-1,d2		; start mask, $ffffffff
	andi.w	#$1f,d4		; minx mod 32 = shift count
	lsr.l	d4,d2		; d2 = left mask: bits from minx rightwards

	move.w	d1,d4		; copy maxx
	moveq.l	#-1,d3		; start mask, $ffffffff
	andi.w	#$1f,d4		; maxx mod 32
	neg.w	d4		; - d4
	add.w	#31,d4		; d4 = 31 - (maxx mod 32) = shift count
	lsl.l	d4,d3		; d3 = right mask: bits up to and including maxx

	lsr.w	#5,d0		; get start long word
	lsr.w	#5,d1		; get end   long word
	sub.w	d0,d1		; end - start = long words spanned - 1
	bne.b	doublewrite4\@	; check for single write
; single write: the whole span lies inside one long word
	and.l	d2,d3		; combine left and right masks
	or.l	d3,(a0)		; set
	or.l	d3,(a2)		; set
	not.l	d3		; invert
	and.l	d3,(a1)		; clear
	and.l	d3,(a3)		; clear
	bra.b	finished4\@
doublewrite4\@
	or.l	d2,(a0)+	; set
	or.l	d2,(a2)+	; set
	not.l	d2		; invert
	and.l	d2,(a1)+	; clear
	and.l	d2,(a3)+	; clear
	subq.w	#2,d1		; d1 = whole middle long words - 1
	bmi.b	writeright4\@	; don't do a multiwrite
; multiwrite
	moveq.l	#-1,d2		; $ffffffff
	moveq.l	#0,d4		; zeroes
multiloop4\@
	move.l	d2,(a0)+	; set
	move.l	d2,(a2)+	; set
	move.l	d4,(a1)+	; clear
	move.l	d4,(a3)+	; clear
	dbra	d1,multiloop4\@
writeright4\@
	or.l	d3,(a0)		; set
	or.l	d3,(a2)		; set
	not.l	d3		; invert
	and.l	d3,(a1)		; clear
	and.l	d3,(a3)		; clear
finished4\@
	add.l	d5,d6		; go to next scanline
	movea.l	d6,a0		; copy ptr to modify
	dbra	d7,scanloop4\@	; d7 = yheight-1.

	movem.l	(sp)+,d2-d7/a2-a5
	rts

	endm

; COLOR6 - macro emitting the span-fill loop labelled color6.
; For each scanline it reads minx from (a4)+ and maxx from (a5)+ and,
; for pixels minx..maxx (inclusive), sets the bits in the planes at
; a1 and a2 and clears them in the planes at a0 and a3
; (a1-a3 = a0 + 1,2,3 * PLANESIZE).
; Expects a0 = d6 = start of the current scanline, d5 = bytes per
; row, d7 = scanline count - 1. Finishes by restoring d2-d7/a2-a5
; from the stack and returning.
COLOR6	macro

color6:

scanloop4\@
	move.w	(a4)+,d0	; get minx
	move.w	(a5)+,d1	; get maxx

	cmp.w	d0,d1		; error check: maxx - minx
	bmi.b	finished4\@	; negative: skip this scanline

	move.w	d0,d4		; copy minx
	lsr.w	#5,d4		; get first long word
	lsl.w	#2,d4		; *4 = number of bytes
	adda.w	d4,a0		; add x offset to scanline

	lea	PLANESIZE(a0),a1	; a1-a3 = same position in the
	lea	PLANESIZE(a1),a2	; next three planes
	lea	PLANESIZE(a2),a3

	move.w	d0,d4		; copy minx
	moveq.l	#-1,d2		; start mask, $ffffffff
	andi.w	#$1f,d4		; minx mod 32 = shift count
	lsr.l	d4,d2		; d2 = left mask: bits from minx rightwards

	move.w	d1,d4		; copy maxx
	moveq.l	#-1,d3		; start mask, $ffffffff
	andi.w	#$1f,d4		; maxx mod 32
	neg.w	d4		; - d4
	add.w	#31,d4		; d4 = 31 - (maxx mod 32) = shift count
	lsl.l	d4,d3		; d3 = right mask: bits up to and including maxx

	lsr.w	#5,d0		; get start long word
	lsr.w	#5,d1		; get end   long word
	sub.w	d0,d1		; end - start = long words spanned - 1
	bne.b	doublewrite4\@	; check for single write
; single write: the whole span lies inside one long word
	and.l	d2,d3		; combine left and right masks
	or.l	d3,(a1)		; set
	or.l	d3,(a2)		; set
	not.l	d3		; invert
	and.l	d3,(a0)		; clear
	and.l	d3,(a3)		; clear
	bra.b	finished4\@
doublewrite4\@
	or.l	d2,(a1)+	; set
	or.l	d2,(a2)+	; set
	not.l	d2		; invert
	and.l	d2,(a0)+	; clear
	and.l	d2,(a3)+	; clear
	subq.w	#2,d1		; d1 = whole middle long words - 1
	bmi.b	writeright4\@	; don't do a multiwrite
; multiwrite
	moveq.l	#-1,d2		; $ffffffff
	moveq.l	#0,d4		; zeroes
multiloop4\@
	move.l	d2,(a1)+	; set
	move.l	d2,(a2)+	; set
	move.l	d4,(a0)+	; clear
	move.l	d4,(a3)+	; clear
	dbra	d1,multiloop4\@
writeright4\@
	or.l	d3,(a1)		; set
	or.l	d3,(a2)		; set
	not.l	d3		; invert
	and.l	d3,(a0)		; clear
	and.l	d3,(a3)		; clear
finished4\@
	add.l	d5,d6		; go to next scanline
	movea.l	d6,a0		; copy ptr to modify
	dbra	d7,scanloop4\@	; d7 = yheight-1.

	movem.l	(sp)+,d2-d7/a2-a5
	rts

	endm

; COLOR7 - macro emitting the span-fill loop labelled color7.
; For each scanline it reads minx from (a4)+ and maxx from (a5)+ and,
; for pixels minx..maxx (inclusive), sets the bits in the planes at
; a0, a1 and a2 and clears them in the plane at a3
; (a1-a3 = a0 + 1,2,3 * PLANESIZE).
; Expects a0 = d6 = start of the current scanline, d5 = bytes per
; row, d7 = scanline count - 1. Finishes by restoring d2-d7/a2-a5
; from the stack and returning.
COLOR7	macro

color7:

scanloop4\@
	move.w	(a4)+,d0	; get minx
	move.w	(a5)+,d1	; get maxx

	cmp.w	d0,d1		; error check: maxx - minx
	bmi.b	finished4\@	; negative: skip this scanline

	move.w	d0,d4		; copy minx
	lsr.w	#5,d4		; get first long word
	lsl.w	#2,d4		; *4 = number of bytes
	adda.w	d4,a0		; add x offset to scanline

	lea	PLANESIZE(a0),a1	; a1-a3 = same position in the
	lea	PLANESIZE(a1),a2	; next three planes
	lea	PLANESIZE(a2),a3

	move.w	d0,d4		; copy minx
	moveq.l	#-1,d2		; start mask, $ffffffff
	andi.w	#$1f,d4		; minx mod 32 = shift count
	lsr.l	d4,d2		; d2 = left mask: bits from minx rightwards

	move.w	d1,d4		; copy maxx
	moveq.l	#-1,d3		; start mask, $ffffffff
	andi.w	#$1f,d4		; maxx mod 32
	neg.w	d4		; - d4
	add.w	#31,d4		; d4 = 31 - (maxx mod 32) = shift count
	lsl.l	d4,d3		; d3 = right mask: bits up to and including maxx

	lsr.w	#5,d0		; get start long word
	lsr.w	#5,d1		; get end   long word
	sub.w	d0,d1		; end - start = long words spanned - 1
	bne.b	doublewrite4\@	; check for single write
; single write: the whole span lies inside one long word
	and.l	d2,d3		; combine left and right masks
	or.l	d3,(a0)		; set
	or.l	d3,(a1)		; set
	or.l	d3,(a2)		; set
	not.l	d3		; invert
	and.l	d3,(a3)		; clear
	bra.b	finished4\@
doublewrite4\@
	or.l	d2,(a0)+	; set
	or.l	d2,(a1)+	; set
	or.l	d2,(a2)+	; set
	not.l	d2		; invert
	and.l	d2,(a3)+	; clear
	subq.w	#2,d1		; d1 = whole middle long words - 1
	bmi.b	writeright4\@	; don't do a multiwrite
; multiwrite
	moveq.l	#-1,d2		; $ffffffff
	moveq.l	#0,d4		; zeroes
multiloop4\@
	move.l	d2,(a0)+	; set
	move.l	d2,(a1)+	; set
	move.l	d2,(a2)+	; set
	move.l	d4,(a3)+	; clear
	dbra	d1,multiloop4\@
writeright4\@
	or.l	d3,(a0)		; set
	or.l	d3,(a1)		; set
	or.l	d3,(a2)		; set
	not.l	d3		; invert
	and.l	d3,(a3)		; clear
finished4\@
	add.l	d5,d6		; go to next scanline
	movea.l	d6,a0		; copy ptr to modify
	dbra	d7,scanloop4\@	; d7 = yheight-1.

	movem.l	(sp)+,d2-d7/a2-a5
	rts

	endm

; COLOR8 - macro emitting the span-fill loop labelled color8.
; For each scanline it reads minx from (a4)+ and maxx from (a5)+ and,
; for pixels minx..maxx (inclusive), sets the bits in the plane at a3
; and clears them in the planes at a0, a1 and a2
; (a1-a3 = a0 + 1,2,3 * PLANESIZE).
; Expects a0 = d6 = start of the current scanline, d5 = bytes per
; row, d7 = scanline count - 1. Finishes by restoring d2-d7/a2-a5
; from the stack and returning.
COLOR8	macro

color8:

scanloop4\@
	move.w	(a4)+,d0	; get minx
	move.w	(a5)+,d1	; get maxx

	cmp.w	d0,d1		; error check: maxx - minx
	bmi.b	finished4\@	; negative: skip this scanline

	move.w	d0,d4		; copy minx
	lsr.w	#5,d4		; get first long word
	lsl.w	#2,d4		; *4 = number of bytes
	adda.w	d4,a0		; add x offset to scanline

	lea	PLANESIZE(a0),a1	; a1-a3 = same position in the
	lea	PLANESIZE(a1),a2	; next three planes
	lea	PLANESIZE(a2),a3

	move.w	d0,d4		; copy minx
	moveq.l	#-1,d2		; start mask, $ffffffff
	andi.w	#$1f,d4		; minx mod 32 = shift count
	lsr.l	d4,d2		; d2 = left mask: bits from minx rightwards

	move.w	d1,d4		; copy maxx
	moveq.l	#-1,d3		; start mask, $ffffffff
	andi.w	#$1f,d4		; maxx mod 32
	neg.w	d4		; - d4
	add.w	#31,d4		; d4 = 31 - (maxx mod 32) = shift count
	lsl.l	d4,d3		; d3 = right mask: bits up to and including maxx

	lsr.w	#5,d0		; get start long word
	lsr.w	#5,d1		; get end   long word
	sub.w	d0,d1		; end - start = long words spanned - 1
	bne.b	doublewrite4\@	; check for single write
; single write: the whole span lies inside one long word
	and.l	d2,d3		; combine left and right masks
	or.l	d3,(a3)		; set
	not.l	d3		; invert
	and.l	d3,(a0)		; clear
	and.l	d3,(a1)		; clear
	and.l	d3,(a2)		; clear
	bra.b	finished4\@
doublewrite4\@
	or.l	d2,(a3)+	; set
	not.l	d2		; invert
	and.l	d2,(a0)+	; clear
	and.l	d2,(a1)+	; clear
	and.l	d2,(a2)+	; clear
	subq.w	#2,d1		; d1 = whole middle long words - 1
	bmi.b	writeright4\@	; don't do a multiwrite
; multiwrite
	moveq.l	#-1,d2		; $ffffffff
	moveq.l	#0,d4		; zeroes
multiloop4\@
	move.l	d2,(a3)+	; set
	move.l	d4,(a0)+	; clear
	move.l	d4,(a1)+	; clear
	move.l	d4,(a2)+	; clear
	dbra	d1,multiloop4\@
writeright4\@
	or.l	d3,(a3)		; set
	not.l	d3		; invert
	and.l	d3,(a0)		; clear
	and.l	d3,(a1)		; clear
	and.l	d3,(a2)		; clear
finished4\@
	add.l	d5,d6		; go to next scanline
	movea.l	d6,a0		; copy ptr to modify
	dbra	d7,scanloop4\@	; d7 = yheight-1.

	movem.l	(sp)+,d2-d7/a2-a5
	rts

	endm

; COLOR9 - macro emitting the span-fill loop labelled color9.
; For each scanline it reads minx from (a4)+ and maxx from (a5)+ and,
; for pixels minx..maxx (inclusive), sets the bits in the planes at
; a0 and a3 and clears them in the planes at a1 and a2
; (a1-a3 = a0 + 1,2,3 * PLANESIZE).
; Expects a0 = d6 = start of the current scanline, d5 = bytes per
; row, d7 = scanline count - 1. Finishes by restoring d2-d7/a2-a5
; from the stack and returning.
COLOR9	macro

color9:

scanloop4\@
	move.w	(a4)+,d0	; get minx
	move.w	(a5)+,d1	; get maxx

	cmp.w	d0,d1		; error check: maxx - minx
	bmi.b	finished4\@	; negative: skip this scanline

	move.w	d0,d4		; copy minx
	lsr.w	#5,d4		; get first long word
	lsl.w	#2,d4		; *4 = number of bytes
	adda.w	d4,a0		; add x offset to scanline

	lea	PLANESIZE(a0),a1	; a1-a3 = same position in the
	lea	PLANESIZE(a1),a2	; next three planes
	lea	PLANESIZE(a2),a3

	move.w	d0,d4		; copy minx
	moveq.l	#-1,d2		; start mask, $ffffffff
	andi.w	#$1f,d4		; minx mod 32 = shift count
	lsr.l	d4,d2		; d2 = left mask: bits from minx rightwards

	move.w	d1,d4		; copy maxx
	moveq.l	#-1,d3		; start mask, $ffffffff
	andi.w	#$1f,d4		; maxx mod 32
	neg.w	d4		; - d4
	add.w	#31,d4		; d4 = 31 - (maxx mod 32) = shift count
	lsl.l	d4,d3		; d3 = right mask: bits up to and including maxx

	lsr.w	#5,d0		; get start long word
	lsr.w	#5,d1		; get end   long word
	sub.w	d0,d1		; end - start = long words spanned - 1
	bne.b	doublewrite4\@	; check for single write
; single write: the whole span lies inside one long word
	and.l	d2,d3		; combine left and right masks
	or.l	d3,(a0)		; set
	or.l	d3,(a3)		; set
	not.l	d3		; invert
	and.l	d3,(a1)		; clear
	and.l	d3,(a2)		; clear
	bra.b	finished4\@
doublewrite4\@
	or.l	d2,(a0)+	; set
	or.l	d2,(a3)+	; set
	not.l	d2		; invert
	and.l	d2,(a1)+	; clear
	and.l	d2,(a2)+	; clear
	subq.w	#2,d1		; d1 = whole middle long words - 1
	bmi.b	writeright4\@	; don't do a multiwrite
; multiwrite
	moveq.l	#-1,d2		; $ffffffff
	moveq.l	#0,d4		; zeroes
multiloop4\@
	move.l	d2,(a0)+	; set
	move.l	d2,(a3)+	; set
	move.l	d4,(a1)+	; clear
	move.l	d4,(a2)+	; clear
	dbra	d1,multiloop4\@
writeright4\@
	or.l	d3,(a0)		; set
	or.l	d3,(a3)		; set
	not.l	d3		; invert
	and.l	d3,(a1)		; clear
	and.l	d3,(a2)		; clear
finished4\@
	add.l	d5,d6		; go to next scanline
	movea.l	d6,a0		; copy ptr to modify
	dbra	d7,scanloop4\@	; d7 = yheight-1.

	movem.l	(sp)+,d2-d7/a2-a5
	rts

	endm

; COLOR10 - span-fill loop for colour 10 (%1010).
; For every scanline the span minx..maxx is set in the longwords reached
; through a1 and a3 and cleared in those reached through a0 and a2.
; a1, a2 and a3 are derived from a0 at PLANESIZE intervals (presumably
; a0 addresses bitplane 0 of the current row - set up outside this macro).
; Registers as used below:
;   a4 / a5 = tables read as one minx / maxx word per scanline
;   d5      = added to d6 after every scanline; d6 is then copied to a0
;   d7      = dbra counter (yheight-1)
; Scratch: d0-d4, a1-a3.  Ends by popping d2-d7/a2-a5 and returning.
COLOR10	macro

color10:

scanloop4\@
	move.w	(a4)+,d0	; get minx
	move.w	(a5)+,d1	; get maxx

	cmp.w	d0,d1		; maxx - minx
	bmi.b	finished4\@	; negative result: empty span, skip row

	move.w	d0,d4		; copy minx
	lsr.w	#5,d4		; get first long word
	lsl.w	#2,d4		; *4 = number of bytes
	adda.w	d4,a0		; add x offset to scanline

	lea	PLANESIZE(a0),a1
	lea	PLANESIZE(a1),a2
	lea	PLANESIZE(a2),a3

	move.w	d0,d4		; copy minx
	moveq.l	#-1,d2		; start mask, $ffffffff
	andi.w	#$1f,d4		; minx mod 32 = shift count
	lsr.l	d4,d2		; left mask: zero the bits left of minx

	move.w	d1,d4		; copy maxx
	moveq.l	#-1,d3		; start mask, $ffffffff
	andi.w	#$1f,d4		; maxx mod 32
	neg.w	d4		; - d4
	add.w	#31,d4		; d4 = 31 - (maxx mod 32) = shift count
	lsl.l	d4,d3		; right mask: zero the bits right of maxx

	lsr.w	#5,d0		; get start long word
	lsr.w	#5,d1		; get end   long word
	sub.w	d0,d1		; end - start (0 = span inside one long word)
	bne.b	doublewrite4\@	; check for single write
; single write
	and.l	d2,d3		; combine left and right masks
	or.l	d3,(a1)		; set
	or.l	d3,(a3)		; set
	not.l	d3		; invert
	and.l	d3,(a0)		; clear
	and.l	d3,(a2)		; clear
	bra.b	finished4\@
doublewrite4\@
	or.l	d2,(a1)+	; set
	or.l	d2,(a3)+	; set
	not.l	d2
	and.l	d2,(a0)+	; clear
	and.l	d2,(a2)+	; clear
	subq.w	#2,d1		; subtract left and right writes
	bmi.b	writeright4\@	; no whole long words in between
; multiwrite\@
	moveq.l	#-1,d2		; $ffffffff
	moveq.l	#0,d4		; zeroes
multiloop4\@
	move.l	d2,(a1)+	; set
	move.l	d2,(a3)+	; set
	move.l	d4,(a0)+	; clear
	move.l	d4,(a2)+	; clear
	dbra	d1,multiloop4\@	; d1+1 whole long words
writeright4\@
	or.l	d3,(a1)		; set
	or.l	d3,(a3)		; set
	not.l	d3		; invert
	and.l	d3,(a0)		; clear
	and.l	d3,(a2)		; clear
finished4\@
	add.l	d5,d6		; go to next scanline
	movea.l	d6,a0		; copy ptr to modify
	dbra	d7,scanloop4\@	; d7 = yheight-1.

	movem.l	(sp)+,d2-d7/a2-a5
	rts

	endm

; COLOR11 - span-fill loop for colour 11 (%1011).
; For every scanline the span minx..maxx is set in the longwords reached
; through a0, a1 and a3 and cleared in those reached through a2.
; a1, a2 and a3 are derived from a0 at PLANESIZE intervals (presumably
; a0 addresses bitplane 0 of the current row - set up outside this macro).
; Registers as used below:
;   a4 / a5 = tables read as one minx / maxx word per scanline
;   d5      = added to d6 after every scanline; d6 is then copied to a0
;   d7      = dbra counter (yheight-1)
; Scratch: d0-d4, a1-a3.  Ends by popping d2-d7/a2-a5 and returning.
COLOR11	macro

color11:

scanloop4\@
	move.w	(a4)+,d0	; get minx
	move.w	(a5)+,d1	; get maxx

	cmp.w	d0,d1		; maxx - minx
	bmi.b	finished4\@	; negative result: empty span, skip row

	move.w	d0,d4		; copy minx
	lsr.w	#5,d4		; get first long word
	lsl.w	#2,d4		; *4 = number of bytes
	adda.w	d4,a0		; add x offset to scanline

	lea	PLANESIZE(a0),a1
	lea	PLANESIZE(a1),a2
	lea	PLANESIZE(a2),a3

	move.w	d0,d4		; copy minx
	moveq.l	#-1,d2		; start mask, $ffffffff
	andi.w	#$1f,d4		; minx mod 32 = shift count
	lsr.l	d4,d2		; left mask: zero the bits left of minx

	move.w	d1,d4		; copy maxx
	moveq.l	#-1,d3		; start mask, $ffffffff
	andi.w	#$1f,d4		; maxx mod 32
	neg.w	d4		; - d4
	add.w	#31,d4		; d4 = 31 - (maxx mod 32) = shift count
	lsl.l	d4,d3		; right mask: zero the bits right of maxx

	lsr.w	#5,d0		; get start long word
	lsr.w	#5,d1		; get end   long word
	sub.w	d0,d1		; end - start (0 = span inside one long word)
	bne.b	doublewrite4\@	; check for single write
; single write
	and.l	d2,d3		; combine left and right masks
	or.l	d3,(a0)		; set
	or.l	d3,(a1)		; set
	or.l	d3,(a3)		; set
	not.l	d3		; invert
	and.l	d3,(a2)		; clear
	bra.b	finished4\@
doublewrite4\@
	or.l	d2,(a0)+	; set
	or.l	d2,(a1)+	; set
	or.l	d2,(a3)+	; set
	not.l	d2
	and.l	d2,(a2)+	; clear
	subq.w	#2,d1		; subtract left and right writes
	bmi.b	writeright4\@	; no whole long words in between
; multiwrite\@
	moveq.l	#-1,d2		; $ffffffff
	moveq.l	#0,d4		; zeroes
multiloop4\@
	move.l	d2,(a0)+	; set
	move.l	d2,(a1)+	; set
	move.l	d2,(a3)+	; set
	move.l	d4,(a2)+	; clear
	dbra	d1,multiloop4\@	; d1+1 whole long words
writeright4\@
	or.l	d3,(a0)		; set
	or.l	d3,(a1)		; set
	or.l	d3,(a3)		; set
	not.l	d3		; invert
	and.l	d3,(a2)		; clear
finished4\@
	add.l	d5,d6		; go to next scanline
	movea.l	d6,a0		; copy ptr to modify
	dbra	d7,scanloop4\@	; d7 = yheight-1.

	movem.l	(sp)+,d2-d7/a2-a5
	rts

	endm

; COLOR12 - span-fill loop for colour 12 (%1100).
; For every scanline the span minx..maxx is set in the longwords reached
; through a2 and a3 and cleared in those reached through a0 and a1.
; a1, a2 and a3 are derived from a0 at PLANESIZE intervals (presumably
; a0 addresses bitplane 0 of the current row - set up outside this macro).
; Registers as used below:
;   a4 / a5 = tables read as one minx / maxx word per scanline
;   d5      = added to d6 after every scanline; d6 is then copied to a0
;   d7      = dbra counter (yheight-1)
; Scratch: d0-d4, a1-a3.  Ends by popping d2-d7/a2-a5 and returning.
COLOR12	macro

color12:

scanloop4\@
	move.w	(a4)+,d0	; get minx
	move.w	(a5)+,d1	; get maxx

	cmp.w	d0,d1		; maxx - minx
	bmi.b	finished4\@	; negative result: empty span, skip row

	move.w	d0,d4		; copy minx
	lsr.w	#5,d4		; get first long word
	lsl.w	#2,d4		; *4 = number of bytes
	adda.w	d4,a0		; add x offset to scanline

	lea	PLANESIZE(a0),a1
	lea	PLANESIZE(a1),a2
	lea	PLANESIZE(a2),a3

	move.w	d0,d4		; copy minx
	moveq.l	#-1,d2		; start mask, $ffffffff
	andi.w	#$1f,d4		; minx mod 32 = shift count
	lsr.l	d4,d2		; left mask: zero the bits left of minx

	move.w	d1,d4		; copy maxx
	moveq.l	#-1,d3		; start mask, $ffffffff
	andi.w	#$1f,d4		; maxx mod 32
	neg.w	d4		; - d4
	add.w	#31,d4		; d4 = 31 - (maxx mod 32) = shift count
	lsl.l	d4,d3		; right mask: zero the bits right of maxx

	lsr.w	#5,d0		; get start long word
	lsr.w	#5,d1		; get end   long word
	sub.w	d0,d1		; end - start (0 = span inside one long word)
	bne.b	doublewrite4\@	; check for single write
; single write
	and.l	d2,d3		; combine left and right masks
	or.l	d3,(a2)		; set
	or.l	d3,(a3)		; set
	not.l	d3		; invert
	and.l	d3,(a0)		; clear
	and.l	d3,(a1)		; clear
	bra.b	finished4\@
doublewrite4\@
	or.l	d2,(a2)+	; set
	or.l	d2,(a3)+	; set
	not.l	d2
	and.l	d2,(a0)+	; clear
	and.l	d2,(a1)+	; clear
	subq.w	#2,d1		; subtract left and right writes
	bmi.b	writeright4\@	; no whole long words in between
; multiwrite\@
	moveq.l	#-1,d2		; $ffffffff
	moveq.l	#0,d4		; zeroes
multiloop4\@
	move.l	d2,(a2)+	; set
	move.l	d2,(a3)+	; set
	move.l	d4,(a0)+	; clear
	move.l	d4,(a1)+	; clear
	dbra	d1,multiloop4\@	; d1+1 whole long words
writeright4\@
	or.l	d3,(a2)		; set
	or.l	d3,(a3)		; set
	not.l	d3		; invert
	and.l	d3,(a0)		; clear
	and.l	d3,(a1)		; clear
finished4\@
	add.l	d5,d6		; go to next scanline
	movea.l	d6,a0		; copy ptr to modify
	dbra	d7,scanloop4\@	; d7 = yheight-1.

	movem.l	(sp)+,d2-d7/a2-a5
	rts

	endm

; COLOR13 - span-fill loop for colour 13 (%1101).
; For every scanline the span minx..maxx is set in the longwords reached
; through a0, a2 and a3 and cleared in those reached through a1.
; a1, a2 and a3 are derived from a0 at PLANESIZE intervals (presumably
; a0 addresses bitplane 0 of the current row - set up outside this macro).
; Registers as used below:
;   a4 / a5 = tables read as one minx / maxx word per scanline
;   d5      = added to d6 after every scanline; d6 is then copied to a0
;   d7      = dbra counter (yheight-1)
; Scratch: d0-d4, a1-a3.  Ends by popping d2-d7/a2-a5 and returning.
COLOR13	macro

color13:

scanloop4\@
	move.w	(a4)+,d0	; get minx
	move.w	(a5)+,d1	; get maxx

	cmp.w	d0,d1		; maxx - minx
	bmi.b	finished4\@	; negative result: empty span, skip row

	move.w	d0,d4		; copy minx
	lsr.w	#5,d4		; get first long word
	lsl.w	#2,d4		; *4 = number of bytes
	adda.w	d4,a0		; add x offset to scanline

	lea	PLANESIZE(a0),a1
	lea	PLANESIZE(a1),a2
	lea	PLANESIZE(a2),a3

	move.w	d0,d4		; copy minx
	moveq.l	#-1,d2		; start mask, $ffffffff
	andi.w	#$1f,d4		; minx mod 32 = shift count
	lsr.l	d4,d2		; left mask: zero the bits left of minx

	move.w	d1,d4		; copy maxx
	moveq.l	#-1,d3		; start mask, $ffffffff
	andi.w	#$1f,d4		; maxx mod 32
	neg.w	d4		; - d4
	add.w	#31,d4		; d4 = 31 - (maxx mod 32) = shift count
	lsl.l	d4,d3		; right mask: zero the bits right of maxx

	lsr.w	#5,d0		; get start long word
	lsr.w	#5,d1		; get end   long word
	sub.w	d0,d1		; end - start (0 = span inside one long word)
	bne.b	doublewrite4\@	; check for single write
; single write
	and.l	d2,d3		; combine left and right masks
	or.l	d3,(a0)		; set
	or.l	d3,(a2)		; set
	or.l	d3,(a3)		; set
	not.l	d3		; invert
	and.l	d3,(a1)		; clear
	bra.b	finished4\@
doublewrite4\@
	or.l	d2,(a0)+	; set
	or.l	d2,(a2)+	; set
	or.l	d2,(a3)+	; set
	not.l	d2
	and.l	d2,(a1)+	; clear
	subq.w	#2,d1		; subtract left and right writes
	bmi.b	writeright4\@	; no whole long words in between
; multiwrite\@
	moveq.l	#-1,d2		; $ffffffff
	moveq.l	#0,d4		; zeroes
multiloop4\@
	move.l	d2,(a0)+	; set
	move.l	d2,(a2)+	; set
	move.l	d2,(a3)+	; set
	move.l	d4,(a1)+	; clear
	dbra	d1,multiloop4\@	; d1+1 whole long words
writeright4\@
	or.l	d3,(a0)		; set
	or.l	d3,(a2)		; set
	or.l	d3,(a3)		; set
	not.l	d3		; invert
	and.l	d3,(a1)		; clear
finished4\@
	add.l	d5,d6		; go to next scanline
	movea.l	d6,a0		; copy ptr to modify
	dbra	d7,scanloop4\@	; d7 = yheight-1.

	movem.l	(sp)+,d2-d7/a2-a5
	rts

	endm

; COLOR14 - span-fill loop for colour 14 (%1110).
; For every scanline the span minx..maxx is set in the longwords reached
; through a1, a2 and a3 and cleared in those reached through a0.
; a1, a2 and a3 are derived from a0 at PLANESIZE intervals (presumably
; a0 addresses bitplane 0 of the current row - set up outside this macro).
; Registers as used below:
;   a4 / a5 = tables read as one minx / maxx word per scanline
;   d5      = added to d6 after every scanline; d6 is then copied to a0
;   d7      = dbra counter (yheight-1)
; Scratch: d0-d4, a1-a3.  Ends by popping d2-d7/a2-a5 and returning.
COLOR14	macro

color14:

scanloop4\@
	move.w	(a4)+,d0	; get minx
	move.w	(a5)+,d1	; get maxx

	cmp.w	d0,d1		; maxx - minx
	bmi.b	finished4\@	; negative result: empty span, skip row

	move.w	d0,d4		; copy minx
	lsr.w	#5,d4		; get first long word
	lsl.w	#2,d4		; *4 = number of bytes
	adda.w	d4,a0		; add x offset to scanline

	lea	PLANESIZE(a0),a1
	lea	PLANESIZE(a1),a2
	lea	PLANESIZE(a2),a3

	move.w	d0,d4		; copy minx
	moveq.l	#-1,d2		; start mask, $ffffffff
	andi.w	#$1f,d4		; minx mod 32 = shift count
	lsr.l	d4,d2		; left mask: zero the bits left of minx

	move.w	d1,d4		; copy maxx
	moveq.l	#-1,d3		; start mask, $ffffffff
	andi.w	#$1f,d4		; maxx mod 32
	neg.w	d4		; - d4
	add.w	#31,d4		; d4 = 31 - (maxx mod 32) = shift count
	lsl.l	d4,d3		; right mask: zero the bits right of maxx

	lsr.w	#5,d0		; get start long word
	lsr.w	#5,d1		; get end   long word
	sub.w	d0,d1		; end - start (0 = span inside one long word)
	bne.b	doublewrite4\@	; check for single write
; single write
	and.l	d2,d3		; combine left and right masks
	or.l	d3,(a1)		; set
	or.l	d3,(a2)		; set
	or.l	d3,(a3)		; set
	not.l	d3		; invert
	and.l	d3,(a0)		; clear
	bra.b	finished4\@
doublewrite4\@
	or.l	d2,(a1)+	; set
	or.l	d2,(a2)+	; set
	or.l	d2,(a3)+	; set
	not.l	d2
	and.l	d2,(a0)+	; clear
	subq.w	#2,d1		; subtract left and right writes
	bmi.b	writeright4\@	; no whole long words in between
; multiwrite\@
	moveq.l	#-1,d2		; $ffffffff
	moveq.l	#0,d4		; zeroes
multiloop4\@
	move.l	d2,(a1)+	; set
	move.l	d2,(a2)+	; set
	move.l	d2,(a3)+	; set
	move.l	d4,(a0)+	; clear
	dbra	d1,multiloop4\@	; d1+1 whole long words
writeright4\@
	or.l	d3,(a1)		; set
	or.l	d3,(a2)		; set
	or.l	d3,(a3)		; set
	not.l	d3		; invert
	and.l	d3,(a0)		; clear
finished4\@
	add.l	d5,d6		; go to next scanline
	movea.l	d6,a0		; copy ptr to modify
	dbra	d7,scanloop4\@	; d7 = yheight-1.

	movem.l	(sp)+,d2-d7/a2-a5
	rts

	endm

; COLOR15 - span-fill loop for colour 15 (%1111).
; For every scanline the span minx..maxx is set in the longwords reached
; through a0, a1, a2 and a3; nothing is cleared, so no inverted masks
; and no zero register are needed here.
; a1, a2 and a3 are derived from a0 at PLANESIZE intervals (presumably
; a0 addresses bitplane 0 of the current row - set up outside this macro).
; Registers as used below:
;   a4 / a5 = tables read as one minx / maxx word per scanline
;   d5      = added to d6 after every scanline; d6 is then copied to a0
;   d7      = dbra counter (yheight-1)
; Scratch: d0-d4, a1-a3.  Ends by popping d2-d7/a2-a5 and returning.
COLOR15	macro

color15:

scanloop4\@
	move.w	(a4)+,d0	; get minx
	move.w	(a5)+,d1	; get maxx

	cmp.w	d0,d1		; maxx - minx
	bmi.b	finished4\@	; negative result: empty span, skip row

	move.w	d0,d4		; copy minx
	lsr.w	#5,d4		; get first long word
	lsl.w	#2,d4		; *4 = number of bytes
	adda.w	d4,a0		; add x offset to scanline

	lea	PLANESIZE(a0),a1
	lea	PLANESIZE(a1),a2
	lea	PLANESIZE(a2),a3

	move.w	d0,d4		; copy minx
	moveq.l	#-1,d2		; start mask, $ffffffff
	andi.w	#$1f,d4		; minx mod 32 = shift count
	lsr.l	d4,d2		; left mask: zero the bits left of minx

	move.w	d1,d4		; copy maxx
	moveq.l	#-1,d3		; start mask, $ffffffff
	andi.w	#$1f,d4		; maxx mod 32
	neg.w	d4		; - d4
	add.w	#31,d4		; d4 = 31 - (maxx mod 32) = shift count
	lsl.l	d4,d3		; right mask: zero the bits right of maxx

	lsr.w	#5,d0		; get start long word
	lsr.w	#5,d1		; get end   long word
	sub.w	d0,d1		; end - start (0 = span inside one long word)
	bne.b	doublewrite4\@	; check for single write
; single write
	and.l	d2,d3		; combine left and right masks
	or.l	d3,(a0)		; set
	or.l	d3,(a1)		; set
	or.l	d3,(a2)		; set
	or.l	d3,(a3)		; set
	bra.b	finished4\@
doublewrite4\@
	or.l	d2,(a0)+	; set
	or.l	d2,(a1)+	; set
	or.l	d2,(a2)+	; set
	or.l	d2,(a3)+	; set
	subq.w	#2,d1		; subtract left and right writes
	bmi.b	writeright4\@	; no whole long words in between
; multiwrite\@
	moveq.l	#-1,d2		; $ffffffff
multiloop4\@
	move.l	d2,(a0)+	; set
	move.l	d2,(a1)+	; set
	move.l	d2,(a2)+	; set
	move.l	d2,(a3)+	; set
	dbra	d1,multiloop4\@	; d1+1 whole long words
writeright4\@
	or.l	d3,(a0)		; set
	or.l	d3,(a1)		; set
	or.l	d3,(a2)		; set
	or.l	d3,(a3)		; set
finished4\@
	add.l	d5,d6		; go to next scanline
	movea.l	d6,a0		; copy ptr to modify
	dbra	d7,scanloop4\@	; d7 = yheight-1.

	movem.l	(sp)+,d2-d7/a2-a5
	rts

	endm

; Expand the macros...
; Each COLORn macro is expanded exactly once; the visible ones (COLOR9 to
; COLOR15) define a global label colorN plus macro-local labels through
; the \@ suffix.

	COLOR0
	COLOR1
	COLOR2
	COLOR3
	COLOR4
	COLOR5
	COLOR6
	COLOR7
	COLOR8
	COLOR9
	COLOR10
	COLOR11
	COLOR12
	COLOR13
	COLOR14
	COLOR15

	CNOP	0,4	; longword align for 020/030 speed

; Table of the sixteen fill-routine addresses, one long word per colour,
; in colour order (entry n = colorN).  The code that indexes it is not
; in this part of the file - presumably colour*4 is used as the offset.
colorplanes:
	dc.l	color0
	dc.l	color1
	dc.l	color2
	dc.l	color3
	dc.l	color4
	dc.l	color5
	dc.l	color6
	dc.l	color7
	dc.l	color8
	dc.l	color9
	dc.l	color10
	dc.l	color11
	dc.l	color12
	dc.l	color13
	dc.l	color14
	dc.l	color15

; A 68000 line drawer, originally from 68000 Assembly Language, by
; Krantz and Stanley. Modified for the Amiga and the 320x200,
; four bitplane case.

;extern void __asm drawline68k(register __a1 PLANEPTR p,
;                              register __d0 short x,
;                              register __d1 short y,
;                              register __d2 short x2,
;                              register __d3 short y2,
;                              register __d4 short color);
;
; In:   a1 = first bitplane; the other three are taken at PLANESIZE
;            intervals after it
;       d0,d1 = start x,y    d2,d3 = end x2,y2    d4 = colour (low 4 bits)
; Out:  nothing.  d2-d7/a2-a6 are saved and restored; d0,d1,a0,a1 are
;       overwritten.
; Method: a DDA in 16.16 fixed point.  The axis with the larger delta
; steps by +/-1.0 per pixel, the other by minor/major.  The pixel loop
; is one of the sixteen LINEPLOT expansions, chosen through linejmptable.
; Notes:
;   - The loop count is the larger of |dx| and |dy|, so the pixel at
;     (x2,y2) itself is not plotted.
;   - If both deltas are zero nothing is drawn (branch to divtrapd).

_drawline68k
	movem.l	d2-d7/a2-a6,-(sp)

	movea.l	a1,a2		; a2..a5 = the four plane base addresses
	lea	PLANESIZE(a2),a3
	lea	PLANESIZE(a3),a4
	lea	PLANESIZE(a4),a5

	andi.w	#15,d4		; 0..15 color
	add.w	d4,d4		;
	add.w	d4,d4		; (4 byte entries) get table index
	lea	linejmptable(pc),a6 ; table base
	movea.l	0(a6,d4.w),a6	; get jmp address


	move.w	d0,d4		; d4 = x in 16.16 (fraction cleared)
	swap	d4
	clr.w	d4
	move.w	d1,d5		; d5 = y in 16.16 (fraction cleared)
	swap	d5
	clr.w	d5

	sub.w	d0,d2		; d2 = dx
	move.w	d2,d0
	bpl.b	sk1_vecd
	neg.w	d0		; d0 = |dx|
sk1_vecd
	sub.w	d1,d3		; d3 = dy
	move.w	d3,d1
	bpl.b	sk2_vecd
	neg.w	d1		; d1 = |dy|
sk2_vecd
	cmp.w	d0,d1
	bgt.b	y_biggerd	; |dy| > |dx|: y is the major axis
	tst.w	d2		; x major: x step is +1.0 or -1.0
	bmi.b	sk3_vecd
	move.l	#$10000,d6
	bra.b	sk4_vecd
sk3_vecd
	move.l	#$ffff0000,d6
sk4_vecd
	swap	d3		; dy as 16.16 ...
	clr.w	d3
	asr.l	#2,d3		; ... scaled down by 4 so the quotient fits 16 bits
	tst.w	d0
	beq	divtrapd	; |dx| = 0 (so |dy| = 0 too): nothing to draw
	divs	d0,d3		; low word = scaled dy / |dx|
	ext.l	d3
	asl.l	#2,d3		; back to 16.16 scale
	move.l	d3,d7		; d7 = y step per pixel
	move.w	d0,d2		; d2 = pixel count = |dx|
	bra.b	sk7_vecd
y_biggerd
	tst.w	d3		; y major: y step is +1.0 or -1.0
	bmi.b	sk5_vecd
	move.l	#$10000,d7
	bra.b	sk6_vecd
sk5_vecd
	move.l	#$ffff0000,d7
sk6_vecd
	swap	d2		; dx as 16.16 ...
	clr.w	d2
	asr.l	#2,d2		; ... scaled down by 4 so the quotient fits 16 bits
	tst.w	d1		; (|dy| > |dx| >= 0 on this path, so never zero)
	beq	divtrapd
	divs	d1,d2		; low word = scaled dx / |dy|
	ext.l	d2
	asl.l	#2,d2		; back to 16.16 scale
	move.l	d2,d6		; d6 = x step per pixel
	move.w	d1,d2		; d2 = pixel count = |dy|
sk7_vecd
sk8_vecd
	subq.w	#1,d2	; setup dbra

	move.l	#$8000,a0	; .5 to add to round up
	move.l	#$8000,a1	; "                   "

	jsr	(a6)	; draw line

divtrapd
	movem.l	(sp)+,d2-d7/a2-a6
	rts


; LINEPLOT op0,op1,op2,op3
; Emits one pixel-plotting loop followed by rts.  \1..\4 are the bit
; instructions (the expansions below pass bset or bclr) applied to the
; byte at a2, a3, a4 and a5 plus the computed offset, respectively.
; Registers as used below:
;   d4 / d5 = current x / y in 16.16 fixed point
;   d6 / d7 = value added to x / y after each pixel
;   a0 / a1 = rounding term added to x / y before the integer part is taken
;   d2      = dbra counter (d2+1 pixels are plotted)
;   a2-a5   = base addresses of the four destinations
; Scratch: d0, d1, d3.
; Byte offset = y*40 + x/8, i.e. 40 bytes per row.
LINEPLOT	macro
lp0_vecd\@
	move.l	d4,d0	; fixed point arithmetic
	move.l	d5,d1
	add.l	a0,d0	; add rounding terms
	add.l	a1,d1
	swap	d0	; integer part of x in the low word
	swap	d1	; integer part of y in the low word

	move.w	d1,d3	; save y
	add.w	d1,d1	; *2
	add.w	d1,d1	; *4
	add.w	d1,d3	; *5
	lsl.w	#3,d3	; *40
	move.w	d0,d1	; save x
	lsr.w	#3,d0	; get x byte offset
	add.w	d0,d3	; x offset + y offset
	andi.w	#7,d1	; get x bit set
	not.b	d1	; invert bits (left to right); bit number is used modulo 8

        \1.b	d1,0(a2,d3.w)	; destination at a2
        \2.b	d1,0(a3,d3.w)	; destination at a3
        \3.b	d1,0(a4,d3.w)	; destination at a4
        \4.b	d1,0(a5,d3.w)	; destination at a5

	add.l	d6,d4	; advance x
	add.l	d7,d5	; advance y
	dbra	d2,lp0_vecd\@
	rts
	
	endm

; One LINEPLOT expansion per colour.  Argument k (counting from 0) is bset
; when bit k of the colour number is 1 and bclr when it is 0, so the
; first argument (applied through a2) carries the least significant bit.
linecolor0:
	LINEPLOT	bclr,bclr,bclr,bclr
linecolor1:
	LINEPLOT	bset,bclr,bclr,bclr
linecolor2:
	LINEPLOT	bclr,bset,bclr,bclr
linecolor3:
	LINEPLOT	bset,bset,bclr,bclr
linecolor4:
	LINEPLOT	bclr,bclr,bset,bclr
linecolor5:
	LINEPLOT	bset,bclr,bset,bclr
linecolor6:
	LINEPLOT	bclr,bset,bset,bclr
linecolor7:
	LINEPLOT	bset,bset,bset,bclr
linecolor8:
	LINEPLOT	bclr,bclr,bclr,bset
linecolor9:
	LINEPLOT	bset,bclr,bclr,bset
linecolor10:
	LINEPLOT	bclr,bset,bclr,bset
linecolor11:
	LINEPLOT	bset,bset,bclr,bset
linecolor12:
	LINEPLOT	bclr,bclr,bset,bset
linecolor13:
	LINEPLOT	bset,bclr,bset,bset
linecolor14:
	LINEPLOT	bclr,bset,bset,bset
linecolor15:
	LINEPLOT	bset,bset,bset,bset

; Plot-loop addresses in colour order, 4 bytes per entry.  _drawline68k
; indexes this table with (colour & 15) * 4.
linejmptable:
	dc.l	linecolor0
	dc.l	linecolor1
	dc.l	linecolor2
	dc.l	linecolor3
	dc.l	linecolor4
	dc.l	linecolor5
	dc.l	linecolor6
	dc.l	linecolor7
	dc.l	linecolor8
	dc.l	linecolor9
	dc.l	linecolor10
	dc.l	linecolor11
	dc.l	linecolor12
	dc.l	linecolor13
	dc.l	linecolor14
	dc.l	linecolor15

;extern void __asm fillline68k(register __d0 short x,
;                              register __d1 short y,
;                              register __d2 short x2,
;                              register __d3 short y2,
;                              register __a0 short * table);

; The line drawer modified...
;
; Walks one polygon edge and records its x position per scanline.
; In:   d0,d1 = x,y    d2,d3 = x2,y2
;       a0    = table of words, indexed by y (2 bytes per entry)
; Out:  table[y] and table[y2] receive the end point x values directly;
;       while walking the edge top->bottom, every time the rounded y
;       changes the rounded x is stored to the next table entry,
;       starting at the entry of the top end point.
;       d2-d7 are saved and restored; d0,d1,a0,a1 are overwritten.
; If both deltas are zero only the two direct end point stores are done.
;
; Fix: "old y" used to be seeded from d1 while d1 still held |dy|, not
; the start y.  The table pointer stays in step with y only if the very
; first compare mismatches, so an edge whose |dy| equalled its top y
; (e.g. (10,5)-(20,10)) skipped the first store and wrote every later x
; one row too early.  "old y" is now seeded with (first rounded y) - 1,
; which can never compare equal, so the first store always happens.

_fillline68k:
	movem.l	d2-d7,-(sp)

	cmp.w	d1,d3		; always draw top->bottom
	bgt.b	oky
	exg	d1,d3		; swap y's
	exg	d0,d2		; swap x's
oky
	move.w	d3,d6
	add.w	d6,d6		; 2 byte entries
	move.w	d2,0(a0,d6.w)	; save x in scan table

	move.w	d1,d6
	add.w	d6,d6		; 2 byte entries
	move.w	d0,0(a0,d6.w)	; save x in scan table

	adda.w	d6,a0		; set up address to start of table

	move.w	d0,d4		; d4 = x in 16.16 (fraction cleared)
	swap	d4
	clr.w	d4
	move.w	d1,d5		; d5 = y in 16.16 (fraction cleared)
	swap	d5
	clr.w	d5

	sub.w	d0,d2		; d2 = dx
	move.w	d2,d0
	bpl.b	sk1_vec
	neg.w	d0		; d0 = |dx|
sk1_vec
	sub.w	d1,d3		; d3 = dy
	move.w	d3,d1
	bpl.b	sk2_vec
	neg.w	d1		; d1 = |dy|
sk2_vec
	cmp.w	d0,d1
	bgt.b	y_bigger	; |dy| > |dx|: y is the major axis
	tst.w	d2		; x major: x step is +1.0 or -1.0
	bmi.b	sk3_vec
	move.l	#$10000,d6
	bra.b	sk4_vec
sk3_vec
	move.l	#$ffff0000,d6
sk4_vec
	swap	d3		; dy as 16.16 ...
	clr.w	d3
	asr.l	#2,d3		; ... scaled down by 4 so the quotient fits 16 bits
	tst.w	d0
	beq	divtrap		; |dx| = 0 (so |dy| = 0 too): nothing to walk
	divs	d0,d3
	ext.l	d3
	asl.l	#2,d3		; back to 16.16 scale
	move.l	d3,d7		; d7 = y step
	move.w	d0,d2		; d2 = |dx|; dbra runs |dx|+1 times
	bra.b	sk7_vec
y_bigger
	tst.w	d3		; y major: y step is +1.0 or -1.0
	bmi.b	sk5_vec
	move.l	#$10000,d7
	bra.b	sk6_vec
sk5_vec
	move.l	#$ffff0000,d7
sk6_vec
	swap	d2		; dx as 16.16 ...
	clr.w	d2
	asr.l	#2,d2		; ... scaled down by 4 so the quotient fits 16 bits
	tst.w	d1
	beq	divtrap
	divs	d1,d2
	ext.l	d2
	asl.l	#2,d2		; back to 16.16 scale
	move.l	d2,d6		; d6 = x step
	move.w	d1,d2		; d2 = |dy|; dbra runs |dy|+1 times
sk7_vec
;	subq.w	#1,d2		; set up dbra
	move.l	#$8000,d3	; .5 to add to round up

	move.l	d4,d0		; compute 1st x point
	add.l	d3,d0
	swap	d0
	move.l	d5,d1		; compute 1st y point
	add.l	d3,d1
	swap	d1

	move.w	d1,a1		; old y = first y - 1: can never match, so
	subq.w	#1,a1		; the first point is always stored
lp0_vec
	cmp.w	a1,d1		; newy # oldy?
	beq.b	ynochange	; no
	move.w	d0,(a0)+	; fastest possible write to memory.
ynochange			
	move.w	d1,a1		; remember y for the next compare

	add.l	d6,d4		; advance x
	add.l	d7,d5		; advance y

	move.l	d4,d0		; fixed point arithmetic
	add.l	d3,d0
	swap	d0
	move.l	d5,d1
	add.l	d3,d1
	swap	d1
sk8_vec
	dbra	d2,lp0_vec
divtrap	
	movem.l	(sp)+,d2-d7
	rts

	END

;   Here are a few subtle changes to improve performance of the processor
; polygon fill code I posted earlier. Both changes are for the file
; scanconvert.a. Also, my polygon fill code only works on convex polygons.
; If the screen width is changed from 320 pixels, the line drawer's pixel
; offset calculations will have to be rewritten to handle a non-40 bytes
; per row case (The 80 bytes per row (640 pixels) case is trivial, just
; change the lsl.w #3,d3 to lsl.w #4,d3). The polygon routines simply
; require a changed define, and could be generalized so it could
; handle screen width changes dynamically.
; 
;   In _drawline68k, change the code between sk8_vecd and the second
; add.l to the below:
; 
; ; start _drawline changes
; 
; sk8_vecd
; 	subq.w	#1,d2	; setup dbra
; 
; 	move.l	#$8000,a0	; .5 to add to round up
; 	move.w	#7,a1
; 
; 	jsr	(a6)	; draw line
; 
; divtrapd
; 	movem.l	(sp)+,d2-d7/a2-a6
; 	rts
; 
; 
; LINEPLOT	macro
; lp0_vecd\@
; 	move.l	d4,d0	; fixed point arithmetic
; 	move.l	d5,d1
; 	add.l	a0,d0
; 	add.l	a0,d1
; 
; ; end of _drawline68k changes
; 
;   In _fillline68k, change everything from sk7_vec to sk8_vec to:
; 
; ; start _fillline68k changes
; 
; sk7_vec
; 	move.l	#$8000,d3	; .5 to add to round up
; 	move.w	d1,a1		; old y = current y
; lp0_vec
; 	move.l	d4,d0		; fixed point arithmetic
; 	add.l	d3,d0
; 	swap	d0
; 	move.l	d5,d1
; 	add.l	d3,d1
; 	swap	d1
; 
; 	cmp.w	a1,d1		; newy # oldy?
; 	beq.b	ynochange	; no
; 	move.w	d0,(a0)+	; fastest possible write to memory.
; ynochange			
; 	move.w	d1,a1
; 
; 	add.l	d6,d4
; 	add.l	d7,d5
; 
; sk8_vec
; 
; ; end of _fillline68k changes
    ------------------------------ 8< ------------------------------
--
*** John Bickers, TAP, NZAmigaUG.        jbickers@templar.actrix.gen.nz ***
***         "Endless variations, make it all seem new" - Devo.          ***

jcs@crash.cts.com (John Schultz) (05/11/91)

Here's some newer (optimized) code to play with...
(Just the C front end to the assembly code just reposted)

  John

/* fill.c, processor poly filler   */
/* Copyright (c) 1991 John Schultz */
/* This is the latest version of the C interface to my processor
   polygon fill code.
*/

/* A 2-D point with short (16-bit on the target) coordinates. */
typedef struct shortpoint2d {
  short x,y;
} shortpoint2d;

/* Screen size in pixels. */
#define WIDTH  320
#define HEIGHT 200

/* Edge tables, one entry per scanline: filled by fillline68k() and
   handed to scanconvpix() by drawpolyc(). */
static short xmin[HEIGHT+1], xmax[HEIGHT+1];
/* The table (xmin or xmax) that drawpolyc() is currently filling. */
static short * xt;

/******************* COUNTERCLOCKWISE POLYGONS ********************/
/**************** (This version does clockwise too) ***************/

/*
 * drawpolyc - fill a convex polygon given in either winding order.
 *
 *   vp    : array of `count` vertices
 *   p     : PLANEPTR, passed through to drawline() / scanconvpix()
 *   count : number of vertices in vp
 *   color : colour, passed through to drawline() / scanconvpix()
 *
 * The winding order is taken from the sign of an orientation sum
 * (Newell method for more than three points, a cross product for a
 * triangle).  A zero sum, or fewer than three points, is drawn as a
 * single line.  Otherwise the edges are walked once around, starting at
 * the top vertex: those up to the bottom vertex go into one edge table,
 * the rest into the other, and scanconvpix() fills between the tables.
 *
 * Changes from the earlier version:
 *   - count < 1 returns at once; count < 3 no longer reads vp[1]/vp[2]
 *     past the end of the array.
 *   - orientation products are evaluated in long, so they cannot
 *     overflow when the compiler uses 16-bit ints.
 *   - points are compared field by field instead of through a
 *     (long *) cast.
 */
void drawpolyc(shortpoint2d * vp,
               char * p,    /* PLANEPTR */
               short count,
               short color){
shortpoint2d * tmax, * tmin;
shortpoint2d * tp, * tpn;
shortpoint2d * last;
short * second;
long orient;

  if (count < 1) return; /* No points: nothing to draw */

/* Get last point */
  last = &vp[count-1];

/* Orientation; zero means the points are collinear */
  orient = 0;
  if (count > 3) { /* Use Newell method */
    tp = vp; /* Point to first point */
    while (1) {
      tpn = (tp == last) ? vp : tp + 1; /* Next point, wrapping around */
      orient += ((long)tp->x - tpn->x)*((long)tp->y + tpn->y);
      if (tpn == vp) break; /* Come full circle */
      tp = tpn; /* go to next */
    } /* while */
  } else if (count == 3) { /* Simple cross-product: triangle case */
    orient = ((long)vp[1].x - vp[0].x)*((long)vp[2].y - vp[0].y) -
             ((long)vp[1].y - vp[0].y)*((long)vp[2].x - vp[0].x);
  } /* count < 3: orient stays 0 and the line case below handles it */

  if (orient == 0L) { /* Collinear: draw a line */
/* Find the first point that differs from vp[0]; stop at the last one */
    tp = vp;
    while (tp != last) {
      tp++;
      if ((tp->x != vp->x) || (tp->y != vp->y)) break;
    } /* while */
    drawline(p,vp->x,vp->y,tp->x,tp->y,color);
    return;
  } /* if line case */

/* Find miny,maxy (count >= 3 from here on) */
  tmin = vp;   /* Point at first */
  tmax = vp;   /* "            " */
  tp = vp + 1; /* Point to next */

  while (1) {
    if (tp->y < tmin->y) {
      tmin = tp;
    } else if (tp->y == tmin->y) {
      /* NOTE(review): this tie-break compares against tmax->x, not
         tmin->x; kept exactly as it was - confirm it is intended. */
      if (tp->x >= tmax->x) {
        tmin = tp;
      } /* if tp->x */
    } /* if tp->y */
    if (tp->y > tmax->y) {  /* MaxY */
      tmax = tp;
    } else if (tp->y == tmax->y) {
      if (tp->x < tmax->x) { /* Get maxY, minX */
        tmax = tp;
      } /* if tp->x */
    } /* if tmaxy */
    if (tp == last) break;
    tp++;
  } /* while */

/* Negative orientation fills xmin first, otherwise xmax first */
  if (orient < 0) {
    xt = xmin;
    second = xmax;
  } else {
    xt = xmax;
    second = xmin;
  } /* if orient */

/* Fill tables */
  tp = tmin; /* Temp point starts at miny */
  while (1) {
    tpn = (tp == last) ? vp : tp + 1; /* Next point, wrapping around */
    fillline68k(tp->x,tp->y,tpn->x,tpn->y,xt);
    if (tpn == tmin) break; /* Come full circle, quit */
    if (tpn == tmax) xt = second; /* Switch to the other table */
    tp = tpn; /* Go to next */
  } /* while */

/* Draw polygon */
  scanconvpix(p,xmin,xmax,tmin->y,tmax->y,color);

} /* drawpolyc */

/* end fill.c */