lmjm@doc.imperial.ac.UK (Lee McLoughlin) (02/02/89)
Here are some relatively simple optimisations to the CFB code. They
are the product of about a day's work and are a first-round
optimisation for a particular machine: an HLH Orion 1/05 (a fast
Clipper-based 4.2 BSD Unix box with a 1280 x 1000 pseudo-colour screen).
HLH have said it's OK for me to give away these diffs (they are a nice
company - did I mention that Orions are also cheap?).
These optimisations are not portable. They are only suitable for
machines which meet the following (in order of importance):-
1) PPW == 4 (1 byte == 1 pixel)
2) bcopy must be fully working (i.e. it must handle backwards copies and overlapping regions)
3) IMAGE_BYTE_ORDER == LSBFirst (vax byte order)
4) Need a bfill function.
The basic idea for this lot is to use bcopy wherever possible in
order to avoid the overheads of getbits/putbits, hence restrictions 1
and 2. I've no time to work on these patches but I believe that they
*should* work on an MSBFirst machine - you'll have to try them and see
(the #if guards test for IMAGE_BYTE_ORDER == LSBFirst, so relax those first).
Bfill is a relative of bcopy:
bfill( b, fill, width )
char *b; /* pointer to bytes to fill */
int fill; /* the word to replicate over the bytes;
* all four bytes in the word should be the same */
int width; /* the size of the block of bytes */
It should be simple to write one in C. PFILL() generates suitable fill values.
There are a couple of bits of pretty shoddy code in these diffs.
Blame me, not HLH.
Any feedback or, better still, more cfb optimisations are *very* welcome.
A cfbLineSS and an unnatural tile routine would be my next attempts
(when *and* if I can find more time).
These diffs are under the standard disclaimer found in all good X source.
diff -cbr ORIG/cfb/cfbbitblt.c cfb/cfbbitblt.c
*** ORIG/cfb/cfbbitblt.c Tue Nov 15 13:28:26 1988
--- cfb/cfbbitblt.c Sat Jan 28 18:20:44 1989
***************
*** 590,595
/* special case copy, to avoid some redundant moves into temporaries */
if (alu == GXcopy)
{
while (nbox--)
{
w = pbox->x2 - pbox->x1;
--- 590,601 -----
/* special case copy, to avoid some redundant moves into temporaries */
if (alu == GXcopy)
{
+ #if (PPW == 4) && (IMAGE_BYTE_ORDER == LSBFirst)
+ /* widths are in words, convert back to bytes */
+ int bwidthSrc = widthSrc << 2;
+ int bwidthDst = widthDst << 2;
+ #endif
+
while (nbox--)
{
w = pbox->x2 - pbox->x1;
***************
*** 606,611
pdstLine = pdstBase + (pbox->y1 * widthDst);
}
/* x direction doesn't matter for < 1 longword */
if (w <= PPW)
{
--- 612,631 -----
pdstLine = pdstBase + (pbox->y1 * widthDst);
}
+ #if (PPW == 4) && (IMAGE_BYTE_ORDER == LSBFirst)
+ {
+ /* Byte pointers to src and dst */
+ char *bpsrc, *bpdst;
+
+ bpsrc = ((char *)psrcLine) + pptSrc->x;
+ bpdst = ((char *)pdstLine) + pbox->x1;
+ while( h-- ){
+ bcopy( bpsrc, bpdst, w );
+ bpsrc += bwidthSrc;
+ bpdst += bwidthDst;
+ }
+ }
+ #else
/* x direction doesn't matter for < 1 longword */
if (w <= PPW)
{
***************
*** 744,749
}
} /* move right to left */
}
pbox++;
pptSrc++;
} /* while (nbox--) */
--- 764,770 -----
}
} /* move right to left */
}
+ #endif
pbox++;
pptSrc++;
} /* while (nbox--) */
diff -cbr ORIG/cfb/cfbfillsp.c cfb/cfbfillsp.c
*** ORIG/cfb/cfbfillsp.c Thu Oct 13 22:35:25 1988
--- cfb/cfbfillsp.c Mon Jan 30 22:04:42 1989
***************
*** 196,201
if (*pwidth)
{
if ( ((ppt->x & PIM) + *pwidth) <= PPW)
{
/* all bits inside same longword */
--- 196,219 -----
if (*pwidth)
{
+ #if (PPW == 4) && (IMAGE_BYTE_ORDER == LSBFirst)
+ if( rop == GXcopy && pGC->planemask == -1 )
+ {
+ char *pd = ((char *)addrl) + (ppt->x & PIM);
+
+ if( *pwidth == 1 )
+ *pd = fill;
+ else
+ bfill( pd, fill, *pwidth );
+ }
+ else if( rop == GXinvert && pGC->planemask == -1 && *pwidth == 1 )
+ {
+ char *pd = ((char *)addrl) + (ppt->x & PIM);
+
+ *pd ^= -1;
+ }
+ else
+ #endif
if ( ((ppt->x & PIM) + *pwidth) <= PPW)
{
/* all bits inside same longword */
***************
*** 367,372
}
else
#endif /* notdef */
if(((x & PIM) + w) <= PPW)
{
getbits(psrc, (rem & PIM), w, tmpSrc);
--- 385,403 -----
}
else
#endif /* notdef */
+ #if (PPW == 4) && (IMAGE_BYTE_ORDER == LSBFirst)
+ if( rop == GXcopy && pGC->planemask == -1 )
+ {
+ char *s = ((char *)psrc) + (rem & PIM);
+ char *d = ((char *)pdst) + (x & PIM);
+ if( w == 1 )
+ *d = *s;
+ else
+ bcopy( s, d, w );
+ if ((x & PIM) + w == PPW) ++pdst;
+ }
+ else
+ #endif
if(((x & PIM) + w) <= PPW)
{
getbits(psrc, (rem & PIM), w, tmpSrc);
diff -cbr ORIG/cfb/cfbgetsp.c cfb/cfbgetsp.c
*** ORIG/cfb/cfbgetsp.c Fri Jul 22 18:04:00 1988
--- cfb/cfbgetsp.c Sat Jan 28 22:07:57 1989
***************
*** 114,119
i = 0;
while(ppt < pptLast)
{
xEnd = min(ppt->x + *pwidth, widthSrc << (PWSH-2) );
pwidth++;
psrc = psrcBase + (ppt->y * (widthSrc >> 2)) + (ppt->x >> PWSH);
--- 114,123 -----
i = 0;
while(ppt < pptLast)
{
+ #if (PPW == 4) && (IMAGE_BYTE_ORDER == LSBFirst)
+ /* Byte pointers to src and dst */
+ char *bpsrc, *bpdst;
+
xEnd = min(ppt->x + *pwidth, widthSrc << (PWSH-2) );
pwidth++;
w = xEnd - ppt->x;
***************
*** 116,121
{
xEnd = min(ppt->x + *pwidth, widthSrc << (PWSH-2) );
pwidth++;
psrc = psrcBase + (ppt->y * (widthSrc >> 2)) + (ppt->x >> PWSH);
w = xEnd - ppt->x;
srcBit = ppt->x & PIM;
--- 120,140 -----
xEnd = min(ppt->x + *pwidth, widthSrc << (PWSH-2) );
pwidth++;
+ w = xEnd - ppt->x;
+ /* This shouldn't be needed */
+ pdstNext = pdst + PixmapWidthInPadUnits(w, PSZ);
+ pwidthPadded[i] = PixmapWidthInPadUnits(w, PSZ) * PPW;
+ i++;
+
+
+ bpsrc = ((char *)(psrcBase + (ppt->y * (widthSrc >> 2)))) + ppt->x;
+ bpdst = (char *)pdst;
+ bcopy( bpsrc, bpdst, w );
+
+ pdst = pdstNext;
+ #else
+ xEnd = min(ppt->x + *pwidth, widthSrc << (PWSH-2) );
+ pwidth++;
psrc = psrcBase + (ppt->y * (widthSrc >> 2)) + (ppt->x >> PWSH);
w = xEnd - ppt->x;
srcBit = ppt->x & PIM;
***************
*** 173,178
pdst = pdstNext;
#endif notdef
}
ppt++;
pwidth++;
}
--- 192,198 -----
pdst = pdstNext;
#endif notdef
}
+ #endif
ppt++;
pwidth++;
}
diff -cbr ORIG/cfb/cfbpntwin.c cfb/cfbpntwin.c
*** ORIG/cfb/cfbpntwin.c Wed Sep 2 02:23:56 1987
--- cfb/cfbpntwin.c Sat Jan 28 21:40:36 1989
***************
*** 432,437
int widthSrc, widthDst, nlMiddle, startmask, endmask;
PixmapPtr pDstPixmap;
psrcLine = (int *)pSrc->devPrivate;
--- 432,439 -----
int widthSrc, widthDst, nlMiddle, startmask, endmask;
PixmapPtr pDstPixmap;
+ #if (PPW == 4) && (IMAGE_BYTE_ORDER == LSBFirst)
+ psrcLine = (int *)pSrc->devPrivate;
pDstPixmap = (PixmapPtr)pDstWin->drawable.pScreen->devPrivate;
widthDst = (int)pDstPixmap->devKind;
***************
*** 433,438
PixmapPtr pDstPixmap;
psrcLine = (int *)pSrc->devPrivate;
pDstPixmap = (PixmapPtr)pDstWin->drawable.pScreen->devPrivate;
--- 435,458 -----
#if (PPW == 4) && (IMAGE_BYTE_ORDER == LSBFirst)
psrcLine = (int *)pSrc->devPrivate;
+ pDstPixmap = (PixmapPtr)pDstWin->drawable.pScreen->devPrivate;
+ widthDst = (int)pDstPixmap->devKind;
+ pdstLine = (int *)pDstPixmap->devPrivate + (y * (widthDst >> 2));
+ widthSrc = (int)pSrc->devKind;
+
+ {
+ /* Byte pointers to src and dst */
+ char *bpsrc, *bpdst;
+
+ bpsrc = (char *)psrcLine;
+ bpdst = ((char *)pdstLine) + x;
+ while( tileHeight-- ){
+ bcopy( bpsrc, bpdst, tileWidth );
+ bpsrc += widthSrc;
+ bpdst += widthDst;
+ }
+ }
+ #else
psrcLine = (int *)pSrc->devPrivate;
pDstPixmap = (PixmapPtr)pDstWin->drawable.pScreen->devPrivate;
***************
*** 514,517
psrcLine += widthSrc;
}
}
}
--- 534,538 -----
psrcLine += widthSrc;
}
}
+ #endif
}
diff -cbr ORIG/cfb/cfbsetsp.c cfb/cfbsetsp.c
*** ORIG/cfb/cfbsetsp.c Fri Sep 11 00:08:26 1987
--- cfb/cfbsetsp.c Sat Jan 28 21:22:08 1989
***************
*** 64,69
int offSrc;
int startmask, endmask, nlMiddle, nl;
pdst = pdstBase + (y * widthDst) + (xStart >> PWSH);
psrc += (xStart - xOrigin) >> PWSH;
offSrc = (xStart - xOrigin) & PIM;
--- 64,85 -----
int offSrc;
int startmask, endmask, nlMiddle, nl;
+ w = xEnd - xStart;
+ dstBit = xStart & PIM;
+
+ #if (PPW == 4) && (IMAGE_BYTE_ORDER == LSBFirst)
+ if( alu == GXcopy && planemask == -1 ){
+ /* Byte pointers to src and dst */
+ char *bpsrc, *bpdst;
+
+ bpsrc = ((char *)psrc) + xStart - xOrigin;
+ bpdst = ((char *)(pdstBase + (y * widthDst))) + xStart;
+
+ bcopy( bpsrc, bpdst, w );
+ return;
+ }
+ #endif
+
pdst = pdstBase + (y * widthDst) + (xStart >> PWSH);
psrc += (xStart - xOrigin) >> PWSH;
offSrc = (xStart - xOrigin) & PIM;
***************
*** 67,74
pdst = pdstBase + (y * widthDst) + (xStart >> PWSH);
psrc += (xStart - xOrigin) >> PWSH;
offSrc = (xStart - xOrigin) & PIM;
- w = xEnd - xStart;
- dstBit = xStart & PIM;
if (dstBit + w <= PPW)
{
--- 83,88 -----
pdst = pdstBase + (y * widthDst) + (xStart >> PWSH);
psrc += (xStart - xOrigin) >> PWSH;
offSrc = (xStart - xOrigin) & PIM;
if (dstBit + w <= PPW)
{
diff -cbr ORIG/cfb/cfbtegblt.c cfb/cfbtegblt.c
*** ORIG/cfb/cfbtegblt.c Tue Sep 6 19:02:49 1988
--- cfb/cfbtegblt.c Sat Jan 28 20:40:44 1989
***************
*** 147,152
case rgnIN:
pdtmp = pdstBase + (widthDst * ypos);
while(nglyph--)
{
--- 148,156 -----
case rgnIN:
pdtmp = pdstBase + (widthDst * ypos);
+ #if (PPW == 4) && (IMAGE_BYTE_ORDER == LSBFirst)
+ /* Special case terminal emulator fonts */
+ if( pGC->planemask == -1 && wtmp == 8 ){
while(nglyph--)
{
pglyph = pglyphBase + (*ppci++)->byteOffset;
***************
*** 149,154
pdtmp = pdstBase + (widthDst * ypos);
while(nglyph--)
{
pglyph = pglyphBase + (*ppci++)->byteOffset;
pdst = pdtmp;
--- 153,161 -----
if( pGC->planemask == -1 && wtmp == 8 ){
while(nglyph--)
{
+ pglyph = pglyphBase + (*ppci++)->byteOffset;
+ pdst = pdtmp;
+ hTmp = h;
while (hTmp--)
{
***************
*** 150,155
while(nglyph--)
{
pglyph = pglyphBase + (*ppci++)->byteOffset;
pdst = pdtmp;
hTmp = h;
--- 157,191 -----
pdst = pdtmp;
hTmp = h;
+ while (hTmp--)
+ {
+ char *bpdst = ((char *)pdst) + xpos;
+ unsigned int b, tmpDst;
+ unsigned int g = *((unsigned int *)pglyph);
+ struct twowords {
+ int wd1, wd2;
+ } t;
+
+ /* Turn four bits into four bytes obeying fg and bg */
+ b = g & 0xF;
+ t.wd1 = ((~QuartetPixelMaskTable[ b ]) & bgfill) |
+ ( QuartetPixelMaskTable[ b ] & fgfill);
+ /* and the following four bits too */
+ b = (g >> 4) & 0xF;
+ t.wd2 = ((~QuartetPixelMaskTable[ b ]) & bgfill) |
+ ( QuartetPixelMaskTable[ b ] & fgfill);
+ bcopy( &t.wd1, bpdst, 8 );
+
+ pglyph += widthGlyph;
+ pdst += widthDst;
+ }
+ xpos += pci->metrics.characterWidth;
+ }
+ break;
+ }
+ #endif
+ while(nglyph--)
+ {
pglyph = pglyphBase + (*ppci++)->byteOffset;
pdst = pdtmp;
hTmp = h;
diff -cbr ORIG/cfb/cfbutils.c cfb/cfbutils.c
*** ORIG/cfb/cfbutils.c Tue May 24 18:36:42 1988
--- cfb/cfbutils.c Sat Jan 28 20:49:44 1989
***************
*** 86,91
psrcLine = psrcBase + (ySrc * wSrc);
pdstLine = pdstBase + (yDst * wDst);
}
/* x direction doesn't matter for < 1 longword */
if (w <= PPW)
--- 86,98 -----
psrcLine = psrcBase + (ySrc * wSrc);
pdstLine = pdstBase + (yDst * wDst);
}
+ #if (PPW == 4) && (IMAGE_BYTE_ORDER == LSBFirst)
+ {
+ /* widths are in words, convert back to bytes */
+ int bwidthSrc = wSrc << PWSH;
+ int bwidthDst = wDst << PWSH;
+ /* Byte pointers to src and dst */
+ char *bpsrc, *bpdst;
bpsrc = ((char *)psrcLine) + xSrc;
bpdst = ((char *)pdstLine) + xDst;
***************
*** 87,92
pdstLine = pdstBase + (yDst * wDst);
}
/* x direction doesn't matter for < 1 longword */
if (w <= PPW)
{
--- 94,109 -----
/* Byte pointers to src and dst */
char *bpsrc, *bpdst;
+ bpsrc = ((char *)psrcLine) + xSrc;
+ bpdst = ((char *)pdstLine) + xDst;
+ while( h-- ){
+ bcopy( bpsrc, bpdst, w );
+ bpsrc += bwidthSrc;
+ bpdst += bwidthDst;
+ }
+ }
+ #else
+
/* x direction doesn't matter for < 1 longword */
if (w <= PPW)
{
***************
*** 213,216
}
} /* move right to left */
}
}
--- 230,234 -----
}
} /* move right to left */
}
+ #endif
}
--
Lee McLoughlin 01 589 5111 X 5028
Department of Computing,Imperial College,180 Queens Gate,London SW7 2BZ, UK
Janet: lmjm@uk.ac.ic.doc Uucp: lmjm@icdoc.UUCP, ukc!icdoc!lmjm
DARPA: lmjm@doc.ic.ac.uk (or lmjm%uk.ac.ic.doc@nss.cs.ucl.ac.uk)