lmjm@doc.imperial.ac.UK (Lee McLoughlin) (02/02/89)
Here are some relatively simple optimisations to the CFB code. They are the product of about a day's work and are a first-round optimisation for a particular machine:- an HLH Orion 1/05 (a fast Clipper-based 4.2 BSD Unix box with a 1280 x 1000 pseudo-colour screen). HLH have said it's OK for me to give away these diffs (they are a nice company - did I mention that Orions are also cheap?). These optimisations are not portable. They are only suitable for machines which meet the following (in order of importance):- 1) PPW == 4 (1 byte == 1 pixel) 2) bcopy must be fully working (i.e. backwards and overlaps) 3) IMAGE_BYTE_ORDER == LSBFirst (VAX byte order) 4) Need a bfill function. The basic idea for this lot is to use bcopy wherever possible in order to avoid the overheads of getbits/putbits, hence restrictions 1 and 2. I've no time to work on these patches but I believe that they *should* work on an MSBFirst machine - you'll have to try them and see. Bfill is a relative of bcopy: bfill( b, fill, width ) char *b; /* pointer to bytes to fill */ int fill; /* the word to replicate over the bytes * all four bytes in the word should be the same */ int width; /* the size of the block of bytes */ It should be simple to write one in C. PFILL() generates suitable fill values. There are a couple of bits of pretty shoddy code down in these diffs. Blame me, not HLH. Any feedback or, better still, more cfb optimisations are *very* welcome. A cfbLineSS and an unnatural tile routine would be my next attempts (when *and* if I can find more time). These diffs are under the standard disclaimer found in all good X source. 
diff -cbr ORIG/cfb/cfbbitblt.c cfb/cfbbitblt.c *** ORIG/cfb/cfbbitblt.c Tue Nov 15 13:28:26 1988 --- cfb/cfbbitblt.c Sat Jan 28 18:20:44 1989 *************** *** 590,595 /* special case copy, to avoid some redundant moves into temporaries */ if (alu == GXcopy) { while (nbox--) { w = pbox->x2 - pbox->x1; --- 590,601 ----- /* special case copy, to avoid some redundant moves into temporaries */ if (alu == GXcopy) { + #if (PPW == 4) && (IMAGE_BYTE_ORDER == LSBFirst) + /* widths are in words, convert back to bytes */ + int bwidthSrc = widthSrc << 2; + int bwidthDst = widthDst << 2; + #endif + while (nbox--) { w = pbox->x2 - pbox->x1; *************** *** 606,611 pdstLine = pdstBase + (pbox->y1 * widthDst); } /* x direction doesn't matter for < 1 longword */ if (w <= PPW) { --- 612,631 ----- pdstLine = pdstBase + (pbox->y1 * widthDst); } + #if (PPW == 4) && (IMAGE_BYTE_ORDER == LSBFirst) + { + /* Byte pointers to src and dst */ + char *bpsrc, *bpdst; + + bpsrc = ((char *)psrcLine) + pptSrc->x; + bpdst = ((char *)pdstLine) + pbox->x1; + while( h-- ){ + bcopy( bpsrc, bpdst, w ); + bpsrc += bwidthSrc; + bpdst += bwidthDst; + } + } + #else /* x direction doesn't matter for < 1 longword */ if (w <= PPW) { *************** *** 744,749 } } /* move right to left */ } pbox++; pptSrc++; } /* while (nbox--) */ --- 764,770 ----- } } /* move right to left */ } + #endif pbox++; pptSrc++; } /* while (nbox--) */ diff -cbr ORIG/cfb/cfbfillsp.c cfb/cfbfillsp.c *** ORIG/cfb/cfbfillsp.c Thu Oct 13 22:35:25 1988 --- cfb/cfbfillsp.c Mon Jan 30 22:04:42 1989 *************** *** 196,201 if (*pwidth) { if ( ((ppt->x & PIM) + *pwidth) <= PPW) { /* all bits inside same longword */ --- 196,219 ----- if (*pwidth) { + #if (PPW == 4) && (IMAGE_BYTE_ORDER == LSBFirst) + if( rop == GXcopy && pGC->planemask == -1 ) + { + char *pd = ((char *)addrl) + (ppt->x & PIM); + + if( *pwidth == 1 ) + *pd = fill; + else + bfill( pd, fill, *pwidth ); + } + else if( rop == GXinvert && pGC->planemask == -1 && *pwidth == 
1 ) + { + char *pd = ((char *)addrl) + (ppt->x & PIM); + + *pd ^= -1; + } + else + #endif if ( ((ppt->x & PIM) + *pwidth) <= PPW) { /* all bits inside same longword */ *************** *** 367,372 } else #endif /* notdef */ if(((x & PIM) + w) <= PPW) { getbits(psrc, (rem & PIM), w, tmpSrc); --- 385,403 ----- } else #endif /* notdef */ + #if (PPW == 4) && (IMAGE_BYTE_ORDER == LSBFirst) + if( rop == GXcopy && pGC->planemask == -1 ) + { + char *s = ((char *)psrc) + (rem & PIM); + char *d = ((char *)pdst) + (x & PIM); + if( w == 1 ) + *d = *s; + else + bcopy( s, d, w ); + if ((x & PIM) + w == PPW) ++pdst; + } + else + #endif if(((x & PIM) + w) <= PPW) { getbits(psrc, (rem & PIM), w, tmpSrc); diff -cbr ORIG/cfb/cfbgetsp.c cfb/cfbgetsp.c *** ORIG/cfb/cfbgetsp.c Fri Jul 22 18:04:00 1988 --- cfb/cfbgetsp.c Sat Jan 28 22:07:57 1989 *************** *** 114,119 i = 0; while(ppt < pptLast) { xEnd = min(ppt->x + *pwidth, widthSrc << (PWSH-2) ); pwidth++; psrc = psrcBase + (ppt->y * (widthSrc >> 2)) + (ppt->x >> PWSH); --- 114,123 ----- i = 0; while(ppt < pptLast) { + #if (PPW == 4) && (IMAGE_BYTE_ORDER == LSBFirst) + /* Byte pointers to src and dst */ + char *bpsrc, *bpdst; + xEnd = min(ppt->x + *pwidth, widthSrc << (PWSH-2) ); pwidth++; w = xEnd - ppt->x; *************** *** 116,121 { xEnd = min(ppt->x + *pwidth, widthSrc << (PWSH-2) ); pwidth++; psrc = psrcBase + (ppt->y * (widthSrc >> 2)) + (ppt->x >> PWSH); w = xEnd - ppt->x; srcBit = ppt->x & PIM; --- 120,140 ----- xEnd = min(ppt->x + *pwidth, widthSrc << (PWSH-2) ); pwidth++; + w = xEnd - ppt->x; + /* This shouldn't be needed */ + pdstNext = pdst + PixmapWidthInPadUnits(w, PSZ); + pwidthPadded[i] = PixmapWidthInPadUnits(w, PSZ) * PPW; + i++; + + + bpsrc = ((char *)(psrcBase + (ppt->y * (widthSrc >> 2)))) + ppt->x; + bpdst = (char *)pdst; + bcopy( bpsrc, bpdst, w ); + + pdst = pdstNext; + #else + xEnd = min(ppt->x + *pwidth, widthSrc << (PWSH-2) ); + pwidth++; psrc = psrcBase + (ppt->y * (widthSrc >> 2)) + (ppt->x >> PWSH); 
w = xEnd - ppt->x; srcBit = ppt->x & PIM; *************** *** 173,178 pdst = pdstNext; #endif notdef } ppt++; pwidth++; } --- 192,198 ----- pdst = pdstNext; #endif notdef } + #endif ppt++; pwidth++; } diff -cbr ORIG/cfb/cfbpntwin.c cfb/cfbpntwin.c *** ORIG/cfb/cfbpntwin.c Wed Sep 2 02:23:56 1987 --- cfb/cfbpntwin.c Sat Jan 28 21:40:36 1989 *************** *** 432,437 int widthSrc, widthDst, nlMiddle, startmask, endmask; PixmapPtr pDstPixmap; psrcLine = (int *)pSrc->devPrivate; --- 432,439 ----- int widthSrc, widthDst, nlMiddle, startmask, endmask; PixmapPtr pDstPixmap; + #if (PPW == 4) && (IMAGE_BYTE_ORDER == LSBFirst) + psrcLine = (int *)pSrc->devPrivate; pDstPixmap = (PixmapPtr)pDstWin->drawable.pScreen->devPrivate; widthDst = (int)pDstPixmap->devKind; *************** *** 433,438 PixmapPtr pDstPixmap; psrcLine = (int *)pSrc->devPrivate; pDstPixmap = (PixmapPtr)pDstWin->drawable.pScreen->devPrivate; --- 435,458 ----- #if (PPW == 4) && (IMAGE_BYTE_ORDER == LSBFirst) psrcLine = (int *)pSrc->devPrivate; + pDstPixmap = (PixmapPtr)pDstWin->drawable.pScreen->devPrivate; + widthDst = (int)pDstPixmap->devKind; + pdstLine = (int *)pDstPixmap->devPrivate + (y * (widthDst >> 2)); + widthSrc = (int)pSrc->devKind; + + { + /* Byte pointers to src and dst */ + char *bpsrc, *bpdst; + + bpsrc = (char *)psrcLine; + bpdst = ((char *)pdstLine) + x; + while( tileHeight-- ){ + bcopy( bpsrc, bpdst, tileWidth ); + bpsrc += widthSrc; + bpdst += widthDst; + } + } + #else psrcLine = (int *)pSrc->devPrivate; pDstPixmap = (PixmapPtr)pDstWin->drawable.pScreen->devPrivate; *************** *** 514,517 psrcLine += widthSrc; } } } --- 534,538 ----- psrcLine += widthSrc; } } + #endif } diff -cbr ORIG/cfb/cfbsetsp.c cfb/cfbsetsp.c *** ORIG/cfb/cfbsetsp.c Fri Sep 11 00:08:26 1987 --- cfb/cfbsetsp.c Sat Jan 28 21:22:08 1989 *************** *** 64,69 int offSrc; int startmask, endmask, nlMiddle, nl; pdst = pdstBase + (y * widthDst) + (xStart >> PWSH); psrc += (xStart - xOrigin) >> PWSH; offSrc = 
(xStart - xOrigin) & PIM; --- 64,85 ----- int offSrc; int startmask, endmask, nlMiddle, nl; + w = xEnd - xStart; + dstBit = xStart & PIM; + + #if (PPW == 4) && (IMAGE_BYTE_ORDER == LSBFirst) + if( alu == GXcopy && planemask == -1 ){ + /* Byte pointers to src and dst */ + char *bpsrc, *bpdst; + + bpsrc = ((char *)psrc) + xStart - xOrigin; + bpdst = ((char *)(pdstBase + (y * widthDst))) + xStart; + + bcopy( bpsrc, bpdst, w ); + return; + } + #endif + pdst = pdstBase + (y * widthDst) + (xStart >> PWSH); psrc += (xStart - xOrigin) >> PWSH; offSrc = (xStart - xOrigin) & PIM; *************** *** 67,74 pdst = pdstBase + (y * widthDst) + (xStart >> PWSH); psrc += (xStart - xOrigin) >> PWSH; offSrc = (xStart - xOrigin) & PIM; - w = xEnd - xStart; - dstBit = xStart & PIM; if (dstBit + w <= PPW) { --- 83,88 ----- pdst = pdstBase + (y * widthDst) + (xStart >> PWSH); psrc += (xStart - xOrigin) >> PWSH; offSrc = (xStart - xOrigin) & PIM; if (dstBit + w <= PPW) { diff -cbr ORIG/cfb/cfbtegblt.c cfb/cfbtegblt.c *** ORIG/cfb/cfbtegblt.c Tue Sep 6 19:02:49 1988 --- cfb/cfbtegblt.c Sat Jan 28 20:40:44 1989 *************** *** 147,152 case rgnIN: pdtmp = pdstBase + (widthDst * ypos); while(nglyph--) { --- 148,156 ----- case rgnIN: pdtmp = pdstBase + (widthDst * ypos); + #if (PPW == 4) && (IMAGE_BYTE_ORDER == LSBFirst) + /* Special case terminal emulator fonts */ + if( pGC->planemask == -1 && wtmp == 8 ){ while(nglyph--) { pglyph = pglyphBase + (*ppci++)->byteOffset; *************** *** 149,154 pdtmp = pdstBase + (widthDst * ypos); while(nglyph--) { pglyph = pglyphBase + (*ppci++)->byteOffset; pdst = pdtmp; --- 153,161 ----- if( pGC->planemask == -1 && wtmp == 8 ){ while(nglyph--) { + pglyph = pglyphBase + (*ppci++)->byteOffset; + pdst = pdtmp; + hTmp = h; while (hTmp--) { *************** *** 150,155 while(nglyph--) { pglyph = pglyphBase + (*ppci++)->byteOffset; pdst = pdtmp; hTmp = h; --- 157,191 ----- pdst = pdtmp; hTmp = h; + while (hTmp--) + { + char *bpdst = ((char *)pdst) + xpos; 
+ unsigned int b, tmpDst; + unsigned int g = *((unsigned int *)pglyph); + struct twowords { + int wd1, wd2; + } t; + + /* Turn four bits into four bytes obeying fg and bg */ + b = g & 0xF; + t.wd1 = ((~QuartetPixelMaskTable[ b ]) & bgfill) | + ( QuartetPixelMaskTable[ b ] & fgfill); + /* and the following four bits too */ + b = (g >> 4) & 0xF; + t.wd2 = ((~QuartetPixelMaskTable[ b ]) & bgfill) | + ( QuartetPixelMaskTable[ b ] & fgfill); + bcopy( &t.wd1, bpdst, 8 ); + + pglyph += widthGlyph; + pdst += widthDst; + } + xpos += pci->metrics.characterWidth; + } + break; + } + #endif + while(nglyph--) + { pglyph = pglyphBase + (*ppci++)->byteOffset; pdst = pdtmp; hTmp = h; diff -cbr ORIG/cfb/cfbutils.c cfb/cfbutils.c *** ORIG/cfb/cfbutils.c Tue May 24 18:36:42 1988 --- cfb/cfbutils.c Sat Jan 28 20:49:44 1989 *************** *** 86,91 psrcLine = psrcBase + (ySrc * wSrc); pdstLine = pdstBase + (yDst * wDst); } /* x direction doesn't matter for < 1 longword */ if (w <= PPW) --- 86,98 ----- psrcLine = psrcBase + (ySrc * wSrc); pdstLine = pdstBase + (yDst * wDst); } + #if (PPW == 4) && (IMAGE_BYTE_ORDER == LSBFirst) + { + /* widths are in words, convert back to bytes */ + int bwidthSrc = wSrc << PWSH; + int bwidthDst = wDst << PWSH; + /* Byte pointers to src and dst */ + char *bpsrc, *bpdst; bpsrc = ((char *)psrcLine) + xSrc; bpdst = ((char *)pdstLine) + xDst; *************** *** 87,92 pdstLine = pdstBase + (yDst * wDst); } /* x direction doesn't matter for < 1 longword */ if (w <= PPW) { --- 94,109 ----- /* Byte pointers to src and dst */ char *bpsrc, *bpdst; + bpsrc = ((char *)psrcLine) + xSrc; + bpdst = ((char *)pdstLine) + xDst; + while( h-- ){ + bcopy( bpsrc, bpdst, w ); + bpsrc += bwidthSrc; + bpdst += bwidthDst; + } + } + #else + /* x direction doesn't matter for < 1 longword */ if (w <= PPW) { *************** *** 213,216 } } /* move right to left */ } } --- 230,234 ----- } } /* move right to left */ } + #endif } -- Lee McLoughlin 01 589 5111 X 5028 Department of 
Computing,Imperial College,180 Queens Gate,London SW7 2BZ, UK Janet: lmjm@uk.ac.ic.doc Uucp: lmjm@icdoc.UUCP, ukc!icdoc!lmjm DARPA: lmjm@doc.ic.ac.uk (or lmjm%uk.ac.ic.doc@nss.cs.ucl.ac.uk)