[comp.windows.x] Here are some simple CFB speedups

lmjm@doc.imperial.ac.UK (Lee McLoughlin) (02/02/89)

Here are some relatively simple optimisations to the CFB code.  They
are the product of about a day's work and are a first-round
optimisation for a particular machine: an HLH Orion 1/05 (a fast
Clipper-based 4.2 BSD Unix box with a 1280 x 1000 pseudo-colour screen).
HLH have said it's OK for me to give away these diffs (they are a nice
company - did I mention that Orions are also cheap?).

These optimisations are not portable.  They are only suitable for
machines which meet the following (in order of importance):-

	1) PPW == 4					(1 byte == 1 pixel)
	2) bcopy must be fully working (i.e. it must handle backwards and overlapping copies)
	3) IMAGE_BYTE_ORDER == LSBFirst			(vax byte order)
	4) Need a bfill function.

The basic idea for this lot is to use bcopy wherever possible in
order to avoid the overheads of getbits/putbits, hence restrictions 1
and 2.  I've no time to work on these patches but I believe that they
*should* also work on an MSBFirst machine (you would have to relax the
IMAGE_BYTE_ORDER test in the #if guards) - you'll have to try them and see.
Bfill is a relative of bcopy:
	bfill( b, fill, width )
		char *b;  /* pointer to bytes to fill */
		int fill; /* the word to replicate over the bytes;
			   * all four bytes in the word should be the same */
		int width; /* the size of the block of bytes */

It should be simple to write one in C.  PFILL() generates suitable fill values.

There are a couple of bits of pretty shoddy code down in these diffs.
Blame me, not HLH.

Any feedback or, better still, more cfb optimisations are *very* welcome.
A cfbLineSS and an unnatural tile routine would be my next attempts
(when *and* if I can find more time).

These diffs are under the standard disclaimer found in all good X source.


diff -cbr ORIG/cfb/cfbbitblt.c cfb/cfbbitblt.c
*** ORIG/cfb/cfbbitblt.c	Tue Nov 15 13:28:26 1988
--- cfb/cfbbitblt.c	Sat Jan 28 18:20:44 1989
***************
*** 590,595
      /* special case copy, to avoid some redundant moves into temporaries */
      if (alu == GXcopy)
      {
          while (nbox--)
          {
  	    w = pbox->x2 - pbox->x1;

--- 590,601 -----
      /* special case copy, to avoid some redundant moves into temporaries */
      if (alu == GXcopy)
      {
+ #if (PPW == 4) && (IMAGE_BYTE_ORDER == LSBFirst)
+         /* widths are in words, convert back to bytes */
+         int bwidthSrc = widthSrc << 2;
+         int bwidthDst = widthDst << 2;
+ #endif
+ 
          while (nbox--)
          {
  	    w = pbox->x2 - pbox->x1;
***************
*** 606,611
  	        pdstLine = pdstBase + (pbox->y1 * widthDst);
  	    }
  
  	    /* x direction doesn't matter for < 1 longword */
  	    if (w <= PPW)
  	    {

--- 612,631 -----
  	        pdstLine = pdstBase + (pbox->y1 * widthDst);
  	    }
  
+ #if (PPW == 4) && (IMAGE_BYTE_ORDER == LSBFirst)
+ 	    {
+ 		    /* Byte pointers to src and dst */
+ 		    char *bpsrc, *bpdst;
+ 		    
+ 		    bpsrc = ((char *)psrcLine) + pptSrc->x;
+ 		    bpdst = ((char *)pdstLine) + pbox->x1;
+ 		    while( h-- ){
+ 			    bcopy( bpsrc, bpdst, w );
+ 			    bpsrc += bwidthSrc;
+ 			    bpdst += bwidthDst;
+ 		    }
+ 	    }
+ #else
  	    /* x direction doesn't matter for < 1 longword */
  	    if (w <= PPW)
  	    {
***************
*** 744,749
  		    }
  	        } /* move right to left */
  	    }
  	    pbox++;
  	    pptSrc++;
          } /* while (nbox--) */

--- 764,770 -----
  		    }
  	        } /* move right to left */
  	    }
+ #endif
  	    pbox++;
  	    pptSrc++;
          } /* while (nbox--) */
diff -cbr ORIG/cfb/cfbfillsp.c cfb/cfbfillsp.c
*** ORIG/cfb/cfbfillsp.c	Thu Oct 13 22:35:25 1988
--- cfb/cfbfillsp.c	Mon Jan 30 22:04:42 1989
***************
*** 196,201
  
  	if (*pwidth)
  	{
  	    if ( ((ppt->x & PIM) + *pwidth) <= PPW)
  	    {
  		/* all bits inside same longword */

--- 196,219 -----
  
  	if (*pwidth)
  	{
+ #if (PPW == 4) && (IMAGE_BYTE_ORDER == LSBFirst)
+ 	    if( rop == GXcopy && pGC->planemask == -1 )
+ 	    {
+ 		    char *pd = ((char *)addrl) + (ppt->x & PIM);
+ 		    
+ 		    if( *pwidth == 1 )
+ 			    *pd = fill;
+ 		    else
+ 			    bfill( pd, fill, *pwidth );
+ 	    }
+ 	    else if( rop == GXinvert && pGC->planemask == -1 && *pwidth == 1 )
+ 	    {
+ 		    char *pd = ((char *)addrl) + (ppt->x & PIM);
+ 		    
+ 		    *pd ^= -1;
+ 	    }
+ 	    else
+ #endif
  	    if ( ((ppt->x & PIM) + *pwidth) <= PPW)
  	    {
  		/* all bits inside same longword */
***************
*** 367,372
  		}
  		else
  #endif /* notdef */
  		if(((x & PIM) + w) <= PPW)
  		{
  		    getbits(psrc, (rem & PIM), w, tmpSrc);

--- 385,403 -----
  		}
  		else
  #endif /* notdef */
+ #if (PPW == 4) && (IMAGE_BYTE_ORDER == LSBFirst)
+ 		if( rop == GXcopy && pGC->planemask == -1 )
+ 		{
+ 			char *s = ((char *)psrc) + (rem & PIM);
+ 			char *d = ((char *)pdst) + (x & PIM);
+ 			if( w == 1 )
+ 				*d = *s;
+ 			else
+ 				bcopy( s, d, w );
+ 			if ((x & PIM) + w == PPW) ++pdst;
+ 		}
+ 		else
+ #endif
  		if(((x & PIM) + w) <= PPW)
  		{
  		    getbits(psrc, (rem & PIM), w, tmpSrc);
diff -cbr ORIG/cfb/cfbgetsp.c cfb/cfbgetsp.c
*** ORIG/cfb/cfbgetsp.c	Fri Jul 22 18:04:00 1988
--- cfb/cfbgetsp.c	Sat Jan 28 22:07:57 1989
***************
*** 114,119
      i = 0;
      while(ppt < pptLast)
      {
  	xEnd = min(ppt->x + *pwidth, widthSrc << (PWSH-2) );
  	pwidth++;
  	psrc = psrcBase + (ppt->y * (widthSrc >> 2)) + (ppt->x >> PWSH); 

--- 114,123 -----
      i = 0;
      while(ppt < pptLast)
      {
+ #if (PPW == 4) && (IMAGE_BYTE_ORDER == LSBFirst)
+         /* Byte pointers to src and dst */
+         char *bpsrc, *bpdst;
+ 
  	xEnd = min(ppt->x + *pwidth, widthSrc << (PWSH-2) );
  	pwidth++;
  	w = xEnd - ppt->x;
***************
*** 116,121
      {
  	xEnd = min(ppt->x + *pwidth, widthSrc << (PWSH-2) );
  	pwidth++;
  	psrc = psrcBase + (ppt->y * (widthSrc >> 2)) + (ppt->x >> PWSH); 
  	w = xEnd - ppt->x;
  	srcBit = ppt->x & PIM;

--- 120,140 -----
  
  	xEnd = min(ppt->x + *pwidth, widthSrc << (PWSH-2) );
  	pwidth++;
+ 	w = xEnd - ppt->x;
+ 	/* This shouldn't be needed */
+ 	pdstNext = pdst + PixmapWidthInPadUnits(w, PSZ);
+ 	pwidthPadded[i] = PixmapWidthInPadUnits(w, PSZ) * PPW;
+ 	i++;
+ 
+ 	  
+ 	bpsrc = ((char *)(psrcBase + (ppt->y * (widthSrc >> 2)))) + ppt->x;
+ 	bpdst = (char *)pdst;
+ 	bcopy( bpsrc, bpdst, w );
+ 	
+ 	pdst = pdstNext;
+ #else
+ 	xEnd = min(ppt->x + *pwidth, widthSrc << (PWSH-2) );
+ 	pwidth++;
  	psrc = psrcBase + (ppt->y * (widthSrc >> 2)) + (ppt->x >> PWSH); 
  	w = xEnd - ppt->x;
  	srcBit = ppt->x & PIM;
***************
*** 173,178
  	    pdst = pdstNext;
  #endif notdef
  	} 
          ppt++;
  	pwidth++;
      }

--- 192,198 -----
  	    pdst = pdstNext;
  #endif notdef
  	}
+ #endif 
          ppt++;
  	pwidth++;
      }
diff -cbr ORIG/cfb/cfbpntwin.c cfb/cfbpntwin.c
*** ORIG/cfb/cfbpntwin.c	Wed Sep  2 02:23:56 1987
--- cfb/cfbpntwin.c	Sat Jan 28 21:40:36 1989
***************
*** 432,437
      int			widthSrc, widthDst, nlMiddle, startmask, endmask;
      PixmapPtr		pDstPixmap;
  
  
      psrcLine = (int *)pSrc->devPrivate;
  

--- 432,439 -----
      int			widthSrc, widthDst, nlMiddle, startmask, endmask;
      PixmapPtr		pDstPixmap;
  
+ #if (PPW == 4) && (IMAGE_BYTE_ORDER == LSBFirst)
+     psrcLine = (int *)pSrc->devPrivate;
  
      pDstPixmap = (PixmapPtr)pDstWin->drawable.pScreen->devPrivate;
      widthDst = (int)pDstPixmap->devKind;
***************
*** 433,438
      PixmapPtr		pDstPixmap;
  
  
      psrcLine = (int *)pSrc->devPrivate;
  
      pDstPixmap = (PixmapPtr)pDstWin->drawable.pScreen->devPrivate;

--- 435,458 -----
  #if (PPW == 4) && (IMAGE_BYTE_ORDER == LSBFirst)
      psrcLine = (int *)pSrc->devPrivate;
  
+     pDstPixmap = (PixmapPtr)pDstWin->drawable.pScreen->devPrivate;
+     widthDst = (int)pDstPixmap->devKind;
+     pdstLine = (int *)pDstPixmap->devPrivate + (y * (widthDst >> 2));
+     widthSrc = (int)pSrc->devKind;
+ 
+     {
+       /* Byte pointers to src and dst */
+       char *bpsrc, *bpdst;
+       
+       bpsrc = (char *)psrcLine;
+       bpdst = ((char *)pdstLine) + x;
+       while( tileHeight-- ){
+ 	bcopy( bpsrc, bpdst, tileWidth );
+ 	bpsrc += widthSrc;
+ 	bpdst += widthDst;
+       }
+     }
+ #else
      psrcLine = (int *)pSrc->devPrivate;
  
      pDstPixmap = (PixmapPtr)pDstWin->drawable.pScreen->devPrivate;
***************
*** 514,517
  	    psrcLine += widthSrc;
  	}
      }
  }

--- 534,538 -----
  	    psrcLine += widthSrc;
  	}
      }
+ #endif
  }
diff -cbr ORIG/cfb/cfbsetsp.c cfb/cfbsetsp.c
*** ORIG/cfb/cfbsetsp.c	Fri Sep 11 00:08:26 1987
--- cfb/cfbsetsp.c	Sat Jan 28 21:22:08 1989
***************
*** 64,69
      int			offSrc;
      int		startmask, endmask, nlMiddle, nl;
  
      pdst = pdstBase + (y * widthDst) + (xStart >> PWSH); 
      psrc += (xStart - xOrigin) >> PWSH;
      offSrc = (xStart - xOrigin) & PIM;

--- 64,85 -----
      int			offSrc;
      int		startmask, endmask, nlMiddle, nl;
  
+     w = xEnd - xStart;
+     dstBit = xStart & PIM;
+ 
+ #if (PPW == 4) && (IMAGE_BYTE_ORDER == LSBFirst)
+     if( alu == GXcopy && planemask == -1 ){
+       /* Byte pointers to src and dst */
+       char *bpsrc, *bpdst;
+       
+       bpsrc = ((char *)psrc) + xStart - xOrigin;
+       bpdst = ((char *)(pdstBase + (y * widthDst))) + xStart;
+ 
+       bcopy( bpsrc, bpdst, w );
+       return;
+     }
+ #endif
+ 
      pdst = pdstBase + (y * widthDst) + (xStart >> PWSH); 
      psrc += (xStart - xOrigin) >> PWSH;
      offSrc = (xStart - xOrigin) & PIM;
***************
*** 67,74
      pdst = pdstBase + (y * widthDst) + (xStart >> PWSH); 
      psrc += (xStart - xOrigin) >> PWSH;
      offSrc = (xStart - xOrigin) & PIM;
-     w = xEnd - xStart;
-     dstBit = xStart & PIM;
  
      if (dstBit + w <= PPW) 
      { 

--- 83,88 -----
      pdst = pdstBase + (y * widthDst) + (xStart >> PWSH); 
      psrc += (xStart - xOrigin) >> PWSH;
      offSrc = (xStart - xOrigin) & PIM;
  
      if (dstBit + w <= PPW) 
      { 
diff -cbr ORIG/cfb/cfbtegblt.c cfb/cfbtegblt.c
*** ORIG/cfb/cfbtegblt.c	Tue Sep  6 19:02:49 1988
--- cfb/cfbtegblt.c	Sat Jan 28 20:40:44 1989
***************
*** 147,152
        case rgnIN:
  
          pdtmp = pdstBase + (widthDst * ypos);
          while(nglyph--)
          {
  

--- 148,156 -----
        case rgnIN:
  
          pdtmp = pdstBase + (widthDst * ypos);
+ #if (PPW == 4) && (IMAGE_BYTE_ORDER == LSBFirst)
+ 	/* Special case terminal emulator fonts */
+ 	if( pGC->planemask == -1 && wtmp == 8 ){
  	  while(nglyph--)
  	    {
  	      pglyph = pglyphBase + (*ppci++)->byteOffset;
***************
*** 149,154
          pdtmp = pdstBase + (widthDst * ypos);
          while(nglyph--)
          {
  
  	    pglyph = pglyphBase + (*ppci++)->byteOffset;
              pdst = pdtmp;

--- 153,161 -----
  	if( pGC->planemask == -1 && wtmp == 8 ){
  	  while(nglyph--)
  	    {
+ 	      pglyph = pglyphBase + (*ppci++)->byteOffset;
+ 	      pdst = pdtmp;
+ 	      hTmp = h;
  	      
  	      while (hTmp--)
  		{
***************
*** 150,155
          while(nglyph--)
          {
  
  	    pglyph = pglyphBase + (*ppci++)->byteOffset;
              pdst = pdtmp;
  	    hTmp = h;

--- 157,191 -----
  	      pdst = pdtmp;
  	      hTmp = h;
  	      
+ 	      while (hTmp--)
+ 		{
+ 		  char *bpdst = ((char *)pdst) + xpos;
+ 		  unsigned int b, tmpDst;
+ 		  unsigned int g = *((unsigned int *)pglyph);
+ 		  struct twowords {
+ 		    int wd1, wd2;
+ 		  } t;
+ 
+ 		  /* Turn four bits into four bytes obeying fg and bg */
+ 		  b = g & 0xF;
+ 		  t.wd1 = ((~QuartetPixelMaskTable[ b ]) & bgfill) |
+ 		           (  QuartetPixelMaskTable[ b ]  & fgfill);
+ 		  /* and the following four bits too */
+ 		  b = (g >> 4) & 0xF;
+ 		  t.wd2 = ((~QuartetPixelMaskTable[ b ]) & bgfill) |
+ 		          (  QuartetPixelMaskTable[ b ]  & fgfill);
+ 		  bcopy( &t.wd1, bpdst, 8 );
+ 
+ 		  pglyph += widthGlyph;
+ 		  pdst += widthDst;
+ 		}
+ 	      xpos += pci->metrics.characterWidth;
+ 	    }
+ 	  break;
+ 	}
+ #endif
+         while(nglyph--)
+         {
  	    pglyph = pglyphBase + (*ppci++)->byteOffset;
              pdst = pdtmp;
  	    hTmp = h;
diff -cbr ORIG/cfb/cfbutils.c cfb/cfbutils.c
*** ORIG/cfb/cfbutils.c	Tue May 24 18:36:42 1988
--- cfb/cfbutils.c	Sat Jan 28 20:49:44 1989
***************
*** 86,91
  	psrcLine = psrcBase + (ySrc * wSrc);
  	pdstLine = pdstBase + (yDst * wDst);
      }
  
      /* x direction doesn't matter for < 1 longword */
      if (w <= PPW)

--- 86,98 -----
  	psrcLine = psrcBase + (ySrc * wSrc);
  	pdstLine = pdstBase + (yDst * wDst);
      }
+ #if (PPW == 4) && (IMAGE_BYTE_ORDER == LSBFirst)
+     {
+       /* widths are in words, convert back to bytes */
+       int bwidthSrc = wSrc << PWSH;
+       int bwidthDst = wDst << PWSH;
+       /* Byte pointers to src and dst */
+       char *bpsrc, *bpdst;
        
        bpsrc = ((char *)psrcLine) + xSrc;
        bpdst = ((char *)pdstLine) + xDst;
***************
*** 87,92
  	pdstLine = pdstBase + (yDst * wDst);
      }
  
      /* x direction doesn't matter for < 1 longword */
      if (w <= PPW)
      {

--- 94,109 -----
        /* Byte pointers to src and dst */
        char *bpsrc, *bpdst;
        
+       bpsrc = ((char *)psrcLine) + xSrc;
+       bpdst = ((char *)pdstLine) + xDst;
+       while( h-- ){
+ 	bcopy( bpsrc, bpdst, w );
+ 	bpsrc += bwidthSrc;
+ 	bpdst += bwidthDst;
+       }
+     }
+ #else
+ 
      /* x direction doesn't matter for < 1 longword */
      if (w <= PPW)
      {
***************
*** 213,216
  	    }
  	} /* move right to left */
      }
  }

--- 230,234 -----
  	    }
  	} /* move right to left */
      }
+ #endif
  }
--
Lee McLoughlin			01 589 5111 X 5028
Department of Computing,Imperial College,180 Queens Gate,London SW7 2BZ, UK
Janet: lmjm@uk.ac.ic.doc	Uucp:  lmjm@icdoc.UUCP, ukc!icdoc!lmjm
DARPA: lmjm@doc.ic.ac.uk (or lmjm%uk.ac.ic.doc@nss.cs.ucl.ac.uk)