[comp.windows.x] Hack speedup for Sun CG4

spaf@cs.purdue.EDU (Gene Spafford) (04/10/89)

I have been way too busy to find the time to properly extend the
"Purdue" patches to the cfb code in the server.  However, I have put
in a few quick hacks that make a *BIG* difference on the color
performance on Sun 3/50/60 machines with CG4 boards; I suspect it will
work on CG2/CG3 machines as well, and on any other 680x0-based
machine using the cfb code.

The following are quick hacks.  I do intend to do a more complete and
thorough job, so just consider these as temporary.   To take advantage
of them you need the GCC compiler.

Save a copy of the server/ddx/cfb/cfbmskbits.h file.  Next, apply this
patch.  Then recompile with gcc and see the difference.

Let me know of problems/suggestions/etc.

*** /tmp/,RCSt1a03783	Sun Apr  9 19:58:12 1989
--- cfbmskbits.h	Sun Apr  9 19:34:27 1989
***************
*** 220,226 ****
--- 220,232 ----
  		   ((p)&PMSK) <<   PSZ | \
  		   ((p)&PMSK) << 2*PSZ | \
  		   ((p)&PMSK) << 3*PSZ )
+ #define PFILL2(p, pf) { \
+     pf = (p) & PMSK; \
+     pf |= (pf << PSZ); \
+     pf |= (pf << 2*PSZ); \
+ }
  
+ 
  #define maskbits(x, w, startmask, endmask, nlw) \
      startmask = cfbstarttab[(x)&PIM]; \
      endmask = cfbendtab[((x)+(w)) & PIM]; \
***************
*** 271,277 ****
--- 277,314 ----
      *((pdst)+1) = (*((pdst)+1) & (cfbstarttab[n] | ~pm)) | \
  	(SCRLEFT(src, m) & (cfbendtab[n] & pm)); \
  }
+ #ifdef mc68020 && __GNUC__
+ #undef getbits
+ #define FASTGETBITS(psrc, x, w, dst) \
+     asm ("bfextu %3{%1:%2},%0" \
+ 	 : "=d" (dst) : "di" (PSZ*x), "di" (PSZ*w), "o" (*(char *)(psrc)))
  
+ #define getbits(psrc,x,w,dst) \
+     FASTGETBITS(psrc, x, PPW, dst);\
+ 
+ #define FASTPUTBITS(src, x, w, pdst) \
+     asm ("bfins %3,%0{%1:%2}" \
+ 	 : "=o" (*(char *)(pdst)) \
+ 	 : "di" (x*PSZ), "di" (w*PSZ), "d" (src), "0" (*(char *) (pdst)))
+ 
+ #undef putbits
+ #define putbits(src, x, w, pdst, planemask) \
+ { \
+     if (planemask != 0xff) { \
+         unsigned long _m, _pm; \
+         FASTGETBITS(pdst, x, PPW, _m); \
+         PFILL2(planemask, _pm); \
+         _m &= (~_pm); \
+         _m |= (src & _pm); \
+         FASTPUTBITS(SCRRIGHT(_m, PPW-(w)), x, w, pdst); \
+     } else { \
+         FASTPUTBITS(SCRRIGHT(src, PPW-(w)), x, w, pdst); \
+     } \
+ }
+     
+ 
+ #endif mc68020
+ 
  #define putbitsrop(src, x, w, pdst, planemask, rop) \
  if ( ((x)+(w)) <= PPW) \
  { \
***************
*** 278,284 ****
      unsigned long tmpmask; \
      unsigned long t1, t2; \
      maskpartialbits((x), (w), tmpmask); \
!     tmpmask &= PFILL(planemask); \
      t1 = SCRRIGHT((src), (x)); \
      t2 = DoRop(rop, t1, *(pdst)); \
      *(pdst) = (*(pdst) & ~tmpmask) | (t2 & tmpmask); \
--- 315,322 ----
      unsigned long tmpmask; \
      unsigned long t1, t2; \
      maskpartialbits((x), (w), tmpmask); \
!     PFILL2(planemask, t1); \
!     tmpmask &= t1; \
      t1 = SCRRIGHT((src), (x)); \
      t2 = DoRop(rop, t1, *(pdst)); \
      *(pdst) = (*(pdst) & ~tmpmask) | (t2 & tmpmask); \
***************
*** 288,294 ****
      unsigned long m; \
      unsigned long n; \
      unsigned long t1, t2; \
!     unsigned long pm = PFILL(planemask); \
      m = PPW-(x); \
      n = (w) - m; \
      t1 = SCRRIGHT((src), (x)); \
--- 326,333 ----
      unsigned long m; \
      unsigned long n; \
      unsigned long t1, t2; \
!     unsigned long pm; \
!     PFILL2(planemask, pm); \
      m = PPW-(x); \
      n = (w) - m; \
      t1 = SCRRIGHT((src), (x)); \
-- 
Gene Spafford
NSF/Purdue/U of Florida  Software Engineering Research Center,
Dept. of Computer Sciences, Purdue University, W. Lafayette IN 47907-2004
Internet:  spaf@cs.purdue.edu	uucp:	...!{decwrl,gatech,ucbvax}!purdue!spaf

spaf@cs.purdue.edu (Gene Spafford) (04/11/89)

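Thanks to Arthur David Olson, here's a better way of expressing those
hacks.  The only change from the previous patch is the preprocessor
guard: "#ifdef mc68020 && __GNUC__" (#ifdef tests only a single
identifier) becomes "#if defined(__GNUC__) && defined(mc68020)":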

*** /tmp/,RCSt1a07012	Mon Apr 10 23:12:03 1989
--- cfbmskbits.h	Mon Apr 10 23:11:50 1989
***************
*** 220,226 ****
--- 220,232 ----
  		   ((p)&PMSK) <<   PSZ | \
  		   ((p)&PMSK) << 2*PSZ | \
  		   ((p)&PMSK) << 3*PSZ )
+ #define PFILL2(p, pf) { \
+     pf = (p) & PMSK; \
+     pf |= (pf << PSZ); \
+     pf |= (pf << 2*PSZ); \
+ }
  
+ 
  #define maskbits(x, w, startmask, endmask, nlw) \
      startmask = cfbstarttab[(x)&PIM]; \
      endmask = cfbendtab[((x)+(w)) & PIM]; \
***************
*** 271,277 ****
--- 277,314 ----
      *((pdst)+1) = (*((pdst)+1) & (cfbstarttab[n] | ~pm)) | \
  	(SCRLEFT(src, m) & (cfbendtab[n] & pm)); \
  }
+ #if defined(__GNUC__) && defined(mc68020)
+ #undef getbits
+ #define FASTGETBITS(psrc, x, w, dst) \
+     asm ("bfextu %3{%1:%2},%0" \
+ 	 : "=d" (dst) : "di" (PSZ*x), "di" (PSZ*w), "o" (*(char *)(psrc)))
  
+ #define getbits(psrc,x,w,dst) \
+     FASTGETBITS(psrc, x, PPW, dst);\
+ 
+ #define FASTPUTBITS(src, x, w, pdst) \
+     asm ("bfins %3,%0{%1:%2}" \
+ 	 : "=o" (*(char *)(pdst)) \
+ 	 : "di" (x*PSZ), "di" (w*PSZ), "d" (src), "0" (*(char *) (pdst)))
+ 
+ #undef putbits
+ #define putbits(src, x, w, pdst, planemask) \
+ { \
+     if (planemask != 0xff) { \
+         unsigned long _m, _pm; \
+         FASTGETBITS(pdst, x, PPW, _m); \
+         PFILL2(planemask, _pm); \
+         _m &= (~_pm); \
+         _m |= (src & _pm); \
+         FASTPUTBITS(SCRRIGHT(_m, PPW-(w)), x, w, pdst); \
+     } else { \
+         FASTPUTBITS(SCRRIGHT(src, PPW-(w)), x, w, pdst); \
+     } \
+ }
+     
+ 
+ #endif mc68020
+ 
  #define putbitsrop(src, x, w, pdst, planemask, rop) \
  if ( ((x)+(w)) <= PPW) \
  { \
***************
*** 278,284 ****
      unsigned long tmpmask; \
      unsigned long t1, t2; \
      maskpartialbits((x), (w), tmpmask); \
!     tmpmask &= PFILL(planemask); \
      t1 = SCRRIGHT((src), (x)); \
      t2 = DoRop(rop, t1, *(pdst)); \
      *(pdst) = (*(pdst) & ~tmpmask) | (t2 & tmpmask); \
--- 315,322 ----
      unsigned long tmpmask; \
      unsigned long t1, t2; \
      maskpartialbits((x), (w), tmpmask); \
!     PFILL2(planemask, t1); \
!     tmpmask &= t1; \
      t1 = SCRRIGHT((src), (x)); \
      t2 = DoRop(rop, t1, *(pdst)); \
      *(pdst) = (*(pdst) & ~tmpmask) | (t2 & tmpmask); \
***************
*** 288,294 ****
      unsigned long m; \
      unsigned long n; \
      unsigned long t1, t2; \
!     unsigned long pm = PFILL(planemask); \
      m = PPW-(x); \
      n = (w) - m; \
      t1 = SCRRIGHT((src), (x)); \
--- 326,333 ----
      unsigned long m; \
      unsigned long n; \
      unsigned long t1, t2; \
!     unsigned long pm; \
!     PFILL2(planemask, pm); \
      m = PPW-(x); \
      n = (w) - m; \
      t1 = SCRRIGHT((src), (x)); \
-- 
Gene Spafford
NSF/Purdue/U of Florida  Software Engineering Research Center,
Dept. of Computer Sciences, Purdue University, W. Lafayette IN 47907-2004
Internet:  spaf@cs.purdue.edu	uucp:	...!{decwrl,gatech,ucbvax}!purdue!spaf