thomson@cs.utah.edu (Rich Thomson) (03/29/91)
Thanks to Brian McClendon and Bruce Holloway for providing the insight on the VGX 1 M p/s numbers. Here is code that Brian tells me reproduces the performance (I don't have a VGX to try it on ... yet). -- Rich Rich Thomson thomson@cs.utah.edu {bellcore,hplabs,uunet}!utah-cs!thomson ``Read my MIPs -- no new VAXes!!'' --George Bush after sniffing freon Date: Thu, 28 Mar 91 19:16:35 -0800 From: bam@rudedog.asd.sgi.com (Brian McClendon) Organization: Silicon Graphics, Inc. After looking at bruce's numbers, I re-ran the test that I used to convince myself of VGX performace. Here's the output from a run that avoided fill limitations: spmdevi> DLmesh2 2 0 10000000 0 1 size=2.000000, zb(0), cnt=10000000, shademodel:0,subpixel:1 tmeshes, lighted, cached: 607902 polygons per second,1645.000111.0 nsec/poly tmeshes COLORed (one c3f/vertex), cached: 1014198 polygons per second,985.999918.0 nsec/poly tmeshes COLORed (one c3i/vertex), cached: 736377 polygons per second,1357.999963.0 nsec/poly tmeshes COLORed (one c3s/vertex), cached: 584453 polygons per second,1711.000095.0 nsec/poly tmeshes, FLAT (one c3f), cached: 1055966 polygons per second,947.000046.0 nsec/poly Here's the src code: --- DLmesh2.c --- cc DLmesh2.c -lgl_s -lm -o DLmesh2 #include <stdio.h> #include <gl.h> #include <device.h> #include <sys/types.h> #include <sys/times.h> #include <sys/param.h> #include <math.h> #define NUMTRI 600 #define XOFFSET 20 #define YOFFSET 20 #define XSPACE 50 #define XWINDOW 20 #define YWINDOW 20 #define PAGESIZ 4096 #define TRIBUFSIZ ((NUMTRI + 2) * 4) /* number of floats */ #define NORMBUFSIZ (4 * 4) /* number of floats */ #define COLORBUFSIZ (4 * 4) /* number of floats */ extern char *malloc(); float normbuf[16] = {1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0, 1.0,0.0,0.0,0.0}; float tribuf[TRIBUFSIZ]; float colorbuf[16] = {0.9,0.0,0.0,0.9,0.0,0.9,0.0,0.9,0.0,0.0,0.9,0.9, 0.9,0.9,0.9,0.9}; int coloribuf[16] = {230,0,0,230,0,230,0,230,0,0,230,230, 230,230,230,230}; short 
colorsbuf[16] = {260,0,0,260,0,260,0,260,0,0,260,260, 260,260,260,260}; int lightobj, colorobj, coloriobj, colorsobj, flatobj; main(argc,argv) char *argv[]; { register i; register events; int rotated; float size; int doz,sm,sp; /* evaluate command line arguments */ size = 10; doz = TRUE; sm = FLAT; sp = TRUE; events = 100000; if (argc > 1) size = (float) atoi(argv[1]); if (argc > 2) doz = atoi(argv[2]); if (argc > 3) events = atoi(argv[3]); if (argc > 4) sm = atoi(argv[4]); if (argc > 5) sp = atoi(argv[5]); printf("size=%f, zb(%d), cnt=%d, shademodel:%d,subpixel:%d\n", size, doz, events,sm,sp); /* initialize graphics */ prefposition(0,1279,0,200); foreground(); winopen("speed"); RGBmode(); gconfig(); initlight(); if(doz) { zbuffer(doz); zfunction(ZF_ALWAYS); printf("calling zbuffer(%d)\n",doz); } /* clear the window */ cpack(0); clear(); #define SQRT3_2 (1.7321/2.0) /* initialize data arrays */ for (i=0; i<(1 + NUMTRI/2); i++) { tribuf[i*8+0] = size*i; tribuf[i*8+1] = 0; tribuf[i*8+2] = 0; tribuf[i*8+4] = size*i + size/2; tribuf[i*8+5] = size*SQRT3_2; tribuf[i*8+6] = 0; } makeobj(lightobj = genobj()); bgntmesh(); for(i=0;i<(1 + NUMTRI/2);i++) { n3f(&normbuf[(i%2)*4]); v3f(&tribuf[i*8]); n3f(&normbuf[(i%4)*4]); v3f(&tribuf[i*8 + 4]); } endtmesh(); closeobj(); makeobj(flatobj = genobj()); bgntmesh(); c3f(&colorbuf[0]); for(i=0;i<(1 + NUMTRI/2);i++) { v3f(&tribuf[i*8]); v3f(&tribuf[i*8 + 4]); } endtmesh(); closeobj(); makeobj(colorobj = genobj()); bgntmesh(); for(i=0;i<(1 + NUMTRI/2);i++) { c3f(&colorbuf[(i%2)*4]); v3f(&tribuf[i*8]); c3f(&colorbuf[(i%4)*4]); v3f(&tribuf[i*8 + 4]); } endtmesh(); closeobj(); makeobj(coloriobj = genobj()); bgntmesh(); for(i=0;i<(1 + NUMTRI/2);i++) { c3i(&coloribuf[(i%2)*4]); v3f(&tribuf[i*8]); c3i(&coloribuf[(i%4)*4]); v3f(&tribuf[i*8 + 4]); } endtmesh(); closeobj(); makeobj(colorsobj = genobj()); bgntmesh(); for(i=0;i<(1 + NUMTRI/2);i++) { c3s(&colorsbuf[(i%2)*4]); v3f(&tribuf[i*8]); c3s(&colorsbuf[(i%4)*4]); v3f(&tribuf[i*8 + 4]); } 
endtmesh(); closeobj(); shademodel(sm); subpixel(sp); translate(XOFFSET,YOFFSET,0); /* run the timing tests */ lightvpoly(events); colorvpoly(events); colorivpoly(events); colorsvpoly(events); flatvpoly(events); } colorvpoly(events) { /* measure performance of 4-vertex, COLORed, cached polygons */ register i; printf("tmeshes COLORed (one c3f/vertex), cached:\n"); cpack(0); clear(); startclock(); for (i=events/NUMTRI; i>0; i--) { callobj(colorobj); } finish(); stopclock(events); } colorivpoly(events) { register i; printf("tmeshes COLORed (one c3i/vertex), cached:\n"); cpack(0); clear(); startclock(); for (i=events/NUMTRI; i>0; i--) { callobj(coloriobj); } finish(); stopclock(events); } colorsvpoly(events) { register i; printf("tmeshes COLORed (one c3s/vertex), cached:\n"); cpack(0); clear(); startclock(); for (i=events/NUMTRI; i>0; i--) { callobj(colorsobj); } finish(); stopclock(events); } flatvpoly(events) { /* measure performance of 4-vertex, FLAT, cached polygons */ register i; printf("tmeshes, FLAT (one c3f), cached:\n"); cpack(0); clear(); startclock(); for (i=events/NUMTRI; i>0; i--) { callobj(flatobj); } finish(); stopclock(events); } lightvpoly(events) { /* measure performance of 4-vertex, lighted, cached polygons */ register i; printf("tmeshes, lighted, cached:\n"); cpack(0); clear(); light(TRUE); startclock(); for (i=events/NUMTRI; i>0; i--) callobj(lightobj); finish(); stopclock(events); light(FALSE); } struct tms tbuf; long timecnt; startclock() { /* sample the 10 millisecond clock */ swapcontext(); timecnt = times(&tbuf); } stopclock(polygons) { /* compute elapsed time, then polygon rate. 
print results */ float period; float timeperpoly; float rate; period = (float)(times(&tbuf) - timecnt) / 100.0; timeperpoly = period / (float)polygons; rate = 1.0 / timeperpoly; printf(" %6d polygons per second,%f.0 nsec/poly\n", (int)rate,timeperpoly * 1000000000); interrupt(); } swapcontext() { /* call a GL routine to insure that my context is in the pipe */ sleep(1); getcolor(); } float brass[] = { AMBIENT, 0.35, 0.25, 0.1, DIFFUSE, 0.65, 0.5, 0.35, SPECULAR, 0.0, 0.0, 0.0, SHININESS, 5.0, LMNULL }; float whitelight[] = { AMBIENT, 0.0, 0.0, 0.0, LCOLOR, 1.0, 1.0, 1.0, POSITION, 0.0, 0.0, 1.0, 0.0, LMNULL }; float infinite[] = { AMBIENT, 0.3, 0.3, 0.3, LOCALVIEWER, 0.0, LMNULL }; float local[] = { AMBIENT, 0.3, 0.3, 0.3, LOCALVIEWER, 1.0, LMNULL }; float idmat[] = { 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0 }; initlight() { /* provide a simple lighting model just for timing purposes */ long xsize,ysize; getsize(&xsize,&ysize); mmode(MPROJECTION); ortho(-0.5,(float)xsize-0.5,-0.5,(float)ysize-0.5,0.0,10000.0); mmode(MVIEWING); loadmatrix(idmat); lmdef(DEFMATERIAL, 1, 0, brass); lmdef(DEFLIGHT, 1, 0, whitelight); lmdef(DEFLMODEL, 1, 0, infinite); lmbind(LIGHT1, 1); lmbind(LMODEL, 1); } light(b) { /* turn lighting on and off */ lmbind(MATERIAL, b ? 1 : 0); } interrupt() { /* check queue for escape key and exit if found */ short dev,val; while (qtest()) { if (qread(&val) == ESCKEY) { exit(0); } } } ----- It's pretty gross because it has been borrowed and hacked over since the 4D/60G days, but I believe that it gives honest numbers. The DL program currently has DL's so big that the triangles won't fit onto the screen if they're bigger than 2 or 3 (in one dimension, not pixels). This could be easily changed. The last time I checked fill rate, I was able to get 1M+ triangles at 60 pixels/triangle, but I haven't done that since 3.3 went out. Note that this uses display lists because thats only way to get 1M c3f/v3f or n3f/v3f numbers. 
This is due to a bus bandwidth limitation that DL's can sneak around (but user code can't). A c3f/v3f/v3f... sequence can easily hit 1M, though. Also note that we never bothered to tune c3i in the same way, and c3s has some problems that cause a significant performance hit. Almost all customers I have ever talked to use n3f, c3f, v3f, and maybe cpack. Hope this helps. -- ---------------------------------------------------------------------------- Brian McClendon bam@rudedog.SGI.COM ...!uunet!sgi!rudedog!bam 415-335-1110 ----------------------------------------------------------------------------