thomson@cs.utah.edu (Rich Thomson) (03/29/91)
Thanks to Brian McClendon and Bruce Holloway for providing the insight
on the VGX 1 M p/s numbers. Here is code that Brian tells me
reproduces the performance (I don't have a VGX to try it on ... yet).
-- Rich
Rich Thomson thomson@cs.utah.edu {bellcore,hplabs,uunet}!utah-cs!thomson
``Read my MIPs -- no new VAXes!!'' --George Bush after sniffing freon
Date: Thu, 28 Mar 91 19:16:35 -0800
From: bam@rudedog.asd.sgi.com (Brian McClendon)
Organization: Silicon Graphics, Inc.
After looking at Bruce's numbers, I re-ran the test that I used
to convince myself of VGX performance. Here's the output from
a run that avoided fill limitations:
spmdevi> DLmesh2 2 0 10000000 0 1
size=2.000000, zb(0), cnt=10000000, shademodel:0,subpixel:1
tmeshes, lighted, cached:
607902 polygons per second,1645.000111.0 nsec/poly
tmeshes COLORed (one c3f/vertex), cached:
1014198 polygons per second,985.999918.0 nsec/poly
tmeshes COLORed (one c3i/vertex), cached:
736377 polygons per second,1357.999963.0 nsec/poly
tmeshes COLORed (one c3s/vertex), cached:
584453 polygons per second,1711.000095.0 nsec/poly
tmeshes, FLAT (one c3f), cached:
1055966 polygons per second,947.000046.0 nsec/poly
Here's the src code:
--- DLmesh2.c --- cc DLmesh2.c -lgl_s -lm -o DLmesh2
#include <stdio.h>
#include <gl.h>
#include <device.h>
#include <sys/types.h>
#include <sys/times.h>
#include <sys/param.h>
#include <math.h>
/* Nominal number of triangles per display list: main() puts NUMTRI+2 mesh
   vertices into each list, and every timing loop issues events/NUMTRI
   callobj() calls. */
#define NUMTRI 600
/* Offsets applied by main() with translate(XOFFSET,YOFFSET,0) before the
   timing runs. */
#define XOFFSET 20
#define YOFFSET 20
/* XSPACE, XWINDOW, YWINDOW and PAGESIZ are not referenced anywhere in the
   visible source. */
#define XSPACE 50
#define XWINDOW 20
#define YWINDOW 20
#define PAGESIZ 4096
/* (NUMTRI + 2) vertices with 4 floats reserved per vertex; main() only
   writes the first three (x, y, z) of each group of four. */
#define TRIBUFSIZ ((NUMTRI + 2) * 4) /* number of floats */
/* NORMBUFSIZ and COLORBUFSIZ are not referenced in the visible source; the
   normal and colour arrays are declared with a literal size of 16. */
#define NORMBUFSIZ (4 * 4) /* number of floats */
#define COLORBUFSIZ (4 * 4) /* number of floats */
/* K&R-style declaration; malloc() is not called in the visible source. */
extern char *malloc();
/* Four normals stored 4 floats apart: +x, +y, +z, +x.  main() selects one
   with normbuf[(i%2)*4] or normbuf[(i%4)*4]. */
float normbuf[16] = {1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,
1.0,0.0,0.0,0.0};
/* Mesh vertex coordinates, filled in by main(); file-scope, so the slots
   main() never writes stay zero. */
float tribuf[TRIBUFSIZ];
/* Four colours stored 4 values apart, in three element types so that the
   c3f(), c3i() and c3s() paths can each be timed with their own list. */
float colorbuf[16] = {0.9,0.0,0.0,0.9,0.0,0.9,0.0,0.9,0.0,0.0,0.9,0.9,
0.9,0.9,0.9,0.9};
int coloribuf[16] = {230,0,0,230,0,230,0,230,0,0,230,230,
230,230,230,230};
/* NOTE(review): these components are 260 where coloribuf uses 230; if
   c3s() expects values in 0..255 this may be a typo -- confirm before
   changing, since it could affect the measured c3s rate. */
short colorsbuf[16] = {260,0,0,260,0,260,0,260,0,0,260,260,
260,260,260,260};
/* Display-list identifiers, each assigned from genobj() in main(). */
int lightobj, colorobj, coloriobj, colorsobj, flatobj;
main(argc,argv) char *argv[]; {
register i;
register events;
int rotated;
float size;
int doz,sm,sp;
/* evaluate command line arguments */
size = 10;
doz = TRUE;
sm = FLAT;
sp = TRUE;
events = 100000;
if (argc > 1) size = (float) atoi(argv[1]);
if (argc > 2) doz = atoi(argv[2]);
if (argc > 3) events = atoi(argv[3]);
if (argc > 4) sm = atoi(argv[4]);
if (argc > 5) sp = atoi(argv[5]);
printf("size=%f, zb(%d), cnt=%d, shademodel:%d,subpixel:%d\n",
size, doz, events,sm,sp);
/* initialize graphics */
prefposition(0,1279,0,200);
foreground();
winopen("speed");
RGBmode();
gconfig();
initlight();
if(doz)
{
zbuffer(doz);
zfunction(ZF_ALWAYS);
printf("calling zbuffer(%d)\n",doz);
}
/* clear the window */
cpack(0);
clear();
#define SQRT3_2 (1.7321/2.0)
/* initialize data arrays */
for (i=0; i<(1 + NUMTRI/2); i++) {
tribuf[i*8+0] = size*i;
tribuf[i*8+1] = 0;
tribuf[i*8+2] = 0;
tribuf[i*8+4] = size*i + size/2;
tribuf[i*8+5] = size*SQRT3_2;
tribuf[i*8+6] = 0;
}
makeobj(lightobj = genobj());
bgntmesh();
for(i=0;i<(1 + NUMTRI/2);i++)
{
n3f(&normbuf[(i%2)*4]);
v3f(&tribuf[i*8]);
n3f(&normbuf[(i%4)*4]);
v3f(&tribuf[i*8 + 4]);
}
endtmesh();
closeobj();
makeobj(flatobj = genobj());
bgntmesh();
c3f(&colorbuf[0]);
for(i=0;i<(1 + NUMTRI/2);i++)
{
v3f(&tribuf[i*8]);
v3f(&tribuf[i*8 + 4]);
}
endtmesh();
closeobj();
makeobj(colorobj = genobj());
bgntmesh();
for(i=0;i<(1 + NUMTRI/2);i++)
{
c3f(&colorbuf[(i%2)*4]);
v3f(&tribuf[i*8]);
c3f(&colorbuf[(i%4)*4]);
v3f(&tribuf[i*8 + 4]);
}
endtmesh();
closeobj();
makeobj(coloriobj = genobj());
bgntmesh();
for(i=0;i<(1 + NUMTRI/2);i++)
{
c3i(&coloribuf[(i%2)*4]);
v3f(&tribuf[i*8]);
c3i(&coloribuf[(i%4)*4]);
v3f(&tribuf[i*8 + 4]);
}
endtmesh();
closeobj();
makeobj(colorsobj = genobj());
bgntmesh();
for(i=0;i<(1 + NUMTRI/2);i++)
{
c3s(&colorsbuf[(i%2)*4]);
v3f(&tribuf[i*8]);
c3s(&colorsbuf[(i%4)*4]);
v3f(&tribuf[i*8 + 4]);
}
endtmesh();
closeobj();
shademodel(sm);
subpixel(sp);
translate(XOFFSET,YOFFSET,0);
/* run the timing tests */
lightvpoly(events);
colorvpoly(events);
colorivpoly(events);
colorsvpoly(events);
flatvpoly(events);
}
/*
 * Time the cached triangle mesh that sets one c3f() colour per vertex.
 *
 * events: requested number of triangles.  The display list is replayed
 * events/NUMTRI times (but at least once), and stopclock() is given the
 * number of triangles actually submitted.  Passing `events` itself
 * overstated the rate whenever events was not a multiple of NUMTRI.
 */
colorvpoly(events) int events; {
    register int i;
    int passes;

    passes = events/NUMTRI;
    if (passes < 1)
        passes = 1;     /* always draw something so a rate can be computed */

    printf("tmeshes COLORed (one c3f/vertex), cached:\n");
    cpack(0);
    clear();
    startclock();
    for (i=passes; i>0; i--) {
        callobj(colorobj);
    }
    finish();
    stopclock(passes * NUMTRI);
}
/*
 * Time the cached triangle mesh that sets one c3i() colour per vertex.
 *
 * events: requested number of triangles.  The display list is replayed
 * events/NUMTRI times (but at least once), and stopclock() is given the
 * number of triangles actually submitted.  Passing `events` itself
 * overstated the rate whenever events was not a multiple of NUMTRI.
 */
colorivpoly(events) int events; {
    register int i;
    int passes;

    passes = events/NUMTRI;
    if (passes < 1)
        passes = 1;     /* always draw something so a rate can be computed */

    printf("tmeshes COLORed (one c3i/vertex), cached:\n");
    cpack(0);
    clear();
    startclock();
    for (i=passes; i>0; i--) {
        callobj(coloriobj);
    }
    finish();
    stopclock(passes * NUMTRI);
}
/*
 * Time the cached triangle mesh that sets one c3s() colour per vertex.
 *
 * events: requested number of triangles.  The display list is replayed
 * events/NUMTRI times (but at least once), and stopclock() is given the
 * number of triangles actually submitted.  Passing `events` itself
 * overstated the rate whenever events was not a multiple of NUMTRI.
 */
colorsvpoly(events) int events; {
    register int i;
    int passes;

    passes = events/NUMTRI;
    if (passes < 1)
        passes = 1;     /* always draw something so a rate can be computed */

    printf("tmeshes COLORed (one c3s/vertex), cached:\n");
    cpack(0);
    clear();
    startclock();
    for (i=passes; i>0; i--) {
        callobj(colorsobj);
    }
    finish();
    stopclock(passes * NUMTRI);
}
/*
 * Time the cached triangle mesh that sets a single c3f() colour for the
 * whole list.
 *
 * events: requested number of triangles.  The display list is replayed
 * events/NUMTRI times (but at least once), and stopclock() is given the
 * number of triangles actually submitted.  Passing `events` itself
 * overstated the rate whenever events was not a multiple of NUMTRI.
 */
flatvpoly(events) int events; {
    register int i;
    int passes;

    passes = events/NUMTRI;
    if (passes < 1)
        passes = 1;     /* always draw something so a rate can be computed */

    printf("tmeshes, FLAT (one c3f), cached:\n");
    cpack(0);
    clear();
    startclock();
    for (i=passes; i>0; i--) {
        callobj(flatobj);
    }
    finish();
    stopclock(passes * NUMTRI);
}
/*
 * Time the cached triangle mesh that sets one n3f() normal per vertex,
 * with the material bound (lighting on) for the duration of the test.
 *
 * events: requested number of triangles.  The display list is replayed
 * events/NUMTRI times (but at least once), and stopclock() is given the
 * number of triangles actually submitted.  Passing `events` itself
 * overstated the rate whenever events was not a multiple of NUMTRI.
 */
lightvpoly(events) int events; {
    register int i;
    int passes;

    passes = events/NUMTRI;
    if (passes < 1)
        passes = 1;     /* always draw something so a rate can be computed */

    printf("tmeshes, lighted, cached:\n");
    cpack(0);
    clear();
    light(TRUE);
    startclock();
    for (i=passes; i>0; i--) {
        callobj(lightobj);
    }
    finish();
    stopclock(passes * NUMTRI);
    light(FALSE);       /* leave lighting off for the tests that follow */
}
/* Buffer handed to times(); its fields are never read in the visible
   source, only the return value of times() is used. */
struct tms tbuf;
/* Value returned by times() when the current test started; stopclock()
   subtracts it from a fresh times() reading. */
long timecnt;
/*
 * Begin a timing interval.  swapcontext() is called first, so its one
 * second sleep and its GL call happen before the clock is read and are not
 * counted in the measured interval.
 */
startclock() {
/* sample the 10 millisecond clock */
swapcontext();
timecnt = times(&tbuf);
}
/* Clock ticks per second assumed for the value returned by times(); the
   original code hard-coded this as 100.0 (a 10 millisecond clock). */
#define TICKS_PER_SEC 100.0

/*
 * End the timing interval begun by startclock() and print the result.
 *
 * polygons: number of polygons drawn during the interval.
 *
 * Prints the rate in polygons per second and the time per polygon in
 * nanoseconds.  The nanosecond figure used to be printed with "%f.0",
 * which produced output such as "1645.000111.0"; it is now printed as a
 * whole number.  When no clock tick elapsed or no polygons were drawn, a
 * rate cannot be computed, so a short notice is printed instead of
 * dividing by zero.
 *
 * Always finishes by calling interrupt() so the user can abort between
 * tests.
 */
stopclock(polygons) int polygons; {
    long ticks;
    double period;
    double timeperpoly;
    double rate;

    ticks = times(&tbuf) - timecnt;
    if (polygons > 0 && ticks > 0) {
        period = (double)ticks / TICKS_PER_SEC;
        timeperpoly = period / (double)polygons;
        rate = 1.0 / timeperpoly;
        printf(" %6d polygons per second,%.0f nsec/poly\n",
               (int)rate, timeperpoly * 1000000000.0);
    } else {
        printf(" run too short to time (%d polygons, %ld clock ticks)\n",
               polygons, ticks);
    }
    interrupt();
}
/*
 * Called by startclock() just before the clock is sampled: sleeps for one
 * second, then makes a GL call (getcolor()) whose result is discarded.
 */
swapcontext() {
/* call a GL routine to insure that my context is in the pipe */
sleep(1);
getcolor();
}
/* Material property list; initlight() defines it as material 1 with
   lmdef(DEFMATERIAL, 1, 0, brass).  LMNULL ends the list. */
float brass[] = {
AMBIENT, 0.35, 0.25, 0.1,
DIFFUSE, 0.65, 0.5, 0.35,
SPECULAR, 0.0, 0.0, 0.0,
SHININESS, 5.0,
LMNULL
};
/* Light source property list; initlight() defines it as light 1 with
   lmdef(DEFLIGHT, 1, 0, whitelight). */
float whitelight[] = {
AMBIENT, 0.0, 0.0, 0.0,
LCOLOR, 1.0, 1.0, 1.0,
POSITION, 0.0, 0.0, 1.0, 0.0,
LMNULL
};
/* Lighting-model property list with LOCALVIEWER 0.0; this is the one
   initlight() defines as lighting model 1. */
float infinite[] = {
AMBIENT, 0.3, 0.3, 0.3,
LOCALVIEWER, 0.0,
LMNULL
};
/* Same as `infinite` except LOCALVIEWER is 1.0; not referenced anywhere in
   the visible source. */
float local[] = {
AMBIENT, 0.3, 0.3, 0.3,
LOCALVIEWER, 1.0,
LMNULL
};
/* 4x4 identity matrix stored as 16 consecutive floats; initlight() passes
   it to loadmatrix(). */
float idmat[] = {
1.0, 0.0, 0.0, 0.0,
0.0, 1.0, 0.0, 0.0,
0.0, 0.0, 1.0, 0.0,
0.0, 0.0, 0.0, 1.0
};
/*
 * Set up the matrices and the lighting tables used by the timing tests.
 *
 * The projection is an orthographic volume sized from getsize(), offset
 * by half a unit in x and y; the viewing matrix is loaded from idmat.
 * Material, light and lighting model number 1 are defined from the
 * brass, whitelight and infinite tables.  Only the light and the lighting
 * model are bound here; the material is bound and unbound by light().
 */
initlight() {
    long win_w, win_h;

    getsize(&win_w, &win_h);

    /* projection first, then switch to the viewing matrix */
    mmode(MPROJECTION);
    ortho(-0.5, (float)win_w-0.5, -0.5, (float)win_h-0.5, 0.0, 10000.0);
    mmode(MVIEWING);
    loadmatrix(idmat);

    /* define table entry 1 of each kind */
    lmdef(DEFMATERIAL, 1, 0, brass);
    lmdef(DEFLIGHT, 1, 0, whitelight);
    lmdef(DEFLMODEL, 1, 0, infinite);

    /* bind the light and the lighting model; the material is left unbound */
    lmbind(LIGHT1, 1);
    lmbind(LMODEL, 1);
}
/*
 * Switch lighting on or off for the tests.
 *
 * b: non-zero binds material 1 (defined by initlight()); zero binds
 *    material 0, which is how this program turns lighting off.
 */
light(b) int b; {
    if (b)
        lmbind(MATERIAL, 1);
    else
        lmbind(MATERIAL, 0);
}
/*
 * Poll the event queue between tests.  Entries are read for as long as
 * qtest() reports something; the program exits with status 0 as soon as
 * one of them is ESCKEY, and every other entry is discarded.
 */
interrupt() {
    short val;      /* receives the event value; not examined */

    while (qtest()) {
        if (qread(&val) != ESCKEY)
            continue;
        exit(0);
    }
}
-----
It's pretty gross because it has been borrowed and hacked over since
the 4D/60G days, but I believe that it gives honest numbers. The DL
program currently has DL's so big that the triangles won't fit onto
the screen if they're bigger than 2 or 3 (in one dimension, not pixels).
This could be easily changed. The last time I checked fill rate, I
was able to get 1M+ triangles at 60 pixels/triangle, but I haven't
done that since 3.3 went out.
Note that this uses display lists because that's the only way to get 1M
c3f/v3f or n3f/v3f numbers. This is due to a bus bandwidth limitation
that DL's can sneak around (but user code can't). A c3f/v3f/v3f...
sequence can easily hit 1M though.
Also note that we never bothered to tune c3i in the same way and c3s
has some problems that cause a significant performance hit. Almost
all customers I have ever talked to use n3f, c3f, v3f, and maybe cpack.
Hope this helps.
--
----------------------------------------------------------------------------
Brian McClendon bam@rudedog.SGI.COM ...!uunet!sgi!rudedog!bam 415-335-1110
----------------------------------------------------------------------------