[comp.benchmarks] VGX benchmark program

thomson@cs.utah.edu (Rich Thomson) (03/29/91)

Thanks to Brian McClendon and Bruce Holloway for providing the insight
on the VGX 1 M p/s numbers.  Here is code that Brian tells me
reproduces the performance (I don't have a VGX to try it on ... yet).

							-- Rich
Rich Thomson	thomson@cs.utah.edu  {bellcore,hplabs,uunet}!utah-cs!thomson
    ``Read my MIPs -- no new VAXes!!''  --George Bush after sniffing freon

Date: Thu, 28 Mar 91 19:16:35 -0800
From: bam@rudedog.asd.sgi.com (Brian McClendon)
Organization: Silicon Graphics, Inc.

After looking at Bruce's numbers, I re-ran the test that I used
to convince myself of VGX performance.  Here's the output from
a run that avoided fill limitations:

spmdevi> DLmesh2 2 0 10000000 0 1
size=2.000000, zb(0), cnt=10000000, shademodel:0,subpixel:1
tmeshes, lighted, cached:
  607902 polygons per second,1645.000111.0 nsec/poly
tmeshes COLORed (one c3f/vertex), cached:
  1014198 polygons per second,985.999918.0 nsec/poly
tmeshes COLORed (one c3i/vertex), cached:
  736377 polygons per second,1357.999963.0 nsec/poly
tmeshes COLORed (one c3s/vertex), cached:
  584453 polygons per second,1711.000095.0 nsec/poly
tmeshes, FLAT (one c3f), cached:
  1055966 polygons per second,947.000046.0 nsec/poly

	  
Here's the src code:

--- DLmesh2.c --- cc DLmesh2.c -lgl_s -lm -o DLmesh2
#include <stdio.h>
#include <gl.h>
#include <device.h>
#include <sys/types.h>
#include <sys/times.h>
#include <sys/param.h>
#include <math.h>

/*
 * Number of triangles in each display-list mesh.  main() emits
 * 1 + NUMTRI/2 vertex pairs per mesh, and the timing routines call each
 * object events/NUMTRI times.
 */
#define NUMTRI	600

/* XOFFSET/YOFFSET are the translation main() applies before timing. */
#define XOFFSET	20
#define YOFFSET	20
/* XSPACE, XWINDOW and YWINDOW are not referenced by the code in this file. */
#define XSPACE  50
#define XWINDOW 20
#define YWINDOW 20

/* PAGESIZ, NORMBUFSIZ and COLORBUFSIZ are not referenced in this file. */
#define PAGESIZ		4096
#define TRIBUFSIZ	((NUMTRI + 2) * 4)	/* number of floats */
#define NORMBUFSIZ	(4 * 4)			/* number of floats */
#define COLORBUFSIZ	(4 * 4)			/* number of floats */

/* malloc() is declared here but never called in this file. */
extern char *malloc();

/*
 * Four normals, four floats apart.  main() selects one with
 * (i%2)*4 or (i%4)*4 and passes it to n3f().
 */
float normbuf[16] = {1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,
			1.0,0.0,0.0,0.0};

/*
 * Mesh vertices, four floats apart; main() fills in the first three
 * floats of each entry and passes the entry to v3f().
 */
float tribuf[TRIBUFSIZ];

/* Four float colors, four floats apart, passed to c3f(). */
float colorbuf[16] = {0.9,0.0,0.0,0.9,0.0,0.9,0.0,0.9,0.0,0.0,0.9,0.9,
			0.9,0.9,0.9,0.9};

/* The same four colors as integers, passed to c3i(). */
int coloribuf[16] = {230,0,0,230,0,230,0,230,0,0,230,230,
			230,230,230,230};

/*
 * The same layout as shorts, passed to c3s().
 * NOTE(review): these entries are 260 where coloribuf uses 230; 260 is
 * presumably outside the 0..255 component range -- confirm whether 230
 * was intended.
 */
short colorsbuf[16] = {260,0,0,260,0,260,0,260,0,0,260,260,
			260,260,260,260};

/* Display-list object ids, assigned from genobj() in main(). */
int lightobj, colorobj, coloriobj, colorsobj, flatobj;

main(argc,argv) char *argv[]; {
    register i;
    register events;
    int rotated;
    float size;
    int doz,sm,sp;

    /* evaluate command line arguments */
    size = 10;
    doz = TRUE;
    sm = FLAT;
    sp = TRUE;
    events = 100000;
    if (argc > 1)	size = (float) atoi(argv[1]);
    if (argc > 2)	doz = atoi(argv[2]);
    if (argc > 3)	events = atoi(argv[3]);
    if (argc > 4)	sm = atoi(argv[4]);
    if (argc > 5)	sp = atoi(argv[5]);

    printf("size=%f, zb(%d), cnt=%d, shademodel:%d,subpixel:%d\n",
	size, doz, events,sm,sp);
    

    /* initialize graphics */

    prefposition(0,1279,0,200);

    foreground();
    winopen("speed");
    RGBmode();
    gconfig();
    initlight();
    if(doz)
    {
	    zbuffer(doz);
	    zfunction(ZF_ALWAYS);
	    printf("calling zbuffer(%d)\n",doz);
  }

    /* clear the window */
    cpack(0);
    clear();

#define SQRT3_2	(1.7321/2.0)
    /* initialize data arrays */
    for (i=0; i<(1 + NUMTRI/2); i++) {
	tribuf[i*8+0] = size*i;
	tribuf[i*8+1] = 0;
	tribuf[i*8+2] = 0;
	tribuf[i*8+4] = size*i + size/2;
	tribuf[i*8+5] = size*SQRT3_2;
	tribuf[i*8+6] = 0;
    }

    makeobj(lightobj = genobj());

    bgntmesh();
    for(i=0;i<(1 + NUMTRI/2);i++)
    {
    	n3f(&normbuf[(i%2)*4]);
    	v3f(&tribuf[i*8]);
    	n3f(&normbuf[(i%4)*4]);
    	v3f(&tribuf[i*8 + 4]);
    }
    endtmesh();
    closeobj();
    
    makeobj(flatobj = genobj());

    bgntmesh();
    c3f(&colorbuf[0]);
    for(i=0;i<(1 + NUMTRI/2);i++)
    {
    	v3f(&tribuf[i*8]);
    	v3f(&tribuf[i*8 + 4]);
    }
    endtmesh();
    closeobj();

    makeobj(colorobj = genobj());

    bgntmesh();
    for(i=0;i<(1 + NUMTRI/2);i++)
    {
    	c3f(&colorbuf[(i%2)*4]);
    	v3f(&tribuf[i*8]);
    	c3f(&colorbuf[(i%4)*4]);
    	v3f(&tribuf[i*8 + 4]);
    }
    endtmesh();
    closeobj();

    makeobj(coloriobj = genobj());

    bgntmesh();
    for(i=0;i<(1 + NUMTRI/2);i++)
    {
    	c3i(&coloribuf[(i%2)*4]);
    	v3f(&tribuf[i*8]);
    	c3i(&coloribuf[(i%4)*4]);
    	v3f(&tribuf[i*8 + 4]);
    }
    endtmesh();
    closeobj();

    makeobj(colorsobj = genobj());

    bgntmesh();
    for(i=0;i<(1 + NUMTRI/2);i++)
    {
    	c3s(&colorsbuf[(i%2)*4]);
    	v3f(&tribuf[i*8]);
    	c3s(&colorsbuf[(i%4)*4]);
    	v3f(&tribuf[i*8 + 4]);
    }
    endtmesh();
    closeobj();

    shademodel(sm);
    subpixel(sp);
    translate(XOFFSET,YOFFSET,0);
    /* run the timing tests */
    lightvpoly(events);
    colorvpoly(events);
    colorivpoly(events);
    colorsvpoly(events);
    flatvpoly(events);
}

int
colorvpoly(events)
    int events;
{
    /*
     * Time the cached triangle mesh that issues one c3f color per vertex
     * (colorobj) and print the result.
     *
     * events is the requested triangle count.  Every callobj() draws one
     * whole mesh of NUMTRI triangles, so only events/NUMTRI meshes are
     * drawn.  stopclock() is given the number of triangles actually
     * drawn, so a request that is not a multiple of NUMTRI does not
     * inflate the reported rate.
     */
    register int meshes = events / NUMTRI;
    register int i;

    printf("tmeshes COLORed (one c3f/vertex), cached:\n");
    cpack(0);
    clear();
    startclock();
    for (i = meshes; i > 0; i--) {
	callobj(colorobj);
    }
    finish();
    stopclock(meshes * NUMTRI);
}

int
colorivpoly(events)
    int events;
{
    /*
     * Time the cached triangle mesh that issues one c3i color per vertex
     * (coloriobj) and print the result.
     *
     * events is the requested triangle count.  Every callobj() draws one
     * whole mesh of NUMTRI triangles, so only events/NUMTRI meshes are
     * drawn.  stopclock() is given the number of triangles actually
     * drawn, so a request that is not a multiple of NUMTRI does not
     * inflate the reported rate.
     */
    register int meshes = events / NUMTRI;
    register int i;

    printf("tmeshes COLORed (one c3i/vertex), cached:\n");
    cpack(0);
    clear();
    startclock();
    for (i = meshes; i > 0; i--) {
	callobj(coloriobj);
    }
    finish();
    stopclock(meshes * NUMTRI);
}

int
colorsvpoly(events)
    int events;
{
    /*
     * Time the cached triangle mesh that issues one c3s color per vertex
     * (colorsobj) and print the result.
     *
     * events is the requested triangle count.  Every callobj() draws one
     * whole mesh of NUMTRI triangles, so only events/NUMTRI meshes are
     * drawn.  stopclock() is given the number of triangles actually
     * drawn, so a request that is not a multiple of NUMTRI does not
     * inflate the reported rate.
     */
    register int meshes = events / NUMTRI;
    register int i;

    printf("tmeshes COLORed (one c3s/vertex), cached:\n");
    cpack(0);
    clear();
    startclock();
    for (i = meshes; i > 0; i--) {
	callobj(colorsobj);
    }
    finish();
    stopclock(meshes * NUMTRI);
}

int
flatvpoly(events)
    int events;
{
    /*
     * Time the cached triangle mesh that sets a single c3f color for the
     * whole mesh (flatobj) and print the result.
     *
     * events is the requested triangle count.  Every callobj() draws one
     * whole mesh of NUMTRI triangles, so only events/NUMTRI meshes are
     * drawn.  stopclock() is given the number of triangles actually
     * drawn, so a request that is not a multiple of NUMTRI does not
     * inflate the reported rate.
     */
    register int meshes = events / NUMTRI;
    register int i;

    printf("tmeshes, FLAT (one c3f), cached:\n");
    cpack(0);
    clear();
    startclock();
    for (i = meshes; i > 0; i--) {
	callobj(flatobj);
    }
    finish();
    stopclock(meshes * NUMTRI);
}

int
lightvpoly(events)
    int events;
{
    /*
     * Time the cached triangle mesh that issues one normal per vertex
     * (lightobj) with lighting switched on, and print the result.
     * Lighting is switched off again before returning.
     *
     * events is the requested triangle count.  Every callobj() draws one
     * whole mesh of NUMTRI triangles, so only events/NUMTRI meshes are
     * drawn.  stopclock() is given the number of triangles actually
     * drawn, so a request that is not a multiple of NUMTRI does not
     * inflate the reported rate.
     */
    register int meshes = events / NUMTRI;
    register int i;

    printf("tmeshes, lighted, cached:\n");
    cpack(0);
    clear();
    light(TRUE);
    startclock();
    for (i = meshes; i > 0; i--) {
	callobj(lightobj);
    }
    finish();
    stopclock(meshes * NUMTRI);
    light(FALSE);
}
/* Scratch buffer handed to times(); its contents are not read here. */
struct tms tbuf;
/* Value returned by times() when the current measurement started. */
long timecnt;

int
startclock()
{
    /*
     * Begin a measurement.  swapcontext() runs first so that its delay
     * falls outside the timed interval; the clock value returned by
     * times() is then saved in timecnt for stopclock() to subtract.
     */
    swapcontext();

    timecnt = times(&tbuf);
}

int
stopclock(polygons)
    int polygons;
{
    /*
     * Finish a measurement started by startclock(): compute the elapsed
     * time, print the polygon rate and the time per polygon, then poll
     * for the escape key.
     *
     * polygons is the number of polygons drawn during the interval.
     *
     * The elapsed tick count is divided by 100, i.e. times() is assumed
     * to count 10 ms ticks -- confirm against HZ on the target system.
     */
    float period;
    float timeperpoly;
    float rate;
    long ticks;

    ticks = (long)times(&tbuf) - timecnt;

    if (polygons <= 0 || ticks <= 0) {
	/*
	 * Nothing measurable: dividing would give an infinite or
	 * undefined rate, and converting that to int is not defined.
	 */
	printf("  too little work to time (%d polygons, %ld clock ticks)\n",
	    polygons, ticks);
    } else {
	period = (float)ticks / 100.0;
	timeperpoly = period / (float)polygons;
	rate = 1.0 / timeperpoly;
	/* %.0f rounds to whole nanoseconds; the old "%f.0" printed
	 * six decimals followed by a literal ".0" */
	printf("  %6d polygons per second,%.0f nsec/poly\n", (int)rate,timeperpoly * 1000000000);
    }
    interrupt();
}

int
swapcontext()
{
    /*
     * Pause for one second, then issue a GL call (getcolor, result
     * unused) so that this process's graphics context is the one loaded
     * in the pipe before timing begins.
     */
    sleep(1);

    getcolor();
}

/*
 * Material property list; initlight() registers it as material 1 with
 * lmdef(DEFMATERIAL, ...).
 */
float brass[] = {
    AMBIENT, 0.35, 0.25,  0.1,
    DIFFUSE, 0.65, 0.5, 0.35,
    SPECULAR, 0.0, 0.0, 0.0,
    SHININESS, 5.0,
    LMNULL
};

/*
 * Light source property list; initlight() registers it as light 1 with
 * lmdef(DEFLIGHT, ...) and binds it to LIGHT1.
 */
float whitelight[] = {
    AMBIENT, 0.0, 0.0, 0.0, 
    LCOLOR, 1.0, 1.0, 1.0, 
    POSITION, 0.0, 0.0, 1.0, 0.0,
    LMNULL
};
		    
/*
 * Lighting model with LOCALVIEWER set to 0.0; initlight() registers it
 * as lighting model 1 and binds it to LMODEL.
 */
float infinite[] = {
    AMBIENT, 0.3,  0.3, 0.3, 
    LOCALVIEWER, 0.0, 
    LMNULL
};

/*
 * Same as the table above but with LOCALVIEWER set to 1.0.  Nothing in
 * this file references it.
 */
float local[] = {
    AMBIENT, 0.3,  0.3, 0.3, 
    LOCALVIEWER, 1.0, 
    LMNULL
};

/* 4x4 identity matrix; initlight() loads it as the viewing matrix. */
float idmat[] = {
    1.0, 0.0, 0.0, 0.0,
    0.0, 1.0, 0.0, 0.0,
    0.0, 0.0, 1.0, 0.0,
    0.0, 0.0, 0.0, 1.0
};



int
initlight()
{
    /*
     * Set up the matrices and a minimal lighting configuration for the
     * timing runs: an orthographic projection sized from the window, an
     * identity viewing matrix, and material, light and lighting-model
     * definitions numbered 1.  The light and the lighting model are
     * bound here; the material is bound separately by light().
     */
    long win_w, win_h;
    double right_edge, top_edge;

    getsize(&win_w, &win_h);
    right_edge = (float)win_w - 0.5;
    top_edge = (float)win_h - 0.5;

    /* projection: bounds run from -0.5 to (window size - 0.5) */
    mmode(MPROJECTION);
    ortho(-0.5, right_edge, -0.5, top_edge, 0.0, 10000.0);

    /* viewing: identity */
    mmode(MVIEWING);
    loadmatrix(idmat);

    /* define material, light and lighting model number 1 */
    lmdef(DEFMATERIAL, 1, 0, brass);
    lmdef(DEFLIGHT,    1, 0, whitelight);
    lmdef(DEFLMODEL,   1, 0, infinite);

    lmbind(LIGHT1, 1);
    lmbind(LMODEL, 1);
}

int
light(b)
    int b;
{
    /*
     * Switch lighting on or off: a non-zero b binds material 1,
     * zero binds material 0.
     */
    if (b) {
	lmbind(MATERIAL, 1);
    } else {
	lmbind(MATERIAL, 0);
    }
}


int
interrupt()
{
    /*
     * Drain the event queue and exit the program with status 0 if any
     * queued event came from the escape key.  Events from other devices
     * are read and discarded.
     *
     * NOTE(review): ESCKEY events presumably only reach the queue once
     * the device has been queued (qdevice) -- confirm the caller does so.
     */
    short val;		/* event value from qread(); not examined */

    while (qtest()) {
	if (qread(&val) == ESCKEY) {
	    exit(0);
	}
    }
}


-----


It's pretty gross because it has been borrowed and hacked over since
the 4D/60G days, but I believe that it gives honest numbers.  The DL
program currently has DL's so big that the triangles won't fit onto
the screen if they're bigger than 2 or 3 (in one dimension, not pixels).
This could be easily changed.  The last time I checked fill rate, I
was able to get 1M+ triangles at 60 pixels/triangle, but I haven't
done that since 3.3 went out.


Note that this uses display lists because that's the only way to get 1M
c3f/v3f or n3f/v3f numbers.  This is due to a bus bandwidth limitation
that DL's can sneak around (but user code can't).  A c3f/v3f/v3f...
sequence can easily hit 1M though.

Also note that we never bothered to tune c3i in the same way and c3s
has some problems that cause a significant performance hit.  Almost
all customers I have ever talked to use n3f, c3f, v3f, and maybe cpack.


Hope this helps.

-- 
----------------------------------------------------------------------------
 Brian McClendon bam@rudedog.SGI.COM ...!uunet!sgi!rudedog!bam 415-335-1110
----------------------------------------------------------------------------