[comp.sources.unix] v20i047: Plum-Hall benchmarks for timing common C operations

rsalz@uunet.uu.net (Rich Salz) (10/24/89)

Submitted-by: Eric S. Raymond <eric@snark.uu.net>
Posting-number: Volume 20, Issue 47
Archive-name: plum-benchmarks

This is a simple set of benchmarks intended to give programmers timing
information about common C operations. See the Makefile for instructions;
basically, all you have to do is type `make', wait, and look at bench.out.

A table of results for some popular machines is included in bench.tbl.
For more information on the benchmark techniques and the philosophy
behind them, browse the file ARTICLE.  Here's an excerpt from the article:

    We are placing into the public domain some simple  benchmarks  with
    several appealing properties:
	They are short enough to type while browsing at trade shows.

	They are protected against overly-aggressive compiler optimizations.

	They reflect empirically-observed operator frequencies in C programs.

	They give a C programmer information directly relevant to programming.

Enjoy!
eric@snark.uu.net


#!/bin/sh
: "This is a shell archive, meaning:                              "
: "1. Remove everything above the #! /bin/sh line.                "
: "2. Save the resulting test in a file.                          "
: "3. Execute the file with /bin/sh (not csh) to create the files:"
: "	READ.ME"
: "	Makefile"
: "	benchdbl.c"
: "	benches.c"
: "	benchfn.c"
: "	benchlng.c"
: "	benchmul.c"
: "	benchreg.c"
: "	benchsho.c"
: "	clock.c"
: "	f3.c"
: "	fround.c"
: "	run-all.c"
: "	benches.out"
: "	run-all.out"
: "	run-all.bat"
: "	time-cmd.bat"
: "	time-dbl.bat"
: "	run-all.sh"
: "	cr-lf"
: "	n-n"
: "	bench.tbl"
: "	ARTICLE"
echo file: READ.ME
sed 's/^X//' >READ.ME << 'END-of-READ.ME'
X			The Plum-Hall Benchmarks
X
XThis is a simple set of benchmarks intended to give programmers timing
Xinformation about common C operations. See the Makefile for instructions;
Xbasically, all you have to do is type `make', wait, and look at bench.out.
X
XA table of results for some popular machines is included in bench.tbl.
XFor more information on the benchmark techniques and the philosophy
Xbehind them, browse the file ARTICLE.
X
X						    Enjoy!
X						eric@snark.uu.net
END-of-READ.ME
echo file: Makefile
sed 's/^X//' >Makefile << 'END-of-Makefile'
X#
X# Makefile for the Plum-Hall benchmarks package
X#		by Eric S. Raymond (eric@snark.uu.net)
X#
X# Your system type for the benchmark list (do *not* string-quote it)
XSYSTYPE = AT&T 6386/375
X
X# Comment this out if your headers already define CLOCKS_PER_SEC;
X# otherwise set it to your system's clock tick rate (ticks per second).
XCFLAGS = -DCLOCKS_PER_SEC=60
X
X# Comment this out if you have ANSI clock(3)
XCLOCK = clock.o
X
X# Run both builds and collect their result rows in bench.out.
X# The programs are invoked as ./name so this works even when "." is
X# not on PATH.
Xbench.out: nbench obench
X	./nbench 1 "$(SYSTYPE) (no -O)" >bench.out
X	./obench 1 "$(SYSTYPE) (-O)" >>bench.out
X
X# optimizer off
X# benches.c #includes every bench*.c file, so they are prerequisites too.
Xnbench.o: benches.c benchreg.c benchsho.c benchlng.c benchmul.c benchfn.c benchdbl.c
X	$(CC) $(CFLAGS) -c benches.c
X	mv benches.o nbench.o
X# link with the same compiler that built the objects
Xnbench: nbench.o f3.o $(CLOCK)
X	$(CC) -o nbench nbench.o f3.o $(CLOCK) -lm
X
X# optimizer on
X# benches.c #includes every bench*.c file, so they are prerequisites too.
Xobench.o: benches.c benchreg.c benchsho.c benchlng.c benchmul.c benchfn.c benchdbl.c
X	$(CC) -O $(CFLAGS) -c benches.c
X	mv benches.o obench.o
X# link with the same compiler that built the objects
Xobench: obench.o f3.o $(CLOCK)
X	$(CC) -o obench obench.o f3.o $(CLOCK) -lm
X
X# Remove both benchmark executables, all objects and the generated archive.
Xclean:
X	rm -f nbench obench *.o bench.shar
X
X# Repackage the distribution files as a shell archive.
Xbench.shar:
X	shar READ.ME Makefile *.c *.out *.bat *.sh cr-lf n-n bench.tbl ARTICLE >bench.shar
END-of-Makefile
echo file: benchdbl.c
sed 's/^X//' >benchdbl.c << 'END-of-benchdbl.c'
X/* benchdbl - benchmark for  double   
X * Thomas Plum, Plum Hall Inc, 609-927-3770
X * If machine traps overflow, use an  unsigned  type 
X * Let  T  be the execution time in milliseconds
X * Then  average time per operator  =  T/major  usec
X * (Because the inner loop has exactly 1000 operations)
X */
X#define STOR_CL auto
X#define TYPE double
X#include <stdio.h>
Xmain(ac, av)
X        int ac;
X        char *av[];
X        {
X        STOR_CL TYPE a, b, c;
X        long d, major, atol();
X        static TYPE m[10] = {0};
X
X        major = atol(av[1]);
X        printf("executing %ld iterations\n", major);
X        a = b = (av[1][0] - '0');
X        for (d = 1; d <= major; ++d)
X                {
X                /* inner loop executes 1000 selected operations */
X                for (c = 1; c <= 40; ++c)
X                        {
X                        a = a + b + c;
X                        b = a * 2;
X                        a = b / 10;
X                        a = -a;
X                        b = -a - b - c;
X                        a = b == c;
X                        b = a + c;
X                        a = !b;
X                        b = a + c;
X                        a = b > c;
X                        }
X                }
X        printf("a=%d\n", a);
X        }
END-of-benchdbl.c
echo file: benches.c
sed 's/^X//' >benches.c << 'END-of-benches.c'
X/* benches - driver for Plum Hall benchmarks */
X#include <stdio.h>
X#include <time.h>
X
Xint benchreg(), benchsho(), benchlng();
Xint benchmul(), benchfn(), benchdbl();
X
Xvoid tabulate();
Xchar *fround();
Xmain(argc, argv)
X	int argc;
X	char *argv[];
X	{
X	char result[6][10];
X	int i;
X
X	if (argv[1][0] != '1')
X		printf("argv[1] must be   1   !\n");
X	if (argc < 3)
X		{
X		fprintf(stderr, "usage: benches 1 'compiler-id'\n");
X		exit(2);
X		}
X	tabulate(benchreg, result[0]);
X	tabulate(benchsho, result[1]);
X	tabulate(benchlng, result[2]);
X	tabulate(benchmul, result[3]);
X	tabulate(benchfn,  result[4]);
X	tabulate(benchdbl, result[5]);
X	printf("\n\n");
X	printf("%20.20s %9s %9s %9s %9s %9s %9s\n",
X		"", "register", "auto", "auto", "int", "function", "auto");
X	printf("%20.20s %9s %9s %9s %9s %9s %9s\n",
X		"", "int", "short", "long", "multiply", "call+ret", "double");
X	printf("%22.22s ",
X		argv[2]);
X	for (i = 0; i <= 5; ++i)
X		printf("%9.9s ", result[i]);
X	printf("\n");
X	exit(0);
X	}
Xvoid tabulate(fn, s)
X	void (*fn)();
X	char *s;
X	{
X	static char arg1[20];
X	static char *arga[3] = { "x", &arg1[0], 0 };
X	double before, after, microsec;
X	long major, major_next;
X
X	major_next = 1;
X	do  {
X		major = major_next;
X		sprintf(arg1, "%ld", major);
X		before = (double)clock();
X		(*fn)(2, arga);
X		after = (double)clock();
X		major_next *= 10;
X		} while (after-before < 100);
X	microsec = 1e3 * (after - before) / CLOCKS_PER_SEC / major;
X	sprintf(s, "%9s ", fround(microsec, 5, 3));
X	}
X
X/* fround - round double x to precision p, n significant digits
X * uses static string for result - not re-entrant
X * fround is an accommodation for K+R-level printf which lacks %.*e or %g
X * slow, fat version - uses sprintf
X */
X#include <stdio.h>
Xchar *fround(x, p, n)
X    double x;
X    short p;
X    short n;
X    {
X    double y;
X    double log10();
X    short digs;
X    short nlog;
X    static char s[40] = {0};
X    char fmt[20];
X
X    sprintf(fmt, "%%.%de", n-1);
X    sprintf(s, fmt, x);
X    sscanf(s, "%lf", &y);
X    if (y == 0)
X        nlog = 0;
X    else
X        nlog = log10(y);
X    if (nlog < 0)
X        --nlog;
X    digs = n - nlog - 1;
X    if (digs < 0)
X        digs = 0;
X    else if (digs > p)
X        digs = p;
X    sprintf(fmt, "%%.%df", digs);
X    sprintf(s, fmt, y);
X    if (digs == 0)
X        strcat(s, ".");
X    while (digs++ < p)
X        strcat(s, " ");
X    return (s);
X    }
X
X
X
X
X#define main benchreg
X#include "benchreg.c"
X
X#undef main
X#undef STOR_CL
X#undef TYPE
X#define main benchsho
X#include "benchsho.c"
X
X#undef main
X#undef STOR_CL
X#undef TYPE
X#define main benchlng
X#include "benchlng.c"
X
X#undef main
X#undef STOR_CL
X#undef TYPE
X#define main benchmul
X#include "benchmul.c"
X
X#undef main
X#undef STOR_CL
X#undef TYPE
X#define main benchfn
X#include "benchfn.c"
X
X#undef main
X#undef STOR_CL
X#undef TYPE
X#define main benchdbl
X#include "benchdbl.c"
END-of-benches.c
echo file: benchfn.c
sed 's/^X//' >benchfn.c << 'END-of-benchfn.c'
X/* benchfn - benchmark for function calls
X * Thomas Plum, Plum Hall Inc, 609-927-3770
X * Let  T  be the execution time in milliseconds
X * Then  average time per operator  =  T/major  usec
X * (Because the inner loop has exactly 1000 operations)
X */
X#include <stdio.h>
Xint dummy = 0;
X
Xf2() { f3();f3();f3();f3();f3();f3();f3();f3();f3();f3();} /* 10 */
Xf1() { f2();f2();f2();f2();f2();f2();f2();f2();f2();f2();} /* 10 */
Xf0() { f1();f1();f1();f1();f1();f1();f1();f1();f1();} /* 9 */
X
X/* main - call f0() av[1] ("major") times, then print the global dummy.
X * av[1] must be present; it is converted with atol().
X */
Xmain(ac, av)
X        int ac;
X        char *av[];
X        {
X        long d, major, atol();
X
X        major = atol(av[1]);
X        printf("executing %ld iterations\n", major);
X        for (d = 1; d <= major; ++d)
X                f0(); /* executes 1000 calls */
X        printf("dummy=%d\n", dummy);
X        }
END-of-benchfn.c
echo file: benchlng.c
sed 's/^X//' >benchlng.c << 'END-of-benchlng.c'
X/* benchlng - benchmark for  long  integers 
X * Thomas Plum, Plum Hall Inc, 609-927-3770
X * If machine traps overflow, use an  unsigned  type 
X * Let  T  be the execution time in milliseconds
X * Then  average time per operator  =  T/major  usec
X * (Because the inner loop has exactly 1000 operations)
X */
X#define STOR_CL auto
X#define TYPE long
X#include <stdio.h>
Xmain(ac, av)
X        int ac;
X        char *av[];
X        {
X        STOR_CL TYPE a, b, c;
X        long d, major, atol();
X        static TYPE m[10] = {0};
X
X        major = atol(av[1]);
X        printf("executing %ld iterations\n", major);
X        a = b = (av[1][0] - '0');
X        for (d = 1; d <= major; ++d)
X                {
X                /* inner loop executes 1000 selected operations */
X                for (c = 1; c <= 40; ++c)
X                        {
X                        a = a + b + c;
X                        b = a >> 1;
X                        a = b % 10;
X                        m[a] = a;
X                        b = m[a] - b - c;
X                        a = b == c;
X                        b = a | c;
X                        a = !b;
X                        b = a + c;
X                        a = b > c;
X                        }
X                }
X        printf("a=%d\n", a);
X        }
END-of-benchlng.c
echo file: benchmul.c
sed 's/^X//' >benchmul.c << 'END-of-benchmul.c'
X/* benchmul - benchmark for  int multiply
X * Thomas Plum, Plum Hall Inc, 609-927-3770
X * If machine traps overflow, use an  unsigned  type 
X * Let  T  be the execution time in milliseconds
X * Then  average time per operator  =  T/major  usec
X * (Because the inner loop has exactly 1000 operations)
X */
X#define STOR_CL auto
X#define TYPE int
X#include <stdio.h>
X/* main - run the multiply workload av[1] ("major") times.
X * a is seeded from the first character of av[1] rather than from a
X * constant, and its final value is printed so that the computation has
X * a visible result.  The product is expected to exceed the range of
X * TYPE; see the header note about machines that trap overflow.
X */
Xmain(ac, av)
X        int ac;
X        char *av[];
X        {
X        STOR_CL TYPE a, b, c;
X        long d, major, atol();
X        static TYPE m[10] = {0};
X
X        major = atol(av[1]);
X        printf("executing %ld iterations\n", major);
X        a = b = (av[1][0] - '0');
X        for (d = 1; d <= major; ++d)
X                {
X                /* inner loop executes 1000 selected operations */
X                for (c = 1; c <= 40; ++c)
X                        {
X                        /* 25 multiplies per pass, 40 passes */
X                        a = 3 *a*a*a*a*a*a*a*a * a*a*a*a*a*a*a*a * a*a*a*a*a*a*a*a * a; /* 25 * */
X                        }
X                }
X        printf("a=%d\n", a);
X        }
END-of-benchmul.c
echo file: benchreg.c
sed 's/^X//' >benchreg.c << 'END-of-benchreg.c'
X/* benchreg - benchmark for  register  integers 
X * Thomas Plum, Plum Hall Inc, 609-927-3770
X * If machine traps overflow, use an  unsigned  type 
X * Let  T  be the execution time in milliseconds
X * Then  average time per operator  =  T/major  usec
X * (Because the inner loop has exactly 1000 operations)
X */
X#define STOR_CL register
X#define TYPE int
X#include <stdio.h>
X/* main - run the register-int workload av[1] ("major") times.
X * a and b are seeded from the first character of av[1] rather than
X * from a constant, and the final value of a is printed so that the
X * computation has a visible result.
X */
Xmain(ac, av)
X        int ac;
X        char *av[];
X        {
X        STOR_CL TYPE a, b, c;
X        long d, major, atol();
X        static TYPE m[10] = {0};
X
X        major = atol(av[1]);
X        printf("executing %ld iterations\n", major);
X        a = b = (av[1][0] - '0');
X        for (d = 1; d <= major; ++d)
X                {
X                /* inner loop executes 1000 selected operations */
X                for (c = 1; c <= 40; ++c)
X                        {
X                        a = a + b + c;
X                        b = a >> 1;
X                        a = b % 10;
X                        m[a] = a;
X                        b = m[a] - b - c;
X                        a = b == c;
X                        b = a | c;
X                        a = !b;
X                        b = a + c;
X                        a = b > c;
X                        }
X                }
X        printf("a=%d\n", a);
X        }
END-of-benchreg.c
echo file: benchsho.c
sed 's/^X//' >benchsho.c << 'END-of-benchsho.c'
X/* benchsho - benchmark for  short  integers 
X * Thomas Plum, Plum Hall Inc, 609-927-3770
X * If machine traps overflow, use an  unsigned  type 
X * Let  T  be the execution time in milliseconds
X * Then  average time per operator  =  T/major  usec
X * (Because the inner loop has exactly 1000 operations)
X */
X#define STOR_CL auto
X#define TYPE short
X#include <stdio.h>
X/* main - run the short-integer workload av[1] ("major") times.
X * a and b are seeded from the first character of av[1] rather than
X * from a constant, and the final value of a is printed so that the
X * computation has a visible result.
X */
Xmain(ac, av)
X        int ac;
X        char *av[];
X        {
X        STOR_CL TYPE a, b, c;
X        long d, major, atol();
X        static TYPE m[10] = {0};
X
X        major = atol(av[1]);
X        printf("executing %ld iterations\n", major);
X        a = b = (av[1][0] - '0');
X        for (d = 1; d <= major; ++d)
X                {
X                /* inner loop executes 1000 selected operations */
X                for (c = 1; c <= 40; ++c)
X                        {
X                        a = a + b + c;
X                        b = a >> 1;
X                        a = b % 10;
X                        m[a] = a;
X                        b = m[a] - b - c;
X                        a = b == c;
X                        b = a | c;
X                        a = !b;
X                        b = a + c;
X                        a = b > c;
X                        }
X                }
X        printf("a=%d\n", a);
X        }
END-of-benchsho.c
echo file: clock.c
sed 's/^X//' >clock.c << 'END-of-clock.c'
X/* clock - primitive version of ANSI 'clock' function for UNIX */
Xlong clock()
X	{
X	struct tbuff { long pu; long ps; long cu; long cs; } tbuff;
X
X	times(&tbuff);
X	return(tbuff.pu + tbuff.ps);
X	}
END-of-clock.c
echo file: f3.c
sed 's/^X//' >f3.c << 'END-of-f3.c'
X/* f3 - lowest level function
X * Put this in separate source file if compiler detects and optimizes
X * useless code
X */
Xf3() { }
END-of-f3.c
echo file: fround.c
sed 's/^X//' >fround.c << 'END-of-fround.c'
X/* fround - round double x to precision p, n significant digits
X * uses static string for result - not re-entrant
X * fround is an accommodation for K+R-level printf which lacks %.*e or %g
X * slow, fat version - uses sprintf
X */
X#include <stdio.h>
Xchar *fround(x, p, n)
X    double x;
X    short p;
X    short n;
X    {
X    double y;
X    double log10();
X    short digs;
X    short nlog;
X    static char s[40] = {0};
X    char fmt[20];
X
X    sprintf(fmt, "%%.%de", n-1);
X    sprintf(s, fmt, x);
X    sscanf(s, "%lf", &y);
X    if (y == 0)
X        nlog = 0;
X    else
X        nlog = log10(y);
X    if (nlog < 0)
X        --nlog;
X    digs = n - nlog - 1;
X    if (digs < 0)
X        digs = 0;
X    else if (digs > p)
X        digs = p;
X    sprintf(fmt, "%%.%df", digs);
X    sprintf(s, fmt, y);
X    if (digs == 0)
X        strcat(s, ".");
X    while (digs++ < p)
X        strcat(s, " ");
X    return (s);
X    }
X#ifdef TRYMAIN
Xmain()
X    {
X    short m;
X
X    for (m = 1; m <= 5; ++m)
X        printf("fround(123.57, 2, %d) = %s;\n", m, fround(123.57, 2, m));
X    for (m = 1; m <= 5; ++m)
X        printf("fround(.013579, 5, %d) = %s;\n", m, fround(.013579, 5, m));
X    }
X#endif
END-of-fround.c
echo file: run-all.c
sed 's/^X//' >run-all.c << 'END-of-run-all.c'
X/* do_allbench - run all the benchmark programs */
X#include <stdio.h>
X#define NBENCHES 6
X#define TIME_FMT "Current time is %lf:%lf:%lf"
X#define CPUTIME_MIN 10000.
Xstatic struct timing
X	{
X	double cputime; char *fname; char *title1; char *title2;
X	} timings[NBENCHES] =
X	{
X	0., "benchreg",	"register",	"int",
X	0., "benchsho",	"auto",		"short",
X	0., "benchlng",	"auto",		"long",
X	0., "benchmul",	"integer",	"multiply",
X	0., "benchfn",	"function",	"call",
X	0., "benchdbl",	"auto",		"double",
X	};
Xstatic char cc_cmd[BUFSIZ] = {0};
Xstatic char command[BUFSIZ] = {0};
Xint compile(fname)
X	char *fname;
X	{
X	sprintf(command, cc_cmd, fname);
X	return (system(command));
X	}
Xint mk_crlf()
X	{
X	FILE *crlf;
X
X	crlf = fopen("cr-lf", "w");
X	if (crlf == NULL)
X		{
X		fprintf(stderr, "unable to create file  crlf\n");
X		exit(2);
X		}
X	putc('\n', crlf);
X	fclose(crlf);
X	}
Xdouble rd_time(tmpname)
X	char *tmpname;
X	{
X	FILE *fp;
X	double hrs, mins, secs;
X
X	fp = fopen(tmpname, "r");
X	fgets(buf, sizeof(buf), fp);
X	sscanf(buf, TIME_FMT, &hrs, &mins, &secs);
X	fclose(fp);
X	return (1000 * (secs + 60 * (mins + 60 * hrs));
X	}
Xdouble time_it(fname, iterations)
X	char *fname;
X	long iterations;
X	{
X	double t0, t1;
X
X	sprintf(command, "time <cr-lf >t0");
X	system(command);
X	t0 = rd_time("t0");
X	sprintf(command, "%s %ld", fname, iterations);
X	system(command);
X	sprintf(command, "time <cr-lf >t1");
X	system(command);
X	t1 = rd_time("t1");
X	return (t1 - t0);
X	}
Xdouble run(fname, major)
X	char *fname;
X	long major;
X	{
X	double t_empty, t_major;
X
X	t_empty = time_it(fname, 0L);
X	t_major = time_it(fname, major);
X	return (t_major - t_empty);
X	}
Xdouble do_all(fname)
X	char *fname;
X	{
X	double cputime;
X	long major;
X
X	compile(fname);
X	major = MAJOR_MIN;
X	do {
X		cputime = run(fname, major);
X		major *= 10;
X		} while (cputime < CPUTIME_MIN);
X	return (cputime / major);
X	}
Xmain(ac, av)
X	int ac;
X	char *av[];
X	{
X	int i;
X
X	strcpy(cc_cmd, av[1]);
X	for (i = 0; i <= NBENCHES; ++i)
X		timings[i].cputime = do_all(timings[i].fname);
X	printf("\n\n\nRESULTS:\n\n");
X	for (i = 0; i <= NBENCHES; ++i)
X		printf("%10s  ", timings[i].title1;
X	printf(\n");
X	for (i = 0; i <= NBENCHES; ++i)
X		printf("%10s  ", timings[i].title2;
X	printf(\n");
X	for (i = 0; i <= NBENCHES; ++i)
X		printf("%10.4f  ", timings[i].cputime);
X	printf("\n\n(All times are in microseconds\n");
X	}
END-of-run-all.c
echo file: benches.out
sed 's/^X//' >benches.out << 'END-of-benches.out'
Xexecuting 1 iterations
Xa=0
Xexecuting 10 iterations
Xa=0
Xexecuting 100 iterations
Xa=0
Xexecuting 1000 iterations
Xa=0
Xexecuting 10000 iterations
Xa=0
Xexecuting 1 iterations
Xa=0
Xexecuting 10 iterations
Xa=0
Xexecuting 100 iterations
Xa=0
Xexecuting 1000 iterations
Xa=0
Xexecuting 10000 iterations
Xa=0
Xexecuting 1 iterations
Xa=0
Xexecuting 10 iterations
Xa=0
Xexecuting 100 iterations
Xa=0
Xexecuting 1000 iterations
Xa=0
Xexecuting 10000 iterations
Xa=0
Xexecuting 1 iterations
Xa=-407629151
Xexecuting 10 iterations
Xa=-483154367
Xexecuting 100 iterations
Xa=-1034506623
Xexecuting 1000 iterations
Xa=-1045589759
Xexecuting 1 iterations
Xdummy=0
Xexecuting 10 iterations
Xdummy=0
Xexecuting 100 iterations
Xdummy=0
Xexecuting 1000 iterations
Xdummy=0
Xexecuting 1 iterations
Xa=0
Xexecuting 10 iterations
Xa=0
Xexecuting 100 iterations
Xa=0
X
X
X                      register      auto      auto       int  function      auto
X                           int     short      long  multiply  call+ret    double
X             xenix-386   0.24      0.46      0.43      1.87      3.00     90.5     
END-of-benches.out
echo file: run-all.out
sed 's/^X//' >run-all.out << 'END-of-run-all.out'
X+ cc -o benchfn.x benchfn.c 
Xbenchfn.c
X+ time benchfn.x 1000 
Xexecuting 1000 iterations
Xdummy=0
X
Xreal         3.3
Xuser         3.1
Xsys          0.0
X+ cc -o benchmul.x benchmul.c 
Xbenchmul.c
X+ time benchmul.x 10000 
Xexecuting 10000 iterations
Xa=427469313
X
Xreal        19.0
Xuser        18.8
Xsys          0.1
X+ cc -o benchlng.x benchlng.c 
Xbenchlng.c
X+ time benchlng.x 10000 
Xexecuting 10000 iterations
Xa=0
X
Xreal         5.1
Xuser         5.0
Xsys          0.0
X+ cc -o benchsho.x benchsho.c 
Xbenchsho.c
X+ time benchsho.x 10000 
Xexecuting 10000 iterations
Xa=0
X
Xreal         5.2
Xuser         5.1
Xsys          0.1
X+ cc -o benchreg.x benchreg.c 
Xbenchreg.c
X+ time benchreg.x 10000 
Xexecuting 10000 iterations
Xa=0
X
Xreal         2.6
Xuser         2.5
Xsys          0.0
X+ cc -o benchdbl.x benchdbl.c 
Xbenchdbl.c
X+ time benchdbl.x 10000 
Xexecuting 10000 iterations
Xa=0
X
Xreal     15:04.5
Xuser     15:04.4
Xsys          0.1
END-of-run-all.out
echo file: run-all.bat
sed 's/^X//' >run-all.bat << 'END-of-run-all.bat'
Xrem compile each benchmark and time it; benchfn needs f3.c linked in
Xcl benchreg.c
Xcommand /c time-cmd benchreg >benchreg.out
X
Xcl benchsho.c
Xcommand /c time-cmd benchsho >benchsho.out
X
Xcl benchlng.c
Xcommand /c time-cmd benchlng >benchlng.out
X
Xcl benchfn.c f3.c
Xcommand /c time-cmd benchfn >benchfn.out
X
Xcl benchmul.c
Xcommand /c time-cmd benchmul >benchmul.out
X
Xcl benchdbl.c
Xcommand /c time-cmd benchdbl >benchdbl.out
X
END-of-run-all.bat
echo file: time-cmd.bat
sed 's/^X//' >time-cmd.bat << 'END-of-time-cmd.bat'
Xtime <cr-lf
X%1 0
Xtime <cr-lf
X%1 10000
Xtime <cr-lf
END-of-time-cmd.bat
echo file: time-dbl.bat
sed 's/^X//' >time-dbl.bat << 'END-of-time-dbl.bat'
Xtime <cr-lf
Xbenchdbl 0
Xtime <cr-lf
Xbenchdbl 100
Xtime <cr-lf
END-of-time-dbl.bat
echo file: run-all.sh
sed 's/^X//' >run-all.sh << 'END-of-run-all.sh'
Xcc -o benchfn.x benchfn.c
Xtime benchfn.x  1000
Xcc -o benchmul.x benchmul.c
Xtime benchmul.x  10000
Xcc -o benchlng.x benchlng.c
Xtime benchlng.x  10000
Xcc -o benchsho.x benchsho.c
Xtime benchsho.x  10000
Xcc -o benchreg.x benchreg.c
Xtime benchreg.x  10000
Xcc -o benchdbl.x benchdbl.c
Xtime benchdbl.x  10000
END-of-run-all.sh
echo file: cr-lf
sed 's/^X//' >cr-lf << 'END-of-cr-lf'
X
END-of-cr-lf
echo file: n-n
sed 's/^X//' >n-n << 'END-of-n-n'
Xn
Xn
END-of-n-n
echo file: bench.tbl
sed 's/^X//' >bench.tbl << 'END-of-bench.tbl'
XMachine/compiler    register   auto       auto     int        func    auto
X                    int        short      long     multiply   call    dbl
X
XAT&T 3B2/05 (-O)      1.36      3.87      2.62     15.4       7.7     22.5
XAT&T 3B2/05 (no -O)   1.78      4.66      2.75     16.2       9.3     22.5
XAT&T 3B2/400 (-O)     1.09      1.36      1.10     16.2      10.0(?)  91.4
XAT&T 3B2/400 (no -O)  1.14      2.61      2.36     17.3      11.3     91.1
XAT&T 6386/375 (no -O) 0.61      1.39      1.23      3.85      5.62     6.77    
XAT&T 6386/375 (-O)    0.52      1.17      0.54      3.68      5.78     7.68    
XApollo DN330 (-O)     1.36       .78      1.36     10.17      3.57
XApollo DN330 (no -O)  1.54      1.28      1.54     11.30      3.64
XApollo DN580 (-O)     1.03       .59      1.03      7.67      2.72
XApollo DN580 (no -O)  1.18       .97      1.18      8.48      2.77
XApollo DN660 (-O)     5.88      1.24      5.88     21.86      4.26
XApollo DN660 (no -O)  5.93      1.52      5.93     21.93      4.29
XMasscomp 5500         3.18      2.7       4.9      30.8       7.3
XMasscomp 5600 (-O)     .45       .61       .46      2.83      1.04
XMasscomp 5600 (no -O)  .46       .78       .64      2.99      1.76
XPC/8088 (InstantC)   25.8      25.8      82.0      74.2     152.
XPC/8088 (WSL 3.1 lg)  6.18     10.4      66.5      31.8      28.8
XPyramid 90X (-O)       .85      1.04       .86      3.64      1.9      2.37
XPyramid 90X (no -O)    .86      1.01       .86      3.65      1.8      2.34
XSequent (-O)          1.39      2.99      2.53      9.90      9.3
XSequent (no -O)       1.50      3.25      2.83      9.95     13.2
XSun 3/260HM (-O)       .31       .48       .47      1.98      1.16
XSun 3/260HM (no -O)    .36       .58       .57      1.99      1.62
XSun 3/75M (-O)         .47       .77       .76      3.00      2.12
XSun 3/75M (no -O)      .53       .95       .94      3.01      2.73
XSun 3/75M(4.2, -O)     .50       .81       .83      2.85      1.5     20.7
XSun 3/75M(4.2, no -O)  .54      1.00      1.01      2.97      2.7     21.1
XSun 3/75M(VM, -O)      .46       .77       .75      2.96      2.1     20.8
XSun 3/75M(VM, no -O)   .52       .96       .93      2.97      2.7     21.1
XVAX 11/730 (-O)       4.00      9.80      6.20     16.2      42.8     12.4
XVAX 11/730 (no -O)    4.73     10.2       7.45     16.57     51.5     17.0
XVAX 11/780 (-O)       1.21      2.43      1.67      2.76     15.04     2.95
XVAX 11/780 (BSD 4.2)  1.38      2.42      1.96      2.92     17.2
XVAX 11/780 (UNIX 5.2) 1.24      2.48      1.79      2.72     15.7      3.89
XVAX 11/780 (no -O)    1.29      2.51      1.85      2.70     16.7      3.89
XVAX 11/785 (-O)        .93      1.85      1.32      5.00     13.9     47.5
XVAX 11/785 (no -O)    1.01      1.96      1.44      5.08     14.2      5.42
XVAX 8650(UNIX -O)      .236      .484      .298      .589     2.63      .578
XVAX 8650(UNIX no -O)   .258      .482      .316      .574     3.06      .791
XVAX 8650(Ultrix -O)    .23       .40       .29       .53      2.4       .56
XVAX 8650(Ultrix no -O) .26       .41       .34       .56      2.8       .77
END-of-bench.tbl
echo file: ARTICLE
sed 's/^X//' >ARTICLE << 'END-of-ARTICLE'
X
X
X
X
X
X
X[The following article appeared in  "C Users Journal" May 1988.
X It describes the purpose and use of the enclosed benchmarks. ]
X
X
XSIMPLE BENCHMARKS FOR C COMPILERS
X
Xby Thomas Plum
X
XDr.Plum is the author of several books on  C,  including  Efficient  C  (co-
Xauthored  with  Jim  Brodie).  He is Vice-Chair of the ANSI X3J11 Committee,
Xand Chairman of Plum Hall Inc, which offers introductory and  advanced  sem-
Xinars on C.
X
XCopyright (c) 1988, Plum Hall Inc
X
X
XWe are placing into the public domain some simple  benchmarks  with  several
Xappealing properties:
X
X    They are short enough to type while browsing at trade shows.
X
X    They are protected against overly-aggressive compiler optimizations.
X
X    They reflect empirically-observed operator frequencies in C programs.
X
X    They give a C programmer information directly relevant to programming.
X
XIn Efficient C, Jim Brodie and I described how useful it can be for  a  pro-
Xgrammer  to have a general idea of how many microseconds it takes to execute
Xthe "average operator" on   register  int's,  on   auto  short's,  on   auto
Xlong's,  and  on  double  data, as well as the time for an integer multiply,
Xand the time to call-and-return from a function.  These six numbers allow  a
Xprogrammer  to  make  very good first-order estimates of the CPU time that a
Xparticular algorithm will take.
X
XThe  following  easily-typed  benchmark  programs  determine   these   times
Xdirectly.   The  first  one  is  benchreg.c  ("benchmark for register opera-
Xtors"):
X
X
X
X
X
X
X
X
X
X
X
X
X
X
X
X
X
X
X
X
X
X                                   - 1 -
X
X
X
X
X
X                                   - 2 -
X
X
X    1   /* benchreg - benchmark for  register  integers
X    2    * Thomas Plum, Plum Hall Inc, 609-927-3770
X    3    * If machine traps overflow, use an  unsigned  type
X    4    * Let  T  be the execution time in milliseconds
X    5    * Then  average time per operator  =  T/major  usec
X    6    * (Because the inner loop has exactly 1000 operations)
X    7    */
X    8   #define STOR_CL register
X    9   #define TYPE int
X   10   #include <stdio.h>
X   11   main(ac, av)
X   12           int ac;
X   13           char *av[];
X   14           {
X   15           STOR_CL TYPE a, b, c;
X   16           long d, major, atol();
X   17           static TYPE m[10] = {0};
X   18
X   19           major = atol(av[1]);
X   20           printf("executing %ld iterations\n", major);
X   21           a = b = (av[1][0] - '0');
X   22           for (d = 1; d <= major; ++d)
X   23                   {
X   24                   /* inner loop executes 1000 selected operations */
X   25                   for (c = 1; c <= 40; ++c)
X   26                           {
X   27                           a = a + b + c;
X   28                           b = a >> 1;
X   29                           a = b % 10;
X   30                           m[a] = a;
X   31                           b = m[a] - b - c;
X   32                           a = b == c;
X   33                           b = a | c;
X   34                           a = !b;
X   35                           b = a + c;
X   36                           a = b > c;
X   37                           }
X   38                   }
X   39           printf("a=%d\n", a);
X   40           }
X
XIf you enter this and compile it to produce an executable program,  you  can
Xinvoke it with one argument, the number of iterations for the major loop:
X
X    benchreg  10000
X
XIf this execution takes 16 seconds, this means that  the  average   register
Xoperation  takes  1.6  microseconds  (16,000  milliseconds divided by 10,000
Xiterations of the major loop).
X
XLet us examine the program  in  detail.   Lines  8  and  9  define   STOR_CL
X("storage  class")  and  TYPE  to be register  and  int .  Thus, on line 15,
Xthree variables ( a , b , and  c ) are declared to be of this storage  class
Xand type.  At line 16, the major loop control variables are  long  integers,
Xbut they are touched only one one-thousandth as  often  as  the  inner  loop
X
X
X
X
X
X
X
X
X                                   - 3 -
X
X
Xvariables, so they have little effect upon the timings.   We  are  declaring
Xthe   atol   function to return a  long  integer; it would otherwise default
Xto an  int  return.  (If we were using a compiler based upon draft  ANSI  C,
Xwe  could   #include  <stdlib.h>  to get the declaration of  atol , but this
Xwould limit the applicability of the benchmarks.  This simple declaration is
Xall that even an ANSI compiler would need.)
X
XAt line 19, we set the  major  loop variable to the number given on the com-
Xmand line, and at line 20, we confirm it to the output.
X
XLine 21 is crucial to preventing some overly aggressive optimizations.  Ear-
Xlier  versions  of  these benchmarks had simply initialized  a  and  b to 1,
Xbut this allows a compiler to forward-propagate a known constant value.  The
Xexpression   av[1][0]   gives  the first digit-character of the command-line
Xargument; subtracting  '0'  produces a digit between 0  and  9.   (Yes,  the
Xlatest  ANSI draft now guarantees that the digit characters are a contiguous
Xsequence in any environment.)
X
XLine 22 simply executes the major loop the number  of  times  given  by  the
Xvariable   major  .   Line  25  repeats the inner loop 40 times, and with 25
Xoperators in that loop, this produces 1000 operators.  (Actually  there  are
X1003, because of the initialization and the extra increment and test at loop
Xcompletion.  The discrepancy is well within acceptable tolerances.)
X
XWithin the inner loop, 40% of the operators are assignments, in keeping with
Xthe  percentages  reported  in  the  original  Dhrystone work.  Of the other
Xoperators, the most frequent are plus and minus.  The sequence of operations
Xis  carefully  chosen to ensure that a very aggressive optimizer cannot find
Xany useless code sections; each result depends  functionally  upon  previous
Xresults.
X
XFinally, the printout at line 39  is  also  important  to  preventing  over-
Xoptimization.   If  the  compiler  could notice that we did nothing with the
Xcomputed result, it could discard all  the  operations  that  produced  that
Xresult.
X
XWe have completed our perusal of the first benchmark program,  benchreg.c  .
XThe second program (  benchsho.c , for  short's) is derived from  benchreg.c
Xby changing lines 8 and 9:  STOR_CL  becomes   auto  ,  and   TYPE   becomes
Xshort .  The program is otherwise unchanged.
X
XThe third program (  benchlng.c  ,  for   long's)  is  obtained  by  leaving
XSTOR_CL  as  auto and changing  TYPE  to  long .
X
XTo make the fourth program ( benchmul.c , for multiplies) we set   TYPE   to
Xint , and change lines 27 through 36 to one source line which does 25 multi-
Xplies:
X
X    a = 3 *a*a*a*a*a*a*a*a*a*a*a*a*a*a*a*a*a*a*a*a*a*a*a*a*a; /* 25 * */
X
XThe fifth program ( benchfn.c , for  functions)  is  a  major  rewrite.   We
Xarrange  a series of function definitions for  f3 ,  f2 ,  f1 , and  f0 such
Xthat each call to function  f0  generates exactly 1000 function-call  opera-
Xtions.   In case the compiler has an aggressive optimizer, move the function
Xf3  to a separate source file, so that the compiler cannot see  how  useless
X
X
X
X
X
X
X
X
X                                   - 4 -
X
X
Xit is.  The global variable  dummy  will make the compiler  think  that   f3
Xmight be up to something useful.  Here, then, is the  benchfn.c  function:
X
X    1   /* benchfn - benchmark for function calls
X    2    * Thomas Plum, Plum Hall Inc, 609-927-3770
X    3    * Let  T  be the execution time in milliseconds
X    4    * Then  average time per operator  =  T/major  usec
X    5    * (Because the inner loop has exactly 1000 operations)
X    6    */
X    7   #include <stdio.h>
X    8   int dummy = 0;
X    9
X   10   /* f3 - lowest level function
X   11    * Put this in separate source file if compiler detects and
X   12    * optimizes useless code
X   13    */
X   14   f3() { }
X   15
X   16   f2() { f3();f3();f3();f3();f3();f3();f3();f3();f3();f3();} /* 10 */
X   17   f1() { f2();f2();f2();f2();f2();f2();f2();f2();f2();f2();} /* 10 */
X   18   f0() { f1();f1();f1();f1();f1();f1();f1();f1();f1();} /* 9 */
X   19
X   20   main(ac, av)
X   21           int ac;
X   22           char *av[];
X   23           {
X   24           long d, major, atol();
X   25
X   26           major = atol(av[1]);
X   27           printf("executing %ld iterations\n", major);
X   28           for (d = 1; d <= major; ++d)
X   29                   f0(); /* executes 1000 calls */
X   30           printf("dummy=%d\n", dummy);
X   31           }
X
XThe sixth program ( benchdbl.c , for  double's ) is derived from  benchlng.c
Xby  changing  STOR_CL  to  auto , TYPE  to  double , and replacing the inner
Xloop body with this slightly different version:
X
X    a = a + b + c;
X    b = a * 2;
X    a = b / 10;
X    a = -a;
X    b = -a - b - c;
X    a = b == c;
X    b = a + c;
X    a = !b;
X    b = a + c;
X    a = b > c;
X
XThese changes are necessary because floating-point operands are not  allowed
Xfor  the  shift, remainder, and bitwise operators, and because the subscript
Xoperator does not really exercise  the  floating-point  instructions.   This
Xrevised  inner  loop  still  gives us a representative mix of typical opera-
Xtions.
X
X
X
X
X
X
X
X
X                                   - 5 -
X
X
XThis, then, completes our collection of six benchmark programs.  After  they
Xare  compiled to produce executable programs, the next question is "How do I
Xtime the execution?"
X
XOn UNIX systems, the timing is easy -- just run the  time  command:
X
X    $ time benchreg 10000
X
XThe sum of the "user" and "system" times will give the CPU time used by  the
Xprogram.
X
XMore accurately, we could time the execution of zero  iterations,  and  sub-
Xtract that time from the time for the measured number of iterations.
X
XOn MS-DOS systems, timings can be obtained, but with greater difficulty.  If
Xwe  create  a  file  named   CR-LF   which  contains  just  one  newline (or
X"carriage-return-newline" in DOS parlance), we could time our program with a
X"batch" file such as this:
X
X    time <cr-lf
X    benchreg 0
X    time <cr-lf
X    benchreg 10000
X    time <cr-lf
X
XWe must then take times that are expressed in minutes-and-seconds  and  pro-
Xduce differences expressed in seconds.
X
XWith whichever method, we eventually produce six numbers that are character-
Xistic of a particular environment (a specific compiler supporting a specific
Xmachine).
X
X[NOTE: Since this article appeared, I have added a driver program,  benches.c.
XIn an ANSI environment with the  clock  function, it will run all the tests
Xand report the results, eliminating the need for manual computations.]
X
XHere are some examples of timing  results  that  have  been  obtained  on  a
Xvariety of minicomputer and workstation environments:
X
X
X
X
X
X
X
X
X
X
X
X
X
X
X
X
X
X
X
X
X
X
X
X
X
X                                   - 6 -
X
X
XMachine/compiler    register   auto       auto     int        func    auto
X                    int        short      long     multiply   call    dbl
X
XAT&T 3B2/05 (-O)      1.36      3.87      2.62     15.4       7.7     22.5
XAT&T 3B2/05 (no -O)   1.78      4.66      2.75     16.2       9.3     22.5
XAT&T 3B2/400 (-O)     1.09      1.36      1.10     16.2      10.0(?)  91.4
XAT&T 3B2/400 (no -O)  1.14      2.61      2.36     17.3      11.3     91.1
XApollo DN330 (-O)     1.36       .78      1.36     10.17      3.57
XApollo DN330 (no -O)  1.54      1.28      1.54     11.30      3.64
XApollo DN580 (-O)     1.03       .59      1.03      7.67      2.72
XApollo DN580 (no -O)  1.18       .97      1.18      8.48      2.77
XApollo DN660 (-O)     5.88      1.24      5.88     21.86      4.26
XApollo DN660 (no -O)  5.93      1.52      5.93     21.93      4.29
XCray X-MP (no vectors) .0567     .0656     .0822     .366      .821     .082
XMasscomp 5500         3.18      2.7       4.9      30.8       7.3
XMasscomp 5600 (-O)     .45       .61       .46      2.83      1.04
XMasscomp 5600 (no -O)  .46       .78       .64      2.99      1.76
XPyramid 90X (-O)       .85      1.04       .86      3.64      1.9      2.37
XPyramid 90X (no -O)    .86      1.01       .86      3.65      1.8      2.34
XSequent (-O)          1.39      2.99      2.53      9.90      9.3
XSequent (no -O)       1.50      3.25      2.83      9.95     13.2
XSun 3/260HM (-O)       .31       .48       .47      1.98      1.16
XSun 3/260HM (no -O)    .36       .58       .57      1.99      1.62
XSun 3/75M (-O)         .47       .77       .76      3.00      2.12
XSun 3/75M (no -O)      .53       .95       .94      3.01      2.73
XSun 3/75M(4.2, -O)     .50       .81       .83      2.85      1.5     20.7
XSun 3/75M(4.2, no -O)  .54      1.00      1.01      2.97      2.7     21.1
XSun 3/75M(VM, -O)      .46       .77       .75      2.96      2.1     20.8
XSun 3/75M(VM, no -O)   .52       .96       .93      2.97      2.7     21.1
XVAX 11/730 (-O)       4.00      9.80      6.20     16.2      42.8     12.4
XVAX 11/730 (no -O)    4.73     10.2       7.45     16.57     51.5     17.0
XVAX 11/780 (-O)       1.21      2.43      1.67      2.76     15.0      2.95
XVAX 11/780 (BSD 4.2)  1.38      2.42      1.96      2.92     17.2
XVAX 11/780 (UNIX 5.2) 1.24      2.48      1.79      2.72     15.7      3.89
XVAX 11/780 (no -O)    1.29      2.51      1.85      2.70     16.7      3.89
XVAX 11/785 (-O)        .93      1.85      1.32      5.00     13.9     47.5
XVAX 11/785 (no -O)    1.01      1.96      1.44      5.08     14.2      5.42
XVAX 8650(UNIX -O)      .236      .484      .298      .589     2.63      .578
XVAX 8650(UNIX no -O)   .258      .482      .316      .574     3.06      .791
XVAX 8650(Ultrix -O)    .23       .40       .29       .53      2.4       .56
XVAX 8650(Ultrix no -O) .26       .41       .34       .56      2.8       .77
X
XNotice that some of these timings were run before the   benchdbl   benchmark
Xhad  been  written.  There are no examples of the popular PC environments in
Xthis table.  If interested readers wish to run these benchmarks on their own
Xenvironments, I will endeavor to present these results in a future article.
X
XProcessor speeds are sometimes described in "MIPS" (millions of instructions
Xper  second);  using  a value such as the number of  register  operators per
Xsecond in C might give rise to a "MOPS" measurement of more use  to  C  pro-
Xgrammers.   Those of us who have tried these benchmarks have appreciated the
Xintuitive grasp that they give of the speed of  current  machines  and  com-
Xpilers.  I hope that you too will find them of interest.
X
X
X
X
X
X
X
END-of-ARTICLE
exit
-- 
      Eric S. Raymond = eric@snark.uu.net    (mad mastermind of TMN-Netnews)


-- 
Please send comp.sources.unix-related mail to rsalz@uunet.uu.net.
Use a domain-based address or give alternate paths, or you may lose out.