rsalz@uunet.uu.net (Rich Salz) (10/24/89)
Submitted-by: Eric S. Raymond <eric@snark.uu.net> Posting-number: Volume 20, Issue 47 Archive-name: plum-benchmarks This is a simple set of benchmarks intended to give programmers timing information about common C operations. See the Makefile for instructions; basically, all you have to do is type `make', wait, and look at bench.out. A table of results for some popular machines is included in bench.tbl. For more information on the benchmark techniques and the philosophy behind them, browse the file ARTICLE. Here's an excerpt from the article: We are placing into the public domain some simple benchmarks with several appealing properties: They are short enough to type while browsing at trade shows. They are protected against overly-aggressive compiler optimizations. They reflect empirically-observed operator frequencies in C programs. They give a C programmer information directly relevant to programming. Enjoy! eric@snark.uu.net #!/bin/sh : "This is a shell archive, meaning: " : "1. Remove everything above the #! /bin/sh line. " : "2. Save the resulting test in a file. " : "3. Execute the file with /bin/sh (not csh) to create the files:" : " READ.ME" : " Makefile" : " benchdbl.c" : " benches.c" : " benchfn.c" : " benchlng.c" : " benchmul.c" : " benchreg.c" : " benchsho.c" : " clock.c" : " f3.c" : " fround.c" : " run-all.c" : " benches.out" : " run-all.out" : " run-all.bat" : " time-cmd.bat" : " time-dbl.bat" : " run-all.sh" : " cr-lf" : " n-n" : " bench.tbl" : " ARTICLE" echo file: READ.ME sed 's/^X//' >READ.ME << 'END-of-READ.ME' X The Plum-Hall Benchmarks X XThis is a simple set of benchmarks intended to give programmers timing Xinformation about common C operations. See the Makefile for instructions; Xbasically, all you have to do is type `make', wait, and look at bench.out. X XA table of results for some popular machines is included in bench.tbl. XFor more information on the benchmark techniques and the philosophy Xbehind them, browse the file ARTICLE. X X Enjoy! 
X eric@snark.uu.net END-of-READ.ME echo file: Makefile sed 's/^X//' >Makefile << 'END-of-Makefile' X# X# Makefile for the Plum-Hall benchmarks package X# by Eric S. Raymond (eric@snark.uu.net) X# X# Your system type for the benchmark list (do *not* string-quote it) XSYSTYPE = AT&T 6386/375 X X# Ditto, if your headers define CLOCKS_PER_SEC XCFLAGS = -DCLOCKS_PER_SEC=60 X X# Comment this out if you have ANSI clock(3) XCLOCK = clock.o X Xbench.out: nbench obench X nbench 1 "$(SYSTYPE) (no -O)" >bench.out X obench 1 "$(SYSTYPE) (-O)" >>bench.out X X# optimizer off Xnbench.o: benches.c X $(CC) $(CFLAGS) -c benches.c X mv benches.o nbench.o Xnbench: nbench.o f3.o $(CLOCK) X cc -o nbench nbench.o f3.o $(CLOCK) -lm X X# optimizer on Xobench.o: benches.c X $(CC) -O $(CFLAGS) -c benches.c X mv benches.o obench.o Xobench: obench.o f3.o $(CLOCK) X cc -o obench obench.o f3.o $(CLOCK) -lm X Xclean: X rm -f [no]bench *.o bench.shar X Xbench.shar: X shar READ.ME Makefile *.c *.out *.bat *.sh cr-lf n-n bench.tbl ARTICLE >bench.shar END-of-Makefile echo file: benchdbl.c sed 's/^X//' >benchdbl.c << 'END-of-benchdbl.c' X/* benchdbl - benchmark for double X * Thomas Plum, Plum Hall Inc, 609-927-3770 X * If machine traps overflow, use an unsigned type X * Let T be the execution time in milliseconds X * Then average time per operator = T/major usec X * (Because the inner loop has exactly 1000 operations) X */ X#define STOR_CL auto X#define TYPE double X#include <stdio.h> Xmain(ac, av) X int ac; X char *av[]; X { X STOR_CL TYPE a, b, c; X long d, major, atol(); X static TYPE m[10] = {0}; X X major = atol(av[1]); X printf("executing %ld iterations\n", major); X a = b = (av[1][0] - '0'); X for (d = 1; d <= major; ++d) X { X /* inner loop executes 1000 selected operations */ X for (c = 1; c <= 40; ++c) X { X a = a + b + c; X b = a * 2; X a = b / 10; X a = -a; X b = -a - b - c; X a = b == c; X b = a + c; X a = !b; X b = a + c; X a = b > c; X } X } X printf("a=%d\n", a); X } END-of-benchdbl.c 
echo file: benches.c sed 's/^X//' >benches.c << 'END-of-benches.c' X/* benches - driver for Plum Hall benchmarks */ X#include <stdio.h> X#include <time.h> X Xint benchreg(), benchsho(), benchlng(); Xint benchmul(), benchfn(), benchdbl(); X Xvoid tabulate(); Xchar *fround(); Xmain(argc, argv) X int argc; X char *argv[]; X { X char result[6][10]; X int i; X X if (argv[1][0] != '1') X printf("argv[1] must be 1 !\n"); X if (argc < 3) X { X fprintf(stderr, "usage: benches 1 'compiler-id'\n"); X exit(2); X } X tabulate(benchreg, result[0]); X tabulate(benchsho, result[1]); X tabulate(benchlng, result[2]); X tabulate(benchmul, result[3]); X tabulate(benchfn, result[4]); X tabulate(benchdbl, result[5]); X printf("\n\n"); X printf("%20.20s %9s %9s %9s %9s %9s %9s\n", X "", "register", "auto", "auto", "int", "function", "auto"); X printf("%20.20s %9s %9s %9s %9s %9s %9s\n", X "", "int", "short", "long", "multiply", "call+ret", "double"); X printf("%22.22s ", X argv[2]); X for (i = 0; i <= 5; ++i) X printf("%9.9s ", result[i]); X printf("\n"); X exit(0); X } Xvoid tabulate(fn, s) X void (*fn)(); X char *s; X { X static char arg1[20]; X static char *arga[3] = { "x", &arg1[0], 0 }; X double before, after, microsec; X long major, major_next; X X major_next = 1; X do { X major = major_next; X sprintf(arg1, "%ld", major); X before = (double)clock(); X (*fn)(2, arga); X after = (double)clock(); X major_next *= 10; X } while (after-before < 100); X microsec = 1e3 * (after - before) / CLOCKS_PER_SEC / major; X sprintf(s, "%9s ", fround(microsec, 5, 3)); X } X X/* fround - round double x to precision p, n significant digits X * uses static string for result - not re-entrant X * fround is an accomodation for K+R-level printf which lacks %.*e or %g X * slow, fat version - uses sprintf X */ X#include <stdio.h> Xchar *fround(x, p, n) X double x; X short p; X short n; X { X double y; X double log10(); X short digs; X short nlog; X static char s[40] = {0}; X char fmt[20]; X X sprintf(fmt, 
"%%.%de", n-1); X sprintf(s, fmt, x); X sscanf(s, "%lf", &y); X if (y == 0) X nlog = 0; X else X nlog = log10(y); X if (nlog < 0) X --nlog; X digs = n - nlog - 1; X if (digs < 0) X digs = 0; X else if (digs > p) X digs = p; X sprintf(fmt, "%%.%df", digs); X sprintf(s, fmt, y); X if (digs == 0) X strcat(s, "."); X while (digs++ < p) X strcat(s, " "); X return (s); X } X X X X X#define main benchreg X#include "benchreg.c" X X#undef main X#undef STOR_CL X#undef TYPE X#define main benchsho X#include "benchsho.c" X X#undef main X#undef STOR_CL X#undef TYPE X#define main benchlng X#include "benchlng.c" X X#undef main X#undef STOR_CL X#undef TYPE X#define main benchmul X#include "benchmul.c" X X#undef main X#undef STOR_CL X#undef TYPE X#define main benchfn X#include "benchfn.c" X X#undef main X#undef STOR_CL X#undef TYPE X#define main benchdbl X#include "benchdbl.c" END-of-benches.c echo file: benchfn.c sed 's/^X//' >benchfn.c << 'END-of-benchfn.c' X/* benchfn - benchmark for function calls X * Thomas Plum, Plum Hall Inc, 609-927-3770 X * Let T be the execution time in milliseconds X * Then average time per operator = T/major usec X * (Because the inner loop has exactly 1000 operations) X */ X#include <stdio.h> Xint dummy = 0; X Xf2() { f3();f3();f3();f3();f3();f3();f3();f3();f3();f3();} /* 10 */ Xf1() { f2();f2();f2();f2();f2();f2();f2();f2();f2();f2();} /* 10 */ Xf0() { f1();f1();f1();f1();f1();f1();f1();f1();f1();} /* 9 */ X Xmain(ac, av) X int ac; X char *av[]; X { X long d, major, atol(); X X major = atol(av[1]); X printf("executing %ld iterations\n", major); X for (d = 1; d <= major; ++d) X f0(); /* executes 1000 calls */ X printf("dummy=%d\n", dummy); X } END-of-benchfn.c echo file: benchlng.c sed 's/^X//' >benchlng.c << 'END-of-benchlng.c' X/* benchlng - benchmark for long integers X * Thomas Plum, Plum Hall Inc, 609-927-3770 X * If machine traps overflow, use an unsigned type X * Let T be the execution time in milliseconds X * Then average time per operator = 
T/major usec X * (Because the inner loop has exactly 1000 operations) X */ X#define STOR_CL auto X#define TYPE long X#include <stdio.h> Xmain(ac, av) X int ac; X char *av[]; X { X STOR_CL TYPE a, b, c; X long d, major, atol(); X static TYPE m[10] = {0}; X X major = atol(av[1]); X printf("executing %ld iterations\n", major); X a = b = (av[1][0] - '0'); X for (d = 1; d <= major; ++d) X { X /* inner loop executes 1000 selected operations */ X for (c = 1; c <= 40; ++c) X { X a = a + b + c; X b = a >> 1; X a = b % 10; X m[a] = a; X b = m[a] - b - c; X a = b == c; X b = a | c; X a = !b; X b = a + c; X a = b > c; X } X } X printf("a=%d\n", a); X } END-of-benchlng.c echo file: benchmul.c sed 's/^X//' >benchmul.c << 'END-of-benchmul.c' X/* benchmul - benchmark for int multiply X * Thomas Plum, Plum Hall Inc, 609-927-3770 X * If machine traps overflow, use an unsigned type X * Let T be the execution time in milliseconds X * Then average time per operator = T/major usec X * (Because the inner loop has exactly 1000 operations) X */ X#define STOR_CL auto X#define TYPE int X#include <stdio.h> Xmain(ac, av) X int ac; X char *av[]; X { X STOR_CL TYPE a, b, c; X long d, major, atol(); X static TYPE m[10] = {0}; X X major = atol(av[1]); X printf("executing %ld iterations\n", major); X a = b = (av[1][0] - '0'); X for (d = 1; d <= major; ++d) X { X /* inner loop executes 1000 selected operations */ X for (c = 1; c <= 40; ++c) X { X a = 3 *a*a*a*a*a*a*a*a * a*a*a*a*a*a*a*a * a*a*a*a*a*a*a*a * a; /* 25 * */ X } X } X printf("a=%d\n", a); X } END-of-benchmul.c echo file: benchreg.c sed 's/^X//' >benchreg.c << 'END-of-benchreg.c' X/* benchreg - benchmark for register integers X * Thomas Plum, Plum Hall Inc, 609-927-3770 X * If machine traps overflow, use an unsigned type X * Let T be the execution time in milliseconds X * Then average time per operator = T/major usec X * (Because the inner loop has exactly 1000 operations) X */ X#define STOR_CL register X#define TYPE int X#include 
<stdio.h> Xmain(ac, av) X int ac; X char *av[]; X { X STOR_CL TYPE a, b, c; X long d, major, atol(); X static TYPE m[10] = {0}; X X major = atol(av[1]); X printf("executing %ld iterations\n", major); X a = b = (av[1][0] - '0'); X for (d = 1; d <= major; ++d) X { X /* inner loop executes 1000 selected operations */ X for (c = 1; c <= 40; ++c) X { X a = a + b + c; X b = a >> 1; X a = b % 10; X m[a] = a; X b = m[a] - b - c; X a = b == c; X b = a | c; X a = !b; X b = a + c; X a = b > c; X } X } X printf("a=%d\n", a); X } END-of-benchreg.c echo file: benchsho.c sed 's/^X//' >benchsho.c << 'END-of-benchsho.c' X/* benchsho - benchmark for short integers X * Thomas Plum, Plum Hall Inc, 609-927-3770 X * If machine traps overflow, use an unsigned type X * Let T be the execution time in milliseconds X * Then average time per operator = T/major usec X * (Because the inner loop has exactly 1000 operations) X */ X#define STOR_CL auto X#define TYPE short X#include <stdio.h> Xmain(ac, av) X int ac; X char *av[]; X { X STOR_CL TYPE a, b, c; X long d, major, atol(); X static TYPE m[10] = {0}; X X major = atol(av[1]); X printf("executing %ld iterations\n", major); X a = b = (av[1][0] - '0'); X for (d = 1; d <= major; ++d) X { X /* inner loop executes 1000 selected operations */ X for (c = 1; c <= 40; ++c) X { X a = a + b + c; X b = a >> 1; X a = b % 10; X m[a] = a; X b = m[a] - b - c; X a = b == c; X b = a | c; X a = !b; X b = a + c; X a = b > c; X } X } X printf("a=%d\n", a); X } END-of-benchsho.c echo file: clock.c sed 's/^X//' >clock.c << 'END-of-clock.c' X/* clock - primitive version of ANSI 'clock' function for UNIX */ Xlong clock() X { X struct tbuff { long pu; long ps; long cu; long cs; } tbuff; X X times(&tbuff); X return(tbuff.pu + tbuff.ps); X } END-of-clock.c echo file: f3.c sed 's/^X//' >f3.c << 'END-of-f3.c' X/* f3 - lowest level function X * Put this in separate source file if compiler detects and optimizes X * useless code X */ Xf3() { } END-of-f3.c echo file: fround.c 
sed 's/^X//' >fround.c << 'END-of-fround.c' X/* fround - round double x to precision p, n significant digits X * uses static string for result - not re-entrant X * fround is an accomodation for K+R-level printf which lacks %.*e or %g X * slow, fat version - uses sprintf X */ X#include <stdio.h> Xchar *fround(x, p, n) X double x; X short p; X short n; X { X double y; X double log10(); X short digs; X short nlog; X static char s[40] = {0}; X char fmt[20]; X X sprintf(fmt, "%%.%de", n-1); X sprintf(s, fmt, x); X sscanf(s, "%lf", &y); X if (y == 0) X nlog = 0; X else X nlog = log10(y); X if (nlog < 0) X --nlog; X digs = n - nlog - 1; X if (digs < 0) X digs = 0; X else if (digs > p) X digs = p; X sprintf(fmt, "%%.%df", digs); X sprintf(s, fmt, y); X if (digs == 0) X strcat(s, "."); X while (digs++ < p) X strcat(s, " "); X return (s); X } X#ifdef TRYMAIN Xmain() X { X short m; X X for (m = 1; m <= 5; ++m) X printf("fround(123.57, 2, %d) = %s;\n", m, fround(123.57, 2, m)); X for (m = 1; m <= 5; ++m) X printf("fround(.013579, 5, %d) = %s;\n", m, fround(.013579, 5, m)); X } X#endif END-of-fround.c echo file: run-all.c sed 's/^X//' >run-all.c << 'END-of-run-all.c' X/* do_allbench - run all the benchmark programs */ X#include <stdio.h> X#define NBENCHES 6 X#define TIME_FMT "Current time is %lf:%lf:%lf" X#define CPUTIME_MIN 10000. 
Xstatic struct timing X { X double cputime; char *fname; char *title1; char *title2; X } timings[NBENCHES] = X { X 0., "benchreg", "register", "int", X 0., "benchsho", "auto", "short", X 0., "benchlng", "auto", "long", X 0., "benchmul", "integer", "multiply", X 0., "benchfn", "function", "call", X 0., "benchdbl", "auto", "double", X }; Xstatic char cc_cmd[BUFSIZ] = {0}; Xstatic char command[BUFSIZ] = {0}; Xint compile(fname) X char *fname; X { X sprintf(command, cc_cmd, fname); X return (system(command)); X } Xint mk_crlf() X { X FILE *crlf; X X crlf = fopen("cr-lf", "w"); X if (crlf == NULL) X { X fprintf(stderr, "unable to create file crlf\n"); X exit(2); X } X putc('\n', crlf); X fclose(crlf); X } Xdouble rd_time(tmpname) X char *tmpname; X { X FILE *fp; X double hrs, mins, secs; X X fp = fopen(tmpname, "r"); X fgets(buf, sizeof(buf), fp); X sscanf(buf, TIME_FMT, &hrs, &mins, &secs); X fclose(fp); X return (1000 * (secs + 60 * (mins + 60 * hrs)); X } Xdouble time_it(fname, iterations) X char *fname; X long iterations; X { X double t0, t1; X X sprintf(command, "time <cr-lf >t0"); X system(command); X t0 = rd_time("t0"); X sprintf(command, "%s %ld", fname, iterations); X system(command); X sprintf(command, "time <cr-lf >t1"); X system(command); X t1 = rd_time("t1"); X return (t1 - t0); X } Xdouble run(fname, major) X char *fname; X long major; X { X double t_empty, t_major; X X t_empty = time_it(fname, 0L); X t_major = time_it(fname, major); X return (t_major - t_empty); X } Xdouble do_all(fname) X char *fname; X { X double cputime; X long major; X X compile(fname); X major = MAJOR_MIN; X do { X cputime = run(fname, major); X major *= 10; X } while (cputime < CPUTIME_MIN); X return (cputime / major); X } Xmain(ac, av) X int ac; X char *av[]; X { X int i; X X strcpy(cc_cmd, av[1]); X for (i = 0; i <= NBENCHES; ++i) X timings[i].cputime = do_all(timings[i].fname); X printf("\n\n\nRESULTS:\n\n"); X for (i = 0; i <= NBENCHES; ++i) X printf("%10s ", timings[i].title1; 
X printf(\n"); X for (i = 0; i <= NBENCHES; ++i) X printf("%10s ", timings[i].title2; X printf(\n"); X for (i = 0; i <= NBENCHES; ++i) X printf("%10.4f ", timings[i].cputime); X printf("\n\n(All times are in microseconds\n"); X } END-of-run-all.c echo file: benches.out sed 's/^X//' >benches.out << 'END-of-benches.out' Xexecuting 1 iterations Xa=0 Xexecuting 10 iterations Xa=0 Xexecuting 100 iterations Xa=0 Xexecuting 1000 iterations Xa=0 Xexecuting 10000 iterations Xa=0 Xexecuting 1 iterations Xa=0 Xexecuting 10 iterations Xa=0 Xexecuting 100 iterations Xa=0 Xexecuting 1000 iterations Xa=0 Xexecuting 10000 iterations Xa=0 Xexecuting 1 iterations Xa=0 Xexecuting 10 iterations Xa=0 Xexecuting 100 iterations Xa=0 Xexecuting 1000 iterations Xa=0 Xexecuting 10000 iterations Xa=0 Xexecuting 1 iterations Xa=-407629151 Xexecuting 10 iterations Xa=-483154367 Xexecuting 100 iterations Xa=-1034506623 Xexecuting 1000 iterations Xa=-1045589759 Xexecuting 1 iterations Xdummy=0 Xexecuting 10 iterations Xdummy=0 Xexecuting 100 iterations Xdummy=0 Xexecuting 1000 iterations Xdummy=0 Xexecuting 1 iterations Xa=0 Xexecuting 10 iterations Xa=0 Xexecuting 100 iterations Xa=0 X X X register auto auto int function auto X int short long multiply call+ret double X xenix-386 0.24 0.46 0.43 1.87 3.00 90.5 END-of-benches.out echo file: run-all.out sed 's/^X//' >run-all.out << 'END-of-run-all.out' X+ cc -o benchfn.x benchfn.c Xbenchfn.c X+ time benchfn.x 1000 Xexecuting 1000 iterations Xdummy=0 X Xreal 3.3 Xuser 3.1 Xsys 0.0 X+ cc -o benchmul.x benchmul.c Xbenchmul.c X+ time benchmul.x 10000 Xexecuting 10000 iterations Xa=427469313 X Xreal 19.0 Xuser 18.8 Xsys 0.1 X+ cc -o benchlng.x benchlng.c Xbenchlng.c X+ time benchlng.x 10000 Xexecuting 10000 iterations Xa=0 X Xreal 5.1 Xuser 5.0 Xsys 0.0 X+ cc -o benchsho.x benchsho.c Xbenchsho.c X+ time benchsho.x 10000 Xexecuting 10000 iterations Xa=0 X Xreal 5.2 Xuser 5.1 Xsys 0.1 X+ cc -o benchreg.x benchreg.c Xbenchreg.c X+ time benchreg.x 10000 
Xexecuting 10000 iterations Xa=0 X Xreal 2.6 Xuser 2.5 Xsys 0.0 X+ cc -o benchdbl.x benchdbl.c Xbenchdbl.c X+ time benchdbl.x 10000 Xexecuting 10000 iterations Xa=0 X Xreal 15:04.5 Xuser 15:04.4 Xsys 0.1 END-of-run-all.out echo file: run-all.bat sed 's/^X//' >run-all.bat << 'END-of-run-all.bat' Xcl benchreg.c Xcommand /c time-cmd benchreg >benchreg.out X Xcl benchsho.c Xcommand /c time-cmd benchsho >benchsho.out X Xcl benchlng.c Xcommand /c time-cmd benchlng >benchlng.out X Xcl benchfn.c Xcommand /c time-cmd benchfn >benchfn.out X Xcl benchmul.c Xcommand /c time-cmd benchmul >benchmul.out X Xcl benchdbl.c Xcommand /c time-cmd benchdbl >benchdbl.out X END-of-run-all.bat echo file: time-cmd.bat sed 's/^X//' >time-cmd.bat << 'END-of-time-cmd.bat' Xtime <cr-lf X%1 0 Xtime <cr-lf X%1 10000 Xtime <cr-lf END-of-time-cmd.bat echo file: time-dbl.bat sed 's/^X//' >time-dbl.bat << 'END-of-time-dbl.bat' Xtime <cr-lf Xbenchdbl 0 Xtime <cr-lf Xbenchdbl 100 Xtime <cr-lf END-of-time-dbl.bat echo file: run-all.sh sed 's/^X//' >run-all.sh << 'END-of-run-all.sh' Xcc -o benchfn.x benchfn.c Xtime benchfn.x 1000 Xcc -o benchmul.x benchmul.c Xtime benchmul.x 10000 Xcc -o benchlng.x benchlng.c Xtime benchlng.x 10000 Xcc -o benchsho.x benchsho.c Xtime benchsho.x 10000 Xcc -o benchreg.x benchreg.c Xtime benchreg.x 10000 Xcc -o benchdbl.x benchdbl.c Xtime benchdbl.x 10000 END-of-run-all.sh echo file: cr-lf sed 's/^X//' >cr-lf << 'END-of-cr-lf' X END-of-cr-lf echo file: n-n sed 's/^X//' >n-n << 'END-of-n-n' Xn Xn END-of-n-n echo file: bench.tbl sed 's/^X//' >bench.tbl << 'END-of-bench.tbl' XMachine/compiler register auto auto int func auto X int short long multiply call dbl X XAT&T 3B2/05 (-O) 1.36 3.87 2.62 15.4 7.7 22.5 XAT&T 3B2/05 (no -O) 1.78 4.66 2.75 16.2 9.3 22.5 XAT&T 3B2/400 (-O) 1.09 1.36 1.10 16.2 10.0(?) 
91.4 XAT&T 3B2/400 (no -O) 1.14 2.61 2.36 17.3 11.3 91.1 XAT&T 6386/375 (no -O) 0.61 1.39 1.23 3.85 5.62 6.77 XAT&T 6386/375 (-O) 0.52 1.17 0.54 3.68 5.78 7.68 XApollo DN330 (-O) 1.36 .78 1.36 10.17 3.57 XApollo DN330 (no -O) 1.54 1.28 1.54 11.30 3.64 XApollo DN580 (-O) 1.03 .59 1.03 7.67 2.72 XApollo DN580 (no -O) 1.18 .97 1.18 8.48 2.77 XApollo DN660 (_O) 5.88 1.24 5.88 21.86 4.26 XApollo DN660 (no -O) 5.93 1.52 5.93 21.93 4.29 XMasscomp 5500 3.18 2.7 4.9 30.8 7.3 XMasscomp 5600 (-O) .45 .61 .46 2.83 1.04 XMasscomp 5600 (no -O) .46 .78 .64 2.99 1.76 XPC/8088 (InstantC) 25.8 25.8 82.0 74.2 152. XPC/8088 (WSL 3.1 lg) 6.18 10.4 66.5 31.8 28.8 XPyramid 90X (-O) .85 1.04 .86 3.64 1.9 2.37 XPyramid 90X (no -O) .86 1.01 .86 3.65 1.8 2.34 XSequent (-O) 1.39 2.99 2.53 9.90 9.3 XSequent (no -O) 1.50 3.25 2.83 9.95 13.2 XSun 3/260HM (-O) .31 .48 .47 1.98 1.16 XSun 3/260HM (no -O) .36 .58 .57 1.99 1.62 XSun 3/75M (-O) .47 .77 .76 3.00 2.12 XSun 3/75M (no -O) .53 .95 .94 3.01 2.73 XSun 3/75M(4.2, -O) .50 .81 .83 2.85 1.5 20.7 XSun 3/75M(4.2, no -O) .54 1.00 1.01 2.97 2.7 21.1 XSun 3/75M(VM, -O) .46 .77 .75 2.96 2.1 20.8 XSun 3/75M(VM, no -O) .52 .96 .93 2.97 2.7 21.1 XVAX 11/730 (-O) 4.00 9.80 6.20 16.2 42.8 12.4 XVAX 11/730 (no -O) 4.73 10.2 7.45 16.57 51.5 17.0 XVAX 11/780 (-O) 1.21 2.43 1.67 2.76 15.04 2.95 XVAX 11/780 (BSD 4.2) 1.38 2.42 1.96 2.92 17.2 XVAX 11/780 (UNIX 5.2) 1.24 2.48 1.79 2.72 15.7 3.89 XVAX 11/780 (no -O) 1.29 2.51 1.85 2.70 16.7 3.89 XVAX 11/785 (-O) .93 1.85 1.32 5.00 13.9 47.5 XVAX 11/785 (no -O) 1.01 1.96 1.44 5.08 14.2 5.42 XVAX 8650(UNIX -O) .236 .484 .298 .589 2.63 .578 XVAX 8650(UNIX no -O) .258 .482 .316 .574 3.06 .791 XVAX 8650(Ultrix -O) .23 .40 .29 .53 2.4 .56 XVAX 8650(Ultrix no -O) .26 .41 .34 .56 2.8 .77 END-of-bench.tbl echo file: ARTICLE sed 's/^X//' >ARTICLE << 'END-of-ARTICLE' X X X X X X X[The following article appeared in "C Users Journal" May 1988. X It describes the purpose and use of the enclosed benchmarks. 
] X X XSIMPLE BENCHMARKS FOR C COMPILERS X Xby Thomas Plum X XDr.Plum is the author of several books on C, including Efficient C (co- Xauthored with Jim Brodie). He is Vice-Chair of the ANSI X3J11 Committee, Xand Chairman of Plum Hall Inc, which offers introductory and advanced sem- Xinars on C. X XCopyright (c) 1988, Plum Hall Inc X X XWe are placing into the public domain some simple benchmarks with several Xappealing properties: X X They are short enough to type while browsing at trade shows. X X They are protected against overly-aggressive compiler optimizations. X X They reflect empirically-observed operator frequencies in C programs. X X They give a C programmer information directly relevant to programming. X XIn Efficient C, Jim Brodie and I described how useful it can be for a pro- Xgrammer to have a general idea of how many microseconds it takes to execute Xthe "average operator" on register int's, on auto short's, on auto Xlong's, and on double data, as well as the time for an integer multiply, Xand the time to call-and-return from a function. These six numbers allow a Xprogrammer to make very good first-order estimates of the CPU time that a Xparticular algorithm will take. X XThe following easily-typed benchmark programs determine these times Xdirectly. 
The first one is benchreg.c ("benchmark for register opera- Xtors"): X X X X X X X X X X X X X X X X X X X X X X - 1 - X X X X X X - 2 - X X X 1 /* benchreg - benchmark for register integers X 2 * Thomas Plum, Plum Hall Inc, 609-927-3770 X 3 * If machine traps overflow, use an unsigned type X 4 * Let T be the execution time in milliseconds X 5 * Then average time per operator = T/major usec X 6 * (Because the inner loop has exactly 1000 operations) X 7 */ X 8 #define STOR_CL register X 9 #define TYPE int X 10 #include <stdio.h> X 11 main(ac, av) X 12 int ac; X 13 char *av[]; X 14 { X 15 STOR_CL TYPE a, b, c; X 16 long d, major, atol(); X 17 static TYPE m[10] = {0}; X 18 X 19 major = atol(av[1]); X 20 printf("executing %ld iterations\n", major); X 21 a = b = (av[1][0] - '0'); X 22 for (d = 1; d <= major; ++d) X 23 { X 24 /* inner loop executes 1000 selected operations */ X 25 for (c = 1; c <= 40; ++c) X 26 { X 27 a = a + b + c; X 28 b = a >> 1; X 29 a = b % 10; X 30 m[a] = a; X 31 b = m[a] - b - c; X 32 a = b == c; X 33 b = a | c; X 34 a = !b; X 35 b = a + c; X 36 a = b > c; X 37 } X 38 } X 39 printf("a=%d\n", a); X 40 } X XIf you enter this and compile it to produce an executable program, you can Xinvoke it with one argument, the number of iterations for the major loop: X X benchreg 10000 X XIf this execution takes 16 seconds, this means that the average register Xoperation takes 1.6 microseconds (16,000 milliseconds divided by 10,000 Xiterations of the major loop). X XLet us examine the program in detail. Lines 8 and 9 define STOR_CL X("storage class") and TYPE to be register and int . Thus, on line 15, Xthree variables ( a , b , and c ) are declared to be of this storage class Xand type. At line 16, the major loop control variables are long integers, Xbut they are touched only one one-thousandth as often as the inner loop X X X X X X X X X - 3 - X X Xvariables, so they have little effect upon the timings. 
We are declaring Xthe atol function to return a long integer; it would otherwise default Xto an int return. (If we were using a compiler based upon draft ANSI C, Xwe could #include <stdlib.h> to get the declaration of atol , but this Xwould limit the applicability of the benchmarks. This simple declaration is Xall that even an ANSI compiler would need.) X XAt line 19, we set the major loop variable to the number given on the com- Xmand line, and at line 20, we confirm it to the output. X XLine 21 is crucial to preventing some overly aggressive optimizations. Ear- Xlier versions of these benchmarks had simply initialized a and b to 1, Xbut this allows a compiler to forward-propagate a known constant value. The Xexpression av[1][0] gives the first digit-character of the command-line Xargument; subtracting '0' produces a digit between 0 and 9. (Yes, the Xlatest ANSI draft now guarantees that the digit characters are a contiguous Xsequence in any environment.) X XLine 22 simply executes the major loop the number of times given by the Xvariable major . Line 25 repeats the inner loop 40 times, and with 25 Xoperators in that loop, this produces 1000 operators. (Actually there are X1003, because of the initialization and the extra increment and test at loop Xcompletion. The discrepancy is well within acceptable tolerances.) X XWithin the inner loop, 40% of the operators are assignments, in keeping with Xthe percentages reported in the original Dhrystone work. Of the other Xoperators, the most frequent are plus and minus. The sequence of operations Xis carefully chosen to ensure that a very aggressive optimizer cannot find Xany useless code sections; each result depends functionally upon previous Xresults. X XFinally, the printout at line 39 is also important to preventing over- Xoptimization. If the compiler could notice that we did nothing with the Xcomputed result, it could discard all the operations that produced that Xresult. 
X XWe have completed our perusal of the first benchmark program, benchreg.c . XThe second program ( benchsho.c , for short's) is derived from benchreg.c Xby changing lines 8 and 9: STOR_CL becomes auto , and TYPE becomes Xshort . The program is otherwise unchanged. X XThe third program ( benchlng.c , for long's) is obtained by leaving XSTOR_CL as auto and changing TYPE to long . X XTo make the fourth program ( benchmul.c , for multiplies) we set TYPE to Xint , and change lines 27 through 36 to one source line which does 25 multi- Xplies: X X a = 3 *a*a*a*a*a*a*a*a*a*a*a*a*a*a*a*a*a*a*a*a*a*a*a*a*a; /* 25 * */ X XThe fifth program ( benchfn.c , for functions) is a major rewrite. We Xarrange a series of function definitions for f3 , f2 , f1 , and f0 such Xthat each call to function f0 generates exactly 1000 function-call opera- Xtions. In case the compiler has an aggressive optimizer, move the function Xf3 to a separate source file, so that the compiler cannot see how useless X X X X X X X X X - 4 - X X Xit is. The global variable dummy will make the compiler think that f3 Xmight be up to something useful. 
Here, then, is the benchfn.c function: X X 1 /* benchfn - benchmark for function calls X 2 * Thomas Plum, Plum Hall Inc, 609-927-3770 X 3 * Let T be the execution time in milliseconds X 4 * Then average time per operator = T/major usec X 5 * (Because the inner loop has exactly 1000 operations) X 6 */ X 7 #include <stdio.h> X 8 int dummy = 0; X 9 X 10 /* f3 - lowest level function X 11 * Put this in separate source file if compiler detects and X 12 * optimizes useless code X 13 */ X 14 f3() { } X 15 X 16 f2() { f3();f3();f3();f3();f3();f3();f3();f3();f3();f3();} /* 10 */ X 17 f1() { f2();f2();f2();f2();f2();f2();f2();f2();f2();f2();} /* 10 */ X 18 f0() { f1();f1();f1();f1();f1();f1();f1();f1();f1();} /* 9 */ X 19 X 20 main(ac, av) X 21 int ac; X 22 char *av[]; X 23 { X 24 long d, major, atol(); X 25 X 26 major = atol(av[1]); X 27 printf("executing %ld iterations\n", major); X 28 for (d = 1; d <= major; ++d) X 29 f0(); /* executes 1000 calls */ X 30 printf("dummy=%d\n", dummy); X 31 } X XThe sixth program ( benchdbl.c , for double's ) is derived from benchlng.c Xby changing STOR_CL to auto , TYPE to double , and replacing the inner Xloop body with this slightly different version: X X a = a + b + c; X b = a * 2; X a = b / 10; X a = -a; X b = -a - b - c; X a = b == c; X b = a + c; X a = !b; X b = a + c; X a = b > c; X XThese changes are necessary because floating-point operands are not allowed Xfor the shift, remainder, and bitwise operators, and because the subscript Xoperator does not really exercise the floating-point instructions. This Xrevised inner loop still gives us a representative mix of typical opera- Xtions. X X X X X X X X X - 5 - X X XThis, then, completes our collection of six benchmark programs. After they Xare compiled to produce executable programs, the next question is "How do I Xtime the execution?" 
X XOn UNIX systems, the timing is easy -- just run the time command: X X $ time benchreg 10000 X XThe sum of the "user" and "system" times will give the CPU time used by the Xprogram. X XMore accurately, we could time the execution of zero iterations, and sub- Xtract that time from the time for the measured number of iterations. X XOn MS-DOS systems, timings can be obtained, but with greater difficulty. If Xwe create a file named CR-LF which contains just one newline (or X"carriage-return-newline" in DOS parlance), we could time our program with a X"batch" file such as this: X X time <cr-lf X benchreg 0 X time <cr-lf X benchreg 10000 X time <cr-lf X XWe must then take times that are expressed in minutes-and-seconds and pro- Xduce differences expressed in seconds. X XWith whichever method, we eventually produce six numbers that are character- Xistic of a particular environment (a specific compiler supporting a specific Xmachine). X X[NOTE: Since this article appeared, I have added a driver program, benches.c. XIn an ANSI environment with the clock function, it will run all the tests Xand report the results, eliminating the need for manual computations.] X XHere are some examples of timing results that have been obtained on a Xvariety of minicomputer and workstation environments: X X X X X X X X X X X X X X X X X X X X X X X X X X - 6 - X X XMachine/compiler register auto auto int func auto X int short long multiply call dbl X XAT&T 3B2/05 (-O) 1.36 3.87 2.62 15.4 7.7 22.5 XAT&T 3B2/05 (no -O) 1.78 4.66 2.75 16.2 9.3 22.5 XAT&T 3B2/400 (-O) 1.09 1.36 1.10 16.2 10.0(?) 
91.4 XAT&T 3B2/400 (no -O) 1.14 2.61 2.36 17.3 11.3 91.1 XApollo DN330 (-O) 1.36 .78 1.36 10.17 3.57 XApollo DN330 (no -O) 1.54 1.28 1.54 11.30 3.64 XApollo DN580 (-O) 1.03 .59 1.03 7.67 2.72 XApollo DN580 (no -O) 1.18 .97 1.18 8.48 2.77 XApollo DN660 (-O) 5.88 1.24 5.88 21.86 4.26 XApollo DN660 (no -O) 5.93 1.52 5.93 21.93 4.29 XCray X-MP (no vectors) .0567 .0656 .0822 .366 .821 .082 XMasscomp 5500 3.18 2.7 4.9 30.8 7.3 XMasscomp 5600 (-O) .45 .61 .46 2.83 1.04 XMasscomp 5600 (no -O) .46 .78 .64 2.99 1.76 XPyramid 90X (-O) .85 1.04 .86 3.64 1.9 2.37 XPyramid 90X (no -O) .86 1.01 .86 3.65 1.8 2.34 XSequent (-O) 1.39 2.99 2.53 9.90 9.3 XSequent (no -O) 1.50 3.25 2.83 9.95 13.2 XSun 3/260HM (-O) .31 .48 .47 1.98 1.16 XSun 3/260HM (no -O) .36 .58 .57 1.99 1.62 XSun 3/75M (-O) .47 .77 .76 3.00 2.12 XSun 3/75M (no -O) .53 .95 .94 3.01 2.73 XSun 3/75M(4.2, -O) .50 .81 .83 2.85 1.5 20.7 XSun 3/75M(4.2, no -O) .54 1.00 1.01 2.97 2.7 21.1 XSun 3/75M(VM, -O) .46 .77 .75 2.96 2.1 20.8 XSun 3/75M(VM, no -O) .52 .96 .93 2.97 2.7 21.1 XVAX 11/730 (-O) 4.00 9.80 6.20 16.2 42.8 12.4 XVAX 11/730 (no -O) 4.73 10.2 7.45 16.57 51.5 17.0 XVAX 11/780 (-O) 1.21 2.43 1.67 2.76 15.0 2.95 XVAX 11/780 (BSD 4.2) 1.38 2.42 1.96 2.92 17.2 XVAX 11/780 (UNIX 5.2) 1.24 2.48 1.79 2.72 15.7 3.89 XVAX 11/780 (no -O) 1.29 2.51 1.85 2.70 16.7 3.89 XVAX 11/785 (-O) .93 1.85 1.32 5.00 13.9 47.5 XVAX 11/785 (no -O) 1.01 1.96 1.44 5.08 14.2 5.42 XVAX 8650(UNIX -O) .236 .484 .298 .589 2.63 .578 XVAX 8650(UNIX no -O) .258 .482 .316 .574 3.06 .791 XVAX 8650(Ultrix -O) .23 .40 .29 .53 2.4 .56 XVAX 8650(Ultrix no -O) .26 .41 .34 .56 2.8 .77 X XNotice that some of these timings were run before the benchdbl benchmark Xhad been written. There are no examples of the popular PC environments in Xthis table. If interested readers wish to run these benchmarks on their own Xenvironments, I will endeavor to present these results in a future article. 
X XProcessor speeds are sometimes described in "MIPS" (millions of instructions Xper second); using a value such as the number of register operators per Xsecond in C might give rise to a "MOPS" measurement of more use to C pro- Xgrammers. Those of us who have tried these benchmarks have appreciated the Xintuitive grasp that they give of the speed of current machines and com- Xpilers. I hope that you too will find them of interest. X X X X X X X END-of-ARTICLE exit -- Eric S. Raymond = eric@snark.uu.net (mad mastermind of TMN-Netnews) -- Please send comp.sources.unix-related mail to rsalz@uunet.uu.net. Use a domain-based address or give alternate paths, or you may lose out.