[comp.lang.c] Challenge -- build the fastest sorting mousetrap!

schmidt@glacier.ics.uci.edu (Doug Schmidt) (03/26/89)

In article <16539@mimsy.UUCP> chris@mimsy.UUCP (Chris Torek) writes:
++ This sounds like a CHALLENGE!  :-)

Below is a *real* challenge, for all those in net land with extra time
on their hands during easter and spring break!  I've included below a
very efficient sorting routine, together with a nifty
`sort-program-correctness-tester.'

======================================================================

            Your challenge, should you wish to accept it,
          is to write a faster, pseudo-portable, and correct
          sort routine that fulfills the same specification
                      as the one included below.

======================================================================

Pseduo-portable is eclectically defined as ``compiling and executing
correctly with GNU GCC, AT&T cfront, and GNU G++!'' Actually, K&R C is
fine too, I just didn't feel like cluttering up my program with
#ifdef's everywhere....  The point here is to emphasize creating a
clever algorithm, without being overly dependent on the machine or
compiler.  When I compare the final entries, I will use GCC, with the
-O and -fstrength-reduce options enabled.

To make this interesting, assume that the input sizes to the supplied
driver program are: 100,000, 500,000, and 1,000,000

Here are the my results on a Sun 4/260, using G++ 1.34.2 with -O
----------------------------------------
sorttest 100000
time = 1.530
sorttest 500000
time = 8.820
sorttest 1000000
time = 19.130
----------------------------------------

Naturally, not everyone has a Sun 4 ;-).  So to make this fair I'll
collect entries, time them on an otherwise empty Sun 4/260, and report
the results.  Naturally, I'd be interested in seeing how your programs
perform on other machines, too.  If anyone feels that the rules aren't
clear enough, feel free to elaborate them.  I'm mostly interested in
seeing whether anyone can beat the following sorting algorithm for
large input values.

If enough people enter good programs, I'll archive them and make
them available for anonymous FTP.

Good luck!!!

   Doug        
---
On a clear day, under blue skies, there is no need to seek.
And asking about Buddha                +------------------------+
Is like proclaiming innocence,         | schmidt@ics.uci.edu    |
With loot in your pocket.              | office: (714) 856-4043 |
----------------------------------------------------------------------
#! /bin/sh
# This is a shell archive.  Remove anything before this line, then unpack
# it by saving it into a file and typing "sh file".  To overwrite existing
# files, type "sh file -c".  You can also feed this as standard input via
# unshar, or by typing "sh <file", e.g..  If this archive is complete, you
# will see the following message at the end:
#		"End of shell archive."
# Contents:  checksort.c sort.c test.c Makefile
# Wrapped by schmidt@zola.ics.uci.edu on Sun Mar 26 00:06:31 1989
PATH=/bin:/usr/bin:/usr/ucb ; export PATH
if test -f 'checksort.c' -a "${1}" != "-c" ; then 
  echo shar: Will not clobber existing file \"'checksort.c'\"
else
echo shar: Extracting \"'checksort.c'\" \(3067 characters\)
sed "s/^X//" >'checksort.c' <<'END_OF_FILE'
X/* Efficient and general mechanism for testing correctness of sort routines. 
X   Please excuse the lack of comments.  I wrote this long ago, before
X   learning the importance of documentation (the hard way!) ;-). */
X
Xtypedef struct node
X{
X  int          item;
X  struct node *next;
X} node;
X
Xint   *range_buf;
Xnode **hash_buf;
Xint    buf_size;
Xnode  *mem_stack;
Xnode  *stack_top;
Xnode   sentinel;
Xnode  *sentinel_ptr = &sentinel;
X
Xint
Xfree_mem_and_exit(int status)
X{
X  free (mem_stack);
X  free (hash_buf);
X  free (range_buf);
X  return status;
X}   
X
Xint 
Xinit_hash_buf (void)
X{
X  register int i;
X   
X  if (!(hash_buf = (node **) malloc (buf_size * sizeof (node *))))
X    return 0;
X
X  for (i = 0;i < buf_size;i++) 
X    hash_buf[i] = sentinel_ptr;
X
X  return (mem_stack = stack_top = (node *) calloc (buf_size, sizeof (node))) != 0;
X}
X
Xnode *
Xmake_node (int item)
X{
X  node *temp = stack_top++; 
X   
X  temp->item = item;
X  return temp;
X}
X
Xint 
Xhash_value (int item)
X{
X  return item % buf_size;
X}
X
Xvoid 
Xhash_insert (int item)
X{
X  int loc           = hash_value (item);
X  node *new_node = make_node (item);
X   
X  new_node->next = hash_buf[loc];
X  hash_buf[loc]  = new_node;
X}
X
Xint 
Xremove_node (int item)
X{
X   int      loc  = hash_value (item);
X   node *temp = hash_buf[loc];
X   node *prev = 0;
X
X   for (sentinel.item = item; temp->item != item; prev = temp, temp = temp->next)
X     ;
X   
X   if (temp == sentinel_ptr) 
X     return 0;
X   else if (!prev) 
X     hash_buf[loc] = temp->next;
X   else 
X     prev->next = temp->next;
X
X   return 1;
X}
X
Xint 
Xdelete_buf(int *base)
X{
X  int i;
X   
X  for (i = 0; i < buf_size; i++)
X    if (!remove_node (base[i])) 
X      return 0;
X
X  for (i = 0; i < buf_size; i++)
X    if (hash_buf[i] != sentinel_ptr) 
X      return 0;
X
X  return 1;
X}
X
Xint 
Xcheck_sort (int *original, int *base, int size)
X{
X  int range = base[size - 1];
X
X  if ((buf_size = size) >= range) 
X    {
X      if (!(range_buf = (int *) calloc (range, sizeof (int))))
X        return -1;
X      else 
X        {
X          int i;
X         
X          for (i = 1; i < buf_size; i++) 
X            if ((base[i] < base[i - 1]) || base[i - 1] > range || base[i - 1] < 0)
X              return free_mem_and_exit (0);
X            else 
X              ++range_buf[base[i - 1]];
X
X          ++range_buf[base[buf_size - 1]];
X         
X          for (i = 0; i < buf_size; i++)
X            if (range_buf[original[i]] <= 0) 
X              return free_mem_and_exit (0);
X            else 
X              --range_buf[original[i]];
X
X          for (i = 0; i < range; i++) 
X            if (range_buf[i] != 0) 
X              return free_mem_and_exit (0);
X         
X        }
X    }
X  else if (!init_hash_buf ())
X    return -1;
X  else 
X    {
X      int i;
X      
X      for (i = 1; i < buf_size; i++)
X        {
X          if (base[i] < base[i - 1]) 
X            return free_mem_and_exit (0);
X          else 
X            hash_insert(base[i - 1]);
X        }
X   
X      hash_insert (base[buf_size - 1]);
X      if (!delete_buf (original)) 
X        return free_mem_and_exit (0);
X    }   
X  return free_mem_and_exit (1);
X}
END_OF_FILE
if test 3067 -ne `wc -c <'checksort.c'`; then
    echo shar: \"'checksort.c'\" unpacked with wrong size!
fi
# end of 'checksort.c'
fi
if test -f 'sort.c' -a "${1}" != "-c" ; then 
  echo shar: Will not clobber existing file \"'sort.c'\"
else
echo shar: Extracting \"'sort.c'\" \(5893 characters\)
sed "s/^X//" >'sort.c' <<'END_OF_FILE'
X#include <stdio.h>
X#include <ctype.h>
X
X/* the next 5 #defines implement a very fast in-line stack abstraction       */
X
X#define MAKE_STACK(S) ((stack_node *) malloc (sizeof(stack_node) * (S)))
X#define PUSH(LOW,HIGH) do {top->lo = LOW;top->hi = HIGH;top++;} while (0)
X#define POP(LOW,HIGH)  do {--top;LOW = top->lo;HIGH = top->hi;} while (0)
X#define STACK_NOT_EMPTY (stack < top)                
X#define SWAP(A,B) do {int _temp = (A);(A) = (B);(B) = _temp;} while (0)
X
X/* Discontinue quicksort algorithm when partition gets below this size.
X   This particular magic number was chosen to work best on a Sun 4/260. */
X#define MAX_THRESH 15
X
X#ifdef __GNUG__
X#define MAX(X,Y) ((X) >? (Y))
X#else
X#define MAX(X,Y) ((X) > (Y) ? (X) : (Y))
X#endif
X
X/* Data structure for stack of unfulfilled obligations. */
Xtypedef struct 
X{
X  int *lo;
X  int *hi;
X} stack_node;
X
X/* Once main array is partially sorted by quicksort the remainder is 
X   completely sorted using insertion sort, since this is efficient 
X   for partitions below MAX_THRESH size. BASE points to the beginning 
X   of the array to sort, and END_PTR points at the very last element in
X   the array (*not* one beyond it!). */
X
Xinline static void 
Xinsert_sort(int *base, int *end_ptr)
X{
X  int *run_ptr;
X  int *tmp_ptr = end_ptr;
X  int *thresh  = MAX (base, end_ptr - MAX_THRESH);
X
X  /* Find the smallest element in the first threshold and put it at the 
X     end of the array.  This is guaranteed to be the smallest element in 
X     the array, and it speeds up the inner loop of insertion sort. */
X
X  for (run_ptr = tmp_ptr - 1; run_ptr >= thresh; run_ptr--)
X    if (*run_ptr > *tmp_ptr) 
X      tmp_ptr = run_ptr;
X
X  SWAP (*tmp_ptr, *end_ptr);
X
X  /* Typical insertion sort, but we run from the `right-hand-side'
X     downto the `left-hand-side.' */
X
X  for (run_ptr = end_ptr - 1; run_ptr > base; run_ptr--) 
X    {
X      int temp = *(tmp_ptr = run_ptr - 1);
X
X      /* Select the correct location for the new element, 
X         by sliding everyone down by 1 to make room! */
X
X      while (temp > *++tmp_ptr)
X        tmp_ptr[-1] = *tmp_ptr;
X
X      tmp_ptr[-1] = temp; 
X    }
X}
X
X/* Return the median value selected from among the 
X   LOW, MIDDLE, and HIGH.  Rearrange LOW and HIGH so
X   the three values are sorted amongst themselves. 
X   This speeds up later work... */
X
Xinline static int 
Xfind_pivot(int *low, int *high)
X{
X  int *middle = low + ((high - low ) >> 1);
X
X  if (*middle < *low) 
X    SWAP (*middle, *low);
X  if (*high < *middle) 
X    SWAP (*middle, *high);
X  else 
X    return *middle;
X
X  if (*middle < *low) 
X    SWAP (*middle, *low);
X
X  return *middle;
X}
X
X/* Order elements using quicksort.  This implementation incorporates
X   4 optimizations discussed in Sedgewick:
X   
X   1. Non-recursive, using an explicit stack of log (n) pointers that 
X      indicate the next array partition to sort.
X
X   2. Choses the pivot element to be the median-of-three, reducing
X      the probability of selecting a bad pivot value.
X
X   3. Only quicksorts TOTAL_ELEMS / MAX_THRESH partitions, leaving
X      insertion sort to sort within the partitions.  This is a
X      big win, since insertion sort is faster for small, mostly
X      sorted array segements.
X   
X   4. The larger of the 2 sub-partitions are always pushed onto the
X      stack first, with the algorithm then concentrating on the
X      smaller partition.  This *guarantees* no more than log (n)
X      stack size is needed! */
X      
Xint 
Xsort (int *base,int total_elems)
X{
X  if (total_elems > 1)
X    {
X      int        *lo = base;
X      int        *hi;
X      int        *left_ptr;
X      int        *right_ptr;
X      stack_node *stack;
X      stack_node *top;  
X      int i;
X      int max_stack_size = 1;
X
X      /* Calculate the exact stack space required in the worst case.
X         This is approximately equal to the log base 2 of TOTAL_ELEMS. */
X
X      for (i = 1; i < total_elems; i += i) 
X        max_stack_size++;
X
X      /* Create the stack, or die trying! */
X      if (stack = MAKE_STACK (max_stack_size))
X        top = stack + 1;
X      else
X        return 0;
X
X      for (hi = lo + (total_elems - 1); STACK_NOT_EMPTY; )
X        {
X          /* Select the median-of-three here. This allows us to
X             skip forward for the LEFT_PTR and RIGHT_PTR. */
X          int pivot = find_pivot (lo, hi);
X          left_ptr  = lo + 1;
X          right_ptr = hi - 1; 
X
X          /* Here's the famous ``collapse the walls'' section of
X             quicksort.  Gotta like those tight inner loops! */
X          do 
X            {
X              while (*left_ptr < pivot)
X                left_ptr++;
X
X              while (pivot < *right_ptr)
X                right_ptr--;
X
X              if (left_ptr < right_ptr) 
X                {
X                  SWAP (*left_ptr, *right_ptr);
X                  left_ptr++;
X                  right_ptr--;
X                }
X              else if (left_ptr == right_ptr) 
X                {
X                  left_ptr++;
X                  right_ptr--;
X                  break;
X                }
X            } 
X          while (left_ptr <= right_ptr);
X
X          if ((right_ptr - lo) <= MAX_THRESH) 
X            {
X              if ((hi - left_ptr) <= MAX_THRESH) /* ignore both small tables */
X                POP (lo, hi);
X              else              /* ignore small left table */
X                lo = left_ptr;
X            }
X          else if ((hi - left_ptr) <= MAX_THRESH) /* ignore small right table */
X            hi = right_ptr;
X          else if ((right_ptr - lo) > (hi - left_ptr)) 
X            {                   /* push larger left table */
X              PUSH (lo,right_ptr);
X              lo = left_ptr;
X            }
X          else 
X            {                   /* push larger right table */
X              PUSH (left_ptr,hi);
X              hi = right_ptr;
X            }
X        }
X
X    }
X  insert_sort (base, base + (total_elems - 1));
X  return 1;
X}
END_OF_FILE
if test 5893 -ne `wc -c <'sort.c'`; then
    echo shar: \"'sort.c'\" unpacked with wrong size!
fi
# end of 'sort.c'
fi
if test -f 'test.c' -a "${1}" != "-c" ; then 
  echo shar: Will not clobber existing file \"'test.c'\"
else
echo shar: Extracting \"'test.c'\" \(2610 characters\)
sed "s/^X//" >'test.c' <<'END_OF_FILE'
X/* Driver routine for sort program. */
X
X#include <stdio.h>
X#include <sys/time.h>
X#include <sys/resource.h>
X
X#define MAX_INT ((~(unsigned)0)>>1)
Xstatic struct rusage old_time;
Xstatic struct rusage new_time;
X
X/* Set starting process time. */
X
Xvoid start_timer(void)
X{
X  getrusage (RUSAGE_SELF, &old_time); 
X}
X
X/* Returns process time since OLD_TIME. */
X
Xdouble return_elapsed_time(void)
X{
X  getrusage (RUSAGE_SELF, &new_time);
X  return((new_time.ru_utime.tv_sec - old_time.ru_utime.tv_sec) + 
X         ((new_time.ru_utime.tv_usec - old_time.ru_utime.tv_usec) 
X          / 1000000.0));
X}
X
X/* Generates SIZE random integers If *BASE is NULL then space is 
X   allocated, initialized, and returned, otherwise the *BASE buffer 
X   is used directly. */
X
Xint *
Xgen_rand (int **base, unsigned int size)
X{
X  int i;
X   
X  if (!*base)
X    *base = (int *) malloc (size * sizeof (int));
X  srandom ((getpid () * (time (0) ^ time (0) * 1009)) % MAX_INT);
X 
X  for (i = 0; i < size; i++)
X    (*base)[i] = (random () ^ time (0));
X
X  return *base;
X}
X
X/* Copies the contents of BASE into *NEW_BASE.  *NEW_BASE
X   is dynamically allocated if it is initially NULL, otherwise
X   the *NEW_BASE buffer is used. */
X
Xint *
Xstore_buf (int *base, int **new_base, unsigned size)
X{
X  if (!*new_base) 
X    *new_base = (int *) malloc (sizeof (int) * size);
X
X  bcopy (base, *new_base, size * sizeof(int));
X  return *new_base;
X}
X
X/* Prints out contents of array in case the sort routine
X   fails. */
X
Xvoid
Xprint (int *a, int s)
X{
X  int i;
X  printf ("---\n");
X  for (i = 0; i < s; i++)
X    printf ("item = %d\n", a[i]);
X}
X
X/* Sets up the data buffers, generates random input, calls
X   the sort routine, records how long it takes to run,
X   checks the output for validity, and prints the results. */
X
Xint
Xmain (int argc, char *argv[])
X{
X  if (argc != 2)
X    {
X      fprintf (stderr, "usage: %s size\n", argv[0]);
X      return 1;
X    }
X  else
X    {
X      int status;
X      int size = atoi (argv[1]);
X      int *sorted = 0;
X      int *copy = 0;
X      double time;
X  
X      gen_rand (&sorted, size);
X      store_buf (sorted, &copy, size);
X
X      start_timer ();
X      sort (sorted, size);
X      time = return_elapsed_time ();
X  
X      if ((status = check_sort (copy, sorted, size)) > 0)
X        printf ("time = %.3f\n", time);
X      else
X        switch (status)
X          {
X          case -1: printf ("memory failure\n"); break;
X          case 0: 
X            printf ("`sorted' array is not an ordered permutation of original\n");
X            print (sorted, size);
X            print (copy, size);
X            break;
X          }
X      return 0;
X    }
X}
END_OF_FILE
if test 2610 -ne `wc -c <'test.c'`; then
    echo shar: \"'test.c'\" unpacked with wrong size!
fi
# end of 'test.c'
fi
if test -f 'Makefile' -a "${1}" != "-c" ; then 
  echo shar: Will not clobber existing file \"'Makefile'\"
else
echo shar: Extracting \"'Makefile'\" \(1159 characters\)
sed "s/^X//" >'Makefile' <<'END_OF_FILE'
X#############################################################################
X#
X#  Makefile for sorttest
X#
X#############################################################################
X#
X#  If you move this makefile, update the variable below
X#  or else depend won't work.
X#############################################################################
XMAKEFILE	= Makefile
XCC		      = g++
XCFILES		= test.c sort.c checksort.c
XOFILES		= test.o sort.o checksort.o
XMANFILE		= sorttest.1
XPROGRAM		= sorttest
X#############################################################################
XOPTFLAGS	= -O
XCFLAGS		=  $(OPTFLAGS) $(DFLAGS)
X
X$(PROGRAM):	$(OFILES)
X	$(CC) $(LDFLAGS) $(OFILES) -o $(PROGRAM) $(LIBS)
X
Xclean:
X	rm -f *.o *.out core
X
Xclean-all: clean
X	rm -f $(PROGRAM)
X
Xdepend:
X	/usr/ucb/mkdep -f $(MAKEFILE) $(CFILES)
X
X# DO NOT DELETE THIS LINE -- mkdep uses it.
X# DO NOT PUT ANYTHING AFTER THIS LINE, IT WILL GO AWAY.
X
Xtest.o: test.c /usr/include/stdio.h /usr/include/sys/time.h /usr/include/time.h
Xtest.o: /usr/include/sys/resource.h
Xsort.o: sort.c /usr/include/stdio.h /usr/include/ctype.h
Xchecksort.o: checksort.c
X
X# IF YOU PUT ANYTHING HERE IT WILL GO AWAY
END_OF_FILE
if test 1159 -ne `wc -c <'Makefile'`; then
    echo shar: \"'Makefile'\" unpacked with wrong size!
fi
# end of 'Makefile'
fi
echo shar: End of shell archive.
exit 0
--
On a clear day, under blue skies, there is no need to seek.
And asking about Buddha                +------------------------+
Is like proclaiming innocence,         | schmidt@ics.uci.edu    |
With loot in your pocket.              | office: (714) 856-4043 |