[comp.graphics] fast sorting algorithms.

ra2@doc.ic.ac.uk (Roger Attrill) (03/15/91)

Hi,

This posting concerns:
 .  Requested fast sorting algorithms.


A couple of days ago I mentioned I had a fast sort routine. All the
details and algorithms follow below.

The solids modeller the sort (RAGSort) was written for, was for the
Acorn Archimedes (RISC) Computer. ( Probably no use to you, Bob Kelley).
It was used to sort tiny triangles into depth order. The triangles were
not more than 2 pixels per side, so there was no problem you might
have with sorting larger surfaces by surface rather than by points.


Thanks to requests for the sort from the following people

> rjk@sequent.com
> wuly@vax5.cit.cornell.edu
> lim@cs.su.oz.au
> hjb@pilot.njin.net



The following consists of four sort routines, some of which may be useful,
others not.

A) C code for the RAGSort
B) BASIC V machine code for the RAGSOort
C) BASIC V code for the RAGSort
D) pseudo code for the Shell-Metzner sort

Don't knock the BASIC on the Archimedes it's a cut above any other
version of BASIC. The fact that you can write in machine code in BASIC
and assemble it so easily makes life easy for the Assembly Langyuage
Programmer. It's also a lot faster than using C. The machine code
version is extremely optimized, and may take a little work to see what
is going on.

Make of it what you will. A lot of work has gone in to this sort


A) Below follows the C version of the RAGSort


/*
   ragsort

   A very fast sort routine written by Roger Attrill
   originally in machine code. Implemented in C by Gary Smith and
   Roger Attrill (gks@doc.ic.ac.uk & ra2@doc.ic.ac.uk).
   The speed loses something in being written in C. The machine code
   version was about 3 times as fast.
   The sort or its derivations may not be used in commercial software
   without express permission of the authors. The code may be used in
   public domain software provided credit is given, and the authors are
   informed.
   No comments are included - meaningful variable names do the job just
   as well ub=upperbound, lb=lowerbound.
   Date:-13 March 1991.
*/




#include <stdio.h>
#include <time.h>
#include <stdlib.h>
 

int     NumOfElements, *ArrayPtr;
int     ub, lb;                /* upper and lower bounds */
int     PivPos, PivVal;        /* Pivot position and its array value */
int     Temp;                  /* Used as temporary store */


void RecurseSort(Oldlb,Oldub)
int Oldlb,Oldub;
{     int     Temp1;
     if (Oldlb < Oldub)
     {     
          PivPos = (Oldlb + Oldub) >> 1;
          PivVal = *(ArrayPtr + PivPos);
          lb = Oldlb;
          ub = Oldub;
          do
          {
               while ((PivVal >= *(ArrayPtr + lb)) && (lb < Oldub)) lb++;
               while ((*(ArrayPtr + ub) >= PivVal) && (ub > Oldlb)) ub--;
               if (lb < ub)
               {    Temp = *(ArrayPtr + lb);
                    *(ArrayPtr + lb++) = *(ArrayPtr + ub);
                    *(ArrayPtr + ub--) = Temp;
               }
               else
                    break;
          }     while(1);
          if (lb < PivPos)
          {    *(ArrayPtr + PivPos) = *(ArrayPtr + lb);
               *(ArrayPtr + lb++) = PivVal;
          }
          else
          {    if (PivPos < ub)
               {    *(ArrayPtr + PivPos) = *(ArrayPtr + ub);
                    *(ArrayPtr + ub--) = PivVal;
               }
          }
          Temp1 = lb;
          RecurseSort(Oldlb, ub);
          RecurseSort(Temp1, Oldub);          
     }
}


void CheckOrdering()
{     int     i;

     for(i = 1; i < NumOfElements; i++)
          if (*(ArrayPtr + i) < *(ArrayPtr + i - 1))
               printf("Out of Order\n");
     
}


void RAGSort()
{
     RecurseSort(0, NumOfElements);    /* call sort with lower and upper bounds */
}




int cmpint(P1,P2)    /* qsort comparator procedure */
int *P1,*P2;
{     return(*P1 - *P2);
}




void SetupData()
{     int     i;
     for(i = 0; i < NumOfElements; i++)
          *(ArrayPtr + i) = rand();
     printf("Sorting %d Random elements\n", NumOfElements);
}


int main()
{     clock_t     STime, FTime;
     int     Seed;
     

     printf("Enter Number of Elements and seed: ");
     scanf("%d %d", &NumOfElements, &Seed);
     ArrayPtr = malloc(NumOfElements * sizeof(int));

     srand(Seed);
     SetupData();
     STime = clock();
     RAGSort();
     FTime = clock();
     CheckOrdering();
     printf("Time Taken: %f ticks!\n", (double) (FTime - STime));
     
     printf("QSort version\n");
     srand(Seed);
     SetupData();
     STime = clock();
     qsort(ArrayPtr, NumOfElements, sizeof(int), (void *) cmpint);
     FTime = clock();
     CheckOrdering();     
     printf("Time Taken: %f ticks!\n", (double) (FTime - STime));
}



B) Below follows a BASIC V program for the archimedes to do
the ragsort in machine code for 100000 numbers - took 4.8 secs

   10REM>XS3
   20MODE0
   30n=100000
   40PROC_Code
   50END
   60
   70DEFPROC_Code
   80DIM A n*4
   90DIM code% 1000
  100PROC_Assemble
  110CALL rnd
  120PRINT"Sorting >>  ";n;"  << random elements in machine code"
  130TIME=0
  140CALL RAGSort
  150T=TIME
  160PRINT"Done."
  170CALL checkit
  180PRINTT
  190ENDPROC
  200
  210
  220
  230DEFPROC_Assemble                        :REM values of code% MOD 16
  240REPEAT:code%+=4:UNTIL code% MOD 16 = 0  :REM can speed up by over 5%
  250FOR pass=0 TO 3 STEP 3
  260P% = code%
  270inc   = 0
  280upper = 1
  290dec   = 2
  300lower = 3
  310mid   = 4
  320ainc  = 5
  330adec  = 6
  340amid  = 7
  350[opt pass
  360
  370
  380.RAGSsort
  390stmfd   R13!,{R0-R7,R14}
  400ldr     lower,array                ; base of element array
  410ldr     dec,arraysize
  420add     dec,dec,lower
  430teqp    pc,#0                      ; clear carry
  440bl      sort
  450ldmfd   R13!,{R0-R7,pc}
  460
  470.array     dcd  A                  ; base of element array
  480.arraysize dcd  (n-1)*4            ; size of array
  490
  500.sort
  510stmfd   R13!,{inc,upper,R14}
  520movcc   upper,dec
  530movcs   dec,upper
  540movcc   inc,lower
  550movcs   lower,inc
  560add     mid,lower,upper
  570mov     mid,mid,asr #3
  580mov     mid,mid,lsl #2
  590ldr     amid,[mid]
  600.cmp1
  610ldr     ainc,[inc]
  620cmp     amid,ainc
  630blt     cmp2
  640add     inc,inc,#4
  650cmp     inc,upper
  660bls     cmp1
  670mov     inc,upper
  680.cmp2
  690ldr     adec,[dec]
  700cmp     adec,amid
  710blt     cmp3
  720sub     dec,dec,#4
  730cmp     dec,lower
  740bcs     cmp2
  750mov     dec,lower
  760.cmp3
  770cmp     inc,dec
  780strcc   adec,[inc],#4
  800strcc   ainc,[dec],#-4
  810bcc     cmp1
  820cmp     inc,mid
  830bcs     cmp5
  840str     amid,[inc],#4
  850str     ainc,[mid]
  860cmp     lower,dec
  870blcc    sort               ; call sort with carry clear
  880cmp     inc,upper
  890ldmcsfd r13!,{inc,upper,PC}
  900teqp    pc,#(1<<29)        ; set carry
  910bl      sort               ; call sort with carry set
  920ldmfd   r13!,{inc,upper,PC}
  930.cmp5
  940cmp     mid,dec
  950strcc   amid,[dec],#-4
  960strcc   adec,[mid]
  970cmp     lower,dec
  980blcc    sort               ; call sort with carry clear
  990cmp     inc,upper
 1000ldmcsfd r13!,{inc,upper,PC}
 1010teqp    pc,#(1<<29)        ; set carry
 1020bl      sort               ; call sort with carry set
 1030ldmfd   r13!,{inc,upper,PC}
 1040
 1050
 1060
 1070; check sorted correctly
 1080
 1090.checkit
 1100stmfd   R13!,{R0-R4,R14}
 1110ldr     R4,array
 1120ldr     R1,arraysize
 1130ldr     R2,[R4,R1]
 1140.eqs
 1150subs    R1,R1,#4
 1160ldmltfd R13!,{R0-R4,PC}
 1170ldr     R3,[R4,R1]
 1180cmp     R3,R2
 1190swigt   &100+ASC("E")
 1200subs    R1,R1,#4
 1210ldmltfd R13!,{R0-R4,PC}
 1220ldr     R2,[R4,R1]
 1230cmp     R2,R3
 1240swigt   &100+ASC("E")
 1250b       eqs
 1260
 1270
 1280
 1290.rnd                      ;set array to random values
 1300stmfd r13!,{r0-r4,r14}    ;using generator below
 1310ldr   R1,array
 1320ldr   R2,arraysize
 1330mov   R0,#0
 1340.rl
 1350bl    random
 1360str   R0,[R1],#4
 1370subs  R2,R2,#4
 1380bge   rl
 1390ldmfd r13!,{r0-r4,r15}
 1400
 1410; I use this random number generator a lot. It used to be quite
 1420; a popular method in the Fortran days - but works excellently
 1430; in machine code
 1440
 1450.random
 1460stmfd r13!,{r1-r7,r14}
 1470adr   r7,randindex
 1480ldr   r6,[r7]
 1490add   r6,r6,#1
 1500cmp   r6,#55
 1510subge r6,r6,#55
 1520str   r6,[r7]
 1530add   r4,r6,#23
 1540cmp   r4,#55
 1550subge r4,r4,#55
 1560mov   r4,r4,lsl #2
 1570add   r5,r6,#54
 1580cmp   r5,#55
 1590subge r5,r5,#55
 1600mov   r5,r5,lsl #2
 1610adr   r3,rand
 1620add   r1,r3,r4
 1630ldr   r4,[r1]
 1640add   r2,r3,r5
 1650ldr   r5,[r2]
 1660add   r4,r4,r5
 1670.mod
 1680adr   r5,randconst
 1690ldr   r5,[r5]
 1700cmp   r4,r5
 1710subge r4,r4,r5
 1720bge   mod
 1730mov   r6,r6,lsl #2
 1740add   r3,r3,r6
 1750str   r4,[r3]
 1760mov   r0,r4
 1770ldmfd r13!,{r1-r7,r15}
 1780
 1790.rand
 1800]:P%+=56*4:[opt pass
 1810align
 1820
 1830.randindex dcd 0
 1840
 1850.randconst dcd n         ;this is the max value of the number generated
 1860                         ;  0 < R <= n   ( or is it  0 <= R < n  ????)
 1870                         ;randconst can be set to any desired value
 1880.endcode                    ;upto about 1<<30
 1890]
 1900NEXT
 1910PROC_initialise_random_table(25)
 1920ENDPROC
 1930
 1940
 1950DEFPROC_initialise_random_table(seed)
 1960m1=100
 1970rand!0=seed
 1980FOR j=1 TO 54
 1990rand!(j*4)=(FN_mult(31,rand!(j*4-4) ) +1) MOD !randconst
 2000NEXT
 2010FOR X%=0 TO 1000
 2020CALL random        :REM get rid of initial ordering
 2030NEXT
 2040ENDPROC
 2050
 2060DEFFN_mult(p,q)
 2070p1=p DIV m1: p0=p MOD m1
 2080q1=q DIV m1: q0=q MOD m1
 2090=(((p0*q1 + p1*q0) MOD m1)*m1 + p0*q0) MOD !randconst
 2100

C) Below follows a BASIC V program for the archimedes to do the
ragsort in BASIC for 200 numbers

   10REM>XsortBasic
   20
   30MODE0
   40n=200
   50PROC_Basic
   60END
   70
   80DEFPROC_Basic
   90DIM A(n)
  100FOR X=1 TO n
  110 A(X)=RND(n)
  120NEXT
  130PRINT"sorting ";n;" random elements in basic"
  140TIME=0
  150PROC_RAGSort
  160T=TIME
  170PRINT"Done."
  180PROC_check_basic
  190PRINTT
  200ENDPROC
  210
  220
  230DEFPROC_check_basic
  240FOR X=1 TO n
  250IF A(X) < A(X-1) PRINT"out of order at pos ";X;" / ";n
  260NEXT
  270ENDPROC
  280
  281
  290
  300
  310DEFPROC_RAGSort
  320 N=FNQ(0,n)
  330ENDPROC
  340
  350DEFFNQ(Y,Y1)
  360  IF Y>=Y1 THEN =0
  365  A1=FNP(Y,Y1)
  367  A2=FNQ(Y,J)
  370  Q=FNQ(A1,Y1)
  380=0
  390
  400DEFFNP(Y,Y1)         :REM another version follows with no GOTO's
  410  F=(Y+Y1) DIV 2
  420  K=A(F)
  430  I=Y
  440  J=Y1
  450  IF K < A(I) GOTO 490
  460  I+=1
  470  IF I<=Y1 THEN 450
  480  I=Y1
  490  IF A(J) < K GOTO 530
  500  J-=1
  510  IF J>=Y THEN 490
  520  J=Y
  530  IF I>=J GOTO 580
  540  SWAP A(I),A(J)
  550  I+=1
  560  J-=1
  570  GOTO450
  580  IF I>=F GOTO620
  590  SWAP A(I),A(F)
  600  I+=1
  610  GOTO650
  620  IF F>=J GOTO650
  630  SWAP A(J),A(F)
  640  J-=1
  650=I
  660
  670/* the p function - modified to remove goto statements
  680
  690REM DEFFNP(Y,Y1)
  700  F=(Y+Y1) DIV 2
  710  K=A(F)
  720  I=Y
  730  J=Y1
  740  L=TRUE
  750  REPEAT
  760    WHILE (K>=A(I)) AND (I<Y1)
  770      I+=1
  780    ENDWHILE
  790    WHILE (A(J) >= K) AND (J>Y)
  800      J-=1
  810    ENDWHILE
  820    IF I<J THEN
  830      SWAP A(I),A(J)
  840      I+=1:J-=1
  850    ELSE
  860      L=FALSE
  870    ENDIF
  880  UNTIL L=FALSE
  890  IF I<F THEN
  900    SWAP A(I),A(F):I+=1
  910  ELSE
  920    IF F<J THEN SWAP A(J),A(F):J-=1
  930  ENDIF
  940=I
  950
  960


D) Below follows pseudocode for the Shell-Metzner Sort.

n=500
claim space for array a(n)
for i=1 to n
  a(i)=rand(n)
endfor
time=0
h=1
repeat
  h=h*3+1
until h>n
repeat
  h=h DIV 3
  for i=h to n
    v=a(i):j=i
    repeat
      f=TRUE
      if a(j-h)>v then
        a(j)=a(j-h)
        j=j-h
        f=FALSE
      endif
    until j<h OR f
    a(j)=v
  endfor
until h=1
print time