koreth@ssyx.ucsc.edu (Steven Grimm) (02/25/89)
Submitted-by: leo@philmds.dts.philips.nl (Leo de Wit) Posting-number: Volume 2, Issue 17 Archive-name: sort/part01 This sort program will appear familiar to *nix freaks. You can use it to sort and/or merge multiple input files on several keys (numeric, lexicographic). All hardware configurations should be supported (at least I don't think it is dependent on resolution, memory size, disk type etc.). Binary & docs submitted to comp.binaries.atari.st. #!/bin/sh # shar: Shell Archiver (v1.22) # # This is part 1 of a multipart archive # do not concatenate these parts, unpack them in order with /bin/sh # # Run the following text with /bin/sh to create: # fastcmp.asm # readme # sort.c # sort.doc # sortcomp.c # sortcomp.h # sortfile.c # sortfile.h # sortmain.c # sortmain.h # if test -r s2_seq_.tmp then echo "Must unpack archives in sequence!" next=`cat s2_seq_.tmp`; echo "Please unpack part $next next" exit 1; fi sed 's/^X//' << 'SHAR_EOF' > fastcmp.asm && X****************************************************************************** X* * X* fastcmp.asm version 1.0 of 22 Januari 1989 (C) L.J.M. de Wit 1989 * X* * X* This software may be used and distributed freely if not used commercially * X* and the originator (me) is mentioned in the source (just leave this 9 line * X* header intact). * X* * X****************************************************************************** X* X* fastcmp.asm: fast sort functions X* X* The code here presented is offered as a replacement for some of the C X* functions present in sortcomp.c. Since comparisions are done a lot while X* sorting, some performance tweaking didn't seem misplaced. X* X* As for documentation, a reference is made to the corresponding C functions X* in sortcomp.c. Most of the code here does simple work, like swapping X* arguments (for the 'r' options), and/or skipping white space (for the 'b' X* options; marked by 'skips' labels). X* X X section s.ccode X Xtab equ 9 X X xref c_df X xref c_d X xref c_if X xref c_i X xref c_af X X xref c_dfu X xref c_du X xref c_ifu X xref c_iu X xref c_afu X X xdef c_dbfr Xc_dbfr X* Dict/Blank/Fold/Reverse * X move.l 4(sp),a0 X move.l 8(sp),4(sp) X move.l a0,8(sp) X move.l 12(sp),a0 X move.l 16(sp),12(sp) X move.l a0,16(sp) X* FALLTHROUGH X X xdef c_dbf Xc_dbf X* Dict/Blank/Fold * X movea.l 4(sp),a0 X movea.l 8(sp),a1 Xskips0a X move.b (a0)+,d0 X cmp.b #' ',d0 X beq.s skips0a X cmp.b #tab,d0 X beq.s skips0a X subq.l #1,a0 Xskips1a X move.b (a1)+,d1 X cmp.b #' ',d1 X beq.s skips1a X cmp.b #tab,d1 X beq.s skips1a X subq.l #1,a1 X move.l a0,4(sp) X move.l a1,8(sp) X jmp c_df X X xdef c_dbr Xc_dbr X* Dict/Blank/Reverse * X move.l 4(sp),a0 X move.l 8(sp),4(sp) X move.l a0,8(sp) X move.l 12(sp),a0 X move.l 16(sp),12(sp) X move.l a0,16(sp) X* FALLTHROUGH X X xdef c_db Xc_db X* Dict/Blank * X movea.l 4(sp),a0 X movea.l 8(sp),a1 Xskips0b X move.b (a0)+,d0 X cmp.b #' ',d0 X beq.s skips0b X cmp.b #tab,d0 X beq.s skips0b X subq.l #1,a0 Xskips1b X move.b (a1)+,d1 X cmp.b #' ',d1 X beq.s skips1b X cmp.b #tab,d1 X beq.s skips1b X subq.l #1,a1 X move.l a0,4(sp) X move.l a1,8(sp) X jmp c_d X X xdef c_dfr Xc_dfr X* Dict/Fold/Reverse * X move.l 4(sp),a0 X move.l 8(sp),4(sp) X move.l a0,8(sp) X move.l 12(sp),a0 X move.l 16(sp),12(sp) X move.l a0,16(sp) X jmp c_df X X xdef c_dr Xc_dr X* Dict/Reverse * X move.l 4(sp),a0 X move.l 8(sp),4(sp) X move.l a0,8(sp) X move.l 12(sp),a0 X move.l 16(sp),12(sp) X move.l a0,16(sp) X jmp c_d X X xdef c_ibfr Xc_ibfr X* Norm/Blank/Fold/Reverse * X move.l 4(sp),a0 X move.l 8(sp),4(sp) X move.l a0,8(sp) X move.l 12(sp),a0 X move.l 16(sp),12(sp) X move.l a0,16(sp) X* FALLTHROUGH X X xdef c_ibf Xc_ibf X* Norm/Blank/Fold * X movea.l 4(sp),a0 X movea.l 8(sp),a1 Xskips0c X move.b (a0)+,d0 X cmp.b #' ',d0 X beq.s skips0c X cmp.b #tab,d0 X beq.s skips0c X subq.l #1,a0 Xskips1c X move.b (a1)+,d1 X cmp.b #' ',d1 X beq.s skips1c X cmp.b #tab,d1 X beq.s skips1c X subq.l #1,a1 X move.l a0,4(sp) X move.l a1,8(sp) X jmp c_if X X xdef c_ibr Xc_ibr X* Norm/Blank/Reverse * X move.l 4(sp),a0 X move.l 8(sp),4(sp) X move.l a0,8(sp) X move.l 12(sp),a0 X move.l 16(sp),12(sp) X move.l a0,16(sp) X* FALLTHROUGH X X xdef c_ib Xc_ib X* Norm/Blank * X movea.l 4(sp),a0 X movea.l 8(sp),a1 Xskips0d X move.b (a0)+,d0 X cmp.b #' ',d0 X beq.s skips0d X cmp.b #tab,d0 X beq.s skips0d X subq.l #1,a0 Xskips1d X move.b (a1)+,d1 X cmp.b #' ',d1 X beq.s skips1d X cmp.b #tab,d1 X beq.s skips1d X subq.l #1,a1 X move.l a0,4(sp) X move.l a1,8(sp) X jmp c_i X X xdef c_ifr Xc_ifr X* Norm/Fold/Reverse * X move.l 4(sp),a0 X move.l 8(sp),4(sp) X move.l a0,8(sp) X move.l 12(sp),a0 X move.l 16(sp),12(sp) X move.l a0,16(sp) X jmp c_if X X xdef c_ir Xc_ir X* Norm/Reverse * X move.l 4(sp),a0 X move.l 8(sp),4(sp) X move.l a0,8(sp) X move.l 12(sp),a0 X move.l 16(sp),12(sp) X move.l a0,16(sp) X jmp c_i X X xdef c_abfr Xc_abfr X* Blank/Fold/Reverse * X move.l 4(sp),a0 X move.l 8(sp),4(sp) X move.l a0,8(sp) X move.l 12(sp),a0 X move.l 16(sp),12(sp) X move.l a0,16(sp) X* FALLTHROUGH X X xdef c_abf Xc_abf X* Blank/Fold * X movea.l 4(sp),a0 X movea.l 8(sp),a1 Xskips0e X move.b (a0)+,d0 X cmp.b #' ',d0 X beq.s skips0e X cmp.b #tab,d0 X beq.s skips0e X subq.l #1,a0 Xskips1e X move.b (a1)+,d1 X cmp.b #' ',d1 X beq.s skips1e X cmp.b #tab,d1 X beq.s skips1e X subq.l #1,a1 X move.l a0,4(sp) X move.l a1,8(sp) X jmp c_af X X xdef c_abr Xc_abr X* Blank/Reverse * X move.l 4(sp),a0 X move.l 8(sp),4(sp) X move.l a0,8(sp) X move.l 12(sp),a0 X move.l 16(sp),12(sp) X move.l a0,16(sp) X* FALLTHROUGH X X xdef c_ab Xc_ab X* Blank * X movea.l 4(sp),a0 X movea.l 8(sp),a1 Xskips0f X move.b (a0)+,d0 X cmp.b #' ',d0 X beq.s skips0f X cmp.b #tab,d0 X beq.s skips0f X subq.l #1,a0 Xskips1f X move.b (a1)+,d1 X cmp.b #' ',d1 X beq.s skips1f X cmp.b #tab,d1 X beq.s skips1f X subq.l #1,a1 X move.l a0,4(sp) X move.l a1,8(sp) X jmp c_a X X xdef c_afr Xc_afr X* Fold/Reverse * X move.l 4(sp),a0 X move.l 8(sp),4(sp) X move.l a0,8(sp) X move.l 12(sp),a0 X move.l 16(sp),12(sp) X move.l a0,16(sp) X jmp c_af X X xdef c_ar Xc_ar X* Reverse * X move.l 4(sp),a0 X move.l 8(sp),4(sp) X move.l a0,8(sp) X move.l 12(sp),a0 X move.l 16(sp),12(sp) X move.l a0,16(sp) X jmp c_a X X xdef c_a Xc_a X move.l 4(sp),d0 X move.l 12(sp),d1 X sub.l d0,d1 X move.l d0,a0 X move.l 8(sp),d0 X move.l 16(sp),d2 X sub.l d0,d2 X move.l d0,a1 X move.w d2,d0 X sub.w d1,d0 X blt.s c_asmall X move.w d1,d2 Xc_asmall X dbra d2,c_aloop Xc_aeqs X ext.l d0 X rts X Xc_aloop X cmp.b (a0)+,(a1)+ X dbne d2,c_aloop X beq.s c_aeqs X bpl.s c_aplus X moveq.l #1,d0 X rts Xc_aplus X moveq.l #-1,d0 X rts X X* Unbounded comparisions start here * X X xdef c_dbfru Xc_dbfru X* Dict/Blank/Fold/Reverse * X move.l 4(sp),a1 X move.l 8(sp),a0 X bra.s skips0g X X xdef c_dbfu Xc_dbfu X* Dict/Blank/Fold * X movea.l 4(sp),a0 X movea.l 8(sp),a1 Xskips0g X move.b (a0)+,d0 X cmp.b #' ',d0 X beq.s skips0g X cmp.b #tab,d0 X beq.s skips0g X subq.l #1,a0 Xskips1g X move.b (a1)+,d1 X cmp.b #' ',d1 X beq.s skips1g X cmp.b #tab,d1 X beq.s skips1g X subq.l #1,a1 X move.l a0,4(sp) X move.l a1,8(sp) X jmp c_dfu X X xdef c_dbru Xc_dbru X* Dict/Blank/Reverse * X move.l 4(sp),a1 X move.l 8(sp),a0 X bra.s skips0h X X xdef c_dbu Xc_dbu X* Dict/Blank * X movea.l 4(sp),a0 X movea.l 8(sp),a1 Xskips0h X move.b (a0)+,d0 X cmp.b #' ',d0 X beq.s skips0h X cmp.b #tab,d0 X beq.s skips0h X subq.l #1,a0 Xskips1h X move.b (a1)+,d1 X cmp.b #' ',d1 X beq.s skips1h X cmp.b #tab,d1 X beq.s skips1h X subq.l #1,a1 X move.l a0,4(sp) X move.l a1,8(sp) X jmp c_du X X xdef c_dfru Xc_dfru X* Dict/Fold/Reverse * X move.l 4(sp),a0 X move.l 8(sp),4(sp) X move.l a0,8(sp) X jmp c_dfu X X xdef c_dru Xc_dru X* Dict/Reverse * X move.l 4(sp),a0 X move.l 8(sp),4(sp) X move.l a0,8(sp) X jmp c_du X X xdef c_ibfru Xc_ibfru X* Norm/Blank/Fold/Reverse * X move.l 4(sp),a1 X move.l 8(sp),a0 X bra.s skips0i X* FALLTHROUGH X X xdef c_ibfu Xc_ibfu X* Norm/Blank/Fold * X movea.l 4(sp),a0 X movea.l 8(sp),a1 Xskips0i X move.b (a0)+,d0 X cmp.b #' ',d0 X beq.s skips0i X cmp.b #tab,d0 X beq.s skips0i X subq.l #1,a0 Xskips1i X move.b (a1)+,d1 X cmp.b #' ',d1 X beq.s skips1i X cmp.b #tab,d1 X beq.s skips1i X subq.l #1,a1 X move.l a0,4(sp) X move.l a1,8(sp) X jmp c_ifu X X xdef c_ibru Xc_ibru X* Norm/Blank/Reverse * X move.l 4(sp),a1 X move.l 8(sp),a0 X bra.s skips0j X X xdef c_ibu Xc_ibu X* Norm/Blank * X movea.l 4(sp),a0 X movea.l 8(sp),a1 Xskips0j X move.b (a0)+,d0 X cmp.b #' ',d0 X beq.s skips0j X cmp.b #tab,d0 X beq.s skips0j X subq.l #1,a0 Xskips1j X move.b (a1)+,d1 X cmp.b #' ',d1 X beq.s skips1j X cmp.b #tab,d1 X beq.s skips1j X subq.l #1,a1 X move.l a0,4(sp) X move.l a1,8(sp) X jmp c_iu X X xdef c_ifru Xc_ifru X* Norm/Fold/Reverse * X move.l 4(sp),a0 X move.l 8(sp),4(sp) X move.l a0,8(sp) X jmp c_ifu X X xdef c_iru Xc_iru X* Norm/Reverse * X move.l 4(sp),a0 X move.l 8(sp),4(sp) X move.l a0,8(sp) X jmp c_iu X X xdef c_abfru Xc_abfru X* Blank/Fold/Reverse * X move.l 4(sp),a1 X move.l 8(sp),a0 X bra.s skips0k X X xdef c_abfu Xc_abfu X* Blank/Fold * X movea.l 4(sp),a0 X movea.l 8(sp),a1 Xskips0k X move.b (a0)+,d0 X cmp.b #' ',d0 X beq.s skips0k X cmp.b #tab,d0 X beq.s skips0k X subq.l #1,a0 Xskips1k X move.b (a1)+,d1 X cmp.b #' ',d1 X beq.s skips1k X cmp.b #tab,d1 X beq.s skips1k X subq.l #1,a1 X move.l a0,4(sp) X move.l a1,8(sp) X jmp c_afu X X xdef c_abru Xc_abru X* Blank/Reverse * X move.l 4(sp),a1 X move.l 8(sp),a0 X bra.s skips0l X X xdef c_abu Xc_abu X* Blank * X movea.l 4(sp),a0 X movea.l 8(sp),a1 Xskips0l X move.b (a0)+,d0 X cmp.b #' ',d0 X beq.s skips0l X cmp.b #tab,d0 X beq.s skips0l X subq.l #1,a0 Xskips1l X move.b (a1)+,d1 X cmp.b #' ',d1 X beq.s skips1l X cmp.b #tab,d1 X beq.s skips1l X subq.l #1,a1 X move.l a0,4(sp) X move.l a1,8(sp) X jmp c_au X X xdef c_afru Xc_afru X* Fold/Reverse * X move.l 4(sp),a0 X move.l 8(sp),4(sp) X move.l a0,8(sp) X jmp c_afu X X xdef c_aru Xc_aru X* Reverse * X move.l 4(sp),a1 X move.l 8(sp),a0 X bra.s c_auloop X X xdef c_au Xc_au X movea.l 4(sp),a0 X movea.l 8(sp),a1 Xc_auloop X move.b (a0)+,d0 X beq.s c_auex0 X sub.b (a1)+,d0 X bne.s c_auex1 X move.b (a0)+,d0 X beq.s c_auex0 X sub.b (a1)+,d0 X bne.s c_auex1 X move.b (a0)+,d0 X beq.s c_auex0 X sub.b (a1)+,d0 X bne.s c_auex1 X move.b (a0)+,d0 X beq.s c_auex0 X sub.b (a1)+,d0 X beq.s c_auloop Xc_auex1 X ext.w d0 X ext.l d0 X rts Xc_auex0 X sub.b (a1),d0 X ext.w d0 X ext.l d0 X rts X X end SHAR_EOF chmod 0600 fastcmp.asm || echo "restore of fastcmp.asm fails" sed 's/^X//' << 'SHAR_EOF' > readme && X X Sort X ---- X X X The sort utility here presented is comparable to the Unix sort utility; X even most of the flags are the same. Sorting is done in core as much as X possible; else a \tmp directory (or a user supplied one) is used for X creation of temporary files. X X Two speedups have been done that might affect portability: X 1) a sortfile.c module is added that is included if the symbol BESTIO X is defined. The stdio buffers have been increased to 2K, which seems a X better choice than the standard 512. X 2) a fastcmp.asm module is added for faster comparision functions; X these functions are taken from sortcomp.c if M68000 is not defined. X X Contents of complete package: X X fastcmp.asm - module containing faster comparision functions X readme - this introductory file X sort.c - module containing basic sort functions X sort.doc - manual page for sort X sort.prg - the executable program X sortcomp.c - module containing comparision functions X sortcomp.h - header for sortcomp module X sortfile.c - module containing improved I/O functions X sortfile.h - header for sortfile module X sortmain.c - main module; argument scanning; general key matching X sortmain.h - header for sortmain module X X It will probably be distributed in two chucks: one containing .c and .h X files, readme and sort.doc (submitted to comp.sources.atari.st) and one X containing sort.prg, readme and sort.doc (s. to comp.binaries.atari.st). X X Compiled with Lattice C; first compiler pass with -dBESTIO option, second X with -r option. Fastcmp.asm was assembled using the GST linker. X X This program was the result of a question on the net, asking for a disk X based sort. It seemed like a nice project (in fact, it was 8-), and I X hope at least the person who asked it can make some use of it. X X For questions, bug reports etc. try X X L.J.M. de Wit X Nachtegaallaan 7 X 5731XP Mierlo X Holland X E-mail: ..!mcvax!philmds!leo (and various other paths...) X X Enjoy! (sort of ...) X SHAR_EOF chmod 0600 readme || echo "restore of readme fails" sed 's/^X//' << 'SHAR_EOF' > sort.c && X/****************************************************************************** X * * X * sort.c version 1.0 of 22 Januari 1989 (C) L.J.M. de Wit 1989 * X * * X * This software may be used and distributed freely if not used commercially * X * and the originator (me) is mentioned in the source (just leave this 9 line * X * header intact). * X * * X ****************************************************************************** X * X * sort.c: basic sort functions X */ X X#include <stdio.h> X#include <osbind.h> X#include "sortmain.h" X#include "sortfile.h" X X#define LINELEN 256 /* Max length of input lines*/ X#define MAXNAMLEN 80 /* Max length of filename */ X#define MAXMERGE 8 /* Max. no of files to be */ X /* simultaneously merged */ Xstatic int (*cmp)(); /* Ptr to compare function */ X Xstatic void sortsub(), /* Recursive quicksort */ X merge(); /* Merge sorted files */ Xstatic void tempname(); /* Creates temporary name */ Xstatic int isort(); /* Initial sort of file(s) */ X Xstatic char tmpdir[MAXNAMLEN]; /* Name of temp files dir */ X Xstatic char insave[2][LINELEN]; /* Used by unique */ X Xvoid settemp(tempdir) /* Sets the temp files dir */ Xchar *tempdir; X{ X strcpy(tmpdir,tempdir); X} X Xstatic void sortsub(sbase,stop) /* Sort the pointers */ Xchar **sbase, **stop; /* between sbase and stop */ X{ X register char **base = sbase, /* All below base and above */ X **top = stop, /* top is kept sorted with */ X **mid, /* respect to element *mid */ X *tmp; /* For swapping values */ X register long compfunc = (long)cmp; /* Kludge to use extra reg. */ X X#define CMP (*(int (*)())compfunc) /* Replaces (*cmp) */ X X mid = base + ((top - base) >> 1); /* Choose pivot */ X --top; X if (top - base >= 6) { /* Make sure pivot mid value*/ X if (CMP(*base,*mid) > 0) { X if (CMP(*mid,*top) <= 0) { X if (CMP(*base,*top) > 0) { X tmp = *mid; /* Swap *mid and *top */ X *mid = *top; X *top = tmp; X } else { X tmp = *mid; /* Swap *base and *mid */ X *mid = *base; X *base = tmp; X } X } X } else if (CMP(*mid,*top) > 0) { X tmp = *mid; /* Swap *mid and *top */ X *mid = *top; X *top = tmp; X } X } else { /* <= 6 elts: use bubblesort*/ X for ( ; base < top; --top) { X for (mid = base; mid < top; mid++) { X if (CMP(*mid,mid[1]) > 0) { X tmp = *mid; *mid = mid[1]; mid[1] = tmp; X } X } X } X X return; /* and return ... */ X } X do { X for ( ; base < mid && CMP(*base,*mid) <= 0; base++) ; X for ( ; top > mid && CMP(*mid,*top) <= 0; --top) ; X if (base < top) { X tmp = *base; *base = *top; *top = tmp; /* Swap *base and *top */ X if (mid == top) { X mid = base; X --top; X } else if (mid == base) { X mid = top; X base++; X } X } X } while (base < top); /* Until all sorted to *mid */ X if (mid - sbase > 1) { /* Sort lower half if >1 elt*/ X sortsub(sbase,mid); X } X if (stop - mid > 1) { /* Sort upper half if >1 elt*/ X sortsub(mid,stop); X } X} X Xstatic void merge(infnames,level, X startno,nmerge,outfname) /* Merge sorted input files */ Xchar **infnames; /* Input file names array */ Xint level, /* No. of times merged */ X startno, /* Sequence no. within level*/ X nmerge; /* No. of files to merge */ Xchar *outfname; /* File to be written to */ X{ X static char *inbuf[MAXMERGE]; /* (0)Ptrs to input buffers */ X static FILE *infp[MAXMERGE], *outfp; /* Input and output FILE *'s*/ X char namebuf[MAXNAMLEN]; /* To store temp file names */ X char *nameptr; X int cur, /* Number of current file */ X i, j, X nbusy = 0, /* # of input files in use */ X tmp; X int order[MAXMERGE]; /* Ordering of file numbers */ X char *s; X long avail; X X avail = (Malloc(-1) - 20480) & ~1; /* Get free - 20K */ X avail /= nmerge + 1; X avail -= avail % 2048; X X if (outfname == (char *)0) { X outfp = stdout; /* stdout is default */ X } else { X outfp = fopen(outfname,"w"); X if (outfp == (FILE *)0) { X error("%s: cannot open for write\n",outfname); X } X } X X if ((s = (char *)Malloc(avail)) == (char *)-1) { /* Grab it */ X error("memory allocation failed\n",(char *)0); X } X setbuffer(outfp,s,avail); X X if (level < 0) { X if (*infnames != (char *)0) { X for (i = 0; i < startno; i++) { X infnames++; X } X } X } X X for (i = 0; i < nmerge; i++) { /* Open each input file */ X if (level < 0) { X nameptr = *infnames++; X } else { X tempname(namebuf,level,startno+i); X nameptr = namebuf; X } X if (nameptr == (char *)0 || !strcmp(nameptr,"-")) { X infp[i] = stdin; /* stdin is default */ X } else { X infp[i] = fopen(nameptr,"r"); X if (infp[i] == (FILE *)0) { X error("%s: cannot open for read\n",nameptr); X } X } X if ((s = (char *)Malloc(avail)) == (char *)-1) { /* Grab it */ X error("memory allocation failed\n",(char *)0); X } X setbuffer(infp[i],s,avail); X X if (inbuf[i] == (char *)0) { /* Possibly allocate buffer */ X if ((inbuf[i] = (char *)malloc(LINELEN)) == (char *)0) { X error("memory allocation failed\n",(char *)0); X } X } X if (fgets(inbuf[i],LINELEN,infp[i]) != (char *)0) { /* Read 1st line*/ X order[nbusy++] = i; /* Enter its seq no. */ X for (j = nbusy - 1; j >= 1 && /* Put in place in order */ X (*cmp)(inbuf[order[j-1]],inbuf[order[j]]) > 0; --j) { X tmp = order[j-1]; order[j-1] = order[j]; order[j] = tmp; X } X } X } X X if ((options & UNIQUE) && nbusy > 0) { X insave[0][0] = inbuf[order[0]][0] + 1; /* Force difference */ X } X X while (nbusy > 0) { X cur = order[0]; /* No. of lowest ordered */ X if (options & UNIQUE) { X if ((*cmp)(inbuf[cur],insave[0])) { /* Differs from previous one*/ X fputs(inbuf[cur],outfp); /* Write out lowest ordered */ X strcpy(insave[0],inbuf[cur]); X } X } else { X fputs(inbuf[cur],outfp); /* Write out lowest ordered */ X } X if (fgets(inbuf[cur],LINELEN,infp[cur]) == (char *)0) {/* No next */ X for (i = 1; i < nbusy; i++) { /* Reorder rest */ X order[i-1] = order[i]; X } X --nbusy; X } else { /* Binary search */ X if (nbusy > 1 && (*cmp)(inbuf[cur],inbuf[order[1]]) > 0) { X if (nbusy > 2) { X int low, high = nbusy - 1, mid; X X if ((*cmp)(inbuf[cur],inbuf[order[high]]) >= 0) { X low = high; X } else { X low = 1; X while (high - low > 1) { X mid = (high + low) >> 1; X if ((*cmp)(inbuf[cur],inbuf[order[mid]]) >= 0) { X low = mid; X } else { X high = mid; X } X } X } X for (i = 0; i < low; i++) { /* Shift elements 1..low to */ X order[i] = order[i+1]; /* 0..low-1 to accomodate */ X } X order[low] = cur; /* for the new index cur */ X } else { X order[0] = order[1]; order[1] = cur; X } X } X } X } X X for (i = 0; i < nmerge; i++) { X fclose(infp[i]); /* Close each input file */ X if (level >= 0) { /* (also frees Malloced buf)*/ X tempname(namebuf,level,startno+i); X unlink(namebuf); /* and delete if temp file */ X } X } X fclose(outfp); /* Close output file */ X} X Xvoid allsort(infnames,destname,compar) /* Main sort entry point */ Xchar **infnames, /* Array of input filenames */ X *destname; /* Destination file name */ Xint (*compar)(); /* Comparision function */ X{ X char **infs; X char outfname[MAXNAMLEN]; /* For temp output filenames*/ X int level = 0, /* Merge level */ X nfiles, /* No. of files to merge */ X startno, /* Order no. within nfiles */ X nmerge = 0; /* # of files to merge at a */ X /* time, initialized to */ X /* hush the compiler */ X int sameout = 0; /* same in as outfile? */ X int merging; X X cmp = compar; /* Set compare function used*/ X /* throughout this module */ X nfiles = 0; X for (infs = infnames; *infs != (char *)0; infs++) { X nfiles++; X if (destname != (char *)0 && !strcmp(destname,*infs)) { X sameout = 1; X } X } X if (nfiles == 0) { X nfiles = 1; X } X X if (options & MERGEONLY) { X level = -1; X merging = 1; X } else { X nfiles = isort(infnames,destname); /* # Files left after incore*/ X merging = nfiles > 1; X level = 0; X } X X while (merging) { X merging = nfiles > MAXMERGE || (level < 0 && sameout); X for (startno = 0; startno < nfiles; startno += nmerge) { X if ((nmerge = nfiles - startno) > MAXMERGE) { X nmerge = MAXMERGE; /* Use at most MAXMERGE */ X } X tempname(outfname,level+1,startno/MAXMERGE); X merge(infnames,level,startno,nmerge, X (merging) ? outfname : destname); X } X nfiles = 1 + (nfiles-1)/MAXMERGE; X level++; /* Next merge level */ X } X} X Xstatic void tempname(buf,level,seqno) /* Create name of temp file */ Xchar *buf; /* Will contain name formed */ Xint level, seqno; /* Merge level & seq. no. */ X{ X sprintf(buf,"%s\\sort%04d.%03d",tmpdir,level,seqno); X} X Xstatic FILE *reopen(infp,fname) /* Open input file */ XFILE *infp; /* Fileptr to possibly close*/ Xchar *fname; /* File to be opened */ X{ X if (infp != (FILE *)0) { /* Close a valid file ptr */ X fclose(infp); X } X if (!strcmp(fname,"-")) { X return stdin; X } X if ((infp = fopen(fname,"r")) == (FILE *)0) { X error("%s: cannot open for read\n",fname); X } X return infp; X} X Xstatic int isort(infnames,destname) /* Initial in-memory sort */ Xchar **infnames, /* Array of input files */ X *destname; /* File to write result to */ X{ X FILE *infp, *outfp; /* Input, output file ptr */ X int avail, /* Available free space */ X seqno = 0, /* To count multiple writes */ X isfree; /* Not still used from avail*/ X char *sortdata, /* Start of usable space */ X *snext, /* First unused position */ X **sorttop, /* Top of usable space */ X **sortbase; /* Start of pointer space */ X char outnambuf[MAXNAMLEN], /* To create temp name in */ X *outfname, /* Name of output file */ X *fromfile; /* Name of input file */ X X if (*infnames == (char *)0) { X fromfile = "standard input"; X infp = stdin; X } else { X infp = reopen((FILE *)0,fromfile = *infnames++); X } X if (options & CHECKONLY) { /* Check if input ordered */ X int next = 1; X X while (fgets(insave[0],sizeof(insave[0]),infp) == (char *)0) { X if (*infnames == (char *)0) { X exit(0); /* All input files empty */ X } else { X infp = reopen((FILE *)0,fromfile = *infnames++); X } X } /* 1st line of 1st non-empty*/ X do { X if (fgets(insave[next],sizeof(insave[next]),infp) == (char *)0) { X if (*infnames == (char *)0) { X exit(0); /* Input was ordered OK */ X } else { X infp = reopen((FILE *)0,fromfile = *infnames++); X } X } else { X if ((*cmp)(insave[1-next],insave[next]) > 0) { /* Compare */ X error("out of order and -c specified\n",(char *)0); X } X next = 1 - next; /* Swap buffer to be used */ X } X } while (1); X } X avail = (Malloc(-1) - 20480) & ~1; /* Get free mem but 20K,even*/ X X if ((sortdata = (char *)Malloc(avail)) == (char *)-1) { /* Grab it */ X error("memory allocation failed\n",(char *)0); X } X sorttop = (char **)(sortdata + avail); /* Ptrs start at top of mem,*/ X /* going downwards */ X do { X snext = sortdata; isfree = avail; X sortbase = sorttop; X for ( ; isfree > LINELEN; ) { /* Read lines while room */ X while (fgets(snext,isfree,infp) == (char *)0) { X if (*infnames == (char *)0) { X break; X } else { X infp = reopen(infp,fromfile = *infnames++); X } X } X if (feof(infp)) break; /* All input read */ X *--sortbase = snext; /* Store current ptr */ X snext += strlen(snext) + 1; /* Skip the line read */ X isfree = (char *)(sortbase) - snext; X } X if (sortbase == sorttop && seqno > 0) { X break; /* No lines this time */ X } X sortsub(sortbase,sorttop); X tempname(outnambuf,0,seqno++); X outfname = outnambuf; X if (feof(infp)) { X if (*infnames == (char *)0) { X if (seqno == 1) { /* All read the first time: */ X outfname = destname; /* Write to file destname */ X } X } else { /* Open next input file */ X infp = reopen(infp,fromfile = *infnames++); X } X } X if (outfname == (char *)0) { /* Open output file */ X outfp = stdout; X } else { X outfp = fopen(outfname,"w"); X if (outfp == (FILE *)0) { X error("%s: cannot open for write\n",outfname); X } X } X if ((options & UNIQUE) && sorttop > sortbase) { X insave[0][0] = sortbase[0][0] + 1; /* Force difference */ X for ( ; sortbase < sorttop; sortbase++) { X if ((*cmp)(*sortbase,insave[0])) { /* != previous */ X fputs(*sortbase,outfp); X strcpy(insave[0],*sortbase); X } X } X } else { X for ( ; sortbase < sorttop; sortbase++) { X fputs(*sortbase,outfp); X } X } X fclose(outfp); X } while (!feof(infp)); X X fclose(infp); /* Close last input file */ X Mfree(sortdata); /* Release Malloced mem */ X X return seqno; /* No of output files used */ X} SHAR_EOF chmod 0600 sort.c || echo "restore of sort.c fails" sed 's/^X//' << 'SHAR_EOF' > sort.doc && X X X X X sort(1) X X X XNAME X sort - sort and/or merge files X XSYNTAX X sort [ -bdfimnrutx ] [ +pos1 [ -pos2 ] ] [ -o name ] [ -T dir ] [ name...] X XDESCRIPTION X Sort sorts, then writes the lines of the named files to its standard X output, or a file (-o option). X Default (no input files are named), the standard input is sorted. X The name "-" stands for the standard input, so that standard input can be X used together with other input files. X X By default, the sort key is an entire line; ordering is lexicographic. X The global ordering is affected by zero or more of the following options X (of which some combinations are meaningless, e.g. d and n). X X -b Leading blanks (spaces and tabs) are skipped in field comparisons. X X -d Dictionary order: only letters, digits and blanks are X significant in comparisons. X X -f Fold: treat upper case as if lower case. X X -i Ignore characters outside the ASCII range 040-0176 in X nonnumeric comparisons. X X -n An initial numeric string, consisting of optional X blanks, optional minus sign, and zero or more digits X with optional decimal point, is sorted by arithmetic X value. This option implies option b. X X -r Reverse the result of comparisons. X X -tx `Tab character' field separator is x. X X The notation +pos1 -pos2 restricts a sort key to a field X beginning at pos1 and ending just before pos2. X Pos1 and pos2 each have the form m.n, optionally followed by one X or more of the options bdfinr, where m tells a number of fields X to skip from the beginning of the line and n tells a number X of characters to skip further. If any options are present X they override all the global ordering options for this key. X If the b option is in effect n is counted from the first X nonblank in the field. X A missing .n means .0; a missing -pos2 means the end of the X line. Under the -tx option, fields are strings separated by X x; otherwise fields are nonempty nonblank strings separated X by blanks. X X When there are multiple sort keys, later keys are compared X only after all earlier keys compare equal. X X These are additional (global) options: X X -c Check that the input file is sorted according to the key(s); X if the file is out of sort, an error message is printed. X Exit code is 0/1 for correctly/incorrectly sorted input. X X -m Merge only, the input files are already sorted. X X -o The next argument is the output file used instead of the X standard output. This file may be the same as one of the inputs. X X -T Temporary files directory. Default is \tmp on the current drive. X X -u Unique: Select only one in each set of, according to the X key(s), equal lines. X XRESTRICTIONS X Very long (>256 chars) lines are silently truncated. X The line terminator sequence \r\n is present in the line when X doing comparisions; this should not present problems. Use of \0 in X input is discouraged, since this will be seen as a C string terminator. X XDIAGNOSTICS X Comments and exits with nonzero status for various trouble X conditions and for disorder discovered under option c(heck). X XFILES X \tmp\* for temporary files. X SHAR_EOF chmod 0600 sort.doc || echo "restore of sort.doc fails" sed 's/^X//' << 'SHAR_EOF' > sortcomp.c && X/****************************************************************************** X * * X * sortcomp.c version 1.0 of 1 Januari 1989 (C) L.J.M. de Wit 1989 * X * * X * This software may be used and distributed freely if not used commercially * X * and the originator (me) is mentioned in the source (just leave this 9 line * X * header intact). * X * * X ****************************************************************************** X * X * sortcomp.c: comparision functions X * X * These functions contain the numeric comparision functions c_n and its X * reverse, c_nr. The other 48 comparision functions are formed by combining X * the following properties (take note of the Capitals): X * a) either Dictionary, Ignore non-printables or Any other (3) X * b) ignore leading Blanks, versus not ignoring them (2) X * c) Fold upper case to lower, versus not folding (2) X * d) Reverse the result of the comparision, versus not reversing (2) X * e) Unbounded (bounded only by '\0'), versus upper limit bound (2) X * X * Since comparing is done a lot it is important for the compare functions X * to be fast. As for using the 48 functions as it stands the following X * arguments are used: X * One could have combined functions and used flags to discern between options. X * This would however involve passing more parameters to the function (the X * flags) and would require (perhaps a lot of) unwanted testing inside the X * function. X * Several functions call one of the other functions. This has been done in X * such a way that duplication of code is mostly prevented, but still this call X * is limited to only one (both in number and in level). X * The numerical compare could have been done by a simple atoi(), but the X * c_n function does the comparision inline, and stops comparing when the X * bigger one has been found (while atoi needs to do calculation ala X * 10 * num + digit, and will use all digits). X */ X X#include <stdio.h> X#include <ctype.h> X#include "sortmain.h" X#include "sortcomp.h" X X#define SKIPSPACE(bs,bt) while (wspace(*(bs))) (bs)++; while (wspace(*(bt))) (bt)++ X X#define NCHARS 256 X#define NORM 0x1 X#define DICT 0x2 X#define isnorm(c) (stype[(c)]&NORM) X#define isdict(c) (stype[(c)]&DICT) X X#undef isdigit X#define isdigit(c) ((c) >= '0' && (c) <= '9') X Xstatic char fold[NCHARS]; /* For lcase conversion */ X Xstatic char stype[NCHARS]; /* For table lookup */ X Xvoid initlookup() /* Initialize lookup tables */ X{ X register int i; X X for (i = 0; i < NCHARS; i++) { X fold[i] = isupper(i) ? tolower(i) : i; /* Init lcase convert array */ X stype[i] = 0; /* Not really needed ... */ X if (i >= 040 && i <= 0176) { /* Ascii non-control */ X stype[i] |= NORM; X } X if (wspace(i) || isalnum(i)) { /* Whitespace & alphanum */ X stype[i] |= DICT; X } X } X} X Xint c_nr(bs,bt,es,et) /* Reversed Numeric */ Xchar *bs, *bt; Xchar *es, *et; X{ X return c_n(bt,bs,et,es); X} X Xint c_n(bs,bt,es,et) /* Numeric compare */ Xregister char *bs, *bt; Xchar *es, *et; X{ X register int rev, mini; X X SKIPSPACE(bs,bt); X if (*bs == '-') { X if (*bt == '-') { /* Two negatives: reverse */ X rev = -1; X bs++; X bt++; X } else { /* bt will be the larger one*/ X return -1; X } X } else { X if (*bt == '-') { /* bs will be the larger one*/ X return 1; X } else { X rev = 1; /* Both positive */ X } X } X X while (*bs == *bt && isdigit(*bt)) { /* Compare & skip eq. digits*/ X bs++; bt++; X } X X if (!isdigit(*bs)) { X if (!isdigit(*bt)) { X if (*bs == '.' && *bt == '.') { /* Decimal point */ X while (*++bs == *++bt && isdigit(*bt)) ; X while (*bs == '0') bs++; /* Skip possibly trailing */ X while (*bt == '0') bt++; /* zeroes */ X if (!isdigit(*bs)) { X if (!isdigit(*bt)) { X return 0; /* Tails also equal */ X } X return -rev; /* bt has more digits */ X } X if (!isdigit(*bt)) { X return rev; /* bs has more digits */ X } X return (rev == 1) /* Current digit decides */ X ? (*bs - *bt) : (*bt - *bs); X } else { X return 0; /* Equal numeric strings */ X } X } X return -rev; /* bt has more digits */ X } X if (!isdigit(*bt)) { X return rev; /* bs has more digits */ X } X mini = (rev == 1) /* Current digit decides: */ X ? (*bs - *bt) : (*bt - *bs); /* Difference into mini */ X do { X bs++; X bt++; X } while (isdigit(*bs) && isdigit(*bt)); /* Skip any more digits */ X X if (!isdigit(*bs)) { X if (!isdigit(*bt)) { /* Strings equally long: */ X return mini; /* then mini decides */ X } X return -rev; /* bt has more digits */ X } X return rev; /* bs has more digits */ X} X Xint c_df(bs,bt,es,et) /* Dictionary/Fold */ Xregister char *bs, *bt; /* Strings to compare */ Xregister char *es, *et; X{ X --bs; --bt; --es; --et; X for ( ; bs < es && bt < et; ) { X if (fold[*++bs] != fold[*++bt]) { /* Increment & compare */ X if (isdict(*bs)) { X if (isdict(*bt)) { X return fold[*bs] - fold[*bt]; X } else { X --bs; /* Effectively skip *bt */ X } X } else if (isdict(*bt)) { X --bt; /* Effectively skip *bs */ X } /* (else: skip both) */ X } X } X for ( ; bs < es; ) { /* Skip any more */ X if (isdict(*++bs)) { /* non-dict chars in bs */ X --bs; X break; X } X } X for ( ; bt < et; ) { /* Skip any more */ X if (isdict(*++bt)) { /* non-dict chars in bt */ X --bt; X break; X } X } X X return (et - bt) - (es - bs); X} X Xint c_d(bs,bt,es,et) /* Dictionary order */ Xregister char *bs, *bt; Xregister char *es, *et; X{ X --bs; --bt; --es; --et; X for ( ; bs < es && bt < et; ) { X if (*++bs != *++bt) { /* Increment & compare */ X if (isdict(*bs)) { X if (isdict(*bt)) { X return fold[*bs] - fold[*bt]; SHAR_EOF echo "End of part 1, continue with part 2" echo "2" > s2_seq_.tmp exit 0