koreth@ssyx.ucsc.edu (Steven Grimm) (02/25/89)
Submitted-by: leo@philmds.dts.philips.nl (Leo de Wit)
Posting-number: Volume 2, Issue 17
Archive-name: sort/part01
This sort program will appear familiar to *nix freaks. You can use it
to sort and/or merge multiple input files on several keys (numeric,
lexicographic). All hardware configurations should be supported (at
least I don't think it is dependent on resolution, memory size, disk
type etc.). Binary & docs submitted to comp.binaries.atari.st.
#!/bin/sh
# shar: Shell Archiver (v1.22)
#
# This is part 1 of a multipart archive
# do not concatenate these parts, unpack them in order with /bin/sh
#
# Run the following text with /bin/sh to create:
# fastcmp.asm
# readme
# sort.c
# sort.doc
# sortcomp.c
# sortcomp.h
# sortfile.c
# sortfile.h
# sortmain.c
# sortmain.h
#
if test -r s2_seq_.tmp
then echo "Must unpack archives in sequence!"
next=`cat s2_seq_.tmp`; echo "Please unpack part $next next"
exit 1; fi
sed 's/^X//' << 'SHAR_EOF' > fastcmp.asm &&
X******************************************************************************
X* *
X* fastcmp.asm version 1.0 of 22 Januari 1989 (C) L.J.M. de Wit 1989 *
X* *
X* This software may be used and distributed freely if not used commercially *
X* and the originator (me) is mentioned in the source (just leave this 9 line *
X* header intact). *
X* *
X******************************************************************************
X*
X* fastcmp.asm: fast sort functions
X*
X* The code here presented is offered as a replacement for some of the C
X* functions present in sortcomp.c. Since comparisions are done a lot while
X* sorting, some performance tweaking didn't seem misplaced.
X*
X* As for documentation, a reference is made to the corresponding C functions
X* in sortcomp.c. Most of the code here does simple work, like swapping
X* arguments (for the 'r' options), and/or skipping white space (for the 'b'
X* options; marked by 'skips' labels).
X*
X
X section s.ccode
X
Xtab equ 9
X
X xref c_df
X xref c_d
X xref c_if
X xref c_i
X xref c_af
X
X xref c_dfu
X xref c_du
X xref c_ifu
X xref c_iu
X xref c_afu
X
X xdef c_dbfr
Xc_dbfr
X* Dict/Blank/Fold/Reverse *
X move.l 4(sp),a0
X move.l 8(sp),4(sp)
X move.l a0,8(sp)
X move.l 12(sp),a0
X move.l 16(sp),12(sp)
X move.l a0,16(sp)
X* FALLTHROUGH
X
X xdef c_dbf
Xc_dbf
X* Dict/Blank/Fold *
X movea.l 4(sp),a0
X movea.l 8(sp),a1
Xskips0a
X move.b (a0)+,d0
X cmp.b #' ',d0
X beq.s skips0a
X cmp.b #tab,d0
X beq.s skips0a
X subq.l #1,a0
Xskips1a
X move.b (a1)+,d1
X cmp.b #' ',d1
X beq.s skips1a
X cmp.b #tab,d1
X beq.s skips1a
X subq.l #1,a1
X move.l a0,4(sp)
X move.l a1,8(sp)
X jmp c_df
X
X xdef c_dbr
Xc_dbr
X* Dict/Blank/Reverse *
X move.l 4(sp),a0
X move.l 8(sp),4(sp)
X move.l a0,8(sp)
X move.l 12(sp),a0
X move.l 16(sp),12(sp)
X move.l a0,16(sp)
X* FALLTHROUGH
X
X xdef c_db
Xc_db
X* Dict/Blank *
X movea.l 4(sp),a0
X movea.l 8(sp),a1
Xskips0b
X move.b (a0)+,d0
X cmp.b #' ',d0
X beq.s skips0b
X cmp.b #tab,d0
X beq.s skips0b
X subq.l #1,a0
Xskips1b
X move.b (a1)+,d1
X cmp.b #' ',d1
X beq.s skips1b
X cmp.b #tab,d1
X beq.s skips1b
X subq.l #1,a1
X move.l a0,4(sp)
X move.l a1,8(sp)
X jmp c_d
X
X xdef c_dfr
Xc_dfr
X* Dict/Fold/Reverse *
X move.l 4(sp),a0
X move.l 8(sp),4(sp)
X move.l a0,8(sp)
X move.l 12(sp),a0
X move.l 16(sp),12(sp)
X move.l a0,16(sp)
X jmp c_df
X
X xdef c_dr
Xc_dr
X* Dict/Reverse *
X move.l 4(sp),a0
X move.l 8(sp),4(sp)
X move.l a0,8(sp)
X move.l 12(sp),a0
X move.l 16(sp),12(sp)
X move.l a0,16(sp)
X jmp c_d
X
X xdef c_ibfr
Xc_ibfr
X* Norm/Blank/Fold/Reverse *
X move.l 4(sp),a0
X move.l 8(sp),4(sp)
X move.l a0,8(sp)
X move.l 12(sp),a0
X move.l 16(sp),12(sp)
X move.l a0,16(sp)
X* FALLTHROUGH
X
X xdef c_ibf
Xc_ibf
X* Norm/Blank/Fold *
X movea.l 4(sp),a0
X movea.l 8(sp),a1
Xskips0c
X move.b (a0)+,d0
X cmp.b #' ',d0
X beq.s skips0c
X cmp.b #tab,d0
X beq.s skips0c
X subq.l #1,a0
Xskips1c
X move.b (a1)+,d1
X cmp.b #' ',d1
X beq.s skips1c
X cmp.b #tab,d1
X beq.s skips1c
X subq.l #1,a1
X move.l a0,4(sp)
X move.l a1,8(sp)
X jmp c_if
X
X xdef c_ibr
Xc_ibr
X* Norm/Blank/Reverse *
X move.l 4(sp),a0
X move.l 8(sp),4(sp)
X move.l a0,8(sp)
X move.l 12(sp),a0
X move.l 16(sp),12(sp)
X move.l a0,16(sp)
X* FALLTHROUGH
X
X xdef c_ib
Xc_ib
X* Norm/Blank *
X movea.l 4(sp),a0
X movea.l 8(sp),a1
Xskips0d
X move.b (a0)+,d0
X cmp.b #' ',d0
X beq.s skips0d
X cmp.b #tab,d0
X beq.s skips0d
X subq.l #1,a0
Xskips1d
X move.b (a1)+,d1
X cmp.b #' ',d1
X beq.s skips1d
X cmp.b #tab,d1
X beq.s skips1d
X subq.l #1,a1
X move.l a0,4(sp)
X move.l a1,8(sp)
X jmp c_i
X
X xdef c_ifr
Xc_ifr
X* Norm/Fold/Reverse *
X move.l 4(sp),a0
X move.l 8(sp),4(sp)
X move.l a0,8(sp)
X move.l 12(sp),a0
X move.l 16(sp),12(sp)
X move.l a0,16(sp)
X jmp c_if
X
X xdef c_ir
Xc_ir
X* Norm/Reverse *
X move.l 4(sp),a0
X move.l 8(sp),4(sp)
X move.l a0,8(sp)
X move.l 12(sp),a0
X move.l 16(sp),12(sp)
X move.l a0,16(sp)
X jmp c_i
X
X xdef c_abfr
Xc_abfr
X* Blank/Fold/Reverse *
X move.l 4(sp),a0
X move.l 8(sp),4(sp)
X move.l a0,8(sp)
X move.l 12(sp),a0
X move.l 16(sp),12(sp)
X move.l a0,16(sp)
X* FALLTHROUGH
X
X xdef c_abf
Xc_abf
X* Blank/Fold *
X movea.l 4(sp),a0
X movea.l 8(sp),a1
Xskips0e
X move.b (a0)+,d0
X cmp.b #' ',d0
X beq.s skips0e
X cmp.b #tab,d0
X beq.s skips0e
X subq.l #1,a0
Xskips1e
X move.b (a1)+,d1
X cmp.b #' ',d1
X beq.s skips1e
X cmp.b #tab,d1
X beq.s skips1e
X subq.l #1,a1
X move.l a0,4(sp)
X move.l a1,8(sp)
X jmp c_af
X
X xdef c_abr
Xc_abr
X* Blank/Reverse *
X move.l 4(sp),a0
X move.l 8(sp),4(sp)
X move.l a0,8(sp)
X move.l 12(sp),a0
X move.l 16(sp),12(sp)
X move.l a0,16(sp)
X* FALLTHROUGH
X
X xdef c_ab
Xc_ab
X* Blank *
X movea.l 4(sp),a0
X movea.l 8(sp),a1
Xskips0f
X move.b (a0)+,d0
X cmp.b #' ',d0
X beq.s skips0f
X cmp.b #tab,d0
X beq.s skips0f
X subq.l #1,a0
Xskips1f
X move.b (a1)+,d1
X cmp.b #' ',d1
X beq.s skips1f
X cmp.b #tab,d1
X beq.s skips1f
X subq.l #1,a1
X move.l a0,4(sp)
X move.l a1,8(sp)
X jmp c_a
X
X xdef c_afr
Xc_afr
X* Fold/Reverse *
X move.l 4(sp),a0
X move.l 8(sp),4(sp)
X move.l a0,8(sp)
X move.l 12(sp),a0
X move.l 16(sp),12(sp)
X move.l a0,16(sp)
X jmp c_af
X
X xdef c_ar
Xc_ar
X* Reverse *
X move.l 4(sp),a0
X move.l 8(sp),4(sp)
X move.l a0,8(sp)
X move.l 12(sp),a0
X move.l 16(sp),12(sp)
X move.l a0,16(sp)
X jmp c_a
X
X xdef c_a
Xc_a
X move.l 4(sp),d0
X move.l 12(sp),d1
X sub.l d0,d1
X move.l d0,a0
X move.l 8(sp),d0
X move.l 16(sp),d2
X sub.l d0,d2
X move.l d0,a1
X move.w d2,d0
X sub.w d1,d0
X blt.s c_asmall
X move.w d1,d2
Xc_asmall
X dbra d2,c_aloop
Xc_aeqs
X ext.l d0
X rts
X
Xc_aloop
X cmp.b (a0)+,(a1)+
X dbne d2,c_aloop
X beq.s c_aeqs
X bpl.s c_aplus
X moveq.l #1,d0
X rts
Xc_aplus
X moveq.l #-1,d0
X rts
X
X* Unbounded comparisions start here *
X
X xdef c_dbfru
Xc_dbfru
X* Dict/Blank/Fold/Reverse *
X move.l 4(sp),a1
X move.l 8(sp),a0
X bra.s skips0g
X
X xdef c_dbfu
Xc_dbfu
X* Dict/Blank/Fold *
X movea.l 4(sp),a0
X movea.l 8(sp),a1
Xskips0g
X move.b (a0)+,d0
X cmp.b #' ',d0
X beq.s skips0g
X cmp.b #tab,d0
X beq.s skips0g
X subq.l #1,a0
Xskips1g
X move.b (a1)+,d1
X cmp.b #' ',d1
X beq.s skips1g
X cmp.b #tab,d1
X beq.s skips1g
X subq.l #1,a1
X move.l a0,4(sp)
X move.l a1,8(sp)
X jmp c_dfu
X
X xdef c_dbru
Xc_dbru
X* Dict/Blank/Reverse *
X move.l 4(sp),a1
X move.l 8(sp),a0
X bra.s skips0h
X
X xdef c_dbu
Xc_dbu
X* Dict/Blank *
X movea.l 4(sp),a0
X movea.l 8(sp),a1
Xskips0h
X move.b (a0)+,d0
X cmp.b #' ',d0
X beq.s skips0h
X cmp.b #tab,d0
X beq.s skips0h
X subq.l #1,a0
Xskips1h
X move.b (a1)+,d1
X cmp.b #' ',d1
X beq.s skips1h
X cmp.b #tab,d1
X beq.s skips1h
X subq.l #1,a1
X move.l a0,4(sp)
X move.l a1,8(sp)
X jmp c_du
X
X xdef c_dfru
Xc_dfru
X* Dict/Fold/Reverse *
X move.l 4(sp),a0
X move.l 8(sp),4(sp)
X move.l a0,8(sp)
X jmp c_dfu
X
X xdef c_dru
Xc_dru
X* Dict/Reverse *
X move.l 4(sp),a0
X move.l 8(sp),4(sp)
X move.l a0,8(sp)
X jmp c_du
X
X xdef c_ibfru
Xc_ibfru
X* Norm/Blank/Fold/Reverse *
X move.l 4(sp),a1
X move.l 8(sp),a0
X bra.s skips0i
X* FALLTHROUGH
X
X xdef c_ibfu
Xc_ibfu
X* Norm/Blank/Fold *
X movea.l 4(sp),a0
X movea.l 8(sp),a1
Xskips0i
X move.b (a0)+,d0
X cmp.b #' ',d0
X beq.s skips0i
X cmp.b #tab,d0
X beq.s skips0i
X subq.l #1,a0
Xskips1i
X move.b (a1)+,d1
X cmp.b #' ',d1
X beq.s skips1i
X cmp.b #tab,d1
X beq.s skips1i
X subq.l #1,a1
X move.l a0,4(sp)
X move.l a1,8(sp)
X jmp c_ifu
X
X xdef c_ibru
Xc_ibru
X* Norm/Blank/Reverse *
X move.l 4(sp),a1
X move.l 8(sp),a0
X bra.s skips0j
X
X xdef c_ibu
Xc_ibu
X* Norm/Blank *
X movea.l 4(sp),a0
X movea.l 8(sp),a1
Xskips0j
X move.b (a0)+,d0
X cmp.b #' ',d0
X beq.s skips0j
X cmp.b #tab,d0
X beq.s skips0j
X subq.l #1,a0
Xskips1j
X move.b (a1)+,d1
X cmp.b #' ',d1
X beq.s skips1j
X cmp.b #tab,d1
X beq.s skips1j
X subq.l #1,a1
X move.l a0,4(sp)
X move.l a1,8(sp)
X jmp c_iu
X
X xdef c_ifru
Xc_ifru
X* Norm/Fold/Reverse *
X move.l 4(sp),a0
X move.l 8(sp),4(sp)
X move.l a0,8(sp)
X jmp c_ifu
X
X xdef c_iru
Xc_iru
X* Norm/Reverse *
X move.l 4(sp),a0
X move.l 8(sp),4(sp)
X move.l a0,8(sp)
X jmp c_iu
X
X xdef c_abfru
Xc_abfru
X* Blank/Fold/Reverse *
X move.l 4(sp),a1
X move.l 8(sp),a0
X bra.s skips0k
X
X xdef c_abfu
Xc_abfu
X* Blank/Fold *
X movea.l 4(sp),a0
X movea.l 8(sp),a1
Xskips0k
X move.b (a0)+,d0
X cmp.b #' ',d0
X beq.s skips0k
X cmp.b #tab,d0
X beq.s skips0k
X subq.l #1,a0
Xskips1k
X move.b (a1)+,d1
X cmp.b #' ',d1
X beq.s skips1k
X cmp.b #tab,d1
X beq.s skips1k
X subq.l #1,a1
X move.l a0,4(sp)
X move.l a1,8(sp)
X jmp c_afu
X
X xdef c_abru
Xc_abru
X* Blank/Reverse *
X move.l 4(sp),a1
X move.l 8(sp),a0
X bra.s skips0l
X
X xdef c_abu
Xc_abu
X* Blank *
X movea.l 4(sp),a0
X movea.l 8(sp),a1
Xskips0l
X move.b (a0)+,d0
X cmp.b #' ',d0
X beq.s skips0l
X cmp.b #tab,d0
X beq.s skips0l
X subq.l #1,a0
Xskips1l
X move.b (a1)+,d1
X cmp.b #' ',d1
X beq.s skips1l
X cmp.b #tab,d1
X beq.s skips1l
X subq.l #1,a1
X move.l a0,4(sp)
X move.l a1,8(sp)
X jmp c_au
X
X xdef c_afru
Xc_afru
X* Fold/Reverse *
X move.l 4(sp),a0
X move.l 8(sp),4(sp)
X move.l a0,8(sp)
X jmp c_afu
X
X xdef c_aru
Xc_aru
X* Reverse *
X move.l 4(sp),a1
X move.l 8(sp),a0
X bra.s c_auloop
X
X xdef c_au
Xc_au
X movea.l 4(sp),a0
X movea.l 8(sp),a1
Xc_auloop
X move.b (a0)+,d0
X beq.s c_auex0
X sub.b (a1)+,d0
X bne.s c_auex1
X move.b (a0)+,d0
X beq.s c_auex0
X sub.b (a1)+,d0
X bne.s c_auex1
X move.b (a0)+,d0
X beq.s c_auex0
X sub.b (a1)+,d0
X bne.s c_auex1
X move.b (a0)+,d0
X beq.s c_auex0
X sub.b (a1)+,d0
X beq.s c_auloop
Xc_auex1
X ext.w d0
X ext.l d0
X rts
Xc_auex0
X sub.b (a1),d0
X ext.w d0
X ext.l d0
X rts
X
X end
SHAR_EOF
chmod 0600 fastcmp.asm || echo "restore of fastcmp.asm fails"
sed 's/^X//' << 'SHAR_EOF' > readme &&
X
X Sort
X ----
X
X
X The sort utility here presented is comparable to the Unix sort utility;
X even most of the flags are the same. Sorting is done in core as much as
X possible; else a \tmp directory (or a user supplied one) is used for
X creation of temporary files.
X
X Two speedups have been done that might affect portability:
X 1) a sortfile.c module is added that is included if the symbol BESTIO
X is defined. The stdio buffers have been increased to 2K, which seems a
X better choice than the standard 512.
X 2) a fastcmp.asm module is added for faster comparision functions;
X these functions are taken from sortcomp.c if M68000 is not defined.
X
X Contents of complete package:
X
X fastcmp.asm - module containing faster comparision functions
X readme - this introductory file
X sort.c - module containing basic sort functions
X sort.doc - manual page for sort
X sort.prg - the executable program
X sortcomp.c - module containing comparision functions
X sortcomp.h - header for sortcomp module
X sortfile.c - module containing improved I/O functions
X sortfile.h - header for sortfile module
X sortmain.c - main module; argument scanning; general key matching
X sortmain.h - header for sortmain module
X
X It will probably be distributed in two chucks: one containing .c and .h
X files, readme and sort.doc (submitted to comp.sources.atari.st) and one
X containing sort.prg, readme and sort.doc (s. to comp.binaries.atari.st).
X
X Compiled with Lattice C; first compiler pass with -dBESTIO option, second
X with -r option. Fastcmp.asm was assembled using the GST linker.
X
X This program was the result of a question on the net, asking for a disk
X based sort. It seemed like a nice project (in fact, it was 8-), and I
X hope at least the person who asked it can make some use of it.
X
X For questions, bug reports etc. try
X
X L.J.M. de Wit
X Nachtegaallaan 7
X 5731XP Mierlo
X Holland
X E-mail: ..!mcvax!philmds!leo (and various other paths...)
X
X Enjoy! (sort of ...)
X
SHAR_EOF
chmod 0600 readme || echo "restore of readme fails"
sed 's/^X//' << 'SHAR_EOF' > sort.c &&
X/******************************************************************************
X * *
X * sort.c version 1.0 of 22 Januari 1989 (C) L.J.M. de Wit 1989 *
X * *
X * This software may be used and distributed freely if not used commercially *
X * and the originator (me) is mentioned in the source (just leave this 9 line *
X * header intact). *
X * *
X ******************************************************************************
X *
X * sort.c: basic sort functions
X */
X
X#include <stdio.h>
X#include <osbind.h>
X#include "sortmain.h"
X#include "sortfile.h"
X
X#define LINELEN 256 /* Max length of input lines*/
X#define MAXNAMLEN 80 /* Max length of filename */
X#define MAXMERGE 8 /* Max. no of files to be */
X /* simultaneously merged */
Xstatic int (*cmp)(); /* Ptr to compare function */
X
Xstatic void sortsub(), /* Recursive quicksort */
X merge(); /* Merge sorted files */
Xstatic void tempname(); /* Creates temporary name */
Xstatic int isort(); /* Initial sort of file(s) */
X
Xstatic char tmpdir[MAXNAMLEN]; /* Name of temp files dir */
X
Xstatic char insave[2][LINELEN]; /* Used by unique */
X
Xvoid settemp(tempdir) /* Sets the temp files dir */
Xchar *tempdir;
X{
X strcpy(tmpdir,tempdir);
X}
X
Xstatic void sortsub(sbase,stop) /* Sort the pointers */
Xchar **sbase, **stop; /* between sbase and stop */
X{
X register char **base = sbase, /* All below base and above */
X **top = stop, /* top is kept sorted with */
X **mid, /* respect to element *mid */
X *tmp; /* For swapping values */
X register long compfunc = (long)cmp; /* Kludge to use extra reg. */
X
X#define CMP (*(int (*)())compfunc) /* Replaces (*cmp) */
X
X mid = base + ((top - base) >> 1); /* Choose pivot */
X --top;
X if (top - base >= 6) { /* Make sure pivot mid value*/
X if (CMP(*base,*mid) > 0) {
X if (CMP(*mid,*top) <= 0) {
X if (CMP(*base,*top) > 0) {
X tmp = *mid; /* Swap *mid and *top */
X *mid = *top;
X *top = tmp;
X } else {
X tmp = *mid; /* Swap *base and *mid */
X *mid = *base;
X *base = tmp;
X }
X }
X } else if (CMP(*mid,*top) > 0) {
X tmp = *mid; /* Swap *mid and *top */
X *mid = *top;
X *top = tmp;
X }
X } else { /* <= 6 elts: use bubblesort*/
X for ( ; base < top; --top) {
X for (mid = base; mid < top; mid++) {
X if (CMP(*mid,mid[1]) > 0) {
X tmp = *mid; *mid = mid[1]; mid[1] = tmp;
X }
X }
X }
X
X return; /* and return ... */
X }
X do {
X for ( ; base < mid && CMP(*base,*mid) <= 0; base++) ;
X for ( ; top > mid && CMP(*mid,*top) <= 0; --top) ;
X if (base < top) {
X tmp = *base; *base = *top; *top = tmp; /* Swap *base and *top */
X if (mid == top) {
X mid = base;
X --top;
X } else if (mid == base) {
X mid = top;
X base++;
X }
X }
X } while (base < top); /* Until all sorted to *mid */
X if (mid - sbase > 1) { /* Sort lower half if >1 elt*/
X sortsub(sbase,mid);
X }
X if (stop - mid > 1) { /* Sort upper half if >1 elt*/
X sortsub(mid,stop);
X }
X}
X
Xstatic void merge(infnames,level,
X startno,nmerge,outfname) /* Merge sorted input files */
Xchar **infnames; /* Input file names array */
Xint level, /* No. of times merged */
X startno, /* Sequence no. within level*/
X nmerge; /* No. of files to merge */
Xchar *outfname; /* File to be written to */
X{
X static char *inbuf[MAXMERGE]; /* (0)Ptrs to input buffers */
X static FILE *infp[MAXMERGE], *outfp; /* Input and output FILE *'s*/
X char namebuf[MAXNAMLEN]; /* To store temp file names */
X char *nameptr;
X int cur, /* Number of current file */
X i, j,
X nbusy = 0, /* # of input files in use */
X tmp;
X int order[MAXMERGE]; /* Ordering of file numbers */
X char *s;
X long avail;
X
X avail = (Malloc(-1) - 20480) & ~1; /* Get free - 20K */
X avail /= nmerge + 1;
X avail -= avail % 2048;
X
X if (outfname == (char *)0) {
X outfp = stdout; /* stdout is default */
X } else {
X outfp = fopen(outfname,"w");
X if (outfp == (FILE *)0) {
X error("%s: cannot open for write\n",outfname);
X }
X }
X
X if ((s = (char *)Malloc(avail)) == (char *)-1) { /* Grab it */
X error("memory allocation failed\n",(char *)0);
X }
X setbuffer(outfp,s,avail);
X
X if (level < 0) {
X if (*infnames != (char *)0) {
X for (i = 0; i < startno; i++) {
X infnames++;
X }
X }
X }
X
X for (i = 0; i < nmerge; i++) { /* Open each input file */
X if (level < 0) {
X nameptr = *infnames++;
X } else {
X tempname(namebuf,level,startno+i);
X nameptr = namebuf;
X }
X if (nameptr == (char *)0 || !strcmp(nameptr,"-")) {
X infp[i] = stdin; /* stdin is default */
X } else {
X infp[i] = fopen(nameptr,"r");
X if (infp[i] == (FILE *)0) {
X error("%s: cannot open for read\n",nameptr);
X }
X }
X if ((s = (char *)Malloc(avail)) == (char *)-1) { /* Grab it */
X error("memory allocation failed\n",(char *)0);
X }
X setbuffer(infp[i],s,avail);
X
X if (inbuf[i] == (char *)0) { /* Possibly allocate buffer */
X if ((inbuf[i] = (char *)malloc(LINELEN)) == (char *)0) {
X error("memory allocation failed\n",(char *)0);
X }
X }
X if (fgets(inbuf[i],LINELEN,infp[i]) != (char *)0) { /* Read 1st line*/
X order[nbusy++] = i; /* Enter its seq no. */
X for (j = nbusy - 1; j >= 1 && /* Put in place in order */
X (*cmp)(inbuf[order[j-1]],inbuf[order[j]]) > 0; --j) {
X tmp = order[j-1]; order[j-1] = order[j]; order[j] = tmp;
X }
X }
X }
X
X if ((options & UNIQUE) && nbusy > 0) {
X insave[0][0] = inbuf[order[0]][0] + 1; /* Force difference */
X }
X
X while (nbusy > 0) {
X cur = order[0]; /* No. of lowest ordered */
X if (options & UNIQUE) {
X if ((*cmp)(inbuf[cur],insave[0])) { /* Differs from previous one*/
X fputs(inbuf[cur],outfp); /* Write out lowest ordered */
X strcpy(insave[0],inbuf[cur]);
X }
X } else {
X fputs(inbuf[cur],outfp); /* Write out lowest ordered */
X }
X if (fgets(inbuf[cur],LINELEN,infp[cur]) == (char *)0) {/* No next */
X for (i = 1; i < nbusy; i++) { /* Reorder rest */
X order[i-1] = order[i];
X }
X --nbusy;
X } else { /* Binary search */
X if (nbusy > 1 && (*cmp)(inbuf[cur],inbuf[order[1]]) > 0) {
X if (nbusy > 2) {
X int low, high = nbusy - 1, mid;
X
X if ((*cmp)(inbuf[cur],inbuf[order[high]]) >= 0) {
X low = high;
X } else {
X low = 1;
X while (high - low > 1) {
X mid = (high + low) >> 1;
X if ((*cmp)(inbuf[cur],inbuf[order[mid]]) >= 0) {
X low = mid;
X } else {
X high = mid;
X }
X }
X }
X for (i = 0; i < low; i++) { /* Shift elements 1..low to */
X order[i] = order[i+1]; /* 0..low-1 to accomodate */
X }
X order[low] = cur; /* for the new index cur */
X } else {
X order[0] = order[1]; order[1] = cur;
X }
X }
X }
X }
X
X for (i = 0; i < nmerge; i++) {
X fclose(infp[i]); /* Close each input file */
X if (level >= 0) { /* (also frees Malloced buf)*/
X tempname(namebuf,level,startno+i);
X unlink(namebuf); /* and delete if temp file */
X }
X }
X fclose(outfp); /* Close output file */
X}
X
Xvoid allsort(infnames,destname,compar) /* Main sort entry point */
Xchar **infnames, /* Array of input filenames */
X *destname; /* Destination file name */
Xint (*compar)(); /* Comparision function */
X{
X char **infs;
X char outfname[MAXNAMLEN]; /* For temp output filenames*/
X int level = 0, /* Merge level */
X nfiles, /* No. of files to merge */
X startno, /* Order no. within nfiles */
X nmerge = 0; /* # of files to merge at a */
X /* time, initialized to */
X /* hush the compiler */
X int sameout = 0; /* same in as outfile? */
X int merging;
X
X cmp = compar; /* Set compare function used*/
X /* throughout this module */
X nfiles = 0;
X for (infs = infnames; *infs != (char *)0; infs++) {
X nfiles++;
X if (destname != (char *)0 && !strcmp(destname,*infs)) {
X sameout = 1;
X }
X }
X if (nfiles == 0) {
X nfiles = 1;
X }
X
X if (options & MERGEONLY) {
X level = -1;
X merging = 1;
X } else {
X nfiles = isort(infnames,destname); /* # Files left after incore*/
X merging = nfiles > 1;
X level = 0;
X }
X
X while (merging) {
X merging = nfiles > MAXMERGE || (level < 0 && sameout);
X for (startno = 0; startno < nfiles; startno += nmerge) {
X if ((nmerge = nfiles - startno) > MAXMERGE) {
X nmerge = MAXMERGE; /* Use at most MAXMERGE */
X }
X tempname(outfname,level+1,startno/MAXMERGE);
X merge(infnames,level,startno,nmerge,
X (merging) ? outfname : destname);
X }
X nfiles = 1 + (nfiles-1)/MAXMERGE;
X level++; /* Next merge level */
X }
X}
X
Xstatic void tempname(buf,level,seqno) /* Create name of temp file */
Xchar *buf; /* Will contain name formed */
Xint level, seqno; /* Merge level & seq. no. */
X{
X sprintf(buf,"%s\\sort%04d.%03d",tmpdir,level,seqno);
X}
X
Xstatic FILE *reopen(infp,fname) /* Open input file */
XFILE *infp; /* Fileptr to possibly close*/
Xchar *fname; /* File to be opened */
X{
X if (infp != (FILE *)0) { /* Close a valid file ptr */
X fclose(infp);
X }
X if (!strcmp(fname,"-")) {
X return stdin;
X }
X if ((infp = fopen(fname,"r")) == (FILE *)0) {
X error("%s: cannot open for read\n",fname);
X }
X return infp;
X}
X
Xstatic int isort(infnames,destname) /* Initial in-memory sort */
Xchar **infnames, /* Array of input files */
X *destname; /* File to write result to */
X{
X FILE *infp, *outfp; /* Input, output file ptr */
X int avail, /* Available free space */
X seqno = 0, /* To count multiple writes */
X isfree; /* Not still used from avail*/
X char *sortdata, /* Start of usable space */
X *snext, /* First unused position */
X **sorttop, /* Top of usable space */
X **sortbase; /* Start of pointer space */
X char outnambuf[MAXNAMLEN], /* To create temp name in */
X *outfname, /* Name of output file */
X *fromfile; /* Name of input file */
X
X if (*infnames == (char *)0) {
X fromfile = "standard input";
X infp = stdin;
X } else {
X infp = reopen((FILE *)0,fromfile = *infnames++);
X }
X if (options & CHECKONLY) { /* Check if input ordered */
X int next = 1;
X
X while (fgets(insave[0],sizeof(insave[0]),infp) == (char *)0) {
X if (*infnames == (char *)0) {
X exit(0); /* All input files empty */
X } else {
X infp = reopen((FILE *)0,fromfile = *infnames++);
X }
X } /* 1st line of 1st non-empty*/
X do {
X if (fgets(insave[next],sizeof(insave[next]),infp) == (char *)0) {
X if (*infnames == (char *)0) {
X exit(0); /* Input was ordered OK */
X } else {
X infp = reopen((FILE *)0,fromfile = *infnames++);
X }
X } else {
X if ((*cmp)(insave[1-next],insave[next]) > 0) { /* Compare */
X error("out of order and -c specified\n",(char *)0);
X }
X next = 1 - next; /* Swap buffer to be used */
X }
X } while (1);
X }
X avail = (Malloc(-1) - 20480) & ~1; /* Get free mem but 20K,even*/
X
X if ((sortdata = (char *)Malloc(avail)) == (char *)-1) { /* Grab it */
X error("memory allocation failed\n",(char *)0);
X }
X sorttop = (char **)(sortdata + avail); /* Ptrs start at top of mem,*/
X /* going downwards */
X do {
X snext = sortdata; isfree = avail;
X sortbase = sorttop;
X for ( ; isfree > LINELEN; ) { /* Read lines while room */
X while (fgets(snext,isfree,infp) == (char *)0) {
X if (*infnames == (char *)0) {
X break;
X } else {
X infp = reopen(infp,fromfile = *infnames++);
X }
X }
X if (feof(infp)) break; /* All input read */
X *--sortbase = snext; /* Store current ptr */
X snext += strlen(snext) + 1; /* Skip the line read */
X isfree = (char *)(sortbase) - snext;
X }
X if (sortbase == sorttop && seqno > 0) {
X break; /* No lines this time */
X }
X sortsub(sortbase,sorttop);
X tempname(outnambuf,0,seqno++);
X outfname = outnambuf;
X if (feof(infp)) {
X if (*infnames == (char *)0) {
X if (seqno == 1) { /* All read the first time: */
X outfname = destname; /* Write to file destname */
X }
X } else { /* Open next input file */
X infp = reopen(infp,fromfile = *infnames++);
X }
X }
X if (outfname == (char *)0) { /* Open output file */
X outfp = stdout;
X } else {
X outfp = fopen(outfname,"w");
X if (outfp == (FILE *)0) {
X error("%s: cannot open for write\n",outfname);
X }
X }
X if ((options & UNIQUE) && sorttop > sortbase) {
X insave[0][0] = sortbase[0][0] + 1; /* Force difference */
X for ( ; sortbase < sorttop; sortbase++) {
X if ((*cmp)(*sortbase,insave[0])) { /* != previous */
X fputs(*sortbase,outfp);
X strcpy(insave[0],*sortbase);
X }
X }
X } else {
X for ( ; sortbase < sorttop; sortbase++) {
X fputs(*sortbase,outfp);
X }
X }
X fclose(outfp);
X } while (!feof(infp));
X
X fclose(infp); /* Close last input file */
X Mfree(sortdata); /* Release Malloced mem */
X
X return seqno; /* No of output files used */
X}
SHAR_EOF
chmod 0600 sort.c || echo "restore of sort.c fails"
sed 's/^X//' << 'SHAR_EOF' > sort.doc &&
X
X
X
X
X sort(1)
X
X
X
XNAME
X sort - sort and/or merge files
X
XSYNTAX
X sort [ -bdfimnrutx ] [ +pos1 [ -pos2 ] ] [ -o name ] [ -T dir ] [ name...]
X
XDESCRIPTION
X Sort sorts, then writes the lines of the named files to its standard
X output, or a file (-o option).
X Default (no input files are named), the standard input is sorted.
X The name "-" stands for the standard input, so that standard input can be
X used together with other input files.
X
X By default, the sort key is an entire line; ordering is lexicographic.
X The global ordering is affected by zero or more of the following options
X (of which some combinations are meaningless, e.g. d and n).
X
X -b Leading blanks (spaces and tabs) are skipped in field comparisons.
X
X -d Dictionary order: only letters, digits and blanks are
X significant in comparisons.
X
X -f Fold: treat upper case as if lower case.
X
X -i Ignore characters outside the ASCII range 040-0176 in
X nonnumeric comparisons.
X
X -n An initial numeric string, consisting of optional
X blanks, optional minus sign, and zero or more digits
X with optional decimal point, is sorted by arithmetic
X value. This option implies option b.
X
X -r Reverse the result of comparisons.
X
X -tx `Tab character' field separator is x.
X
X The notation +pos1 -pos2 restricts a sort key to a field
X beginning at pos1 and ending just before pos2.
X Pos1 and pos2 each have the form m.n, optionally followed by one
X or more of the options bdfinr, where m tells a number of fields
X to skip from the beginning of the line and n tells a number
X of characters to skip further. If any options are present
X they override all the global ordering options for this key.
X If the b option is in effect n is counted from the first
X nonblank in the field.
X A missing .n means .0; a missing -pos2 means the end of the
X line. Under the -tx option, fields are strings separated by
X x; otherwise fields are nonempty nonblank strings separated
X by blanks.
X
X When there are multiple sort keys, later keys are compared
X only after all earlier keys compare equal.
X
X These are additional (global) options:
X
X -c Check that the input file is sorted according to the key(s);
X if the file is out of sort, an error message is printed.
X Exit code is 0/1 for correctly/incorrectly sorted input.
X
X -m Merge only, the input files are already sorted.
X
X -o The next argument is the output file used instead of the
X standard output. This file may be the same as one of the inputs.
X
X -T Temporary files directory. Default is \tmp on the current drive.
X
X -u Unique: Select only one in each set of, according to the
X key(s), equal lines.
X
XRESTRICTIONS
X Very long (>256 chars) lines are silently truncated.
X The line terminator sequence \r\n is present in the line when
X doing comparisions; this should not present problems. Use of \0 in
X input is discouraged, since this will be seen as a C string terminator.
X
XDIAGNOSTICS
X Comments and exits with nonzero status for various trouble
X conditions and for disorder discovered under option c(heck).
X
XFILES
X \tmp\* for temporary files.
X
SHAR_EOF
chmod 0600 sort.doc || echo "restore of sort.doc fails"
sed 's/^X//' << 'SHAR_EOF' > sortcomp.c &&
X/******************************************************************************
X * *
X * sortcomp.c version 1.0 of 1 Januari 1989 (C) L.J.M. de Wit 1989 *
X * *
X * This software may be used and distributed freely if not used commercially *
X * and the originator (me) is mentioned in the source (just leave this 9 line *
X * header intact). *
X * *
X ******************************************************************************
X *
X * sortcomp.c: comparision functions
X *
X * These functions contain the numeric comparision functions c_n and its
X * reverse, c_nr. The other 48 comparision functions are formed by combining
X * the following properties (take note of the Capitals):
X * a) either Dictionary, Ignore non-printables or Any other (3)
X * b) ignore leading Blanks, versus not ignoring them (2)
X * c) Fold upper case to lower, versus not folding (2)
X * d) Reverse the result of the comparision, versus not reversing (2)
X * e) Unbounded (bounded only by '\0'), versus upper limit bound (2)
X *
X * Since comparing is done a lot it is important for the compare functions
X * to be fast. As for using the 48 functions as it stands the following
X * arguments are used:
X * One could have combined functions and used flags to discern between options.
X * This would however involve passing more parameters to the function (the
X * flags) and would require (perhaps a lot of) unwanted testing inside the
X * function.
X * Several functions call one of the other functions. This has been done in
X * such a way that duplication of code is mostly prevented, but still this call
X * is limited to only one (both in number and in level).
X * The numerical compare could have been done by a simple atoi(), but the
X * c_n function does the comparision inline, and stops comparing when the
X * bigger one has been found (while atoi needs to do calculation ala
X * 10 * num + digit, and will use all digits).
X */
X
X#include <stdio.h>
X#include <ctype.h>
X#include "sortmain.h"
X#include "sortcomp.h"
X
X#define SKIPSPACE(bs,bt) while (wspace(*(bs))) (bs)++; while (wspace(*(bt))) (bt)++
X
X#define NCHARS 256
X#define NORM 0x1
X#define DICT 0x2
X#define isnorm(c) (stype[(c)]&NORM)
X#define isdict(c) (stype[(c)]&DICT)
X
X#undef isdigit
X#define isdigit(c) ((c) >= '0' && (c) <= '9')
X
Xstatic char fold[NCHARS]; /* For lcase conversion */
X
Xstatic char stype[NCHARS]; /* For table lookup */
X
Xvoid initlookup() /* Initialize lookup tables */
X{
X register int i;
X
X for (i = 0; i < NCHARS; i++) {
X fold[i] = isupper(i) ? tolower(i) : i; /* Init lcase convert array */
X stype[i] = 0; /* Not really needed ... */
X if (i >= 040 && i <= 0176) { /* Ascii non-control */
X stype[i] |= NORM;
X }
X if (wspace(i) || isalnum(i)) { /* Whitespace & alphanum */
X stype[i] |= DICT;
X }
X }
X}
X
Xint c_nr(bs,bt,es,et) /* Reversed Numeric */
Xchar *bs, *bt;
Xchar *es, *et;
X{
X return c_n(bt,bs,et,es);
X}
X
Xint c_n(bs,bt,es,et) /* Numeric compare */
Xregister char *bs, *bt;
Xchar *es, *et;
X{
X register int rev, mini;
X
X SKIPSPACE(bs,bt);
X if (*bs == '-') {
X if (*bt == '-') { /* Two negatives: reverse */
X rev = -1;
X bs++;
X bt++;
X } else { /* bt will be the larger one*/
X return -1;
X }
X } else {
X if (*bt == '-') { /* bs will be the larger one*/
X return 1;
X } else {
X rev = 1; /* Both positive */
X }
X }
X
X while (*bs == *bt && isdigit(*bt)) { /* Compare & skip eq. digits*/
X bs++; bt++;
X }
X
X if (!isdigit(*bs)) {
X if (!isdigit(*bt)) {
X if (*bs == '.' && *bt == '.') { /* Decimal point */
X while (*++bs == *++bt && isdigit(*bt)) ;
X while (*bs == '0') bs++; /* Skip possibly trailing */
X while (*bt == '0') bt++; /* zeroes */
X if (!isdigit(*bs)) {
X if (!isdigit(*bt)) {
X return 0; /* Tails also equal */
X }
X return -rev; /* bt has more digits */
X }
X if (!isdigit(*bt)) {
X return rev; /* bs has more digits */
X }
X return (rev == 1) /* Current digit decides */
X ? (*bs - *bt) : (*bt - *bs);
X } else {
X return 0; /* Equal numeric strings */
X }
X }
X return -rev; /* bt has more digits */
X }
X if (!isdigit(*bt)) {
X return rev; /* bs has more digits */
X }
X mini = (rev == 1) /* Current digit decides: */
X ? (*bs - *bt) : (*bt - *bs); /* Difference into mini */
X do {
X bs++;
X bt++;
X } while (isdigit(*bs) && isdigit(*bt)); /* Skip any more digits */
X
X if (!isdigit(*bs)) {
X if (!isdigit(*bt)) { /* Strings equally long: */
X return mini; /* then mini decides */
X }
X return -rev; /* bt has more digits */
X }
X return rev; /* bs has more digits */
X}
X
Xint c_df(bs,bt,es,et) /* Dictionary/Fold */
Xregister char *bs, *bt; /* Strings to compare */
Xregister char *es, *et;
X{
X --bs; --bt; --es; --et;
X for ( ; bs < es && bt < et; ) {
X if (fold[*++bs] != fold[*++bt]) { /* Increment & compare */
X if (isdict(*bs)) {
X if (isdict(*bt)) {
X return fold[*bs] - fold[*bt];
X } else {
X --bs; /* Effectively skip *bt */
X }
X } else if (isdict(*bt)) {
X --bt; /* Effectively skip *bs */
X } /* (else: skip both) */
X }
X }
X for ( ; bs < es; ) { /* Skip any more */
X if (isdict(*++bs)) { /* non-dict chars in bs */
X --bs;
X break;
X }
X }
X for ( ; bt < et; ) { /* Skip any more */
X if (isdict(*++bt)) { /* non-dict chars in bt */
X --bt;
X break;
X }
X }
X
X return (et - bt) - (es - bs);
X}
X
Xint c_d(bs,bt,es,et) /* Dictionary order */
Xregister char *bs, *bt;
Xregister char *es, *et;
X{
X --bs; --bt; --es; --et;
X for ( ; bs < es && bt < et; ) {
X if (*++bs != *++bt) { /* Increment & compare */
X if (isdict(*bs)) {
X if (isdict(*bt)) {
X return fold[*bs] - fold[*bt];
SHAR_EOF
echo "End of part 1, continue with part 2"
echo "2" > s2_seq_.tmp
exit 0