ahh@j.cc.purdue.edu (Brent L. Woods) (03/11/88)
Program Name: diff (part 1 of 1) Submitted By: Johan Widen <jw@sics.se> Summary: Unix diff for the Amiga. Poster Boy: Pat White (ain@j.cc.purdue.edu) Tested. NOTES: Docs and data posted in separate shar. Reshared it. -- Pat White (co-moderator comp.sources/binaries.amiga) UUCP: j.cc.purdue.edu!ain BITNET: PATWHITE@PURCCVM PHONE: (317) 743-8421 U.S. Mail: 320 Brown St. apt. 406, West Lafayette, IN 47906 ======================================== : This is a shar archive. Extract with sh, not csh. : This archive ends with exit, so do not worry about trailing junk. echo 'Extracting README' sed 's/^X//' > README << '+ END-OF-FILE README' X17-JAN-88 X XThis is a enhanced version of the diff program that was posted to Xcomp.sources.misc recently. The diff now uses less memory, is almost Xas fast as the UNIX diff and it produces "new style" context diffs. X X Johan Widen X jw@sics.se + END-OF-FILE README chmod 'u=rw,g=r,o=r' 'README' echo ' -rw-r--r-- 1 jw 246 Jan 17 17:25 README (as sent)' echo -n ' ' /bin/ls -l README echo 'Extracting README.old' sed 's/^X//' > README.old << '+ END-OF-FILE README.old' XHere's a public domain diff with the -b and -c options. (4.2bsd style Xcontex diffs.) I wasn't aware that these wern't present in all unix Xversions of diff, so I didn't think posting it was a priority. It's Xlarge, slow, and many of the comments are no longer true, but it does Xwork. (except when it runs out of memory) The one case I know of Xwhere its output is incompatable with patch does seem to be pretty Xrare. No makefile is included, the 4.2bsd diff is better on the unix Xsystem I use. If you don't know how to compile and load a single C Xprogram, this probably isn't the tool for you anyway. I'd be grateful Xto anyone who cleans this up and documents it properly. It does Xappear to have been separate files at some point, I'm presenting it in Xa form similar to how I got it: mail headers and outdated documentation Xin comments and all. I just banged on it enough to get it doing what XI wanted. + END-OF-FILE README.old chmod 'u=rw,g=r,o=r' 'README.old' echo ' -rw-r--r-- 1 jw 910 Jan 17 07:11 README.old (as sent)' echo -n ' ' /bin/ls -l README.old echo 'Extracting diff.c' sed 's/^X//' > diff.c << '+ END-OF-FILE diff.c' X/* X * Last Hacked by Johan Widen, jw@sics.se, 17 Jan 88. X * Decreased memory use by cleaning up the data types. "new style" context X * diffs. File dates are now displayed under UNIX and Amiga-DOS. X * Incorporated documentation cleanup and TURBO-C adaptions by mike2@lcuxa. X * X * Previously Hacked by Robert A. Larson, blarson@ecla.usc.edu, Nov 29 86 X * OSK support. X * Real context diffs, with a couple of minor problems: X * If the first change is deleting leading lines, and the second X * such that the context overlaps the deleted lines, the deleted X * lines are output as context. This is consistant with other X * cases of overlapping context, but patch doesn't like it. It's X * not hard to manually fix the diff in this (rare?) case. X * At most 9 lines of context is output. X * X * Previously Hacked by John Gilmore, hoptoad!gnu, 26Sep86. X * Compiles and runs under Unix. Much faster since it doesn't reallocate X * every data structure in the inner loop(!). Compatible with Unix diff X * output format, though it occasionally finds different sets of changed X * lines (both are valid). -c option needs work. Also, ftell's in X * <check> should be dumped when possible. X * XFrom: EVERHART%ARISIA%rca.com@CSNET-RELAY.ARPA XMessage-Id: <8608201845.AA15181@lll-crg.ARPA> XDate: Wed, 20 Aug 86 10:34 EDT XTo: hoptoad!gnu@LLL-CRG.ARPA XSubject: Decus C DIFF source. X */ X X/* X * D I F F X */ X X/* For VMS: X )BUILD $(TKBOPTIONS) = { X TASK = ...DIF X } X*/ X X/* X * OSK changes: since OSK doesn't have realloc, put the size X * allocated in the head of each allocated region. This code X * assumes int is a maximally alligned type. X */ X X/* X * Borland Turbo C instructions. X * X * 1. Make certain that TURBO is #defined. X * 2. Compile in the Large model. X * 3. Set the optimizations: X * a) to optimize for speed, not size X * b) to use register variables X * c) to invoke register optimization X * c) to invoke jump optimization X */ X X X#ifdef TURBO X#include <alloc.h> X#include <errno.h> X#include <process.h> X#include <stdio.h> X#include <stdlib.h> X#include <string.h> X#else X#include <stdio.h> X#include <ctype.h> X#endif X X#ifdef unix X#include <sys/types.h> X#include <sys/stat.h> X#endif X X#ifdef vms X#include <ssdef.h> X#include <stsdef.h> X#define IO_SUCCESS (SS | STS) X#define IO_ERROR SS X#endif X X#ifdef AMIGA X#ifndef MANX X#ifndef LATTICE X#define LATTICE 1 X#define ANSI 1 X#endif X#endif X#ifdef LATTICE X#include <stdlib.h> X#include <string.h> X Xextern char *_TZ; X#endif X#endif X/* X * Note: IO_SUCCESS and IO_ERROR are defined in the Decus C stdio.h file X */ X#ifndef IO_SUCCESS X#define IO_SUCCESS 0 X#endif X#ifndef IO_ERROR X#define IO_ERROR 1 X#endif X X#define EOS 0 X#ifdef unix Xchar temfile[L_tmpnam]; Xchar *tmpnam(); X#define TEMPFILE (temfile[0]? temfile: tmpnam(temfile)) X#else X#define TEMPFILE "diff.tmp" X#endif X#define TRUE 1 X#define FALSE 0 X X#ifdef pdp11 X#define short int X#endif X Xtypedef short LINENO; Xtypedef unsigned short HASH; X Xtypedef struct candidate { X LINENO b; /* Line in fileB */ X LINENO a; /* Line in fileA */ X LINENO link; /* Previous candidate */ X} CANDIDATE; X Xtypedef struct line { X HASH hash; /* Hash value etc. */ X LINENO serial; /* Line number */ X} LINE; X X#define MAXLINES 32760 /* depends on sizeof(short) */ X#define LSIZE_INC 200 /* # of line entries to alloc at once */ X XLINE *file[2]; /* Hash/line for total file */ X#define fileA file[0] X#define fileB file[1] X XLINE *sfile[2]; /* Hash/line after prefix */ X#define sfileA sfile[0] X#define sfileB sfile[1] X Xint len[2]; /* Actual lines in each file */ X#define lenA len[0] X#define lenB len[1] X Xint slen[2]; /* Squished lengths */ X#define slenA slen[0] X#define slenB slen[1] X Xint prefix; /* Identical lines at start */ Xint suffix; /* Identical lines at end */ X XFILE *infd[2] = { NULL, NULL }; /* Input file identifiers */ XFILE *tempfd; /* Temp for input redirection */ X X#ifndef LATTICE Xextern long ftell(); Xextern FILE *fopen(); X#ifdef TURBO Xextern void *malloc(); X#else Xextern char *malloc(); X#endif X#endif X X#ifdef ANSI Xint error(char *,); Xchar *fgetss(char *, int, FILE *); XHASH hash(char *); XHASH newcand(LINENO, LINENO, LINENO); Xunsigned search(unsigned, unsigned, LINENO); Xunsigned subseq(); X#else Xchar *fgetss(); XHASH hash(); XHASH newcand(); Xunsigned search(); Xunsigned subseq(); X#endif X/* X * The following vectors overlay the area defined by fileA X */ X XHASH *class; /* Unsorted line numbers */ XHASH *klist; /* Index of element in clist */ XCANDIDATE *clist; /* Storage pool for candidates */ Xint clength = 0; /* Number of active candidates */ X#define CSIZE_INC 50 /* How many to allocate each time we have to */ Xint csize = CSIZE_INC; /* Current size of storage pool */ X XLINENO *match; /* Longest subsequence */ Xlong *oldseek; /* Seek position in file A */ X X/* X * The following vectors overlay the area defined by fileB X */ X XLINENO *member; /* Concatenated equiv. classes */ Xlong *newseek; /* Seek position in file B */ Xchar *textb; /* Input from file2 for check */ X X/* X * Global variables X */ X Xint eflag = FALSE; /* Edit script requested */ Xint bflag = FALSE; /* Blank supress requested */ Xint cflag = FALSE; /* Context printout */ Xint iflag = FALSE; /* Ignore case requested */ Xchar text[257]; /* Input line from file1 */ Xextern char *myalloc(); /* Storage allocator */ X Xextern char *compact(); /* Storage compactor */ X X#ifdef DEBUG X#ifndef OSK X#define TIMING X#endif X#endif X#ifdef TIMING Xextern long time(); Xextern char *358mend; Xlong totaltime; Xlong sectiontime; Xchar *mstart; X#endif X Xmain(argc, argv) Xint argc; Xchar **argv; X/* X * Diff main program X */ X{ X register int i; X register char *ap; X X#ifdef OSK X extern int _memmins; X _memmins = 16 * 1024; /* tell OSK we will malloc a lot */ X#endif X#ifdef TIMING X sectiontime = time(&totaltime); X#endif X#ifdef vms X argc = getredirection(argc,argv); X#endif X while (argc > 1 && *(ap = argv[1]) == '-' && *++ap != EOS) { X while (*ap != EOS) { X switch ((*ap++)) { X case 'b': X bflag++; X break; X X case 'c': X if (*ap > '0' && *ap <= '9') cflag = *ap++ - '0'; X else cflag = 3; X break; X X case 'e': X eflag++; X break; X X case 'i': X iflag++; X break; X X default: X fprintf(stderr, X "Warning, bad option '%c'\n", X ap[-1]); X break; X } X } X argc--; X argv++; X } X X if (argc != 3) X error("Usage: diff [-options] file1 file2"); X if (cflag && eflag) { X fprintf(stderr, X "Warning, -c and -e are incompatible, -c supressed.\n"); X cflag = FALSE; X } X argv++; X for (i = 0; i <= 1; i++) { X if (argv[i][0] == '-' && argv[i][1] == EOS) { X infd[i] = stdin; X if ((tempfd = fopen(TEMPFILE, "w")) == NULL) X cant(TEMPFILE, "work", 1); X } X else { X infd[i] = fopen(argv[i], "r"); X if (!infd[i]) cant(argv[i], "input", 2); /* Fatal error */ X } X } X X if (infd[0] == stdin && infd[1] == stdin) X error("Can't diff two things both on standard input."); X X if (infd[0] == NULL && infd[1] == NULL) { X cant(argv[0], "input", 0); X cant(argv[1], "input", 1); X } X#ifdef vms X else if (infd[1] == NULL) X opendir(1, &argv[1], infd[0]); X else if (infd[0] == NULL) X opendir(0, &argv[0], infd[1]); X#endif X X /* X * Read input, building hash tables. X */ X input(0); X input(1); X if(lenA > MAXLINES || lenB > MAXLINES) X error("Can't handle files with more than %d lines.", MAXLINES); X squish(); X#ifdef DEBUG X printf("before sort\n"); X for (i = 1; i <= slenA; i++) X printf("sfileA[%d] = %6d %06o\n", X i, sfileA[i].serial, sfileA[i].hash); X for (i = 1; i <= slenB; i++) X printf("sfileB[%d] = %6d %06o\n", X i, sfileB[i].serial, sfileB[i].hash); X#endif X sort(sfileA, slenA); X sort(sfileB, slenB); X#ifdef TIMING X ptime("input"); X#endif X#ifdef DEBUG X printf("after sort\n"); X for (i = 1; i <= slenA; i++) X printf("sfileA[%d] = %6d %06o\n", X i, sfileA[i].serial, sfileB[i].hash); X for (i = 1; i <= slenB; i++) X printf("sfileB[%d] = %6d %06o\n", X i, sfileB[i].serial, sfileB[i].hash); X#endif X /* X * Build equivalence classes. X */ X member = (LINENO *)fileB; X equiv(); /* This will overwrite fileB */ X member = (LINENO *)compact((char *)member, (slenB + 2) * sizeof (LINENO), X "squeezing member vector"); X /* sfileB and fileB are no longer valid */ X /* X * Reorder equivalence classes into array class[] X */ X class = (HASH *)fileA; X unsort(); /* This will overwrite fileA */ X class = (HASH *)compact((char *)class, (slenA + 2) * sizeof (HASH), X "compacting class vector"); X /* sfileA and fileA are no longer valid */ X#ifdef TIMING X ptime("equiv/unsort"); X#endif X /* X * Find longest subsequences X */ X klist = (HASH *)myalloc((slenA + 2) * sizeof (HASH), "klist"); X clist = (CANDIDATE *)myalloc(csize * sizeof (CANDIDATE), "clist"); X i = subseq(); X#ifndef OSK X free((char *)member); X free((char *)class); X#else X free((char *)member - sizeof(int)); X free((char *)class - sizeof(int)); X#endif X match = (LINENO *)myalloc((lenA + 2) * sizeof (LINENO), "match"); X unravel(klist[i]); X#ifndef OSK X free((char *)clist); X free((char *)klist); X#else X free((char *)clist - sizeof(int)); X free((char *)klist - sizeof(int)); X#endif X#ifdef TIMING X ptime("subsequence/unravel"); X#endif X /* X * Check for fortuitous matches and output differences X */ X oldseek = (long *)myalloc((lenA + 2) * sizeof (* oldseek), "oldseek"); X newseek = (long *)myalloc((lenB + 2) * sizeof (* newseek), "newseek"); X textb = myalloc(sizeof text, "textbuffer"); X check(argv[0], argv[1]); X#ifdef TIMING X ptime("check"); X#endif X output(argv[0], argv[1]); X#ifdef TIMING X ptime("output"); X printf("%ld seconds required\n", sectiontime - totaltime); X#endif X if (tempfd != NULL) { X fclose(tempfd); X#ifdef vms X remove(TEMPFILE); X#else X unlink(TEMPFILE); X#endif X } X} X X Xinput(which) Xint which; /* 0 or 1 to redefine infd[] */ X/* X * Read the file, building hash table X */ X{ X register LINE *lentry; X register int linect = 0; X FILE *fd; X int lsize = LSIZE_INC; X X lentry = (LINE *)myalloc(sizeof(LINE) * (lsize+3), "line"); X fd = infd[which]; X while (!getline(fd, text)) { X if (++linect >= lsize) { X lsize += LSIZE_INC; X lentry = (LINE *)compact((char *)lentry, X (lsize + 3) * sizeof(LINE), X "extending line vector"); X } X lentry[linect].hash = hash(text); X } X /* X * If input was from stdin ("-" command), finish off the temp file. X */ X if (fd == stdin) { X fclose(tempfd); X tempfd = infd[which] = fopen(TEMPFILE, "r"); X } X /* If we wanted to be stingy with memory, we could realloc lentry down X * to its exact size (+3 for some odd reason) here. No need? */ X len[which] = linect; X file[which] = lentry; X} X Xsquish() X/* X * Look for initial and trailing sequences that have identical hash values. X * Don't bother building them into the candidate vector. X */ X{ X register int i; X register LINE *ap; X register LINE *bp; X int j; X int k; X X /* X * prefix -> first line (from start) that doesn't hash identically X */ X i = 0; ap = &fileA[1]; bp = &fileB[1]; X while (i < lenA && i < lenB && ap->hash == bp->hash) { X i++; ap++; bp++; X } X prefix = i; X /* X * suffix -> first line (from end) that doesn't hash identically X */ X j = lenA - i; X k = lenB - i; X ap = &fileA[lenA]; X bp = &fileB[lenB]; X i = 0; X while (i < j && i < k && ap->hash == bp->hash) { X i++; ap--; bp--; X } X suffix = i; X /* X * Tuck the counts away X */ X for (k = 0; k <= 1; k++) { X sfile[k] = file[k] + prefix; X slen[k] = len[k] - prefix - suffix; X X for (i = 0, ap = sfile[k]; i <= slen[k]; i++, ap++) { X ap->serial = i; X } X } X} X Xsort(vector, vecsize) XLINE *vector; /* What to sort */ Xint vecsize; /* How many to sort */ X/* X * Sort hash entries X */ X{ X register int j; X register LINE *aim; X register LINE *ai; X int mid; X int k; X LINE work; X X for (j = 1; j <= vecsize; j *= 2); X mid = (j - 1); X while ((mid /= 2) != 0) { X k = vecsize - mid; X for (j = 1; j <= k; j++) { X for (ai = &vector[j]; ai > vector; ai -= mid) { X aim = &ai[mid]; X if (aim->hash > ai->hash || X aim->hash == ai->hash && X aim->serial > ai->serial) X break; X work.hash = ai->hash; X ai->hash = aim->hash; X aim->hash = work.hash; X work.serial = ai->serial; X ai->serial = aim->serial; X aim->serial = work.serial; X } X } X } X} X Xequiv() X/* X * Build equivalence class vector X */ X{ X register LINE *ap; X register LINE *bp; X register LINENO *mp; X register int j; X LINE *atop, *btop; X X#ifdef DEBUG X printf("equiv entry\n"); X for (j = 1; j <= slenA; j++) X printf("sfileA[%d] = %6d %06o\n", X j, sfileA[j].serial, sfileA[j].hash); X for (j = 1; j <= slenB; j++) X printf("sfileB[%d] = %6d %06o\n", X j, sfileB[j].serial, sfileB[j].hash); X#endif X j = 1; X ap = &sfileA[1]; X bp = &sfileB[1]; X atop = &sfileA[slenA]; X while (ap <= atop && j <= slenB) { X if (ap->hash < bp->hash) { X ap->hash = 0; X ap++; X } X else if (ap->hash == bp->hash) { X ap->hash = j; X ap++; X } X else { X bp++; X j++; X } X } X while (ap <= atop) { X ap->hash = 0; X ap++; X } X sfileB[slenB + 1].hash = 0; X#ifdef DEBUG X printf("equiv exit\n"); X for (j = 1; j <= slenA; j++) X printf("sfileA[%d] = %6d %06o\n", X j, sfileA[j].serial, sfileA[j].hash); X for (j = 1; j <= slenB; j++) X printf("sfileB[%d] = %6d %06o\n", X j, sfileB[j].serial, sfileB[j].hash); X#endif X bp = sfileB; X btop = &sfileB[slenB]; X mp = member; X while (++bp <= btop) { X *(++mp) = -(bp->serial); X while (bp[1].hash == bp->hash) { X bp++; X *(++mp) = bp->serial; X } X } X *(++mp) = -1; X#ifdef DEBUG X for (j = 0; j <= slenB; j++) X printf("member[%d] = %d\n", j, member[j]); X#endif X} X Xunsort() X/* X * Build class vector X */ X{ X HASH *temp; X register HASH *tp; X register LINE *ap; X register HASH *cp; X LINE *evec; X HASH *eclass; X#ifdef DEBUG X int i; X#endif X X tp = temp = (HASH *)myalloc((slenA + 1) * sizeof(HASH), "unsort scratch"); X ap = &sfileA[1]; X evec = &sfileA[slenA]; X while (ap <= evec) { X#ifdef DEBUG X printf("temp[%2d] := %06o\n", ap->serial, ap->hash); X#endif X tp[ap->serial] = ap->hash; X ap++; X } X /* X * Copy into class vector and free work space X */ X cp = &class[1]; X eclass = &class[slenA]; X tp = &temp[1]; X while (cp <= eclass) X *cp++ = *tp++; X#ifndef OSK X free((char *) temp); X#else X free((char *)temp - sizeof(int)); X#endif X#ifdef DEBUG X printf("unsort exit\n"); X for (i = 1; i <= slenA; i++) X printf("class[%d] = %d %06o\n", i, class[i], class[i]); X#endif X} X Xunsigned Xsubseq() X/* X * Generate maximum common subsequence chain in clist[] X */ X{ X int a; X register unsigned ktop; X register LINENO b; X register unsigned s; X unsigned r; X HASH i; X HASH cand; X X klist[0] = newcand(0, 0, -1); X klist[1] = newcand((LINENO) (slenA + 1), (LINENO) (slenB + 1), -1); X ktop = 1; /* -> guard entry */ X for (a = 1; a <= slenA; a++) { X /* X * For each non-zero element in fileA ... X */ X if ((i = class[a]) == 0) X continue; X cand = klist[0]; /* No candidate now */ X r = 0; /* Current r-candidate */ X do { X#ifdef DEBUG X printf("a = %d, i = %d, b = %d\n", a, i, member[i]); X#endif X /* X * Perform the merge algorithm X */ X if ((b = member[i]) < 0) X b = -b; X#ifdef DEBUG X printf("search(%d, %d, %d) -> %d\n", X r, ktop, b, search(r, ktop, b)); X#endif X if (s = search(r, ktop, b)) { X if (clist[klist[s]].b > b) { X klist[r] = cand; X r = s; X cand = newcand((LINENO) a, b, klist[s-1]); X#ifdef DEBUG X dumpklist(ktop, "klist[s-1]->b > b"); X#endif X } X if (s >= ktop) { X klist[ktop + 1] = klist[ktop]; X ktop++; X#ifdef DEBUG X klist[r] = cand; X dumpklist(ktop, "extend"); X#endif X break; X } X } X } while (member[++i] > 0); X klist[r] = cand; X } X#ifdef DEBUG X printf("last entry = %d\n", ktop - 1); X#endif X return(ktop - 1); /* Last entry found */ X} X XHASH Xnewcand(a, b, pred) XLINENO a; /* Line in fileA */ XLINENO b; /* Line in fileB */ XLINENO pred; /* Link to predecessor, index in cand[] */ X{ X register CANDIDATE *new; X X clength++; X if (++clength >= csize) { X csize += CSIZE_INC; X clist = (CANDIDATE *)compact((char *)clist, X csize * sizeof (CANDIDATE), X "extending clist"); X } X new = &clist[clength - 1]; X new->a = a; X new->b = b; X new->link = pred; X return((HASH) (clength - 1)); X} X Xunsigned Xsearch(low, high, b) Xregister unsigned low; Xregister unsigned high; Xregister LINENO b; X/* X * Search klist[low..top] (inclusive) for b. If klist[low]->b >= b, X * return zero. Else return s such that klist[s-1]->b < b and X * klist[s]->b >= b. Note that the algorithm presupposes the two X * preset "fence" elements, (0, 0) and (slenA, slenB). X */ X{ X register LINENO temp; X register unsigned mid; X X if (clist[klist[low]].b >= b) X return(0); X while ((mid = (low + high) / 2) > low) { X if ((temp = clist[klist[mid]].b) > b) X high = mid; X else if (temp < b) X low = mid; X else { X return(mid); X } X } X return(mid + 1); X} X X Xunravel(k) Xregister int k; X{ X register int i; X register CANDIDATE *cp; X int first_trailer; X int difference; X X first_trailer = lenA - suffix; X difference = lenB - lenA; X#ifdef DEBUG X printf("first trailer = %d, difference = %d\n", X first_trailer, difference); X#endif X for (i = 0; i <= lenA; i++) { X match[i] = (i <= prefix) ? i X : (i > first_trailer) ? i + difference X : 0; X } X#ifdef DEBUG X printf("unravel\n"); X#endif X while (k != -1) { X cp = &clist[k]; X#ifdef DEBUG X if (k < 0 || k >= clength) X error("Illegal link -> %d", k); X printf("match[%d] := %d\n", cp->a + prefix, cp->b + prefix); X#endif X match[cp->a + prefix] = cp->b + prefix; X k = cp->link; X } X} X X X/* X * Check for hash matches (jackpots) and collect random access indices to X * the two files. X * X * It should be possible to avoid doing most of the ftell's by noticing X * that we are not doing a context diff and noticing that if a line X * compares equal to the other file, we will not ever need to know its X * file position. FIXME. X */ Xcheck(fileAname, fileBname) Xchar *fileAname; Xchar *fileBname; X{ X register int a; /* Current line in file A */ X register int b; /* Current line in file B */ X int jackpot; X X/* X * The VAX C ftell() returns the address of the CURRENT record, not the X * next one (as in DECUS C or, effectively, other C's). Hence, the values X * are "off by one" in the array. OFFSET compensates for this. X */ X#ifdef vms X#define OFFSET (-1) X#else X#define OFFSET 0 X#endif X X b = 1; X rewind(infd[0]); X rewind(infd[1]); X/* X * See above; these would be over-written on VMS anyway. X */ X#ifndef vms X oldseek[0] = ftell(infd[0]); X newseek[0] = ftell(infd[1]); X#endif X X jackpot = 0; X#ifdef DEBUG X printf("match vector\n"); X for (a = 0; a <= lenA; a++) X printf("match[%d] = %d\n", a, match[a]); X#endif X for (a = 1; a <= lenA; a++) { X if (match[a] == 0) { X /* Unique line in A */ X oldseek[a+OFFSET] = ftell(infd[0]); X getline(infd[0], text); X continue; X } X while (b < match[a]) { X /* Skip over unique lines in B */ X newseek[b+OFFSET] = ftell(infd[1]); X getline(infd[1], textb); X b++; X } X X /* X * Compare the two, supposedly matching, lines. X * Unless we are going to print these lines, don't bother to X * remember where they are. We only print matching lines X * if a context diff is happening, or if a jackpot occurs. X */ X if (cflag) { X oldseek[a+OFFSET] = ftell(infd[0]); X newseek[b+OFFSET] = ftell(infd[1]); X } X getline(infd[0], text); X getline(infd[1], textb); X if (strcmp(text, textb)) { X#ifdef DEBUG X fprintf(stderr, "Spurious match:\n"); X fprintf(stderr, "line %d in %s, \"%s\"\n", X a, fileAname, text); X fprintf(stderr, "line %d in %s, \"%s\"\n", X b, fileBname, textb); X#endif X match[a] = 0; X jackpot++; X } X X b++; X } X for (; b <= lenB; b++) { X newseek[b+OFFSET] = ftell(infd[1]); X getline(infd[1], textb); X } X/* X * The logical converse to the code up above, for NON-VMS systems, to X * store away an fseek() pointer at the beginning of the file. For VMS, X * we need one at EOF... X */ X#ifdef vms X oldseek[lenA] = ftell(infd[0]); X getline(infd[0],text); /* Will hit EOF... */ X newseek[lenB] = ftell(infd[1]); X getline(infd[1],textb); /* Will hit EOF... */ X#endif X X return(jackpot); X} X Xoutput(fileAname, fileBname) Xchar *fileAname, *fileBname; X{ X register int astart; X register int aend; X int bstart; X register int bend; X X rewind(infd[0]); X rewind(infd[1]); X match[0] = 0; X match[lenA+1] = lenB + 1; X if (!eflag) { X if (cflag) { X coutput(fileAname, fileBname); X return; X } else { X /* X * Normal printout X */ X for (astart = 1; astart <= lenA; astart = aend + 1) { X /* X * New subsequence, skip over matching stuff X */ X while (astart <= lenA && X match[astart] == (match[astart - 1] + 1)) X astart++; X /* X * Found a difference, setup range and print it X */ X bstart = match[astart - 1] + 1; X aend = astart - 1; X while (aend < lenA && match[aend + 1] == 0) X aend++; X bend = match[aend + 1] - 1; X match[aend] = bend; X change(astart, aend, bstart, bend); X } X } X } X else { X /* X * Edit script output -- differences are output "backwards" X * for the benefit of a line-oriented editor. X */ X for (aend = lenA; aend >= 1; aend = astart - 1) { X while (aend >= 1 X && match[aend] == (match[aend + 1] - 1) X && match[aend] != 0) X aend--; X bend = match[aend + 1] - 1; X astart = aend + 1; X while (astart > 1 && match[astart - 1] == 0) X astart--; X bstart = match[astart - 1] + 1; X match[astart] = bstart; X change(astart, aend, bstart, bend); X } X } X if (lenA == 0) X change(1, 0, 1, lenB); X} X X Xcoutput(fileAname, fileBname) Xchar *fileAname, *fileBname; X{ X int astart; X int aend; X int bstart; X int bend = 0; X int change, ctmp, cFirst, cOld; X int a1start, a1end, b1start, b1end, a2start, a2end, b2start, b2end; X int astartLast, aendLast, bstartLast, bendLast; X int low; X int numDifferences = 0; X X#define CINSERTION 1 X#define CDELETION 2 X#define CCHANGE 4 X for (astart = 1; astart <= lenA; astart = aend + 1) { X if(!(change = findentry(&astart, &aend, &bstart, &bend))) X break; X /* find last entry to print in this diff */ X cFirst = change; X astartLast = astart; X aendLast = a1end = aend; X bstartLast = bstart; X bendLast = b1end = bend; X for(a1start = aendLast + 1; a1start <= lenA; a1start = a1end + 1) { X if(!(ctmp = findentry(&a1start, &a1end, &b1start, &b1end)) || X ((a1start - aendLast) >= 2*cflag && X (b1start - bendLast) >= 2*cflag)) X break; X change |= ctmp; X astartLast = a1start; X aendLast = a1end; X bstartLast = b1start; X bendLast = b1end; X } X if(!numDifferences++) X printHeader(fileAname, fileBname); X fputs("***************\n*** ", stdout); X range(astart, aendLast, 0); X fputs(" ****\n", stdout); X if (change & (CDELETION | CCHANGE)) { X a1start = astart; a1end = aend; b2end = bend; X cOld = cFirst; X low = astart - cflag; X while (a1start < astartLast) { X a2start = a1end + 1; X ctmp = findentry(&a2start, &a2end, &b2start, &b2end); X fetch(oldseek, a1start, a1end, low, a2start - 1, lenA, X infd[0], (cOld & CDELETION) ? "- " : "! "); X cOld = ctmp; X low = a2start; X a1start = a2start; X a1end = a2end; X } X fetch(oldseek, astartLast, aendLast, low, aendLast + cflag, lenA, X infd[0], (cOld & CDELETION) ? "- " : "! "); X } X fputs("--- ", stdout); X range(bstart, bendLast, 1); X fputs(" ----\n", stdout); X if (change & (CINSERTION | CCHANGE)) { X a2start = astart; a2end = aend; X b1start = bstart; b1end = b2end = bend; X cOld = cFirst; X low = bstart - cflag; X while (a2start < astartLast) { X a2start = a2end + 1; X ctmp = findentry(&a2start, &a2end, &b2start, &b2end); X fetch(newseek, b1start, b1end, low, b2start - 1, lenB, X infd[1], (cOld & CINSERTION) ? "+ " : "! "); X cOld = ctmp; X low = b2start; X b1start = b2start; X b1end = b2end; X } X fetch(newseek, bstartLast, bendLast, low, bendLast + cflag, lenB, X infd[1], (cOld & CINSERTION) ? "+ " : "! "); X } X aend = aendLast; X bend = bendLast; X } X if (lenA == 0 && lenB > 0) { X numDifferences++; X printHeader(fileAname, fileBname); X cchange(1, 0, 1, lenB); X } X if (!numDifferences) X printf("No differences encountered\n"); X} X X XprintHeader(fileAname, fileBname) Xchar *fileAname, *fileBname; X{ X long fileDate; X#ifdef unix X struct stat statbuf; X#endif X X#ifdef AMIGA X#ifdef LATTICE X _TZ = "GMT0"; X#endif X if(infd[0] == tempfd) X fileDate = time(0); X else X fileDate = getft(fileAname); X printf("*** %s\t%s", fileAname, ctime(&fileDate)); X if(infd[1] == tempfd) X fileDate = time(0); X else X fileDate = getft(fileBname); X printf("--- %s\t%s", fileBname, ctime(&fileDate)); X#else X#ifdef unix X if(infd[0] == tempfd) X fileDate = time(0); X else { X fstat(fileno(infd[0]), &statbuf); X fileDate = statbuf.st_mtime; X } X printf("*** %s\t%s", fileAname, ctime(&fileDate)); X if(infd[1] == tempfd) X fileDate = time(0); X else { X fstat(fileno(infd[1]), &statbuf); X fileDate = statbuf.st_mtime; X } X printf("--- %s\t%s", fileBname, ctime(&fileDate)); X#else X printf("*** %s\n--- %s\n", fileAname, fileBname); X#endif X#endif X} X X Xint Xfindentry(astartp, aendp, bstartp, bendp) Xint *astartp, *aendp, *bstartp, *bendp; X{ X int save; X register int astart = *astartp; X register int aend; X X if(astart > lenA) X return(0); X save = match[astart - 1]; X match[astart - 1] = *bendp; X /* Skip over matching stuff */ X while (astart <= lenA && match[astart] == (match[astart - 1] + 1)) X astart++; X *bstartp = match[astart - 1] + 1; X aend = astart - 1; X while (aend < lenA && match[aend + 1] == 0) X aend++; X *bendp = match[aend + 1] - 1; X match[*astartp - 1] = save; X *astartp = astart; X *aendp = aend; X if (astart > aend) { X if (*bstartp > *bendp) X return(0); X else X return(CINSERTION); X } else if (*bstartp > *bendp) X return(CDELETION); X else X return(CCHANGE); X} X X X/* X * Output a non context diff change entry: X * fileA[astart..aend] changed to fileB[bstart..bend] X */ Xchange(astart, aend, bstart, bend) Xint astart; Xint aend; Xint bstart; Xint bend; X{ X char c; X /* X * This catches a "dummy" last entry X */ X if (astart > aend && bstart > bend) X return; X c = (astart > aend) ? 'a' : (bstart > bend) ? 'd' : 'c'; X if (c == 'a') X range(astart-1, astart-1, 0); /* Addition: just print one odd # */ X else X range(astart, aend, 0); /* Print both, if different */ X putchar(c); X if (!eflag) { X if (c == 'd') X range(bstart-1, bstart-1, 1); /* Deletion: just print one odd # */ X else X range(bstart, bend, 1); /* Print both, if different */ X } X putchar('\n'); X if (!eflag) { X fetch(oldseek, astart, aend, 0, 0, lenA, infd[0], "< "); X if (astart <= aend && bstart <= bend) X printf("---\n"); X } X fetch(newseek, bstart, bend, 0, 0, lenB, infd[1], eflag ? "" : "> "); X if (eflag && bstart <= bend) X printf(".\n"); X} X X X/* X * Output a context diff change entry: X * fileA[astart..aend] changed to fileB[bstart..bend] X */ Xcchange(astart, aend, bstart, bend) Xint astart; Xint aend; Xint bstart; Xint bend; X{ X char c; X X /* X * This catches a "dummy" last entry X */ X if (astart > aend && bstart > bend) X return; X c = (astart > aend) ? 'a' : (bstart > bend) ? 'd' : 'c'; X fputs("***************\n*** ", stdout); X range(astart, aend, 0); X fputs(" ****\n", stdout); X fetch(oldseek, astart, aend, astart - cflag, aend + cflag, lenA, infd[0], X c=='d' ? "- " : "! "); X fputs("--- ", stdout); X range(bstart, bend, 1); X fputs(" ----\n", stdout); X fetch(newseek, bstart, bend, bstart - cflag, bend + cflag, lenB, infd[1], X c=='a' ? "+ " : "! "); X} X Xrange(from, to, w) Xint from; Xint to; Xint w; X/* X * Print a range X */ X{ X if (cflag) { X if((from -= cflag) <= 0) from = 1; X if((to += cflag) > len[w]) to = len[w]; X if(!to) from = 0; X } X if (to > from) { X printf("%d,%d", from, to); X } else if (to < from) { X printf("%d,%d", to, from); X } else { X printf("%d", from); X } X} X X Xfetch(seekvec, start, end, low, high, trueend, fd, pfx) Xlong *seekvec; Xregister int start; Xregister int end; Xint low; Xint high; Xint trueend; XFILE *fd; Xchar *pfx; X/* X * Print the appropriate text X */ X{ X register int i; X register int first; X register int last; X X if (cflag) { X if((first = low) <= 0) first = 1; X if((last = high) > trueend) last = trueend; X } else { X first = start; X last = end; X } X if (fseek(fd, seekvec[first], 0) != 0) { X printf("?Can't read line %d at %08lx (hex) in file%c\n", X start, seekvec[first], X (fd == infd[0]) ? 'A' : 'B'); X } X else { X for (i = first; i <= last; i++) { X if (fgetss(text, sizeof text, fd) == NULL) { X printf("** Unexpected end of file\n"); X break; X } X#ifdef DEBUG X printf("%5d: %s%s\n", i, pfx, text); X#else X fputs((cflag && (i<start || i>end)) ? " " : pfx, stdout); X fputs(text, stdout); X putchar('\n'); X#endif X } X } X} X X/* X * Input routine, read one line to buffer[], return TRUE on eof, else FALSE. X * The terminating newline is always removed. If "-b" was given, trailing X * whitespace (blanks and tabs) are removed and strings of blanks and X * tabs are replaced by a single blank. Getline() does all hacking for X * redirected input files. X */ Xint Xgetline(fd, buffer) XFILE *fd; Xchar *buffer; X{ X register char *top; X register char *fromp; X register char c; X X if (fgetss(buffer, sizeof text, fd) == NULL) { X *buffer = EOS; X return(TRUE); X } X if (fd == stdin) { X fputs(buffer, tempfd); X putc('\n', tempfd); X } X if (bflag || iflag) { X top = buffer; X fromp = buffer; X while ((c = *fromp++) != EOS) { X if (bflag && (c == ' ' || c == '\t')) { X c = ' '; X while (*fromp == ' ' || *fromp == '\t') X fromp++; X } X if (iflag) X c = tolower(c); X *top++ = c; X } X if (bflag && top[-1] == ' ') X top--; X *top = EOS; X } X return(FALSE); X} Xstatic unsigned short crc16a[] = { X 0000000, 0140301, 0140601, 0000500, X 0141401, 0001700, 0001200, 0141101, X 0143001, 0003300, 0003600, 0143501, X 0002400, 0142701, 0142201, 0002100, X}; Xstatic unsigned short crc16b[] = { X 0000000, 0146001, 0154001, 0012000, X 0170001, 0036000, 0024000, 0162001, X 0120001, 0066000, 0074000, 0132001, X 0050000, 0116001, 0104001, 0043000, X}; X Xunsigned short Xhash(buffer) Xchar *buffer; X/* X * Return the CRC16 hash code for the buffer X * Algorithm from Stu Wecker (Digital memo 130-959-002-00). X */ X{ X register unsigned short crc; X register char *tp; X register short temp; X X crc = 0; X for (tp = buffer; *tp != EOS;) { X temp = *tp++ ^ crc; /* XOR crc with new char */ X crc = (crc >> 8) X ^ crc16a[(temp & 0017)] X ^ crc16b[(temp & 0360) >> 4]; X } X#ifdef DEBUG_ALL X printf("%06o: %s\n", (crc == 0) ? 1 : crc, buffer); X#endif X return((crc == 0) ? (unsigned short) 1 : crc); X} X X X#ifdef vms Xopendir(which, arg, okfd) Xint which; /* Which file to open (0 or 1) */ Xchar **arg; /* File name argument, &argv[which] */ XFILE *okfd; /* File name (already open) */ X{ X register char *tp; X register int c; X register char *newname; X X fgetname(okfd, text); X /* X * Skip over device name X */ X for (tp = text; (c = *tp) && c != ':'; tp++); X if (c) tp++; X else tp = text; X /* X * Skip over [UIC] or [PPN] if present X */ X if (*tp == '[' || *tp == '(') { X while ((c = *tp++) && c != ']' && c != ')'); X if (c == 0) { X fprintf(stderr, "?Bug: bad file name \"%s\"\n", X text); X tp--; X } X } X strcpy(text, tp); X /* X * Don't include version X */ X for (tp = text; (c = *tp) && c != ';'; tp++); X *tp = 0; X /* X * Now, text has the file name, tp - text, its length, X * and *arg the (possible) directory name. Create a new X * file name for opening. X */ X#ifndef OSK X if ((newname = malloc(tp - text + strlen(*arg) + 1)) == NULL) X error("Out of space at start"); X#else X newname = myalloc(tp - text + strlen(*arg) + 1, "Out of space at start"); X#endif X concat(newname, *arg, text, NULL); X if ((infd[which] = fopen(newname, "r")) == NULL) X cant(*arg, "constructed input", 1); X else X *arg = newname; X} X#endif X Xchar * Xmyalloc(amount, why) Xint amount; Xchar *why; X/* X * Allocate or crash. X */ X{ X register char *pointer; X X#ifdef OSK X amount += sizeof(int); X#endif X if ((pointer = malloc((unsigned) amount)) == NULL) X noroom(why); X#ifdef OSK X *((int *)pointer) = amount; X pointer += sizeof(int); X#ifdef DEBUG X fprintf(stderr, "Myalloc: %d at %06o\n", amount, pointer); X#endif X#endif X X return (pointer); X} X X X/* X * Reallocate pointer, compacting storage X * X * The "compacting storage" part is probably not relevant any more. X * There used to be horrid code here that malloc'd one byte and freed X * it at magic times, to cause garbage collection of the freespace X * or something. It's safely gone now, you didn't have to see it. X * -- John Gilmore, Nebula Consultants, Sept 26, 1986 X */ Xchar * Xcompact(pointer, new_amount, why) Xchar *pointer; Xint new_amount; Xchar *why; X{ X register char *new_pointer; X X#ifndef OSK X#ifdef TURBO X extern void *realloc(); X#else X extern char *realloc(); X#endif X X if ((new_pointer = realloc(pointer, (unsigned) new_amount)) == NULL){ X noroom(why); X } X#else X register int old_amount; X new_amount += sizeof(int); X if((new_pointer = malloc(new_amount)) == NULL) noroom(why); X *(int *)new_pointer = new_amount; X new_pointer += sizeof(int); X old_amount = *(((int *)pointer)-1); X /* _strass is like bcopy with the first two arguments reversted */ X _strass(new_pointer, pointer, (new_amount <= old_amount ? X new_amount : old_amount) - sizeof(int)); X#ifdef DEBUG X fprintf(stderr, "compact %d to %d from %06o to %06o\n", X old_amount, new_amount, pointer, new_pointer); X#endif X free(pointer - sizeof(int)); X#endif X X#ifndef OSK X#ifdef DEBUG X if (new_pointer != pointer) { X fprintf(stderr, "moved from %06o to %06o\n", X pointer, new_pointer); X } X/* rdump(new_pointer, why); X*/ X#endif X#endif X return (new_pointer); X} X Xnoroom(why) Xchar *why; X{ X fprintf(stderr, "?DIFF-F-out of room when %s\n", why); X exit(IO_ERROR); X} X X#ifdef DEBUG Xrdump(pointer, why) Xint *pointer; Xchar *why; X/* X * Dump memory block X */ X{ X int *last; X int count; X X last = ((int **)pointer)[-1]; X fprintf(stderr, "dump %s of %06o -> %06o, %d words", X why, pointer, last, last - pointer); X last = (int *)(((int) last) & ~1); X for (count = 0; pointer < last; ++count) { X if ((count & 07) == 0) { X fprintf(stderr, "\n%06o", pointer); X } X fprintf(stderr, "\t%06o", *pointer); X pointer++; X } X fprintf(stderr, "\n"); X} X#endif Xcant(filename, what, fatalflag) Xchar *filename; Xchar *what; Xint fatalflag; X/* X * Can't open file message X */ X{ X fprintf(stderr, "Can't open %s file \"%s\": ", what, filename); X#ifndef OSK X perror((char *)NULL); X#else X prerr(0, errno); X#endif X if (fatalflag) { X exit(fatalflag); X } X} X#ifdef DEBUG Xdump(d_linep, d_len, d_which) XLINE *d_linep; X{ X register int i; X X printf("Dump of file%c, %d elements\n", "AB"[d_which], d_len); X printf("linep @ %06o\n", d_linep); X for (i = 0; i <= d_len; i++) { X printf("%3d %6d %06o\n", i, X d_linep[i].serial, d_linep[i].hash); X } X} X Xdumpklist(kmax, why) Xint kmax; Xchar *why; X/* X * Dump klist X */ X{ X register int i; X register CANDIDATE *cp; X register int count; X X printf("\nklist[0..%d] %s, clength = %d\n", kmax, why, clength); X for (i = 0; i <= kmax; i++) { X cp = &clist[klist[i]]; X printf("%2d %2d", i, klist[i]); X if (cp >= &clist[0] && cp < &clist[clength]) X printf(" (%2d %2d -> %2d)\n", cp->a, cp->b, cp->link); X else if (klist[i] == -1) X printf(" End of chain\n"); X else printf(" illegal klist element\n"); X } X for (i = 0; i <= kmax; i++) { X count = -1; X for (cp = (CANDIDATE *)klist[i]; cp > &clist[0]; X cp = (CANDIDATE *)&cp->link) { X if (++count >= 6) { X printf("\n "); X count = 0; X } X printf(" (%2d: %2d,%2d -> %d)", X cp-clist, cp->a, cp->b, cp->link); X } X printf("\n"); X } X printf("*\n"); X} X#endif X#ifdef TIMING Xptime(why) Xchar *why; X/* X * Dump time buffer X */ X{ X long ttemp; X X ttemp = time(NULL); X printf("%ld seconds for %s\n", X ttemp - sectiontime, why); X sectiontime = ttemp; X} X#endif X X/* X * s t r e q . c X */ X X/*)LIBRARY X*/ X X#ifdef DOCUMENTATION X Xtitle streq String Equality Test Xindex String equality test X Xsynopsis X .s.nf X streq(a, b); X char *a; X char *b; X .s.f XDescription X X Return TRUE if the strings are equal. X XBugs X X#endif X X/* #define EOS 0 X#define FALSE 0 X#define TRUE 1 X*/ Xint Xstreq(s1, s2) Xregister char *s1; Xregister char *s2; X/* X * TRUE if strings are identical X */ X{ X while (*s1++ == *s2) { X if (*s2++ == EOS) X return (TRUE); X } X return (FALSE); X} X/* X * e r r o r . c X */ X X/*)LIBRARY X*/ X X#ifdef DOCUMENTATION X Xtitle error Fatal Error Exit Xindex Fatal error exit X Xsynopsis X .s.nf X _error() X X error(format, args) X char *format; X .s.f Xdocumentation X X Fatal error exits. _error() halts, error() prints something X on stderr and then halts. X Xbugs X X Not a real vararg function. Only handles up to a fixed number of args. X X#endif X X/* VARARGS */ Xerror(format, arg1, arg2) Xchar *format; Xint arg1, arg2; X/* X * Error message before retiring. X */ X{ X fprintf(stderr, format, arg1, arg2); X putc('\n', stderr); X _error(); X} X X_error() X{ X exit(1); X} X X/* X * Fgetss() is like fgets() except that the terminating newline X * is removed. X */ Xchar *fgetss(s, n, iop) Xchar *s; XFILE *iop; X{ X int len; X X s[n - 1] = 0; X if(fgets(s, n-1, iop) == NULL) X return(NULL); X len = strlen(s); X if(s[len - 1] == '\n') X s[len - 1] = 0; X return(s); X} + END-OF-FILE diff.c chmod 'u=rw,g=r,o=r' 'diff.c' echo ' -rw-r--r-- 1 jw 38180 Jan 17 17:54 diff.c (as sent)' echo -n ' ' /bin/ls -l diff.c exit 0 Johan Widen SICS, PO Box 1263, S-164 28 KISTA, SWEDEN Tel: +46 8 752 15 32 Ttx: 812 61 54 SICS S Fax: +46 8 751 72 30 Internet: jw@sics.se or {mcvax,munnari,ukc,unido}!enea!sics.se!jw