fritz@unocss.UUCP (Tim Russell) (08/27/89)
+-+-+-+ Beginning of part 2 +-+-+-+ X`009 */ X`009len[which] = linect; X`009file[which] = lentry; X`125 X X X/* X * Look for initial and trailing sequences that have identical hash values. X * Don't bother building them into the candidate vector. X */ X Xsquish() X`123 X`009register int i; X`009register LINE *ap; X`009register LINE *bp; X`009int j; X`009int k; X X`009/* X`009 * prefix -> first line (from start) that doesn't hash identically`032 X`009 */ X`009i = 0; X`009ap = &fileA[1]; X`009bp = &fileB[1]; X`009while (i < lenA && i < lenB && ap->hash == bp->hash) `123 X`009`009i++; X`009`009ap++; X`009`009bp++; X`009`125 X`009prefix = i; X X`009/* X`009 * suffix -> first line (from end) that doesn't hash identically`032 X`009 */ X`009j = lenA - i; X`009k = lenB - i; X`009ap = &fileA[lenA]; X`009bp = &fileB[lenB]; X`009i = 0; X`009while (i < j && i < k && ap->hash == bp->hash) `123 X`009`009i++; X`009`009ap--; X`009`009bp--; X`009`125 X`009suffix = i; X X`009/* X`009 * Tuck the counts away`032 X`009 */ X`009for (k = 0; k <= 1; k++) `123 X`009`009sfile[k] = file[k] + prefix; X`009`009j = slen[k] = len[k] - prefix - suffix; X X`009`009for (i = 0, ap = sfile[k]; i <= slen[k]; i++, ap++) `123 X`009`009`009ap->serial = i; X`009`009`125 X`009`125 X`125 X X X/* X * Sort hash entries X */ X Xsort(vector, vecsize) X`009LINE *vector;`009`009/* What to sort `009 */ X`009int vecsize;`009/* How many to sort `009 */ X`123 X`009register int j; X`009register LINE *aim; X`009register LINE *ai; X`009int mid; X`009int k; X`009LINE work; X X`009for (j = 1; j <= vecsize; j *= 2); X`009mid = (j - 1); X`009while ((mid /= 2) != 0) `123 X`009`009k = vecsize - mid; X`009`009for (j = 1; j <= k; j++) `123 X`009`009`009for (ai = &vector[j]; ai > vector; ai -= mid) `123 X`009`009`009`009aim = &ai[mid]; X`009`009`009`009if (aim < ai) X`009`009`009`009`009break;`009`009/* ?? Why ?? `009 */ X`009`009`009`009if (aim->hash > ai->hash `124`124 X`009`009`009`009`009aim->hash == ai->hash && X`009`009`009`009`009aim->serial > ai->serial) X`009`009`009`009`009break; X`009`009`009`009work.hash = ai->hash; X`009`009`009`009ai->hash = aim->hash; X`009`009`009`009aim->hash = work.hash; X`009`009`009`009work.serial = ai->serial; X`009`009`009`009ai->serial = aim->serial; X`009`009`009`009aim->serial = work.serial; X`009`009`009`125 X`009`009`125 X`009`125 X`125 X X X/* X * Build equivalence class vector X */ X Xequiv() X`123 X`009register LINE *ap; X`009union `123 X`009`009LINE *bp; X`009`009short *mp; X`009`125 r; X`009register int j; X`009LINE *atop; X X#ifdef DEBUG X`009printf("equiv entry\n"); X`009for (j = 1; j <= slenA; j++) X`009`009printf("sfileA[%d] = %6d %06o\n", X`009`009`009 j, sfileA[j].serial, sfileA[j].hash); X`009for (j = 1; j <= slenB; j++) X`009`009printf("sfileB[%d] = %6d %06o\n", X`009`009`009 j, sfileB[j].serial, sfileB[j].hash); X#endif /* DEBUG */ X`009j = 1; X`009ap = &sfileA[1]; X`009r.bp = &sfileB[1]; X`009atop = &sfileA[slenA]; X`009while (ap <= atop && j <= slenB) `123 X`009`009if (ap->hash < r.bp->hash) `123 X`009`009`009ap->hash = 0; X`009`009`009ap++; X`009`009`125 else if (ap->hash == r.bp->hash) `123 X`009`009`009ap->hash = j; X`009`009`009ap++; X`009`009`125 else `123 X`009`009`009r.bp++; X`009`009`009j++; X`009`009`125 X`009`125 X`009while (ap <= atop) `123 X`009`009ap->hash = 0; X`009`009ap++; X`009`125 X`009sfileB[slenB + 1].hash = 0; X#ifdef DEBUG X`009printf("equiv exit\n"); X`009for (j = 1; j <= slenA; j++) X`009`009printf("sfileA[%d] = %6d %06o\n", X`009`009`009 j, sfileA[j].serial, sfileA[j].hash); X`009for (j = 1; j <= slenB; j++) X`009`009printf("sfileB[%d] = %6d %06o\n", X`009`009`009 j, sfileB[j].serial, sfileB[j].hash); X#endif /* DEBUG */ X`009ap = &sfileB[0]; X`009atop = &sfileB[slenB]; X`009r.mp = &member[0]; X`009while (++ap <= atop) `123 X`009`009r.mp++; X`009`009*r.mp = -(ap->serial); X`009`009while (ap[1].hash == ap->hash) `123 X`009`009`009ap++; X`009`009`009r.mp++; X`009`009`009*r.mp = ap->serial; X`009`009`125 X`009`125 X`009r.mp[1] = -1; X#ifdef DEBUG X`009for (j = 0; j <= slenB; j++) X`009`009printf("member[%d] = %d\n", j, member[j]); X#endif /* DEBUG */ X`125 X X X/* X * Build class vector X */ X Xunsort() X`123 X`009register int *temp; X`009register int *tp; X`009union `123 X`009`009LINE *ap; X`009`009short *cp; X`009`125 u; X`009LINE *evec; X`009short *eclass; X#ifdef DEBUG X`009int i; X#endif /* DEBUG */ X X`009temp = (int *) myalloc((slenA + 1) * sizeof(int), "unsort scratch"); X`009u.ap = &sfileA[1]; X`009evec = &sfileA[slenA]; X`009while (u.ap <= evec) `123 X#ifdef DEBUG X`009`009printf("temp[%2d] := %06o\n", u.ap->serial, u.ap->hash); X#endif /* DEBUG */ X`009`009temp[u.ap->serial] = u.ap->hash; X`009`009u.ap++; X`009`125 X X`009/* X`009 * Copy into class vector and free work space`032 X`009 */ X`009u.cp = &class[1]; X`009eclass = &class[slenA]; X`009tp = &temp[1]; X`009while (u.cp <= eclass) X`009`009*u.cp++ = *tp++; X#ifndef OSK X`009free((char *) temp); X#else /* OSK */ X`009free((char *) temp - sizeof(int)); X#endif /* OSK */ X#ifdef DEBUG X`009printf("unsort exit\n"); X`009for (i = 1; i <= slenA; i++) X`009`009printf("class[%d] = %d %06o\n", i, class[i], class[i]); X#endif /* DEBUG */ X`125 X X X/* X * Generate maximum common subsequence chain in clist[] X */ X Xsubseq() X`123 X`009int a; X`009register unsigned ktop; X`009register int b; X`009register int s; X`009unsigned r; X`009int i; X`009int cand; X X`009klist[0] = newcand(0, 0, -1); X`009klist[1] = newcand(slenA + 1, slenB + 1, -1); X`009ktop = 1;`009`009`009`009`009/* -> guard entry */ X`009for (a = 1; a <= slenA; a++) `123 X X`009`009/* X`009`009 * For each non-zero element in fileA ...`032 X`009`009 */ X`009`009if ((i = class[a]) == 0) X`009`009`009continue; X`009`009cand = klist[0];`009`009/* No candidate now */ X`009`009r = 0;`009`009`009`009`009/* Current r-candidate */ X`009`009do `123 X#ifdef DEBUG X`009`009`009printf("a = %d, i = %d, b = %d\n", a, i, member[i]); X#endif /* DEBUG */ X X`009`009`009/* X`009`009`009 * Perform the merge algorithm`032 X`009`009`009 */ X`009`009`009if ((b = member[i]) < 0) X`009`009`009`009b = -b; X#ifdef DEBUG X`009`009`009printf("search(%d, %d, %d) -> %d\n", X`009`009`009`009 r, ktop, b, search(r, ktop, b)); X#endif /* DEBUG */ X`009`009`009if ((s = search(r, ktop, b)) != 0) `123 X`009`009`009`009if (clist[klist[s]].b > b) `123 X`009`009`009`009`009klist[r] = cand; X`009`009`009`009`009r = s; X`009`009`009`009`009cand = newcand(a, b, klist[s - 1]); X#ifdef DEBUG X`009`009`009`009`009dumpklist(ktop, "klist[s-1]->b > b"); X#endif /* DEBUG */ X`009`009`009`009`125 X`009`009`009`009if (s >= ktop) `123 X`009`009`009`009`009klist[ktop + 1] = klist[ktop]; X`009`009`009`009`009ktop++; X#ifdef DEBUG X`009`009`009`009`009klist[r] = cand; X`009`009`009`009`009dumpklist(ktop, "extend"); X#endif /* DEBUG */ X`009`009`009`009`009break; X`009`009`009`009`125 X`009`009`009`125 X`009`009`125 while (member[++i] > 0); X`009`009klist[r] = cand; X`009`125 X#ifdef DEBUG X`009printf("last entry = %d\n", ktop - 1); X#endif /* DEBUG */ X`009return (ktop - 1);`009`009`009/* Last entry found */ X`125 X X Xint Xnewcand(a, b, pred) X`009int a;`009`009`009/* Line in fileA `009 */ X`009int b;`009`009`009/* Line in fileB `009 */ X`009int pred;`009`009/* Link to predecessor, index in cand[] */ X`123 X`009register CANDIDATE *new; X X`009clength++; X`009if (++clength >= csize) `123 X`009`009csize += CSIZE_INC; X`009`009clist = (CANDIDATE *) compact((char *) clist, X`009`009`009`009`009`009`009`009`009 csize * sizeof(CANDIDATE), X`009`009`009`009`009`009`009`009`009 "extending clist"); X`009`125 X`009new = &clist[clength - 1]; X`009new->a = a; X`009new->b = b; X`009new->link = pred; X`009return (clength - 1); X`125 X X X/* X * Search klist[low..top] (inclusive) for b. If klist[low]->b >= b, X * return zero. Else return s such that klist[s-1]->b < b and X * klist[s]->b >= b. Note that the algorithm presupposes the two X * preset "fence" elements, (0, 0) and (slenA, slenB). X */ X Xsearch(low, high, b) X`009register unsigned low; X`009register unsigned high; X`009register int b; X`123 X`009register int temp; X`009register unsigned mid; X X`009if (clist[klist[low]].b >= b) X`009`009return (0); X`009while ((mid = (low + high) / 2) > low) `123 X`009`009if ((temp = clist[klist[mid]].b) > b) X`009`009`009high = mid; X`009`009else if (temp < b) X`009`009`009low = mid; X`009`009else `123 X`009`009`009return (mid); X`009`009`125 X`009`125 X`009return (mid + 1); X`125 X X Xunravel(k) X`009register int k; X`123 X`009register int i; X`009register CANDIDATE *cp; X`009int first_trailer; X`009int difference; X X`009first_trailer = lenA - suffix; X`009difference = lenB - lenA; X#ifdef DEBUG X`009printf("first trailer = %d, difference = %d\n", X`009`009 first_trailer, difference); X#endif /* DEBUG */ X`009for (i = 0; i <= lenA; i++) `123 X`009`009match[i] = (i <= prefix) ? i X`009`009`009: (i > first_trailer) ? i + difference X`009`009`009: 0; X`009`125 X#ifdef DEBUG X`009printf("unravel\n"); X#endif /* DEBUG */ X`009while (k != -1) `123 X`009`009cp = &clist[k]; X#ifdef DEBUG X`009`009if (k < 0 `124`124 k >= clength) X`009`009`009error("Illegal link -> %d", k); X`009`009printf("match[%d] := %d\n", cp->a + prefix, cp->b + prefix); X#endif /* DEBUG */ X`009`009match[cp->a + prefix] = cp->b + prefix; X`009`009k = cp->link; X`009`125 X`125 X X X/* X * Check for hash matches (jackpots) and collect random access indices to X * the two files. X * X * It should be possible to avoid doing most of the ftell's by noticing X * that we are not doing a context diff and noticing that if a line X * compares equal to the other file, we will not ever need to know its X * file position. FIXME. X */ X Xcheck(fileAname, fileBname) X`009char *fileAname; X`009char *fileBname; X`123 X`009register int a;`009`009`009/* Current line in file A */ X`009register int b;`009`009`009/* Current line in file B */ X`009int jackpot; X X`009b = 1; X`009rewind(infd[0]); X`009rewind(infd[1]); X/* X * See above; these would be over-written on VMS anyway. X */ X X#ifndef vms X`009oldseek[0] = ftell(infd[0]); X`009newseek[0] = ftell(infd[1]); X#endif /* vms */ X X`009jackpot = 0; X#ifdef DEBUG X`009printf("match vector\n"); X`009for (a = 0; a <= lenA; a++) X`009`009printf("match[%d] = %d\n", a, match[a]); X#endif /* DEBUG */ X`009for (a = 1; a <= lenA; a++) `123 X`009`009if (match[a] == 0) `123 X`009`009`009/* Unique line in A */ X`009`009`009oldseek[a] = ftell(infd[0]); X`009`009`009getline(infd[0], text); X`009`009`009continue; X`009`009`125 X`009`009while (b < match[a]) `123 X`009`009`009/* Skip over unique lines in B */ X`009`009`009newseek[b] = ftell(infd[1]); X`009`009`009getline(infd[1], textb); X`009`009`009b++; X`009`009`125 X X`009`009/* X`009`009 * Compare the two, supposedly matching, lines. Unless we are going X`009`009 * to print these lines, don't bother to remember where they are. We X`009`009 * only print matching lines if a context diff is happening, or if a X`009`009 * jackpot occurs.`032 X`009`009 */ X`009`009if (cflag) `123 X`009`009`009oldseek[a] = ftell(infd[0]); X`009`009`009newseek[b] = ftell(infd[1]); X`009`009`125 X`009`009getline(infd[0], text); X`009`009getline(infd[1], textb); X`009`009if (!streq(text, textb)) `123 X`009`009`009fprintf(stderr, "Spurious match:\n"); X`009`009`009fprintf(stderr, "line %d in %s, \"%s\"\n", X`009`009`009`009`009a, fileAname, text); X`009`009`009fprintf(stderr, "line %d in %s, \"%s\"\n", X`009`009`009`009`009b, fileBname, textb); X`009`009`009match[a] = 0; X`009`009`009jackpot++; X`009`009`125 X`009`009b++; X`009`125 X`009for (; b <= lenB; b++) `123 X`009`009newseek[b] = ftell(infd[1]); X`009`009getline(infd[1], textb); X`009`125 X/* X * The logical converse to the code up above, for NON-VMS systems, to X * store away an fseek() pointer at the beginning of the file. For VMS, X * we need one at EOF... X */ X X#ifdef VMS X`009oldseek[lenA] = ftell(infd[0]); X`009getline(infd[0], text);`009`009/* Will hit EOF... */ X`009newseek[lenB] = ftell(infd[1]); X`009getline(infd[1], textb);`009/* Will hit EOF... */ X#endif /* vms */ X X`009return (jackpot); X`125 X X Xoutput(fileAname, fileBname) X`009char *fileAname, *fileBname; X`123 X`009register int astart; X`009register int aend = 0; X`009int bstart; X`009register int bend; X X`009rewind(infd[0]); X`009rewind(infd[1]); X`009match[0] = 0; X`009match[lenA + 1] = lenB + 1; X`009if (!eflag) `123 X`009`009if (cflag) `123 X X`009`009`009/* X`009`009`009 * Should include ctime style dates after the file names, but X`009`009`009 * this would be non-trivial on OSK. Perhaps there should be a X`009`009`009 * special case for stdin.`032 X`009`009`009 */ X`009`009`009printf("*** %s\n--- %s\n", fileAname, fileBname); X`009`009`125 X X`009`009/* X`009`009 * Normal printout`032 X`009`009 */ X`009`009for (astart = 1; astart <= lenA; astart = aend + 1) `123 X X`009`009`009/* X`009`009`009 * New subsequence, skip over matching stuff`032 X`009`009`009 */ X`009`009`009while (astart <= lenA X`009`009`009`009 && match[astart] == (match[astart - 1] + 1)) X`009`009`009`009astart++; X X`009`009`009/* X`009`009`009 * Found a difference, setup range and print it`032 X`009`009`009 */ X`009`009`009bstart = match[astart - 1] + 1; X`009`009`009aend = astart - 1; X`009`009`009while (aend < lenA && match[aend + 1] == 0) X`009`009`009`009aend++; X`009`009`009bend = match[aend + 1] - 1; X`009`009`009match[aend] = bend; X`009`009`009change(astart, aend, bstart, bend); X`009`009`125 X`009`125 else `123 X X`009`009/* X`009`009 * Edit script output -- differences are output "backwards" for the X`009`009 * benefit of a line-oriented editor.`032 X`009`009 */ X`009`009for (aend = lenA; aend >= 1; aend = astart - 1) `123 X`009`009`009while (aend >= 1 X`009`009`009`009 && match[aend] == (match[aend + 1] - 1) X`009`009`009`009 && match[aend] != 0) X`009`009`009`009aend--; X`009`009`009bend = match[aend + 1] - 1; X`009`009`009astart = aend + 1; X`009`009`009while (astart > 1 && match[astart - 1] == 0) X`009`009`009`009astart--; X`009`009`009bstart = match[astart - 1] + 1; X`009`009`009match[astart] = bstart; X`009`009`009change(astart, aend, bstart, bend); X`009`009`125 X`009`125 X`009if (lenA == 0) X`009`009change(1, 0, 1, lenB); X`125 X X X/* X * Output a change entry: fileA[astart..aend] changed to fileB[bstart..bend] X */ X Xchange(astart, aend, bstart, bend) X`009int astart; X`009int aend; -+-+-+-+-+ End of part 2 +-+-+-+-+- -- ---------------------------------+-------------------------------------------- Tim Russell, Computer Operator | Internet: russell@zeus.unl.edu Campus Computing | Bitnet: russell@unoma1 University of Nebraska at Omaha | UUCP: uunet!zeus.unl.edu!russell