[news.software.anu-news] diff part 2 of 4

fritz@unocss.UUCP (Tim Russell) (08/27/89)

+-+-+-+ Beginning of part 2 +-+-+-+
X`009 */
X`009len[which] = linect;
X`009file[which] = lentry;
X`125
X
X
X/*
X * Look for initial and trailing sequences that have identical hash values.
X * Don't bother building them into the candidate vector.
X */
X
Xsquish()
X`123
X`009register int    i;
X`009register LINE  *ap;
X`009register LINE  *bp;
X`009int             j;
X`009int             k;
X
X`009/*
X`009 * prefix -> first line (from start) that doesn't hash identically`032
X`009 */
X`009i = 0;
X`009ap = &fileA[1];
X`009bp = &fileB[1];
X`009while (i < lenA && i < lenB && ap->hash == bp->hash) `123
X`009`009i++;
X`009`009ap++;
X`009`009bp++;
X`009`125
X`009prefix = i;
X
X`009/*
X`009 * suffix -> first line (from end) that doesn't hash identically`032
X`009 */
X`009j = lenA - i;
X`009k = lenB - i;
X`009ap = &fileA[lenA];
X`009bp = &fileB[lenB];
X`009i = 0;
X`009while (i < j && i < k && ap->hash == bp->hash) `123
X`009`009i++;
X`009`009ap--;
X`009`009bp--;
X`009`125
X`009suffix = i;
X
X`009/*
X`009 * Tuck the counts away`032
X`009 */
X`009for (k = 0; k <= 1; k++) `123
X`009`009sfile[k] = file[k] + prefix;
X`009`009j = slen[k] = len[k] - prefix - suffix;
X
X`009`009for (i = 0, ap = sfile[k]; i <= slen[k]; i++, ap++) `123
X`009`009`009ap->serial = i;
X`009`009`125
X`009`125
X`125
X
X
X/*
X * Sort hash entries
X */
X
Xsort(vector, vecsize)
X`009LINE           *vector;`009`009/* What to sort  `009  */
X`009int             vecsize;`009/* How many to sort  `009 */
X`123
X`009register int    j;
X`009register LINE  *aim;
X`009register LINE  *ai;
X`009int             mid;
X`009int             k;
X`009LINE            work;
X
X`009for (j = 1; j <= vecsize; j *= 2);
X`009mid = (j - 1);
X`009while ((mid /= 2) != 0) `123
X`009`009k = vecsize - mid;
X`009`009for (j = 1; j <= k; j++) `123
X`009`009`009for (ai = &vector[j]; ai > vector; ai -= mid) `123
X`009`009`009`009aim = &ai[mid];
X`009`009`009`009if (aim < ai)
X`009`009`009`009`009break;`009`009/* ?? Why ??  `009 */
X`009`009`009`009if (aim->hash > ai->hash `124`124
X`009`009`009`009`009aim->hash == ai->hash &&
X`009`009`009`009`009aim->serial > ai->serial)
X`009`009`009`009`009break;
X`009`009`009`009work.hash = ai->hash;
X`009`009`009`009ai->hash = aim->hash;
X`009`009`009`009aim->hash = work.hash;
X`009`009`009`009work.serial = ai->serial;
X`009`009`009`009ai->serial = aim->serial;
X`009`009`009`009aim->serial = work.serial;
X`009`009`009`125
X`009`009`125
X`009`125
X`125
X
X
X/*
X * Build equivalence class vector
X */
X
Xequiv()
X`123
X`009register LINE  *ap;
X`009union `123
X`009`009LINE           *bp;
X`009`009short          *mp;
X`009`125               r;
X`009register int    j;
X`009LINE           *atop;
X
X#ifdef  DEBUG
X`009printf("equiv entry\n");
X`009for (j = 1; j <= slenA; j++)
X`009`009printf("sfileA[%d] = %6d %06o\n",
X`009`009`009   j, sfileA[j].serial, sfileA[j].hash);
X`009for (j = 1; j <= slenB; j++)
X`009`009printf("sfileB[%d] = %6d %06o\n",
X`009`009`009   j, sfileB[j].serial, sfileB[j].hash);
X#endif /* DEBUG */
X`009j = 1;
X`009ap = &sfileA[1];
X`009r.bp = &sfileB[1];
X`009atop = &sfileA[slenA];
X`009while (ap <= atop && j <= slenB) `123
X`009`009if (ap->hash < r.bp->hash) `123
X`009`009`009ap->hash = 0;
X`009`009`009ap++;
X`009`009`125 else if (ap->hash == r.bp->hash) `123
X`009`009`009ap->hash = j;
X`009`009`009ap++;
X`009`009`125 else `123
X`009`009`009r.bp++;
X`009`009`009j++;
X`009`009`125
X`009`125
X`009while (ap <= atop) `123
X`009`009ap->hash = 0;
X`009`009ap++;
X`009`125
X`009sfileB[slenB + 1].hash = 0;
X#ifdef  DEBUG
X`009printf("equiv exit\n");
X`009for (j = 1; j <= slenA; j++)
X`009`009printf("sfileA[%d] = %6d %06o\n",
X`009`009`009   j, sfileA[j].serial, sfileA[j].hash);
X`009for (j = 1; j <= slenB; j++)
X`009`009printf("sfileB[%d] = %6d %06o\n",
X`009`009`009   j, sfileB[j].serial, sfileB[j].hash);
X#endif /* DEBUG */
X`009ap = &sfileB[0];
X`009atop = &sfileB[slenB];
X`009r.mp = &member[0];
X`009while (++ap <= atop) `123
X`009`009r.mp++;
X`009`009*r.mp = -(ap->serial);
X`009`009while (ap[1].hash == ap->hash) `123
X`009`009`009ap++;
X`009`009`009r.mp++;
X`009`009`009*r.mp = ap->serial;
X`009`009`125
X`009`125
X`009r.mp[1] = -1;
X#ifdef  DEBUG
X`009for (j = 0; j <= slenB; j++)
X`009`009printf("member[%d] = %d\n", j, member[j]);
X#endif /* DEBUG */
X`125
X
X
X/*
X * Build class vector
X */
X
Xunsort()
X`123
X`009register int   *temp;
X`009register int   *tp;
X`009union `123
X`009`009LINE           *ap;
X`009`009short          *cp;
X`009`125               u;
X`009LINE           *evec;
X`009short          *eclass;
X#ifdef  DEBUG
X`009int             i;
X#endif /* DEBUG */
X
X`009temp = (int *) myalloc((slenA + 1) * sizeof(int), "unsort scratch");
X`009u.ap = &sfileA[1];
X`009evec = &sfileA[slenA];
X`009while (u.ap <= evec) `123
X#ifdef  DEBUG
X`009`009printf("temp[%2d] := %06o\n", u.ap->serial, u.ap->hash);
X#endif /* DEBUG */
X`009`009temp[u.ap->serial] = u.ap->hash;
X`009`009u.ap++;
X`009`125
X
X`009/*
X`009 * Copy into class vector and free work space`032
X`009 */
X`009u.cp = &class[1];
X`009eclass = &class[slenA];
X`009tp = &temp[1];
X`009while (u.cp <= eclass)
X`009`009*u.cp++ = *tp++;
X#ifndef OSK
X`009free((char *) temp);
X#else /* OSK */
X`009free((char *) temp - sizeof(int));
X#endif /* OSK */
X#ifdef  DEBUG
X`009printf("unsort exit\n");
X`009for (i = 1; i <= slenA; i++)
X`009`009printf("class[%d] = %d %06o\n", i, class[i], class[i]);
X#endif /* DEBUG */
X`125
X
X
X/*
X * Generate maximum common subsequence chain in clist[]
X */
X
Xsubseq()
X`123
X`009int             a;
X`009register unsigned ktop;
X`009register int    b;
X`009register int    s;
X`009unsigned        r;
X`009int             i;
X`009int             cand;
X
X`009klist[0] = newcand(0, 0, -1);
X`009klist[1] = newcand(slenA + 1, slenB + 1, -1);
X`009ktop = 1;`009`009`009`009`009/* -> guard entry  */
X`009for (a = 1; a <= slenA; a++) `123
X
X`009`009/*
X`009`009 * For each non-zero element in fileA ...`032
X`009`009 */
X`009`009if ((i = class[a]) == 0)
X`009`009`009continue;
X`009`009cand = klist[0];`009`009/* No candidate now  */
X`009`009r = 0;`009`009`009`009`009/* Current r-candidate  */
X`009`009do `123
X#ifdef  DEBUG
X`009`009`009printf("a = %d, i = %d, b = %d\n", a, i, member[i]);
X#endif /* DEBUG */
X
X`009`009`009/*
X`009`009`009 * Perform the merge algorithm`032
X`009`009`009 */
X`009`009`009if ((b = member[i]) < 0)
X`009`009`009`009b = -b;
X#ifdef  DEBUG
X`009`009`009printf("search(%d, %d, %d) -> %d\n",
X`009`009`009`009   r, ktop, b, search(r, ktop, b));
X#endif /* DEBUG */
X`009`009`009if ((s = search(r, ktop, b)) != 0) `123
X`009`009`009`009if (clist[klist[s]].b > b) `123
X`009`009`009`009`009klist[r] = cand;
X`009`009`009`009`009r = s;
X`009`009`009`009`009cand = newcand(a, b, klist[s - 1]);
X#ifdef  DEBUG
X`009`009`009`009`009dumpklist(ktop, "klist[s-1]->b > b");
X#endif /* DEBUG */
X`009`009`009`009`125
X`009`009`009`009if (s >= ktop) `123
X`009`009`009`009`009klist[ktop + 1] = klist[ktop];
X`009`009`009`009`009ktop++;
X#ifdef  DEBUG
X`009`009`009`009`009klist[r] = cand;
X`009`009`009`009`009dumpklist(ktop, "extend");
X#endif /* DEBUG */
X`009`009`009`009`009break;
X`009`009`009`009`125
X`009`009`009`125
X`009`009`125 while (member[++i] > 0);
X`009`009klist[r] = cand;
X`009`125
X#ifdef  DEBUG
X`009printf("last entry = %d\n", ktop - 1);
X#endif /* DEBUG */
X`009return (ktop - 1);`009`009`009/* Last entry found  */
X`125
X
X
Xint
Xnewcand(a, b, pred)
X`009int             a;`009`009`009/* Line in fileA  `009  */
X`009int             b;`009`009`009/* Line in fileB  `009  */
X`009int             pred;`009`009/* Link to predecessor, index in cand[]  */
X`123
X`009register CANDIDATE *new;
X
X`009clength++;
X`009if (++clength >= csize) `123
X`009`009csize += CSIZE_INC;
X`009`009clist = (CANDIDATE *) compact((char *) clist,
X`009`009`009`009`009`009`009`009`009  csize * sizeof(CANDIDATE),
X`009`009`009`009`009`009`009`009`009  "extending clist");
X`009`125
X`009new = &clist[clength - 1];
X`009new->a = a;
X`009new->b = b;
X`009new->link = pred;
X`009return (clength - 1);
X`125
X
X
X/*
X * Search klist[low..top] (inclusive) for b.  If klist[low]->b >= b,
X * return zero.  Else return s such that klist[s-1]->b < b and
X * klist[s]->b >= b.  Note that the algorithm presupposes the two
X * preset "fence" elements, (0, 0) and (slenA, slenB).
X */
X
Xsearch(low, high, b)
X`009register unsigned low;
X`009register unsigned high;
X`009register int    b;
X`123
X`009register int    temp;
X`009register unsigned mid;
X
X`009if (clist[klist[low]].b >= b)
X`009`009return (0);
X`009while ((mid = (low + high) / 2) > low) `123
X`009`009if ((temp = clist[klist[mid]].b) > b)
X`009`009`009high = mid;
X`009`009else if (temp < b)
X`009`009`009low = mid;
X`009`009else `123
X`009`009`009return (mid);
X`009`009`125
X`009`125
X`009return (mid + 1);
X`125
X
X
Xunravel(k)
X`009register int    k;
X`123
X`009register int    i;
X`009register CANDIDATE *cp;
X`009int             first_trailer;
X`009int             difference;
X
X`009first_trailer = lenA - suffix;
X`009difference = lenB - lenA;
X#ifdef  DEBUG
X`009printf("first trailer = %d, difference = %d\n",
X`009`009   first_trailer, difference);
X#endif /* DEBUG */
X`009for (i = 0; i <= lenA; i++) `123
X`009`009match[i] = (i <= prefix) ? i
X`009`009`009: (i > first_trailer) ? i + difference
X`009`009`009: 0;
X`009`125
X#ifdef  DEBUG
X`009printf("unravel\n");
X#endif /* DEBUG */
X`009while (k != -1) `123
X`009`009cp = &clist[k];
X#ifdef  DEBUG
X`009`009if (k < 0 `124`124 k >= clength)
X`009`009`009error("Illegal link -> %d", k);
X`009`009printf("match[%d] := %d\n", cp->a + prefix, cp->b + prefix);
X#endif /* DEBUG */
X`009`009match[cp->a + prefix] = cp->b + prefix;
X`009`009k = cp->link;
X`009`125
X`125
X
X
X/*
X * Check for hash matches (jackpots) and collect random access indices to
X * the two files.
X *
X * It should be possible to avoid doing most of the ftell's by noticing
X * that we are not doing a context diff and noticing that if a line
X * compares equal to the other file, we will not ever need to know its
X * file position.  FIXME.
X */
X
Xcheck(fileAname, fileBname)
X`009char           *fileAname;
X`009char           *fileBname;
X`123
X`009register int    a;`009`009`009/* Current line in file A  */
X`009register int    b;`009`009`009/* Current line in file B  */
X`009int             jackpot;
X
X`009b = 1;
X`009rewind(infd[0]);
X`009rewind(infd[1]);
X/*
X * See above; these would be over-written on VMS anyway.
X */
X
X#ifndef vms
X`009oldseek[0] = ftell(infd[0]);
X`009newseek[0] = ftell(infd[1]);
X#endif /* vms */
X
X`009jackpot = 0;
X#ifdef  DEBUG
X`009printf("match vector\n");
X`009for (a = 0; a <= lenA; a++)
X`009`009printf("match[%d] = %d\n", a, match[a]);
X#endif /* DEBUG */
X`009for (a = 1; a <= lenA; a++) `123
X`009`009if (match[a] == 0) `123
X`009`009`009/* Unique line in A */
X`009`009`009oldseek[a] = ftell(infd[0]);
X`009`009`009getline(infd[0], text);
X`009`009`009continue;
X`009`009`125
X`009`009while (b < match[a]) `123
X`009`009`009/* Skip over unique lines in B */
X`009`009`009newseek[b] = ftell(infd[1]);
X`009`009`009getline(infd[1], textb);
X`009`009`009b++;
X`009`009`125
X
X`009`009/*
X`009`009 * Compare the two, supposedly matching, lines. Unless we are going
X`009`009 * to print these lines, don't bother to remember where they are.  We
X`009`009 * only print matching lines if a context diff is happening, or if a
X`009`009 * jackpot occurs.`032
X`009`009 */
X`009`009if (cflag) `123
X`009`009`009oldseek[a] = ftell(infd[0]);
X`009`009`009newseek[b] = ftell(infd[1]);
X`009`009`125
X`009`009getline(infd[0], text);
X`009`009getline(infd[1], textb);
X`009`009if (!streq(text, textb)) `123
X`009`009`009fprintf(stderr, "Spurious match:\n");
X`009`009`009fprintf(stderr, "line %d in %s, \"%s\"\n",
X`009`009`009`009`009a, fileAname, text);
X`009`009`009fprintf(stderr, "line %d in %s, \"%s\"\n",
X`009`009`009`009`009b, fileBname, textb);
X`009`009`009match[a] = 0;
X`009`009`009jackpot++;
X`009`009`125
X`009`009b++;
X`009`125
X`009for (; b <= lenB; b++) `123
X`009`009newseek[b] = ftell(infd[1]);
X`009`009getline(infd[1], textb);
X`009`125
X/*
X * The logical converse to the code up above, for NON-VMS systems, to
X * store away an fseek() pointer at the beginning of the file.  For VMS,
X * we need one at EOF...
X */
X
X#ifdef VMS
X`009oldseek[lenA] = ftell(infd[0]);
X`009getline(infd[0], text);`009`009/* Will hit EOF...  */
X`009newseek[lenB] = ftell(infd[1]);
X`009getline(infd[1], textb);`009/* Will hit EOF...  */
X#endif /* vms */
X
X`009return (jackpot);
X`125
X
X
Xoutput(fileAname, fileBname)
X`009char           *fileAname, *fileBname;
X`123
X`009register int    astart;
X`009register int    aend = 0;
X`009int             bstart;
X`009register int    bend;
X
X`009rewind(infd[0]);
X`009rewind(infd[1]);
X`009match[0] = 0;
X`009match[lenA + 1] = lenB + 1;
X`009if (!eflag) `123
X`009`009if (cflag) `123
X
X`009`009`009/*
X`009`009`009 * Should include ctime style dates after the file names, but
X`009`009`009 * this would be non-trivial on OSK.  Perhaps there should be a
X`009`009`009 * special case for stdin.`032
X`009`009`009 */
X`009`009`009printf("*** %s\n--- %s\n", fileAname, fileBname);
X`009`009`125
X
X`009`009/*
X`009`009 * Normal printout`032
X`009`009 */
X`009`009for (astart = 1; astart <= lenA; astart = aend + 1) `123
X
X`009`009`009/*
X`009`009`009 * New subsequence, skip over matching stuff`032
X`009`009`009 */
X`009`009`009while (astart <= lenA
X`009`009`009`009   && match[astart] == (match[astart - 1] + 1))
X`009`009`009`009astart++;
X
X`009`009`009/*
X`009`009`009 * Found a difference, setup range and print it`032
X`009`009`009 */
X`009`009`009bstart = match[astart - 1] + 1;
X`009`009`009aend = astart - 1;
X`009`009`009while (aend < lenA && match[aend + 1] == 0)
X`009`009`009`009aend++;
X`009`009`009bend = match[aend + 1] - 1;
X`009`009`009match[aend] = bend;
X`009`009`009change(astart, aend, bstart, bend);
X`009`009`125
X`009`125 else `123
X
X`009`009/*
X`009`009 * Edit script output -- differences are output "backwards" for the
X`009`009 * benefit of a line-oriented editor.`032
X`009`009 */
X`009`009for (aend = lenA; aend >= 1; aend = astart - 1) `123
X`009`009`009while (aend >= 1
X`009`009`009`009   && match[aend] == (match[aend + 1] - 1)
X`009`009`009`009   && match[aend] != 0)
X`009`009`009`009aend--;
X`009`009`009bend = match[aend + 1] - 1;
X`009`009`009astart = aend + 1;
X`009`009`009while (astart > 1 && match[astart - 1] == 0)
X`009`009`009`009astart--;
X`009`009`009bstart = match[astart - 1] + 1;
X`009`009`009match[astart] = bstart;
X`009`009`009change(astart, aend, bstart, bend);
X`009`009`125
X`009`125
X`009if (lenA == 0)
X`009`009change(1, 0, 1, lenB);
X`125
X
X
X/*
X * Output a change entry: fileA[astart..aend] changed to fileB[bstart..bend]
X */
X
Xchange(astart, aend, bstart, bend)
X`009int             astart;
X`009int             aend;
-+-+-+-+-+ End of part 2 +-+-+-+-+-
-- 
---------------------------------+--------------------------------------------
 Tim Russell, Computer Operator  | Internet: russell@zeus.unl.edu
 Campus Computing                | Bitnet:   russell@unoma1
 University of Nebraska at Omaha | UUCP:     uunet!zeus.unl.edu!russell