cons@sdccsu3.UUCP (08/03/84)
Here is a description and fix for a 'refer' sort problem packaged as a shar archive. #! /bin/sh # The rest of this file is a shell script which will extract: # Sendbug02 refer2.c refer5.c echo x - Sendbug02 cat >Sendbug02 <<'!Funky!Stuff!' Subject: REFER mis-sorts reference lists if duplicate citations occur. Index: usr.bin/refer/refer2.c 4.2BSD usr.bin/refer/refer5.c 4.2BSD Description: The putsig routine in refer5.c is responsible for placing signals (usually superscripts, or author-date labels) in the body of a document. Putsig also makes the signals available for printing in the reference list by emitting strings such as ".ds [F signal" into the reference list. If a reference is cited more than once, it is only put on the reference list the first time it is cited. However putsig persists in emitting the ".ds [F ..." string for duplicate citations. This causes a problem if the reference list is sorted because the extraneous ".ds [F ..." material appears at the beginning of the next non-duplicate reference where it obscures the sortkey. Thus the reference list is mis-sorted. Repeat-By: Create a document which cites a reference twice, then cites a new reference. Process the document using REFER with the -s option. Notice the extra ".ds [F ..." line in the REFER output. If the two references came out sorted properly, you were lucky; reverse their roles and you will see the failure. Fix: Putsig is called at two places in refer2.c: in one context a duplicate citation is being processed, and in the other a new citation is being processed. Add a flag parameter to putsig to distinguish the two calls. Modify the routine to suppress the emission of ".ds [F ..." when it is called for a duplicate citation. (See sources posted to net.sources) Rick Accurso UUCP: ...!ucbvax!sdcsvax!sdccsu3!accurso ARPA: sdcsvax!sdccsu3!accurso@nosc !Funky!Stuff! echo x - refer2.c cat >refer2.c <<'!Funky!Stuff!' 
#ifndef lint static char *sccsid = "@(#)refer2.c 4.1 (Berkeley) 5/6/83"; #endif #include "refer..c" #define NFLD 80 #define TLEN 512 extern FILE *in; char one[ANSLEN]; int onelen = ANSLEN; static char dr [100] = ""; /* ** doref - Process a citation. */ doref(line1) char *line1; { char buff[QLEN]; /* query keywords */ char dbuff[3*QLEN]; /* field data supplied in citation */ char answer[ANSLEN], temp[TLEN], line[BUFSIZ]; char *p, **sr, *flds[NFLD], *r; int stat, nf, nr, query = 0, alph, digs; again: buff[0] = dbuff[0] = NULL; if (biblio && Iline == 1 && line1[0] == '%') /* ** In biblio mode first line of input file may begin ** with % and contain field data. Hold it in dbuff. */ strcat(dbuff, line1); while (input(line)) { /* get query */ Iline++; if (prefix(".]", line)) /* end of citation */ break; if (biblio && line[0] == '\n') /* ** In biblio mode a blank line indicates ** the end of the reference. */ break; if (biblio && line[0] == '%' && line[1] == *convert) break; if (control(line[0])) query = 1; /* ** Store lines of query keys in buff; ** store lines of field data in dbuff. */ strcat(query ? dbuff : buff, line); if (strlen(buff) > QLEN) err("query too long (%d)", strlen(buff)); if (strlen(dbuff) > 3 * QLEN) err("record at line %d too long", Iline-1); } if (biblio && line[0] == '\n' && feof(in)) return; if (strcmp(buff, "$LIST$\n")==0) { /* ** Produce the list of accumulated references. */ assert (dbuff[0] == 0); dumpold(); return; } answer[0] = 0; /* ** Refine the query keywords in buff. */ for (p = buff; *p; p++) { if (isupper(*p)) /* Convert to lowercase. */ *p |= 040; } alph = digs = 0; for (p = buff; *p; p++) { if (isalpha(*p)) alph++; else if (isdigit(*p)) digs++; else { *p = 0; if ((alph+digs < 3) || common(p-alph)) { r = p-alph; /* ** Blank out unacceptable ** keywords (too short, common etc.) 
*/ while (r < p) *r++ = ' '; } if (alph == 0 && digs > 0) { r = p-digs; if (digs != 4 || atoi(r)/100 != 19) { /* ** Blank out numbers ** which are not in ** 1900-1999 */ while (r < p) *r++ = ' '; } } *p = ' '; alph = digs = 0; } } one[0] = 0; if (buff[0]) { /* do not search if no query */ for (sr = rdata; sr < search; sr++) { temp[0] = 0; corout(buff, temp, "hunt", *sr, TLEN); assert(strlen(temp) < TLEN); if (strlen(temp)+strlen(answer) > BUFSIZ) err("Accumulated answers too large",0); strcat(answer, temp); if (strlen(answer)>BUFSIZ) err("answer too long (%d)", strlen(answer)); if (newline(answer) > 0) break; } } assert(strlen(one) < ANSLEN); assert(strlen(answer) < ANSLEN); /* ** If a search was done, the number of newlines in answer ** indicates how many hits were found. */ if (buff[0]) switch (newline(answer)) { case 0: fprintf(stderr, "No such paper: %s\n", buff); return; default: fprintf(stderr, "Too many hits: %s\n", trimnl(buff)); choices(answer); p = buff; while (*p != '\n') p++; *++p = 0; case 1: /* ** Search found one hit, success! */ if (endpush) /* ** References are being produced ** in a $LIST$ rather than as ** footnotes. */ if (nr = chkdup(answer)) { /* ** This reference has already ** been cited. */ if (bare < 2) { /* ** Signals in the text ** are desired (no -b) */ nf = tabs(flds, one); nf += tabs(flds+nf, dbuff); assert(nf < NFLD); putsig(nf,flds,nr,line1,line,1); } /* ** Since it's a dup, no need to ** putkey or putref. */ return; } if (one[0] == 0) /* ** Place the reference data for the hit ** indicated by answer in one. */ corout(answer, one, "deliv", dr, QLEN); break; } assert(strlen(buff) < QLEN); assert(strlen(one) < ANSLEN); /* ** Set the flds[] pointers at the beginning of each ** field of reference data in one and dbuff. */ nf = tabs(flds, one); nf += tabs(flds+nf, dbuff); assert(nf < NFLD); refnum++; /* ** The stream "fo" is written to by putkey ** and putref. If references are being produced in the ** form of footnotes, fo is stdout. 
If references are ** being printed as a list at the end (endpush), then ** fo is a temp file. In the endpush case each reference ** is written to fo as one long line. If the list is ** to be sorted, putkey places the sort key on the front ** of the line. */ if (sort) putkey(nf, flds, refnum, keystr); if (bare < 2) putsig(nf, flds, refnum, line1, line, 0); else flout(); putref(nf, flds); if (biblio && line[0] == '\n') goto again; if (biblio && line[0] == '%' && line[1] == *convert) fprintf(fo, "%s%c%s", convert+1, sep, line+3); } /* count the newlines in s */ newline(s) char *s; { int k = 0, c; while (c = *s++) if (c == '\n') k++; return(k); } /* print the titles associated with the hits in buff */ choices(buff) char *buff; { char ob[BUFSIZ], *p, *r, *q, *t; int nl; for (r = p = buff; *p; p++) { if (*p == '\n') { *p++ = 0; corout(r, ob, "deliv", dr, BUFSIZ); nl = 1; for (q = ob; *q; q++) { if (nl && (q[0]=='.'||q[0]=='%') && q[1]=='T') { q += 3; for (t = q; *t && *t != '\n'; t++) ; *t = 0; fprintf(stderr, "%.70s\n", q); q = 0; break; } nl = *q == '\n'; } if (q) fprintf(stderr, "??? at %s\n",r); r=p; } } } control(c) { if (c == '.') return(1); if (c == '%') return(1); return(0); } !Funky!Stuff! echo x - refer5.c cat >refer5.c <<'!Funky!Stuff!' /* * $Log: refer5.c,v $ * Revision 1.5 84/07/09 16:12:23 cons * Putsig now refrains from putting out ".ds [F" info when the citation * is a duplicate. The extraneous ".ds [F" info fouled-up sortkeys * for subsequent non-duplicate reference. Accurso * * Revision 1.4 84/07/05 15:30:12 cons * Fixed keylet() so that disambiguating letters a, b, c, ... * will be issued instead of control characters ^A, ^B, ^C, ... Accurso * * Revision 1.3 84/07/05 15:16:50 cons * Added comments. 
Accurso * */ #ifndef lint static char *rcsid = "$Header: refer5.c,v 1.5 84/07/09 16:12:23 cons Exp $"; #endif #include "refer..c" #define SAME 0 #define NFLAB 3000 #define NLABC 1000 static char sig[NLABC]; static char bflab[NFLAB]; /* Record of plain signals issued. ** "Plain signals" have not had ** disambiguating letter appended, ** miller84 vs. miller84a. */ static char *labtab[NLABC]; /* Array of pointers to plain signals; ** indexed by nref. */ static char *lbp = bflab; static char labc[NLABC]; /* Array of disambiguating ** characters issued; indexed by nref. */ static char stbuff[50]; static int prevsig; /* putsig ** ** Imbed a signal indicating a citation in the text. ** Also may supply the signal for printing in a ** reference list (.ds [F signal). ** ** CONDENSE facility which converts consecutive numeric signals (4,5,6,7) ** to a range (4-7) does not handle sorted reference lists. */ putsig (nf, flds, nref, nstline, endline, dupl) char *flds[]; /* Fields of reference data */ char *nstline; /* Line which indicated start of citation. ** Usually ".[". In biblio mode could be blank or ** start with "%". */ char *endline; /* Line which indicated end of citation. ** Usually ".]". Blank in biblio mode. */ int dupl; /* dupl==0 implies new citation; ** dupl==1 implies repeat citation. */ { char t[100], t1[100], t2[100], format[10], *sd, *stline; int addon, another = 0; static FILE *fhide = 0; int i; char tag; #ifdef CONDENSE static int *wref = NULL; static int wcnt = 0; static int wsize = 50; if (wref == NULL) wref = calloc(wsize, sizeof(int)); #endif if (labels) { /* User specified -l, -k, or -S option. */ if (nf == 0) /* Repeat citation of a reference. ** Reuse previously issued signal. */ sprintf(t, "%s%c", labtab[nref], labc[nref]); else { *t = 0; if (keywant) /* -k option, use signal ** supplied in reference data. */ sprintf(t, "%s", fpar(nf,flds,t1,keywant,1,0)); if (science && t[0] == 0) { /* -S option and no -k, produce ** signal such as (Miller, 1984). 
*/ sd = fpar(nf, flds, t2, 'D', 1, 0); sprintf(t, "%s, %s", fpar(nf,flds,t1,'A',1,0), sd); } else if (t[0] == 0) { /* -l option, produce a signal such ** as Miller1984 or Mil84. */ sprintf(format, nmlen>0 ? "%%.%ds%%s" : "%%s%%s", nmlen); /* format is %s%s for default labels */ /* or %.3s%s eg if wanted */ sd = fpar(nf, flds, t2, 'D', 1, 0); if (dtlen > 0) { char *sdb; for (sdb = sd; *sd; sd++) ; sd = sd - dtlen; if (sd < sdb) sd = sdb; } sprintf(t, format, fpar(nf,flds,t1,'A',1,0), sd); } if (keywant) { /* Check user supplied signal, ** if final character is '-', ** user wants disambiguating ** character as necessary. */ addon = 0; for (sd = t; *sd; sd++) ; if (*--sd == '-') { addon = 1; *sd = 0; } } /* Add plain signal to record of issued ** signals. Append a disambiguating letter ** to this instance as necessary. */ if ((!keywant || addon) && !science) { addch(t, keylet(t, nref)); } else { tokeytab (t,nref); } } } /* end (labels) */ else { /* Use numbers for signals */ if (sort) /* Surround reference number by FLAG so ** that it can be found for renumbering ** after sort. */ sprintf(t, "%c%d%c", FLAG, nref, FLAG); else if (nref > 0) { #ifdef CONDENSE if ((++wcnt>wsize) && ((wref=realloc(wref, (wsize+=50)*sizeof(int))) == NULL) ) { fprintf(stderr, "Ref cond out of memory."); exit(1); } wref[wcnt-1] = nref; #endif } sprintf(t, "%d", nref); } another = prefix (".[", sd=lookat()); if (another && (strcmp(".[\n", sd) != SAME)) fprintf(stderr, "File %s line %d: punctuation ignored from: %s", Ifile, Iline, sd); strcat(sig, t); #if EBUG fprintf(stderr, "sig is now %s leng %d\n",sig,strlen(sig)); #endif /* Arrange stline and endline so that they point to ** appropriate signal bracketing strings. 
*/ trimnl(nstline); trimnl(endline); stline = stbuff; if (prevsig == 0) { strcpy (stline, nstline); prevsig=1; } if (stline[2] || endline[2]) { stline += 2; endline += 2; } else { stline = "\\*([."; endline = "\\*(.]"; } if (science) { stline = " ("; endline = ")"; } if (bare == 0) { /* We are putting signals in text. */ if (!another) { /* No more citations for the moment. ** Prepare accumulated signals (do condensing ** and bracketing); put signals out. */ #ifdef CONDENSE wref[wcnt] = 0; if (!labels && !sort && wcnt > 1) condense(wref,wcnt,sig); wcnt = 0; #endif sprintf(t1, "%s%s\%s\n", stline, sig, endline); append(t1); flout(); sig[0] = 0; prevsig = 0; if (fo == fhide) { int ch; fclose(fhide); fhide = fopen(hidenam, "r"); fo = ftemp; while ((ch = getc(fhide)) != EOF) putc(ch, fo); fclose(fhide); unlink(hidenam); } } /* end (!another) */ else { /* Another citation follows immediately. */ strcat(sig, ",\\|"); if (fo == ftemp) { /* hide if need be */ sprintf(hidenam, "/tmp/rj%dc", getpid()); #if EBUG fprintf(stderr, "hiding in %s\n", hidenam); #endif fhide = fopen(hidenam, "w"); if (fhide == NULL) err("Can't get scratch file %s", (void) hidenam); fo = fhide; } } /* end (another) */ } /* end (bare == 0) -- putting signals in text */ if (bare < 2) if (nf > 0) if ( ! 
dupl ) fprintf(fo,".ds [F %s%c",t,sep); if (bare > 0) flout(); #if EBUG fprintf(stderr, "sig is now %s\n",sig); #endif } char * fpar (nf, flds, out, c, seq, prepend) char *flds[], *out; { char *p, *s; int i, fnd = 0; for(i = 0; i < nf; i++) if (flds[i][1] == c && ++fnd >= seq) { /* for titles use first word otherwise last */ if (c == 'T' || c == 'J') { p = flds[i]+3; if (prefix("A ", p)) p += 2; if (prefix("An ", p)) p += 3; if (prefix("The ", p)) p += 4; mycpy2(out, p, 20); return(out); } /* if its not 'L' then use just the last word */ s = p = flds[i]+2; if (c != 'L') { for(; *p; p++); while (p > s && *p != ' ') p--; } /* special wart for authors */ if (c == 'A' && (p[-1] == ',' || p[1] =='(')) { p--; while (p > s && *p != ' ') p--; mycpy(out, p+1); } else strcpy(out, p+1); if (c == 'A' && prepend) initadd(out, flds[i]+2, p); return(out); } return(0); } putkey(nf, flds, nref, keystr) char *flds[], *keystr; { char t1[50], *sf; int ctype, i, count; fprintf(fo, ".\\\""); if (nf <= 0) fprintf(fo, "%s%c%c", labtab[nref], labc[nref], sep); else { while (ctype = *keystr++) { count = atoi(keystr); if (*keystr=='+') count=999; if (count <= 0) count = 1; for(i = 1; i <= count; i++) { sf = fpar(nf, flds, t1, ctype, i, 1); if (sf == 0) break; sf = artskp(sf); fprintf(fo, "%s%c", sf, '-'); } } fprintf(fo, "%c%d%c%c", FLAG, nref, FLAG, sep); } } tokeytab (t, nref) char *t; { strcpy(labtab[nref]=lbp, t); while (*lbp++) ; } keylet(t, nref) char *t; { int i; int x = -1; for(i = 1; i < nref; i++) { if (strcmp(labtab[i], t) == 0) x = labc[i]; } tokeytab (t, nref); if (lbp-bflab > NFLAB) err("bflab overflow (%d)", NFLAB); if (nref > NLABC) err("nref in labc overflow (%d)", NLABC); #if EBUG fprintf(stderr, "lbp up to %d of %d\n", lbp-bflab, NFLAB); #endif if (x == 0) /* The last reference to use this signal ** was put out plain; this reference ** needs disambiguating character 'a'. 
*/ x = 'a'-1; return(labc[nref] = x+1); } mycpy(s, t) char *s, *t; { while (*t && *t != ',' && *t != ' ') *s++ = *t++; *s = 0; } mycpy2(s, t, n) char *s, *t; { int c; while (n-- && (c= *t++) > 0) { if (c == ' ') c = '-'; *s++ = c; } *s = 0; } initadd(to, from, stop) char *to, *from, *stop; { int c, nalph = 1; while (*to) to++; while (from < stop) { c = *from++; if (!isalpha(c)) { if (nalph) *to++ = '.'; nalph = 0; continue; } if (nalph++ == 0) *to++ = c; } *to = 0; } static char *articles[] = { "the ", "an ", "a ", 0 }; char * artskp(s) /* skips over initial "a ", "an ", "the " in s */ char *s; { char **p, *r1, *r2; for (p = articles; *p; p++) { r2 = s; for (r1 = *p; ((*r1 ^ *r2) & ~040 ) == 0; r1++) r2++; if (*r1 == 0 && *r2 != 0) return(r2); } return(s); } !Funky!Stuff!