[net.sources] fix for 'refer' sort problem

cons@sdccsu3.UUCP (08/03/84)

Here is a description and fix for a 'refer' sort problem
packaged as a shar archive.

#! /bin/sh
# The rest of this file is a shell script which will extract:
# Sendbug02 refer2.c refer5.c
echo x - Sendbug02
cat >Sendbug02 <<'!Funky!Stuff!'
Subject: REFER mis-sorts reference lists if duplicate citations occur.
Index:	usr.bin/refer/refer2.c 4.2BSD
      	usr.bin/refer/refer5.c 4.2BSD

Description:
	The putsig routine in refer5.c is responsible for placing
	signals (usually superscripts, or author-date labels) in the
	body of a document.  Putsig also makes the signals available
	for printing in the reference list by emitting strings such as
	".ds [F signal" into the reference list.

	If a reference is cited more than once, it is only put on the
	reference list the first time it is cited.  However putsig
	persists in emitting the ".ds [F ..." string for duplicate
	citations.

	This causes a problem if the reference list is sorted because
	the extraneous ".ds [F ..." material appears at the beginning
	of the next non-duplicate reference where it obscures the
	sortkey.  Thus the reference list is mis-sorted.

Repeat-By:
	Create a document which cites a reference twice, then cites a
	new reference.  Process the document using REFER with the -s
	option.  Notice the extra ".ds [F ..." line in the REFER
	output.  If the two references came out sorted properly, you
	were lucky; reverse their roles and you will see the failure.

Fix:
	Putsig is called at two places in refer2.c: in one a duplicate
	citation is being processed, and in the other a new citation
	is being processed.  Add a flag parameter to putsig to
	distinguish the two calls.  Modify the routine to suppress the
	emission of ".ds [F ..." when it is called for a
	duplicate citation.  (See sources posted to net.sources)

Rick Accurso
UUCP:  ...!ucbvax!sdcsvax!sdccsu3!accurso
ARPA:  sdcsvax!sdccsu3!accurso@nosc

!Funky!Stuff!
echo x - refer2.c
cat >refer2.c <<'!Funky!Stuff!'
#ifndef lint
static char *sccsid = "@(#)refer2.c	4.1 (Berkeley) 5/6/83";
#endif

#include "refer..c"
#define NFLD 80
#define TLEN 512

extern FILE *in;	/* input stream; doref tests it with feof() */
char one[ANSLEN];	/* reference data for the hit being processed;
			** filled by the "deliv" corout call in doref
			*/
int onelen = ANSLEN;	/* initialised to the declared size of one[] */
static char dr [100] = "";	/* empty string handed to corout as its
				** fourth argument on "deliv" requests
				*/

/*
** doref - Process a citation.
**
**	line1	The input line that introduced the citation.  In biblio
**		mode a first line starting with '%' is kept as field
**		data; the line is also handed on to putsig.
**
**	Reads the rest of the citation with input(), separating query
**	keywords (buff) from field data supplied in the citation
**	(dbuff).  A query of "$LIST$" dumps the accumulated references
**	via dumpold().  Otherwise the keywords are normalised and looked
**	up with the "hunt" corout call; for a usable hit the reference
**	data is fetched with "deliv" and written out through
**	putkey/putsig/putref.  In biblio mode the routine loops back
**	(label again) after a reference that ended on a blank line.
*/
doref(line1)
char *line1;
{
	char buff[QLEN];	/* query keywords */
	char dbuff[3*QLEN];	/* field data supplied in citation */
	char answer[ANSLEN], temp[TLEN], line[BUFSIZ];
	char *p, **sr, *flds[NFLD], *r;
	int stat, nf, nr, query = 0, alph, digs;

   again:
	/* NULL is used here as a character zero to empty both buffers. */
	buff[0] = dbuff[0] = NULL;
	if (biblio && Iline == 1 && line1[0] == '%')
		/*
		**  In biblio mode first line of input file may begin
		**  with % and contain field data.  Hold it in dbuff.
		*/
		strcat(dbuff, line1);
	while (input(line)) {		/* get query */
		Iline++;
		if (prefix(".]", line))
			/* end of citation */
			break;
		if (biblio && line[0] == '\n')
			/*
			**  In biblio mode a blank line indicates
			**  the end of the reference.
			*/
			break;
		if (biblio && line[0] == '%' && line[1] == *convert)
			break;
		/*
		**  The first line starting with '.' or '%' switches
		**  to field data; query is never cleared again, so
		**  every later line is field data too.
		*/
		if (control(line[0]))
			query = 1;
		/*
		**  Store lines of query keys in buff;
		**  store lines of field data in dbuff.
		*/
		strcat(query ? dbuff : buff, line);
		/*
		**  NOTE(review): both length checks run after strcat
		**  has already written the line, so an over-long
		**  query or record overruns the buffer before it is
		**  reported.
		*/
		if (strlen(buff) > QLEN)
			err("query too long (%d)", strlen(buff));
		if (strlen(dbuff) > 3 * QLEN)
			err("record at line %d too long", Iline-1);
	}
	if (biblio && line[0] == '\n' && feof(in))
		return;
	if (strcmp(buff, "$LIST$\n")==0) {
		/*
		**  Produce the list of accumulated references.
		*/
		assert (dbuff[0] == 0);
		dumpold();
		return;
	}
	answer[0] = 0;
	/*
	** Refine the query keywords in buff.
	*/
	for (p = buff; *p; p++) {
		if (isupper(*p))
			/* Convert to lowercase. */
			*p |= 040;
	}
	/*
	** alph and digs count the letters and digits of the word
	** being scanned.  Every other character ends the word and is
	** replaced by a blank (this includes the newlines in buff).
	*/
	alph = digs = 0;
	for (p = buff; *p; p++) {
		if (isalpha(*p))
			alph++;
		else
			if (isdigit(*p))
				digs++;
			else {
				/* Terminate the word so common() and atoi() see only it. */
				*p = 0;
				if ((alph+digs < 3) || common(p-alph)) {
					r = p-alph;
					/*
					** Blank out unacceptable
					** keywords (too short, common etc.)
					*/
					while (r < p)
						*r++ = ' ';
				}
				if (alph == 0 && digs > 0) {
					r = p-digs;
					if (digs != 4 || atoi(r)/100 != 19) { 
						/*
						** Blank out numbers
						** which are not in
						** 1900-1999
						*/
						while (r < p)
							*r++ = ' ';
					}
				}
				*p = ' ';
				alph = digs = 0;
			}
	}
	one[0] = 0;
	if (buff[0]) {	/* do not search if no query */
		/*
		** Hunt through the entries rdata..search in turn,
		** appending each result to answer; stop as soon as
		** answer holds at least one complete line.
		** NOTE(review): answer is declared with ANSLEN but the
		** size checks below use BUFSIZ -- confirm the two agree.
		*/
		for (sr = rdata; sr < search; sr++) {
			temp[0] = 0;
			corout(buff, temp, "hunt", *sr, TLEN);
			assert(strlen(temp) < TLEN);
			if (strlen(temp)+strlen(answer) > BUFSIZ)
				err("Accumulated answers too large",0);
			strcat(answer, temp);
			if (strlen(answer)>BUFSIZ)
				err("answer too long (%d)", strlen(answer));
			if (newline(answer) > 0)
				break;
		}
	}
	assert(strlen(one) < ANSLEN);
	assert(strlen(answer) < ANSLEN);
	/*
	** If a search was done, the number of newlines in answer
	** indicates how many hits were found.
	*/
	if (buff[0])
		switch (newline(answer)) {
		case 0:
			fprintf(stderr, "No such paper: %s\n", buff);
			return;
		default:
			/*
			** More than one hit: report them, then fall
			** through into case 1 (there is no break).
			** NOTE(review): the keyword pass above replaced
			** every newline in buff with a blank, so the
			** scan for '\n' below looks able to run past
			** the end of buff -- confirm and bound it.
			*/
			fprintf(stderr, "Too many hits: %s\n", trimnl(buff));
			choices(answer);
			p = buff;
			while (*p != '\n')
				p++;
			*++p = 0;
		case 1:
			/*
			** Search found one hit, success!
			*/
			if (endpush)
				/*
				** References are being produced
				** in a $LIST$ rather than as
				** footnotes.
				*/
				if (nr = chkdup(answer)) {
					/*
					** This reference has already
					** been cited.
					*/
					if (bare < 2) {
						/*
						** Signals in the text
						** are desired (no -b)
						*/
						nf = tabs(flds, one);
						nf += tabs(flds+nf, dbuff);
						assert(nf < NFLD);
						/* Last argument 1 marks a duplicate citation. */
						putsig(nf,flds,nr,line1,line,1);
					}
					/*
					** Since it's a dup, no need to
					** putkey or putref.
					*/
					return;
				}
			if (one[0] == 0)
				/*
				** Place the reference data for the hit
				** indicated by answer in one.
				** NOTE(review): one[] has ANSLEN bytes
				** but QLEN is passed as the limit --
				** confirm which is intended.
				*/
				corout(answer, one, "deliv", dr, QLEN);
			break;
		}
	assert(strlen(buff) < QLEN);
	assert(strlen(one) < ANSLEN);
	/*
	** Set the flds[] pointers at the beginning of each
	** field of reference data in one and dbuff.
	*/
	nf = tabs(flds, one);
	nf += tabs(flds+nf, dbuff);
	assert(nf < NFLD);
	refnum++;
	/*
	** The stream "fo" is written to by putkey
	** and putref.  If references are being produced in the
	** form of footnotes, fo is stdout.  If references are
	** being printed as a list at the end (endpush), then
	** fo is a temp file.  In the endpush case each reference
	** is written to fo as one long line.  If the list is
	** to be sorted, putkey places the sort key on the front
	** of the line.
	*/
	if (sort)
		putkey(nf, flds, refnum, keystr);
	if (bare < 2)
		/* Last argument 0 marks a new (non-duplicate) citation. */
		putsig(nf, flds, refnum, line1, line, 0);
	else
		flout();
	putref(nf, flds);
	/* In biblio mode a blank line ended this reference; go read the next. */
	if (biblio && line[0] == '\n')
		goto again;
	if (biblio && line[0] == '%' && line[1] == *convert)
		fprintf(fo, "%s%c%s", convert+1, sep, line+3);
}

/*
** newline - count the newlines in s.
**
**	s	NUL-terminated string to scan; it is not modified.
**
**	Returns the number of '\n' characters in s.  The return type
**	is spelled out because compilers for C99 and later reject an
**	implicit int.
*/
int
newline(s)
char *s;
{
	int count = 0;

	for (; *s != '\0'; s++)
		if (*s == '\n')
			count++;
	return(count);
}

/*
** choices - print the titles associated with the hits in buff.
**
**	buff	Newline-terminated hit records.  Modified in place:
**		each newline is overwritten with a NUL so that one hit
**		at a time can be handed to corout.
**
**	Each hit is expanded with the "deliv" corout call and the text
**	of its first title line (".T" or "%T" at the start of a line)
**	is written to stderr, cut to 70 characters.  A hit for which
**	no title line is found is reported as "??? at <hit>".
*/
int
choices(buff)
char *buff;
{
	char ob[BUFSIZ], *p, *r, *q, *t;
	int nl;

	r = p = buff;
	while (*p) {
		if (*p != '\n') {
			p++;
			continue;
		}
		/*
		** End of one hit: terminate it, leaving p on the first
		** character of the next hit.  p is tested again before
		** it is advanced, so the scan neither skips that
		** character nor steps over the final NUL.
		*/
		*p++ = 0;
		ob[0] = 0;	/* empty result if nothing is delivered */
		corout(r, ob, "deliv", dr, BUFSIZ);
		nl = 1;		/* nonzero while q is at the start of a line */
		for (q = ob; *q; q++) {
			if (nl && (q[0]=='.'||q[0]=='%') && q[1]=='T') {
				/*
				** Step over the tag and, when present,
				** the character separating it from the
				** title, without passing the end of the
				** line.
				*/
				q += 2;
				if (*q && *q != '\n')
					q++;
				for (t = q; *t && *t != '\n'; t++)
					;
				*t = 0;
				fprintf(stderr, "%.70s\n", q);
				q = 0;	/* records that a title was printed */
				break;
			}
			nl = *q == '\n';
		}
		if (q)
			fprintf(stderr, "??? at %s\n",r);
		r = p;
	}
	return(0);
}

/*
** control - test the first character of a citation line.
**
**	c	Character to test.
**
**	Returns 1 if c is '.' or '%', 0 otherwise.  doref treats a
**	line for which this is true as the start of the field data.
**	Types are spelled out because compilers for C99 and later
**	reject an implicit int.
*/
int
control(c)
int c;
{
	return(c == '.' || c == '%');
}
!Funky!Stuff!
echo x - refer5.c
cat >refer5.c <<'!Funky!Stuff!'
/*
* $Log:	refer5.c,v $
 * Revision 1.5  84/07/09  16:12:23  cons
 * Putsig now refrains from putting out ".ds [F" info when the citation
 * is a duplicate.  The extraneous ".ds [F" info fouled-up sortkeys
 * for subsequent non-duplicate reference.  Accurso
 * 
 * Revision 1.4  84/07/05  15:30:12  cons
 * Fixed keylet() so that disambiguating letters a, b, c, ...
 * will be issued instead of control characters ^A, ^B, ^C, ...   Accurso
 * 
 * Revision 1.3  84/07/05  15:16:50  cons
 * Added comments.  Accurso
 * 
*/

#ifndef lint
static char *rcsid = "$Header: refer5.c,v 1.5 84/07/09 16:12:23 cons Exp $";
#endif

#include "refer..c"
#define SAME 0
#define NFLAB 3000
#define NLABC 1000

static char sig[NLABC];		/* Signal text accumulated by putsig for
				** a run of adjacent citations; emptied
				** once the run has been put out.
				*/
static char bflab[NFLAB];	/* Record of plain signals issued.
				** "Plain signals" have not had
				** disambiguating letter appended,
				** miller84 vs. miller84a.
				*/
static char *labtab[NLABC];	/* Array of pointers to plain signals;
				** indexed by nref.
				*/
static char *lbp = bflab;	/* Next free byte of bflab; tokeytab
				** stores a signal here and moves it
				** past the signal's terminating NUL.
				*/
static char labc[NLABC];	/* Array of disambiguating
				** characters issued; indexed by nref.
				*/
static char stbuff[50];		/* putsig's copy of the citation start
				** line for the current run.
				*/
static int  prevsig;		/* Nonzero once stbuff has been filled
				** for the current run; cleared when
				** the run is put out.
				*/

/* putsig 
**
**	Imbed a signal indicating a citation in the text.
**	Also may supply the signal for printing in a
**	reference list (.ds [F signal); that string is written to fo
**	only for a new citation (dupl == 0) with nf > 0 and bare < 2.
**
**	When the next input line (as returned by lookat()) starts with
**	".[", another citation follows immediately: the signal is kept
**	in sig, followed by ",\|", and put out together with the later
**	ones once the run ends.  While such a run is pending, output
**	bound for ftemp is diverted to a scratch file (hidenam) and
**	copied back to ftemp when the run is put out.
**	
**	CONDENSE facility which converts consecutive numeric signals (4,5,6,7)
**	to a range (4-7) does not handle sorted reference lists.
**
**	NOTE(review): t, t1 and t2 are 100 bytes and are filled with
**	unbounded sprintf calls; t1 receives all of sig, which is
**	declared with NLABC bytes.  fpar returns 0 for a missing field
**	and that value is passed to "%s" unchecked.
*/
int
putsig (nf, flds, nref, nstline, endline, dupl)
int nf;		/*  Number of entries in flds. */
char *flds[];	/*  Fields of reference data */
int nref;	/*  Reference number: the numeric signal, and the
		**  index into labtab/labc when labels are in use.
		*/
char *nstline;	/*  Line which indicated start of citation.
		**  Usually ".[".  In biblio mode could be blank or
		**  start with "%".
		*/
char *endline;	/*  Line which indicated end of citation.
		**  Usually ".]".  Blank in biblio mode.
		*/
int dupl;	/*  dupl==0 implies new citation;
		**  dupl==1 implies repeat citation.
		*/
{
	char t[100], t1[100], t2[100], format[10], *sd, *stline;
	int addon = 0, another = 0;
	static FILE *fhide = 0;

#ifdef CONDENSE
	static int	*wref = NULL;
	static int	wcnt = 0;
	static int	wsize = 50;

	if (wref == NULL)
		wref = calloc(wsize, sizeof(int));
#endif

	if (labels) {	/* User specified -l, -k, or -S option.  */
		if (nf == 0)	/* Repeat citation of a reference.
				** Reuse previously issued signal.
				*/
			sprintf(t, "%s%c", labtab[nref], labc[nref]);
		else {
			*t = 0;
			if (keywant)	/* -k option, use signal
					** supplied in reference data.
					*/
				sprintf(t, "%s", fpar(nf,flds,t1,keywant,1,0));
			if (science && t[0] == 0) {
				/* -S option and no -k, produce
				** signal such as (Miller, 1984).
				*/
				sd = fpar(nf, flds, t2, 'D', 1, 0);
				sprintf(t, "%s, %s", fpar(nf,flds,t1,'A',1,0),
					sd);
			}
			else if (t[0] == 0) {
				/* -l option, produce a signal such
				** as Miller1984 or Mil84.
				*/
				sprintf(format,
					nmlen>0 ? "%%.%ds%%s" : "%%s%%s",
					nmlen);
				/* format is %s%s for default labels */
				/* or %.3s%s eg if wanted */
				sd = fpar(nf, flds, t2, 'D', 1, 0);
				if (dtlen > 0) {
					/* Keep only the last dtlen
					** characters of the date.
					*/
					char *sdb;
					for (sdb = sd; *sd; sd++)
						;
					sd = sd - dtlen;
					if (sd < sdb)
						sd = sdb;
				}
				sprintf(t, format, fpar(nf,flds,t1,'A',1,0),
					sd);
			}
			if (keywant) {
				/* Check user supplied signal,
				** if final character is '-', 
				** user wants disambiguating
				** character as necessary.
				*/
				addon = 0;
				for (sd = t; *sd; sd++)
					;
				if (*--sd == '-') {
					addon = 1;
					*sd = 0;
				}
			}
			/* Add plain signal to record of issued
			** signals.  Append a disambiguating letter
			** to this instance as necessary.
			*/
			if ((!keywant || addon) && !science) {
			    addch(t, keylet(t, nref));
			}
			else {
			    tokeytab (t,nref);
			}
		}
	}	/* end (labels) */
	else {
		/* Use numbers for signals */
		if (sort)
			/* Surround reference number by FLAG so
			** that it can be found for renumbering
			** after sort.
			*/
			sprintf(t, "%c%d%c", FLAG, nref, FLAG);
		else {
			/* The plain number is used only for unsorted
			** output; it must not replace the FLAG form
			** built above for sorted lists.
			*/
#ifdef CONDENSE
			if (nref > 0) {
				/* Remember the number for condense().
				** The array grows while one slot is
				** still free so that the terminating
				** wref[wcnt] = 0 store made when the
				** run is put out stays inside it.
				*/
				if ((++wcnt >= wsize) && 
				 ((wref=realloc(wref, (wsize+=50)*sizeof(int))) == NULL)
				 ) {
					fprintf(stderr, "Ref cond out of memory.");
					exit(1);
				}
				wref[wcnt-1] = nref;
			}
#endif
			sprintf(t, "%d", nref);
		}
	}
	another = prefix (".[", sd=lookat());
	if (another && (strcmp(".[\n", sd) != SAME))
		fprintf(stderr, "File %s line %d: punctuation ignored from: %s",
			Ifile, Iline, sd);
	strcat(sig, t);
#if EBUG
	fprintf(stderr, "sig is now %s leng %d\n",sig,strlen(sig));
#endif
	/*  Arrange stline and endline so that they point to
	**  appropriate signal bracketing strings.
	*/
	trimnl(nstline);
	trimnl(endline);
	stline = stbuff;
	if (prevsig == 0) {
		/* First citation of a run: keep its start line. */
		strcpy (stline, nstline);
		prevsig=1;
	}
	if (stline[2] || endline[2]) {
		/* Text follows the two-character markers; use it. */
		stline += 2;
		endline += 2;
	}
	else {
		stline  = "\\*([.";
		endline = "\\*(.]";
	}
	if (science) {
		stline = " (";
		endline = ")";
	}
	if (bare == 0) {	/* We are putting signals in text. */
		if (!another) {	
			/*  No more citations for the moment.
			**  Prepare accumulated signals (do condensing
			**  and bracketing); put signals out.
			*/
#ifdef CONDENSE
			wref[wcnt] = 0;
			if (!labels && !sort && wcnt > 1)
				condense(wref,wcnt,sig);
			wcnt = 0;
#endif
			sprintf(t1, "%s%s%s\n", stline, sig, endline);
			append(t1);
			flout();
			sig[0] = 0;
			prevsig = 0;
			if (fo == fhide) {
				/* Copy the hidden output back to ftemp
				** and remove the scratch file.
				*/
				int ch;
				fclose(fhide); 
				fhide = fopen(hidenam, "r");
				if (fhide == NULL)
					err("Can't get scratch file %s",
						hidenam);
				fo = ftemp;
				while ((ch = getc(fhide)) != EOF)
					putc(ch, fo);
				fclose(fhide);
				unlink(hidenam);
			}
		}	/*  end (!another) */
		else {
			/*  Another citation follows immediately.
			*/
			strcat(sig, ",\\|");
			if (fo == ftemp) {	/* hide if need be */
				sprintf(hidenam, "/tmp/rj%dc", getpid());
#if EBUG
				fprintf(stderr, "hiding in %s\n", hidenam);
#endif
				fhide = fopen(hidenam, "w");
				if (fhide == NULL)
					err("Can't get scratch file %s",
						hidenam);
				fo = fhide;
			}
		}	/*  end (another) */
	}	/*  end (bare == 0) -- putting signals in text */
	/* A duplicate citation is already on the reference list, so
	** its ".ds [F" string is not written again.
	*/
	if (bare < 2 && nf > 0 && !dupl)
		fprintf(fo,".ds [F %s%c",t,sep);
	if (bare > 0)
		flout();
#if EBUG
	fprintf(stderr, "sig is now %s\n",sig);
#endif
	return(0);
}

/*
** fpar - extract a short form of a field of a reference.
**
**	nf, flds	Number of fields and the fields themselves;
**			the character at flds[i][1] names the field.
**	out		Buffer that receives the result.
**	c		Field letter wanted.
**	seq		Which occurrence of that field (1 = first).
**	prepend		Nonzero to have initadd append initials to an
**			author ('A') result.
**
**	Returns out, or 0 when the field does not occur seq times.
*/
char *
fpar (nf, flds, out, c, seq, prepend)
char *flds[], *out;
{
	char *fld, *start, *p;
	int i, hits;

	hits = 0;
	for (i = 0; i < nf; i++) {
		fld = flds[i];
		if (fld[1] != c)
			continue;
		if (++hits < seq)
			continue;
		if (c == 'T' || c == 'J') {
			/* Titles and journals: leading text, after any
			** articles, limited to 20 characters.
			*/
			p = fld + 3;
			if (prefix("A ", p))
				p += 2;
			if (prefix("An ", p))
				p += 3;
			if (prefix("The ", p))
				p += 4;
			mycpy2(out, p, 20);
			return(out);
		}
		start = p = fld + 2;
		if (c != 'L') {
			/* All but 'L' fields use only the last word:
			** go to the end, then back to the last blank.
			*/
			while (*p)
				p++;
			while (p > start && *p != ' ')
				p--;
		}
		if (c == 'A' && (p[-1] == ',' || p[1] == '(')) {
			/* Author special case: take the word before
			** the last one instead.
			*/
			p--;
			while (p > start && *p != ' ')
				p--;
			mycpy(out, p+1);
		}
		else {
			strcpy(out, p+1);
		}
		if (c == 'A' && prepend)
			initadd(out, start, p);
		return(out);
	}
	return(0);
}

putkey(nf, flds, nref, keystr)
char *flds[], *keystr;
{
	char t1[50], *sf;
	int ctype, i, count;

	fprintf(fo, ".\\\"");
	if (nf <= 0)
		fprintf(fo, "%s%c%c", labtab[nref], labc[nref], sep);
	else {
		while (ctype = *keystr++) {
			count = atoi(keystr);
			if (*keystr=='+')
				count=999;
			if (count <= 0)
				count = 1;
			for(i = 1; i <= count; i++) {
				sf = fpar(nf, flds, t1, ctype, i, 1);
				if (sf == 0)
					break;
				sf = artskp(sf);
				fprintf(fo, "%s%c", sf, '-');
			}
		}
		fprintf(fo, "%c%d%c%c", FLAG, nref, FLAG, sep);
	}
}


tokeytab (t, nref)
char *t;
{
	strcpy(labtab[nref]=lbp, t);
	while (*lbp++)
		;
}

/*
** keylet - record signal t for reference nref and choose its
**	    disambiguating character.
**
**	t	Plain signal text.
**	nref	Reference number; index into labtab and labc.
**
**	Looks through the labels recorded for references 1..nref-1.
**	If none equals t the signal stays plain and 0 is returned.  If
**	the last equal one was plain, 'a' is returned; otherwise the
**	character after the one it was given.  The result is also
**	stored in labc[nref].
**
**	The index and the room left in bflab are checked before
**	tokeytab writes anything; nref == NLABC is rejected as well,
**	since labtab and labc have NLABC elements.
**	NOTE(review): err() is assumed not to return -- confirm.
*/
int
keylet(t, nref)
char *t;
int nref;
{
	int i;
	int x = -1;

	if (nref < 0 || nref >= NLABC)
		err("nref in labc overflow (%d)", NLABC);
	if ((lbp-bflab) + (int)strlen(t) + 1 > NFLAB)
		err("bflab overflow (%d)", NFLAB);
	for(i = 1; i < nref; i++) {
		/* Skip references for which no label was recorded. */
		if (labtab[i] != 0 && strcmp(labtab[i], t) == 0)
			x = labc[i];
	}
	tokeytab (t, nref);
#if EBUG
	fprintf(stderr, "lbp up to %d of %d\n", lbp-bflab, NFLAB);
#endif
	if (x == 0)	/* The last reference to use this signal 
			** was put out plain; this reference
			** needs disambiguating character 'a'.
			*/
		x = 'a'-1;
	return(labc[nref] = x+1);
}

/*
** mycpy - copy the first word of t into s.
**
**	s	Destination; must have room for the word and a NUL.
**	t	Source string.
**
**	Copying stops at the first ',' or ' ' or at the end of t, and
**	s is NUL-terminated.  The return type stays int because fpar
**	calls this routine before its definition; it is spelled out
**	because compilers for C99 and later reject an implicit int.
**	The value returned carries no meaning.
*/
int
mycpy(s, t)
char *s, *t;
{
	for (; *t != '\0' && *t != ',' && *t != ' '; s++, t++)
		*s = *t;
	*s = '\0';
	return(0);
}

/*
** mycpy2 - copy at most n characters of t into s, turning each
**	    blank into '-'.
**
**	s	Destination; must have room for n characters and a NUL.
**	t	Source string.
**	n	Largest number of characters to copy; nothing is
**		copied when n is zero or negative.
**
**	Copying stops only at the end of t, so characters with the
**	high bit set are copied too instead of ending the copy where
**	char is signed.  The return type stays int because fpar calls
**	this routine before its definition; the value returned carries
**	no meaning.
*/
int
mycpy2(s, t, n)
char *s, *t;
int n;
{
	int c;

	while (n-- > 0 && (c = *t++) != '\0') {
		if (c == ' ')
			c = '-';
		*s++ = c;
	}
	*s = 0;
	return(0);
}

/*
** initadd - append initials taken from from..stop to the string to.
**
**	to	NUL-terminated string to extend; must have room.
**	from	Start of the text to take initials from.
**	stop	End of that text (exclusive).
**
**	The first letter of every run of letters is appended.  A '.'
**	is appended for the first non-letter after a run of letters,
**	and also when the text begins with a non-letter.  Characters
**	are converted to unsigned char before isalpha() so that bytes
**	with the high bit set are valid arguments.  The return type
**	stays int because fpar calls this routine before its
**	definition; the value returned carries no meaning.
*/
int
initadd(to, from, stop)
char *to, *from, *stop;
{
	int c;
	int nalph = 1;	/* letters seen in the current run; starts
			** nonzero so a leading non-letter yields '.'
			*/

	while (*to)
		to++;
	for (; from < stop; from++) {
		c = (unsigned char) *from;
		if (isalpha(c)) {
			/* Only the first letter of a run is kept. */
			if (nalph++ == 0)
				*to++ = c;
		}
		else {
			if (nalph)
				*to++ = '.';
			nalph = 0;
		}
	}
	*to = 0;
	return(0);
}

/* Articles skipped by artskp; the list ends with a null pointer. */
static char *articles[] = {
	"the ", "an ", "a ", 0
};

/*
** artskp - skip over an initial "a ", "an " or "the " in s.
**
**	s	NUL-terminated string.
**
**	Returns a pointer into s just past the first article in the
**	list that s begins with, ignoring case, provided something
**	follows it; otherwise returns s.  The comparison stops at the
**	end of either string, so it never reads past the article or
**	past the NUL of s.
*/
char *
artskp(s)
char *s;
{
	char **p, *r1, *r2;

	for (p = articles; *p; p++) {
		r1 = *p;
		r2 = s;
		/* Two characters match when they differ at most in
		** bit 040, the bit that separates upper from lower
		** case letters.
		*/
		while (*r1 != 0 && *r2 != 0 && ((*r1 ^ *r2) & ~040) == 0) {
			r1++;
			r2++;
		}
		if (*r1 == 0 && *r2 != 0)
			return(r2);
	}
	return(s);
}
!Funky!Stuff!