[comp.sources.unix] v11i065: Hum concordance package update kit

rsalz@uunet.UU.NET (Rich Salz) (09/22/87)

Submitted-by: sun!tut (Bill Tuthill)
Posting-number: Volume 11, Issue 65
Archive-name: hum.pch

Quite a few people asked me for the troff macros to produce concordances.
They weren't included with Hum source because "troffmt" automatically
produces macros.  However, this made me think.  Now that laser printers
have virtually replaced line printers, troff is a lot cheaper than it
used to be.  So I modified "troffmt" for laser printers, taking out the
cut marks.  Here is the recommended command line to produce a concordance
in 10 point type on a laser printer:

	% kwic -c120 filename(s) | sort | troffmt | troff

While I was modifying "troffmt" I found a bug in "format".  So the shell
archive below contains the new "troffmt", the fixed "format", and troff
macros for producing a concordance.  The troff macros are helpful if you
want to modify the look of your concordance.  Here's how to produce a
concordance with your own macros:

	% kwic -c120 filename(s) | sort | troffmt -m | troff trmacs -

A few people asked for the "lemma" program, which was promised in the
documentation.  To tell you the truth, I never came up with a good design
for "lemma".  At UC Berkeley, we always used editor scripts to move words
from one file to another.  I think this method is superior to any special
purpose program I could write.

Bill Tuthill

------------------------------- cut here ------------------------------- 
echo Extracting troffmt.c 1>&2
sed 's/^X//' > troffmt.c <<\!!!EOF!!!
X# include <stdio.h>			/* troffmt.c (rev3.7) */
X# include <ctype.h>
X# include <signal.h>
X
Xchar *tempfile;		/* name of scratch file; stores contexts while counting */
Xint nocnt = 0;		/* set by -c: do not count keyword frequency */
Xint nokwd = 0;		/* set by -k: suppress the keyword entirely */
Xint nomacs = 0;		/* set by -m: do not emit the built-in macros */
X
Xusage()			/* show the command synopsis on stdout, exit status 1 */
X{
X	/* one entry per output line; puts() supplies the newline */
X	static char *help[] = {
X		"Usage: troffmt [-ckm] [filename(s)] [-]\t\t(rev3.7)",
X		"-c: suppress counting of keyword frequency",
X		"-k: entirely suppress printing of keyword",
X		"-m: do not supply concordance macros automatically",
X		"- : read standard input instead of files",
X		NULL
X	};
X	char **line;
X
X	for (line = help; *line != NULL; line++)
X		puts(*line);
X	exit(1);
X}
X
Xmain(argc, argv)	/* format concordance for sending to troff */
Xint argc;
Xchar *argv[];
X{
X	FILE *fopen(), *fp;
X	int i, j, onintr();
X	char *mktemp();
X
X	if (signal(SIGINT, SIG_IGN) != SIG_IGN)
X		signal(SIGINT, onintr);
X
X	tempfile = "/tmp/FmtXXXXX";
X	mktemp(tempfile);
X
X	for (i = 1; argv[i] && *argv[i] == '-'; i++)
X	{
X		for (j = 1; argv[i][j] != NULL; j++)
X		{
X			if (argv[i][j] == 'c')
X				nocnt = 1;
X			else if (argv[i][j] == 'k')
X				nokwd = 1;
X			else if (argv[i][j] == 'm')
X				nomacs = 1;
X			else  /* bad option */
X			{
X				fprintf(stderr,
X				"Illegal troffmt flag: -%c\n", argv[i][j]);
X				usage();
X			}
X		}
X	}
X	if (!nomacs)
X		pr_macs();
X	if (i == argc)
X	{
X		if (nokwd)
X			rmkwds(stdin);
X		else if (nocnt)
X			ffmt(stdin);
X		else
X			format(stdin);
X	}
X	for (; i < argc; i++)
X	{
X		if (argv[i][0] == '-' && argv[i][1] == NULL)
X		{
X			if (nokwd)
X				rmkwds(stdin);
X			else if (nocnt)
X				ffmt(stdin);
X			else
X				format(stdin);
X		}
X		if ((fp = fopen(argv[i], "r")) != NULL)
X		{
X			if (nokwd)
X				rmkwds(fp);
X			else if (nocnt)
X				ffmt(fp);
X			else
X				format(fp);
X			fclose(fp);
X		}
X		else  /* attempt to open file failed */
X		{
X			fprintf(stderr,
X			"Trfmt cannot access the file: %s\n", argv[i]);
X			continue;
X		}
X	}
X	unlink(tempfile);
X	exit(0);
X}
X
Xpr_macs()		/* write the built-in concordance macros to stdout */
X{
X	/*
X	 * Accent-mark definitions: source the -ms file if it is readable,
X	 * otherwise the -me file, otherwise complain on stderr and carry on.
X	 */
X	if (access("/usr/lib/ms/ms.acc", 4) != 0)
X	{
X		if (access("/usr/lib/me/chars.me", 4) != 0)
X			fprintf(stderr, "Install -ms or -me macros!\n");
X		else
X			printf(".so /usr/lib/me/chars.me\n");
X	}
X	else
X		printf(".so /usr/lib/ms/ms.acc\n");
X
X	/* KW: emitted before each keyword line */
X	printf(".de KW\n.ne 2.1\n.sp 0.3\n.ta 20n\n.ft B\n..\n");
X	/* CX: emitted before the block of contexts */
X	printf(".de CX\n.sp 0.1\n.ta 3.6iR 3.75i\n.ft R\n..\n");
X	/* HD: page header with the page number as centered title */
X	printf(".de HD\n'sp 2\n.tl ''- %% -''\n'sp 2\n..\n");
X	/* FO: page footer; then plant both traps */
X	printf(".de FO\n'bp\n..\n.wh 0 HD\n.wh -5 FO\n");
X	/* no-fill mode, page offset, line and title lengths */
X	printf(".nf\n.po 0.5i\n.ll 7.0i\n.lt 7.0i\n");
X}
X
Xformat(fp)	  	/* print keyword and count only if different */
XFILE *fp;
X{
X	FILE *fopen(), *tf;
X	char s[BUFSIZ], okw[BUFSIZ/2], nkw[BUFSIZ/2], cntxt[BUFSIZ];
X	char *sp, *kwp, *cxp, *strcpy();
X	int kwfreq = 0;
X
X	strcpy(okw,"~~~~~");	/* make sure 1st keyword is printed */
X	tf = NULL;		/* to prevent core dump with no input */
X	while (fgets(s, BUFSIZ, fp))
X	{
X		for (sp = s, kwp = nkw; *sp != ' ' && *sp != '|'; sp++, kwp++)
X		{
X			if (*sp == '\b')	/* interpolate troff string */
X			{
X				*kwp = '\\';
X				*++kwp = '*';
X			} else
X				*kwp = *sp;
X		}
X		*kwp = NULL;
X		for (; *sp && *sp != '|'; sp++)
X			;
X		for (++sp, cxp = cntxt; *sp && *sp != '\n'; sp++, cxp++)
X		{
X			if (*sp == '|')
X				*cxp = '\t';
X			else if (*sp == '\b')	/* interpolate troff string */
X			{
X				*cxp = '\\';
X				*++cxp = '*';
X			} else
X				*cxp = *sp;
X		}
X		*cxp = '\n';
X		*++cxp = NULL;
X
X		if (strcmp(nkw, okw) != 0)  /* kwds different */
X		{
X			if (kwfreq != 0)
X				prtmpfile(tf, kwfreq);
X			printf(".KW\n%s\t", nkw);
X			tf = fopen(tempfile, "w");
X			fputs(cntxt, tf);
X			kwfreq = 1;
X		}
X		else  /* if keywords are the same */
X		{
X			fputs(cntxt, tf);
X			kwfreq++;
X		}
X		strcpy(okw, nkw);
X	}
X	prtmpfile(tf, kwfreq);
X}
X
Xprtmpfile(tf, kwfreq)		/* print frequency and contexts */
XFILE *tf;
Xint kwfreq;
X{
X	char save[BUFSIZ];
X
X	if (tf == NULL)		/* exit without core dump */
X		exit(1);
X
X	printf("(%d)\n.CX\n", kwfreq);
X	fclose(tf);
X	tf = fopen(tempfile, "r");
X	while (fgets(save, BUFSIZ, tf))
X		printf(" %s", save);
X	fclose(tf);
X}
X
Xint onintr()		/* SIGINT handler: report, remove tempfile, exit 1 */
X{
X	fputs("\nInterrupt\n", stderr);
X	unlink(tempfile);
X	exit(1);
X}
X
Xffmt(fp)	  	/* fast format routine, no keyword counting */
XFILE *fp;
X{
X	char s[BUFSIZ], okw[BUFSIZ/2], nkw[BUFSIZ/2], cntxt[BUFSIZ];
X	char *sp, *kwp, *cxp;
X
X	strcpy(okw,"~~~~~");	/* make sure 1st keyword is printed */
X
X	while (fgets(s, BUFSIZ, fp))
X	{ 
X		for (sp = s, kwp = nkw; *sp && *sp != ' '; sp++, kwp++)
X		{
X			if (*sp == '\b')	/* interpolate troff string */
X			{
X				*kwp = '\\';
X				*++kwp = '*';
X			} else
X				*kwp = *sp;
X		}
X		*kwp = NULL;
X		for (; *sp && *sp != '|'; sp++)
X			;
X		for (++sp, cxp = cntxt; *sp && *sp != '\n'; sp++, cxp++)
X		{
X			if (*sp == '|')
X				*cxp = '\t';
X			else if (*sp == '\b')	/* interpolate troff string */
X			{
X				*cxp = '\\';
X				*++cxp = '*';
X			} else
X				*cxp = *sp;
X		}
X		*cxp = '\n';
X		*++cxp = NULL;
X
X		if (strcmp(nkw, okw) != 0)  /* kwds different */
X			printf(".KW\n%s\n.CX\n %s", nkw, cntxt);
X		else  /* if keywords are the same */
X			printf(" %s", cntxt);
X		strcpy(okw, nkw);
X	}
X}
X
Xrmkwds(fp)		/* completely suppress printing of keyword */
XFILE *fp;
X{
X	char s[BUFSIZ], *sp;
X
X	while (fgets(s, BUFSIZ, fp))
X	{
X		for (sp = s; *sp && *sp != '|'; sp++)
X			;
X		for (; *sp; sp++)
X		{
X			if (*sp == '|')
X				putchar(' ');
X			else
X				putchar(*sp);
X		}
X	}
X}
!!!EOF!!!
if [ "`wc -c troffmt.c`" != "    5328 troffmt.c" ]
			then	
				echo \07WARNING troffmt.c: extraction error
			fi
echo Extracting format.c 1>&2
sed 's/^X//' > format.c <<\!!!EOF!!!
X# include <stdio.h>			/* format.c (rev3.7) */
X# include <ctype.h>
X# include <signal.h>
X
Xchar *tempfile;		/* name of scratch file; stores overflow while counting */
Xint nomap = 0;		/* set by -m: do NOT map keyword to upper case */
Xint nocnt = 0;		/* set by -c: do not count keyword frequency */
Xint nokwd = 0;		/* set by -k: suppress the keyword entirely */
X
Xusage()			/* show the command synopsis on stdout, exit status 1 */
X{
X	/* one entry per output line; puts() supplies the newline */
X	static char *help[] = {
X		"Usage: format [-mck] [filename(s)]\t\t(rev3.7)",
X		"-m: keywords not mapped from lower to upper case",
X		"-c: suppress counting of keyword frequency",
X		"-k: entirely suppress printing of keyword",
X		NULL
X	};
X	char **line;
X
X	for (line = help; *line != NULL; line++)
X		puts(*line);
X	exit(1);
X}
X
Xmain(argc, argv)	/* make keyword headings with count */
Xint argc;
Xchar *argv[];
X{
X	FILE *fopen(), *fp;
X	int i, j, onintr();
X	char *mktemp();
X
X	if (signal(SIGINT, SIG_IGN) != SIG_IGN)
X		signal(SIGINT, onintr);
X
X	tempfile = "/tmp/FmtXXXXX";
X	mktemp(tempfile);
X
X	for (i = 1; argv[i] && *argv[i] == '-'; i++)
X	{
X		for (j = 1; argv[i][j] != NULL; j++)
X		{
X			if (argv[i][j] == 'm')
X				nomap = 1;
X			else if (argv[i][j] == 'c')
X				nocnt = 1;
X			else if (argv[i][j] == 'k')
X				nokwd = 1;
X			else  /* bad option */
X			{
X				fprintf(stderr,
X				"Illegal format flag: -%c\n", argv[i][j]);
X				usage();
X			}
X		}
X	}
X	if (i == argc)
X	{
X		if (nokwd)
X			rmkwds(stdin);
X		else if (nocnt)
X			ffmt(stdin);
X		else
X			format(stdin);
X	}
X	for (; i < argc; i++)
X	{
X		if ((fp = fopen(argv[i], "r")) != NULL)
X		{
X			if (nokwd)
X				rmkwds(fp);
X			else if (nocnt)
X				ffmt(fp);
X			else
X				format(fp);
X			fclose(fp);
X		}
X		else  /* attempt to open file failed */
X		{
X			fprintf(stderr,
X			"Format cannot access the file: %s\n", argv[i]);
X			continue;
X		}
X	}
X	unlink(tempfile);
X	exit(0);
X}
X
Xchar buff[BUFSIZ*8];	/* in-memory cache of one keyword's contexts; spills to tempfile */
Xint bufflen;		/* total length of contexts in buffer */
Xint fulltf = 0;		/* nonzero once contexts have spilled into the tempfile */
XFILE *tf = NULL;	/* tempfile stream shared by putbuff() and getbuff() */
X
Xformat(fp)	  	/* print keyword and count only if different */
XFILE *fp;
X{
X	char s[BUFSIZ], okw[BUFSIZ/2], nkw[BUFSIZ/2], cntxt[BUFSIZ];
X	char *sp, *kwp, *cxp, *strcpy();
X	int kwfreq = 0;
X
X	strcpy(okw,"~~~~~");	/* make sure 1st keyword is printed */
X
X	while (fgets(s, BUFSIZ, fp))
X	{
X		for (sp = s, kwp = nkw; *sp && *sp != '|'; sp++, kwp++)
X		{
X			if (!nomap && islower(*sp))
X				*kwp = toupper(*sp);
X			else
X				*kwp = *sp;
X		}
X		*kwp = NULL;
X
X		for (++sp, cxp = cntxt; *sp && *sp != '\n'; sp++, cxp++)
X		{
X			if (*sp == '|') {
X				*cxp = ' '; *++cxp = ' '; *++cxp = ' ';
X			} else
X				*cxp = *sp;
X		}
X		*cxp = '\n';
X		*++cxp = NULL;
X
X		if (strcmp(nkw, okw) != 0)  /* kwds different */
X		{
X			if (kwfreq != 0)
X			{
X				getbuff(kwfreq);
X				putchar('\n');
X			}
X			*buff = NULL;
X			bufflen = 0;
X			fputs(nkw, stdout);
X			putbuff(cntxt);
X			kwfreq = 1;
X		}
X		else  /* if keywords are the same */
X		{
X			putbuff(cntxt);
X			kwfreq++;
X		}
X		strcpy(okw, nkw);
X	}
X	getbuff(kwfreq);
X}
X
Xputbuff(cntxt)		/* cache routine to buffer tempfile */
Xchar cntxt[];
X{
X	char *strcat();
X
X	if (!fulltf)
X	{
X		bufflen += strlen(cntxt);
X		if (bufflen < BUFSIZ*8)
X			strcat(buff, cntxt);
X		else {
X			fulltf = 1;
X			if ((tf = fopen(tempfile, "w")) == NULL)
X				perror(tempfile);
X			fputs(buff, tf);
X			*buff = NULL;
X			bufflen = 0;
X		}
X	}
X	else  /* fulltf */
X		fputs(cntxt, tf);
X}
X
Xgetbuff(kwfreq)		/* print frequency and context buffer */
Xint kwfreq;
X{
X	char str[BUFSIZ];
X
X	printf("(%d)\n", kwfreq);
X	if (!fulltf)
X		fputs(buff, stdout);
X	else
X	{
X		fclose(tf);
X		if ((tf = fopen(tempfile, "r")) == NULL)
X			perror(tempfile);
X		while (fgets(str, BUFSIZ, tf))
X			fputs(str, stdout);
X		fclose(tf);
X		fulltf = 0;
X	}
X}
X
Xint onintr()		/* SIGINT handler: report, remove tempfile, exit 1 */
X{
X	fputs("\nInterrupt\n", stderr);
X	unlink(tempfile);
X	exit(1);
X}
X
Xffmt(fp)	  	/* if different, print keyword without count */
XFILE *fp;
X{
X	char s[BUFSIZ], okw[BUFSIZ/2], nkw[BUFSIZ/2], cntxt[BUFSIZ];
X	char *sp, *kwp, *cxp, *strcpy();
X
X	strcpy(okw,"~~~~~");	/* make sure 1st keyword is printed */
X	while (fgets(s, BUFSIZ, fp))
X	{
X		for (sp = s, kwp = nkw; *sp && *sp != '|'; sp++, kwp++)
X		{
X			if (!nomap && islower(*sp))
X				*kwp = toupper(*sp);
X			else
X				*kwp = *sp;
X		}
X		*kwp = NULL;
X
X		for (++sp, cxp = cntxt; *sp && *sp != '\n'; sp++, cxp++)
X		{
X			if (*sp == '|') {
X				*cxp = ' '; *++cxp = ' '; *++cxp = ' ';
X			} else
X				*cxp = *sp;
X		}
X		*cxp = '\n';
X		*++cxp = NULL;
X
X		if (strcmp(nkw, okw) != 0)  /* kwds different */
X			printf("\n%s\n %s", nkw, cntxt);
X		else  /* if keywords are the same */
X			printf(" %s", cntxt);
X		strcpy(okw, nkw);
X	}
X}
X
Xrmkwds(fp)		/* completely suppress printing of keyword */
XFILE *fp;
X{
X	char s[BUFSIZ], *sp;
X
X	while (fgets(s, BUFSIZ, fp))
X	{
X		for (sp = s; *sp && *sp != '|'; sp++)
X			;
X		for (; *sp; sp++)
X		{
X			if (*sp == '|')
X				printf("   ");
X			else
X				putchar(*sp);
X		}
X	}
X}
!!!EOF!!!
if [ "`wc -c format.c`" != "    4765 format.c" ]
			then	
				echo \07WARNING format.c: extraction error
			fi
echo Extracting trmacs 1>&2
sed 's/^X//' > trmacs <<\!!!EOF!!!
X.\" @(#)ms.acc 1.1 86/07/08 SMI; from UCB 4.2
X.	\" AM - accent mark definitions
X.bd S B 3
X.	\" fudge factors for nroff and troff
X.if n \{\
X.	ds #H 0
X.	ds #V .8m
X.	ds #F .3m
X.	ds #[ \f1
X.	ds #] \fP
X.\}
X.if t \{\
X.	ds #H ((1u-(\\\\n(.fu%2u))*.13m)
X.	ds #V .6m
X.	ds #F 0
X.	ds #[ \&
X.	ds #] \&
X.\}
X.	\" simple accents for nroff and troff
X.if n \{\
X.	ds ' \h'-1'\'
X.	ds ` \h'-1'\`
X.	ds ^ \h'-1'^
X.	ds , \h'-1',
X.	ds ~ \h'-1'~
X.	ds ? ?
X.	ds ! !
X.	ds / \h'-1'\(sl
X.	ds q o\h'-1',
X.\}
X.if t \{\
X.	ds ' \\k:\h'-(\\n(.wu*8/10-\*(#H)'\'\h"|\\n:u"
X.	ds ` \\k:\h'-(\\n(.wu*8/10-\*(#H)'\`\h'|\\n:u'
X.	ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'^\h'|\\n:u'
X.	ds , \\k:\h'-(\\n(.wu*8/10)',\h'|\\n:u'
X.	ds ~ \\k:\h'-(\\n(.wu-\*(#H-.1m)'~\h'|\\n:u'
X.	ds ? \s-2c\h'-\w'c'u*7/10'\u\h'\*(#H'\zi\d\s+2\h'\w'c'u*8/10'
X.	ds ! \s-2\(or\s+2\h'-\w'\(or'u'\v'-.8m'.\v'.8m'
X.	ds / \\k:\h'-(\\n(.wu*8/10-\*(#H)'\z\(sl\h'|\\n:u'
X.	ds q o\h'-\w'o'u*8/10'\s-4\v'.4m'\z\(*i\v'-.4m'\s+4\h'\w'o'u*8/10'
X.\}
X.	\" troff and (daisy-wheel) nroff accents
X.ds : \\k:\h'-(\\n(.wu*8/10-\*(#H+.1m+\*(#F)'\v'-\*(#V'\z.\h'.2m+\*(#F'.\h'|\\n:u'\v'\*(#V'
X.ds 8 \h'\*(#H'\(*b\h'-\*(#H'
X.ds v \\k:\h'-(\\n(.wu*9/10-\*(#H)'\v'-\*(#V'\*(#[\s-4v\s0\v'\*(#V'\h'|\\n:u'\*(#]
X.ds _ \\k:\h'-(\\n(.wu*9/10-\*(#H+(\*(#F*2/3))'\v'-.4m'\z\(hy\v'.4m'\h'|\\n:u'
X.ds . \\k:\h'-(\\n(.wu*8/10)'\v'\*(#V*4/10'\z.\v'-\*(#V*4/10'\h'|\\n:u'
X.ds 3 \*(#[\v'.2m'\s-2\&3\s0\v'-.2m'\*(#]
X.ds o \\k:\h'-(\\n(.wu+\w'\(de'u-\*(#H)/2u'\v'-.3n'\*(#[\z\(de\v'.3n'\h'|\\n:u'\*(#]
X.ds d- \h'\*(#H'\(pd\h'-\w'~'u'\v'-.25m'\f2\(hy\fP\v'.25m'\h'-\*(#H'
X.ds D- D\\k:\h'-\w'D'u'\v'-.11m'\z\(hy\v'.11m'\h'|\\n:u'
X.ds th \*(#[\v'.3m'\s+1I\s-1\v'-.3m'\h'-(\w'I'u*2/3)'\s-1o\s+1\*(#]
X.ds Th \*(#[\s+2I\s-2\h'-\w'I'u*3/5'\v'-.3m'o\v'.3m'\*(#]
X.ds ae a\h'-(\w'a'u*4/10)'e
X.ds Ae A\h'-(\w'A'u*4/10)'E
X.ds oe o\h'-(\w'o'u*4/10)'e
X.ds Oe O\h'-(\w'O'u*4/10)'E
X.	\" corrections for vroff
X.if v .ds ~ \\k:\h'-(\\n(.wu*9/10-\*(#H)'\s-2\u~\d\s+2\h'|\\n:u'
X.if v .ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'\v'-.4m'^\v'.4m'\h'|\\n:u'
X.	\" for low resolution devices (crt and lpr)
X.if \n(.H>23 .if \n(.V>19 \
X\{\
X.	ds : \h'-1'"
X.	ds 8 B
X.	ds v \h'-1'\o'\(aa\(ga'
X.	ds _ \h'-1'^
X.	ds . \h'-1'.
X.	ds 3 3
X.	ds o a
X.	ds d- d\h'-1'\(ga
X.	ds D- D\h'-1'\(hy
X.	ds th \o'bp'
X.	ds Th \o'LP'
X.	ds ae ae
X.	ds Ae AE
X.	ds oe oe
X.	ds Oe OE
X.\}
X.rm #[ #] #H #V #F C
X.\" KW - keyword line: need 2.1 lines on the page, space down 0.3,
X.\"      tab stop at 20n, bold font
X.de KW
X.ne 2.1
X.sp 0.3
X.ta 20n
X.ft B
X..
X.\" CX - contexts: space down 0.1, right-adjusting tab at 3.6i and
X.\"      left tab at 3.75i, roman font
X.de CX
X.sp 0.1
X.ta 3.6iR 3.75i
X.ft R
X..
X.\" HD - page header: two lines of space around a centered page number
X.de HD
X'sp 2
X.tl ''- % -''
X'sp 2
X..
X.\" FO - page footer: start a new page
X.de FO
X'bp
X..
X.\" traps: HD at the top of each page, FO five lines above the bottom
X.wh 0 HD
X.wh -5 FO
X.\" no-fill mode, 0.5i page offset, 7.0i line and title lengths
X.nf
X.po 0.5i
X.ll 7.0i
X.lt 7.0i
!!!EOF!!!
if [ "`wc -c trmacs`" != "    2509 trmacs" ]
			then	
				echo \07WARNING trmacs: extraction error
			fi