[net.text] one way to get an index in troff

lee@rochester.UUCP (Lee Moore) (08/11/85)

Periodically there are requests on net.text for techniques to get a real
sorted index out of troff.  Below is some stuff that I cooked up to
generate our graduate student handbook.

The plan is simple: use the ".tm" command of troff to write index items to
the standard error.  Collect the standard error, sort it, process it and
feed it back into troff.  At certain points, I will assume the use of the
-me macro package but this code should be generally useful.  There are
two helper programs that are written in Icon which are included below.
I recommend the language Icon to any site that does a lot of text processing.

First we have the index macro itself:

 .de IN		\" send an index entry to the stderr
.tm \\n%\t\\$1\t\\$2
..

It outputs the current page number and one or two arguments to the
standard error.  The first argument is the major name and the second
is the minor name.  The index will appear sorted first by major name and then
minor.  Examples of usage:

.IN "Pet Licenses"
.IN "Shopping" "Food"
.IN "Shopping" "Clothes"

While running troff, one collects the standard error into a file.  With the
Bourne shell this looks like:

	troff -me files 2> files.ind

In the following examples we are going to use the following output of stderr:
	1	Shopping	Food
	2	Shopping	Clothes
	2	Shopping	Food
	3	Shopping	Clothes
	3	Shopping	Food
	4	Pet Licenses	
	4	Shopping	Clothes
	5	Pet Licenses	
	6	Pet Licenses	

As a second step, the output is re-processed and fed back into troff:

	sort +1n +0n -1n files.ind | fixindex | block | troff index.me

As you can see, two helper programs called fixindex and block were written.
The first program deletes identical index entries that refer to the same
page, collects together all the page numbers that refer to the same index
item, and notes the breaks between major and minor items.  Its output
is in the form of troff macro calls.  Applying sort and fixindex we get:

	.I> "Pet Licenses" "4,5,6"		<--- major heading
	.Ib "Shopping"				<--- start of minor headings
	.I< "Shopping" "Clothes" "2,3,4"
	.I< "Shopping" "Food" "1,2,3"

The following is source to fixindex.icn:
-----------------------------------------------------------------------
# transform raw index entries into new macros
#
#	features include:  merging page numbers and suppressing duplicates
#			   sorting out major headings from minor
#
# the (pre-sorted) input is of the form
#	<page-number><tab><major name><tab><minor name>
#


# One parsed input line: its page number, major name and minor name.
record LineState(PageNum, Major, Minor)

# Read the pre-sorted entries with split() and write one macro call per
# distinct (major, minor) pair, carrying the merged list of page numbers.
# A '.Ib' line is written ahead of each new major name whose first entry
# has a minor name.
procedure main()
	local pageList, old, new

	old := LineState()
	new := LineState()
	# no input at all: nothing to write
	split(old) | return
	pageList := old.PageNum

	# the very first entry has a minor name, so label its group
	if null(old.Minor) fails then
	    write('.Ib "', old.Major, '"')

	while split(new) do {
	    if old.Major == new.Major then
		if old.Minor == new.Minor then {
		    # same index item: add the page unless it repeats the
		    # page of the previous line
		    if old.PageNum ~= new.PageNum then
			pageList ||:= "," || new.PageNum }
		else {
		    # same major, different minor: flush the finished item
		    WriteEntry(old, pageList)
		    pageList := new.PageNum }
	    else {
		# major name changed: flush the finished item
		WriteEntry(old, pageList)
		pageList := new.PageNum

		# label the new group when its first entry has a minor name
		if null(new.Minor) fails then
		    write('.Ib "', new.Major, '"') }

	    AssignRecord(new, old) }		# new -> old

	# flush the last item collected
	WriteEntry(old, pageList)
 end


# Read one line from standard input and break it into the PageNum, Major
# and Minor fields of state.  Fails when read() fails (end of input).
procedure split(state)
	static tabChar,
	       digits
	initial { tabChar := cset("\t");
		  digits := cset("0123456789") }

	scan read() | fail using {
	    # the leading run of digits is the page number
	    state.PageNum := tab(many(digits))
	    tab(many(tabChar))
	    # the major name runs up to the next tab, or to end of line
	    state.Major := tab(upto(tabChar)) | tab(0)
	    tab(many(tabChar))
	    # whatever remains is the minor name
	    state.Minor := tab(0) }

	return
end


# Write the macro call for one finished index item: '.I>' with the major
# name and page list when there is no minor name, otherwise '.I<' with the
# major name, minor name and page list.
# NOTE(review): split() sets Minor with tab(0); this presumably relies on
# null() accepting the empty string in this older dialect -- confirm.
procedure WriteEntry(state, pageList)
	if null(state.Minor) then
	    write('.I> "', state.Major, '" "', pageList, '"')
	else
	    write('.I< "', state.Major, '" "', state.Minor, 
					'" "', pageList, '"')
 end


# Copy every field of record a into record b (a -> b), field by field,
# so that b keeps its own identity and a can be refilled afterwards.
procedure AssignRecord(a, b)
	b.PageNum := a.PageNum
	b.Major := a.Major
	b.Minor := a.Minor
end
-----------------------------------------------------------------------
The program "block.icn" takes in the macros produced by the above program
and inserts a new macro where the first letter changes.  This allows one
to break up the index into different sections for readability:

	.LB P
	.I> "Pet Licenses" "4,5,6"	\" a major item macro call
	.LB S
	.Ib "Shopping"			\" label a set of minor headings
	.I< "Shopping" "Clothes" "2,3,4"	\" a minor item macro call
	.I< "Shopping" "Food" "1,2,3"

The following is the source to block:
-----------------------------------------------------------------------
#
#   Separate index entries where the first letter of the entry
#	changes.  Produce a ".LB" at the break point.  Provide
#	the macro with the new letter
#	

# Copy standard input to standard output, writing a ".LB <letter>" line
# ahead of each line whose first quoted character differs from that of
# the previous line.
procedure main()
	local doubleQuote,
	      line,
	      oldFirstChar,
	      firstChar

	doubleQuote := cset('"')
	oldFirstChar := ""

	# read until end of file
	while line := read() do  {
		scan line using {
			# NOTE(review): this message is written with plain write(),
			# so it presumably lands in the output stream -- confirm
			tab(upto(doubleQuote)) | write("can't find double q")
			# step over the opening quote; the next character is
			# the first letter of the entry name
			move(1)
			firstChar :=  &subject[&pos] }
	
		# has the first letter changed since the previous line?
		if firstChar ~== oldFirstChar then {
			write(".LB ", firstChar) }

		oldFirstChar := firstChar
		write(line) }
	end
-----------------------------------------------------------------------
The following are the index macros that must be prepended to the output
of the block program.
-----------------------------------------------------------------------
. \" Macros for the index
.\" Ib <major>: break the line, ask for at least 2v of room on the page,
.\" then print the major name followed by a colon.
.de Ib	\" blank major entry
.br
.ne 2v
\\$1:
..
.\" I> <major> <pages>: print the major name, a comma, and its page list.
.de I>	\" major entry
\\$1, \\$2
..
.\" I< <major> <minor> <pages>: break the line and print the minor name
.\" and its page list, indented by the leading spaces on the text line.
.\" The first argument (the major name) is not printed here.
.de I<  \" minor entry
.br
   \\$2, \\$3
..
.\" LB <letter>: print the heading for a new letter section.  The heading
.\" is first collected in diversion DT so that its height (register dn)
.\" is known; .ne then asks for that much room plus one line before the
.\" diversion is replayed.
.\" NOTE(review): .sz, .b and .r are taken to be the -me size/bold/roman
.\" macros, since the article says -me is assumed -- confirm.
.de LB	\" new letter starts here
.di DT	\" start diverted text
.sp
.sz +2
.b
\\$1
.r
.sz -2
.sp
.di	\" end diverted text
.ne \\n(dnu+1v	\" get enough space for it
.DT		\" output it
..
.\" Set up various parameters for the right environment.
.\" Your taste may be different.
.\" The title "Index" is centered by .ce, which applies to the next text
.\" line; the .sz request in between is not a text line.
.po 1.0i	\" physical offset
.ta 5iR		\" right alignment tab
.lp		\" initialize -me
.nf
.ce
.sz 18
Index
.sp 1
.sz 10
.2c		\" 2 column mode
.sp 3
-- 
TCP/IP:		lee@rochester.arpa
UUCP:		{decvax, allegra, seismo, cmcl2}!rochester!lee
XNS:		Lee Moore:CS:Univ Rochester
Phone:		+1 (716) 275-7747, -5671
Physical:	43 01' 40'' N, 77 37' 49'' W

lee@rochester.UUCP (Lee Moore) (09/05/85)

This is a follow-up to the troff indexing article I posted last month.
A couple people have had some problems with it.  The major problem is
that the Icon programs were written in an older version of the language.
I have updated these programs so that they will compile under the most recent
version.  They are included below.  The other problem is a typo in 
the sort command.  Where I said:
	sort +1n +0n -1n ....
I should have said:
	sort +1 +0n -1n
Now, it won't try to sort the index items numerically, only the
page numbers.

lee

------------------------------------------



#!/bin/sh
# This is a shell archive, meaning:
# 1. Remove everything above the #!/bin/sh line.
# 2. Save the resulting text in a file.
# 3. Execute the file with /bin/sh (not csh) to create the files:
#	block.icn
#	fixindex.icn
# This archive created: Thu Sep  5 15:06:55 1985
# Put /bin at the front of the search path for the commands used below.
PATH=/bin:$PATH
export PATH
# Warn, but carry on, when block.icn already exists and will be replaced.
test -f 'block.icn' &&
	echo shar: over-writing existing file "'block.icn'"
# Everything up to the SHAR_EOF line is copied verbatim into block.icn.
cat > 'block.icn' << \SHAR_EOF
#
#   Separate index entries where the first letter of the entry
#	changes.  Produce a ".LB" at the break point.  Provide
#	the macro with the new letter
#	

# Copy standard input to standard output, writing ".LB <letter>" ahead of
# each line whose first quoted character differs from that of the previous
# quoted line.  A line with no character after a double quote is copied
# through unchanged and reported on the standard error, so the diagnostic
# never ends up in the stream that is fed to troff and a malformed line
# cannot start a bogus letter section.
procedure main()
	local doubleQuote,
	      line,
	      oldFirstChar,
	      firstChar

	doubleQuote := cset("\"")
	oldFirstChar := ""

	# read until end of file
	while line := read() do  {
		line ? {
			# skip to the opening quote, step over it, and pick up
			# the character that follows as the entry's first letter
			if tab(upto(doubleQuote)) & move(1) &
			   (firstChar := move(1)) then {
				# has the first letter changed?
				if firstChar ~== oldFirstChar then
					write(".LB ", firstChar)
				oldFirstChar := firstChar }
			else
				write(&errout, "can't find double q: ", line) }

		write(line) }
end
SHAR_EOF
# Warn, but carry on, when fixindex.icn already exists and will be replaced.
test -f 'fixindex.icn' &&
	echo shar: over-writing existing file "'fixindex.icn'"
# Everything up to the SHAR_EOF line is copied verbatim into fixindex.icn.
cat > 'fixindex.icn' << \SHAR_EOF
# transform raw index entries into new macros
#
#	features include:  merging page numbers and suppressing duplicates
#			   sorting out major headings from minor
#
# the (pre-sorted) input is of the form
#	<page-number><tab><major name><tab><minor name>
#


# One parsed input line: its page number, major name and minor name.
# Minor is the empty string when the line carries no minor name.
record LineState(PageNum, Major, Minor)

# Walk the pre-sorted entries, comparing each line with the one before it.
# Lines that name the same (major, minor) item have their page numbers
# merged into one comma-separated list; when the item changes, the finished
# one is written with WriteEntry.  A ".Ib" label line is written whenever a
# new major name begins with an entry that has a minor name.
procedure main()
	local pages, prev, cur

	prev := LineState()
	cur := LineState()

	# empty input: nothing to write
	if not split(prev) then
	    return
	pages := prev.PageNum

	if prev.Minor ~== "" then
	    write(".Ib \"", prev.Major, "\"")

	while split(cur) do {
	    if prev.Major ~== cur.Major then {
		# a new major name: flush the old item, maybe label the group
		WriteEntry(prev, pages)
		pages := cur.PageNum
		if cur.Minor ~== "" then
		    write(".Ib \"", cur.Major, "\"")
	    }
	    else if prev.Minor ~== cur.Minor then {
		# same major, new minor: flush the old item
		WriteEntry(prev, pages)
		pages := cur.PageNum
	    }
	    else if prev.PageNum ~= cur.PageNum then
		# same item on a further page
		pages ||:= "," || cur.PageNum

	    AssignRecord(cur, prev)		# cur -> prev
	}

	# flush the last item collected
	WriteEntry(prev, pages)
end


# Fetch the next input line and store its three tab-separated parts in
# state.PageNum, state.Major and state.Minor.  Fails at end of input.
procedure split(state)
	static tabChar, digits
	local line

	initial {
	    tabChar := cset("\t")
	    digits := cset("0123456789")
	}

	line := read() | fail

	line ? {
	    # leading digits are the page number
	    state.PageNum := tab(many(digits))
	    tab(many(tabChar))
	    # major name: up to the next tab, or the rest of the line
	    state.Major := tab(upto(tabChar) | 0)
	    tab(many(tabChar))
	    # minor name: whatever is left (possibly empty)
	    state.Minor := tab(0)
	}

	return
end


# Emit the macro call for one finished index item.  Items with a minor
# name become ".I<" (major, minor, pages); items without one become ".I>"
# (major, pages).
procedure WriteEntry(state, pageList)
	if state.Minor ~== "" then
	    write(".I< \"", state.Major, "\" \"", state.Minor,
					"\" \"", pageList, "\"")
	else
	    write(".I> \"", state.Major, "\" \"", pageList, "\"")
end


# Copy every field of record a into record b (a -> b), field by field,
# so that b keeps its own identity and a can be refilled afterwards.
procedure AssignRecord(a, b)
	b.PageNum := a.PageNum
	b.Major := a.Major
	b.Minor := a.Minor
end
SHAR_EOF
#	End of shell archive
# Leave with status 0 after the two extraction steps above.
exit 0



-- 
TCP/IP:		lee@rochester.arpa
UUCP:		{decvax, allegra, seismo, cmcl2}!rochester!lee
XNS:		Lee Moore:CS:Univ Rochester
Phone:		+1 (716) 275-7747, -5671
Physical:	43 01' 40'' N, 77 37' 49'' W