[mod.sources] AAAAARRRRGGGGGHHHH!!!! Bugs in texindex!!!

sources-request@panda.UUCP (05/12/86)
Mod.sources:  Volume 4, Issue 116
Submitted by: seismo!turtlevax!weitek!robert (Robert Plamondon)

The previous version of texindex I sent out has bugs. How they got there is
beyond me, since I made a special effort to test it before I sent it. Either
my test file (which WAS very short) didn't exercise it, or I didn't look
at the output carefully, or I stupidly made a change after I sent it.

In any event, THIS version has been tested on the largest .idx file I have
and seems to be okay. I've also cleaned up some miscellaneous stuff.

With great embarrassment,
	Robert Plamondon

New shar file follows:

# This is a shell archive.  Remove anything before this line,
# then unpack it by saving it in a file and typing "sh file".
#
# Wrapped by weitek!robert on Fri May  9 09:34:01 PDT 1986
# Contents:  texindex.n texindex index.awk index1.awk
 
echo x - texindex.n
sed 's/^@//' > "texindex.n" <<'@//E*O*F texindex.n//'
@.TH TEXINDEX N "9 May 1986"
@.SH NAME
texindex \- Create an index for a LATEX document
@.SH SYNOPSIS
@.B
texindex
file
@.SH DESCRIPTION
@.I LATEX
produces
@.I .idx
files that contain the information that goes into the index.
@.I LATEX
also has a set of macros that are used to format an index. For some reason,
the
@.I .idx
file that
@.I LATEX
produces has no resemblance to the input file that it requires. In addition,
the
@.I .idx
file isn't sorted, doesn't have multiple page numbers per entry line, and
doesn't arrange subentries under the corresponding main entry.
@.PP
@.I Texindex
takes a 
@.I LATEX \|.idx
file and converts it into a format that
@.I LATEX
will
recognize as an index. It puts multiple page numbers on the same line, and
handles subentries properly,
@.I i.e.,
@.PP
	Gnus, 5-6, 25, 111
@.br
	\ \ \ Habits of, 5
@.br
	\ \ \ Smell, 25
@.PP
Output is directed to standard out.
@.PP
@.I Texindex
uses two
@.I awk
script and calls
@.I sort
to sort the index.
@.SH FILES
/usr/new/texindex	main program (a csh script)
@.br
/usr/lib/tex/index.awk	first awk script
@.br
/usr/lib/tex/index1.awk	second awk script
@.br
/tmp/texindex.xx	temporary file
@.SH BUGS
Handles subentries, but not sub-subentries.
@.PP
Should probably put large capitals before the entries for each letter, but
doesn't.
@.SH HISTORY
This is the second release. The first one, dated April 25, 1986, had
some bugs that didn't show up in my (too short) test index.
@//E*O*F texindex.n//
chmod u=rw,g=r,o=r texindex.n
 
echo x - texindex
sed 's/^@//' > "texindex" <<'@//E*O*F texindex//'
#!/bin/csh -f
# texindex -- create an index from a LaTeX .idx file
# uses the file index.awk
set INDEXAWK = /usr/lib/tex/index.awk
set INDEXAWK1 = /usr/lib/tex/index1.awk
set TEMP = /tmp/texindex.$$
cat $1 > $TEMP
cat >> $TEMP <<xxx
%\indexentry{%AZZZZZZ}{}
%\indexentry{%BZZZZZZ}{}
%\indexentry{%CZZZZZZ}{}
%\indexentry{%DZZZZZZ}{}
%\indexentry{%EZZZZZZ}{}
%\indexentry{%FZZZZZZ}{}
%\indexentry{%GZZZZZZ}{}
%\indexentry{%HZZZZZZ}{}
%\indexentry{%IZZZZZZ}{}
%\indexentry{%JZZZZZZ}{}
%\indexentry{%KZZZZZZ}{}
%\indexentry{%LZZZZZZ}{}
%\indexentry{%MZZZZZZ}{}
%\indexentry{%NZZZZZZ}{}
%\indexentry{%OZZZZZZ}{}
%\indexentry{%PZZZZZZ}{}
%\indexentry{%QZZZZZZ}{}
%\indexentry{%RZZZZZZ}{}
%\indexentry{%SZZZZZZ}{}
%\indexentry{%TZZZZZZ}{}
%\indexentry{%UZZZZZZ}{}
%\indexentry{%VZZZZZZ}{}
%\indexentry{%WZZZZZZ}{}
%\indexentry{%XZZZZZZ}{}
%\indexentry{%YZZZZZZ}{}
%\indexentry{%ZZZZZZZ}{}
xxx
sort -o $TEMP -bdfu -t\{ +1 -2 +2n $TEMP
awk -f $INDEXAWK $TEMP | awk -f $INDEXAWK1 | uniq
rm -f $TEMP

@//E*O*F texindex//
chmod u=rwx,g=rx,o=rx texindex
 
echo x - index.awk
sed 's/^@//' > "index.awk" <<'@//E*O*F index.awk//'
# index.awk -- take a sorted LaTeX index, and produce \item and
# \subitem entries for it
#
# Robert Plamondon, March 1986
#
BEGIN	{FS = "{"
	oldentry= GaRgLeBlAsTeR
	print("\\begin{theindex}")}

# leave spaces on comment lines
$2 ~ /^%/		{printf("\n\\indexspace")
			 next}
# Replace ! with \
#$2 ~ /^[\\]*[!]/		{while(index($2,"!") > 0)
#				 {x = index($2,"!")
#				 $2 = (substr($2,1,x-1) "\\" substr($2,x+1))}}

			{	$2 = substr($2,1,length($2)-1)
				newentry = $2
				newpage = substr($3,1,length($3)-1)
			}
# Handle subentries (entries with commas in them)
		{comma = index($2,",")
		if (comma > 0)
		 {
		  subentry = substr($2,comma+1)
		  mainentry = substr($2,1,comma-1)
		  {
		   if (mainentry != substr(oldentry,1,comma-1))
# make new major entry
		    printf("\n\\item %s, %s", mainentry, newpage)
#		  else
		    if (oldentry == newentry)
	  	      {
			if (oldpage != newpage)
			printf(", %s", newpage)
		      }
		    else
		       printf("\n	\\subitem %s, %s", subentry,newpage)
		   } 
		}
		else	# no comma -- this is a major entry
			{
			if (oldentry == newentry)
			  {if (oldpage != newpage)
				printf(", %s", newpage)}
			else
				printf("\n\\item %s, %s", newentry,newpage)
			}
	}
		{oldpage = newpage}
		{oldentry = newentry}

END	{printf("\n\\end{theindex}")}
@//E*O*F index.awk//
chmod u=rw,g=r,o=r index.awk
 
echo x - index1.awk
sed 's/^@//' > "index1.awk" <<'@//E*O*F index1.awk//'
# index1.awk -- takes index entries in the form:
# entry, number, number, number...
# and turns consecutive numbers into ranges, i.e., 
# gnus, 5, 6, 7, 10, 11, 15
# becomes
# gnus, 5-7, 10-11, 15
#
# Robert Plamondon, April 25, 1986
#
BEGIN	{FS = ","}
 { if (NF > 1)
   {
	{toprange = 0; field = 2; botrange = $field; field1 = 1+field}
	{ printf("%s",$1)
	  while (field < NF)
		{ 
		  while ($field1 == 1+$field)
			{ toprange = $field1; field++; field1++}

		if (toprange != 0)
			{printf(", %d-%d",botrange,toprange)
			 lasttop = toprange
			}
		else
			printf(", %d",$field)

		toprange = 0
		botrange = $field1
		field ++; field1++
		}
	if (lasttop != $NF)
		if ($NF != 0)
			printf(", %d", $NF)
	printf("\n")
	}
  }
  else print}
@//E*O*F index1.awk//
chmod u=rw,g=r,o=r index1.awk
 
# End of archive: report success and stop reading this file.
# NOTE(review): presumably this also keeps sh from executing any text
# that follows the archive in the saved file (e.g. a mail signature) -- confirm.
exit 0