[comp.sources.misc] Patch #1 to Ispell Version 2.0 Beta

geoff@desint.UUCP (Geoff Kuenning) (06/10/87)

This is Patch Number 1 to the beta posting of ispell.  Ispell is an
interactive spell-checking program that is faster, more friendly, and
more flexible than standard UNIX spell.  The beta posting itself is
available from the comp.sources.misc archives.

Well, once again it's proven that one shouldn't put a posting together
hastily in the middle of the night.  I am rather embarrassed at the
number and nature of the bugs in my ispell beta posting, but I guess
that's water under the bridge at this point.

The patch below effectively renames "fixdict.sh" to "fixdict.X" by creating
the new file from scratch.  It does not delete the old "fixdict.sh", so you
may want to remove it yourself ("make clean" now does this).

Bugs fixed in this patch:

    (1) If CAPITALIZE mode is selected, the wrongletter() routine may fail to
	properly generate all possibilities.
    (2) There were several syntax errors in xgets.c if MAXINCLUDEFILES was
	nonzero;  also the #if test was backwards (Johan Widen).
    (3) Several Makefile problems:  CC was not defined as "cc";  fixdict.sh
	did not respect the definition of LIBDIR;  icombine (and now fixdict)
	were missing from make clean;  and make install referred to
	expand[12].sed rather than isexp[1-4].sed.  (David Neves, Gary
	Johnson, Don Kark)
    (4) In term.c, the variables oldtt{in,ou,tstp} are undefined on BSD
	systems.  (Joel Shprentz, Don Kark)
    (5) If CAPITALIZE was undefined, the routine toutent() still referred
	to the capitalization-control fields in the dictionary structure.
	(Gary Johnson)  In fact, the code with CAPITALIZE undefined had
	not been recently tested and was full of problems.  These have
	been corrected and ispell has been tested in this mode, though not
	extensively.
    (6) In TeX mode, some misspelled words could be missed because a left
	brace was not seen as a delimiter.  Also, there were still a couple
	of nroff hangovers in TeX mode.  (Don Kark).
    (7) Buildhash.c wouldn't compile without CAPITALIZE.

To install this patch:

    From rn, type "|patch -d dir", where dir is the directory where you
    have ispell installed.  From other newsreaders, save the article, then
    type "patch -d dir <savefile" where savefile is the name of the file
    you just created.

	Geoff Kuenning   geoff@ITcorp.com   {uunet,trwrb}!desint!geoff

Index: version.h
Prereq: 2.0,

*** version.h.old	Tue Jun  9 23:47:25 1987
--- version.h	Tue Jun  9 23:47:30 1987
***************
*** 1,2
  static char Version_ID[] =
!     "@(#) Ispell Version 2.0, May 1987 Beta posting";

--- 1,2 -----
  static char Version_ID[] =
!     "@(#) Ispell Version 2.0.01, May 1987 Beta posting";

Index: Makefile

*** Makefile.old	Wed Jun 10 00:08:29 1987
--- Makefile	Wed Jun 10 00:08:33 1987
***************
*** 6,12
  # MAN1EXT, MAN4EXT, and TERMLIB below;
  # the Makefile will update all other files to match.
  #
! # On USG systems, add -DUSG to CFLAGS.
  #
  # The ifdef NO8BIT may be used if 8 bit extended text characters
  # cause problems, or you simply don't wish to allow the feature.

--- 6,12 -----
  # MAN1EXT, MAN4EXT, and TERMLIB below;
  # the Makefile will update all other files to match.
  #
! # On USG systems, add -DUSG to CFLAGS.  On BSD, remove it.
  #
  # The ifdef NO8BIT may be used if 8 bit extended text characters
  # cause problems, or you simply don't wish to allow the feature.
***************
*** 16,22
  #
  #   buildhash <infile> <outfile>
  
! CC = lcc -v -HL -HD -R tgetflag
  CFLAGS = -n -O -DUSG
  # BINDIR, LIBDIR, DEFHASH, DEFDICT, MAN1DIR, MAN4DIR, MAN1EXT, MAN4EXT,
  # TERMLIB

--- 16,22 -----
  #
  #   buildhash <infile> <outfile>
  
! CC = cc
  CFLAGS = -n -O -DUSG
  # BINDIR, LIBDIR, DEFHASH, DEFDICT, MAN1DIR, MAN4DIR, MAN1EXT, MAN4EXT,
  # TERMLIB
***************
*** 33,39
  
  SHELL = /bin/sh
  
! all: buildhash ispell icombine munchlist isexpand $(DEFHASH)
  
  ispell.hash: buildhash $(DEFDICT)
  	./buildhash $(DEFDICT) $(DEFHASH)

--- 33,39 -----
  
  SHELL = /bin/sh
  
! all: buildhash fixdict ispell icombine munchlist isexpand $(DEFHASH)
  
  ispell.hash: buildhash $(DEFDICT)
  	./buildhash $(DEFDICT) $(DEFHASH)
***************
*** 41,47
  install: all
  	cp ispell isexpand munchlist $(BINDIR)
  	cp ispell.hash $(LIBDIR)/$(DEFHASH)
! 	cp expand1.sed expand2.sed icombine $(LIBDIR)
  	chmod 755 $(BINDIR)/ispell $(BINDIR)/munchlist $(BINDIR)/isexpand \
  	  $(LIBDIR)/icombine
  	chmod 644 $(LIBDIR)/$(DEFHASH) $(LIBDIR)/expand1.sed \

--- 41,47 -----
  install: all
  	cp ispell isexpand munchlist $(BINDIR)
  	cp ispell.hash $(LIBDIR)/$(DEFHASH)
! 	cp isexp[1-4].sed icombine $(LIBDIR)
  	chmod 755 $(BINDIR)/ispell $(BINDIR)/munchlist $(BINDIR)/isexpand \
  	  $(LIBDIR)/icombine
  	chmod 644 $(LIBDIR)/$(DEFHASH) $(LIBDIR)/isexp[1-4].sed
***************
*** 44,51
  	cp expand1.sed expand2.sed icombine $(LIBDIR)
  	chmod 755 $(BINDIR)/ispell $(BINDIR)/munchlist $(BINDIR)/isexpand \
  	  $(LIBDIR)/icombine
! 	chmod 644 $(LIBDIR)/$(DEFHASH) $(LIBDIR)/expand1.sed \
! 	  $(LIBDIR)/expand2.sed
  	cp ispell.1 $(MAN1DIR)/ispell$(MAN1EXT)
  	cp ispell.4 $(MAN4DIR)/ispell$(MAN4EXT)
  

--- 44,50 -----
  	cp isexp[1-4].sed icombine $(LIBDIR)
  	chmod 755 $(BINDIR)/ispell $(BINDIR)/munchlist $(BINDIR)/isexpand \
  	  $(LIBDIR)/icombine
! 	chmod 644 $(LIBDIR)/$(DEFHASH) $(LIBDIR)/isexp[1-4].sed
  	cp ispell.1 $(MAN1DIR)/ispell$(MAN1EXT)
  	cp ispell.4 $(MAN4DIR)/ispell$(MAN4EXT)
  
***************
*** 52,57
  buildhash: buildhash.o hash.o
  	$(CC) $(CFLAGS) -o buildhash buildhash.o hash.o
  
  icombine:	icombine.c config.h ispell.h
  	$(CC) $(CFLAGS) -o icombine icombine.c
  

--- 51,61 -----
  buildhash: buildhash.o hash.o
  	$(CC) $(CFLAGS) -o buildhash buildhash.o hash.o
  
+ fixdict:	fixdict.X Makefile
+ 	sed -e 's@!!LIBDIR!!@$(LIBDIR)@' -e 's@!!DEFDICT!!@$(DEFDICT)@' \
+ 		<fixdict.X >fixdict
+ 	chmod +x fixdict
+ 
  icombine:	icombine.c config.h ispell.h
  	$(CC) $(CFLAGS) -o icombine icombine.c
  
***************
*** 78,81
  
  clean:
  	rm -f *.o buildhash ispell core a.out mon.out hash.out \
! 		*.stat *.cnt munchlist config.h

--- 82,85 -----
  
  clean:
  	rm -f *.o buildhash ispell core a.out mon.out hash.out \
! 		*.stat *.cnt fixdict fixdict.sh icombine munchlist config.h

Index: buildhash.c

*** buildhash.c.old	Wed Jun 10 00:05:38 1987
--- buildhash.c	Wed Jun 10 00:05:45 1987
***************
*** 240,245
  			d.word = malloc (2 * len + 4);
  		else
  			d.word = malloc (len + 1);
  #endif
  		if (d.word == NULL) {
  			fprintf (stderr, "couldn't allocate space for word %s\n", lbuf);

--- 240,247 -----
  			d.word = malloc (2 * len + 4);
  		else
  			d.word = malloc (len + 1);
+ #else
+ 		d.word = malloc (len + 1);
  #endif
  		if (d.word == NULL) {
  			fprintf (stderr, "couldn't allocate space for word %s\n", lbuf);
***************
*** 339,344
  			    hashtbl[h].next = dp;
  			}
  		}
  	}
  	printf ("\n");
  }

--- 341,347 -----
  			    hashtbl[h].next = dp;
  			}
  		}
+ #ifdef CAPITALIZE
  	}
  #endif
  	printf ("\n");
***************
*** 340,345
  			}
  		}
  	}
  	printf ("\n");
  }
  

--- 343,349 -----
  		}
  #ifdef CAPITALIZE
  	}
+ #endif
  	printf ("\n");
  }
  
Index: fixdict.X

0a1,79
> : Use /bin/sh
> #
> #	Add capitalization information to an ispell dictionary
> #
> #	Usage:
> #
> #	fixdict dict-file
> #
> #	Requires availability of UNIX spell.  The new dictionary is
> #	rewritten in place.  A list of words that couldn't be
> #	resolved (because spell doesn't know them) is written to
> #	standard output.  This list appears in lowercase in the
> #	dictionary, and if there are any errors they must be edited
> #	by hand.
> #
> #	The final dictionary appears in expanded form and must be
> #	passed through munchlist to regenerate suffixes.
> #
> LIBDIR=!!LIBDIR!!
> EXPAND1=${LIBDIR}/isexp1.sed
> EXPAND2=${LIBDIR}/isexp2.sed
> EXPAND3=${LIBDIR}/isexp3.sed
> EXPAND4=${LIBDIR}/isexp4.sed
> TDIR=${TMPDIR:-/tmp}
> TMP=${TDIR}/fix$$
> 
> trap "/bin/rm -f ${TMP}*; exit 1" 1 2 15
> sed -f ${EXPAND1} $1 | sed -f ${EXPAND2} \
>   | sed -f ${EXPAND3} | sed -f ${EXPAND4} \
>   | tr '[A-Z]' '[a-z]' \
>   | spell \
>   | sort > ${TMP}a
> #
> # ${TMP}a contains all the words that spell doesn't like.
> # Now figure out which of those are because spell doesn't know them at
> # all, and leave those in ${TMP}b.
> #
> tr '[a-z]' '[A-Z]' < ${TMP}a | spell | tr '[A-Z]' '[a-z]' > ${TMP}b
> #
> # The wrongly-capitalized words are those that spell didn't object to
> # in the last step.  Produce a list of them, and capitalize the
> # first letter of each.  Save this list in ${TMP}c.
> #
> comm -23 ${TMP}a ${TMP}b \
>   | sed 's/^a/A/;s/^b/B/;s/^c/C/;s/^d/D/;s/^e/E/;s/^f/F/;s/^g/G/;s/^h/H/
>      s/^i/I/;s/^j/J/;s/^k/K/;s/^l/L/;s/^m/M/;s/^n/N/;s/^o/O/;s/^p/P/
>      s/^q/Q/;s/^r/R/;s/^s/S/;s/^t/T/;s/^u/U/;s/^v/V/;s/^w/W/;s/^x/X/
>      s/^y/Y/;s/^z/Z/' > ${TMP}c
> #
> # Find out which of those spell objects to, saving the failures in ${TMP}d.
> #
> spell ${TMP}c > ${TMP}d
> #
> # Extract the words which were correctly capitalized at the first letter,
> # combine them with an all-capitals version of the ones that weren't, and
> # put the result into ${TMP}e.
> #
> (comm -23 ${TMP}c ${TMP}d;  tr '[a-z]' '[A-Z]' < ${TMP}d) \
>   | sort -o ${TMP}e
> #
> # At this point, ${TMP}b contains the words that spell just plain doesn't
> # like, and ${TMP}e contains the words that are now capitalized correctly.
> #
> /bin/rm ${TMP}[cd]
> #
> # Put it all together, rewriting the dictionary in place.
> #
> sed -f ${EXPAND1} $1 | sed -f ${EXPAND2} \
>   | sed -f ${EXPAND3} | sed -f ${EXPAND4} \
>   | tr '[A-Z]' '[a-z]' \
>   | sort \
>   | comm -23 - ${TMP}a \
>   | sort -f -o $1 - ${TMP}b ${TMP}e
> #
> # Finally, write the list of words that have questionable capitalization
> # to the standard output.
> #
> cat ${TMP}b
> /bin/rm ${TMP}*

Index: ispell.c

*** ispell.c.old	Wed Jun 10 00:25:46 1987
--- ispell.c	Wed Jun 10 00:26:14 1987
***************
*** 37,42
  #include "ispell.h"
  #include "version.h"
  
  FILE *infile;
  FILE *outfile;
  

--- 37,47 -----
  #include "ispell.h"
  #include "version.h"
  
+ #define ISTEXTERM(c)   (((c) == '{') || \
+ 			((c) == '}') || \
+ 			((c) == '[') || \
+ 			((c) == ']'))
+ 
  FILE *infile;
  FILE *outfile;
  
***************
*** 437,449
  		
  		len = strlen (secondbuf) - 1;
  
! 		/* skip over .if */
! 		if (strncmp(currentchar,".if t",5) == 0 
! 		||  strncmp(currentchar,".if n",5) == 0) {
! 			copyout(&currentchar,5);
! 			while (*currentchar && isspace(*currentchar)) 
! 				copyout(&currentchar, 1);
! 		}
  
  		/* skip over .ds XX or .nr XX */
  		if (strncmp(currentchar,".ds ",4) == 0 

--- 442,455 -----
  		
  		len = strlen (secondbuf) - 1;
  
! 		if(!tflag) {
! 		    /* skip over .if */
! 		    if (strncmp(currentchar,".if t",5) == 0 
! 		    ||  strncmp(currentchar,".if n",5) == 0) {
! 			    copyout(&currentchar,5);
! 			    while (*currentchar && isspace(*currentchar)) 
! 				    copyout(&currentchar, 1);
! 		    }
  
  		    /* skip over .ds XX or .nr XX */
  		    if (strncmp(currentchar,".ds ",4) == 0 
***************
*** 445,463
  				copyout(&currentchar, 1);
  		}
  
! 		/* skip over .ds XX or .nr XX */
! 		if (strncmp(currentchar,".ds ",4) == 0 
! 		||  strncmp(currentchar,".de ",4) == 0
! 		||  strncmp(currentchar,".nr ",4) == 0) {
! 			copyout(&currentchar, 3);
! 			while (*currentchar && isspace(*currentchar)) 
! 				copyout(&currentchar, 1);
! 			while (*currentchar && !isspace(*currentchar))
! 				copyout(&currentchar, 1);
! 			if (*currentchar == 0) {
! 				if (!lflag) putc ('\n', outfile);
! 				continue;
! 			}
  		}
  
  		if (secondbuf [ len ] == '\n')

--- 451,470 -----
  				    copyout(&currentchar, 1);
  		    }
  
! 		    /* skip over .ds XX or .nr XX */
! 		    if (strncmp(currentchar,".ds ",4) == 0 
! 		    ||  strncmp(currentchar,".de ",4) == 0
! 		    ||  strncmp(currentchar,".nr ",4) == 0) {
! 			    copyout(&currentchar, 3);
! 			    while (*currentchar && isspace(*currentchar)) 
! 				    copyout(&currentchar, 1);
! 			    while (*currentchar && !isspace(*currentchar))
! 				    copyout(&currentchar, 1);
! 			    if (*currentchar == 0) {
! 				    if (!lflag) putc ('\n', outfile);
! 				    continue;
! 			    }
! 		    }
  		}
  
  		if (secondbuf [ len ] == '\n')
***************
*** 484,490
  				if (*currentchar == '\\') {
  				    /* skip till whitespace */
  				    while (*currentchar && 
! 					!isspace(*currentchar)) {
  					    if (!lflag)
  						putc(*currentchar, outfile);
  					    currentchar++;

--- 491,498 -----
  				if (*currentchar == '\\') {
  				    /* skip till whitespace */
  				    while (*currentchar && 
! 					(!isspace(*currentchar) &&
! 					 !ISTEXTERM(*currentchar))) {
  					    if (!lflag)
  						putc(*currentchar, outfile);
  					    currentchar++;
***************
*** 960,965
  					return;
  			}
  		}
  		newword[i] = word[i];
  	}
  }

--- 968,980 -----
  					return;
  			}
  		}
+ #ifdef CAPITALIZE
+ 		c = word[i];
+ 		if (islower (c))
+ 		    newword[i] = toupper (c);
+ 		else
+ 		    newword[i] = c;
+ #else
  		newword[i] = word[i];
  #endif
  	}
***************
*** 961,966
  			}
  		}
  		newword[i] = word[i];
  	}
  }
  

--- 976,982 -----
  		    newword[i] = c;
  #else
  		newword[i] = word[i];
+ #endif
  	}
  }
  
Index: ispell.1

*** ispell.1.old	Wed Jun 10 00:28:13 1987
--- ispell.1	Wed Jun 10 00:28:21 1987
***************
*** 376,384
  .I ispell
  to read in the hash table.
  .sp
! Perhaps more than ten choices should be allowed for near misses.
! .sp
! The hash table is stored as a quarter-megabyte array, so a PDP-11
  version does not seem likely.
  .sp
  .I Ispell

--- 376,382 -----
  .I ispell
  to read in the hash table.
  .sp
! The hash table is stored as a quarter-megabyte (or larger) array, so a PDP-11
  version does not seem likely.
  .sp
  .I Ispell
***************
*** 425,430
  Stewart Clamen,
  Mark Davies,
  Steve Dum,
  Don Kark,
  Steve Kelem,
  Jim Knutson,

--- 423,429 -----
  Stewart Clamen,
  Mark Davies,
  Steve Dum,
+ Gary Johnson,
  Don Kark,
  Steve Kelem,
  Jim Knutson,
***************
*** 441,446
  Marc Ries,
  Rich Salz,
  Greg Schaffer,
  George Sipe,
  Perry Smith,
  Stefan Taxhet,

--- 440,446 -----
  Marc Ries,
  Rich Salz,
  Greg Schaffer,
+ Joel Shprentz,
  George Sipe,
  Perry Smith,
  Stefan Taxhet,
***************
*** 445,449
  Perry Smith,
  Stefan Taxhet,
  Andrew Vignaux,
  James Woods,
  and Ken Yap.

--- 445,450 -----
  Perry Smith,
  Stefan Taxhet,
  Andrew Vignaux,
+ Johan Widen,
  James Woods,
  and Ken Yap.

Index: term.c

*** term.c.old	Wed Jun 10 00:29:35 1987
--- term.c	Wed Jun 10 00:29:39 1987
***************
*** 68,73
  static termchanged = 0;
  static int (*oldint) ();
  static int (*oldterm) ();
  
  terminit ()
  {

--- 68,78 -----
  static termchanged = 0;
  static int (*oldint) ();
  static int (*oldterm) ();
+ #ifdef SIGTTIN
+ static int (*oldttin) ();
+ static int (*oldttou) ();
+ static int (*oldtstp) ();
+ #endif
  
  terminit ()
  {
***************
*** 228,236
  	signal (SIGINT, oldint);
  	signal (SIGTERM, oldterm);
  #ifdef SIGTTIN
! 	signal(SIGTTIN, SIG_DFL);
! 	signal(SIGTTOU, SIG_DFL);
! 	signal(SIGTSTP, SIG_DFL);
  #endif
  
  	system (buf);

--- 233,241 -----
  	signal (SIGINT, oldint);
  	signal (SIGTERM, oldterm);
  #ifdef SIGTTIN
! 	oldttin = signal(SIGTTIN, SIG_DFL);
! 	oldttou = signal(SIGTTOU, SIG_DFL);
! 	oldtstp = signal(SIGTSTP, SIG_DFL);
  #endif
  
  	system (buf);

Index: tree.c

*** tree.c.old	Wed Jun 10 00:31:23 1987
--- tree.c	Wed Jun 10 00:31:34 1987
***************
*** 632,637
  toutent (cent)
  register struct dent *cent;
  {
  	register char *cp;
  	int len;
  	register int wcount;

--- 632,638 -----
  toutent (cent)
  register struct dent *cent;
  {
+ #ifdef CAPITALIZE
  	register char *cp;
  	int len;
  	register int wcount;
***************
*** 659,664
  			cent->word[0] = toupper (cent->word[0]);
  		toutword (cent->word, cent);
  	}
  }
  		
  static

--- 660,668 -----
  			cent->word[0] = toupper (cent->word[0]);
  		toutword (cent->word, cent);
  	}
+ #else
+ 	toutword (cent->word, cent);
+ #endif
  }
  		
  static

Index: xgets.c

*** xgets.c.old	Wed Jun 10 00:33:05 1987
--- xgets.c	Wed Jun 10 00:33:08 1987
***************
*** 22,28
  xgets (str)
  char str [];
  {
! #if MAXINCLUDFILES == 0
      return gets (str);
  #else
      static char * Include_File = DEFINCSTR;

--- 22,28 -----
  xgets (str)
  char str [];
  {
! #if MAXINCLUDEFILES == 0
      return gets (str);
  #else
      static char * Include_File = DEFINCSTR;
***************
*** 35,41
      if (Include_Len == 0) {
  	char * env_variable, * getenv ();
  
! 	if ((env_variable = getenv (INCSTRVAR) != NULL)
  	    Include_File = env_variable;
  	Include_Len = strlen (Include_File);
  

--- 35,41 -----
      if (Include_Len == 0) {
  	char * env_variable, * getenv ();
  
! 	if ((env_variable = getenv (INCSTRVAR)) != NULL)
  	    Include_File = env_variable;
  	Include_Len = strlen (Include_File);
  
***************
*** 59,65
  
  	if (incfileflag != 0 && strncmp (str, Include_File, Include_Len) == 0) {
  	    char * file_name = str + Include_Len;
! 	    if (current_F - F < MAX_FILES && strlen (file_name) > 0) {
  		FILE * f;
  		if (f = fopen (file_name, "r"))
  		    *(++current_F) = f;

--- 59,65 -----
  
  	if (incfileflag != 0 && strncmp (str, Include_File, Include_Len) == 0) {
  	    char * file_name = str + Include_Len;
! 	    if (current_F - F < MAXINCLUDEFILES && strlen (file_name) > 0) {
  		FILE * f;
  		if (f = fopen (file_name, "r"))
  		    *(++current_F) = f;