[net.sources] ispell mods

bobm@rtech.UUCP (01/28/87)

Here are the mods I mentioned in my comp.sources.d article of a few days ago.
That was article <620@rtech>, should you want to look for it.

A look at the updated manual page will tell you what I added - various
command line bells & whistles, basically, as well as abstracting some
configuration parameters into a separate header file.  Look over the new
makefile and config.h before you build.

Should be backwards compatible, with 2 minor differences:

	The stats & count files produced by buildhash are now named
	differently.

	No longer checks for the hash table file in your present
	directory before looking in the library directory.  If you
	have such a situation, use the new -d option.

If I do the enhancements to roff handling I alluded to in my article,
any changes will be on top of THIS base code.  Something else I've
been musing about as an enhancement:

	A global replace addition to the replacement commands so
	that you don't have to replace the same misspelling multiple
	times.  At the user interface, I would probably do this by
	allowing a keystroke to precede the replacement keystroke
	indicating that the replacement is to be applied to any
	further instances of the same misspelling.

To balance bytes transmitted against work required, I'm sending some
replacement source files, and some diffs.  There aren't any real major
logic changes in the replaced files, but applying lots of one and
two line diffs can get annoying.  You get:

	config.h - completely new file.

	Makefile buildhash.c ispell.c ispell.man tree.c - replacements.

	good.c.diff lookup.c.diff term.c.diff ispell.h.diff - diff's
		to apply to those files.

Bob McQueer
{amdahl, sun, mtxinu, hoptoad, cpsc6a}!rtech!bobm

cut here
--------------------------------
#! /bin/sh
# This is a shell archive, meaning:
# 1. Remove everything above the #! /bin/sh line.
# 2. Save the resulting text in a file.
# 3. Execute the file with /bin/sh (not csh) to create the files:
#	Makefile
#	buildhash.c
#	config.h
#	ispell.c
#	ispell.man
#	tree.c
#	good.c.diff
#	lookup.c.diff
#	term.c.diff
#	ispell.h.diff
# This archive created: Tue Jan 27 12:15:31 1987
export PATH; PATH=/bin:$PATH
if test -f 'Makefile'
then
	echo shar: will not over-write existing file "'Makefile'"
else
cat << \SHAR_EOF > 'Makefile'
# -*- Mode: Text -*-

# Look over config.h before building.
#
# LIBDIR, DEFHASH, DEFDICT should match definitions in config.h.
#
# The ifdef NO8BIT may be used if 8 bit extended text characters
# cause problems, or you simply don't wish to allow the feature.
#
# the argument syntax for buildhash to make alternate dictionary files
# is simply:
#
#   buildhash <infile> <outfile>

CFLAGS = -O
BINDIR = /usr/local/bin
LIBDIR = /usr/local/lib
DEFHASH = ispell.hash
DEFDICT = dict.191

all: buildhash ispell $(DEFHASH)

# Build the hash file from the dictionary.  The file names are passed
# explicitly so that changing DEFHASH / DEFDICT above actually takes
# effect, and ./ is used so the build does not rely on "." being in PATH.
$(DEFHASH): buildhash $(DEFDICT)
	./buildhash $(DEFDICT) $(DEFHASH)

install: buildhash ispell $(DEFHASH)
	cp ispell ${BINDIR}/ispell
	cp ${DEFHASH} ${LIBDIR}/${DEFHASH}
	chmod 755 ${BINDIR}/ispell ${LIBDIR}/${DEFHASH}

buildhash: buildhash.o hash.o
	cc -o buildhash buildhash.o hash.o

ispell: ispell.o term.o good.o lookup.o hash.o tree.o
	cc $(CFLAGS) -o ispell ispell.o term.o good.o lookup.o \
		hash.o tree.o -ltermlib

clean:
	rm -f *.o buildhash ispell core a.out mon.out hash.out \
		*.stat *.cnt
SHAR_EOF
fi # end of overwriting check
if test -f 'buildhash.c'
then
	echo shar: will not over-write existing file "'buildhash.c'"
else
cat << \SHAR_EOF > 'buildhash.c'
/* -*- Mode: Text -*- */
/*
 * buildhash.c - make a hash table for ispell
 *
 * Pace Willisson, 1983
 */

#include <stdio.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/param.h>
#include "ispell.h"
#include "config.h"

#define NSTAT 100
struct stat dstat, cstat;

int numwords, hashsize;

char *malloc();

struct dent *hashtbl;

char *Dfile;
char *Hfile;

char Cfile[MAXPATHLEN];
char Sfile[MAXPATHLEN];

/*
 * main - build a hash file from a dictionary.
 *
 * Usage: buildhash [dictionary [hashfile]]
 *
 * Missing arguments default to DEFDICT and DEFHASH.  The word count is
 * cached in "<dictionary>.cnt" and recomputed when that file is missing
 * or older than the dictionary; chain length statistics are written to
 * "<dictionary>.stat".  Exits with status 1 on any failure detected here
 * and returns 0 otherwise, so callers such as make see a defined status.
 */
main (argc,argv)
int argc;
char **argv;
{
	FILE *countf;
	FILE *statf;
	int stats[NSTAT];
	int i;

	if (argc > 1) {
		++argv;
		Dfile = *argv;
		if (argc > 2) {
			++argv;
			Hfile = *argv;
		}
		else
			Hfile = DEFHASH;
	}
	else {
		Dfile = DEFDICT;
		Hfile = DEFHASH;
	}

	/*
	 * Cfile and Sfile are fixed size buffers; ".stat" is the longer of
	 * the two suffixes, so checking it covers both names.
	 */
	if (strlen (Dfile) + sizeof ".stat" > sizeof Sfile) {
		fprintf (stderr, "Dictionary name too long (%s)\n", Dfile);
		exit (1);
	}
	sprintf(Cfile,"%s.cnt",Dfile);
	sprintf(Sfile,"%s.stat",Dfile);

	if (stat (Dfile, &dstat) < 0) {
		fprintf (stderr, "No dictionary (%s)\n", Dfile);
		exit (1);
	}

	/* recount when there is no count file or the dictionary is newer */
	if (stat (Cfile, &cstat) < 0 || dstat.st_mtime > cstat.st_mtime)
		newcount ();

	if ((countf = fopen (Cfile, "r")) == NULL) {
		fprintf (stderr, "No count file\n");
		exit (1);
	}
	/* numwords stays 0 if the count cannot be parsed */
	numwords = 0;
	fscanf (countf, "%d", &numwords);
	fclose (countf);
	if (numwords == 0) {
		fprintf (stderr, "Bad count file\n");
		exit (1);
	}
	hashsize = numwords;
	readdict ();

	if ((statf = fopen (Sfile, "w")) == NULL) {
		fprintf (stderr, "Can't create %s\n", Sfile);
		exit (1);
	}

	/*
	 * stats[0] counts empty slots, stats[j] counts slots whose chain
	 * holds j entries; chains of NSTAT or more share the last bucket.
	 */
	for (i = 0; i < NSTAT; i++)
		stats[i] = 0;
	for (i = 0; i < hashsize; i++) {
		struct dent *dp;
		int j;
		if (hashtbl[i].used == 0) {
			stats[0]++;
		} else {
			for (j = 1, dp = &hashtbl[i]; dp->next != NULL; j++, dp = dp->next)
				;
			if (j >= NSTAT)
				j = NSTAT - 1;
			stats[j]++;
		}
	}
	for (i = 0; i < NSTAT; i++)
		fprintf (statf, "%d: %d\n", i, stats[i]);
	fclose (statf);

	filltable ();

	output ();

	return (0);
}

/*
 * output - write the hash table to Hfile.
 *
 * File layout, in order: a struct hashheader, the NUL terminated words
 * of every table slot, then the array of struct dent.  Before the array
 * is written each word pointer is replaced by the word's byte offset in
 * the string area and each next pointer by the index of the slot it
 * refers to (-1 for end of chain).  The header is written twice: once
 * as a placeholder and again, after rewinding, with the final
 * stringsize.  Nothing is written if Hfile cannot be created.
 */
output ()
{
	FILE *outfile;
	struct hashheader hashheader;
	int strptr, n, i;
	char *word;

	if ((outfile = fopen (Hfile, "w")) == NULL) {
		fprintf (stderr, "can't create %s\n",Hfile);
		return;
	}
	hashheader.magic = MAGIC;
	hashheader.stringsize = 0;
	hashheader.tblsize = hashsize;
	fwrite (&hashheader, sizeof hashheader, 1, outfile);
	strptr = 0;
	for (i = 0; i < hashsize; i++) {
		/*
		 * Slots that never received a word still have the null
		 * pointer calloc left there; emit an empty string for
		 * them rather than handing NULL to strlen.
		 */
		word = hashtbl[i].word;
		if (word == NULL)
			word = "";
		n = strlen (word) + 1;
		fwrite (word, n, 1, outfile);
		hashtbl[i].word = (char *)strptr;
		strptr += n;
	}
	for (i = 0; i < hashsize; i++) {
		if (hashtbl[i].next != 0) {
			int x;
			x = hashtbl[i].next - hashtbl;
			hashtbl[i].next = (struct dent *)x;
		} else {
			hashtbl[i].next = (struct dent *)-1;
		}
	}
	fwrite (hashtbl, sizeof (struct dent), hashsize, outfile);
	hashheader.stringsize = strptr;
	rewind (outfile);
	fwrite (&hashheader, sizeof hashheader, 1, outfile);
	fclose (outfile);
}

/*
 * filltable - move collision entries into the table itself.
 *
 * readdict() hangs colliding words off a slot as separately allocated
 * entries.  For every slot whose chain leaves the table, each entry on
 * the chain is copied into a free slot and the chain is relinked to the
 * copy, so that afterwards every next pointer refers to a table slot.
 * All scans for a free slot are bounded by the end of the table.
 */
filltable ()
{
	struct dent *freepointer, *nextword, *dp;
	struct dent *tblend;
	int i;

	tblend = hashtbl + hashsize;

	/* locate the first free slot */
	for (freepointer = hashtbl;
	     freepointer < tblend && freepointer->used;
	     freepointer++)
		;
	for (nextword = hashtbl, i = numwords; i != 0; nextword++, i--) {
		if (nextword->used == 0) {
			continue;
		}
		if (nextword->next == NULL) {
			continue;
		}
		/* chain already lives inside the table */
		if (nextword->next >= hashtbl && nextword->next < tblend) {
			continue;
		}
		dp = nextword;
		while (dp->next) {
			/* tblend itself is one past the last usable slot */
			if (freepointer >= tblend) {
				fprintf (stderr, "table overflow\n");
				getchar ();
				break;
			}
			*freepointer = *(dp->next);
			dp->next = freepointer;
			dp = freepointer;

			/* the slot just filled is now used; find the next free one */
			while (freepointer < tblend && freepointer->used)
				freepointer++;
		}
	}
}


/*
 * readdict - read the dictionary file into a freshly allocated table.
 *
 * Allocates numwords zeroed entries for hashtbl, then reads Dfile one
 * line at a time.  Each line is parsed by makedent(); lines it rejects
 * are skipped.  The word is stored in its hash slot when that slot is
 * free, otherwise a separately allocated entry is linked in directly
 * after the slot.  A running line count is printed every 1000 lines.
 * Exits with status 1 if the file cannot be opened or memory runs out.
 */
readdict ()
{
	struct dent d;
	char lbuf[100];
	FILE *dictf;
	int i;
	int h;
	int len;

	if ((dictf = fopen (Dfile, "r")) == NULL) {
		fprintf (stderr, "Can't open dictionary\n");
		exit (1);
	}

	hashtbl = (struct dent *) calloc (numwords, sizeof (struct dent));
	if (hashtbl == NULL) {
		fprintf (stderr, "couldn't allocate hash table\n");
		exit (1);
	}

	i = 0;
	while (fgets (lbuf, sizeof lbuf, dictf) != NULL) {
		if (i % 1000 == 0) {
			printf ("%d ", i);
			fflush (stdout);
		}
		i++;

		/* strip the newline; guard against a zero length read */
		len = strlen (lbuf);
		if (len > 0 && lbuf[len - 1] == '\n')
			lbuf[len - 1] = 0;

		if (makedent (lbuf, &d) < 0)
			continue;

		d.word = malloc (strlen (lbuf) + 1);
		if (d.word == NULL) {
			fprintf (stderr, "couldn't allocate space for word %s\n", lbuf);
			exit (1);
		}
		strcpy (d.word, lbuf);

		h = hash (lbuf, strlen (lbuf), hashsize);

		if (hashtbl[h].used == 0) {
			hashtbl[h] = d;

		} else {
			struct dent *dp;

			dp = (struct dent *) malloc (sizeof (struct dent));
			if (dp == NULL) {
				fprintf (stderr, "couldn't allocate space for collision\n");
				exit (1);
			}
			/* splice the new entry in right behind the slot */
			*dp = d;
			dp->next = hashtbl[h].next;
			hashtbl[h].next = dp;
		}
	}
	fclose (dictf);
	printf ("\n");
}

/*
 * makedent - fill in the flags in d, and put a null after the word
 * in lbuf.
 *
 * lbuf holds one dictionary line of the form "word" or "word/F/G...",
 * where each flag is a single upper case letter.  The first '/' is
 * overwritten with a NUL so that lbuf is left holding just the word.
 * Unknown flags and malformed flag lists are reported on stderr but do
 * not reject the word.
 *
 * Returns 0 on success, -1 if the word is longer than WORDLEN - 1
 * characters (d is not usable in that case).
 */

makedent (lbuf, d)
char *lbuf;
struct dent *d;
{
	char *p, *index();

	d->next = NULL;
	d->used = 1;
	d->v_flag = 0;
	d->n_flag = 0;
	d->x_flag = 0;
	d->h_flag = 0;
	d->y_flag = 0;
	d->g_flag = 0;
	d->j_flag = 0;
	d->d_flag = 0;
	d->t_flag = 0;
	d->r_flag = 0;
	d->z_flag = 0;
	d->s_flag = 0;
	d->p_flag = 0;
	d->m_flag = 0;

	p = index (lbuf, '/');
	if (p != NULL)
		*p = 0;
	if (strlen (lbuf) > WORDLEN - 1) {
		printf ("%s: word too big\n", lbuf);
		return (-1);
	}

	if (p == NULL)
		return (0);

	p++;
	while (*p != 0) {
		switch (*p) {
		case 'V': d->v_flag = 1; break;
		case 'N': d->n_flag = 1; break;
		case 'X': d->x_flag = 1; break;
		case 'H': d->h_flag = 1; break;
		case 'Y': d->y_flag = 1; break;
		case 'G': d->g_flag = 1; break;
		case 'J': d->j_flag = 1; break;
		case 'D': d->d_flag = 1; break;
		case 'T': d->t_flag = 1; break;
		case 'R': d->r_flag = 1; break;
		case 'Z': d->z_flag = 1; break;
		case 'S': d->s_flag = 1; break;
		case 'P': d->p_flag = 1; break;
		case 'M': d->m_flag = 1; break;
		default:
			fprintf (stderr, "unknown flag %c word %s\n",
					*p, lbuf);
			break;
		}
		/* a flag must be followed by '/', a newline or the end */
		p++;
		if (*p != '/' && *p != 0 && *p != '\n') {
			fprintf (stderr, "bad format %s (%c 0%o)\n",
					lbuf, *p, *p);
			break;
		}
		if (*p)
			p++;

	}
	return (0);
}

/*
 * newcount - count the lines of the dictionary and record the total.
 *
 * Reads Dfile line by line, echoing the running total to stdout every
 * 1000 lines, then writes the final total as a decimal number to Cfile.
 * Exits with status 1 if either file cannot be opened.
 */
newcount ()
{
	char line[200];
	FILE *fp;
	int nlines;

	fprintf (stderr, "Counting words in dictionary ...\n");

	fp = fopen (Dfile, "r");
	if (fp == NULL) {
		fprintf (stderr, "Can't open dictionary\n");
		exit (1);
	}

	for (nlines = 0; fgets (line, sizeof line, fp) != NULL; ) {
		if (++nlines % 1000 != 0)
			continue;
		printf ("%d ", nlines);
		fflush (stdout);
	}
	fclose (fp);
	printf ("\n%d words\n", nlines);

	fp = fopen (Cfile, "w");
	if (fp == NULL) {
		fprintf (stderr, "can't create %s\n", Cfile);
		exit (1);
	}
	fprintf (fp, "%d\n", nlines);
	fclose (fp);
}
SHAR_EOF
fi # end of overwriting check
if test -f 'config.h'
then
	echo shar: will not over-write existing file "'config.h'"
else
cat << \SHAR_EOF > 'config.h'
/*
** library directory for hash table(s) / default hash table name
** If you intend to use multiple dictionary files, I would suggest
** LIBDIR be a directory which will contain nothing else, so sensible
** names can be constructed for the -d option without conflict.
*/
#define LIBDIR "/usr/local/lib"
#define DEFHASH "ispell.hash"

/* environment variable for user's word list */
#define PDICTVAR "WORDLIST"

/* default word list */
#define DEFPDICT "ispell.words"

/* mktemp template for temporary file - MUST contain 6 consecutive X's */
#define TEMPNAME "/usr/tmp/spellXXXXXX"

/* default dictionary file */
#define DEFDICT "dict.191"

/* buffer size to use for file names if not in sys/param.h */
#ifndef MAXPATHLEN
#define MAXPATHLEN 240
#endif

/* word length allowed in dictionary by buildhash */
#define WORDLEN 30

/* hash table magic number */
#define MAGIC 1

/*
** the isXXXX macros normally only check ASCII range.  These are used
** instead for text characters, which we assume may be 8 bit.  The
** NO8BIT ifdef shuts off significance of 8 bit characters.  If you are
** using this, and your ctype.h already masks, you can simplify.
*/
/*
** With NO8BIT the high bit is stripped before the ctype test.
** Otherwise any character with the high bit set is reported as not
** upper / lower / space / alpha.  NOTE: the 8 bit variants expand X
** twice, so the argument must not have side effects.
*/
#ifdef NO8BIT
#define myupper(X) isupper((X)&0x7f)
#define mylower(X) islower((X)&0x7f)
#define myspace(X) isspace((X)&0x7f)
#define myalpha(X) isalpha((X)&0x7f)
#else
#define myupper(X) (!((X)&0x80) && isupper(X))
#define mylower(X) (!((X)&0x80) && islower(X))
#define myspace(X) (!((X)&0x80) && isspace(X))
#define myalpha(X) (!((X)&0x80) && isalpha(X))
#endif

/*
** the NOPARITY mask is applied to user input characters from the terminal
** in order to mask out the parity bit.
*/
#define NOPARITY 0x7f
SHAR_EOF
fi # end of overwriting check
if test -f 'ispell.c'
then
	echo shar: will not over-write existing file "'ispell.c'"
else
cat << \SHAR_EOF > 'ispell.c'
/* -*- Mode:Text -*- */
/*
 * ispell.c - An interactive spelling corrector.
 *
 * Copyright (c), 1983, by Pace Willisson
 * Permission for non-profit use is hereby granted.
 * All other rights reserved.
 *
 * 1987, Robert McQueer, added:
 *	-w option & handling of extra legal word characters
 *	-d option for alternate dictionary file
 *	-p option & WORDLIST variable for alternate personal dictionary
 *	-x option to suppress .bak files.
 *	8 bit text & config.h parameters
 */

#include <stdio.h>
#include <ctype.h>
#include <sys/param.h>
#include "ispell.h"
#include "config.h"

FILE *infile;
FILE *outfile;

char hashname[MAXPATHLEN];

/*
** we use extended character set range specifically to allow intl.
** character set characters.  We are being REALLY paranoid about indexing
** this array - explicitly cast into unsigned INTEGER, then mask
** If NO8BIT is set, text will be masked to ascii range.
*/
static int Trynum;
#ifdef NO8BIT
static char Try[128];
static char Checkch[128];
#define iswordch(X) (Checkch[((unsigned)(X))&0x7f])
#else
static char Try[256];
static char Checkch[256];
#define iswordch(X) (Checkch[((unsigned)(X))&0xff])
#endif

/*
 * givehelp - clear the screen, print the command summary and wait for
 * a keystroke.  Lines end in \r\n explicitly.
 */
givehelp ()
{
	erase ();
	printf ("Whenever a word is found that is not in the dictionary,\r\n");
	printf ("it is printed on the first line of the screen.  If the dictionary\r\n");
	printf ("contains any similar words, they are listed with a single digit\r\n");
	printf ("next to each one.  You have the option of replacing the word\r\n");
	printf ("completely, or choosing one of the suggested words.\r\n");
	printf ("\r\n");
	printf ("Commands are:\r\n\r\n");
	printf ("R       Replace the misspelled word completely.\r\n");
	printf ("Space   Accept the word this time only\r\n");
	printf ("A       Accept the word for the rest of this file.\r\n");
	printf ("I       Accept the word, and put it in your private dictionary.\r\n");
	printf ("0-9     Replace with one of the suggested words.\r\n");
	printf ("Q       Write the rest of this file, ignoring misspellings, ");
	printf (         "and start next file.\r\n");
	printf ("X       Exit immediately.  Asks for confirmation.  ");
	printf (         "Leaves file unchanged.\r\n");
	printf ("!       Shell escape.\r\n");
	printf ("^L      Redraw screen.\r\n");
	printf ("\r\n\r\n");
	printf ("-- Type space to continue --");
	fflush (stdout);
	getchar ();
}


char *getline();

int lflag = 0;
int aflag = 0;
int fflag = 0;
int sflag = 0;
int xflag = 0;

char *askfilename;

static char *Cmd;

/*
 * usage - print the three invocation forms on stderr, each prefixed
 * with the command name saved in Cmd, and exit with status 1.
 */
usage ()
{
	static char *forms[] = {
		"Usage: %s [-dfile | -pfile | -wchars | -x] file .....\n",
		"       %s [-dfile | -pfile | -wchars] -l\n",
		"       %s [-dfile | -pfile | -ffile | -s] -a\n"
	};
	int k;

	for (k = 0; k < (int) (sizeof forms / sizeof forms[0]); k++)
		fprintf (stderr, forms[k], Cmd);
	exit (1);
}

/*
 * initckch - set up the word character table and the list of letters
 * tried when generating near misses.
 *
 * Every character code that indexes Checkch is classified: alphabetics
 * are marked as word characters, and the upper case ones are appended
 * to Try.  The loop bound is taken from the array itself so that it is
 * 128 when NO8BIT is defined and 256 otherwise, and can never index
 * past the end of Checkch.
 */
static initckch()
{
	int c;

	Trynum = 0;
	for (c = 0; c < (int) sizeof Checkch; ++c) {
		if (myalpha((char) c)) {
			Checkch[c] = (char) 1;
			if (myupper((char) c)) {
				Try[Trynum] = (char) c;
				++Trynum;
			}
		}
		else
			Checkch[c] = (char) 0;
	}
}

/*
 * main - parse the command line, load the dictionaries and run in the
 * requested mode.
 *
 * Options (an argument may be attached or given as the next word):
 *	-a	ask mode, see askmode()
 *	-l	list mode: check stdin
 *	-x	remove the .bak file after a successful rewrite
 *	-s	used together with -a, see askmode()
 *	-f file	output file for ask mode
 *	-p file	personal dictionary, passed to treeinit()
 *	-d file	hash file; LIBDIR is prefixed unless it starts with '/'
 *	-w chars extra characters allowed inside words; "nDDD" gives a
 *		character by three digit decimal code
 *
 * With neither -a nor -l each remaining argument is checked
 * interactively by dofile().
 */
main (argc, argv)
int argc;
char **argv;
{
	char *p;
	char *cpd;
	char num[4];
	unsigned mask;

	Cmd = *argv;

	initckch();
	sprintf(hashname,"%s/%s",LIBDIR,DEFHASH);

	cpd = NULL;

	argv++;
	argc--;
	while (argc && **argv == '-') {
		switch ((*argv)[1]) {
		case 'a':
			aflag++;
			break;
		case 'x':
			xflag++;
			break;
		case 'f':
			fflag++;
			p = (*argv)+2;
			if (*p == '\0') {
				argv++; argc--;
				if (argc == 0)
					usage ();
				p = *argv;
			}
			askfilename = p;
			break;
		case 'l':
			lflag++;
			break;
		case 's':
			sflag++;
			break;
		case 'p':
			cpd = (*argv)+2;
			if (*cpd == '\0') {
				argv++; argc--;
				if (argc == 0)
					usage ();
				cpd = *argv;
			}
			break;
		case 'd':
			p = (*argv)+2;
			if (*p == '\0') {
				argv++; argc--;
				if (argc == 0)
					usage ();
				p = *argv;
			}
			if (*p == '/')
				strcpy(hashname,p);
			else
				sprintf(hashname,"%s/%s",LIBDIR,p);
			break;
		case 'w':
			num[3] = '\0';
			/* the mask must cover every index of Checkch / Try */
#ifdef NO8BIT
			mask = 0x7f;
#else
			mask = 0xff;
#endif
			p = (*argv)+2;
			if (*p == '\0') {
				argv++; argc--;
				if (argc == 0)
					usage ();
				p = *argv;
			}
			while (Trynum <= mask && *p != '\0') {
				if (*p != 'n') {
					Checkch[((unsigned)(*p))&mask] = (char) 1;
					Try[Trynum] = *p & mask;
					++p;
				}
				else {
					++p;
					/*
					 * "n" must be followed by exactly three
					 * characters; do not read past the end
					 * of a truncated escape.
					 */
					if (p[0] == '\0' || p[1] == '\0' ||
					    p[2] == '\0')
						usage ();
					num[0] = p[0];
					num[1] = p[1];
					num[2] = p[2];
					p += 3;
					Try[Trynum] = atoi(num) & mask;
					Checkch[atoi(num)&mask] = (char) 1;
				}
				++Trynum;
			}
			break;
		default:
			usage();
		}
		argv++; argc--;
	}

	if (!argc && !lflag && !aflag)
		usage ();

	if (linit () < 0)
		exit (0);

	treeinit (cpd);

	if (aflag) {
		askmode ();
		exit (0);
	}

	if (lflag) {
		infile = stdin;
		checkfile ();
		exit (0);
	}

	terminit ();

	while (argc--)
		dofile (*argv++);

	done ();
}

char firstbuf[BUFSIZ], secondbuf[BUFSIZ];
char *currentchar;
char token[BUFSIZ];

int quit;

char *currentfile = NULL;

/*
 * dofile - interactively check one file and rewrite it in place.
 *
 * The corrected text is first written to a temporary file by
 * checkfile().  The original is then linked to "<filename>.bak" and
 * unlinked, and the temporary file is copied to a newly created
 * <filename>.  The .bak file is removed at the end only when -x was
 * given and the rewrite got that far.  On any failure a message is
 * printed, the function pauses for two seconds and returns, closing
 * whatever stream it still had open.
 */
dofile (filename)
char *filename;
{
	int c;
	char	bakfile[256];

	currentfile = filename;

	if ((infile = fopen (filename, "r")) == NULL) {
		fprintf (stderr, "Can't open %s\r\n", filename);
		sleep (2);
		return;
	}

	if (access (filename, 2) < 0) {
		fprintf (stderr, "Can't write to %s\r\n", filename);
		fclose (infile);
		sleep (2);
		return;
	}

	/* the backup name is built in a fixed buffer; refuse what won't fit */
	if (strlen (filename) + sizeof ".bak" > sizeof bakfile) {
		fprintf (stderr, "File name too long: %s\r\n", filename);
		fclose (infile);
		sleep (2);
		return;
	}

	strcpy(tempfile, TEMPNAME);
	mktemp (tempfile);
	if ((outfile = fopen (tempfile, "w")) == NULL) {
		fprintf (stderr, "Can't create %s\r\n", tempfile);
		fclose (infile);
		sleep (2);
		return;
	}

	quit = 0;

	checkfile ();

	fclose (infile);
	fclose (outfile);

	treeoutput ();

	if ((infile = fopen (tempfile, "r")) == NULL) {
		fprintf (stderr, "tempoary file disappeared (%s)\r\n", tempfile);	
		sleep (2);
		return;
	}

	/* keep the original as .bak; only drop the old name if the link worked */
	sprintf(bakfile, "%s.bak", filename);
	if(link(filename, bakfile) == 0)
		unlink(filename);

	/* if we can't write new, preserve .bak regardless of xflag */
	if ((outfile = fopen (filename, "w")) == NULL) {
		fprintf (stderr, "can't create %s\r\n", filename);
		fclose (infile);
		sleep (2);
		return;
	}

	while ((c = getc (infile)) != EOF)
		putc (c, outfile);

	fclose (infile);
	fclose (outfile);

	unlink (tempfile);
	if (xflag)
		unlink(bakfile);
}

/*
 * checkfile - read infile a line at a time and check every word in it.
 *
 * In list mode (lflag set) misspelled words are printed on stdout and
 * nothing is written to outfile.  Otherwise every character read is
 * copied to outfile, with each word passed through correct() first so
 * that it may be replaced.  Once quit is set the remainder of infile is
 * copied to outfile unchecked.
 *
 * A leading '.' command word and the escapes \fX, \sN / \sNN and \(XX
 * are copied through without being checked.
 */
checkfile ()
{
	int c;
	char *p;
	int len;

	secondbuf[0] = 0;
	currentchar = secondbuf;

	while (1) {
		/* remember the previous line; correct() prints it for context */
		strcpy (firstbuf, secondbuf);
		if (quit) {	/* quit can't be set in l mode */
			while (fgets (secondbuf, sizeof secondbuf, infile) != NULL)
				fputs (secondbuf, outfile);
			break;
		}

		if (fgets (secondbuf, sizeof secondbuf, infile) == NULL)
			break;
		currentchar = secondbuf;
		
		/* drop the trailing newline; it is written back at the end of the line */
		len = strlen (secondbuf) - 1;
		if (secondbuf [ len ] == '\n')
			secondbuf [ len ] = 0;

		/* if this is a formatter command, skip over it */
		if (*currentchar == '.') {
			while (*currentchar && !myspace (*currentchar)) {
				if (!lflag)
					putc (*currentchar, outfile);
				currentchar++;
			}
			/* the command was the whole line */
			if (*currentchar == 0) {
				if (!lflag)
					putc ('\n', outfile);
				continue;
			}
		}

		while (1) {
			/* copy everything up to the start of the next word */
			while (*currentchar && !iswordch(*currentchar)) {
				/* formatting escape sequences */
				if (*currentchar == '\\') {
				    if(currentchar[1] == 'f') {
					/* font change: \fX */
					copyout(&currentchar, 3);
					continue;
				    }
				    else if(currentchar[1] == 's') {
					/* size change */
					/*
					 * NOTE(review): this compares the character
					 * code with the integer 6, not the digit
					 * '6' - confirm which was intended.
					 */
					if(currentchar[2] < 6 &&
					   currentchar[2] != 0)
						/* two digit size */
						copyout(&currentchar, 4);
					else
						/* one digit size */
						copyout(&currentchar, 3);
					continue;
				    }
				    else if(currentchar[1] == '(') {
					/* extended char set escape: \(XX */
					copyout(&currentchar, 4);
					continue;
				    }
				}

				if (!lflag)
					putc (*currentchar, outfile);
				currentchar++;
			}

			if (*currentchar == 0)
				break;

			/*
			 * collect the word into token; an apostrophe is part of
			 * the word only when a word character follows it
			 */
			p = token;
			while (iswordch(*currentchar) ||
			       (*currentchar == '\'' &&
				iswordch(*(currentchar + 1))))
			  *p++ = *currentchar++;
			*p = 0;
			if (lflag) {
				if (!good (token))
					printf ("%s\r\n", token);
			} else {
				if (!quit)
				correct (token, &currentchar);
			}
			/* token holds the replacement if correct() changed it */
			if (!lflag)
				fprintf (outfile, "%s", token);
		}
		if (!lflag)
			putc ('\n', outfile);
	}
}

char possibilities[10][BUFSIZ];
int pcount;

/*
 * correct - show a questionable word to the user and act on the reply.
 *
 * token	the word just scanned; overwritten with the replacement
 *		when the user picks or types one
 * currentchar	address of the scan pointer into secondbuf, which points
 *		just past the word on entry; inserttoken() moves it when
 *		the word is replaced
 *
 * Returns immediately if good() accepts the word.  Otherwise the word,
 * the current file name, up to ten suggestions and the two lines of
 * context (firstbuf and secondbuf, with the word highlighted) are
 * displayed and single keystroke commands are read until one of them
 * disposes of the word.
 */
correct (token, currentchar)
char *token;
char **currentchar;
{
	int c;
	int i;
	char *p;
	int len;
	char *begintoken;

	/* the word ends at *currentchar, so it starts len characters earlier */
	len = strlen (token);
	begintoken = *currentchar - len;

checkagain:
	if (good (token))
		return;

	erase ();
	printf ("    %s", token);
	if (currentfile)
		printf ("              File: %s", currentfile);
	printf ("\r\n\r\n");

	makepossibilities (token);

	/* the list ends at the first empty entry */
	for (i = 0; i < 10; i++) {
		if (possibilities[i][0] == 0)
			break;
		printf ("%d: %s\r\n", i, possibilities[i]);
	}

	move (15, 0);
	printf ("%s\r\n", firstbuf);

	/* current line: text before the word, the word between inverse() and normal(), the rest */
	for (p = secondbuf; p != begintoken; p++)
		putchar (*p);
	inverse ();
	for (i = strlen (token); i > 0; i--)
		putchar (*p++);
	normal ();
	while (*p)
		putchar (*p++);
	printf ("\r\n");

	while (1) {
		switch (c = (getchar () & NOPARITY)) {
		case 'Z' & 037:		/* control-Z */
			stop ();
			erase ();
			goto checkagain;
		case ' ':		/* accept this once */
			erase ();
			return;
		case 'x': case 'X':
			printf ("Are you sure you want to throw away your changes? ");
			c = (getchar () & NOPARITY);
			if (c == 'y' || c == 'Y') {
				erase ();
				/* presumably does not return - TODO confirm in done() */
				done ();
			}
			putchar (7);
			goto checkagain;
		case 'i': case 'I':	/* accept, keep flag set */
			treeinsert (token, 1);
			erase ();
			return;
		case 'a': case 'A':	/* accept, keep flag clear */
			treeinsert (token, 0);
			erase ();
			return;
		case 'L' & 037:		/* control-L: redraw */
			goto checkagain;
		case '?':
			givehelp ();
			goto checkagain;
		case '!':
			{
				char buf[200];
				move (18, 0);
				putchar ('!');
				/* getline() returns NULL when the input is cancelled */
				if (getline (buf) == NULL) {
					putchar (7);
					erase ();
					goto checkagain;
				}
				printf ("\r\n");
				shellescape (buf);
				erase ();
				goto checkagain;
			}
		case 'r': case 'R':
			move (18, 0);
			printf ("Replace with: ");
			if (getline (token) == NULL) {
				putchar (7);
				erase ();
				goto checkagain;
			}
			inserttoken (secondbuf, begintoken, token, currentchar);
			erase ();
			/* the typed replacement is itself checked */
			goto checkagain;
		case '0': case '1': case '2': case '3': case '4':
		case '5': case '6': case '7': case '8': case '9':
			/* only digits that label a displayed suggestion are valid */
			if (possibilities[c - '0'][0] != 0) {
				strcpy (token, possibilities[c - '0']);
				inserttoken (secondbuf, begintoken, token, currentchar);				erase ();
				return;
			}
			putchar (7);
			break;
		case 'q': case 'Q':
			/* checkfile() copies the rest of the file once quit is set */
			quit = 1;
			erase ();
			return;
		default:
			/* unknown key: ring the bell and read another */
			putchar (7);
			break;
		}
	}
}

/*
 * inserttoken - replace a word inside a line buffer.
 *
 * buf		the NUL terminated line
 * start	first character of the word being replaced (inside buf)
 * token	replacement text
 * currentchar	on entry *currentchar points just past the old word;
 *		on return it points just past the replacement
 *
 * The text that followed the old word is moved so that it directly
 * follows the replacement.  The caller must provide room in buf for
 * the longer line.
 */
inserttoken (buf, start, token, currentchar)
char *buf, *start, *token;
char **currentchar;
{
	char saved[BUFSIZ];
	char *tail;
	char *dst;

	/* work from a snapshot, since the tail is overwritten as we go */
	strcpy (saved, buf);

	/* the same position in the snapshot as the end of the old word */
	tail = saved + (*currentchar - buf);

	dst = start;
	while (*token != '\0') {
		*dst = *token;
		dst++;
		token++;
	}
	*currentchar = dst;

	strcpy (dst, tail);
}


/*
 * makepossibilities - rebuild the suggestion list for word.
 *
 * Empties possibilities[] and resets pcount, then runs the four near
 * miss generators in order.  A generator is skipped once ten
 * suggestions have been collected.
 */
makepossibilities (word)
char word[];
{
	int slot;

	pcount = 0;
	for (slot = 9; slot >= 0; slot--)
		possibilities[slot][0] = 0;

	/* the list is empty at this point, so the first generator always runs */
	wrongletter (word);
	if (pcount < 10)
		extraletter (word);
	if (pcount < 10)
		missingletter (word);
	if (pcount < 10)
		transposedletter (word);
}

char *cap();

/*
 * insert - add word to the suggestion list unless it is already there.
 *
 * Returns -1 when the list holds ten entries after the insertion,
 * otherwise 0 (including when word was a duplicate).  Callers must not
 * call this once -1 has been returned.
 */
insert (word)
char *word;
{
	int k = 0;

	while (k < pcount) {
		if (strcmp (possibilities[k], word) == 0)
			return (0);
		k++;
	}

	strcpy (possibilities[pcount], word);
	pcount++;

	return (pcount >= 10 ? -1 : 0);
}

/*
 * wrongletter - suggest words that differ from word in one character.
 *
 * Each position in turn is replaced by every character in Try; those
 * variants that good() accepts are capitalized like word and added to
 * the suggestion list.  Stops as soon as the list is full.
 */
wrongletter (word)
char word[];
{
	char trial[BUFSIZ];
	int pos, k, wordlen;

	strcpy (trial, word);
	wordlen = strlen (word);

	pos = 0;
	while (pos < wordlen) {
		for (k = 0; k < Trynum; k++) {
			trial[pos] = Try[k];
			if (!good (trial))
				continue;
			if (insert (cap (trial, word)) < 0)
				return;
		}
		/* put the original character back before moving on */
		trial[pos] = word[pos];
		pos++;
	}
}

/*
 * extraletter - suggest words obtained by deleting one character.
 *
 * Words shorter than three characters are not tried.  Every variant
 * good() accepts is capitalized like word and added to the suggestion
 * list; stops as soon as the list is full.
 */
extraletter (word)
char word[];
{
	char shorter[BUFSIZ];
	int skip, from, to, wordlen;

	wordlen = strlen (word);
	if (wordlen < 3)
		return;

	for (skip = 0; skip < wordlen; skip++) {
		/* copy everything except the character at skip */
		to = 0;
		for (from = 0; from < wordlen; from++) {
			if (from == skip)
				continue;
			shorter[to++] = word[from];
		}
		shorter[to] = 0;

		if (good (shorter) && insert (cap (shorter, word)) < 0)
			return;
	}
}

/*
 * missingletter - suggest words obtained by inserting one character.
 *
 * A gap is opened at every position, including before the first and
 * after the last character, and filled with each character in Try.
 * Every variant good() accepts is capitalized like word and added to
 * the suggestion list; stops as soon as the list is full.
 */
missingletter (word)
char word[];
{
	char longer[BUFSIZ];
	int gap, k, wordlen;

	wordlen = strlen (word);

	for (gap = 0; gap <= wordlen; gap++) {
		/* head stays in place, tail shifts right by one */
		for (k = 0; k < gap; k++)
			longer[k] = word[k];
		for (k = gap; k < wordlen; k++)
			longer[k + 1] = word[k];
		longer[wordlen + 1] = 0;

		for (k = 0; k < Trynum; k++) {
			longer[gap] = Try[k];
			if (!good (longer))
				continue;
			if (insert (cap (longer, word)) < 0)
				return;
		}
	}
}

/*
 * transposedletter - suggest words obtained by swapping two adjacent
 * characters.
 *
 * Every variant good() accepts is capitalized like word and added to
 * the suggestion list; stops as soon as the list is full.
 */
transposedletter (word)
char word[];
{
	char trial[BUFSIZ];
	int held;
	int k;

	strcpy (trial, word);

	for (k = 0; trial[k + 1]; k++) {
		/* swap the pair at k, k+1 */
		held = trial[k];
		trial[k] = trial[k + 1];
		trial[k + 1] = held;

		if (good (trial) && insert (cap (trial, word)) < 0)
			return;

		/* and swap it back */
		held = trial[k];
		trial[k] = trial[k + 1];
		trial[k + 1] = held;
	}
}

/*
 * cap - return word capitalized the same way as pattern.
 *
 * If the first two characters of pattern are upper case the whole word
 * is upper cased; if only the first is, the word is capitalized (first
 * character upper, rest lower); otherwise the word is lower cased.
 *
 * The result lives in a static buffer that is overwritten by the next
 * call.  An empty word yields an empty string.
 */
char *
cap (word, pattern)
char word[], pattern[];
{
	static char newword[BUFSIZ];
	char *p, *q;

	if (*word == 0) {
		/* always hand back a valid string, never an indeterminate pointer */
		newword[0] = 0;
		return (newword);
	}

	if (myupper (pattern[0])) {
		if (myupper (pattern[1])) {
			/* ALL CAPS */
			for (p = word, q = newword; *p; p++, q++) {
				if (mylower (*p))
					*q = toupper (*p);
				else
					*q = *p;
			}
			*q = 0;
		} else {
			/* Capitalized */
			if (mylower (word [0]))
				newword[0] = toupper (word[0]);
			else
				newword[0] = word[0];

			for (p = word + 1, q = newword + 1; *p; p++, q++)
				if (myupper (*p))
					*q = tolower (*p);
				else
					*q = *p;

			*q = 0;
		}
	} else {
		/* all lower case */
		for (p = word, q = newword; *p; p++, q++)
			if (myupper (*p))
				*q = tolower (*p);
			else
				*q = *p;
		*q = 0;
	}
	return (newword);
}

/*
 * getline - read a line typed by the user into s, echoing as it goes.
 *
 * Input characters are masked with NOPARITY.  A backslash takes the
 * following character literally; erasechar removes the last character
 * and killchar the whole line.  Newline or carriage return ends the
 * line.
 *
 * Returns s, NUL terminated, or NULL if the user typed control-G or
 * the input reached end of file.  The caller must supply a buffer
 * large enough for whatever is typed; no length check is made here.
 */
char *
getline (s)
char *s;
{
	char *p;
	int c;

	p = s;

	while (1) {
		/* test for EOF before masking, or it turns into 0x7f */
		c = getchar ();
		if (c == EOF)
			return (NULL);
		c &= NOPARITY;
		if (c == '\\') {
			putchar ('\\');
			c = getchar ();
			if (c == EOF)
				return (NULL);
			c &= NOPARITY;
			/* replace the echoed backslash by the quoted character */
			backup ();
			putchar (c);
			*p++ = c;
		} else if (c == ('G' & 037)) {
			return (NULL);
		} else if (c == '\n' || c == '\r') {
			*p = 0;
			return (s);
		} else if (c == erasechar) {
			if (p != s) {
				p--;
				backup ();
				putchar (' ');
				backup ();
			}
		} else if (c == killchar) {
			while (p != s) {
				p--;
				backup ();
				putchar (' ');
				backup ();
			}
		} else {
			*p++ = c;
			putchar (c);
		}
	}
}

/*
 * askmode - answer spelling queries read from stdin, one word per line.
 *
 * For each input line one line is written to stdout:
 *	*		the word was found and rootword is empty
 *	+ root		the word was found and rootword is set
 *	& w1 w2 ...	not found; near misses follow
 *	#		not found and no near misses
 *
 * With -f, stdout is first redirected to askfilename.  With -s,
 * stop() is called after every answer and, if -f is also in effect,
 * the output file is rewound and re-created.
 *
 * Input is read with fgets so an over-long line cannot overrun buf;
 * such a line is handled as several words.
 */
askmode ()
{
	char buf[BUFSIZ];
	int i;
	int len;

	if (fflag) {
		if (freopen (askfilename, "w", stdout) == NULL) {
			fprintf (stderr, "Can't create %s\n", askfilename);
			exit (1);
		}
	}

	setbuf (stdin, NULL);
	setbuf (stdout, NULL);

	while (fgets (buf, sizeof buf, stdin) != NULL) {
		/* fgets keeps the newline; remove it */
		len = strlen (buf);
		if (len > 0 && buf[len - 1] == '\n')
			buf[len - 1] = 0;

		if (good (buf)) {
			if (rootword[0] == 0) {
				printf ("*\n");	/* perfect match */
			} else {
				printf ("+ %s\n", rootword);
			}
		} else {
			makepossibilities (buf);
			if (possibilities[0][0]) {
				printf ("& ");
				for (i = 0; i < 10; i++) {
					if (possibilities[i][0] == 0)
						break;
					printf ("%s ", possibilities[i]);
				}
				printf ("\n");
			} else {
				printf ("#\n");
			}
		}
		if (sflag) {
			stop ();
			if (fflag) {
				rewind (stdout);
				creat (askfilename, 0666);
			}
		}
	}
}


/*
 * copyout - pass up to cnt characters through unchecked.
 *
 * Advances *cc over at most cnt characters, stopping early at the end
 * of the string.  The characters are written to outfile unless list
 * mode (lflag) is in effect.
 */
copyout(cc, cnt)
char	**cc;
{
	char *cursor;

	for (cursor = *cc; cnt > 0 && *cursor != 0; cnt--, cursor++) {
		if (lflag)
			continue;
		putc (*cursor, outfile);
	}
	*cc = cursor;
}
SHAR_EOF
fi # end of overwriting check
if test -f 'ispell.man'
then
	echo shar: will not over-write existing file "'ispell.man'"
else
cat << \SHAR_EOF > 'ispell.man'
.\" -*- Mode:Text -*-
.TH ISPELL local MIT
.SH NAME
ispell \- Correct spelling for a file
.SH SYNOPSIS
.B ispell
[
.B \-x
|
.B \-d
file |
.B \-p
file |
.B \-w
chars ] file .....
.br
.B ispell
[
.B \-d
file |
.B \-p
file |
.B \-w
chars ]
.B \-l
.br
.B ispell
[
.B \-d
file |
.B \-p
file
]
.B \-a
.SH DESCRIPTION
.PP
.I Ispell
is fashioned after the
.I spell
program from ITS (called
.I ispell
on Twenex systems.)  The most common usage is "ispell filename".  In this
case,
.I ispell
will display each word which does not appear in the dictionary, and
allow you to change it.  If there are "near misses" in the dictionary
(words which differ by only a single letter, a missing or extra letter,
or a pair of transposed letters), then they are also displayed.  If you
think the word is correct as it stands, you can type either "Space" to
accept it this one time, or "I" to accept it and put it in your private
dictionary.  If one of the near misses is the word you want, type the
corresponding number.  Finally, if none of these choices is right, you
can type "R" and you will be prompted for a replacement word.
.PP
When a misspelled word is found, it is printed at the top of the screen.
Any near misses will be printed on the following lines, and finally, two
lines containing the word are printed at the bottom of the screen.  If
your terminal can type in reverse video, the word itself is highlighted.
.PP
The
.B \-l
or "list" option to
.I ispell
is used to produce a list of misspelled words from the standard input.
.PP
The
.B \-a
is intended to be used from other programs through a pipe.  In this
mode,
.I ispell
expects the standard input to consist of single words.  Each word is
read, and a single line is written to the standard output.  If the word
was found in the main dictionary, or your personal dictionary, then the
line contains only a '*'.  If the word was found through suffix removal,
then the line contains a '+', a space, and the root word.  If the word
is not in the dictionary, but there are near misses, then the line
contains an '&', a space, and a list of the near misses separated by
spaces.  Also, each near miss is capitalized the same as the input
words.  Finally, if the word neither appears in the dictionary, and
there are no near misses, then the line contains only a '#'.  This mode
is also suitable for interactive use when you want to figure out the
spelling of a single word.  (These characters are the same as the codes
that the real spell program uses.)
.PP
The -x option causes
.I ispell
to remove the .bak file that it normally leaves.  The .bak file contains
the pre-corrected text.  If there are file opening / writing errors,
the .bak file may be left for recovery purposes even with the -x option.
.PP
The -d option is used to specify an alternate hashed dictionary file,
other than the default.  If the filename does not begin with a "/",
the library directory for the default dictionary file is prefixed.
This is useful to allow dictionaries which prefer alternate british
spellings ("centre", "tyre", etc), or add lists of special-purpose
jargon and acronyms for subclasses of documents.  There are some shortcomings
in attempting to provide foreign-language dictionaries, but something
like "-dfrench" could be made to work somewhat.
.PP
The -p option is used to specify an alternate personal dictionary file.
If the file name does not begin with "/", $HOME is prefixed.  Also, the
shell variable WORDLIST may be set, which renames the personal dictionary
in the same manner.  The command line overrides WORDLIST setting.  If
neither is present "ispell.words" is used.
.PP
The -w option may be used to specify characters other than alphabetics
which may also appear in words.  For instance, -w "&" will allow "AT&T"
to be picked up.  Underscores are useful in many technical documents.
There is an admittedly crude provision in this option for 8-bit international
characters.  If "n" appears in the character string, the three characters
following are a DECIMAL code 0 - 255, for the character.  There must be
three decimal characters in all cases, so you have to prepend with 0's,
for instance, to include bells and formfeeds in your words (an admittedly
silly thing to do, but aren't most pedagogical examples):
.PP
n007n012
.PP
Numeric digits other than the three following "n" are simply numeric
characters.  Use of "n" does not conflict with anything because actual
alphabetics have no meaning - alphabetics are already accepted.
.I Ispell
will typically be used with input from a file, meaning that preserving
parity for possible 8 bit characters from the input text is OK.  If you
specify the -l option, and actually type text from the terminal, this may
create problems if your stty settings preserve parity.
.PP
It is possible to install
.I ispell
in such a way as to only support ASCII range text if desired.
.SH DEFAULT FILES
<ispell library directory>/ispell.hash
.br
$HOME/ispell.words
.SH BUGS
It takes about five seconds for
.I ispell
to read in the hash table.
.sp
Perhaps more than ten choices should be allowed for near misses.
.sp
The hash table is stored as a quarter-megabyte array, so a PDP-11
version does not seem likely.
.sp
.I Ispell
should understand more
.I troff
syntax, and deal more intelligently with contractions.
.sp
While alternate dictionaries for foreign languages could be defined, and
the international characters included in words, rules concerning
word endings / pluralization accommodate english only.
.SH AUTHOR
Pace Willisson (pace@mit-vax)
SHAR_EOF
fi # end of overwriting check
if test -f 'tree.c'
then
	echo shar: will not over-write existing file "'tree.c'"
else
cat << \SHAR_EOF > 'tree.c'
/* -*- Mode:Text -*- */
/*
 * tree.c - a tree style dictionary for user's personal words
 *
 * Pace Willisson, 1983
 */

#include <stdio.h>
#include <ctype.h>
#include <sys/param.h>
#include "ispell.h"
#include "config.h"

char *getenv();
char *upcase();

/* root of the binary tree that holds the personal dictionary words */
static struct node *root = NULL;

/*
** Forward declarations for the file-local helpers.  They are defined
** "static" further down, so they have to be declared "static" before
** their first use: a static definition that follows a plain or implicit
** (extern) declaration is undefined in ANSI C and is rejected by many
** compilers.
*/
static struct node *tinsert();
static int tprint();
static int tlookup();
static int toutput1();

/* path name of the personal dictionary, filled in by treeinit() */
static char personaldict[MAXPATHLEN];
/* stream used while the personal dictionary is being read or written */
static FILE *dictf;
/*
** set by treeinsert(), cleared by treeinit() once the file has been
** loaded; treeoutput() rewrites the file only when this is non-zero
*/
static int newwords = 0;

/*
** treeinit - load the personal dictionary into the tree
**
** p is the name of the dictionary file.  If it is NULL the PDICTVAR
** environment variable is consulted, and if that is not set either,
** DEFPDICT is used.  A name that begins with '/' is used as it stands
** (HOME is not needed in that case); any other name is taken relative
** to $HOME.  The resulting path is left in personaldict, where
** treeoutput() finds it.
**
** Nothing is loaded if HOME is needed but not set, if the path does not
** fit in personaldict, or if the file cannot be opened for reading.
** Blank lines in the file are skipped.
*/
treeinit (p)
char *p;
{
	char *h;
	char buf[BUFSIZ];
	int need;
	int len;

	if (p == NULL)
		p = getenv (PDICTVAR);

	if (p != NULL && *p == '/') {
		/* absolute name: no directory to prepend */
		h = NULL;
		need = strlen (p) + 1;
	} else {
		if ((h = getenv ("HOME")) == NULL)
			return;
		if (p == NULL)
			p = DEFPDICT;
		/* room for h, the '/', p and the terminating NUL */
		need = strlen (h) + 1 + strlen (p) + 1;
	}

	/* never write past the end of personaldict */
	if (need > (int) sizeof personaldict) {
		fprintf (stderr, "Personal dictionary path too long\r\n");
		return;
	}

	if (h == NULL)
		strcpy (personaldict, p);
	else
		sprintf (personaldict, "%s/%s", h, p);

	if ((dictf = fopen (personaldict, "r")) == NULL)
		return;

	while (fgets (buf, sizeof buf, dictf) != NULL) {
		len = strlen (buf);

		/* len is 0 when the line starts with a NUL: don't index buf[-1] */
		if (len > 0 && buf [ len - 1 ] == '\n')
			buf [ --len ] = 0;
		if (len == 0)
			continue;	/* a blank line is not a word */
		treeinsert (buf, 1);
	}

	fclose (dictf);

	/* the words just loaded are already in the file */
	newwords = 0;

	/* 2 asks access() for write permission */
	if (!lflag && !aflag && access (personaldict, 2) < 0)
		printf ("Warning: Cannot update personal dictionary (%s)\r\n", personaldict);
}

/*
** treeprint - print every word in the tree on stdout, enclosed in
** parentheses, in the order in which tprint() visits the nodes.
*/
treeprint ()
{
	fputs ("(", stdout);
	tprint (root);
	fputs (")", stdout);
}

/*
** tprint - print the words of a subtree, each followed by a blank:
** the node itself first, then its left subtree, then its right subtree.
*/
static
tprint (root)
struct node *root;
{
	register struct node *np;

	/* recurse into the left branch, step along the right one */
	for (np = root; np != NULL; np = np->right) {
		printf ("%s ", np->word);
		tprint (np->left);
	}
}


/*
** treeinsert - add a word to the personal dictionary tree
**
** The word is copied and the copy is upper-cased before it goes into
** the tree, so the caller's string is left alone.  keep is handed on
** to tinsert(); treeoutput() writes only words whose keep flag is set.
** The tree is marked as changed so that treeoutput() rewrites the file.
**
** A word longer than BUFSIZ - 1 characters is truncated instead of
** being allowed to run off the end of the local buffer.
*/
treeinsert (word, keep)
char *word;
int keep;
{
	char nword[BUFSIZ];
	register int i;

	/* bounded copy: always leaves room for the terminating NUL */
	for (i = 0; i < (int) sizeof nword - 1 && word[i] != '\0'; i++)
		nword[i] = word[i];
	nword[i] = '\0';

	root = tinsert (upcase (nword), root, keep);
	newwords = 1;
}

/*
** tinsert - insert word into the subtree rooted at root
**
** Returns the root of the subtree, which is a new node when the subtree
** was empty, so the caller stores the result back:
**	root = tinsert (word, root, keep);
** The word is copied; the caller's string is not kept.
**
** If the word is already in the tree and keep is non-zero, the existing
** node's keep flag is raised, so a word that was first entered with
** keep == 0 and is later entered with keep != 0 is written out by
** treeoutput().  A keep flag that is already set is never cleared.
**
** A node and its copy of the word come from one calloc() call; the
** word must not be freed on its own.  If memory runs out a message is
** printed on stderr and the tree is left as it was.
*/
static
struct node *
tinsert (word, root, keep)
char *word;
struct node *root;
int keep;
{
	struct node **link;
	struct node *np;
	int cmp;

	/*
	** Walk down to the node that holds the word or to the empty link
	** where it belongs.  Done with a loop so that a long, already
	** sorted dictionary does not cost one stack frame per word.
	*/
	link = &root;
	while ((np = *link) != NULL) {
		cmp = strcmp (word, np->word);

		if (cmp == 0) {
			if (keep)
				np->keep = keep;
			return (root);
		}

		link = (cmp < 0) ? &np->left : &np->right;
	}

	/* calloc leaves the left and right links zeroed */
	np = (struct node *) calloc (1, sizeof (struct node) + strlen (word) + 1);
	if (np == NULL) {
		fprintf (stderr, "Out of memory, cannot add %s\r\n", word);
		return (root);
	}

	/* the word lives directly behind the node structure */
	np->word = (char *) (np + 1);
	strcpy (np->word, word);
	np->keep = keep;
	*link = np;

	return (root);
}

/*
** treelookup - is word in the personal dictionary tree?
**
** The search is made with an upper-cased copy of word; the caller's
** string is not modified.  Returns 1 if the word is found, 0 if not.
**
** A word longer than BUFSIZ - 1 characters is truncated instead of
** being allowed to run off the end of the local buffer.
*/
treelookup (word)
char *word;
{
	char nword[BUFSIZ];
	register int i;

	/* bounded copy: always leaves room for the terminating NUL */
	for (i = 0; i < (int) sizeof nword - 1 && word[i] != '\0'; i++)
		nword[i] = word[i];
	nword[i] = '\0';

	if (tlookup (upcase (nword), root))
		return (1);
	return (0);
}

/*
** tlookup - search the subtree rooted at root for word
**
** word is compared with strcmp(), so it must already be in the same
** case as the stored words.  Returns 1 if found, 0 if not.
*/
static
tlookup (word, root)
char *word;
struct node *root;
{
	register struct node *np;
	int order;

	np = root;
	while (np != NULL) {
		order = strcmp (word, np->word);
		if (order == 0)
			return (1);

		/* smaller words are to the left, larger ones to the right */
		np = (order < 0) ? np->left : np->right;
	}

	return (0);
}

/*
** treeoutput - write the personal dictionary back to its file
**
** Does nothing unless treeinsert() has been called since the dictionary
** was loaded.  Otherwise the file named by personaldict is rewritten
** from scratch with the words whose keep flag is set, one per line.
** A failure to create the file, or an error while writing it, is
** reported on stderr.
*/
treeoutput ()
{
	int failed;

	if (newwords == 0)
		return;

	if ((dictf = fopen (personaldict, "w")) == NULL) {
		fprintf (stderr, "Can't create %s\r\n", personaldict);
		return;
	}

	toutput1 (root);

	/*
	** A write error (a full disk, say) may show up while the words are
	** written or only when the last buffer is flushed by fclose, so
	** look at both the stream's error flag and the result of fclose.
	*/
	failed = ferror (dictf);
	if (fclose (dictf) == EOF)
		failed = 1;

	if (failed)
		fprintf (stderr, "Error writing %s\r\n", personaldict);
}

/*
** toutput1 - write the words of a subtree to dictf, one per line
**
** Only nodes whose keep flag is set are written.  The node itself comes
** first, then its left subtree, then its right subtree.
*/
static
toutput1 (root)
struct node *root;
{
	register struct node *np;

	/* recurse into the left branch, step along the right one */
	for (np = root; np != NULL; np = np->right) {
		if (np->keep)
			fprintf (dictf, "%s\n", np->word);
		toutput1 (np->left);
	}
}

/*
** upcase - convert a string to upper case in place
**
** Only characters for which mylower() is true are passed to toupper();
** everything else is left as it is.  Returns s.
*/
char *
upcase (s)
register char *s;
{
	register char *p;

	for (p = s; *p != '\0'; p++) {
		if (mylower (*p))
			*p = toupper (*p);
	}

	return (s);
}
SHAR_EOF
fi # end of overwriting check
if test -f 'good.c.diff'
then
	echo shar: will not over-write existing file "'good.c.diff'"
else
cat << \SHAR_EOF > 'good.c.diff'
11a12
> #include "config.h"
26c27
< 		if (islower (*p))
---
> 		if (mylower (*p))
SHAR_EOF
fi # end of overwriting check
if test -f 'lookup.c.diff'
then
	echo shar: will not over-write existing file "'lookup.c.diff'"
else
cat << \SHAR_EOF > 'lookup.c.diff'
9d8
< #include <ctype.h>
10a10
> #include "config.h"
15a16,17
> extern hashname[];
> 
22d23
< 	char hashname[100];
24,26d24
< 	strcpy (hashname, LIBDIR);
< 	strcat (hashname, "/ispell.hash");
< 
30,31c28
< 	if ((hashfd = open ("ispell.hash", 0)) < 0 &&
< 	    (hashfd = open (hashname, 0)) < 0) {
---
> 	if ((hashfd = open (hashname, 0)) < 0) {
SHAR_EOF
fi # end of overwriting check
if test -f 'term.c.diff'
then
	echo shar: will not over-write existing file "'term.c.diff'"
else
cat << \SHAR_EOF > 'term.c.diff'
63c63
< 	short tpgrp;
---
> 	int tpgrp;
SHAR_EOF
fi # end of overwriting check
if test -f 'ispell.h.diff'
then
	echo shar: will not over-write existing file "'ispell.h.diff'"
else
cat << \SHAR_EOF > 'ispell.h.diff'
3,4d2
< #define LIBDIR "/fe1/rti/bobm/data/ispell"
< 
123,124d120
< #define WORDLEN 30
< 
131,133d126
< #define MAGIC 1
< 
< 	
SHAR_EOF
fi # end of overwriting check
#	End of shell archive
exit 0

billr@tekred.UUCP (01/30/87)

After snarfing bobm@rtech's recent ispell enhancements, I applied the
previously posted diffs for the 'L' (lookup) command to the new changes.
(I also added some code that will use the look(1) command when no
wildcards are specified.) Since the 'L' command looks up words in
/usr/dict/web2 it can be used to verify spelling of words that you
think are correct but are not in ispell's dictionary.
For those of you who may wish to do the same, here is a new set of
diffs suitable for "patch" or for hand editing.
	-------cut here------
# The rest of this file is a shell script which will extract:
# config.h.diff ispell.c.diff ispell.man.diff
echo x - config.h.diff
sed -e 's/^X//' >config.h.diff <<'!Funky!Stuff!'
X21a22,30
X> /* define LOOK if look(1) command is available */
X> #define LOOK
X> 
X> /* path to egrep (use speeded up version if available) */
X> #define EGREPCMD "/usr/local/egrep"
X> 
X> /* path to wordlist for Lookup command (typically /usr/dict/{words|web2} */
X> #define WORDS	"/usr/dict/web2"
X> 
!Funky!Stuff!
echo x - ispell.c.diff
sed -e 's/^X//' >ispell.c.diff <<'!Funky!Stuff!'
X59a60
X> 	printf ("L       Look up words in system dictionary.\r\n");
X62c63
X< 	printf ("X       Exit immediately.  Asks for conformation.  ");
X---
X> 	printf ("X       Exit immediately.  Asks for confirmation.  ");
X523a525,539
X> 		case 'l': case 'L':
X> 			{
X> 				char buf[100];
X> 				move (18, 0);
X> 				printf ("Lookup string ('*' is wildcard): ");
X> 				if (getline (buf) == NULL) {
X> 					putchar (7);
X> 					erase ();
X> 					goto checkagain;
X> 				}
X> 				printf ("\r\n\r\n");
X> 				lookharder (buf);
X> 				erase ();
X> 				goto checkagain;
X> 			}
X819a836,866
X> }
X> 
X> lookharder(string)
X> char *string;
X> {
X> 	char cmd[150];
X> 	char *g, *s, grepstr[100];
X> 	int wild = 0;
X> 
X> 	g = grepstr;
X> 	for (s = string; *s != '\0'; s++)
X> 		if (*s == '*') {
X> 			wild++;
X> 			*g++ = '.';
X> 			*g++ = '*';
X> 		} else
X> 			*g++ = *s;
X> 	*g = '\0';
X> 	if (grepstr[0]) {
X> #ifdef LOOK
X> 		if (wild)
X> 			/* string has wild card characters */
X> 			sprintf (cmd, "%s -i '^%s$' %s", EGREPCMD, grepstr, WORDS);
X> 		else
X> 			/* no wild, use look(1) */
X> 			sprintf (cmd, "/usr/bin/look -df %s %s", grepstr, WORDS);
X> #else
X> 		sprintf (cmd, "%s -i '^%s$' %s", EGREPCMD, grepstr, WORDS);
X> #endif
X> 		shellescape (cmd);
X> 	}
!Funky!Stuff!
echo x - ispell.man.diff
sed -e 's/^X//' >ispell.man.diff <<'!Funky!Stuff!'
X53a54,55
X> If you want to see a list of words that might be close using wildcard
X> characters, type "L" to lookup a word in the system dictionary.
X131c133
X< <ispell library directory>/ispell.hash
X---
X> /usr/public/lib/ispell.hash
X133c135,139
X< $HOME/ispell.words
X---
X> /usr/dict/web2		for the Lookup function
X> .br
X> $HOME/ispell.words	user's private dictionary
X> .SH SEE ALSO
X> spell(1), egrep(1), look(1)
!Funky!Stuff!
-- 

	-Bill Randle
	Tektronix, Inc.
	billr@tekred.TEK.COM