[net.sources.d] Wanted, nearly equal function for strings.

faustus@ucbcad.BERKELEY.EDU (Wayne A. Christopher) (07/05/86)
Here is a program that I wrote to interactively correct spelling errors...
It is *SLOW* but it has a reasonable closest-match algorithm that I made
up by trial and error... It sounds a lot like the one you mention from
BYTE...

# The rest of this file is a shell script which will extract:
# READ.ME spellfix.1 Makefile spellfix.h adjacent.c baseword.c compare.c dconvert.c fixwords.c freqcount.c getbuf.c makeadj.c makemod.c readdict.c spellcheck.c spellfix.c wordchange.c freqwords
echo x - READ.ME
cat >READ.ME <<'!Funky!Stuff!'
This is the ultimate spelling correction program.  It identifies misspelled
words, and then tries to figure out how they should be spelled, using
closest-match heuristics on words in the dictionary.  You will have
to compile several programs, and install them in a public directory --
they are baseword, spellcheck, and wordchange.  You should also copy the
file freqwords to this directory.  Then change the definition of
BINDIR in spellfix.h to this path.

This program is a prototype -- it is VERY slow, and the user interface
needs a lot of work.  I don't have time to work on it, so if anybody
makes any improvements I will be very happy to see them.

Note that the file freqwords has only a few words in it -- you'll have to
assemble your own list if you want it... It's a performance enhancement, so
spellfix will run without it...
!Funky!Stuff!
echo x - spellfix.1
cat >spellfix.1 <<'!Funky!Stuff!'
.\" RCS Info: $Revision: 1.2 $ on $Date: 86/04/02 10:33:33 $
.\"           $Source: /ic4/faustus/src/spellfix/RCS/spellfix.1,v $
.\" Copyright (c) 1985 Wayne A. Christopher, U. C. Berkeley CAD Group
.\"	Permission is granted to do anything with this code except sell it
.\"	or remove this message.
.TH SPELLFIX 1 "October 10, 1985"
.UC 4
.SH NAME
spellfix, baseword, spellcheck, wordchange \- interactively correct spelling
.SH SYNOPSIS
.B spellfix
document
.br
.SH DESCRIPTION
.B spellfix
interactively corrects the spelling in
.I document.
For each misspelled word, it determines a list of possible correct
spellings for the word and displays them on the screen, updating the
list as it finds more. It searches in order the list of correctly spelled
words in the document, a list of the 4000 most frequent words, and the
full dictionary
.I /usr/dict/words.
Also, if the user has a file called
.I .spellrc
in his home directory, words in this file are considered properly
spelled.
.PP
As the program is displaying words, the user may type a space to stop the
process. He is then prompted for a command, which may be one of:
.TP
E
Enter the correct spelling of the word, terminated by a newline.
.TP
?
Print out a help message.
.TP
Q
Exit
.B spellfix
immediately (doesn't save changes).
.TP
W
Write out the changes so far and exit.
.PP
Any other character is taken to be a choice of one of the words currently
displayed.
.PP
A line of context is provided, so that the user may see where the word is
used. Only one line in which the misspelled word appears is printed,
though.
.PP
.B spellfix
makes use of several other (possibly useful) programs:
.TP
.B baseword
Reads a list of words on the standard input, and for each word,
outputs a line consisting of:
.IP
\fIword0 word1 change1 word2 change2 ... wordN changeN\fR
Where
.I word0
is the original word, and each
.I wordI
is a possible base word for
.I word0,
that is, one with all the prefixes and suffixes stripped off. The
corresponding
.I changeI
is an editing script that describes how to reconstruct the original
word. The characters in this script have the following meanings:
.br
.nf
$ Go to the end of the word.
^ Go to the beginning of the word
\&. Duplicate the current letter.
+ Add the following characters.
- Delete the following characters.
c Any other character is either added or deleted, depending on whether
	a '+' or a '-' was last executed.
.TP
.B spellcheck [bad good]
Takes as input the output of baseword and outputs the lines for which
none of the possible base words is in the dictionary.
.TP
\fBwordchange file ...\fR
Reads
.I old new
pairs from the standard input and for every occurrence of
.I old
in any of the files, substitutes
.I new.
(Except when
.I old
is only part of a longer word.)
.SH "SEE ALSO"
spell(1)
.SH AUTHOR
Wayne Christopher (faustus@cad.berkeley.edu)
.SH BUGS
.PP
Suffixes and prefixes aren't dealt with well enough.
!Funky!Stuff!
echo x - Makefile
cat >Makefile <<'!Funky!Stuff!'
#
# RCS Info: $Revision: 1.4 $ on $Date: 86/04/02 10:33:03 $
#           $Source: /ic4/faustus/src/spellfix/RCS/Makefile,v $
# Copyright (c) 1985 Wayne A. Christopher, U. C. Berkeley CAD Group
#
# Makefile for the spellfix system
#
# For dependencies, see below in LDFLAGS and CFLAGS.

# C compiler, used both for compiling and for linking.
CC = cc

# Headers shared by the C sources.
HFILES	=\
	spellfix.h

# Every C source that goes into a program built by "all".  wordchange.c
# belongs here too: the wordchange target links wordchange.o, and the
# lint, depend, clean and RCS check-out rules all work from this list.
CFILES =\
	adjacent.c\
	baseword.c\
	compare.c\
	fixwords.c\
	freqcount.c\
	getbuf.c\
	makeadj.c\
	makemod.c\
	readdict.c\
	spellcheck.c\
	spellfix.c\
	wordchange.c

# Assembler sources (none).
SFILES =

# Derived from CFILES so that the two lists cannot drift apart.
OFILES = $(CFILES:.c=.o)

SOURCE=	$(CFILES) $(SFILES)
ALLFILES= $(SOURCE) $(HFILES)

INCLUDE = -I../h

# Compile flags are as follows:
#	UNIX		Compiling under unix
#	BSD		Compiling under 4.2 BSD unix
#	VAX		Compiling for a vax

DEFINES = -DUNIX -DBSD -DVAX

CFLAGS = $(DEFINES) $(INCLUDE) -g
LINTFLAGS = -huca -DLINT $(DEFINES) $(INCLUDE)
# LDFLAGS also carries the libraries, so the link rules must place it
# after the object files.
LDFLAGS = -g -z -lcurses -ltermlib -lm

# Suffix rule: compile foo.c into foo.o.  A suffix rule must not name any
# prerequisites -- the source file is implied by the suffixes -- so none
# are listed here.  $< is the implied source file.
.c.o:
	$(CC) $(CFLAGS) -c $<

# Default goal: build every program in the package.
all: baseword spellcheck spellfix makeadj freqcount wordchange

# Link rules.  $(LDFLAGS) follows the objects on each command line because
# it also holds the -l library options; $@ is the program being linked.
baseword: baseword.o
	$(CC) baseword.o $(LDFLAGS) -o $@

spellcheck: spellcheck.o readdict.o
	$(CC) spellcheck.o readdict.o $(LDFLAGS) -o $@

spellfix: spellfix.o readdict.o compare.o adjacent.o fixwords.o \
		makemod.o getbuf.o
	$(CC) spellfix.o readdict.o compare.o adjacent.o fixwords.o \
			makemod.o getbuf.o $(LDFLAGS) -o $@

makeadj: makeadj.o
	$(CC) makeadj.o $(LDFLAGS) -o $@

wordchange: wordchange.o
	$(CC) wordchange.o $(LDFLAGS) -o $@

freqcount: freqcount.o
	$(CC) freqcount.o $(LDFLAGS) -o $@

# These targets name actions rather than files, so a stray file called
# "clean", "lint", ... must not make them look up to date.
.PHONY: lint qgrind source wc print clean depend

lint:	$(SOURCE)
	lint $(LINTFLAGS) $(SOURCE)

qgrind: $(ALLFILES)
	qgrind -lc $(ALLFILES)

# Make sure every source file is present (checking it out if necessary).
source:	$(SOURCE)

# NOTE(review): /tmp is presumably listed only as a prerequisite that is
# nearly always newer than the tags file, forcing a rebuild -- confirm.
tags: 	/tmp
	ctags -t ../*/*.c ../*/*.h > /dev/null 2>&1

wc: $(ALLFILES)
	@wc $(ALLFILES)

print: $(ALLFILES)
	@pr $(ALLFILES)

# Remove everything the build produces: objects, scratch files and the
# linked programs.  wordchange.o is named explicitly so that it is removed
# even when OFILES does not list it.
clean:
	rm -f $(OFILES) wordchange.o foo make.out tags a.out
	rm -f baseword spellcheck spellfix makeadj freqcount wordchange

# Missing sources and headers are checked out (locked) from RCS.
$(ALLFILES):
	co -l $@

# Regenerate the dependency list at the end of this Makefile: everything
# from two lines below the "DO NOT DELETE" marker to the end of the file
# is replaced by fresh compiler output.
depend: $(SOURCE)
	$(CC) -M $(CFLAGS) $(CFILES) $(SFILES) > makedep
	echo '/^# DO NOT DELETE THIS LINE/+2,$$d' >eddep
	echo '$$r makedep' >>eddep
	echo 'w' >>eddep
	ed - Makefile < eddep
	rm eddep makedep
	echo '# DEPENDENCIES MUST END AT END OF FILE' >> Makefile
	echo '# IF YOU PUT STUFF HERE IT WILL GO AWAY' >> Makefile
	echo '# see make depend above' >> Makefile

#-----------------------------------------------------------------
# DO NOT DELETE THIS LINE -- make depend uses it
# DEPENDENCIES MUST END AT END OF FILE
adjacent.o: adjacent.c
adjacent.o: ./spellfix.h
adjacent.o: /usr/include/stdio.h
adjacent.o: /usr/include/ctype.h
baseword.o: baseword.c
baseword.o: ./spellfix.h
baseword.o: /usr/include/stdio.h
baseword.o: /usr/include/ctype.h
compare.o: compare.c
compare.o: ./spellfix.h
compare.o: /usr/include/stdio.h
compare.o: /usr/include/ctype.h
fixwords.o: fixwords.c
fixwords.o: ./spellfix.h
fixwords.o: /usr/include/stdio.h
fixwords.o: /usr/include/ctype.h
fixwords.o: /usr/include/curses.h
fixwords.o: /usr/include/stdio.h
fixwords.o: /usr/include/sgtty.h
fixwords.o: /usr/include/sys/ioctl.h
fixwords.o: /usr/include/sys/ttychars.h
fixwords.o: /usr/include/sys/ttydev.h
fixwords.o: /usr/include/sys/time.h
fixwords.o: /usr/include/time.h
freqcount.o: freqcount.c
freqcount.o: /usr/include/stdio.h
freqcount.o: /usr/include/ctype.h
getbuf.o: getbuf.c
getbuf.o: /usr/include/sys/ioctl.h
getbuf.o: /usr/include/sys/ttychars.h
getbuf.o: /usr/include/sys/ttydev.h
getbuf.o: /usr/include/ctype.h
getbuf.o: /usr/include/curses.h
getbuf.o: /usr/include/stdio.h
getbuf.o: /usr/include/sgtty.h
makeadj.o: makeadj.c
makeadj.o: /usr/include/stdio.h
makeadj.o: /usr/include/ctype.h
makemod.o: makemod.c
makemod.o: ./spellfix.h
makemod.o: /usr/include/stdio.h
makemod.o: /usr/include/ctype.h
readdict.o: readdict.c
readdict.o: ./spellfix.h
readdict.o: /usr/include/stdio.h
readdict.o: /usr/include/ctype.h
spellcheck.o: spellcheck.c
spellcheck.o: ./spellfix.h
spellcheck.o: /usr/include/stdio.h
spellcheck.o: /usr/include/ctype.h
spellfix.o: spellfix.c
spellfix.o: ./spellfix.h
spellfix.o: /usr/include/stdio.h
spellfix.o: /usr/include/ctype.h
spellfix.o: /usr/include/sys/file.h
spellfix.o: /usr/include/curses.h
spellfix.o: /usr/include/stdio.h
spellfix.o: /usr/include/sgtty.h
spellfix.o: /usr/include/sys/ioctl.h
spellfix.o: /usr/include/sys/ttychars.h
spellfix.o: /usr/include/sys/ttydev.h
# DEPENDENCIES MUST END AT END OF FILE
# IF YOU PUT STUFF HERE IT WILL GO AWAY
# see make depend above
!Funky!Stuff!
echo x - spellfix.h
cat >spellfix.h <<'!Funky!Stuff!'

/* RCS Info: $Revision: 1.4 $ on $Date: 86/04/02 10:33:39 $
 *           $Source: /ic4/faustus/src/spellfix/RCS/spellfix.h,v $
 * Copyright (c) 1985 Wayne A. Christopher, U. C. Berkeley CAD Group
 *	Permission is granted to do anything with this code except sell it
 *	or remove this message.
 */

#include <stdio.h>
#include <ctype.h>

/* True if ch may be part of a word: a letter, a digit or an apostrophe.
 * The argument is evaluated more than once, so it must be free of side
 * effects.
 */
#define isvalid(ch)	(isupper(ch) || islower(ch) || ((ch) == '\'') || \
				isdigit(ch))

/* Path name of the system word list. */
#define DICTFILE 	"/usr/dict/words"
/* Directory holding baseword, spellcheck, wordchange and freqwords; the
 * READ.ME says to change this to wherever they were installed.
 */
#define BINDIR		"/usr/public/lib/spellfix"

#define SIZE		128	/* The maximum size of a word. */
#define NLETTERS	26	/* # letters in the alphabet. */
#define NOCHANCE	100000	/* No chance of a match between words. */
#define NSAVE		16	/* Keep the top NSAVE choices... */
#define NALTS		256	/* Max # of decompositions given by baseword */
#define NDICTS		8	/* How many dictionaries we can search. */

/* Declared here because no header that declares malloc is included. */
extern char *malloc();

!Funky!Stuff!
echo x - adjacent.c
cat >adjacent.c <<'!Funky!Stuff!'

/* RCS Info: $Revision: 1.1 $ on $Date: 85/09/28 13:16:08 $
 *           $Source: /ic4/faustus/src/spellfix/RCS/adjacent.c,v $
 * Copyright (c) 1985 Wayne A. Christopher, U. C. Berkeley CAD Group
 *	Permission is granted to do anything with this code except sell it
 *	or remove this message.
 */

#include "spellfix.h"

/* Letter adjacency frequency matrix... This is normalized so that the
 * greatest value is ~ 250. This particular matrix was taken from
 * /usr/dict/words... See makeadj.c
 */

/* adjacent[x][y] is the relative frequency with which letter y follows
 * letter x (both counted from 'a').  Every entry has to fit in an
 * unsigned char: the e-r entry used to read 264, which was silently
 * truncated to 8 when stored, so it is clamped to 255 here.
 */
unsigned char adjacent[NLETTERS][NLETTERS] = {
        { 1, 56, 76, 58, 5, 13, 44, 9, 41, 1, 17, 155, 57, 229, 2, 44, 1, 174,
		72, 183, 31, 20, 14, 6, 23, 6 } ,
        { 51, 11, 0, 1, 58, 0, 0, 0, 36, 0, 0, 63, 0, 0, 44, 0, 0, 36, 7, 1, 33,
		0, 0, 0, 6, 0 } ,
        { 85, 0, 12, 0, 71, 0, 0, 93, 45, 0, 51, 29, 0, 0, 124, 0, 1, 40, 1, 44,
		35, 0, 0, 0, 10, 0 } ,
        { 37, 3, 1, 14, 116, 2, 8, 3, 73, 1, 0, 10, 4, 1, 36, 1, 0, 26, 9, 1,
		19, 2, 5, 0, 13, 0 } ,
        { 77, 17, 55, 52, 44, 21, 23, 7, 19, 1, 3, 97, 49, 191, 18, 32, 5, 255,
		101, 93, 13, 22, 18, 31, 22, 3 } ,
        { 22, 0, 0, 0, 28, 23, 0, 0, 32, 0, 0, 23, 0, 0, 28, 0, 0, 19, 0, 8, 21,
		0, 0, 0, 7, 0 } ,
        { 39, 1, 0, 0, 66, 0, 13, 24, 28, 0, 0, 19, 4, 11, 25, 0, 0, 38, 3, 2,
		21, 0, 0, 0, 12, 0 } ,
        { 72, 2, 0, 0, 94, 1, 0, 0, 50, 0, 0, 4, 5, 3, 70, 0, 0, 16, 1, 15, 19,
		0, 3, 0, 22, 0 } ,
        { 79, 26, 133, 52, 43, 22, 42, 0, 1, 0, 6, 76, 48, 250, 88, 27, 3, 44,
		114, 108, 12, 37, 0, 4, 0, 5 } ,
        { 8, 0, 0, 0, 7, 0, 0, 0, 1, 0, 0, 0, 0, 0, 9, 0, 0, 0, 0, 0, 9, 0, 0,
		0, 0, 0 } ,
        { 10, 1, 0, 0, 40, 0, 0, 2, 19, 0, 0, 7, 1, 5, 5, 1, 0, 2, 5, 1, 2, 0,
		2, 0, 7, 0 } ,
        { 119, 5, 6, 21, 189, 6, 4, 1, 108, 0, 6, 84, 8, 0, 79, 5, 0, 1, 10, 22,
		37, 6, 2, 0, 22, 0 } ,
        { 106, 22, 6, 0, 87, 1, 0, 0, 69, 0, 0, 1, 19, 3, 56, 42, 0, 0, 4, 0,
		21, 0, 0, 0, 9, 0 } ,
        { 75, 4, 49, 82, 106, 15, 91, 5, 70, 2, 13, 3, 2, 24, 47, 1, 2, 2, 50,
		140, 19, 9, 2, 0, 13, 2 } ,
        { 20, 20, 40, 33, 10, 12, 32, 4, 18, 1, 11, 81, 70, 215, 44, 44, 1, 147,
		53, 51, 85, 16, 34, 6, 8, 2 } ,
        { 58, 1, 0, 0, 75, 0, 0, 41, 45, 0, 0, 36, 1, 0, 56, 23, 0, 58, 13, 22,
		18, 0, 0, 0, 8, 0 } ,
        { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 32, 0, 0,
		0, 0, 0 } ,
        { 163, 15, 23, 35, 182, 6, 19, 5, 144, 0, 14, 12, 30, 25, 129, 12, 0,
		35, 30, 55, 35, 9, 4, 0, 59, 0 } ,
        { 45, 3, 44, 1, 93, 2, 1, 62, 87, 0, 11, 14, 23, 7, 52, 42, 4, 1, 52,
		158, 34, 1, 11, 0, 17, 0 } ,
        { 101, 2, 11, 0, 209, 4, 0, 83, 163, 0, 0, 12, 4, 1, 93, 1, 0, 96, 7,
		44, 41, 0, 6, 0, 34, 3 } ,
        { 22, 15, 22, 21, 23, 6, 18, 0, 22, 0, 2, 57, 47, 50, 6, 20, 0, 74, 95,
		47, 0, 1, 0, 2, 1, 2 } ,
        { 27, 0, 0, 0, 83, 0, 0, 0, 35, 0, 0, 0, 0, 0, 13, 0, 0, 0, 0, 0, 1, 0,
		0, 0, 1, 0 } ,
        { 38, 2, 0, 1, 21, 1, 0, 12, 26, 0, 1, 3, 1, 9, 20, 0, 0, 5, 2, 1, 0, 0,
		0, 0, 1, 0 } ,
        { 3, 0, 4, 0, 3, 0, 0, 1, 6, 0, 0, 0, 0, 0, 2, 7, 0, 0, 0, 8, 1, 0, 0,
		0, 1, 0 } ,
        { 8, 2, 8, 5, 9, 1, 2, 1, 1, 0, 0, 9, 8, 7, 7, 9, 0, 7, 10, 5, 1, 0, 2,
		 0, 0, 0 } ,
        { 5, 0, 0, 0, 8, 0, 0, 0, 3, 0, 0, 1, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0,
		0, 1, 3 }
} ;

!Funky!Stuff!
echo x - baseword.c
cat >baseword.c <<'!Funky!Stuff!'

/* RCS Info: $Revision: 1.4 $ on $Date: 86/04/02 10:33:10 $
 *           $Source: /ic4/faustus/src/spellfix/RCS/baseword.c,v $
 * Copyright (c) 1985 Wayne A. Christopher, U. C. Berkeley CAD Group
 *	Permission is granted to do anything with this code except sell it
 *	or remove this message.
 *
 * This program takes words with possible prefixes and suffixes and strips
 * them off. The output is a list of pairs: <base word, trans info>, where
 * the transformation info describes how to change the base word into the
 * word given. Each pair describes one possible way to parse the word (so
 * "catting" would produce "catt $+ing cat $.+ing"). The transformation data
 * is basically an editing script: characters have the following
 * interpretation:
 *	$ go to the character at the end of the word
 *	^ go to before the beginning of the word
 *	- delete the following characters
 *	+ add the following characters (the default mode)
 *	. duplicate the character we are on now
 *	c any other character means either add this character after the
 *		current one or delete the current one and move to the next
 *		one (if it isn't there the match fails), depending on 
 *		whether we last saw a + or a -.
 * The first word on the output line is the original unmodified input,
 * and it doesn't have a transformation word. Note that the first
 * character in the mod must be either '^' or '$', and neither character
 * can appear anywhere else. This makes things a bit easier to do,
 * and this is really all that is needed for this application.
 */

#include "spellfix.h"

/* The table of all possible word modifiers. The trans info we output will
 * be one of these. Note that the process below is the opposite of what we
 * are specifying in the mods.
 */

/* Each entry is an editing script that turns a base word into a derived
 * word: "$..." scripts work at the end of the word (suffixes), "^..."
 * scripts at the beginning (prefixes).  Every script appears exactly once;
 * a repeated entry only makes decomp() produce the same decomposition
 * twice.
 */
char *mods[] = {
	"$+ive",
	"$-e+ive",
	"$+ism",
	"$-e+ism",
	"$+y",
	"$+es",
	"$-y+ies",
	"$+al",
	"$+s",
	"$+ing",
	"$.+ing",
	"$-e+ing",
	"$+ion",
	"$-e+ion",
	"$+ness",
	"$-y+iness",
	"$+like",
	"$+less",
	"$-y+iless",
	"$+ize",
	"$-e+ize",
	"$+'s",
	"$-t+ce",
	"$+ity",
	"$-y+iable",
	"$+able",
	"$-e+able",
	"$+.able",
	"$-e+ity",
	"$+ly",
	"$+.ly",
	"$-y+ily",
	"$+ment",
	"$+ater",
	"$+er",
	"$-e+er",
	"$+ed",
	"$-e+ed",
	"$+.ed",
	"$-y+ied",
	"$+est",
	"$+.est",
	"$-e+est",
	"$-y+iest",
	"$-y+ication",
	"$+ship",
	"^+anti",
	"^+bio",
	"^+dis",
	"^+electro",
	"^+en",
	"^+fore",
	"^+hyper",
	"^+intra",
	"^+inter",
	"^+iso",
	"^+kilo",
	"^+magneto",
	"^+meta",
	"^+micro",
	"^+milli",
	"^+mis",
	"^+mono",
	"^+multi",
	"^+non",
	"^+out",
	"^+over",
	"^+photo",
	"^+poly",
	"^+pre",
	"^+pseudo",
	"^+re",
	"^+semi",
	"^+stereo",
	"^+sub",
	"^+super",
	"^+thermo",
	"^+ultra",
	"^+under",
	"^+un",
} ;

/* For each word, we have to try to apply as many decompositions as
 * possible. So we keep a list of words and repeatedly try to
 * decompose them, adding the results to the list. We may get
 * several copies of each decomposition this way, so we should try
 * and print only one. (but we don't...)
 */

/* Read words from the standard input, one per line, and for each print
 * the word followed by every <base word, editing script> pair that
 * decomp() can derive from it, either directly or by decomposing an
 * earlier base word again.  poss[0] is the input word itself (it lives in
 * buf and has no script); poss[1..] and change[1..] are malloc'ed and are
 * released once the line has been printed.
 */
main(ac, av)
	char **av;
{
	char buf[BUFSIZ];
	char *poss[NALTS], *change[NALTS], done[NALTS];
	char **words, **changes;
	register char *t, *s, *prev;
	register int i, j, k, l;
	int nchanges, changemade;
	char modmade[sizeof (mods) / sizeof (char *)];

	while (fgets(buf, BUFSIZ, stdin)) {
		/* Keep only the first blank-delimited token on the line. */
		for (t = buf; *t && (*t != '\n') && (*t != ' ') &&
				(*t != '\t'); t++)
			;
		*t = '\0';
		bzero(done, NALTS);
		bzero(poss, sizeof (char *) * NALTS);
		bzero(change, sizeof (char *) * NALTS);
		bzero(modmade, sizeof (modmade));
		poss[0] = buf;
		do {
			changemade = 0;
			for (i = 0; i < NALTS; i++) {
				if (!poss[i] || done[i])
					continue;
				/* See what we can do with this word.  A word
				 * need never be tried twice: modmade only
				 * ever rules out more mods, so a second try
				 * cannot find anything new.
				 */
				nchanges = decomp(poss[i], &words,
						&changes, modmade);
				done[i] = 1;
				if (nchanges)
					changemade = 1;
				/* The input word (slot 0) has no script of
				 * its own; treat that as the empty script
				 * instead of handing NULL to strlen().
				 */
				prev = change[i] ? change[i] : "";
				for (j = 0, k = 0; j < nchanges; j++) {
					/* First make sure we don't already
					 * have this one.
					 */
					for (l = 0; (l < NALTS) && poss[l]; l++)
						if (!strcmp(poss[l], words[j]))
							break;
					if ((l < NALTS) && poss[l]) {
						/* Duplicate: drop our copies. */
						free(words[j]);
						free(changes[j]);
						continue;
					}
					/* Find a free place; test the index
					 * before looking at the slot.
					 */
					while ((k < NALTS) && poss[k])
						k++;
					if (k == NALTS) {
						/* None left... */
						fprintf(stderr, "Gasp...\n");
						for (; j < nchanges; j++) {
							free(words[j]);
							free(changes[j]);
						}
						goto newword;
					}

					/* Now concatenate the two mods. */
					s = malloc(strlen(changes[j]) +
							strlen(prev) + 1);
					if (!s) {
						fprintf(stderr,
							"Out of memory\n");
						exit(1);
					}
					sprintf(s, "%s%s", changes[j], prev);
					free(changes[j]);
					poss[k] = words[j];
					change[k] = s;
				}
			}
		} while (changemade);

		fputs(poss[0], stdout);
		for (i = 1; (i < NALTS) && poss[i]; i++)
			printf(" %s %s", poss[i], change[i]);
newword:	putchar('\n');
		/* Slots are filled in order, so the used ones are the
		 * leading non-null entries after slot 0.
		 */
		for (i = 1; (i < NALTS) && poss[i]; i++) {
			free(poss[i]);
			free(change[i]);
		}
	}
	exit(0);
}

/* Try to decompose this word, stripping off one prefix or suffix.
 *
 * Every entry of mods[] whose modmade[] flag is clear is applied in
 * reverse to a copy of word.  Each one that fits yields a malloc'ed base
 * word and a malloc'ed copy of the script (with a letter upper-cased where
 * the word had a capital), and sets its modmade[] flag.  *wptr and *cptr
 * are pointed at static arrays holding the results; the return value is
 * the number of results.  An empty word, or a mod that would leave nothing
 * of the word, produces no result.
 */

decomp(word, wptr, cptr, modmade)
	char *word;
	char ***wptr, ***cptr;
	char *modmade;
{
	register int i;
	register char *s, *t, *r;
	char buf[BUFSIZ], mbuf[BUFSIZ];
	int addmode, forward, decount = 0;
	static char *poss[NALTS], *changes[NALTS];
	int nmods = sizeof (mods) / sizeof (char *);

	*wptr = poss;
	*cptr = changes;
	/* Nothing can be stripped from an empty word, and the end-of-word
	 * scan below would look past its terminator.
	 */
	if (!*word)
		return (0);

	for (i = 0; (i < nmods) && (decount < NALTS); i++) {
		if (modmade[i])
			continue;
		/* Try to apply the reverse of mods[i] to buf. */
		strcpy(buf, word);
		strcpy(mbuf, mods[i]);
		addmode = 1;
		switch (*mbuf) {
			case '^':
				forward = 1;
				s = buf;
				break;
			case '$':
				forward = 0;
				/* s = last character of the word. */
				for (s = buf; s[1]; s++)
					;
				break;
			default:
				fprintf(stderr, "Bad mod %s...\n",
						mbuf);
				exit(1);
		}
		if (forward) {	/* Damn tabs. */
			for (t = mbuf + 1; *t; t++) {
				switch (*t) {
					case '+':
						addmode = 1;
						break;
					case '-':
						addmode = 0;
						break;
					case '.':
						if ((s > buf) &&
							(s[-1] == s[0])) {
							for (r = s; *r; r++)
								r[0] = r[1];
							s--;
						} else
							goto out;
						break;
					default:

					if (!isvalid(*t)) {
						fprintf(stderr,
						"Bad character %c in mod %s\n",
								*t, mbuf);
						exit(1);
					} else if (addmode) {
						/* The mod added this letter,
						 * so take it off the front.
						 */
						if (*s == *t) {
							for (r = s; *r; r++)
								r[0] = r[1];
						} else if (isupper(*s) &&
							(tolower(*s) == *t)) {
							for (r = s; *r; r++)
								r[0] = r[1];
							*t = toupper(*t);
						} else
							goto out;
					} else {
						/* The mod deleted this
						 * letter, so put it back.
						 */
						for (r = s; *r; r++)
							;
						for (; r >= s; r--)
							r[1] = r[0];
						*s++ = *t;
					}
				}
			}
		} else {
			/* In this case, we have to go back from the
			 * end. This is seriously ugly stuff.
			 */
			for (t = mbuf; t[1]; t++)
				;
			/* The mode of the last run of letters is set by
			 * the last '+' or '-' in the script.
			 */
			for (r = t; (r > mbuf) && (*r != '-') &&
					(*r != '+'); r--)
				;
			switch (*r) {
				case '-':
					addmode = 0;
					break;
				case '+':
					addmode = 1;
					break;
				default:
					fprintf(stderr, "Bad mod %s\n",
							mbuf);
					exit(1);
			}
			for (; t > mbuf; t--) {
				switch (*t) {
					case '+':
					case '-':
						/* Passing a mode character:
						 * the letters before it are
						 * governed by the previous
						 * one ('+' if there is none).
						 */
						for (r = t - 1; (r > mbuf) &&
						(*r != '-') && (*r != '+'); r--)
							;
						if (*r == '-')
							addmode = 0;
						else
							addmode = 1;
						break;
					case '.':
						if ((s > buf) &&
							(s[-1] == s[0])) {
							for (r = s; *r; r++)
								r[0] = r[1];
							s--;
						} else
							goto out;
						break;
					default:
					if (!isvalid(*t)) {
						fprintf(stderr,
						"Bad character %c in mod %s\n",
									*t, mbuf);
						exit(1);
					} else if (addmode) {
						/* s drops below buf once the
						 * whole word is gone; there
						 * is nothing left to match.
						 */
						if (s < buf)
							goto out;
						else if (*s == *t) {
							for (r = s; *r; r++)
								r[0] = r[1];
							s--;
						} else if (isupper(*s) &&
							(tolower(*s) == *t)) {
							for (r = s; *r; r++)
								r[0] = r[1];
							s--;
							*t = toupper(*t);
						} else
							goto out;
					} else {
						/* Re-insert the deleted
						 * letter just after s.  The
						 * scan for the end starts at
						 * s + 1 so that s itself,
						 * which may be one before
						 * buf, is never read.
						 */
						for (r = s + 1; *r; r++)
							;
						for (; r > s; r--)
							r[1] = r[0];
						s[1] = t[0];
					}
				}
			}
		}

		/* The script was consumed completely if t ran off its end
		 * (forward) or back to its first character (backward).  A
		 * result that leaves no base word at all is not recorded.
		 */
out:		if (((forward && !*t) || (t == mbuf)) && buf[0]) {
			/* Ok, this modification works. */
			poss[decount] = malloc(strlen(buf) + 1);
			strcpy(poss[decount], buf);
			changes[decount] = malloc(strlen(mbuf) + 1);
			strcpy(changes[decount], mbuf);
			decount++;
			modmade[i] = 1;
		}
	}
	return (decount);
}

!Funky!Stuff!
echo x - compare.c
cat >compare.c <<'!Funky!Stuff!'

/* RCS Info: $Revision: 1.3 $ on $Date: 85/10/08 18:35:16 $
 *           $Source: /ic4/faustus/src/spellfix/RCS/compare.c,v $
 * Copyright (c) 1985 Wayne A. Christopher, U. C. Berkeley CAD Group
 *	Permission is granted to do anything with this code except sell it
 *	or remove this message.
 *
 * Defined:
 */

#include "spellfix.h"

/* This is the major routine -- it determines how well a misspelled word
 * and a correctly spelled word match. The higher the return value the
 * worse the match is. We use a stack here so that we can backtrack
 * (we don't, but we could...)
 */

/* One entry of the action stack filled in by compare(): a single edit
 * applied to the misspelled word, together with the cost charged for it.
 */
struct action {
	int a_type;		/* What we did. */
	char *a_where;		/* A pointer into the bad string. */
	char a_letter;		/* What letter we used, if applicable. */
	int a_cost;		/* How much this cost us. */
} ;

/* Values for a_type. */
#define A_TRANS		1	/* Transpose a_where and a_where + 1. */
#define A_ADD		2	/* Added a_letter after a_where. */
#define A_DELETE	3	/* Deleted the letter at a_where. */
#define A_CHANGE	4	/* Changed the letter at a_where to a_letter. */

#define NACTIONS	256	/* Size of the action stack in compare(). */

extern unsigned char adjacent[NLETTERS][NLETTERS];

/* freq(x, y) is the adjacency frequency of the letter pair x, y.  Words
 * may contain capitals, digits and apostrophes, so the arguments are
 * checked before they are turned into table indices: capitals are folded
 * to lower case and any pair involving a non-letter counts as 0.  The
 * arguments are evaluated more than once and must have no side effects.
 */
#define fr_lower(c)	(((c) >= 'a') && ((c) <= 'z'))
#define fr_upper(c)	(((c) >= 'A') && ((c) <= 'Z'))
#define fr_alpha(c)	(fr_lower(c) || fr_upper(c))
#define fr_index(c)	(fr_upper(c) ? (c) - 'A' : (c) - 'a')
#define freq(x, y)	((fr_alpha(x) && fr_alpha(y)) ? \
				adjacent[fr_index(x)][fr_index(y)] : 0)

/* Score how well the misspelled word bad matches the correctly spelled
 * word good.  The two words are walked in parallel; at each mismatch one
 * edit (transpose, delete, add or change, tried in that order) is charged,
 * each kind costing more every time it is used and adjusted by how common
 * the letter pairs involved are.  Returns the total cost (lower is
 * better), or NOCHANCE as soon as the cost exceeds maxcost or more edits
 * are needed than the action stack can hold.
 */
compare(bad, good, maxcost)
	char *bad, *good;
	int maxcost;	/* If the cost gets higher than this, give up. */
{
	register int sp = 0;		/* Action stack pointer. */
	struct action astack[NACTIONS];	/* The action stack. */
	register char *badplace = bad;	/* Where we are now. */
	register char *goodplace = good;
	int ntrans = 0, nadds = 0, ndels = 0, nchanges = 0;
	register int tempcost = 0, i, j;

	while (*badplace && *goodplace) {
		/* A pair needing this many edits is no match, and there
		 * is no room left to record another one.
		 */
		if (sp >= NACTIONS)
			return (NOCHANCE);
		/* See what we can do with *badplace and *goodplace. */
		if (*badplace == *goodplace) {
			/* Great, they match. */
			badplace++;
			goodplace++;
		} else if ((badplace[0] == goodplace[1]) &&
				(goodplace[0] == badplace[1])) {
			/* Transpose these two. Neither can be NULL here. */
			astack[sp].a_type = A_TRANS;
			astack[sp].a_where = badplace;
			astack[sp].a_letter = 0;
			j = freq(badplace[0], badplace[1]) / 20;
			i = freq(goodplace[0], goodplace[1]) / 20;
			if ((badplace > bad) && (goodplace > good)) {
				j += freq(badplace[-1], badplace[0]) / 40;
				i += freq(goodplace[-1], goodplace[0]) / 40;
			}
			if (badplace[2] && goodplace[2]) {
				j += freq(badplace[1], badplace[2]) / 40;
				i += freq(goodplace[1], goodplace[2]) / 40;
			}
			if (i > 5)
				i = 5;
			if (j > 5)
				j = 5;
			astack[sp].a_cost = ++ntrans * 10 + j - i;
			tempcost += astack[sp].a_cost;
			sp++;
			badplace += 2;
			goodplace += 2;
		} else if (goodplace[0] == badplace[1]) {
			/* Delete *badplace. */
			astack[sp].a_type = A_DELETE;
			astack[sp].a_where = badplace;
			astack[sp].a_letter = 0;
			if (badplace > bad) {
				j = (freq(badplace[-1], badplace[0]) +
					freq(badplace[0], badplace[1]))/ 20;
				i = freq(badplace[-1], badplace[1]) / 10;
				if (i > 10)
					i = 10;
				if (j > 10)
					j = 10;
			} else
				i = j = 0;
			astack[sp].a_cost = ++ndels * 20 + j - i;
			tempcost += astack[sp].a_cost;
			sp++;
			badplace++;
		} else if (badplace[0] == goodplace[1]) {
			/* Add *goodplace. */
			astack[sp].a_type = A_ADD;
			astack[sp].a_where = badplace - 1;
			astack[sp].a_letter = goodplace[0];
			if (badplace[1]) {
				i = (freq(badplace[0], goodplace[0]) +
					freq(goodplace[0], badplace[1]))/20;
				j = freq(badplace[0], badplace[1]) / 10;
				if (i > 10)
					i = 10;
				if (j > 10)
					j = 10;
			} else
				i = j = 0;
			astack[sp].a_cost = ++nadds * 20 + j - i;
			tempcost += astack[sp].a_cost;
			sp++;
			goodplace++;
		} else {
			/* Change *badplace to *goodplace. This is a last
			 * resort.
			 */
			astack[sp].a_type = A_CHANGE;
			astack[sp].a_where = badplace;
			astack[sp].a_letter = *goodplace;
			if ((badplace > bad) && badplace[1]) {
				j = (freq(badplace[-1], badplace[0]) +
					freq(badplace[0], badplace[1]))/10;
				i = (freq(badplace[-1], goodplace[0]) +
					freq(goodplace[0], badplace[1]))/10;
				if (i > 20)
					i = 20;
				if (j > 20)
					j = 20;
			} else
				i = j = 0;
			astack[sp].a_cost = ++nchanges * 30 - i + j;
			tempcost += astack[sp].a_cost;
			sp++;
			badplace++;
			goodplace++;
		}
		if (tempcost > maxcost)
			return (NOCHANCE);
	}
	/* At most one of the two loops below does anything.  Their costs
	 * only grow, so giving up inside the loop returns the same answer
	 * as checking afterwards and keeps a long leftover tail from
	 * running off the end of astack.
	 */
	while (*badplace) {
		/* Delete all these characters. */
		if (sp >= NACTIONS)
			return (NOCHANCE);
		astack[sp].a_type = A_DELETE;
		astack[sp].a_where = badplace;
		astack[sp].a_letter = 0;
		astack[sp].a_cost = ++ndels * 20;
		tempcost += astack[sp].a_cost;
		sp++;
		badplace++;
		if (tempcost > maxcost)
			return (NOCHANCE);
	}
	if (tempcost > maxcost)
		return (NOCHANCE);
	while (*goodplace) {
		/* Add all these characters. */
		if (sp >= NACTIONS)
			return (NOCHANCE);
		astack[sp].a_type = A_ADD;
		astack[sp].a_where = badplace - 1;
		astack[sp].a_letter = goodplace[0];
		astack[sp].a_cost = ++nadds * 20;
		tempcost += astack[sp].a_cost;
		sp++;
		goodplace++;
		if (tempcost > maxcost)
			return (NOCHANCE);
	}
	if (tempcost > maxcost)
		return (NOCHANCE);
	return (tempcost);
}

!Funky!Stuff!
echo x - dconvert.c
cat >dconvert.c <<'!Funky!Stuff!'

#include <stdio.h>
#include <ctype.h>

/* Filter colon-separated dictionary records from the standard input.  A
 * record is used only if its first field is purely alphabetic.  The
 * fourth field is inspected: pl is set when it starts with 'p', and its
 * third character selects the record -- 'v' is kept only when pl is set,
 * 'n' only when it is not, anything else always.  For a record that is
 * kept, the word is printed, then a space, then the rest of the line.
 * Lines too short to hold those fields are skipped rather than scanned
 * past their end.
 */
main()
{
	char buf[BUFSIZ];
	register char *s, *t;
	int pl, nskip;

	while (fgets(buf, BUFSIZ, stdin)) {
		for (s = buf; *s && (*s != ':'); s++)
			if (!isalpha(*s))
				break;
		if (*s != ':')
			continue;
		/* Step over the second and third fields, stopping at the
		 * end of the line if there are not that many.
		 */
		for (nskip = 0; (nskip < 2) && *s; nskip++)
			while (*++s && (*s != ':'))
				;
		if (!*s)
			continue;
		if (*++s == 'p')
			pl = 1;
		else
			pl = 0;
		/* The character tested next is two further on; make sure
		 * the line reaches that far.
		 */
		if (!s[0] || !s[1])
			continue;
		s += 2;
		if (((*s == 'v') && pl) || ((*s == 'n') && !pl) ||
				((*s != 'v') && (*s != 'n'))) {
			for (t = buf; isalpha(*t); t++)
				putchar(*t);
			printf(" %s", t);
		}
	}
	exit(0);
}

!Funky!Stuff!
echo x - fixwords.c
cat >fixwords.c <<'!Funky!Stuff!'

/* RCS Info: $Revision: 1.2 $ on $Date: 86/04/02 10:33:17 $
 *           $Source: /ic4/faustus/src/spellfix/RCS/fixwords.c,v $
 * Copyright (c) 1985 Wayne A. Christopher, U. C. Berkeley CAD Group
 *	Permission is granted to do anything with this code except sell it
 *	or remove this message.
 *
 * Defined:
 */

#include "spellfix.h"
#include <curses.h>
#include <sys/time.h>
#include <signal.h>
#include <setjmp.h>
#include <sys/ioctl.h>

/* Figure out what the word should be, and return the correction. Every time
 * we call update the screen, we check to see if the user has typed anything.
 * Since it is too easy for the output buffer to fill up before the user
 * can type anything, we temporarily reset the quit character to space and
 * trap it.
 */

static jmp_buf jbuf;

/* Let the user choose a correction for the misspelled word that starts at
 * buf.  One line of file containing the word is shown as context, then
 * each of the numdicts word lists dict[0..] (nwords[n] entries each) is
 * scanned with compare(), keeping the NSAVE best candidates on the screen
 * until the lists run out or the user hits space.
 *
 * Returns the replacement, which may be the word itself; NULL if the user
 * asked to write and quit; (char *) 1 if the user asked to quit.  Returned
 * strings live in static storage or in the dictionaries and are only good
 * until the next call.
 */
char *
fixword(buf, dict, nwords, numdicts, file)
	char *buf;
	char ***dict;
	int *nwords;
	char *file;
{
	/* word is handed back to the caller, so it must not be automatic.
	 * The candidate list is static because it is changed between the
	 * setjmp() below and the longjmp() out of the signal handler, and
	 * automatic variables are not reliable across that jump.
	 */
	static char word[BUFSIZ];
	static char xbuf[BUFSIZ];
	static int nsaved;
	static char *saved[NSAVE];
	static int scores[NSAVE];
	char context[BUFSIZ];
	register char *s;
	register int i, j, k;
	int upper, ss, numw, dnum;
	char **words = NULL, *getbuf();
	int c;
	FILE *fp, *popen();

	nsaved = 0;
	for (s = buf, i = 0; isvalid(*s) && (i < BUFSIZ - 1); s++, i++)
		word[i] = *s;
	word[i] = '\0';
	/* Both the command and the header line are built in xbuf. */
	if (strlen(word) + strlen(file) + 32 >= BUFSIZ) {
		fprintf(stderr, "Word or file name too long\n");
		return (word);
	}
	/* A word may contain an apostrophe, so it has to be quoted. */
	sprintf(xbuf, "/usr/ucb/grep -w \"%s\" \"%s\"", word, file);
	if (!(fp = popen(xbuf, "r"))) {
		fprintf(stderr, "Can't run %s\n", xbuf);
		exit(1);
	}

	upper = 20 + strlen(word) * 10;

	/* Only the first matching line is used as context. */
	if (!fgets(context, BUFSIZ, fp))
		context[0] = '\0';
	pclose(fp);
	for (s = context; *s && (*s != '\n'); s++)
		;
	*s = '\0';

	/* Find the word in the context line; i is where it starts. */
	for (i = 0; ; i++) {
		for (j = 0; word[j] && (context[i + j] == word[j]); j++)
			;
		if (!word[j] || !context[i])
			break;
	}
	if (word[j]) {
		fprintf(stderr, "Help, can't find it!\n");
		return (word);
	}
	/* j = screen column of the word, allowing for tabs. */
	for (k = 0, j = 0; k < i; k++)
		if (context[k] == '\t')
			j = (j | 07) + 1;
		else
			j++;
	/* Show at most one screen line of context. */
	context[79] = '\0';

	/* Print the header... */
	clear();
	sprintf(xbuf, "Misspelled word: %s.    Context:", word);
	mvaddstr(0, 0, xbuf);
	mvaddstr(2, 0, context);
	/* Underline the word, clipped to the width shown above. */
	for (i = 0; (i < j) && (i < 79); i++)
		xbuf[i] = ' ';
	for (k = strlen(word); (k > 0) && (i < 79); k--)
		xbuf[i++] = '-';
	xbuf[i] = '\0';
	mvaddstr(3, 0, xbuf);
	mvaddstr(22, 0, "Hit space to stop...  ");
	clrtoeol();
	refresh();
	siginit();

	if (setjmp(jbuf)) {
		/* The user hit space.  The cursor may be anywhere, so claim
		 * it is in the bottom right corner to force real motion.
		 */
		refresh();
		mvcur(LINES - 1, COLS - 1, 0, 0);
		mvcur(0, 0, 22, 0);
		goto getcom;
	}
	for (dnum = 0; dnum < numdicts; dnum++) {
		words = dict[dnum];
		numw = nwords[dnum];

		for (i = 0; i < numw; i++) {
			ss = compare(word, words[i], upper);
			if (ss == NOCHANCE)
				continue;

			/* Stick this word in its proper place. */
			for (j = 0; j < nsaved; j++)
				if (ss < scores[j])
					break;

			/* ...unless we already have it. */
			for (k = 0; k < nsaved; k++)
				if (!strcmp(saved[k], words[i]))
					break;
			if (k != nsaved)
				continue;

			if (j == NSAVE) {
				/* Worse than everything in a full list. */
				continue;
			} else if (j == nsaved) {
				saved[j] = words[i];
				scores[j] = ss;
				nsaved++;
			} else {
				/* Make room at j; the worst entry drops
				 * off when the list is full.
				 */
				for (k = (nsaved < NSAVE) ? nsaved : nsaved - 1;
						k > j; k--) {
					saved[k] = saved[k - 1];
					scores[k] = scores[k - 1];
				}
				if (nsaved < NSAVE)
					nsaved++;
				saved[j] = words[i];
				scores[j] = ss;
			}
			/* Update the screen... */
			for (k = 0; k < nsaved; k++) {
				sprintf(xbuf, "%c %-32s %-5d\n", 'a' + k,
						saved[k], scores[k]);
				mvaddstr(k + 4, 8, xbuf);
			}
			mvaddstr(22, 0, "Hit space to stop...  ");
			clrtoeol();
			refresh();
		}
	}
getcom: ;
	sigend();
	move(23, 0);
	clrtoeol();
	mvaddstr(22, 0, "Command (? for help): ");
	clrtoeol();
	refresh();
	c = getch();
	switch (c) {
		case 'N':
			return (word);
		case 'Q':
			endwin();
			puts("\bBye then...\n");
			return ((char *) 1);
		case 'W':
			return (NULL);
		case 'E':
			move(22, 0);
			clrtoeol();
			mvaddstr(22, 0, "Enter word: ");
			refresh();
			strcpy(xbuf, getbuf());
			move(22, 0);
			clrtoeol();
			for (s = xbuf; isvalid(*s); s++)
				;
			*s = '\0';
			return (xbuf);
		case '?':
			mvaddstr(22, 0,
"N = ok as is, Q = quit, W = write & quit, E = enter correction,");
			mvaddstr(23, 0, 
"? = help, any other character = select word.   (hit space to continue) ");
			refresh();
			getch();
			goto getcom;
		default:
			/* Candidates are labelled 'a', 'b', ... */
			i = c - 'a';
			if ((i < 0) || (i >= nsaved)) {
				mvaddstr(22, 0, "Command (? for help): ");
				clrtoeol();
				mvaddstr(23, 0, "No such word or function.\n");
				refresh();
				sleep(1);
				goto getcom;
			}
			return (saved[i]);
	}
}

static struct tchars tcbuf;

/*
 * SIGQUIT handler.  siginit() makes the space bar the terminal's quit
 * character and installs this routine, so hitting space lands here; all
 * it does is unwind to the context saved in jbuf (jbuf is set up outside
 * this routine).
 */
static
sigquit()
{
	longjmp(jbuf, 1);
	/* NOTREACHED */
}

/*
 * SIGINT handler: put the terminal's special characters back the way
 * siginit() found them (they are kept in tcbuf), tell the user we are
 * quitting, shut curses down and exit with status 1.
 */
static
sigint()
{
	/* Undo the "space is the quit character" setting first. */
	ioctl(0, TIOCSETC, &tcbuf);
	fprintf(stderr, "\nQuit\n");
	endwin();
	exit(1);
}

/*
 * Prepare for an interruptible search: remember the terminal's special
 * characters in tcbuf, temporarily make the space bar the quit character,
 * and install the SIGQUIT and SIGINT handlers.  sigend() reverses this.
 */
static
siginit()
{
	char oquit;

	ioctl(0, TIOCGETC, &tcbuf);
	oquit = tcbuf.t_quitc;
	tcbuf.t_quitc = ' ';
	ioctl(0, TIOCSETC, &tcbuf);
	/* Put the original quit character back into tcbuf (not into the
	 * tty) so that sigend() and sigint() restore the terminal exactly
	 * as it was found.
	 */
	tcbuf.t_quitc = oquit;
	/* NOTE(review): the handlers go in only after the quit character
	 * has been changed, so a space typed in between would get the
	 * default SIGQUIT action -- confirm whether that window matters.
	 */
	signal(SIGQUIT, sigquit);
	signal(SIGINT, sigint);
	return;
}

/*
 * Undo siginit(): restore the terminal's special characters from tcbuf
 * and return SIGQUIT and SIGINT to their default actions.
 */
static
sigend()
{
	/* Restore the tty first, so space stops being the quit character
	 * before the default SIGQUIT action comes back.
	 */
	ioctl(0, TIOCSETC, &tcbuf);
	signal(SIGQUIT, SIG_DFL);
	signal(SIGINT, SIG_DFL);
	return;
}

!Funky!Stuff!
echo x - freqcount.c
cat >freqcount.c <<'!Funky!Stuff!'

/* RCS Info: $Revision: 1.1 $ on $Date: 85/10/04 15:44:37 $
 *           $Source: /ic4/faustus/src/spellfix/RCS/freqcount.c,v $
 * Copyright (c) 1985 Wayne A. Christopher, U. C. Berkeley CAD Group
 *	Permission is granted to do anything with this code except sell it
 *	or remove this message.
 *
 * Determine how frequent various words are in text. Usage is
 *	freq dictfile sample ...
 */

#include <stdio.h>
#include <ctype.h>

#define HASHSIZE 50021
#define OFFSET	13

/*
 * One slot of the open-addressing hash table.  A slot is free when
 * h_word[0] is '\0'.
 */
struct hashent {
	char h_word[32];	/* the dictionary word (leading alphabetic run) */
	char h_extra[32];	/* rest of the dictionary line after the word */
	long h_freq;		/* number of times the word was seen in samples */
} ;

struct hashent hashtab[HASHSIZE];

/*
 * freqcount driver.  av[1] names the dictionary, which is loaded into
 * the hash table; every following argument is a sample file whose words
 * are tallied against it.  The resulting counts go to the standard
 * output, progress chatter to the standard error.
 */
main(ac, av)
	char **av;
{
	FILE *fp, *fopen();
	char **argp;

	/* We need a dictionary and at least one sample file. */
	if (ac < 3)
		exit(1);

	fp = fopen(av[1], "r");
	if (fp == NULL) {
		perror(av[1]);
		exit(1);
	}
	fprintf(stderr, "loading dictionary... ");
	fflush(stderr);
	inpdict(fp);
	fclose(fp);
	fprintf(stderr, "done.\n");

	/* Walk the sample files in the order given. */
	for (argp = &av[2]; argp < &av[ac]; argp++) {
		fp = fopen(*argp, "r");
		if (fp == NULL) {
			perror(*argp);
			exit(1);
		}
		fprintf(stderr, "reading file %s... ", *argp);
		fflush(stderr);
		addfile(fp);
		fclose(fp);
		fprintf(stderr, "done.\n");
	}

	printwords();
	exit(0);
}

/*
 * Load the dictionary on fp into hashtab.  Each line contributes one
 * entry: the leading run of alphabetic characters is the word, and
 * anything after the character that ends the word is kept as "extra".
 * Collisions are resolved by stepping OFFSET slots at a time.
 *
 * Lines with no word, and words too long for h_word, are skipped rather
 * than stored; the extra text is truncated to fit h_extra.  A dot is
 * written to stderr for every 1000 lines read.
 */
inpdict(fp)
	FILE *fp;
{
	char buf[BUFSIZ];
	register unsigned int j, x = 0;
	register unsigned int probes;
	register int n, wlen;
	unsigned int hash();
	char *extra;

	while (fgets(buf, BUFSIZ, fp)) {
		/* The cast keeps isalpha() defined for 8-bit characters. */
		for (extra = buf; isalpha((unsigned char) *extra); extra++)
			;
		wlen = extra - buf;
		if (*extra && (*extra != '\n')) {
			*extra = '\0';
			extra++;
		} else {
			*extra = '\0';
			extra = NULL;
		}
		if ((++x % 1000) == 0) {
			fprintf(stderr, ".");
			fflush(stderr);
		}

		/* An empty word would look like a free slot, and a long
		 * one would run off the end of h_word.
		 */
		if ((wlen == 0) || (wlen >= (int) sizeof (hashtab[0].h_word)))
			continue;

		/* Find a free slot.  HASHSIZE is prime, so HASHSIZE steps
		 * of OFFSET visit every slot; after that the table is full.
		 */
		j = hash(buf);
		for (probes = 0; hashtab[j].h_word[0]; probes++) {
			if (probes >= HASHSIZE) {
				fprintf(stderr,
				    "\nhash table full, rest of dictionary ignored\n");
				return;
			}
			j += OFFSET;
			j %= HASHSIZE;
		}
		strcpy(hashtab[j].h_word, buf);	/* known to fit */
		if (extra && *extra) {
			for (n = 0; extra[n] &&
					(n < (int) sizeof (hashtab[0].h_extra) - 1);
					n++)
				hashtab[j].h_extra[n] = extra[n];
			hashtab[j].h_extra[n] = '\0';
		}
		hashtab[j].h_freq = 0;
	}
	return;
}

/*
 * Map a NUL-terminated word to a slot number below HASHSIZE.  Each
 * character is multiplied into the running value (offset by a fixed
 * seed) and the result is reduced modulo HASHSIZE after every step.
 */
unsigned
hash(word)
	register char *word;
{
	register unsigned int slot;
	register unsigned int seed = 5003;

	for (slot = 0; *word != '\0'; word++)
		slot = (slot + (*word * (slot + seed))) % HASHSIZE;
	return (slot);
}

/*
 * Tally the words of a sample file.  Only the leading alphabetic run of
 * each line on fp is looked at; if that word is in hashtab its h_freq
 * is bumped, otherwise the line is ignored.  A dot is written to stderr
 * for every 1000 lines read.
 *
 * The probe sequence mirrors inpdict(): start at hash(word) and step by
 * OFFSET.  Entries are never deleted, so reaching a free slot proves
 * the word is absent; the search is otherwise bounded only by the table
 * size, so a word stored after a long collision chain is still found.
 */
addfile(fp)
	FILE *fp;
{
	char buf[BUFSIZ];
	register char *s;
	register int j, ct, x = 0;

	while (fgets(buf, BUFSIZ, fp)) {
		/* The cast keeps isalpha() defined for 8-bit characters. */
		for (s = buf; isalpha((unsigned char) *s); s++)
			;
		*s = '\0';
		if ((++x % 1000) == 0) {
			fprintf(stderr, ".");
			fflush(stderr);
		}
		/* No word on this line -- nothing to count. */
		if (!buf[0])
			continue;

		j = hash(buf);
		for (ct = 0; (ct < HASHSIZE) && hashtab[j].h_word[0]; ct++) {
			if (!strcmp(buf, hashtab[j].h_word)) {
				hashtab[j].h_freq++;
				break;
			}
			j += OFFSET;
			j %= HASHSIZE;
		}
	}
	return;
}

/*
 * Write every occupied hash slot to the standard output as
 * "word count", one per line, in table order.
 */
printwords()
{
	register int i;

	for (i = 0; i < HASHSIZE; i++)
		if (hashtab[i].h_word[0])
			/* h_freq is a long, so it needs %ld, not %d. */
			printf("%s %ld\n", hashtab[i].h_word,
					hashtab[i].h_freq);
	return;
}

!Funky!Stuff!
echo x - getbuf.c
cat >getbuf.c <<'!Funky!Stuff!'

/* RCS Info: $Revision: 1.2 $ on $Date: 86/04/02 10:33:25 $
 *           $Source: /ic4/faustus/src/spellfix/RCS/getbuf.c,v $
 * Copyright (c) 1985 Steve Procter
 */

#include <sys/ioctl.h>
#include <ctype.h>
#include <curses.h>

#define MAX(a,b)	((a) > (b) ? (a) : (b))
#define ISMETA(a)	((a) & 0x80)

/*
 * Read one line from the terminal, echoing it on stdscr at the current
 * cursor position, and return it (without the line terminator) in a
 * static buffer that is overwritten by the next call.
 *
 * Echo is turned off for the duration and the routine does its own
 * editing with the user's erase, kill, word-erase and reprint
 * characters.  Control characters are shown in ^X form.  Input ends at
 * a return, a newline, or end of file; characters that would overflow
 * the buffer are dropped.
 */
char *
getbuf ()
{
    int     x;
    int     y;
    int     c;			/* int, so EOF can be told from data */
    char    *s;
    char    *makeprint ();
    register    i;
    register    current = 0;	/* characters stored in string[] */
    register    offset = 0;	/* screen columns used so far */
    static char string[BUFSIZ];
    struct sgttyb   sg, osg;
    struct ltchars  lt, olt;

    rewind (stdin);
    wrefresh (stdscr);
    getyx (stdscr, y, x);
    bzero (string, sizeof (string));

    /* Remember the tty modes, then switch echo off and CR mapping on. */
    ioctl (fileno (stdin), TIOCGETP, &sg);
    bcopy (&sg, &osg, sizeof (struct sgttyb));
    sg.sg_flags |= CRMOD;
    sg.sg_flags &= ~ECHO;
    ioctl (fileno (stdin), TIOCSETP, &sg);

    ioctl (fileno (stdin), TIOCGLTC, &lt);
    bcopy (&lt, &olt, sizeof (struct ltchars));

    while (((c = getchar ()) != EOF) && (c != '\015') && (c != '\n')) {
	c &= 0177;
	if (c == sg.sg_kill) {
	    /* Line kill: throw everything away. */
	    current = 0;
	    offset = 0;
	    wmove (stdscr, y, x);
	    wclrtoeol (stdscr);
	    bzero (string, sizeof (string));
	}
	else if (c == sg.sg_erase) {
	    if (current <= 0) {
		current = 0;
		offset = 0;
		wmove (stdscr, y, x);
		wclrtoeol (stdscr);
		continue;
	    }
	    offset -= strlen (makeprint (string[--current]));
	    string[current] = '\0';
	    wmove (stdscr, y, x + offset);
	    wclrtoeol (stdscr);
	}
	else if (c == lt.t_werasc) {
	    /* Word erase: drop trailing blanks, then the word itself.
	     * Every erased position is zeroed, so a lone character
	     * cannot survive in the returned string.
	     */
	    while ((current - 1 >= 0) && (string[current - 1] == ' ')) {
		string[--current] = '\0';
		offset--;
	    }
	    while ((current - 1 >= 0) && (string[current - 1] != ' ')) {
		current--;
		offset -= strlen (makeprint (string[current]));
		string[current] = '\0';
	    }
	    if (current == 0)
		offset = 0;
	    wmove (stdscr, y, x + offset);
	    wclrtoeol (stdscr);
	}
	else if (c == lt.t_rprntc) {
	    /* Reprint what has been typed so far. */
	    wmove (stdscr, y, x);
	    for (i = 0; i < current; i++) {
		addstr (makeprint (string[i]));
	    }
	}
	else {
	    if (current >= BUFSIZ - 1) {
		/* Buffer full (one byte is kept for the NUL). */
	    }
	    else if (isprint (c)) {
		string[current++] = c;
		wmove (stdscr, y, x + offset);
		waddch (stdscr, c);
		offset++;
	    }
	    else {
		s = makeprint (c);
		wmove (stdscr, y, x + offset);
		waddstr (stdscr, s);
		string[current++] = c;
		offset += strlen (s);
	    }
	}
	wrefresh (stdscr);
    }
    /* Put the tty back the way it was found. */
    ioctl (fileno (stdin), TIOCSETP, &osg);
    ioctl (fileno (stdin), TIOCSLTC, &olt);
    return (string);
}

/*
 * Return a printable rendering of the character c in a static buffer
 * (overwritten by the next call):
 *	characters with the high bit set	"^["
 *	control characters			"^X" (DEL comes out as "^?")
 *	anything else				the character itself
 */
char *
makeprint (c)
char    c;
{
    static char r[5];

    bzero (r, sizeof (r));
    if (ISMETA(c)) {
	strcat (r, "^[");
	return (r);
    }
    if (iscntrl ((unsigned char) c)) {
	/* Flipping bit 0x40 maps ^@..^_ onto '@'..'_' and DEL onto
	 * '?'; or-ing the bit in would leave DEL as a raw DEL.
	 */
	sprintf (r, "^%c", (c ^ 0x40) & 0x7f);
	return (r);
    }
    r[0] = c;
    return (r);
}

!Funky!Stuff!
echo x - makeadj.c
cat >makeadj.c <<'!Funky!Stuff!'

/* RCS Info: $Revision: 1.1 $ on $Date: 85/09/27 23:06:37 $
 *           $Source: /ic4/faustus/src/spellfix/RCS/makeadj.c,v $
 * Copyright (c) 1985 Wayne A. Christopher, U. C. Berkeley CAD Group
 *	Permission is granted to do anything with this code except sell it
 *	or remove this message.
 */

#include <stdio.h>
#include <ctype.h>

#define NLETTERS 26

long adjacent[NLETTERS][NLETTERS];

/*
 * Build the letter-adjacency table.  Words are read one per line from
 * the standard input; lines containing anything but letters are
 * ignored.  For every pair of neighbouring letters the matching cell of
 * adjacent[][] is counted, the counts are then scaled down so that the
 * largest is about 255, and the table is printed as the rows of a C
 * initializer.
 */
main(ac, av)
	char **av;
{
	char buf[512];
	register char *s;
	register int i, j;
	long highest = 0, divisor;

	fprintf(stderr, "Reading words..."); fflush(stderr);
	/* fgets() rather than gets(): a long line must not overrun buf. */
	while (fgets(buf, sizeof (buf), stdin)) {
		/* Drop the newline, which gets() used to strip for us. */
		for (s = buf; *s && (*s != '\n'); s++)
			;
		*s = '\0';
		for (s = buf; *s; s++)
			if (!isalpha((unsigned char) *s))
				goto moe;
		for (s = buf; (s[0] && s[1]); s++) {
			i = (isupper(s[0]) ? tolower(s[0]) : s[0]) - 'a';
			j = (isupper(s[1]) ? tolower(s[1]) : s[1]) - 'a';
			adjacent[i][j]++;
		}
moe:	;
	}
	fprintf(stderr, " done.\n"); fflush(stderr);
	for (i = 0; i < NLETTERS; i++)
		for (j = 0; j < NLETTERS; j++)
			if (adjacent[i][j] > highest)
				highest = adjacent[i][j];
	fprintf(stderr, "Highest frequency = %ld, ", highest);
	/* With a small sample highest / 255 is zero; never divide by it. */
	divisor = highest / 255;
	if (divisor < 1)
		divisor = 1;
	fprintf(stderr, "dividing by %ld...", divisor); fflush(stderr);
	for (i = 0; i < NLETTERS; i++)
		for (j = 0; j < NLETTERS; j++)
			adjacent[i][j] /= divisor;
	fprintf(stderr, " done.\n"); fflush(stderr);

	for (i = 0; i < NLETTERS; i++) {
		printf("	{ ");	/* { */
		for (j = 0; j < NLETTERS; j++)
			printf("%ld%s", adjacent[i][j],
					(j < NLETTERS - 1) ? ", " : "");
		if (i < NLETTERS - 1)
			printf(" } ,\n");
		else
			printf(" }\n");
	}
	exit (0);
}

!Funky!Stuff!
echo x - makemod.c
cat >makemod.c <<'!Funky!Stuff!'

/* RCS Info: $Revision: 1.1 $ on $Date: 85/10/08 18:36:16 $
 *           $Source: /ic4/faustus/src/spellfix/RCS/makemod.c,v $
 * Copyright (c) 1985 Wayne A. Christopher, U. C. Berkeley CAD Group
 *	Permission is granted to do anything with this code except sell it
 *	or remove this message.
 *
 * This routine carries out modifications on base words (see the description
 * in baseword.c for details).
 * Defined: makemod
 */

#include "spellfix.h"

/*
 * Apply the edit script `mod' to a copy of `word' and return the result
 * in freshly malloc'd storage, or NULL if the script cannot be applied.
 *
 * The script is interpreted one character at a time against a cursor
 * (s) into the working copy, which starts on the first character:
 *	$	move the cursor to the last character
 *	^	move the cursor to just before the first character
 *	.	duplicate the character under the cursor and step onto
 *		the copy
 *	+	switch to insert mode (the initial mode)
 *	-	switch to delete mode
 *	other	must satisfy isvalid(); in insert mode it is inserted
 *		after the cursor and the cursor moves onto it, in delete
 *		mode it must equal the character under the cursor, which
 *		is then removed
 *
 * NOTE(review): nothing checks that the result still fits in buf, and
 * the malloc() result is used unchecked.
 */
char *
makemod(word, mod)
	char *word, *mod;
{
	char buf[BUFSIZ];
	register char *s, *t, *r;
	int addmode = 1;

	strcpy(buf, word);
	s = buf;
	for (t = mod; *t; t++) {
		switch (*t) {
			case '$':
				/* Stop on the character before the NUL. */
				while (s[1])
					s++;
				break;
			case '^':
				/* One before the start, so that an insert
				 * "after the cursor" lands in buf[0].
				 */
				s = buf - 1;
				break;
			case '.':
				/* Shift the tail (NUL included) right by
				 * one, starting at the cursor; s[0] is left
				 * in place, so it ends up doubled.
				 */
				for (r = s; *r; r++)
					;
				for ( ; r >= s; r--)
					r[1] = r[0];
				s++;
				break;
			case '-':
				addmode = 0;
				break;
			case '+':
				addmode = 1;
				break;
			default:
				if (!isvalid(*t)) {
					/* Ack.. */
					return (NULL);
				} else if (addmode) {
					/* Open a gap after the cursor and
					 * drop the new character into it.
					 */
					for (r = s + 1; *r; r++)
						;
					for ( ; r > s; r--)
						r[1] = r[0];
					*++s = *t;
				} else {
					/* NOTE(review): right after a '^'
					 * this test reads buf[-1] -- confirm
					 * that scripts never do that.
					 */
					if (*s != *t) {
						/* What now? */
						return (NULL);
					}
					/* Strange case... */
					while (s < buf)
						s++;
					/* Close up over the deleted
					 * character.
					 */
					for (r = s; *r; r++)
						r[0] = r[1];
					/* Deleted the last character: step
					 * back onto the new last one.
					 */
					if (!*s)
						s--;
				}
		}
	}
	s = malloc(strlen(buf) + 1);
	strcpy(s, buf);
	return (s);
}

/* main(ac, av) char **av;{ printf("%s\n", makemod(av[1], av[2])); exit(0); } */

!Funky!Stuff!
echo x - readdict.c
cat >readdict.c <<'!Funky!Stuff!'

/* RCS Info: $Revision: 1.3 $ on $Date: 85/10/08 18:35:20 $
 *           $Source: /ic4/faustus/src/spellfix/RCS/readdict.c,v $
 * Copyright (c) 1985 Wayne A. Christopher, U. C. Berkeley CAD Group
 *	Permission is granted to do anything with this code except sell it
 *	or remove this message.
 */

#include "spellfix.h"

/* This routine creates the word table from the dictionary -- it returns
 * the number of words read.
 */

/*
 * Read a dictionary into memory.  `dictfile' names the file (DICTFILE
 * if it is NULL).  The first field of every line -- everything up to a
 * blank, tab or newline -- becomes one word.  On success *ptr is set to
 * a malloc'd array of pointers to the words, which all live in a single
 * malloc'd chunk, and the number of words is returned.  On failure a
 * message is printed, *ptr is left alone and 0 is returned.
 *
 * The file is scanned twice: once to count lines and bytes so that
 * exactly two allocations are needed, and once to copy the words.  A
 * final line with no newline is counted as well.
 */
readdict(dictfile, ptr)
	char *dictfile;
	char ***ptr;
{
	register FILE *fp;
	FILE *fopen();
	register int i = 0, c, j = 0;
	int last = '\n';
	char buf[BUFSIZ];
	register char **words = NULL, *s, *field;

	if (!dictfile)
		dictfile = DICTFILE;
	if (!(fp = fopen(dictfile, "r"))) {
		perror(dictfile);
		return (0);
	}
	while ((c = getc(fp)) != EOF) {
		if (c == '\n')
			i++;
		j++;
		last = c;
	}
	/* Don't lose a last line that has no newline. */
	if (last != '\n')
		i++;
	rewind(fp);

	/* Now get one big chunk of memory for this file.  The extra byte
	 * and slot cover the NUL of an unterminated last line, and keep
	 * an empty file from asking malloc() for zero bytes.
	 */
	field = malloc(j + 1);
	words = (char **) malloc((i + 1) * sizeof (char *));
	if (!words || !field) {
		fprintf(stderr, "drat, malloc failed\n");
		fclose(fp);
		return (0);
	}

	for (c = 0; c < i; c++) {
		/* Stop at a short read instead of re-using a stale buf. */
		if (!fgets(buf, BUFSIZ, fp))
			break;
		words[c] = field;
		for (s = buf; *s && (*s != ' ') && (*s != '\t') &&
				(*s != '\n'); s++)
			*field++ = *s;
		*field++ = '\0';
	}
	/* Callers read several dictionaries; don't leak the descriptor. */
	fclose(fp);
	*ptr = words;

	return (c);
}

/* main() { char **p; printf("%d entries\n", readdict((char *) NULL, &p)); }*/

!Funky!Stuff!
echo x - spellcheck.c
cat >spellcheck.c <<'!Funky!Stuff!'

/* RCS Info: $Revision: 1.3 $ on $Date: 86/04/02 10:33:31 $
 *           $Source: /ic4/faustus/src/spellfix/RCS/spellcheck.c,v $
 * Copyright (c) 1985 Wayne A. Christopher, U. C. Berkeley CAD Group
 *	Permission is granted to do anything with this code except sell it
 *	or remove this message.
 *
 * spellcheck -- takes a list of sets of words on the standard input,
 *	of the form <word1> <word2> <junk> <word3> <junk> ... <wordn> <junk>
 * 	and outputs those lines for which none of the wordi are in the
 *	dictionary. We are very liberal with memory, unlike spell...
 */

#include "spellfix.h"
#include <sys/file.h>
#include <pwd.h>

int debug = 0;

/* Usage is spellcheck [badfile goodfile]. */
/*
 * spellcheck driver.  With no arguments the rejected lines go to the
 * standard output; with two arguments they go to av[1] and the accepted
 * lines go to av[2].
 *
 * Each input line is "<word1> <word2> <junk> <word3> <junk> ...".  A
 * line is accepted as soon as one of its words is all digits, is found
 * in the main dictionary (binary search, case-insensitive, ignoring
 * ', & and .), or is found in ~/.spellrc (exact match, linear search).
 */
main(ac, av)
	char **av;
{
	char **words, **rcwords, *text;
	int nwords, nrcwords;
	char buf[BUFSIZ], wbuf[BUFSIZ];
	register int i, j, k, found;
	register char *s, *t;
	char c, d;
	FILE *good, *bad, *fopen();
	struct passwd *pwd, *getpwuid();

	if (ac == 1) {
		bad = stdout;
		good = NULL;
	} else if (ac == 3) {
		if (!(bad = fopen(av[1], "w"))) {
			perror(av[1]);
			exit(1);
		}
		if (!(good = fopen(av[2], "w"))) {
			perror(av[2]);
			exit(1);
		}
	} else {
		fprintf(stderr, "Usage: %s [badfile] [goodfile]\n", av[0]);
		exit(1);
	}

	pwd = getpwuid(getuid());
	/* Either one missing is fatal; test pwd before looking inside. */
	if (!pwd || !pwd->pw_dir)
		exit(1);
	
	sprintf(buf, "%s/.spellrc", pwd->pw_dir);
	if (!access(buf, R_OK))
		nrcwords = readdict(buf, &rcwords);
	else
		nrcwords = 0;

	nwords = readdict((char *) NULL, &words);
	if (nwords <= 0)
		exit(1);

	while (fgets(buf, BUFSIZ, stdin)) {
		found = 0;
		for (text = buf; *text; ) {
			/* Grab the next word. */
			for (s = text, t = wbuf; *s && (*s != ' ') && 
					(*s != '\n'); )
				*t++ = *s++;
			*t = '\0';
			if (!*wbuf)
				break;
			/* Make sure text points to the next good stuff. */
			if (text != buf) {
				/* Past the first word every candidate is
				 * followed by a junk field; skip that too.
				 */
				text = s;
				while (*text && (*text == ' '))
					text++;
				while (*text && (*text != ' '))
					text++;
				while (*text && (*text == ' '))
					text++;
			} else {
				text = s;
				while (*text && (*text == ' '))
					text++;
			}

			/* Now wbuf is the word we want. Make sure it
			 * isn't a number first...
			 */
			for (s = wbuf; *s && isdigit(*s); s++)
				;
			if (!*s) {
				found = 1;
				break;
			}
			/* Binary search; (i, j) is the open interval that
			 * may still hold the word.
			 */
			i = -1;
			j = nwords;
			for (;;) {
				k = (i + j) / 2;
				if ((k == i) || (k == j))
					break;
				if (debug)
					printf("%s =? %s\n", wbuf, words[k]);
				for (s = wbuf, t = words[k]; ; ) {
					/* Ignore all this junk they allow
					 * in /usr/dict/words.
					 */
					while ((*s == '\'') || (*s == '&') ||
							(*s == '.'))
						s++;
					while ((*t == '\'') || (*t == '&') ||
							(*t == '.'))
						t++;
					c = isupper(*s) ? tolower(*s) : *s;
					d = isupper(*t) ? tolower(*t) : *t;
					if (c > d) {
						i = k;
						break;
					} else if (c < d) {
						j = k;
						break;
					} else if (!c && !d) {
						/* Both ended together. */
						if (words[k][1]) {
							found = 1;
						} else {
							/* No 1-letter matches...
							 * Collapse the interval
							 * so the search stops
							 * instead of walking
							 * past both NULs.
							 */
							i = j = k;
						}
						break;
					} else {
						s++;
						t++;
					}
				}
				if (found)
					break;
			}
			if (found)
				break;
			else {
				/* See if it was in .spellrc... */
				for (i = 0; i < nrcwords; i++)
					if (!strcmp(wbuf, rcwords[i])) {
						found = 1;
						break;
					}
			}
		}
		if (found) {
			if (good)
				fputs(buf, good);
			continue;
		} else {
			fputs(buf, bad);
		}
	}
	exit(0);
}

!Funky!Stuff!
echo x - spellfix.c
cat >spellfix.c <<'!Funky!Stuff!'

/* RCS Info: $Revision: 1.5 $ on $Date: 86/04/02 10:33:36 $
 *           $Source: /ic4/faustus/src/spellfix/RCS/spellfix.c,v $
 * Copyright (c) 1985 Wayne A. Christopher, U. C. Berkeley CAD Group
 *	Permission is granted to do anything with this code except sell it
 *	or remove this message.
 *
 * This is the main control program for the spellfix system. Usage is
 *	spellfix document
 */

#include "spellfix.h"
#include <sys/file.h>
#include <curses.h>
#include <pwd.h>

int debug = 1;

/*
 * spellfix driver.  Usage: spellfix document
 *
 *  1. deroff + sort the document into a word list, run it through
 *     baseword and spellcheck to split it into misspelled words
 *     (tempf1) and correct words (tempf3).
 *  2. Load the dictionaries: ~/.spellrc, the document's own correct
 *     words, the frequent-word list and the main dictionary.
 *  3. Under curses, let fixword() propose a correction for each
 *     misspelled word and write "bad good" pairs to tempf2.
 *  4. On confirmation hand the pairs to wordchange; otherwise save them
 *     as <document>.changes.
 *
 * NOTE(review): av[1] is pasted unquoted into shell command lines and
 * into fixed-size buffers, so unusual or very long file names will
 * misbehave.
 */
main(ac, av, ep)
	char **av, **ep;
{
	char buf[BUFSIZ];
	/* mktemp() rewrites its argument in place, so the templates have
	 * to live in writable storage, not in string literals.
	 */
	static char tmpl1[] = "/tmp/spf1XXXXXX";
	static char tmpl2[] = "/tmp/spf2XXXXXX";
	static char tmpl3[] = "/tmp/spf3XXXXXX";
	char *tempf1 = tmpl1;
	char *tempf2 = tmpl2;
	char *tempf3 = tmpl3;
	char **words[NDICTS];
	int nwords[NDICTS], i, c;
	FILE *fp, *out, *fopen();
	char *mktemp(), *fixword(), *getbuf(), *badword, *goodword, *s;
	struct passwd *pwd, *getpwuid();

	if (!strcmp(av[0], "spell")) {
		/* Spell emulation mode. */
	}
	if (ac != 2) {
		fprintf(stderr, "Usage: %s document\n", av[0]);
		exit(1);
	}

	/* This is cleaner than letting sh find the error. */
	if (access(av[1], 0)) {
		fprintf(stderr, "Error: can't open %s.\n", av[1]);
		exit(1);
	} else if (access(av[1], W_OK | R_OK)) {
		fprintf(stderr, "Error: %s must be readable and writable.\n",
				av[1]);
		exit(1);
	}

	printf("Starting up... "); fflush(stdout);
	tempf1 = mktemp(tempf1);
	tempf2 = mktemp(tempf2);
	tempf3 = mktemp(tempf3);

	/* Read the .spellrc file... */
	pwd = getpwuid(getuid());
	/* getpwuid() gives NULL for a uid with no passwd entry. */
	if (!pwd || !pwd->pw_dir) {
		fprintf(stderr, "Hey, you don't have a home directory.\n");
		exit(1);
	}
	sprintf(buf, "%s/.spellrc", pwd->pw_dir);
	if (!access(buf, R_OK))
		nwords[0] = readdict(buf, &words[0]);
	else
		nwords[0] = 0;

	/* First prep the doc. We won't use prep because deroff is
	 * better and more standard.
	 */
	sprintf(buf, "/usr/bin/deroff -w < %s | /usr/bin/sort -u > %s", 
			av[1], tempf1);
	if (system(buf))
		exit(1);
	
	/* Now make up the decomposition list of the words. */
	sprintf(buf, "%s/baseword < %s > %s", BINDIR, tempf1, tempf2);
	if (system(buf))
		exit(1);
	
	/* Separate them into bad (tempf1) and good (tempf3) files. */
	printf("done.\nIdentifying misspelled words... "); fflush(stdout);
	sprintf(buf, "%s/spellcheck %s %s < %s", BINDIR, tempf1, tempf3, 
			tempf2);
	if (system(buf))
		exit(1);

	unlink(tempf2);

	/* Count the misspelled words (one per line). */
	i = 0;
	if (!(fp = fopen(tempf1, "r"))) {
		perror(tempf1);
		exit(1);
	}
	while ((c = getc(fp)) != EOF)
		if (c == '\n')
			i++;
	fclose(fp);

	/* Now read in the good file as a dictionary. */
	printf("done (%d misspelled word%s).\nReading dictionaries...\n", i,
			(i == 1) ? "" : "s");
	nwords[1] = readdict(tempf3, &words[1]);
	printf("%d word%s in document correct...\n", nwords[1],
			(nwords[1] == 1) ? "" : "s");
	if (nwords[1] <= 0) {
		/* Probably this should be an error. */
		printf("Wow, not a single word spelled right!\n");
	}

	unlink(tempf3);

	sprintf(buf, "%s/freqwords", BINDIR);
	nwords[2] = readdict(buf, &words[2]);
	printf("Read %d frequently used words...\n", nwords[2]);
	if (nwords[2] <= 0)
		exit(1);
	
	nwords[3] = readdict((char *) NULL, &words[3]);
	printf("And %d words from the dictionary.\n", nwords[3]);
	if (nwords[3] <= 0)
		exit(1);
	
	/* Now for each word, try to correct it. */
	if (!(fp = fopen(tempf1, "r"))) {
		perror(tempf1);
		exit(1);
	}
	if (!(out = fopen(tempf2, "w"))) {
		perror(tempf2);
		exit(1);
	}

	initscr();
	crmode();

	/* It looks like curses likes to trash my environment... */
	for (i = 0; ep[i]; i++)
		if (!index(ep[i], '='))
			ep[i] = "CURSES=shit";

	/* Read words from tempf1, get the corrections, and write
	 * bad/good pairs to tempf2.
	 */
	i = 0;
	while (fgets(buf, BUFSIZ, fp)) {
		goodword = fixword(buf, words, nwords, 4, av[1]);
		if (!goodword)
			/* NULL: stop asking and go write what we have. */
			break;
		else if (goodword == (char *) 1) {
			/* (char *) 1: quit without saving anything. */
			system("stty -tabs");
			unlink(tempf1);
			unlink(tempf2);
			exit(0);
		}
		badword = buf;
		for (s = buf; isvalid(*s); s++)
			;
		*s = '\0';
		if (goodword && *goodword && strcmp(badword, goodword)) {
			fprintf(out, "%s %s\n", badword, goodword);
			i++;
		}
	}
	if (i)
		sprintf(buf, "%d words corrected, ok to write out changes? ",
				i);
	else
		strcpy(buf, "No words corrected.");
	mvaddstr(22, 0, buf);
	refresh();
	if (i)
		strcpy(buf, getbuf());
	endwin();
	system("stty -tabs");	/* Damn... */
	fclose(out);
	fclose(fp);
	unlink(tempf1);

	if (!i) {
		unlink(tempf2);
		putchar('\n');
		exit(0);
	}
	if ((buf[0] != 'y') && (buf[0] != 'Y')) {
		printf("\nOk, aborting... Changes saved in %s.changes.\n",
				av[1]);
		printf("This file is valid input for wordchange(1).\n");
		sprintf(buf, "/bin/cp %s %s.changes", tempf2, av[1]);
		system(buf);
		unlink(tempf2);
		exit(0);
	}

	printf(" ... "); fflush(stdout);
	sprintf(buf, "%s/wordchange %s < %s", BINDIR, av[1], tempf2);
	if (system(buf))
		exit(1);
	unlink(tempf2);
	printf("done.\nSo long...\n");
	exit(0);
}

!Funky!Stuff!
echo x - wordchange.c
cat >wordchange.c <<'!Funky!Stuff!'

/* RCS Info: $Revision: 1.2 $ on $Date: 85/10/08 18:35:24 $
 *           $Source: /ic4/faustus/src/spellfix/RCS/wordchange.c,v $
 * Copyright (c) 1985 Wayne A. Christopher, U. C. Berkeley CAD Group
 *	Permission is granted to do anything with this code except sell it
 *	or remove this message.
 *
 * This program is called by wordchange file ..., and it reads the
 * standard input for pairs of words -- an old word, and a new word
 * to replace it with.
 */

#include "spellfix.h"

/*
 * wordchange driver.  The standard input holds "old new" pairs, one per
 * line; every file named on the command line is rewritten in place with
 * each whole-word occurrence of an old word replaced by its new word.
 *
 * The pairs are first spooled to a temporary file so they can be
 * counted, then read back into the old[] and new[] arrays.  The same
 * temporary file is then re-used as scratch space for the edited text
 * of each document before it is copied back over the original.
 *
 * NOTE(review): a replacement that lengthens a line is spliced in place
 * in buf with no check against BUFSIZ.
 */
main(ac, av)
	char **av;
{
	/* mktemp() rewrites its argument, so the template must be
	 * writable storage rather than a string literal.
	 */
	static char tmpl[] = "/tmp/wchXXXXXX";
	char *tempf = tmpl, *mktemp();
	FILE *fp, *tp, *fopen();
	char buf[BUFSIZ];
	register char **old, **new, *s, *t;
	register int i = 0, j, k;
	int nwords = 0;
	char *beg;

	/* First collect all the changes. */
	tempf = mktemp(tempf);
	if (!(tp = fopen(tempf, "w+"))) {
		perror(tempf);
		exit(1);
	}
	while (fgets(buf, BUFSIZ, stdin)) {
		nwords++;
		fputs(buf, tp);
	}
	rewind(tp);

	old = (char **) malloc(nwords * sizeof (char *));
	new = (char **) malloc(nwords * sizeof (char *));
	if (nwords && (!old || !new)) {
		fprintf(stderr, "wordchange: out of memory\n");
		unlink(tempf);
		exit(1);
	}

	/* Now read them back in. */
	while (fgets(buf, BUFSIZ, tp)) {
		if (!(old[i] = malloc(strlen(buf) + 1))) {
			fprintf(stderr, "wordchange: out of memory\n");
			unlink(tempf);
			exit(1);
		}
		strcpy(old[i], buf);
		/* Cut the line in two at the first blank; new[i] points
		 * into the same allocation as old[i].
		 */
		for (s = old[i]; *s && (*s != ' '); s++)
			;
		if (*s) {
			*s = '\0';
			new[i] = s + 1;
		} else
			new[i] = s;
		for (s = new[i]; *s && (*s != ' ') && (*s != '\t') &&
				(*s != '\n'); s++)
			;
		*s = '\0';
		i++;
	}

	/* Now for each file, go through and do the replacements. */
	for (av++; *av; av++) {
		if (!(fp = fopen(*av, "r+"))) {
			perror(*av);
			continue;
		}
		rewind(tp);

		while (fgets(buf, BUFSIZ, fp)) {
			/* Now do the substitutions on this line. */
			for (i = 0; i < nwords; i++) {
				for (s = buf; *s; ) {
					/* Find the beginning of a word.
					 * Stop at the NUL too, or a line
					 * ending in non-word characters
					 * would send s off the end of buf.
					 */
					while (*s && !isvalid(*s))
						s++;
					while (*s && (*s == '\''))
						s++;
					beg = s;
					for (t = old[i]; *t; t++)
						if (*t != *s)
							break;
						else
							s++;
					/* No match, or only a prefix of a
					 * longer word: skip the rest of it.
					 */
					if (*t || isvalid(*s)) {
						while (isvalid(*s))
							s++;
						continue;
					}
					/* Now splice the new word in. Don't
					 * look at this code or you will die.
					 */
					k = strlen(old[i]);
					j = k - strlen(new[i]);
					if (j > 0) {
						/* New word shorter: close up. */
						for (t = beg + j; *t; t++)
							t[-j] = t[0];
						t[-j] = '\0';
					} else if (j < 0) {
						/* New word longer: open a gap. */
						for (t = s; *t; t++)
							;
						for (; t >= beg + k; t--)
							t[-j] = t[0];
					}
					for (t = new[i]; *t; t++)
						*beg++ = *t;
					s = beg;	/* Important... */
				}
			}
			fputs(buf, tp);
		}
		/* Copy the edited text back over the document. */
		ftruncate(fileno(tp), ftell(tp));
		rewind(tp);
		rewind(fp);
		while ((i = fread(buf, 1, BUFSIZ, tp)) > 0)
			fwrite(buf, 1, i, fp);
		ftruncate(fileno(fp), ftell(fp));
		fclose(fp);
	}
	fclose(tp);
	unlink(tempf);
	exit(0);
}

!Funky!Stuff!
echo x - freqwords
cat >freqwords <<'!Funky!Stuff!'
the
and
that
for
you
are
with
have
not
this
can
but
was
from
will
they
one
!Funky!Stuff!