faustus@ucbcad.BERKELEY.EDU (Wayne A. Christopher) (07/05/86)
Here is a program that I wrote to interactively correct spelling errors... It is *SLOW* but it has a reasonable closest-match algorithm that I made up by trial and error... It sounds a lot like the one you mention from BYTE... # The rest of this file is a shell script which will extract: # READ.ME spellfix.1 Makefile spellfix.h adjacent.c baseword.c compare.c dconvert.c fixwords.c freqcount.c getbuf.c makeadj.c makemod.c readdict.c spellcheck.c spellfix.c wordchange.c freqwords echo x - READ.ME cat >READ.ME <<'!Funky!Stuff!' This is the ultimate spelling correction program. It identifies misspelled words, and then tries to figure out how they should be spelled, using closest-match heuristics on words in the dictionary. You will have to compile several programs, and install them in a public directory -- they are baseword, spellcheck, and wordchange. You should also copy the file freqwords to this directory. Then change the definition of BINDIR in spellfix.h to this path. This program is a prototype -- it is VERY slow, and the user interface needs a lot of work. I don't have time to work on it, so if anybody makes any improvements I will be very happy to see them. Note that the file freqwords has only a few words in it -- you'll have to assemble your own list if you want it... It's a performance enhancement, so spellfix will run without it... !Funky!Stuff! echo x - spellfix.1 cat >spellfix.1 <<'!Funky!Stuff!' .\" RCS Info: $Revision: 1.2 $ on $Date: 86/04/02 10:33:33 $ .\" $Source: /ic4/faustus/src/spellfix/RCS/spellfix.1,v $ .\" Copyright (c) 1985 Wayne A. Christopher, U. C. Berkeley CAD Group .\" Permission is granted to do anything with this code except sell it .\" or remove this message. .TH SPELLFIX 1 "October 10, 1985" .UC 4 .SH NAME spellfix, baseword, spellcheck, wordchange \- interactively correct spelling .SH SYNOPSIS .B spellfix document .br .SH DESCRIPTION .B spellfix interactively corrects the spelling in .I document. 
For each misspelled word, it determines a list of possible correct spellings for the word and displays them on the screen, updating the list as it finds more. It searches in order the list of correctly spelled words in the document, a list of the 4000 most frequent words, and the full dictionary .I /usr/dict/words. Also, if the user has a file called .I .spellrc in his home directory, words in this file are considered properly spelled. .PP As the program is displaying words, the user may type a space to stop the process. He is then prompted for a command, which may be one of: .TP E Enter the correct spelling of the word, terminated by a newline. .TP ? Print out a help message. .TP Q Exit .B spellfix immediately (doesn't save changes). .TP W Write out the changes so far and exit. .PP Any other character is taken to be a choice of one of the words currently displayed. .PP A line of context is provided, so that the user may see where the word is used. Only one line in which the misspelled word appears is printed, though. .PP .B spellfix makes use of several other (possibly useful) programs: .TP .B baseword Reads a list of words on the standard input, and for each word, outputs a line consisting of: .IP \fIword0 word1 change1 word2 change2 ... wordN changeN\fR Where .I word0 is the original word, and each .I wordI is a possible base word for .I word0, that is, one with all the prefixes and suffixes stripped off. The corresponding .I changeI is an editing script that describes how to reconstruct the original word. The characters in this script have the following meanings: .br .nf $ Go to the end of the word. ^ Go to the beginning of the word \&. Duplicate the current letter. + Add the following characters. - Delete the following characters. c Any other character is either added or deleted, depending on whether a '+' or a '-' was last executed.
.TP .B spellcheck [bad good] Takes as input the output of baseword and outputs the lines for which none of the possible base words is in the dictionary. .TP \fBwordchange file ...\fR Reads .I old new pairs from the standard input and for every occurrence of .I old in any of the files, replaces .I new. (Except when .I old is a subset of another word.) .SH "SEE ALSO" spell(1) .SH AUTHOR Wayne Christopher (faustus@cad.berkeley.edu) .SH BUGS .PP Suffixes and prefixes aren't dealt with well enough. !Funky!Stuff! echo x - Makefile cat >Makefile <<'!Funky!Stuff!' # # RCS Info: $Revision: 1.4 $ on $Date: 86/04/02 10:33:03 $ # $Source: /ic4/faustus/src/spellfix/RCS/Makefile,v $ # Copyright (c) 1985 Wayne A. Christopher, U. C. Berkeley CAD Group # # Makefile for the spellfix system # # For dependencies, see below in LDFLAGS and CFLAGS. CC = cc HFILES =\ spellfix.h CFILES =\ adjacent.c\ baseword.c\ compare.c\ fixwords.c\ freqcount.c\ getbuf.c\ makeadj.c\ makemod.c\ readdict.c\ spellcheck.c\ spellfix.c SFILES = OFILES =\ adjacent.o\ baseword.o\ compare.o\ fixwords.o\ freqcount.o\ getbuf.o\ makeadj.o\ makemod.o\ readdict.o\ spellcheck.o\ spellfix.o SOURCE= $(CFILES) $(SFILES) ALLFILES= $(SOURCE) $(HFILES) INCLUDE = -I../h # Compile flags are as follows: # UNIX Compiling under unix # BSD Compiling under 4.2 BSD unix # VAX Compiling for a vax DEFINES = -DUNIX -DBSD -DVAX CFLAGS = $(DEFINES) $(INCLUDE) -g LINTFLAGS = -huca -DLINT $(DEFINES) $(INCLUDE) LDFLAGS = -g -z -lcurses -ltermlib -lm .c.o: $*.c $(CC) $(CFLAGS) -c $*.c all: baseword spellcheck spellfix makeadj freqcount wordchange baseword: baseword.o $(CC) baseword.o $(LDFLAGS) -o baseword spellcheck: spellcheck.o readdict.o $(CC) spellcheck.o readdict.o $(LDFLAGS) -o spellcheck spellfix: spellfix.o readdict.o compare.o adjacent.o fixwords.o \ makemod.o getbuf.o $(CC) spellfix.o readdict.o compare.o adjacent.o fixwords.o \ makemod.o getbuf.o $(LDFLAGS) -o spellfix makeadj: makeadj.o $(CC) makeadj.o $(LDFLAGS) -o makeadj 
wordchange: wordchange.o $(CC) wordchange.o $(LDFLAGS) -o wordchange freqcount: freqcount.o $(CC) freqcount.o $(LDFLAGS) -o freqcount lint: $(SOURCE) lint $(LINTFLAGS) $(SOURCE) qgrind: $(ALLFILES) qgrind -lc $(ALLFILES) source: $(SOURCE) tags: /tmp ctags -t ../*/*.c ../*/*.h > /dev/null 2>&1 wc: $(ALLFILES) @wc $(ALLFILES) print: $(ALLFILES) @pr $(ALLFILES) clean: rm -f $(OFILES) foo make.out tags a.out $(ALLFILES): co -l $@ depend: $(SOURCE) cc -M $(CFLAGS) $(CFILES) $(SFILES) > makedep echo '/^# DO NOT DELETE THIS LINE/+2,$$d' >eddep echo '$$r makedep' >>eddep echo 'w' >>eddep ed - Makefile < eddep rm eddep makedep echo '# DEPENDENCIES MUST END AT END OF FILE' >> Makefile echo '# IF YOU PUT STUFF HERE IT WILL GO AWAY' >> Makefile echo '# see make depend above' >> Makefile #----------------------------------------------------------------- # DO NOT DELETE THIS LINE -- make depend uses it # DEPENDENCIES MUST END AT END OF FILE adjacent.o: adjacent.c adjacent.o: ./spellfix.h adjacent.o: /usr/include/stdio.h adjacent.o: /usr/include/ctype.h baseword.o: baseword.c baseword.o: ./spellfix.h baseword.o: /usr/include/stdio.h baseword.o: /usr/include/ctype.h compare.o: compare.c compare.o: ./spellfix.h compare.o: /usr/include/stdio.h compare.o: /usr/include/ctype.h fixwords.o: fixwords.c fixwords.o: ./spellfix.h fixwords.o: /usr/include/stdio.h fixwords.o: /usr/include/ctype.h fixwords.o: /usr/include/curses.h fixwords.o: /usr/include/stdio.h fixwords.o: /usr/include/sgtty.h fixwords.o: /usr/include/sys/ioctl.h fixwords.o: /usr/include/sys/ttychars.h fixwords.o: /usr/include/sys/ttydev.h fixwords.o: /usr/include/sys/time.h fixwords.o: /usr/include/time.h freqcount.o: freqcount.c freqcount.o: /usr/include/stdio.h freqcount.o: /usr/include/ctype.h getbuf.o: getbuf.c getbuf.o: /usr/include/sys/ioctl.h getbuf.o: /usr/include/sys/ttychars.h getbuf.o: /usr/include/sys/ttydev.h getbuf.o: /usr/include/ctype.h getbuf.o: /usr/include/curses.h getbuf.o: /usr/include/stdio.h getbuf.o: 
/usr/include/sgtty.h makeadj.o: makeadj.c makeadj.o: /usr/include/stdio.h makeadj.o: /usr/include/ctype.h makemod.o: makemod.c makemod.o: ./spellfix.h makemod.o: /usr/include/stdio.h makemod.o: /usr/include/ctype.h readdict.o: readdict.c readdict.o: ./spellfix.h readdict.o: /usr/include/stdio.h readdict.o: /usr/include/ctype.h spellcheck.o: spellcheck.c spellcheck.o: ./spellfix.h spellcheck.o: /usr/include/stdio.h spellcheck.o: /usr/include/ctype.h spellfix.o: spellfix.c spellfix.o: ./spellfix.h spellfix.o: /usr/include/stdio.h spellfix.o: /usr/include/ctype.h spellfix.o: /usr/include/sys/file.h spellfix.o: /usr/include/curses.h spellfix.o: /usr/include/stdio.h spellfix.o: /usr/include/sgtty.h spellfix.o: /usr/include/sys/ioctl.h spellfix.o: /usr/include/sys/ttychars.h spellfix.o: /usr/include/sys/ttydev.h # DEPENDENCIES MUST END AT END OF FILE # IF YOU PUT STUFF HERE IT WILL GO AWAY # see make depend above !Funky!Stuff! echo x - spellfix.h cat >spellfix.h <<'!Funky!Stuff!' /* RCS Info: $Revision: 1.4 $ on $Date: 86/04/02 10:33:39 $ * $Source: /ic4/faustus/src/spellfix/RCS/spellfix.h,v $ * Copyright (c) 1985 Wayne A. Christopher, U. C. Berkeley CAD Group * Permission is granted to do anything with this code except sell it * or remove this message. */ #include <stdio.h> #include <ctype.h> #define isvalid(ch) (isupper(ch) || islower(ch) || ((ch) == '\'') || \ isdigit(ch)) #define DICTFILE "/usr/dict/words" #define BINDIR "/usr/public/lib/spellfix" #define SIZE 128 /* The maximum size of a word. */ #define NLETTERS 26 /* # letters in the alphabet. */ #define NOCHANCE 100000 /* No chance of a match between words. */ #define NSAVE 16 /* Keep the top NSAVE choices... */ #define NALTS 256 /* Max # of decompositions given by baseword */ #define NDICTS 8 /* How many dictionaries we can search. */ extern char *malloc(); !Funky!Stuff! echo x - adjacent.c cat >adjacent.c <<'!Funky!Stuff!' 
/* RCS Info: $Revision: 1.1 $ on $Date: 85/09/28 13:16:08 $
 * $Source: /ic4/faustus/src/spellfix/RCS/adjacent.c,v $
 * Copyright (c) 1985 Wayne A. Christopher, U. C. Berkeley CAD Group
 * Permission is granted to do anything with this code except sell it
 * or remove this message.
 */

#include "spellfix.h"

/* Letter adjacency frequency matrix... This is normalized so that the
 * greatest value is ~ 250. This particular matrix was taken from
 * /usr/dict/words... See makeadj.c
 *
 * adjacent[x][y] is the relative frequency of letter 'a' + x being
 * immediately followed by letter 'a' + y (so row 'q' is zero everywhere
 * except in column 'u').
 *
 * The entry for "er" used to read 264, which does not fit in an unsigned
 * char and was silently stored as 8, making the most common pair look
 * like one of the rarest. It is clamped to 255 here.
 */

unsigned char adjacent[NLETTERS][NLETTERS] = {
	{ 1, 56, 76, 58, 5, 13, 44, 9, 41, 1, 17, 155, 57, 229, 2, 44, 1, 174, 72, 183, 31, 20, 14, 6, 23, 6 } ,
	{ 51, 11, 0, 1, 58, 0, 0, 0, 36, 0, 0, 63, 0, 0, 44, 0, 0, 36, 7, 1, 33, 0, 0, 0, 6, 0 } ,
	{ 85, 0, 12, 0, 71, 0, 0, 93, 45, 0, 51, 29, 0, 0, 124, 0, 1, 40, 1, 44, 35, 0, 0, 0, 10, 0 } ,
	{ 37, 3, 1, 14, 116, 2, 8, 3, 73, 1, 0, 10, 4, 1, 36, 1, 0, 26, 9, 1, 19, 2, 5, 0, 13, 0 } ,
	{ 77, 17, 55, 52, 44, 21, 23, 7, 19, 1, 3, 97, 49, 191, 18, 32, 5, 255, 101, 93, 13, 22, 18, 31, 22, 3 } ,
	{ 22, 0, 0, 0, 28, 23, 0, 0, 32, 0, 0, 23, 0, 0, 28, 0, 0, 19, 0, 8, 21, 0, 0, 0, 7, 0 } ,
	{ 39, 1, 0, 0, 66, 0, 13, 24, 28, 0, 0, 19, 4, 11, 25, 0, 0, 38, 3, 2, 21, 0, 0, 0, 12, 0 } ,
	{ 72, 2, 0, 0, 94, 1, 0, 0, 50, 0, 0, 4, 5, 3, 70, 0, 0, 16, 1, 15, 19, 0, 3, 0, 22, 0 } ,
	{ 79, 26, 133, 52, 43, 22, 42, 0, 1, 0, 6, 76, 48, 250, 88, 27, 3, 44, 114, 108, 12, 37, 0, 4, 0, 5 } ,
	{ 8, 0, 0, 0, 7, 0, 0, 0, 1, 0, 0, 0, 0, 0, 9, 0, 0, 0, 0, 0, 9, 0, 0, 0, 0, 0 } ,
	{ 10, 1, 0, 0, 40, 0, 0, 2, 19, 0, 0, 7, 1, 5, 5, 1, 0, 2, 5, 1, 2, 0, 2, 0, 7, 0 } ,
	{ 119, 5, 6, 21, 189, 6, 4, 1, 108, 0, 6, 84, 8, 0, 79, 5, 0, 1, 10, 22, 37, 6, 2, 0, 22, 0 } ,
	{ 106, 22, 6, 0, 87, 1, 0, 0, 69, 0, 0, 1, 19, 3, 56, 42, 0, 0, 4, 0, 21, 0, 0, 0, 9, 0 } ,
	{ 75, 4, 49, 82, 106, 15, 91, 5, 70, 2, 13, 3, 2, 24, 47, 1, 2, 2, 50, 140, 19, 9, 2, 0, 13, 2 } ,
	{ 20, 20, 40, 33, 10, 12, 32, 4, 18, 1, 11, 81, 70, 215, 44, 44, 1, 147, 53, 51, 85, 16, 34, 6, 8, 2 } ,
	{ 58, 1, 0, 0, 75, 0, 0, 41, 45, 0, 0, 36, 1, 0, 56, 23, 0, 58, 13, 22, 18, 0, 0, 0, 8, 0 } ,
	{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 32, 0, 0, 0, 0, 0 } ,
	{ 163, 15, 23, 35, 182, 6, 19, 5, 144, 0, 14, 12, 30, 25, 129, 12, 0, 35, 30, 55, 35, 9, 4, 0, 59, 0 } ,
	{ 45, 3, 44, 1, 93, 2, 1, 62, 87, 0, 11, 14, 23, 7, 52, 42, 4, 1, 52, 158, 34, 1, 11, 0, 17, 0 } ,
	{ 101, 2, 11, 0, 209, 4, 0, 83, 163, 0, 0, 12, 4, 1, 93, 1, 0, 96, 7, 44, 41, 0, 6, 0, 34, 3 } ,
	{ 22, 15, 22, 21, 23, 6, 18, 0, 22, 0, 2, 57, 47, 50, 6, 20, 0, 74, 95, 47, 0, 1, 0, 2, 1, 2 } ,
	{ 27, 0, 0, 0, 83, 0, 0, 0, 35, 0, 0, 0, 0, 0, 13, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0 } ,
	{ 38, 2, 0, 1, 21, 1, 0, 12, 26, 0, 1, 3, 1, 9, 20, 0, 0, 5, 2, 1, 0, 0, 0, 0, 1, 0 } ,
	{ 3, 0, 4, 0, 3, 0, 0, 1, 6, 0, 0, 0, 0, 0, 2, 7, 0, 0, 0, 8, 1, 0, 0, 0, 1, 0 } ,
	{ 8, 2, 8, 5, 9, 1, 2, 1, 1, 0, 0, 9, 8, 7, 7, 9, 0, 7, 10, 5, 1, 0, 2, 0, 0, 0 } ,
	{ 5, 0, 0, 0, 8, 0, 0, 0, 3, 0, 0, 1, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 3 }
} ;
!Funky!Stuff!
echo x - baseword.c
cat >baseword.c <<'!Funky!Stuff!'
/* RCS Info: $Revision: 1.4 $ on $Date: 86/04/02 10:33:10 $
 * $Source: /ic4/faustus/src/spellfix/RCS/baseword.c,v $
 * Copyright (c) 1985 Wayne A. Christopher, U. C. Berkeley CAD Group
 * Permission is granted to do anything with this code except sell it
 * or remove this message.
 *
 * This program takes words with possible prefixes and suffixes and strips
 * them off. The output is a list of pairs: <base word, trans info>, where
 * the transformation info describes how to change the base word into the
 * word given. Each pair describes one possible way to parse the word (so
 * "catting" would produce "catt $+ing cat $.+ing"). The transformation data
 * is basically an editing script: characters have the following
 * interpretation:
 *	$	go to the character at the end of the word
 *	^	go to before the beginning of the word
 *	-	delete the following characters
 *	+	add the following characters (the default mode)
 *	.
duplicate the character we are on now * c any other character means either add this character after the * current one or delete the current one and move to the next * one (if it isn't there the match fails), depending on * whether we last saw a + or a -. * The first word on the output line is the original unmodified input, * and it doesn't have a transformation word. Note that the first * character in the mod must be either '^' or '$', and neither character * can appear anywhere else. This makes things a bit easier to do, * and this is really all that is needed for this application. */ #include "spellfix.h" /* The table of all possible word modifiers. The trans info we output will * be one of these. Note that the process below is the opposite of what we * are specifying in the mods. * Entries starting with '$' are suffix rules and entries starting with * '^' are prefix rules. "$+ity" is listed twice; the second copy can * only ever reproduce the base word found by the first. */ char *mods[] = { "$+ive", "$-e+ive", "$+ism", "$-e+ism", "$+y", "$+es", "$-y+ies", "$+al", "$+s", "$+ing", "$.+ing", "$-e+ing", "$+ion", "$-e+ion", "$+ness", "$-y+iness", "$+like", "$+less", "$-y+iless", "$+ize", "$-e+ize", "$+'s", "$-t+ce", "$+ity", "$-y+iable", "$+able", "$-e+able", "$+.able", "$+ity", "$-e+ity", "$+ly", "$+.ly", "$-y+ily", "$+ment", "$+ater", "$+er", "$-e+er", "$+ed", "$-e+ed", "$+.ed", "$-y+ied", "$+est", "$+.est", "$-e+est", "$-y+iest", "$-y+ication", "$+ship", "^+anti", "^+bio", "^+dis", "^+electro", "^+en", "^+fore", "^+hyper", "^+intra", "^+inter", "^+iso", "^+kilo", "^+magneto", "^+meta", "^+micro", "^+milli", "^+mis", "^+mono", "^+multi", "^+non", "^+out", "^+over", "^+photo", "^+poly", "^+pre", "^+pseudo", "^+re", "^+semi", "^+stereo", "^+sub", "^+super", "^+thermo", "^+ultra", "^+under", "^+un", } ; /* For each word, we have to try to apply as many decompositions as * possible. So we keep a list of words and repeatedly try to * decompose them, adding the results to the list. We may get * several copies of each decomposition this way, so we should try * and print only one. (but we don't...)
*/ main(ac, av) char **av; { char buf[BUFSIZ]; char *poss[NALTS], *change[NALTS], done[NALTS]; char **words, **changes; register char *t, *s; register int i, j, k, l; int nchanges, changemade; char modmade[sizeof (mods) / sizeof (char *)]; while (fgets(buf, BUFSIZ, stdin)) { for (t = buf; *t && (*t != '\n') && (*t != ' ') && (*t != '\t'); t++) ; *t = '\0'; bzero(done, NALTS); bzero(poss, sizeof (char *) * NALTS); bzero(change, sizeof (char *) * NALTS); bzero(modmade, sizeof (modmade)); poss[0] = buf; do { changemade = 0; for (i = 0; i < NALTS; i++) { /* See what we can do with this word. */ if (poss[i] && !done[i]) { nchanges = decomp(poss[i], &words, &changes, modmade); } else continue; for (j = 0, k = 0; j < nchanges; j++) { /* First make sure we don't already * have this one. */ for (l = 0; poss[l]; l++) if (!strcmp(poss[l], words[j])) break; if (poss[l]) continue; /* Find a free place. */ while (poss[k] && (k < NALTS)) k++; if (k == NALTS) { /* None left... */ fprintf(stderr, "Gasp...\n"); goto newword; } poss[k] = words[j]; /* Now concatenate the two mods. */ s = malloc(strlen(changes[j]) + strlen(change[i]) + 1); sprintf(s, "%s%s", changes[j], change[i]); change[k] = s; } if (nchanges) { /* Mark this as already dealt with. */ done[i] = 1; changemade = 1; } } } while (changemade); fputs(poss[0], stdout); for (i = 1; poss[i]; i++) { printf(" %s %s", poss[i], change[i]); free(poss[i]); free(change[i]); } newword: putchar('\n'); } exit(0); } /* Try to decompose this word, stripping off one prefix or suffix. */ decomp(word, wptr, cptr, modmade) char *word; char ***wptr, ***cptr; char *modmade; { register int i, j, k; register char *s, *t, *r; char buf[BUFSIZ], mbuf[BUFSIZ]; int addmode, forward, decount = 0; static char *poss[NALTS], *changes[NALTS]; int nmods = sizeof (mods) / sizeof (char *); for (i = 0; i < nmods; i++) { if (modmade[i]) continue; /* Try to apply the reverse of mods[i] to buf. 
*/ strcpy(buf, word); strcpy(mbuf, mods[i]); addmode = 1; switch (*mbuf) { case '^': forward = 1; s = buf; break; case '$': forward = 0; for (s = buf; s[1]; s++) ; break; default: fprintf(stderr, "Bad mod %s...\n", mbuf); exit(1); } if (forward) { /* Damn tabs. */ for (t = mbuf + 1; *t; t++) { switch (*t) { case '+': addmode = 1; break; case '-': addmode = 0; break; case '.': if ((s > buf) && (s[-1] == s[0])) { for (r = s; *r; r++) r[0] = r[1]; s--; } else goto out; break; default: if (!isvalid(*t)) { fprintf(stderr, "Bad character %c in mod %s\n", *t, t); exit(1); } else if (addmode) { if (*s == *t) { for (r = s; *r; r++) r[0] = r[1]; } else if (isupper(*s) && (tolower(*s) == *t)) { for (r = s; *r; r++) r[0] = r[1]; *t = toupper(*t); } else goto out; } else { for (r = s; *r; r++) ; for (; r >= s; r--) r[1] = r[0]; *s++ = *t; } } } } else { /* In this case, we have to go back from the * end. This is seriously ugly stuff. */ for (t = mbuf; t[1]; t++) ; for (r = t; (r > mbuf) && (*r != '-') && (*r != '+'); r--) ; switch (*r) { case '-': addmode = 0; break; case '+': addmode = 1; break; default: fprintf(stderr, "Bad mod %s\n", mbuf); exit(1); } for (; t > mbuf; t--) { switch (*t) { case '+': case '-': for (r = t - 1; (r > mbuf) && (*r != '-') && (*r != '+'); r--) ; if (*r == '-') addmode = 0; else addmode = 1; break; case '.': if ((s > buf) && (s[-1] == s[0])) { for (r = s; *r; r++) r[0] = r[1]; s--; } else goto out; break; default: if (!isvalid(*t)) { fprintf(stderr, "Bad character %c in mod %s\n", *t, t); exit(1); } else if (addmode) { if (*s == *t) { for (r = s; *r; r++) r[0] = r[1]; s--; } else if (isupper(*s) && (tolower(*s) == *t)) { for (r = s; *r; r++) r[0] = r[1]; s--; *t = toupper(*t); } else goto out; } else { for (r = s; *r; r++) ; for (; r > s; r--) r[1] = r[0]; s[1] = t[0]; } } } } out: if ((forward && !*t) || (t == mbuf)) { /* Ok, this modification works. 
*/ poss[decount] = malloc(strlen(buf) + 1); strcpy(poss[decount], buf); changes[decount] = malloc(strlen(mbuf) + 1); strcpy(changes[decount], mbuf); decount++; modmade[i] = 1; } } *wptr = poss; *cptr = changes; return (decount); } !Funky!Stuff! echo x - compare.c cat >compare.c <<'!Funky!Stuff!' /* RCS Info: $Revision: 1.3 $ on $Date: 85/10/08 18:35:16 $ * $Source: /ic4/faustus/src/spellfix/RCS/compare.c,v $ * Copyright (c) 1985 Wayne A. Christopher, U. C. Berkeley CAD Group * Permission is granted to do anything with this code except sell it * or remove this message. * * Defined: */ #include "spellfix.h" /* This is the major routine -- it determines how well a misspelled word * and a correctly spelled word match. The higher the return value the * worse the match is. We use a stack here so that we can backtrack * (we don't, but we could...) */ struct action { int a_type; /* What we did. */ char *a_where; /* A pointer into the bad string. */ char a_letter; /* What letter we used, if applicable. */ int a_cost; /* How much this cost us. */ } ; #define A_TRANS 1 /* Transpose a_where and a_where + 1. */ #define A_ADD 2 /* Added a_letter after a_where. */ #define A_DELETE 3 /* Deleted the letter at a_where. */ #define A_CHANGE 4 /* Changed the letter at w_where to a_letter. */ #define NACTIONS 256 /* Too many. */ extern unsigned char adjacent[NLETTERS][NLETTERS]; #define freq(x, y) (adjacent[(x) - 'a'][(y) - 'a']) compare(bad, good, maxcost) char *bad, *good; int maxcost; /* If the cost gets higher than this, give up. */ { register int sp = 0; /* Action stack pointer. */ struct action astack[NACTIONS]; /* The action stack. */ register char *badplace = bad; /* Where we are now. */ register char *goodplace = good; int nmatched = 0; /* How many letters we have matched. */ int roll = 0; /* How long we've been on a roll. 
*/ int cost = 0, ntrans = 0, nadds = 0, ndels = 0, nchanges = 0; register int tempcost = 0, i, j; while (*badplace && *goodplace) { /* See what we can do with *badplace and *goodplace. */ if (*badplace == *goodplace) { /* Great, they match. */ badplace++; goodplace++; nmatched++; roll++; } else if ((badplace[0] == goodplace[1]) && (goodplace[0] == badplace[1])) { /* Transpose these two. Neither can be NULL here. */ astack[sp].a_type = A_TRANS; astack[sp].a_where = badplace; astack[sp].a_letter = 0; j = freq(badplace[0], badplace[1]) / 20; i = freq(goodplace[0], goodplace[1]) / 20; if ((badplace > bad) && (goodplace > good)) { j += freq(badplace[-1], badplace[0]) / 40; i += freq(goodplace[-1], goodplace[0]) / 40; } if (badplace[2] && goodplace[2]) { j += freq(badplace[1], badplace[2]) / 40; i += freq(goodplace[1], goodplace[2]) / 40; } if (i > 5) i = 5; if (j > 5) j = 5; astack[sp].a_cost = ++ntrans * 10 + j - i; tempcost += astack[sp].a_cost; roll = 0; sp++; badplace += 2; goodplace += 2; } else if (goodplace[0] == badplace[1]) { /* Delete *badplace. */ astack[sp].a_type = A_DELETE; astack[sp].a_where = badplace; astack[sp].a_letter = 0; if (badplace > bad) { j = (freq(badplace[-1], badplace[0]) + freq(badplace[0], badplace[1]))/ 20; i = freq(badplace[-1], badplace[1]) / 10; if (i > 10) i = 10; if (j > 10) j = 10; } else i = j = 0; astack[sp].a_cost = ++ndels * 20 + j - i; tempcost += astack[sp].a_cost; roll = 0; sp++; badplace++; } else if (badplace[0] == goodplace[1]) { /* Add *goodplace. */ astack[sp].a_type = A_ADD; astack[sp].a_where = badplace - 1; astack[sp].a_letter = goodplace[0]; if (badplace[1]) { i = (freq(badplace[0], goodplace[0]) + freq(goodplace[0], badplace[1]))/20; j = freq(badplace[0], badplace[1]) / 10; if (i > 10) i = 10; if (j > 10) j = 10; } else i = j = 0; astack[sp].a_cost = ++nadds * 20 + j - i; tempcost += astack[sp].a_cost; roll = 0; sp++; goodplace++; } else { /* Change *badplace to *goodplace. This is a last * resort. 
*/ astack[sp].a_type = A_CHANGE; astack[sp].a_where = badplace; astack[sp].a_letter = *goodplace; if ((badplace > bad) && badplace[1]) { j = (freq(badplace[-1], badplace[0]) + freq(badplace[0], badplace[1]))/10; i = (freq(badplace[-1], goodplace[0]) + freq(goodplace[0], badplace[1]))/10; if (i > 20) i = 20; if (j > 20) j = 20; } else i = j = 0; astack[sp].a_cost = ++nchanges * 30 - i + j; tempcost += astack[sp].a_cost; roll = 0; sp++; badplace++; goodplace++; } if (tempcost > maxcost) return (NOCHANCE); } while (*badplace) { /* Delete all these characters. */ astack[sp].a_type = A_DELETE; astack[sp].a_where = badplace; astack[sp].a_letter = 0; astack[sp].a_cost = ++ndels * 20; tempcost += astack[sp].a_cost; roll = 0; sp++; badplace++; } if (tempcost > maxcost) return (NOCHANCE); while (*goodplace) { /* Add all these characters. */ astack[sp].a_type = A_ADD; astack[sp].a_where = badplace - 1; astack[sp].a_letter = goodplace[0]; astack[sp].a_cost = ++nadds * 20; tempcost += astack[sp].a_cost; roll = 0; sp++; goodplace++; } if (tempcost > maxcost) return (NOCHANCE); return (tempcost); } !Funky!Stuff! echo x - dconvert.c cat >dconvert.c <<'!Funky!Stuff!' #include <stdio.h> #include <ctype.h> main() { char buf[BUFSIZ]; register char *s, *t; int pl; while (fgets(buf, BUFSIZ, stdin)) { for (s = buf; *s && (*s != ':'); s++) if (!isalpha(*s)) break; if (*s != ':') continue; while (*++s != ':') ; while (*++s != ':') ; if (*++s == 'p') pl = 1; else pl = 0; s += 2; if (((*s == 'v') && pl) || ((*s == 'n') && !pl) || ((*s != 'v') && (*s != 'n'))) { for (t = buf; isalpha(*t); t++) putchar(*t); printf(" %s", t); } } exit(0); } !Funky!Stuff! echo x - fixwords.c cat >fixwords.c <<'!Funky!Stuff!' /* RCS Info: $Revision: 1.2 $ on $Date: 86/04/02 10:33:17 $ * $Source: /ic4/faustus/src/spellfix/RCS/fixwords.c,v $ * Copyright (c) 1985 Wayne A. Christopher, U. C. Berkeley CAD Group * Permission is granted to do anything with this code except sell it * or remove this message. 
* * Defined: */ #include "spellfix.h" #include <curses.h> #include <sys/time.h> #include <signal.h> #include <setjmp.h> #include <sys/ioctl.h> /* Figure out what the word should be, and return the correction. Every time * we call update the screen, we check to see if the user has typed anything. * Since it is too easy for the output buffer to fill up before the user * can type anything, we temporarily reset the quit character to space and * trap it. */ static jmp_buf jbuf; char * fixword(buf, dict, nwords, numdicts, file) char *buf; char ***dict; int *nwords; char *file; { char word[BUFSIZ], context[BUFSIZ]; static char xbuf[BUFSIZ]; register char *s; register int i, j, k; int nsaved = 0, upper, ss, numw; char *saved[NSAVE]; int scores[NSAVE], dnum; char **words = NULL, *getbuf(); static struct timeval nulltime = { 0, 0 } ; int readfds, writefds = 0, exceptfds = 0, c; FILE *fp, *popen(); for (s = buf, i = 0; isvalid(*s); s++, i++) word[i] = *s; word[i] = '\0'; sprintf(xbuf, "/usr/ucb/grep -w %s %s", word, file); if (!(fp = popen(xbuf, "r"))) { fprintf(stderr, "Can't run %s\n", xbuf); exit(1); } upper = 20 + strlen(word) * 10; if (!upper) { return (NULL); } fgets(context, BUFSIZ, fp); pclose(fp); for (s = context; *s && (*s != '\n'); s++) ; *s = '\0'; s[79] = '\0'; for (i = 0, j = 0; context[i] && word[j]; i++) if (context[i] == word[j]) j++; else j = 0; if (word[j]) { fprintf(stderr, "Help, can't find it!\n"); return (word); } i -= strlen(word); for (k = 0, j = 0; k < i; k++) if (context[k] == '\t') j = (j | 07) + 1; else j++; /* Print the header... */ clear(); sprintf(xbuf, "Misspelled word: %s. Context:", word); mvaddstr(0, 0, xbuf); mvaddstr(2, 0, context); for (i = 0; i < j; i++) xbuf[i] = ' '; for (k = strlen(word); k > 0; k--) xbuf[i++] = '-'; xbuf[i] = '\0'; mvaddstr(3, 0, xbuf); mvaddstr(22, 0, "Hit space to stop... 
"); clrtoeol(); refresh(); siginit(); if (setjmp(jbuf)) { refresh(); mvcur(COLS - 1, LINES - 1, 0, 0); mvcur(0, 0, 22, 0); goto getcom; } for (dnum = 0; dnum < numdicts; dnum++) { words = dict[dnum]; numw = nwords[dnum]; for (i = 0; i < numw; i++) { ss = compare(word, words[i], upper); if (ss == NOCHANCE) continue; /* Stick this word in its proper place. */ for (j = 0; j < nsaved; j++) if (ss < scores[j]) break; for (k = 0; k < nsaved; k++) if (!strcmp(saved[k], words[i])) break; if (k != nsaved) continue; if (j == NSAVE) { continue; } else if (j == nsaved) { saved[j] = words[i]; scores[j] = ss; if (ss < upper) ss = upper; nsaved++; } else { for (k = (nsaved < NSAVE) ? nsaved : nsaved - 1; k > j; k--) { saved[k] = saved[k - 1]; scores[k] = scores[k - 1]; } if (nsaved < NSAVE) nsaved++; saved[j] = words[i]; scores[j] = ss; } /* Update the screen... */ for (k = 0; k < nsaved; k++) { sprintf(xbuf, "%c %-32s %-5d\n", 'a' + k, saved[k], scores[k]); mvaddstr(k + 4, 8, xbuf); } mvaddstr(22, 0, "Hit space to stop... "); clrtoeol(); refresh(); } } getcom: ; sigend(); move(23, 0); clrtoeol(); mvaddstr(22, 0, "Command (? for help): "); clrtoeol(); refresh(); c = getch(); switch (c) { case 'N': return (word); case 'Q': endwin(); puts("\bBye then...\n"); return ((char *) 1); case 'W': return (NULL); case 'E': move(22, 0); clrtoeol(); mvaddstr(22, 0, "Enter word: "); refresh(); strcpy(xbuf, getbuf()); move(22, 0); clrtoeol(); for (s = xbuf; isvalid(*s); s++) ; *s = '\0'; return (xbuf); case '?': mvaddstr(22, 0, "N = ok as is, Q = quit, W = write & quit, E = enter correction,"); mvaddstr(23, 0, "? = help, any other character = select word. (hit space to continue) "); refresh(); getch(); goto getcom; default: i = c - 'a'; if ((i < 0) || (i >= nsaved)) { mvaddstr(22, 0, "Command (? 
for help): "); clrtoeol(); mvaddstr(23, 0, "No such word or function.\n"); refresh(); sleep(1); goto getcom; } return (saved[i]); } } static struct tchars tcbuf; static sigquit() { longjmp(jbuf, 1); /* NOTREACHED */ } static sigint() { ioctl(0, TIOCSETC, &tcbuf); fprintf(stderr, "\nQuit\n"); endwin(); exit(1); } static siginit() { char oquit; ioctl(0, TIOCGETC, &tcbuf); oquit = tcbuf.t_quitc; tcbuf.t_quitc = ' '; ioctl(0, TIOCSETC, &tcbuf); tcbuf.t_quitc = oquit; signal(SIGQUIT, sigquit); signal(SIGINT, sigint); return; } static sigend() { ioctl(0, TIOCSETC, &tcbuf); signal(SIGQUIT, SIG_DFL); signal(SIGINT, SIG_DFL); return; } !Funky!Stuff! echo x - freqcount.c cat >freqcount.c <<'!Funky!Stuff!' /* RCS Info: $Revision: 1.1 $ on $Date: 85/10/04 15:44:37 $ * $Source: /ic4/faustus/src/spellfix/RCS/freqcount.c,v $ * Copyright (c) 1985 Wayne A. Christopher, U. C. Berkeley CAD Group * Permission is granted to do anything with this code except sell it * or remove this message. * * Determine how frequent various words are in text. Usage is * freq dictfile sample ... */ #include <stdio.h> #include <ctype.h> #define HASHSIZE 50021 #define OFFSET 13 struct hashent { char h_word[32]; char h_extra[32]; long h_freq; } ; struct hashent hashtab[HASHSIZE]; main(ac, av) char **av; { FILE *fp, *fopen(); int i = 2; if (ac < 3) exit(1); if (!(fp = fopen(av[1], "r"))) { perror(av[1]); exit(1); } fprintf(stderr, "loading dictionary... "); fflush(stderr); inpdict(fp); fclose(fp); fprintf(stderr, "done.\n"); while (i < ac) { if (!(fp = fopen(av[i], "r"))) { perror(av[i]); exit(1); } fprintf(stderr, "reading file %s... 
", av[i]); fflush(stderr); i++; addfile(fp); fclose(fp); fprintf(stderr, "done.\n"); } printwords(); exit(0); } inpdict(fp) FILE *fp; { char buf[BUFSIZ]; register unsigned int j, x = 0; unsigned int hash(); char *extra; while (fgets(buf, BUFSIZ, fp)) { for (extra = buf; isalpha(*extra); extra++) ; if (*extra && (*extra != '\n')) { *extra = '\0'; extra++; } else { *extra = '\0'; extra = NULL; } j = hash(buf); while (hashtab[j].h_word[0]) { j += OFFSET; j %= HASHSIZE; } strcpy(hashtab[j].h_word, buf); if (extra && *extra) strcpy(hashtab[j].h_extra, extra); hashtab[j].h_freq = 0; if ((++x % 1000) == 0) { fprintf(stderr, "."); fflush(stderr); } } return; } unsigned hash(word) register char *word; { register unsigned int i = 0; register unsigned int p = 5003; while (*word) { i += (*word++ * (i + p)); i %= HASHSIZE; } return (i); } addfile(fp) FILE *fp; { char buf[BUFSIZ]; register char *s; register int j, ct, x = 0; while (fgets(buf, BUFSIZ, fp)) { for (s = buf; isalpha(*s); s++) ; *s = '\0'; j = hash(buf); if (!hashtab[j].h_word[0]) { /* printf("= %s\n", buf); */ continue; } ct = 0; while (strcmp(buf, hashtab[j].h_word)) { j += OFFSET; j %= HASHSIZE; if (ct++ > 100) { /* printf("= %s\n", buf); */ goto moe; } } hashtab[j].h_freq++; moe: ; if ((++x % 1000) == 0) { fprintf(stderr, "."); fflush(stderr); } } return; } printwords() { register int i; for (i = 0; i < HASHSIZE; i++) if (hashtab[i].h_word[0]) printf("%s %d\n", hashtab[i].h_word, hashtab[i].h_freq); return; } !Funky!Stuff! echo x - getbuf.c cat >getbuf.c <<'!Funky!Stuff!' /* RCS Info: $Revision: 1.2 $ on $Date: 86/04/02 10:33:25 $ * $Source: /ic4/faustus/src/spellfix/RCS/getbuf.c,v $ * Copyright (c) 1985 Steve Procter */ #include <sys/ioctl.h> #include <ctype.h> #include <curses.h> #define MAX(a,b) ((a) > (b) ? 
(a) : (b)) #define ISMETA(a) ((a) & 0x80) char * getbuf () { int x; int y; char c; char *s; char *makeprint (); register i; register current = 0; register offset = 0; static char string[BUFSIZ]; struct sgttyb sg, osg; struct ltchars lt, olt; rewind (stdin); wrefresh (stdscr); getyx (stdscr, y, x); bzero (string, sizeof (string)); ioctl (fileno (stdin), TIOCGETP, &sg); bcopy (&sg, &osg, sizeof (struct sgttyb)); sg.sg_flags |= CRMOD; sg.sg_flags &= ~ECHO; ioctl (fileno (stdin), TIOCSETP, &sg); ioctl (fileno (stdin), TIOCGLTC, <); bcopy (<, &olt, sizeof (struct ltchars)); while (((c = getchar ()) != '\015') && (c != '\n')) { c &= 0177; if (c == sg.sg_kill) { current = 0; offset = 0; wmove (stdscr, y, x); wclrtoeol (stdscr); bzero (string, sizeof (string)); } else if (c == sg.sg_erase) { if (current <= 0) { current = 0; offset = 0; wmove (stdscr, y, x); wclrtoeol (stdscr); continue; } offset -= strlen (makeprint (string[--current])); string[current] = NULL; wmove (stdscr, y, x + offset); wclrtoeol (stdscr); } else if (c == lt.t_werasc) { if (current <= 1) { current = 0; offset = 0; } while ((current - 1 >= 0) && (string[current - 1] == ' ')) { string[--current] = NULL; offset--; } while ((current - 1 >= 0) && (string[current - 1] != ' ')) { current--; offset -= strlen (makeprint (string[current])); string[current] = NULL; } wmove (stdscr, y, x + offset); wclrtoeol (stdscr); } else if (c == lt.t_rprntc) { wmove (stdscr, y, x); for (i = 0; i < current; i++) { addstr (makeprint (string[i])); } } else { if (isprint (c)) { string[current++] = c; wmove (stdscr, y, x + offset); waddch (stdscr, c); offset++; } else { s = makeprint (c); wmove (stdscr, y, x + offset); waddstr (stdscr, s); string[current++] = c; offset += strlen (s); } } wrefresh (stdscr); } ioctl (fileno (stdin), TIOCSETP, &osg); ioctl (fileno (stdin), TIOCSLTC, &olt); return (string); } char * makeprint (c) char c; { static char r[5]; bzero (r, sizeof (r)); if (ISMETA(c)) { strcat (r, "^["); return (r); } if 
(iscntrl (c)) { sprintf (r, "^%c", (c | 0x40) & 0x7f); return (r); } r[0] = c; return (r); } !Funky!Stuff! echo x - makeadj.c cat >makeadj.c <<'!Funky!Stuff!' /* RCS Info: $Revision: 1.1 $ on $Date: 85/09/27 23:06:37 $ * $Source: /ic4/faustus/src/spellfix/RCS/makeadj.c,v $ * Copyright (c) 1985 Wayne A. Christopher, U. C. Berkeley CAD Group * Permission is granted to do anything with this code except sell it * or remove this message. */ #include <stdio.h> #include <ctype.h> #define NLETTERS 26 long adjacent[NLETTERS][NLETTERS]; main(ac, av) char **av; { char buf[512]; register char *s; register int i, j, highest = 0; fprintf(stderr, "Reading words..."); fflush(stderr); while (gets(buf)) { for (s = buf; *s; s++) if (!isalpha(*s)) goto moe; for (s = buf; (s[0] && s[1]); s++) { i = (isupper(s[0]) ? tolower(s[0]) : s[0]) - 'a'; j = (isupper(s[1]) ? tolower(s[1]) : s[1]) - 'a'; adjacent[i][j]++; } moe: ; } fprintf(stderr, " done.\n"); fflush(stderr); for (i = 0; i < NLETTERS; i++) for (j = 0; j < NLETTERS; j++) if (adjacent[i][j] > highest) highest = adjacent[i][j]; fprintf(stderr, "Highest frequency = %d, ", highest); highest /= 255; fprintf(stderr, "dividing by %d...", highest); fflush(stderr); for (i = 0; i < NLETTERS; i++) for (j = 0; j < NLETTERS; j++) adjacent[i][j] /= highest; fprintf(stderr, " done.\n"); fflush(stderr); for (i = 0; i < NLETTERS; i++) { printf(" { "); /* { */ for (j = 0; j < NLETTERS; j++) printf("%d%s", adjacent[i][j], (j < NLETTERS - 1) ? ", " : ""); if (i < NLETTERS - 1) printf(" } ,\n"); else printf(" }\n"); } exit (0); } !Funky!Stuff! echo x - makemod.c cat >makemod.c <<'!Funky!Stuff!' /* RCS Info: $Revision: 1.1 $ on $Date: 85/10/08 18:36:16 $ * $Source: /ic4/faustus/src/spellfix/RCS/makemod.c,v $ * Copyright (c) 1985 Wayne A. Christopher, U. C. Berkeley CAD Group * Permission is granted to do anything with this code except sell it * or remove this message. 
* * This routine carries out modifications on base words (see the description * in baseword.c for details). * Defined: makemod */ #include "spellfix.h" char * makemod(word, mod) char *word, *mod; { char buf[BUFSIZ]; register char *s, *t, *r; int addmode = 1; strcpy(buf, word); s = buf; for (t = mod; *t; t++) { switch (*t) { case '$': while (s[1]) s++; break; case '^': s = buf - 1; break; case '.': for (r = s; *r; r++) ; for ( ; r >= s; r--) r[1] = r[0]; s++; break; case '-': addmode = 0; break; case '+': addmode = 1; break; default: if (!isvalid(*t)) { /* Ack.. */ return (NULL); } else if (addmode) { for (r = s + 1; *r; r++) ; for ( ; r > s; r--) r[1] = r[0]; *++s = *t; } else { if (*s != *t) { /* What now? */ return (NULL); } /* Strange case... */ while (s < buf) s++; for (r = s; *r; r++) r[0] = r[1]; if (!*s) s--; } } } s = malloc(strlen(buf) + 1); strcpy(s, buf); return (s); } /* main(ac, av) char **av;{ printf("%s\n", makemod(av[1], av[2])); exit(0); } */ !Funky!Stuff! echo x - readdict.c cat >readdict.c <<'!Funky!Stuff!' /* RCS Info: $Revision: 1.3 $ on $Date: 85/10/08 18:35:20 $ * $Source: /ic4/faustus/src/spellfix/RCS/readdict.c,v $ * Copyright (c) 1985 Wayne A. Christopher, U. C. Berkeley CAD Group * Permission is granted to do anything with this code except sell it * or remove this message. */ #include "spellfix.h" /* This routine creates the word table from the dictionary -- it returns * the number of words read. */ readdict(dictfile, ptr) char *dictfile; char ***ptr; { register FILE *fp; FILE *fopen(); register int i = 0, c, j = 0; char buf[BUFSIZ]; register char **words = NULL, *s, *field; if (!dictfile) dictfile = DICTFILE; if (!(fp = fopen(dictfile, "r"))) { perror(dictfile); return (0); } while ((c = getc(fp)) != EOF) { if (c == '\n') i++; j++; } rewind(fp); /* Now get one big chunk of memory for this file. 
*/ field = malloc(j); words = (char **) malloc(i * sizeof (char *)); if (!words || !field) { fprintf(stderr, "drat, malloc failed\n"); return (0); } for (c = 0; c < i; c++) { fgets(buf, BUFSIZ, fp); words[c] = field; for (s = buf; *s && (*s != ' ') && (*s != '\t') && (*s != '\n'); s++) *field++ = *s; *field++ = '\0'; } *ptr = words; return (i); } /* main() { char **p; printf("%d entries\n", readdict((char *) NULL, &p)); }*/ !Funky!Stuff! echo x - spellcheck.c cat >spellcheck.c <<'!Funky!Stuff!' /* RCS Info: $Revision: 1.3 $ on $Date: 86/04/02 10:33:31 $ * $Source: /ic4/faustus/src/spellfix/RCS/spellcheck.c,v $ * Copyright (c) 1985 Wayne A. Christopher, U. C. Berkeley CAD Group * Permission is granted to do anything with this code except sell it * or remove this message. * * spellcheck -- takes a list of sets of words on the standard input, * of the form <word1> <word2> <junk> <word3> <junk> ... <wordn> <junk> * and outputs those lines for which none of the wordi are in the * dictionary. We are very liberal with memory, unlike spell... */ #include "spellfix.h" #include <sys/file.h> #include <pwd.h> int debug = 0; /* Usage is spellcheck [badfile goodfile]. 
*/
/* Lines whose candidate words are all unknown go to badfile (stdout when
 * no files are named); lines with a known word go to goodfile, if given.
 * A word is known when it is all digits, is found in the main dictionary
 * by binary search, or appears in ~/.spellrc.
 */
main(ac, av)
    char **av;
{
    char **words, **rcwords, *text;
    int nwords, nrcwords;
    char buf[BUFSIZ], wbuf[BUFSIZ];
    register int i, j, k, found;
    register char *s, *t;
    char c, d;
    FILE *good, *bad, *fopen();
    struct passwd *pwd, *getpwuid();

    if (ac == 1) {
        bad = stdout;
        good = NULL;
    } else if (ac == 3) {
        if (!(bad = fopen(av[1], "w"))) {
            perror(av[1]);
            exit(1);
        }
        if (!(good = fopen(av[2], "w"))) {
            perror(av[2]);
            exit(1);
        }
    } else {
        fprintf(stderr, "Usage: %s [badfile] [goodfile]\n", av[0]);
        exit(1);
    }
    pwd = getpwuid(getuid());
    /* Either a missing entry or a missing home directory is fatal; the
     * old "&&" dereferenced pwd exactly when it was NULL.
     */
    if (!pwd || !pwd->pw_dir)
        exit(1);
    sprintf(buf, "%s/.spellrc", pwd->pw_dir);
    if (!access(buf, R_OK))
        nrcwords = readdict(buf, &rcwords);
    else
        nrcwords = 0;
    nwords = readdict((char *) NULL, &words);
    if (nwords <= 0)
        exit(1);
    while (fgets(buf, BUFSIZ, stdin)) {
        found = 0;
        for (text = buf; *text; ) {
            /* Grab the next word. */
            for (s = text, t = wbuf; *s && (*s != ' ') && (*s != '\n'); )
                *t++ = *s++;
            *t = '\0';
            if (!*wbuf)
                break;
            /* Make sure text points to the next good stuff. */
            if (text != buf) {
                /* Past the first word, each word is followed by a
                 * junk field that has to be skipped as well.
                 */
                text = s;
                while (*text && (*text == ' '))
                    text++;
                while (*text && (*text != ' '))
                    text++;
                while (*text && (*text == ' '))
                    text++;
            } else {
                text = s;
                while (*text && (*text == ' '))
                    text++;
            }
            /* Now wbuf is the word we want. Make sure it
             * isn't a number first...
             */
            for (s = wbuf; *s && isdigit(*s); s++)
                ;
            if (!*s) {
                found = 1;
                break;
            }
            /* Binary search; i and j are exclusive bounds. */
            i = -1;
            j = nwords;
            for (;;) {
                k = (i + j) / 2;
                if ((k == i) || (k == j))
                    break;
                if (debug)
                    printf("%s =? %s\n", wbuf, words[k]);
                for (s = wbuf, t = words[k]; ; ) {
                    /* Ignore all this junk they allow
                     * in /usr/dict/words.
                     */
                    while ((*s == '\'') || (*s == '&') || (*s == '.'))
                        s++;
                    while ((*t == '\'') || (*t == '&') || (*t == '.'))
                        t++;
                    c = isupper(*s) ? tolower(*s) : *s;
                    d = isupper(*t) ? tolower(*t) : *t;
                    if (c > d) {
                        i = k;
                        break;
                    } else if (c < d) {
                        j = k;
                        break;
                    } else if (!c && !d && !words[k][1]) {
                        /* Both strings ended on a one-letter
                         * entry.  That is not accepted as a
                         * match, and comparing further would
                         * read past both terminators, so close
                         * the search range and give up.
                         */
                        i = j = k;
                        break;
                    } else if (!c && !d && words[k][1]) {
                        /* No 1-letter matches...
*/ found = 1; break; } else { s++; t++; } } if (found) break; } if (found) break; else { /* See if it was in .spellrc... */ for (i = 0; i < nrcwords; i++) if (!strcmp(wbuf, rcwords[i])) { found = 1; break; } } } if (found) { if (good) fputs(buf, good); continue; } else { fputs(buf, bad); } } exit(0); } !Funky!Stuff! echo x - spellfix.c cat >spellfix.c <<'!Funky!Stuff!' /* RCS Info: $Revision: 1.5 $ on $Date: 86/04/02 10:33:36 $ * $Source: /ic4/faustus/src/spellfix/RCS/spellfix.c,v $ * Copyright (c) 1985 Wayne A. Christopher, U. C. Berkeley CAD Group * Permission is granted to do anything with this code except sell it * or remove this message. * * This is the main control program for the spellfix system. Usage is * spellfix document */ #include "spellfix.h" #include <sys/file.h> #include <curses.h> #include <pwd.h> int debug = 1; main(ac, av, ep) char **av, **ep; { char buf[BUFSIZ]; char *tempf1 = "/tmp/spf1XXXXXX"; char *tempf2 = "/tmp/spf2XXXXXX"; char *tempf3 = "/tmp/spf3XXXXXX"; char **words[NDICTS]; int nwords[NDICTS], i, c; FILE *fp, *out, *fopen(); char *mktemp(), *fixword(), *getbuf(), *badword, *goodword, *s; struct passwd *pwd, *getpwuid(); if (!strcmp(av[0], "spell")) { /* Spell emulation mode. */ } if (ac != 2) { fprintf(stderr, "Usage: %s document\n", av[0]); exit(1); } /* This is cleaner than letting sh find the error. */ if (access(av[1], 0)) { fprintf(stderr, "Error: can't open %s.\n", av[1]); exit(1); } else if (access(av[1], W_OK | R_OK)) { fprintf(stderr, "Error: %s must be readable and writable.\n", av[1]); exit(1); } printf("Starting up... "); fflush(stdout); tempf1 = mktemp(tempf1); tempf2 = mktemp(tempf2); tempf3 = mktemp(tempf3); /* Read the .spellrc file... */ pwd = getpwuid(getuid()); if (!pwd->pw_dir) { fprintf(stderr, "Hey, you don't have a home directory.\n"); exit(1); } sprintf(buf, "%s/.spellrc", pwd->pw_dir); if (!access(buf, R_OK)) nwords[0] = readdict(buf, &words[0]); else nwords[0] = 0; /* First prep the doc. 
We won't use prep because deroff is * better and more standard. */ sprintf(buf, "/usr/bin/deroff -w < %s | /usr/bin/sort -u > %s", av[1], tempf1); if (system(buf)) exit(1); /* Now make up the decomposition list of the words. */ sprintf(buf, "%s/baseword < %s > %s", BINDIR, tempf1, tempf2); if (system(buf)) exit(1); /* Seperate them into bad (tempf1) and good (tempf3) files. */ printf("done.\nIdentifying misspelled words... "); fflush(stdout); sprintf(buf, "%s/spellcheck %s %s < %s", BINDIR, tempf1, tempf3, tempf2); if (system(buf)) exit(1); unlink(tempf2); i = 0; fp = fopen(tempf1, "r"); while ((c = getc(fp)) != EOF) if (c == '\n') i++; fclose(fp); /* Now read in the good file as a dictionary. */ printf("done (%d misspelled word%s).\nReading dictionaries...\n", i, (i == 1) ? "" : "s"); nwords[1] = readdict(tempf3, &words[1]); printf("%d word%s in document correct...\n", nwords[1], (nwords[1] == 1) ? "" : "s"); if (nwords[1] <= 0) { /* Probably this should be an error. */ printf("Wow, not a single word spelled right!\n"); } unlink(tempf3); sprintf(buf, "%s/freqwords", BINDIR); nwords[2] = readdict(buf, &words[2]); printf("Read %d frequently used words...\n", nwords[2]); if (nwords[2] <= 0) exit(1); nwords[3] = readdict((char *) NULL, &words[3]); printf("And %d words from the dictionary.\n", nwords[3]); if (nwords[3] <= 0) exit(1); /* Now for each word, try to correct it. */ if (!(fp = fopen(tempf1, "r"))) { perror(tempf1); exit(1); } if (!(out = fopen(tempf2, "w"))) { perror(tempf2); exit(1); } initscr(); crmode(); /* It looks like curses likes to trash my environment... */ for (i = 0; ep[i]; i++) if (!index(ep[i], '=')) ep[i] = "CURSES=shit"; /* Read words from tempf1, get the corrections, and write * bad/good pairs to tempf2. 
*/ i = 0; while (fgets(buf, BUFSIZ, fp)) { goodword = fixword(buf, words, nwords, 4, av[1]); if (!goodword) break; else if (goodword == (char *) 1) { system("stty -tabs"); unlink(tempf1); unlink(tempf2); exit(0); } badword = buf; for (s = buf; isvalid(*s); s++) ; *s = '\0'; if (goodword && *goodword && strcmp(badword, goodword)) { fprintf(out, "%s %s\n", badword, goodword); i++; } } if (i) sprintf(buf, "%d words corrected, ok to write out changes? ", i); else strcpy(buf, "No words corrected."); mvaddstr(22, 0, buf); refresh(); if (i) strcpy(buf, getbuf()); endwin(); system("stty -tabs"); /* Damn... */ fclose(out); fclose(fp); unlink(tempf1); if (!i) { unlink(tempf2); putchar('\n'); exit(0); } if ((buf[0] != 'y') && (buf[0] != 'Y')) { printf("\nOk, aborting... Changes saved in %s.changes.\n", av[1]); printf("This file is valid input for wordchange(1).\n"); sprintf(buf, "/bin/cp %s %s.changes", tempf2, av[1]); system(buf); unlink(tempf2); exit(0); } printf(" ... "); fflush(stdout); sprintf(buf, "%s/wordchange %s < %s", BINDIR, av[1], tempf2); if (system(buf)) exit(1); unlink(tempf2); printf("done.\nSo long...\n"); exit(0); } !Funky!Stuff! echo x - wordchange.c cat >wordchange.c <<'!Funky!Stuff!' /* RCS Info: $Revision: 1.2 $ on $Date: 85/10/08 18:35:24 $ * $Source: /ic4/faustus/src/spellfix/RCS/wordchange.c,v $ * Copyright (c) 1985 Wayne A. Christopher, U. C. Berkeley CAD Group * Permission is granted to do anything with this code except sell it * or remove this message. * * This program is called by wordchange file ..., and it reads the * standard input for pairs of words -- an old word, and a new word * to replace it with. */ #include "spellfix.h" main(ac, av) char **av; { char *tempf = "/tmp/wchXXXXXX", *mktemp(); FILE *fp, *tp, *fopen(); char buf[BUFSIZ]; register char **old, **new, *s, *t; register int i = 0, j, k; int nwords = 0; char *beg; /* First collect all the changes. 
*/ tempf = mktemp(tempf); if (!(tp = fopen(tempf, "w+"))) { perror(tempf); exit(1); } while (fgets(buf, BUFSIZ, stdin)) { nwords++; fputs(buf, tp); } rewind(tp); old = (char **) malloc(nwords * sizeof (char *)); new = (char **) malloc(nwords * sizeof (char *)); /* Now read them back in. */ while (fgets(buf, BUFSIZ, tp)) { old[i] = malloc(strlen(buf) + 1); strcpy(old[i], buf); for (s = old[i]; *s && (*s != ' '); s++) ; if (*s) { *s = '\0'; new[i] = s + 1; } else new[i] = s; for (s = new[i]; *s && (*s != ' ') && (*s != '\t') && (*s != '\n'); s++) ; *s = '\0'; i++; } /* Now for each file, go through and do the replacements. */ for (av++; *av; av++) { if (!(fp = fopen(*av, "r+"))) { perror(*av); continue; } rewind(tp); while (fgets(buf, BUFSIZ, fp)) { /* Now do the substitutions on this line. */ for (i = 0; i < nwords; i++) { for (s = buf; *s; ) { /* Find the beginning of a word. */ while (!isvalid(*s)) s++; while (*s && (*s == '\'')) s++; beg = s; for (t = old[i]; *t; t++) if (*t != *s) break; else s++; if (*t || isvalid(*s)) { while (isvalid(*s)) s++; continue; } /* Now splice the new word in. Don't * look at this code or you will die. */ k = strlen(old[i]); j = k - strlen(new[i]); if (j > 0) { for (t = beg + j; *t; t++) t[-j] = t[0]; t[-j] = '\0'; } else if (j < 0) { for (t = s; *t; t++) ; for (; t >= beg + k; t--) t[-j] = t[0]; } for (t = new[i]; *t; t++) *beg++ = *t; s = beg; /* Important... */ } } fputs(buf, tp); } ftruncate(fileno(tp), ftell(tp)); rewind(tp); rewind(fp); while ((i = fread(buf, 1, BUFSIZ, tp)) > 0) fwrite(buf, 1, i, fp); ftruncate(fileno(fp), ftell(fp)); fclose(fp); } fclose(tp); unlink(tempf); exit(0); } !Funky!Stuff! echo x - freqwords cat >freqwords <<'!Funky!Stuff!' the and that for you are with have not this can but was from will they one !Funky!Stuff!