gtoal@tharr.UUCP (Graham Toal) (06/29/90)
Archive-Name: dawgutils/update1.shar To people who received the last post and got a bit annoyed at the lack of a makefile, I apologise. It was my first net posting. I'll know better next time. Meanwhile here's an emergency fix; cc dawg.c; mv a.out dawg cc pdawg.c; mv a.out pdawg cc pack.c; mv a.out pack cc ppack.c; mv a.out ppack cc dwgcheck.c; mv a.out dwgcheck cc pckcheck.c; mv a.out pckcheck cc tell.c; mv a.out tell cc proot.c; mv a.out proot dawg /usr/dict/words dict pack dict.dwg dict.pck dwgcheck this and that wroang wurd and another wurd pckcheck this and that wroang wurd and another wurd proot meta tell stoopid speled wurds Hope that helps a bit. Now for todays additions: cc typo.c; mv a.out typo cc cross.c; mv a.out cross typo spel cross p\?zz\?e #!/bin/sh-----cut here-----cut here-----cut here-----cut here----- # shar: Shell Archiver # Run the following text with /bin/sh to create: # README # cross.c # typo.c cat - << \SHAR_EOF > README This is probably rather early for a follow up to my posting on spelling checker utilities, but a couple of people have asked for these... (These programs need dawgutils/* as posted a couple of days ago) typo.c A sample program showing how to use the dawg structure to efficiently correct typos. cross.c A 'crossword puzzle' solver - effectively the same as grepping the word list allowing '?' as a single wild char, eg cross p?zz?e would return 'puzzle'. I've also had some interest in my throwaway comments on a replacement for Soundex; enough to convince me to document it properly and post it here (a week away at the earliest). Meanwhile, may I ask again: If anyone has a dictionary of words and their phonetic representations could they get in touch please? Many thanks if you can. Share & Enjoy, Graham Toal (grA@m tOl) <gtoal%uk.ac.ed@nsfnet-relay.ac.uk> SHAR_EOF cat - << \SHAR_EOF > cross.c /* On brain-dead PC's, with MICROSOFT, link with /ST:30000 */ /* File: cross.c Author: Graham Toal Purpose: match words with single-char wildcards (cr?ssw?rd p?zzl?s) Creation date: 28/06/90 14:01:34 Lastedit: 28/06/90 14:05:45 Description: (The nice thing about the dawg structure is that it makes utilities like this easy to write; none of these small programs has taken more than about an hour. This one took five minutes ;-)) */ /* Manadatory header files */ #include <stdio.h> #include "dawg.h" #include "grope.h" #include "utils.c" /* Headers here as needed on per-program basis */ /* Spelling library utilities */ #include "init.c" /* Loading dicts */ #ifdef SYS_MAC /* To compile with THINK C 4.0, place all the relevant .h and .c files in a folder. Then create a project which contains this main.c and the libraries unix and ANSI. */ #include <unix.h> #include <stdlib.h> #include <console.h> #endif /* This one just gets wrong letters */ int #ifdef PROTOTYPES fix_cross( NODE PCCRAP *dawg, INDEX i, char *word, char *res, int len, int *found) #else fix_cross(dawg, i, word, res, len, found) NODE PCCRAP *dawg; INDEX i; char *word; char *res; int len; int *found; #endif { int endsword, last, ch, target; NODE node; INDEX link; for (;;) { node = dawg[i++]; ch = (int)((node >> V_LETTER) & M_LETTER); last = ((node & (INDEX)M_END_OF_NODE) != 0); endsword = ((node & M_END_OF_WORD) != 0); link = node & M_NODE_POINTER; res[len] = ch; res[len+1] = '\0'; target = ((int)*word)&255; if (ch != 0) { if (ch == target || target == '?') { if (endsword && *(word+1) == '\0') { fprintf(stdout, "word: %s\n", res); (*found)++; } if (*(word+1) != '\0' && link != 0) (void) fix_cross(dawg, link, word+1, res, len+1, found); } } if (last) break; } return(0==0); } int #ifdef PROTOTYPES crossword(NODE PCCRAP *dawg, char *word) #else crossword(dawg, word) NODE PCCRAP *dawg; char *word; #endif { char result[MAX_WORD_LEN]; int i = 0; (void)fix_cross(dawg, (INDEX)ROOT_NODE, word, result, 0, &i); return(i); } int #ifdef PROTOTYPES main(int argc, char **argv) #else main(argc, argv) int argc; char **argv; #endif { NODE PCCRAP *dawg; INDEX edges; int each; #ifdef SYS_MAC argc = ccommand(&argv); #endif /* Your program goes here... */ if (argc == 1) { fprintf(stderr, "usage: %s mispeled wurdz\n", argv[0]); exit(EXIT_ERROR); } if (!dawg_init("", &dawg, &edges)) exit(EXIT_ERROR); for (each = 1; each < argc; each++) { fprintf(stderr, "* Matches:\n"); if (!crossword(dawg, argv[each])) { fprintf(stderr, "(none found)\n"); } if (each+1 != argc) fprintf(stderr, "\n"); } exit(EXIT_OK); } SHAR_EOF cat - << \SHAR_EOF > typo.c /* On brain-dead PC's, with MICROSOFT, link with /ST:30000 */ /* File: typo.c Author: Graham Toal Purpose: offer correct spelling Creation date: 27/06/90 Lastedit: 28/06/90 13:27:03 Description: Like the unix 'spelltell' command but only fixes typos rather than soundslike errors. See my 'tell' program if you want soundex, or wait a few weeks for my new proper phonetic algorithm. It is a design decision that some of the functions will return the word presented if it is in fact correct. You can call this a bug if you prefer. (The nice thing about the dawg structure is that it makes utilities like this easy to write; none of these small programs has taken more than about an hour.) */ /* Manadatory header files */ #include <stdio.h> #include "dawg.h" #include "grope.h" #include "utils.c" /* Headers here as needed on per-program basis */ /* Spelling library utilities */ #include "init.c" /* Loading dicts */ #include "check.c" /* Simple word-check */ #ifdef SYS_MAC /* To compile with THINK C 4.0, place all the relevant .h and .c files in a folder. Then create a project which contains this main.c and the libraries unix and ANSI. */ #include <unix.h> #include <stdlib.h> #include <console.h> #endif /* This one just gets wrong letters */ int #ifdef PROTOTYPES fix_typos( NODE PCCRAP *dawg, INDEX i, char *word, char *res, int len, int errs_allowed, int *found) #else fix_typos(dawg, i, word, res, len, errs_allowed, found) NODE PCCRAP *dawg; INDEX i; char *word; char *res; int len; int errs_allowed; int *found; #endif { int endsword, last, ch; NODE node; INDEX link; for (;;) { node = dawg[i++]; ch = (int)((node >> V_LETTER) & M_LETTER); last = ((node & (INDEX)M_END_OF_NODE) != 0); endsword = ((node & M_END_OF_WORD) != 0); link = node & M_NODE_POINTER; res[len] = ch; res[len+1] = '\0'; if (ch != 0) { if (ch == ((int)*word)&255) { if (endsword && *(word+1) == '\0') { fprintf(stdout, "word: %s\n", res); (*found)++; } if (*(word+1) != '\0' && link != 0) (void) fix_typos(dawg, link, word+1, res, len+1, errs_allowed, found); } else { /* Try a different letter here instead? */ if (errs_allowed > 0) { if (endsword && *(word+1) == '\0') { fprintf(stdout, "word: %s\n", res); (*found)++; } if (*(word+1) != '\0' && link != 0) (void) fix_typos( dawg, link, word+1, res, len+1, errs_allowed-1, found); } } } if (last) break; } return(0==0); } /* And this one corrects omitted letters by inserting one. */ int #ifdef PROTOTYPES fix_insert( NODE PCCRAP *dawg, INDEX i, char *word, char *res, int len, int errs_allowed, int *found) #else fix_insert(dawg, i, word, res, len, errs_allowed, found) NODE PCCRAP *dawg; INDEX i; char *word; char *res; int len; int errs_allowed; int *found; #endif { int endsword, last, ch; NODE node; INDEX link; for (;;) { node = dawg[i++]; ch = (int)((node >> V_LETTER) & M_LETTER); endsword = ((node & M_END_OF_WORD) != 0); last = ((node & M_END_OF_NODE) != 0); link = node & M_NODE_POINTER; res[len] = ch; res[len+1] = '\0'; if (ch != 0) { if (endsword && *word == '\0' && errs_allowed > 0) { fprintf(stdout, "word: %s\n", res); (*found)++; } if (ch == ((int)*word)&255) { if (endsword && *(word+1) == '\0') { fprintf(stdout, "word: %s\n", res); (*found)++; /*return(0==0);*/ } if (*word == '\0') { if (errs_allowed > 0) (void) fix_insert( dawg, link, word+1, res, len+1, errs_allowed, found); } else { if (link != 0) (void) fix_insert( dawg, link, word+1, res, len+1, errs_allowed, found); } } /* Insert this letter (len+1) and see if rest matches */ if (errs_allowed > 0) { if (link != 0) (void) fix_insert(dawg, link, word, res, len+1, errs_allowed-1, found); } } if (last) break; } return(0==0); } /* And finally catch inserted letters by deleting one */ int #ifdef PROTOTYPES fix_delete( NODE PCCRAP *dawg, INDEX i, char *word, char *res, int len, int errs_allowed, int *found) #else fix_delete(dawg, i, word, res, len, errs_allowed, found) NODE PCCRAP *dawg; INDEX i; char *word; char *res; int len; int errs_allowed; int *found; #endif { int endsword, last, ch; NODE node; INDEX link; if (errs_allowed > 0) { if (*(word+1) != '\0') { (void) fix_delete(dawg, i, word+1, res, len, errs_allowed-1, found); } else { /* Shouldn't get this far, but does :-( */ return(0==0); } } for (;;) { node = dawg[i++]; ch = (int)((node >> V_LETTER) & M_LETTER); endsword = ((node & M_END_OF_WORD) != 0); last = ((node & M_END_OF_NODE) != 0); link = node & M_NODE_POINTER; res[len] = ch; res[len+1] = '\0'; if (ch != 0) { if (ch == ((int)*word)&255) { if (errs_allowed > 0 && endsword && *(word+1) != '\0' && *(word+2) == '\0') { fprintf(stdout, "word: %s\n", res); (*found)++; } if (endsword && *(word+1) == '\0') { fprintf(stdout, "word: %s\n", res); (*found)++; return(0==0); } if (*(word+1) != '\0' && link != 0) (void) fix_delete(dawg, link, word+1, res, len+1, errs_allowed, found); } } if (last) break; } return(0==0); } int #ifdef PROTOTYPES dawg_typo(NODE PCCRAP *dawg, char *word) #else dawg_typo(dawg, word) NODE PCCRAP *dawg; char *word; #endif { char result[MAX_WORD_LEN]; int i = 0; (void)fix_typos(dawg, (INDEX)ROOT_NODE, word, result, 0, 1, &i); return(i); } int #ifdef PROTOTYPES dawg_insert(NODE PCCRAP *dawg, char *word) #else dawg_insert(dawg, word) NODE PCCRAP *dawg; char *word; #endif { char result[MAX_WORD_LEN]; int i = 0; (void)fix_insert(dawg, (INDEX)ROOT_NODE, word, result, 0, 1, &i); return(i); } int #ifdef PROTOTYPES dawg_delete(NODE PCCRAP *dawg, char *word) #else dawg_delete(dawg, word) NODE PCCRAP *dawg; char *word; #endif { char result[MAX_WORD_LEN]; int i = 0; (void)fix_delete(dawg, (INDEX)ROOT_NODE, word, result, 0, 1, &i); return(i); } int #ifdef PROTOTYPES dawg_transpose(NODE PCCRAP *dawg, char *word) #else dawg_transpose(dawg, word) NODE PCCRAP *dawg; char *word; #endif { int i = 0, l, c; for (l = 0; word[l+1] != '\0'; l++) { c = word[l]; word[l] = word[l+1]; word[l+1] = c; if (dawg_check(dawg, word)) { i++; fprintf(stdout, "word: %s\n", word); } c = word[l]; word[l] = word[l+1]; word[l+1] = c; } return(i /* != 0 */); } int #ifdef PROTOTYPES main(int argc, char **argv) #else main(argc, argv) int argc; char **argv; #endif { NODE PCCRAP *dawg; INDEX edges; int each; #ifdef SYS_MAC argc = ccommand(&argv); #endif /* Your program goes here... */ if (argc == 1) { fprintf(stderr, "usage: %s mispeled wurdz\n", argv[0]); exit(EXIT_ERROR); } if (!dawg_init("", &dawg, &edges)) exit(EXIT_ERROR); for (each = 1; each < argc; each++) { fprintf(stderr, "* Wrong char:\n"); if (!dawg_typo(dawg, argv[each])) { fprintf(stderr, "(none found)\n"); } fprintf(stderr, "* Omitted char\n"); if (!dawg_insert(dawg, argv[each])) { fprintf(stderr, "(none found)\n"); } fprintf(stderr, "* Inserted char:\n"); if (!dawg_delete(dawg, argv[each])) { fprintf(stderr, "(none found)\n"); } fprintf(stderr, "* Transposed char:\n"); if (!dawg_transpose(dawg, argv[each])) { fprintf(stderr, "(none found)\n"); } if (each+1 != argc) fprintf(stderr, "\n"); } exit(EXIT_OK); } SHAR_EOF