allbery@ncoast.UUCP (05/31/87)
#! /bin/sh # This is a shell archive, meaning: # 1. Remove everything above the #! /bin/sh line. # 2. Save the resulting text in a file. # 3. Execute the file with /bin/sh (not csh) to create the files: # good.c # hash.c # icombine.c # isexp1.sed # isexp2.sed # isexp3.sed # isexp4.sed # isexpand.X # ispell.1 # ispell.4 # This archive created: Sat May 30 17:13:30 1987 export PATH; PATH=/bin:$PATH echo shar: extracting "'good.c'" '(14078 characters)' if test -f 'good.c' then echo shar: will not over-write existing file "'good.c'" else sed 's/^X //' << \SHAR_EOF > 'good.c' X /* -*- Mode:Text -*- */ X X /* X * good.c - see if a word or its root word X * is in the dictionary. X * X * Pace Willisson, 1983 X */ X X #include <stdio.h> X #include <ctype.h> X #include "config.h" X #include "ispell.h" X X extern struct dent *lookup(); X extern char *index(); X X static int wordok; X static char *orig_word; X X extern int cflag; X X good (w) X char *w; X { X char nword[100]; X register char *p, *q; X register n; X X /* X ** Make an uppercase copy of the word we are checking. 
X */ X for (p = w, q = nword; *p; p++, q++) { X if (mylower (*p)) X *q = toupper (*p); X else X *q = *p; X } X *q = 0; X X rootword[0] = 0; X X if (cflag) { X printf ("%s\n", w); X orig_word = w; X } X else if (lookup (nword, q - nword, 1) != NULL) { X #ifdef CAPITALIZE X return cap_ok (w, lastdent); X #else X return (1); X #endif X } X X /* try stripping off suffixes */ X X n = strlen (w); X if (n == 1) X return (1); X X if (n < 4) X return 0; X X wordok = 0; X X /* this part from 'check.mid' */ X switch (q[-1]) { X case 'D': d_ending (nword,n); break; /* FOR "CREATED", "IMPLIED", "CROSSED" */ X case 'T': t_ending (nword,n); break; /* FOR "LATEST", "DIRTIEST", "BOLDEST" */ X case 'R': r_ending (nword,n); break; /* FOR "LATER", "DIRTIER", "BOLDER" */ X case 'G': g_ending (nword,n); break; /* FOR "CREATING", "FIXING" */ X case 'H': h_ending (nword,n); break; /* FOR "HUNDREDTH", "TWENTIETH" */ X case 'S': s_ending (nword,n); break; /* FOR ALL SORTS OF THINGS ENDING IN "S" */ X case 'N': n_ending (nword,n); break; /* "TIGHTEN", "CREATION", "MULIPLICATION" */ X case 'E': e_ending (nword,n); break; /* FOR "CREATIVE", "PREVENTIVE" */ X case 'Y': y_ending (nword,n); break; /* FOR "QUICKLY" */ X default: X break; X } X X if (wordok) { X strcpy (rootword, lastdent->word); X #ifdef CAPITALIZE X return cap_ok (w, lastdent); X #else X return 1; X #endif X } X return 0; X } X X #ifdef CAPITALIZE X cap_ok (word, dent) X register char *word; X register struct dent *dent; X { X register char *dword; X register char *w; X int wcount; X X /* X ** All caps is always legal. X */ X for (dword = word; *dword; dword++) { X if (mylower (*dword)) X break; X } X if (*dword == '\0') X return 1; /* It was all caps */ X if (dent->allcaps) X return 0; /* Not all caps and required to be */ X if (dent->followcase) { X /* X ** It's a followcase word. The correct capitalizations are X ** found following the main dent word. 
When we find a X ** mismatch between letters, we assume we are in the suffix, X ** and begin insisting on the same case as the last letter X ** that matched. X */ X dword = dent->word + strlen (dent->word) + 1; X wcount = *dword++ & 0xFF; X while (--wcount >= 0) { X dword++; /* Skip over keep flag */ X for (w = word; *w; w++, dword++) { X if (*dword != *w) { X /* Begin suffix processing. */ X if (myupper (dword[-1])) { X while (*w && !mylower (*w)) X w++; X if (*w == '\0') X return 1; X } X else { X while (*w && !myupper (*w)) X w++; X if (*w == '\0') X return 1; X } X break; X } X } X if (*w == '\0') X return 1; X while (*dword++) /* Skip to next prototype */ X ; X } X } X /* X ** If it's a capitalize word, and the first letter is lowercase, X ** it's illegal. Note that all-lowercase followcase words will X ** be found by the string scan above. X */ X if (dent->capitalize && mylower (*word)) X return 0; X /* X ** If it's not a followcase word, or if the capitalize flag is set, X ** capitalization (e.g. at the beginning of a sentence) is always X ** legal. All-lowercase is also legal for non-followcase words. X */ X if (!dent->followcase || dent->capitalize) { X for (dword = word + 1; *dword; dword++) { X if (myupper (*dword)) X break; X } X if (*dword == '\0') X return 1; /* It was all-lower or capitalized */ X } X return 0; /* Word has a bad mix of cases */ X } X #endif X X flagpr (w, flag, modpoint) X register char *w; X int flag; X register char *modpoint; /* Must be in w and greater than w */ X { X register char *orig; X X /* X ** We refuse to print if the case at and after modpoint isn't X ** consistent with the case just before there. This prevents X ** things like "OEM's" from being turned into OEM/M, which in X ** turn will only accept "OEM'S". 
X */ X orig = orig_word + (modpoint - w); X if (myupper(orig[-1])) { X while (*orig) { X if (mylower (*orig++)) X return; X } X } X else { X while (*orig) { X if (myupper (*orig++)) X return; X } X } X /* Case is ok. Now print it. */ X for (orig = orig_word; *w && w < modpoint; orig++, w++) X putchar (*orig); X if (myupper (orig[-1])) X printf ("%s", w); X else { X for ( ; *w; w++) { X if (myupper (*w)) X putchar (tolower (*w)); X else X putchar (*w); X } X } X printf ("/%c\n", flag); X } X X g_ending (w,n) X register char *w; X register int n; X { X register char *p; X register struct dent *dent; X X p = w + n - 3; /* if the word ends in 'ing', then *p == 'i' */ X X if (strcmp (p, "ING") != 0) X return; X X *p = 'E'; /* change I to E, like in CREATING */ X *(p+1) = 0; X n -= 2; X X if (n < 2) X return; X X if (cflag) X flagpr (w, 'G', p); X else if ((dent = lookup (w, n, 1)) != NULL X && dent->g_flag) { X wordok = 1; X return; X } X X X *p = 0; X n--; X X if (n < 2) X return; X X if (p[-1] == 'E') X return; /* this stops CREATEING */ X X if (cflag) X flagpr (w, 'G', p); X else if ((dent = lookup (w, n, 1)) != NULL) { X if (dent->g_flag) X wordok = 1; X return; X } X return; X } X X d_ending (w,n) X register char *w; X register n; X { X register char *p; X register struct dent *dent; X X p = w + n - 2; X X if (strcmp (p, "ED") != 0) X return; X X p[1] = 0; /* kill 'D' */ X n--; X X if (cflag) X flagpr (w, 'D', p + 1); X else if ((dent = lookup (w, n, 1)) != NULL) { /* eg CREATED */ X if (dent->d_flag) { X wordok = 1; X return; X } X } X X if (n < 3) X return; X X p[0] = 0; X n--; X p--; X X /* ED is now completely gone */ X X if (p[0] == 'I' && !vowel (p[-1])) { X p[0] = 'Y'; X if (cflag) X flagpr (w, 'D', p); X else if ((dent = lookup (w, n, 1)) != NULL X && dent->d_flag) { X wordok = 1; X return; X } X p[0] = 'I'; X } X X if ((p[0] != 'E' && p[0] != 'Y') || X (p[0] == 'Y' && vowel (p[-1]))) { X if (cflag) X flagpr (w, 'D', p + 1); X else if ((dent = lookup (w, n, 
1)) != NULL) { X if (dent->d_flag) X wordok = 1; X return; X } X } X } X X t_ending (w,n) X register char *w; X register n; X { X X register char *p; X register struct dent *dent; X X p = w + n - 3; X X if (strcmp (p, "EST") != 0) X return; X X p[1] = 0; /* kill "ST" */ X n -= 2; X X if (cflag) X flagpr (w, 'T', p); X else if ((dent = lookup (w, n, 1)) != NULL X && dent->t_flag) { X wordok = 1; X return; X } X X if (n < 3) X return; X X p[0] = 0; /* kill 'E' */ X n--; X p--; X X /* EST is now completely gone */ X X if (p[0] == 'I' && !vowel (p[-1])) { X p[0] = 'Y'; X if (cflag) X flagpr (w, 'T', p); X else if ((dent = lookup (w, n, 1)) != NULL X && dent->t_flag) { X wordok = 1; X return; X } X p[0] = 'I'; X } X X if ((p[0] != 'E' && p[0] != 'Y') || X (p[0] == 'Y' && vowel (p[-1]))) { X if (cflag) X flagpr (w, 'T', p + 1); X else if ((dent = lookup (w, n, 1)) != NULL) { X if (dent->t_flag) X wordok = 1; X return; X } X } X X } X X X r_ending (w,n) X register char *w; X register n; X { X register char *p; X register struct dent *dent; X X p = w + n - 2; X X if (strcmp (p, "ER") != 0) X return; X X p[1] = 0; /* kill 'R' */ X n--; X X if (cflag) X flagpr (w, 'R', p + 1); X else if ((dent = lookup (w, n, 1)) != NULL X && dent->r_flag) { X wordok = 1; X return; X } X X if (n < 3) X return; X X p[0] = 0; /* kill 'E' */ X n--; X p--; X X /* ER is now completely gone */ X X if (p[0] == 'I' && !vowel (p[-1])) { X p[0] = 'Y'; X if (cflag) X flagpr (w, 'R', p); X else if ((dent = lookup (w, n, 1)) != NULL X && dent->r_flag) { X wordok = 1; X return; X } X p[0] = 'I'; X } X X if ((p[0] != 'E' && p[0] != 'Y') || X (p[0] == 'Y' && vowel (p[-1]))) { X if (cflag) X flagpr (w, 'R', p + 1); X else if ((dent = lookup (w, n, 1)) != NULL) { X if (dent->r_flag) X wordok = 1; X return; X } X } X X } X X h_ending (w,n) X register char *w; X register n; X { X register char *p; X register struct dent *dent; X X p = w + n - 2; X X if (strcmp (p, "TH") != 0) X return; X X *p = 0; /* kill "TH" 
*/ X n -= 2; X X p -= 2; X X if (p[1] != 'Y') { X if (cflag) X flagpr (w, 'H', p + 2); X else if ((dent = lookup (w, n, 1)) != NULL X && dent->h_flag) X wordok = 1; X } X X if (strcmp (p, "IE") != 0) X return; X X p[0] = 'Y'; /* change "IE" to "Y" */ X p[1] = 0; X n--; X X if (cflag) X flagpr (w, 'H', p + 1); X else if ((dent = lookup (w, n, 1)) != NULL) X if (dent->h_flag) X wordok = 1; X X } X X /* X * check for flags: X, J, Z, S, P, M X * X * X -ions or -ications or -ens X * J -ings X * Z -ers or -iers X * S -ies or -es or -s X * P -iness or -ness X * M -'S X */ X X s_ending (w,n) X register char *w; X register n; X { X register char *p; X register struct dent *dent; X X p = w + n; X X p[-1] = 0; /* kill 'S' */ X n--; X X if (index ("SXZHY", p[-2]) == NULL || (p[-2] == 'Y' && vowel (p[-3]))) { X if (cflag) X flagpr (w, 'S', p - 1); X else if ((dent = lookup (w, n, 1)) != NULL X && dent->s_flag) { X wordok = 1; X return; X } X } X X X switch (p[-2]) { /* letter before S */ X case 'N': /* X */ X if (strcmp (p-4, "ION") == 0) { X p[-4] = 'E'; /* change "ION" to "E" */ X p[-3] = 0; X n -= 2; X if (cflag) X flagpr (w, 'X', p - 4); X else if ((dent = lookup (w, n, 1)) != NULL X && dent->x_flag) { X wordok = 1; X return; X } X } X if (strcmp (p-8, "ICATE") == 0) { X p[-8] = 'Y'; /* change "ICATE" to "Y" */ X p[-7] = 0; X n -= 4; X if (cflag) X flagpr (w, 'X', p - 8); X else if ((dent = lookup (w, n, 1)) != NULL X && dent->x_flag) X wordok = 1; X return; X } X if (strcmp (p-3, "EN") == 0 && p[-4] != 'E' && p[-4] != 'Y') { X p[-3] = 0; /* kill "EN" */ X n -= 2; X if (cflag) X flagpr (w, 'X', p - 3); X else if ((dent = lookup (w, n, 1)) != NULL X && dent->x_flag) X wordok = 1; X return; X } X return; X case 'G': /* J */ X if (strcmp (p-4, "ING") != 0) X return; X p[-4] = 'E'; /* change "ING" to "E" */ X p[-3] = 0; X n -= 2; X if (cflag) X flagpr (w, 'J', p - 4); X else if ((dent = lookup (w, n, 1)) != NULL X && dent->j_flag) { X wordok = 1; X return; X } X if (p[-5] == 
'E') X return; /* This stops CREATEING */ X p[-4] = 0; /* kill 'E' */ X n--; X if (cflag) X flagpr (w, 'J', p - 4); X else if ((dent = lookup (w, n, 1)) != NULL X && dent->j_flag) X wordok = 1; X return; X case 'R': /* Z */ X if (strcmp (p-3, "ER") != 0) X return; X X p[-2] = 0; /* kill 'R' */ X n--; X if (cflag) X flagpr (w, 'Z', p - 2); X else if ((dent = lookup (w, n, 1)) != NULL X && dent->z_flag) { X wordok = 1; X return; X } X if (p[-4] == 'I' && !vowel (p[-5])) { X p[-4] = 'Y'; /* change "IE" to "Y" */ X p[-3] = 0; X n--; X if (cflag) X flagpr (w, 'Z', p - 4); X else if ((dent = lookup (w, n, 1)) != NULL X && dent->z_flag) { X wordok = 1; X return; X } X p[-4] = 'I'; /* change 'Y' to 'I' */ X } X if ((p[-4] != 'E' && p[-4] != 'Y') || X (p[-4] == 'Y' && vowel (p[-5]))) { X if(p[-3]) n--; X p[-3] = 0; X if (cflag) X flagpr (w, 'Z', p - 3); X else if ((dent = lookup (w, n, 1)) != NULL X && dent->z_flag) X wordok = 1; X } X return; X case 'E': /* S (except simple adding of an S) */ X p[-2] = 0; /* drop the E */ X n--; X if (index ("SXZH", p[-3]) != NULL) { X if (cflag) X flagpr (w, 'S', p - 2); X else if ((dent = lookup (w, n, 1)) != NULL) { X if (dent->s_flag) X wordok = 1;; X return; X } X } X if (p[-3] == 'I') { X p[-3] = 'Y'; X if (cflag) X flagpr (w, 'S', p - 3); X else if ((dent = lookup (w, n, 1)) != NULL X && dent->s_flag) X wordok = 1; X return; X } X return; X X case 'S': /* P */ X if (strcmp (p-4, "NES") != 0) X return; X X p[-4] = 0; /* kill "NES" */ X n -= 3; X if (p[-5] != 'Y' || vowel (p[-6])) { X if (cflag) X flagpr (w, 'P', p - 4); X else if ((dent = lookup (w, n, 1)) != NULL X && dent->p_flag) { X wordok = 1; X return; X } X } X if (p[-5] == 'I') { X p[-5] = 'Y'; X if (cflag) X flagpr (w, 'P', p - 5); X else if ((dent = lookup (w, n, 1)) != NULL X && dent->p_flag) X wordok = 1; X } X return; X case '\'': /* M */ X p[-2] = '\0'; /* kill "'" */ X n--; X if (cflag) X flagpr (w, 'M', p - 2); X else if ((dent = lookup (w, n, 1)) != NULL X && 
dent->m_flag) X wordok = 1; X return; X } X } X X /* only the N flag */ X n_ending (w,n) X register char *w; X register n; X { X register char *p; X register struct dent *dent; X X p = w + n; X X if (p[-2] == 'E') { X if (p[-3] == 'E' || p[-3] == 'Y') X return; X p[-2] = 0; /* kill "EN" */ X n -= 2; X if (cflag) X flagpr (w, 'N', p - 2); X else if ((dent = lookup (w, n, 1)) != NULL X && dent->n_flag) X wordok = 1; X return; X } X X if (strcmp (p-3, "ION") != 0) X return; X X p[-3] = 'E'; /* change "ION" to "E" */ X p[-2] = 0; X n -= 2; X X if (cflag) X flagpr (w, 'N', p - 3); X else if ((dent = lookup (w, n, 1)) != NULL) { X if (dent->n_flag) X wordok = 1; X return; X } X X if (strcmp (p-7, "ICATE") != 0) /* check is really against "ICATION" */ X return; X X p[-7] = 'Y'; /* change "ICATE" to "Y" */ X p[-6] = 0; X n -= 4; X X if (cflag) X flagpr (w, 'N', p - 7); X else if ((dent = lookup (w, n, 1)) != NULL && dent->n_flag) X wordok = 1; X return; X } X X /* flags: v */ X e_ending (w,n) X register char *w; X register n; X { X register char *p; X register struct dent *dent; X X p = w + n; X X if (strcmp (p-3, "IVE") != 0) X return; X p[-3] = 'E'; /* change "IVE" to "E" */ X p[-2] = 0; X n -= 2; X X if (cflag) X flagpr (w, 'V', p - 3); X else if ((dent = lookup (w, n, 1)) != NULL X && dent->v_flag) { X wordok = 1; X return; X } X X if (p[-4] == 'E') X return; X X p[-3] = 0; /* kill 'E' */ X n--; X X if (cflag) X flagpr (w, 'V', p - 3); X else if ((dent = lookup (w, n, 1)) != NULL && dent->v_flag) X wordok = 1; X return; X } X X /* flags: y */ X y_ending (w,n) X register char *w; X register n; X { X register char *p; X register struct dent *dent; X X p = w + n; X X if (strcmp (p-2, "LY") != 0) X return; X X p[-2] = 0; /* kill "LY" */ X n -= 2; X X if (cflag) X flagpr (w, 'Y', p - 2); X else if ((dent = lookup (w, n, 1)) != NULL && dent->y_flag) X wordok = 1; X return; X } X X vowel (c) X register c; X { X return (c == 'A' || c == 'E' || c == 'I' || c == 'O' || c == 
'U'); X } SHAR_EOF fi # end of overwriting check echo shar: extracting "'hash.c'" '(350 characters)' if test -f 'hash.c' then echo shar: will not over-write existing file "'hash.c'" else sed 's/^X //' << \SHAR_EOF > 'hash.c' X /* -*- Mode:Text -*- */ X /* X * hash.c - a simple hash function for ispell X * X * Pace Willisson, 1983 X */ X X hash (s, n, hashsize) X register char *s; X register n; X register hashsize; X { X register short h = 0; X X while (n--) { X h ^= *s++; X if (h < 0) { X h <<= 1; X h++; X } else { X h <<= 1; X } X } X X h &= 077777; X return (unsigned long) h % hashsize; X } SHAR_EOF fi # end of overwriting check echo shar: extracting "'icombine.c'" '(5857 characters)' if test -f 'icombine.c' then echo shar: will not over-write existing file "'icombine.c'" else sed 's/^X //' << \SHAR_EOF > 'icombine.c' X /* X icombine: combine multiple ispell dictionary entries into a single X entry with the options of all entries X X Author: Gary Puckering X Cognos, Inc. X X Written: January 29, 1987 X X Notes: Input lines consist of a word followed optionally by X by one or more flags. e.g CREATE/V/N/S X X Flags on lines with identical root words are combined. X No editing on flags is performed. X Flags are forced to uppercase, but roots are left alone. X Old-style flags, like /X/N will be output as /NX. X Flags are output in alphabetical order. X Non-letters appearing before the first "/" are retained, X those after are dropped. X Root words that differ only in capitalization are combined. 
X */ X X #include <stdio.h> X #include <ctype.h> X #include "config.h" X #include "ispell.h" X X #define MAXFLAGS 26 /* letters A-Z */ X #define MAXLINE 255 /* maximum line size */ X X #define TRUE 1 X #define FALSE 0 X typedef int bool; X X bool flagtbl[MAXFLAGS]; /* array of flag options */ X X char line[MAXLINE]; /* current line */ X char lastword[MAXLINE]; /* previous word */ X char uclastword[MAXLINE]; /* uppercase version of lastword */ X char word[MAXLINE]; /* current word */ X char ucword[MAXLINE]; /* uppercase version of current word */ X char flags[MAXLINE]; /* current flags */ X int expand = 0; /* if NZ, expand instead of combining */ X X extern char *strcpy (); X X /* X * readln - read one line from standard input into buf, storing at most X * size - 1 characters and dropping the trailing newline. Returns buf, X * or NULL at end of input. This replaces gets(), which cannot bound X * its writes; an over-long input line now arrives in several pieces X * instead of overrunning the buffer. X */ X static char * X readln (buf, size) X char *buf; X int size; X { X register int len; X X if (fgets (buf, size, stdin) == NULL) X return NULL; X len = strlen (buf); X if (len > 0 && buf[len - 1] == '\n') X buf[len - 1] = '\0'; X return buf; X } X X /* X * main - combine the flags of consecutive input lines that share a root X * word and print one entry per root; with "-e" as the first argument, X * print the root and then one root/flag line per flag instead. X */ X main(argc,argv) X int argc; X char *argv[]; X { X X if (argc > 1 && strcmp (argv[1], "-e") == 0) X expand = 1; X if (readln(line, MAXLINE)) X { X parse(line,lastword,flags); X uccopy (uclastword, lastword); X getflags(flags); X } X else X return 0; X X while (readln(line, MAXLINE)) X { X parse(line,word,flags); X uccopy (ucword, word); X if (strcmp(word,lastword)!=0) /* possibly different word */ X { X if (strcmp (ucword, uclastword) != 0 /* truly different word */ X || resolvecaps (word, ucword, lastword, uclastword)) X { /* or caps differ */ X putword(); X strcpy(lastword,word); X strcpy(uclastword,ucword); X } X } X getflags(flags); X } X putword(); X return 0; X } X X /* print the pending root word followed by its accumulated flags */ X putword() X { X printf("%s",lastword); X putflags(); X } X X /* X * parse - split ln at the first "/": the text before it goes to wrd, X * the rest (including the "/") goes to flgs. X */ X parse(ln,wrd,flgs) X char ln[]; X char wrd[]; X char flgs[]; X { X register char *p, *q; X X /* copy line up to first "/" or to end */ X for (p=ln,q=wrd; *p && *p != '/'; p++,q++) *q = *p; X *q = '\0'; /* terminate with a character, not the pointer constant NULL */ X X strcpy(flgs,p); /* copy from "/" to end */ X } X X /* X * getflags - uppercase the letters in flgs in place and mark each one X * in flagtbl; "/" separators and non-letters are ignored. X */ X getflags(flgs) X char *flgs; X { X register char *p; X X for (p=flgs; *p; p++) X if (*p != '/') X { X if (islower (*p)) X *p = toupper (*p); X if (isupper(*p)) X flagtbl[(*p)-'A'] = TRUE; X } X } X X putflags() X { X register int i; X int slashout = 0; X X if (expand) X putchar ('\n'); X X for (i=0; i<MAXFLAGS; i++) X if 
(flagtbl[i]) X { X if (expand) X printf("%s/%c\n", lastword, i + 'A'); X else X { X if (!slashout) X putchar('/'); X slashout = 1; X putchar(i+'A'); X } X flagtbl[i]=FALSE; X } X if (!expand) X putchar('\n'); X } X X /* X * This routine resolves capitalization conflicts. The idea is to combine X * only those cases that ispell can "uncombine". X * X * Entry: word and lastword differ, but only by case. X * X * Exit: Returns 1 if word and lastword both need to be in the dictionary, X * 0 if they can be handled by a single entry. If the return is zero, X * lastword may have been modified to reflect the union of the two X * entries. X * X * Rules: X * X * (1) If either word is entirely in upper case, it "loses" to the other X * word. The "winning" word is copied to lastword, and 0 is returned. X * (2) If either word is "followcase" (defined as being mixed case with a X * capital letter appearing after the first character), the two X * variants are considered to differ, and 1 is returned. Furthermore, X * a flag is set (by copying the word to "lastfollow") so that all X * future variants of the word will be considered to differ. X * (3) If one word is capitalized and the other is all-lowercase, the X * lowercase word "wins". It is copied to lastword, and 0 is returned. X * HOWEVER, if a "followcase" variant of the word has been seen, this X * rule does not apply, and rule (4) will cause the words to be X * considered different. X * (4) If a "followcase" variant of the word has been seen, the words are X * always considered to differ. 1 is returned. X * X * Note that the input must be sorted with "sort -t/ +0f -1 +0 -1" for this X * code to work. 
X */ X resolvecaps (word, ucword, lastword, uclastword) X char *word; X char *ucword; X char *lastword; X char *uclastword; X { X register char *w; X register char *lw; X static char lastfollow[200] = ""; X X /* Rule (1): Upper case loses */ X for (w = word; *w && !mylower (*w); w++) X ; X if (*w == '\0') X return 0; X for (lw = lastword; *lw && !mylower (*lw); lw++) X ; X if (*lw == '\0') X { X strcpy (lastword, word); X strcpy (uclastword, ucword); X return 0; X } X /* Rule (4): followcase forces all subsequent variants to be different. */ X if (strcmp (ucword, lastfollow) == 0) X return 1; X /* Rule (2): "followcase" is different. */ X for (w = word + 1, lw = lastword + 1; X *w && !myupper (*w) && !myupper (*lw); X w++, lw++) X ; X if (*w) /* We don't test *lw 'cause lengths are the same */ X { X strcpy (lastfollow, ucword); X return 1; X } X /* Rule (3): all-lowercase beats capitalized */ X if (myupper (lastword[0])) X { X strcpy (lastword, word); X strcpy (uclastword, ucword); X } X return 0; X } X X uccopy (dest, src) X register char *dest; X register char *src; X { X while (*src) X { X if (mylower (*src)) X *dest++ = toupper (*src++); X else X *dest++ = *src++; X } X *dest = '\0'; X } SHAR_EOF fi # end of overwriting check echo shar: extracting "'isexp1.sed'" '(1451 characters)' if test -f 'isexp1.sed' then echo shar: will not over-write existing file "'isexp1.sed'" else sed 's/^X //' << \SHAR_EOF > 'isexp1.sed' X /^[^/]*$/n X /\/.*[vV]/ { X /^[^/]*E\// { X s@^\([^/]*\)E/\([/A-Za-z]*\)[vV]@\1IVE\ X \1E/\2@; P; D X } X s@^\([^/]*[^/a-z]\)/\([/A-Za-z]*\)[vV]@\1IVE\ X \1/\2@; P; D X } X /\/.*[nN]/ { X /^[^/]*E\// { X s@^\([^/]*\)E/\([/A-Za-z]*\)[nN]@\1ION\ X \1E/\2@; P; D X } X /^[^/]*Y\// { X s@^\([^/]*\)Y/\([/A-Za-z]*\)[nN]@\1ICATION\ X \1Y/\2@; P; D X } X s@^\([^/]*[^/a-z]\)/\([/A-Za-z]*\)[nN]@\1EN\ X \1/\2@; P; D X } X /\/.*[xX]/ { X /^[^/]*E\// { X s@^\([^/]*\)E/\([/A-Za-z]*\)[xX]@\1IONS\ X \1E/\2@; P; D X } X /^[^/]*Y\// { X 
s@^\([^/]*\)Y/\([/A-Za-z]*\)[xX]@\1ICATIONS\ X \1Y/\2@; P; D X } X s@^\([^/]*[^/a-z]\)/\([/A-Za-z]*\)[xX]@\1ENS\ X \1/\2@; P; D X } X /\/.*[hH]/ { X /^[^/]*Y\// { X s@^\([^/]*\)Y/\([/A-Za-z]*\)[hH]@\1IETH\ X \1Y/\2@; P; D X } X s@^\([^/]*[^/a-z]\)/\([/A-Za-z]*\)[hH]@\1TH\ X \1/\2@; P; D X } X /\/.*[yY]/ { X s@^\([^/]*[^/a-z]\)/\([/A-Za-z]*\)[yY]@\1LY\ X \1/\2@; P; D X } X /\/.*[gG]/ { X /^[^/]*E\// { X s@^\([^/]*\)E/\([/A-Za-z]*\)[gG]@\1ING\ X \1E/\2@; P; D X } X s@^\([^/]*[^/a-z]\)/\([/A-Za-z]*\)[gG]@\1ING\ X \1/\2@; P; D X } X /\/.*[jJ]/ { X /^[^/]*E\// { X s@^\([^/]*\)E/\([/A-Za-z]*\)[jJ]@\1INGS\ X \1E/\2@; P; D X } X s@^\([^/]*[^/a-z]\)/\([/A-Za-z]*\)[jJ]@\1INGS\ X \1/\2@; P; D X } X /\/.*[dD]/ { X /^[^/]*E\// { X s@^\([^/]*[^/a-z]\)/\([/A-Za-z]*\)[dD]@\1D\ X \1/\2@; P; D X } X /^[^/]*[^/aeiouAEIOU]Y\// { X s@^\([^/]*\)Y/\([/A-Za-z]*\)[dD]@\1IED\ X \1Y/\2@; P; D X } X s@^\([^/]*[^/a-z]\)/\([/A-Za-z]*\)[dD]@\1ED\ X \1/\2@; P; D X } SHAR_EOF fi # end of overwriting check echo shar: extracting "'isexp2.sed'" '(1281 characters)' if test -f 'isexp2.sed' then echo shar: will not over-write existing file "'isexp2.sed'" else sed 's/^X //' << \SHAR_EOF > 'isexp2.sed' X /^[^/]*$/n X /\/.*[tT]/ { X /^[^/]*E\// { X s@^\([^/]*[^/a-z]\)/\([/A-Za-z]*\)[tT]@\1ST\ X \1/\2@; P; D X } X /^[^/]*[^/aeiouAEIOU]Y\// { X s@^\([^/]*\)Y/\([/A-Za-z]*\)[tT]@\1IEST\ X \1Y/\2@; P; D X } X s@^\([^/]*[^/a-z]\)/\([/A-Za-z]*\)[tT]@\1EST\ X \1/\2@; P; D X } X /\/.*[rR]/ { X /^[^/]*E\// { X s@^\([^/]*[^/a-z]\)/\([/A-Za-z]*\)[rR]@\1R\ X \1/\2@; P; D X } X /^[^/]*[^/aeiouAEIOU]Y\// { X s@^\([^/]*\)Y/\([/A-Za-z]*\)[rR]@\1IER\ X \1Y/\2@; P; D X } X s@^\([^/]*[^/a-z]\)/\([/A-Za-z]*\)[rR]@\1ER\ X \1/\2@; P; D X } X /\/.*[zZ]/ { X /^[^/]*E\// { X s@^\([^/]*[^/a-z]\)/\([/A-Za-z]*\)[zZ]@\1RS\ X \1/\2@; P; D X } X /^[^/]*[^/aeiouAEIOU]Y\// { X s@^\([^/]*\)Y/\([/A-Za-z]*\)[zZ]@\1IERS\ X \1Y/\2@; P; D X } X s@^\([^/]*[^/a-z]\)/\([/A-Za-z]*\)[zZ]@\1ERS\ X \1/\2@; P; D X } X /\/.*[sS]/ { X 
/^[^/]*[^/aeiouAEIOU]Y\// { X s@^\([^/]*\)Y/\([/A-Za-z]*\)[sS]@\1IES\ X \1Y/\2@; P; D X } X /^[^/]*[SXZH]\// { X s@^\([^/]*[^/a-z]\)/\([/A-Za-z]*\)[sS]@\1ES\ X \1/\2@; P; D X } X s@^\([^/]*[^/a-z]\)/\([/A-Za-z]*\)[sS]@\1S\ X \1/\2@; P; D X } X /\/.*[pP]/ { X /^[^/]*[^/aeiouAEIOU]Y\// { X s@^\([^/]*\)Y/\([/A-Za-z]*\)[pP]@\1INESS\ X \1Y/\2@; P; D X } X s@^\([^/]*[^/a-z]\)/\([/A-Za-z]*\)[pP]@\1NESS\ X \1/\2@; P; D X } X /\/.*[mM]/ { X s@^\([^/]*[^/a-z]\)/\([/A-Za-z]*\)[mM]@\1'S\ X \1/\2@; P; D X } SHAR_EOF fi # end of overwriting check echo shar: extracting "'isexp3.sed'" '(1451 characters)' if test -f 'isexp3.sed' then echo shar: will not over-write existing file "'isexp3.sed'" else sed 's/^X //' << \SHAR_EOF > 'isexp3.sed' X /^[^/]*$/n X /\/.*[vV]/ { X /^[^/]*e\// { X s@^\([^/]*\)e/\([/A-Za-z]*\)[vV]@\1ive\ X \1e/\2@; P; D X } X s@^\([^/]*[^/A-Z]\)/\([/A-Za-z]*\)[vV]@\1ive\ X \1/\2@; P; D X } X /\/.*[nN]/ { X /^[^/]*e\// { X s@^\([^/]*\)e/\([/A-Za-z]*\)[nN]@\1ion\ X \1e/\2@; P; D X } X /^[^/]*y\// { X s@^\([^/]*\)y/\([/A-Za-z]*\)[nN]@\1ication\ X \1y/\2@; P; D X } X s@^\([^/]*[^/A-Z]\)/\([/A-Za-z]*\)[nN]@\1en\ X \1/\2@; P; D X } X /\/.*[xX]/ { X /^[^/]*e\// { X s@^\([^/]*\)e/\([/A-Za-z]*\)[xX]@\1ions\ X \1e/\2@; P; D X } X /^[^/]*y\// { X s@^\([^/]*\)y/\([/A-Za-z]*\)[xX]@\1ications\ X \1y/\2@; P; D X } X s@^\([^/]*[^/A-Z]\)/\([/A-Za-z]*\)[xX]@\1ens\ X \1/\2@; P; D X } X /\/.*[hH]/ { X /^[^/]*y\// { X s@^\([^/]*\)y/\([/A-Za-z]*\)[hH]@\1ieth\ X \1y/\2@; P; D X } X s@^\([^/]*[^/A-Z]\)/\([/A-Za-z]*\)[hH]@\1th\ X \1/\2@; P; D X } X /\/.*[yY]/ { X s@^\([^/]*[^/A-Z]\)/\([/A-Za-z]*\)[yY]@\1ly\ X \1/\2@; P; D X } X /\/.*[gG]/ { X /^[^/]*e\// { X s@^\([^/]*\)e/\([/A-Za-z]*\)[gG]@\1ing\ X \1e/\2@; P; D X } X s@^\([^/]*[^/A-Z]\)/\([/A-Za-z]*\)[gG]@\1ing\ X \1/\2@; P; D X } X /\/.*[jJ]/ { X /^[^/]*e\// { X s@^\([^/]*\)e/\([/A-Za-z]*\)[jJ]@\1ings\ X \1e/\2@; P; D X } X s@^\([^/]*[^/A-Z]\)/\([/A-Za-z]*\)[jJ]@\1ings\ X \1/\2@; P; D X } X /\/.*[dD]/ { X /^[^/]*e\// { X 
s@^\([^/]*[^/A-Z]\)/\([/A-Za-z]*\)[dD]@\1d\ X \1/\2@; P; D X } X /^[^/]*[^/aeiouAEIOU]y\// { X s@^\([^/]*\)y/\([/A-Za-z]*\)[dD]@\1ied\ X \1y/\2@; P; D X } X s@^\([^/]*[^/A-Z]\)/\([/A-Za-z]*\)[dD]@\1ed\ X \1/\2@; P; D X } SHAR_EOF fi # end of overwriting check echo shar: extracting "'isexp4.sed'" '(1290 characters)' if test -f 'isexp4.sed' then echo shar: will not over-write existing file "'isexp4.sed'" else sed 's/^X //' << \SHAR_EOF > 'isexp4.sed' X /^[^/]*$/n X /\/.*[tT]/ { X /^[^/]*e\// { X s@^\([^/]*[^/A-Z]\)/\([/A-Za-z]*\)[tT]@\1st\ X \1/\2@; P; D X } X /^[^/]*[^/aeiouAEIOU]y\// { X s@^\([^/]*\)y/\([/A-Za-z]*\)[tT]@\1iest\ X \1y/\2@; P; D X } X s@^\([^/]*[^/A-Z]\)/\([/A-Za-z]*\)[tT]@\1est\ X \1/\2@; P; D X } X /\/.*[rR]/ { X /^[^/]*e\// { X s@^\([^/]*[^/A-Z]\)/\([/A-Za-z]*\)[rR]@\1r\ X \1/\2@; P; D X } X /^[^/]*[^/aeiouAEIOU]y\// { X s@^\([^/]*\)y/\([/A-Za-z]*\)[rR]@\1ier\ X \1y/\2@; P; D X } X s@^\([^/]*[^/A-Z]\)/\([/A-Za-z]*\)[rR]@\1er\ X \1/\2@; P; D X } X /\/.*[zZ]/ { X /^[^/]*e\// { X s@^\([^/]*[^/A-Z]\)/\([/A-Za-z]*\)[zZ]@\1rs\ X \1/\2@; P; D X } X /^[^/]*[^/aeiouAEIOU]y\// { X s@^\([^/]*\)y/\([/A-Za-z]*\)[zZ]@\1iers\ X \1y/\2@; P; D X } X s@^\([^/]*[^/A-Z]\)/\([/A-Za-z]*\)[zZ]@\1ers\ X \1/\2@; P; D X } X /\/.*[sS]/ { X /^[^/]*[^/aeiouAEIOU]y\// { X s@^\([^/]*\)y/\([/A-Za-z]*\)[sS]@\1ies\ X \1y/\2@; P; D X } X /^[^/]*[sxzh]\// { X s@^\([^/]*[^/A-Z]\)/\([/A-Za-z]*\)[sS]@\1es\ X \1/\2@; P; D X } X s@^\([^/]*[^/A-Z]\)/\([/A-Za-z]*\)[sS]@\1s\ X \1/\2@; P; D X } X /\/.*[pP]/ { X /^[^/]*[^/aeiouAEIOU]y\// { X s@^\([^/]*\)y/\([/A-Za-z]*\)[pP]@\1iness\ X \1y/\2@; P; D X } X s@^\([^/]*[^/A-Z]\)/\([/A-Za-z]*\)[pP]@\1ness\ X \1/\2@; P; D X } X /\/.*[mM]/ { X s@^\([^/]*[^/A-Z]\)/\([/A-Za-z]*\)[mM]@\1's\ X \1/\2@; P; D X } X s@/*$@@g SHAR_EOF fi # end of overwriting check echo shar: extracting "'isexpand.X'" '(750 characters)' if test -f 'isexpand.X' then echo shar: will not over-write existing file "'isexpand.X'" else sed 's/^X //' << \SHAR_EOF > 'isexpand.X' X : 
Use /bin/sh X # X # Expand the suffix flags in a list of ispell words. X # X # Usage: X # X # isexpand [ file ] ... X # X # All suffixes in the given input files (standard input if none) are X # expanded. The output is sorted with sort -u to remove possible X # duplications. X # X # Geoff Kuenning X # 5/17/87 X # X LIBDIR=!!LIBDIR!! X EXPAND1=${LIBDIR}/isexp1.sed X EXPAND2=${LIBDIR}/isexp2.sed X EXPAND3=${LIBDIR}/isexp3.sed X EXPAND4=${LIBDIR}/isexp4.sed X X # X # We have to test $# because of a bug in the way /bin/sh expands "$@" when X # there are no arguments. X # X if [ $# -eq 0 ] X then X sed -f $EXPAND1 | sed -f $EXPAND2 \ X | sed -f $EXPAND3 | sed -f $EXPAND4 | sort -u X else X sed -f $EXPAND1 "$@" | sed -f $EXPAND2 \ X | sed -f $EXPAND3 | sed -f $EXPAND4 | sort -u X fi SHAR_EOF chmod +x 'isexpand.X' fi # end of overwriting check echo shar: extracting "'ispell.1'" '(13095 characters)' if test -f 'ispell.1' then echo shar: will not over-write existing file "'ispell.1'" else sed 's/^X //' << \SHAR_EOF > 'ispell.1' X .\" -*- Mode:Text -*- X .TH ISPELL local MIT X .SH NAME X ispell \- Correct spelling for a file X .br X munchlist \- Combine suffixes in a spelling list X .br X isexpand \- Expand suffixes in a spelling list X .SH SYNOPSIS X .B ispell X [ X .B \-t X | X .B \-x X | X .B \-S X | X .B \-d X file | X .B \-p X file | X .B \-w X chars ] file ..... X .br X .B ispell X [ X .B \-t X | X .B \-d X file | X .B \-p X file | X .B \-w X chars ] X .B \-l X .br X .B ispell X [ X .B \-t X | X .B \-d X file | X .B \-p X file X ] { X .B \-a X | X .B \-A X } X .br X .B ispell X [ X .B \-w X chars ] X .B \-c X .br X .B ispell X .B \-v X .br X .B munchlist X [ X .B \-d X file | X .B \-e X | X .B \-w X chars ] X [ files ] X .br X .B isexpand X [ files ] X .SH DESCRIPTION X .PP X .I Ispell X is fashioned after the X .I spell X program from ITS (called X .I ispell X on Twenex systems.) The most common usage is "ispell filename". 
In this X case, X .I ispell X will display each word which does not appear in the dictionary, and X allow you to change it. If there are "near misses" in the dictionary X (words which differ by only a single letter, a missing or extra letter, X or a pair of transposed letters), then they are also displayed. If you X think the word is correct as it stands, you can type either "Space" to X accept it this one time, or "I" to accept it and put it in your private X dictionary. If one of the near misses is the word you want, type the X corresponding number. X (If there are more than 10 choices, X you may have to type a carriage return to complete a single-digit number). X Finally, if none of these choices is right, you X can type "R" and you will be prompted for a replacement word. X If you want to see a list of words that might be close using wildcard X characters, type "L" to look up a word in the system dictionary. X .PP X When a misspelled word is found, it is printed at the top of the screen. X Any near misses will be printed on the following lines, and finally, two X lines containing the word are printed at the bottom of the screen. If X your terminal can display reverse video, the word itself is highlighted. X .PP X The X .B \-v X option causes X .I ispell X to print its current version identification on the standard output X and exit. X .PP X The X .B \-l X or "list" option to X .I ispell X is used to produce a list of misspelled words from the standard input. X .PP X The X .B \-a X option X is intended to be used from other programs through a pipe. In this X mode, X .I ispell X expects the standard input to consist of lines containing single words. X Each word is X read, and a single line is written to the standard output. If the word X was found in the main dictionary, or your personal dictionary, then the X line contains only a '*'. If the word was found through suffix removal, X then the line contains a '+', a space, and the root word. 
If the word X is not in the dictionary, but there are near misses, then the line X contains an '&', a space, and a list of the near misses separated by X spaces. X Also, each near miss is capitalized the same as the input X word unless such capitalization is illegal; X in the latter case each near miss is capitalized correctly X according to the dictionary. X Finally, if the word does not appear in the dictionary, and X there are no near misses, then the line contains only a '#'. This mode X is also suitable for interactive use when you want to figure out the X spelling of a single word. (These characters are the same as the codes X that the real spell program uses.) X .PP X The X .B \-A X option works just like X .BR \-a , X except that if a line begins with the string "&Include_File&", the rest X of the line is taken as the name of a file to read for further words. X Input returns to the original file when the include file is exhausted. X Inclusion may be nested up to five deep. X The key string may be changed with the environment variable X .B INCLUDE_STRING X (the ampersands, if any, must be included). X .PP X When in the X .B \-a X mode, X .I ispell X will also accept lines of single words prefixed with either a '*' or a '@'. X A line starting with '*' tells X .I ispell X to insert the word into the user's dictionary (similar to the I command). X A line starting with '@' causes X .I ispell X to accept this word in the future (similar to the A command). X .PP X The X .B \-x X option causes X .I ispell X to remove the .bak file that it normally leaves. The .bak file contains X the pre-corrected text. If there are file opening / writing errors, X the .bak file may be left for recovery purposes even with the -x option. X .PP X The X .B \-S X option suppresses X .IR ispell "'s" X normal behavior of sorting the list of possible replacement words. 
X Some people may prefer this, since it somewhat enhances the probability X that the correct word will be low-numbered. X .PP X The X .B \-t X option selects TeX/LaTeX input mode. X TeX/LaTeX mode is also automatically selected if an input file has X the extension ".tex". X In this mode, whenever a backslash ("\e") is found, X .I ispell X will skip to the next whitespace. X Thus, for example, given X .RS X \echapter {This is a Ckapter} X \ecite{SCH86} X .RE X .I ispell X will find "Ckapter" but will not look for SCH. X The X .B \-t X option does not recognize the TeX comment character "%". X .PP X The X .B \-d X option is used to specify an alternate hashed dictionary file, X other than the default. If the filename does not begin with a "/", X the library directory for the default dictionary file is prefixed. X This is useful to allow dictionaries which prefer alternate British X spellings ("centre", "tyre", etc), or add lists of special-purpose X jargon and acronyms for subclasses of documents. There are some shortcomings X in attempting to provide foreign-language dictionaries, but something X like "-dfrench" could be made to work somewhat. X The X .B \-d X option may specify X .IR /dev/null , X in which case the dictionary is limited to the personal one. X This may be useful for certain private dictionaries. X .PP X The X .B \-p X option is used to specify an alternate personal dictionary file. X If the file name does not begin with "/", $HOME is prefixed. Also, the X shell variable WORDLIST may be set, which renames the personal dictionary X in the same manner. The command line overrides the WORDLIST setting. If X neither is present "ispell.words" is used. X .PP X The X .B \-w X option may be used to specify characters other than alphabetics X which may also appear in words. For instance, X .B \-w X "&" will allow "AT&T" X to be picked up. Underscores are useful in many technical documents.
X There is an admittedly crude provision in this option for 8-bit international X characters. X Non-printing characters may be specified in the usual way by inserting a X backslash followed by the octal character code; X e.g., "\e014" for a form feed. X Alternatively, if "n" appears in the character string, the (up to) X three characters X following are a DECIMAL code 0 - 255, for the character. X For example, to include bells and form feeds in your words (an admittedly X silly thing to do, but aren't most pedagogical examples): X .PP X n007n012 X .PP X Numeric digits other than the three following "n" are simply numeric X characters. Use of "n" does not conflict with anything because actual X alphabetics have no meaning - alphabetics are already accepted. X .I Ispell X will typically be used with input from a file, meaning that preserving X parity for possible 8 bit characters from the input text is OK. If you X specify the -l option, and actually type text from the terminal, this may X create problems if your stty settings preserve parity. X .PP X The X .B \-c X option is primarily intended for use by the X .I munchlist X shell script. X In this mode, a list of words is read from the standard input. X For each word, a list of possible root words and suffixes will be X written to the standard output. X Some of the root words will be illegal and must be filtered from the X output by other means; X the X .I munchlist X script does this. X As an example, the command "echo BOTHER | ispell -c" produces: X .PP X .RS X .nf X BOTH X BOTHE/R X BOTH/R X .fi X .RE X .PP X Unless it has been installed without the feature by your system administrator, X .I ispell X is aware of the correct capitalizations of words in the dictionary and X in your personal dictionary. X As well as recognizing words that must be capitalized (e.g., George) and X words that must be all-capitals (e.g., NASA), it can also handle words X with "unusual" capitalization (e.g., "ITCorp" or "TeX"). 
X If a word is capitalized incorrectly, the list of possibilities will X include all acceptable capitalizations. X (More than one capitalization may be acceptable; X for example, my dictionary lists both "ITCorp" and "ITcorp".) X Normally, this feature will not cause you surprises, but there is one X circumstance you need to be aware of. X If you add a word to your dictionary that is at the beginning of a sentence X (e.g., the first word of this paragraph if "unless" were not in the X dictionary), it will be marked as "capitalization required". X On a subsequent usage of this word without capitalization (e.g., the quoted word X in the previous sentence), X .I ispell X will object and suggest the capitalized version. X You must then compare the actual spellings by eye, and then type "I" X to add the un-capitalized variant to your personal dictionary. X .PP X The rules for capitalization are as follows: X .IP (1) X Any word may appear in all capitals, as in headings. X .IP (2) X Any word that is in the dictionary in all-lowercase form may appear X either in lowercase or capitalized (as at the beginning of a sentence). X .IP (3) X Any word that has "funny" capitalization (i.e., it contains both cases X and there is an uppercase character besides the first) must appear X exactly as in the dictionary, except as permitted by rule (1). X If the word is acceptable in all-lowercase, it must appear thus in a X dictionary entry. X .PP X The X .I munchlist X shell script is used to reduce the size of dictionary files, X primarily personal dictionary files. X It is also capable of combining dictionaries from various sources. X The given X .I files X are read (standard input if no arguments are given), X reduced to a minimal set of roots and suffixes that will match the X same list of words, and written to standard output. X .PP X Normally, words that are in the default dictionary are removed by X .I munchlist X during processing.
X If the list is to be used with a different dictionary, the X .B \-d X option can be used to specify an alternate (hashed) dictionary file X containing words to be removed from the output list. X If a dictionary file of X .I /dev/null X is specified, no words will be removed from the output; X this is useful when munching the primary dictionary file. X .PP X The X .B \-w X option is passed on to X .IR ispell . X The X .B \-e X ("efficient") option causes the script to use a slower algorithm that uses X somewhat less space in TMPDIR (normally X .IR /usr/tmp ")." X .PP X The X .I isexpand X shell script is used to expand the various suffix flags in an X .I ispell X word list. X This script can be used when looking words up in the dictionary, or X to verify that a particular suffix flag actually produces the expected X result. X .PP X It is possible to install X .I ispell X in such a way as to only support ASCII range text if desired. X .SH ENVIRONMENT X WORDLIST Personal dictionary file name X .br X INCLUDE_STRING Code for file inclusion under the -A option X .br X TMPDIR Directory used for some of munchlist's temporary files X .SH FILES X /usr/public/lib/ispell.hash X .br X /usr/dict/web2 for the Lookup function X .br X $HOME/.ispell_words user's private dictionary X .br X /usr/public/lib/isexp[1-4].sed sed scripts for expanding suffixes X .br X /usr/public/lib/icombine program for combining suffix flags X .SH SEE ALSO X spell(1), egrep(1), look(1), ispell(4) X .SH BUGS X It takes about five seconds for X .I ispell X to read in the hash table. X .sp X Perhaps more than ten choices should be allowed for near misses. X .sp X The hash table is stored as a quarter-megabyte array, so a PDP-11 X version does not seem likely. X .sp X .I Ispell X should understand more X .I troff X syntax, and deal more intelligently with contractions. 
X .sp X While alternate dictionaries for foreign languages could be defined, and X the international characters included in words, rules concerning X word endings / pluralization accommodate English only. X .sp X When the X .B \-x X flag is specified, X .I ispell X will unlink any existing .bak file. X .sp X .I Munchlist X requires tremendous amounts of temporary file space for X large dictionaries. X It does respect the TMPDIR environment variable, so this space can be X redirected. X However, a lot of the temporary space it needs is for sorting, so TMPDIR X is only a partial help on systems with an uncooperative X .IR sort (1). X As a benchmark, the 15000-word X .I dict.191 X takes about 1200 blocks in TMPDIR, and 2000 in X .IR sort "'s" X temporary directories. X Munching X .I dict.191 X with X .I /usr/dict/words X (28000 words output) X took another 1500 blocks or so, and ran for the better part of an hour. X .SH AUTHOR X Pace Willisson (pace@mit-vax) X .br X Collected, revised, and enhanced for the Usenet by Walt Buehring. X .br X Further enhanced and debugged by X Isaac Balbin, X Stewart Clamen, X Mark Davies, X Steve Dum, X Don Kark, X Steve Kelem, X Jim Knutson, X Geoff Kuenning, X Evan Marcus, X Dave Mason, X Rob McMahon, X Bob McQueer, X David Neves, X Joe Orost, X Israel Pinkas, X Gary Puckering, X Bill Randle, X Marc Ries, X Rich Salz, X Greg Schaffer, X George Sipe, X Perry Smith, X Stefan Taxhet, X Andrew Vignaux, X James Woods, X and Ken Yap. SHAR_EOF fi # end of overwriting check echo shar: extracting "'ispell.4'" '(6562 characters)' if test -f 'ispell.4' then echo shar: will not over-write existing file "'ispell.4'" else sed 's/^X //' << \SHAR_EOF > 'ispell.4' X .TH ISPELL 4 X .SH NAME X ispell \- format of ispell dictionaries X .SH DESCRIPTION X Dictionaries for X .IR ispell (1) X come in two formats: X raw and hashed.
X The hashed dictionary is generated by X .I buildhash X (see X .IR ispell "(1))" X from the raw dictionary, and is not described here. X .PP X A raw X .I ispell X dictionary (either the main dictionary or your own personal X dictionary) contains a list of words, one per line. X Each word may optionally be followed by a slash ("/") and one or more X flags, which modify the root word as explained below. X Case is significant in the root word, but ignored in the flags. X The dictionary does not need to be sorted. X .PP X The case of the root word controls the case of words accepted by X .IR ispell , X as follows: X .IP (1) X If the root word appears only in lower case (e.g., "bob"), X it will be accepted in lower case, capitalized, or all capitals. X .IP (2) X If the root word appears capitalized (e.g., "Robert"), it will not X be accepted in X all-lower case, but will be accepted capitalized or all in capitals. X .IP (3) X If the root word appears all in capitals (e.g., "UNIX"), X it will only be accepted all in capitals. X .IP (4) X If the root word appears with a "funny" capitalization (e.g., "ITCorp"), X a word will be accepted only if it follows that capitalization, or if X it appears all in capitals. X .IP (5) X More than one capitalization of a root word may appear in the dictionary. X Flags from different capitalizations are combined by OR-ing them together. X .PP X Redundant capitalizations (e.g., "bob" and "Bob") will be combined X by X .I buildhash X and by X .I ispell X (for personal dictionaries), X and can be removed from a raw dictionary by X .IR munchlist . X .PP X For example, the dictionary: X .PP X .RS X .nf X bob X Robert X UNIX X ITcorp X ITCorp X .fi X .RE X .PP X will accept "bob," "Bob," "BOB," "Robert," "ROBERT," "UNIX," "ITcorp," X "ITCorp," and "ITCORP," and will reject all others. X Some of the unacceptable forms are "bOb," "robert," "Unix," and "ItCorp." X .PP X As mentioned above, root words in any dictionary may be extended by flags.
X Each flag is a single alphabetic character, which represents a suffix X that may be added to the root to form a new word. X For example, the "D" flag can be added to "bathe" to make "bathed". X Since flags are represented as a single bit in the hashed dictionary, this X results in significant space savings. X The X .I munchlist X script will reduce an existing raw dictionary by adding flags when possible. X .PP X When a word is extended with a suffix, the suffix will be accepted only X if it appears in the same case X as the final letter of the word. X Thus, for example, the entry "UNIX/M" in the main dictionary ("M" means X add an apostrophe and an "s" to make a possessive) would accept "UNIX'S" X but would reject "UNIX's". X If "UNIX's" is legal, it must appear as a separate dictionary entry, X and it will not be combined by X .IR munchlist . X .PP X In the following discussion of the flags, X let # and @ be "variables" that can stand for any letter. X Upper case letters are constants. X "..." stands for any string of zero or more X letters, but note that no word may exist in the dictionary which is not at X least 2 letters long, so, for example, "fly" may not be produced by placing X the "Y" flag on "f". X Also, no flag is effective unless the word that it X creates is at least 4 letters long, so, for example, "wed" may not be X produced by placing the "D" flag on "we". X .PP X The meaning of the flags is as follows: X .IP "V" X .in +5m X .ti -5m X \&...e --> ...ive as in create --> creative X .br X .ti -5m X if # .ne. e, ...# --> ...#ive as in prevent --> preventive X .in -5m X .IP "N" X .in +5m X .ti -5m X \&...e --> ...ion as in create --> creation X .br X .ti -5m X \&...y --> ...ication as in multiply --> multiplication X .br X .ti -5m X if # .ne. 
e or y, ...# --> ...#en as in fall --> fallen X .in -5m X .IP "X" X .in +5m X .ti -5m X \&...e --> ...ions as in create --> creations X .br X .ti -5m X \&...y --> ...ications as in multiply --> multiplications X .br X .ti -5m X if # .ne. e or y, ...# --> ...#ens as in weak --> weakens X .in -5m X .IP "H" X .in +5m X .ti -5m X \&...y --> ...ieth as in twenty --> twentieth X .br X .ti -5m X if # .ne. y, ...# --> ...#th as in hundred --> hundredth X .in -5m X .IP "Y" X .in +5m X .ti -5m X \&... --> ...ly as in quick --> quickly X .in -5m X .IP "G" X .in +5m X .ti -5m X \&...e --> ...ing as in file --> filing X .br X .ti -5m X if # .ne. e, ...# --> ...#ing as in cross --> crossing X .in -5m X .IP "J" X .in +5m X .ti -5m X \&...e --> ...ings as in file --> filings X .br X .ti -5m X if # .ne. e, ...# --> ...#ings as in cross --> crossings X .in -5m X .IP "D" X .in +5m X .ti -5m X \&...e --> ...ed as in create --> created X .br X .ti -5m X .br X .ti -5m X if @ .ne. a, e, i, o, or u, X \&...@y --> ...@ied as in imply --> implied X .br X .ti -5m X if # .ne. e or y, or (# = y and @ = a, e, i, o, or u) X \&...@# --> ...@#ed as in cross --> crossed X or convey --> conveyed X .in -5m X .IP "T" X .in +5m X .ti -5m X \&...e --> ...est as in late --> latest X .br X .ti -5m X if @ .ne. a, e, i, o, or u, X \&...@y --> ...@iest as in dirty --> dirtiest X .br X .ti -5m X if # .ne. e or y, or (# = y and @ = a, e, i, o, or u) X \&...@# --> ...@#est as in small --> smallest X or gray --> grayest X .in -5m X .IP "R" X .in +5m X .ti -5m X \&...e --> ...er as in skate --> skater X .br X .ti -5m X if @ .ne. a, e, i, o, or u, X \&...@y --> ...@ier as in multiply --> multiplier X .br X .ti -5m X if # .ne. e or y, or (# = y and @ = a, e, i, o, or u) X \&...@# --> ...@#er as in build --> builder X or convey --> conveyer X .in -5m X .IP "Z" X .in +5m X .ti -5m X \&...e --> ...ers as in skate --> skaters X .br X .ti -5m X if @ .ne. 
a, e, i, o, or u, X \&...@y --> ...@iers as in multiply --> multipliers X .br X .ti -5m X if # .ne. e or y, or (# = y and @ = a, e, i, o, or u) X \&...@# --> ...@#ers as in build --> builders X or slay --> slayers X .in -5m X .IP "S" X .in +5m X .ti -5m X if @ .ne. a, e, i, o, or u, X \&...@y --> ...@ies as in imply --> implies X .br X .ti -5m X if # .eq. s, x, z, or h, X \&...# --> ...#es as in fix --> fixes X .br X .ti -5m X if # .ne. s, x, z, h, or y, or (# = y and @ = a, e, i, o, or u) X \&...@# --> ...@#s as in bat --> bats X or convey --> conveys X .in -5m X .IP "P" X .in +5m X .ti -5m X if @ .ne. a, e, i, o, or u, X \&...@y --> ...@iness as in cloudy --> cloudiness X .br X .ti -5m X if # .ne. y, or @ = a, e, i, o, or u, X \&...@# --> ...@#ness as in late --> lateness X or gray --> grayness X .in -5m X .IP "M" X .in +5m X .ti -5m X \&... --> ...'s as in dog --> dog's X .in -5m X .PP X To summarize more briefly: X .PP X .RS X .nf X V \- ive X N \- ion, ication, en X X \- ions, ications, ens X H \- th, ieth X Y \- ly X G \- ing X J \- ings X D \- ed X T \- est X R \- er X Z \- ers X S \- s, es, ies X P \- ness, iness X M \- 's X .fi X .RE X .SH "SEE ALSO" X ispell(1) SHAR_EOF fi # end of overwriting check # End of shell archive exit 0 -- Brandon S. Allbery {decvax,cbatt,cbosgd}!cwruecmp!ncoast!allbery Tridelta Industries {ames,mit-eddie,talcott}!necntc!ncoast!allbery 7350 Corporate Blvd. necntc!ncoast!allbery@harvard.HARVARD.EDU Mentor, OH 44060 +01 216 255 1080 (also eddie.MIT.EDU)