gtoal@tharr.UUCP (Graham Toal) (08/21/90)
/* Hacked a long time ago from Byte article; sorry I've lost the reference to the original author. This isn't code for reading; this is code for laying down and avoiding... ;-) This is for the person who requested it recently; I'm afraid his article has dropped off here, so I can't mail it. However it's a traditional 'quick hack' so I won't put an 'Archive-name:' header in. If some site *really* wants it archived, why not tidy it up and repost it! */ #include <stdio.h> #include <stdlib.h> #include <string.h> #define maxtext 250000 #define maxwords 30000 #define NULL 0 #define bitmask unsigned int #define MAXMASKS 3 #define maskwidth (8*sizeof(bitmask)) #define STACKMAX 32 typedef bitmask bitsig[MAXMASKS+1]; int freqs[26]; bitsig uflosig; int letmask[26]; int letbit[26]; int letwidth[26]; int lastmask; int numwords; char *textnext; bitmask *wordsigs; char *anawords[STACKMAX]; char **anaptr; int anacount=0; int targetwords; void die (char *ermsg); void printanagram(char *target, int expected); int fieldwidth(num) int num; { int tot = 0; while (num != 0) { tot += 1; num = num >> 1; } return(tot+1); } void choosefields(freqs) int freqs[]; { int letter; int curmask = 0, curbit = 0; int width; for (letter = 0; letter < 26; letter++) if (freqs[letter] != 0) { width = fieldwidth(freqs[letter]); if (curbit+width > maskwidth) { if (++curmask >= MAXMASKS) die("Sorry: Phrase too long to handle.\n"); curbit = 0; } letmask[letter] = curmask; letbit[letter] = curbit; letwidth[letter] = width; curbit += width; } if (curmask > lastmask) lastmask = curmask; } void makefreqs(str, freq) char *str; int freq[]; { char c; int i; for (i = 0; i < 26; i++) freq[i] = 0; while ((c = *str++) != '\0') { if (('A' <= c) && (c <= 'Z')) c += 32; c -= 'a'; if ((c >= 0) && (c < 26)) freq[c] += 1; } } void makeonesig(str, sig) register char *str; bitmask sig[]; { register int l; int sfreqs[26]; register bitmask fr; makefreqs(str, sfreqs); for (l = 0; l <= lastmask; l++) sig[l] = 0; for (l = 0; l < 26; l++) { if (sfreqs[l]) { fr = ((bitmask) sfreqs[l]) << letbit[l]; sig[letmask[l]] += fr; } } } void makeuf(freqs, letmask, letbit, letwidth) int freqs[]; int letmask[], letbit[], letwidth[]; { int l; int bnum, bwidth; for (l = 0; l <= MAXMASKS; l++) uflosig[l] = 0; for (l = 0; l < 26; l++) if (freqs[l] != 0) { bnum = letbit[l]; bwidth = letwidth[l]; uflosig[letmask[l]] += (1 << (bnum+bwidth-1)); } } #define DOMASK(MASK) { \ newmask = curnode[MASK] - cursig[MASK]; \ if ((newmask & uflosig[MASK])) break; \ newsig[MASK] = newmask; \ bitsleft = bitsleft | newmask; \ } void findanagrams(curword, curnode, wordlist, target) register int curword; register bitmask *curnode; char *wordlist[]; char *target; { bitsig newsig; register bitmask newmask; register bitmask *cursig; register int bitsleft; cursig = &wordsigs[curword * (lastmask+1)]; while (curword < numwords) { bitsleft = 0; if (lastmask > 2) {fprintf(stderr,"BUG 1\n");exit(0);} if (lastmask < 0) {fprintf(stderr,"BUG 2\n");exit(0);} switch (lastmask) { case 2: DOMASK(2) case 1: DOMASK(1) case 0: DOMASK(0) *anaptr++ = wordlist[curword]; if (bitsleft == 0) { printanagram(target,targetwords); } else { findanagrams(curword, newsig, wordlist, target); } --anaptr; } curword++; cursig += (lastmask+1); } } void printanagram(target, expected) char *target; int expected; { int wc; char **word; wc = 0; for (word = &anawords[0]; word != anaptr; word++) { wc ++; } /* if (wc == expected) {*/ wc = 0; fprintf(stderr, "%s is an anagram of ", target); for (word = &anawords[0]; word != anaptr; word++) { printf("%s", *word); wc++; /*if ((expected > 1) && (wc != expected))*/ fprintf(stderr, " "); } printf("\n"); /* }*/ } void die (ermsg) char *ermsg; { printf(ermsg); exit(0); } int getline(s, lim) char s[]; int lim; { int c, i; fprintf(stderr, "Word: "); i = 0; for (;;) { if (--lim <= 0) break; c=fgetc(stdin); if (c == EOF) break; c &= 255; if (c == 10) break; if (c == 13) break; s[i++] = c; } s[i] = '\0'; return(i); } int main (int argc, char **argv) { int c; int i, len, compatible; FILE *DictFile; char inittarget[64]; char *target; char *textbuffer; char **wordlist; int wfreqs[26]; if (argc != 2) { fprintf(stderr, "syntax: magna wordlist\n"); exit(0); } textbuffer = (char *)malloc(maxtext); wordlist = (char **)malloc(maxwords*sizeof(char *)); if ((textbuffer == NULL) || (wordlist == NULL)) { fprintf(stderr, "Not enough store\n");exit(0); } DictFile = fopen(argv[1], "r"); if (DictFile == NULL) { fprintf(stderr, "Sorry - can\'t open word list \'%s\'\n", argv[1]); exit(0); } target = &inittarget[0]; numwords = 0; anaptr = &anawords[0]; textnext = &textbuffer[0]; len = getline(target, 64); target = strcpy(textnext, target); textnext += len+1; textnext = (char *) ((((int) textnext)+3) & (~3)); makefreqs(target, freqs); while ((c = fgetc(DictFile)) != EOF) { char *backtrack = textnext; compatible = 0; if ((c < 0) || (c > 128)) compatible = 1; wordlist[numwords] = textnext; *textnext++ = c; while ((c = fgetc(DictFile)) != EOF && c != '\n') { if ((c < 0) || (c > 128)) compatible = 1; *textnext++ = c; } *textnext++ = '\0'; /* If strlen(wordlist[numwords]) > strlen[target]) skip this... */ makefreqs(wordlist[numwords], wfreqs); for (i= 0; i < 26; i++) { if (wfreqs[i] > freqs[i]) { compatible = 1; break; } } if (compatible==0) { fprintf(stderr,"Using %s\n", wordlist[numwords++]); } else textnext = backtrack; } choosefields(freqs); textnext = (char *)((((int)textnext)+3) & (~3)); wordsigs = (bitmask *) &textnext[0]; makeonesig(target, &wordsigs[numwords*(lastmask+1)]); makeuf(freqs, letmask, letbit, letwidth); for (i = 0; i < numwords; i++) { makeonesig(wordlist[i], &wordsigs[i*(lastmask+1)]); } findanagrams(0, &wordsigs[numwords*(lastmask+1)], wordlist, target); return(0); }