billr@saab.CNA.TEK.COM (Bill Randle) (05/20/91)
Submitted-by: Morten Lerskau Ronseth <morten@cs.qmw.ac.uk> Posting-number: Volume 12, Issue 81 Archive-name: ag/Part01 Environment: [From the author...] [[This is an anagram generator I wrote a little while back, based on an article in Byte, Nov. 1987. It should run on any Unix; I've compiled and run it on HP-UX (6.5 - 7.0), Sun's, Sequent, Mac (Think C). The algorithm used to generate anagrams is very quick, an order of magnitude faster than the program found on uunet.uu.net. As literally zillions of anagrams are generated, long phrases should be avoided, and the output redirected to a file (for later humorous viewing..:-) BTW ``billrandle'' produced 183 anagrams from /usr/dict/words - any longer phrases than that, and you'll spend the rest of the day reading anagrams).]] #! /bin/sh # This is a shell archive. Remove anything before this line, then unpack # it by saving it into a file and typing "sh file". To overwrite existing # files, type "sh file -c". You can also feed this as standard input via # unshar, or by typing "sh <file", e.g.. If this archive is complete, you # will see the following message at the end: # "End of archive 1 (of 1)." # Contents: README MANIFEST Makefile anagram.c # Wrapped by billr@saab on Mon May 20 09:53:38 1991 PATH=/bin:/usr/bin:/usr/ucb ; export PATH if test -f 'README' -a "${1}" != "-c" ; then echo shar: Will not clobber existing file \"'README'\" else echo shar: Extracting \"'README'\" \(523 characters\) sed "s/^X//" >'README' <<'END_OF_FILE' XThis is an anagram generator I wrote a little while back, based Xon an article in Byte, Nov. 1987. XThe program compiles with X make ag Xand should run on any Unix; I've compiled and run it on XHP-UX (6.5 - 7.0), Sun's, Sequent, Mac (Think C). X XThe algorithm used to generate anagrams is very quick, an order Xof magnitude faster than the program found on uunet.uu.net. XAs literally zillions of anagrams are generated, long phrases Xshould be avoided, and the output redirected to a file (for Xlater humorous viewing..:-) X X Morten END_OF_FILE if test 523 -ne `wc -c <'README'`; then echo shar: \"'README'\" unpacked with wrong size! fi # end of 'README' fi if test -f 'MANIFEST' -a "${1}" != "-c" ; then echo shar: Will not clobber existing file \"'MANIFEST'\" else echo shar: Extracting \"'MANIFEST'\" \(238 characters\) sed "s/^X//" >'MANIFEST' <<'END_OF_FILE' X File Name Archive # Description X----------------------------------------------------------- X MANIFEST 1 This shipping list X Makefile 1 X README 1 X anagram.c 1 END_OF_FILE if test 238 -ne `wc -c <'MANIFEST'`; then echo shar: \"'MANIFEST'\" unpacked with wrong size! fi # end of 'MANIFEST' fi if test -f 'Makefile' -a "${1}" != "-c" ; then echo shar: Will not clobber existing file \"'Makefile'\" else echo shar: Extracting \"'Makefile'\" \(210 characters\) sed "s/^X//" >'Makefile' <<'END_OF_FILE' XCC = gcc XCCFLAGS = -O XBINDIR = /nfs/sequent/users/staff/morten/bin X Xag : anagram.c X $(CC) $(CCFLAGS) anagram.c -o ag X Xinstall : ag X mv ag $(BINDIR) X Xshar : NOTREACHED X shar anagram.c Makefile X X XNOTREACHED: END_OF_FILE if test 210 -ne `wc -c <'Makefile'`; then echo shar: \"'Makefile'\" unpacked with wrong size! fi # end of 'Makefile' fi if test -f 'anagram.c' -a "${1}" != "-c" ; then echo shar: Will not clobber existing file \"'anagram.c'\" else echo shar: Extracting \"'anagram.c'\" \(16239 characters\) sed "s/^X//" >'anagram.c' <<'END_OF_FILE' X/********************************************************** X * * X * * X * * X * A program to generate anagrams * X * Based on an idea in Byte, Nov 1987 * X * * X * Written by * X * * X * Morten Lerskau Ronseth * X * morten@cs.qmw.ac.uk * X * * X * 9|3|1988 * X * * X * * X **********************************************************/ X X X/* X * Jan. 30, 1991 X * rewrote the dictionary loader, now uses a pool for allocation. X */ X X/* X * Sept. 28, 1990 X * removed "islower" & "isupper" - they didn't work... X */ X X/* X * Sept. 26, 1990 X * wrote my own "toupper" & "tolower" so as to be compatible X * with any system. X */ X X#include <stdio.h> X#include <signal.h> X Xextern char *xmalloc (), *xrealloc (); Xextern void xfree (); Xextern char *cat_strings (); Xextern char *xindex (); Xextern void fatal (); Xextern void _abort (); X X X/* various definitions */ X X#define STD_DICTIONARY "/usr/dict/words" X#define MAX_LINELEN 70 X#define MAX_WORDLEN 32 X#define MAXMASKS 3 X#define bitmask long X#define maskwidth (8 * sizeof (bitmask)) X#define large_pool_space 8192 X#define large_word_space 80 X#define large_stack_space 200 X#define tolower(c) hi2lo[c - 65] X#define toupper(c) lo2hi[c - 65] X X/* some systems cannot handle lowercase in tolower X and vice versa...use my own table for fast lookup */ X Xchar hi2lo[] = { X 'a','b','c','d','e','f','g','h','i','j','k','l','m','n','o', X 'p','q','r','s','t','u','v','w','x','y','z',0,0,0,0,0,0, X 'a','b','c','d','e','f','g','h','i','j','k','l','m','n','o', X 'p','q','r','s','t','u','v','w','x','y','z' X}; X Xchar lo2hi[] = { X 'A','B','C','D','E','F','G','H','I','J','K','L','M','N','O', X 'P','Q','R','S','T','U','V','W', 'X','Y','Z', 0,0,0,0,0,0, X 'A','B','C','D','E','F','G','H','I','J','K','L','M','N','O', X 'P','Q','R','S','T','U','V','W', 'X','Y','Z' X}; X X Xchar *program; Xchar *usage = "[-d `dictionary'] [-a] [-v] [-V] [-w] [-W] [-s `SIZE'] [-o `outfile'] [phrase(s)]"; X X/* 1 if only action is to print out the ellegible words [-W]*/ X Xint words_only = 0; X X/* 1 if the ellegible words are to be printed out [-w]*/ X Xint words_out = 0; X X/* size of smallest word in an angram. X i.e. -s 2 would prevent angrams with singlelettered words [-s]*/ X Xint min_size = 2; X X/* should we count `a' and `i' as complete words? [-a]*/ X Xint do_ai = 0; X X/* speak up! [-v] */ X Xint verbose = 0; X X/* 1 if write out version info [-V] */ X Xint version = 0; X X/* file to write anagrams to, if any */ X Xchar *outfile = NULL; Xchar *infile = STD_DICTIONARY; X Xchar version_string[] = "Anagram Finder v. 1.0b3"; X Xint no_of_phrases = 0; X X X/* phrase variables */ X Xtypedef bitmask bitsig[MAXMASKS]; Xint freqs[26], letmask[26], letbit[26], letwidth[26], lastmask; Xbitsig uflosig; Xchar *phrase; Xbitmask phrasesig[MAXMASKS]; X X/* pool information */ X Xchar *pool = NULL; X X/* dictionary information */ X Xchar **wordlist = NULL; Xint wordcount = 0; Xbitmask *wordsigs; X X/* for printing anagrams */ X Xchar **analist; Xlong anacount = 0; Xlong total = 0; X X X/*************************************************************************/ X X#define IS_NUM(X) ((X) >= '0' && (X) <= '9') X#define IS_LETTER(c) (((c) >= 'A' && (c) <= 'Z') || ((c) >= 'a' && (c) <= 'z')) X Xint Xstr_to_num (s) X char *s; X{ X int result = 0; X X while (*s) { X if (IS_NUM(*s)) X result = (result * 10) + *s++ - '0'; X } X return result; X} X Xint Xcandidate (word) X register char *word; X{ X register char c; X register int count = 0; X X if (strlen (word) < min_size && !do_ai) return 0; X while (c = word[count++]) { X if (!IS_LETTER (c) || X (!xindex (phrase, tolower (c)) && X !xindex (phrase, toupper (c)))) X return 0; X } X if (do_ai && !word[1]) X return (xindex ("aAiI", word[0]) != 0); X return 1; X} X X X X/* prints out all the anagrams found so far, if any */ X Xvoid Xprintlist (list, no) X char **list; X int no; X{ X register int count; X register int linelen = 0; X X for (count = 0; count < no; count++) { X linelen += strlen (list[count]) + 1; X if (linelen >= MAX_LINELEN) { X linelen = 0; X (void)fprintf (stdout, "\n"); X } X (void)fprintf (stdout, "%s ", list[count]); X } X (void)fprintf (stdout, "\n"); X (void)fflush (stdout); X} X X/* old version using log didn't work too well... X use smalltalk's version of highBit instead */ Xint Xfieldwidth (v) X int v; X{ X int i = 1, bit = 1; X X if (v < 0) _abort ("Fieldwidth: negative number"); X if (!v) return 2; X while (v > bit) { X i++; bit += bit + 1; X } X return (i + 2); X} X Xvoid Xmakefreqs (str, freq) X register char *str; X register int freq[]; X{ X register char c; X register int f; X X for (f = 0; f < 26; f++) X freq[f] = 0; X while (c = *(str++)) { X freq[tolower (c) - 97] += 1; X } X} X Xvoid Xchoosefields (frq) X int frq[]; X{ X int letter; X int curmask = 0,curbit = 0; X int width; X X for (letter = 0; letter < 26; letter++) X if (frq[letter] != 0) { X width = fieldwidth (frq[letter]); X if ((curbit + width) > maskwidth) { X if (++curmask >= MAXMASKS) X _abort ("Phrase too long to handle."); X curbit = 0; X } X letmask[letter] = curmask; X letbit[letter] = curbit; X letwidth[letter] = width; X curbit += width; X } X lastmask = curmask; X} X Xvoid Xmakeonesig (str, sig) X register char *str; X register bitmask sig[]; X{ X register int i, j = 0; X int sfreqs[26]; X register bitmask fr; X X makefreqs (str, sfreqs); X for (i = 0; i <= lastmask; i++) X sig[i] = 0; X for (i = 0; i < 26; i++) X if (sfreqs[i]) { X fr = ((bitmask)sfreqs[i]) << letbit[i]; X sig[letmask[i]] += fr; X j += fr; X } X} X Xvoid Xmakeuf (frq) X int frq[]; X{ X int i, bnum, bwidth; X X for (i = 0; i < MAXMASKS; i++) X uflosig[i] = 0; X for (i = 0; i < 26; i++) X if (frq[i]) { X bnum = letbit[i]; X bwidth = letwidth[i]; X uflosig[letmask[i]] += (1L << (bnum + bwidth - 1)); X } X} X X#define DOMASK(MASK) { \ X newmask = curnode[MASK] - cursig[MASK]; \ X if (newmask & uflosig[MASK]) break; \ X newsig[MASK] = newmask; \ X bitsleft |= newmask; \ X} X X Xvoid Xfindanagrams (curword, curnode) X register int curword; X register bitmask *curnode; X{ X bitsig newsig; X register bitmask newmask, *cursig; X register long bitsleft; X int bsize = large_stack_space; X X cursig = &wordsigs[curword * (lastmask + 1)]; X while (curword < wordcount) { X bitsleft = 0; X switch (lastmask) { X case 2:DOMASK(2) X case 1:DOMASK(1) X case 0:DOMASK(0) X X if (anacount == bsize) { X bsize *= 2; X analist = (char **)xrealloc (analist, bsize * sizeof (char *)); X } X analist[anacount++] = wordlist[curword]; X if (!bitsleft) { X printlist (analist, anacount); X total++; X } X else findanagrams (curword, newsig); X --anacount; X } X curword++; X cursig += (lastmask + 1); X } X} X X X/* If realloc moves the list around in memory (not very likely, but still...) X * then update all pointers in the wordlist. X */ X Xvoid Xadjust_list (list, offset, count) X char **list; X int offset, count; X{ X register int i; X X for (i = 0; i < count; i++) X list[i] += offset; X} X X Xvoid Xread_dict (name) X char *name; X{ X FILE *dict; X int psize = large_pool_space; X int bsize = large_stack_space; X int msize = large_stack_space * MAXMASKS; X char *pend, *next_slot; X X if (!(dict = fopen (name, "r"))) { X char *s = cat_strings ("Couldn't open <", name, ">"); X _abort (s); X } X if (verbose) { X (void)fprintf (stderr, "\nLoading dictionary..."); X (void)fflush (stderr); X } X X /* analist has to be set up here, as "findanagrams" is recursive */ X analist = (char **)xmalloc (bsize * sizeof (char *)); X wordlist = (char **) xmalloc (bsize * sizeof (char *)); X wordsigs = (bitmask *) xmalloc (msize * sizeof (bitmask)); X pool = (char *)xmalloc (psize * sizeof (char)); X pend = pool + psize; X next_slot = pool; X X while (fscanf (dict, "%s", next_slot) != EOF) { X if (candidate (next_slot)) { X X /* if this entry would overflow stack, expand it */ X X if (wordcount == bsize) { X bsize *= 2; msize *= 2; X wordlist = (char **)xrealloc (wordlist, bsize * sizeof (char *)); X wordsigs = (bitmask *)xrealloc (wordsigs, msize * sizeof (bitmask)); X } X wordlist[wordcount] = next_slot; X makeonesig (next_slot, &wordsigs[wordcount * (lastmask + 1)]); X X next_slot += strlen (next_slot) + 1; X if ((next_slot + MAX_WORDLEN) >= pend) { X char *old_pool = pool; X X psize *= 2; X pool = (char *)xrealloc (pool, psize * sizeof (char)); X if (old_pool != pool) X adjust_list (wordlist, pool - old_pool, wordcount); X pend = pool + psize; X } X wordcount++; X } X } X if (verbose) { X (void)fprintf (stderr, "done\n"); X (void)fflush (stderr); X } X (void)fclose (dict); X} X Xvoid Xclean_up () X{ X /* first, free all strings allocated */ X X xfree (pool); X X /* then, free the array of pointers to these strings */ X X xfree (wordlist); X X /* then, free the array of pointers to the anagrams */ X X xfree (analist); X X /* At last, free the array `wordsigs' */ X X xfree (wordsigs); X} X Xvoid Xparse_flags (argc, argv) X int argc; X char *argv[]; X{ X int i; X X for (i = 1; i < argc; i++) { X if (argv[i][0] == '-' && argv[i][1] != 0) { X X register char *str = argv[i] + 1; X X switch (str[0]) { X case 'd': X if (argv[i + 1] != NULL) { X infile = argv[i + 1]; X argv[i + 1] = NULL; X } X break; X X case 's': X if (argv[i + 1] != NULL) { X min_size = str_to_num (argv[i + 1]); X argv[i + 1] = NULL; X } X else X _abort ("No size specified."); X break; X X case 'o': X if (outfile != NULL) X _abort ("Outfile specified twice"); X if (argv[i + 1] != NULL) { X outfile = argv[i + 1]; X if (!strncmp (outfile, "-", 1)) X outfile = ""; X else X argv[i + 1] = NULL; X } X else X _abort ("No outfile specified."); X break; X X case 'a': X do_ai = 1; X break; X X case 'v': X verbose = 1; X break; X X case 'V': X version = 1; X break; X X case 'w': X words_out = 1; X break; X X case 'W': X words_only = 1; X break; X X case '-': X case '\n': X if (outfile == NULL) X outfile = ""; X argv[i] = NULL; X break; X X default : { X char *s = cat_strings ("Unrecognized flag: \"", str, "\""); X _abort (s); X } X } X } X else if (argv[i] != NULL && ++no_of_phrases != i) { X argv[no_of_phrases] = argv[i]; X argv[i] = NULL; X } X } X} X Xvoid Xdo_all (argv) X char *argv[]; X{ X int i, j; X X for (i = 1; i <= no_of_phrases; i++) { X X phrase = argv[i]; X wordcount = 0; X anacount = 0; X total = 0; X X makefreqs (phrase, freqs); X choosefields (freqs); X makeonesig (phrase, phrasesig); X makeuf (freqs); X read_dict (infile); X if (words_out || words_only) { X (void)fprintf (stdout, "number of words read : %d\n", X wordcount); X (void)fflush (stdout); X printlist (wordlist, wordcount); X } X if (words_only) X continue; X if (verbose) { X (void)fprintf (stderr, "\nStarting generator..."); X X (void)fprintf (stdout, "\n Anagrams generated from \"%s\" :", phrase); X (void)fprintf (stdout, "\n--------------------------"); X for (j = 0; j < strlen (phrase); j++) X (void)fprintf (stdout, "-"); X (void)fprintf (stdout, "---\n"); X (void)fflush (stdout); X } X X /* Find all anagrams asociated with the specified pattern */ X X findanagrams (0, phrasesig); X if (verbose) { X (void)fprintf (stdout, "\nNo. of anagrams : %ld\n", total); X (void)fprintf (stderr, "done\n"); X } X clean_up (); X } X X} X Xmain (argc, argv) X int argc; X char **argv; X{ X /* set up interrupt handler */ X if (signal (SIGINT, SIG_IGN) != SIG_IGN) X signal (SIGINT, fatal); X if (signal (SIGKILL, SIG_IGN) != SIG_IGN) X signal (SIGKILL, fatal); X X X program = argv[0]; X X if (argc == 1) X _abort (usage); X X parse_flags (argc, argv); X X if (version) { X (void)fprintf (stderr, "%s\n", version_string); X if (no_of_phrases == 0) X exit (0); X } X if (no_of_phrases == 0) X _abort ("No pattern(s) specified"); X X if (!outfile || !strcmp (outfile, "")) X outfile = "stdout"; X else if (!freopen (outfile, "w", stdout)) { X char *s = cat_strings ("Couldn't open ", outfile, ""); X _abort (s); X } X do_all (argv); X return (0); X} X X X Xchar * Xxrealloc (ptr, size) X char *ptr; X int size; X{ X register char *result = (char *)realloc (ptr, size); X if (!result) X _abort ("Virtual memory exhausted."); X return result; X} X Xchar * Xxmalloc (size) X int size; X{ X register char *result = (char *)calloc (1, size); X if (!result) X _abort ("Virtual memory exhausted."); X return result; X} X Xvoid Xxfree (ptr) X char *ptr; X{ X if (ptr) free (ptr); X} X Xvoid Xfatal (signum) X int signum; X{ X signal (signum, SIG_DFL); X} X Xchar * Xcat_strings (s1, s2, s3) X char *s1, *s2, *s3; X{ X int len1 = strlen (s1), len2 = strlen (s2), len3 = strlen (s3); X char *s = (char *) xmalloc (len1 + len2 + len3 + 1); X X (void)strcpy (s, s1); X (void)strcpy (s + len1, s2); X (void)strcpy (s + len1 + len2, s3); X *(s + len1 + len2 + len3) = 0; X X return s; X} X X/* BSD and SYSV use different names, so use my own */ X Xchar * Xxindex (s, c) X register char *s, c; X{ X while (*s) { X if (*s == c) return (s); X s++; X } X return 0; X} X Xvoid X_abort (arg) X char *arg; X{ X (void)fprintf (stderr, "%s: ", program); X (void)fprintf (stderr, arg); X (void)fprintf (stderr, "\n"); X (void)fflush (stderr); X exit (-1); X} END_OF_FILE if test 16239 -ne `wc -c <'anagram.c'`; then echo shar: \"'anagram.c'\" unpacked with wrong size! fi # end of 'anagram.c' fi echo shar: End of archive 1 \(of 1\). cp /dev/null ark1isdone MISSING="" for I in 1 ; do if test ! -f ark${I}isdone ; then MISSING="${MISSING} ${I}" fi done if test "${MISSING}" = "" ; then echo You have the archive. rm -f ark[1-9]isdone else echo You still need to unpack the following archives: echo " " ${MISSING} fi ## End of shell archive. exit 0