[comp.sources.games] v12i081: ag - anagram generator, Part01/01

billr@saab.CNA.TEK.COM (Bill Randle) (05/20/91)

Submitted-by: Morten Lerskau Ronseth <morten@cs.qmw.ac.uk>
Posting-number: Volume 12, Issue 81
Archive-name: ag/Part01
Environment: 

[From the author...]
	[[This is an anagram generator I wrote a little while back,
	  based on an article in Byte, Nov. 1987.
	  It should run on any Unix; I've compiled and run it on
	  HP-UX (6.5 - 7.0), Sun's, Sequent, Mac (Think C).

	  The algorithm used to generate anagrams is very quick, an order
	  of magnitude faster than the program found on uunet.uu.net.
	  As literally zillions of anagrams are generated, long phrases
	  should be avoided, and the output redirected to a file (for
	  later humorous viewing..:-) BTW ``billrandle'' produced 183
	  anagrams from /usr/dict/words - any longer phrases than that,
	  and you'll spend the rest of the day reading anagrams).]]

#! /bin/sh
# This is a shell archive.  Remove anything before this line, then unpack
# it by saving it into a file and typing "sh file".  To overwrite existing
# files, type "sh file -c".  You can also feed this as standard input via
# unshar, or by typing "sh <file", e.g..  If this archive is complete, you
# will see the following message at the end:
#		"End of archive 1 (of 1)."
# Contents:  README MANIFEST Makefile anagram.c
# Wrapped by billr@saab on Mon May 20 09:53:38 1991
PATH=/bin:/usr/bin:/usr/ucb ; export PATH
if test -f 'README' -a "${1}" != "-c" ; then 
  echo shar: Will not clobber existing file \"'README'\"
else
echo shar: Extracting \"'README'\" \(523 characters\)
sed "s/^X//" >'README' <<'END_OF_FILE'
XThis is an anagram generator I wrote a little while back, based
Xon an article in Byte, Nov. 1987.
XThe program compiles with
X	make ag
Xand should run on any Unix; I've compiled and run it on
XHP-UX (6.5 - 7.0), Sun's, Sequent, Mac (Think C).
X
XThe algorithm used to generate anagrams is very quick, an order
Xof magnitude faster than the program found on uunet.uu.net.
XAs literally zillions of anagrams are generated, long phrases
Xshould be avoided, and the output redirected to a file (for
Xlater humorous viewing..:-)
X
X	Morten
END_OF_FILE
if test 523 -ne `wc -c <'README'`; then
    echo shar: \"'README'\" unpacked with wrong size!
fi
# end of 'README'
fi
if test -f 'MANIFEST' -a "${1}" != "-c" ; then 
  echo shar: Will not clobber existing file \"'MANIFEST'\"
else
echo shar: Extracting \"'MANIFEST'\" \(238 characters\)
sed "s/^X//" >'MANIFEST' <<'END_OF_FILE'
X   File Name		Archive #	Description
X-----------------------------------------------------------
X MANIFEST                   1	This shipping list
X Makefile                   1	
X README                     1	
X anagram.c                  1	
END_OF_FILE
if test 238 -ne `wc -c <'MANIFEST'`; then
    echo shar: \"'MANIFEST'\" unpacked with wrong size!
fi
# end of 'MANIFEST'
fi
if test -f 'Makefile' -a "${1}" != "-c" ; then 
  echo shar: Will not clobber existing file \"'Makefile'\"
else
echo shar: Extracting \"'Makefile'\" \(210 characters\)
sed "s/^X//" >'Makefile' <<'END_OF_FILE'
XCC = 		gcc
XCCFLAGS = 	-O
XBINDIR = /nfs/sequent/users/staff/morten/bin
X
Xag : anagram.c
X	 $(CC) $(CCFLAGS) anagram.c -o ag
X
Xinstall : ag
X	mv ag $(BINDIR)
X
Xshar : NOTREACHED
X	shar anagram.c Makefile
X
X
XNOTREACHED:
END_OF_FILE
if test 210 -ne `wc -c <'Makefile'`; then
    echo shar: \"'Makefile'\" unpacked with wrong size!
fi
# end of 'Makefile'
fi
if test -f 'anagram.c' -a "${1}" != "-c" ; then 
  echo shar: Will not clobber existing file \"'anagram.c'\"
else
echo shar: Extracting \"'anagram.c'\" \(16239 characters\)
sed "s/^X//" >'anagram.c' <<'END_OF_FILE'
X/**********************************************************
X *                                                        *
X *                                                        *
X *                                                        *   
X *            A program to generate anagrams              *
X *              Based on an idea in Byte, Nov 1987        *
X *                                                        *
X *                     Written by                         *      
X *                                                        *
X *                Morten Lerskau Ronseth                  *
X *                 morten@cs.qmw.ac.uk                    *
X *                                                        *
X *                       9|3|1988                         *
X *                                                        *
X *                                                        *
X **********************************************************/                     
X
X
X/*
X * Jan. 30, 1991
X * rewrote the dictionary loader, now uses a pool for allocation.
X */
X
X/*
X * Sept. 28, 1990
X * removed "islower" & "isupper" - they didn't work...
X */
X     
X/*
X * Sept. 26, 1990
X * wrote my own "toupper" & "tolower" so as to be compatible 
X * with any system.
X */
X     
X#include <stdio.h>
X#include <signal.h>
X
Xextern char *xmalloc (), *xrealloc ();
Xextern void  xfree ();
Xextern char *cat_strings ();
Xextern char *xindex ();
Xextern void fatal ();
Xextern void _abort ();
X
X
X/* various definitions */
X
X#define STD_DICTIONARY "/usr/dict/words"
X#define MAX_LINELEN    70
X#define MAX_WORDLEN    32
X#define MAXMASKS 3
X#define bitmask long
X#define maskwidth  (8 * sizeof (bitmask))
X#define large_pool_space 8192
X#define large_word_space 80
X#define large_stack_space 200
X#define tolower(c) hi2lo[c - 65]
X#define toupper(c) lo2hi[c - 65]
X
X/* some systems cannot handle lowercase in tolower
X   and vice versa...use my own table for fast lookup */
X
Xchar hi2lo[] = {
X    'a','b','c','d','e','f','g','h','i','j','k','l','m','n','o',
X    'p','q','r','s','t','u','v','w','x','y','z',0,0,0,0,0,0,
X    'a','b','c','d','e','f','g','h','i','j','k','l','m','n','o',
X    'p','q','r','s','t','u','v','w','x','y','z'
X};
X
Xchar lo2hi[] = {
X    'A','B','C','D','E','F','G','H','I','J','K','L','M','N','O',
X    'P','Q','R','S','T','U','V','W', 'X','Y','Z', 0,0,0,0,0,0,
X    'A','B','C','D','E','F','G','H','I','J','K','L','M','N','O',
X    'P','Q','R','S','T','U','V','W', 'X','Y','Z'
X};
X
X
Xchar *program;
Xchar *usage = "[-d `dictionary'] [-a] [-v] [-V] [-w] [-W] [-s `SIZE'] [-o `outfile'] [phrase(s)]";
X
X/* 1 if only action is to print out the ellegible words [-W]*/
X
Xint words_only = 0;
X
X/* 1 if the ellegible words are to be printed out [-w]*/
X
Xint words_out = 0;
X
X/* size of smallest word in an angram.
X   i.e. -s 2 would prevent angrams with singlelettered words [-s]*/
X   
Xint min_size = 2;
X
X/* should we count `a' and `i' as complete words? [-a]*/
X
Xint do_ai = 0;
X
X/* speak up! [-v] */
X
Xint verbose = 0;
X
X/* 1 if write out version info [-V] */
X
Xint version = 0;
X
X/* file to write anagrams to, if any */
X
Xchar *outfile = NULL;
Xchar *infile = STD_DICTIONARY;
X
Xchar version_string[] = "Anagram Finder v. 1.0b3";
X
Xint no_of_phrases = 0;
X
X
X/* phrase variables */
X
Xtypedef bitmask bitsig[MAXMASKS];
Xint freqs[26], letmask[26], letbit[26], letwidth[26], lastmask;
Xbitsig uflosig;
Xchar *phrase;
Xbitmask phrasesig[MAXMASKS];
X
X/* pool information */
X
Xchar *pool = NULL;
X
X/* dictionary information */
X
Xchar **wordlist = NULL;
Xint wordcount = 0;
Xbitmask *wordsigs;
X
X/* for printing anagrams */
X
Xchar **analist;
Xlong anacount = 0;
Xlong total = 0;
X
X
X/*************************************************************************/
X
X#define IS_NUM(X) ((X) >= '0' && (X) <= '9')
X#define IS_LETTER(c) (((c) >= 'A' && (c) <= 'Z') || ((c) >= 'a' && (c) <= 'z'))
X
Xint
Xstr_to_num (s)
X    char *s;
X{
X    int result = 0;
X    
X    while (*s) {
X        if (IS_NUM(*s))
X            result = (result * 10) + *s++ - '0';
X    }
X    return result;
X}
X
Xint
Xcandidate (word)
X    register char *word;
X{
X    register char c;
X    register int count = 0;
X    
X    if (strlen (word) < min_size && !do_ai) return 0;
X    while (c = word[count++]) {
X		if (!IS_LETTER (c) ||
X            (!xindex (phrase, tolower (c)) && 
X             !xindex (phrase, toupper (c))))
X            return 0;
X	}
X    if (do_ai && !word[1])
X        return (xindex ("aAiI", word[0]) != 0);
X    return 1;
X}
X
X
X
X/* prints out all the anagrams found so far, if any */
X
Xvoid
Xprintlist (list, no)
X    char **list;
X    int no;
X{
X    register int count;
X    register int linelen = 0;
X    
X    for (count = 0; count < no; count++) {
X        linelen += strlen (list[count]) + 1;
X        if (linelen >= MAX_LINELEN) {
X            linelen = 0;
X            (void)fprintf (stdout, "\n");
X        }
X        (void)fprintf (stdout, "%s ", list[count]);
X    }
X    (void)fprintf (stdout, "\n");
X    (void)fflush (stdout);
X}
X
X/* old version using log didn't work too well...
X   use smalltalk's version of highBit instead */
Xint
Xfieldwidth (v)
X    int v;
X{
X    int i = 1, bit = 1;
X
X    if (v < 0) _abort ("Fieldwidth: negative number");
X    if (!v) return 2;
X    while (v > bit) {
X        i++; bit += bit + 1;
X    }
X    return (i + 2);
X}
X
Xvoid
Xmakefreqs (str, freq)
X    register char *str;
X    register int freq[];
X{
X    register char c;
X    register int f;
X    
X    for (f = 0; f < 26; f++) 
X        freq[f] = 0;
X    while (c = *(str++)) {
X        freq[tolower (c) - 97] += 1;
X    }
X}
X
Xvoid
Xchoosefields (frq)
X    int frq[];
X{
X    int letter;
X    int curmask = 0,curbit = 0;
X    int width;
X    
X    for (letter = 0; letter < 26; letter++)
X        if (frq[letter] != 0) {
X            width = fieldwidth (frq[letter]);
X            if ((curbit + width) > maskwidth) {
X                if (++curmask >= MAXMASKS)
X                    _abort ("Phrase too long to handle.");
X                curbit = 0;
X            }
X            letmask[letter] = curmask;
X            letbit[letter] = curbit;
X            letwidth[letter] = width;
X            curbit += width;
X        }
X    lastmask = curmask;
X}
X
Xvoid
Xmakeonesig (str, sig)
X    register char *str;
X    register bitmask sig[];
X{
X    register int i, j = 0;
X    int sfreqs[26];
X    register bitmask fr;
X    
X    makefreqs (str, sfreqs);
X    for (i = 0; i <= lastmask; i++)
X        sig[i] = 0;
X    for (i = 0; i < 26; i++)
X        if (sfreqs[i]) {
X            fr = ((bitmask)sfreqs[i]) << letbit[i];
X            sig[letmask[i]] += fr;
X            j += fr;
X        }
X}
X
Xvoid
Xmakeuf (frq)
X    int frq[];
X{
X    int i, bnum, bwidth;
X    
X    for (i = 0; i < MAXMASKS; i++)
X        uflosig[i] = 0;
X    for (i = 0; i < 26; i++)
X        if (frq[i]) {
X            bnum = letbit[i];
X            bwidth = letwidth[i];
X            uflosig[letmask[i]] += (1L << (bnum + bwidth - 1));
X        }
X}
X
X#define DOMASK(MASK) {                           \
X    newmask = curnode[MASK] - cursig[MASK];      \
X    if (newmask & uflosig[MASK]) break;          \
X    newsig[MASK] = newmask;                      \
X    bitsleft |= newmask;                         \
X}
X    
X
Xvoid
Xfindanagrams (curword, curnode)
X    register int curword;
X    register bitmask *curnode;
X{
X    bitsig newsig;
X    register bitmask newmask, *cursig;
X    register long bitsleft;
X    int bsize = large_stack_space;
X    
X    cursig = &wordsigs[curword * (lastmask + 1)];
X    while (curword < wordcount) {
X        bitsleft = 0;
X        switch (lastmask) {
X            case 2:DOMASK(2)
X            case 1:DOMASK(1)
X            case 0:DOMASK(0)
X            
X            if (anacount == bsize) {
X                bsize *= 2;
X                analist = (char **)xrealloc (analist, bsize * sizeof (char *));
X            }
X            analist[anacount++] = wordlist[curword];
X            if (!bitsleft) {
X                printlist (analist, anacount);
X                total++;
X            }
X            else findanagrams (curword, newsig);
X            --anacount;
X        }
X        curword++;
X        cursig += (lastmask + 1);
X    }
X}
X
X
X/* If realloc moves the list around in memory (not very likely, but still...)
X * then update all pointers in the wordlist.
X */
X
Xvoid
Xadjust_list (list, offset, count)
X	char **list;
X	int offset, count;
X{
X	register int i;
X
X	for (i = 0; i < count; i++)
X		list[i] += offset;
X}
X
X
Xvoid
Xread_dict (name)
X    char *name;
X{
X    FILE *dict;
X    int psize = large_pool_space;
X    int bsize = large_stack_space;
X    int msize = large_stack_space * MAXMASKS;
X    char *pend, *next_slot;
X    
X    if (!(dict = fopen (name, "r"))) {
X        char *s = cat_strings ("Couldn't open <", name, ">");
X        _abort (s);
X    }
X    if (verbose) {
X        (void)fprintf (stderr, "\nLoading dictionary...");
X        (void)fflush (stderr);
X    }
X    
X    /* analist has to be set up here, as "findanagrams" is recursive */
X    analist = (char **)xmalloc (bsize * sizeof (char *));
X    wordlist = (char **) xmalloc (bsize * sizeof (char *));
X    wordsigs = (bitmask *) xmalloc (msize * sizeof (bitmask));
X    pool = (char *)xmalloc (psize * sizeof (char));
X    pend = pool + psize;
X    next_slot = pool;
X    
X    while (fscanf (dict, "%s", next_slot) != EOF) {
X        if (candidate (next_slot)) {
X        
X            /* if this entry would overflow stack, expand it */
X            
X            if (wordcount == bsize) {
X                bsize *= 2; msize *= 2;
X                wordlist = (char **)xrealloc (wordlist, bsize * sizeof (char *));
X                wordsigs = (bitmask *)xrealloc (wordsigs, msize * sizeof (bitmask));
X            }
X            wordlist[wordcount] = next_slot;
X            makeonesig (next_slot, &wordsigs[wordcount * (lastmask + 1)]);
X
X            next_slot += strlen (next_slot) + 1;
X            if ((next_slot + MAX_WORDLEN) >= pend) {
X				char *old_pool = pool;
X
X                psize *= 2;
X                pool = (char *)xrealloc (pool, psize * sizeof (char));
X				if (old_pool != pool)
X					adjust_list (wordlist, pool - old_pool, wordcount);
X                pend = pool + psize;
X            }
X            wordcount++;
X        }
X    }
X    if (verbose) {
X        (void)fprintf (stderr, "done\n"); 
X        (void)fflush (stderr);
X    }
X    (void)fclose (dict);
X}
X
Xvoid
Xclean_up ()
X{
X    /* first, free all strings allocated */
X    
X    xfree (pool);
X        
X    /* then, free the array of pointers to these strings */
X        
X    xfree (wordlist);
X    
X    /* then, free the array of pointers to the anagrams */
X    
X    xfree (analist);
X    
X    /* At last, free the array `wordsigs' */
X    
X    xfree (wordsigs);
X}
X
Xvoid
Xparse_flags (argc, argv)
X    int argc;
X    char *argv[];
X{
X    int i;
X
X    for (i = 1; i < argc; i++) {
X        if (argv[i][0] == '-' && argv[i][1] != 0) {
X        
X            register char *str = argv[i] + 1;
X            
X            switch (str[0]) {
X                case 'd':
X                    if (argv[i + 1] != NULL) {
X                        infile = argv[i + 1];
X                        argv[i + 1] = NULL;
X                    }
X                    break;
X
X                case 's':
X                    if (argv[i + 1] != NULL) {
X                        min_size = str_to_num (argv[i + 1]);
X                        argv[i + 1] = NULL;
X                    }
X                    else
X                        _abort ("No size specified.");
X                    break;
X
X                case 'o':
X                    if (outfile != NULL)
X                        _abort ("Outfile specified twice");
X                    if (argv[i + 1] != NULL) {
X                        outfile = argv[i + 1];
X                        if (!strncmp (outfile, "-", 1))
X                            outfile = "";
X                        else
X                            argv[i + 1] = NULL;
X                    }
X                    else
X                        _abort ("No outfile specified.");
X                    break;
X
X                case 'a':
X                    do_ai = 1;
X                    break;
X
X                case 'v':
X                    verbose = 1;
X                    break;
X
X                case 'V':
X                    version = 1;
X                    break;
X
X                case 'w':
X                    words_out = 1;
X                    break;
X
X                case 'W':
X                    words_only = 1;
X                    break;
X
X                case '-':
X                case '\n':
X                    if (outfile == NULL)
X                        outfile = "";
X                    argv[i] = NULL;
X                    break;
X
X                default : {
X                    char *s = cat_strings ("Unrecognized flag: \"", str, "\"");
X                    _abort (s);
X                }
X            }
X        }
X        else if (argv[i] != NULL && ++no_of_phrases != i) {
X            argv[no_of_phrases] = argv[i];
X            argv[i] = NULL;
X        }
X    }
X}
X
Xvoid
Xdo_all (argv)
X    char *argv[];
X{
X    int i, j;
X
X    for (i = 1; i <= no_of_phrases; i++) {
X    
X        phrase = argv[i];
X        wordcount = 0;
X        anacount = 0;
X        total = 0;
X
X        makefreqs (phrase, freqs);
X        choosefields (freqs);
X        makeonesig (phrase, phrasesig);
X        makeuf (freqs);
X        read_dict (infile);
X        if (words_out || words_only) {
X             (void)fprintf (stdout, "number of words read : %d\n", 
X                            wordcount); 
X             (void)fflush (stdout);
X             printlist (wordlist, wordcount);
X        }
X        if (words_only)
X            continue;
X        if (verbose) {
X            (void)fprintf (stderr, "\nStarting generator...");
X                
X            (void)fprintf (stdout, "\n Anagrams generated from \"%s\" :", phrase);
X            (void)fprintf (stdout, "\n--------------------------");
X            for (j = 0; j < strlen (phrase); j++)
X                (void)fprintf (stdout, "-");
X            (void)fprintf (stdout, "---\n");
X            (void)fflush (stdout);
X        }
X
X        /* Find all anagrams asociated with the specified pattern */
X        
X        findanagrams (0, phrasesig);
X        if (verbose) {
X            (void)fprintf (stdout, "\nNo. of anagrams : %ld\n", total);
X            (void)fprintf (stderr, "done\n");
X        }
X        clean_up ();
X    }
X    
X}
X
Xmain (argc, argv)
X    int argc;
X    char **argv;
X{
X    /* set up interrupt handler */
X    if (signal (SIGINT, SIG_IGN) != SIG_IGN)
X        signal (SIGINT, fatal);
X    if (signal (SIGKILL, SIG_IGN) != SIG_IGN)
X        signal (SIGKILL, fatal);
X
X
X    program = argv[0];
X
X    if (argc == 1)
X        _abort (usage);
X
X    parse_flags (argc, argv);
X        
X    if (version) {
X        (void)fprintf (stderr, "%s\n", version_string);
X        if (no_of_phrases == 0)
X            exit (0);
X    }
X    if (no_of_phrases == 0)
X        _abort ("No pattern(s) specified");
X
X    if (!outfile || !strcmp (outfile, ""))
X        outfile = "stdout";
X    else if (!freopen (outfile, "w", stdout)) {
X        char *s = cat_strings ("Couldn't open ", outfile, "");
X        _abort (s);
X    }
X    do_all (argv);
X    return (0);
X}
X     
X
X     
Xchar *
Xxrealloc (ptr, size)
X    char *ptr;
X    int size;
X{
X    register char *result = (char *)realloc (ptr, size);
X    if (!result)
X        _abort ("Virtual memory exhausted.");
X    return result;
X}
X
Xchar *
Xxmalloc (size)
X    int size;
X{
X    register char *result = (char *)calloc (1, size);
X    if (!result)
X        _abort ("Virtual memory exhausted.");
X    return result;
X}
X
Xvoid
Xxfree (ptr)
X    char *ptr;
X{
X    if (ptr) free (ptr);
X}
X
Xvoid
Xfatal (signum)
X    int signum;
X{
X  signal (signum, SIG_DFL);
X}
X
Xchar *
Xcat_strings (s1, s2, s3)
X    char *s1, *s2, *s3;
X{
X    int len1 = strlen (s1), len2 = strlen (s2), len3 = strlen (s3);
X    char *s = (char *) xmalloc (len1 + len2 + len3 + 1);
X
X    (void)strcpy (s, s1);
X    (void)strcpy (s + len1, s2);
X    (void)strcpy (s + len1 + len2, s3);
X    *(s + len1 + len2 + len3) = 0;
X    
X    return s;
X}
X
X/* BSD and SYSV use different names, so use my own */
X
Xchar *
Xxindex (s, c)
X    register char *s, c;
X{
X    while (*s) {
X        if (*s == c) return (s);
X        s++;
X    }
X    return 0;
X}
X
Xvoid
X_abort (arg)
X    char *arg;
X{
X    (void)fprintf (stderr, "%s: ", program);
X    (void)fprintf (stderr, arg);
X    (void)fprintf (stderr, "\n");
X    (void)fflush (stderr);
X    exit (-1);
X}
END_OF_FILE
if test 16239 -ne `wc -c <'anagram.c'`; then
    echo shar: \"'anagram.c'\" unpacked with wrong size!
fi
# end of 'anagram.c'
fi
echo shar: End of archive 1 \(of 1\).
cp /dev/null ark1isdone
MISSING=""
for I in 1 ; do
    if test ! -f ark${I}isdone ; then
	MISSING="${MISSING} ${I}"
    fi
done
if test "${MISSING}" = "" ; then
    echo You have the archive.
    rm -f ark[1-9]isdone
else
    echo You still need to unpack the following archives:
    echo "        " ${MISSING}
fi
##  End of shell archive.
exit 0