[alt.sources] ars magna

gtoal@tharr.UUCP (Graham Toal) (08/21/90)

/* Hacked a long time ago from Byte article; sorry I've lost the reference
   to the original author.  This isn't code for reading; this is code
   for laying down and avoiding... ;-)
    This is for the person who requested it recently; I'm afraid his
   article has dropped off here, so I can't mail it.  However it's a
   traditional 'quick hack' so I won't put an 'Archive-name:' header
   in.  If some site *really* wants it archived, why not tidy it up
   and repost it!
 */


#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define maxtext 250000
#define maxwords 30000
#define NULL 0
#define bitmask unsigned int
#define MAXMASKS 3
#define maskwidth (8*sizeof(bitmask))
#define STACKMAX 32

typedef bitmask bitsig[MAXMASKS+1];
int     freqs[26];
bitsig  uflosig;
int     letmask[26];
int     letbit[26];
int     letwidth[26];
int     lastmask;
int     numwords;
char    *textnext;
bitmask *wordsigs;
char    *anawords[STACKMAX];
char    **anaptr;
int     anacount=0;
int     targetwords;

void die (char *ermsg);
void printanagram(char *target, int expected);

int fieldwidth(num)
  int num;
{
  int tot = 0;
  while (num != 0) {
    tot += 1;
    num = num >> 1;
  }
  return(tot+1);
}

void choosefields(freqs)
  int freqs[];
{
  int letter;
  int curmask = 0, curbit = 0;
  int width;

  for (letter = 0; letter < 26; letter++)
    if (freqs[letter] != 0) {
      width = fieldwidth(freqs[letter]);
      if (curbit+width > maskwidth) {
        if (++curmask >= MAXMASKS) die("Sorry: Phrase too long to handle.\n");
        curbit = 0;
      }
      letmask[letter] = curmask;
      letbit[letter] = curbit;
      letwidth[letter] = width;
      curbit += width;
    }
  if (curmask > lastmask) lastmask = curmask;
}

void makefreqs(str, freq)
  char *str;
  int freq[];
{
  char c;
  int i;
  for (i = 0; i < 26; i++) freq[i] = 0;
  while ((c = *str++) != '\0') {
    if (('A' <= c) && (c <= 'Z')) c += 32;
    c -= 'a';
    if ((c >= 0) && (c < 26)) freq[c] += 1;
  }
}

void makeonesig(str, sig)
  register char *str;
  bitmask sig[];
{
  register int l;
  int sfreqs[26];
  register bitmask fr;

  makefreqs(str, sfreqs);

  for (l = 0; l <= lastmask; l++) sig[l] = 0;

  for (l = 0; l < 26; l++) {
      if (sfreqs[l]) {
        fr = ((bitmask) sfreqs[l]) << letbit[l];
        sig[letmask[l]] += fr;
      }
   }
}

void makeuf(freqs, letmask, letbit, letwidth)
  int freqs[];
  int letmask[], letbit[], letwidth[];
{
  int l;
  int bnum, bwidth;

  for (l = 0; l <= MAXMASKS; l++) uflosig[l] = 0;

  for (l = 0; l < 26; l++)
    if (freqs[l] != 0) {
      bnum = letbit[l];
      bwidth = letwidth[l];
      uflosig[letmask[l]] += (1 << (bnum+bwidth-1));
    }
}

#define DOMASK(MASK) { \
  newmask = curnode[MASK] - cursig[MASK]; \
  if ((newmask & uflosig[MASK])) break; \
  newsig[MASK] = newmask; \
  bitsleft = bitsleft | newmask; \
}


void findanagrams(curword, curnode, wordlist, target)
  register int curword;
  register bitmask *curnode;
  char *wordlist[];
  char *target;
{
  bitsig newsig;
  register bitmask newmask;
  register bitmask *cursig;
  register int bitsleft;

  cursig = &wordsigs[curword * (lastmask+1)];

  while (curword < numwords) {
    bitsleft = 0;
    if (lastmask > 2) {fprintf(stderr,"BUG 1\n");exit(0);}
    if (lastmask < 0) {fprintf(stderr,"BUG 2\n");exit(0);}
    switch (lastmask) {

    case 2: DOMASK(2)
    case 1: DOMASK(1)
    case 0: DOMASK(0)

      *anaptr++ = wordlist[curword];
      if (bitsleft == 0) {
        printanagram(target,targetwords);
      } else {
        findanagrams(curword, newsig, wordlist, target);
      }
      --anaptr;
    }
    curword++;
    cursig += (lastmask+1);
  }
}

void printanagram(target, expected) char *target; int expected;
{
  int wc;
  char **word;

  wc = 0;
  for (word = &anawords[0]; word != anaptr; word++) {
    wc ++;
  }
/*  if (wc == expected) {*/
    wc = 0;
    fprintf(stderr, "%s is an anagram of ", target);
    for (word = &anawords[0]; word != anaptr; word++) {
      printf("%s", *word); wc++;
      /*if ((expected > 1) && (wc != expected))*/ fprintf(stderr, " ");
    }
    printf("\n");
/*  }*/
}

void die (ermsg)
  char *ermsg;
{
  printf(ermsg);
  exit(0);
}

int getline(s, lim)
char s[];
int lim;
{
  int c, i;

  fprintf(stderr, "Word: ");
  i = 0;
  for (;;) {
    if (--lim <= 0) break;
    c=fgetc(stdin);
    if (c == EOF) break;
    c &= 255;
    if (c == 10) break;
    if (c == 13) break;
    s[i++] = c;
  }
  s[i] = '\0';
  return(i);
}

int main (int argc, char **argv)
{
  int c;
  int i, len, compatible;
  FILE *DictFile;
  char inittarget[64];
  char *target;
  char *textbuffer;
  char **wordlist;
  int wfreqs[26];

  if (argc != 2) {
    fprintf(stderr, "syntax: magna wordlist\n");
    exit(0);
  }

  textbuffer = (char *)malloc(maxtext);
  wordlist = (char **)malloc(maxwords*sizeof(char *));
  if ((textbuffer == NULL) || (wordlist == NULL)) {
    fprintf(stderr, "Not enough store\n");exit(0);
  }

  DictFile = fopen(argv[1], "r");
  if (DictFile == NULL) {
    fprintf(stderr, "Sorry - can\'t open word list \'%s\'\n", argv[1]);
    exit(0);
  } 
  target = &inittarget[0];
  numwords = 0;
  anaptr = &anawords[0];
  textnext = &textbuffer[0];


  len = getline(target, 64);
  target = strcpy(textnext, target); 
  textnext += len+1;
  textnext = (char *) ((((int) textnext)+3) & (~3));
  makefreqs(target, freqs);

  while ((c = fgetc(DictFile)) != EOF) {
    char *backtrack = textnext;
    compatible = 0;
    if ((c < 0) || (c > 128)) compatible = 1;
    wordlist[numwords] = textnext;
    *textnext++ = c;
    while ((c = fgetc(DictFile)) != EOF && c != '\n') {
      if ((c < 0) || (c > 128)) compatible = 1;
      *textnext++ = c;
    }
    *textnext++ = '\0';
    /* If strlen(wordlist[numwords]) > strlen[target]) skip this... */
      makefreqs(wordlist[numwords], wfreqs);
      for (i= 0; i < 26; i++) {
        if (wfreqs[i] > freqs[i]) {
          compatible = 1; break;
        }
      }
      if (compatible==0) {
        fprintf(stderr,"Using %s\n", wordlist[numwords++]);
      } else textnext = backtrack;
  }
  choosefields(freqs);

  textnext = (char *)((((int)textnext)+3) & (~3));
  wordsigs = (bitmask *) &textnext[0];


  makeonesig(target, &wordsigs[numwords*(lastmask+1)]);
  makeuf(freqs, letmask, letbit, letwidth);


  for (i = 0; i < numwords; i++) {
    makeonesig(wordlist[i], &wordsigs[i*(lastmask+1)]);
  }


  findanagrams(0, &wordsigs[numwords*(lastmask+1)], wordlist, target);
  return(0);
}