[comp.sources.misc] v05i059: pwgen -- random-but-pronounceable password generator

allbery@ncoast.UUCP (Brandon S. Allbery) (11/26/88)
Posting-number: Volume 5, Issue 59
Submitted-by: "Brandon S. Allbery" <allbery@ncoast.UUCP>
Archive-name: pwgen

[Dr. Jekyll and Mr. Hyde, anyone?  ;-)  ++bsa]

I've described this program, in its OSI Superboard-II incarnation, in
comp.unix.wizards and news.sysadmin.  Now, here's the code for a UN*X
version.  It seems to work OK on ncoast, except for some oddness in the
"random" selections -- which are probably the result of our not having a
good RNG.  (Could someone mail me one or more of the alternative
generators?  Or maybe even the source to BSD random(), assuming that it
falls outside the subset of BSD code that can be traced to AT&T?  To put it
mildly, our random number generator isn't.)

This is not as complete as the original program, which actually had a list
of characters (the current one uses phonemes) which were most likely to
follow other phonemes.  On the other hand, this version does use phonemes
rather than characters, so its creations are more pronounceable as a rule
than those of the original.  The resulting passwords aren't quite as
"natural" as the original, however.  (That may be for the best; the original
was intended as a simple experiment to see if a rule for "proper English
words" could be defined in terms of rules which related letters.  This was
before I got to college and learned that such things were rather unlikely,
but the original program still did a pretty good job of spewing out some
common English words.  That can't be said for *this* program.)

To compile:	cc -O -o pwgen pwgen.c -lm
(The sqrt() function is used in an attempt to produce a random number which
is "weighted" toward one end of the range, so as to prefer one end of a list
of "spellings" for a phoneme over the other end.  It seems to work, but with
this rotted RNG, I can't be absolutely certain.  A trial distribution seems
to be correct, however.)

What's the intent?  I find that I can remember a "word" better if I can
pronounce it.  This may or may not be true for other people, but *anything*
that encourages the use of random passwords is an improvement on what I
typically see at client sites every day.  So this program spits out
passwords which are virtually guaranteed not to be found in the dictionary,
but are (usually) pronounceable to speakers of English.  (The tables can be
modified for other languages, but they're rather hairy.  Perhaps I'll write
a version which loads "compiled" language descriptions, and let the compiler
deal with the hairy stuff.  Hey, they were even hairier in the BASIC
version!)

Oh, well, shar and enjoy.

++Brandon, sitting on the other side of the fence for the moment
-------------------------------------------------------------------------------
#! /bin/sh
# This file was wrapped with "dummyshar".  "sh" this file to extract.
# Contents:  pwgen.c
echo extracting 'pwgen.c'
if test -f 'pwgen.c' -a -z "$1"; then echo Not overwriting 'pwgen.c'; else
sed 's/^X//' << \EOF > 'pwgen.c'
X/*
X * Generate (hopefully) pronounceable random passwords.  These can often be
X * remembered more easily than completely random passwords, and are immune to
X * dictionary searches, etc.
X *
X * The original version of this program was written in BASIC on an OSI
X * Superboard II SBC.  That version is long gone (the SB2's cassette drive
X * was never trustworthy, it eventually scrambled the tape), but the basic
X * (pardon the pun) idea lives on here, with a few modification like basing
X * the selection on "graphs" (actually, they are a restricted set of phonemes)
X * and having randomly-selected spellings for those graphs.
X */
X
X#include <stdio.h>
X#include <math.h>
X
X#define RANDOM(c)	((int) (rand(c) / 32767.0 * (c)))
X
Xchar *spelling[] = {
X/*a*/	"a",				(char *) 0,	/* 2*/
X/*A*/	"a",	"ae",	"ai",		(char *) 0,	/* 6*/
X/*b*/	"b",				(char *) 0,	/* 8*/
X/*ch*/	"ch",				(char *) 0,	/*10*/
X/*d*/	"d",				(char *) 0,	/*12*/
X/*e*/	"e",				(char *) 0,	/*14*/
X/*E*/	"e",	"ee",	"ie",		(char *) 0,	/*18*/
X/*f*/	"f",	"ph",	"gh",		(char *) 0,	/*22*/
X/*g*/	"g",				(char *) 0,	/*24*/
X/*h*/	"h",				(char *) 0,	/*26*/
X/*i*/	"i",	"e",			(char *) 0,	/*29*/
X/*I*/	"i",	"ai",			(char *) 0,	/*32*/
X/*i'*/	"i",	"ei",			(char *) 0,	/*35*/
X/*j*/	"j",	"g",			(char *) 0,	/*38*/
X/*k*/	"k",	"c",			(char *) 0,	/*41*/
X/*l*/	"l",				(char *) 0,	/*43*/
X/*m*/	"m",				(char *) 0,	/*45*/
X/*n*/	"n",				(char *) 0,	/*47*/
X/*ng*/	"ng",				(char *) 0,	/*49*/
X/*o*/	"o",	"a",	"ah",		(char *) 0,	/*53*/
X/*O*/	"o",	"oh",			(char *) 0,	/*56*/
X/*oo*/	"oo",	"u",			(char *) 0,	/*59*/
X/*OO*/	"oo",	"w",			(char *) 0,	/*62*/
X/*p*/	"p",				(char *) 0,	/*64*/
X/*qu*/	"qu",				(char *) 0,	/*66*/
X/*r*/	"r",				(char *) 0,	/*68*/
X/*s*/	"s",	"c",			(char *) 0,	/*71*/
X/*sh*/	"sh",	"s",			(char *) 0,	/*74*/
X/*t*/	"t",				(char *) 0,	/*76*/
X/*th*/	"th",				(char *) 0,	/*78*/
X/*TH*/	"th",				(char *) 0,	/*80*/
X/*u*/	"u",				(char *) 0,	/*82*/
X/*U*/	"u",	"oo",			(char *) 0,	/*85*/
X/*v*/	"v",				(char *) 0,	/*87*/
X/*x*/	"x",				(char *) 0,	/*89*/
X/*y*/	"y",				(char *) 0,	/*91*/
X/*z*/	"z",	"s",			(char *) 0,	/*94*/
X};
X
Xstruct graph {
X	char *graph;
X	char type;
X#define CONSONANT	0
X#define VOWEL_LONG	1
X#define VOWEL_SHORT	2
X#define VOWEL_OTHER	3
X#define VOWEL_MASK	3
X#define iscons(c)	(((c)->type & VOWEL_MASK) == 0)
X#define isvowel(c)	(((c)->type & VOWEL_MASK) != 0)
X/*	char frequency;			*/	/* unused for now */
X	char **spellings;
X/*	struct graph **following;	*/	/* maybe later */
X} graph[] = {
X	"a",	VOWEL_SHORT,	&spelling[0],
X	"A",	VOWEL_LONG,	&spelling[2],
X	"b",	CONSONANT,	&spelling[6],
X	"ch",	CONSONANT,	&spelling[8],
X	"d",	CONSONANT,	&spelling[10],
X	"e",	VOWEL_SHORT,	&spelling[12],
X	"E",	VOWEL_LONG,	&spelling[14],
X	"f",	CONSONANT,	&spelling[18],
X	"g",	CONSONANT,	&spelling[22],
X	"h",	CONSONANT,	&spelling[24],
X	"i",	VOWEL_SHORT,	&spelling[26],
X	"I",	VOWEL_LONG,	&spelling[29],
X	"i'",	VOWEL_OTHER,	&spelling[32],
X	"j",	CONSONANT,	&spelling[35],
X	"k",	CONSONANT,	&spelling[38],
X	"l",	CONSONANT,	&spelling[41],
X	"m",	CONSONANT,	&spelling[43],
X	"n",	CONSONANT,	&spelling[45],
X	"ng",	CONSONANT,	&spelling[47],
X	"o",	VOWEL_SHORT,	&spelling[49],
X	"O",	VOWEL_LONG,	&spelling[53],
X	"oo",	VOWEL_SHORT,	&spelling[56],
X	"OO",	VOWEL_LONG,	&spelling[59],
X	"p",	CONSONANT,	&spelling[62],
X	"qu",	CONSONANT,	&spelling[64],
X	"r",	CONSONANT,	&spelling[66],
X	"s",	CONSONANT,	&spelling[68],
X	"sh",	CONSONANT,	&spelling[71],
X	"t",	CONSONANT,	&spelling[74],
X	"th",	CONSONANT,	&spelling[76],
X	"TH",	CONSONANT,	&spelling[78],
X	"u",	VOWEL_SHORT,	&spelling[80],
X	"U",	VOWEL_LONG,	&spelling[82],
X	"v",	CONSONANT,	&spelling[85],
X	"x",	CONSONANT,	&spelling[87],
X	"y",	CONSONANT,	&spelling[89],
X	"z",	CONSONANT,	&spelling[91],
X	0,	0,		&spelling[94],
X};
X
Xstruct graph *vowel[] = {
X	&graph[0],	&graph[1],	&graph[5],	&graph[6],
X	&graph[10],	&graph[11],	&graph[12],	&graph[19],
X	&graph[20],	&graph[21],	&graph[22],	&graph[30],
X	&graph[31],
X	(struct graph *) 0,
X};
X
Xstruct graph *consonant[] = {
X	&graph[2],	&graph[3],	&graph[4],	&graph[7],
X	&graph[8],	&graph[9],	&graph[13],	&graph[14],
X	&graph[15],	&graph[16],	&graph[17],	&graph[18],
X	&graph[23],	&graph[24],	&graph[25],	&graph[26],
X	&graph[27],	&graph[28],	&graph[29],	&graph[32],
X	&graph[33],	&graph[34],	&graph[35],
X	(struct graph *) 0,
X};
X
X/*
X * Randomly select a graph from the specifield array.  Eventually, this should
X * account for graph frequencies as well.
X */
X
Xstruct graph *selgraph(graphs)
X	struct graph **graphs;
X{
X	register int cnt;
X
X	for (cnt = 0; graphs[cnt] != (struct graph *) 0; cnt++)
X		;
X	return graphs[RANDOM(cnt)];
X}
X
X/*
X * Randomly select a spelling for the specified graph.  This is not linear:
X * earlier spellings are preferred over later ones, but the latter do
X * sometimes sneak in.
X */
X
Xchar *selspell(graph)
X	struct graph *graph;
X{
X	register int cnt, sel;
X
X	for (cnt = 0; graph->spellings[cnt] != (char *) 0; cnt++)
X		;
X	if (cnt == 0) {
X		fprintf(stderr, "PANIC: selspell(%s) got count(spellings) == 0\n", graph->graph);
X		exit(2);
X	}
X	if (cnt == 1)
X		return *graph->spellings;
X/*
X * This may not be the best way to do it... maybe Weemba'd care to lend a
X * hand here?  After all, my specialty is programming, NOT math.
X */
X	if ((sel = cnt - (int) sqrt((double) RANDOM(cnt * cnt) + 1) - 1) < 0 || sel >= cnt) {
X#ifdef BUGCATCH
X		fprintf(stderr, "PANIC: selspell(%s) got nlrand(%d) == %d\n", graph->graph, cnt, sel);
X		exit(2);
X#else
X		sel = 0;
X#endif
X	}
X	return graph->spellings[sel];
X}
X
X/*
X * Choose the next source for a graph.  The rules are:  a consonant MUST be
X * followed by a vowel; a vowel may be followed by a vowel of a different
X * type or by a consonant, but never more than two consecutive vowel graphs.
X */
X
Xchoosenext(cur, prev)
X{
X	if (cur == CONSONANT)
X		return VOWEL_MASK;
X	else if (prev == -1 || (prev & VOWEL_MASK) != 0)
X		return CONSONANT;
X	else if (RANDOM(10) == 5)
X		return VOWEL_MASK;
X	else
X		return CONSONANT;
X}
X
X/*
X * We are passed an array of (struct graph *); choose an entry randomly and
X * assemble a string fitting the size constraint.  We use the original (OSI)
X * paradigm:  alternate consonants and vowels, with the option of two vowels
X * in a row occasionally.  The only difference is that they must be different
X * *types* of vowels, a distinction that the OSI version didn't consider.
X */
X
Xpwgen(initial, pw, maxlen)
X	struct graph **initial;
X	char *pw;
X{
X	int pwlen, state, prev, tmp;
X	struct graph *graph;
X	char *spelling;
X
X	pwlen = 0;
X	state = initial[0]->type;
X	prev = -1;
X	while (pwlen < maxlen - 1) {
X		do {
X			graph = selgraph(initial);
X		} while (state != CONSONANT && graph->type == prev);
X		if ((spelling = selspell(graph)) == (char *) 0) {
X			fprintf(stderr, "PANIC: got NULL in selspell(%s)\n", graph->graph);
X			exit(2);
X		}
X		strcpy(pw, spelling);
X		while (*pw != '\0')
X			pwlen++, pw++;
X		tmp = prev;
X		prev = graph->type;
X		if ((state = choosenext(prev, tmp)) == CONSONANT)
X			initial = consonant;
X		else
X			initial = vowel;
X	}
X}
X
Xmain(argc, argv)
X	char **argv;
X{
X	int cnt, len;
X	char buf[20];
X
X	if (argc < 2 || argc > 3) {
X		fprintf(stderr, "usage: %s length [count]\n", argv[0]);
X		exit(1);
X	}
X	if ((len = atoi(argv[1])) < 4 || len > 16) {
X		fprintf(stderr, "%s: invalid length %s\n", argv[0], argv[1]);
X		exit(1);
X	}
X	if (argc == 2)
X		cnt = 1;
X	else if ((cnt = atoi(argv[2])) < 1) {
X		fprintf(stderr, "%s: invalid count %s\n",  argv[0], argv[2]);
X		exit(1);
X	}
X	srand(time(0) + (getpgrp() << 8) + getpid());
X	while (cnt-- != 0) {
X		pwgen((RANDOM(10) < 4? vowel: consonant), buf, len);
X		printf("%s\n", buf);
X	}
X	exit(0);
X}
EOF
chars=`wc -c < 'pwgen.c'`
if test $chars !=    7757; then echo 'pwgen.c' is $chars characters, should be    7757 characters!; fi
exit 0
-- 
Brandon S. Allbery, comp.sources.misc moderator and one admin of ncoast PA UN*X
uunet!hal.cwru.edu!ncoast!allbery  <PREFERRED!>	    ncoast!allbery@hal.cwru.edu
allberyb@skybridge.sdi.cwru.edu	      <ALSO>		   allbery@uunet.uu.net
comp.sources.misc is moving off ncoast -- please do NOT send submissions direct
      Send comp.sources.misc submissions to comp-sources-misc@<backbone>.