jmc@ptsfa.UUCP (07/10/87)
Here is a short program I wrote a few years ago illustrating the use
of the "soundex" algorithm. It is used to generate the same token for
similarly pronounced names (like Smith and Smythe).
/* soundex algorithm from 1918 patent */
#include <stdio.h>
#include <ctype.h>
#define MAXNAME 30 /* max name length */
#define SOUNDLEN 6 /* max len soundex string */
/* Defined later in this file; declared here so the calls below are checked. */
int getsound(char *name, char *sound);

/*
 * Print the soundex token for every command-line argument, one per line.
 * With no arguments, read a single name from standard input and print its
 * token; an empty line or end-of-file prints nothing.
 */
int main(int argc, char *argv[])
{
    char name[MAXNAME];   /* input string for surname */
    char sound[SOUNDLEN]; /* token: SOUNDLEN - 1 characters plus the NUL */
    int i;

    if (argc > 1) {
        for (i = 1; i < argc; i++) {
            getsound(argv[i], sound);
            printf("%s\n", sound);
        }
        return 0;
    }

    /* fgets bounds the read to the buffer; gets() could overrun name[]. */
    if (fgets(name, sizeof name, stdin) == NULL)
        return 0;

    /* fgets keeps the newline: drop it so an empty line really is empty. */
    for (i = 0; name[i] != '\0'; i++) {
        if (name[i] == '\n') {
            name[i] = '\0';
            break;
        }
    }

    if (name[0] == '\0')
        return 0;

    getsound(name, sound);
    printf("%s\n", sound);
    return 0;
}
/***************************************************************************/
/* soundex algorithm - format of result is annnn                           */
/***************************************************************************/

/* Map a lower-case letter to its soundex digit (1-6); 0 for anything else. */
static int soundcode(int c)
{
    switch (c) {
    case 'b': case 'f': case 'p': case 'v':
        return 1;
    case 'c': case 'g': case 'j': case 'k':
    case 'q': case 's': case 'x': case 'z':
        return 2;
    case 'd': case 't':
        return 3;
    case 'l':
        return 4;
    case 'm': case 'n':
        return 5;
    case 'r':
        return 6;
    default:
        return 0;
    }
}

/*
 * getsound - build the soundex token for a name.
 *
 * name:  NUL-terminated name to encode.
 * sound: output buffer of at least SOUNDLEN bytes.  On return it holds the
 *        first character of name (lower-cased) followed by SOUNDLEN - 2
 *        digits, zero-padded on the right, and a terminating NUL.
 *        An empty name yields an empty string.
 *
 * A character contributes a digit only when its code is non-zero and differs
 * from the code of the character immediately before it; uncoded characters
 * (vowels, punctuation, ...) reset that comparison.
 *
 * Always returns 0.
 */
int getsound(char *name, char *sound)
{
    int i;
    int j = 1;      /* next free slot in sound; slot 0 holds the letter */
    int val;        /* soundex digit of the current character */
    int oldval = 0; /* soundex digit of the previous character */

    if (name[0] == '\0') {
        sound[0] = '\0';
        return 0;
    }

    /* <ctype.h> functions are undefined for negative values, so go through
       unsigned char before calling tolower(). */
    sound[0] = (char)tolower((unsigned char)name[0]);

    for (i = 1; name[i] != '\0' && j < SOUNDLEN - 1; i++) {
        val = soundcode(tolower((unsigned char)name[i]));
        if (val != 0 && val != oldval)
            sound[j++] = (char)(val + '0');
        oldval = val;
    }

    while (j < SOUNDLEN - 1)
        sound[j++] = '0';

    /* Terminate inside the buffer: the last valid index is SOUNDLEN - 1. */
    sound[SOUNDLEN - 1] = '\0';
    return 0;
}
--
voice: (415) 823-2441 uucp: {ihnp4,lll-crg,ames,qantel,pyramid}!ptsfa!jmc
Where am I? In the village. Whose side are you on? That would be telling.

john@riddle.UUCP (Jonathan Leffler) (12/22/89)
Archive-name: soundex.c/john
Original-posting-by: john@riddle.UUCP (Jonathan Leffler)
Original-subject: Re: Soundex (sounds like)
Reposted-by: emv@math.lsa.umich.edu (Edward Vielmetti)

[This is an experimental alt.sources re-posting from the newsgroup(s)
comp.lang.c,comp.sources.wanted,comp.lang.c++. Comments on this service to
emv@math.lsa.umich.edu (Edward Vielmetti).]

In article <1842@naucse.UUCP> wew@naucse.UUCP (Bill Wilson) writes:
>From article <488@hades.OZ>, by ing@hades.OZ (Ian Gold):
>> I am looking for a 'soundex' routine in C (or C++).
>>
>> char *soundex(char *target, char *given);
>>
>I would be interested in the code as well...

Will this do?

: "@(#)shar2.c 1.5"
#!/bin/sh
# shar: Shell Archiver (v1.22)
#
# This is a shell archive.
# Remove everything above this line and run sh on the resulting file
# If this archive is complete, you will see this message at the end
# "All files extracted"
#
# Created: Fri Dec 15 21:33:37 1989 by john at Sphinx Ltd.
# Files archived in this archive:
# soundex.c
#
if test -f soundex.c; then echo "File soundex.c exists"; else
echo "x - soundex.c"
sed 's/^X//' << 'SHAR_EOF' > soundex.c &&
X/*
X** SOUNDEX CODING
X**
X** Rules:
X** 1.  Retain the first letter; ignore non-alphabetic characters.
X** 2.  Replace second and subsequent characters by a group code.
X**         Group   Letters
X**         1       BFPV
X**         2       CGJKQSXZ
X**         3       DT
X**         4       L
X**         5       MN
X**         6       R
X** 3.  Do not repeat digits
X** 4.  Truncate or zero-pad to 4-character result.
X**
X** Originally formatted with tabstops set at 4 spaces -- you were warned!
X**
X** Code by: Jonathan Leffler (john@sphinx.co.uk)
X** This code is shareware -- I wrote it; you can have it for free
X** if you supply it to anyone else who wants it for free.
X**
X** BUGS: Assumes ASCII
X*/
X
X#include <ctype.h>
X
X/* Group code for each letter 'A'..'Z'; '0' marks a letter that is not coded */
Xstatic const char lookup[] = {
X    '0',    /* A */
X    '1',    /* B */
X    '2',    /* C */
X    '3',    /* D */
X    '0',    /* E */
X    '1',    /* F */
X    '2',    /* G */
X    '0',    /* H */
X    '0',    /* I */
X    '2',    /* J */
X    '2',    /* K */
X    '4',    /* L */
X    '5',    /* M */
X    '5',    /* N */
X    '0',    /* O */
X    '1',    /* P */
X    '2',    /* Q -- group 2, as in standard Soundex */
X    '6',    /* R */
X    '2',    /* S */
X    '3',    /* T */
X    '0',    /* U */
X    '1',    /* V */
X    '0',    /* W */
X    '2',    /* X */
X    '0',    /* Y */
X    '2',    /* Z */
X};
X
X/*
X** Soundex for arbitrary number of characters of information
X**
X** Returns a pointer to a static buffer which is overwritten by the next
X** call.  n is clamped to the buffer capacity; n <= 0 selects the default
X** of 4.  Input with no letters at all yields a string of n '0' digits.
X*/
Xchar *nsoundex(const char *str, int n)
X{
X    static char buff[10];
X    const char *s;
X    char *t;
X    char c;
X    char l = '\0';  /* Last character stored in buff */
X
X    if (n <= 0)
X        n = 4;  /* Default */
X    if (n > (int)sizeof(buff) - 1)
X        n = (int)sizeof(buff) - 1;
X    t = &buff[0];
X
X    for (s = str; ((c = *s) != '\0') && t < &buff[n]; s++)
X    {
X        /* Skip non-ASCII bytes: lookup[] is indexed by c - 'A' */
X        if ((unsigned char)c > 0x7F)
X            continue;
X        /* <ctype.h> functions need a value in unsigned char range */
X        if (!isalpha((unsigned char)c))
X            continue;
X        c = (char)toupper((unsigned char)c);
X        if (t == &buff[0])
X        {
X            /* Rule 1: the first letter is kept as a letter */
X            l = *t++ = c;
X            continue;
X        }
X        c = lookup[c - 'A'];
X        /* Rule 3: skip uncoded letters and repeats of the last digit */
X        if (c != '0' && c != l)
X            l = *t++ = c;
X    }
X    /* Rule 4: zero-pad up to n characters */
X    while (t < &buff[n])
X        *t++ = '0';
X    *t = '\0';
X    return(&buff[0]);
X}
X
X/* Normal external interface */
Xchar *soundex(const char *str)
X{
X    return(nsoundex(str, 4));
X}
X
X/*
X** Alternative interface:
X** void soundex(given, gets)
X** char *given;
X** char *gets;
X** {
X**     strcpy(gets, nsoundex(given, 4));
X** }
X*/
X
X
X#ifdef TEST
X#include <stdio.h>
X#include <string.h>
Xint main(void)
X{
X    char buff[30];
X
X    while (fgets(buff, sizeof(buff), stdin) != (char *)0)
X    {
X        /* fgets keeps the newline; drop it so the report stays on one line */
X        buff[strcspn(buff, "\n")] = '\0';
X        printf("Given: %s Soundex produces %s\n", buff, soundex(buff));
X    }
X    return(0);
X}
X#endif
SHAR_EOF
chmod 0640 soundex.c || echo "$0: failed to restore soundex.c"
fi
echo All files extracted
exit 0