jmc@ptsfa.UUCP (07/10/87)
/*
 * Here is a short program I wrote a few years ago illustrating the use of
 * the "soundex" algorithm.  It is used to generate the same token for
 * similarly pronounced names (like Smith and Smythe).
 */

/* soundex algorithm from 1918 patent */

#include <stdio.h>
#include <ctype.h>
#include <stdlib.h>     /* exit */
#include <string.h>     /* strcspn */

#define MAXNAME  30     /* max name length */
#define SOUNDLEN 6      /* max len soundex string (5 characters + NUL) */

int getsound(const char *name, char *sound);

/*
 * Command-line driver.
 *
 * With arguments: print the soundex token of every argument, one per line.
 * Without arguments: read a single name from standard input and print its
 * token; an empty line or end-of-file prints nothing.
 *
 * Define SOUNDEX_NO_MAIN when compiling to leave the driver out and link
 * getsound() into another program.
 */
#ifndef SOUNDEX_NO_MAIN
int main(int argc, char *argv[])
{
    char name[MAXNAME];     /* input string for surname */
    char sound[SOUNDLEN];   /* SOUNDLEN - 1 characters plus terminator */
    int i;

    if (argc > 1) {
        for (i = 1; i < argc; i++) {
            getsound(argv[i], sound);
            printf("%s\n", sound);
        }
        exit(0);
    }

    /* fgets() bounds the read to the buffer; gets() could overrun it. */
    if (fgets(name, sizeof name, stdin) == NULL)
        exit(0);
    name[strcspn(name, "\n")] = '\0';   /* drop the newline fgets() keeps */
    if (name[0] == '\0')
        exit(0);

    getsound(name, sound);
    printf("%s\n", sound);
    exit(0);
}
#endif /* SOUNDEX_NO_MAIN */

/*
 * Map one character to its soundex digit (1-6).  Returns 0 for every
 * character that is not coded (vowels, h, w, y and non-letters).
 */
static int soundex_code(int ch)
{
    /* The cast keeps negative char values away from <ctype.h>. */
    switch (tolower((unsigned char)ch)) {
    case 'b': case 'f': case 'p': case 'v':
        return 1;
    case 'c': case 'g': case 'j': case 'k':
    case 'q': case 's': case 'x': case 'z':
        return 2;
    case 'd': case 't':
        return 3;
    case 'l':
        return 4;
    case 'm': case 'n':
        return 5;
    case 'r':
        return 6;
    default:
        return 0;
    }
}

/***************************************************************************/
/* soundex algorithm - format of result is annnn                           */
/*                                                                         */
/* name  - NUL-terminated input name; it is not modified.                  */
/* sound - output buffer of at least SOUNDLEN bytes.  It receives the      */
/*         first character of name (lower-cased) followed by four digits,  */
/*         padded with '0', and a terminating NUL.  An empty name yields   */
/*         an empty string.                                                */
/*                                                                         */
/* Always returns 0; the int return type is kept only so that callers      */
/* written against the original implicit-int definition still link.        */
/***************************************************************************/
int getsound(const char *name, char *sound)
{
    int i;
    int j = 1;      /* next output position; position 0 holds the letter */
    int val;        /* code of the current character */
    int oldval;     /* code of the previous character */

    if (name[0] == '\0') {
        sound[0] = '\0';
        return 0;
    }

    sound[0] = (char)tolower((unsigned char)name[0]);

    /*
     * Seed the duplicate check with the first letter's own code so that a
     * following letter of the same group is not coded again
     * (e.g. "Lloyd" -> l3000, not l4300).
     */
    oldval = soundex_code(name[0]);

    for (i = 1; name[i] != '\0' && j < SOUNDLEN - 1; i++) {
        val = soundex_code(name[i]);
        if (val != 0 && val != oldval)
            sound[j++] = (char)(val + '0');
        oldval = val;   /* an uncoded character resets the duplicate check */
    }

    while (j < SOUNDLEN - 1)
        sound[j++] = '0';

    /* The terminator belongs in the last element, not one past the end. */
    sound[SOUNDLEN - 1] = '\0';
    return 0;
}

/*
 * --
 * voice: (415) 823-2441
 * uucp: {ihnp4,lll-crg,ames,qantel,pyramid}!ptsfa!jmc
 * Where am I? In the village.
 * Whose side are you on? That would be telling.
 */
john@riddle.UUCP (Jonathan Leffler) (12/22/89)
Archive-name: soundex.c/john
Original-posting-by: john@riddle.UUCP (Jonathan Leffler)
Original-subject: Re: Soundex (sounds like)
Reposted-by: emv@math.lsa.umich.edu (Edward Vielmetti)

[This is an experimental alt.sources re-posting from the newsgroup(s)
comp.lang.c,comp.sources.wanted,comp.lang.c++. Comments on this service to
emv@math.lsa.umich.edu (Edward Vielmetti).]

In article <1842@naucse.UUCP> wew@naucse.UUCP (Bill Wilson) writes:
>From article <488@hades.OZ>, by ing@hades.OZ (Ian Gold):
>> I am looking for a 'soundex' routine in C (or C++).
>>
>> char *soundex(char *target, char *given);
>>
>I would be interested in the code as well...

Will this do?

: "@(#)shar2.c 1.5"
#!/bin/sh
# shar: Shell Archiver (v1.22)
#
# This is a shell archive.
# Remove everything above this line and run sh on the resulting file
# If this archive is complete, you will see this message at the end
# "All files extracted"
#
# Created: Fri Dec 15 21:33:37 1989 by john at Sphinx Ltd.
# Files archived in this archive:
# soundex.c
#
if test -f soundex.c; then echo "File soundex.c exists"; else
echo "x - soundex.c"
sed 's/^X//' << 'SHAR_EOF' > soundex.c &&
X/*
X**  SOUNDEX CODING
X**
X**  Rules:
X**  1.  Retain the first letter; ignore non-alphabetic characters.
X**  2.  Replace second and subsequent characters by a group code.
X**      Group   Letters
X**      1       BFPV
X**      2       CGJKQSXZ
X**      3       DT
X**      4       L
X**      5       MN
X**      6       R
X**  3.  Do not repeat digits
X**  4.  Truncate or zero-pad to 4-character result.
X**
X**  Originally formatted with tabstops set at 4 spaces -- you were warned!
X**
X**  Code by: Jonathan Leffler (john@sphinx.co.uk)
X**  This code is shareware -- I wrote it; you can have it for free
X**  if you supply it to anyone else who wants it for free.
X**
X**  BUGS: Assumes ASCII
X*/
X
X#include <ctype.h>
X
X/*
X** Group code for each upper-case letter, indexed by (letter - 'A').
X** '0' marks letters that are not coded.
X*/
Xstatic char lookup[] = {
X    '0',    /* A */
X    '1',    /* B */
X    '2',    /* C */
X    '3',    /* D */
X    '0',    /* E */
X    '1',    /* F */
X    '2',    /* G */
X    '0',    /* H */
X    '0',    /* I */
X    '2',    /* J */
X    '2',    /* K */
X    '4',    /* L */
X    '5',    /* M */
X    '5',    /* N */
X    '0',    /* O */
X    '1',    /* P */
X    '2',    /* Q -- group 2, alongside C G J K S X Z */
X    '6',    /* R */
X    '2',    /* S */
X    '3',    /* T */
X    '0',    /* U */
X    '1',    /* V */
X    '0',    /* W */
X    '2',    /* X */
X    '0',    /* Y */
X    '2',    /* Z */
X};
X
X/*
X** Soundex for arbitrary number of characters of information
X**
X** Returns a pointer to a static buffer holding the first letter of str
X** (upper-cased) followed by group digits, truncated or padded with '0'
X** to exactly n characters.  n <= 0 selects the default of 4, and n is
X** capped at sizeof(buff) - 1.  A string with no ASCII letters yields
X** all '0' characters.  The buffer is overwritten by the next call, so
X** copy the result if it has to be kept.
X*/
Xchar *nsoundex(str, n)
Xchar *str;      /* In: String to be converted */
Xint n;          /* In: Number of characters in result string */
X{
X    static char buff[10];
X    register char *s;
X    register char *t;
X    char c;
X    char l;     /* Last character stored in buff */
X
X    if (n <= 0)
X        n = 4;  /* Default */
X    if (n > sizeof(buff) - 1)
X        n = sizeof(buff) - 1;
X    t = &buff[0];
X
X    for (s = str; ((c = *s) != '\0') && t < &buff[n]; s++)
X    {
X        /* Only ASCII letters can index lookup[] safely */
X        if (!isascii(c))
X            continue;
X        if (!isalpha(c))
X            continue;
X        c = toupper(c);
X        if (t == &buff[0])
X        {
X            /* Rule 1: the first letter is stored as itself */
X            l = *t++ = c;
X            continue;
X        }
X        c = lookup[c-'A'];
X        /* Rule 3: skip uncoded letters and repeats of the last digit */
X        if (c != '0' && c != l)
X            l = *t++ = c;
X    }
X    /* Rule 4: pad short results with '0' */
X    while (t < &buff[n])
X        *t++ = '0';
X    *t = '\0';
X    return(&buff[0]);
X}
X
X/* Normal external interface: the 4-character code for str */
Xchar *soundex(str)
Xchar *str;
X{
X    return(nsoundex(str, 4));
X}
X
X/*
X** Alternative interface:
X** void soundex(given, gets)
X** char *given;
X** char *gets;
X** {
X**     strcpy(gets, nsoundex(given, 4));
X** }
X*/
X
X
X#ifdef TEST
X#include <stdio.h>
X
X/* Test driver: print the soundex code of each line read from stdin */
Xint main()
X{
X    char buff[30];
X
X    while (fgets(buff, sizeof(buff), stdin) != (char *)0)
X        printf("Given: %s Soundex produces %s\n", buff, soundex(buff));
X    return(0);
X}
X#endif
SHAR_EOF
chmod 0640 soundex.c || echo "$0: failed to restore soundex.c"
fi
echo All files extracted
exit 0