tif@cpe.UUCP (09/17/88)
Written 5:52 pm Sep 15, 1988 by pyramid.UUCP!dhaile in cpe:comp.sources.w
>Hi y'all! Had a need all of a sudden for a SOUNDEX or pseudo-SOUNDEX
I'll post this since it's small and he didn't say "I don't usually
read this group" :-)
It comes from a bigger package which is a spelling aid (although
the spelling aid could not meet my requirements). Note that
SOUNDEX is a pretty crude algorithm, nothing like phonemes.
(Somebody asked for that too, wish I had it to give.)
Paul Chamberlain
Computer Product Engineering, Tandy Corp.
{convex,killer}!ninja!cpe!tif
#! /bin/sh
# This is a shell archive, meaning:
# 1. Remove everything above the #! /bin/sh line.
# 2. Save the resulting text in a file.
# 3. Execute the file with /bin/sh (not csh) to create:
# calcsoundex.c
# This archive created: Fri Sep 16 22:54:34 1988
export PATH; PATH=/bin:/usr/bin:$PATH
echo shar: "extracting 'calcsoundex.c'" '(2455 characters)'
# Refuse to clobber a file that is already present.
if test -f 'calcsoundex.c'
then
echo shar: "will not over-write existing file 'calcsoundex.c'"
else
# Every archived line carries a leading X guard.  Strip the guard together
# with any blanks in front of it, so the payload is restored whether or not
# the guard sits in the first column.
sed 's/^ *X//' << \SHAR_EOF > 'calcsoundex.c'
X/* vi: set tabstop=4 : */
X
X/*
X * calcsoundex - calculate soundex codes
X *
X * Permission is given to copy or distribute this program provided you
X * do not remove this header or make money off of the program.
X *
X * Please send comments and suggestions to:
X * Barry Brachman
X * Dept. of Computer Science
X * Univ. of British Columbia
X * Vancouver, B.C. V6T 1W5
X *
X * .. {ihnp4!alberta, uw-beaver}!ubc-vision!ubc-cs!brachman
X * brachman@cs.ubc.cdn
X * brachman%ubc.csnet@csnet-relay.arpa
X * brachman@ubc.csnet
X */
X
X#include <stdio.h>
X#include <ctype.h>
X
X#include "sp.h"
X
X/*
X * Input line buffer shared with main(): room for MAXWORDLEN characters
X * plus the newline kept by fgets() and the terminating '\0'.
X */
Xchar word[MAXWORDLEN + 2];
X
X/*
X * Soundex digit for each letter, indexed by (lowercase letter - 'a').
X * A value of 0 marks a letter that contributes no digit to the code
X * (A, E, H, I, O, U, W and Y).
X */
Xchar soundex_code_map[26] = {
X/*** A=0 B=1 C=2 D=3 E=0 F=1 G=2 H=0 I=0 J=2 K=2 L=4 M=5 N=5 O=0 P=1 ***/
X 0, 1, 2, 3, 0, 1, 2, 0, 0, 2, 2, 4, 5, 5, 0, 1,
X
X/*** Q=2 R=6 S=2 T=3 U=0 V=1 W=0 X=2 Y=0 Z=2 ***/
X 2, 6, 2, 3, 0, 1, 0, 2, 0, 2
X};
X
Xmain(argc, argv)
Xint argc;
Xchar **argv;
X{
X register int c, i, soundex_length, digit_part, previous_code;
X int ch, len, vflag;
X short soundex;
X char *gets();
X
X vflag = 0;
X if (argc > 2 || (argc == 2 && strcmp(argv[1], "-v"))) {
X fprintf(stderr, "Usage: calcsoundex [-v]\n");
X exit(1);
X }
X if (argc > 1)
X vflag = 1;
X
X while (fgets(word, sizeof(word), stdin) != (char *) NULL) {
X len = strlen(word);
X if (word[len - 1] != '\n') {
X fprintf(stderr, "calcsoundex: Word too long: %s", word);
X while ((ch = getchar()) != '\n') /* flush rest of line */
X putc(ch, stderr);
X putc('\n', stderr);
X continue;
X }
X word[--len] = '\0';
X if (len > MAXWORDLEN) {
X fprintf(stderr, "calcsoundex: Word too long: %s\n", word);
X continue;
X }
X
X for (i = 0; word[i] != '\0'; i++) {
X if (isupper(word[i]))
X word[i] = tolower(word[i]);
X }
X if (!isalpha(word[0]))
X continue;
X
X digit_part = 0;
X soundex_length = 0;
X previous_code = soundex_code_map[word[0] - 'a'];
X for (i = 1; word[i] != '\0' && soundex_length < 3; i++) {
X if (!isalpha(word[i]))
X continue;
X c = soundex_code_map[word[i] - 'a'];
X if (c == 0 || previous_code == c) {
X previous_code = c;
X continue;
X }
X digit_part = digit_part * 10 + c;
X previous_code = c;
X soundex_length++;
X }
X while (soundex_length++ < 3)
X digit_part *= 10;
X soundex = digit_part << 5 + word[0] - 'a';
X printf("%c", word[0]);
X if (digit_part < 100)
X putchar('0');
X if (digit_part < 10)
X putchar('0');
X if (digit_part == 0)
X putchar('0');
X else
X printf("%d", digit_part);
X if (vflag)
X printf(" %s", word);
X putchar('\n');
X }
X putchar('\n');
X exit(0);
X}
X
SHAR_EOF
# Compare the size of the extracted file with the character count recorded
# in this archive; a mismatch is reported as a transmission error.
if test 2455 -ne "`wc -c < 'calcsoundex.c'`"
then
echo shar: "error transmitting 'calcsoundex.c'" '(should have been 2455 characters)'
fi
fi
exit 0
# End of shell archive