josh@unm-cvax.UUCP (01/15/85)
Does anybody have a routines written in C that can be used for something such as finding which string in a list looks the most like the control string. It would also be nice if the routine could handle wild card characters. If you don't have a program, helpful ideas would also be appreciated. --thanks much Josh Siegel {convex,ucbvax,gatech,csu-cs,anl-mcs,lanl-a}!unmvax!unm-cvax!josh
"Gary S. Moss (AMXBR-VLD-V)" <moss@Brl-Vld.ARPA> (01/17/85)
#!/bin/sh # Josh - # I wrote a boolean function which mimics UNIX shell file name # expansion in the attempt to match its first argument with its # second. Matchtest.c is a test driver for it. # # # Self-unpacking archive format. To unbundle, sh this file. echo 'match.c' 1>&2 cat >'match.c' <<'END OF match.c' /* SCCS id: @(#) match.c 1.3 Last edit: 1/16/85 at 21:18:57 Retrieved: 1/16/85 at 21:19:58 SCCS archive: /vld/moss/work/libWIND/s.match.c Author: Gary S. Moss U. S. Army Ballistic Research Laboratory Aberdeen Proving Ground Maryland 21005-5066 (301)278-6647 or AV-283-6647 */ static char sccsTag[] = "@(#) match.c 1.3 last edit 1/16/85 at 21:18:57"; #include <stdio.h> #include <string.h> #include "./ascii.h" extern void prnt1Err(); /* m a t c h ( ) if string matches pattern, return 1, else return 0 special characters: * Matches any string including the null string. ? Matches any single character. [...] Matches any one of the characters enclosed. [!..] Matchea any character NOT enclosed. - May be used inside brackets to specify range (i.e. str[1-58] matches str1, str2, ... str5, str8) \ Escapes special characters. */ match( pattern, string ) register char *pattern, *string; { do { switch( pattern[0] ) { case '*': /* Match any string including null string. */ if( pattern[1] == NUL || string[0] == NUL ) return 1; while( string[0] != NUL ) { if( match( &pattern[1], string ) ) return 1; ++string; } return 0; case '?': /* Match any character. */ break; case '[': /* Match one of the characters in brackets unless first is a '!', then match any character not inside brackets. */ { register char *rgtBracket; static int negation; ++pattern; /* Skip over left bracket. */ /* Find matching right bracket. */ if( (rgtBracket = strchr( pattern, ']' )) == NULL ) { prnt1Err( "Unmatched '['." ); return 0; } /* Check for negation operator. */ if( pattern[0] == '!' ) { ++pattern; negation = 1; } else { negation = 0; } /* Traverse pattern inside brackets. */ for( ; pattern < rgtBracket && pattern[0] != string[0]; ++pattern ) { if( pattern[ 0] == '-' && pattern[-1] != '\\' ) { if( pattern[-1] <= string[0] && pattern[-1] != '[' && pattern[ 1] >= string[0] && pattern[ 1] != ']' ) break; } } if( pattern == rgtBracket ) { if( ! negation ) { return 0; } } else { if( negation ) { return 0; } } pattern = rgtBracket; /* Skip to right bracket. */ break; } case '\\': /* Escape special character. */ ++pattern; /* WARNING: falls through to default case. */ default: /* Compare characters. */ if( pattern[0] != string[0] ) return 0; } ++pattern; ++string; } while( pattern[0] != NUL && string[0] != NUL ); if( (pattern[0] == NUL || pattern[0] == '*' ) && string[0] == NUL ) { return 1; } else { return 0; } } END OF match.c echo 'matchtest.c' 1>&2 cat >'matchtest.c' <<'END OF matchtest.c' /* SCCS id: @(#) matchtest.c 1.2 Last edit: 1/16/85 at 21:19:12 Retrieved: 1/16/85 at 21:20:06 SCCS archive: /vld/moss/work/libWIND/s.matchtest.c Author: Gary S. Moss U. S. Army Ballistic Research Laboratory Aberdeen Proving Ground Maryland 21005-5066 (301)278-6647 or AV-283-6647 */ #if ! defined( lint ) static char sccsTag[] = "@(#) matchtest.c 1.2 last edit 1/16/85 at 21:19:12"; #endif #include <stdio.h> extern int match(); char *usage[] = { "", "matchtest(1.2)", "", "Usage: matchtest [pattern string]", "", "If no arguments are given, the program reads words on its standard input.", "The program writes to its standard output.", 0 }; void prntUsage(), prnt1Err(); static char *pattern, *string; static char patbuf[BUFSIZ], strbuf[BUFSIZ]; /* m a i n ( ) */ main( argc, argv ) char *argv[]; { if( ! parsArgv( argc, argv ) ) { prntUsage(); exit( 1 ); } if( pattern != NULL ) { if( match( pattern, string ) ) { (void) printf( "'%s' matches '%s'.\n", pattern, string ); exit( 0 ); } else { (void) printf( "'%s' does not match '%s'.\n", pattern, string ); exit( 1 ); } } while( scanf( "%s %s", patbuf, strbuf ) == 2 ) { if( match( patbuf, strbuf ) ) { (void) printf( "'%s' matches '%s'.\n", patbuf, strbuf ); } else { (void) printf( "'%s' does not match '%s'.\n", patbuf, strbuf ); } } exit( 0 ); } /* p a r s A r g v ( ) */ parsArgv( argc, argv ) register char **argv; { register int c; extern int optind; extern char *optarg; /* Parse options. */ while( (c = getopt( argc, argv, "" )) != EOF ) { switch( c ) { case '?' : return 0; } } if( argc - optind != 2 ) { if( argc == optind ) { pattern = string = NULL; } else { (void) fprintf( stderr, "Arg count wrong!\n" ); return 0; } } else { pattern = argv[optind++]; string = argv[optind++]; } return 1; } /* p r n t U s a g e ( ) Print usage message. */ void prntUsage() { register char **p = usage; while( *p ) (void) fprintf( stderr, "%s\n", *p++ ); return; } END OF matchtest.c
fnf@unisoft.UUCP (Fred Fish) (01/21/85)
[--------------] A recent posting to net.lang.c of a "shell style" pattern matching routine by Gary Moss appeared to be incomplete so I decided to post an alternate. This posting also contains my version (nmatch.c) and Gary's two routines (match.c and matchtest.c) for the benefit of those who might have missed the original posting since it wasn't in net.sources. By the way, it appears that the usage message for matchtest is wrong. It apparently wants the arguments [string pattern] rather than [pattern string]. I'm sure there are many ways to do this, and probably some that are far more compact than my solution. I would love to see some alternate versions (that I can understand that is :-). [---- see net.sources for actual posting ----]
friesen@psivax.UUCP (Stanley Friesen) (01/22/85)
And another solution, if you are using BSD 4.1(and probably 4.2) there are a pair of library routines, re_comp & re_exec, in libc.a which implement ex style regular expressions. To find out how to use them read the manual page titled "regex" in section 3. -- Sarima (Stanley Friesen) {trwrb|allegra|burdvax|cbosgd|hplabs|ihnp4|sdcsvax}!sdcrdcf!psivax!friesen
"Gary S. Moss (AMXBR-VLD-V)" <moss@Brl-Vld.ARPA> (01/23/85)
~ A recent posting to net.lang.c of a "shell style" pattern matching ~ routine by Gary Moss appeared to be incomplete so I decided to post ~ an alternate. Now Fred, this makes me nervous, the posted routine was written for a table-of-contents lookup routine, and complete functionality a la UNIX shell, was not critical, however I am interested in what I left out since I am likely to find other uses for it. ~ ... By the way, it appears that the usage message ~ for matchtest is wrong. It apparently wants the arguments ~ [string pattern] rather than [pattern string]. Sorry Fred, it works as advertised, match [pattern string], if you leave out the arguments, it reads pattern-string pairs on stdin until EOF. YOUR match() function switches the arguments, i.e., BOOLEAN nmatch( string, pattern ) {...} while mine is int match( pattern, string ) {...} So, if you substituted your function for mine ... :-) Thanks for posting my sources, I should have done that originally. __Moss__
robert@gitpyr.UUCP (Robert Viduya) (01/25/85)
> > And another solution, if you are using BSD 4.1(and probably 4.2) > there are a pair of library routines, re_comp & re_exec, in libc.a > which implement ex style regular expressions. To find out how to > use them read the manual page titled "regex" in section 3. > -- > Regex(3) is also in System V release 1. I don't know for sure, but I think it's safe to assume it's also part of Sys V R2.0. Anyone know for sure? robert -- Robert Viduya Office of Computing Services Georgia Institute of Technology, Atlanta GA 30332 Phone: (404) 894-4669 ...!{akgua,allegra,amd,hplabs,ihnp4,masscomp,ut-ngp}!gatech!gitpyr!robert ...!{rlgvax,sb1,uf-cgrl,unmvax,ut-sally}!gatech!gitpyr!robert