pokey@well.UUCP (Jef Poskanzer) (05/19/89)
/*
 * In the referenced message, mhb@hcx.uucp (MARK H BOTNER) wrote:
 * }Can anybody help me with pattern matching?
 * }P.S. if you have any answer or a subroutine for me, please mail it to me.
 * }My address is:
 * } mhb@cseg.uucp
 *
 * That address is useless, so I'm posting this.  This is free software, no
 * copyright or trade secret restrictions whatsoever.
 * ---
 * Jef
 *
 * Jef Poskanzer  jef@helios.ee.lbl.gov  ...well!pokey
 */

/*
** Simple pattern matcher.  Does ?, *, and [], much like various shells.
**
** by Jef Poskanzer
**
** Pattern syntax:
**   ?      matches any single character
**   *      matches any run of characters, including none
**   [abc]  matches one character from the set; "a-z" inside the brackets
**          is an inclusive range and a leading '!' negates the set
** Any other pattern character matches only itself.
*/

#include <stdio.h>

/*
 * Test character c against a bracket expression.
 *
 * On entry *pp points just past the opening '['.  On a return of 0 or 1,
 * *pp has been advanced to the closing ']'.
 *
 * Returns 1 if c is accepted by the class, 0 if it is rejected, and -1 if
 * the pattern ends before a closing ']' is found (*pp is then unchanged).
 *
 * A '-' is a range operator only when it has a member on both sides
 * ("a-z").  A '-' that is first in the class, directly follows a range, or
 * directly precedes ']' is an ordinary member.  Range bounds are compared
 * as unsigned char, and the upper bound is always consumed, so a reversed
 * range such as "x-a" is simply empty.
 */
static int
amatch_class(const char **pp, char c)
{
    const char *p = *pp;
    unsigned char ch = (unsigned char)c;
    int negated = 0;
    int matched = 0;

    if (*p == '!') {
        negated = 1;
        p++;
    }

    for (; *p != ']'; p++) {
        if (*p == '\0') {
            return -1;
        }
        /* p[2] is only read when p[1] is '-', i.e. not the terminator. */
        if (p[1] == '-' && p[2] != ']' && p[2] != '\0') {
            if ((unsigned char)p[0] <= ch && ch <= (unsigned char)p[2]) {
                matched = 1;
            }
            p += 2;     /* leave p on the upper bound; the loop steps past it */
        } else if ((unsigned char)*p == ch) {
            matched = 1;
        }
    }

    *pp = p;
    return matched != negated;
}

/*
 * Match the whole of string s against pattern p.
 *
 * Returns 1 if s matches p in its entirety, 0 otherwise.  If a '[' in the
 * pattern has no closing ']', "amatch: missing ']'" is written to stderr
 * and 0 is returned.
 */
int
amatch(const char *s, const char *p)
{
    int verdict;

    for (; *p != '\0'; p++, s++) {
        if (*p == '*') {
            /* A run of '*' means the same as one; skip the whole run so a
             * trailing "**" still matches an exhausted subject. */
            while (*p == '*') {
                p++;
            }
            if (*p == '\0') {
                return 1;
            }
            /* Every remaining pattern element consumes a character, so
             * only non-empty suffixes of s can possibly match. */
            for (; *s != '\0'; s++) {
                if (amatch(s, p)) {
                    return 1;
                }
            }
            return 0;
        }

        if (*s == '\0') {
            return 0;
        }
        if (*p == '?') {
            continue;
        }
        if (*p == '[') {
            p++;
            verdict = amatch_class(&p, *s);
            if (verdict < 0) {
                fprintf(stderr, "amatch: missing ']'\n");
                return 0;
            }
            if (verdict == 0) {
                return 0;
            }
            continue;   /* p is on ']'; the loop increment steps past it */
        }
        if (*p != *s) {
            return 0;
        }
    }

    return *s == '\0';
}
koblas@mips.COM (David Koblas) (05/19/89)
In article <11733@well.UUCP> Jef Poskanzer <jef@helios.ee.lbl.gov> writes: >In the referenced message, mhb@hcx.uucp (MARK H BOTNER) wrote: >}Can anybody help me with pattern matching? >}P.S. if you have any answer or a subroutine for me, please mail it to me. >}My address is: >} mhb@cseg.uucp > >That address is useless, so I'm posting this. This is free software, no >copyright or trade secret restrictions whatsoever. Similar to Jef Poskanzer's but understands '{' ... '}' syntax. #! /bin/sh # This is a shell archive. Remove anything before this line, then feed it # into a shell via "sh file" or similar. To overwrite existing files, # type "sh file -c". # The tool that generated this appeared in the comp.sources.unix newsgroup; # send mail to comp-sources-unix@uunet.uu.net if you want that tool. # If this archive is complete, you will see the following message at the end: # "End of shell archive." # Contents: glob.c # Wrapped by koblas@yoyodyne.mips.com on Fri May 19 09:04:13 1989 PATH=/bin:/usr/bin:/usr/ucb ; export PATH if test -f 'glob.c' -a "${1}" != "-c" ; then echo shar: Will not clobber existing file \"'glob.c'\" else echo shar: Extracting \"'glob.c'\" \(3831 characters\) sed "s/^X//" >'glob.c' <<'END_OF_FILE' X/* X * input: "str" string will attempted to be matched X * X * "pattern" string with wildcards that will match against "str". X * X * wild: X * '*' = match 0 or more occurances of anything X * "[abc]" = match anyof "abc" (ranges supported) X * "{xx,yy,zz}" = match anyof "xx", "yy", or "zz" X * '?' 
= match any character X */ X X#define FALSE 0 X#define TRUE 1 X X#ifdef TEST X X#define TESTGLOB(str1,str2) \ X printf("%s %s = %s\n",str1,str2,glob(str1,str2)?"TRUE":"FALSE") X Xmain() X{ X TESTGLOB("abcdefg","abcdefg"); X TESTGLOB("abcdefg","a?cd?fg"); X TESTGLOB("abcdefg","ab[cde]defg"); X TESTGLOB("abcdefg","ab[a-z]defg"); X TESTGLOB("abcdefg","ab[a-z]defg"); X TESTGLOB("ab]defg","ab[a]c]defg"); X TESTGLOB("ab]defg","ab[a\\]c]defg"); X TESTGLOB("abcdefg","ab*fg"); X TESTGLOB("./bc/def/gh/ij","*de*"); X TESTGLOB("./der/den/deq/der/","*deq*"); X TESTGLOB("./bc/def/gh/ij","*ij"); X TESTGLOB("./bc/def/gh/ij","./*"); X TESTGLOB("abcdef","*def"); X TESTGLOB("abcdef","*abcdef"); X TESTGLOB("abcdef","abc*"); X TESTGLOB("abcdef","abcdef*"); X TESTGLOB("abcdef","*?*{xxx,,yy}"); X TESTGLOB("abcdef","abcde{f}"); X TESTGLOB("abcdef","abcdef{xxx,,yyy}"); X TESTGLOB("abcdef","abc{def,qwrx}"); X TESTGLOB("abcdef","abc{ab,def,qwrx}"); X TESTGLOB("abcdef","{naqrwer,fuwnwer,as,abc,a}{ab,def,qwrx}"); X} X X#endif X Xglob(str,pattern) Xchar *str,*pattern; X{ X char c,*cp; X int done=FALSE,ret_code,ok; X X while ((*pattern != '\0') && (!done) && (((*str=='\0') && X ((*pattern=='{') || (*pattern=='*'))) || (*str!='\0'))) { X switch (*pattern) { X case '\\': X pattern++; X if (*pattern != '\0') X pattern++; X break; X case '*': X pattern++; X ret_code=FALSE; X while ((*str != '\0') && (!(ret_code=glob(str++,pattern)))); X if (ret_code) { X while (*str != '\0') str++; X while (*pattern != '\0') pattern++; X } X break; X case '[': X pattern++; Xrepeat: X if ((*pattern == '\0') || (*pattern == ']')) { X done=TRUE; X break; X } X if (*pattern == '\\') { X pattern++; X if (*pattern == '\0') { X done=TRUE; X break; X } X } X if (*(pattern+1) == '-') { X c = *pattern; X pattern+=2; X if (*pattern == ']') { X done=TRUE; X break; X } X if (*pattern == '\\') { X pattern++; X if (*pattern == '\0') { X done=TRUE; X break; X } X } X if ((*str < c) || (*str > *pattern)) { X pattern++; X goto 
repeat; X } X } else if (*pattern != *str) { X pattern++; X goto repeat; X } X pattern++; X while ((*pattern != ']') && (*pattern != '\0')) { X if ((*pattern == '\\') && (*(pattern+1) != '\0')) X pattern++; X pattern++; X } X if (*pattern != '\0') { X pattern++; X str++; X } X break; X case '?': X pattern++; X str++; X break; X case '{': /*}*/ X pattern++; X/*{*/ while ((*pattern != '}') && (*pattern!='\0')) { X cp = str; X ok = TRUE; X while (ok && (*cp != '\0') && (*pattern!='\0') && X/*{*/ (*pattern!=',') && (*pattern!='}')) { X if (*pattern == '\\') X pattern++; X ok=(*pattern == *cp); X cp++; X pattern++; X } X if (*pattern=='\0') { X ok=FALSE; X done=TRUE; X break; X } else if (ok) { X str=cp; X/*{*/ while ((*pattern!='}') && (*pattern!='\0')) { X pattern++; X if (*pattern=='\\') { X pattern++; X/*{*/ if (*pattern=='}') X pattern++; X } X } X } else { X/*{*/ while ((*pattern!='}') && (*pattern!=',') && X (*pattern!='\0')) { X pattern++; X if (*pattern=='\\') { X pattern++; X/*{*/ if ((*pattern=='}') || (*pattern==',')) X pattern++; X } X } X } X if (*pattern!='\0') X pattern++; X } X break; X default: X if (*str == *pattern) { X str++; X pattern++; X } else { X done=TRUE; X } X } X } X while (*pattern == '*') pattern++; X return ((*str == '\0') && (*pattern == '\0')); X} END_OF_FILE if test 3831 -ne `wc -c <'glob.c'`; then echo shar: \"'glob.c'\" unpacked with wrong size! fi # end of 'glob.c' fi echo shar: End of shell archive. exit 0 -- name : David Koblas uucp : {ames,decwrl}!mips!koblas place: MIPS Computers Systems domain: koblas@mips.com quote: "Time has little to do with infinity and jelly donuts."
faulkner@jmullins.harvard.edu (Don Faulkner) (05/20/89)
Anyone thinking about regular expression handlers should really take a look at the one from GNU EMACS --- it is VERY powerful, fast, and very flexible (you can set it up with a translation table to ignore case ...). It works very well standalone --- it needs only two files: "regex.c" and "regex.h". Best part: syntax and capabilities are identical to those from GNU EMACS (golly!) -- Don Faulkner Building 1, Room 803 Harvard University, School of Public Health 665 Huntington Avenue Boston, MA 02115 ARPA: faulkner%jmullins@harvard.harvard.edu BITNET: faulkner@harvard Telephone: (617) 732-2297