[alt.sources] pattern matching

pokey@well.UUCP (Jef Poskanzer) (05/19/89)

In the referenced message, mhb@hcx.uucp (MARK H BOTNER) wrote:
}Can anybody help me with pattern matching?
}P.S. if you have any answer or a subroutine for me, please mail it to me.
}My address is:
}     mhb@cseg.uucp

That address is useless, so I'm posting this.  This is free software, no
copyright or trade secret restrictions whatsoever.
---
Jef

            Jef Poskanzer   jef@helios.ee.lbl.gov   ...well!pokey

/*
** Simple pattern matcher.  Does ?, *, and [], much like various shells.
**
** by Jef Poskanzer
*/

/*
** amatch - test whether the string s matches the shell-style pattern p.
**
** Pattern syntax:
**   ?      matches any single character
**   *      matches any run of characters, including an empty one
**   [set]  matches one character listed in the set; "a-z" inside the set
**          is an inclusive range and a leading '!' negates the set.  A
**          '-' that is first in the set, or immediately before the
**          closing ']', stands for itself.
**   other  any other character matches only itself
**
** Returns 1 when all of s is matched by all of p, otherwise 0.  A set
** that is never closed by ']' is reported on stderr and treated as a
** mismatch.
*/
int
amatch( const char *s, const char *p )
    {
    for ( ; *p != '\0'; p++, s++ )
        {
        if ( *p == '*' )
            {
            /* A run of stars means the same as one star; collapsing it
            ** lets "a**" match "a" and avoids redundant recursion. */
            while ( *p == '*' )
                p++;
            if ( *p == '\0' )
                return 1;
            /* What follows the star needs at least one subject character,
            ** so only non-empty suffixes of s have to be tried. */
            for ( ; *s != '\0'; s++ )
                if ( amatch( s, p ) )
                    return 1;
            return 0;
            }
        /* Everything below consumes exactly one subject character. */
        if ( *s == '\0' )
            return 0;
        if ( *p == '?' )
            continue;
        if ( *p == '[' )
            {
            int negcc = 0, ccmatch = 0;
            int have_prev = 0;      /* set once prevc holds a set member */
            char prevc = '\0';

            if ( *++p == '!' )
                {
                negcc = 1;
                p++;
                }
            for ( ; *p != ']'; p++ )
                {
                if ( *p == '\0' )
                    {
                    fprintf( stderr, "amatch: missing ']'\n" );
                    return 0;
                    }
                /* '-' forms a range only when it has a character on both
                ** sides; otherwise it is an ordinary set member.  Checking
                ** p[1] first keeps p from stepping over ']' or the NUL. */
                if ( *p == '-' && have_prev && p[1] != ']' && p[1] != '\0' )
                    {
                    p++;
                    if ( prevc <= *s && *s <= *p )
                        ccmatch = 1;
                    }
                else if ( *p == *s )
                    ccmatch = 1;
                prevc = *p;
                have_prev = 1;
                }
            /* Accept when membership and negation disagree. */
            if ( ccmatch != negcc )
                continue;
            return 0;
            }
        if ( *p != *s )
            return 0;
        }
    /* Pattern exhausted: it is a match only if the subject is too. */
    return *s == '\0';
    }

koblas@mips.COM (David Koblas) (05/19/89)

In article <11733@well.UUCP> Jef Poskanzer <jef@helios.ee.lbl.gov> writes:
>In the referenced message, mhb@hcx.uucp (MARK H BOTNER) wrote:
>}Can anybody help me with pattern matching?
>}P.S. if you have any answer or a subroutine for me, please mail it to me.
>}My address is:
>}     mhb@cseg.uucp
>
>That address is useless, so I'm posting this.  This is free software, no
>copyright or trade secret restrictions whatsoever.

Similar to Jef Poskanzer's but understands '{' ... '}' syntax.

#! /bin/sh
# This is a shell archive.  Remove anything before this line, then feed it
# into a shell via "sh file" or similar.  To overwrite existing files,
# type "sh file -c".
# The tool that generated this appeared in the comp.sources.unix newsgroup;
# send mail to comp-sources-unix@uunet.uu.net if you want that tool.
# If this archive is complete, you will see the following message at the end:
#		"End of shell archive."
# Contents:  glob.c
# Wrapped by koblas@yoyodyne.mips.com on Fri May 19 09:04:13 1989
# Pin PATH to the standard system directories for the commands used below.
PATH=/bin:/usr/bin:/usr/ucb ; export PATH
# Leave an existing glob.c untouched unless the first argument is -c.
if test -f 'glob.c' -a "${1}" != "-c" ; then 
  echo shar: Will not clobber existing file \"'glob.c'\"
else
echo shar: Extracting \"'glob.c'\" \(3831 characters\)
# Write the here-document to glob.c, deleting the leading X from each line.
# The quoted delimiter keeps the shell from expanding anything in the payload.
sed "s/^X//" >'glob.c' <<'END_OF_FILE'
X/*
X *  input: "str" string will attempted to be matched
X *
X *         "pattern" string with wildcards that will match against "str".
X *
X *   wild:
X *			'*'          = match 0 or more occurances of anything
X *			"[abc]"  	 = match anyof "abc" (ranges supported)
X *		    "{xx,yy,zz}" = match anyof "xx", "yy", or "zz"
X *          '?'          = match any character
X */
X
X#define	FALSE	0
X#define	TRUE	1
X
X#ifdef TEST
X
X#define TESTGLOB(str1,str2) \
X	printf("%s %s = %s\n",str1,str2,glob(str1,str2)?"TRUE":"FALSE")
X
Xmain()
X{
X	TESTGLOB("abcdefg","abcdefg");
X	TESTGLOB("abcdefg","a?cd?fg");
X	TESTGLOB("abcdefg","ab[cde]defg");
X	TESTGLOB("abcdefg","ab[a-z]defg");
X	TESTGLOB("abcdefg","ab[a-z]defg");
X	TESTGLOB("ab]defg","ab[a]c]defg");
X	TESTGLOB("ab]defg","ab[a\\]c]defg");
X	TESTGLOB("abcdefg","ab*fg");
X	TESTGLOB("./bc/def/gh/ij","*de*");
X	TESTGLOB("./der/den/deq/der/","*deq*");
X	TESTGLOB("./bc/def/gh/ij","*ij");
X	TESTGLOB("./bc/def/gh/ij","./*");
X	TESTGLOB("abcdef","*def");
X	TESTGLOB("abcdef","*abcdef");
X	TESTGLOB("abcdef","abc*");
X	TESTGLOB("abcdef","abcdef*");
X	TESTGLOB("abcdef","*?*{xxx,,yy}");
X	TESTGLOB("abcdef","abcde{f}");
X	TESTGLOB("abcdef","abcdef{xxx,,yyy}");
X	TESTGLOB("abcdef","abc{def,qwrx}");
X	TESTGLOB("abcdef","abc{ab,def,qwrx}");
X	TESTGLOB("abcdef","{naqrwer,fuwnwer,as,abc,a}{ab,def,qwrx}");
X}
X
X#endif
X
Xglob(str,pattern)
Xchar	*str,*pattern;
X{
X	char	c,*cp;
X	int		done=FALSE,ret_code,ok;
X
X	while ((*pattern != '\0') && (!done) && (((*str=='\0') &&
X			((*pattern=='{') || (*pattern=='*'))) || (*str!='\0'))) {
X		switch (*pattern) {
X		case '\\':
X			pattern++;
X			if (*pattern != '\0')
X				pattern++;
X			break;
X		case '*':
X			pattern++;
X			ret_code=FALSE;
X			while ((*str != '\0') && (!(ret_code=glob(str++,pattern))));
X			if (ret_code) {
X				while (*str != '\0') str++;
X				while (*pattern != '\0') pattern++;
X			}
X			break;
X		case '[':
X			pattern++;
Xrepeat:
X			if ((*pattern == '\0') || (*pattern == ']')) {
X				done=TRUE;
X				break;
X			} 
X			if (*pattern == '\\') {
X				pattern++;
X				if (*pattern == '\0') {
X					done=TRUE;
X					break;
X				}
X			}
X			if (*(pattern+1) == '-') {
X				c = *pattern;
X				pattern+=2;
X				if (*pattern == ']') {
X					done=TRUE;
X					break;
X				}
X				if (*pattern == '\\') {
X					pattern++;
X					if (*pattern == '\0') {
X						done=TRUE;
X						break;
X					}
X				}
X				if ((*str < c) || (*str > *pattern)) {
X					pattern++;
X					goto repeat;
X				} 
X			} else if (*pattern != *str) {
X				pattern++;
X				goto repeat;
X			}
X			pattern++;
X			while ((*pattern != ']') && (*pattern != '\0')) {
X				if ((*pattern == '\\') && (*(pattern+1) != '\0'))
X					pattern++;
X				pattern++;
X			}
X			if (*pattern != '\0') {
X				pattern++;
X				str++;
X			}
X			break;
X		case '?':
X			pattern++;
X			str++;
X			break;
X		case '{':	/*}*/
X			pattern++;
X/*{*/		while ((*pattern != '}') && (*pattern!='\0')) {
X				cp = str;
X				ok = TRUE;
X				while (ok && (*cp != '\0') && (*pattern!='\0') &&
X/*{*/				   (*pattern!=',') && (*pattern!='}')) {
X					if (*pattern == '\\')
X						pattern++;
X					ok=(*pattern == *cp);
X					cp++;
X					pattern++;
X				}
X				if (*pattern=='\0') {
X					ok=FALSE;
X					done=TRUE;
X					break;
X				} else if (ok) {
X					str=cp;
X/*{*/				while ((*pattern!='}') && (*pattern!='\0')) {
X						pattern++;
X						if (*pattern=='\\') {
X							pattern++;
X/*{*/						if (*pattern=='}')
X								pattern++;
X						}
X					}
X				} else {
X/*{*/				while ((*pattern!='}') && (*pattern!=',') &&
X						   (*pattern!='\0')) {
X						pattern++;
X						if (*pattern=='\\') {
X							pattern++;
X/*{*/						if ((*pattern=='}') || (*pattern==','))
X								pattern++;
X						}
X					}
X				}
X				if (*pattern!='\0')
X					pattern++;
X			}
X			break;
X		default:
X			if (*str == *pattern) {
X				str++;
X				pattern++;
X			} else {
X				done=TRUE;
X			}
X		}
X	}
X	while (*pattern == '*') pattern++;
X	return ((*str == '\0') && (*pattern == '\0'));
X}
END_OF_FILE
# Warn if the extracted file is not the 3831 characters announced above.
if test 3831 -ne `wc -c <'glob.c'`; then
    echo shar: \"'glob.c'\" unpacked with wrong size!
fi
# end of 'glob.c'
fi
echo shar: End of shell archive.
exit 0
-- 
name : David Koblas                  uucp  : {ames,decwrl}!mips!koblas 
place: MIPS Computers Systems        domain: koblas@mips.com
quote: "Time has little to do with infinity and jelly donuts."

faulkner@jmullins.harvard.edu (Don Faulkner) (05/20/89)

Anyone thinking about regular expression handlers should really
take a look at the one from GNU EMACS --- it is VERY powerful, fast,
and very flexible (you can set it up with a translation table
to ignore case ...).  It works very well standalone --- you need only
two files: "regex.c" and "regex.h".

Best part:  syntax and capabilities are identical to those from GNU
EMACS (golly!)

--

 Don Faulkner                                       
 Building 1, Room 803
 Harvard University, School of Public Health
 665 Huntington Avenue
 Boston, MA  02115

 ARPA:      faulkner%jmullins@harvard.harvard.edu                
 BITNET:    faulkner@harvard
 Telephone: (617) 732-2297