[alt.sources] amatch

davidsen@sixhub.UUCP (Wm E. Davidsen Jr) (08/17/89)

In digging for something else the other day I found this, the heart of
a routine to do wildcard matching and name expansion in MS-DOS. This
procedure does a match between a test string and a wildcard pattern
using the Bourne shell conventions. It's sort of like regexp but with
another set of conventions. I wrote a man page, and here it is.

I will be posting the rest of the package as I get the man pages
written, the directory search package and the actual command line
expander. 

If anyone is trying to get things from the archive server, uunet's maps
don't seem to show sixhub correctly: mail to it is not bounced, but it
doesn't get here, either. Here's how to reach the server:
	uucp:	uunet!crdgw1!sixhub!archive-server
	inet:	sixhub!archive-server@crdgw1.crd.ge.com

The message text should include a line with:
	send index to YOURADDRESS
- or -
	send help to YOURADDRESS
where YOURADDRESS is either site!user or user@site.domain.

#!/bin/sh
# shar:	Shell Archiver  (v1.24)
#
#	Run the following text with /bin/sh to create:
#	  amatch.1
#	  amatch.c
#
# Each archived file is stored as a here-document whose lines carry a
# leading 'X'; sed removes that one character while writing the file.
echo "x - extracting amatch.1 (Text)"
# The trailing && chains this extraction to the command that follows the
# here-document's SHAR_EOF terminator.
sed 's/^X//' << 'SHAR_EOF' > amatch.1 &&
X.TH AMATCH 3 Local
X.SH NAME
Xamatch - match string against wildcard pattern using shell conventions
X.SH SYNOPSIS
X.B amatch
Xcompares a test string against a wildcard pattern using the conventions
Xof the Bourne shell, rather than standard regular expressions. This was
Xoriginally written for MS-DOS, but may be generally useful.
X.SH DESCRIPTION
X.ft B
X.in +.5i
X int amatch(wildcard, teststr)
X const char *wildcard, *teststr;
X.in -.5i
X.ft P
X.SH EXAMPLES
X gets(buffer);
X if (!amatch("*.[ch]", buffer)) unlink(buffer);
X.SH WARNINGS
XAlthough tested and in use for several years, this program may not
Xproduce the same output as \fBglob\fP, assuming anyone still uses it.
X.SH FILES
Xnone.
X.SH LIMITATIONS
XNone known.
X.SH AUTHOR
XBill Davidsen, 1985. (davidsen@crdos1.crd.ge.com)
X.SH Copyright
XCopyright (c) 1985, 1989 by Bill Davidsen. This program may be freely
Xgiven, bartered, traded or sold by anyone for any purpose. The supplier
Xassumes all risk for any consequences of malfunction. All other rights
Xreserved. 
X
SHAR_EOF
chmod 0644 amatch.1 || echo "restore of amatch.1 fails"
# The chmod line above completes the "sed ... &&" list: chmod runs only if
# sed succeeded, and the failure message is printed if either step fails.
echo "x - extracting amatch.c (Text)"
# Same scheme as before: strip the leading 'X' from every here-document
# line and write the result to amatch.c.
sed 's/^X//' << 'SHAR_EOF' > amatch.c &&
X/*****************************************************************
X | amatch - match a string against a wildcard pattern
X |----------------------------------------------------------------
X |  Compares a data string against a test pattern using Bourne shell
X |  wildcard rules (*not* regular expression rules).
X |
X |  This routine is ugly and not well structured.
X |
X |  Author: Bill Davidsen (davidsen@crdos1.crd.ge.com) Mar 24, 1985
X |        with helpful suggestions from Andy Robinson
X |----------------------------------------------------------------
X |  Copyright:
X |    Copyright (c) 1985, 1989 by Bill Davidsen. This program may be
X |    freely given, bartered, traded or sold by anyone for any
X |    purpose. The supplier assumes all risk for any consequences of
X |    malfunction. All other rights reserved. 
X |----------------------------------------------------------------
X |  Arguments:
X |   1 - address of wildcard pattern
X |   2 - address of string to test
X ****************************************************************/
X
#ifndef TRUE
#define TRUE    1
#define FALSE   0
#endif

/*
 * amatch_class - test one character against a bracket expression.
 *
 * On entry *pattern points at the '[' that opens the expression.  On
 * return *pattern points just past the closing ']', or at the
 * terminating '\0' when the expression is never closed (the end of the
 * pattern then acts as the close).
 *
 * Syntax handled:
 *   [abc]   any listed character
 *   [!abc]  any character that is not listed
 *   [a-z]   inclusive range; the lower bound is the element before '-'
 *   [-z]    '-' with no element before it: the range has no lower limit
 *   [a-]    '-' directly before the close: the range has no upper limit
 *   \c      c is taken literally (as an element or as an upper bound)
 *
 * Returns non-zero if c is accepted (negation already applied), zero
 * otherwise.
 */
static int amatch_class(const char **pattern, unsigned char c)
{
	const char *p = *pattern + 1;	/* first character after '[' */
	int negate = (*p == '!');
	int found = FALSE;
	int have_prev = FALSE;		/* has an element been seen yet? */
	unsigned char prev = 0;		/* value of the most recent element */

	if (negate)
		++p;

	while (*p != '\0' && *p != ']') {

		if (*p == '-') { /* a range of values */
			/* lower limit: the previous element, if there was one */
			unsigned char lo = have_prev ? prev : 0;
			/* upper limit defaults to the largest character value */
			unsigned char hi = (unsigned char)-1;

			++p;
			if (*p != ']' && *p != '\0') {
				if (*p == '\\')
					++p;
				/* a backslash at the very end leaves the range open */
				if (*p != '\0')
					hi = (unsigned char)*p++;
			}

			/* compared as unsigned char so that the result does not
			   depend on whether plain char is signed */
			if (c >= lo && c <= hi)
				found = TRUE;

			/* the upper bound may start a following range */
			prev = hi;
			have_prev = TRUE;
			continue;
		}

		/* single element, possibly escaped */
		if (*p == '\\' && *++p == '\0')
			break; /* pattern ended right after the backslash */

		prev = (unsigned char)*p++;
		have_prev = TRUE;
		if (c == prev)
			found = TRUE;
	}

	if (*p == ']')
		++p;
	*pattern = p;

	return found != negate;
}

/*
 * amatch - match a string against a shell-style wildcard pattern.
 *
 *   ts - the wildcard pattern
 *   cs - the string to test
 *
 * Pattern syntax:
 *   *      any run of characters, including none
 *   ?      exactly one character
 *   [...]  one character from a set (see amatch_class)
 *   \c     the character c itself
 * The pattern has to account for the whole of cs.
 *
 * Returns 1 if cs matches, 0 if it does not.
 */
int amatch(const char *ts, const char *cs)
{
	for (;;) {

		if (*cs == '\0') { /* out of string */
			while (*ts == '*')
				++ts; /* trailing '*'s match nothing */
			return *ts == '\0';
		}

		switch (*ts) {

		case '\0': /* pattern used up, string is not */
			return FALSE;

		case '[':
			if (!amatch_class(&ts, (unsigned char)*cs))
				return FALSE;
			break;

		case '*':
			while (*++ts == '*')
				; /* a run of '*' acts as one */

			if (*ts == '\0') /* trailing '*' matches anything */
				return TRUE;

			/* try the rest of the pattern at every remaining position */
			do {
				if (amatch(ts, cs))
					return TRUE;
			} while (*++cs != '\0');
			return FALSE;

		case '?': /* any one character will do */
			++ts;
			break;

		case '\\': /* next pattern character is literal */
			++ts;
			/* FALLTHROUGH */

		default:
			/* also rejects a pattern that ends in a bare backslash:
			   *cs is never '\0' here */
			if (*cs != *ts)
				return FALSE;
			++ts;
			break;
		} /* switch */

		++cs;

	} /* for */

} /* amatch */
X
X/*****************************************************************
X |  test program for amatch
X |----------------------------------------------------------------
X |  Reads patterns and paths until none given.
X ****************************************************************/
X
#ifdef	TEST
#include <stdio.h>
#include <string.h>

static int read_line(char *buffer, int len);

/*
 * Interactive tester: prompts for a pattern, then for strings to try
 * against it, printing YES or NO for each.  An empty string returns to
 * the pattern prompt; an empty pattern or end of input ends the program.
 */
int main(void)
{
	char data[80], pattern[80];

	for (;;) {
		/* read wildcard patterns */
		printf("Enter pattern: ");
		if (!read_line(pattern, (int)sizeof pattern) || pattern[0] == '\0')
			break;

		/* read test cases */
		for (;;) {
			printf("  Enter test data: ");
			if (!read_line(data, (int)sizeof data))
				return 0;
			if (data[0] == '\0')
				break;
			printf("\t%s\n", (amatch(pattern, data) ? "YES" : "NO"));
		}
	}
	return 0;
}

/*
 * Read one line from stdin into buffer (at most len - 1 characters) and
 * remove the trailing newline, if there is one.  Returns 1 on success.
 * At end of input or on a read error, buffer is set to the empty string
 * and 0 is returned.
 */
static int read_line(char *buffer, int len)
{
	size_t n;

	if (fgets(buffer, len, stdin) == NULL) {
		buffer[0] = '\0';
		return 0;
	}

	n = strlen(buffer);
	if (n > 0 && buffer[n - 1] == '\n')
		buffer[n - 1] = '\0';
	return 1;
}
#endif
SHAR_EOF
chmod 0644 amatch.c || echo "restore of amatch.c fails"
# End the script here so the shell does not go on to read the text that
# follows the archive.
exit 0

rsalz@bbn.com (Rich Salz) (08/17/89)

I wrote this a long time ago.  It's short, fast, and clean.  It was posted
some time back and appears, e.g., in John Gilmore's PDtar.

#! /bin/sh
# This is a shell archive.  Remove anything before this line, then unpack
# it by saving it into a file and typing "sh file".  To overwrite existing
# files, type "sh file -c".  You can also feed this as standard input via
# unshar, or by typing "sh <file", e.g..  If this archive is complete, you
# will see the following message at the end:
#		"End of shell archive."
# Contents:  wildmat.c
# Wrapped by rsalz@prune.bbn.com on Thu Aug 17 10:38:48 1989
# Use a fixed command search path for the rest of the script.
PATH=/bin:/usr/bin:/usr/ucb ; export PATH
if test -f 'wildmat.c' -a "${1}" != "-c" ; then 
  echo shar: Will not clobber existing file \"'wildmat.c'\"
else
echo shar: Extracting \"'wildmat.c'\" \(2123 characters\)
sed "s/^X//" >'wildmat.c' <<'END_OF_FILE'
X/*
X**  Do shell-style pattern matching for ?, \, [], and * characters.
X**  Might not be robust in face of malformed patterns; e.g., "foo[a-"
X**  could cause a segmentation violation.  I think it's 8bit clean.
X**
X**  Written by Rich $alz, bbn!rsalz, Wed Nov 26 19:03:17 EST 1986.
X*/
X
#define TRUE		1
#define FALSE		0

int wildmat(const char *s, const char *p);


/*
**  Helper for '*': p is the pattern that follows the star.  Try it
**  against every tail of s, the empty tail included, and report whether
**  any of them matches.
*/
static int
Star(const char *s, const char *p)
{
    for ( ; ; s++) {
        if (wildmat(s, p))
            return TRUE;
        /* Check before stepping so s never moves past the terminator. */
        if (*s == '\0')
            return FALSE;
    }
}


/*
**  Match text s against shell-style pattern p.
**      ?       any single character
**      *       any run of characters, including none
**      [...]   one character from the set; "a-z" is an inclusive range
**              and a leading ^ inverts the set
**      \c      the character c itself
**  Inside [...] a backslash is an ordinary character, and the character
**  after a '-' is always taken as the upper bound of a range, even if it
**  is ']'.  A '-' with nothing before it forms a range that matches
**  nothing.
**  A malformed pattern (a [...] that is never closed, or a backslash as
**  the last character) matches nothing.
**  Returns TRUE if all of s is matched, FALSE otherwise.
*/
int
wildmat(const char *s, const char *p)
{
    int last;
    int matched;
    int reverse;
    unsigned char c;

    for ( ; *p != '\0'; s++, p++)
        switch (*p) {
        case '\\':
            /* Literal match with following character. */
            if (*++p == '\0')
                return FALSE;
            /* FALLTHROUGH */
        default:
            /* Also fails at end of text, since *p is not '\0' here. */
            if (*s != *p)
                return FALSE;
            continue;
        case '?':
            /* Match anything except the end of the text. */
            if (*s == '\0')
                return FALSE;
            continue;
        case '*':
            /* Trailing star matches everything. */
            return *++p ? Star(s, p) : TRUE;
        case '[':
            /* A set needs a character to test, inverted or not. */
            if (*s == '\0')
                return FALSE;
            /* Compare as unsigned char so ranges do not depend on the
            ** signedness of plain char. */
            c = (unsigned char)*s;
            /* [^....] means inverse character class. */
            reverse = (p[1] == '^');
            if (reverse)
                p++;
            /* 0400 is above every 8-bit character value, so a range
            ** without a lower bound cannot match. */
            last = 0400;
            matched = FALSE;
            for ( ; ; ) {
                if (*++p == '\0')
                    return FALSE;       /* no closing ']' */
                if (*p == ']')
                    break;
                if (*p == '-') {
                    if (*++p == '\0')
                        return FALSE;   /* range without upper bound */
                    if (c <= (unsigned char)*p && c >= last)
                        matched = TRUE;
                }
                else if (c == (unsigned char)*p)
                    matched = TRUE;
                last = (unsigned char)*p;
            }
            if (matched == reverse)
                return FALSE;
            continue;
        }

    return *s == '\0';
}
X
X
#ifdef	TEST
#include <stdio.h>
#include <stdlib.h>
#include <string.h>


/*
**  Read one line from standard input into buff (at most size - 1
**  characters) and drop the trailing newline, if any.  A longer input
**  line is returned in pieces by successive calls.  Returns buff, or
**  NULL at end of input or on a read error.
*/
static char *
ReadLine(char *buff, int size)
{
    if (fgets(buff, size, stdin) == NULL)
        return NULL;
    buff[strcspn(buff, "\n")] = '\0';
    return buff;
}


/*
**  Interactive tester: read a pattern, then texts to try against it.
*/
int
main(void)
{
    char	 pattern[80];
    char	 text[80];

    printf("Wildmat tester.  Enter pattern, then strings to test.\n");
    printf("A blank line gets prompts for a new pattern; a blank pattern\n");
    printf("exits the program.\n\n");

    for ( ; ; ) {
        printf("Enter pattern:  ");
        /* End of input or a blank pattern ends the session. */
        if (ReadLine(pattern, (int)sizeof pattern) == NULL
         || pattern[0] == '\0')
            break;
        for ( ; ; ) {
            printf("Enter text:  ");
            if (ReadLine(text, (int)sizeof text) == NULL)
                exit(0);
            if (text[0] == '\0')
                /* Blank line; go back and get a new pattern. */
                break;
            printf("      %s\n", wildmat(text, pattern) ? "YES" : "NO");
        }
    }

    exit(0);
    /* NOTREACHED */
}
#endif	/* TEST */
END_OF_FILE
if test 2123 -ne `wc -c <'wildmat.c'`; then
    echo shar: \"'wildmat.c'\" unpacked with wrong size!
fi
# end of 'wildmat.c'
fi
echo shar: End of shell archive.
exit 0
-- 
Please send comp.sources.unix-related mail to rsalz@uunet.uu.net.
Use a domain-based address or give alternate paths, or you may lose out.