[comp.lang.c] Revised C filename wildcard routine

rsalz@bbn.com (Rich Salz) (01/04/91)

This is a revised version of my pattern-matching routine.  It has been
posted to the net several times, and shows up in several places including
Gilmore's public domain TAR.  You might want to test this, but I did
and it seems solid.
	/r$

#! /bin/sh
# This is a shell archive.  Remove anything before this line, then feed it
# into a shell via "sh file" or similar.  To overwrite existing files,
# type "sh file -c".
# The tool that generated this appeared in the comp.sources.unix newsgroup;
# send mail to comp-sources-unix@uunet.uu.net if you want that tool.
# Contents:  wildmat.c
# Wrapped by rsalz@litchi.bbn.com on Thu Jan  3 13:27:38 1991
PATH=/bin:/usr/bin:/usr/ucb ; export PATH
echo If this archive is complete, you will see the following message:
echo '          "shar: End of archive."'
if test -f 'wildmat.c' -a "${1}" != "-c" ; then 
  echo shar: Will not clobber existing file \"'wildmat.c'\"
else
  echo shar: Extracting \"'wildmat.c'\" \(2677 characters\)
  sed "s/^X//" >'wildmat.c' <<'END_OF_FILE'
X/*
X**  Do shell-style pattern matching for ?, \, [], and * characters.
X**  Might not be robust in face of malformed patterns; e.g., "foo[a-"
X**  could cause a segmentation violation.  It is 8bit clean.
X**
X**  Written by Rich $alz, mirror!rs, Wed Nov 26 19:03:17 EST 1986.
X**  Special thanks to Lars Mathiesen for the ABORT code.  This can greatly
X**  speed up failing wildcard patterns.  For example:
X**	pattern: -*-*-*-*-*-*-12-*-*-*-m-*-*-*
X**	text 1:	 -adobe-courier-bold-o-normal--12-120-75-75-m-70-iso8859-1
X**	text 2:	 -adobe-courier-bold-o-normal--12-120-75-75-p-70-iso8859-1
X**  Text 1 matches with 51 calls, while text 2 fails with 54 calls.  Without
X**  the ABORT, then it takes 22310 calls to fail.  Ugh.
X*/
X
X#define TRUE		1
X#define FALSE		0
X#define ABORT		-1
X
Xstatic int
XStar(s, p)
X    register char	*s;
X    register char	*p;
X{
X    while (wildmat(s, p) == FALSE)
X	if (*++s == '\0')
X	    return ABORT;
X    return TRUE;
X}
X
X
Xstatic int
XDoMatch(s, p)
X    register char	*s;
X    register char	*p;
X{
X    register int 	 last;
X    register int 	 matched;
X    register int 	 reverse;
X
X    for ( ; *p; s++, p++) {
X	if (*s == '\0')
X	    return ABORT;
X	switch (*p) {
X	case '\\':
X	    /* Literal match with following character. */
X	    p++;
X	    /* FALLTHROUGH */
X	default:
X	    if (*s != *p)
X		return FALSE;
X	    continue;
X	case '?':
X	    /* Match anything. */
X	    continue;
X	case '*':
X	    /* Trailing star matches everything. */
X	    return *++p ? Star(s, p) : TRUE;
X	case '[':
X	    /* [^....] means inverse character class. */
X	    if (reverse = p[1] == '^')
X		p++;
X	    for (last = 0400, matched = FALSE; *++p && *p != ']'; last = *p)
X		/* This next line requires a good C compiler. */
X		if (*p == '-' ? *s <= *++p && *s >= last : *s == *p)
X		    matched = TRUE;
X	    if (matched == reverse)
X		return FALSE;
X	    continue;
X	}
X    }
X
X    return *s == '\0';
X}
X
X
Xint
Xwildmat(s, p)
X    char	*s;
X    char	*p;
X{
X    return DoMatch(s, p) == TRUE;
X}
X
X
X
X#ifdef	TEST
X#include <stdio.h>
X
X/* Yes, we use gets not fgets.  Sue me. */
Xextern char	*gets();
X
X
Xmain()
X{
X    char	 pattern[80];
X    char	 text[80];
X
X    printf("Wildmat tester.  Enter pattern, then strings to test.\n");
X    printf("A blank line gets prompts for a new pattern; a blank pattern\n");
X    printf("exits the program.\n\n");
X
X    for ( ; ; ) {
X	printf("Enter pattern:  ");
X	if (gets(pattern) == NULL)
X	    break;
X	for ( ; ; ) {
X	    printf("Enter text:  ");
X	    if (gets(text) == NULL)
X		exit(0);
X	    if (text[0] == '\0')
X		/* Blank line; go back and get a new pattern. */
X		break;
X	    printf("      %s\n", wildmat(text, pattern) ? "YES" : "NO");
X	}
X    }
X
X    exit(0);
X    /* NOTREACHED */
X}
X#endif	/* TEST */
END_OF_FILE
  if test 2677 -ne `wc -c <'wildmat.c'`; then
    echo shar: \"'wildmat.c'\" unpacked with wrong size!
  fi
  # end of 'wildmat.c'
fi
echo shar: End of archive.
exit 0
-- 
Please send comp.sources.unix-related mail to rsalz@uunet.uu.net.
Use a domain-based address or give alternate paths, or you may lose out.

rsalz@bbn.com (Rich Salz) (01/05/91)

In <3154@litchi.bbn.com> rsalz@bbn.com (Rich Salz) writes:
>This is a revised version of my pattern-matching routine.  It has been
>posted to the net several times, and shows up in several places including
>Gilmore's public domain TAR.  You might want to test this, but I did
>and it seems solid.
>	/r$

Well, I didn't test it enough.  Thanks to David Polansky <dp@meson.cray.com>
for showing that "*test* fails to match "atest"

Use the old version instead:
#! /bin/sh
# This is a shell archive.  Remove anything before this line, then feed it
# into a shell via "sh file" or similar.  To overwrite existing files,
# type "sh file -c".
# The tool that generated this appeared in the comp.sources.unix newsgroup;
# send mail to comp-sources-unix@uunet.uu.net if you want that tool.
# Contents:  wildmat.c
# Wrapped by rsalz@litchi.bbn.com on Fri Jan  4 16:27:29 1991
PATH=/bin:/usr/bin:/usr/ucb ; export PATH
echo If this archive is complete, you will see the following message:
echo '          "shar: End of archive."'
if test -f 'wildmat.c' -a "${1}" != "-c" ; then 
  echo shar: Will not clobber existing file \"'wildmat.c'\"
else
  echo shar: Extracting \"'wildmat.c'\" \(2157 characters\)
  sed "s/^X//" >'wildmat.c' <<'END_OF_FILE'
X/*
X**  Do shell-style pattern matching for ?, \, [], and * characters.
X**  Might not be robust in face of malformed patterns; e.g., "foo[a-"
X**  could cause a segmentation violation.  I think it's 8bit clean.
X**
X**  Written by Rich $alz, mirror!rs, Wed Nov 26 19:03:17 EST 1986.
X*/
X
X#define TRUE		1
X#define FALSE		0
X
X
Xstatic int
XStar(text, p)
X    register char	*text;
X    register char	*p;
X{
X    while (wildmat(text, p) == FALSE)
X	if (*++text == '\0')
X	    return FALSE;
X    return TRUE;
X}
X
X
Xint
Xwildmat(text, p)
X    register char	*text;
X    register char	*p;
X{
X    register int	last;
X    register int	matched;
X    register int	reverse;
X
X    for ( ; *p; text++, p++)
X	switch (*p) {
X	case '\\':
X	    /* Literal match with following character. */
X	    p++;
X	    /* FALLTHROUGH */
X	default:
X	    if (*text != *p)
X		return FALSE;
X	    continue;
X	case '?':
X	    /* Match anything. */
X	    if (*text == '\0')
X		return FALSE;
X	    continue;
X	case '*':
X	    /* Trailing star matches everything. */
X	    return *++p ? Star(text, p) : TRUE;
X	case '[':
X	    /* [^....] means inverse character class. */
X	    if (reverse = p[1] == '^')
X		p++;
X	    for (last = 0400, matched = FALSE; *++p && *p != ']'; last = *p)
X		/* This next line requires a good C compiler. */
X		if (*p == '-' ? *text <= *++p && *text >= last : *text == *p)
X		    matched = TRUE;
X	    if (matched == reverse)
X		return FALSE;
X	    continue;
X	}
X
X    return *text == '\0';
X}
X
X
X#ifdef	TEST
X#include <stdio.h>
X
X/* Yes, we use gets not fgets.  Sue me. */
Xextern char	*gets();
X
X
Xmain()
X{
X    char	pattern[80];
X    char	text[80];
X
X    printf("Wildmat tester.  Enter pattern, then strings to test.\n");
X    printf("A blank line gets prompts for a new pattern; a blank pattern\n");
X    printf("exits the program.\n\n");
X
X    for ( ; ; ) {
X	printf("Enter pattern:  ");
X	if (gets(pattern) == NULL)
X	    break;
X	for ( ; ; ) {
X	    printf("Enter text:  ");
X	    if (gets(text) == NULL)
X		exit(0);
X	    if (text[0] == '\0')
X		/* Blank line; go back and get a new pattern. */
X		break;
X	    printf("      %s\n", wildmat(text, pattern) ? "YES" : "NO");
X	}
X    }
X
X    exit(0);
X    /* NOTREACHED */
X}
X#endif	/* TEST */
END_OF_FILE
  if test 2157 -ne `wc -c <'wildmat.c'`; then
    echo shar: \"'wildmat.c'\" unpacked with wrong size!
  fi
  # end of 'wildmat.c'
fi
echo shar: End of archive.
exit 0
-- 
Please send comp.sources.unix-related mail to rsalz@uunet.uu.net.
Use a domain-based address or give alternate paths, or you may lose out.