rsalz@bbn.com (Rich Salz) (01/04/91)
This is a revised version of my pattern-matching routine. It has been posted to the net several times, and shows up in several places including Gilmore's public domain TAR. You might want to test this, but I did and it seems solid. /r$ #! /bin/sh # This is a shell archive. Remove anything before this line, then feed it # into a shell via "sh file" or similar. To overwrite existing files, # type "sh file -c". # The tool that generated this appeared in the comp.sources.unix newsgroup; # send mail to comp-sources-unix@uunet.uu.net if you want that tool. # Contents: wildmat.c # Wrapped by rsalz@litchi.bbn.com on Thu Jan 3 13:27:38 1991 PATH=/bin:/usr/bin:/usr/ucb ; export PATH echo If this archive is complete, you will see the following message: echo ' "shar: End of archive."' if test -f 'wildmat.c' -a "${1}" != "-c" ; then echo shar: Will not clobber existing file \"'wildmat.c'\" else echo shar: Extracting \"'wildmat.c'\" \(2677 characters\) sed "s/^X//" >'wildmat.c' <<'END_OF_FILE' X/* X** Do shell-style pattern matching for ?, \, [], and * characters. X** Might not be robust in face of malformed patterns; e.g., "foo[a-" X** could cause a segmentation violation. It is 8bit clean. X** X** Written by Rich $alz, mirror!rs, Wed Nov 26 19:03:17 EST 1986. X** Special thanks to Lars Mathiesen for the ABORT code. This can greatly X** speed up failing wildcard patterns. For example: X** pattern: -*-*-*-*-*-*-12-*-*-*-m-*-*-* X** text 1: -adobe-courier-bold-o-normal--12-120-75-75-m-70-iso8859-1 X** text 2: -adobe-courier-bold-o-normal--12-120-75-75-p-70-iso8859-1 X** Text 1 matches with 51 calls, while text 2 fails with 54 calls. Without X** the ABORT, then it takes 22310 calls to fail. Ugh. X*/ X X#define TRUE 1 X#define FALSE 0 X#define ABORT -1 X Xstatic int XStar(s, p) X register char *s; X register char *p; X{ X while (wildmat(s, p) == FALSE) X if (*++s == '\0') X return ABORT; X return TRUE; X} X X Xstatic int XDoMatch(s, p) X register char *s; X register char *p; X{ X register int last; X register int matched; X register int reverse; X X for ( ; *p; s++, p++) { X if (*s == '\0') X return ABORT; X switch (*p) { X case '\\': X /* Literal match with following character. */ X p++; X /* FALLTHROUGH */ X default: X if (*s != *p) X return FALSE; X continue; X case '?': X /* Match anything. */ X continue; X case '*': X /* Trailing star matches everything. */ X return *++p ? Star(s, p) : TRUE; X case '[': X /* [^....] means inverse character class. */ X if (reverse = p[1] == '^') X p++; X for (last = 0400, matched = FALSE; *++p && *p != ']'; last = *p) X /* This next line requires a good C compiler. */ X if (*p == '-' ? *s <= *++p && *s >= last : *s == *p) X matched = TRUE; X if (matched == reverse) X return FALSE; X continue; X } X } X X return *s == '\0'; X} X X Xint Xwildmat(s, p) X char *s; X char *p; X{ X return DoMatch(s, p) == TRUE; X} X X X X#ifdef TEST X#include <stdio.h> X X/* Yes, we use gets not fgets. Sue me. */ Xextern char *gets(); X X Xmain() X{ X char pattern[80]; X char text[80]; X X printf("Wildmat tester. Enter pattern, then strings to test.\n"); X printf("A blank line gets prompts for a new pattern; a blank pattern\n"); X printf("exits the program.\n\n"); X X for ( ; ; ) { X printf("Enter pattern: "); X if (gets(pattern) == NULL) X break; X for ( ; ; ) { X printf("Enter text: "); X if (gets(text) == NULL) X exit(0); X if (text[0] == '\0') X /* Blank line; go back and get a new pattern. */ X break; X printf(" %s\n", wildmat(text, pattern) ? "YES" : "NO"); X } X } X X exit(0); X /* NOTREACHED */ X} X#endif /* TEST */ END_OF_FILE if test 2677 -ne `wc -c <'wildmat.c'`; then echo shar: \"'wildmat.c'\" unpacked with wrong size! fi # end of 'wildmat.c' fi echo shar: End of archive. exit 0 -- Please send comp.sources.unix-related mail to rsalz@uunet.uu.net. Use a domain-based address or give alternate paths, or you may lose out.
rsalz@bbn.com (Rich Salz) (01/05/91)
In <3154@litchi.bbn.com> rsalz@bbn.com (Rich Salz) writes: >This is a revised version of my pattern-matching routine. It has been >posted to the net several times, and shows up in several places including >Gilmore's public domain TAR. You might want to test this, but I did >and it seems solid. > /r$ Well, I didn't test it enough. Thanks to David Polansky <dp@meson.cray.com> for showing that "*test* fails to match "atest" Use the old version instead: #! /bin/sh # This is a shell archive. Remove anything before this line, then feed it # into a shell via "sh file" or similar. To overwrite existing files, # type "sh file -c". # The tool that generated this appeared in the comp.sources.unix newsgroup; # send mail to comp-sources-unix@uunet.uu.net if you want that tool. # Contents: wildmat.c # Wrapped by rsalz@litchi.bbn.com on Fri Jan 4 16:27:29 1991 PATH=/bin:/usr/bin:/usr/ucb ; export PATH echo If this archive is complete, you will see the following message: echo ' "shar: End of archive."' if test -f 'wildmat.c' -a "${1}" != "-c" ; then echo shar: Will not clobber existing file \"'wildmat.c'\" else echo shar: Extracting \"'wildmat.c'\" \(2157 characters\) sed "s/^X//" >'wildmat.c' <<'END_OF_FILE' X/* X** Do shell-style pattern matching for ?, \, [], and * characters. X** Might not be robust in face of malformed patterns; e.g., "foo[a-" X** could cause a segmentation violation. I think it's 8bit clean. X** X** Written by Rich $alz, mirror!rs, Wed Nov 26 19:03:17 EST 1986. X*/ X X#define TRUE 1 X#define FALSE 0 X X Xstatic int XStar(text, p) X register char *text; X register char *p; X{ X while (wildmat(text, p) == FALSE) X if (*++text == '\0') X return FALSE; X return TRUE; X} X X Xint Xwildmat(text, p) X register char *text; X register char *p; X{ X register int last; X register int matched; X register int reverse; X X for ( ; *p; text++, p++) X switch (*p) { X case '\\': X /* Literal match with following character. */ X p++; X /* FALLTHROUGH */ X default: X if (*text != *p) X return FALSE; X continue; X case '?': X /* Match anything. */ X if (*text == '\0') X return FALSE; X continue; X case '*': X /* Trailing star matches everything. */ X return *++p ? Star(text, p) : TRUE; X case '[': X /* [^....] means inverse character class. */ X if (reverse = p[1] == '^') X p++; X for (last = 0400, matched = FALSE; *++p && *p != ']'; last = *p) X /* This next line requires a good C compiler. */ X if (*p == '-' ? *text <= *++p && *text >= last : *text == *p) X matched = TRUE; X if (matched == reverse) X return FALSE; X continue; X } X X return *text == '\0'; X} X X X#ifdef TEST X#include <stdio.h> X X/* Yes, we use gets not fgets. Sue me. */ Xextern char *gets(); X X Xmain() X{ X char pattern[80]; X char text[80]; X X printf("Wildmat tester. Enter pattern, then strings to test.\n"); X printf("A blank line gets prompts for a new pattern; a blank pattern\n"); X printf("exits the program.\n\n"); X X for ( ; ; ) { X printf("Enter pattern: "); X if (gets(pattern) == NULL) X break; X for ( ; ; ) { X printf("Enter text: "); X if (gets(text) == NULL) X exit(0); X if (text[0] == '\0') X /* Blank line; go back and get a new pattern. */ X break; X printf(" %s\n", wildmat(text, pattern) ? "YES" : "NO"); X } X } X X exit(0); X /* NOTREACHED */ X} X#endif /* TEST */ END_OF_FILE if test 2157 -ne `wc -c <'wildmat.c'`; then echo shar: \"'wildmat.c'\" unpacked with wrong size! fi # end of 'wildmat.c' fi echo shar: End of archive. exit 0 -- Please send comp.sources.unix-related mail to rsalz@uunet.uu.net. Use a domain-based address or give alternate paths, or you may lose out.