rsalz@bbn.com (Rich Salz) (01/04/91)
This is a revised version of my pattern-matching routine. It has been posted to the net several times, and shows up in several places including Gilmore's public domain TAR. You might want to test this, but I did and it seems solid. /r$ #! /bin/sh # This is a shell archive. Remove anything before this line, then feed it # into a shell via "sh file" or similar. To overwrite existing files, # type "sh file -c". # The tool that generated this appeared in the comp.sources.unix newsgroup; # send mail to comp-sources-unix@uunet.uu.net if you want that tool. # Contents: wildmat.c # Wrapped by rsalz@litchi.bbn.com on Thu Jan 3 13:27:38 1991 PATH=/bin:/usr/bin:/usr/ucb ; export PATH echo If this archive is complete, you will see the following message: echo ' "shar: End of archive."' if test -f 'wildmat.c' -a "${1}" != "-c" ; then echo shar: Will not clobber existing file \"'wildmat.c'\" else echo shar: Extracting \"'wildmat.c'\" \(2677 characters\) sed "s/^X//" >'wildmat.c' <<'END_OF_FILE' X/* X** Do shell-style pattern matching for ?, \, [], and * characters. X** Might not be robust in face of malformed patterns; e.g., "foo[a-" X** could cause a segmentation violation. It is 8bit clean. X** X** Written by Rich $alz, mirror!rs, Wed Nov 26 19:03:17 EST 1986. X** Special thanks to Lars Mathiesen for the ABORT code. This can greatly X** speed up failing wildcard patterns. For example: X** pattern: -*-*-*-*-*-*-12-*-*-*-m-*-*-* X** text 1: -adobe-courier-bold-o-normal--12-120-75-75-m-70-iso8859-1 X** text 2: -adobe-courier-bold-o-normal--12-120-75-75-p-70-iso8859-1 X** Text 1 matches with 51 calls, while text 2 fails with 54 calls. Without X** the ABORT, then it takes 22310 calls to fail. Ugh. X*/ X X#define TRUE 1 X#define FALSE 0 X#define ABORT -1 X Xstatic int XStar(s, p) X register char *s; X register char *p; X{ X while (wildmat(s, p) == FALSE) X if (*++s == '\0') X return ABORT; X return TRUE; X} X X Xstatic int XDoMatch(s, p) X register char *s; X register char *p; X{ X register int last; X register int matched; X register int reverse; X X for ( ; *p; s++, p++) { X if (*s == '\0') X return ABORT; X switch (*p) { X case '\\': X /* Literal match with following character. */ X p++; X /* FALLTHROUGH */ X default: X if (*s != *p) X return FALSE; X continue; X case '?': X /* Match anything. */ X continue; X case '*': X /* Trailing star matches everything. */ X return *++p ? Star(s, p) : TRUE; X case '[': X /* [^....] means inverse character class. */ X if (reverse = p[1] == '^') X p++; X for (last = 0400, matched = FALSE; *++p && *p != ']'; last = *p) X /* This next line requires a good C compiler. */ X if (*p == '-' ? *s <= *++p && *s >= last : *s == *p) X matched = TRUE; X if (matched == reverse) X return FALSE; X continue; X } X } X X return *s == '\0'; X} X X Xint Xwildmat(s, p) X char *s; X char *p; X{ X return DoMatch(s, p) == TRUE; X} X X X X#ifdef TEST X#include <stdio.h> X X/* Yes, we use gets not fgets. Sue me. */ Xextern char *gets(); X X Xmain() X{ X char pattern[80]; X char text[80]; X X printf("Wildmat tester. Enter pattern, then strings to test.\n"); X printf("A blank line gets prompts for a new pattern; a blank pattern\n"); X printf("exits the program.\n\n"); X X for ( ; ; ) { X printf("Enter pattern: "); X if (gets(pattern) == NULL) X break; X for ( ; ; ) { X printf("Enter text: "); X if (gets(text) == NULL) X exit(0); X if (text[0] == '\0') X /* Blank line; go back and get a new pattern. */ X break; X printf(" %s\n", wildmat(text, pattern) ? "YES" : "NO"); X } X } X X exit(0); X /* NOTREACHED */ X} X#endif /* TEST */ END_OF_FILE if test 2677 -ne `wc -c <'wildmat.c'`; then echo shar: \"'wildmat.c'\" unpacked with wrong size! fi # end of 'wildmat.c' fi echo shar: End of archive. exit 0 -- Please send comp.sources.unix-related mail to rsalz@uunet.uu.net. Use a domain-based address or give alternate paths, or you may lose out.
thorinn@diku.dk (Lars Henrik Mathiesen) (01/05/91)
rsalz@bbn.com (Rich Salz) writes: >This is a revised version of my pattern-matching routine. It has been >posted to the net several times, and shows up in several places including >Gilmore's public domain TAR. You might want to test this, but I did >and it seems solid. > /r$ >X** Special thanks to Lars Mathiesen for the ABORT code. This can greatly >X** speed up failing wildcard patterns. For example: >Xstatic int >XStar(s, p) >X register char *s; >X register char *p; >X{ >X while (wildmat(s, p) == FALSE) >X if (*++s == '\0') >X return ABORT; >X return TRUE; >X} I just thought I'd mention that in the patch I sent to Rich, the body of the Star routine read like this: { register int ret; while ((ret = DoMatch(s, p)) == FALSE) if (*++s == '\0') return ABORT; return ret; } (Except that I used wildmatch for DoMatch.) Rich's version gives the correct results, but the recursion goes through the wildmat function maps all returns of ABORT to FALSE --- so the time complexity is still the same as for traditional versions, with double the function call overhead. (The whole point of the ABORT return code is that it is supposed to propagate through the Star function, aborting the loop.) I have just noted that the test on *++s in Star is repeated (with the same result) at the top of the loop in DoMatch, so I suggest that it should be eliminated as in the following version of Star: static int Star(s, p) register char *s; register char *p; { register int ret; do ret = DoMatch(s++, p); while (ret == FALSE); return ret; } It may cause one more call of DoMatch in the failing case, but I think it is clearer. -- Lars Mathiesen, DIKU, U of Copenhagen, Denmark [uunet!]mcsun!diku!thorinn Institute of Datalogy -- we're scientists, not engineers. thorinn@diku.dk
rsalz@bbn.com (Rich Salz) (01/05/91)
In <3154@litchi.bbn.com> rsalz@bbn.com (Rich Salz) writes: >This is a revised version of my pattern-matching routine. It has been >posted to the net several times, and shows up in several places including >Gilmore's public domain TAR. You might want to test this, but I did >and it seems solid. > /r$ Well, I didn't test it enough. Thanks to David Polansky <dp@meson.cray.com> for showing that "*test* fails to match "atest" Use the old version instead: #! /bin/sh # This is a shell archive. Remove anything before this line, then feed it # into a shell via "sh file" or similar. To overwrite existing files, # type "sh file -c". # The tool that generated this appeared in the comp.sources.unix newsgroup; # send mail to comp-sources-unix@uunet.uu.net if you want that tool. # Contents: wildmat.c # Wrapped by rsalz@litchi.bbn.com on Fri Jan 4 16:27:29 1991 PATH=/bin:/usr/bin:/usr/ucb ; export PATH echo If this archive is complete, you will see the following message: echo ' "shar: End of archive."' if test -f 'wildmat.c' -a "${1}" != "-c" ; then echo shar: Will not clobber existing file \"'wildmat.c'\" else echo shar: Extracting \"'wildmat.c'\" \(2157 characters\) sed "s/^X//" >'wildmat.c' <<'END_OF_FILE' X/* X** Do shell-style pattern matching for ?, \, [], and * characters. X** Might not be robust in face of malformed patterns; e.g., "foo[a-" X** could cause a segmentation violation. I think it's 8bit clean. X** X** Written by Rich $alz, mirror!rs, Wed Nov 26 19:03:17 EST 1986. X*/ X X#define TRUE 1 X#define FALSE 0 X X Xstatic int XStar(text, p) X register char *text; X register char *p; X{ X while (wildmat(text, p) == FALSE) X if (*++text == '\0') X return FALSE; X return TRUE; X} X X Xint Xwildmat(text, p) X register char *text; X register char *p; X{ X register int last; X register int matched; X register int reverse; X X for ( ; *p; text++, p++) X switch (*p) { X case '\\': X /* Literal match with following character. */ X p++; X /* FALLTHROUGH */ X default: X if (*text != *p) X return FALSE; X continue; X case '?': X /* Match anything. */ X if (*text == '\0') X return FALSE; X continue; X case '*': X /* Trailing star matches everything. */ X return *++p ? Star(text, p) : TRUE; X case '[': X /* [^....] means inverse character class. */ X if (reverse = p[1] == '^') X p++; X for (last = 0400, matched = FALSE; *++p && *p != ']'; last = *p) X /* This next line requires a good C compiler. */ X if (*p == '-' ? *text <= *++p && *text >= last : *text == *p) X matched = TRUE; X if (matched == reverse) X return FALSE; X continue; X } X X return *text == '\0'; X} X X X#ifdef TEST X#include <stdio.h> X X/* Yes, we use gets not fgets. Sue me. */ Xextern char *gets(); X X Xmain() X{ X char pattern[80]; X char text[80]; X X printf("Wildmat tester. Enter pattern, then strings to test.\n"); X printf("A blank line gets prompts for a new pattern; a blank pattern\n"); X printf("exits the program.\n\n"); X X for ( ; ; ) { X printf("Enter pattern: "); X if (gets(pattern) == NULL) X break; X for ( ; ; ) { X printf("Enter text: "); X if (gets(text) == NULL) X exit(0); X if (text[0] == '\0') X /* Blank line; go back and get a new pattern. */ X break; X printf(" %s\n", wildmat(text, pattern) ? "YES" : "NO"); X } X } X X exit(0); X /* NOTREACHED */ X} X#endif /* TEST */ END_OF_FILE if test 2157 -ne `wc -c <'wildmat.c'`; then echo shar: \"'wildmat.c'\" unpacked with wrong size! fi # end of 'wildmat.c' fi echo shar: End of archive. exit 0 -- Please send comp.sources.unix-related mail to rsalz@uunet.uu.net. Use a domain-based address or give alternate paths, or you may lose out.