pac@babcock.cerc.wvu.wvnet.edu (Michael A. Packer) (08/30/90)
From article <6003@castle.ed.ac.uk>, by tkld@castle.ed.ac.uk (K Davidson): > > Ages'n'ages ago someone posted a routine called glob to (I think) > alt.sources. It did string matching like shell file name globbing > (i.e. matching with ?, *, [] and {} ) > > Now that I want it I find I've lost my copy. Does anyone out there still > have it ? Or something like it ? Seems a waste to have to rewrite it... /* * File-name wildcard pattern matching for GNU. Copyright (C) 1985, * 1988, 1989, 1990 Free Software Foundation, Inc. * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 1, or (at your option) * any later version. * * This program is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public License along * with this program; if not, write to the Free Software Foundation, * Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ /* * To whomever it may concern: I have never seen the code which most * Unix programs use to perform this function. I wrote this from * scratch based on specifications for the pattern matching. --RMS. */ #include <sys/types.h> #if defined(USGr3) || defined(DIRENT) || defined(__GNU_LIBRARY__) #include <dirent.h> #define direct dirent #define D_NAMLEN(d) strlen((d)->d_name) #else /* not USGr3 or DIRENT or * __GNU_LIBRARY__ */ #define D_NAMLEN(d) ((d)->d_namlen) #ifdef USG #ifdef SYSNDIR #include <sys/ndir.h> #else #include "ndir.h" /* Get ndir.h from the * Emacs distribution. */ #endif /* not SYSNDIR */ #else /* not USG */ #include <sys/dir.h> #endif /* USG */ #endif /* USGr3 */ #if defined(STDC_HEADERS) || defined(__GNU_LIBRARY__) #include <stdlib.h> #include <string.h> #define bcopy(s, d, n) memcpy ((d), (s), (n)) #define index strchr #define rindex strrchr #else #ifdef USG #include <string.h> #include <memory.h> #define bcopy(s, d, n) memcpy ((d), (s), (n)) #define index strchr #define rindex strrchr #else /* not USG */ #include <strings.h> extern void bcopy (); #endif /* not USG */ extern char *malloc (); extern char *realloc (); extern void free (); #ifndef NULL #define NULL 0 #endif #endif /* Not STDC_HEADERS or * __GNU_LIBRARY__. */ #ifdef __GNUC__ #define alloca __builtin_alloca #else /* Not GCC. */ #ifdef sparc #include <alloca.h> #else /* Not sparc. */ extern char *alloca (); #endif /* sparc. */ #endif /* GCC. */ /* * Nonzero if '*' and '?' do not match an initial '.' for * glob_filename. */ int noglob_dot_filenames = 0; static int glob_match_after_star (); /* Return nonzero if PATTERN has any special globbing chars in it. */ int glob_pattern_p (pattern) char *pattern; { register char *p = pattern; register char c; while ((c = *p++) != '\0') switch (c) { case '?': case '[': case '*': return 1; case '\\': if (*p++ == '\0') return 0; } return 0; } /* * Match the pattern PATTERN against the string TEXT; return 1 if it * matches, 0 otherwise. * * A match means the entire string TEXT is used up in matching. * * In the pattern string, `*' matches any sequence of characters, `?' * matches any character, [SET] matches any character in the specified * set, [!SET] matches any character not in the specified set. * * A set is composed of characters or ranges; a range looks like character * hyphen character (as in 0-9 or A-Z). [0-9a-zA-Z_] is the set of * characters allowed in C identifiers. Any other character in the * pattern must be matched exactly. * * To suppress the special syntactic significance of any of `[]*?!-\', and * match the character exactly, precede it with a `\'. * * If DOT_SPECIAL is nonzero, `*' and `?' do not match `.' at the * beginning of TEXT. */ int glob_match (pattern, text, dot_special) char *pattern, *text; int dot_special; { register char *p = pattern, *t = text; register char c; while ((c = *p++) != '\0') switch (c) { case '?': if (*t == '\0' || (dot_special && t == text && *t == '.')) return 0; else ++t; break; case '\\': if (*p++ != *t++) return 0; break; case '*': if (dot_special && t == text && *t == '.') return 0; return glob_match_after_star (p, t); case '[': { register char c1 = *t++; int invert; if (c1 == '\0') return 0; invert = (*p == '!'); if (invert) p++; c = *p++; while (1) { register char cstart = c, cend = c; if (c == '\\') { cstart = *p++; cend = cstart; } if (cstart == '\0') return 0; /* Missing ']'. */ c = *p++; if (c == '-') { cend = *p++; if (cend == '\\') cend = *p++; if (cend == '\0') return 0; c = *p++; } if (c1 >= cstart && c1 <= cend) goto match; if (c == ']') break; } if (!invert) return 0; break; match: /* Skip the rest of the [...] construct that already matched. */ while (c != ']') { if (c == '\0') return 0; c = *p++; if (c == '\0') return 0; if (c == '\\') p++; } if (invert) return 0; break; } default: if (c != *t++) return 0; } return *t == '\0'; } /* * Like glob_match, but match PATTERN against any final segment of * TEXT. */ static int glob_match_after_star (pattern, text) char *pattern, *text; { register char *p = pattern, *t = text; register char c, c1; while ((c = *p++) == '?' || c == '*') if (c == '?' && *t++ == '\0') return 0; if (c == '\0') return 1; if (c == '\\') c1 = *p; else c1 = c; --p; while (1) { if ((c == '[' || *t == c1) && glob_match (p, t, 0)) return 1; if (*t++ == '\0') return 0; } } /* * Return a vector of names of files in directory DIR whose names match * glob pattern PAT. The names are not in any particular order. * Wildcards at the beginning of PAT do not match an initial period if * noglob_dot_filenames is nonzero. * * The vector is terminated by an element that is a null pointer. * * To free the space allocated, first free the vector's elements, then * free the vector. * * Return NULL if cannot get enough memory to hold the pointer and the * names. * * Return -1 if cannot access directory DIR. Look in errno for more * information. */ char ** glob_vector (pat, dir) char *pat; char *dir; { struct globval { struct globval *next; char *name; }; DIR *d; register struct direct *dp; struct globval *lastlink; register struct globval *nextlink; register char *nextname; unsigned int count; int lose; register char **name_vector; register unsigned int i; d = opendir (dir); if (d == NULL) return (char **) -1; lastlink = NULL; count = 0; lose = 0; /* * Scan the directory, finding all names that match. For each name * that matches, allocate a struct globval on the stack and store the * name in it. Chain those structs together; lastlink is the front of * the chain. */ while (1) { dp = readdir (d); if (dp == NULL) break; if (dp -> d_ino != 0 && glob_match (pat, dp -> d_name, noglob_dot_filenames)) { nextlink = (struct globval *) alloca (sizeof (struct globval)); nextlink -> next = lastlink; i = D_NAMLEN (dp) + 1; nextname = (char *) malloc (i); if (nextname == NULL) { lose = 1; break; } lastlink = nextlink; nextlink -> name = nextname; bcopy (dp -> d_name, nextname, i); count++; } } closedir (d); if (!lose) { name_vector = (char **) malloc ((count + 1) * sizeof (char *)); lose |= name_vector == NULL; } /* Have we run out of memory? */ if (lose) { /* Here free the strings we have got. */ while (lastlink) { free (lastlink -> name); lastlink = lastlink -> next; } return NULL; } /* Copy the name pointers from the linked list into the vector. */ for (i = 0; i < count; ++i) { name_vector[i] = lastlink -> name; lastlink = lastlink -> next; } name_vector[count] = NULL; return name_vector; } /* * Return a new array, replacing ARRAY, which is the concatenation of * each string in ARRAY to DIR. Return NULL if out of memory. */ static char ** glob_dir_to_array (dir, array) char *dir, **array; { register unsigned int i, l; int add_slash = 0; char **result; l = strlen (dir); if (l == 0) return array; if (dir[l - 1] != '/') add_slash++; for (i = 0; array[i] != NULL; i++) ; result = (char **) malloc ((i + 1) * sizeof (char *)); if (result == NULL) return NULL; for (i = 0; array[i] != NULL; i++) { result[i] = (char *) malloc (1 + l + add_slash + strlen (array[i])); if (result[i] == NULL) return NULL; strcpy (result[i], dir); if (add_slash) result[i][l] = '/'; strcpy (result[i] + l + add_slash, array[i]); } result[i] = NULL; /* Free the input array. */ for (i = 0; array[i] != NULL; i++) free (array[i]); free ((char *) array); return result; } /* * Do globbing on PATHNAME. Return an array of pathnames that match, * marking the end of the array with a null-pointer as an element. If * no pathnames match, then the array is empty (first element is null). * If there isn't enough memory, then return NULL. If a file system * error occurs, return -1; `errno' has the error code. * * Wildcards at the beginning of PAT, or following a slash, do not match * an initial period if noglob_dot_filenames is nonzero. */ char ** glob_filename (pathname) char *pathname; { char **result; unsigned int result_size; char *directory_name, *filename; unsigned int directory_len; result = (char **) malloc (sizeof (char *)); result_size = 1; if (result == NULL) return NULL; result[0] = NULL; /* Find the filename. */ filename = rindex (pathname, '/'); if (filename == NULL) { filename = pathname; directory_name = ""; directory_len = 0; } else { directory_len = (filename - pathname) + 1; directory_name = (char *) alloca (directory_len + 1); bcopy (pathname, directory_name, directory_len); directory_name[directory_len] = '\0'; ++filename; } /* * If directory_name contains globbing characters, then we have to * expand the previous levels. Just recurse. */ if (glob_pattern_p (directory_name)) { char **directories; register unsigned int i; if (directory_name[directory_len - 1] == '/') directory_name[directory_len - 1] = '\0'; directories = glob_filename (directory_name); if (directories == NULL) goto memory_error; else if (directories == (char **) -1) return (char **) -1; else if (*directories == NULL) { free ((char *) directories); return (char **) -1; } /* * We have successfully globbed the preceding directory name. For * each name in DIRECTORIES, call glob_vector on it and FILENAME. * Concatenate the results together. */ for (i = 0; directories[i] != NULL; i++) { char **temp_results = glob_vector (filename, directories[i]); if (temp_results == NULL) goto memory_error; else if (temp_results == (char **) -1) /* This filename is probably not a directory. Ignore it. */ ; else { char **array = glob_dir_to_array (directories[i], temp_results); register unsigned int l; l = 0; while (array[l] != NULL) ++l; result = (char **) realloc (result, (result_size + l) * sizeof (char *)); if (result == NULL) goto memory_error; for (l = 0; array[l] != NULL; ++l) result[result_size++ - 1] = array[l]; result[result_size - 1] = NULL; free ((char *) array); } } /* Free the directories. */ for (i = 0; directories[i] != NULL; i++) free (directories[i]); free ((char *) directories); return result; } /* If there is only a directory name, return it. */ if (*filename == '\0') { result = (char **) realloc ((char *) result, 2 * sizeof (char *)); if (result == NULL) return NULL; result[0] = (char *) malloc (directory_len + 1); if (result[0] == NULL) goto memory_error; bcopy (directory_name, result[0], directory_len + 1); result[1] = NULL; return result; } else { /* * Otherwise, just return what glob_vector returns appended to the * directory name. */ char **temp_results = glob_vector (filename, (directory_len == 0 ? "." : directory_name)); if (temp_results == NULL || temp_results == (char **) -1) return temp_results; return glob_dir_to_array (directory_name, temp_results); } memory_error:; if (result != NULL) { register unsigned int i; for (i = 0; result[i] != NULL; ++i) free (result[i]); free ((char *) result); } return NULL; } #ifdef TEST main (argc, argv) int argc; char **argv; { char **value; int i, optind; for (optind = 1; optind < argc; optind++) { value = glob_filename (argv[optind]); if (value == NULL) puts ("virtual memory exhausted"); else if (value == (char **) -1) perror (argv[optind]); else for (i = 0; value[i] != NULL; i++) puts (value[i]); } exit (0); } #endif /* TEST */ -- USMAIL: Drawer 2000, Concurrent Eng. Res. Center, WVU, Morgantown, WV 26506 PHONE: 304 293-7226 INTERNET : pac@cerc.wvu.wvnet.edu