sources-request@mirror.UUCP (07/09/86)
Submitted by: seismo!mcvax!guido (Guido van Rossum) Mod.sources: Volume 6, Issue 47 Archive-name: glob [ A few items: 1) If you use csh, you have to invoke the test program at "./glob"; 2) A comment in the code says [...] is not implemented; it is; 3) As the README says, some sites will have to add -Dindex=strchr to the CFLAGS variable in the Makefile; 4) This uses the opendir() family of routines. --r$ ] : This is a shell archive. : Extract with 'sh this_file'. echo 'Start of glob -- expand meta-characters, part 01 out of 01:' echo 'x - Makefile' sed 's/^X//' > 'Makefile' << 'EOF' XCFLAGS= XLINTFLAGS=-abh XSRCS= glob.c main.c XOBJS= glob.o main.o XFILES= README $(SRCS) Makefile glob.3 X Xglob: glob.o main.o X $(CC) -o glob $(CFLAGS) glob.o main.o X Xtags: $(SRCS) X ctags $(SRCS) X Xlint: $(SRCS) X lint $(LINTFLAGS) $(SRCS) X Xshar: $(FILES) X packmail -o shar -t 'glob -- expand meta-characters' $(FILES) EOF echo 'x - README' sed 's/^X//' > 'README' << 'EOF' XGlob -- expand meta-characters in file names -- by Guido van Rossum X XThis is a filename 'globbing' routine that I wrote. Besides expanding X*, ? and [...] exactly like sh(1), it also replaces initial ~ or ~user Xby user's home directory, replaces initial $var by the contents of the Xenvironment variable, and has a quoting mechanism whereby any of the Xmagic characters can be quoted by '\' (including '\' itself). X XIt is really intended for inclusion in some larger program, but there is Xa small test program that expands patterns given as arguments. X XYou should have received the following files: X X README this file X glob.c the code X main.c test program X glob.3 manual page X Makefile make file X XPortability issues: I have only been able to test it on a VAX running X4.3-beta, but I believe it can be ported to anything that supports Xopendir, readdir and closedir. If you don't have index, substitute Xstrchr using the -D flag to the C compiler. The program passes lint Xexcept for the usual complaints about malloc and strcpy. X XThis software is copyright (c) 1986 by Stichting Mathematisch Centrum. XYou may use, modify and copy it, provided this notice is present in all Xmodified or unmodified copies, and provided you don't make money for it. X X*** You may only distribute the set of files mentioned above as a whole. *** X XIf you find bugs, or have any other questions, please mail them to the Xauthor, whose address is: X X Guido van Rossum X CWI, dept. AA X Kruislaan 413 X 1089 SJ Amsterdam X X Electronic mail: guido@mcvax.UUCP EOF echo 'x - glob.3' sed 's/^X//' > 'glob.3' << 'EOF' X.TH GLOB 3 "June 28, 1986" X.AT 3 X.SH NAME Xglob \- expand meta-characters in file names X.SH SYNOPSIS X.nf X.B "int glob(pattern, buffer, size) X.B "char *pattern; X.B "char *buffer; X.B "unsigned int size; X.fi X.SH DESCRIPTION X.I Glob Xexpands the meta-characters *, ? and [...] in X.I pattern Xand copies the file name(s) that match the pattern to X.I buffer , Xwhose length is given by X.I size . X.PP XBefore expansion is attempted, the pattern is processed as follows: Xan initial X.I ~user Xis replaced by X.I user's Xhome directory, an initial ~ is replaced by the contents of $HOME, Xand an initial X.I $variable Xis replaced by the contents of the environment variable by that name. XThe universal quoting character \e can be prefixed to *, ? and [, to Xinitial ~ or $, and to itself to prevent the special meaning. X.PP XThe expansion algorithm used is believed to give exactly the same Xresults as the expansion of meta-characters by the Bourne shell X.I sh (1), Xexcept for the different quoting mechanism and the expansion of ~ and $. XThe files are sorted. X.SH RETURN VALUE X.I Glob Xreturns the number of files matching the pattern that could be Xtransferred to the buffer. XIf no files match, the return value is 0 but the pattern is copied to Xthe buffer after expansion of ~, $ and \e-quotes. X.SH SEE ALSO Xsh(1) X.SH AUTHOR XGuido van Rossum, CWI, Amsterdam <guido@mcvax.uucp> X.SH DIAGNOSTICS XThere are no explicit diagnostics. XIf there is not enough memory to hold the internal lists of Xpartially-matching files, not all matching files may be found. XIf the buffer has not enough space to hold all matching files, as many Xas will fit are transferred to it. XIf X.I $variable Xhas no value, the empty string is substituted. XIf no expansion can be found for ~ or X.I ~user , Xit is left in the pattern unchanged. XIf X.SH BUGS XThere should be an interface to the ``raw'' globbing algorithm, which Xcan return an arbitrary number of files in a ``malloced'' structure and Xdoes not do ~ or $ expansion or \e-quoting. X.PP XThe code uses the Berkeley directory-reading package, so in order to Xport it to v7 or sys5 you'll need to get hold of a public domain Xre-implementation of that package. X.PP XThe name is a relict from ancient times. EOF echo 'x - glob.c' sed 's/^X//' > 'glob.c' << 'EOF' X/* This software is copyright (c) 1986 by Stichting Mathematisch Centrum. X * You may use, modify and copy it, provided this notice is present in all X * copies, modified or unmodified, and provided you don't make money for it. X * X * Written 86-jun-28 by Guido van Rossum, CWI, Amsterdam <guido@mcvax.uucp> X */ X X/* X * 'Glob' routine -- match * and ? in filename. X * Extra services: initial ~username is replaced by username's home dir, X * initial ~ is replaced by $HOME, initial $var is replaced by X * return value of getenv("var"). X * Quoting convention: \ followed by a magic character inhibits its X * special meaning. X * BUGS: [...] is not implemented. X * Nonexisting $var is treated as empty string; nonexisting ~user X * is copied literally. X */ X X#include <stdio.h> X#include <strings.h> X#include <ctype.h> X#include <pwd.h> X#include <sys/types.h> X#include <sys/dir.h> X Xstruct passwd *getpwnam(); Xchar *getenv(); Xchar *malloc(); Xchar *realloc(); X X#define EOS '\0' X#define SEP '/' X#define DOT '.' X#define QUOTE '\\' X X#define BOOL int X#define NO 0 X#define YES 1 X X#define MAXPATH 1024 X#define MAXBASE 256 X X#define META(c) ((char)((c)|0200)) X#define M_ALL META('*') X#define M_ONE META('?') X#define M_SET META('[') X#define M_RNG META('-') X#define M_END META(']') X Xstruct flist { X int len; X char **item; X}; X X/* Make a null-terminated string of the chars between two pointers */ X/* (Limited length, static buffer returned) */ X Xstatic char * Xmakestr(start, end) X char *start; X char *end; X{ X static char buf[100]; X char *p= buf; X X while (start < end && p < &buf[100]) X *p++ = *start++; X *p= EOS; X return buf; X} X X/* Append string to buffer, return new end of buffer. Guarded. */ X Xstatic char * Xaddstr(dest, src, end) X char *dest; X char *src; X char *end; X{ X while (*dest++ = *src++) { X if (dest >= end) X break; X } X return --dest; X} X X/* Form a pathname by concatenating head, maybe a / and tail. */ X/* Truncates to space available. */ X Xstatic void Xformpath(dest, head, tail, size) X char *dest; X char *head; X char *tail; X unsigned int size; /* sizeof dest */ X{ X char *start= dest; X X for (;;) { X if (--size == 0) X goto out; X if ((*dest++ = *head++) == EOS) X break; X } X if (*tail != EOS && size != 0) { X --dest; X ++size; X if (dest > start && dest[-1] != SEP) { X *dest++ = SEP; X --size; X } X for (;;) { X if (--size == 0) X goto out; X if ((*dest++ = *tail++) == EOS) X break; X } X } X return; X X out: *dest= EOS; X} X X/* Find a user's home directory, NULL if not found */ X Xstatic char * Xgethome(username) X char *username; X{ X struct passwd *p; X X p= getpwnam(username); X if (p == NULL) X return NULL; X return p->pw_dir; X} X X/* String compare for qsort */ X Xstatic int Xcompare(p, q) X char **p; X char **q; X{ X return strcmp(*p, *q); X} X X/* X * Maintain lists of strings. X * When memory is full, data is lost but X */ X Xstatic void Xaddfile(list, head, tail) X struct flist *list; X char *head; X char *tail; X{ X char *str; X X str= malloc((unsigned) strlen(head) + strlen(tail) + 2); X X if (str == NULL) X return; X if (list->item == 0) { X list->len= 0; X list->item= (char**) malloc(sizeof(char*)); X } X else { X list->item= (char**) realloc((char*)list->item, X (unsigned) (list->len+1)*sizeof(char*)); X if (list->item == 0) X list->len= 0; X } X if (list->item != NULL) { X formpath(str, head, tail, MAXPATH); X list->item[list->len++]= str; X } X else X free(str); X} X X/* Clear the fields of a struct flist before first use */ X Xstatic void Xclear(list) X struct flist *list; X{ X list->len= 0; X list->item= NULL; X} X X/* Free memory held by struct flist after use */ X Xstatic void Xdiscard(list) X struct flist *list; X{ X int i= list->len; X X if (list->item != NULL) { X while (--i >= 0) { X if (list->item[i] != NULL) X free(list->item[i]); X } X free((char*)list->item); X list->item= NULL; X } X list->len= 0; X} X X/* Pattern matching function for filenames */ X/* Each occurrence of the * pattern causes a recursion level */ X Xstatic BOOL Xmatch(name, pat) X char *name; X char *pat; X{ X char c, k; X BOOL ok; X X while ((c= *pat++) != EOS) { X switch (c) { X X case M_ONE: X if (*name++ == EOS) X return NO; X break; X X case M_ALL: X if (*pat == EOS) X return YES; X for (; *name != EOS; ++name) { X if (match(name, pat)) X return YES; X } X return NO; X X case M_SET: X ok= NO; X k= *name++; X while ((c= *pat++) != M_END) { X if (*pat == M_RNG) { X if (c <= k && k <= pat[1]) X ok= YES; X pat += 2; X } X else if (c == k) X ok= YES; X } X if (!ok) X return NO; X break; X X default: X if (*name++ != c) X return NO; X break; X X } X } X return *name == EOS; X} X X/* Perform pattern matching for one segment of the pathname */ X Xstatic void Xsegment(files, mid, pat) X struct flist *files; X char *mid; X char *pat; X{ X char path[MAXPATH]; X int i; X DIR *dirp; X struct direct *dp; X struct flist new; X X clear(&new); X for (i= 0; i < files->len; ++i) { X strcpy(path, files->item[i]); X strcat(path, mid); X free(files->item[i]); X files->item[i]= NULL; X dirp= opendir(path); X if (dirp == NULL) X continue; X while ((dp= readdir(dirp)) != NULL) { X if (*dp->d_name == DOT && *pat != DOT) X ; /* No meta-match on initial '.' */ X else if (match(dp->d_name, pat)) X addfile(&new, path, dp->d_name); X } X closedir(dirp); X } X discard(files); X *files= new; X} X X/* Finish by matching the rest of the pattern (which has no metas) */ X Xstatic void Xfindfiles(files, tail) X struct flist *files; X char *tail; X{ X int i; X struct flist new; X char path[MAXPATH]; X X if (*tail == EOS || files->len == 0) X return; X clear(&new); X for (i= 0; i < files->len; ++i) { X strcpy(path, files->item[i]); X strcat(path, tail); X free(files->item[i]); X files->item[i]= NULL; X if (access(path, 0) == 0) X addfile(&new, path, ""); X } X discard(files); X *files= new; X} X Xstatic void Xglob1(pat, files) X char *pat; X struct flist *files; X{ X char mid[MAXPATH]; X char *end= mid; X char *basestart= mid; X BOOL meta= NO; X char c; X X clear(files); X addfile(files, "", ""); X for (;;) { X switch (c= *pat++) { X X case EOS: X case SEP: X *end= EOS; X if (meta) { X if (basestart == mid) X segment(files, "", basestart); X else if (basestart == mid+1) { X static char sepstr[]= {SEP, EOS}; X segment(files, sepstr, basestart); X } X else { X basestart[-1]= EOS; X segment(files, mid, basestart); X } X if (files->len == 0) X return; X end= basestart= mid; X meta= NO; X } X else if (c == EOS) X findfiles(files, mid); X if (c == EOS) X return; X *end++= c; X basestart= end; X break; X X case M_ALL: X case M_ONE: X case M_SET: X meta= YES; X /* Fall through */ X default: X *end++ = c; X X } X } X} X X/* X * The main 'glob' routine: does $ and ~ substitution and \ quoting, X * and then calls 'glob1' to do the pattern matching. X * Returns 0 if file not found, number of matches otherwise. X * The matches found are appended to the buffer, separated by X * EOS characters. If no matches were found, the pattern is copied X * to the buffer after $ and ~ substitution and \ quoting. X */ X Xint Xglob(pat, buf, size) X char *pat; X char *buf; X unsigned int size; /* sizeof buf */ X{ X char *p, *q; X char c; X struct flist files; X char *start= buf; X char *end= buf+size; X X c= *pat; X if (c == '~') { X p= ++pat; X while (*p != EOS && *p != SEP) X ++p; X if (p == pat) { X q= getenv("HOME"); X if (q == NULL) X --pat; X else X buf= addstr(buf, q, end); X } X else { X q= gethome(makestr(pat, p)); X if (q == NULL) X --pat; X else { X buf= addstr(buf, q, end); X pat= p; X } X } X } X else if (c == '$') { X p= ++pat; X while (isalnum(*p) || *p == '_') X ++p; X q= getenv(makestr(pat, p)); X if (q != NULL) X buf= addstr(buf, q, end); X pat= p; X } X else if (c == QUOTE && (pat[1] == '$' || pat[1] == '~')) X ++pat; X X while (buf < end && (c= *pat++) != EOS) { X switch (c) { X X case QUOTE: X if ((c= *pat++) != EOS && index("\\*?[", c) != NULL) X *buf++ = c; X else { X *buf++ = QUOTE; X --pat; X } X break; X X case '*': X *buf++ = M_ALL; X break; X X case '?': X *buf++ = M_ONE; X break; X X case '[': X if (*pat == EOS || index(pat+1, ']') == NULL) { X *buf++ = c; X break; X } X *buf++ = M_SET; X c= *pat++; X do { X *buf++ = c; X if (*pat == '-' && (c= pat[1]) != ']') { X *buf++ = M_RNG; X *buf++= c; X pat += 2; X } X } while ((c= *pat++) != ']'); X *buf++ = M_END; X break; X X default: X *buf++ = c; X break; X X } X } X *buf= EOS; X X glob1(start, &files); X X if (files.len == 0) { X /* Change meta characters back to printing characters */ X for (buf= start; *buf != EOS; ++buf) { X if (*buf & 0200) X *buf &= ~0200; X } X return 0; /* No match */ X } X else { X int i, len; X X qsort((char*)files.item, files.len, sizeof(char*), compare); X buf= start; X *buf= EOS; X for (i= 0; i < files.len; ++i) { X len= strlen(files.item[i]); X if (len+1 > size) X break; X strcpy(buf, files.item[i]); X buf += len+1; X size -= len+1; X } X discard(&files); X return i; X } X} EOF echo 'x - main.c' sed 's/^X//' > 'main.c' << 'EOF' X/* X * Main program to test 'glob' routine. X */ X X#include <stdio.h> X Xextern int glob(); X Xmain(argc, argv) X int argc; X char **argv; X{ X int i, j, n; X char *p; X char buffer[10000]; X X if (argc < 2) { X fprintf(stderr, "usage: %s pattern ...\n", argv[0]); X exit(2); X } X for (i= 1; i < argc; ++i) { X printf("%s: ", argv[i]); X n= glob(argv[i], buffer, sizeof buffer); X printf("%d\n", n); X p= buffer; X if (n == 0) X ++n; X for (j= 0; j < n; ++j) { X printf("\t%s\n", p); X p += strlen(p) + 1; X } X } X exit(0); X} EOF echo 'Part 01 out of 01 of glob -- expand meta-characters complete.' exit 0