[mod.sources] v06i047: 'Globbing' library routine

sources-request@mirror.UUCP (07/09/86)

Submitted by: seismo!mcvax!guido (Guido van Rossum)
Mod.sources: Volume 6, Issue 47
Archive-name: glob

[  A few items: 1) If you use csh, you have to invoke the test program
   at "./glob"; 2) A comment in the code says [...] is not implemented;
   it is; 3) As the README says, some sites will have to add -Dindex=strchr
   to the CFLAGS variable in the Makefile; 4) This uses the opendir()
   family of routines.  --r$ ]

: This is a shell archive.
: Extract with 'sh this_file'.
echo 'Start of glob -- expand meta-characters, part 01 out of 01:'
echo 'x - Makefile'
sed 's/^X//' > 'Makefile' << 'EOF'
XCFLAGS=
XLINTFLAGS=-abh
XSRCS=	glob.c main.c
XOBJS=	glob.o main.o
XFILES=	README $(SRCS) Makefile glob.3
X
Xglob:	glob.o main.o
X	$(CC) -o glob $(CFLAGS) glob.o main.o
X
Xtags:	$(SRCS)
X	ctags $(SRCS)
X
Xlint:	$(SRCS)
X	lint $(LINTFLAGS) $(SRCS)
X
Xshar:	$(FILES)
X	packmail -o shar -t 'glob -- expand meta-characters' $(FILES)
EOF
echo 'x - README'
sed 's/^X//' > 'README' << 'EOF'
XGlob -- expand meta-characters in file names -- by Guido van Rossum
X
XThis is a filename 'globbing' routine that I wrote.  Besides expanding
X*, ? and [...] exactly like sh(1), it also replaces initial ~ or ~user
Xby user's home directory, replaces initial $var by the contents of the
Xenvironment variable, and has a quoting mechanism whereby any of the
Xmagic characters can be quoted by '\' (including '\' itself).
X
XIt is really intended for inclusion in some larger program, but there is
Xa small test program that expands patterns given as arguments.
X
XYou should have received the following files:
X
X	README		this file
X	glob.c		the code
X	main.c		test program
X	glob.3		manual page
X	Makefile	make file
X
XPortability issues: I have only been able to test it on a VAX running
X4.3-beta, but I believe it can be ported to anything that supports
Xopendir, readdir and closedir.  If you don't have index, substitute
Xstrchr using the -D flag to the C compiler.  The program passes lint
Xexcept for the usual complaints about malloc and strcpy.
X
XThis software is copyright (c) 1986 by Stichting Mathematisch Centrum.
XYou may use, modify and copy it, provided this notice is present in all
Xmodified or unmodified copies, and provided you don't make money for it.
X
X*** You may only distribute the set of files mentioned above as a whole. ***
X
XIf you find bugs, or have any other questions, please mail them to the
Xauthor, whose address is:
X
X	Guido van Rossum
X	CWI, dept. AA
X	Kruislaan 413
X	1089 SJ  Amsterdam
X
X	Electronic mail: guido@mcvax.UUCP
EOF
echo 'x - glob.3'
sed 's/^X//' > 'glob.3' << 'EOF'
X.TH GLOB 3 "June 28, 1986"
X.AT 3
X.SH NAME
Xglob \- expand meta-characters in file names
X.SH SYNOPSIS
X.nf
X.B "int glob(pattern, buffer, size)
X.B "char *pattern;
X.B "char *buffer;
X.B "unsigned int size;
X.fi
X.SH DESCRIPTION
X.I Glob
Xexpands the meta-characters *, ? and [...] in
X.I pattern
Xand copies the file name(s) that match the pattern to 
X.I buffer ,
Xwhose length is given by
X.I size .
X.PP
XBefore expansion is attempted, the pattern is processed as follows:
Xan initial
X.I ~user
Xis replaced by
X.I user's
Xhome directory, an initial ~ is replaced by the contents of $HOME,
Xand an initial
X.I $variable
Xis replaced by the contents of the environment variable by that name.
XThe universal quoting character \e can be prefixed to *, ? and [, to
Xinitial ~ or $, and to itself to prevent the special meaning.
X.PP
XThe expansion algorithm used is believed to give exactly the same
Xresults as the expansion of meta-characters by the Bourne shell
X.I sh (1),
Xexcept for the different quoting mechanism and the expansion of ~ and $.
XThe files are sorted.
X.SH RETURN VALUE
X.I Glob
Xreturns the number of files matching the pattern that could be
Xtransferred to the buffer.
XIf no files match, the return value is 0 but the pattern is copied to
Xthe buffer after expansion of ~, $ and \e-quotes.
X.SH SEE ALSO
Xsh(1)
X.SH AUTHOR
XGuido van Rossum, CWI, Amsterdam <guido@mcvax.uucp>
X.SH DIAGNOSTICS
XThere are no explicit diagnostics.
XIf there is not enough memory to hold the internal lists of
Xpartially-matching files, not all matching files may be found.
XIf the buffer has not enough space to hold all matching files, as many
Xas will fit are transferred to it.
XIf
X.I $variable
Xhas no value, the empty string is substituted.
XIf no expansion can be found for ~ or
X.I ~user ,
Xit is left in the pattern unchanged.
XIf 
X.SH BUGS
XThere should be an interface to the ``raw'' globbing algorithm, which
Xcan return an arbitrary number of files in a ``malloced'' structure and
Xdoes not do ~ or $ expansion or \e-quoting.
X.PP
XThe code uses the Berkeley directory-reading package, so in order to
Xport it to v7 or sys5 you'll need to get hold of a public domain
Xre-implementation of that package.
X.PP
XThe name is a relict from ancient times.
EOF
echo 'x - glob.c'
sed 's/^X//' > 'glob.c' << 'EOF'
X/* This software is copyright (c) 1986 by Stichting Mathematisch Centrum.
X * You may use, modify and copy it, provided this notice is present in all
X * copies, modified or unmodified, and provided you don't make money for it.
X *
X * Written 86-jun-28 by Guido van Rossum, CWI, Amsterdam <guido@mcvax.uucp>
X */
X
X/*
X * 'Glob' routine -- match * and ? in filename.
X * Extra services: initial ~username is replaced by username's home dir,
X * initial ~ is replaced by $HOME, initial $var is replaced by
X * return value of getenv("var").
X * Quoting convention: \ followed by a magic character inhibits its
X * special meaning.
X * BUGS: [...] is not implemented.
X * Nonexisting $var is treated as empty string; nonexisting ~user
X * is copied literally.
X */
X
X#include <stdio.h>
X#include <strings.h>
X#include <ctype.h>
X#include <pwd.h>
X#include <sys/types.h>
X#include <sys/dir.h>
X
Xstruct passwd *getpwnam();
Xchar *getenv();
Xchar *malloc();
Xchar *realloc();
X
X#define EOS '\0'
X#define SEP '/'
X#define DOT '.'
X#define QUOTE '\\'
X
X#define BOOL int
X#define NO 0
X#define YES 1
X
X#define MAXPATH 1024
X#define MAXBASE 256
X
X#define META(c) ((char)((c)|0200))
X#define M_ALL META('*')
X#define M_ONE META('?')
X#define M_SET META('[')
X#define M_RNG META('-')
X#define M_END META(']')
X
Xstruct flist {
X	int len;
X	char **item;
X};
X
X/* Make a null-terminated string of the chars between two pointers */
X/* (Limited length, static buffer returned) */
X
Xstatic char *
Xmakestr(start, end)
X	char *start;
X	char *end;
X{
X	static char buf[100];
X	char *p= buf;
X	
X	while (start < end && p < &buf[100])
X		*p++ = *start++;
X	*p= EOS;
X	return buf;
X}
X
X/* Append string to buffer, return new end of buffer.  Guarded. */
X
Xstatic char *
Xaddstr(dest, src, end)
X	char *dest;
X	char *src;
X	char *end;
X{
X	while (*dest++ = *src++) {
X		if (dest >= end)
X			break;
X	}
X	return --dest;
X}
X
X/* Form a pathname by concatenating head, maybe a / and tail. */
X/* Truncates to space available. */
X
Xstatic void
Xformpath(dest, head, tail, size)
X	char *dest;
X	char *head;
X	char *tail;
X	unsigned int size; /* sizeof dest */
X{
X	char *start= dest;
X	
X	for (;;) {
X		if (--size == 0)
X			goto out;
X		if ((*dest++ = *head++) == EOS)
X			break;
X	}
X	if (*tail != EOS && size != 0) {
X		--dest;
X		++size;
X		if (dest > start && dest[-1] != SEP) {
X			*dest++ = SEP;
X			--size;
X		}
X		for (;;) {
X			if (--size == 0)
X				goto out;
X			if ((*dest++ = *tail++) == EOS)
X				break;
X		}
X	}
X	return;
X	
X out:	*dest= EOS;
X}
X
X/* Find a user's home directory, NULL if not found */
X
Xstatic char *
Xgethome(username)
X	char *username;
X{
X	struct passwd *p;
X	
X	p= getpwnam(username);
X	if (p == NULL)
X		return NULL;
X	return p->pw_dir;
X}
X
X/* String compare for qsort */
X
Xstatic int
Xcompare(p, q)
X	char **p;
X	char **q;
X{
X	return strcmp(*p, *q);
X}
X
X/*
X * Maintain lists of strings.
X * When memory is full, data is lost but 
X */
X
Xstatic void
Xaddfile(list, head, tail)
X	struct flist *list;
X	char *head;
X	char *tail;
X{
X	char *str;
X	
X	str= malloc((unsigned) strlen(head) + strlen(tail) + 2);
X	
X	if (str == NULL)
X		return;
X	if (list->item == 0) {
X		list->len= 0;
X		list->item= (char**) malloc(sizeof(char*));
X	}
X	else {
X		list->item= (char**) realloc((char*)list->item,
X				(unsigned) (list->len+1)*sizeof(char*));
X		if (list->item == 0)
X			list->len= 0;
X	}
X	if (list->item != NULL) {
X		formpath(str, head, tail, MAXPATH);
X		list->item[list->len++]= str;
X	}
X	else
X		free(str);
X}
X
X/* Clear the fields of a struct flist before first use */
X
Xstatic void
Xclear(list)
X	struct flist *list;
X{
X	list->len= 0;
X	list->item= NULL;
X}
X
X/* Free memory held by struct flist after use */
X
Xstatic void
Xdiscard(list)
X	struct flist *list;
X{
X	int i= list->len;
X	
X	if (list->item != NULL) {
X		while (--i >= 0) {
X			if (list->item[i] != NULL)
X				free(list->item[i]);
X		}
X		free((char*)list->item);
X		list->item= NULL;
X	}
X	list->len= 0;
X}
X
X/* Pattern matching function for filenames */
X/* Each occurrence of the * pattern causes a recursion level */
X
Xstatic BOOL
Xmatch(name, pat)
X	char *name;
X	char *pat;
X{
X	char c, k;
X	BOOL ok;
X	
X	while ((c= *pat++) != EOS) {
X		switch (c) {
X
X		case M_ONE:
X			if (*name++ == EOS)
X				return NO;
X			break;
X
X		case M_ALL:
X			if (*pat == EOS)
X				return YES;
X			for (; *name != EOS; ++name) {
X				if (match(name, pat))
X					return YES;
X			}
X			return NO;
X
X		case M_SET:
X			ok= NO;
X			k= *name++;
X			while ((c= *pat++) != M_END) {
X				if (*pat == M_RNG) {
X					if (c <= k && k <= pat[1])
X						ok= YES;
X					pat += 2;
X				}
X				else if (c == k)
X					ok= YES;
X			}
X			if (!ok)
X				return NO;
X			break;
X
X		default:
X			if (*name++ != c)
X				return NO;
X			break;
X
X		}
X	}
X	return *name == EOS;
X}
X
X/* Perform pattern matching for one segment of the pathname */
X
Xstatic void
Xsegment(files, mid, pat)
X	struct flist *files;
X	char *mid;
X	char *pat;
X{
X	char path[MAXPATH];
X	int i;
X	DIR *dirp;
X	struct direct *dp;
X	struct flist new;
X	
X	clear(&new);
X	for (i= 0; i < files->len; ++i) {
X		strcpy(path, files->item[i]);
X		strcat(path, mid);
X		free(files->item[i]);
X		files->item[i]= NULL;
X		dirp= opendir(path);
X		if (dirp == NULL)
X			continue;
X		while ((dp= readdir(dirp)) != NULL) {
X			if (*dp->d_name == DOT && *pat != DOT)
X				; /* No meta-match on initial '.' */
X			else if (match(dp->d_name, pat))
X				addfile(&new, path, dp->d_name);
X		}
X		closedir(dirp);
X	}
X	discard(files);
X	*files= new;
X}
X
X/* Finish by matching the rest of the pattern (which has no metas) */
X
Xstatic void
Xfindfiles(files, tail)
X	struct flist *files;
X	char *tail;
X{
X	int i;
X	struct flist new;
X	char path[MAXPATH];
X
X	if (*tail == EOS || files->len == 0)
X		return;
X	clear(&new);
X	for (i= 0; i < files->len; ++i) {
X		strcpy(path, files->item[i]);
X		strcat(path, tail);
X		free(files->item[i]);
X		files->item[i]= NULL;
X		if (access(path, 0) == 0)
X			addfile(&new, path, "");
X	}
X	discard(files);
X	*files= new;
X}
X
Xstatic void
Xglob1(pat, files)
X	char *pat;
X	struct flist *files;
X{
X	char mid[MAXPATH];
X	char *end= mid;
X	char *basestart= mid;
X	BOOL meta= NO;
X	char c;
X	
X	clear(files);
X	addfile(files, "", "");
X	for (;;) {
X		switch (c= *pat++) {
X
X		case EOS:
X		case SEP:
X			*end= EOS;
X			if (meta) {
X				if (basestart == mid)
X					segment(files, "", basestart);
X				else if (basestart == mid+1) {
X					static char sepstr[]= {SEP, EOS};
X					segment(files, sepstr, basestart);
X				}
X				else {
X					basestart[-1]= EOS;
X					segment(files, mid, basestart);
X				}
X				if (files->len == 0)
X					return;
X				end= basestart= mid;
X				meta= NO;
X			}
X			else if (c == EOS)
X				findfiles(files, mid);
X			if (c == EOS)
X				return;
X			*end++= c;
X			basestart= end;
X			break;
X
X		case M_ALL:
X		case M_ONE:
X		case M_SET:
X			meta= YES;
X			/* Fall through */
X		default:
X			*end++ = c;
X
X		}
X	}
X}
X
X/*
X * The main 'glob' routine: does $ and ~ substitution and \ quoting,
X * and then calls 'glob1' to do the pattern matching.
X * Returns 0 if file not found, number of matches otherwise.
X * The matches found are appended to the buffer, separated by
X * EOS characters.  If no matches were found, the pattern is copied
X * to the buffer after $ and ~ substitution and \ quoting.
X */
X
Xint
Xglob(pat, buf, size)
X	char *pat;
X	char *buf;
X	unsigned int size; /* sizeof buf */
X{
X	char *p, *q;
X	char c;
X	struct flist files;
X	char *start= buf;
X	char *end= buf+size;
X	
X	c= *pat;
X	if (c == '~') {
X		p= ++pat;
X		while (*p != EOS && *p != SEP)
X			++p;
X		if (p == pat) {
X			q= getenv("HOME");
X			if (q == NULL)
X				--pat;
X			else
X				buf= addstr(buf, q, end);
X		}
X		else {
X			q= gethome(makestr(pat, p));
X			if (q == NULL)
X				--pat;
X			else {
X				buf= addstr(buf, q, end);
X				pat= p;
X			}
X		}
X	}
X	else if (c == '$') {
X		p= ++pat;
X		while (isalnum(*p) || *p == '_')
X			++p;
X		q= getenv(makestr(pat, p));
X		if (q != NULL)
X			buf= addstr(buf, q, end);
X		pat= p;
X	}
X	else if (c == QUOTE && (pat[1] == '$' || pat[1] == '~'))
X		++pat;
X	
X	while (buf < end && (c= *pat++) != EOS) {
X		switch (c) {
X		
X		case QUOTE:
X			if ((c= *pat++) != EOS && index("\\*?[", c) != NULL)
X				*buf++ = c;
X			else {
X				*buf++ = QUOTE;
X				--pat;
X			}
X			break;
X		
X		case '*':
X			*buf++ = M_ALL;
X			break;
X		
X		case '?':
X			*buf++ = M_ONE;
X			break;
X		
X		case '[':
X			if (*pat == EOS || index(pat+1, ']') == NULL) {
X				*buf++ = c;
X				break;
X			}
X			*buf++ = M_SET;
X			c= *pat++;
X			do {
X				*buf++ = c;
X				if (*pat == '-' && (c= pat[1]) != ']') {
X					*buf++ = M_RNG;
X					*buf++= c;
X					pat += 2;
X				}
X			} while ((c= *pat++) != ']');
X			*buf++ = M_END;
X			break;
X		
X		default:
X			*buf++ = c;
X			break;
X
X		}
X	}
X	*buf= EOS;
X	
X	glob1(start, &files);
X	
X	if (files.len == 0) {
X		/* Change meta characters back to printing characters */
X		for (buf= start; *buf != EOS; ++buf) {
X			if (*buf & 0200)
X				*buf &= ~0200;
X		}
X		return 0; /* No match */
X	}
X	else {
X		int i, len;
X		
X		qsort((char*)files.item, files.len, sizeof(char*), compare);
X		buf= start;
X		*buf= EOS;
X		for (i= 0; i < files.len; ++i) {
X			len= strlen(files.item[i]);
X			if (len+1 > size)
X				break;
X			strcpy(buf, files.item[i]);
X			buf += len+1;
X			size -= len+1;
X		}
X		discard(&files);
X		return i;
X	}
X}
EOF
echo 'x - main.c'
sed 's/^X//' > 'main.c' << 'EOF'
X/*
X * Main program to test 'glob' routine.
X */
X
X#include <stdio.h>
X
Xextern int glob();
X
Xmain(argc, argv)
X	int argc;
X	char **argv;
X{
X	int i, j, n;
X	char *p;
X	char buffer[10000];
X
X	if (argc < 2) {
X		fprintf(stderr, "usage: %s pattern ...\n", argv[0]);
X		exit(2);
X	}
X	for (i= 1; i < argc; ++i) {
X		printf("%s: ", argv[i]);
X		n= glob(argv[i], buffer, sizeof buffer);
X		printf("%d\n", n);
X		p= buffer;
X		if (n == 0)
X			++n;
X		for (j= 0; j < n; ++j) {
X			printf("\t%s\n", p);
X			p += strlen(p) + 1;
X		}
X	}
X	exit(0);
X}
EOF
echo 'Part 01 out of 01 of glob -- expand meta-characters complete.'
exit 0