[comp.sources.unix] v13i068: Perform "set" operations on command line arguments

rsalz@bbn.com (Rich Salz) (02/24/88)
Submitted-by: Chris Tweed <mcvax!caad.ed.ac.uk!chris@UUNET.UU.NET>
Posting-number: Volume 13, Issue 68
Archive-name: sets

I am sending you a program called 'sets' which you may wish to consider
for comp.sources.unix.  Sets performs union, intersection, and difference
operations on elements of two sets given on the command line.  I have
found it most useful for limiting the set of filenames I want another
command to work on.

For example, if I want to edit every file except those ending in '.c' I
could type:
	vi `sets * -d *.c`
which prints out the difference between the set of all files in the
current directory and the set of all '.c' files in the same directory.

Another example is to list the files in one directory which have the same
names as those in another directory, i.e. the intersection of the two
sets:
	sets * -i ../*

Hope it's useful.
    Chris Tweed
    chris@caad.ed.ac.uk
    ..!mcvax!ukc!edcaad!chris

# This is a shell archive.  Remove anything before this line,
# then unpack it by saving it in a file and typing "sh file".
# Contents:  sets.L Makefile sets.c
 
echo x - sets.L
sed 's/^@//' > "sets.L" <<'@//E*O*F sets.L//'
@.TH SETS L "December 3, 1987" "" "Local UNIX Programmer's Manual"
@.UC 4
@.SH NAME
sets \- performs set operations on its arguments
@.SH SYNOPSIS
@.B sets
[\-p] e1 e2 ... en \-u | \-d | \-i e1 e2 ... en
@.br
@.SH DESCRIPTION
@.I Sets
prints on the standard output stream the result of a 
@.B single
set operation on
two sets of elements provided on the command line.
The sets are separated by the operator flag.
The program collects the elements for each set, removes
duplicated elements, and then performs the set operation.
@.PP
@.I Sets
performs three set operations:
@.TP
@.B "e1 e2 ... en \-u[nion] e1 e2 ... en"
prints the union of the two sets;
@.TP
@.B "e1 e2 ... en \-d[ifference] e1 e2 ... en"
prints the set difference
@.I "e1 e2 ... en"
\- 
@.I "e1 e2 ... en;"
@.TP
@.B "e1 e2 ... en \-i[ntersection] e1 e2 ... en"
prints the intersection of the two sets.
@.PP
As
@.I sets
is intended to be used on filenames it ignores leading pathnames
in the set operations.
The
@.B \-p
flag makes pathnames significant in membership tests.
@.SH "USAGE"
@.PP
@.I Sets
is most useful for restricting the files to be processed by some
other command.
For example, to
@.I grep
all files in a directory except the object files you might use:
@.TP
grep string `sets * -d *.o`
@.PP
Since by default leading pathnames are ignored,
@.I sets
can be used across directories \- for example, to list files
with the same names in two directories:
@.TP
sets ../* -i *
@.PP
Note that full pathnames are included in the output.
As a result the relative position
of the sets on the command line is significant.
The above command will print all matching names with a leading "../".
If the position of the sets is reversed only the filenames will be printed.
@.SH "FILES"
@.PP
/usr/local/bin/sets
@.br
/usr/src/local/sets.c
@.\" .SH "SEE ALSO"
@.\" .SH "DIAGNOSTICS"
@.\" .SH "BUGS"
@.SH "AUTHOR"
@.PP
Chris Tweed
@//E*O*F sets.L//
chmod u=rw,g=r,o=r sets.L
 
echo x - Makefile
sed 's/^@//' > "Makefile" <<'@//E*O*F Makefile//'
# Build and install rules for sets.
#
#   make           compile $(PROG) from $(PROG).c
#   make install   install the binary as $(FINAL) and the manual page as $(MAN)
#
# The compiler is taken from $(CC) (make's built-in default is cc), so it
# can be overridden on the command line, e.g. "make CC=gcc".
CFLAGS=-O
PROG=sets
FINAL=/usr/local/bin/$(PROG)
MAN=/usr/man/manl/$(PROG).l

$(PROG): $(PROG).c
	$(CC) $(CFLAGS) -o $(PROG) $(PROG).c

# install -s strips the binary while copying it into place.
install: $(PROG)
	install -s ./$(PROG) $(FINAL)
	cp $(PROG).L $(MAN)
@//E*O*F Makefile//
chmod u=rw,g=r,o=r Makefile
 
echo x - sets.c
sed 's/^@//' > "sets.c" <<'@//E*O*F sets.c//'
/*
 * sets - performs set operations on two sets of arguments and
 *        prints the result on the standard output stream
 * 
 * usage: sets	[-p[aths]]  e1 e2 ... en \-u[nion] e1 e2 ... en
 *					OR
 * 		            e1 e2 ... en \-d[ifference] e1 e2 ... en
 *					OR
 * 		            e1 e2 ... en \-i[ntersection] e1 e2 ... en
 *
 * This code may be freely distributed provided this comment
 * is not removed or substantially altered.  Please mail me any
 * fixes, changes, or enhancements.
 *
 * Christopher Tweed, EdCAAD, University of Edinburgh, Scotland.
 * chris@caad.ed.ac.uk
 * ..mcvax!ukc!edcaad!chris
 *
 * 3 December 1987.
 * 
 */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define MAXSET	256	/* maximum size of a set */

/* STREQ(s1, s2) - true when the two strings compare equal */
#define STREQ(s1, s2)	(strcmp((s1), (s2)) == 0)
/* NOT(p) - true when the BOOLEAN p is FALSE */
#define NOT(p)		((p) == FALSE)
/*
 * NAME(s) - the string used when testing membership: s with its
 * leading path removed while ignorep is TRUE, otherwise s itself.
 * Both uses of the argument are parenthesized so that any expression
 * may be passed safely.
 */
#define NAME(s)		((ignorep == TRUE) ? nopath((s)) : (s))

typedef enum { FALSE=0, TRUE } BOOLEAN;
typedef enum { NULL_OP=0, UNION, DIFF, INTERSECT } OPERATOR;

extern	int 	strcmp();
extern	int	nodups();	/* declared here because main() calls it first */
static	void	too_many();
static	void	usage();
static	char	*nopath();
static	BOOLEAN	member();
static	BOOLEAN	ignorep = TRUE;	/* cleared by the -p flag */

/*
 * main(argc, argv)
 *
 * Splits the command line at the operator flag (-u, -d or -i) into
 * two sets, removes duplicates from each, and prints the result of
 * the requested operation on one line, every element followed by a
 * space.  The -p flag makes leading paths significant; it may appear
 * anywhere and does NOT start the second set.  If more than one
 * operator flag is given, the last one is used.
 *
 * Exits 0 on success.  usage() and too_many() exit 1 when the
 * arguments are malformed or a set would exceed MAXSET elements.
 */

int
main(argc, argv)
int argc;
char *argv[];
{
	extern int nodups();		/* defined later in this file */
	char *prog = argv[0];		/* program name for usage() */
	char *arg;			/* argument being examined */
	int a;				/* index into argv */
	int i;				/* index into a set */
	BOOLEAN second = FALSE;		/* set once an operator is seen */
	char *set1[MAXSET];		/* the first set */
	int n1 = 0;			/* number of elements in first set */
	char *set2[MAXSET];		/* the second set */
	int n2 = 0;			/* number of elements in second set */
	OPERATOR op = NULL_OP;		/* set operation to perform */

	if (argc < 2) {
	    fprintf(stderr, "not enough arguments\n");
	    usage(prog);			/* EXITS */
	}

	/*
	 * collect sets; argv itself is left untouched so that prog
	 * keeps naming the program in every usage() call
	 */
	for (a = 1; a < argc; a++) {
	    arg = argv[a];

	    if (arg[0] != '-') {		/* an element */
		if (second == TRUE) {
		    if (n2 == MAXSET)
			too_many();		/* EXITS */
		    set2[n2++] = arg;
		} else {
		    if (n1 == MAXSET)
			too_many();		/* EXITS */
		    set1[n1++] = arg;
		}
		continue;
	    }

	    /* a flag: only a real operator switches to the second set */
	    switch (arg[1]) {
		case 'u':			/* set union */
		    op = UNION;
		    second = TRUE;
		    break;
		case 'd':			/* set difference */
		    op = DIFF;
		    second = TRUE;
		    break;
		case 'i':			/* set intersection */
		    op = INTERSECT;
		    second = TRUE;
		    break;
		case 'p':			/* don't ignore paths */
		    ignorep = FALSE;
		    break;
		default:
		    fprintf(stderr, "illegal set operator %c\n", arg[1]);
		    usage(prog);		/* EXITS */
	    }
	}

	if (op == NULL_OP) {
	    fprintf(stderr, "missing operator\n");
	    usage(prog);			/* EXITS */
	}

	/* remove duplicates */
	n1 = nodups(set1, n1);
	n2 = nodups(set2, n2);

	/* do set operation and print result */
	switch (op) {
	    case UNION:
		/* all of set1, then whatever set2 adds to it */
		for (i = 0; i < n1; i++)
		    printf("%s ", set1[i]);
		for (i = 0; i < n2; i++)
		    if (NOT(member(set2[i], set1, n1)))
			printf("%s ", set2[i]);
		break;
	    case DIFF:
		for (i = 0; i < n1; i++)
		    if (NOT(member(set1[i], set2, n2)))
			printf("%s ", set1[i]);
		break;
	    case INTERSECT:
		for (i = 0; i < n1; i++)
		    if (member(set1[i], set2, n2) == TRUE)
			printf("%s ", set1[i]);
		break;
	    default:			/* NULL_OP was rejected above */
		break;
	}

	printf("\n");
	exit(0);
}

/*
 * nodups(set, n)
 *
 * Squeezes repeated strings out of the n-element array set, keeping
 * the earliest occurrence of each and preserving the order of the
 * survivors.  Strings are compared in full (paths included).  Slots
 * vacated at the end of the array are set to NULL.
 *
 * Returns the number of elements left in the set.
 *
 */

int
nodups(set, n)
char *set[];
int n;
{
	int count = n;		/* elements still in the set */
	int hi;			/* candidate being checked */
	int lo;			/* earlier element compared against */
	int up;			/* element being shifted down */

	/* work downwards so entries above hi are already unique */
	for (hi = n - 1; hi > 0; hi--) {
	    for (lo = 0; lo < hi; lo++) {
		/* cheap first-character test before the full compare */
		if (set[hi][0] != set[lo][0])
		    continue;
		if (!STREQ(set[hi], set[lo]))
		    continue;

		/* set[hi] repeats set[lo]: drop it and close the gap */
		set[hi] = NULL;
		for (up = hi + 1; up < count; up++) {
		    set[up - 1] = set[up];
		    set[up] = NULL;
		}
		count--;
		break;
	    }
	}

	return count;
}

/*
 * member(s, set, n)
 *
 * Looks for string s among the first n entries of set.  Both sides
 * of each comparison go through NAME(), so leading paths count only
 * when ignorep is FALSE.
 *
 * Returns TRUE on the first match, FALSE if there is none.
 *
 */

static BOOLEAN
member(s, set, n)
register char *s, *set[];
register int n;
{
	register int left;	/* entries not yet examined */

	for (left = n; left > 0; left--, set++) {
	    if (STREQ(NAME(s), NAME(*set)))
		return TRUE;
	}

	return FALSE;
}

/*
 * nopath(s)
 *
 * Returns a pointer to the part of s that follows its last '/',
 * or s itself when it contains no '/'.  The result points into s;
 * nothing is copied or modified.  A string ending in '/' yields
 * the empty string.
 *
 * Uses the ISO C strrchr() rather than the legacy BSD rindex().
 *
 */

static char *
nopath(s)
char *s;
{
	char *slash = strrchr(s, '/');

	return (slash != NULL) ? slash + 1 : s;
}

/*
 * too_many()
 *
 * Reports on stderr that a set has outgrown its array and
 * terminates the program with exit status 1.
 *
 */

static void
too_many()
{
	fputs("too many members\n", stderr);
	exit(1);
}

/*
 * usage(prog)
 *
 * Writes a summary of the accepted command lines to stderr, with
 * prog shown as the command name on the first line, and then
 * terminates the program with exit status 1.
 *
 */

static void
usage(prog)
char *prog;
{
	static char operands[] = "e1 e2 ... en";
	FILE *err = stderr;

	fprintf(err, "%s\t%s -u[nion] %s\n", prog, operands, operands);
	fprintf(err, "\t%s -d[ifference] %s\n", operands, operands);
	fprintf(err, "\t%s -i[ntersection] %s\n", operands, operands);
	fprintf(err, "\t-p[aths]\t/* don't ignore leading paths */\n");
	exit(1);
}
@//E*O*F sets.c//
chmod u=rw,g=r,o=r sets.c
 
# Integrity check: compare the line/word/character counts of the files
# just unpacked with the counts recorded in the table below.  The table
# is fixed, so a file whose contents differ from what the table was
# computed for is reported as a difference.
echo Inspecting for damage in transit...
# Scratch files, made unique with the shell's process id ($$).
temp=/tmp/shar$$; dtemp=/tmp/.shar$$
# Remove the scratch files on normal exit (0) and on signals 1, 2, 3, 15.
trap "rm -f $temp $dtemp; exit" 0 1 2 3 15
# Expected `wc` output; the backslash-quoted delimiter stops the shell
# from expanding anything inside the here-document.
cat > $temp <<\!!!
      74     339    1851 sets.L
      11      20     202 Makefile
     238     763    4758 sets.c
     323    1122    6811 total
!!!
# sed drops any leading directory from the names wc prints; diff -b
# ignores differences in the amount of white space between columns.
wc  sets.L Makefile sets.c | sed 's=[^ ]*/==' | diff -b $temp - >$dtemp
# A non-empty diff file (-s) means at least one count did not match.
if [ -s $dtemp ]
then echo "Ouch [diff of wc output]:" ; cat $dtemp
else echo "No problems found."
fi
exit 0


-- 
For comp.sources.unix stuff, mail to sources@uunet.uu.net.