rsalz@bbn.com (Rich Salz) (02/24/88)
Submitted-by: Chris Tweed <mcvax!caad.ed.ac.uk!chris@UUNET.UU.NET> Posting-number: Volume 13, Issue 68 Archive-name: sets I am sending you a program called 'sets' which you may wish to consider for comp.sources.unix. Sets performs union, intersection, and difference operations on elements of two sets given on the command line. I have found it most useful for limiting the set of filenames I want another command to work on. For example, if I want to edit every file except those ending in '.c' I could type: vi `sets * -d *.c` which prints out the difference between the set of all files in the current directory and the set of all '.c' files in the same directory. Another example is to list the files in one directory which have the same names as those in another directory, i.e. the intersection of the two sets: sets * -i ../* Hope it's useful. Chris Tweed chris@caad.ed.ac.uk ..!mcvax!ukc!edcaad!chris # This is a shell archive. Remove anything before this line, # then unpack it by saving it in a file and typing "sh file". # Contents: sets.L Makefile sets.c echo x - sets.L sed 's/^@//' > "sets.L" <<'@//E*O*F sets.L//' @.TH SETS L "December 3, 1987" "" "Local UNIX Programmer's Manual" @.UC 4 @.SH NAME sets \- performs set operations on its arguments @.SH SYNOPSIS @.B sets [\-p] e1 e2 ... en \-u | \-d | \-i e1 e2 ... en @.br @.SH DESCRIPTION @.I Sets prints on the standard output stream the result of a @.B single set operation on two sets of elements provided on the command line. The sets are separated by the operator flag. The program collects the elements for each set, removes duplicated elements, and then performs the set operation. @.PP @.I Sets performs three set operations: @.TP @.B "e1 e2 ... en \-u[nion] e1 e2 ... en" prints the union of the two sets; @.TP @.B "e1 e2 ... en \-d[ifference] e1 e2 ... en" prints the set difference @.I "e1 e2 ... en" \- @.I "e1 e2 ... en;" @.TP @.B "e1 e2 ... en \-i[ntersection] e1 e2 ... en" prints the intersection of the two sets. 
@.PP As @.I sets is intended to be used on filenames it ignores leading pathnames in the set operations. The @.B \-p flag makes pathnames significant in membership tests. @.SH "USAGE" @.PP @.I Sets is most useful for restricting the files to be processed by some other command. For example, to @.I grep all files in a directory except the object files you might use: @.TP grep string `sets * -d *.o` @.PP Since by default leading pathnames are ignored, @.I sets can be used across directories \- for example, to list files with the same names in two directories: @.TP sets ../* -i * @.PP Note that full pathnames are included in the output. As a result the relative position of the sets on the command line is significant. The above command will print all matching names with a leading "../". If the position of the sets is reversed only the filenames will be printed. @.SH "FILES" @.PP /usr/local/bin/sets @.br /usr/src/local/sets.c @.\" .SH "SEE ALSO" @.\" .SH "DIAGNOSTICS" @.\" .SH "BUGS" @.SH "AUTHOR" @.PP Chris Tweed @//E*O*F sets.L// chmod u=rw,g=r,o=r sets.L echo x - Makefile sed 's/^@//' > "Makefile" <<'@//E*O*F Makefile//' CFLAGS=-O PROG=sets FINAL=/usr/local/bin/$(PROG) MAN=/usr/man/manl/$(PROG).l $(PROG): $(PROG).c cc $(CFLAGS) -o $(PROG) $(PROG).c install: $(PROG) install -s ./$(PROG) $(FINAL) cp $(PROG).L $(MAN) @//E*O*F Makefile// chmod u=rw,g=r,o=r Makefile echo x - sets.c sed 's/^@//' > "sets.c" <<'@//E*O*F sets.c//' /* * sets - performs set operations on two sets of arguments and * prints the result on the standard output stream * * usage: sets [-p[aths]] e1 e2 ... en \-u[nion] e1 e2 ... en * OR * e1 e2 ... en \-d[ifference] e1 e2 ... en * OR * e1 e2 ... en \-i[ntersection] e1 e2 ... en * * This code may be freely distributed provided this comment * is not removed or substantially altered. Please mail me any * fixes, changes, or enhancements. * * Christopher Tweed, EdCAAD, University of Edinburgh, Scotland. 
* chris@caad.ed.ac.uk * ..mcvax!ukc!edcaad!chris * * 3 December 1987. * */ #include <stdio.h> #define MAXSET 256 /* maximum size of a set */ #define STREQ(s1, s2) (strcmp((s1), (s2)) == 0) #define NOT(p) ((p) == FALSE) #define NAME(s) ((ignorep == TRUE) ? nopath(s) : s) typedef enum { FALSE=0, TRUE } BOOLEAN; typedef enum { NULL_OP=0, UNION, DIFF, INTERSECT } OPERATOR; extern int strcmp(); static void too_many(); static void usage(); static char *nopath(); static BOOLEAN member(); static BOOLEAN ignorep = TRUE; main(argc, argv) int argc; char *argv[]; { int i, j; /* general purpose */ BOOLEAN second = FALSE; /* flag set after operator */ char *set1[MAXSET]; /* the first set */ int n1 = 0; /* number of elements in first set */ char *set2[MAXSET]; /* the second set */ int n2 = 0; /* number of elements in second set */ int n; /* number in each set */ register OPERATOR op = NULL_OP; /* set operation to perform */ if (argc < 2) { fprintf(stderr, "not enough arguments\n"); (void) usage(argv[0]); /* EXITS */ } n2 = n1 = 0; /* collect sets */ while(--argc) { if (argv[1][0] == '-') { second = TRUE; /* found an operator */ switch (argv[1][1]) { case 'u': /* set union */ op = UNION; break; case 'd': /* set difference */ op = DIFF; break; case 'i': /* set intersection */ op = INTERSECT; break; case 'p': /* don't ignore paths */ ignorep = FALSE; break; default: fprintf(stderr, "illegal set operator %c\n", argv[1][1]); (void) usage(argv[0]); /* EXITS */ } } else { if (second == TRUE) { if (n2 == MAXSET) (void) too_many(); /* EXITS */ set2[n2++] = argv[1]; } else { if (n1 == MAXSET) (void) too_many(); /* EXITS */ set1[n1++] = argv[1]; } } argv++; } if (op == NULL_OP) { fprintf(stderr, "missing operator\n"); (void) usage(argv[0]); } /* remove duplicates */ n1 = nodups(set1, n1); n2 = nodups(set2, n2); /* * do set operation and print result * */ n = (op == UNION) ? 
(n1 + n2) : n1; for (i = 0; i < n; i++) { switch(op) { case UNION: j = i - n1; if (i < n1) printf("%s ", set1[i]); else if (NOT(member(set2[j], set1, n1))) printf("%s ", set2[j]); break; case DIFF: if (member(set1[i], set2, n2) == FALSE) { printf("%s ", set1[i]); } break; case INTERSECT: if (member(set1[i], set2, n2) == TRUE) { printf("%s ", set1[i]); } break; } } printf("\n"); exit(0); } /* * nodups(set, n) * * removes duplicates from set of n elements and returns number * of remaining elements in the set * */ int nodups(set, n) char *set[]; int n; { register int i; register int j; register int k; register int nn = n; /* * start at the top of the list * */ for(i=n-1; i>0; i--) for(j=0; j<i; j++) { if (set[i][0] == set[j][0] && STREQ(set[i], set[j])) { set[i] = NULL; /* cancel the duplicate */ /* * move everything above * the duplicate down one * */ for(k=i+1; k<nn; k++) { set[k-1] = set[k]; set[k] = NULL; } nn--; break; } } return nn; } /* * member(s, set, n) * * returns TRUE if string s is a member of set which has n members * otherwise return FALSE * */ static BOOLEAN member(s, set, n) register char *s, *set[]; register int n; { register int i; for (i = 0; i < n; i++) if (STREQ(NAME(s), NAME(set[i]))) return TRUE; return FALSE; } /* * nopath(s) * * Strips leading path from s if necessary; otherwise * returns s. * */ static char * nopath(s) char *s; { extern char *rindex(); char *p; if (p=rindex(s, '/')) return ++p; else return s; } static void too_many() { fprintf(stderr, "too many members\n"); exit(1); } static void usage(prog) char *prog; { char *set = "e1 e2 ... en"; fprintf(stderr, "%s\t%s -u[nion] %s\n", prog, set, set); fprintf(stderr, "\t%s -d[ifference] %s\n", set, set); fprintf(stderr, "\t%s -i[ntersection] %s\n", set, set); fprintf(stderr, "\t-p[aths]\t/* don't ignore leading paths */\n"); exit(1); } @//E*O*F sets.c// chmod u=rw,g=r,o=r sets.c echo Inspecting for damage in transit... 
# Post-unpack integrity check: compare `wc` counts of the extracted files
# with the counts recorded below.
# Two scratch files, named with this shell's process id: $temp holds the
# expected counts, $dtemp receives the diff output.
temp=/tmp/shar$$; dtemp=/tmp/.shar$$
# Remove both scratch files on exit (0) and on signals 1, 2, 3 and 15.
# The string is double-quoted, so the file names are expanded here, when
# the trap is set.
trap "rm -f $temp $dtemp; exit" 0 1 2 3 15
# Expected `wc` output for each file plus the total line.  The delimiter is
# quoted (\!!!), so the table is taken literally.
# NOTE(review): the column padding of this table was lost when the archive
# was flattened; presumably `diff -b` below was chosen so the exact amount
# of blank space does not matter -- confirm against the local wc format.
cat > $temp <<\!!!
 74 339 1851 sets.L
 11 20 202 Makefile
 238 763 4758 sets.c
 323 1122 6811 total
!!!
# Recount the files, drop any text up to the last '/' on each line, and
# diff against the expected table; differences are captured in $dtemp.
wc sets.L Makefile sets.c | sed 's=[^ ]*/==' | diff -b $temp - >$dtemp
# A non-empty $dtemp means at least one count differed.
if [ -s $dtemp ]
then echo "Ouch [diff of wc output]:" ; cat $dtemp
else echo "No problems found."
fi
exit 0
-- For comp.sources.unix stuff, mail to sources@uunet.uu.net.