rsalz@bbn.com (Rich Salz) (02/17/88)
Submitted-by: elsie!ado (Arthur David Olson) Posting-number: Volume 13, Issue 42 Archive-name: measures [ Truly how to save work by using a computer. --r$ ] Measures reads one or more files containing the values of measurable traits for a set of items, and lists which traits may be measured to learn which one of the items an unknown item is. : To unbundle, sh this file echo 'measures.1' 1>&2 cat >'measures.1' <<'End of measures.1' .LC @(#)measures.1 1.13 .TH MEASURES 1E \*(eH .SH NAME measures \- list measurements to take .SH SYNOPSIS .B measures [ file ... ] .= measures .SH DESCRIPTION .I Measures reads one or more files containing the values of measurable traits for a set of items, and lists which traits may be measured to learn which one of the items an unknown item is. .PP The file(s) to read may be named on the command line; the standard input is read if there are no .IR file (s) on the command line or if .B `-' is used. Sharp signs ('#'s) and characters that follow them on lines are ignored. Lines with only spaces, tabs, and colons in them are ignored. Each remaining line gives the values for the item whose name is given first on the line; values are separated by any number of spaces, tabs, or colons. A non-numeric string of characters (for example, .B `--' or .BR `unknown' ) may be used for any trait whose value is unknown for a given item. Two values separated by a `-' may be used to give a range in which the value for the item is known to fall. .PP If a line starts with one of the words listed below, it gets the special treatment indicated. .TP .B Filed Other entries on the line are used as names of the traits when the traits are listed. In the absence of a .B `Filed' line, the first trait is called `1', the second is called `2', and so forth. .TP .B Errors Other entries on the line give the `experimental error' involved in measuring each trait. The number may end with a percent sign (`%') to signify a percentage error figure. The range of values that might be measured for a trait (given experimental error) for two items must not overlap for the two items to be told apart by measuring that trait. In the absence of a .B `Errors' line, each trait's experimental error is taken to be zero. .TP .B Costs Other entries on the line give the cost of measuring each trait. In the absence of a .B `Costs' line, each trait's cost is taken to be one. .PP The listed set or sets of traits to be measured are those whose totaled costs are lowest and provide as much information as you could get by measuring all the traits. .SH EXAMPLE Given the input .in +.5i .nf .ft B .ta \w'Errors\0\0'u +\w'10%\0\0'u +\w'10%\0\0'u Filed A B C Errors 10% 10% 10% p 1 6 5 q 1 3 5 r 4 6 2 .br .in -.5i .fi .ft P the .I measures program produces the two lines of output .in +.5i .nf .ft B A B B C .br .in -.5i .fi .ft P to indicate that either A and B or B and C may be measured to identify an unknown sample as being one of p, q, or r. .SH LIMITATION There's a limit (usually thirty-two) on the number of traits that may be worked with. Lines may be at most one thousand characters long. .SH SEE ALSO .IR "Journal of Theoretical Biology" , (1987) .BR 128 , 1-9. End of measures.1 echo 'Makefile' 1>&2 cat >'Makefile' <<'End of Makefile' # @(#)Makefile 1.1 SRCS= ealloc.c ialloc.c measures.c substrings.c wild.c wildexit.c OBJS= ealloc.o ialloc.o measures.o substrings.o wild.o wildexit.o CFLAGS= -s -O LINT= lint LINTFLAGS= -phbaaxc all: measures sure: $(LINT) $(LINTFLAGS) $(SRCS) clean: rm -f *.o *.out core ,* measures: $(OBJS) $(CC) $(CFLAGS) $(OBJS) -o $@ End of Makefile echo 'ealloc.c' 1>&2 cat >'ealloc.c' <<'End of ealloc.c' # /*LINTLIBRARY*/ #include "stdio.h" #if !defined lint && !defined NOID static char elsieid[] = "@(#)ealloc.c 8.2"; #endif /* !defined lint && !defined NOID */ extern char * icalloc(); extern char * icatalloc(); extern char * icpyalloc(); extern char * imalloc(); extern char * irealloc(); static char * check(pointer) char * pointer; { if (pointer == NULL) wildrexit("allocating memory"); return pointer; } char * emalloc(size) { return check(imalloc(size)); } char * ecalloc(nelem, elsize) { return check(icalloc(nelem, elsize)); } char * erealloc(ptr, size) char * ptr; { return check(irealloc(ptr, size)); } char * ecatalloc(old, new) char * old; char * new; { return check(icatalloc(old, new)); } char * ecpyalloc(string) char * string; { return check(icpyalloc(string)); } efree(p) char * p; { ifree(p); } ecfree(p) char * p; { icfree(p); } End of ealloc.c echo 'ialloc.c' 1>&2 cat >'ialloc.c' <<'End of ialloc.c' # /*LINTLIBRARY*/ #include "stdio.h" #if !defined lint && !defined NOID static char elsieid[] = "@(#)ialloc.c 8.3"; #endif /* !defined lint && !defined NOID */ #if !defined alloc_t #define alloc_t unsigned #endif /* !defined alloc_t */ #if defined MAL #define NULLMAL(x) ((x) == NULL || (x) == MAL) #else /* !defined MAL */ #define NULLMAL(x) ((x) == NULL) #endif /* !defined MAL */ extern char * calloc(); extern char * malloc(); extern char * realloc(); extern char * strcpy(); char * imalloc(n) { #if defined MAL register char * result; if (n == 0) n = 1; result = malloc((alloc_t) n); return (result == MAL) ? NULL : result; #else /* !defined MAL */ if (n == 0) n = 1; #if defined __TURBOC__ /* ** Beat a TURBOC bug. */ if ((n & 1) != 0) ++n; #endif /* defined __TURBOC__ */ return malloc((alloc_t) n); #endif /* !defined MAL */ } char * icalloc(nelem, elsize) { if (nelem == 0 || elsize == 0) nelem = elsize = 1; #if defined __TURBOC__ if ((nelem & 1) != 0 && (elsize & 1) != 0) ++nelem; #endif /* defined __TURBOC__ */ return calloc((alloc_t) nelem, (alloc_t) elsize); } char * irealloc(pointer, size) char * pointer; { if (NULLMAL(pointer)) return imalloc(size); if (size == 0) size = 1; #if defined __TURBOC__ if ((size & 1) != 0) ++size; #endif /* defined __TURBOC__ */ return realloc(pointer, (alloc_t) size); } char * icatalloc(old, new) char * old; char * new; { register char * result; register oldsize, newsize; oldsize = NULLMAL(old) ? 0 : strlen(old); newsize = NULLMAL(new) ? 0 : strlen(new); if ((result = irealloc(old, oldsize + newsize + 1)) != NULL) if (!NULLMAL(new)) (void) strcpy(result + oldsize, new); return result; } char * icpyalloc(string) char * string; { return icatalloc((char *) NULL, string); } ifree(p) char * p; { if (!NULLMAL(p)) free(p); } icfree(p) char * p; { if (!NULLMAL(p)) free(p); } End of ialloc.c echo 'measures.c' 1>&2 cat >'measures.c' <<'End of measures.c' # #include "stdio.h" #if !defined lint && !defined NOID static char elsieid[] = "@(#)measures.c 1.23"; #endif /* !defined lint && !defined NOID */ #include "math.h" #if !defined NBPI #define NBPI (8 * sizeof (int)) /* Number of Bits Per Int */ #endif /* !defined NBPI */ #if !defined MAXLINE #define MAXLINE 1000 /* MAXimum LINE length */ #endif /* !defined MAXLINE */ #define SKIPPED '\0' #if defined __TURBOC__ #define HUGE HUGE_VAL #define float double #undef SKIPPED #define SKIPPED (-1) #endif /* defined __TURBOC__ */ extern char * ecpyalloc(); extern char * erealloc(); extern char * strchr(); extern char ** substrings(); extern char * wildname(); struct range { float lo; float hi; }; struct range ** ranges; /* measured ranges of traits, -/+ errors */ int itemcount; /* number of items with traits */ int traitcount; /* number of traits of each item */ struct trait { char * name; float error; char errtype; float cost; }; struct trait traits[NBPI]; apartbits(i, j) { register struct range * rp1; register struct range * rp2; register int n, result; result = 0; n = traitcount; rp1 = ranges[i + 1]; rp2 = ranges[j + 1]; while (--n >= 0) { --rp1; --rp2; if (rp1->lo > rp2->hi || rp1->hi < rp2->lo) result |= 1 << n; } return result; } char * oopsname; long oopsline; oops(message) char * message; { (void) fprintf(stderr, "\"%s\", line %ld: wild %s\n", oopsname, oopsline, message); for ( ; ; ) wildexit(message); } dofiled(innames) char ** innames; { register int i; for (i = 0; i < traitcount; ++i) if (traits[i].name != NULL) if (strcmp(innames[i], traits[i].name) == 0) continue; else oops("mismatched `Filed' lines"); else traits[i].name = ecpyalloc(innames[i]); } doerrors(inerrors) char ** inerrors; { register int i; float f; char c; for (i = 0; i < traitcount; ++i) { c = '\0'; if ((sscanf(inerrors[i], "%f%c", &f, &c) == 1 && c != SKIPPED) || (c != SKIPPED && c != '%')) oops("`Errors' line"); if (f < 0) oops("negative `Errors' value"); if (c == '%' && f > 100) oops("Error percentage > 100"); if (traits[i].error != 0 && (f != traits[i].error || c != traits[i].errtype)) oops("mismatched `Errors' lines"); traits[i].error = f; traits[i].errtype = c; } } docosts(incosts) char ** incosts; { register int i; float f; char c; for (i = 0; i < traitcount; ++i) { c = '\0'; if (sscanf(incosts[i], "%f%c", &f, &c) != 1 || c != SKIPPED) oops("`Costs' line"); if (f <= 0) oops("non-positive `Costs' value"); if (traits[i].cost != 0 && f != traits[i].cost) oops("mismatched `Costs' lines"); traits[i].cost = f; } } getanum(string, address) char * string; float * address; { double d; char c; /* ** Some buggy sscanfs think that "-" is a number. */ if (strcmp(string, "-") == 0) return 0; c = '\0'; if (sscanf(string, "%f%c", &d, &c) == 1 && c == SKIPPED) { *address = d; return 1; } if (*string == '-') { *address = -HUGE; ++string; } else *address = HUGE; return strcmp(string, "HUGE") == 0 || strcmp(string, "huge") == 0 || strcmp(string, "Huge") == 0; } #define ATATIME 500 doranges(inranges) register char ** inranges; { register char * cp; register int i; register int ok; if ((itemcount % ATATIME) == 0) { ranges = (struct range **) erealloc((char *) ranges, (itemcount + ATATIME) * sizeof *ranges); ranges[0] = (struct range *) erealloc((char *) ranges[0], (itemcount + ATATIME) * traitcount * sizeof *ranges[0]); for (i = 1; i < itemcount + ATATIME; ++i) ranges[i] = ranges[i - 1] + traitcount; } for (i = 0; i < traitcount; ++i) { if (getanum(inranges[i], &ranges[itemcount][i].lo)) { ranges[itemcount][i].hi = ranges[itemcount][i].lo; continue; } cp = inranges[i]; for ( ; ; ) { cp = strchr(cp + 1, '-'); if (cp == 0) { ranges[itemcount][i].lo = -HUGE; ranges[itemcount][i].hi = HUGE; break; } *cp = '\0'; ok = getanum(inranges[i], &ranges[itemcount][i].lo) && getanum(cp + 1, &ranges[itemcount][i].hi); *cp++ = '-'; if (!ok) continue; if (ranges[itemcount][i].lo > ranges[itemcount][i].hi) oops("range"); break; } } ++itemcount; } infile(filename) char * filename; { register FILE * fp; register char * cp; register char ** cpp; register int i; char buf[MAXLINE + 2]; /* +2 for "\n\0" */ if (strcmp(filename, "-") == 0) { fp = stdin; filename = "standard input"; } else if ((fp = fopen(filename, "r")) == NULL) for ( ; ; ) wild2exit("result opening", filename); oopsname = filename; for (oopsline = 1; fgets(buf, sizeof buf, fp) == buf; ++oopsline) { cp = strchr(buf, '\n'); if (cp == 0) oops("long line"); *cp = '\0'; /* Zap the trailing newline */ if (strchr(buf, '#') != 0) *strchr(buf, '#') = '\0'; /* Zap comments */ cpp = substrings(buf, ": \t"); if (cpp == NULL) for ( ; ; ) wildrexit("substrings"); for (i = 0; cpp[i] != NULL; ++i) ; if (i == 0) { free((char *) cpp); continue; } if (traitcount == 0) { traitcount = i - 1; if (traitcount <= 0) oops("lack of traits"); if (traitcount > NBPI) oops("large number of traits"); } else if (traitcount != i - 1) oops("number of traits"); cp = cpp[0]; if (strcmp(cp, "Filed") == 0) dofiled(&cpp[1]); else if (strcmp(cp, "Errors") == 0) doerrors(&cpp[1]); else if (strcmp(cp, "Costs") == 0) docosts(&cpp[1]); else doranges(&cpp[1]); (void) free((char *) cpp); } if (ferror(fp) || !feof(fp)) for ( ; ; ) wild2exit("result reading", filename); if (fp != stdin) (void) fclose(fp); } int * aparts; /* traits to measure to tell two items apart */ int apartcount; /* number of elements in above table */ int mustbits; /* tells which traits MUST be measured */ int mustcount; /* tells how many traits MUST be measured */ main(argc, argv) int argc; char * argv[]; { register int argind; register int i, j; float lo, hi; char buf[NBPI]; /* Wildly generous! */ argv[0] = wildname(argv[0]); argind = 1; if (strcmp(argv[argind], "--") == 0) ++argind; if (argind == (argc - 1) && strcmp(argv[argind], "=") == 0) { (void) fprintf(stderr, "%s: usage is %s [file...]\n", argv[0], argv[0]); for ( ; ; ) tameexit(); } if (argind == argc) infile("-"); else for (i = argind; i < argc; ++i) infile(argv[i]); if (itemcount == 0) for ( ; ; ) wildrexit("lack of items"); /* ** Set all costs to one if no costs were given. */ for (i = 0; i < traitcount; ++i) if (traits[i].cost != 0) break; if (i >= traitcount) for (i = 0; i < traitcount; ++i) traits[i].cost = 1; /* ** Set all labels if no labels were given. */ if (traits[0].name == NULL) for (i = 0; i < traitcount; ++i) { (void) sprintf(buf, "%d", i + 1); traits[i].name = ecpyalloc(buf); } /* ** Adjust the ranges to reflect the errors. */ for (i = 0; i < itemcount; ++i) for (j = 0; j < traitcount; ++j) { if (traits[j].error == 0) continue; lo = ranges[i][j].lo; if (lo > -HUGE && lo < HUGE) { if (traits[j].errtype == '\0') lo -= traits[j].error; else lo -= lo * (traits[j].error / 100.); ranges[i][j].lo = lo; } hi = ranges[i][j].hi; if (hi > -HUGE && hi < HUGE) { if (traits[j].errtype == '\0') hi += traits[j].error; else hi += hi * (traits[j].error / 100.); ranges[i][j].hi = hi; } } /* ** Build the "tell apart" table. */ for (i = 0; i < itemcount - 1; ++i) { aparts = (int *) erealloc((char *) aparts, (apartcount + itemcount + 1 - i) * sizeof *aparts); for (j = i + 1; j < itemcount; ++j) newbits(apartbits(i, j)); } if (mustcount + apartcount == 0) for ( ; ; ) wildexit("indistinguishable items (given errors)"); finish(); for ( ; ; ) tameexit(); } newbits(new) register int new; { register int * ip; register int i; if ((mustbits & new) != 0) return; /* A test we must do can tell these two apart */ if (new == 0) return; /* We can't tell these two apart! */ /* ** Is the new case (or a harder one) already in the table? */ ip = aparts; ip[apartcount] = new; while ((*ip++ & ~new) != 0) ; if (ip <= &aparts[apartcount]) return; /* ** Get rid of any old entries that this new one is harder than. */ ip = aparts; for ( ; ; ) if ((new & ~*ip++) == 0) { if (*--ip == new) break; *ip = aparts[--apartcount]; aparts[apartcount] = new; } /* ** Is more than one test involved? */ i = 1; while ((new & i) == 0) i <<= 1; if (new != i) { ++apartcount; return; } /* ** A single test is involved. */ mustbits |= i; if (++mustcount < traitcount) return; /* ** Oh well. . . */ finish(); for ( ; ; ) tameexit(); } float lowcost; int * lowbits; int lowcount; finish() { register int i, j; lowcost = HUGE; aparts[apartcount] = 0; dobits(mustbits, 0, 0., aparts); for (i = 0; i < lowcount; ++i) { for (j = 0; j < traitcount; ++j) if ((lowbits[i] & (1 << j)) != 0) (void) printf("%s ", traits[j].name); printf("\n"); } } dobits(todo, done, cost, ip) register int todo, done; register float cost; register int * ip; { register int i, nextbit; while((todo & *ip++) != 0) ; if (*--ip == 0) { if (cost < lowcost) { lowcost = cost; lowcount = 0; } lowbits = (int *) erealloc((char *) lowbits, (lowcount + 1) * sizeof *lowbits); lowbits[lowcount++] = todo; return; } if (cost >= lowcost) return; /* cost can't get smaller! */ i = *ip++ & ~(todo | done); for (nextbit = 0; nextbit < traitcount; ++nextbit) { if ((i & (1 << nextbit)) == 0 || cost + traits[nextbit].cost > lowcost) continue; dobits(todo | ( 1 << nextbit), done, cost + traits[nextbit].cost, ip); done |= 1 << nextbit; } } End of measures.c echo 'substrings.c' 1>&2 cat >'substrings.c' <<'End of substrings.c' # /*LINTLIBRARY*/ #include "stdio.h" #if !defined lint && !defined NOID static char elsieid[] = "@(#)substrings.c 8.2"; #endif /* !defined lint && !defined NOID */ #include "ctype.h" #if !defined TRUE #define TRUE 1 #define FALSE 0 #endif /* !defined TRUE */ extern char * imalloc(); extern char * strchr(); char ** substrings(string, separators) char * string; char * separators; { register char ** array; register char * cp; register int nsubs; register int lastsepprint; if (string == NULL || separators == NULL || *separators == '\0') return NULL; array = (char **) imalloc((strlen(string) + 2) * sizeof *array); if (array == NULL) return NULL; if (*string == '\0') { array[0] = NULL; return array; } nsubs = 0; lastsepprint = TRUE; for (cp = string; *cp != '\0'; ++cp) if (strchr(separators, *cp) != 0) { if (isascii(*cp) && isprint(*cp) && *cp != ' ' && lastsepprint) array[nsubs++] = cp; lastsepprint = isascii(*cp) && isprint(*cp) && *cp != ' '; *cp = '\0'; } else { lastsepprint = FALSE; if (cp == string || *(cp - 1) == '\0') array[nsubs++] = cp; } if (lastsepprint && *(cp - 1) == '\0') array[nsubs++] = cp; array[nsubs] = NULL; return array; } End of substrings.c echo 'wild.c' 1>&2 cat >'wild.c' <<'End of wild.c' # /*LINTLIBRARY*/ #include "stdio.h" #if !defined lint && !defined NOID static char elsieid[] = "@(#)wild.c 8.3"; #endif /* !defined lint && !defined NOID */ char * wildname(name) char * name; { register int i; static char saved[15]; if (name != NULL && *name != '\0') for (i = 0; (saved[i] = *name) != '\0'; ++name) if ((*name == '/' || *name == '\\') && *(name + 1) != '/' && *(name + 1) != '\\' && *(name + 1) != '\0') i = 0; else if (i < sizeof saved - 1) ++i; return (saved[0] == '\0') ? "?" : saved; } wild2(part1, part2) char * part1; char * part2; { (void) fputs(wildname(""), stderr); /* ** One space after the colon matches what perror does ** (although your typing teacher may want a second space). */ (void) fputs(": wild ", stderr); if (part1 == NULL) part1 = ""; if (part2 == NULL) part2 = ""; (void) fputs(part1, stderr); if (*part1 != '\0' && *part2 != '\0') (void) fputs(" ", stderr); (void) fputs(part2, stderr); (void) fputs("\n", stderr); } wild(string) char * string; { wild2("", string); } wildcall(string) char * string; { wild2("call of", string); } wildret(string) char * string; { wild2("result from", string); } End of wild.c echo 'wildexit.c' 1>&2 cat >'wildexit.c' <<'End of wildexit.c' # /*LINTLIBRARY*/ #include "stdio.h" #if !defined lint && !defined NOID static char elsieid[] = "@(#)wildexit.c 8.2"; #endif /* !defined lint && !defined NOID */ #if !defined TAMEEXITVAL #define TAMEEXITVAL 0 #endif /* !defined TAMEEXITVAL */ #if !defined WILDEXITVAL #define WILDEXITVAL 1 #endif /* !defined WILDEXITVAL */ wildexit(string) char * string; { wild(string); for ( ; ; ) exit(WILDEXITVAL); } wildcexit(string) char * string; { wildcall(string); for ( ; ; ) exit(WILDEXITVAL); } wildrexit(string) char * string; { wildret(string); for ( ; ; ) exit(WILDEXITVAL); } wild2exit(part1, part2) char * part1; char * part2; { wild2(part1, part2); for ( ; ; ) exit(WILDEXITVAL); } tameexit() { for ( ; ; ) exit(TAMEEXITVAL); } End of wildexit.c exit -- For comp.sources.unix stuff, mail to sources@uunet.uu.net.