[comp.sources.unix] v13i042: Brute force measurement selection

rsalz@bbn.com (Rich Salz) (02/17/88)
Submitted-by: elsie!ado (Arthur David Olson)
Posting-number: Volume 13, Issue 42
Archive-name: measures

[  Truly how to save work by using a computer.  --r$ ]

Measures reads one or more files containing the values of measurable
traits for a set of items, and lists which traits may be measured to learn
which one of the items an unknown item is.

: To unbundle, sh this file
echo 'measures.1' 1>&2
cat >'measures.1' <<'End of measures.1'
.LC @(#)measures.1	1.13
.TH MEASURES 1E \*(eH
.SH NAME
measures \- list measurements to take
.SH SYNOPSIS
.B measures
[ file ... ]
.= measures
.SH DESCRIPTION
.I Measures
reads one or more files containing the values of measurable traits
for a set of items, and lists which traits may be measured to learn which one of
the items an unknown item is.
.PP
The file(s) to read may be named on the command line;
the standard input is read if there are no
.IR file (s)
on the command line or if 
.B `-'
is used.
Sharp signs ('#'s) and characters that follow them on lines are ignored.
Lines with only spaces, tabs, and colons in them are ignored.
Each remaining line gives the values for the item
whose name is given first on the line;
values are separated by any number of spaces, tabs, or colons.
A non-numeric string of characters
(for example,
.B `--'
or
.BR `unknown' )
may be used for any trait whose value is unknown for a given item.
Two values separated by a `-' may be used to give a range in which
the value for the item is known to fall.
.PP
If a line starts with one of the words listed below,
it gets the special treatment indicated.
.TP
.B Filed
Other entries on the line are used as names of the traits when the traits are
listed.  In the absence of a
.B `Filed'
line, the first trait is called `1', the second is called `2', and so forth.
.TP
.B Errors
Other entries on the line give the `experimental error' involved in measuring
each trait.  The number may end with a percent sign (`%') to signify a
percentage error figure.
The range of values that might be measured for a trait
(given experimental error)
for two items must not overlap
for the two items to be told apart by measuring that trait.
In the absence of a
.B `Errors'
line, each trait's experimental error is taken to be zero.
.TP
.B Costs
Other entries on the line give the cost of measuring each trait.
In the absence of a
.B `Costs'
line, each trait's cost is taken to be one.
.PP
The listed set or sets of traits to be measured are those whose totaled costs
are lowest and provide as much information as you could get by measuring
all the traits.
.SH EXAMPLE
Given the input
.in +.5i
.nf
.ft B
.ta \w'Errors\0\0'u +\w'10%\0\0'u +\w'10%\0\0'u
Filed	A	B	C
Errors	10%	10%	10%
p	1	6	5
q	1	3	5
r	4	6	2
.br
.in -.5i
.fi
.ft P
the
.I measures
program produces the two lines of output
.in +.5i
.nf
.ft B
A B
B C
.br
.in -.5i
.fi
.ft P
to indicate that either A and B or B and C may be measured to identify an
unknown sample as being one of p, q, or r.
.SH LIMITATION
There's a limit (usually thirty-two) 
on the number of traits that may be worked with.
Lines may be at most one thousand characters long.
.SH SEE ALSO
.IR "Journal of Theoretical Biology" ,
(1987)
.BR 128 ,
1-9.
End of measures.1
echo 'Makefile' 1>&2
cat >'Makefile' <<'End of Makefile'
# @(#)Makefile	1.1

SRCS=		ealloc.c ialloc.c measures.c substrings.c wild.c wildexit.c
OBJS=		ealloc.o ialloc.o measures.o substrings.o wild.o wildexit.o
CFLAGS=		-s -O
LINT=		lint
LINTFLAGS=	-phbaaxc

all:		measures

sure:
		$(LINT) $(LINTFLAGS) $(SRCS)

clean:
		rm -f *.o *.out core ,*

measures:	$(OBJS)
		$(CC) $(CFLAGS) $(OBJS) -o $@
End of Makefile
echo 'ealloc.c' 1>&2
cat >'ealloc.c' <<'End of ealloc.c'
#

/*LINTLIBRARY*/

#include "stdio.h"

#if !defined lint && !defined NOID
static char	elsieid[] = "@(#)ealloc.c	8.2";
#endif /* !defined lint && !defined NOID */

extern char *	icalloc();
extern char *	icatalloc();
extern char *	icpyalloc();
extern char *	imalloc();
extern char *	irealloc();

static char *
check(pointer)
char *	pointer;
{
	if (pointer == NULL)
		wildrexit("allocating memory");
	return pointer;
}

char *
emalloc(size)
{
	return check(imalloc(size));
}

char *
ecalloc(nelem, elsize)
{
	return check(icalloc(nelem, elsize));
}

char *
erealloc(ptr, size)
char *	ptr;
{
	return check(irealloc(ptr, size));
}

char *
ecatalloc(old, new)
char *	old;
char *	new;
{
	return check(icatalloc(old, new));
}

char *
ecpyalloc(string)
char *	string;
{
	return check(icpyalloc(string));
}

efree(p)
char *	p;
{
	ifree(p);
}

ecfree(p)
char *	p;
{
	icfree(p);
}
End of ealloc.c
echo 'ialloc.c' 1>&2
cat >'ialloc.c' <<'End of ialloc.c'
#

/*LINTLIBRARY*/

#include "stdio.h"

#if !defined lint && !defined NOID
static char	elsieid[] = "@(#)ialloc.c	8.3";
#endif /* !defined lint && !defined NOID */

#if !defined alloc_t
#define alloc_t	unsigned
#endif /* !defined alloc_t */

#if defined MAL
#define NULLMAL(x)	((x) == NULL || (x) == MAL)
#else /* !defined MAL */
#define NULLMAL(x)	((x) == NULL)
#endif /* !defined MAL */

extern char *	calloc();
extern char *	malloc();
extern char *	realloc();
extern char *	strcpy();

char *
imalloc(n)
{
#if defined MAL
	register char *	result;

	if (n == 0)
		n = 1;
	result = malloc((alloc_t) n);
	return (result == MAL) ? NULL : result;
#else /* !defined MAL */
	if (n == 0)
		n = 1;
#if defined __TURBOC__
	/*
	** Beat a TURBOC bug.
	*/
	if ((n & 1) != 0)
		++n;
#endif /* defined __TURBOC__ */
	return malloc((alloc_t) n);
#endif /* !defined MAL */
}

char *
icalloc(nelem, elsize)
{
	if (nelem == 0 || elsize == 0)
		nelem = elsize = 1;
#if defined __TURBOC__
	if ((nelem & 1) != 0 && (elsize & 1) != 0)
		++nelem;
#endif /* defined __TURBOC__ */
	return calloc((alloc_t) nelem, (alloc_t) elsize);
}

char *
irealloc(pointer, size)
char *	pointer;
{
	if (NULLMAL(pointer))
		return imalloc(size);
	if (size == 0)
		size = 1;
#if defined __TURBOC__
	if ((size & 1) != 0)
		++size;
#endif /* defined __TURBOC__ */
	return realloc(pointer, (alloc_t) size);
}

char *
icatalloc(old, new)
char *	old;
char *	new;
{
	register char *	result;
	register	oldsize, newsize;

	oldsize = NULLMAL(old) ? 0 : strlen(old);
	newsize = NULLMAL(new) ? 0 : strlen(new);
	if ((result = irealloc(old, oldsize + newsize + 1)) != NULL)
		if (!NULLMAL(new))
			(void) strcpy(result + oldsize, new);
	return result;
}

char *
icpyalloc(string)
char *	string;
{
	return icatalloc((char *) NULL, string);
}

ifree(p)
char *	p;
{
	if (!NULLMAL(p))
		free(p);
}

icfree(p)
char *	p;
{
	if (!NULLMAL(p))
		free(p);
}
End of ialloc.c
echo 'measures.c' 1>&2
cat >'measures.c' <<'End of measures.c'
#

#include "stdio.h"

#if !defined lint && !defined NOID
static char elsieid[] = "@(#)measures.c	1.23";
#endif /* !defined lint && !defined NOID */

#include "math.h"

#if !defined NBPI
#define NBPI	(8 * sizeof (int))	/* Number of Bits Per Int */
#endif /* !defined NBPI */

#if !defined MAXLINE
#define MAXLINE	1000			/* MAXimum LINE length */
#endif /* !defined MAXLINE */

#define SKIPPED	'\0'

#if defined __TURBOC__
#define HUGE	HUGE_VAL
#define float	double
#undef SKIPPED
#define SKIPPED	(-1)
#endif /* defined __TURBOC__ */

extern char *	ecpyalloc();
extern char *	erealloc();
extern char *	strchr();
extern char **	substrings();
extern char *	wildname();

struct range {
	float	lo;
	float	hi;
};

struct range **	ranges;		/* measured ranges of traits, -/+ errors */

int		itemcount;	/* number of items  with traits */
int		traitcount;	/* number of traits of each item */

struct trait {
	char *	name;
	float	error;
	char	errtype;
	float	cost;
};

struct trait	traits[NBPI];

apartbits(i, j)
{
	register struct range *	rp1;
	register struct range *	rp2;
	register int		n, result;

	result = 0;
	n = traitcount;
	rp1 = ranges[i + 1];
	rp2 = ranges[j + 1];
	while (--n >= 0) {
		--rp1;
		--rp2;
		if (rp1->lo > rp2->hi || rp1->hi < rp2->lo)
			result |= 1 << n;
	}
	return result;
}

char *	oopsname;
long	oopsline;

oops(message)
char *	message;
{
	(void) fprintf(stderr, "\"%s\", line %ld: wild %s\n",
		oopsname, oopsline, message);
	for ( ; ; )
		wildexit(message);
}

dofiled(innames)
char **	innames;
{
	register int	i;

	for (i = 0; i < traitcount; ++i)
		if (traits[i].name != NULL)
			if (strcmp(innames[i], traits[i].name) == 0)
				continue;
			else	oops("mismatched `Filed' lines");
		else traits[i].name = ecpyalloc(innames[i]);
}

doerrors(inerrors)
char **	inerrors;
{
	register int	i;
	float		f;
	char		c;

	for (i = 0; i < traitcount; ++i) {
		c = '\0';
		if ((sscanf(inerrors[i], "%f%c", &f, &c) == 1 &&
			c != SKIPPED) || (c != SKIPPED && c != '%'))
				oops("`Errors' line");
		if (f < 0)
			oops("negative `Errors' value");
		if (c == '%' && f > 100)
			oops("Error percentage > 100");
		if (traits[i].error != 0 &&
			(f != traits[i].error || c != traits[i].errtype))
				oops("mismatched `Errors' lines");
		traits[i].error = f;
		traits[i].errtype = c;
	}
}

docosts(incosts)
char **	incosts;
{
	register int	i;
	float		f;
	char		c;

	for (i = 0; i < traitcount; ++i) {
		c = '\0';
		if (sscanf(incosts[i], "%f%c", &f, &c) != 1 || c != SKIPPED)
			oops("`Costs' line");
		if (f <= 0)
			oops("non-positive `Costs' value");
		if (traits[i].cost != 0 && f != traits[i].cost)
			oops("mismatched `Costs' lines");
		traits[i].cost = f;
	}
}

getanum(string, address)
char *	string;
float *	address;
{
	double	d;
	char	c;

	/*
	** Some buggy sscanfs think that "-" is a number.
	*/
	if (strcmp(string, "-") == 0)
		return 0;
	c = '\0';
	if (sscanf(string, "%f%c", &d, &c) == 1 && c == SKIPPED) {
		*address = d;
		return 1;
	}
	if (*string == '-') {
		*address = -HUGE;
		++string;
	} else	*address = HUGE;
	return strcmp(string, "HUGE") == 0 || strcmp(string, "huge") == 0 ||
		strcmp(string, "Huge") == 0;
}

#define ATATIME	500

doranges(inranges)
register char **	inranges;
{
	register char *	cp;
	register int	i;
	register int	ok;

	if ((itemcount % ATATIME) == 0) {
		ranges = (struct range **) erealloc((char *) ranges,
			(itemcount + ATATIME) * sizeof *ranges);
		ranges[0] = (struct range *) erealloc((char *) ranges[0],
			(itemcount + ATATIME) * traitcount * sizeof *ranges[0]);
		for (i = 1; i < itemcount + ATATIME; ++i)
			ranges[i] = ranges[i - 1] + traitcount;
	}
	for (i = 0; i < traitcount; ++i) {
		if (getanum(inranges[i], &ranges[itemcount][i].lo)) {
			ranges[itemcount][i].hi = ranges[itemcount][i].lo;
			continue;
		}
		cp = inranges[i];
		for ( ; ; ) {
			cp = strchr(cp + 1, '-');
			if (cp == 0) {
				ranges[itemcount][i].lo = -HUGE;
				ranges[itemcount][i].hi = HUGE;
				break;
			}
			*cp = '\0';
			ok = getanum(inranges[i], &ranges[itemcount][i].lo) &&
				getanum(cp + 1, &ranges[itemcount][i].hi);
			*cp++ = '-';
			if (!ok)
				continue;
			if (ranges[itemcount][i].lo > ranges[itemcount][i].hi)
				oops("range");
			break;
		}
	}
	++itemcount;
}

infile(filename)
char *	filename;
{
	register FILE *		fp;
	register char *		cp;
	register char **	cpp;
	register int		i;
	char			buf[MAXLINE + 2];	/* +2 for "\n\0" */

	if (strcmp(filename, "-") == 0) {
		fp = stdin;
		filename = "standard input";
	}
	else if ((fp = fopen(filename, "r")) == NULL)
		for ( ; ; )
			wild2exit("result opening", filename);
	oopsname = filename;
	for (oopsline = 1; fgets(buf, sizeof buf, fp) == buf; ++oopsline) {
		cp = strchr(buf, '\n');
		if (cp == 0)
			oops("long line");
		*cp = '\0';		/* Zap the trailing newline */
		if (strchr(buf, '#') != 0)
			*strchr(buf, '#') = '\0';	/* Zap comments */
		cpp = substrings(buf, ": \t");
		if (cpp == NULL)
			for ( ; ; )
				wildrexit("substrings");
		for (i = 0; cpp[i] != NULL; ++i)
			;
		if (i == 0) {
			free((char *) cpp);
			continue;
		}
		if (traitcount == 0) {
			traitcount = i - 1;
			if (traitcount <= 0)
				oops("lack of traits");
			if (traitcount > NBPI)
				oops("large number of traits");
		} else if (traitcount != i - 1)
			oops("number of traits");
		cp = cpp[0];
		if (strcmp(cp, "Filed") == 0)
			dofiled(&cpp[1]);
		else if (strcmp(cp, "Errors") == 0)
			doerrors(&cpp[1]);
		else if (strcmp(cp, "Costs") == 0)
			docosts(&cpp[1]);
		else	doranges(&cpp[1]);
		(void) free((char *) cpp);
	}
	if (ferror(fp) || !feof(fp))
		for ( ; ; )
			wild2exit("result reading", filename);
	if (fp != stdin)
		(void) fclose(fp);
}

int *	aparts;		/* traits to measure to tell two items apart */
int	apartcount;	/* number of elements in above table */
int	mustbits;	/* tells which traits MUST be measured */
int	mustcount;	/* tells how many traits MUST be measured */

main(argc, argv)
int	argc;
char *	argv[];
{
	register int	argind;
	register int	i, j;
	float		lo, hi;
	char		buf[NBPI];	/* Wildly generous! */

	argv[0] = wildname(argv[0]);
	argind = 1;
	if (strcmp(argv[argind], "--") == 0)
		++argind;
	if (argind == (argc - 1) && strcmp(argv[argind], "=") == 0) {
		(void) fprintf(stderr, "%s: usage is %s [file...]\n",
			argv[0], argv[0]);
		for ( ; ; )
			tameexit();
	}
	if (argind == argc)
		infile("-");
	else for (i = argind; i < argc; ++i)
		infile(argv[i]);
	if (itemcount == 0)
		for ( ; ; )
			wildrexit("lack of items");
/*
** Set all costs to one if no costs were given.
*/
	for (i = 0; i < traitcount; ++i)
		if (traits[i].cost != 0)
			break;
	if (i >= traitcount)
		for (i = 0; i < traitcount; ++i)
			traits[i].cost = 1;
/*
** Set all labels if no labels were given.
*/
	if (traits[0].name == NULL)
		for (i = 0; i < traitcount; ++i) {
			(void) sprintf(buf, "%d", i + 1);
			traits[i].name = ecpyalloc(buf);
		}
/*
** Adjust the ranges to reflect the errors.
*/
	for (i = 0; i < itemcount; ++i)
		for (j = 0; j < traitcount; ++j) {
			if (traits[j].error == 0)
				continue;
			lo = ranges[i][j].lo;
			if (lo > -HUGE && lo < HUGE) {
				if (traits[j].errtype == '\0')
					lo -= traits[j].error;
				else	lo -= lo * (traits[j].error / 100.);
				ranges[i][j].lo = lo;
			}
			hi = ranges[i][j].hi;
			if (hi > -HUGE && hi < HUGE) {
				if (traits[j].errtype == '\0')
					hi += traits[j].error;
				else	hi += hi * (traits[j].error / 100.);
				ranges[i][j].hi = hi;
			}
		}
/*
** Build the "tell apart" table.
*/
	for (i = 0; i < itemcount - 1; ++i) {
		aparts = (int *) erealloc((char *) aparts,
			(apartcount + itemcount + 1 - i) * sizeof *aparts);
		for (j = i + 1; j < itemcount; ++j)
			newbits(apartbits(i, j));
	}
	if (mustcount + apartcount == 0)
		for ( ; ; )
			wildexit("indistinguishable items (given errors)");
	finish();
	for ( ; ; )
		tameexit();
}

newbits(new)
register int	new;
{
	register int *	ip;
	register int	i;

	if ((mustbits & new) != 0)
		return;	/* A test we must do can tell these two apart */
	if (new == 0)
		return;	/* We can't tell these two apart! */
	/*
	** Is the new case (or a harder one) already in the table?
	*/
	ip = aparts;
	ip[apartcount] = new;
	while ((*ip++ & ~new) != 0)
		;
	if (ip <= &aparts[apartcount])
		return;
	/*
	** Get rid of any old entries that this new one is harder than.
	*/
	ip = aparts;
	for ( ; ; )
		if ((new & ~*ip++) == 0) {
			if (*--ip == new)
				break;
			*ip = aparts[--apartcount];
			aparts[apartcount] = new;
		}
	/*
	** Is more than one test involved?
	*/
	i = 1;
	while ((new & i) == 0)
		i <<= 1;
	if (new != i) {
		++apartcount;
		return;
	}
	/*
	** A single test is involved.
	*/
	mustbits |= i;
	if (++mustcount < traitcount)
		return;
	/*
	** Oh well. . .
	*/
	finish();
	for ( ; ; )
		tameexit();
}

float	lowcost;
int *	lowbits;
int	lowcount;

finish()
{
	register int	i, j;

	lowcost = HUGE;
	aparts[apartcount] = 0;
	dobits(mustbits, 0, 0., aparts);
	for (i = 0; i < lowcount; ++i) {
		for (j = 0; j < traitcount; ++j)
			if ((lowbits[i] & (1 << j)) != 0)
				(void) printf("%s ", traits[j].name);
		printf("\n");
	}
}

dobits(todo, done, cost, ip)
register int	todo, done;
register float	cost;
register int *	ip;
{
	register int	i, nextbit;

	while((todo & *ip++) != 0)
		;
	if (*--ip == 0) {
		if (cost < lowcost) {
			lowcost = cost;
			lowcount = 0;
		}
		lowbits = (int *) erealloc((char *) lowbits,
			(lowcount + 1) * sizeof *lowbits);
		lowbits[lowcount++] = todo;
		return;
	}
	if (cost >= lowcost)
		return;		/* cost can't get smaller! */
	i = *ip++ & ~(todo | done);
	for (nextbit = 0; nextbit < traitcount; ++nextbit) {
		if ((i & (1 << nextbit)) == 0 ||
			cost + traits[nextbit].cost > lowcost)
				continue;
		dobits(todo | ( 1 << nextbit), done,
			cost + traits[nextbit].cost, ip);
		done |= 1 << nextbit;
	}
}
End of measures.c
echo 'substrings.c' 1>&2
cat >'substrings.c' <<'End of substrings.c'
#

/*LINTLIBRARY*/

#include "stdio.h"

#if !defined lint && !defined NOID
static char	elsieid[] = "@(#)substrings.c	8.2";
#endif /* !defined lint && !defined NOID */

#include "ctype.h"

#if !defined TRUE
#define TRUE	1
#define FALSE	0
#endif /* !defined TRUE */

extern char *	imalloc();
extern char *	strchr();

char **
substrings(string, separators)
char *	string;
char *	separators;
{
	register char **	array;
	register char *		cp;
	register int		nsubs;
	register int		lastsepprint;

	if (string == NULL || separators == NULL || *separators == '\0')
		return NULL;
	array = (char **) imalloc((strlen(string) + 2) * sizeof *array);
	if (array == NULL)
		return NULL;
	if (*string == '\0') {
		array[0] = NULL;
		return array;
	}
	nsubs = 0;
	lastsepprint = TRUE;
	for (cp = string; *cp != '\0'; ++cp)
		if (strchr(separators, *cp) != 0) {
			if (isascii(*cp) && isprint(*cp) && *cp != ' ' &&
				lastsepprint)
					array[nsubs++] = cp;
			lastsepprint = isascii(*cp) && isprint(*cp) &&
				*cp != ' ';
			*cp = '\0';
		} else {
			lastsepprint = FALSE;
			if (cp == string || *(cp - 1) == '\0')
				array[nsubs++] = cp;
		}
	if (lastsepprint && *(cp - 1) == '\0')
		array[nsubs++] = cp;
	array[nsubs] = NULL;
	return array;
}
End of substrings.c
echo 'wild.c' 1>&2
cat >'wild.c' <<'End of wild.c'
#

/*LINTLIBRARY*/

#include "stdio.h"

#if !defined lint && !defined NOID
static char	elsieid[] = "@(#)wild.c	8.3";
#endif /* !defined lint && !defined NOID */

char *
wildname(name)
char *	name;
{
	register int	i;
	static char	saved[15];

	if (name != NULL && *name != '\0')
		for (i = 0; (saved[i] = *name) != '\0'; ++name)
			if ((*name == '/' || *name == '\\') &&
				*(name + 1) != '/' && *(name + 1) != '\\' &&
				*(name + 1) != '\0')
					i = 0;
			else if (i < sizeof saved - 1)
				++i;
	return (saved[0] == '\0') ? "?" : saved;
}

wild2(part1, part2)
char *	part1;
char *	part2;
{
	(void) fputs(wildname(""), stderr);
	/*
	** One space after the colon matches what perror does
	** (although your typing teacher may want a second space).
	*/
	(void) fputs(": wild ", stderr);
	if (part1 == NULL)
		part1 = "";
	if (part2 == NULL)
		part2 = "";
	(void) fputs(part1, stderr);
	if (*part1 != '\0' && *part2 != '\0')
		(void) fputs(" ", stderr);
	(void) fputs(part2, stderr);
	(void) fputs("\n", stderr);
}

wild(string)
char *	string;
{
	wild2("", string);
}

wildcall(string)
char *	string;
{
	wild2("call of", string);
}

wildret(string)
char *	string;
{
	wild2("result from", string);
}
End of wild.c
echo 'wildexit.c' 1>&2
cat >'wildexit.c' <<'End of wildexit.c'
#

/*LINTLIBRARY*/

#include "stdio.h"

#if !defined lint && !defined NOID
static char	elsieid[] = "@(#)wildexit.c	8.2";
#endif /* !defined lint && !defined NOID */

#if !defined TAMEEXITVAL
#define TAMEEXITVAL	0
#endif /* !defined TAMEEXITVAL */

#if !defined WILDEXITVAL
#define WILDEXITVAL	1
#endif /* !defined WILDEXITVAL */

wildexit(string)
char *	string;
{
	wild(string);
	for ( ; ; )
		exit(WILDEXITVAL);
}

wildcexit(string)
char *	string;
{
	wildcall(string);
	for ( ; ; )
		exit(WILDEXITVAL);
}

wildrexit(string)
char *	string;
{
	wildret(string);
	for ( ; ; )
		exit(WILDEXITVAL);
}

wild2exit(part1, part2)
char *	part1;
char *	part2;
{
	wild2(part1, part2);
	for ( ; ; )
		exit(WILDEXITVAL);
}

tameexit()
{
	for ( ; ; )
		exit(TAMEEXITVAL);
}
End of wildexit.c
exit

-- 
For comp.sources.unix stuff, mail to sources@uunet.uu.net.