[alt.sources] smooth - smooth X,Y data in various ways

davidsen@sixhub.UUCP (Wm E. Davidsen Jr) (08/18/89)

  This is a program and documentation in progress. Some other things
are being added, so this will change. The final version will go to
source.misc for archiving.

#!/bin/sh
# shar:	Shell Archiver  (v1.24)
#
#	Run the following text with /bin/sh to create:
#	  smooth.1
#	  smooth.c
#
echo "x - extracting smooth.1 (Text)"
sed 's/^X//' << 'SHAR_EOF' > smooth.1 &&
X'\" @(#)skeleton	3.3 - 12/21/83
X'\" [c][e][t] (only if preprocessing by cw, eqn, and/or tbl required)
X.TH SMOOTH 1 Local
X.SH NAME
Xsmooth - smooth X,Y data in various ways
X.SH SYNOPSIS
Xsmooth [options] [filename]
X.SH DESCRIPTION
X.B smooth
Xread X,Y data from the file specified or stdin and outputs the smoothed
Xdata to stdout. The data must be in text format, whitespace separated.
XBy default the Y data points are smoothed by averaging with the nearest
X+/- 3 points.
X.SS Simple X,Y data
XData for which these is a single Y for each X value is input with one X
Xand one Y value per line. Data input must be sorted on the X value.
X.SS Data with duration
XThe program will also handle data with duration, in which the Y value is
Xassociated with an event which starts at Xstart and ends at Xend. The
Xdata for any given X is the average of the Y values of all data points
Xwhich satisfy Xstart<=X<=Xend. Data which has duration may be
Xentered with Xstart, Xlength, and Y on a single line, blank separated,
Xor with Xstart, Xduration, and Y.
X.sp
X.ne 3
XThe default input format is Xstart, Xduration, Y. By use of the \fB-E\fP
Xoption the data may be represented as Xstart, Xend, and Y.
X.SS Options
XAre specified on the command line. Not all combinations of options are
Xmeaningful.
X.sp 2
X.TS
Xtab(@);
Xlbw(.75i) lw(4i).
X-s N@T{
XThe currect point is averaged with the +/-N points closest in X value.
XT}
X
X-r N@T{
XAverage the Y value of all points which have an X value within +/-N of
Xthe current X.
XT}
X
X-a N@T{
XGenerate output for duration data averaged over N intervals.
XRead Y values with Xmin and Xmax, count the Y value at all X values from
XXmin to Xmax.
XT}
X
X-A N@T{
XGenerate output for duration data averaged into intervals N X units
Xwide.
XT}
X
X-E@T{
Xending value. This option accepts duration data in the format:
X.tr ~
X"Xstart~Xend~Y"
Xrather than the default:
X"Xstart~Xduration~Y"
X.tr
XT}
X
X-h@T{
XAverage the highest and lowest points within the given number of
Xpoints or X value range.
XT}
X
X-2@T{
XGenerate a pseudo-point between every two input points, such
Xthat the X and Y are the average of the adjoining two points.
XThe original points are not output, except the first and last.
XT}
X
X-v@verbose
X-x N@set debug level to N
X
X-w@T{
XWeight the Y values averaged based on the offset from the center
Xof the number of points or X range specified.
XT}
X-W@T{
XWeight the Y values averaged based on the square of the X offset
Xfrom the center of the number of points or X range specified.
XT}
X
X-i N@T{
XAverage the current point with the values having an X offset
X+/1N X units from the center. This is useful for cyclic data,
Xsuch as hourly, weekly, etc.
XT}
X-I N@T{
XPerform interval averaging (see -i) +/1N intervals from the
Xcenter value. Default is 2. This is used only with the -i
Xoption.
XT}
X
X-m N@T{
Xminimum. Ignore any Y value less than N.
X(Used with -r and -i only).
XT}
X-M N@T{
XMaximum. Ignore any Y value greater than N.
X(Used with -r and -i only).
XT}
X
X-?@online help
X.TE
X.SH EXAMPLES
XSmooth using default values, file x1.dat, output to x2.dat:
X.ti +.5i
X.B "smooth x1.dat > x2.dat
X.sp
XSmooth data where X is hours, and a 4 hours cycle is expected:
X.ti +.5i
X.B "smooth -i4 x1.dat > x2.dat
X.sp
XA data file has the starting and ending time of jobs running on a
Xsystem, and the ratio of real time to CPU as Y. Display the data as 100
Xslices, where the value for each slice is the average of all jobs
Xrunning during that time.
X.ti +.5i
X.B "smooth -a100 x1.dat > x2.dat
X.sp
XA data file has time for X and reading for Y. Discard all reading less
Xthan 1.5.
X.ti +.5i
X.B "smooth -m1.5 x1.dat > x2.dat
X.sp
XA data file has time as X and readings for Y. Average all readings
Xwithin ten minutes of the current point.
X.ti +.5i
X.B "smooth -r10 x2.dat > x2.dat
X.SH WARNINGS
XWhen using duration data the second filed of input is the Xdelta, or
Xduration of the event, rather than the ending X value. The \fB-E\fP
Xoption may be used if the data is more conveniently expressed with Xend.
XNo check is made to see if the data are in order on X.
X.SH FILES
X.SH SEE ALSO
Xsort(1).
X.SH DIAGNOSTICS
XThe -a and -A options may generate an "Out of memory" option for large
Xvalues of N.
X.SH LIMITATIONS
X.SH AUTHOR
XBill Davidsen, davidsen@crdos1.crd.ge.com.
SHAR_EOF
chmod 0666 smooth.1 || echo "restore of smooth.1 fails"
echo "x - extracting smooth.c (Text)"
sed 's/^X//' << 'SHAR_EOF' > smooth.c &&
X/*****************************************************************
X |  smooth - smooth X,Y data
X |----------------------------------------------------------------
X |  Reads X,Y pairs and performs a rolling average of any one of
X |  several types, uniform distribution assumed.
X |----------------------------------------------------------------
X |  Author Bill Davidsen (davidsen@crd.ge.com) June 1989
X |  v1.9 last change 8/15/89
X ****************************************************************/
X
X#include <stdio.h>
X#define R		1
X#if	111
X#define version "v1.9"
X#else
X#define version "beta test version"
X#endif
X
X#ifndef MAXINT
X#define MAXINT	(((unsigned)~0) >> 1)
X#endif
X
X/* floating absolute difference */
Xfloat T_float;
X#define FAD(x,y) ((T_float=(x)-(y))<0 ? -T_float : T_float)
X
X/* getopt stuff */
Xextern int optind, opterr;
Xextern char *optarg;
X
X/* global flags */
Xint verbose = 0, debug = 0;
Xint minflag = 0, maxflag = 0, weightflag = 0, fixrange = 0;
Xint endX = 0;
Xint ncycles = 2;
Xfloat minval, maxval;
X
X/* pointers for reading data */
Xint pointsread = 0, arraysize = 0;
Xfloat *X, *Y, *Xstart, *Xend;
X
X/* pointers for weighted average */
Xint *weightpts, weightlen;
Xfloat weightrange;
Xfloat *weightsum, weightlow = 1e6, weighthigh = -1e6;
Xfloat setweight();
X
XFILE *datafile = stdin;
X
X/* algorithm for selecting points to average */
Xenum { OFFSET, RANGE } R_select = OFFSET;
X
X/* smoothing routines */
Xvoid smoothu(), smoothw(), smootha(), smoothi(), smooth2(), smoothp();
Xvoid (*smoothptr)() = smoothu;
X
X/* data input routines */
Xvoid readXY(), readXXY();
Xvoid (*readptr)() = readXY;
X
X/* useful routines */
Xchar *malloc(), *realloc();
XFILE *fopen();
Xdouble atof();
X
Xmain(argc,argv)
Xint argc;
Xchar *argv[];
X{
X	int n;
X	float sfact = 3;
X	char *Options = "r:s:A:a:vx:m:M:i:I:2wWhE?";
X
X	while ((n = getopt(argc, argv, Options)) != EOF) {
X		switch (n) {
X		case 's': /* smoothing factor */
X			sfact = atof(optarg);
X			break;
X		case 'r': /* smooth by range */
X			sfact = atof(optarg);
X			R_select = RANGE;
X			break;
X		case 'A': /* average in N width strips */
X			fixrange = 1;
X			weightrange = atof(optarg);
X			readptr = readXXY;
X			smoothptr = smootha;
X			break;
X		case 'a': /* average over an area */
X			weightlen = atoi(optarg);
X			weightsum = (float *)malloc(sizeof(float) * weightlen);
X			weightpts = (int *)malloc(sizeof(int) * weightlen);
X			readptr = readXXY;
X			smoothptr = smootha;
X			break;
X		case 'E':
X			endX = 1;
X			break;
X		case 'v': /* increase verbose */
X			++verbose;
X			break;
X		case 'x': /* debug */
X			debug = atoi(optarg);
X			break;
X		case 'm': /* set minimum */
X			++minflag;
X			minval = atof(optarg);
X			break;
X		case 'M': /* set maximum */
X			++maxflag;
X			maxval = atof(optarg);
X			break;
X		case 'i': /* interval data */
X			sfact = atof(optarg);
X			smoothptr = smoothi;
X			break;
X		case 'I': /* number of intervals */
X			ncycles = atoi(optarg);
X			break;
X
X		case '2': /* average every two points */
X			smoothptr = smooth2;
X			break;
X		case 'w': /* linear weight the average */
X			weightflag = 1;
X			smoothptr = smoothw;
X			break;
X		case 'W': /* weight on square of X distance */
X			weightflag = 2;
X			smoothptr = smoothw;
X			break;
X		case 'h': /* hi-lo average */
X			smoothptr = smoothp;
X			break;
X		case '?': /* invalid option */
X			explain();
X			exit(1);
X		}
X	}
X
X	/* if there is a filename, use it */
X	if (optind < argc) {
X		datafile = fopen(argv[optind], "r");
X		if (datafile == NULL) {
X			fprintf(stderr, "Can't open data file %s\n", argv[optind]);
X			exit(1);
X		}
X	}
X
X	/* read the data into the global X and Y arrays */
X	(*readptr)();
X	if (verbose) {
X		fprintf(stderr, "\nRead %d points, smooth %f\n",
X			pointsread, sfact);
X	}
X
X	/* smooth uniform data */
X	(*smoothptr)(sfact);
X	exit(0);
X}
X
X/*****************************************************************
X |  explain - give usage hints
X |----------------------------------------------------------------
X |  This does not replace the man page, just lists the options
X ****************************************************************/
X
Xexplain() {
X	printf(
X		"smooth - %s\n\n"
X		"Usage:\n"
X		"  smooth [options] file\n\n",
X		version
X	);
X	printf(
X		"Where options are:\n"
X		"  -s N   smooth over +/-N points\n"
X		"  -r N   smooth over points within +/- N\n"
X		"  -a N   accept wide data of the form Xstart, Xstop, Y\n"
X		"         and distribute over N equal points\n"
X		"  -A W   accept wide data and break into W wide strips\n"
X		"  -E     duration date is Xstart, Xend, Y format\n"
X		"  -h     average the high and low values within +/-N\n"
X		"  -2     average every 2 points\n"
X		"  -v     verbose\n"
X		"  -w     linear weighting of data\n"
X		"  -W     pseudo-normal weighting of data\n"
X		"  -x N   set debug level to N\n"
X		"  -i N   sample at X intervals of N\n"
X		"  -I N   sample N intervals (def 2)\n"
X		"  -m N   set minimum Y value to N, ignore lower\n"
X		"         (used with -r and -i only)\n"
X		"  -M N   set maximum Y value to N, ignore higher\n"
X		"         (used with -r and -i only)\n"
X		"\n"
X	);
X}
X
X/*****************************************************************
X |  inrange - test if a Y value is inrage for averaging
X ****************************************************************/
X
Xint
Xinrange(val)
Xfloat val;
X{
X	return (!minflag || val >= minval) && (!maxflag || val <= maxval);
X}
X
X/*****************************************************************
X |  setweight - return weight to apply to a point at this X offset
X ****************************************************************/
X
Xfloat
Xsetweight(offset, max)
Xfloat offset, max;
X{
X	float w;
X	
X	switch (weightflag) {
X	case 0: /* linear */
X		w = 1;
X		break;
X	case 1: /* simple linear */
X		w = (max - offset)/max;
X		break;
X	case 2: /* square linear */
X		w = (max - offset)/max;
X		w *= w;
X		break;
X	}
X
X	if (debug > 2) {
X		fprintf(stderr,
X			"Type %d weight %f for %f and %f\n",
X			weightflag, w, offset, max
X		);
X	}
X	return w;
X}
X
X/*****************************************************************
X |  setlimits - set the start and end subscripts
X |----------------------------------------------------------------
X |  Pick points based on either range or points offset from the
X |  current subscript or X value.
X *****************************************************************/
X
Xstatic void
Xsetlimits(crntss, range, start, end)
Xint crntss, *start, *end;
Xfloat range;
X{
X	int n, width = range, maxpt = pointsread-1;
X	float limit;
X
X	switch (R_select) {
X	case OFFSET: /* all within N points */
X		/* set low, not less than zero */
X		if ((n = crntss-width) < 0) n = 0;
X		*start = n;
X
X		/* set high, not more than data available */
X		if ((n = crntss+width) > maxpt) n = maxpt;
X		*end = n;
X		break;
X	case RANGE: /* all within a given X range */
X		limit = X[crntss]-range;
X		/* scan down until limit found */
X		for (n = crntss; n >= 0 && X[n] >= limit; --n);
X		*start = n + 1;
X
X		/* scan up until limit found */
X		limit = X[crntss]+range;
X		for (n=crntss; n <= maxpt && X[n] <= limit; ++n);
X		*end = n - 1;
X		break;
X	}
X}
X
X/*****************************************************************
X |  readXY - read the X and Y data from the data file
X |----------------------------------------------------------------
X |  The arrays are expanded as needed
X ****************************************************************/
X
Xvoid
XreadXY() {
X	float tx, ty;
X	int n;
X
X	while (fscanf(datafile, "%f %f", &tx, &ty) == 2) {
X		/* got more data, be sure there's room */
X		if (pointsread == arraysize) {
X			/* allocate or grow the buffers */
X			if (arraysize == 0) {
X				/* allocate the initial buffers */
X				arraysize = 1024;
X				X = (float *)malloc(1024 * sizeof(float));
X				Y = (float *)malloc(1024 * sizeof(float));
X				if (verbose) fprintf(stderr, "Reading");
X			}
X			else {
X				/* grow the buffers */
X				arraysize += 1024;
X				X = (float *)realloc(X, arraysize * sizeof(float));
X				Y = (float *)realloc(Y, arraysize * sizeof(float));
X			}
X
X			/* check the status */
X			if (X == NULL || Y == NULL) {
X				fprintf(stderr, "Can't alloc buffer!\n");
X				exit(1);
X			}
X			if (verbose) putc('.', stderr);
X		}
X
X		/* add the data to the buffer */
X		X[pointsread] = tx;
X		Y[pointsread] = ty;
X		++pointsread;
X	}
X}
X
X/*****************************************************************
X |  readXXY - read Xstart, Xend,  and Y data from the data file
X |----------------------------------------------------------------
X |  The arrays are expanded as needed
X ****************************************************************/
X
Xvoid
XreadXXY() {
X	float txstart, txend, tx, ty;
X	int n;
X
X	while (fscanf(datafile, "%f %f %f", &txstart, &txend, &ty) == 3) {
X		/* got more data, be sure there's room */
X		if (pointsread == arraysize) {
X			/* allocate or grow the buffers */
X			if (arraysize == 0) {
X				/* allocate the initial buffers */
X				arraysize = 1024;
X				Xstart = (float *)malloc(1024 * sizeof(float));
X				Xend = (float *)malloc(1024 * sizeof(float));
X				Y = (float *)malloc(1024 * sizeof(float));
X				if (verbose) fprintf(stderr, "Reading");
X			}
X			else {
X				/* grow the buffers */
X				arraysize += 1024;
X				Xstart = (float *)realloc(Xstart, arraysize * sizeof(float));
X				Xend = (float *)realloc(Xend, arraysize * sizeof(float));
X				Y = (float *)realloc(Y, arraysize * sizeof(float));
X			}
X
X			/* check the status */
X			if (Xstart == NULL || Xend == NULL || Y == NULL) {
X				fprintf(stderr, "Can't alloc buffer!\n");
X				exit(1);
X			}
X			if (verbose) putc('.', stderr);
X		}
X
X		/* see if the 2nd dat point is an ending value */
X		if (endX) txend -= txstart;
X
X		/* add the data to the buffer */
X		Xstart[pointsread] = txstart;
X		Xend[pointsread] = txend;
X		Y[pointsread] = ty;
X
X		/* check the limits */
X		if (txstart < weightlow) weightlow = txstart;
X		if ((txstart += txend) > weighthigh) weighthigh = txstart;
X
X		/* final incr of the points read */
X		++pointsread;
X	}
X}
X
X/*****************************************************************
X |  smoothu - smooth uniform data
X |----------------------------------------------------------------
X |  Average N points and output average, just the points at ends
X ****************************************************************/
X
Xvoid
Xsmoothu(fwidth)
Xfloat fwidth;
X{
X	int n, m, npoints, width = fwidth;
X	int hi, lo, oldhi, oldlo;
X	float sum = 0, avg;
X
X	/* output the fully averaged points */
X	for (n = 0; n <= pointsread-1; ++n) {
X		/* save the current limits and get the new ones */
X		oldhi = hi;
X		oldlo = lo;
X		setlimits(n, fwidth, &lo, &hi);
X		if (debug) {
X			fprintf(stderr, "lo %4d, hi %4d\n", lo, hi);
X		}
X
X		/*
X		 *  This is a timesaver... drop the points no longer in the average
X		 *  and add the new ones, rather than recalc the average.
X		 */
X
X		if (n > 0) {
X			/* don't do this the first time */
X			for (m=oldlo; m < lo; ++m) sum -= Y[m];
X			for (m=oldhi+1; m <= hi; ++m) sum += Y[m];
X		}
X		else {
X			/* do this the first time instead, build the initial sum */
X			for (sum = 0, m = lo; m <= hi; ++m) sum += Y[m];
X		}
X		npoints = hi-lo+1;
X		
X		/* calculate the average value */
X		avg = sum/npoints;
X
X		/* output the point */
X		if (debug) {
X			fprintf(stderr, "pt %-4d np %-2d sum %-5.0f\n",
X				n, npoints, sum);
X		}
X		printf("%f %f\n", X[n], avg);
X	}
X}
X
X/*****************************************************************
X |  smoothw - smooth data or a given range
X |----------------------------------------------------------------
X |  Average all points within +/- width of the current point
X ****************************************************************/
X
Xvoid
Xsmoothw(width)
Xfloat width;
X{
X	int n, m;
X	int hi, lo;
X	float sum = 0, avg, limit, pweight, npoints;
X
X	/* output the fully averaged points */
X	for (n = 0; n <= pointsread-1; ++n) {
X		sum = Y[n];
X		npoints = 1;
X
X		/* set the limits and create the weighted sum */
X		setlimits(n, width, &lo, &hi);
X		sum = npoints = 0;
X		for (m = lo; m <= hi; ++m) {
X			/* get the weight factor and apply it */
X			pweight = setweight(FAD(X[n],X[m]), width);
X			sum += Y[m] * pweight;
X			npoints += pweight;
X		}
X		avg = sum / npoints;
X
X		/* output the point, then slide the average up */
X		if (debug) {
X			fprintf(stderr, "pt %-4d np %-6.2f sum %-5.0f\n",
X				n, npoints, sum);
X		}
X		printf("%f %f\n", X[n], avg);
X	}
X}
X
X/*****************************************************************
X |  smootha - average data by time into points
X |----------------------------------------------------------------
X |  This is used on data with a duration, in the form Xstart,
X |  Xens, Yval. The range of all X values is broken into a number of
X |  slices, and the value for any slice is the average of all Y
X |  values who's X value range includes this slice.
X ****************************************************************/
X
Xvoid
Xsmootha() {
X	int n, m;
X	int first, last;
X	float tx, ty;
X	float weightincr;
X	float wt_tmp1;
X
X	/* check for fixed range specified */
X	if (fixrange) {
X		/* determine the number of strips */
X		weightlen = 1 + (weighthigh - weightlow) / weightrange;
X		weightsum = (float *)malloc(sizeof(float) * weightlen);
X		weightpts = (int *)malloc(sizeof(int) * weightlen);
X		weightincr = weightrange;
X	}
X	else weightincr = (weighthigh - weightlow) / (weightlen - 1);
X	
X	/* init the arrays */
X	for (n=0; n<weightlen; ++n) {
X		weightsum[n] = weightpts[n] = 0;
X	}
X
X	/* process a point at a time loop */
X	for (n=0; n < pointsread; ++n) {
X		/* map into starting and ending indices */
X		wt_tmp1 = (Xstart[n] - weightlow);
X		first = wt_tmp1 / weightincr;
X		last = (wt_tmp1 + Xend[n]) / weightincr;
X
X		/* add the values to the vectors */
X		for (m = first; m <= last; ++m) {
X			weightsum[m] += Y[n];
X			weightpts[m]++;
X		}
X	}
X
X	/* form the averages here */
X	for (n = 0; n < weightlen; ++n) {
X		if (m = weightpts[n])
X			weightsum[n] /= m;
X	}
X
X	/* output the first point */
X	printf("%f %f\n", weightlow, weightsum[0]);
X
X	/* loop to the next to last point */
X	for (n = 1; n < (weightlen-1); ++n) {
X		printf("%f %f\n", weightlow+n*weightincr, weightsum[n]);
X	}
X
X	/* output the last point */
X	if (!fixrange)
X		printf("%f %f\n", weighthigh, weightsum[weightlen-1]);
X}
X
X/*****************************************************************
X |  smoothi - smooth by averaging points at intervals
X |----------------------------------------------------------------
X |  This applies to data at uniform (and assumed cyclic) intervals.
X |  The data at the +/- N interval of X is averaged with the current
X |  value.
X ****************************************************************/
X
Xvoid
Xsmoothi(interval)
Xfloat interval;
X{
X	int n, m, npoints, tp;
X	float sum, testval, limit, tick;
X
X	/* set the limit of closeness of match */
X	limit = X[1] - X[0];
X	tick = interval/limit;
X
X	/* loop thru the values */
X	for (n=0; n < pointsread; ++n) {
X		sum = npoints = 0;
X		if (debug) {
X			fprintf(stderr, "n=%-4d base X %15f\n", n, X[n]);
X		}
X		
X		/* scan +/-2 intervals (for now) */
X		for (m = -ncycles; m <= ncycles; ++m) {
X			tp = n + (m * tick);
X			if (0 <= tp && tp < pointsread && inrange(Y[tp])) {
X				sum += Y[tp];
X				++npoints;
X				if (debug) {
X					fprintf(stderr, "  used X=%15f\n", X[tp]);
X				}
X			}
X		}
X
X		/* output what we have */
X		printf("%f %f\n", X[n], sum/npoints);
X	}
X}
X
X/*****************************************************************
X |  smooth2 - just average every two points
X |----------------------------------------------------------------
X |  This reduces the number of points by one
X ****************************************************************/
X
Xvoid
Xsmooth2(sfact)
Xfloat sfact;
X{
X	int n;
X	float sum, Xavg, Yavg;
X
X	/* just a simple loop thru the points */
X	for (n=1; n < pointsread; ++n) {
X		printf("%f %f\n",
X			(X[n-1]+X[n])/2,
X			(Y[n-1]+Y[n])/2
X		);
X	}
X}
X
X/*****************************************************************
X |  smoothp - smooth average of peaks within +/- N points
X |----------------------------------------------------------------
X |  Finds the highest and lowest point within the specified range
X |  and takes the average of just those points.
X ****************************************************************/
X
Xvoid
Xsmoothp(range)
Xfloat range;
X{
X	int width = range;
X	int start, end;
X	int n, m;
X	float high, low, crntY;
X
X	/* loop through all points */
X	for (n=0; n < pointsread; ++n) {
X		/* set the starting and ending subscripts */
X		setlimits(n, range, &start, &end);
X
X		/* now scan for the peak values */
X		low = MAXINT;
X		high = -low;
X		for (m=start; m <= end; ++m) {
X			crntY = Y[m];
X			if (crntY < low) low = crntY;
X			if (crntY > high) high = crntY;
X		}
X
X		/* output the X and average */
X		printf("%f %f\n", X[m], (high+low)/2);
X	}
X}
SHAR_EOF
chmod 0444 smooth.c || echo "restore of smooth.c fails"
exit 0