[mod.sources] choose -- a program to select lines at random

sources-request@genrad.UUCP (08/15/85)

Mod.sources:  Volume 2, Issue 36
Submitted by: tektronix!teklds!bobl (Bob Lewis)


#!/bin/sh-----cut here-----cut here-----cut here-----cut here-----
# shar:	Shell Archiver
#	Run the following text with /bin/sh to create:
#	README #	choose.1 #	choose.c 
cat - << \SHAR_EOF > README
Here is a program, "choose", that randomly selects lines from files.  Simply
compile it as

	cc -O -o choose choose.c

The program works under 4.2bsd and should also work under UTek.  Thanks to
Nigel Horspool (uvicctr!nigelh), Brad Needham (tekig4!bradn), and everyone
else who answered my earlier query on "random(3)".

	- Bob Lewis
	  ...!tektronix!teklds!bobl
SHAR_EOF
cat - << \SHAR_EOF > choose.1
.TH CHOOSE 1 "7 August 1985"
.UC 4
.SH NAME
choose \- randomly select one or more lines
.SH SYNOPSIS
.B choose
[
.BI \- n
]
[
.I file ...
]
.SH DESCRIPTION
.I Choose
randomly chooses lines from its input
.IR file s
(default: standard input).
All such files are concatenated together before the selection takes
place.
.PP
The command line option is:
.TP
.BI \- n
Select
.I n
lines from the input (default: 1).
.SH EXAMPLE
To build your own version of
.IR fortune (1),
put each saying in a file in the directory "wisdom" and then execute
.IP
cat `ls wisdom/* | choose`
.SH FILES
/tmp/choose.*
.SH AUTHOR
Bob Lewis, CAE Systems Division, Tektronix
SHAR_EOF
cat - << \SHAR_EOF > choose.c
#include <stdio.h>
#include <sys/time.h>

/* name the program was invoked under; set in main(), used in diagnostics */
char *ProgName;

/* template handed to mktemp() to name the scratch copy of the input */
#define FN_TMP_TEMPLATE "/tmp/choose.XXXXXX"
char *FnTmp;	/* name of the temp file; unlinked by Bye() */
FILE *FpTmp;	/* stream on the temp file; opened "w+" in main(), closed by Bye() */

/*
 *	choose [-<#>] [<file>...]
 *
 *	Copy every input line (named files, or standard input if none are
 *	named) into a temp file while counting them, then make a single pass
 *	over that copy printing NToChoose lines picked at random.  Chosen
 *	lines come out in their original input order.
 *
 *	Exits 0 once the requested number of lines has been printed, 1 on a
 *	usage error, an unopenable file, a request for more lines than were
 *	read, or an unexpected end of the temp file.
 */
main(NArg, Arg)
	int NArg;
	char *Arg[];
{
	/*
	 *	mktemp() overwrites the X's of its argument in place, so it must
	 *	be given a writable array rather than a string literal.  FnTmp
	 *	points into this buffer for the rest of the run.
	 */
	static char TmpName[] = FN_TMP_TEMPLATE;
	int Ch;		/* int, not char: EOF must differ from every byte value */
	int NToChoose = 1;
	int NSample;
	FILE *Fp, *efopen();
	char *mktemp();


	ProgName = *Arg++;

	/* optional leading "-<#>": how many lines to pick */
	if (NArg > 1 && Arg[0][0] == '-') {
		NToChoose = atoi(&Arg[0][1]);
		if (NToChoose <= 0) {
			fprintf(stderr, "Usage: %s [-<#> [<file>]...]\n", ProgName);
			exit(1);
		}
		Arg++;
	}

	FnTmp = mktemp(TmpName);
	FpTmp = efopen(FnTmp, "w+");

	/* gather all input into the temp file; NSample = total line count */
	if (*Arg != NULL) {
		NSample = 0;
		while (*Arg != NULL) {
			Fp = efopen(*Arg++, "r");
			NSample += DupCount(Fp, FpTmp);
			fclose(Fp);
		}
	} else
		NSample = DupCount(stdin, FpTmp);

	/* also guarantees NSample >= 1 below, since NToChoose >= 1 */
	if (NToChoose > NSample) {
		fprintf(stderr, "%s: Can't choose %d from %d.\n",
				ProgName, NToChoose, NSample);
		Bye(1);
	}

	InitRandom();

	rewind(FpTmp);

	/*
	 *	One pass over the copy.  With NSample lines still unread and
	 *	NToChoose still wanted, the current line is taken with
	 *	probability NToChoose/NSample.  When the two are equal every
	 *	remaining line is taken, so the loop always ends through Bye(0)
	 *	unless the temp file turns out shorter than was counted.
	 */
	for (;;) {
#ifdef random_is_fixed
		if ((random() % NSample) < NToChoose) {
#else
		/*
		 *	According to uvicctr!nigelh, the low order bits of random()
		 *	are not very random, hence...  (random() yields 31 bits, so
		 *	dropping 4 limits us to fewer than 2^27 lines of input.
		 *	oh well.)
		 */
		if (((random() >> 4) % NSample) < NToChoose) {
#endif
			/* chosen: echo the line, always ending it with a newline */
			while ((Ch = getc(FpTmp)) != '\n' && Ch != EOF)
				putchar(Ch);
			putchar('\n');
			NToChoose--;
			if (NToChoose <= 0)
				Bye(0);
		} else {
			/* not chosen: skip to the start of the next line */
			while ((Ch = getc(FpTmp)) != '\n' && Ch != EOF)
				;
			if (Ch == EOF) {
				fprintf(stderr, "%s: Unexpected EOF\n", ProgName);
				Bye(1);
			}
		}
		NSample--;
	}
}

/*
 *	Bye -- common exit path once the temp file exists.
 *
 *	Closes the temp stream FpTmp, removes the file named by FnTmp, and
 *	terminates the process with exit status Stat.  Does not return.
 */
Bye(Stat)
	int Stat;
{
	(void) fclose(FpTmp);
	(void) unlink(FnTmp);
	exit(Stat);
}

/*
 *	efopen -- fopen() that never hands back NULL (after K&P).
 *
 *	Opens `file' with the given `mode' and returns the stream.  If the
 *	open fails, prints a diagnostic prefixed with ProgName on stderr and
 *	exits with status 1.
 */
FILE *efopen(file, mode)
	char *file, *mode;
{
	FILE *fopen();
	extern char *ProgName;
	FILE *stream = fopen(file, mode);

	if (stream == NULL) {
		fprintf(stderr, "%s: can't open file %s mode %s\n", ProgName, file, mode);
		exit(1);
	}
	return stream;
}

/*
 *	DupCount -- duplicate a stream and count its lines.
 *
 *	Copies every byte from FpFrom (read up to end of file) onto FpTo and
 *	returns the number of newline characters seen.  A final line that
 *	lacks a terminating newline is copied but not counted.  Neither
 *	stream is closed or repositioned here.
 */
int DupCount(FpFrom, FpTo)
	FILE *FpFrom, *FpTo;
{
	/*
	 *	getc() returns an int so that EOF is distinguishable from every
	 *	byte value; storing it in a char would either mistake a 0xFF
	 *	data byte for EOF or never see EOF at all.
	 */
	int Ch;
	int NLn = 0;

	while ((Ch = getc(FpFrom)) != EOF) {
		if (Ch == '\n')
			NLn++;
		putc(Ch, FpTo);
	}

	return NLn;
}

/*
 *	InitRandom -- seed the random() generator from the current time
 *	(courtesy of tekig4!bradn).
 *
 *	The microseconds field is folded into the seed so that two runs
 *	started within the same second do not make identical choices.
 */
InitRandom()
{
	struct timeval tv;
	struct timezone tz;

	gettimeofday(&tv, &tz);
	srandom((int) (tv.tv_sec ^ tv.tv_usec));

	return;
}
SHAR_EOF