[comp.sources.misc] v07i063: information statistic and chi-square for 2-D contingency tables

allbery@uunet.UU.NET (Brandon S. Allbery - comp.sources.misc) (07/09/89)
Posting-number: Volume 7, Issue 63
Submitted-by: gwyn@BRL.MIL
Archive-name: tot_info

#!/bin/sh
# Self-unpacking archive format.  To unbundle, sh this file.
echo 'Makefile' 1>&2
cat >'Makefile' <<'END OF Makefile'
#	Makefile -- -- makefile for "tot_info" utility

#	last edit:	89/02/06	D A Gwyn

#	SCCS ID:	@(#)tot_info.mk	1.1 (edited for publication)

PRODUCT = tot_info

MAKEFIL	= Makefile
HEADERS = chisq.h gamma.h
CFILES	= $(PRODUCT).c info.c gamma.c chisq.c # chisq.c is a "freebie"
OBJECTS	= $(PRODUCT).o info.o gamma.o # chisq.o
LIBTEST	= g_test
TSRC	= $(LIBTEST).c
TOBJ	= $(LIBTEST).o
TFILES	= $(PRODUCT).in $(PRODUCT).exp
LIBES	= -lm
LLIBES	= $(LIBES)
MANSRCS	= $(PRODUCT).1 chisq.3 gamma.3

# "standard" UNIX utilities that may need invocations tweaked:
INS	= cp
LINT	= lint
FLOW	= cflow
XREF	= cxref -c -s -w132

# printer configuration:
LPR	= lp
PR	= pr -f

# DWB configuration:
TOPT	= -Ti300
TMAC	= -man
EQN	= eqn $(TOPT)
TROFF	= troff $(TOPT) $(TMAC)
TPOST	= | dimp


$(PRODUCT):	$(OBJECTS)
	$(CC) $(LDFLAGS) -o $@ $(OBJECTS) $(LIBES)

chisq.o info.o:	chisq.h

gamma.o:	gamma.h

print:		$(MAKEFIL) $(HEADERS) $(CFILES) $(TFILES) $(MANSRCS)
	$(PR) $(MAKEFIL) $(HEADERS) $(CFILES) $(TFILES) $(MANSRCS) | $(LPR)

typeset:	$(MANSRCS)
	$(EQN) $(PRODUCT).1 | $(TROFF) $(TPOST)
	$(EQN) chisq.3 | $(TROFF) $(TPOST)
	$(EQN) gamma.3 | $(TROFF) $(TPOST)

lint:		$(HEADERS) $(CFILES)
	$(LINT) $(CFILES) $(LLIBES) > $@

flow:		$(HEADERS) $(CFILES)
	$(FLOW) $(CFILES) > $@

xref:		$(HEADERS) $(CFILES)
	$(XREF) $(CFILES) > $@

test:		gamma_test tot_test

tot_test:	$(PRODUCT) $(TFILES)
	./$(PRODUCT) < $(PRODUCT).in > $(PRODUCT).out
	@if cmp -s $(PRODUCT).exp $(PRODUCT).out ; \
	then	echo 'Tested okay' ; \
	else	echo '*** Test failed; differences:' ; \
		diff $(PRODUCT).exp $(PRODUCT).out ; \
		exit 1 ; \
	fi

gamma_test:	$(LIBTEST)
	./$(LIBTEST)

$(LIBTEST):	$(TOBJ) gamma.o
	$(CC) $(LDFLAGS) -o $@ $(TOBJ) gamma.o $(LIBES)

$(LIBTEST).o:	gamma.h

clean:
	-rm -f $(OBJECTS) $(TOBJ) $(LIBTEST) $(PRODUCT).out \
	lint flow xref core mon.out

clobber:	clean
	-rm -f $(PRODUCT)
END OF Makefile
echo 'chisq.3' 1>&2
cat >'chisq.3' <<'END OF chisq.3'
'\" e
.TH CHISQ 3V VMB
'\"	last edit:	88/09/19	D A Gwyn
'\"	SCCS ID:	@(#)chisq.3	1.2 (edited for publication)
'\"
'\"	This source must be typeset via "eqn | troff -man"
'\"
.EQ
delim @@
.EN
.SH NAME
chisq \- independence tests for 2-way contingency tables
.SH SYNOPSIS
.ds cW (CW\" change to B (without the paren) if you don't have a CW font
\f\*(cW
#include "chisq.h"
.br
/* \fPThe following functions are declared in this header file.\f\*(cW */
.sp
double ChiSqTbl(int r, int c, const long *f, int *df);
.br
double InfoTbl(int r, int c, const long *f, int *df);\fP
.SH DESCRIPTION
\f\*(cWChiSqTbl\fP
returns Pearson's @chi sup 2@ statistic
for the \f\*(cWr\fP-by-\f\*(cWc\fP contingency table
of observed frequencies of occurrence for each combination of
row and column categories stored in row-major order
in the vector starting at \f\*(cWf\fP.
The computed number of degrees of freedom is stored into the location
pointed to by \f\*(cWdf\fP.
.br
.EQ
chi sup 2 ~==~ roman sum from i=0 to r-1 roman sum from j=0 to c-1
{ left ( { f sub ij ~-~ { f sub { i. } f sub { .j }} over N } right ) sup 2 }
over { { f sub { i. } f sub { .j }} over N }
.EN
where the row and column sums are denoted
@f sub { i. }~==~roman sum from j=0 to c-1 f sub ij@
and
@f sub { .j }~==~roman sum from i=0 to r-1 f sub ij@
and the total number of occurrences is
@N~==~roman sum from i=0 to r-1 roman sum from j=0 to c-1 f sub ij@.
Terms involving zero row or column sum are omitted and the returned
number of degrees of freedom are
correspondingly reduced from the nominal value
@(r-1)(c-1)@.
.P
\f\*(cWInfoTbl\fP
returns 2 times Kullback's
@"\v'-0.2'^\v'0.2'\h'-\w;^;u'I\^" ( H sub 1 : H sub 2 )@ statistic
for a similar contingency table.
The number of degrees of freedom to be used when relating this statistic
to the @chi sup 2@ distribution is stored into the location
pointed to by \f\*(cWdf\fP.
(See \s-1DISCUSSION\s0 below.)
.br
.EQ
2 "\v'-0.2'^\v'0.2'\h'-\w;^;u'I\^" ( H sub 1 : H sub 2 ) ~==~
2 roman sum from i=0 to r-1 roman sum from j=0 to c-1
{ f sub ij log { { N f sub ij }
over { f sub { i. } f sub { .j } } } }
.EN
where the row and column sums @f sub { i. }@ and @f sub { .j }@
and the total number of observations @N@
are the same as for Pearson's @chi sup 2@.
.SH DISCUSSION
@2 "\v'-0.2'^\v'0.2'\h'-\w;^;u'I\^" ( H sub 1 : H sub 2 )@
represents twice the estimated information
in favor of hypothesis @H sub 1@ over hypothesis @H sub 2@
contained in the observed frequencies,
where the \fInull hypothesis\fP @H sub 2@ is that the row and column
categorizations are statistically independent and
the \fIalternative hypothesis\fP @H sub 1@ is that they aren't.
This statistic is asymptotically distributed as @chi sup 2@
with @(r-1)(c-1)@ degrees of freedom;
therefore by use of the \f\*(cWQChiSq\fP function (see \fIgamma\fP(3V))
it can be converted to the probability that
the statistic would be as large as was observed
if the categorizations really are independent.
This is of course the traditional use of Pearson's @chi sup 2@ statistic,
which @2 "\v'-0.2'^\v'0.2'\h'-\w;^;u'I\^"@ approaches for large samples
when @H sub 2@ is true.
The main difference is that Pearson's @chi sup 2@ is not useful
for small sample sizes,
whereas @2 "\v'-0.2'^\v'0.2'\h'-\w;^;u'I\^"@ fully takes into account
all available information for even the smallest samples.
.P
@2 "\v'-0.2'^\v'0.2'\h'-\w;^;u'I\^" ( H sub 1 : H sub 2 )@
(along with its corresponding degrees of freedom
for use with \f\*(cWQChiSq\fP) is \fIadditive\fP for independent samples,
so that the information can be accumulated over the course of many
independent experiments in order to improve one's ability
to detect a violation of the null hypothesis.
.SH CAVEATS
Pearson's @chi sup 2@ test is unreliable for low frequencies;
consequently it is usually recommended that
categories be chosen to tally at least
5 occurrences in each bin.
However, excessive combination of bins causes loss of information and
consequently loss of discriminating power.
Because @2 "\v'-0.2'^\v'0.2'\h'-\w;^;u'I\^"@ is correct for any sample size,
it does not require combination of bins
and is therefore immune from this problem.
.bp
.SH EXAMPLE
\f\*(cW
.ta 8n 16n 24n 32n 40n 48n 56n 64n
.nf
\&
/*
	Example program to read table dimensions, then frequencies,
	and to print both statistics along with significance levels.
*/
#include	<stdio.h>
#include	<stdlib.h>			/* for EXIT_SUCCESS */
#include	"chisq.h"
#include	"gamma.h"			/* for QChiSq(\|) */

#define MAXTBL	1000
static long	f[MAXTBL];		/* frequency tallies */
static int	r;			/* # of rows */
static int	c;			/* # of columns */
#define x(i,j)	f[(i)*c+(j)]	/* convenient way to access freqs */

static void Calc(char *name, double (*func)(int, int, const long *, int *))
{
	int	df;			/* degrees of freedom for chi-square */
	double	stat = (*func)(r, c, f, &df);		/* computed statistic */

	/* print results */
	if ( stat >= 0.0 )
		(void)printf("%s = %5.2f\etdf = %2d\etq = %7.4f\en",
			     name, stat, df, QChiSq(stat, df)
			    );
	else
		(void)printf(stat < -3.5 ? "out of memory\en"
			   : stat < -2.5 ? "table too small\en"
			   : stat < -1.5 ? "negative frequency\en"
			   : "too many zeros\en"
			    );
}

int main(int argc, char *argv[])
{
	register int	i;		/* row index */
	register int	j;		/* column index */

	while ( scanf("%d %d\en", &r, &c) == 2 )	/* start new table */
		{
		/* input tallies */
		for ( i = 0; i < r; ++i )
			for ( j = 0; j < c; ++j )
				(void)scanf(" %ld", &x(i,j));

		/* compute statistics and print results */
		Calc("chisq", ChiSqTbl);
		Calc("2info", InfoTbl);
		}
	return EXIT_SUCCESS;
}\fP
.bp
.SH FILES
.ta 3i
chisq.h	header file
.br
chisq.o, info.o	object modules
.SH REFERENCE
Solomon Kullback, \fIInformation Theory and Statistics\fP (Dover, 1968).
.SH "SEE ALSO"
gamma(3V).
.SH DIAGNOSTICS
Both these functions a return negative value for the statistic
when it cannot be meaningfully computed, as follows:
.RS
.PD 0
.TP
\-1.0
too many 0 entries in the table
(for \f\*(cWChiSqTbl\fP, this means insufficient degrees of freedom;
for \f\*(cWInfoTbl\fP, this means \fIevery\fP entry in the table was 0)
.TP
\-2.0
some frequency was specified as less than 0
.TP
\-3.0
specified number of rows or columns was less than 2
.TP
\-4.0
unable to dynamically allocate enough working storage
.PD
.RE
.SH AUTHOR
Douglas A.\& Gwyn, BRL/VLD-VMB
END OF chisq.3
echo 'chisq.c' 1>&2
cat >'chisq.c' <<'END OF chisq.c'
/*
	ChiSqTbl -- Pearson's chi-square test for a 2-way contingency table

	last edit:	88/09/19	D A Gwyn

	SCCS ID:	@(#)chisq.c	1.1 (edited for publication)

	Special return values:
		-1.0	insufficient degrees of freedom (too many 0 entries)
		-2.0	invalid table entry (frequency less than 0)
		-3.0	invalid table dimensions (r or c less than 2)
		-4.0	unable to allocate enough working storage
*/

#ifdef __STDC__
#include	<stdlib.h>		/* malloc, free */

#include	"std.h"
#else
#include	"std.h"

extern pointer	malloc();
extern void	free();
#endif

double
ChiSqTbl( r, c, f, pdf )
	int		r;		/* # rows in table */
	int		c;		/* # columns in table */
	const long	*f;		/* -> r*c frequency tallies */
	int		*pdf;		/* -> return # degrees of freedom */
	{
#define	x(i,j)	f[(i)*c+(j)]		/* convenient way to access freqs */
	register int	i;		/* row index */
	register int	j;		/* column index */
	double		N;		/* (double)n */
	double		chisq;		/* accumulates chi-square */
	double		*xi;		/* row sums */
	double		*xj;		/* col sums */
	int		rdf = r - 1;	/* row degrees of freedom */
	int		cdf = c - 1;	/* column degrees of freedom */

	if ( rdf <= 0 || cdf <= 0 )
		{
		chisq = -3.0;
		goto ret3;
		}

	if ( (xi = (double *)malloc( r * sizeof(double) )) == NULL )
		{
		chisq = -4.0;
		goto ret3;
		}

	if ( (xj = (double *)malloc( c * sizeof(double) )) == NULL )
		{
		chisq = -4.0;
		goto ret2;
		}

	/* compute row sums and total */

	N = 0.0;

	for ( i = 0; i < r; ++i )
		{
		double	sum = 0.0;	/* accumulator */

		for ( j = 0; j < c; ++j )
			{
			register long	k = x(i,j);

			if ( k < 0L )
				{
				chisq = -2.0;
				goto ret1;
				}

			sum += (double)k;
			}

		if ( (xi[i] = sum) <= 0.0 )
			--rdf;
		else
			N += sum;
		}

	/* compute column sums */

	for ( j = 0; j < c; ++j )
		{
		double	sum = 0.0;	/* accumulator */

		for ( i = 0; i < r; ++i )
			sum += (double)x(i,j);

		if ( (xj[j] = sum) <= 0.0 )
			--cdf;
		}

	if ( rdf <= 0 || cdf <= 0 )
		{
		chisq = -1.0;
		goto ret1;
		}

	*pdf = rdf * cdf;		/* total degrees of freedom */

	/* compute chi-square */

	chisq = 0.0;

	for ( i = 0; i < r; ++i )
		{
		double	pi = xi[i];	/* row sum */

		if ( pi > 0.0 )
			for ( j = 0; j < c; ++j )
				{
				double	pj = xj[j];	/* column sum */

				if ( pj > 0.0 )
					{
					double	expected = pi * pj / N;
					double	delta =
						(double)x(i,j) - expected;

					chisq += delta * delta / expected;
					}
				}
		}

    ret1:
	free( (pointer)xj );
    ret2:
	free( (pointer)xi );
    ret3:
	return chisq;
	}
END OF chisq.c
echo 'chisq.h' 1>&2
cat >'chisq.h' <<'END OF chisq.h'
/*
	chisq.h -- definitions for contingency-table routines

	last edit:	88/09/19	D A Gwyn

	SCCS ID:	@(#)chisq.h	1.1 (edited for publication)
*/

/* library routines: */

#ifdef __STDC__
extern double	ChiSqTbl( int r, int c, const long *f, int *pdf );
extern double	InfoTbl( int r, int c, const long *f, int *pdf );
#else
extern double	ChiSqTbl();
extern double	InfoTbl();
#endif
END OF chisq.h
echo 'g_test.c' 1>&2
cat >'g_test.c' <<'END OF g_test.c'
/*
	g_test -- tests for gamma and related functions

	last edit:	88/09/09	D A Gwyn
	SCCS ID:	@(#)g_test.c	1.1 (edited for publication)
*/

#include	<stdio.h>
#ifdef __STDC__
#include	<stdlib.h>		/* for NULL etc. */
#endif

#include	"std.h"

#include	"gamma.h"

#define	Printf	(void)printf

#define	TOL	1.0e-5			/* tolerance for checks */

static int	errs = 0;		/* tally errors */

static double
RelDif( a, b )			/* returns relative difference:	*/
	double	a, b;		/* 0.0 if exactly the same,
				   otherwise ratio of difference
				   to the larger of the two	*/
	{
	double	c = Abs( a );
	double	d = Abs( b );

	d = Max( c, d );

	return d == 0.0 ? 0.0 : Abs( a - b ) / d;
	}

static void
RCheck( d, r )				/* check real number */
	double	d;			/* real to be checked */
	double	r;			/* expected value */
	{
	if ( RelDif( d, r ) > TOL )
		{
		errs = 1;
		Printf( "value s.b. %g, was %g\n", r, d );
		}
	}

void
GTest()
	{
	static struct
		{
		double	in;			/* input values */
		double	exp;			/* expected output values */
		}	tbl[] = 		/* table of test values */
		{
		{	1.0,		1.0			},
		{	2.0,		1.0			},
		{	3.0,		2.0			},
		{	4.0,		6.0			},
		{	5.0,		24.0			},
		{	6.0,		120.0			},
		{	0.5,		1.7724538509		},
		{	1.5,		0.8862269255		},
		{	0.25,		3.6256099082		},
		{	0.333333333333,	2.6789385347		},
		{	0.666666666667,	1.3541179394		},
		{	0.75,		1.2254167024		},
		{	10.0,		362880.0		},
		{	20.0,		1.2164510041e+17	},
		};
	register int	i;		/* indexes tbl[] test values */

	for ( i = 0; i < sizeof tbl / sizeof tbl[0]; ++i )
		RCheck( Gamma( tbl[i].in ), tbl[i].exp );
	}

void
FTest()
	{
	static struct
		{
		int	in;			/* input values */
		double	exp;			/* expected output values */
		}	tbl[] = 		/* table of test values */
		{
		{	0,		1.0			},
		{	1,		1.0			},
		{	2,		2.0			},
		{	3,		6.0			},
		{	4,		24.0			},
		{	5,		120.0			},
		{	6,		720.0			},
		{	10,		3628800.0		},
		{	20,		2.4329020082e+18	},
		};
	register int	i;		/* indexes tbl[] test values */

	for ( i = 0; i < sizeof tbl / sizeof tbl[0]; ++i )
		RCheck( Factorial( tbl[i].in ), tbl[i].exp );
	}

void
BCTest()
	{
	static struct
		{
		int	n;			/* top parts of inputs */
		int	k;			/* bottom parts of inputs */
		double	exp;			/* expected output values */
		}	tbl[] = 		/* table of test values */
		{
		{	1,		0,		1.0		},
		{	1,		1,		1.0		},
		{	2,		0,		1.0		},
		{	2,		1,		2.0		},
		{	2,		2,		1.0		},
		{	3,		0,		1.0		},
		{	3,		1,		3.0		},
		{	3,		2,		3.0		},
		{	5,		3,		10.0		},
		{	10,		4,		210.0		},
		{	10,		5,		252.0		},
		{	40,		6,		3838380.0	},
		{	50,		20,		47129212243960.0},
		};
	register int	i;		/* indexes tbl[] test values */

	for ( i = 0; i < sizeof tbl / sizeof tbl[0]; ++i )
		RCheck( BCoeff( tbl[i].n, tbl[i].k ), tbl[i].exp );
	}

void
ETest()
	{
	static struct
		{
		double	in;			/* input values */
		double	exp;			/* expected output values */
		}	tbl[] = 		/* table of test values */
		{
		{	0.0,		0.0			},
		{	0.1,		0.1124629160		},
		{	0.2,		0.2227025892		},
		{	0.5,		0.5204998778		},
		{	0.8,		0.7421009647		},
		{	1.0,		0.8427007929		},
		{	1.5,		0.9661051465		},
		{	2.0,		0.9953222650		},
		};
	register int	i;		/* indexes tbl[] test values */

	for ( i = 0; i < sizeof tbl / sizeof tbl[0]; ++i )
		RCheck( Erf( tbl[i].in ), tbl[i].exp );
	}

void
QCTest()
	{
	static struct
		{
		double	chisq;			/* chi-square limit */
		int	df;			/* degrees of freedom */
		double	exp;			/* expected output values */
		}	tbl[] = 		/* table of test values */
		{
		{	0.001,		1,		0.97477		},
		{	0.01,		1,		0.92034		},
		{	0.01,		2,		0.99501		},
		{	0.05,		1,		0.82306		},
		{	0.1,		1,		0.75183		},
		{	0.1,		2,		0.95123		},
		{	1.0,		1,		0.31731		},
		{	1.0,		2,		0.60653		},
		{	1.0,		3,		0.80125		},
		{	1.0,		4,		0.90980		},
		{	1.0,		5,		0.96257		},
		{	1.5,		2,		0.47237		},
		{	2.0,		1,		0.15730		},
		{	2.0,		3,		0.57241		},
		{	2.0,		5,		0.84915		},
		{	4.0,		6,		0.67668		},
		{	5.0,		5,		0.41588		},
		{	10.0,		7,		0.188573	},
		{	10.0,		10,		0.44049		},
		{	10.0,		20,		0.96817		},
		{	20.0,		30,		0.91654		},
		{	40.0,		30,		0.104864	},
		{	8.26040,	20,		0.99		},
		{	10.8508,	20,		0.95		},
		{	12.4426,	20,		0.90		},
		{	15.4518,	20,		0.75		},
		{	19.3374,	20,		0.50		},
		{	23.8277,	20,		0.25		},
		{	28.4120,	20,		0.10		},
		{	31.4104,	20,		0.05		},
		{	37.5662,	20,		0.01		},
		};
	register int	i;		/* indexes tbl[] test values */

	for ( i = 0; i < sizeof tbl / sizeof tbl[0]; ++i )
		RCheck( QChiSq( tbl[i].chisq, tbl[i].df ), tbl[i].exp );
	}

/*ARGSUSED*/
main( argc, argv )
	int		argc;
	char		*argv[];
	{
	GTest();
	FTest();
	BCTest();
	ETest();
	QCTest();

	return errs;
	}
END OF g_test.c
echo 'gamma.3' 1>&2
cat >'gamma.3' <<'END OF gamma.3'
'\" e
.TH GAMMA 3V VMB
'\"	last edit:	88/09/09	D A Gwyn
'\"	SCCS ID:	@(#)gamma.3	1.2 (edited for publication)
'\"
'\"	This source must be typeset via "eqn | troff -man"
'\"
.EQ
delim @@
.EN
.SH NAME
gamma \- gamma and related functions
.SH SYNOPSIS
.ds cW (CW\" change to B (without the paren) if you don't have a CW font
\f\*(cW
#include "gamma.h"
.br
/* \fPAll the following functions are declared in this header file.\f\*(cW */
.sp
double Gamma(double z);
.br
double LGamma(double z);
.br
double Factorial(int n);
.br
double LFactorial(int n);
.br
double BCoeff(int n, int k);
.br
double Beta(double z, double w);
.br
double PGamma(double a, double x);
.br
double QGamma(double a, double x);
.br
double Erf(double x);
.br
double Erfc(double x);
.br
double CPoisson(double x, int k);
.br
double PChiSq(double chisq, int nu);
.br
double QChiSq(double chisq, int nu);
.ft
.SH DESCRIPTION
\f\*(cWGamma\fP
returns the real gamma function value
@GAMMA (z)~==~ int "" from 0 to inf t sup z-1 e sup -t dt@
for @z>0@.
.P
\f\*(cWLGamma\fP
returns the value
@ln ~ GAMMA (z)@
for @z>0@.
.P
\f\*(cWFactorial\fP
returns the factorial function value
@n! ~=~ GAMMA (n+1)@
for @n>=0@.
.P
\f\*(cWLFactorial\fP
returns the value
@ln ~n!@\&
for @n>=0@.
.P
\f\*(cWBCoeff\fP
returns the binomial coefficient value
@size +8 ( pile { n above k } size +8 ) ~==~n! over k!(n-k)!@\&
for @0<=k<=n@.
.P
\f\*(cWBeta\fP
returns the real beta function value
@B(z,w)~==~ int "" from 0 to 1 t sup z-1 (1-t) sup w-1 dt@
for @z>0@ and @w>0@.
.P
\f\*(cWPGamma\fP
returns the incomplete gamma function value
@P(a,x)~==~ 1 over { GAMMA (a) } int "" from 0 to x e sup -t t sup a-1 dt@
for @a>0@ and @0<=x<= inf@.
.P
\f\*(cWQGamma\fP
returns the value
@1-P(a,x)@.
.P
\f\*(cWErf\fP
returns the error function value
@roman erf (x)~==~2 over sqrt pi int "" from 0 to x e sup {-t sup 2 } dt@.
.P
\f\*(cWErfc\fP
returns the complementary error function value
@1- roman erf (x)@.
.P
\f\*(cWCPoisson\fP
returns the cumulative Poisson probability value
@P sub x (<k)@,
which is the probability that the number of Poisson random events occurring
will be between 0 and @k-1@ inclusive,
where the expected mean is @x@.
.P
\f\*(cWPChiSq\fP
returns the value
@P( chi sup 2 | nu )@,
which is the probability that the observed chi-square for a correct model
will be less than @chi sup 2@,
where @nu@ is the number of degrees of freedom.
.P
\f\*(cWQChiSq\fP
returns the value
@1-P( chi sup 2 | nu )@,
which is the probability that the observed chi-square
will exceed @chi sup 2@ by chance even for a correct model,
where @nu@ is the number of degrees of freedom.
.P
All these functions return
.I approximations
to the exact results,
generally accurate to about seven significant digits.
.SH CAVEATS
Overflow can occur.
.P
Invalid parameters cause a diagnostic and termination of the process.
.SH FILES
.ta 3i
gamma.h	header file
.br
gamma.o	object module
.SH AUTHORS
William H.\& Press,
Brian P.\& Flannery,
Saul A.\& Teukolsky,
and
William T.\& Vetterling
(\fINumerical Recipes in C\fP, Chapter 6)
.br
Douglas A.\& Gwyn, BRL/VLD-VMB
END OF gamma.3
echo 'gamma.c' 1>&2
cat >'gamma.c' <<'END OF gamma.c'
/*
	Gamma -- gamma and related functions

	last edit:	88/09/09	D A Gwyn

	SCCS ID:	@(#)gamma.c	1.1 (edited for publication)

Acknowledgement:
	Code based on that found in "Numerical Methods in C".
*/

#include	<assert.h>
#include	<math.h>

#include	"std.h"

double
LGamma( x )
	double			x;
	{
	static const double	cof[6] =
		{
		76.18009173,	-86.50532033,	24.01409822,
		-1.231739516,	0.120858003e-2,	-0.536382e-5
		};
	double			tmp, ser;
	register int		j;

	assert(x > 0.0);

	if ( --x < 0.0 )	/* use reflection formula for accuracy */
		{
		double	pix = PI * x;

		return log( pix / sin( pix ) ) - LGamma( 1.0 - x );
		}

	tmp = x + 5.5;
	tmp -= (x + 0.5) * log( tmp );

	ser = 1.0;

	for ( j = 0; j < 6; ++j )
		ser += cof[j] / ++x;

	return -tmp + log( 2.50662827465 * ser );
	}

double
Gamma( x )
	double	x;
	{
	return exp( LGamma( x ) );
	}

double
Factorial( n )
	register int	n;
	{
	static double	a[33] =
		{
		1.0,	1.0,	2.0,	6.0,	24.0
		};
	static int	ntop = 4;

	assert(n >= 0);

	if ( n > 32 )
		return Gamma( (double)n + 1.0 );

	while ( ntop < n )
		{
		register int	j = ntop++;

		a[ntop] = a[j] * (double)ntop;
		}

	return a[n];
	}

double
LFactorial( n )
	register int	n;
	{
	static double	a[101];		/* init 0.0 */

	assert(n >= 0);

	if ( n <= 1 )
		return 0.0;

	if ( n <= 100 )
		if ( a[n] > 0.0 )
			return a[n];	/* table value already set up */
		else
			return a[n] = LGamma( (double)n + 1.0 );
	else
		return LGamma( (double)n + 1.0 );	/* beyond table */
	}

double
BCoeff( n, k )
	register int	n, k;
	{
	assert(k >= 0);
	assert(n >= k);

	return Round( exp( LFactorial( n )
			 - (LFactorial( k ) + LFactorial( n - k ))
			 )
		    );
	}

double
Beta( z, w )
	double	z, w;
	{
	return exp( LGamma( z ) + LGamma( w ) - LGamma( z + w ) );
	}

#define	ITMAX	100
#define	EPS	3.0e-7

static double
gser( a, x )
	double		a, x;
	{
	double		ap, del, sum;
	register int	n;

	assert(x >= 0.0);

	if ( x <= 0.0 )
		return 0.0;

	del = sum = 1.0 / (ap = a);

	for ( n = 1; n <= ITMAX; ++n )
		{
		sum += del *= x / ++ap;

		if ( Abs( del ) < Abs( sum ) * EPS )
			return sum * exp( -x + a * log( x ) - LGamma( a ) );
		}

	assert(n <= ITMAX);
	/*NOTREACHED*/
	}

static double
gcf( a, x )
	double		a, x;
	{
	register int	n;
	double		gold = 0.0, fac = 1.0, b1 = 1.0,
			b0 = 0.0, a0 = 1.0, a1 = x;

	for ( n = 1; n <= ITMAX; ++n )
		{
		double	anf;
		double	an = (double)n;
		double	ana = an - a;

		a0 = (a1 + a0 * ana) * fac;
		b0 = (b1 + b0 * ana) * fac;
		anf = an * fac;
		b1 = x * b0 + anf * b1;
		a1 = x * a0 + anf * a1;

		if ( a1 != 0.0 )
			{		/* renormalize */
			double	g = b1 * (fac = 1.0 / a1);

			gold = g - gold;

			if ( Abs( gold ) < EPS * Abs( g ) )
				return exp( -x + a * log( x ) - LGamma( a ) )
					* g;

			gold = g;
			}
		}

	assert(n <= ITMAX);
	/*NOTREACHED*/
	}

double
PGamma( a, x )
	double	a, x;
	{
	assert(x >= 0.0);
	assert(a > 0.0);

	return x < a + 1.0 ? gser( a, x ) : 1.0 - gcf( a, x );
	}

double
QGamma( a, x )
	double	a, x;
	{
	assert(x >= 0.0);
	assert(a > 0.0);

	return x < a + 1.0 ? 1.0 - gser( a, x ) : gcf( a, x );
	}

double
Erf( x )
	double	x;
	{
	return x < 0.0 ? -PGamma( 0.5, x * x ) : PGamma( 0.5, x * x );
	}

double
Erfc( x )
	double	x;
	{
	return x < 0.0 ? 1.0 + PGamma( 0.5, x * x ) : QGamma( 0.5, x * x );
	}

double
CPoisson( x, k )
	double	x;
	int	k;
	{
	return QGamma( (double)k, x );
	}

double
PChiSq( chisq, df )
	double	chisq;
	int	df;
	{
	return PGamma( (double)df / 2.0, chisq / 2.0 );
	}

double
QChiSq( chisq, df )
	double	chisq;
	int	df;
	{
	return QGamma( (double)df / 2.0, chisq / 2.0 );
	}
END OF gamma.c
echo 'gamma.h' 1>&2
cat >'gamma.h' <<'END OF gamma.h'
/*
	<gamma.h> -- definitions for gamma-function routines

	last edit:	88/09/09	D A Gwyn

	SCCS ID:	@(#)gamma.h	1.1 (edited for publication)
*/

/* library routines: */

#ifdef __STDC__
extern double	Gamma( double z ), LGamma( double z ),
		Factorial( int n ), LFactorial( int n ),
		BCoeff( int n, int k ),
		Beta( double z, double w ),
		PGamma( double a, double x ), QGamma( double a, double x ),
		Erf( double x ), Erfc( double x ),
		CPoisson( double x, int k ),
		PChiSq( double chisq, int nu ), QChiSq( double chisq, int nu );
#else
extern double	Gamma(), LGamma(), Factorial(), LFactorial(),
		BCoeff(), Beta(), PGamma(), QGamma(), Erf(), Erfc(),
		CPoisson(), PChiSq(), QChiSq();
#endif
END OF gamma.h
echo 'info.c' 1>&2
cat >'info.c' <<'END OF info.c'
/*
	InfoTbl -- Kullback's information measure for a 2-way contingency table

	last edit:	88/09/19	D A Gwyn

	SCCS ID:	@(#)info.c	1.1 (edited for publication)

	Special return values:
		-1.0	entire table consisted of 0 entries
		-2.0	invalid table entry (frequency less than 0)
		-3.0	invalid table dimensions (r or c less than 2)
		-4.0	unable to allocate enough working storage
*/

#include	<math.h>		/* for log() */
#if __STDC__
#include	<stdlib.h>		/* malloc, free */

#include	"std.h"
#else
#include	"std.h"

extern pointer	malloc();
extern void	free();
#endif

double
InfoTbl( r, c, f, pdf )
	int		r;		/* # rows in table */
	int		c;		/* # columns in table */
	const long	*f;		/* -> r*c frequency tallies */
	int		*pdf;		/* -> return # d.f. for chi-square */
	{
#define	x(i,j)	f[(i)*c+(j)]		/* convenient way to access freqs */
	register int	i;		/* row index */
	register int	j;		/* column index */
	double		N;		/* (double)n */
	double		info;		/* accumulates information measure */
	double		*xi;		/* row sums */
	double		*xj;		/* col sums */
	int		rdf = r - 1;	/* row degrees of freedom */
	int		cdf = c - 1;	/* column degrees of freedom */

	if ( rdf <= 0 || cdf <= 0 )
		{
		info = -3.0;
		goto ret3;
		}

	*pdf = rdf * cdf;		/* total degrees of freedom */

	if ( (xi = (double *)malloc( r * sizeof(double) )) == NULL )
		{
		info = -4.0;
		goto ret3;
		}

	if ( (xj = (double *)malloc( c * sizeof(double) )) == NULL )
		{
		info = -4.0;
		goto ret2;
		}

	/* compute row sums and total */

	N = 0.0;

	for ( i = 0; i < r; ++i )
		{
		double	sum = 0.0;	/* accumulator */

		for ( j = 0; j < c; ++j )
			{
			register long	k = x(i,j);

			if ( k < 0L )
				{
				info = -2.0;
				goto ret1;
				}

			sum += (double)k;
			}

		N += xi[i] = sum;
		}

	if ( N <= 0.0 )
		{
		info = -1.0;
		goto ret1;
		}

	/* compute column sums */

	for ( j = 0; j < c; ++j )
		{
		double	sum = 0.0;	/* accumulator */

		for ( i = 0; i < r; ++i )
			sum += (double)x(i,j);

		xj[j] = sum;
		}

	/* compute information measure (four parts) */

	info = N * log( N );					/* part 1 */

	for ( i = 0; i < r; ++i )
		{
		double	pi = xi[i];	/* row sum */

		if ( pi > 0.0 )
			info -= pi * log( pi );			/* part 2 */

		for ( j = 0; j < c; ++j )
			{
			double	pij = (double)x(i,j);

			if ( pij > 0.0 )
				info += pij * log( pij );	/* part 3 */
			}
		}

	for ( j = 0; j < c; ++j )
		{
		double	pj = xj[j];	/* column sum */

		if ( pj > 0.0 )
			info -= pj * log( pj );			/* part 4 */
		}

	info *= 2.0;			/* for comparability with chi-square */

    ret1:
	free( (pointer)xj );
    ret2:
	free( (pointer)xi );
    ret3:
	return info;
	}
END OF info.c
echo 'std.h' 1>&2
cat >'std.h' <<'END OF std.h'
/*
	std.h -- Douglas A. Gwyn's standard C programming definitions

	Prerequisite:	<math.h> (if you invoke Round())

	last edit:	89/06/18	D A Gwyn

	SCCS ID:	@(#)std.h	1.30

	The master source file is to be modified only by Douglas A. Gwyn
	<Gwyn@BRL.MIL>.  When installing a VLD/VMB software distribution,
	this file may need to be tailored slightly to fit the target system.
	Usually this just involves enabling some of the "kludges for deficient
	C implementations" at the end of this file.
*/

#ifndef	VLD_STD_H_
#define	VLD_STD_H_			/* once-only latch */

/* Extended data types */

typedef int	bool;			/* Boolean data */
#define 	false	0
#define 	true	1

/* ANSI C definitions */

/* Defense against some silly systems defining __STDC__ to random things. */
#ifdef STD_C
#undef STD_C
#endif
#ifdef __STDC__
#if __STDC__ > 0
#define	STD_C	__STDC__		/* use this instead of __STDC__ */
#endif
#endif

#ifdef STD_C
typedef void	*pointer;		/* generic pointer */
#else
typedef char	*pointer;		/* generic pointer */
#define	const		/* nothing */	/* ANSI C type qualifier */
/* There really is no substitute for the following, but these might work: */
#define	signed		/* nothing */	/* ANSI C type specifier */
#define	volatile	/* nothing */	/* ANSI C type qualifier */
#endif

#ifndef EXIT_SUCCESS
#define	EXIT_SUCCESS	0
#endif

#ifndef EXIT_FAILURE
#define	EXIT_FAILURE	1
#endif

#ifndef NULL
#define NULL	0			/* null pointer constant, all types */
#endif

/* Universal constants */

#define DEGRAD	57.2957795130823208767981548141051703324054724665642
					/* degrees per radian */
#define	E	2.71828182845904523536028747135266249775724709369996
					/* base of natural logs */
#define	GAMMA	0.57721566490153286061
					/* Euler's constant */
#define LOG10E	0.43429448190325182765112891891660508229439700580367
					/* log of e to the base 10 */
#define PHI	1.618033988749894848204586834365638117720309180
					/* golden ratio */
#define PI	3.14159265358979323846264338327950288419716939937511
					/* ratio of circumf. to diam. */

/* Useful macros */

/*
	The comment "UNSAFE" means that the macro argument(s) may be evaluated
	more than once, so the programmer must realize that the macro doesn't
	quite act like a genuine function.  This matters only when evaluating
	an argument produces "side effects".
*/

/* arbitrary numerical arguments and value: */
#define Abs( x )	((x) < 0 ? -(x) : (x))			/* UNSAFE */
#define Max( a, b )	((a) > (b) ? (a) : (b))			/* UNSAFE */
#define Min( a, b )	((a) < (b) ? (a) : (b))			/* UNSAFE */

/* floating-point arguments and value: */
#define Round( d )	floor( (d) + 0.5 )		/* requires <math.h> */

/* arbitrary numerical arguments, integer value: */
#define	Sgn( x )	((x) == 0 ? 0 : (x) > 0 ? 1 : -1)	/* UNSAFE */

/* string arguments, boolean value: */
#ifdef gould	/* UTX-32 2.0 compiler has problems with "..."[] */
#define	StrEq( a, b )	(strcmp( a, b ) == 0)			/* UNSAFE */
#else
#define	StrEq( a, b )	(*(a) == *(b) && strcmp( a, b ) == 0)	/* UNSAFE */
#endif

/* array argument, integer value: */
#define	Elements( a )	(sizeof a / sizeof a[0])

/* integer (or character) arguments and value: */
#define fromhostc( c )	(c)		/* map host char set to ASCII */
#define tohostc( c )	(c)		/* map ASCII to host char set */
#define tonumber( c )	((c) - '0')	/* convt digit char to number */
#define todigit( n )	((n) + '0')	/* convt digit number to char */

/* Kludges for deficient C implementations */

/*#define	strchr	index		/* 7th Edition UNIX, 4.2BSD */
/*#define	strrchr	rindex		/* 7th Edition UNIX, 4.2BSD */
/*#define	void	int		/* K&R Appendix A followers */

#endif	/* VLD_STD_H_ */
END OF std.h
echo 'tot_info.1' 1>&2
cat >'tot_info.1' <<'END OF tot_info.1'
'\" e
.TH TOT_INFO 1V VMB
'\"	last edit:	89/02/07	D A Gwyn
'\"	SCCS ID:	@(#)tot_info.1	1.2 (edited for publication)
'\"
'\"	This source must be typeset via "eqn | troff -man"
'\"
.EQ
delim @@
.EN
.SH NAME
tot_info \- total information for multiple 2-way contingency tables
.SH SYNOPSIS
.ds cW (CW\" change to I (without the paren) if you don't have a CW font
.ds cB (CB\" change to B (without the paren) if you don't have a CB font
\f\*(cBtot_info\fP
.SH DESCRIPTION
\f\*(cBtot_info\fP
reads multiple contingency-table data sets from the standard input
and prints Kullback's information statistic for each set,
as well as for the aggregate over all sets,
on the standard output.
(See \fIchisq\^\fP(3V) for a discussion of this statistic.)
.SH "INPUT FORMAT"
Input consists of one or more data sets,
each constituting a 2-way contingency table
(not necessarily all of the same size).
A data set may be preceded by any number of blank or comment lines;
a comment line has a
\f\*(cB#\fP
character as the first non-whitespace character on the line.
Following the optional comment lines is a header line
containing the row and column dimensions of the contingency table
(in that order),
separated by white space.
Finally, the contents of the contingency table (frequency counts)
must be given in ``row major'' order.
The table may be freely formatted with white-space separators;
a row need not be on a single line.
.SH "OUTPUT FORMAT"
Input comments are copied to the output as they are encountered.
Otherwise the output consists solely of an information line for
each data set (or a diagnostic if the data is invalid) and a final
information line for the aggregate over all data sets (preceded by
a blank line).
An information line exhibits twice the value of Kullback's
@"\v'-0.2'^\v'0.2'\h'-\w;^;u'I\^" ( H sub 1 : H sub 2 )@ statistic,
the corresponding number of degrees of freedom,
and the probability that the statistic
would be as large as was actually observed
if the row and column categorizations really were independent.
.P
The aggregate statistic is valid if the data sets
represent independent tests of the same hypothesis.
.SH DIAGNOSTICS
The diagnostic messages are intended to be self-explanatory.
.SH "EXIT STATUS"
\f\*(cBtot_info\fP
returns a zero exit status if and only if no problems were encountered.
.SH EXAMPLE
.RS
\f\*(cW$ \fP\f\*(cBtot_info
.br
# MilCrypI.60 biliteral cryptogram (trial pairings against G)
.br
# G vs B
.br
2 10
.br
2 2 2 0 3 0 0 1 0 1
.br
3 1 1 1 1 2 2 1 2 1
.br
# G vs D
.br
2 10
.br
2 2 2 0 3 0 0 1 0 1
.br
4 1 4 4 1 1 1 3 4 2
.br
# G vs J
.br
2 10
.br
2 2 2 0 3 0 0 1 0 1
.br
1 1 1 1 1 1 2 1 1 1
.br
# G vs L
.br
2 10
.br
2 2 2 0 3 0 0 1 0 1
.br
1 4 0 4 3 4 5 3 3 4
.br
# G vs N
.br
2 10
.br
2 2 2 0 3 0 0 1 0 1
.br
4 1 4 3 1 1 1 2 3 3
.br
# G vs Q
.br
2 10
.br
2 2 2 0 3 0 0 1 0 1
.br
0 2 0 2 1 1 1 0 1 0
.br
# G vs S (correct pairing)
.br
2 10
.br
2 2 2 0 3 0 0 1 0 1
.br
1 2 2 0 2 1 0 0 0 1
.br
# G vs V
.br
2 10
.br
2 2 2 0 3 0 0 1 0 1
.br
1 4 1 3 4 4 4 3 4 3
.br
# G vs X
.br
2 10
.br
2 2 2 0 3 0 0 1 0 1
.br
0 1 0 1 2 1 1 0 2 0
.br
# Since most pairings are incorrect,
.br
# the aggregate probability is small.
.br
^D\fP\f\*(cW
.br
# MilCrypI.60 biliteral cryptogram (trial pairings against G)
.br
# G vs B
.br
2info = 11.01	df =  9	q =  0.2748
.br
# G vs D
.br
2info = 12.40	df =  9	q =  0.1915
.br
# G vs J
.br
2info =  9.00	df =  9	q =  0.4375
.br
# G vs L
.br
2info = 19.03	df =  9	q =  0.0250
.br
# G vs N
.br
2info = 10.89	df =  9	q =  0.2830
.br
# G vs Q
.br
2info = 15.82	df =  9	q =  0.0707
.br
# G vs S (correct pairing)
.br
2info =  3.11	df =  9	q =  0.9596
.br
# G vs V
.br
2info = 14.47	df =  9	q =  0.1066
.br
# G vs X
.br
2info = 15.31	df =  9	q =  0.0826
.br
# Since most pairings are incorrect,
.br
# the aggregate probability is small.
.br
.sp
total 2info = 111.05	df = 81	q =  0.0150
\fP
.RE
.SH REFERENCES
Solomon Kullback, \fIInformation Theory and Statistics\fP (Dover, 1968).
.br
William F.\& Friedman and Lambros D.\& Callimahos,
\fIMilitary Cryptanalytics, Part I \(em Volume 1\fP
(reprinted by Aegean Park Press, 1985).
.SH "SEE ALSO"
chisq(3V).
.SH AUTHOR
Douglas A.\& Gwyn, BRL/VLD-VMB
END OF tot_info.1
echo 'tot_info.c' 1>&2
cat >'tot_info.c' <<'END OF tot_info.c'
/*
	tot_info -- combine information statistics for multiple tables

	last edit:	89/02/06	D A Gwyn

	SCCS ID:	@(#)tot_info.c	1.1 (edited for publication)
*/

#include	<ctype.h>
#include	<stdio.h>

#include	"std.h"

#include	"chisq.h"
#include	"gamma.h"		/* for QChiSq() */

#ifndef MAXLINE
#define	MAXLINE	256
#endif

#ifndef MAXTBL
#define	MAXTBL	1000
#endif

static char	line[MAXLINE];		/* row/column header input line */
static long	f[MAXTBL];		/* frequency tallies */
static int	r;			/* # of rows */
static int	c;			/* # of columns */

#define	x(i,j)	f[(i)*c+(j)]		/* convenient way to access freqs */

#define	COMMENT	'#'			/* comment character */

/*ARGSUSED*/
int
main( argc, argv )
	int		argc;
	char		*argv[];
	{
	register char	*p;		/* input line scan location */
	register int	i;		/* row index */
	register int	j;		/* column index */
	double		info;		/* computed information measure */
	int		infodf;		/* degrees of freedom for information */
	double		totinfo = 0.0;	/* accumulated information */
	int		totdf = 0;	/* accumulated degrees of freedom */

	while ( fgets( line, MAXLINE, stdin ) != NULL )	/* start new table */
		{
		for ( p = line; *p != '\0' && isspace( (int)*p ); ++p )
			;

		if ( *p == '\0' )
			continue;	/* skip blank line */

		if ( *p == COMMENT )
			{		/* copy comment through */
			(void)fputs( line, stdout );
			continue;
			}

		if ( sscanf( p, "%d %d\n", &r, &c ) != 2 )
			{
			(void)fputs( "* invalid row/column line *\n", stderr );
			return EXIT_FAILURE;
			}

		if ( r * c > MAXTBL )
			{
			(void)fputs( "* table too large *\n", stderr );
			return EXIT_FAILURE;
			}

		/* input tallies */

		for ( i = 0; i < r; ++i )
			for ( j = 0; j < c; ++j )
				if ( scanf( " %ld", &x(i,j) ) != 1 )
					{
					(void)fputs( "* EOF in table *\n",
						     stderr
						   );
					return EXIT_FAILURE;
					}

		/* compute statistic */

		info = InfoTbl( r, c, f, &infodf );

		/* print results */

		if ( info >= 0.0 )
			{
			(void)printf( "2info = %5.2f\tdf = %2d\tq = %7.4f\n",
				      info, infodf,
				      QChiSq( info, infodf )
				    );
			totinfo += info;
			totdf += infodf;
			}
		else
			(void)fputs( info < -3.5 ? "out of memory\n"
				   : info < -2.5 ? "table too small\n"
				   : info < -1.5 ? "negative freq\n"
				   : "table all zeros\n",
				     stdout
				   );
		}

	if ( totdf <= 0 )
		{
		(void)fputs( "\n*** no information accumulated ***\n", stdout );
		return EXIT_FAILURE;
		}

	(void)printf( "\ntotal 2info = %5.2f\tdf = %2d\tq = %7.4f\n",
		      totinfo, totdf,
		      QChiSq( totinfo, totdf )
		    );
	return EXIT_SUCCESS;
	}
END OF tot_info.c
echo 'tot_info.exp' 1>&2
cat >'tot_info.exp' <<'END OF tot_info.exp'
# MilCrypI.60 biliteral cryptogram (trial pairings against G)
# G vs B
2info = 11.01	df =  9	q =  0.2748
# G vs D
2info = 12.40	df =  9	q =  0.1915
# G vs J
2info =  9.00	df =  9	q =  0.4375
# G vs L
2info = 19.03	df =  9	q =  0.0250
# G vs N
2info = 10.89	df =  9	q =  0.2830
# G vs Q
2info = 15.82	df =  9	q =  0.0707
# G vs S (correct pairing)
2info =  3.11	df =  9	q =  0.9596
# G vs V
2info = 14.47	df =  9	q =  0.1066
# G vs X
2info = 15.31	df =  9	q =  0.0826
# Since most pairings are incorrect,
# the aggregate probability is small.

total 2info = 111.05	df = 81	q =  0.0150
END OF tot_info.exp
echo 'tot_info.in' 1>&2
cat >'tot_info.in' <<'END OF tot_info.in'
# MilCrypI.60 biliteral cryptogram (trial pairings against G)
# G vs B
2 10
2 2 2 0 3 0 0 1 0 1
3 1 1 1 1 2 2 1 2 1
# G vs D
2 10
2 2 2 0 3 0 0 1 0 1
4 1 4 4 1 1 1 3 4 2
# G vs J
2 10
2 2 2 0 3 0 0 1 0 1
1 1 1 1 1 1 2 1 1 1
# G vs L
2 10
2 2 2 0 3 0 0 1 0 1
1 4 0 4 3 4 5 3 3 4
# G vs N
2 10
2 2 2 0 3 0 0 1 0 1
4 1 4 3 1 1 1 2 3 3
# G vs Q
2 10
2 2 2 0 3 0 0 1 0 1
0 2 0 2 1 1 1 0 1 0
# G vs S (correct pairing)
2 10
2 2 2 0 3 0 0 1 0 1
1 2 2 0 2 1 0 0 0 1
# G vs V
2 10
2 2 2 0 3 0 0 1 0 1
1 4 1 3 4 4 4 3 4 3
# G vs X
2 10
2 2 2 0 3 0 0 1 0 1
0 1 0 1 2 1 1 0 2 0
# Since most pairings are incorrect,
# the aggregate probability is small.
END OF tot_info.in