[comp.sources.misc] N-dimensional, Radix 2 FFT Routine

alee@tybalt.caltech.edu (Andrew Lee) (07/18/87)
	This is an n-dimensional FFT routine I wrote.  There are two
compilation options, SPEED and MULT4, which are described in the
documentation file, fftn.doc.

-------------- Cut Here -------------
#! /bin/sh
# This is a shell archive, meaning:
# 1. Remove everything above the #! /bin/sh line.
# 2. Save the resulting text in a file.
# 3. Execute the file with /bin/sh (not csh) to create the files:
#	fftn.doc
#	complex.h
#	fftn.c
#	demo.c
# This archive created: Wed Jul 15 23:14:46 1987
export PATH; PATH=/bin:$PATH
if test -f 'fftn.doc'
then
	echo shar: will not over-write existing file "'fftn.doc'"
else
cat << \SHAR_EOF > 'fftn.doc'
	Here is an n-dimensional FFT routine I wrote recently.  It's based
on the Fortrash and Pascal routines in the book "Numerical Recipes"
(I forgot the author, and the book is at work.), but much of it has
been changed (to take advantage of C's logical operations for the bit
reversal, to use C's array index ordering and starting index of 0,
etc.)
	If the preprocessor variable SPEED is defined, data[] must be an
array of fcomplex, not complex, which uses float's instead of double's.
I used the float's in the application I wrote this for to reduce the size
of the array, and to reduce the time taken by memory accesses, since high
accuracy wasn't needed.
	The routine has been optimized for speed for MSC 4.0, compiling
for an 80286, using inline 8087/287 code (requires the 87/287), and it is
probably not optimal for other systems.
	I have included 2 options for the complex multiplication, a complex
multiply using 4 real multiplies, and one using 3 real multiplies. 
The 3 multiply method is used unless the preprocessor variable MULT4 is
defined (which is automatically done for MSC).  Usually, the 3 multiply
method should be faster, but unfortunately, MSC 4.0 does almost no
optimization for the 80287, so it was storing t1 and t2 back to memory
and reading them back, which all took more time than doing the extra multiply!

	Now for the usage (finally):
data[] is the array of complex numbers to be transformed,
nn[] is the array giving the dimensions (I mean size) of the array,
ndim is the number of dimensions of the array, and
isign is +1 for a forward transform, and -1 for an inverse transform.

data[] and nn[] are stored in the "natural" order for C:
nn[0] gives the number of elements along the leftmost index,
nn[ndim - 1] gives the number of elements along the rightmost index, and
data should be declared along the lines of
	struct (f)complex data[nn[0], nn[1], ..., nn[ndim - 1]]

	Additional notes: The routine does NO NORMALIZATION, so if you do a
forward, and then an inverse transform on an array, the result will
be identical to the original array MULTIPLIED BY THE NUMBER OF
ELEMENTS IN THE ARRAY.  Also, of course, the dimensions of data[]
must all be powers of 2.  I have also enclosed a version of the demo
program from the examples book for "Numerical Recipes".
SHAR_EOF
fi # end of overwriting check
if test -f 'complex.h'
then
	echo shar: will not over-write existing file "'complex.h'"
else
cat << \SHAR_EOF > 'complex.h'
#ifndef DOS	/*	Definition of complex not needed for MSC	*/
#ifndef MSDOS	/*	Already in <math.h>	*/

struct complex {
	double x, y;
};

#endif
#endif

struct fcomplex {
	float x, y;
};
SHAR_EOF
fi # end of overwriting check
if test -f 'fftn.c'
then
	echo shar: will not over-write existing file "'fftn.c'"
else
cat << \SHAR_EOF > 'fftn.c'
#include <math.h>
#include "complex.h"

/*	Use 4-multiply complex multiply for MSC	*/
#ifdef DOS	/*	IBM C	*/
#define MULT4
#endif
#ifdef MSDOS	/*	Microsoft C	*/
#define MULT4
#endif

#define PI 3.1415926535979

fftn(data, nn, ndim, isign)

#ifdef SPEED
struct fcomplex data[];
#else
struct complex data[];
#endif

unsigned nn[];
int ndim, isign;
{
	int idim;
	unsigned i1, i2rev, i3rev, ibit;
	unsigned ip2, ifp1, ifp2, k2, n;
	unsigned nprev = 1, nrem, ntot = 1;
	register unsigned i2, i3;
	double theta;
	struct complex w, wp;

#ifdef SPEED
	float wtemp;
	struct fcomplex temp, wt;
#else
	double wtemp;
	struct complex temp, wt;
#endif

#ifndef MULT4	/*	Temporary variables needed for 3-multiply complex mult.	*/
	double t1, t2;
#endif

	/*      Compute total number of complex values  */
	for (idim = 0; idim < ndim; ++idim)
		ntot *= nn[idim];

	for (idim = ndim - 1; idim >= 0; --idim) {
		n = nn[idim];

		nrem = ntot / (n * nprev);
		ip2 = nprev * n;        /*      Unit step for next dimension */
		i2rev = 0;              /*      Bit reversed i2 */

		/*      This is the bit reversal section of the routine */
		/*      Loop over current dimension     */
		for (i2 = 0; i2 < ip2; i2 += nprev) {
			if (i2 < i2rev)
				/*      Loop over lower dimensions      */
				for (i1 = i2; i1 < i2 + nprev; ++i1)
					/*      Loop over higher dimensions  */
					for (i3 = i1; i3 < ntot; i3 += ip2) {
						i3rev = i3 + i2rev - i2;
						temp = data[i3];
						data[i3] = data[i3rev];
						data[i3rev] = temp;
					}

			ibit = ip2;
			/*      Increment from high end of i2rev to low */
			do {
				ibit >>= 1;
				i2rev ^= ibit;
			} while (ibit >= nprev && !(ibit & i2rev));
		}

		/*      Here begins the Danielson-Lanczos section of the routine */
		/*      Loop over step sizes    */
		for (ifp1 = nprev; ifp1 < ip2; ifp1 <<= 1) {
			ifp2 = ifp1 << 1;
			/*  Initialize for the trig. recurrence */
			theta = isign * 2.0 * PI / (ifp2 / nprev);
			wp.x = sin(0.5 * theta);
			wp.x *= -2.0 * wp.x;
			wp.y = sin(theta);
			w.x = 1.0;
			w.y = 0.0;

			/*  Loop by unit step in current dimension  */
			for (i3 = 0; i3 < ifp1; i3 += nprev) {
				/*      Loop over lower dimensions      */
				for (i1 = i3; i1 < i3 + nprev; ++i1)
					/*  Loop over higher dimensions */
					for (i2 = i1; i2 < ntot; i2 += ifp2) {
						/*      Danielson-Lanczos formula */
						k2 = i2 + ifp1;
						wt = data[k2];

#ifdef MULT4
/*	Complex multiply using 4 real multiplies.  Faster in MSC	*/
						data[k2].x = data[i2].x - (temp.x = w.x * wt.x - w.y * wt.y);
						data[k2].y = data[i2].y - (temp.y = w.x * wt.y + w.y * wt.x);
#else
/*	Complex multiply using 3 real multiplies.  Should usually be faster.	*/
						data[k2].x = data[i2].x - (temp.x =
							(t1 = w.x * wt.x) - (t2 = w.y * wt.y));
						data[k2].y = data[i2].y - (temp.y =
							(w.x + w.y) * (wt.x + wt.y) - t1 - t2);
#endif
						data[i2].x += temp.x;
						data[i2].y += temp.y;
					}
				/*      Trigonometric recurrence        */
				wtemp = w.x;
#ifdef MULT4
/*	Complex multiply using 4 real multiplies.	*/
				w.x += w.x * wp.x - w.y * wp.y;
				w.y += wtemp * wp.y + w.y * wp.x;
#else
/*	Complex multiply using 3 real multiplies.	*/
				w.x += (t1 = w.x * wp.x) - (t2 = w.y * wp.y);
				w.y += (wtemp + w.y) * (wp.x + wp.y) - t1 - t2;
#endif
			}
		}
	nprev *= n;
	}
}
SHAR_EOF
fi # end of overwriting check
if test -f 'demo.c'
then
	echo shar: will not over-write existing file "'demo.c'"
else
cat << \SHAR_EOF > 'demo.c'
#include <stdio.h>
#include <math.h>
#include "complex.h"

#define ndim 3
#define ndat 512

main()
{
	int i, j, k, l, ll, isign = 1;
	int nn[ndim];

#ifdef SPEED
	struct fcomplex data[ndat];
#else
	struct complex data[ndat];
#endif

	for (i = 0; i < ndim; ++i)
	nn[i] = 4 << i;

	for (i = 0; i < nn[0]; ++i)
		for (j = 0; j < nn[1]; ++j)
			for (k = 0; k < nn[2]; ++k) {
				l = (i * nn[1] + j) * nn[2] + k;
				ll = (l << 1) + 1;
				data[l].x = ll;
				data[l].y = ll + 1;
			}
	fftn(data, nn, ndim, isign);
	isign = -1;

	printf("\nDouble 3-dimensional Transform");
	printf("\nDouble Transf.  Original Data  Ratio");
	printf("\nReal   Imag.	Real   Imag.   Real   Imag.");
 
	fftn(data, nn, ndim, isign);

	for (i = 0; i < 4; ++i) {
		j = i << 1;
		k = j << 1;
		l = (i * nn[1] + j) * nn[2] + k;
		ll = (l << 1) + 1;
		printf("\n%.2lf %.2lf %d %d %.2lf %.2lf", data[l].x, data[l].y, ll, ll+1, data[l].x / ll, data[l].y / (ll+1));
	}

	printf("\nThe product of transform lengths is: %d\n", nn[0] * nn[1] * nn[2]);
}
SHAR_EOF
fi # end of overwriting check
#	End of shell archive
exit 0

	Andrew Lee
	alee@caltech.bitnet, alee@tybalt.caltech.edu,
	...!seismo!cit-vax!tybalt!alee