alee@tybalt.caltech.edu (Andrew Lee) (07/18/87)
This is an n-dimensional FFT routine I wrote. There are two compilation options, SPEED and MULT4, which are described in the documentation file, fftn.doc. -------------- Cut Here ------------- #! /bin/sh # This is a shell archive, meaning: # 1. Remove everything above the #! /bin/sh line. # 2. Save the resulting text in a file. # 3. Execute the file with /bin/sh (not csh) to create the files: # fftn.doc # complex.h # fftn.c # demo.c # This archive created: Wed Jul 15 23:14:46 1987 export PATH; PATH=/bin:$PATH if test -f 'fftn.doc' then echo shar: will not over-write existing file "'fftn.doc'" else cat << \SHAR_EOF > 'fftn.doc' Here is an n-dimensional FFT routine I wrote recently. It's based on the Fortrash and Pascal routines in the book "Numerical Recipes" (I forgot the author, and the book is at work.), but much of it has been changed (to take advantage of C's logical operations for the bit reversal, to use C's array index ordering and starting index of 0, etc.) If the preprocessor variable SPEED is defined, data[] must be an array of fcomplex, not complex, which uses float's instead of double's. I used the float's in the application I wrote this for to reduce the size of the array, and to reduce the time taken by memory accesses, since high accuracy wasn't needed. The routine has been optimized for speed for MSC 4.0, compiling for an 80286, using inline 8087/287 code (requires the 87/287), and it is probably not optimal for other systems. I have included 2 options for the complex multiplication, a complex multiply using 4 real multiplies, and one using 3 real multiplies. The 3 multiply method is used unless the preprocessor variable MULT4 is defined (which is automatically done for MSC). Usually, the 3 multiply method should be faster, but unfortunately, MSC 4.0 does almost no optimization for the 80287, so it was storing t1 and t2 back to memory and reading them back, which all took more time than doing the extra multiply! Now for the usage (finally): data[] is the array of complex numbers to be transformed, nn[] is the array giving the dimensions (I mean size) of the array, ndim is the number of dimensions of the array, and isign is +1 for a forward transform, and -1 for an inverse transform. data[] and nn[] are stored in the "natural" order for C: nn[0] gives the number of elements along the leftmost index, nn[ndim - 1] gives the number of elements along the rightmost index, and data should be declared along the lines of struct (f)complex data[nn[0], nn[1], ..., nn[ndim - 1]] Additional notes: The routine does NO NORMALIZATION, so if you do a forward, and then an inverse transform on an array, the result will be identical to the original array MULTIPLIED BY THE NUMBER OF ELEMENTS IN THE ARRAY. Also, of course, the dimensions of data[] must all be powers of 2. I have also enclosed a version of the demo program from the examples book for "Numerical Recipes". SHAR_EOF fi # end of overwriting check if test -f 'complex.h' then echo shar: will not over-write existing file "'complex.h'" else cat << \SHAR_EOF > 'complex.h' #ifndef DOS /* Definition of complex not needed for MSC */ #ifndef MSDOS /* Already in <math.h> */ struct complex { double x, y; }; #endif #endif struct fcomplex { float x, y; }; SHAR_EOF fi # end of overwriting check if test -f 'fftn.c' then echo shar: will not over-write existing file "'fftn.c'" else cat << \SHAR_EOF > 'fftn.c' #include <math.h> #include "complex.h" /* Use 4-multiply complex multiply for MSC */ #ifdef DOS /* IBM C */ #define MULT4 #endif #ifdef MSDOS /* Microsoft C */ #define MULT4 #endif #define PI 3.1415926535979 fftn(data, nn, ndim, isign) #ifdef SPEED struct fcomplex data[]; #else struct complex data[]; #endif unsigned nn[]; int ndim, isign; { int idim; unsigned i1, i2rev, i3rev, ibit; unsigned ip2, ifp1, ifp2, k2, n; unsigned nprev = 1, nrem, ntot = 1; register unsigned i2, i3; double theta; struct complex w, wp; #ifdef SPEED float wtemp; struct fcomplex temp, wt; #else double wtemp; struct complex temp, wt; #endif #ifndef MULT4 /* Temporary variables needed for 3-multiply complex mult. */ double t1, t2; #endif /* Compute total number of complex values */ for (idim = 0; idim < ndim; ++idim) ntot *= nn[idim]; for (idim = ndim - 1; idim >= 0; --idim) { n = nn[idim]; nrem = ntot / (n * nprev); ip2 = nprev * n; /* Unit step for next dimension */ i2rev = 0; /* Bit reversed i2 */ /* This is the bit reversal section of the routine */ /* Loop over current dimension */ for (i2 = 0; i2 < ip2; i2 += nprev) { if (i2 < i2rev) /* Loop over lower dimensions */ for (i1 = i2; i1 < i2 + nprev; ++i1) /* Loop over higher dimensions */ for (i3 = i1; i3 < ntot; i3 += ip2) { i3rev = i3 + i2rev - i2; temp = data[i3]; data[i3] = data[i3rev]; data[i3rev] = temp; } ibit = ip2; /* Increment from high end of i2rev to low */ do { ibit >>= 1; i2rev ^= ibit; } while (ibit >= nprev && !(ibit & i2rev)); } /* Here begins the Danielson-Lanczos section of the routine */ /* Loop over step sizes */ for (ifp1 = nprev; ifp1 < ip2; ifp1 <<= 1) { ifp2 = ifp1 << 1; /* Initialize for the trig. recurrence */ theta = isign * 2.0 * PI / (ifp2 / nprev); wp.x = sin(0.5 * theta); wp.x *= -2.0 * wp.x; wp.y = sin(theta); w.x = 1.0; w.y = 0.0; /* Loop by unit step in current dimension */ for (i3 = 0; i3 < ifp1; i3 += nprev) { /* Loop over lower dimensions */ for (i1 = i3; i1 < i3 + nprev; ++i1) /* Loop over higher dimensions */ for (i2 = i1; i2 < ntot; i2 += ifp2) { /* Danielson-Lanczos formula */ k2 = i2 + ifp1; wt = data[k2]; #ifdef MULT4 /* Complex multiply using 4 real multiplies. Faster in MSC */ data[k2].x = data[i2].x - (temp.x = w.x * wt.x - w.y * wt.y); data[k2].y = data[i2].y - (temp.y = w.x * wt.y + w.y * wt.x); #else /* Complex multiply using 3 real multiplies. Should usually be faster. */ data[k2].x = data[i2].x - (temp.x = (t1 = w.x * wt.x) - (t2 = w.y * wt.y)); data[k2].y = data[i2].y - (temp.y = (w.x + w.y) * (wt.x + wt.y) - t1 - t2); #endif data[i2].x += temp.x; data[i2].y += temp.y; } /* Trigonometric recurrence */ wtemp = w.x; #ifdef MULT4 /* Complex multiply using 4 real multiplies. */ w.x += w.x * wp.x - w.y * wp.y; w.y += wtemp * wp.y + w.y * wp.x; #else /* Complex multiply using 3 real multiplies. */ w.x += (t1 = w.x * wp.x) - (t2 = w.y * wp.y); w.y += (wtemp + w.y) * (wp.x + wp.y) - t1 - t2; #endif } } nprev *= n; } } SHAR_EOF fi # end of overwriting check if test -f 'demo.c' then echo shar: will not over-write existing file "'demo.c'" else cat << \SHAR_EOF > 'demo.c' #include <stdio.h> #include <math.h> #include "complex.h" #define ndim 3 #define ndat 512 main() { int i, j, k, l, ll, isign = 1; int nn[ndim]; #ifdef SPEED struct fcomplex data[ndat]; #else struct complex data[ndat]; #endif for (i = 0; i < ndim; ++i) nn[i] = 4 << i; for (i = 0; i < nn[0]; ++i) for (j = 0; j < nn[1]; ++j) for (k = 0; k < nn[2]; ++k) { l = (i * nn[1] + j) * nn[2] + k; ll = (l << 1) + 1; data[l].x = ll; data[l].y = ll + 1; } fftn(data, nn, ndim, isign); isign = -1; printf("\nDouble 3-dimensional Transform"); printf("\nDouble Transf. Original Data Ratio"); printf("\nReal Imag. Real Imag. Real Imag."); fftn(data, nn, ndim, isign); for (i = 0; i < 4; ++i) { j = i << 1; k = j << 1; l = (i * nn[1] + j) * nn[2] + k; ll = (l << 1) + 1; printf("\n%.2lf %.2lf %d %d %.2lf %.2lf", data[l].x, data[l].y, ll, ll+1, data[l].x / ll, data[l].y / (ll+1)); } printf("\nThe product of transform lengths is: %d\n", nn[0] * nn[1] * nn[2]); } SHAR_EOF fi # end of overwriting check # End of shell archive exit 0 Andrew Lee alee@caltech.bitnet, alee@tybalt.caltech.edu, ...!seismo!cit-vax!tybalt!alee