johne@ex.heurikon.com (John Eckrich) (10/18/90)
I have zilch in the area of neural-net programs, and would like to get my feet wet by doing some tinkering. If you have any source code that you would feel comfortable E-mailing to me I would much appreciate it. Any level of software would be fine; I just need to get some hands-on experience; books can take a person only so far. 10Q Jon Eckrich (608) 828-3464 -- ------------------------------------------- johne@heurikon.com
jxo2f@hagar6c.acc.Virginia.EDU (Jose X Orellana) (10/24/90)
I am also interested in some source code for a neural net. Jose X. Orellana (working on M.S.) Systems Engineering Dpt. University of Virginia
num44@ecst.csuchico.edu (Terral Jamison) (10/27/90)
In article <1990Oct24.024710.7346@murdoch.acc.Virginia.EDU> jxo2f@hagar6c.acc.Virginia.EDU (Jose X Orellana) writes: > > I am also interested in some source code for a neural net. > > Jose X. Orellana (working on M.S.) > Systems Engineering Dpt. > University of Virginia > /* This program simulates a constraint satisfaction neural network for choosing tic-tac-toe moves as described in Volume 2 of the PDP books. The network is presented a description of the board and chooses the best move subject to predefined constraints. It requires an EGA or CGA to display a Hinton diagram of the element activation levels. Author : Gary Andriotakis Compiler : Microsoft Quick C Date : 5 January 1989 */ #include<stdio.h> #include <stdio.h> #include <time.h> #include <graph.h> #define N 67 #define GAIN 0.1 #define SCALE 1.0/16.0 static float w[N][N], bias[N], a[N], e[N]; main() { int j,mark = -1,converged; time_t t; #ifdef DEBUG initialize(a,w,bias); #endif #ifndef DEBUG if(!_setvideomode(_ERESCOLOR)) { printf("EGA required\n"); exit(0); } srand((int)time(&t)); drawboard(); do { putmarkers(&e[9],mark); initialize(); boxes(a,N); do { j = randint(N); update(j); boxes(a,j); converged = ((j < 9) && (a[j] > 0.9)); } while(!converged && !kbhit()); if(converged) mark = j; else mark = -1; } while(converged || (kbhit() && getch() != 27)); _displaycursor(_GCURSORON); _setvideomode(_DEFAULTMODE); #endif } update(j) int j; { int i; float net; net = 0.0; for(i = 0; i < N; i++) net += w[i][j]*a[i]; net += bias[j]; net *= GAIN; net += e[j]; if(net > 0.0) a[j] += net*(1.0 - a[j]); else a[j] += net*a[j]; } initialize() { int i,j; /* initialize to zero */ for(i = 0; i < N; i++) { a[i] = 0.0; bias[i] = 0.0; for(j = 0; j < N; j++) w[i][j] = 0.0; } for(i = 0; i < 9; i++) { /* inhibit responses from occupied squares */ w[i+9][i] = -8.0; w[i+18][i] = -8.0; /* mutual inhibition to insure only one response */ for(j = 0; j < 9; j++) w[i][j] = -2.; /* self excitation to excourage self growth */ w[i][i] = 2.; } 
/* empty line detectors */ for(i = 0; i < 3; i++) { w[i+9][27] = -1.0; w[i+18][27] = -1.0; w[27][i] = 1.0; w[i+12][28] = -1.0; w[i+21][28] = -1.0; w[28][i+3] = 1.0; w[i+15][29] = -1.0; w[i+24][29] = -1.0; w[29][i+6] = 1.0; w[9+4*i][30] = -1.0; w[18+4*i][30] = -1.0; w[30][4*i] = 1.0; w[11+2*i][31] = -1.0; w[20+2*i][31] = -1.0; w[31][2+2*i] = 1.0; w[9+3*i][32] = -1.0; w[18+3*i][32] = -1.0; w[32][3*i] = 1.0; w[10+3*i][33] = -1.0; w[19+3*i][33] = -1.0; w[33][1+3*i] = 1.0; w[11+3*i][34] = -1.0; w[20+3*i][34] = -1.0; w[34][2+3*i] = 1.0; } /* friendly doubleton detectors */ for(i = 0; i < 3; i++) { w[i+9][35] = 1.0; w[i+18][35] = -2.0; w[35][i] = 5.0; w[i+12][36] = 1.0; w[i+21][36] = -2.0; w[36][i+3] = 5.0; w[i+15][37] = 1.0; w[i+24][37] = -2.0; w[37][i+6] = 5.0; w[9+4*i][38] = 1.0; w[18+4*i][38] = -2.0; w[38][4*i] = 5.0; w[11+2*i][39] = 1.0; w[20+2*i][39] = -2.0; w[39][2+2*i] = 5.0; w[9+3*i][40] = 1.0; w[18+3*i][40] = -2.0; w[40][3*i] = 5.0; w[10+3*i][41] = 1.0; w[19+3*i][41] = -2.0; w[41][1+3*i] = 5.0; w[11+3*i][42] = 1.0; w[20+3*i][42] = -2.0; w[42][2+3*i] = 5.0; } /* enemy doubleton detectors */ for(i = 0; i < 3; i++) { w[i+9][43] = -2.0; w[i+18][43] = 1.0; w[43][i] = 1.0; w[i+12][44] = -2.0; w[i+21][44] = 1.0; w[44][i+3] = 1.0; w[i+15][45] = -2.0; w[i+24][45] = 1.0; w[45][i+6] = 1.0; w[9+4*i][46] = -2.0; w[18+4*i][46] = 1.0; w[46][4*i] = 1.0; w[11+2*i][47] = -2.0; w[20+2*i][47] = 1.0; w[47][2+2*i] = 1.0; w[9+3*i][48] = -2.0; w[18+3*i][48] = 1.0; w[48][3*i] = 1.0; w[10+3*i][49] = -2.0; w[19+3*i][49] = 1.0; w[49][1+3*i] = 1.0; w[11+3*i][50] = -2.0; w[20+3*i][50] = 1.0; w[50][2+3*i] = 1.0; } /* friendly singleton detectors */ for(i = 0; i < 3; i++) { w[i+9][51] = 1.0; w[i+18][51] = -2.0; w[51][i] = 2.0; w[i+12][52] = 1.0; w[i+21][52] = -2.0; w[52][i+3] = 2.0; w[i+15][53] = 1.0; w[i+24][53] = -2.0; w[53][i+6] = 2.0; w[9+4*i][54] = 1.0; w[18+4*i][54] = -2.0; w[54][4*i] = 2.0; w[11+2*i][55] = 1.0; w[20+2*i][55] = -2.0; w[55][2+2*i] = 2.0; w[9+3*i][56] = 1.0; w[18+3*i][56] 
= -2.0; w[56][3*i] = 2.0; w[10+3*i][57] = 1.0; w[19+3*i][57] = -2.0; w[57][1+3*i] = 2.0; w[11+3*i][58] = 1.0; w[20+3*i][58] = -2.0; w[58][2+3*i] = 2.0; } /* enemy singleton detectors */ for(i = 0; i < 3; i++) { w[i+9][59] = -2.0; w[i+18][59] = 1.0; w[59][i] = 2.0; w[i+12][60] = -2.0; w[i+21][60] = 1.0; w[60][i+3] = 2.0; w[i+15][61] = -2.0; w[i+24][61] = 1.0; w[61][i+6] = 2.0; w[9+4*i][62] = -2.0; w[18+4*i][62] = 1.0; w[62][4*i] = 2.0; w[11+2*i][63] = -2.0; w[20+2*i][63] = 1.0; w[63][2+2*i] = 2.0; w[9+3*i][64] = -2.0; w[18+3*i][64] = 1.0; w[64][3*i] = 2.0; w[10+3*i][65] = -2.0; w[19+3*i][65] = 1.0; w[65][1+3*i] = 2.0; w[11+3*i][66] = -2.0; w[20+3*i][66] = 1.0; w[66][2+3*i] = 2.0; } /* biases */ for(i = 0; i < 9; i++) bias[i] = 0.0625; for(i = 0; i < 8; i++) { bias[i+27] = 0.5; bias[i+35] = -1.5; bias[i+43] = -1.5; bias[i+51] = -0.5; bias[i+59] = -0.5; } for(i = 0; i < N; i++) { bias[i] *= SCALE; for(j = 0; j < N; j++) w[i][j] *= SCALE; } #ifdef DEBUG for(i = 0; i < N; i++) { for(j = 0; j < N; j++) printf("%2.0f",w[i][j]); printf("%7.2f\n",bias[i]); } #endif } randint(n) int n; /* return a random integer in the range 0 to n-1 */ { return(rand() % n); } #define MAXSIZE 5 boxes(a,n) float a[]; int n; /* Draw boxes proportional to activation levels (a) ala Hinton diagrams. * If n equals N then draw all N boxes otherwise draw only the nth. 
*/ { int i,j,k,x,y,x0,y0; int size,count; x0 = 5; y0 = 40; count = -1; for(k = 0; k < 3; k++) { x = x0; y = y0; for(i = 0; i < 3; i++) { for(j = 0; j < 3; j++) { x += 12; count++; if(n == N || n == count) { size = a[count]*MAXSIZE; _setcolor(0); _rectangle(_GFILLINTERIOR, x-MAXSIZE,y-MAXSIZE,x+MAXSIZE,y+MAXSIZE); _setcolor(15); _rectangle(_GBORDER,x-size,y-size,x+size,y+size); } } x = x0; y += 12; } x0 += 48; } y0 = 40; for(k = 0; k < 5; k++) { x = x0; y = y0; for(i = 0; i < 3; i++) { for(j = 0; j < 3; j++) { x += 12; if(i != 1 || j != 1) { count++; if(n == N || n == count) { size = a[count]*MAXSIZE; _setcolor(0); _rectangle(_GFILLINTERIOR, x-MAXSIZE,y-MAXSIZE,x+MAXSIZE,y+MAXSIZE); _setcolor(15); _rectangle(_GBORDER,x-size,y-size,x+size,y+size); } } } x = x0; y += 12; } x0 += 48; } } drawboard() { static char board[5][6] = {32,179,32,179,32,0, 196,197,196,197,196,0, 32,179,32,179,32,0, 196,197,196,197,196,0, 32,179,32,179,32,0}; int i; _clearscreen(_GCLEARSCREEN); for(i = 0; i < 5; i++) { _settextposition(i+8,37); _outtext(&board[i][0]); } _settextposition(2,2); _outtext("move x o empty 2x 2o 1x 1o"); _settextposition(19,5); _outtext("Use arrow keys to move cursor."); _settextposition(20,5); _outtext("ENTER will change the marker under the cursor. 
x is program, o is opponent."); _settextposition(21,5); _outtext("ESCAPE will start network."); _settextposition(22,5); _outtext("After network is started, any key will stop."); _settextposition(23,5); _outtext("ESCAPE while network is running will exit program"); } putmarkers(b,x) float b[]; /* board description vector for input to network */ int x; /* location of programs new move */ /* * puts x's and o's on the board * x is the programs marker, * o is the opponents marker */ { int i,row,col; static int board[9] = {0}; int c, c1; /* place the programs new move on the board */ if(x > -1) { _settextposition(8+2*(x/3),37+2*(x%3)); _outtext("x"); board[x] = 1; } /* get the users input to the new board description */ row = 8; col = 37; i = 0; _settextposition(row,col); _displaycursor(_GCURSORON); while((c = getch()) != 27) /* continue until escape is hit */ { switch(c) { case 13: /* enter key */ { board[i] = (board[i]+1)%3; if(board[i] == 0) _outtext(" "); else if(board[i] == 1) _outtext("x"); else _outtext("o"); _settextposition(row,col); break; } case 0: { c1 = getch(); switch(c1) { case 72: /* up arrow */ { if(row > 8) { row -= 2; i -= 3; _settextposition(row,col); break; } } case 75: /* left arrow */ { if(col > 37) { col -= 2; i--; _settextposition(row,col); break; } } case 77: /* right arrow */ { if(col < 41) { col += 2; i++; _settextposition(row,col); break; } } case 80: /* down arrow */ { if(row < 12) { row += 2; i += 3; _settextposition(row,col); break; } } } } } } _displaycursor(_GCURSOROFF); for(i = 0; i < 9; i++) { switch(board[i]) { case 0: { b[i] = 0.0; b[i+9] = 0.0; break; } case 1: { b[i] = 1.0; b[i+9] = 0.0; break; } case 2: { b[i] = 0.0; b[i+9] = 1.0; break; } } } } case 2: { b[i] = 0.0; b[i+9] = 1.0; break; }
num44@ecst.csuchico.edu (Terral Jamison) (10/27/90)
I have a few other NN source examples (mostly in C, but one in Lisp). If there is any interrest, I'll post them, too. -- TJ
jt@cogsci.ed.ac.uk (Jay Buckingham) (10/27/90)
In article <945@heurikon.heurikon.com> johne@heurikon.com writes: > >I have zilch in the area of neural-net programs, and would like to get my feet >wet by doing some tinkerring. If you have any source code that you would feel >comfortable E-mailing to me I would much apreciate it. Any level of software >would be fine; I just need to get some hands-on experience; Books take can take >a person only so far. I fully agree that 'Books take can take a person only so far'. In the neural net class that I teach I emphasize that the best way to learn is by doing. My colleagues and I are constantly amazed to see messages on the net of the form 'I'm looking for source which implements XXX' where XXX is some simple neural net algorithm. If you want to get your feet wet and do some tinkering you should code up a program which implements some simple algorithm. Sure, you can compile someone elses simple program, see what it does, maybe modify it a bit, but only by programming one yourself will you be forced to *think* and make the design decisions regarding data structures to represent the net and input/output vectors, the functions to implement the algorithm, etc. Once you have done this a few times yourself you will be in a much better position to look at someone elses code and understand or critique their design decision. You will find times when you will be looking at someone's code and think "Ah, I've struggled with that transform for a long time. They certainly have come up with a slick way of doing it" or "Hmm, that's not quite as tight as I would have done it." Fortunately, simple neural net algorithms *are* simple to implement. I am certainly not suggesting that you start off by trying to implement backprop with conjugate gradient methods. Start off with something like the associative net or a perceptron. Keep it simple - just use printf() statements to do output. No need to complicate things with graphics. 
What I suggest in the exercises that I assign is that students printout data in a format that can be used by one of the common (and easy to use) graphics programs like gnuplot, splot or xgraph and that they become comfortable with tools like grep, awk, and sort in order to shuffle their data around. I am appending the exercise that I assign for the associative net. The notes include a description of the algorithm so the student does not have to tease it out of the original papers (which are necessarily terse). The notes are in LaTeX format, but the 'input' commands for the figures have been commented out so that I did not have to include the postscript for them. Doing this exercise involves writing a program to implement an associative net. If you follow the exercise you will begin to gain a feel for the capabilities and limitations of this architecture. In the lecture after the students do this exercise I cover the analysis of this net so that they then understand (hopefully) *why* the net performs the way it does. Give it a try! -- Jay ------------------------ cut here ----------------------- \documentstyle [A4wide]{article} \setlength{\parindent}{0in} \setlength{\parskip}{0.2in plus 0.2in minus 0.2in} \begin{document} {\Large Associative Net Notes\\ Neural Net MSc Class\\ Autumn 1990\\ \\ Jay Buckingham } \section{What is associative memory?} In this class we simply mean a memory in which pairs of {\em events} are associated together such that presentation of the first event to the memory will elicit the second event from the memory. Such a memory is called {\em content-addressable} if presentation of part of one of these events will elicit the whole of the event with which it is paired (associated). A common special case is the content-addressable auto-associative net in which events are paired with themselves; the goal is recall of a full event by presenting some (small) part of it. 
\section{What is the Associative Net?} The associative net is a simple abstraction of a set of primary neurons. Primary neurons (often pyramidal cells) are those whose outputs project to other brain regions. The synapses they make with other neurons are usually excitatory. That is, these cells send the information-full signals around the nervous system. [In contrast, another large class of neurons is the local inhibitory interneurons. They usually project only to neurons in their neighborhood. Their synapses are inhibitory. Their postulated function is to dynamically set the thresholds of the primary neurons.] In the associative net abstraction, the axons of other cells (postulated to be in some other brain area) synapse with the dendrites of each of the primary cells (Figure~\ref{anet1}). The efficacy of a synapse is represented by a binary weight (0 or 1). Each axon and primary unit has a state associated with it. These are also binary. The event pairs noted above are represented by the pair of vectors of the states of the axons and the states of the primary units. 
To make this a bit more concise let: \[ \begin{array}{lcl} npats & = & \mbox{The number of pattern pairs stored in the net} \\ N_{A} &=& \mbox{The number of input axons}\\ N_{B} &=& \mbox{The number of output (primary) units}\\ M_{A} &=& \mbox{The number of input axons in state '1'}\\ M_{B} &=& \mbox{The number of output (primary) units in state '1'} \end{array} \] \[ \begin{array}{lcll} x_{i} &\in& \{0,1\}& \mbox{The state of input axon i}\\ \bf x &=& (x_{1}, x_{2}, \ldots, x_{N_{A}}) & \mbox{The input state vector}\\ y_{i} &\in& \{0,1\} &\mbox{The state of output unit i}\\ \bf y & =& (y_{1}, y_{2}, \ldots, y_{N_{B}}) & \mbox{The output state vector}\\ w_{ij} &\in& \{0,1\} & \mbox{The weight of the synapse from input unit j to output unit i}\\ {\bf w}_{i} & = & (w_{i1}, w_{i2}, \ldots, w_{i,N_{A}}) & \mbox{The weight vector into unit $i$}\\ \bf W &=& (w_{ij}) & \mbox{The weight matrix} \end{array} \] The input vectors are also often known as $A$ and the output vectors as $B$. You often see the patterns pairs to be associated as $(A^{p}, B^{p})$ where the superscript $p$ indexes the pattern and runs from $0$ to $npats - 1$ (or less often, 1 to npats). This superscript is also used to index {\bf x} and {\bf y}. \begin{figure} \begin{center} %\input{anet1} \end{center} \caption[Figure 1] {A standard schematic depiction of an associative net. The small circles represent the primary units. The vertical lines represent the dendrites of these units. The horizontal lines represent the input axons.} \label{anet1} \end{figure} \section{How does it work?} Patterns pairs $(A^{p}, B^{p})$ are presented to the associative net and the association is stored by modifying the weight matrix {\bf W}. The weights all start out equal to zero before any patterns have been stored. To store a pattern p:\\ Set $w_{ij} = 1$ if $x_{j}^{p} = 1$ and $y_{i}^{p} = 1$\\ That is, turn on weights where there is conjoint activity on the input axon and the output unit. 
In this model weights never decay. To recall pattern p:\\ Let: \[ d_{i} = \sum_{j=1}^{N_{A}} w_{ij}x_{j} \] be the dendritic sum (or weighted sum of the inputs) impinging on unit $i$ and \[ a = \sum_{j=1}^{N_{A}} x_{j} \] be the number of active input lines, or simply the input activity. Then \[ y_{i} = \left\{ \begin{array}{ll} 0 & \mbox{if $d_{i} < a$} \\ 1 & \mbox{if $d_{i} \geq a$} \end{array} \right. \] That is, the output units fire if their dendritic sum is greater than or equal to the number of active input lines. Example:\\ Consider two eight bit pattern pairs:\\ \[ \begin{array}{ll} {\bf x}^1 = (0 1 0 1 0 1 0 0) & {\bf y}^1 = (1 1 1 0 0 0 0 0) \\ {\bf x}^2 = (1 0 0 0 0 0 1 1) & {\bf y}^2 = (0 0 0 0 1 0 1 1) \\ \end{array} \] After storing pattern 1 the weight matrix looks like the one in Figure~\ref{anet2}. \begin{figure} \begin{center} %\input{anet2} \end{center} \caption[Associative net after storing one pattern pair] {Associative net after storing one pattern pair. The filled circles represent weights with value 1.} \label{anet2} \end{figure} After storing both patterns the weight matrix looks like the one in Figure~\ref{anet3}. \begin{figure} \begin{center} %\input{anet3} \end{center} \caption[Associative net after storing two pattern pairs] {Associative net after storing two pattern pairs. The filled circles represent weights with value 1.} \label{anet3} \end{figure} Another value to note is the fraction of synapses with weight '1' after learning. This is commonly called $\pi$. In this example eighteen of the synapses have weight one so $\pi = 18/64 = .28$. To recall using pattern ${\bf x}^1$ as a cue, calculate the number of active input lines in ${\bf x}^1$, which comes to 3, and calculate $d_{i}$ for each output unit. The vector of the $d_{i}$ values comes out ${\bf d} = (3 3 3 0 0 0 0 0)$. To get the output vector we threshold the dendritic sum vector and get ${\bf y} = (1 1 1 0 0 0 0 0) = {\bf y}^1$ as desired. 
\section{Why study the Associative Net?} Because it: \begin{itemize} \item Can perform an interesting task - associative memory. \item It is fundamental. That is, many other neural architectures are elaborations on this simple abstraction. E.g. Competitive nets, Marr's nets. \item It uses a simple weight modification rule - the most abstract version of the Hebb rule. \item It is simple. So it is: \begin{itemize} \item Easy to code. \item Quick to run. \item Easy to understand. \item Analyzable. \end{itemize} \item Though simple, it does have application in brain theory. Several important theories of the functioning of particular brain regions are based on this abstraction. E.g. Marr's theories of the cerebellum and hippocampus, McNaughton's ideas about the hippocampus. \end{itemize} \section{Reading} Willshaw's work on the associative net has been reprinted in various volumes of collections of neural net papers. For the purposes of this class you only need to read one of them; the {\em Nature} paper is probably easiest to find (most of the science libraries have {\em Nature} and it appears in the {\em Brain Theory} volume as well). The Jordan chapter in PDP volume I is a good introduction to the linear algebra immediately useful when working with neural computing architectures. \begin{description} \item[Willshaw,~D.~J., Buneman, O.~P., Longuet-Higgins,~H.~C. (1969).] Non-holographic associative memory. {\em Nature} V 222. \item[Willshaw, D.~J. (1981).] Holography, associative memory and inductive generalization. In {\em Parallel models of associative memory} pp. 83-104. G.~E.~Hinton and J.~A.~Anderson (Eds.), Erlbaum. \item[Jordan,~M. (1986).] Introduction to linear algebra. In {\em Parallel Distributed Processing, Vol 1}. Eds. Rumelhart, D. and McClelland, J. MIT Press. \end{description} \newpage \section{Exercises} The exercise basically involves building a simulator to implement an associative net and running it to explore the properties of this architecture. 
For simplicity, we will focus on one size of network and explore the effects of varying other parameters such as $M$ and npats. Let:\\ $N = N_{A} = 256$ and $N_{B} = 256$. The parameters which will be varied for different runs of the simulator are $M = M_{A} = M_{B}$ and npats. The values of $M$ we will consider are: $M = M_{A} = M_{B} = 8, 16, 32$. \subsection{Building the input/output patterns} {\bf To do:} \subsubsection{Write the program and run it to generate patterns} Write a program to generate a set of patterns which can be used as either input or output (target) patterns. Each pattern must have exactly $M$ units on (the rest off). Choose these $M$ randomly. Generate (at least) two sets for each value of $M$; one will be used as inputs to your simulator, one as the targets. The second could just be a permutation of the first, but it's probably easier just to run you pattern generator twice. Each set should have, say, 2000 patterns for the topics being explored in this exercise. \subsubsection{Check the nature of the patterns generated} In any neural net work the representation in terms of the states of units is critical. Different representations lead to very different performance and possibly function. In this case we are dealing with random binary vectors so things are relatively simple. But before we feed them into anything let's look at the properties of the representation which arise from the specification above. We will look simply at the number of patterns in which a particular unit fires and the hamming distance between the patterns. For each pattern set: \begin{enumerate} \item When considered across all patterns in a pattern set each unit will be 'on' some number of times. Call this 'unit usage', that is, the number of times a unit is used or involved in patterns. Plot a histogram of unit usage. 
[To do this you will probably write a small program that reads your pattern file, keeps track of how many times each unit is on, and converts that into (unit usage, frequency) pairs stored in some sort of array. Much of this can also be done with judicious use of sort, uniq and awk as well.] \item For each pair $i, j \in [0, npats-1], j < i$ calculate the hamming distance between pattern $i$ and pattern $j$. Plot (or in this case just printout) the hamming distance histogram. (That is, for each hamming distance that occurs, printout the frequency of occurance). \end{enumerate} \subsection{Build and run an associative net simulator} {\bf To do:} \subsubsection{Program an associative net simulator.} Your program must: \begin{itemize} \item Setup the structure for the associative net.\\ (Easy in this case - just calloc.) \item Train the net using a set of input and a set of target patterns. \item Test recall by presenting each input pattern, running it through the net using the recall algorithm above, and calculating the hamming distance between the simulator output and the target output for that stored input pattern. \end{itemize} What you do with this hamming distance depends on what questions you are asking about the behaviour of the net. So... you will be hacking about with this program quite a bit in order to calculate, store and output information relevant to the question at hand. \subsubsection{Run the simulator to test its performance} Once you have your simulator running, experiment with its behaviour as a function of the $M$ and npats. In this case the function we are interested in is that of an associative memory. We need to use some measure to test performance. The measure used depends to a large extent on the questions being asked. 
In this case a good performance measure is: \begin{equation} performance = \frac{\mbox{number of patterns recalled with} \leq \mbox{1 bit output error}}{\mbox{number of patterns presented for recall}} \end{equation} {\bf To do} for $M = 8, 16, 32$: \begin{enumerate} \item Test performance as a function of npats.\\ For the particular value of $M$ under consideration, choose one of the patterns sets with that $M$ bits on per pattern to be the inputs, the other to be the target outputs. Run the net storing npats patterns, where you use a different value for npats each run. Then present the input patterns stored for recall, keep track of the hamming distance between the simulator output and the target output for the originally stored pattern pair, and calculate the performance (above) at each value of npats you choose. Since you are interested in understanding how performance varies with npats use enough values of npats to give you a good picture of that. That is, start with npats fairly low and increase it until performance becomes poor. Also, you will want to do more runs in the neighborhood where performance is changing quickly. This exercise is purposefully vague about which values of npats you should choose. One of your tasks is to figure out a granularity that will give you the information you want without wasting alot of time (both yours and compute). Figuring out which regions of parameter space to explore is an important part of the work when studying any dynamic system. \begin{itemize} \item Produce a graph showing performance (on the y axis) plotted against npats (on the x axis). \end{itemize} \item Test performance using partial or noisy cues for recall.\\ In this part of the exercise we test the ability of this architecture to function as a {\em content-addressable} memory. 
What you will do is choose a value of npats, store npats pattern pairs in a net, then present progressively more partial or noisy versions of the stored input patterns and examine the output patterns elicited. Again, we will use the performance measure above. Modify your simulator to present partial or noisy versions of stored input patterns for recall. What you will do is store pattern pairs as before, but to test partial/noisy pattern recall:\\ \begin{itemize} \item Get the full version of the input pattern (from the file or array). \item Decide how partial or noisy it will be. At this stage you can think in terms of how many bits that were on in the stored patterns will be off in the cue (partialness) and how many bits that were off in the stored pattern will be on in the cue (noise). \item For partial patterns, choose some number of bits, call it b, in the stored pattern to turn off. Turn off b bits (that were on in the stored pattern) randomly. \item In this exercise we'll have the number of bits on in noisy patterns $= M$ for simplicity. So to construct noisy patterns we will do as we did for partial patterns then add some noise bits. \begin{itemize} \item Choose b. \item Turn off b bits (that were on) randomly. \item Turn on b bits (that were {\em not} on in the stored pattern) randomly. \end{itemize} \item Present the partial/noisy cue for recall and keep track of number of bits error between the simulator output and the target output for the originally stored pattern. \end{itemize} You will plot the performace as a function of how partial or noisy the cues are. A good measure of the similarity between two vectors is the cosine of the angle between them (it works well for binary patterns). Let $cos({\bf x}^1, {\bf x}^2)$ represent the cosine of the angle between two vectors ${\bf x}^1$ and ${\bf x}^2$ (easier than writing $\cos \theta$ where $\theta$ is the angle between ${\bf x}^1$ and ${\bf x}^2$). 
If ${\bf x}^1$ and ${\bf x}^2$ point in the same direction, $cos({\bf x}^1, {\bf x}^2) = 1$ and if the angle between them is 90 degrees $cos({\bf x}^1, {\bf x}^2) = 0$. So use cos(cue, pattern) on the x axis of your graphs. Recall that for two vectors ${\bf x}^1$ and ${\bf x}^2$ the cosine of the angle between them is given by: \[ \cos \theta = \frac{{\bf x}^1 \cdot {\bf x}^2}{\|{\bf x}^1\| \|{\bf x}^2\|} \] where $\theta$ is the angle between ${\bf x}^1$ and ${\bf x}^2$. E.g. If $M = 16$ and you want a noisy pattern such that the cosine of the angle between it and the stored input pattern is 0.5, then let b (the number of bits above) = 8. The lengths of each of the vectors is $\surd 16$. If $b = 8$ then they overlap in 8 bits so their dot product is 8. And $8/(\surd 16 \surd 16)= 0.5$. So... Find npats where performance just starts to degrade. \begin{itemize} \item What is $\pi$ for that run? \item Do runs with partial patterns for recall cues and plot performance (y axis) as a function of cos(cue, pattern) (x axis). Do runs for $cos(cue, pattern) \approx 0.1, 0.2, 0.3, \ldots 1.0$. \item Do runs with noisy patterns for recall cues and plot performance (y axis) as a function of cos(cue, pattern) (x axis). Do runs for $cos(cue, pattern) \approx 0.1, 0.2, 0.3, \ldots 1.0$. \end{itemize} \end{enumerate} \end{document}
milleraj@sage.cc.purdue.edu (Andy) (10/28/90)
In article <945@heurikon.heurikon.com> johne@heurikon.com writes: > >I have zilch in the area of neural-net programs, and would like to get my feet >wet by doing some tinkerring. If you have any source code that you would feel >comfortable E-mailing to me I would much apreciate it. Any level of software >would be fine; I just need to get some hands-on experience; Books take can take >a person only so far. 10Q > Same here! Any one who sends it out, could you send it to me too? Andy Miller milleraj@sage.cc.purdue.edu
loren@tristan.llnl.gov (Loren Petrich) (10/30/90)
In article <1990Oct26.174757.2607@ecst.csuchico.edu> num44@ecst.csuchico.edu (Terral Jamison) writes: > >I have a few other NN source examples (mostly in C, but one in Lisp). If there is any interrest, I'll post them, too. I'll bite. I'd like to see some more examples, preferably relatively general ones. Why is one of the examples in Lisp? I get the impression that a language like C is a lot faster than one like Lisp for general programming tasks -- no lists or other such things to keep track of. $$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$ Loren Petrich, the Master Blaster: loren@sunlight.llnl.gov Since this nodename is not widely known, you may have to try: loren%sunlight.llnl.gov@star.stanford.edu
tsych@pyrthoth.pyramid.com (Terry Sych @ Pyramid Technology Corp.) (10/30/90)
I recently found the following at a local bookstore: "Neural Network PC Tools, A Practical Guide" Edited by Russel C. Eberhart & Roy W. Dobbins Academic Press, Inc., 1990 ISBN 0-12-228640-5 It contains several source code listings (C & Turbo Pascal) of neural networks. It's oriented toward the PC world with one chapter on the Transputer. From the Introduction: "... Probably the best background for getting the maximum benefit from this book is liking to "muck about" with computers. If you're comfortable running a variety of software and occasionally (possibly with some intrepidation) fiddling with programming simple stuff in a language such as Basic or C, you'll feel right at home here." I haven't read it all yet so I can't give a review but it looks like an excellent resource. _m_______ __Terry_Sych__________________________________________ ___mmm_____ __Pyramid_Technology_Corp.__Mountain_View__CA ________ _____mmmmm___ __tsych@pyramid.com __________________________________ _______mmmmmmm_ __{decwrl,hplabs,sun}!pyramid!tsych___________________
jlm@lucid.com (Jim McDonald) (10/30/90)
In article <85041@lll-winken.LLNL.GOV> loren@tristan.llnl.gov (Loren Petrich) writes: In article <1990Oct26.174757.2607@ecst.csuchico.edu> num44@ecst.csuchico.edu (Terral Jamison) writes: > >I have a few other NN source examples (mostly in C, but one in Lisp). If there is any interrest, I'll post them, too. I'll bite. I'd like to see some more examples, preferably relatively general ones. Why is one of the examples in Lisp? I get the impression that a language like C is a lot faster than one like Lisp for general programming tasks -- no lists or other such things to keep track of. Most of the time in NN software is spent computing numerical values-- dot products or whatever. If those inner loops are fast, then your application will be fast. In fact, since lisp compilers tend to be more flexible than those in other languages, it is (at least in some lisp products) easier to create hand-optimized machine-code templates that are invoked when such inner loops are recognized. The result can be inner loops that are 5 or 10% faster than those in the most highly optimized C applications now on the market, resulting in complete applications that are overall comparable or faster than similar (but less functional) applications written in C, even after all the "lists and other things" have been accounted for. If there were C compilers that allowed you to write macros to emit hybrid hand/machine optimized assembly code (each doing what it's best at), and if someone actually wrote such macros, applications built with them would probably regain a few percent lead over the fastest lisp implementations. In practice, however, no one seems to really care about that last 2% of speed, so it's hard to even make a business case for investing a few days to write the macros. In general, the lisp/C speed issue is largely a myth, but of course poorly written lisp code and/or poor lisp implementations can be arbitrarily slow, so your mileage may vary. 
I imagine the same is true with C compilers. jlm
tcasey@hydra.unm.edu (Arthur Crackpot) (10/30/90)
In article <1990Oct26.174757.2607@ecst.csuchico.edu> num44@ecst.csuchico.edu (Terral Jamison) writes: > >I have a few other NN source examples (mostly in C, but one in Lisp). If there is any interrest, I'll post them, too. > >-- TJ I would very much like to see more posts. I would rather get unix source, but am going to modify the posted src. tim
tcasey@hydra.unm.edu (Arthur Crackpot) (10/30/90)
In article <2241@spark.ed.ac.uk> jt@cns.ed.ac.uk (Jay Buckingham) writes: > >I am appending the exercise that I assign for the associative net. >...In the lecture after the students do this exercise >I cover the analysis of this net so that they then understand >(hopefully) *why* the net performs the way it does. > >-- Jay If I might be so bold, could you post the notes also? tim
mod@masscomp.ccur.com (2915) (10/31/90)
If anybody has some simple NN program source codes out there that run under Xwindows, they would would be appreciated, too. I don't have an IBM compatible machine (only UNIX boxes) but would like to engage in some of the previously mentioned "mucking about"... Thanks. ---------------------------------------------------------------------------- Michael O'Donnell (508)392-2915 home(508)251-7576 fax(508)392-2492 ___________ / ________/__ ...!{harvard,uunet,petsd}!masscomp!mod /__/_______/ / mod@westford.ccur.com Concurrent /__________/ Computer Corporation 1 Technology Way Westford, MA 01886 DISCLAIMER: My opinions only coincidentally resemble those of my employer. ----------------------------------------------------------------------------