ajayshah@aludra.usc.edu (Ajay Shah) (04/15/90)
Here are a few units. They are part of a pretty well-finished ML package, the only reason i'm not posting the package is that i don't have a good user manual yet. If there are people out there who are interested, i can do that. Some new comments i inserted just now come with the keyword 'netters' First, unit algo. It is the basic optimisation part.
---------------------------------------------------------------------------
UNIT algo;
{ Core hill-climbing optimiser: repeatedly builds a search direction
  (BHHH or plain gradient), line-searches along it by jump doubling,
  and iterates until convergence.  Entry point: Solve. }

{$N+} {$E+}   { 8087 numerics + software emulation fallback }

INTERFACE

uses Constant, Typedef, LikeFunc, Derivs, VectOps, Time, Util, crt;

procedure Solve;

IMPLEMENTATION

procedure Derive_New_Guess(var theta, thetanew : ParameterVectorType;
                           var failed : boolean;
                           var FirstL, FinalL : float);
{ Given a guess theta, returns a better guess thetanew.
  When you finish debugging, pass both by reference for speed.
  If, for any abstruse reason, he can't improve upon L(theta),
  he'll just return thetanew as theta and failed = true. }

  procedure MakeSearchDir(var F : ParameterMatrixType;
                          var g, s : ParameterVectorType);
  { s := F * g in the SHORT (estimated-parameters-only) domain.
    F is SParams x SParams and g has SParams meaningful entries. }
  var
    i, j : integer;
    sum  : float;
  begin
    for i := 1 to SParams do   { rows of F.  SParams, not Params! }
    begin
      fillchar(sum, sizeof(sum), 0);  { sum := 0.0 }
      { BUGFIX: the original inner loop ran j := 1 to Params, reading
        F[i,j] beyond the SParams x SParams region that MakeF fills.
        Those cells are uninitialized; garbage*0.0 can be NaN and
        poison the search direction.  Columns, like rows, are short. }
      for j := 1 to SParams do
        sum := sum + F[i,j]*g[j];
      s[i] := sum
    end;
  end;

  procedure MakeNew(jmp : float; var theta, ss, tnew : ParameterVectorType);
  { Take a jumpsize and a short search direction ss, modify theta to
    make thetanew.  ndx walks the LONG vector, skipping parameters
    that are not being estimated (c.estimate[] is the mask). }
  var
    ndx, i : integer;
  begin
    move(theta, tnew, sizeof(theta));   { tnew := theta }
    ndx := 0;
    for i := 1 to SParams do
    begin
      repeat inc(ndx) until c.estimate[ndx];
      { now element ndx of long maps to element i of short }
      tnew[ndx] := tnew[ndx] + (jmp*ss[i])
    end
  end;

const
  factor   : float = 8.0;     { shrink divisor when the first jump overshoots }
  lowlimit : float = 1e-16;   { give up when jumpsize collapses below this }
var
  S_F                  : ParameterMatrixType;
  S_SearchDir, S_GradL : ParameterVectorType;
  thisL, lastL, jumpsize : float;
  overshot : boolean;
  i        : integer;
begin { of procedure Derive New Guess }
  thisL := FirstL;  { passed to me by caller }
  { From now on, i don't touch FirstL, we'll just manipulate thisL and lastL }

  { we only compute search direction ONCE. }
  if c.verbose then write('Creating search direction...');
  MakeGradL(theta, S_GradL);
  case c.method of
    BHHH   : begin MakeF(theta, S_F); MakeSearchDir(S_F, S_GradL, S_SearchDir) end;
    Simple : S_SearchDir := S_GradL;
  end;
  { At the end of all this, we have a short search dirn }
  if c.verbose then
  begin
    write(#13, 'Search Dirn: ');
    for i := 1 to SParams do write(S_SearchDir[i]:6:5,' ');
    writeln;
    writeln(' Initial LogL = ',FirstL:10:6);
  end;

  { Line search: keep doubling jumpsize until LogL stops improving.
    If the very first jump already overshoots (before beating FirstL),
    shrink by `factor` and try again. }
  thetanew := theta;
  overshot := false;
  jumpsize := c.firstjump;
  repeat
    lastL := thisL;
    MakeNew(jumpsize, theta, S_SearchDir, thetanew);
    { take short searchdir, long theta and make long thetanew }
    thisL := LogL(thetanew);
    if c.verbose then
      writeln('Jump = ',jumpsize:5:2,' ==> ','LogL = ',thisL:10:6,' ');
    overshot := thisL <= lastL;
    if overshot and not (lastL >= FirstL) then
    begin
      { you overshot and you haven't improved on FirstL yet }
      jumpsize := jumpsize/factor;
      thisL := lastL;
      overshot := false
    end;
    jumpsize := 2.0*jumpsize
  until overshot or (jumpsize < lowlimit);
  if c.verbose then writeln;

  { at this point, we've overshot jumpsize by one doubling.  So we
    backtrack: jumpsize has been doubled once past the overshooting
    jump, so 0.25*jumpsize is the last jump that still improved L. }
  MakeNew((0.25*jumpsize), theta, S_SearchDir, thetanew);
  FinalL := lastL;  { sending back FinalL to caller }
  failed := FinalL <= FirstL
  { you failed if you couldn't improve upon the old LogL }
end; { of procedure Derive New Guess }

procedure SearchL(var seed, LocalMax : ParameterVectorType;
                  var Final : float;    { Final is LogL(LocalMax) }
                  var failed : boolean);
{ Iterate Derive_New_Guess from seed until convergence or failure. }
var
  biggest : float;  { per-coordinate convergence tolerance, 10^-decimals }

  procedure dump(var b : ParameterVectorType);
  { Print the estimated elements of a long parameter vector. }
  var
    dim, i : dimtype;
    s1     : string;
  begin
    if c.verbose then writeln;
    Display('theta: ', false);
    dim := 0;
    for i := 1 to SParams do
    begin
      repeat inc(dim) until c.estimate[dim];
      str(b[dim]:6:Criterion.decimals, s1);
      Display(s1+' ', false)
    end;
    Display('', true)
  end;

  function converged(var t1, t2 : ParameterVectorType;
                     o, n : float; iters : integer) : boolean;
  { True when the iteration cap is hit, the LogL gain is below
    deltalogl, or every coordinate moved by less than `biggest`. }
  var
    tmp : boolean;
    i   : integer;  { it's thinking long, but never mind }
  begin
    if iters = 0 then begin converged := false; exit end;
    with Criterion do
    begin
      if (iters >= iterations) or ((n-o) < deltalogl) then
      begin converged := true; exit end;
    end;
    tmp := true;
    for i := 1 to Params do
      if abs(t1[i]-t2[i]) > biggest then tmp := false;
    converged := tmp
  end;

var
  theta, thetanew : ParameterVectorType;
  iterations : integer;
  i          : byte;
  s1         : string;
  oldL, newL : float;
  stopsign   : boolean;
begin
  stopsign := false;
  failed := false;
  iterations := 0;
  biggest := 1.0;
  for i := 1 to Criterion.decimals do biggest := biggest/10.0;
  if c.verbose then write(ElapsedStr,'Seed') else Display('0. ', false);
  newl := LogL(seed);
  thetanew := seed;
  dump(thetanew);
  { assert thetanew is seed, newl is LogL(either, both) }
  while not stopsign do
  begin
    inc(iterations);
    theta := thetanew;
    oldL := newL;
    Derive_New_Guess(theta, thetanew, failed, oldL, newL);
    { This takes theta, oldL which is LogL(theta), generates
      thetanew, newL which is LogL(thetanew) }
    stopsign := failed or converged(theta, thetanew, oldL, newL, iterations);
    if not stopsign then
    begin
      if c.verbose then
      begin
        writeln;
        write(ElapsedStr,'Iteration #',iterations,': ')
      end
      else
      begin
        str(iterations, s1);
        Display(s1+'. ', false);
      end;
      dump(thetanew)
    end
    else write(chr(254));  { small filled square: end-of-search marker }
  end;
  LocalMax := thetanew;
  Final := newL
end;

procedure Solve;
{ Driver: optional "augmented" warm-up passes on 1/4 then 1/2 of the
  data with loosened convergence criteria, then a full-data search,
  followed by a results table (estimates, std. devs, t-ratios). }

  procedure show(dim : integer; thetai, stddev, tratio : float);
  { One row of the results table.  stddev/tratio of -0.01 is the
    sentinel meaning "not estimated, print blanks". }
  const
    five = ' ';
  var
    s1, s2, s3 : string;
  begin
    str(thetai:8:5, s1);
    if near(stddev, -0.01) then s2 := ' ' else str(stddev:6:3, s2);
    if near(tratio, -0.01) then s3 := ' ' else str(tratio:5:2, s3);
    Display(Labels[dim]+' '+s1+five+s2+five+s3, true)
  end;

  procedure FreeGrad(var t, g : ParameterVectorType);
  { Turn estimation on for ALL parameters, then compute the full
    gradient.  NOTE(review): this permanently widens c.estimate and
    SParams — callers after Solve see the changed mask. }
  var
    i : dimtype;
  begin
    for i := 1 to Params do c.estimate[i] := true;
    SParams := Params;
    MakeGradL(t, g)
  end;

var
  FG, Best   : ParameterVectorType;
  failed     : boolean;
  ndx, i     : integer;
  VCV        : ParameterMatrixType;
  s1, s2     : string;
  ti, sd, tr : float;
  safe       : integer;
  Truth      : SufficientConditions;
  NewL       : float;
begin
  RestartTimer;
  { note to netters.  The Augmented idea here is kinda original.
    Atleast, i thought it up, and i haven't seen it anywhere else.
    It is 30% faster or so when your seed sucks.  If you have a
    really good seed, then it isn't slower.  So i use it by default. }
  if c.augmented then
  begin
    { Save good values for later use: }
    safe := c.useobs;
    Truth := Criterion;

    { Stage I Setup: }
    Display('Searching with 1/4th of the observations:',true);
    with Criterion do
    begin
      DeltaLogL := 100*DeltaLogL;
      decimals := decimals-2;
      iterations := iterations div 100;
      if iterations = 0 then iterations := 1
    end;
    c.useobs := safe div 4;
    SearchL(c.seed, Best, NewL, failed);
    writeln; writeln; writeln;
    c.seed := Best;

    { Stage II Setup: }
    clrscr;
    writeln;
    Display('Phase I search took '+ElapsedStr, true);
    Display('Search with 1/2 the observations:', true);
    with Criterion do
    begin
      DeltaLogL := 10*DeltaLogL;
      decimals := decimals + 1;
      iterations := 10*iterations
    end;
    c.useobs := 2*c.useobs;
    SearchL(c.seed, Best, NewL, failed);
    writeln; writeln; writeln;
    c.seed := Best;

    { Get back to the truth: }
    c.useobs := safe;
    Criterion := Truth;
    clrscr;
    writeln;
    Display('At end of Phase II search,'+ElapsedStr, true);
    Display('Now using full dataset:', true);
  end; { of if augmented }

  { and continue where we might have been: }
  SearchL(c.seed, Best, NewL, failed);
  MakeF(Best, VCV);
  for i := 1 to 3 do Display('', true);
  Display('Search took '+ElapsedStr+' seconds.', true);
  if failed then
  begin
    Display('Estimates have not converged as required.', true);
    Display('Best possible estimates shown: ', true)
  end;
  Display('', true);
  Display('', true);
  Display('Results of Search: ', true);
  Display('', true);
  Display('', true);
  { NOTE(review): column spacing in this header may have been wider in
    the original posting; the string is kept exactly as received. }
  Display(' estimate std. dev. t-ratio', true);
  Display('', true);
  ndx := 1;
  for i := 1 to Params do
  begin
    if c.estimate[i] then
    begin
      ti := Best[i];
      sd := sqrt(VCV[ndx,ndx]);
      tr := ti/sd;
      show(i, ti, sd, tr);
      inc(ndx)
    end
    else show(i, Best[i], -0.01, -0.01)
  end;
  { ndx works in short domain, i works in long }
  str(NewL:8:7, s1);
  Display('', true);
  Display('At above parameter vector, LogL is '+s1, true);
  if c.freegradient then
  begin
    FreeGrad(Best, FG);
    for i := 1 to 3 do Display('', true);
    Display('Full table of first derivatives: ', true);
    Display('', true);
    for i := 1 to Params do
    begin
      str(FG[i]:8:7, s2);
      Display(Labels[i]+' : '+s2, true)
    end;
  end;
end;

begin
  { unit initialisation code }
end.
---------------------------------------------------------------------------
The next file is called derivs.pas, it is where the Grad and F matrices are computed.
---------------------------------------------------------------------------
UNIT Derivs;
{ Numerical building blocks for BHHH: the (short) gradient of LogL
  and the F matrix (negative inverse of the outer-product
  approximation to the second-derivatives matrix). }

{$N+} {$E+}

INTERFACE

uses constant, typedef, matrix, longshort, likefunc;

procedure MakeGradL(var t, g : ParameterVectorType);
procedure MakeF(var t : ParameterVectorType; var F : ParameterMatrixType);
{ On return, g is short SParams x 1
             F is short, SParams x SParams }

IMPLEMENTATION

procedure MakeGradL(var t, g : ParameterVectorType);
{ takes a t:long, and finds a g:short — the sum over observations
  of each observation's (masked) score vector. }
var
  j, obs            : integer;
  tmplong, tmpshort : ParameterVectorType;
  anobs             : OneObservationType;
begin
  fillchar(g, sizeof(g), 0);  { just fast way to init all g[i] to zero }
  Reset(rawfile);
  for obs := 1 to c.useobs do  { run over all observations }
  begin
    read(rawfile, anobs);
    FindOneDeriv(t, anobs, tmplong);
    MapToShort(c.estimate, tmplong, tmpshort);
    for j := 1 to SParams do g[j] := g[j] + tmpshort[j]
  end;
end; { of MakeGradL }

procedure MakeF(var t : ParameterVectorType; var F : ParameterMatrixType);
{ Note to netters.  This is the heart of the BHHH algorithm.
  D2L is approximated by minus the sum of score outer products
  (lower triangle accumulated, then mirrored), inverted, and
  negated to give F. }
var
  i, j, obs     : integer;
  D2L, Result   : ^TNMatrix;  { heap-allocated: too big for the stack }
  LongD, ShortD : ParameterVectorType;
  error         : byte;
  anobs         : OneObservationType;
begin
  New(D2L); New(Result);
  fillchar(D2L^, sizeof(D2L^), 0);
  Reset(rawfile);
  for obs := 1 to c.useobs do
  begin
    read(rawfile, anobs);
    FindOneDeriv(t, anobs, LongD);
    MapToShort(c.estimate, LongD, ShortD);
    { accumulate only the lower triangle: the matrix is symmetric }
    for i := 1 to SParams do
      for j := 1 to i do
        D2L^[i,j] := D2L^[i,j] - ShortD[i]*ShortD[j]
  end;
  { mirror the lower triangle into the upper }
  for i := 1 to SParams do
    for j := (i+1) to SParams do
      D2L^[i,j] := D2L^[j,i];
  Inverse(SParams, D2L^, Result^, error);
  if error <> 0 then
  begin
    writeln('2nd derivatives matrix of LogL is singular.');
    halt(1)
  end;
  for i := 1 to SParams do
    for j := 1 to SParams do
      F[i,j] := -Result^[i,j];  { NOTE ZEE MINUS SIGN!!! after inversion }
  dispose(D2L); dispose(Result)
end;

begin
end.
---------------------------------------------------------------------------
As a demonstration, i present one likelihood function here, it's an ordered probit with fixed cutoffs.
---------------------------------------------------------------------------
UNIT LikeFunc;
{ Demonstration likelihood: ordered probit with fixed cutoffs
  (cells of width 1 centred on Y), with censoring when n = -Y. }

{$N+} {$E+}

INTERFACE

USES typedef, constant, normal;

const
  ModelName : string = 'Clive Bell''s Problem';

function LogL(var theta : ParameterVectorType) : float;
procedure FindOneDeriv(var theta : ParameterVectorType;
                       var obs   : OneObservationType;
                       var O     : ParameterVectorType);
{ These functions and the ModelName string are what unit LikeFunc
  shows user program.
      LogL takes a parameter vector and returns a LogLikelihood.
      FindOneDeriv takes one observation and returns a vector of
      derivatives.
  Both t's are passed by reference for speed, they are not altered
  within this unit. }

IMPLEMENTATION

const
  K = 3;   { number of regressors }
type
  XVector = array[1..K] of real;
  meaningful = record
    X    : XVector;
    n, y : real;
  end;
  NiceParams = record
    Beta  : array[1..K] of float;
    sigma : float
  end;
{ meaningful is exactly the way an observation is organised.  In the
  obs, first we have the block of x's.  Then, at the end, we have n&y.
  In the parameter vector, first we have beta's and last is the sigma. }

function betadotx(var theta : ParameterVectorType; var X : XVector) : float;
{ Given theta and an obs, find Beta dot X }
var
  B : float;
  i : integer;
  t : NiceParams absolute theta;  { overlay: view theta through NiceParams }
begin
  fillchar(B, sizeof(B), 0);  { fast version of B := 0.0 }
  for i := 1 to K do B := B + X[i]*t.Beta[i];
  betadotx := B
end;

function LogL(var theta : ParameterVectorType) : float;
const
  minimum = 1e-8;  { probability floor: below this, ln() would blow up }
var
  BdotX, sum, OneObsL : float;
  anobs  : OneObservationType;
  theobs : meaningful absolute anobs;
  t      : NiceParams absolute theta;
  obsno  : integer;
begin
  fillchar(sum, sizeof(sum), 0);  { faster version of sum := 0.0 }
  reset(rawfile);
  for obsno := 1 to c.useobs do
  begin
    read(rawfile, anobs);
    BdotX := betadotx(theta, theobs.X);
    with theobs do
      if n = -Y then
        { censored.  Can ONLY happen when Y is -ve.
          NOTE(review): this branch is the same expression as the first
          term of the uncensored branch — the lower cutoff is simply
          dropped for censored observations; confirm against the model. }
        OneObsL := CNorm( (Y+0.5 - BdotX)/t.sigma)
      else
        { not censored, thimple observation }
        OneObsL := CNorm( (Y+0.5 - BdotX)/t.sigma)
                 - CNorm( (Y-0.5 - BdotX)/t.sigma);
    if OneObsL < minimum then
      { ln(OneObsL) is likely to blow up; charge a fixed penalty.
        NOTE(review): -174.0 is a hand-picked "very bad LogL" constant. }
      sum := sum - 174.0
    else
      sum := sum + ln(OneObsL)
  end;
  LogL := sum
end; { of likefunc }

procedure FindOneDeriv(var theta : ParameterVectorType;
                       var obs   : OneObservationType;
                       var O     : ParameterVectorType);
{ Returns long vector, with some zeros is estimate is off }
const
  minimum = 1.0e-10;
  { i only let an observation contribute to the GradL vector if it's
    probability is > minimum.  Avoids numerical hassles caused by
    outliers }
var
  scratch, dl, du, upper, lower, BdotX, OneObsL : float;
  j      : integer;
  theobs : meaningful absolute obs;
  t      : NiceParams absolute theta;
begin
  BdotX := betadotx(theta, theobs.X);
  with theobs do
  begin
    upper := (Y+0.5 - BdotX)/t.sigma;
    du := Norm(upper);
    if n = -Y then
    begin
      { censored.  Can ONLY happen when Y is -ve }
      OneObsL := CNorm(upper);
      scratch := du/(t.sigma*OneObsL);
      for j := 1 to K do  { in betas }
        O[j] := -X[j]*scratch;
      O[K+1] := -(Y+0.5-BdotX)*scratch/t.sigma
    end
    else
    begin
      { not censored, thimple observation }
      lower := (Y-0.5 - BdotX)/t.sigma;
      dl := Norm(lower);
      OneObsL := CNorm(upper) - CNorm(lower);
      for j := 1 to K do  { derivatives in all betas }
        O[j] := ((dl - du)*X[j]/t.sigma)/OneObsL;
      O[K+1] := (((Y-0.5-BdotX)*dl) - ((Y+0.5-BdotX)*du))
                /(OneObsL*t.sigma*t.sigma);
    end
  end { of the with }
end;

begin
end.
Last, the time unit:
---------------------------------------------------------------------------
UNIT Time;
{ Wall-clock stopwatch built on the DOS GetTime call. }

interface

uses Dos;

procedure RestartTimer;
function Elapsed : real;
function ElapsedStr : string;

implementation

var
  TimerTicking       : boolean;  { false until RestartTimer is called }
  h1, m1, s1, s1001  : word;     { time-of-day captured at restart }

procedure RestartTimer;
{ Record the current time of day as the stopwatch origin. }
begin
  TimerTicking := true;
  gettime(h1, m1, s1, s1001)
end;

function Elapsed : real;
{ Seconds since RestartTimer; 0.0 if the timer was never started.
  NOTE(review): time-of-day based, so a run that crosses midnight
  gives a negative elapsed time — confirm this is acceptable. }

  function AbsTime(h, m, s, s100 : word) : real;
  { Convert h:m:s.s100 into seconds since midnight. }
  begin
    AbsTime := (s100/100.0) + s + (60.0*m) + (3600.0*h)
  end;

var
  h2, m2, s2, s1002 : word;
  T1, T2 : real;
begin
  if TimerTicking then
  begin
    gettime(h2, m2, s2, s1002);
    T1 := AbsTime(h1, m1, s1, s1001);
    T2 := AbsTime(h2, m2, s2, s1002);
    Elapsed := T2 - T1
  end
  else Elapsed := 0.0
end;

function ElapsedStr : string;
{ Elapsed time formatted as 't = nnnn.n s. ' for display. }
var
  s : string;
begin
  str(Elapsed:6:1, s);
  ElapsedStr := 't = '+s+'s. '
end;

begin
  TimerTicking := false
end.
---------------------------------------------------------------------------
_______________________________________________________________________________
Ajay Shah, (213)747-9991, ajayshah@usc.edu The more things change, the more they stay insane.
_______________________________________________________________________________