ajayshah@aludra.usc.edu (Ajay Shah) (04/15/90)
Here are a few units from a fairly well-finished maximum-likelihood (ML)
estimation package. The only reason I'm not posting the whole package is
that I don't have a good user manual yet. If there are people out there
who are interested, I can do that.
Comments I have inserted just now for this posting are flagged with the
keyword 'netters'.
First, unit algo: the basic optimisation code.
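A quick note for netters before the code: one pass of the search below
computes, in LaTeX notation,

    \theta_{new} = \theta + \lambda s, \qquad
    s = \begin{cases} F \, \nabla \log L(\theta) & \text{(BHHH)} \\
                      \nabla \log L(\theta)      & \text{(Simple)} \end{cases}

where F comes from unit Derivs (posted below) and the jumpsize \lambda is
found by doubling it until LogL stops improving and then backing off.
This is only a summary of what Derive_New_Guess does; read the code for
the details.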
---------------------------------------------------------------------------
UNIT algo; {$N+} {$E+}
INTERFACE
uses
Constant,
Typedef,
LikeFunc,
Derivs,
VectOps,
Time,
Util,
crt;
procedure Solve;
IMPLEMENTATION
procedure Derive_New_Guess(var theta, thetanew:ParameterVectorType;
var failed : boolean;
var FirstL, FinalL : float);
{Given a guess theta, returns a better guess in thetanew (both are
passed by reference for speed).
If, for whatever reason, L(theta) cannot be improved upon, thetanew
is returned equal to theta and failed is set to true.}
procedure MakeSearchDir(var F:ParameterMatrixType;
var g, s:ParameterVectorType);
var
i,j:integer;
sum:float;
begin
for i := 1 to SParams do {rows of F. SParams, not Params!}
begin
fillchar(sum, sizeof(sum), 0); {sum := 0.0}
for j := 1 to Params do
sum := sum + F[i,j]*g[j];
s[i] := sum
end;
end;
procedure MakeNew(jmp:float;
var theta, ss, tnew : ParameterVectorType);
{Take a jumpsize and a short search direction ss,
modify theta to make thetanew.}
var ndx, i : integer;
begin
move(theta, tnew, sizeof(theta)); {tnew := theta}
ndx := 0;
for i := 1 to SParams do
begin
repeat
inc(ndx)
until c.estimate[ndx];
{now element ndx of long maps to element i of short}
tnew[ndx] := tnew[ndx] + (jmp*ss[i])
end
end;
const
factor:float = 8.0;
lowlimit:float = 1e-16;
var
S_F : ParameterMatrixType;
S_SearchDir, S_GradL : ParameterVectorType;
thisL, lastL, jumpsize : float;
overshot : boolean;
i : integer;
begin {of procedure Derive New Guess}
thisL := FirstL; {passed to me by caller}
{From here on FirstL is never touched; we only manipulate
thisL and lastL}
{we only compute search direction ONCE.}
if c.verbose then write('Creating search direction...');
MakeGradL(theta, S_GradL);
case c.method of
BHHH : begin
MakeF(theta, S_F);
MakeSearchDir(S_F, S_GradL, S_SearchDir)
end;
Simple : S_SearchDir := S_GradL;
end;
{At the end of all this, we have a short search dirn}
if c.verbose then
begin
write(#13, 'Search Dirn: ');
for i := 1 to SParams do write(S_SearchDir[i]:6:5,' ');
writeln; writeln(' Initial LogL = ',FirstL:10:6);
end;
thetanew := theta; overshot := false;
jumpsize := c.firstjump;
repeat
lastL := thisL;
MakeNew(jumpsize, theta, S_SearchDir, thetanew);
{take short searchdir, long theta and make long thetanew}
thisL := LogL(thetanew);
if c.verbose then
writeln('Jump = ',jumpsize:5:2,' ==> ','LogL = ',thisL:10:6,' ');
overshot := thisL <= lastL;
if overshot and not (lastL >= FirstL) then
{you overshot and you haven't improved on FirstL yet}
begin
jumpsize := jumpsize/factor;
thisL := lastL;
overshot := false
end;
jumpsize := 2.0*jumpsize
until overshot or (jumpsize < lowlimit);
if c.verbose then writeln;
{at this point the last jump tried has overshot, and jumpsize has been
doubled once more since then, so the last good jump was jumpsize/4.
Backtrack to it:}
MakeNew((0.25*jumpsize), theta, S_SearchDir, thetanew);
FinalL := lastL; {sending back FinalL to caller}
failed := FinalL <= FirstL
{you failed if you couldn't improve upon the old LogL}
end; {of procedure Derive New Guess}
procedure SearchL(var seed, LocalMax:ParameterVectorType;
var Final : float; {Final is LogL(LocalMax)}
var failed:boolean);
var
biggest : float;
procedure dump(var b:ParameterVectorType);
var dim, i : dimtype;
s1 : string;
begin
if c.verbose then writeln; Display('theta: ', false);
dim := 0;
for i := 1 to SParams do
begin
repeat inc(dim) until c.estimate[dim];
str(b[dim]:6:Criterion.decimals, s1);
Display(s1+' ', false)
end;
Display('', true)
end;
function converged(var t1, t2 : ParameterVectorType;
o, n : float;
iters : integer):boolean;
var tmp:boolean; {the check below loops over the long parameter vector;
that is harmless, since elements not being estimated never change}
i:integer;
begin
if iters = 0 then
begin
converged := false; exit
end;
with Criterion do
begin
if (iters >= iterations) or ((n-o) < deltalogl) then
begin
converged := true;
exit
end;
end;
tmp := true;
for i := 1 to Params do
if abs(t1[i]-t2[i]) > biggest then tmp := false;
converged := tmp
end;
var
theta, thetanew : ParameterVectorType;
iterations : integer;
i : byte;
s1 : string;
oldL, newL : float;
stopsign : boolean;
begin
stopsign := false; failed := false; iterations := 0; biggest := 1.0;
for i := 1 to Criterion.decimals do biggest := biggest/10.0;
if c.verbose then write(ElapsedStr,'Seed')
else Display('0. ', false);
newl := LogL(seed); thetanew := seed; dump(thetanew);
{assert thetanew is seed, newl is LogL(either, both)}
while not stopsign do
begin
inc(iterations);
theta := thetanew; oldL := newL;
Derive_New_Guess(theta, thetanew, failed, oldL, newL);
{This takes theta, oldL which is LogL(theta),
generates thetanew, newL which is LogL(thetanew)}
stopsign := failed or
converged(theta, thetanew, oldL, newL, iterations);
if not stopsign then
begin
if c.verbose then
begin
writeln; write(ElapsedStr,'Iteration #',iterations,': ')
end
else
begin
str(iterations, s1); Display(s1+'. ', false);
end;
dump(thetanew)
end
else write(chr(254));
end;
LocalMax := thetanew;
Final := newL
end;
procedure Solve;
procedure show(dim:integer; thetai, stddev, tratio : float);
const five = '     '; {five spaces}
var dims, s1, s2, s3 : string;
begin
str(thetai:8:5, s1);
if near(stddev, -0.01) then s2 := ' '
else str(stddev:6:3, s2);
if near(tratio, -0.01) then s3 := ' '
else str(tratio:5:2, s3);
Display(Labels[dim]+' '+s1+five+s2+five+s3, true)
end;
procedure FreeGrad(var t, g:ParameterVectorType);
var i:dimtype;
begin
for i := 1 to Params do c.estimate[i] := true;
SParams := Params;
MakeGradL(t, g)
end;
var FG, Best : ParameterVectorType;
failed : boolean;
ndx, i : integer;
VCV : ParameterMatrixType;
s1, s2 : string;
ti, sd, tr : float;
safe : integer;
Truth : SufficientConditions;
NewL : float;
begin
RestartTimer;
{Note to netters: the 'augmented' idea here is, as far as I know,
original -- at least, I thought it up and have not seen it anywhere
else. It is roughly 30% faster when your starting seed is poor, and
no slower when the seed is good, so I use it by default.
}
if c.augmented then
begin
{Save good values for later use:}
safe := c.useobs; Truth := Criterion;
{Stage I Setup:}
Display('Searching with 1/4th of the observations:',true);
with Criterion do
begin
DeltaLogL := 100*DeltaLogL;
decimals := decimals-2;
iterations := iterations div 100;
if iterations = 0 then iterations := 1
end; c.useobs := safe div 4;
SearchL(c.seed, Best, NewL, failed); writeln; writeln; writeln;
c.seed := Best;
{Stage II Setup:}
clrscr; writeln; Display('Phase I search took '+ElapsedStr, true);
Display('Search with 1/2 the observations:', true);
with Criterion do
begin
DeltaLogL := 10*DeltaLogL;
decimals := decimals + 1;
iterations := 10*iterations
end; c.useobs := 2*c.useobs;
SearchL(c.seed, Best, NewL, failed); writeln; writeln; writeln;
c.seed := Best;
{Get back to the truth:}
c.useobs := safe;
Criterion := Truth; clrscr; writeln;
Display('At end of Phase II search,'+ElapsedStr, true);
Display('Now using full dataset:', true);
end; {of if augmented}
{and continue where we might have been:}
SearchL(c.seed, Best, NewL, failed);
MakeF(Best, VCV);
for i := 1 to 3 do Display('', true);
Display('Search took '+ElapsedStr, true);
if failed then
begin
Display('Estimates have not converged as required.', true);
Display('Best possible estimates shown: ', true)
end;
Display('', true); Display('', true);
Display('Results of Search: ', true);
Display('', true); Display('', true);
Display(' estimate std. dev. t-ratio', true);
Display('', true);
ndx := 1;
for i := 1 to Params do
begin
if c.estimate[i] then
begin
ti := Best[i];
sd := sqrt(VCV[ndx,ndx]);
tr := ti/sd;
show(i, ti, sd, tr);
inc(ndx)
end
else show(i, Best[i], -0.01, -0.01)
end; {ndx works in short domain, i works in long}
str(NewL:8:7, s1);
Display('', true);
Display('At above parameter vector, LogL is '+s1, true);
if c.freegradient then
begin
FreeGrad(Best, FG);
for i := 1 to 3 do Display('', true);
Display('Full table of first derivatives: ', true);
Display('', true);
for i := 1 to Params do
begin
str(FG[i]:8:7, s2);
Display(Labels[i]+' : '+s2, true)
end;
end;
end;
begin {unit initialisation code}
end.
---------------------------------------------------------------------------
The next file is derivs.pas; it is where the gradient (GradL) and the F
matrix are computed.
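For netters: the formula MakeF implements is the BHHH outer-product
approximation. With g_i the score vector of observation i (the gradient
of that observation's log-likelihood), the second-derivatives matrix of
LogL is approximated by -\sum_i g_i g_i^{\top}, and

    F = - \Bigl[ -\sum_{i=1}^{N} g_i g_i^{\top} \Bigr]^{-1}
      =   \Bigl[  \sum_{i=1}^{N} g_i g_i^{\top} \Bigr]^{-1}.

The same F doubles as the estimated variance-covariance matrix from
which Solve prints standard errors.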
---------------------------------------------------------------------------
UNIT Derivs; {$N+} {$E+}
INTERFACE
uses constant,
typedef,
matrix,
longshort,
likefunc;
procedure MakeGradL(var t, g : ParameterVectorType);
procedure MakeF(var t:ParameterVectorType; var F:ParameterMatrixType);
{On return, g is short SParams x 1
F is short, SParams x SParams
}
IMPLEMENTATION
procedure MakeGradL(var t, g : ParameterVectorType);
{takes a t:long, and finds a g:short}
var j, obs : integer;
tmplong, tmpshort : ParameterVectorType;
anobs : OneObservationType;
begin
fillchar(g, sizeof(g), 0); {just fast way to init all g[i] to zero}
Reset(rawfile);
for obs := 1 to c.useobs do {run over all observations}
begin
read(rawfile, anobs);
FindOneDeriv(t, anobs, tmplong);
MapToShort(c.estimate, tmplong, tmpshort);
for j := 1 to SParams do
g[j] := g[j] + tmpshort[j]
end;
end; {of MakeGradL}
procedure MakeF(var t:ParameterVectorType; var F:ParameterMatrixType);
{Note to netters: this is the heart of the BHHH algorithm. F is built
from the per-observation score vectors: the second-derivatives matrix
of LogL is approximated by minus the sum of their outer products, and
F is minus the inverse of that approximation.}
var
i, j, obs:integer;
D2L, Result : ^TNMatrix;
LongD, ShortD:ParameterVectorType;
error : byte;
tmp:float;
anobs : OneObservationType;
begin
New(D2L); New(Result);
fillchar(D2L^, sizeof(D2L^), 0);
Reset(rawfile);
for obs := 1 to c.useobs do
begin
read(rawfile, anobs);
FindOneDeriv(t, anobs, LongD);
MapToShort(c.estimate, LongD, ShortD);
for i := 1 to SParams do
for j := 1 to i do
D2L^[i,j] := D2L^[i,j] - ShortD[i]*ShortD[j]
end;
for i := 1 to SParams do
for j := (i+1) to SParams do
D2L^[i,j] := D2L^[j,i];
Inverse(SParams, D2L^, Result^, error);
if error <> 0 then
begin
writeln('2nd derivatives matrix of LogL is singular.');
halt(1)
end;
for i := 1 to SParams do
for j := 1 to SParams do
F[i,j] := -Result^[i,j]; {NOTE ZEE MINUS SIGN!!! after inversion}
dispose(D2L); dispose(Result)
end;
begin
end.
---------------------------------------------------------------------------
As a demonstration, I present one likelihood function here: an ordered
probit with fixed cutoffs.
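For netters who want the algebra: with the cutoffs fixed at y-0.5 and
y+0.5, one observation contributes

    L_i = \Phi\Bigl( \frac{y_i + 0.5 - x_i'\beta}{\sigma} \Bigr)
        - \Phi\Bigl( \frac{y_i - 0.5 - x_i'\beta}{\sigma} \Bigr)

when it is not censored, and only the first \Phi term when it is
censored (the data flag censoring by n = -y, which can only happen when
y is negative). LogL sums ln L_i over the observations, with a floor so
that ln() of a tiny probability does not blow up. In the code, CNorm is
\Phi and Norm is the density \phi (both from unit normal).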
---------------------------------------------------------------------------
UNIT LikeFunc; {$N+} {$E+}
INTERFACE
USES
typedef,
constant,
normal;
const
ModelName : string = 'Clive Bell''s Problem';
function LogL(var theta:ParameterVectorType):float;
procedure FindOneDeriv(var theta:ParameterVectorType;
var obs:OneObservationType;
var O:ParameterVectorType);
{These functions and the ModelName string are what unit LikeFunc shows
user program.
LogL takes a parameter vector and returns a LogLikelihood.
FindOneDeriv takes one observation and returns a vector of derivatives.
Both t's are passed by reference for speed,
they are not altered within this unit.
}
IMPLEMENTATION
const
K = 3;
type
XVector = array[1..K] of real;
meaningful = record
X : XVector;
n, y : real;
end;
NiceParams = record
Beta : array[1..K] of float;
sigma : float
end;
{meaningful is exactly the way an observation is organised.
In the obs, first we have the block of x's. Then, at the end, we have n&y.
In the parameter vector, first we have beta's and last is the sigma.
}
function betadotx(var theta : ParameterVectorType;
var X:XVector):float; {Given theta and an obs,
find Beta dot X}
var B:float; i:integer;
t : NiceParams absolute theta;
begin
fillchar(B, sizeof(B), 0); {fast version of B := 0.0}
for i := 1 to K do B := B + X[i]*t.Beta[i];
betadotx := B
end;
function LogL(var theta:ParameterVectorType):float;
const
minimum = 1e-8;
var BdotX, sum, OneObsL :float;
anobs : OneObservationType;
theobs : meaningful absolute anobs;
t : NiceParams absolute theta;
obsno : integer;
begin
fillchar(sum, sizeof(sum), 0); {faster version of sum := 0.0}
reset(rawfile);
for obsno := 1 to c.useobs do
begin
read(rawfile, anobs);
BdotX := betadotx(theta, theobs.X);
with theobs do
if n = -Y then {censored. Can ONLY happen when Y is -ve}
OneObsL := CNorm( (Y+0.5 - BdotX)/t.sigma)
else {not censored, simple observation}
OneObsL := CNorm( (Y+0.5 - BdotX)/t.sigma)
- CNorm( (Y-0.5 - BdotX)/t.sigma);
if OneObsL < minimum then {ln(OneObsL) is likely to blow up}
sum := sum - 174.0
else
sum := sum + ln(OneObsL)
end;
LogL := sum
end; {of likefunc}
procedure FindOneDeriv(var theta:ParameterVectorType;
var obs:OneObservationType;
var O:ParameterVectorType);
{Returns the long vector, with some zeros if estimate is off}
const
minimum = 1.0e-10; {I only let an observation contribute
to the GradL vector if its probability is
> minimum. Avoids numerical hassles caused by
outliers}
var
scratch, dl, du, upper, lower, BdotX, OneObsL : float;
j : integer;
theobs : meaningful absolute obs;
t : NiceParams absolute theta;
begin
BdotX := betadotx(theta, theobs.X);
with theobs do
begin
upper := (Y+0.5 - BdotX)/t.sigma; du := Norm(upper);
if n = -Y then {censored. Can ONLY happen when Y is -ve}
begin
OneObsL := CNorm(upper);
if OneObsL < minimum then
fillchar(O, sizeof(O), 0) {outlier: contributes nothing to GradL}
else
begin
scratch := du/(t.sigma*OneObsL);
for j := 1 to K do {in betas}
O[j] := -X[j]*scratch;
O[K+1] := -(Y+0.5-BdotX)*scratch/t.sigma
end
end
else {not censored, simple observation}
begin
lower := (Y-0.5 - BdotX)/t.sigma; dl := Norm(lower);
OneObsL := CNorm(upper) - CNorm(lower);
if OneObsL < minimum then
fillchar(O, sizeof(O), 0) {outlier: contributes nothing to GradL}
else
begin
for j := 1 to K do {derivatives in all betas}
O[j] := ((dl - du)*X[j]/t.sigma)/OneObsL;
O[K+1] := (((Y-0.5-BdotX)*dl) - ((Y+0.5-BdotX)*du))
/(OneObsL*t.sigma*t.sigma)
end
end
end {of the with}
end;
begin
end.
Last, the time unit:
---------------------------------------------------------------------------
UNIT Time;
interface
uses Dos;
procedure RestartTimer;
function Elapsed:real;
function ElapsedStr:string;
implementation
var
TimerTicking : boolean;
h1, m1, s1, s1001 : word;
procedure RestartTimer;
begin
TimerTicking := true;
gettime(h1, m1, s1, s1001)
end;
function Elapsed:real;
function AbsTime(h,m,s,s100:word):real;
begin
AbsTime := (s100/100.0) + s + (60.0*m) + (3600.0*h)
end;
var h2, m2, s2, s1002 : word;
T1, T2 : real;
begin
if TimerTicking then
begin
gettime(h2, m2, s2, s1002);
T1 := AbsTime(h1, m1, s1, s1001);
T2 := AbsTime(h2, m2, s2, s1002);
Elapsed := T2 - T1
end
else
Elapsed := 0.0
end;
function ElapsedStr:string;
var s:string;
begin
str(Elapsed:6:1,s);
ElapsedStr := 't = '+s+'s. '
end;
begin
TimerTicking := false
end.
---------------------------------------------------------------------------
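In case it is not obvious how the Time unit is meant to be called, here
is a minimal sketch. The program name TimeDemo and the procedure
DoSomeWork are made up for illustration; they are not part of the
package.

{ Minimal usage sketch for unit Time. TimeDemo and DoSomeWork are
  placeholders, not part of the package. }
program TimeDemo;
uses Time;

procedure DoSomeWork;
var i : integer;
    x : real;
begin
  x := 0.0;
  for i := 1 to 30000 do x := x + 1.0/i   { burn a little time }
end;

begin
  RestartTimer;                      { start the clock }
  DoSomeWork;
  writeln('Elapsed: ', Elapsed:6:1, ' seconds');
  writeln(ElapsedStr, 'all done.')   { ElapsedStr gives 't = ...s. ' }
end.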
_______________________________________________________________________________
Ajay Shah, (213)747-9991, ajayshah@usc.edu
The more things change, the more they stay insane.
_______________________________________________________________________________