ajayshah@aludra.usc.edu (Ajay Shah) (04/15/90)
Here are a few units from a fairly well-finished maximum-likelihood (ML)
estimation package. The only reason I'm not posting the whole package is
that I don't have a good user manual yet. If there are people out there
who are interested, I can do that.
Comments I have inserted just now for this posting are flagged with the
keyword 'netters'.
First, unit algo: the basic optimisation code.
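A quick note for netters before the code: one pass of the search below
computes, in LaTeX notation,

    \theta_{new} = \theta + \lambda s, \qquad
    s = \begin{cases} F \, \nabla \log L(\theta) & \text{(BHHH)} \\
                      \nabla \log L(\theta)      & \text{(Simple)} \end{cases}

where F comes from unit Derivs (posted below) and the jumpsize \lambda is
found by doubling it until LogL stops improving and then backing off.
This is only a summary of what Derive_New_Guess does; read the code for
the details.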
---------------------------------------------------------------------------
UNIT algo; {$N+} {$E+}
INTERFACE
uses
Constant,
Typedef,
LikeFunc,
Derivs,
VectOps,
Time,
Util,
crt;
procedure Solve;
IMPLEMENTATION
procedure Derive_New_Guess(var theta, thetanew:ParameterVectorType;
var failed : boolean;
var FirstL, FinalL : float);
{Given a guess theta, returns a better guess in thetanew (both are
passed by reference for speed).
If, for whatever reason, L(theta) cannot be improved upon, thetanew
is returned equal to theta and failed is set to true.}
procedure MakeSearchDir(var F:ParameterMatrixType;
var g, s:ParameterVectorType);
var
i,j:integer;
sum:float;
begin
for i := 1 to SParams do {rows of F. SParams, not Params!}
begin
fillchar(sum, sizeof(sum), 0); {sum := 0.0}
for j := 1 to Params do
sum := sum + F[i,j]*g[j];
s[i] := sum
end;
end;
procedure MakeNew(jmp:float;
var theta, ss, tnew : ParameterVectorType);
{Take a jumpsize and a short search direction ss,
modify theta to make thetanew.}
var ndx, i : integer;
begin
move(theta, tnew, sizeof(theta)); {tnew := theta}
ndx := 0;
for i := 1 to SParams do
begin
repeat
inc(ndx)
until c.estimate[ndx];
{now element ndx of long maps to element i of short}
tnew[ndx] := tnew[ndx] + (jmp*ss[i])
end
end;
const
factor:float = 8.0;
lowlimit:float = 1e-16;
var
S_F : ParameterMatrixType;
S_SearchDir, S_GradL : ParameterVectorType;
thisL, lastL, jumpsize : float;
overshot : boolean;
i : integer;
begin {of procedure Derive New Guess}
thisL := FirstL; {passed to me by caller}
{From here on FirstL is never touched; we only manipulate
thisL and lastL}
{we only compute search direction ONCE.}
if c.verbose then write('Creating search direction...');
MakeGradL(theta, S_GradL);
case c.method of
BHHH : begin
MakeF(theta, S_F);
MakeSearchDir(S_F, S_GradL, S_SearchDir)
end;
Simple : S_SearchDir := S_GradL;
end;
{At the end of all this, we have a short search dirn}
if c.verbose then
begin
write(#13, 'Search Dirn: ');
for i := 1 to SParams do write(S_SearchDir[i]:6:5,' ');
writeln; writeln(' Initial LogL = ',FirstL:10:6);
end;
thetanew := theta; overshot := false;
jumpsize := c.firstjump;
repeat
lastL := thisL;
MakeNew(jumpsize, theta, S_SearchDir, thetanew);
{take short searchdir, long theta and make long thetanew}
thisL := LogL(thetanew);
if c.verbose then
writeln('Jump = ',jumpsize:5:2,' ==> ','LogL = ',thisL:10:6,' ');
overshot := thisL <= lastL;
if overshot and not (lastL >= FirstL) then
{you overshot and you haven't improved on FirstL yet}
begin
jumpsize := jumpsize/factor;
thisL := lastL;
overshot := false
end;
jumpsize := 2.0*jumpsize
until overshot or (jumpsize < lowlimit);
if c.verbose then writeln;
{at this point the last jump tried has overshot, and jumpsize has been
doubled once more since then, so the last good jump was jumpsize/4.
Backtrack to it:}
MakeNew((0.25*jumpsize), theta, S_SearchDir, thetanew);
FinalL := lastL; {sending back FinalL to caller}
failed := FinalL <= FirstL
{you failed if you couldn't improve upon the old LogL}
end; {of procedure Derive New Guess}
procedure SearchL(var seed, LocalMax:ParameterVectorType;
var Final : float; {Final is LogL(LocalMax)}
var failed:boolean);
var
biggest : float;
procedure dump(var b:ParameterVectorType);
var dim, i : dimtype;
s1 : string;
begin
if c.verbose then writeln; Display('theta: ', false);
dim := 0;
for i := 1 to SParams do
begin
repeat inc(dim) until c.estimate[dim];
str(b[dim]:6:Criterion.decimals, s1);
Display(s1+' ', false)
end;
Display('', true)
end;
function converged(var t1, t2 : ParameterVectorType;
o, n : float;
iters : integer):boolean;
var tmp:boolean; {the check below loops over the long parameter vector;
that is harmless, since elements not being estimated never change}
i:integer;
begin
if iters = 0 then
begin
converged := false; exit
end;
with Criterion do
begin
if (iters >= iterations) or ((n-o) < deltalogl) then
begin
converged := true;
exit
end;
end;
tmp := true;
for i := 1 to Params do
if abs(t1[i]-t2[i]) > biggest then tmp := false;
converged := tmp
end;
var
theta, thetanew : ParameterVectorType;
iterations : integer;
i : byte;
s1 : string;
oldL, newL : float;
stopsign : boolean;
begin
stopsign := false; failed := false; iterations := 0; biggest := 1.0;
for i := 1 to Criterion.decimals do biggest := biggest/10.0;
if c.verbose then write(ElapsedStr,'Seed')
else Display('0. ', false);
newl := LogL(seed); thetanew := seed; dump(thetanew);
{assert thetanew is seed, newl is LogL(either, both)}
while not stopsign do
begin
inc(iterations);
theta := thetanew; oldL := newL;
Derive_New_Guess(theta, thetanew, failed, oldL, newL);
{This takes theta, oldL which is LogL(theta),
generates thetanew, newL which is LogL(thetanew)}
stopsign := failed or
converged(theta, thetanew, oldL, newL, iterations);
if not stopsign then
begin
if c.verbose then
begin
writeln; write(ElapsedStr,'Iteration #',iterations,': ')
end
else
begin
str(iterations, s1); Display(s1+'. ', false);
end;
dump(thetanew)
end
else write(chr(254));
end;
LocalMax := thetanew;
Final := newL
end;
procedure Solve;
procedure show(dim:integer; thetai, stddev, tratio : float);
const five = '     '; {five spaces}
var dims, s1, s2, s3 : string;
begin
str(thetai:8:5, s1);
if near(stddev, -0.01) then s2 := ' '
else str(stddev:6:3, s2);
if near(tratio, -0.01) then s3 := ' '
else str(tratio:5:2, s3);
Display(Labels[dim]+' '+s1+five+s2+five+s3, true)
end;
procedure FreeGrad(var t, g:ParameterVectorType);
var i:dimtype;
begin
for i := 1 to Params do c.estimate[i] := true;
SParams := Params;
MakeGradL(t, g)
end;
var FG, Best : ParameterVectorType;
failed : boolean;
ndx, i : integer;
VCV : ParameterMatrixType;
s1, s2 : string;
ti, sd, tr : float;
safe : integer;
Truth : SufficientConditions;
NewL : float;
begin
RestartTimer;
{Note to netters: the 'augmented' idea here is, as far as I know,
original -- at least, I thought it up and have not seen it anywhere
else. It is roughly 30% faster when your starting seed is poor, and
no slower when the seed is good, so I use it by default.
}
if c.augmented then
begin
{Save good values for later use:}
safe := c.useobs; Truth := Criterion;
{Stage I Setup:}
Display('Searching with 1/4th of the observations:',true);
with Criterion do
begin
DeltaLogL := 100*DeltaLogL;
decimals := decimals-2;
iterations := iterations div 100;
if iterations = 0 then iterations := 1
end; c.useobs := safe div 4;
SearchL(c.seed, Best, NewL, failed); writeln; writeln; writeln;
c.seed := Best;
{Stage II Setup:}
clrscr; writeln; Display('Phase I search took '+ElapsedStr, true);
Display('Search with 1/2 the observations:', true);
with Criterion do
begin
DeltaLogL := 10*DeltaLogL;
decimals := decimals + 1;
iterations := 10*iterations
end; c.useobs := 2*c.useobs;
SearchL(c.seed, Best, NewL, failed); writeln; writeln; writeln;
c.seed := Best;
{Get back to the truth:}
c.useobs := safe;
Criterion := Truth; clrscr; writeln;
Display('At end of Phase II search,'+ElapsedStr, true);
Display('Now using full dataset:', true);
end; {of if augmented}
{and continue where we might have been:}
SearchL(c.seed, Best, NewL, failed);
MakeF(Best, VCV);
for i := 1 to 3 do Display('', true);
Display('Search took '+ElapsedStr, true);
if failed then
begin
Display('Estimates have not converged as required.', true);
Display('Best possible estimates shown: ', true)
end;
Display('', true); Display('', true);
Display('Results of Search: ', true);
Display('', true); Display('', true);
Display(' estimate std. dev. t-ratio', true);
Display('', true);
ndx := 1;
for i := 1 to Params do
begin
if c.estimate[i] then
begin
ti := Best[i];
sd := sqrt(VCV[ndx,ndx]);
tr := ti/sd;
show(i, ti, sd, tr);
inc(ndx)
end
else show(i, Best[i], -0.01, -0.01)
end; {ndx works in short domain, i works in long}
str(NewL:8:7, s1);
Display('', true);
Display('At above parameter vector, LogL is '+s1, true);
if c.freegradient then
begin
FreeGrad(Best, FG);
for i := 1 to 3 do Display('', true);
Display('Full table of first derivatives: ', true);
Display('', true);
for i := 1 to Params do
begin
str(FG[i]:8:7, s2);
Display(Labels[i]+' : '+s2, true)
end;
end;
end;
begin {unit initialisation code}
end.
---------------------------------------------------------------------------
The next file is derivs.pas; it is where the gradient (GradL) and the F
matrix are computed.
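For netters: the formula MakeF implements is the BHHH outer-product
approximation. With g_i the score vector of observation i (the gradient
of that observation's log-likelihood), the second-derivatives matrix of
LogL is approximated by -\sum_i g_i g_i^{\top}, and

    F = - \Bigl[ -\sum_{i=1}^{N} g_i g_i^{\top} \Bigr]^{-1}
      =   \Bigl[  \sum_{i=1}^{N} g_i g_i^{\top} \Bigr]^{-1}.

The same F doubles as the estimated variance-covariance matrix from
which Solve prints standard errors.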
---------------------------------------------------------------------------
UNIT Derivs; {$N+} {$E+}
INTERFACE
uses constant,
typedef,
matrix,
longshort,
likefunc;
procedure MakeGradL(var t, g : ParameterVectorType);
procedure MakeF(var t:ParameterVectorType; var F:ParameterMatrixType);
{On return, g is short SParams x 1
F is short, SParams x SParams
}
IMPLEMENTATION
procedure MakeGradL(var t, g : ParameterVectorType);
{takes a t:long, and finds a g:short}
var j, obs : integer;
tmplong, tmpshort : ParameterVectorType;
anobs : OneObservationType;
begin
fillchar(g, sizeof(g), 0); {just fast way to init all g[i] to zero}
Reset(rawfile);
for obs := 1 to c.useobs do {run over all observations}
begin
read(rawfile, anobs);
FindOneDeriv(t, anobs, tmplong);
MapToShort(c.estimate, tmplong, tmpshort);
for j := 1 to SParams do
g[j] := g[j] + tmpshort[j]
end;
end; {of MakeGradL}
procedure MakeF(var t:ParameterVectorType; var F:ParameterMatrixType);
{Note to netters: this is the heart of the BHHH algorithm. F is built
from the per-observation score vectors: the second-derivatives matrix
of LogL is approximated by minus the sum of their outer products, and
F is minus the inverse of that approximation.}
var
i, j, obs:integer;
D2L, Result : ^TNMatrix;
LongD, ShortD:ParameterVectorType;
error : byte;
tmp:float;
anobs : OneObservationType;
begin
New(D2L); New(Result);
fillchar(D2L^, sizeof(D2L^), 0);
Reset(rawfile);
for obs := 1 to c.useobs do
begin
read(rawfile, anobs);
FindOneDeriv(t, anobs, LongD);
MapToShort(c.estimate, LongD, ShortD);
for i := 1 to SParams do
for j := 1 to i do
D2L^[i,j] := D2L^[i,j] - ShortD[i]*ShortD[j]
end;
for i := 1 to SParams do
for j := (i+1) to SParams do
D2L^[i,j] := D2L^[j,i];
Inverse(SParams, D2L^, Result^, error);
if error <> 0 then
begin
writeln('2nd derivatives matrix of LogL is singular.');
halt(1)
end;
for i := 1 to SParams do
for j := 1 to SParams do
F[i,j] := -Result^[i,j]; {NOTE ZEE MINUS SIGN!!! after inversion}
dispose(D2L); dispose(Result)
end;
begin
end.
---------------------------------------------------------------------------
As a demonstration, I present one likelihood function here: an ordered
probit with fixed cutoffs.
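For netters who want the algebra: with the cutoffs fixed at y-0.5 and
y+0.5, one observation contributes

    L_i = \Phi\Bigl( \frac{y_i + 0.5 - x_i'\beta}{\sigma} \Bigr)
        - \Phi\Bigl( \frac{y_i - 0.5 - x_i'\beta}{\sigma} \Bigr)

when it is not censored, and only the first \Phi term when it is
censored (the data flag censoring by n = -y, which can only happen when
y is negative). LogL sums ln L_i over the observations, with a floor so
that ln() of a tiny probability does not blow up. In the code, CNorm is
\Phi and Norm is the density \phi (both from unit normal).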
---------------------------------------------------------------------------
UNIT LikeFunc; {$N+} {$E+}
INTERFACE
USES
typedef,
constant,
normal;
const
ModelName : string = 'Clive Bell''s Problem';
function LogL(var theta:ParameterVectorType):float;
procedure FindOneDeriv(var theta:ParameterVectorType;
var obs:OneObservationType;
var O:ParameterVectorType);
{These functions and the ModelName string are what unit LikeFunc shows
user program.
LogL takes a parameter vector and returns a LogLikelihood.
FindOneDeriv takes one observation and returns a vector of derivatives.
Both t's are passed by reference for speed,
they are not altered within this unit.
}
IMPLEMENTATION
const
K = 3;
type
XVector = array[1..K] of real;
meaningful = record
X : XVector;
n, y : real;
end;
NiceParams = record
Beta : array[1..K] of float;
sigma : float
end;
{meaningful is exactly the way an observation is organised.
In the obs, first we have the block of x's. Then, at the end, we have n&y.
In the parameter vector, first we have beta's and last is the sigma.
}
function betadotx(var theta : ParameterVectorType;
var X:XVector):float; {Given theta and an obs,
find Beta dot X}
var B:float; i:integer;
t : NiceParams absolute theta;
begin
fillchar(B, sizeof(B), 0); {fast version of B := 0.0}
for i := 1 to K do B := B + X[i]*t.Beta[i];
betadotx := B
end;
function LogL(var theta:ParameterVectorType):float;
const
minimum = 1e-8;
var BdotX, sum, OneObsL :float;
anobs : OneObservationType;
theobs : meaningful absolute anobs;
t : NiceParams absolute theta;
obsno : integer;
begin
fillchar(sum, sizeof(sum), 0); {faster version of sum := 0.0}
reset(rawfile);
for obsno := 1 to c.useobs do
begin
read(rawfile, anobs);
BdotX := betadotx(theta, theobs.X);
with theobs do
if n = -Y then {censored. Can ONLY happen when Y is -ve}
OneObsL := CNorm( (Y+0.5 - BdotX)/t.sigma)
else {not censored, simple observation}
OneObsL := CNorm( (Y+0.5 - BdotX)/t.sigma)
- CNorm( (Y-0.5 - BdotX)/t.sigma);
if OneObsL < minimum then {ln(OneObsL) is likely to blow up}
sum := sum - 174.0
else
sum := sum + ln(OneObsL)
end;
LogL := sum
end; {of likefunc}
procedure FindOneDeriv(var theta:ParameterVectorType;
var obs:OneObservationType;
var O:ParameterVectorType);
{Returns the long vector, with some zeros if estimate is off}
const
minimum = 1.0e-10; {I only let an observation contribute
to the GradL vector if its probability is
> minimum. Avoids numerical hassles caused by
outliers}
var
scratch, dl, du, upper, lower, BdotX, OneObsL : float;
j : integer;
theobs : meaningful absolute obs;
t : NiceParams absolute theta;
begin
BdotX := betadotx(theta, theobs.X);
with theobs do
begin
upper := (Y+0.5 - BdotX)/t.sigma; du := Norm(upper);
if n = -Y then {censored. Can ONLY happen when Y is -ve}
begin
OneObsL := CNorm(upper);
if OneObsL < minimum then
fillchar(O, sizeof(O), 0) {outlier: contributes nothing to GradL}
else
begin
scratch := du/(t.sigma*OneObsL);
for j := 1 to K do {in betas}
O[j] := -X[j]*scratch;
O[K+1] := -(Y+0.5-BdotX)*scratch/t.sigma
end
end
else {not censored, simple observation}
begin
lower := (Y-0.5 - BdotX)/t.sigma; dl := Norm(lower);
OneObsL := CNorm(upper) - CNorm(lower);
if OneObsL < minimum then
fillchar(O, sizeof(O), 0) {outlier: contributes nothing to GradL}
else
begin
for j := 1 to K do {derivatives in all betas}
O[j] := ((dl - du)*X[j]/t.sigma)/OneObsL;
O[K+1] := (((Y-0.5-BdotX)*dl) - ((Y+0.5-BdotX)*du))
/(OneObsL*t.sigma*t.sigma)
end
end
end {of the with}
end;
begin
end.
Last, the time unit:
---------------------------------------------------------------------------
UNIT Time;
interface
uses Dos;
procedure RestartTimer;
function Elapsed:real;
function ElapsedStr:string;
implementation
var
TimerTicking : boolean;
h1, m1, s1, s1001 : word;
procedure RestartTimer;
begin
TimerTicking := true;
gettime(h1, m1, s1, s1001)
end;
function Elapsed:real;
function AbsTime(h,m,s,s100:word):real;
begin
AbsTime := (s100/100.0) + s + (60.0*m) + (3600.0*h)
end;
var h2, m2, s2, s1002 : word;
T1, T2 : real;
begin
if TimerTicking then
begin
gettime(h2, m2, s2, s1002);
T1 := AbsTime(h1, m1, s1, s1001);
T2 := AbsTime(h2, m2, s2, s1002);
Elapsed := T2 - T1
end
else
Elapsed := 0.0
end;
function ElapsedStr:string;
var s:string;
begin
str(Elapsed:6:1,s);
ElapsedStr := 't = '+s+'s. '
end;
begin
TimerTicking := false
end.
---------------------------------------------------------------------------
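In case it is not obvious how the Time unit is meant to be called, here
is a minimal sketch. The program name TimeDemo and the procedure
DoSomeWork are made up for illustration; they are not part of the
package.

{ Minimal usage sketch for unit Time. TimeDemo and DoSomeWork are
  placeholders, not part of the package. }
program TimeDemo;
uses Time;

procedure DoSomeWork;
var i : integer;
    x : real;
begin
  x := 0.0;
  for i := 1 to 30000 do x := x + 1.0/i   { burn a little time }
end;

begin
  RestartTimer;                      { start the clock }
  DoSomeWork;
  writeln('Elapsed: ', Elapsed:6:1, ' seconds');
  writeln(ElapsedStr, 'all done.')   { ElapsedStr gives 't = ...s. ' }
end.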
_______________________________________________________________________________
Ajay Shah, (213)747-9991, ajayshah@usc.edu
The more things change, the more they stay insane.
_______________________________________________________________________________