[comp.sys.sgi] Fortran Optimizer Bug

igraham@NAZGUL.PHYSICS.MCGILL.CA (Ian Graham) (06/16/91)
Hello Netland!

I have discovered a vexing and irritating FORTRAN optimization bug.  I know
it's a bug, because the code works fine when compiled with -u -g, but not
when compiled with -u -O2 -- it gives wildly wrong answers.  I am not clever
enough to wade through the assembler to find the fault -- any takers?
I have not yet reported this to the hotline.  I will do so on Monday.

I have enclosed the guilty subroutine.  The optimization error apparently
occurs between the comment lines marked AAAAA and BBBBB -- it is
olat(kk) that is eventually set incorrectly.  The program works fine
if the block of code defining `mask' is commented out and replaced by
either of the two alternative definitions (ONE-SITE or PURE MODEL -- see
the comment lines in the code segment).

Any help or advice would be appreciated -- this subroutine is the
main engine of a large numerical simulation code, and it really needs
to be optimized -- the programs will have to run for 50 hours or 
more on a 4D-35 at best: I would hate to have to triple that time by
not optimizing.

I compiled this on a Personal Iris 4D-25, Irix 3.3.2 with the
4D1-3.3 Fortran.   I have no other machine or Irix versions
which I can try.

---- CUT --- HERE ---- CUT ---- HERE---- CUT --- HERE ---- CUT ---- HERE
---- CUT --- HERE ---- CUT ---- HERE---- CUT --- HERE ---- CUT ---- HERE

        subroutine engine(mcstart,mcstop,boltz,olat)
c
c  Run Monte Carlo sweeps mcstart..mcstop over the lattice olat.
c
c  Each sweep calls simdemon(boltz,db1) once to refill db1 and then
c  makes nsite update attempts.  Every attempt picks a site kk with
c  randi(), reads its six neighbours on the periodic l x l x l
c  lattice and rewrites olat(kk) bit by bit:
c
c    mask   bits in which at least two of the six neighbour words
c           hold a 0 (the TWO-SITE rule);
c    trial  ior(not(spini), ieor(spini,db1(iii))), which is the same
c           as not(iand(spini,db1(iii)));
c    olat(kk) takes trial where mask is 1 and keeps spini elsewhere.
c
c  Arguments
c    mcstart, mcstop  first and last sweep number (do-loop bounds)
c    boltz            real array, only passed on to simdemon
c    olat             integer array indexed 1..nsite, updated in place
c
c  Site numbering:  kk-1 = slice*lsquare + row*l + col
c
c  NOTE(review): simdemon takes nsite from params3d.h while it is a
c  local parameter here; the two values presumably must agree.
c
        integer   l,       nsite,      lsquare
        parameter(l=8,     nsite=512,  lsquare=64)
        integer   mcstart, mcstop
        real      boltz(*)
        integer   olat(*)
        integer   mct,   iii,   kk
        integer   spini, mask,  trial, ntemp
        integer   ns1,   ns2,   ns3,   ns4,   ns5,   ns6
        integer   nns1,  nns2,  nns3,  nns4,  nns5,  nns6
        integer   tempa, tempb, tempc, tempd, tempe
        integer   row,   col,   uprow, dnrow, lecol, rtcol
        integer   slice, inslice, outslice
        integer   db1(nsite),   or34,  or56,  or3456
        real      randi

        do 101 mct = mcstart, mcstop
            call simdemon(boltz, db1)
            do 120 iii = 1, nsite
c               Pick a site in 1..nsite.  Truncate before adding 1:
c               the old form 1 + nsite*randi() did the add in real
c               arithmetic, which can round up to nsite+1 when
c               randi() is just below 1.0.  min() also covers
c               randi() = 1.0 (randi's exact range is not visible
c               here).
                kk    = min(nsite, 1 + int(nsite*randi()))
                spini = olat(kk)

c               Split kk into lattice coordinates.
                slice = (kk-1)/lsquare
                row   = mod(kk-1, lsquare)/l
                col   = kk - lsquare*slice - l*row - 1

c               Periodic neighbours, already scaled to index offsets.
                uprow    = mod(row+1,     l) * l
                dnrow    = mod(row-1+l,   l) * l
                rtcol    = mod(col+1,     l)
                lecol    = mod(col-1+l,   l)
                inslice  = mod(slice+1,   l) * lsquare
                outslice = mod(slice-1+l, l) * lsquare
                row      = row   * l
                slice    = slice * lsquare

                ns1 = olat(1 + col   + uprow + slice)
                ns2 = olat(1 + col   + dnrow + slice)
                ns3 = olat(1 + lecol + row   + slice)
                ns4 = olat(1 + rtcol + row   + slice)
                ns5 = olat(1 + col   + row   + inslice)
                ns6 = olat(1 + col   + row   + outslice)

                nns1 = not(ns1)
                nns2 = not(ns2)
                nns3 = not(ns3)
                nns4 = not(ns4)
                nns5 = not(ns5)
                nns6 = not(ns6)

c AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
c AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
c THE BUG COMES IN BELOW HERE.......

c PURE MODEL
c               mask = -1
c ONE-SITE
c               mask = ior(nns1, ior(nns2, ior(nns3,
c    :                 ior(nns4, ior(nns5, nns6)))))

c   The program is reported to optimize incorrectly with the
c   following definition of mask.  It is reported to work fine with
c   either of the above two definitions (ONE-SITE or PURE MODEL).

c TWO-SITE
c               A mask bit is 1 when at least two of nns1..nns6
c               have that bit set: each tempX pairs one word with
c               the OR of all the words that follow it.
                or34   = ior(nns3, nns4)
                or56   = ior(nns5, nns6)
                or3456 = ior(or34, or56)
                tempe  = iand(nns5, nns6)
                tempd  = iand(nns4, or56)
                tempc  = iand(nns3, ior(nns4, or56))
                tempb  = iand(nns2, or3456)
                tempa  = iand(nns1, ior(nns2, or3456))
                mask   = ior(tempa, ior(tempb, ior(tempc,
     :                       ior(tempd, tempe))))

c               Proposed new word for the site.
                trial = ior(not(spini), ieor(spini, db1(iii)))

c               Bitwise select: trial under mask, old value elsewhere.
                ntemp = ior(iand(spini, not(mask)),
     :                      iand(trial, mask))

                olat(kk) = ntemp

c THE BUG COMES IN ABOVE HERE.......  olat(kk) is incorrectly set...
c BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB
c BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB

120         continue
101     continue

        return
        end


	subroutine simdemon(boltz, db1)
c
c  Refill db1(1..nsite) with new bit patterns.
c
c  Every word is first cleared.  Then, for each bit position 0..31,
c  ranvec is asked for a fresh vector rtab of nsite values and the
c  bit is set in db1(ii) whenever rtab(ii) .le. boltz(1).
c
c  Arguments
c    boltz   real array; only boltz(1) is read
c    db1     integer array indexed 1..nsite; overwritten
c
c  nsite is not declared in this routine; presumably it comes from
c  params3d.h (TODO confirm).  ranvec is defined elsewhere and is
c  assumed to fill rtab with nsite reals (verify against its source).
c
        include   'params3d.h'
        real       boltz(*),  rtab(nsite)
        integer    db1(*)
        integer    ii,       k

c       Start from all-zero words.
        do 10 ii = 1, nsite
            db1(ii) = 0
10      continue

c       One ranvec call per bit position, in ascending bit order.
        do 30 k = 0, 31
            call ranvec(rtab, nsite)
            do 20 ii = 1, nsite
                if (boltz(1) .ge. rtab(ii)) then
                    db1(ii) = ibset(db1(ii), k)
                end if
20          continue
30      continue

        return
        end
---- CUT --- HERE ---- CUT ---- HERE---- CUT --- HERE ---- CUT ---- HERE
---- CUT --- HERE ---- CUT ---- HERE---- CUT --- HERE ---- CUT ---- HERE


Thanks in advance for any help any of you can offer.

Ian.

    ___________________________________________ Ian Graham ______________
						igraham@physics.mcgill.ca
					        Tel: (514) 398-6526
					        Fax: (514) 398-8434