wood@DG-RTP.DG.COM (Tom Wood) (02/01/90)
I've been installing the 1.36.93 changes into GCC 1.36 for the 88k.
In doing so, I came across this "performance regression". The problem
is that the changes to loop.c from 1.36.00 to 1.36.93 fail to note
that piecemax[i] is invariant. It appears this is the case for global
variables in general. Here's the function from the Stanford benchmark
bench.c:
/* Fit: returns false as soon as some k in 0..piecemax[i] has both p[i][k]
   and puzzl[j+k] nonzero; returns true if no such k exists.
   piecemax, p, puzzl, true and false are not defined in this excerpt. */
int Fit (i, j) int i, j;
{
int k;
/* Neither i nor piecemax is written inside the loop, so the bound
   piecemax[i] has the same value on every iteration. */
for ( k = 0; k <= piecemax[i]; k++ )
/* Both entries nonzero at offset k -> return false. */
if ( p[i][k] ) if ( puzzl[j+k] ) return (false);
return (true);
};
Here's what I get with the 1.36.93 version of loop.c. This is quite
fancy, and an improvement, except that piecemax[i] is reloaded on
every iteration (the load after @L128).
file "bench.c"
; Cc1 (1.36.4) arguments:
; -fdelayed-branch -fomit-frame-pointer -quiet -dumpbase -O -o
; Fit(i, j) as emitted with the 1.36.93 loop.c.
; Register roles as used in this listing:
;   r2 = i on entry / result on exit, r3 = j on entry, r1 = return address
;   r8 = i, r7 = j, r9 = k, r11 = &p[i][0], r6 = &puzzl[0]
;   r12 = scratch, r10 = cmp result bits, r0 = zero source operand
; The address of puzzl is built once before the loop, but piecemax[i] is
; loaded again on every iteration (after @L128).
_Fit:
or r8,r0,r2 ; r8 = i
or r9,r0,r0 ; k = 0
or.u r12,r0,hi16(_piecemax) ; upper 16 bits of &piecemax
or r12,r12,lo16(_piecemax) ; r12 = &piecemax
ld r12,r12[r8] ; r12 = piecemax[i] (indexed load), first load
cmp r10,r9,r12 ; compare k with piecemax[i]
bb0.n le,r10,@L132 ; 'le' bit clear (k > piecemax[i]): skip the loop; .n = next insn is the delay slot
or r7,r0,r3 ; (delay slot) r7 = j
or.u r12,r0,hi16(_p) ; upper 16 bits of &p
or r12,r12,lo16(_p) ; r12 = &p
mak r11,r8,0<11> ; r11 = i << 11, byte offset of row i in p
addu r11,r12,r11 ; r11 = &p[i][0]
or.u r6,r0,hi16(_puzzl) ; upper 16 bits of &puzzl
or r6,r6,lo16(_puzzl) ; r6 = &puzzl, built once outside the loop
@L131: ; loop body
ld r12,r11[r9] ; r12 = p[i][k]
bcnd eq0,r12,@L128 ; p[i][k] == 0: go to the next k
addu r12,r7,r9 ; r12 = j + k
ld r12,r6[r12] ; r12 = puzzl[j+k]
bcnd eq0,r12,@L128 ; puzzl[j+k] == 0: go to the next k
br.n @L125 ; both nonzero: return
or r2,r0,r0 ; (delay slot) result = 0 (false)
@L128: ; loop increment and test
addu r9,r9,0x0001 ; k++
or.u r12,r0,hi16(_piecemax) ; address of piecemax rebuilt inside the loop
or r12,r12,lo16(_piecemax) ; r12 = &piecemax
ld r12,r12[r8] ; piecemax[i] reloaded from memory every iteration
cmp r10,r9,r12 ; compare k with piecemax[i]
bb1 le,r10,@L131 ; k <= piecemax[i]: run the body again
@L132: ; loop ran to completion
or r2,r0,0x0001 ; result = 1 (true)
@L125: ; common exit
jmp r1 ; return to caller
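If I simply back out the loop.c changes entirely, I get the old-style
loop: piecemax[i] is recognized as invariant (it is loaded once, before
the loop), but puzzl[j+k] is not treated as an induction variable, so
the address of puzzl is rebuilt inside the loop.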
file "bench.c"
; Cc1 (1.36.4) arguments:
; -fdelayed-branch -fomit-frame-pointer -quiet -dumpbase -O -o
; Fit(i, j) as emitted with the loop.c changes backed out (old-style loop).
; Register roles as used in this listing:
;   r2 = i on entry / result on exit, r3 = j on entry, r1 = return address
;   r6 = j, r9 = k, r7 = piecemax[i], r8 = &p[i][0]
;   r11 = &puzzl[0] (rebuilt in the loop), r12 = scratch, r10 = cmp result bits
; piecemax[i] is loaded once before the loop; the address of puzzl is
; rebuilt on every iteration that reaches the second test.
_Fit:
or r6,r0,r3 ; r6 = j
or r9,r0,r0 ; k = 0
or.u r12,r0,hi16(_piecemax) ; upper 16 bits of &piecemax
or r12,r12,lo16(_piecemax) ; r12 = &piecemax
ld r7,r12[r2] ; r7 = piecemax[i], loaded once and kept in r7 for the whole loop
or.u r12,r0,hi16(_p) ; upper 16 bits of &p
or r12,r12,lo16(_p) ; r12 = &p
mak r2,r2,0<11> ; r2 = i << 11, byte offset of row i in p
br.n @L125 ; enter the loop at its test; .n = next insn is the delay slot
addu r8,r12,r2 ; (delay slot) r8 = &p[i][0]
@L130: ; loop body
ld r12,r8[r9] ; r12 = p[i][k]
bcnd eq0,r12,@L127 ; p[i][k] == 0: go to the next k
addu r12,r6,r9 ; r12 = j + k
or.u r11,r0,hi16(_puzzl) ; address of puzzl rebuilt inside the loop
or r11,r11,lo16(_puzzl) ; r11 = &puzzl
ld r12,r11[r12] ; r12 = puzzl[j+k]
bcnd eq0,r12,@L127 ; puzzl[j+k] == 0: go to the next k
br.n @L124 ; both nonzero: return
or r2,r0,r0 ; (delay slot) result = 0 (false)
@L127: ; loop increment
addu r9,r9,0x0001 ; k++
@L125: ; loop test (also the entry point from above)
cmp r10,r9,r7 ; compare k with the saved piecemax[i]
bb1 le,r10,@L130 ; k <= piecemax[i]: run the body
or r2,r0,0x0001 ; loop ran to completion: result = 1 (true)
@L124: ; common exit
jmp r1 ; return to caller
--
Tom Wood