wood@DG-RTP.DG.COM (Tom Wood) (02/02/90)
I've tracked down the "performance regression" I reported. The deal
is that the 1.36.93 changes treat the loop as one that can benefit
from the conversion performed by loop_skip_over (in loop.c). When this
conversion is done, "a loop that enters just before the end test is
converted to one that falls straight in after skipping over the entire
loop if the end test would drop out". (Whew!)
In the scan through this converted loop for insns that are safe to
move (the body of scan_loop in loop.c), we know something more about
insns toward the end of the scan. Before we enter the while loop, "p"
points to the end test. When the scan gets back to this position (the
insn just after the end test), the conditions "maybe_never" and
"call_passed" can be reset: we're back to the virtual end test again.
This isn't really the ultimate fix. Maybe someone out there who's
better at threading insns could help. If we determine that insns
after the end test are movable, we end up duplicating them. We also
know where their values are...
Here again is the function from the Stanford benchmark bench.c:
/* Fit: for k = 0 .. piecemax[i], return false as soon as both p[i][k]
   and puzzl[j+k] are nonzero; return true if no such k is found.
   piecemax, p, puzzl, true and false are not defined in this excerpt. */
int Fit (i, j) int i, j;
{
int k;
/* As written, the loop test reads piecemax[i] on every iteration; nothing
   in the loop body stores to it. */
for ( k = 0; k <= piecemax[i]; k++ )
if ( p[i][k] ) if ( puzzl[j+k] ) return (false);
return (true);
};
Here's what I get with the included patch to the 1.36.93 version of
loop.c:
file "gdb-test.c"
; Cc1 (1.36.4) arguments:
; -fdelayed-branch -fomit-frame-pointer -quiet -dumpbase -dL -ds -df -O -o
text
align 4
global _Fit
; _Fit, compiled with the patched loop.c.
; The load of piecemax[i] used by the loop test is done once, just before
; @L7; the loop body between @L7 and the bb1 does not reload it.
; Register roles, read off the C source of Fit(i, j):
;   r8 = i, r6 = j, r9 = k, r11 = piecemax[i] (while in the loop),
;   r7 = address of row p[i], r5 = address of puzzl, r12 = scratch,
;   r10 = result of cmp, r2 = return value. r0 is used as a zero source.
_Fit:
or r8,r0,r2 ; r8 = i (arrives in r2)
or r9,r0,r0 ; k = 0
or.u r12,r0,hi16(_piecemax) ; r12 = address of piecemax, upper 16 bits ...
or r12,r12,lo16(_piecemax) ; ... then lower 16 bits
ld r11,r12[r8] ; r11 = piecemax[i]
cmp r10,r9,r11 ; compare k with piecemax[i]; result bits go to r10
bb0.n le,r10,@L8 ; entry test: if k <= piecemax[i] is false, skip the loop
or r6,r0,r3 ; r6 = j (arrives in r3); NOTE(review): delay slot of the .n branch above -- confirm
or.u r12,r0,hi16(_p) ; r12 = address of p
or r12,r12,lo16(_p)
mak r11,r8,0<11> ; r11 = offset of row i in p (NOTE(review): looks like i << 11 -- confirm); overwrites piecemax[i]
addu r7,r12,r11 ; r7 = address of row p[i]
or.u r5,r0,hi16(_puzzl) ; r5 = address of puzzl
or r5,r5,lo16(_puzzl)
or.u r12,r0,hi16(_piecemax) ; same three-insn load of piecemax[i] as above,
or r12,r12,lo16(_piecemax) ; repeated because r11 was reused by mak;
ld r11,r12[r8] ; executed once, before the loop is entered
; Top of the loop body.
@L7:
ld r12,r7[r9] ; r12 = p[i][k]
bcnd eq0,r12,@L4 ; p[i][k] == 0: advance to the next k
addu r12,r6,r9 ; r12 = j + k
ld r12,r5[r12] ; r12 = puzzl[j+k]
bcnd eq0,r12,@L4 ; puzzl[j+k] == 0: advance to the next k
br.n @L1 ; both nonzero: return (false)
or r2,r0,r0 ; r2 = 0; this must take effect before @L1 is reached, i.e. it runs in the delay slot
; Loop increment and end test.
@L4:
addu r9,r9,0x0001 ; 1 -- k++
cmp r10,r9,r11 ; compare k with the piecemax[i] value kept in r11
bb1 le,r10,@L7 ; loop again while k <= piecemax[i]
; Loop exhausted (or never entered): return (true).
@L8:
or r2,r0,0x0001 ; 1 -- r2 = 1
; Common exit.
@L1:
jmp r1 ; return; presumably r1 holds the return address -- confirm against the 88k calling convention
Here's what I get with the 1.36.93 version of loop.c:
file "bench.c"
; Cc1 (1.36.4) arguments:
; -fdelayed-branch -fomit-frame-pointer -quiet -dumpbase -O -o
; _Fit, compiled with the unmodified 1.36.93 loop.c.
; The three instructions that load piecemax[i] for the loop test sit after
; @L128, inside the loop, so they run on every iteration.
; Register roles, read off the C source of Fit(i, j):
;   r8 = i, r7 = j, r9 = k, r11 = address of row p[i],
;   r6 = address of puzzl, r12 = scratch (also holds piecemax[i] for cmp),
;   r10 = result of cmp, r2 = return value. r0 is used as a zero source.
_Fit:
or r8,r0,r2 ; r8 = i (arrives in r2)
or r9,r0,r0 ; k = 0
or.u r12,r0,hi16(_piecemax) ; r12 = address of piecemax, upper 16 bits ...
or r12,r12,lo16(_piecemax) ; ... then lower 16 bits
ld r12,r12[r8] ; r12 = piecemax[i]
cmp r10,r9,r12 ; compare k with piecemax[i]; result bits go to r10
bb0.n le,r10,@L132 ; entry test: if k <= piecemax[i] is false, skip the loop
or r7,r0,r3 ; r7 = j (arrives in r3); NOTE(review): delay slot of the .n branch above -- confirm
or.u r12,r0,hi16(_p) ; r12 = address of p
or r12,r12,lo16(_p)
mak r11,r8,0<11> ; r11 = offset of row i in p (NOTE(review): looks like i << 11 -- confirm)
addu r11,r12,r11 ; r11 = address of row p[i]
or.u r6,r0,hi16(_puzzl) ; r6 = address of puzzl
or r6,r6,lo16(_puzzl)
; Top of the loop body.
@L131:
ld r12,r11[r9] ; r12 = p[i][k]
bcnd eq0,r12,@L128 ; p[i][k] == 0: advance to the next k
addu r12,r7,r9 ; r12 = j + k
ld r12,r6[r12] ; r12 = puzzl[j+k]
bcnd eq0,r12,@L128 ; puzzl[j+k] == 0: advance to the next k
br.n @L125 ; both nonzero: return (false)
or r2,r0,r0 ; r2 = 0; this must take effect before @L125 is reached, i.e. it runs in the delay slot
; Loop increment and end test.
@L128:
addu r9,r9,0x0001 ; 1 -- k++
or.u r12,r0,hi16(_piecemax) ; piecemax[i] is loaded again here,
or r12,r12,lo16(_piecemax) ; on every trip through the loop
ld r12,r12[r8] ; r12 = piecemax[i]
cmp r10,r9,r12 ; compare k with the freshly loaded piecemax[i]
bb1 le,r10,@L131 ; loop again while k <= piecemax[i]
; Loop exhausted (or never entered): return (true).
@L132:
or r2,r0,0x0001 ; 1 -- r2 = 1
; Common exit.
@L125:
jmp r1 ; return; presumably r1 holds the return address -- confirm against the 88k calling convention
Here's the patch:
*** loop.c.orig Thu Feb 1 16:19:49 1990
--- loop.c Thu Feb 1 16:19:49 1990
***************
*** 341,346 ****
--- 341,349 ----
/* For a rotated loop that is entered near the bottom,
this is the label at the top. Otherwise it is zero. */
rtx loop_top = 0;
+ /* For a loop that's converted to as loop_skip_over, record where it
+ virtually begins. */
+ rtx loop_skip_over_start = 0;
/* Jump insn that enters the loop, or 0 if control drops in. */
rtx loop_entry_jump = 0;
/* Place in the loop where control enters. */
***************
*** 432,437 ****
--- 435,441 ----
if (! something_before_entry_jump
&& loop_skip_over (loop_start, end, loop_entry_jump))
{
+ loop_skip_over_start = NEXT_INSN (p);
scan_start = loop_top;
loop_top = 0;
}
***************
*** 492,497 ****
--- 496,507 ----
break;
if (p == scan_start)
break;
+ }
+ /* At the virtual top of a converted loop. */
+ if (p == loop_skip_over_start)
+ {
+ maybe_never = 0;
+ call_passed = 0;
}
if (GET_CODE (p) == INSN
&& GET_CODE (PATTERN (p)) == SET
---
Tom Wood (919) 248-6067
Data General, Research Triangle Park, NC
{the known world}!rti!xyzzy!wood, wood@dg-rtp.dg.com