bh.atari%rand-relay@sri-unix.UUCP (02/14/84)
From: Brian Harvey <bh.atari@rand-relay>
The following was sent to me by one of our users. I haven't played with
it myself, but was wondering whether this situation is familiar to
anyone, and if there is a known fix. Please reply directly to
bh.atari@rand-relay
since I don't subscribe to u-w. Tnx.
Script started on Tue Feb 14 05:57:54 1984
Warning: no access to tty; thus no job control in this shell...
%
%
% cat bug.c
/*
** this file compiles with different results depending on
** whether or not the c optimizer is used.
*/
main()
{
int i;
i = ((int) 1.0) & 0xff;
printf ("i = 0x%x\n", i);
}
%
%
% cc bug.c -o nobug
% nobug
i = 0x1
% cc -O bug.c -o bug
% bug
i = 0x80
%
%
% cc -S bug.c
% t bug.s
LL0:
.data
.text
.align 1
.globl _main
_main:
.word L12
jbr L14
L15:
.data
.align 2
L16:
.double 0d1.00000000000000000000e+00
.text
cvtdl L16,r0
bicl2 $-256,r0
movl r0,-4(fp)
.data 1
L18:
.ascii "i = 0x%x\12\0"
.text
pushl -4(fp)
pushl $L18
calls $2,_printf
ret
.set L12,0x0
L14:
subl2 $4,sp
jbr L15
.data
% cc -O -S bug.c
% cat bug.s
.data
.text
LL0:.align 1
.globl _main
.data
.align 2
L16:.double 0d1.00000000000000000000e+00
.text
.data 1
L18:.ascii "i = 0x%x\12\0"
.text
.set L12,0x0
.data
.text
_main:.word L12
subl2 $4,sp
movzbl L16,-4(fp)
pushl -4(fp)
pushal L18
calls $2,_printf
ret
%
script done on Tue Feb 14 06:00:31 1984
gwyn%brl-vld@sri-unix.UUCP (02/15/84)
From: Doug Gwyn (VLD/VMB) <gwyn@brl-vld> Indeed, the 4.2BSD C optimizer has the bug you describe. There should be a "cvtdl" before the "movzbl". The UNIX System V C optimizer does not have this bug, tra la.
guy@rlgvax.UUCP (Guy Harris) (02/19/84)
The System III and 4.xBSD C optimizers (the 4.xBSD optimizer is a later release than the System III one) have a number of bugs, many of which have to do with improperly dealing with conversion instructions. Here is a log of the fixed ones: revision 1.5 date: 83/08/15 05:31:38; author: bin; state: Exp; lines added/del: 5/3 In generating code to extract values from fields, the compiler uses only the extv/extvz instructions. The optimizer changes all extractions of byte/word fields on byte/word boundaries to cvt{b,w}l or movz{b,w}l depending on the field being signed or unsigned. However, it does this incorrectly for word size fields on non-longword boundaries; it adds the offset into the word to the address, but adds it as if it were a byte offset, not a word offset. Fixed. ---------------------------- revision 1.4 date: 83/08/15 05:29:00; author: bin; state: Exp; lines added/del: 4/0 Fix courtesy of James T. Ellis, Microelectronics Center of North Carolina: don't fold ANYTHING with cvtfl, cvtdl, cvtgl, or cvthl!!!! ---------------------------- revision 1.3 date: 83/08/15 05:25:00; author: bin; state: Exp; lines added/del: 25/3 Previous fix meant that if the mask was 0xffff, the instructions were cvtwl and movzwl, which could be merged but weren't. Fixed to merge the instructions iff the sizes of the operands were the same. Also modified to handle masks of 0xffff as well as 0xff. ---------------------------- revision 1.2 date: 83/08/15 05:19:50; author: bin; state: Exp; lines added/del: 8/1 Fixed so as not to collapse a cvtwl xx,dest/bicl $0377,dest sequence into a movzbl xx,dest if the xx does autoincrementing or autodecrementing. The first sequence would increment or decrement the register used in xx by 2, while the second sequence would increment or decrement it by 1. Routine "autoid" added to test if an operand used autoincrement or autodecrement; routine "bicopt" modified to test for !autoid(src of cvtwl) as well as !indexa(src). 
and here is a "diff -c" listing of the two versions of the optimizer. Note that the line numbers pertain to the 4.1cBSD version, so they may not be exactly the same for the 4.1 or 4.2 versions. This listing can also be used, with some care, for the System III version. (The System V version doesn't have this problem, because it was totally rewritten to be retargetable to the various machines that are targets for the Software Generation Systems.) *** c21.c (original) Sun Feb 19 13:23:08 1984 --- c21.c (fixed) Sun Feb 19 13:23:11 1984 *************** *** 275,281 ** ** also byte- and word-size fields: ** extv $n*8,$8,A,B > cvtbl n+A,B ! ** extv $n*16,$16,A,B > cvtwl n+A,B ** extzv $n*8,$8,A,B > movzbl n+A,B ** extzv $n*16,$16,A,B > movzwl n+A,B */ --- 275,281 ----- ** ** also byte- and word-size fields: ** extv $n*8,$8,A,B > cvtbl n+A,B ! ** extv $n*16,$16,A,B > cvtwl 2n+A,B ** extzv $n*8,$8,A,B > movzbl n+A,B ** extzv $n*16,$16,A,B > movzwl 2n+A,B */ *************** *** 277,283 ** extv $n*8,$8,A,B > cvtbl n+A,B ** extv $n*16,$16,A,B > cvtwl n+A,B ** extzv $n*8,$8,A,B > movzbl n+A,B ! ** extzv $n*16,$16,A,B > movzwl n+A,B */ register struct node *pf; /* forward node */ register struct node *pn; /* next node (after pf) */ --- 277,283 ----- ** extv $n*8,$8,A,B > cvtbl n+A,B ** extv $n*16,$16,A,B > cvtwl 2n+A,B ** extzv $n*8,$8,A,B > movzbl n+A,B ! ** extzv $n*16,$16,A,B > movzwl 2n+A,B */ register struct node *pf; /* forward node */ register struct node *pn; /* next node (after pf) */ *************** *** 322,328 if (coff == 0) strcpy(regs[RT1], regs[RT3]); else ! sprintf(regs[RT1], "%d%s%s", coff, regs[RT3][0]=='(' ? "":"+", regs[RT3]); strcpy(regs[RT2], regs[RT4]); regs[RT3][0] = '\0'; regs[RT4][0] = '\0'; --- 322,330 ----- if (coff == 0) strcpy(regs[RT1], regs[RT3]); else ! sprintf(regs[RT1], "%d%s%s", ! (flen == 8 ? coff : 2*coff), ! (regs[RT3][0] == '(' ? 
"" : "+"), regs[RT3]); strcpy(regs[RT2], regs[RT4]); regs[RT3][0] = '\0'; regs[RT4][0] = '\0'; *************** *** 788,793 /* use field operations or MOVZ if possible. done as part of 'bflow'. */ register char *cp1,*cp2; int r; char src[C2_ASIZE]; if (!bixprep(p,JBCC)) return(p); if (f==0) {/* the BIC isolates low order bits */ --- 790,796 ----- /* use field operations or MOVZ if possible. done as part of 'bflow'. */ register char *cp1,*cp2; int r; + char lhssiz, subop; char src[C2_ASIZE]; if (!bixprep(p,JBCC)) return(p); if (f==0) {/* the BIC isolates low order bits */ *************** *** 804,809 delnode(p->back); } } if (p->back->op==CVT || p->back->op==MOVZ) {/* greedy, aren't we? */ splitrand(p->back); cp1=regs[RT1]; cp2=regs[RT2]; if (equstr(src,cp2) && okio(cp1) && !indexa(cp1) --- 807,825 ----- delnode(p->back); } } + /* + * 'pos', 'siz' known; find out the size of the + * left-hand operand of what the bicl will turn into. + */ + if (pos==0) { + if (siz==8) + lhssiz = BYTE; /* movzbl */ + else if (siz==16) + lhssiz = WORD; /* movzwl */ + else + lhssiz = BYTE; /* extzvl */ + } else + lhssiz = BYTE; /* extzvl */ if (p->back->op==CVT || p->back->op==MOVZ) {/* greedy, aren't we? */ splitrand(p->back); cp1=regs[RT1]; cp2=regs[RT2]; /* *************** *** 806,812 } if (p->back->op==CVT || p->back->op==MOVZ) {/* greedy, aren't we? */ splitrand(p->back); cp1=regs[RT1]; cp2=regs[RT2]; ! if (equstr(src,cp2) && okio(cp1) && !indexa(cp1) && 0<=(r=isreg(cp2)) && r<NUSE && bitsize[p->back->subop&0xF]>=(pos+siz) && bitsize[p->back->subop>>4]>=(pos+siz)) {/* good CVT */ --- 822,840 ----- lhssiz = BYTE; /* extzvl */ if (p->back->op==CVT || p->back->op==MOVZ) {/* greedy, aren't we? */ splitrand(p->back); cp1=regs[RT1]; cp2=regs[RT2]; ! /* ! * If indexa(cp1) || autoid(cp1), the fold may ! * still be OK if the CVT/MOVZ has the same ! * size operand on its left size as what we ! * will turn the bicl into. ! * However, if the CVT is from a float or ! * double, forget it! ! */ ! 
subop = p->back->subop&0xF; /* type of LHS of CVT/MOVZ */ ! if (equstr(src,cp2) && okio(cp1) ! && subop != FFLOAT && subop != DFLOAT ! && subop != GFLOAT && subop != HFLOAT ! && ((!indexa(cp1) && !autoid(cp1)) || lhssiz == subop) && 0<=(r=isreg(cp2)) && r<NUSE && bitsize[p->back->subop&0xF]>=(pos+siz) && bitsize[p->back->subop>>4]>=(pos+siz)) {/* good CVT */ *************** *** 816,823 } /* 'pos', 'siz' known; source of field is in 'src' */ splitrand(p); /* retrieve destination of BICL */ ! if (siz==8 && pos==0) { ! p->combop = T(MOVZ,U(BYTE,LONG)); sprintf(line,"%s,%s",src,lastrand); } else { p->combop = T(EXTZV,LONG); --- 844,851 ----- } /* 'pos', 'siz' known; source of field is in 'src' */ splitrand(p); /* retrieve destination of BICL */ ! if ((siz==8 || siz==16) && pos==0) { ! p->combop = T(MOVZ,U(lhssiz,LONG)); sprintf(line,"%s,%s",src,lastrand); } else { p->combop = T(EXTZV,LONG); *************** *** 1361,1366 indexa(p) register char *p; {/* 1-> uses [r] addressing mode; 0->doesn't */ while (*p) if (*p++=='[') return(1); return(0); } --- 1389,1401 ----- indexa(p) register char *p; {/* 1-> uses [r] addressing mode; 0->doesn't */ while (*p) if (*p++=='[') return(1); + return(0); + } + + autoid(p) register char *p; {/* 1-> uses autoincrement/autodecrement; 0->doesn't */ + if (*p == '-' && *(p+1) == '(') return(1); + while (*p) p++; + if (*--p == '+' && *--p == ')') return(1); return(0); } Guy Harris {seismo,ihnp4,allegra}!rlgvax!guy
jeff%aids-unix@sri-unix.UUCP (02/22/84)
From: Jeff Dean <jeff@aids-unix> Brian, I've found a solution to your problem, where CVTDL a,b BICL $-256,c MOVL c,d was being optimized (sic) to MOVZBL a,d However, not being a "c2 guru", and finding the comments in c2 somewhat lacking, I'm afraid that my fix might be only a "local" solution (i.e., I only know that it solves this particular problem). However, if no one comes up with a better solution, I'll be glad to make mine available (it's only two new lines of code for c21.c). jd P.S. This is for 4.1 [the archived text breaks off here; what follows appears to be a fragment of an unrelated message] TS DONE UNDER 4.2 COULD BE ASSURED TO RUN UNDER SYSTEM 5? THANKS IN ADVANCE, ED FOX.
gwyn%brl-vld@sri-unix.UUCP (02/22/84)
From: Doug Gwyn (VLD/VMB) <gwyn@brl-vld> On release 1.0 of UNIX System V at least, the C optimizer was still the traditional one although the "portable" optimizer was shipped too. The old bugs had been fixed, however.