guy@rlgvax.UUCP (Guy Harris) (02/19/84)
The System III and 4.xBSD C optimizers (the 4.xBSD optimizer is a later release than the System III one) have a number of bugs, many of which have to do with improperly dealing with conversion instructions. Here is a log of the fixed ones:

revision 1.5
date: 83/08/15 05:31:38; author: bin; state: Exp; lines added/del: 5/3
In generating code to extract values from fields, the compiler uses only the extv/extzv instructions. The optimizer changes all extractions of byte/word fields on byte/word boundaries to cvt{b,w}l or movz{b,w}l depending on the field being signed or unsigned. However, it does this incorrectly for word size fields on non-longword boundaries; it adds the offset into the word to the address, but adds it as if it were a byte offset, not a word offset. Fixed.
----------------------------
revision 1.4
date: 83/08/15 05:29:00; author: bin; state: Exp; lines added/del: 4/0
Fix courtesy of James T. Ellis, Microelectronics Center of North Carolina: don't fold ANYTHING with cvtfl, cvtdl, cvtgl, or cvthl!!!!
----------------------------
revision 1.3
date: 83/08/15 05:25:00; author: bin; state: Exp; lines added/del: 25/3
Previous fix meant that if the mask was 0xffff, the instructions were cvtwl and movzwl, which could be merged but weren't. Fixed to merge the instructions iff the sizes of the operands were the same. Also modified to handle masks of 0xffff as well as 0xff.
----------------------------
revision 1.2
date: 83/08/15 05:19:50; author: bin; state: Exp; lines added/del: 8/1
Fixed so as not to collapse a cvtwl xx,dest/bicl $0377,dest sequence into a movzbl xx,dest if the xx does autoincrementing or autodecrementing. The first sequence would increment or decrement the register used in xx by 2, while the second sequence would increment or decrement it by 1. Routine "autoid" added to test if an operand used autoincrement or autodecrement; routine "bicopt" modified to test for !autoid(src of cvtwl) as well as !indexa(src).
and here is a "diff -c" listing of the two versions of the optimizer. Note that the line numbers pertain to the 4.1cBSD version, so they may not be exactly the same for the 4.1 or 4.2 versions. This listing can also be used, with some care, for the System III version. (The System V version doesn't have this problem, because it was totally rewritten to be retargetable to the various machines that are targets for the Software Generation Systems.) *** c21.c (original) Sun Feb 19 13:23:08 1984 --- c21.c (fixed) Sun Feb 19 13:23:11 1984 *************** *** 275,281 ** ** also byte- and word-size fields: ** extv $n*8,$8,A,B > cvtbl n+A,B ! ** extv $n*16,$16,A,B > cvtwl n+A,B ** extzv $n*8,$8,A,B > movzbl n+A,B ** extzv $n*16,$16,A,B > movzwl n+A,B */ --- 275,281 ----- ** ** also byte- and word-size fields: ** extv $n*8,$8,A,B > cvtbl n+A,B ! ** extv $n*16,$16,A,B > cvtwl 2n+A,B ** extzv $n*8,$8,A,B > movzbl n+A,B ** extzv $n*16,$16,A,B > movzwl 2n+A,B */ *************** *** 277,283 ** extv $n*8,$8,A,B > cvtbl n+A,B ** extv $n*16,$16,A,B > cvtwl n+A,B ** extzv $n*8,$8,A,B > movzbl n+A,B ! ** extzv $n*16,$16,A,B > movzwl n+A,B */ register struct node *pf; /* forward node */ register struct node *pn; /* next node (after pf) */ --- 277,283 ----- ** extv $n*8,$8,A,B > cvtbl n+A,B ** extv $n*16,$16,A,B > cvtwl 2n+A,B ** extzv $n*8,$8,A,B > movzbl n+A,B ! ** extzv $n*16,$16,A,B > movzwl 2n+A,B */ register struct node *pf; /* forward node */ register struct node *pn; /* next node (after pf) */ *************** *** 322,328 if (coff == 0) strcpy(regs[RT1], regs[RT3]); else ! sprintf(regs[RT1], "%d%s%s", coff, regs[RT3][0]=='(' ? "":"+", regs[RT3]); strcpy(regs[RT2], regs[RT4]); regs[RT3][0] = '\0'; regs[RT4][0] = '\0'; --- 322,330 ----- if (coff == 0) strcpy(regs[RT1], regs[RT3]); else ! sprintf(regs[RT1], "%d%s%s", ! (flen == 8 ? coff : 2*coff), ! (regs[RT3][0] == '(' ? 
"" : "+"), regs[RT3]); strcpy(regs[RT2], regs[RT4]); regs[RT3][0] = '\0'; regs[RT4][0] = '\0'; *************** *** 788,793 /* use field operations or MOVZ if possible. done as part of 'bflow'. */ register char *cp1,*cp2; int r; char src[C2_ASIZE]; if (!bixprep(p,JBCC)) return(p); if (f==0) {/* the BIC isolates low order bits */ --- 790,796 ----- /* use field operations or MOVZ if possible. done as part of 'bflow'. */ register char *cp1,*cp2; int r; + char lhssiz, subop; char src[C2_ASIZE]; if (!bixprep(p,JBCC)) return(p); if (f==0) {/* the BIC isolates low order bits */ *************** *** 804,809 delnode(p->back); } } if (p->back->op==CVT || p->back->op==MOVZ) {/* greedy, aren't we? */ splitrand(p->back); cp1=regs[RT1]; cp2=regs[RT2]; if (equstr(src,cp2) && okio(cp1) && !indexa(cp1) --- 807,825 ----- delnode(p->back); } } + /* + * 'pos', 'siz' known; find out the size of the + * left-hand operand of what the bicl will turn into. + */ + if (pos==0) { + if (siz==8) + lhssiz = BYTE; /* movzbl */ + else if (siz==16) + lhssiz = WORD; /* movzwl */ + else + lhssiz = BYTE; /* extzvl */ + } else + lhssiz = BYTE; /* extzvl */ if (p->back->op==CVT || p->back->op==MOVZ) {/* greedy, aren't we? */ splitrand(p->back); cp1=regs[RT1]; cp2=regs[RT2]; /* *************** *** 806,812 } if (p->back->op==CVT || p->back->op==MOVZ) {/* greedy, aren't we? */ splitrand(p->back); cp1=regs[RT1]; cp2=regs[RT2]; ! if (equstr(src,cp2) && okio(cp1) && !indexa(cp1) && 0<=(r=isreg(cp2)) && r<NUSE && bitsize[p->back->subop&0xF]>=(pos+siz) && bitsize[p->back->subop>>4]>=(pos+siz)) {/* good CVT */ --- 822,840 ----- lhssiz = BYTE; /* extzvl */ if (p->back->op==CVT || p->back->op==MOVZ) {/* greedy, aren't we? */ splitrand(p->back); cp1=regs[RT1]; cp2=regs[RT2]; ! /* ! * If indexa(cp1) || autoid(cp1), the fold may ! * still be OK if the CVT/MOVZ has the same ! * size operand on its left side as what we ! * will turn the bicl into. ! * However, if the CVT is from a float or ! * double, forget it! ! */ ! 
subop = p->back->subop&0xF; /* type of LHS of CVT/MOVZ */ ! if (equstr(src,cp2) && okio(cp1) ! && subop != FFLOAT && subop != DFLOAT ! && subop != GFLOAT && subop != HFLOAT ! && ((!indexa(cp1) && !autoid(cp1)) || lhssiz == subop) && 0<=(r=isreg(cp2)) && r<NUSE && bitsize[p->back->subop&0xF]>=(pos+siz) && bitsize[p->back->subop>>4]>=(pos+siz)) {/* good CVT */ *************** *** 816,823 } /* 'pos', 'siz' known; source of field is in 'src' */ splitrand(p); /* retrieve destination of BICL */ ! if (siz==8 && pos==0) { ! p->combop = T(MOVZ,U(BYTE,LONG)); sprintf(line,"%s,%s",src,lastrand); } else { p->combop = T(EXTZV,LONG); --- 844,851 ----- } /* 'pos', 'siz' known; source of field is in 'src' */ splitrand(p); /* retrieve destination of BICL */ ! if ((siz==8 || siz==16) && pos==0) { ! p->combop = T(MOVZ,U(lhssiz,LONG)); sprintf(line,"%s,%s",src,lastrand); } else { p->combop = T(EXTZV,LONG); *************** *** 1361,1366 indexa(p) register char *p; {/* 1-> uses [r] addressing mode; 0->doesn't */ while (*p) if (*p++=='[') return(1); return(0); } --- 1389,1401 ----- indexa(p) register char *p; {/* 1-> uses [r] addressing mode; 0->doesn't */ while (*p) if (*p++=='[') return(1); + return(0); + } + + autoid(p) register char *p; {/* 1-> uses autoincrement/autodecrement; 0->doesn't */ + if (*p == '-' && *(p+1) == '(') return(1); + while (*p) p++; + if (*--p == '+' && *--p == ')') return(1); return(0); } Guy Harris {seismo,ihnp4,allegra}!rlgvax!guy