brooks@maddog.llnl.gov (11/05/88)
Bug report for GCC 1.30 on NS32K, Sequent Balance. First half of double register move clobbers register used in address computation for move of the second half. The bug is apparently in output_move_double, which does not check whether the target of a move to a register might clobber registers used in address computation, and a check of the other machine descriptions reveals no equivalent checking being done in them either. For the NS32K a cheap trick workaround is to use the stack to save the first word moved, move the second word, then pop the first word back off the stack. I would have submitted patches, but am not well enough acquainted with the rtx biz to perform the patch without a lot of effort. I do not hit the bug without the optimizer, and do not hit the bug on the SUN, but perhaps this is just an accident. A patch from anyone who knows how to make output_move_double() check for use of target registers in the address computation of the source operand would be appreciated. We have not been able to work around the bug. Search for BUG in the file below for a pointer to the bad assembler output. 
The bug hitter input code, compiled with gcc -O -S, is: xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx extern struct _iobuf { int _cnt; char *_ptr; char *_base; int _bufsiz; short _flag; char _file; } _iob[20 ]; struct _iobuf *fopen(); struct _iobuf *fdopen(); struct _iobuf *freopen(); long ftell(); char *gets(); char *fgets(); char *sprintf(); extern double fabs(), floor(), ceil(), fmod(), ldexp(), frexp(); extern double sqrt(), hypot(), atof(); extern double sin(), cos(), tan(), asin(), acos(), atan(), atan2(); extern double exp(), log(), log10(), pow(); extern double sinh(), cosh(), tanh(); extern double gamma(); extern double j0(), j1(), jn(), y0(), y1(), yn(); void ranset(); extern long ranseed; double ranf(); double simpint(); float fmean(); float fstdev(); double dmean(); double dstdev(); extern int randi(); extern double randiranf(); extern void srandi(); extern void randifwrite(); extern void randifread(); extern int prandi(); extern double prandiranf(); extern void psrandi(); extern void fwriteprandi(); extern void freadprandi(); char *malloc(); char *calloc(); typedef enum { INT, DOUBLE, PINT, PDOUBLE, PPINT, PPDOUBLE, PPPINT, PPPDOUBLE} TYPE; typedef union { int *pi; int **ppi; int ***pppi; int ****ppppi; double *pd; double **ppd; double ***pppd; double ****ppppd; } PTRUNION; typedef struct symtab { char *name; TYPE type; PTRUNION valptr; int *dim1; int *dim2; int *dim3; struct symtab *next; } SYMTAB; char *strcpy(); SYMTAB *lookup(); extern void addtab(); extern int *mllc3d(); extern int *mllc2d(); extern struct _iobuf *yyin; extern struct _iobuf *yyout; extern int warningMessages; struct bins { double p; double a; double b; }; struct line { int nbins; double avoigt; double knucut; double knu0; double deltaknud; double knucuty; double knucutprimex; double probatknucut; struct bins *bin; }; typedef struct line LINE; LINE *genline( ); double knuinline( ); double pknuinline( ); double fofknu( ); typedef unsigned char 
PCP_LOCK_TYPE; int mtime(); char *malloc(); char *sprintf(); char *sbrk(); void propagate(); struct _iobuf *quefptr; struct _iobuf *outfptr; struct _iobuf *pltfptr; struct _iobuf *matfptr; int mirror = 0; int nlines; int nlevels; int natoms; int timesteps; double timelimit; int nbundles; double **coll; int parallelism = 1; double **rho; int *atomoflevel; int ssgnlines; int *ssgupper; int *ssglower; double *ssgsigmadown; double *ssgsigmaup; double *aspont; double *avoigt; double *sigmadown; double *sigmaup; int *upper; int *lower; double *nu0; double *nucut; double *deltanud; int *ngroups; int *jnuenable; int nubins = 20; double nulolim = 96.0; double nuhilim = 104.0; int *dumplsenable; int emitatleastone = 0; LINE **lptr; int nzones; double **n; double *zonewidth; double totalwidth; static int mtxelts; static PCP_LOCK_TYPE lmtxelts; static int symbols; static PCP_LOCK_TYPE lsymbols; struct mtxelt { int col; double value; struct mtxelt *next; }; typedef struct mtxelt MTXELT; static MTXELT *freemtxelts[1 ]; struct symbol { double sym; unsigned short iz; unsigned short in; struct symbol *next; }; typedef struct symbol SYMBOL; static SYMBOL *freesymbols[1 ]; struct zone { double ledge; double redge; double *n; double *intphif; double intf; double *jnu; double *cntwght; SYMBOL ***symintphif; SYMBOL **symintf; unsigned char access; }; double *lnudist; unsigned char *llnudist; double lbelow = 0.0; unsigned char llbelow = 0 ; double labove = 0.0; unsigned char llabove = 0 ; double *rnudist; unsigned char *lrnudist; double rbelow = 0.0; unsigned char lrbelow = 0 ; double rabove = 0.0; unsigned char lrabove = 0 ; unsigned char lcheckio = 0 ; struct zone *zones; int dim; double *B, *X, *G; int *ls_index; int *zl_index; MTXELT ***SA; int nblocks; int *blocksizes; static int zoneofx(x) double x; { register int _TINDEX = 0; register int _TSIZE = 1 ; int middle; int lower = 0; int upper = nzones; while(lower != upper - 1) { middle = (lower + upper) / 2; if((zones[middle].ledge) 
<= x) { lower = middle; } else if((zones[middle].redge) > x) { upper = middle; } } return(lower); } struct PBUNDLE { double num; double x; double mu; double t; double initnum; double knu; short zx; unsigned short bzx; struct PBUNDLE *next; unsigned short id; unsigned char l; unsigned char sym; }; typedef struct PBUNDLE PBUNDLE; struct DISTRIBUTION { PBUNDLE *head; PBUNDLE *tail; int count; }; typedef struct DISTRIBUTION DISTRIBUTION; PBUNDLE *getfromdist(dptr) DISTRIBUTION *dptr; { PBUNDLE *pptr; ; if(dptr->count > 1) { pptr = dptr->head; dptr->head = dptr->head->next; pptr->next = 0 ; dptr->count -= 1; } else if(dptr->count == 1) { pptr = dptr->head; dptr->head = 0 ; dptr->tail = 0 ; dptr->count = 0; } else { ; ; pptr = 0 ; } return(pptr); } void addtodist(pptr, dptr) PBUNDLE *pptr; DISTRIBUTION *dptr; { ; ; ; ; if(dptr->count == 0) { ; ; dptr->head = pptr; dptr->tail = pptr; } else { ; dptr->tail->next = pptr; dptr->tail = pptr; } dptr->count += 1; } DISTRIBUTION pdist[128 ]; DISTRIBUTION cdist[128 ]; DISTRIBUTION spare[128 ]; DISTRIBUTION outleft[128 ]; DISTRIBUTION outright[128 ]; DISTRIBUTION sdist[128 ]; PBUNDLE *allopbundle(p) int p; { PBUNDLE *array; int i; ; if(spare[p].count == 0) { if((array = (PBUNDLE *)sbrk(2048 * sizeof(PBUNDLE))) == (PBUNDLE *)-1) { perror("allopbundle"); exit(1); } for(i = 0; i <2048 ; i += 1) { array[i].next = 0 ; addtodist(array + i, spare + p); } } return(getfromdist(spare + p)); } int bcounter[128 ]; void addpbundle(num, sym, knu, x, zx, bzx, mu, t, p, l) double num; int sym; double knu; double x; int zx; int bzx; double mu; double t; int p; int l; { register PBUNDLE *tptr; tptr = allopbundle(p); tptr->num = num; tptr->initnum = num; tptr->knu = knu; tptr->x = x; tptr->zx = zx; tptr->bzx = bzx; tptr->mu = mu; tptr->t = t; tptr->next = 0 ; tptr->sym = sym; tptr->id = bcounter[p]++; tptr->l = l; addtodist(tptr, pdist + p); } void propagate(tstart, tcensus) double tstart, tcensus; { register int _TINDEX = 0; register int _TSIZE = 1 
; double incctnfpa; int l; double r; PBUNDLE *pbptr; double distocensus; double distoboundary; double distocollision; double newnum; double absconstant; double incintf; int p; for(p = 0; p < parallelism; p += 1) { while((pbptr = getfromdist(pdist + p)) != 0 ) { int inu; if((pbptr->knu >= nulolim) && (pbptr->knu < nuhilim)) { inu = nubins * ((pbptr->knu - nulolim) / (nuhilim - nulolim)); } else { inu = -1; } if(pbptr->sym == 1 && pbptr->initnum == (double)0.0) { ; ; pbptr->num = pbptr->initnum = zones[pbptr->zx].cntwght[pbptr->l]; } while(1) { distocensus = 2.99792458e+10 * (tcensus - pbptr->t); # 1914 "imc.c" if(pbptr->mu > (double)0.0) { distoboundary = ((zones[pbptr->zx].redge) - pbptr->x) / pbptr->mu; } else if(pbptr->mu < (double)0.0) { distoboundary = ((zones[pbptr->zx].ledge) - pbptr->x) / pbptr->mu; } else { distoboundary = (double)1.0e+30 ; } absconstant = 0.0; for(l = 0; l < nlines; l += 1) { absconstant += ((rho[atomoflevel[lower[l]]][pbptr->zx] * sigmaup[l] * zones[pbptr->zx].n[lower[l]]) - (rho[atomoflevel[upper[l]]][pbptr->zx] * sigmadown[l] * zones[pbptr->zx].n[upper[l]])) * fofknu(pbptr->knu, *(lptr[l])); } if(distocensus <= distoboundary) { if(distocensus > 0) { newnum = (double)exp(-absconstant * distocensus) * pbptr->num; if(absconstant == 0) { incintf = distocensus * pbptr->num; } else if(fabs(absconstant * distocensus) < 1.0e-3 ) { incintf = pbptr->num * -((-absconstant * distocensus) + (-absconstant * distocensus)*(-absconstant * distocensus)/2 + (-absconstant * distocensus)*(-absconstant * distocensus)*(-absconstant * distocensus)/6 + (-absconstant * distocensus)*(-absconstant * distocensus)*(-absconstant * distocensus)*(-absconstant * distocensus)/24 + (-absconstant * distocensus)*(-absconstant * distocensus)*(-absconstant * distocensus)*(-absconstant * distocensus)*(-absconstant * distocensus)/120) / absconstant; } else { incintf = (pbptr->num - newnum) / absconstant; } } else if(distocensus == 0) { newnum = pbptr->num; incintf = 0.0; } else 
{ fprintf((&_iob[2]) , "Distocensus was negative = %g\n", distocensus); newnum = pbptr->num; incintf = 0.0; } 1 ; if(pbptr->sym == 1 ) { addsymbol(pbptr->bzx, upper[pbptr->l], incintf, zones[pbptr->zx].symintf); for(l = 0; l < nlines; l += 1) { addsymbol(pbptr->bzx, upper[pbptr->l], fofknu(pbptr->knu, *(lptr[l])) * incintf, zones[pbptr->zx].symintphif[l]); } if(jnuenable[pbptr->zx] && inu >= 0 && inu < nubins) { zones[pbptr->zx].jnu[inu] += incintf * zones[pbptr->bzx].n[upper[pbptr->l]]; } } else { zones[pbptr->zx].intf += incintf; for(l = 0; l < nlines; l += 1) { zones[pbptr->zx].intphif[l] += fofknu(pbptr->knu, *(lptr[l])) * incintf; } if(jnuenable[pbptr->zx] && inu >= 0 && inu < nubins) { zones[pbptr->zx].jnu[inu] += incintf; } } 1 ; pbptr->num = newnum; pbptr->x += distocensus * pbptr->mu; pbptr->t = tcensus; if(pbptr->num / pbptr->initnum < 1.0e-5 ) { addtodist(pbptr, sdist + p); } else { addtodist(pbptr, cdist + p); } break; } else { if(distoboundary > 0) { newnum = (double)exp(-absconstant * distoboundary) * pbptr->num; if(absconstant == 0) { incintf = distoboundary * pbptr->num; } else if(fabs(absconstant * distoboundary) < 1.0e-3 ) { incintf = pbptr->num * -((-absconstant * distoboundary) + (-absconstant * distoboundary)*(-absconstant * distoboundary)/2 + (-absconstant * distoboundary)*(-absconstant * distoboundary)*(-absconstant * distoboundary)/6 + (-absconstant * distoboundary)*(-absconstant * distoboundary)*(-absconstant * distoboundary)*(-absconstant * distoboundary)/24 + (-absconstant * distoboundary)*(-absconstant * distoboundary)*(-absconstant * distoboundary)*(-absconstant * distoboundary)*(-absconstant * distoboundary)/1 20) / absconstant; } else { incintf = (pbptr->num - newnum) / absconstant; } } else if(distoboundary == 0) { newnum = pbptr->num; incintf = 0.0; } else { fprintf((&_iob[2]) , "distoboundary was negative = %g\n", distoboundary); newnum = pbptr->num; incintf = 0.0; } 1 ; if(pbptr->sym == 1 ) { addsymbol(pbptr->bzx, upper[pbptr->l], 
incintf, zones[pbptr->zx].symintf); for(l = 0; l < nlines; l += 1) { addsymbol(pbptr->bzx, upper[pbptr->l], fofknu(pbptr->knu, *(lptr[l])) * incintf, zones[pbptr->zx].symintphif[l]); } if(jnuenable[pbptr->zx] && inu >= 0 && inu < nubins) { zones[pbptr->zx].jnu[inu] += incintf * zones[pbptr->bzx].n[upper[pbptr->l]]; } } else { zones[pbptr->zx].intf += incintf; for(l = 0; l < nlines; l += 1) { zones[pbptr->zx].intphif[l] += fofknu(pbptr->knu, *(lptr[l])) * incintf; } if(jnuenable[pbptr->zx] && inu >= 0 && inu < nubins) { zones[pbptr->zx].jnu[inu] += incintf; } } 1 ; pbptr->num = newnum; if(pbptr->mu > (double)0.0) { pbptr->x = (zones[pbptr->zx].redge) ; (pbptr->zx) += 1; } else if(pbptr->mu < (double)0.0) { pbptr->x = (zones[pbptr->zx].ledge) ; (pbptr->zx) -= 1; } else { fprintf((&_iob[2]) , "propagate: We made it to the boundary but mu == 0.0 ???\n"); exit(1); } pbptr->t += distoboundary / 2.99792458e+10 ; if(pbptr->num / pbptr->initnum < 1.0e-5 ) { addtodist(pbptr, sdist + p); break; } else if(pbptr->zx == -1) { if(mirror) { ; pbptr->mu = -pbptr->mu; pbptr->zx = 0; } else { addtodist(pbptr, outleft + p); break; } } else if(pbptr->zx == nzones) { addtodist(pbptr, outright + p); break; } } } } } } xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx The incorrect assembly code output is (see the (BUG) pointer a few lines after the definition of _propagate): xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx #NO_APP .globl _mirror .data .align 2 _mirror: .long 0 .globl _parallelism .align 2 _parallelism: .long 1 .globl _nubins .align 2 _nubins: .long 20 .globl _nulolim .align 2 _nulolim: .double 0d9.60000000000000000000e+01 .globl _nuhilim .align 2 _nuhilim: .double 0d1.04000000000000000000e+02 .globl _emitatleastone .align 2 _emitatleastone: .long 0 .globl _lbelow .align 2 _lbelow: .double 0d0.00000000000000000000e+00 .globl _llbelow .align 0 _llbelow: .byte 0 .globl _labove .align 2 _labove: .double 
0d0.00000000000000000000e+00 .globl _llabove .align 0 _llabove: .byte 0 .globl _rbelow .align 2 _rbelow: .double 0d0.00000000000000000000e+00 .globl _lrbelow .align 0 _lrbelow: .byte 0 .globl _rabove .align 2 _rabove: .double 0d0.00000000000000000000e+00 .globl _lrabove .align 0 _lrabove: .byte 0 .globl _lcheckio .align 0 _lcheckio: .byte 0 .text .align 1 _zoneofx: enter [r3,r4],0 movl 8(fp),f0 movqd 0,r2 movd _nzones,r3 movd _zones,r4 br L2 L8: movd r2,r1 addd r3,r1 cmpqd 0,r1 ble L4 addqd 1,r1 L4: ashd -1,r1 movd r1,r0 muld 52,r0 cmpl 0(r0)[r4:b],f0 bgt L5 movd r1,r2 br L2 L5: movd r1,r0 muld 52,r0 cmpl 8(r0)[r4:b],f0 ble L2 movd r1,r3 L2: movd r3,r0 addqd -1,r0 cmpd r2,r0 bne L8 movd r2,r0 exit [r3,r4] ret 0 .align 1 .globl _getfromdist _getfromdist: enter [],0 movd 8(fp),r0 cmpqd 1,8(r0) bge L10 movd 0(r0),r1 movd 52(r1),0(r0) movqd 0,52(r1) addqd -1,8(r0) br L11 L10: cmpqd 1,8(r0) bne L12 movd 0(r0),r1 movqd 0,0(r0) movqd 0,4(r0) movqd 0,8(r0) br L11 L12: movqd 0,r1 L11: movd r1,r0 exit [] ret 0 .align 1 .globl _addtodist _addtodist: enter [],0 movd 8(fp),r2 movd 12(fp),r1 cmpqd 0,8(r1) bne L15 movd r2,0(r1) br L17 L15: movd 4(r1),r0 movd r2,52(r0) L17: movd r2,4(r1) addqd 1,8(r1) exit [] ret 0 .align 0 LC0: .ascii "allopbundle\0" .align 2 LC1: .long _spare .align 1 .globl _allopbundle _allopbundle: enter [r3,r4,r5,r6],0 movd 8(fp),r6 addr @0[r6:q],r0 addr 0(r0)[r6:d],r0 cmpqd 0,_spare+8[r0:b] bne L19 movd 122880,tos bsr _sbrk movd r0,r4 adjspb -4 cmpqd -1,r4 bne L20 addr LC0,tos bsr _perror movqd 1,tos bsr _exit adjspb -8 L20: movqd 0,r3 addr @0[r6:q],r0 addr 0(r0)[r6:d],r0 movd LC1,r5 addd r0,r5 L24: movd r3,r0 muld 60,r0 addd r4,r0 movqd 0,52(r0) movd r5,tos movd r0,tos bsr _addtodist adjspb -8 addqd 1,r3 cmpd r3,2047 ble L24 L19: addr @0[r6:q],r0 addr 0(r0)[r6:d],r0 addr _spare,r1 addd r1,r0 movd r0,tos bsr _getfromdist adjspb -4 exit [r3,r4,r5,r6] ret 0 .align 1 .globl _addpbundle _addpbundle: enter [r3,r4,r5],0 movd 8(fp),r4 movd 12(fp),r5 movd 60(fp),r3 
movd r3,tos bsr _allopbundle movd r4,0(r0) movd r5,4(r0) movd r4,32(r0) movd r5,36(r0) movd 20(fp),40(r0) movd 24(fp),44(r0) movd 28(fp),8(r0) movd 32(fp),12(r0) movw 36(fp),48(r0) movw 40(fp),50(r0) movd 44(fp),16(r0) movd 48(fp),20(r0) movd 52(fp),24(r0) movd 56(fp),28(r0) movqd 0,52(r0) movb 16(fp),59(r0) addr _bcounter[r3:d],r1 movw 0(r1),56(r0) addqd 1,0(r1) movb 64(fp),58(r0) addr @0[r3:q],r1 addr 0(r1)[r3:d],r1 addr _pdist,r2 addd r2,r1 movd r1,tos movd r0,tos bsr _addtodist adjspb -12 exit [r3,r4,r5] ret 0 .align 0 LC2: .ascii "Distocensus was negative = %g\12\0" .align 0 LC3: .ascii "distoboundary was negative = %g\12\0" .align 0 LC4: .ascii "propagate: We made it to the boundary but mu == 0.0 ???\12\0" .align 1 .globl _propagate _propagate: enter [r3,r4,r5,r6,r7],48 movl f4,tos movl f6,tos movqd 0,-36(fp) cmpd -36(fp),_parallelism bge L107 br L30 L100: cmpl 40(r6),_nulolim blt L32 cmpl 40(r6),_nuhilim bge L32 movdl _nubins,f0 movl 40(r6),f2 subl _nulolim,f2 movl _nuhilim,f4 subl _nulolim,f4 divl f4,f2 mull f2,f0 truncld f0,r7 br L33 L32: movqd -1,r7 L33: cmpqb 1,59(r6) bne L34 cmpl 0d0.00000000000000000000e+00,32(r6) bne L34 movxwd 48(r6),r0 muld 52,r0 movzbd 58(r6),r1 movd 36(_zones(sb))[r0:b],r0 movd 0(r0)[r1:q],r0 <<== (BUG) Clobbers r0 which is needed in the next instruction. Changing the order won't help because both r0 and r1 are needed. 
movd 4(r0)[r1:q],r1 movd r0,32(r6) movd r1,36(r6) movd r0,0(r6) movd r1,4(r6) L34: movd -36(fp),r4 addr @0[r4:d],-40(fp) addr @0[r4:q],-44(fp) movd -40(fp),r0 addd -44(fp),r0 addr _sdist,r4 addd r4,r0 movd r0,-48(fp) L35: movl 16(fp),f0 subl 24(r6),f0 mull 0d2.99792458000000000000e+10,f0 movl f0,-8(fp) cmpl 0d0.00000000000000000000e+00,16(r6) bge L37 movxwd 48(r6),r0 muld 52,r0 movl 8(_zones(sb))[r0:b],f0 br L108 L37: cmpl 0d0.00000000000000000000e+00,16(r6) ble L39 movxwd 48(r6),r0 muld 52,r0 movl 0(_zones(sb))[r0:b],f0 L108: subl 8(r6),f0 divl 16(r6),f0 movl f0,-16(fp) br L38 L39: movd 966823146,-16(fp) movd 1177108057,-12(fp) L38: movqd 0,-32(fp) movqd 0,-28(fp) movqd 0,r5 cmpd r5,_nlines bge L106 L44: movd 0(_lptr(sb))[r5:d],r0 adjspd 64 addr tos,r2 addr 0(r0),r1 movd 16,r0 movsd movd 44(r6),tos movd 40(r6),tos bsr _fofknu movd 0(_lower(sb))[r5:d],r4 movd _atomoflevel,r3 movd 0(r3)[r4:d],r2 movd _rho,r1 movxwd 48(r6),r0 movd 0(r1)[r2:d],r2 movl 0(r2)[r0:q],f2 mull 0(_sigmaup(sb))[r5:q],f2 movd r0,r2 muld 52,r2 movd 16(_zones(sb))[r2:b],r2 mull 0(r2)[r4:q],f2 movd 0(_upper(sb))[r5:d],r4 movd 0(r3)[r4:d],r3 movd 0(r1)[r3:d],r1 movl 0(r1)[r0:q],f4 mull 0(_sigmadown(sb))[r5:q],f4 mull 0(r2)[r4:q],f4 subl f4,f2 mull f0,f2 addl f2,-32(fp) adjspw -72 addqd 1,r5 cmpd r5,_nlines blt L44 L106: cmpl -8(fp),-16(fp) bgt L45 cmpl 0d0.00000000000000000000e+00,-8(fp) bge L46 negl -32(fp),f0 mull -8(fp),f0 movl f0,tos bsr _exp mull 0(r6),f0 movl f0,-24(fp) adjspb -8 cmpl 0d0.00000000000000000000e+00,-32(fp) bne L47 movl -8(fp),f4 mull 0(r6),f4 br L51 L47: movl -32(fp),f6 mull -8(fp),f6 movl f6,tos bsr _fabs adjspb -8 cmpl f0,0d1.00000000000000000000e-03 bge L49 negl -32(fp),f4 mull -8(fp),f4 movl f4,f0 mull f4,f0 movl f0,f2 divl 0d2.00000000000000000000e+00,f2 addl f4,f2 mull f4,f0 movl f0,f6 divl 0d6.00000000000000000000e+00,f6 addl f6,f2 mull f4,f0 movl f0,f6 divl 0d2.40000000000000000000e+01,f6 addl f6,f2 mull f4,f0 divl 0d1.20000000000000000000e+02,f0 addl f0,f2 negl f2,f2 
movl f2,f4 mull 0(r6),f4 divl -32(fp),f4 br L51 L49: movl 0(r6),f4 subl -24(fp),f4 divl -32(fp),f4 br L51 L46: cmpl 0d0.00000000000000000000e+00,-8(fp) bne L52 movd 0(r6),-24(fp) movd 4(r6),-20(fp) movl 0d0.00000000000000000000e+00,f4 br L51 L52: movd -4(fp),tos movd -8(fp),tos addr LC2,tos addr __iob+40,tos bsr _fprintf movd 0(r6),-24(fp) movd 4(r6),-20(fp) movl 0d0.00000000000000000000e+00,f4 adjspb -16 L51: cmpqb 1,59(r6) bne L54 movxwd 48(r6),r0 muld 52,r0 movd 44(_zones(sb))[r0:b],tos movl f4,tos movzbd 58(r6),r0 movd 0(_upper(sb))[r0:d],tos movzwd 50(r6),tos bsr _addsymbol movqd 0,r5 adjspb -20 cmpd r5,_nlines bge L105 L58: movxwd 48(r6),r0 muld 52,r0 movd 40(_zones(sb))[r0:b],r0 movd 0(r0)[r5:d],tos movd 0(_lptr(sb))[r5:d],r0 adjspd 64 addr tos,r2 addr 0(r0),r1 movd 16,r0 movsd movd 44(r6),tos movd 40(r6),tos bsr _fofknu adjspw -72 mull f4,f0 movl f0,tos movzbd 58(r6),r0 movd 0(_upper(sb))[r0:d],tos movzwd 50(r6),tos bsr _addsymbol adjspb -20 addqd 1,r5 cmpd r5,_nlines blt L58 L105: movxwd 48(r6),r1 cmpqd 0,0(_jnuenable(sb))[r1:d] beq L60 cmpqd 0,r7 bgt L60 cmpd r7,_nubins bge L60 movd _zones,r0 muld 52,r1 movd 32(r1)[r0:b],r1 movzwd 50(r6),r2 muld 52,r2 movzbd 58(r6),r3 movd 0(_upper(sb))[r3:d],r3 movd 16(r2)[r0:b],r0 movl f4,f0 mull 0(r0)[r3:q],f0 addl f0,0(r1)[r7:q] br L60 L54: movxwd 48(r6),r0 muld 52,r0 addl f4,24(_zones(sb))[r0:b] movqd 0,r5 cmpd r5,_nlines bge L104 L64: movd 0(_lptr(sb))[r5:d],r0 adjspd 64 addr tos,r2 addr 0(r0),r1 movd 16,r0 movsd movd 44(r6),tos movd 40(r6),tos bsr _fofknu movxwd 48(r6),r0 muld 52,r0 movd 20(_zones(sb))[r0:b],r0 mull f4,f0 addl f0,0(r0)[r5:q] adjspw -72 addqd 1,r5 cmpd r5,_nlines blt L64 L104: movxwd 48(r6),r0 cmpqd 0,0(_jnuenable(sb))[r0:d] beq L60 cmpqd 0,r7 bgt L60 cmpd r7,_nubins bge L60 muld 52,r0 movd 32(_zones(sb))[r0:b],r0 addl f4,0(r0)[r7:q] L60: movd -24(fp),0(r6) movd -20(fp),4(r6) movl -8(fp),f0 mull 16(r6),f0 addl f0,8(r6) movd 16(fp),24(r6) movd 20(fp),28(r6) movl 0(r6),f0 divl 32(r6),f0 cmpl 
f0,0d1.00000000000000000000e-05 bge L66 movd -48(fp),tos br L109 L66: movd -40(fp),r0 addd -44(fp),r0 addr _cdist,r4 br L110 L45: cmpl 0d0.00000000000000000000e+00,-16(fp) bge L69 negl -32(fp),f0 mull -16(fp),f0 movl f0,tos bsr _exp mull 0(r6),f0 movl f0,-24(fp) adjspb -8 cmpl 0d0.00000000000000000000e+00,-32(fp) bne L70 movl -16(fp),f4 mull 0(r6),f4 br L74 L70: movl -32(fp),f6 mull -16(fp),f6 movl f6,tos bsr _fabs adjspb -8 cmpl f0,0d1.00000000000000000000e-03 bge L72 negl -32(fp),f4 mull -16(fp),f4 movl f4,f0 mull f4,f0 movl f0,f2 divl 0d2.00000000000000000000e+00,f2 addl f4,f2 mull f4,f0 movl f0,f6 divl 0d6.00000000000000000000e+00,f6 addl f6,f2 mull f4,f0 movl f0,f6 divl 0d2.40000000000000000000e+01,f6 addl f6,f2 mull f4,f0 divl 0d1.20000000000000000000e+02,f0 addl f0,f2 negl f2,f2 movl f2,f4 mull 0(r6),f4 divl -32(fp),f4 br L74 L72: movl 0(r6),f4 subl -24(fp),f4 divl -32(fp),f4 br L74 L69: cmpl 0d0.00000000000000000000e+00,-16(fp) bne L75 movd 0(r6),-24(fp) movd 4(r6),-20(fp) movl 0d0.00000000000000000000e+00,f4 br L74 L75: movd -12(fp),tos movd -16(fp),tos addr LC3,tos addr __iob+40,tos bsr _fprintf movd 0(r6),-24(fp) movd 4(r6),-20(fp) movl 0d0.00000000000000000000e+00,f4 adjspb -16 L74: cmpqb 1,59(r6) bne L77 movxwd 48(r6),r0 muld 52,r0 movd 44(_zones(sb))[r0:b],tos movl f4,tos movzbd 58(r6),r0 movd 0(_upper(sb))[r0:d],tos movzwd 50(r6),tos bsr _addsymbol movqd 0,r5 adjspb -20 cmpd r5,_nlines bge L103 L81: movxwd 48(r6),r0 muld 52,r0 movd 40(_zones(sb))[r0:b],r0 movd 0(r0)[r5:d],tos movd 0(_lptr(sb))[r5:d],r0 adjspd 64 addr tos,r2 addr 0(r0),r1 movd 16,r0 movsd movd 44(r6),tos movd 40(r6),tos bsr _fofknu adjspw -72 mull f4,f0 movl f0,tos movzbd 58(r6),r0 movd 0(_upper(sb))[r0:d],tos movzwd 50(r6),tos bsr _addsymbol adjspb -20 addqd 1,r5 cmpd r5,_nlines blt L81 L103: movxwd 48(r6),r1 cmpqd 0,0(_jnuenable(sb))[r1:d] beq L83 cmpqd 0,r7 bgt L83 cmpd r7,_nubins bge L83 movd _zones,r0 muld 52,r1 movd 32(r1)[r0:b],r1 movzwd 50(r6),r2 muld 52,r2 movzbd 58(r6),r3 
movd 0(_upper(sb))[r3:d],r3 movd 16(r2)[r0:b],r0 movl f4,f0 mull 0(r0)[r3:q],f0 addl f0,0(r1)[r7:q] br L83 L77: movxwd 48(r6),r0 muld 52,r0 addl f4,24(_zones(sb))[r0:b] movqd 0,r5 cmpd r5,_nlines bge L102 L87: movd 0(_lptr(sb))[r5:d],r0 adjspd 64 addr tos,r2 addr 0(r0),r1 movd 16,r0 movsd movd 44(r6),tos movd 40(r6),tos bsr _fofknu movxwd 48(r6),r0 muld 52,r0 movd 20(_zones(sb))[r0:b],r0 mull f4,f0 addl f0,0(r0)[r5:q] adjspw -72 addqd 1,r5 cmpd r5,_nlines blt L87 L102: movxwd 48(r6),r0 cmpqd 0,0(_jnuenable(sb))[r0:d] beq L83 cmpqd 0,r7 bgt L83 cmpd r7,_nubins bge L83 muld 52,r0 movd 32(_zones(sb))[r0:b],r0 addl f4,0(r0)[r7:q] L83: movd -24(fp),0(r6) movd -20(fp),4(r6) cmpl 0d0.00000000000000000000e+00,16(r6) bge L89 movxwd 48(r6),r0 muld 52,r0 movd 8(_zones(sb))[r0:b],8(r6) movd 12(_zones(sb))[r0:b],12(r6) addqw 1,48(r6) br L90 L89: cmpl 0d0.00000000000000000000e+00,16(r6) ble L91 movxwd 48(r6),r0 muld 52,r0 movd 0(_zones(sb))[r0:b],8(r6) movd 4(_zones(sb))[r0:b],12(r6) addqw -1,48(r6) br L90 L91: addr LC4,tos addr __iob+40,tos bsr _fprintf movqd 1,tos bsr _exit adjspb -12 L90: movl -16(fp),f0 divl 0d2.99792458000000000000e+10,f0 addl f0,24(r6) movl 0(r6),f0 divl 32(r6),f0 cmpl f0,0d1.00000000000000000000e-05 bge L93 movd -40(fp),r0 addd -44(fp),r0 addr _sdist,r4 br L110 L93: cmpqw -1,48(r6) bne L95 cmpqd 0,_mirror beq L96 negl 16(r6),16(r6) movqw 0,48(r6) br L35 L96: movd -40(fp),r0 addd -44(fp),r0 addr _outleft,r4 br L110 L95: movxwd 48(r6),r0 cmpd r0,_nzones bne L35 movd -40(fp),r0 addd -44(fp),r0 addr _outright,r4 L110: addd r4,r0 movd r0,tos L109: movd r6,tos bsr _addtodist adjspb -8 L30: movd -36(fp),r4 addr @0[r4:q],r0 addr 0(r0)[r4:d],r0 addr _pdist,r4 addd r4,r0 movd r0,tos bsr _getfromdist movd r0,r6 adjspb -4 cmpqd 0,r6 bne L100 addqd 1,-36(fp) cmpd -36(fp),_parallelism blt L30 L107: movl tos,f6 movl tos,f4 exit [r3,r4,r5,r6,r7] ret 0 .comm _bcounter,512 .comm _sdist,1536 .comm _outright,1536 .comm _outleft,1536 .comm _spare,1536 .comm _cdist,1536 .comm 
_pdist,1536 .comm _blocksizes,4 .comm _nblocks,4 .comm _SA,4 .comm _zl_index,4 .comm _ls_index,4 .comm _G,4 .comm _X,4 .comm _B,4 .comm _dim,4 .comm _zones,4 .comm _lrnudist,4 .comm _rnudist,4 .comm _llnudist,4 .comm _lnudist,4 .lcomm _freesymbols,4 .lcomm _freemtxelts,4 .lcomm _lsymbols,4 .lcomm _symbols,4 .lcomm _lmtxelts,4 .lcomm _mtxelts,4 .comm _totalwidth,8 .comm _zonewidth,4 .comm _n,4 .comm _nzones,4 .comm _lptr,4 .comm _dumplsenable,4 .comm _jnuenable,4 .comm _ngroups,4 .comm _deltanud,4 .comm _nucut,4 .comm _nu0,4 .comm _lower,4 .comm _upper,4 .comm _sigmaup,4 .comm _sigmadown,4 .comm _avoigt,4 .comm _aspont,4 .comm _ssgsigmaup,4 .comm _ssgsigmadown,4 .comm _ssglower,4 .comm _ssgupper,4 .comm _ssgnlines,4 .comm _atomoflevel,4 .comm _rho,4 .comm _coll,4 .comm _nbundles,4 .comm _timelimit,8 .comm _timesteps,4 .comm _natoms,4 .comm _nlevels,4 .comm _nlines,4 .comm _matfptr,4 .comm _pltfptr,4 .comm _outfptr,4 .comm _quefptr,4
brooks@lll-crg.llnl.gov (Eugene D. Brooks III) (11/05/88)
Bug report for GCC 1.30 on NS32K, Sequent Balance. First half of double register move clobbers register used in address computation for move of the second half. The bug is apparently in output_move_double, which does not check that the target of the move (if a register pair) might clobber registers used in address computation, and a check of the other machine descriptions reveals no equivalent checking being done in them either. For the NS32K a cheap trick workaround is to use the stack to save the first word moved, move the second word, then pop the first word back off the stack. Anyone know how to get output_move_double() to accomplish this? I am not well versed in GCC internals. I do not hit the bug without the optimizer, and do not hit the bug on the SUN, but perhaps this is just an accident. Search for BUG in the file below for a pointer to the bad assembler output. The bug hitter input code, compiled with gcc -O -S, is: xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx extern struct _iobuf { int _cnt; char *_ptr; char *_base; int _bufsiz; short _flag; char _file; } _iob[20 ]; struct _iobuf *fopen(); struct _iobuf *fdopen(); struct _iobuf *freopen(); long ftell(); char *gets(); char *fgets(); char *sprintf(); extern double fabs(), floor(), ceil(), fmod(), ldexp(), frexp(); extern double sqrt(), hypot(), atof(); extern double sin(), cos(), tan(), asin(), acos(), atan(), atan2(); extern double exp(), log(), log10(), pow(); extern double sinh(), cosh(), tanh(); extern double gamma(); extern double j0(), j1(), jn(), y0(), y1(), yn(); void ranset(); extern long ranseed; double ranf(); double simpint(); float fmean(); float fstdev(); double dmean(); double dstdev(); extern int randi(); extern double randiranf(); extern void srandi(); extern void randifwrite(); extern void randifread(); extern int prandi(); extern double prandiranf(); extern void psrandi(); extern void fwriteprandi(); extern void freadprandi(); char *malloc(); char 
*calloc(); typedef enum { INT, DOUBLE, PINT, PDOUBLE, PPINT, PPDOUBLE, PPPINT, PPPDOUBLE} TYPE; typedef union { int *pi; int **ppi; int ***pppi; int ****ppppi; double *pd; double **ppd; double ***pppd; double ****ppppd; } PTRUNION; typedef struct symtab { char *name; TYPE type; PTRUNION valptr; int *dim1; int *dim2; int *dim3; struct symtab *next; } SYMTAB; char *strcpy(); SYMTAB *lookup(); extern void addtab(); extern int *mllc3d(); extern int *mllc2d(); extern struct _iobuf *yyin; extern struct _iobuf *yyout; extern int warningMessages; struct bins { double p; double a; double b; }; struct line { int nbins; double avoigt; double knucut; double knu0; double deltaknud; double knucuty; double knucutprimex; double probatknucut; struct bins *bin; }; typedef struct line LINE; LINE *genline( ); double knuinline( ); double pknuinline( ); double fofknu( ); typedef unsigned char PCP_LOCK_TYPE; int mtime(); char *malloc(); char *sprintf(); char *sbrk(); void propagate(); struct _iobuf *quefptr; struct _iobuf *outfptr; struct _iobuf *pltfptr; struct _iobuf *matfptr; int mirror = 0; int nlines; int nlevels; int natoms; int timesteps; double timelimit; int nbundles; double **coll; int parallelism = 1; double **rho; int *atomoflevel; int ssgnlines; int *ssgupper; int *ssglower; double *ssgsigmadown; double *ssgsigmaup; double *aspont; double *avoigt; double *sigmadown; double *sigmaup; int *upper; int *lower; double *nu0; double *nucut; double *deltanud; int *ngroups; int *jnuenable; int nubins = 20; double nulolim = 96.0; double nuhilim = 104.0; int *dumplsenable; int emitatleastone = 0; LINE **lptr; int nzones; double **n; double *zonewidth; double totalwidth; static int mtxelts; static PCP_LOCK_TYPE lmtxelts; static int symbols; static PCP_LOCK_TYPE lsymbols; struct mtxelt { int col; double value; struct mtxelt *next; }; typedef struct mtxelt MTXELT; static MTXELT *freemtxelts[1 ]; struct symbol { double sym; unsigned short iz; unsigned short in; struct symbol *next; }; 
typedef struct symbol SYMBOL; static SYMBOL *freesymbols[1 ]; struct zone { double ledge; double redge; double *n; double *intphif; double intf; double *jnu; double *cntwght; SYMBOL ***symintphif; SYMBOL **symintf; unsigned char access; }; double *lnudist; unsigned char *llnudist; double lbelow = 0.0; unsigned char llbelow = 0 ; double labove = 0.0; unsigned char llabove = 0 ; double *rnudist; unsigned char *lrnudist; double rbelow = 0.0; unsigned char lrbelow = 0 ; double rabove = 0.0; unsigned char lrabove = 0 ; unsigned char lcheckio = 0 ; struct zone *zones; int dim; double *B, *X, *G; int *ls_index; int *zl_index; MTXELT ***SA; int nblocks; int *blocksizes; static int zoneofx(x) double x; { register int _TINDEX = 0; register int _TSIZE = 1 ; int middle; int lower = 0; int upper = nzones; while(lower != upper - 1) { middle = (lower + upper) / 2; if((zones[middle].ledge) <= x) { lower = middle; } else if((zones[middle].redge) > x) { upper = middle; } } return(lower); } struct PBUNDLE { double num; double x; double mu; double t; double initnum; double knu; short zx; unsigned short bzx; struct PBUNDLE *next; unsigned short id; unsigned char l; unsigned char sym; }; typedef struct PBUNDLE PBUNDLE; struct DISTRIBUTION { PBUNDLE *head; PBUNDLE *tail; int count; }; typedef struct DISTRIBUTION DISTRIBUTION; PBUNDLE *getfromdist(dptr) DISTRIBUTION *dptr; { PBUNDLE *pptr; ; if(dptr->count > 1) { pptr = dptr->head; dptr->head = dptr->head->next; pptr->next = 0 ; dptr->count -= 1; } else if(dptr->count == 1) { pptr = dptr->head; dptr->head = 0 ; dptr->tail = 0 ; dptr->count = 0; } else { ; ; pptr = 0 ; } return(pptr); } void addtodist(pptr, dptr) PBUNDLE *pptr; DISTRIBUTION *dptr; { ; ; ; ; if(dptr->count == 0) { ; ; dptr->head = pptr; dptr->tail = pptr; } else { ; dptr->tail->next = pptr; dptr->tail = pptr; } dptr->count += 1; } DISTRIBUTION pdist[128 ]; DISTRIBUTION cdist[128 ]; DISTRIBUTION spare[128 ]; DISTRIBUTION outleft[128 ]; DISTRIBUTION outright[128 ]; 
DISTRIBUTION sdist[128 ]; PBUNDLE *allopbundle(p) int p; { PBUNDLE *array; int i; ; if(spare[p].count == 0) { if((array = (PBUNDLE *)sbrk(2048 * sizeof(PBUNDLE))) == (PBUNDLE *)-1) { perror("allopbundle"); exit(1); } for(i = 0; i <2048 ; i += 1) { array[i].next = 0 ; addtodist(array + i, spare + p); } } return(getfromdist(spare + p)); } int bcounter[128 ]; void addpbundle(num, sym, knu, x, zx, bzx, mu, t, p, l) double num; int sym; double knu; double x; int zx; int bzx; double mu; double t; int p; int l; { register PBUNDLE *tptr; tptr = allopbundle(p); tptr->num = num; tptr->initnum = num; tptr->knu = knu; tptr->x = x; tptr->zx = zx; tptr->bzx = bzx; tptr->mu = mu; tptr->t = t; tptr->next = 0 ; tptr->sym = sym; tptr->id = bcounter[p]++; tptr->l = l; addtodist(tptr, pdist + p); } void propagate(tstart, tcensus) double tstart, tcensus; { register int _TINDEX = 0; register int _TSIZE = 1 ; double incctnfpa; int l; double r; PBUNDLE *pbptr; double distocensus; double distoboundary; double distocollision; double newnum; double absconstant; double incintf; int p; for(p = 0; p < parallelism; p += 1) { while((pbptr = getfromdist(pdist + p)) != 0 ) { int inu; if((pbptr->knu >= nulolim) && (pbptr->knu < nuhilim)) { inu = nubins * ((pbptr->knu - nulolim) / (nuhilim - nulolim)); } else { inu = -1; } if(pbptr->sym == 1 && pbptr->initnum == (double)0.0) { ; ; pbptr->num = pbptr->initnum = zones[pbptr->zx].cntwght[pbptr->l]; } while(1) { distocensus = 2.99792458e+10 * (tcensus - pbptr->t); # 1914 "imc.c" if(pbptr->mu > (double)0.0) { distoboundary = ((zones[pbptr->zx].redge) - pbptr->x) / pbptr->mu; } else if(pbptr->mu < (double)0.0) { distoboundary = ((zones[pbptr->zx].ledge) - pbptr->x) / pbptr->mu; } else { distoboundary = (double)1.0e+30 ; } absconstant = 0.0; for(l = 0; l < nlines; l += 1) { absconstant += ((rho[atomoflevel[lower[l]]][pbptr->zx] * sigmaup[l] * zones[pbptr->zx].n[lower[l]]) - (rho[atomoflevel[upper[l]]][pbptr->zx] * sigmadown[l] * 
zones[pbptr->zx].n[upper[l]])) * fofknu(pbptr->knu, *(lptr[l])); } if(distocensus <= distoboundary) { if(distocensus > 0) { newnum = (double)exp(-absconstant * distocensus) * pbptr->num; if(absconstant == 0) { incintf = distocensus * pbptr->num; } else if(fabs(absconstant * distocensus) < 1.0e-3 ) { incintf = pbptr->num * -((-absconstant * distocensus) + (-absconstant * distocensus)*(-absconstant * distocensus)/2 + (-absconstant * distocensus)*(-absconstant * distocensus)*(-absconstant * distocensus)/6 + (-absconstant * distocensus)*(-absconstant * distocensus)*(-absconstant * distocensus)*(-absconstant * distocensus)/24 + (-absconstant * distocensus)*(-absconstant * distocensus)*(-absconstant * distocensus)*(-absconstant * distocensus)*(-absconstant * distocensus)/120) / absconstant; } else { incintf = (pbptr->num - newnum) / absconstant; } } else if(distocensus == 0) { newnum = pbptr->num; incintf = 0.0; } else { fprintf((&_iob[2]) , "Distocensus was negative = %g\n", distocensus); newnum = pbptr->num; incintf = 0.0; } 1 ; if(pbptr->sym == 1 ) { addsymbol(pbptr->bzx, upper[pbptr->l], incintf, zones[pbptr->zx].symintf); for(l = 0; l < nlines; l += 1) { addsymbol(pbptr->bzx, upper[pbptr->l], fofknu(pbptr->knu, *(lptr[l])) * incintf, zones[pbptr->zx].symintphif[l]); } if(jnuenable[pbptr->zx] && inu >= 0 && inu < nubins) { zones[pbptr->zx].jnu[inu] += incintf * zones[pbptr->bzx].n[upper[pbptr->l]]; } } else { zones[pbptr->zx].intf += incintf; for(l = 0; l < nlines; l += 1) { zones[pbptr->zx].intphif[l] += fofknu(pbptr->knu, *(lptr[l])) * incintf; } if(jnuenable[pbptr->zx] && inu >= 0 && inu < nubins) { zones[pbptr->zx].jnu[inu] += incintf; } } 1 ; pbptr->num = newnum; pbptr->x += distocensus * pbptr->mu; pbptr->t = tcensus; if(pbptr->num / pbptr->initnum < 1.0e-5 ) { addtodist(pbptr, sdist + p); } else { addtodist(pbptr, cdist + p); } break; } else { if(distoboundary > 0) { newnum = (double)exp(-absconstant * distoboundary) * pbptr->num; if(absconstant == 0) { 
incintf = distoboundary * pbptr->num; } else if(fabs(absconstant * distoboundary) < 1.0e-3 ) { incintf = pbptr->num * -((-absconstant * distoboundary) + (-absconstant * distoboundary)*(-absconstant * distoboundary)/2 + (-absconstant * distoboundary)*(-absconstant * distoboundary)*(-absconstant * distoboundary)/6 + (-absconstant * distoboundary)*(-absconstant * distoboundary)*(-absconstant * distoboundary)*(-absconstant * distoboundary)/24 + (-absconstant * distoboundary)*(-absconstant * distoboundary)*(-absconstant * distoboundary)*(-absconstant * distoboundary)*(-absconstant * distoboundary)/120) / absconstant; } else { incintf = (pbptr->num - newnum) / absconstant; } } else if(distoboundary == 0) { newnum = pbptr->num; incintf = 0.0; } else { fprintf((&_iob[2]) , "distoboundary was negative = %g\n", distoboundary); newnum = pbptr->num; incintf = 0.0; } 1 ; if(pbptr->sym == 1 ) { addsymbol(pbptr->bzx, upper[pbptr->l], incintf, zones[pbptr->zx].symintf); for(l = 0; l < nlines; l += 1) { addsymbol(pbptr->bzx, upper[pbptr->l], fofknu(pbptr->knu, *(lptr[l])) * incintf, zones[pbptr->zx].symintphif[l]); } if(jnuenable[pbptr->zx] && inu >= 0 && inu < nubins) { zones[pbptr->zx].jnu[inu] += incintf * zones[pbptr->bzx].n[upper[pbptr->l]]; } } else { zones[pbptr->zx].intf += incintf; for(l = 0; l < nlines; l += 1) { zones[pbptr->zx].intphif[l] += fofknu(pbptr->knu, *(lptr[l])) * incintf; } if(jnuenable[pbptr->zx] && inu >= 0 && inu < nubins) { zones[pbptr->zx].jnu[inu] += incintf; } } 1 ; pbptr->num = newnum; if(pbptr->mu > (double)0.0) { pbptr->x = (zones[pbptr->zx].redge) ; (pbptr->zx) += 1; } else if(pbptr->mu < (double)0.0) { pbptr->x = (zones[pbptr->zx].ledge) ; (pbptr->zx) -= 1; } else { fprintf((&_iob[2]) , "propagate: We made it to the boundary but mu == 0.0 ???\n"); exit(1); } pbptr->t += distoboundary / 2.99792458e+10 ; if(pbptr->num / pbptr->initnum < 1.0e-5 ) { addtodist(pbptr, sdist + p); break; } else if(pbptr->zx == -1) { if(mirror) { ; pbptr->mu = 
-pbptr->mu; pbptr->zx = 0; } else { addtodist(pbptr, outleft + p); break; } } else if(pbptr->zx == nzones) { addtodist(pbptr, outright + p); break; } } } } } } xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx The incorrect assembly code output is (see the (BUG) pointer a few lines after the definition of _propagate): xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx #NO_APP .globl _mirror .data .align 2 _mirror: .long 0 .globl _parallelism .align 2 _parallelism: .long 1 .globl _nubins .align 2 _nubins: .long 20 .globl _nulolim .align 2 _nulolim: .double 0d9.60000000000000000000e+01 .globl _nuhilim .align 2 _nuhilim: .double 0d1.04000000000000000000e+02 .globl _emitatleastone .align 2 _emitatleastone: .long 0 .globl _lbelow .align 2 _lbelow: .double 0d0.00000000000000000000e+00 .globl _llbelow .align 0 _llbelow: .byte 0 .globl _labove .align 2 _labove: .double 0d0.00000000000000000000e+00 .globl _llabove .align 0 _llabove: .byte 0 .globl _rbelow .align 2 _rbelow: .double 0d0.00000000000000000000e+00 .globl _lrbelow .align 0 _lrbelow: .byte 0 .globl _rabove .align 2 _rabove: .double 0d0.00000000000000000000e+00 .globl _lrabove .align 0 _lrabove: .byte 0 .globl _lcheckio .align 0 _lcheckio: .byte 0 .text .align 1 _zoneofx: enter [r3,r4],0 movl 8(fp),f0 movqd 0,r2 movd _nzones,r3 movd _zones,r4 br L2 L8: movd r2,r1 addd r3,r1 cmpqd 0,r1 ble L4 addqd 1,r1 L4: ashd -1,r1 movd r1,r0 muld 52,r0 cmpl 0(r0)[r4:b],f0 bgt L5 movd r1,r2 br L2 L5: movd r1,r0 muld 52,r0 cmpl 8(r0)[r4:b],f0 ble L2 movd r1,r3 L2: movd r3,r0 addqd -1,r0 cmpd r2,r0 bne L8 movd r2,r0 exit [r3,r4] ret 0 .align 1 .globl _getfromdist _getfromdist: enter [],0 movd 8(fp),r0 cmpqd 1,8(r0) bge L10 movd 0(r0),r1 movd 52(r1),0(r0) movqd 0,52(r1) addqd -1,8(r0) br L11 L10: cmpqd 1,8(r0) bne L12 movd 0(r0),r1 movqd 0,0(r0) movqd 0,4(r0) movqd 0,8(r0) br L11 L12: movqd 0,r1 L11: movd r1,r0 exit [] ret 0 .align 1 .globl _addtodist _addtodist: enter [],0 movd 
8(fp),r2 movd 12(fp),r1 cmpqd 0,8(r1) bne L15 movd r2,0(r1) br L17 L15: movd 4(r1),r0 movd r2,52(r0) L17: movd r2,4(r1) addqd 1,8(r1) exit [] ret 0 .align 0 LC0: .ascii "allopbundle\0" .align 2 LC1: .long _spare .align 1 .globl _allopbundle _allopbundle: enter [r3,r4,r5,r6],0 movd 8(fp),r6 addr @0[r6:q],r0 addr 0(r0)[r6:d],r0 cmpqd 0,_spare+8[r0:b] bne L19 movd 122880,tos bsr _sbrk movd r0,r4 adjspb -4 cmpqd -1,r4 bne L20 addr LC0,tos bsr _perror movqd 1,tos bsr _exit adjspb -8 L20: movqd 0,r3 addr @0[r6:q],r0 addr 0(r0)[r6:d],r0 movd LC1,r5 addd r0,r5 L24: movd r3,r0 muld 60,r0 addd r4,r0 movqd 0,52(r0) movd r5,tos movd r0,tos bsr _addtodist adjspb -8 addqd 1,r3 cmpd r3,2047 ble L24 L19: addr @0[r6:q],r0 addr 0(r0)[r6:d],r0 addr _spare,r1 addd r1,r0 movd r0,tos bsr _getfromdist adjspb -4 exit [r3,r4,r5,r6] ret 0 .align 1 .globl _addpbundle _addpbundle: enter [r3,r4,r5],0 movd 8(fp),r4 movd 12(fp),r5 movd 60(fp),r3 movd r3,tos bsr _allopbundle movd r4,0(r0) movd r5,4(r0) movd r4,32(r0) movd r5,36(r0) movd 20(fp),40(r0) movd 24(fp),44(r0) movd 28(fp),8(r0) movd 32(fp),12(r0) movw 36(fp),48(r0) movw 40(fp),50(r0) movd 44(fp),16(r0) movd 48(fp),20(r0) movd 52(fp),24(r0) movd 56(fp),28(r0) movqd 0,52(r0) movb 16(fp),59(r0) addr _bcounter[r3:d],r1 movw 0(r1),56(r0) addqd 1,0(r1) movb 64(fp),58(r0) addr @0[r3:q],r1 addr 0(r1)[r3:d],r1 addr _pdist,r2 addd r2,r1 movd r1,tos movd r0,tos bsr _addtodist adjspb -12 exit [r3,r4,r5] ret 0 .align 0 LC2: .ascii "Distocensus was negative = %g\12\0" .align 0 LC3: .ascii "distoboundary was negative = %g\12\0" .align 0 LC4: .ascii "propagate: We made it to the boundary but mu == 0.0 ???\12\0" .align 1 .globl _propagate _propagate: enter [r3,r4,r5,r6,r7],48 movl f4,tos movl f6,tos movqd 0,-36(fp) cmpd -36(fp),_parallelism bge L107 br L30 L100: cmpl 40(r6),_nulolim blt L32 cmpl 40(r6),_nuhilim bge L32 movdl _nubins,f0 movl 40(r6),f2 subl _nulolim,f2 movl _nuhilim,f4 subl _nulolim,f4 divl f4,f2 mull f2,f0 truncld f0,r7 br L33 L32: movqd 
-1,r7 L33: cmpqb 1,59(r6) bne L34 cmpl 0d0.00000000000000000000e+00,32(r6) bne L34 movxwd 48(r6),r0 muld 52,r0 movzbd 58(r6),r1 movd 36(_zones(sb))[r0:b],r0 movd 0(r0)[r1:q],r0 <<== (BUG) Clobbers r0, which is still needed by the next instruction. Changing the order won't help because both r0 and r1 are needed. movd 4(r0)[r1:q],r1 movd r0,32(r6) movd r1,36(r6) movd r0,0(r6) movd r1,4(r6) L34: movd -36(fp),r4 addr @0[r4:d],-40(fp) addr @0[r4:q],-44(fp) movd -40(fp),r0 addd -44(fp),r0 addr _sdist,r4 addd r4,r0 movd r0,-48(fp) L35: movl 16(fp),f0 subl 24(r6),f0 mull 0d2.99792458000000000000e+10,f0 movl f0,-8(fp) cmpl 0d0.00000000000000000000e+00,16(r6) bge L37 movxwd 48(r6),r0 muld 52,r0 movl 8(_zones(sb))[r0:b],f0 br L108 L37: cmpl 0d0.00000000000000000000e+00,16(r6) ble L39 movxwd 48(r6),r0 muld 52,r0 movl 0(_zones(sb))[r0:b],f0 L108: subl 8(r6),f0 divl 16(r6),f0 movl f0,-16(fp) br L38 L39: movd 966823146,-16(fp) movd 1177108057,-12(fp) L38: movqd 0,-32(fp) movqd 0,-28(fp) movqd 0,r5 cmpd r5,_nlines bge L106 L44: movd 0(_lptr(sb))[r5:d],r0 adjspd 64 addr tos,r2 addr 0(r0),r1 movd 16,r0 movsd movd 44(r6),tos movd 40(r6),tos bsr _fofknu movd 0(_lower(sb))[r5:d],r4 movd _atomoflevel,r3 movd 0(r3)[r4:d],r2 movd _rho,r1 movxwd 48(r6),r0 movd 0(r1)[r2:d],r2 movl 0(r2)[r0:q],f2 mull 0(_sigmaup(sb))[r5:q],f2 movd r0,r2 muld 52,r2 movd 16(_zones(sb))[r2:b],r2 mull 0(r2)[r4:q],f2 movd 0(_upper(sb))[r5:d],r4 movd 0(r3)[r4:d],r3 movd 0(r1)[r3:d],r1 movl 0(r1)[r0:q],f4 mull 0(_sigmadown(sb))[r5:q],f4 mull 0(r2)[r4:q],f4 subl f4,f2 mull f0,f2 addl f2,-32(fp) adjspw -72 addqd 1,r5 cmpd r5,_nlines blt L44 L106: cmpl -8(fp),-16(fp) bgt L45 cmpl 0d0.00000000000000000000e+00,-8(fp) bge L46 negl -32(fp),f0 mull -8(fp),f0 movl f0,tos bsr _exp mull 0(r6),f0 movl f0,-24(fp) adjspb -8 cmpl 0d0.00000000000000000000e+00,-32(fp) bne L47 movl -8(fp),f4 mull 0(r6),f4 br L51 L47: movl -32(fp),f6 mull -8(fp),f6 movl f6,tos bsr _fabs adjspb -8 cmpl f0,0d1.00000000000000000000e-03 bge L49 negl -32(fp),f4 
mull -8(fp),f4 movl f4,f0 mull f4,f0 movl f0,f2 divl 0d2.00000000000000000000e+00,f2 addl f4,f2 mull f4,f0 movl f0,f6 divl 0d6.00000000000000000000e+00,f6 addl f6,f2 mull f4,f0 movl f0,f6 divl 0d2.40000000000000000000e+01,f6 addl f6,f2 mull f4,f0 divl 0d1.20000000000000000000e+02,f0 addl f0,f2 negl f2,f2 movl f2,f4 mull 0(r6),f4 divl -32(fp),f4 br L51 L49: movl 0(r6),f4 subl -24(fp),f4 divl -32(fp),f4 br L51 L46: cmpl 0d0.00000000000000000000e+00,-8(fp) bne L52 movd 0(r6),-24(fp) movd 4(r6),-20(fp) movl 0d0.00000000000000000000e+00,f4 br L51 L52: movd -4(fp),tos movd -8(fp),tos addr LC2,tos addr __iob+40,tos bsr _fprintf movd 0(r6),-24(fp) movd 4(r6),-20(fp) movl 0d0.00000000000000000000e+00,f4 adjspb -16 L51: cmpqb 1,59(r6) bne L54 movxwd 48(r6),r0 muld 52,r0 movd 44(_zones(sb))[r0:b],tos movl f4,tos movzbd 58(r6),r0 movd 0(_upper(sb))[r0:d],tos movzwd 50(r6),tos bsr _addsymbol movqd 0,r5 adjspb -20 cmpd r5,_nlines bge L105 L58: movxwd 48(r6),r0 muld 52,r0 movd 40(_zones(sb))[r0:b],r0 movd 0(r0)[r5:d],tos movd 0(_lptr(sb))[r5:d],r0 adjspd 64 addr tos,r2 addr 0(r0),r1 movd 16,r0 movsd movd 44(r6),tos movd 40(r6),tos bsr _fofknu adjspw -72 mull f4,f0 movl f0,tos movzbd 58(r6),r0 movd 0(_upper(sb))[r0:d],tos movzwd 50(r6),tos bsr _addsymbol adjspb -20 addqd 1,r5 cmpd r5,_nlines blt L58 L105: movxwd 48(r6),r1 cmpqd 0,0(_jnuenable(sb))[r1:d] beq L60 cmpqd 0,r7 bgt L60 cmpd r7,_nubins bge L60 movd _zones,r0 muld 52,r1 movd 32(r1)[r0:b],r1 movzwd 50(r6),r2 muld 52,r2 movzbd 58(r6),r3 movd 0(_upper(sb))[r3:d],r3 movd 16(r2)[r0:b],r0 movl f4,f0 mull 0(r0)[r3:q],f0 addl f0,0(r1)[r7:q] br L60 L54: movxwd 48(r6),r0 muld 52,r0 addl f4,24(_zones(sb))[r0:b] movqd 0,r5 cmpd r5,_nlines bge L104 L64: movd 0(_lptr(sb))[r5:d],r0 adjspd 64 addr tos,r2 addr 0(r0),r1 movd 16,r0 movsd movd 44(r6),tos movd 40(r6),tos bsr _fofknu movxwd 48(r6),r0 muld 52,r0 movd 20(_zones(sb))[r0:b],r0 mull f4,f0 addl f0,0(r0)[r5:q] adjspw -72 addqd 1,r5 cmpd r5,_nlines blt L64 L104: movxwd 48(r6),r0 cmpqd 
0,0(_jnuenable(sb))[r0:d] beq L60 cmpqd 0,r7 bgt L60 cmpd r7,_nubins bge L60 muld 52,r0 movd 32(_zones(sb))[r0:b],r0 addl f4,0(r0)[r7:q] L60: movd -24(fp),0(r6) movd -20(fp),4(r6) movl -8(fp),f0 mull 16(r6),f0 addl f0,8(r6) movd 16(fp),24(r6) movd 20(fp),28(r6) movl 0(r6),f0 divl 32(r6),f0 cmpl f0,0d1.00000000000000000000e-05 bge L66 movd -48(fp),tos br L109 L66: movd -40(fp),r0 addd -44(fp),r0 addr _cdist,r4 br L110 L45: cmpl 0d0.00000000000000000000e+00,-16(fp) bge L69 negl -32(fp),f0 mull -16(fp),f0 movl f0,tos bsr _exp mull 0(r6),f0 movl f0,-24(fp) adjspb -8 cmpl 0d0.00000000000000000000e+00,-32(fp) bne L70 movl -16(fp),f4 mull 0(r6),f4 br L74 L70: movl -32(fp),f6 mull -16(fp),f6 movl f6,tos bsr _fabs adjspb -8 cmpl f0,0d1.00000000000000000000e-03 bge L72 negl -32(fp),f4 mull -16(fp),f4 movl f4,f0 mull f4,f0 movl f0,f2 divl 0d2.00000000000000000000e+00,f2 addl f4,f2 mull f4,f0 movl f0,f6 divl 0d6.00000000000000000000e+00,f6 addl f6,f2 mull f4,f0 movl f0,f6 divl 0d2.40000000000000000000e+01,f6 addl f6,f2 mull f4,f0 divl 0d1.20000000000000000000e+02,f0 addl f0,f2 negl f2,f2 movl f2,f4 mull 0(r6),f4 divl -32(fp),f4 br L74 L72: movl 0(r6),f4 subl -24(fp),f4 divl -32(fp),f4 br L74 L69: cmpl 0d0.00000000000000000000e+00,-16(fp) bne L75 movd 0(r6),-24(fp) movd 4(r6),-20(fp) movl 0d0.00000000000000000000e+00,f4 br L74 L75: movd -12(fp),tos movd -16(fp),tos addr LC3,tos addr __iob+40,tos bsr _fprintf movd 0(r6),-24(fp) movd 4(r6),-20(fp) movl 0d0.00000000000000000000e+00,f4 adjspb -16 L74: cmpqb 1,59(r6) bne L77 movxwd 48(r6),r0 muld 52,r0 movd 44(_zones(sb))[r0:b],tos movl f4,tos movzbd 58(r6),r0 movd 0(_upper(sb))[r0:d],tos movzwd 50(r6),tos bsr _addsymbol movqd 0,r5 adjspb -20 cmpd r5,_nlines bge L103 L81: movxwd 48(r6),r0 muld 52,r0 movd 40(_zones(sb))[r0:b],r0 movd 0(r0)[r5:d],tos movd 0(_lptr(sb))[r5:d],r0 adjspd 64 addr tos,r2 addr 0(r0),r1 movd 16,r0 movsd movd 44(r6),tos movd 40(r6),tos bsr _fofknu adjspw -72 mull f4,f0 movl f0,tos movzbd 58(r6),r0 movd 
0(_upper(sb))[r0:d],tos movzwd 50(r6),tos bsr _addsymbol adjspb -20 addqd 1,r5 cmpd r5,_nlines blt L81 L103: movxwd 48(r6),r1 cmpqd 0,0(_jnuenable(sb))[r1:d] beq L83 cmpqd 0,r7 bgt L83 cmpd r7,_nubins bge L83 movd _zones,r0 muld 52,r1 movd 32(r1)[r0:b],r1 movzwd 50(r6),r2 muld 52,r2 movzbd 58(r6),r3 movd 0(_upper(sb))[r3:d],r3 movd 16(r2)[r0:b],r0 movl f4,f0 mull 0(r0)[r3:q],f0 addl f0,0(r1)[r7:q] br L83 L77: movxwd 48(r6),r0 muld 52,r0 addl f4,24(_zones(sb))[r0:b] movqd 0,r5 cmpd r5,_nlines bge L102 L87: movd 0(_lptr(sb))[r5:d],r0 adjspd 64 addr tos,r2 addr 0(r0),r1 movd 16,r0 movsd movd 44(r6),tos movd 40(r6),tos bsr _fofknu movxwd 48(r6),r0 muld 52,r0 movd 20(_zones(sb))[r0:b],r0 mull f4,f0 addl f0,0(r0)[r5:q] adjspw -72 addqd 1,r5 cmpd r5,_nlines blt L87 L102: movxwd 48(r6),r0 cmpqd 0,0(_jnuenable(sb))[r0:d] beq L83 cmpqd 0,r7 bgt L83 cmpd r7,_nubins bge L83 muld 52,r0 movd 32(_zones(sb))[r0:b],r0 addl f4,0(r0)[r7:q] L83: movd -24(fp),0(r6) movd -20(fp),4(r6) cmpl 0d0.00000000000000000000e+00,16(r6) bge L89 movxwd 48(r6),r0 muld 52,r0 movd 8(_zones(sb))[r0:b],8(r6) movd 12(_zones(sb))[r0:b],12(r6) addqw 1,48(r6) br L90 L89: cmpl 0d0.00000000000000000000e+00,16(r6) ble L91 movxwd 48(r6),r0 muld 52,r0 movd 0(_zones(sb))[r0:b],8(r6) movd 4(_zones(sb))[r0:b],12(r6) addqw -1,48(r6) br L90 L91: addr LC4,tos addr __iob+40,tos bsr _fprintf movqd 1,tos bsr _exit adjspb -12 L90: movl -16(fp),f0 divl 0d2.99792458000000000000e+10,f0 addl f0,24(r6) movl 0(r6),f0 divl 32(r6),f0 cmpl f0,0d1.00000000000000000000e-05 bge L93 movd -40(fp),r0 addd -44(fp),r0 addr _sdist,r4 br L110 L93: cmpqw -1,48(r6) bne L95 cmpqd 0,_mirror beq L96 negl 16(r6),16(r6) movqw 0,48(r6) br L35 L96: movd -40(fp),r0 addd -44(fp),r0 addr _outleft,r4 br L110 L95: movxwd 48(r6),r0 cmpd r0,_nzones bne L35 movd -40(fp),r0 addd -44(fp),r0 addr _outright,r4 L110: addd r4,r0 movd r0,tos L109: movd r6,tos bsr _addtodist adjspb -8 L30: movd -36(fp),r4 addr @0[r4:q],r0 addr 0(r0)[r4:d],r0 addr _pdist,r4 addd 
r4,r0 movd r0,tos bsr _getfromdist movd r0,r6 adjspb -4 cmpqd 0,r6 bne L100 addqd 1,-36(fp) cmpd -36(fp),_parallelism blt L30 L107: movl tos,f6 movl tos,f4 exit [r3,r4,r5,r6,r7] ret 0 .comm _bcounter,512 .comm _sdist,1536 .comm _outright,1536 .comm _outleft,1536 .comm _spare,1536 .comm _cdist,1536 .comm _pdist,1536 .comm _blocksizes,4 .comm _nblocks,4 .comm _SA,4 .comm _zl_index,4 .comm _ls_index,4 .comm _G,4 .comm _X,4 .comm _B,4 .comm _dim,4 .comm _zones,4 .comm _lrnudist,4 .comm _rnudist,4 .comm _llnudist,4 .comm _lnudist,4 .lcomm _freesymbols,4 .lcomm _freemtxelts,4 .lcomm _lsymbols,4 .lcomm _symbols,4 .lcomm _lmtxelts,4 .lcomm _mtxelts,4 .comm _totalwidth,8 .comm _zonewidth,4 .comm _n,4 .comm _nzones,4 .comm _lptr,4 .comm _dumplsenable,4 .comm _jnuenable,4 .comm _ngroups,4 .comm _deltanud,4 .comm _nucut,4 .comm _nu0,4 .comm _lower,4 .comm _upper,4 .comm _sigmaup,4 .comm _sigmadown,4 .comm _avoigt,4 .comm _aspont,4 .comm _ssgsigmaup,4 .comm _ssgsigmadown,4 .comm _ssglower,4 .comm _ssgupper,4 .comm _ssgnlines,4 .comm _atomoflevel,4 .comm _rho,4 .comm _coll,4 .comm _nbundles,4 .comm _timelimit,8 .comm _timesteps,4 .comm _natoms,4 .comm _nlevels,4 .comm _nlines,4 .comm _matfptr,4 .comm _pltfptr,4 .comm _outfptr,4 .comm _quefptr,4