piet@cs.ruu.nl (Piet van Oostrum) (07/11/89)
About a month ago, Allan Pratt asked about a bug in the qmult routine of the GCC library distributed by John Dunning. I never saw a reply on the net, and as the floating point results were not very accurate, I started looking at it. The problem was in the generated inline assembler code: In one case, with no -O option, the compiler (GCC 1.35) generates addx.l a0@-,a2@- which is not unreasonable. However, with -O, it generates addx.l a0@-,a0@- (the same register for both operands). The problem appeared to be in the operand specifications for the asm instructions. Section 5.16 of the GCC manual explains the use of these (not very clearly, in my opinion - a lot is left to be guessed). Anyway, the operands in the XXADDL macro in qmult were not correctly specified. In particular, an output operand that is at the same time an input operand must have a ``digit'' specification (e.g. "0") to say that these are actually the same. I changed this and got correct code for the instruction. By the way, I still believe the instruction is not officially correct, but I couldn't get that right. It works now and the multiplication is now correct to the last bit. Investigating flonum.h revealed that there are also wrong specifications there. The correction there made the floating divide work correctly! Here are the patches: *** dflonum.c.~1~ Mon Jul 3 17:00:38 1989 --- dflonum.c Tue Jul 11 14:40:23 1989 *************** *** 355,363 **** #define XADDL(partial, target_ptr) \ { register unsigned long temp = *target_ptr; \ ! asm volatile("addl %2,%0" : "=d" (temp) : "d" (temp), "g" (partial)); \ *target_ptr-- = temp; temp = *target_ptr; \ ! asm volatile("addxl #0,%0" : "=d" (temp) : "d" (temp)); \ *target_ptr = temp; } static long constant_zero_kludge = 0; --- 355,363 ---- #define XADDL(partial, target_ptr) \ { register unsigned long temp = *target_ptr; \ ! asm volatile("addl %2,%0" : "=d" (temp) : "0" (temp), "g" (partial)); \ *target_ptr-- = temp; temp = *target_ptr; \ !
asm volatile("addxl #0,%0" : "=d" (temp) : "0" (temp)); \ *target_ptr = temp; } static long constant_zero_kludge = 0; *************** *** 364,391 **** #define XXADDL(partial, target) \ { register unsigned long * zero = &constant_zero_kludge + 1; \ ! asm volatile("addl %2,%0@" : "=a" (target) : "a" (target), "g" (partial)); \ ! asm volatile("addxl %0@-,%1@-" : "=a" (zero) : "a" (target), "a" (zero)); } /* #define ADDL(partial, target_ptr) \ { register unsigned long temp = *target_ptr; \ ! asm volatile("addl %2,%0" : "=d" (temp) : "d" (temp), "g" (partial)); \ *target_ptr-- = temp } #define ADDXL(partial, target_ptr) \ { register unsigned long temp = *target_ptr; \ ! asm volatile("addxl %2,%0" : "=d" (temp) : "d" (temp), "g" (partial)); \ *target_ptr-- = temp } #define ADDW(partial, target_ptr) \ { register unsigned short temp = *(unsigned short * )target_ptr; \ ! asm volatile("addw %2,%0" : "=d" (temp) : "d" (temp), "g" (partial)); \ *(unsigned short * )target_ptr-- = temp } #define ADDXW(partial, target_ptr) \ { register unsigned sort temp = *(unsigned short * )target_ptr; \ ! asm volatile("addxw %2,%0" : "=d" (temp) : "d" (temp), "g" (partial)); \ *(unsigned short * )target_ptr-- = temp } */ --- 364,391 ---- #define XXADDL(partial, target) \ { register unsigned long * zero = &constant_zero_kludge + 1; \ ! asm volatile("addl %2,%0@" : "=a" (target) : "0" (target), "g" (partial)); \ ! asm volatile("addxl %0@-,%1@-" : "=a" (zero) : "a" (target), "0" (zero)); } /* #define ADDL(partial, target_ptr) \ { register unsigned long temp = *target_ptr; \ ! asm volatile("addl %2,%0" : "=d" (temp) : "0" (temp), "g" (partial)); \ *target_ptr-- = temp } #define ADDXL(partial, target_ptr) \ { register unsigned long temp = *target_ptr; \ ! asm volatile("addxl %2,%0" : "=d" (temp) : "0" (temp), "g" (partial)); \ *target_ptr-- = temp } #define ADDW(partial, target_ptr) \ { register unsigned short temp = *(unsigned short * )target_ptr; \ ! 
asm volatile("addw %2,%0" : "=d" (temp) : "0" (temp), "g" (partial)); \ *(unsigned short * )target_ptr-- = temp } #define ADDXW(partial, target_ptr) \ { register unsigned sort temp = *(unsigned short * )target_ptr; \ ! asm volatile("addxw %2,%0" : "=d" (temp) : "0" (temp), "g" (partial)); \ *(unsigned short * )target_ptr-- = temp } */ *** flonum.h.~1~ Mon Jul 3 17:13:09 1989 --- flonum.h Tue Jul 11 15:14:43 1989 *************** *** 47,69 **** #define MUL(a, b) asm volatile ("mulu %2,%0" : "=d" (b) : "0" (b) , "g" (a)) #define DIV(a, b) asm volatile ("divu %2,%0" : "=d" (b) : "0" (b) , "g" (a)) ! #define SWAP(a) asm volatile ("swap %0" : "=r" (a) : "r" (a) , "r" (a) ) ! #define ASL2(r1, r2) { asm volatile ("asll #1,%0" : "=d" (r2) : "d" (r2));\ ! asm volatile ("roxll #1,%0" : "=d" (r1) : "d" (r1)); } ! #define ASL3(r1, r2, r3) { asm volatile ("asll #1,%0" : "=d" (r3) : "d" (r3));\ ! asm volatile ("roxll #1,%0" : "=d" (r2) : "d" (r2));\ ! asm volatile ("roxll #1,%0" : "=d" (r1) : "d" (r1)); } ! ! #define ASR2(r1, r2) { asm volatile ("asrl #1,%0" : "=d" (r1) : "d" (r1));\ ! asm volatile ("roxrl #1,%0" : "=d" (r2) : "d" (r2)); } ! #define ASR3(r1, r2, r3) { asm volatile ("asrl #1,%0" : "=d" (r1) : "d" (r1));\ ! asm volatile ("roxrl #1,%0" : "=d" (r2) : "d" (r2));\ ! asm volatile ("roxrl #1,%0" : "=d" (r3) : "d" (r3)); } ! #define ASR4(r1, r2, r3, r4) { asm volatile ("asrl #1,%0" : "=d" (r1) : "d" (r1));\ ! asm volatile ("roxrl #1,%0" : "=d" (r2) : "d" (r2));\ ! asm volatile ("roxrl #1,%0" : "=d" (r3) : "d" (r3));\ ! asm volatile ("roxrl #1,%0" : "=d" (r4) : "d" (r4)); } #define ADD2(r1, r2, r3, r4) \ { asm volatile ("addl %2,%0": "=g" (r4) : "0" (r4) , "g" (r2)); \ --- 47,69 ---- #define MUL(a, b) asm volatile ("mulu %2,%0" : "=d" (b) : "0" (b) , "g" (a)) #define DIV(a, b) asm volatile ("divu %2,%0" : "=d" (b) : "0" (b) , "g" (a)) ! #define SWAP(a) asm volatile ("swap %0" : "=r" (a) : "0" (a) ) ! 
#define ASL2(r1, r2) { asm volatile ("asll #1,%0" : "=d" (r2) : "0" (r2));\ ! asm volatile ("roxll #1,%0" : "=d" (r1) : "0" (r1)); } ! #define ASL3(r1, r2, r3) { asm volatile ("asll #1,%0" : "=d" (r3) : "0" (r3));\ ! asm volatile ("roxll #1,%0" : "=d" (r2) : "0" (r2));\ ! asm volatile ("roxll #1,%0" : "=d" (r1) : "0" (r1)); } ! ! #define ASR2(r1, r2) { asm volatile ("asrl #1,%0" : "=d" (r1) : "0" (r1));\ ! asm volatile ("roxrl #1,%0" : "=d" (r2) : "0" (r2)); } ! #define ASR3(r1, r2, r3) { asm volatile ("asrl #1,%0" : "=d" (r1) : "0" (r1));\ ! asm volatile ("roxrl #1,%0" : "=d" (r2) : "0" (r2));\ ! asm volatile ("roxrl #1,%0" : "=d" (r3) : "0" (r3)); } ! #define ASR4(r1, r2, r3, r4) { asm volatile ("asrl #1,%0" : "=d" (r1) : "0" (r1));\ ! asm volatile ("roxrl #1,%0" : "=d" (r2) : "0" (r2));\ ! asm volatile ("roxrl #1,%0" : "=d" (r3) : "0" (r3));\ ! asm volatile ("roxrl #1,%0" : "=d" (r4) : "0" (r4)); } #define ADD2(r1, r2, r3, r4) \ { asm volatile ("addl %2,%0": "=g" (r4) : "0" (r4) , "g" (r2)); \ *************** *** 71,79 **** /* y <- y - x */ #define SUB3(x1, x2, x3, y1, y2, y3) \ ! { asm volatile ("subl %2,%0": "=g" (y3) : "g" (y3) , "d" (x3)); \ ! asm volatile ("subxl %2,%0": "=g" (y2) : "g" (y2) , "d" (x2));\ ! asm volatile ("subxl %2,%0": "=g" (y1) : "g" (y1) , "d" (x1)); } /* sub4 here is rather complex, as the compiler is overwhelmed by me wanting to have 8 data registers allocated for mantissa accumulators. Help it out --- 71,79 ---- /* y <- y - x */ #define SUB3(x1, x2, x3, y1, y2, y3) \ ! { asm volatile ("subl %2,%0": "=g" (y3) : "0" (y3) , "d" (x3)); \ ! asm volatile ("subxl %2,%0": "=g" (y2) : "0" (y2) , "d" (x2));\ ! asm volatile ("subxl %2,%0": "=g" (y1) : "0" (y1) , "d" (x1)); } /* sub4 here is rather complex, as the compiler is overwhelmed by me wanting to have 8 data registers allocated for mantissa accumulators. Help it out *************** *** 80,96 **** by declaring a temp that it can move stuff in and out of. 
*/ #define SUB4(x1, x2, x3, x4, y1, y2, y3, y4) \ { register long temp = y4; \ ! asm volatile ("subl %2,%0": "=d" (temp) : "d" (temp) , "d" (x4)); \ y4 = temp; temp = y3; \ ! asm volatile ("subxl %2,%0": "=d" (temp) : "d" (temp) , "d" (x3));\ y3 = temp; temp = y2; \ ! asm volatile ("subxl %2,%0": "=d" (temp) : "d" (temp) , "d" (x2));\ y2 = temp; temp = y1; \ ! asm volatile ("subxl %2,%0": "=d" (temp) : "d" (temp) , "d" (x1));\ y1 = temp; } ! #define NEG(r1, r2) { asm volatile ("negl %0" : "=d" (r2) : "d" (r2)); \ ! asm volatile ("negxl %0" : "=d" (r1) : "d" (r1)); } /* switches for which routines to compile. All the single-float and long-int arithmetic routines are turned off here, as they were all --- 80,96 ---- by declaring a temp that it can move stuff in and out of. */ #define SUB4(x1, x2, x3, x4, y1, y2, y3, y4) \ { register long temp = y4; \ ! asm volatile ("subl %2,%0": "=d" (temp) : "0" (temp) , "d" (x4)); \ y4 = temp; temp = y3; \ ! asm volatile ("subxl %2,%0": "=d" (temp) : "0" (temp) , "d" (x3));\ y3 = temp; temp = y2; \ ! asm volatile ("subxl %2,%0": "=d" (temp) : "0" (temp) , "d" (x2));\ y2 = temp; temp = y1; \ ! asm volatile ("subxl %2,%0": "=d" (temp) : "0" (temp) , "d" (x1));\ y1 = temp; } ! #define NEG(r1, r2) { asm volatile ("negl %0" : "=d" (r2) : "0" (r2)); \ ! asm volatile ("negxl %0" : "=d" (r1) : "0" (r1)); } /* switches for which routines to compile. All the single-float and long-int arithmetic routines are turned off here, as they were all Note -- you have to remake the compiler with the new routines, to get floating point constants in your program correct. -- Piet van Oostrum, Dept of Computer Science, University of Utrecht Padualaan 14, P.O. Box 80.089, 3508 TB Utrecht, The Netherlands Telephone: +31-30-531806. piet@cs.ruu.nl (mcvax!hp4nl!ruuinf!piet)