[comp.sys.atari.st] Bugs in GCC library for Atari ST

piet@cs.ruu.nl (Piet van Oostrum) (07/11/89)
About a month ago, Allan Pratt asked about a bug in the qmult routine of
the GCC library distributed by John Dunning. I never saw a reply on the
net, and as the floating point results were not very accurate, I started
looking at it. 
The problem was in the generated inline assembler code:

    In one case, with no -O option, the compiler (GCC 1.35) generates

	    addx.l	a0@-,a2@-

    which is not unreasonable.  However, with -O, it generates

	    addx.l	a0@-,a0@-

The problem appeared to be in the operand specifications for the asm
instructions. Section 5.16 of the GCC manual explains the use of these (not
very clear in my opinion - a lot is still to be guessed). Anyway, the
operands in the XXADDL macro in qmult were not correctly specified. In
particular an output operand that is at the same time an input operand must
have a ``digit'' specification (e.g. "0") to say that these are actually
the same. I changed this and got correct code for the instruction. By the
way I still believe the instruction is not officially correct, but I
couldn't get that right. It works now and the multiplication is now correct
to the last bit. 

Investigation of flonum.h revealed that there are also wrong specifications
there. The correction there made the floating divide work correctly!

Here are the patches:

*** dflonum.c.~1~	Mon Jul  3 17:00:38 1989
--- dflonum.c	Tue Jul 11 14:40:23 1989
***************
*** 355,363 ****
  
  #define XADDL(partial, target_ptr) \
  	{ register unsigned long temp = *target_ptr; \
! 	asm volatile("addl %2,%0" : "=d" (temp) : "d" (temp), "g" (partial)); \
  	*target_ptr-- = temp; temp = *target_ptr; \
! 	asm volatile("addxl #0,%0" : "=d" (temp) : "d" (temp)); \
  	*target_ptr = temp; }
  
  static long constant_zero_kludge = 0;
--- 355,363 ----
  
  #define XADDL(partial, target_ptr) \
  	{ register unsigned long temp = *target_ptr; \
! 	asm volatile("addl %2,%0" : "=d" (temp) : "0" (temp), "g" (partial)); \
  	*target_ptr-- = temp; temp = *target_ptr; \
! 	asm volatile("addxl #0,%0" : "=d" (temp) : "0" (temp)); \
  	*target_ptr = temp; }
  
  static long constant_zero_kludge = 0;
***************
*** 364,391 ****
  
  #define XXADDL(partial, target) \
  	{ register unsigned long * zero = &constant_zero_kludge + 1; \
! 	asm volatile("addl %2,%0@" : "=a" (target) : "a" (target), "g" (partial)); \
! 	asm volatile("addxl %0@-,%1@-" : "=a" (zero) : "a" (target), "a" (zero)); }
  
  /*
  #define ADDL(partial, target_ptr) \
  	{ register unsigned long temp = *target_ptr; \
! 	asm volatile("addl %2,%0" : "=d" (temp) : "d" (temp), "g" (partial)); \
  	*target_ptr-- = temp }
  
  #define ADDXL(partial, target_ptr) \
  	{ register unsigned long temp = *target_ptr; \
! 	asm volatile("addxl %2,%0" : "=d" (temp) : "d" (temp), "g" (partial)); \
  	*target_ptr-- = temp }
  	
  #define ADDW(partial, target_ptr) \
  	{ register unsigned short temp = *(unsigned short * )target_ptr; \
! 	asm volatile("addw %2,%0" : "=d" (temp) : "d" (temp), "g" (partial)); \
  	*(unsigned short * )target_ptr-- = temp }
  
  #define ADDXW(partial, target_ptr) \
  	{ register unsigned sort temp = *(unsigned short * )target_ptr; \
! 	asm volatile("addxw %2,%0" : "=d" (temp) : "d" (temp), "g" (partial)); \
  	*(unsigned short * )target_ptr-- = temp }
  */	
  
--- 364,391 ----
  
  #define XXADDL(partial, target) \
  	{ register unsigned long * zero = &constant_zero_kludge + 1; \
! 	asm volatile("addl %2,%0@" : "=a" (target) : "0" (target), "g" (partial)); \
! 	asm volatile("addxl %0@-,%1@-" : "=a" (zero) : "a" (target), "0" (zero)); }
  
  /*
  #define ADDL(partial, target_ptr) \
  	{ register unsigned long temp = *target_ptr; \
! 	asm volatile("addl %2,%0" : "=d" (temp) : "0" (temp), "g" (partial)); \
  	*target_ptr-- = temp }
  
  #define ADDXL(partial, target_ptr) \
  	{ register unsigned long temp = *target_ptr; \
! 	asm volatile("addxl %2,%0" : "=d" (temp) : "0" (temp), "g" (partial)); \
  	*target_ptr-- = temp }
  	
  #define ADDW(partial, target_ptr) \
  	{ register unsigned short temp = *(unsigned short * )target_ptr; \
! 	asm volatile("addw %2,%0" : "=d" (temp) : "0" (temp), "g" (partial)); \
  	*(unsigned short * )target_ptr-- = temp }
  
  #define ADDXW(partial, target_ptr) \
  	{ register unsigned sort temp = *(unsigned short * )target_ptr; \
! 	asm volatile("addxw %2,%0" : "=d" (temp) : "0" (temp), "g" (partial)); \
  	*(unsigned short * )target_ptr-- = temp }
  */	
  
*** flonum.h.~1~	Mon Jul  3 17:13:09 1989
--- flonum.h	Tue Jul 11 15:14:43 1989
***************
*** 47,69 ****
  
  #define MUL(a, b) asm volatile ("mulu %2,%0" : "=d" (b) : "0" (b) , "g" (a))
  #define DIV(a, b) asm volatile ("divu %2,%0" : "=d" (b) : "0" (b) , "g" (a))
! #define SWAP(a) asm volatile ("swap %0" : "=r" (a) : "r" (a) , "r" (a) )
  
! #define ASL2(r1, r2) { asm volatile ("asll #1,%0" : "=d" (r2) : "d" (r2));\
! 		       asm volatile ("roxll #1,%0" : "=d" (r1) : "d" (r1)); }
! #define ASL3(r1, r2, r3) { asm volatile ("asll #1,%0" : "=d" (r3) : "d" (r3));\
! 			   asm volatile ("roxll #1,%0" : "=d" (r2) : "d" (r2));\
! 			   asm volatile ("roxll #1,%0" : "=d" (r1) : "d" (r1)); }
! 
! #define ASR2(r1, r2) { asm volatile ("asrl #1,%0" : "=d" (r1) : "d" (r1));\
! 		       asm volatile ("roxrl #1,%0" : "=d" (r2) : "d" (r2)); }
! #define ASR3(r1, r2, r3) { asm volatile ("asrl #1,%0" : "=d" (r1) : "d" (r1));\
! 			   asm volatile ("roxrl #1,%0" : "=d" (r2) : "d" (r2));\
! 			   asm volatile ("roxrl #1,%0" : "=d" (r3) : "d" (r3)); }
! #define ASR4(r1, r2, r3, r4) { asm volatile ("asrl #1,%0" : "=d" (r1) : "d" (r1));\
! 			       asm volatile ("roxrl #1,%0" : "=d" (r2) : "d" (r2));\
! 			       asm volatile ("roxrl #1,%0" : "=d" (r3) : "d" (r3));\
! 			       asm volatile ("roxrl #1,%0" : "=d" (r4) : "d" (r4)); }
  
  #define ADD2(r1, r2, r3, r4) \
  	{ asm volatile ("addl %2,%0": "=g" (r4) : "0" (r4) , "g" (r2)); \
--- 47,69 ----
  
  #define MUL(a, b) asm volatile ("mulu %2,%0" : "=d" (b) : "0" (b) , "g" (a))
  #define DIV(a, b) asm volatile ("divu %2,%0" : "=d" (b) : "0" (b) , "g" (a))
! #define SWAP(a) asm volatile ("swap %0" : "=r" (a) : "0" (a) )
  
! #define ASL2(r1, r2) { asm volatile ("asll #1,%0" : "=d" (r2) : "0" (r2));\
! 		       asm volatile ("roxll #1,%0" : "=d" (r1) : "0" (r1)); }
! #define ASL3(r1, r2, r3) { asm volatile ("asll #1,%0" : "=d" (r3) : "0" (r3));\
! 			   asm volatile ("roxll #1,%0" : "=d" (r2) : "0" (r2));\
! 			   asm volatile ("roxll #1,%0" : "=d" (r1) : "0" (r1)); }
! 
! #define ASR2(r1, r2) { asm volatile ("asrl #1,%0" : "=d" (r1) : "0" (r1));\
! 		       asm volatile ("roxrl #1,%0" : "=d" (r2) : "0" (r2)); }
! #define ASR3(r1, r2, r3) { asm volatile ("asrl #1,%0" : "=d" (r1) : "0" (r1));\
! 			   asm volatile ("roxrl #1,%0" : "=d" (r2) : "0" (r2));\
! 			   asm volatile ("roxrl #1,%0" : "=d" (r3) : "0" (r3)); }
! #define ASR4(r1, r2, r3, r4) { asm volatile ("asrl #1,%0" : "=d" (r1) : "0" (r1));\
! 			       asm volatile ("roxrl #1,%0" : "=d" (r2) : "0" (r2));\
! 			       asm volatile ("roxrl #1,%0" : "=d" (r3) : "0" (r3));\
! 			       asm volatile ("roxrl #1,%0" : "=d" (r4) : "0" (r4)); }
  
  #define ADD2(r1, r2, r3, r4) \
  	{ asm volatile ("addl %2,%0": "=g" (r4) : "0" (r4) , "g" (r2)); \
***************
*** 71,79 ****
  
  /* y <- y - x  */
  #define SUB3(x1, x2, x3, y1, y2, y3) \
! 	{ asm volatile ("subl %2,%0": "=g" (y3) : "g" (y3) , "d" (x3)); \
! 	  asm volatile ("subxl %2,%0": "=g" (y2) : "g" (y2) , "d" (x2));\
! 	  asm volatile ("subxl %2,%0": "=g" (y1) : "g" (y1) , "d" (x1)); }
  
  /* sub4 here is rather complex, as the compiler is overwhelmed by me wanting
     to have 8 data registers allocated for mantissa accumulators.  Help it out
--- 71,79 ----
  
  /* y <- y - x  */
  #define SUB3(x1, x2, x3, y1, y2, y3) \
! 	{ asm volatile ("subl %2,%0": "=g" (y3) : "0" (y3) , "d" (x3)); \
! 	  asm volatile ("subxl %2,%0": "=g" (y2) : "0" (y2) , "d" (x2));\
! 	  asm volatile ("subxl %2,%0": "=g" (y1) : "0" (y1) , "d" (x1)); }
  
  /* sub4 here is rather complex, as the compiler is overwhelmed by me wanting
     to have 8 data registers allocated for mantissa accumulators.  Help it out
***************
*** 80,96 ****
     by declaring a temp that it can move stuff in and out of.  */
  #define SUB4(x1, x2, x3, x4, y1, y2, y3, y4) \
  	{ register long temp = y4; \
! 	  asm volatile ("subl %2,%0": "=d" (temp) : "d" (temp) , "d" (x4)); \
  	  y4 = temp; temp = y3; \
! 	  asm volatile ("subxl %2,%0": "=d" (temp) : "d" (temp) , "d" (x3));\
  	  y3 = temp; temp = y2; \
! 	  asm volatile ("subxl %2,%0": "=d" (temp) : "d" (temp) , "d" (x2));\
  	  y2 = temp; temp = y1; \
! 	  asm volatile ("subxl %2,%0": "=d" (temp) : "d" (temp) , "d" (x1));\
  	  y1 = temp; }
  
! #define NEG(r1, r2) { asm volatile ("negl %0" : "=d" (r2) : "d" (r2)); \
! 		      asm volatile ("negxl %0" : "=d" (r1) : "d" (r1)); } 
  
  /* switches for which routines to compile.  All the single-float and
  long-int arithmetic routines are turned off here, as they were all
--- 80,96 ----
     by declaring a temp that it can move stuff in and out of.  */
  #define SUB4(x1, x2, x3, x4, y1, y2, y3, y4) \
  	{ register long temp = y4; \
! 	  asm volatile ("subl %2,%0": "=d" (temp) : "0" (temp) , "d" (x4)); \
  	  y4 = temp; temp = y3; \
! 	  asm volatile ("subxl %2,%0": "=d" (temp) : "0" (temp) , "d" (x3));\
  	  y3 = temp; temp = y2; \
! 	  asm volatile ("subxl %2,%0": "=d" (temp) : "0" (temp) , "d" (x2));\
  	  y2 = temp; temp = y1; \
! 	  asm volatile ("subxl %2,%0": "=d" (temp) : "0" (temp) , "d" (x1));\
  	  y1 = temp; }
  
! #define NEG(r1, r2) { asm volatile ("negl %0" : "=d" (r2) : "0" (r2)); \
! 		      asm volatile ("negxl %0" : "=d" (r1) : "0" (r1)); } 
  
  /* switches for which routines to compile.  All the single-float and
  long-int arithmetic routines are turned off here, as they were all


Note -- you have to rebuild the compiler with the new routines to get
correct floating point constants in your programs.
-- 
Piet van Oostrum, Dept of Computer Science, University of Utrecht
Padualaan 14, P.O. Box 80.089, 3508 TB Utrecht, The Netherlands
Telephone: +31-30-531806. piet@cs.ruu.nl (mcvax!hp4nl!ruuinf!piet)