[net.bugs.4bsd] two fixes for awk

sutton@daemon.UUCP (Carl Sutton) (04/11/85)

Synopsis:	awk does odd things with substr or field setting

Description:
	Recently, I fixed a bug that was causing `substr' to misbehave or
	fields to remain unset in the record.  There is some history.
	Apparently some time ago there was a fix to the field problem 
	posted to the net.  It was a hack fix involving 
	redefining an `EMPTY' string to be a null pointer 
	(as opposed to a pointer to a zero length string).
	This historic fix causes a problem with `substr' which 
	can core dump or otherwise misbehave with a null pointer.  

Repeat by:
	Repeating the bug depends on whether the earlier fix was 
	installed.  Try each of the following.

	1. Create a file with a bunch of empty lines (newlines only)
	   and run the following command:

	   awk '{s = substr($1, 1, 1)}' file

	2. Try the following:

	   echo 'now is the time' >file
	   awk '{ $1 = "then"; print $0 }' file

Fix:
	Back out the historic fix if it was installed.  Following are
	diffs for backing out the old fix in awk.def and lib.c
	and installing the new in tran.c.  You will probably want
	to install these by hand.

	The proper fix involves ferreting out the actual inconsistency 
	in the source code in the determination of an `EMPTY' string.


RCS file: RCS/lib.c,v
retrieving revision 1.2
retrieving revision 1.3
diff -c -r1.2 -r1.3
*** /tmp/,RCSt1008065	Wed Apr 10 15:44:38 1985
--- /tmp/,RCSt2008065	Wed Apr 10 15:44:48 1985
***************
*** 1,8
  /*
   *	$Log:	lib.c,v $
!  *	Revision 1.2  84/10/24  18:58:20  root
!  *	Part of fix to change EMPTY[] to #define EMPTY NULL
!  *	-rdoty@tek
   *	
   *	Revision 1.1  84/09/10  16:37:59  root
   *	Initial revision

--- 1,9 -----
  /*
   *	$Log:	lib.c,v $
!  *	Revision 1.3  85/02/22  16:16:22  sutton
!  *	This is revision 1.1 retrieved
!  *	from the past.  The purpose is to
!  *	back out the fix made in 1.2.
   *	
   *	Revision 1.1  84/09/10  16:37:59  root
   *	Initial revision
***************
*** 22,27
  #define	RECSIZE	(5 * 512)
  char	record[RECSIZE];
  char	fields[RECSIZE];
  
  #define	MAXFLD	100
  int	donefld;	/* 1 = implies rec broken into fields */

--- 23,29 -----
  #define	RECSIZE	(5 * 512)
  char	record[RECSIZE];
  char	fields[RECSIZE];
+ char	EMPTY[] = "";
  
  #define	MAXFLD	100
  int	donefld;	/* 1 = implies rec broken into fields */

RCS file: RCS/awk.def,v
retrieving revision 1.2
retrieving revision 1.3
diff -c -r1.2 -r1.3
*** /tmp/,RCSt1008107	Wed Apr 10 15:46:47 1985
--- /tmp/,RCSt2008107	Wed Apr 10 15:46:52 1985
***************
*** 1,8
  /*
   *	$Log:	awk.def,v $
!  *	Revision 1.2  84/10/24  18:59:21  root
!  *	Part of fix to change EMPTY[] to #define EMPTY NULL
!  *	-rdoty@tek
   *	
   *	Revision 1.1  84/09/10  16:42:58  root
   *	Initial revision

--- 1,9 -----
  /*
   *	$Log:	awk.def,v $
!  *	Revision 1.3  85/02/22  16:17:48  sutton
!  *	This is revision 1.1 retrieved
!  *	from the past.  The purpose is to
!  *	back out the fix made in 1.2.
   *	
   *	Revision 1.1  84/09/10  16:42:58  root
   *	Initial revision
***************
*** 11,17
  /*	awk.def	4.2	83/02/09	*/
  
  #define hack	int
- #define EMPTY	NULL
  #define	AWKFLOAT	float
  #define	xfree(a)	{ if(a!=NULL) { yfree(a); a=NULL;} }
  #define	strfree(a)	{ if(a!=NULL && a!=EMPTY) { yfree(a);} a=EMPTY; }

--- 12,17 -----
  /*	awk.def	4.2	83/02/09	*/
  
  #define hack	int
  #define	AWKFLOAT	float
  #define	xfree(a)	{ if(a!=NULL) { yfree(a); a=NULL;} }
  #define	strfree(a)	{ if(a!=NULL && a!=EMPTY) { yfree(a);} a=EMPTY; }
***************
*** 33,38
  extern char	**FILENAME;
  
  extern char	record[];
  extern int	dbg;
  extern int	lineno;
  extern int	errorflag;

--- 33,39 -----
  extern char	**FILENAME;
  
  extern char	record[];
+ extern char	EMPTY[];
  extern int	dbg;
  extern int	lineno;
  extern int	errorflag;

RCS file: RCS/tran.c,v
retrieving revision 1.1
retrieving revision 1.2
diff -c -r1.1 -r1.2
*** /tmp/,RCSt1007086	Wed Apr 10 14:49:29 1985
--- /tmp/,RCSt2007086	Wed Apr 10 14:49:37 1985
***************
*** 1,5
  /*
   *	$Log:	tran.c,v $
   *	Revision 1.1  84/09/10  16:39:49  root
   *	Initial revision
   *	

--- 1,15 -----
  /*
   *	$Log:	tran.c,v $
+  *	Revision 1.2  85/02/22  16:19:52  sutton
+  *	Fixed an inconsistency in setsval and
+  *	setfval.  The nval member of struct cell
+  *	is initialized to EMPTY.  setsval and
+  *	setfval were testing (nval == 0) to decide
+  *	whether the cell whose value was set was
+  *	a field.  If a field is set, then the record
+  *	must be rebuilt when next refered to in
+  *	the awk script.
+  *	
   *	Revision 1.1  84/09/10  16:39:49  root
   *	Initial revision
   *	
***************
*** 142,148
  		error(FATAL, "can't set $0");
  	vp->tval &= ~STR;	/* mark string invalid */
  	vp->tval |= NUM;	/* mark number ok */
! 	if ((vp->tval & FLD) && vp->nval == 0)
  		donerec = 0;
  	return(vp->fval = f);
  }

--- 152,158 -----
  		error(FATAL, "can't set $0");
  	vp->tval &= ~STR;	/* mark string invalid */
  	vp->tval |= NUM;	/* mark number ok */
! 	if ((vp->tval & FLD) && vp->nval == EMPTY)
  		donerec = 0;
  	return(vp->fval = f);
  }
***************
*** 157,163
  		error(FATAL, "can't set $0");
  	vp->tval &= ~NUM;
  	vp->tval |= STR;
! 	if ((vp->tval & FLD) && vp->nval == 0)
  		donerec = 0;
  	if (!(vp->tval&FLD))
  		strfree(vp->sval);

--- 167,173 -----
  		error(FATAL, "can't set $0");
  	vp->tval &= ~NUM;
  	vp->tval |= STR;
! 	if ((vp->tval & FLD) && vp->nval == EMPTY)
  		donerec = 0;
  	if (!(vp->tval&FLD))
  		strfree(vp->sval);

**************************************************************
Synopsis:	awk dies on a floating exception

Description:
	awk attempts to determine whether each field in
	an input record may properly be interpreted as
	a floating point number.  If the magnitude of the
	potential number is too large or too small, then
	the number is left as a string.

	`isnumber' is incorrectly calculating the
	magnitude of the number and thus allowing
	awk to attempt to convert a number that is
	actually too large.
	
To Reproduce:
	echo 8300E36 >xx
	awk '{ print $1 }' xx
	Floating Exception (core dumped)

Fix:
	Make `isnumber' correctly determine the magnitude of
	a potential number.

RCS file: RCS/lib.c,v
retrieving revision 1.1
retrieving revision 1.4
diff -c -r1.1 -r1.4
*** /tmp/,RCSt1007074	Wed Apr 10 14:48:32 1985
--- /tmp/,RCSt2007074	Wed Apr 10 14:48:40 1985
***************
*** 1,5
  /*
   *	$Log:	lib.c,v $
   *	Revision 1.1  84/09/10  16:37:59  root
   *	Initial revision
   *	

--- 1,19 -----
  /*
   *	$Log:	lib.c,v $
+  *	Revision 1.4  85/03/20  13:08:20  sutton
+  *	Routine isnumber() attempts to determine
+  *	the magnitude of a potential number before
+  *	actual conversion of the string to a value.
+  *	The magnitude was incorrectly calculated.
+  *	This was causing atof() to be called for an
+  *	argument that could potentially not fit in
+  *	the type `awkfloat'.
+  *	
+  *	Revision 1.3  85/02/22  16:16:22  sutton
+  *	This is revision 1.1 retrieved
+  *	from the past.  The purpose is to
+  *	back out the fix made in 1.2.
+  *	
   *	Revision 1.1  84/09/10  16:37:59  root
   *	Initial revision
   *	
***************
*** 279,285
  		} while (isdigit(*s));
  		if (s - es > 2)
  			return(0);
! 		else if (s - es == 2 && 10 * (*es-'0') + *(es+1)-'0' >= MAXEXPON)
  			return(0);
  	}
  	while (*s == ' ' || *s == '\t' || *s == '\n')

--- 293,299 -----
  		} while (isdigit(*s));
  		if (s - es > 2)
  			return(0);
! 		else if (s - es == 2 && 10 * (*es-'0') + *(es+1)-'0' + d1 >= MAXEXPON)
  			return(0);
  	}
  	while (*s == ' ' || *s == '\t' || *s == '\n')

***************************************************
Carl Sutton	
tektronix!sutton

jaffe@topaz.ARPA (Saul) (04/12/85)

Thanks for your help and thanks to everyone else who also responded
with fixes for awk.  It works now!!!!!!!!
-- 
Saul Jaffe
Systems Programmer
Rutgers University
ARPA: Jaffe@Rutgers
UUCP: ...{harvard,seismo,ut-sally,ihnp4!packard}!topaz!jaffe