[net.bugs.4bsd] 4.1 xstr: parse bugs & enhancements

daves@ios.UUCP (David B. Schnepper) (08/17/84)

From: Dave Schnepper <ios!daves>

Subject: xstr has problems in special cases of string parsing.
	Fixes included, also several enhancements.
Index:	/usr/src/cmd/xstr.c, 4.1 BSD

Description:
	Several bugs here:
	1)	xstr cannot handle a string constant that extends
		over multiple lines by placing a "\" at the end
		of a line. (Causes a compiler error)
		Reference: The C Programming Language, page 181.
	2)	"\'" is treated as "'" in cc, but not in xstr. (ibid.)
	3)	"\a" is treated as "a" in cc, but not in xstr. (ibid.)
	4)	Strings are defined as 
			'a sequence of characters surrounded
			 by double quotes, as in "..." '       (ibid.)
		xstr will truncate strings at the first embedded null
		in the string.  Thus "a\0b" is the same as "a".  While
		this is not strictly a bug, it does interfere with the
		program I was trying to use xstr on. (No, don't bother
		to ask why I have nulls embedded in strings... )
	5)	xstr will take the valid C construct:
			char	array[] = "This goes in it";
		and change it to:
			char	array[] = (&xstr[1234]);
		which causes a compiler error.  This bug does NOT have
		a fix, but is mentioned for completeness.

Enhancements:
	1)	"-x name" option to specify the name of the structure containing
		the extracted strings.  This makes it possible to do xstr 
		separately on several modules of a large program.  Note: all 
		files must use the same "-x" option, including the "xs.c" file.
		("xstr" default)
	2)	"-o file" option to specify an output file other than x.c.
		Having "x.c" as the compiled file for several files in a module
		will break sdb ("x.c" default).
	3)	"-s file" option to specify a "strings" database other than
		the file "strings" ("strings" default)
	4)	"-a file" option to specify the name of the "xs.c" file.
		("xs.c" default).
		

Repeat-By:

	#include <stdio.h>

	/*
 	*	string1 & string2 are the same in some ways, string
 	*	constants containing nulls are not well defined.
 	*/
	char *string1 = "One\0I am!";
	char *string2 = "One\0I'm not";
	
	main()
	{
	    /* should print "This is a string" */
   	    printf( "This is \a string\n" );

	    /* should print "This is ' a string" */
   	    printf( "This is \' a string\n" );
	
   	    if (string1 == string2)
	        printf( "The strings are equal\n" );	/* xstr */
   	    else
	        printf( "The strings are NOT equal\n" ); /* cc, & new xstr */
	}
	
	#ifdef DONT_DEFINE
	/* The below cause compilation errors in xstr, both old and new */
	char compile1[] = "This goes in an array";
	char compile2[ 100 ] = "This goes in an array";
	
	/* This causes a compile error in OLD xstr, no error in new */
	char *compile3 = "This is the first part:\
 	and this is the second part of the string";
	#endif DONT_DEFINE

Fix:
	Bugs #2 & #3 were simple problems in the yankstr() routine.
	Bugs #1 & #4 have been fixed by having xstr NOT try to extract
	strings that either have embedded nulls or extend over more
	than one line. (doing a proper fix would take more time than
	I cared to spend on it).
	I have not attempted to fix bug #5.  Possible approach:
	   the command /* UNIQUE */ before a string constant will cause
	   xstr to NOT extract the string.

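Diffs:	(These diffs are against "xstr" as distributed with 4.1 BSD.
	Note the direction: lines marked "<" belong to the modified version
	and lines marked ">" belong to the original 4.1 BSD source.)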

1c1
< static char *sccsid = "@(#)xstr.c	4.2 (Berkeley) 8/16/84";
---
> static char *sccsid = "@(#)xstr.c	4.2 (Berkeley) 5/7/81";
12,23d11
<  *
<  *	Modified 16 August 1984 to allow specification of output
<  *	file name and name of the "shared" string structure.
<  *	Several bugs fixed:
<  *	   "\a" was not the same as "a"
<  *	   "\'" was not the same as "'"
<  *	   strings extending over more than one line caused cc errors
<  *	   strings were terminated by an embedded null.
<  *	Not fixed:
<  *	   char array[] = "string";	will still cause cc errors.
<  *	ios!daves	(David B. Schnepper)
<  *
39,43c27
< char	*strings =	NULL;		/* file to use as strings dbase */
< #define	default_Strings "strings"
< char	*xstrName =	"xstr";		/* variable where strings are stored */
< char	*outputFile =	"x.c";		/* output file name		*/
< char	*stringName =	"xs.c";		/* "strings" data base output name */
---
> char	*strings =	"strings";
73,96d56
< 		case 's':
< 			if (argc <= 0) usage();
< 			strings = *argv++;
< 			argc--;
< 			continue;
< 
< 		case 'a':
< 			if (argc <= 0) usage();
< 			stringName = *argv++;
< 			argc--;
< 			continue;
< 
< 		case 'o':
< 			if (argc <= 0) usage();
< 			outputFile = *argv++;
< 			argc--;
< 			continue;
< 
< 		case 'x':
< 			if (argc <= 0) usage();
< 			xstrName = *argv++;
< 			argc--;
< 			continue;
< 
98c58
< 			usage();
---
> 			fprintf(stderr, "usage: xstr [ -v ] [ -c ] [ - ] [ name ... ]\n");
103,107c63
< 
< 	if (strings == NULL)
< 	    strings = default_Strings;
< 
< 	if (cflg || ((argc == 0) && !readstd))
---
> 	if (cflg || argc == 0 && !readstd)
111d66
< 
113,114c68,69
< 		if (freopen(outputFile, "w", stdout) == NULL)
< 			perror(outputFile), exit(1);
---
> 		if (freopen("x.c", "w", stdout) == NULL)
> 			perror("x.c"), exit(1);
117c72
< 		process( outputFile );
---
> 		process("x.c");
131,136d85
< usage()
< {
< 	fprintf(stderr, "usage: xstr [(-s|-o|-a) file]* [-x name] [ -v ] [ -c ] [ - ] [ name ... ]\n");
< 	exit( 1 );
< };
< 
144,145d92
< 	register int inString = 0;	/* inside of a non-yank-able string? */
< 	int offset;
147c94
< 	printf("char\t%s[];\n", xstrName );
---
> 	printf("char\txstr[];\n");
156,157d102
< 		if (lastchr( linebuf ) != '\n')
< 			fprintf( stderr, "xstr: warning, line too long\n" );
170,183c115
< 			if (inString)
< 			{
< 				inString = 0;
< 				goto def;
< 			}
< 			  /* see if the string can be yanked */
< 			offset = yankstr( &cp );
< 			if (offset != -1)
< 			    printf("(&%s[%d])", xstrName, (int) offset );
< 			else
< 			{
< 			    inString = 1;
< 			    goto def;
< 			};
---
> 			printf("(&xstr[%d])", (int) yankstr(&cp));
187c119
< 			if (incomm || inString)
---
> 			if (incomm)
197,198d128
< 			if (inString)
< 				goto def;
205,206d134
< 			if (inString)
< 				goto def;
222c150
< 		perror( outputFile ), onintr();
---
> 		perror("x.c"), onintr();
247,248c175,176
< 				goto noCanDo;	/* ignore multiline strings */
< 			for (tp = "''b\bt\tr\rn\nf\f\\\\\"\""; ch = *tp++; tp++)
---
> 				continue;
> 			for (tp = "b\bt\tr\rn\nf\f\\\\\"\""; ch = *tp++; tp++)
254,255c182,183
< 				goto gotc;	/* ignore / if char not in set*/
< 						/* above, or if not digits */
---
> 				*dp++ = '\\';
> 				break;
268,269d195
< 		if (c == 0)
< 		    goto noCanDo;		/* abort on strings with null */
275,277d200
< noCanDo:
< 	/* don't touch *cpp if can't store string */
< 	return( -1 );
428c351
< 	xdotcf = fopen( stringName, "w");
---
> 	xdotcf = fopen("xs.c", "w");
430,431c353,354
< 		perror( stringName ), exit(6);
< 	fprintf(xdotcf, "char\t%s[] = {\n", xstrName );
---
> 		perror("xs.c"), exit(6);
> 	fprintf(xdotcf, "char\txstr[] = {\n");
503,504c426,427
< 	ignore(unlink( outputFile ));
< 	ignore(unlink( stringName ));
---
> 	ignore(unlink("x.c"));
> 	ignore(unlink("xs.c"));