daves@ios.UUCP (David B. Schnepper) (08/17/84)
From: Dave Schnepper <ios!daves> Subject: xstr has problems in special cases of string parsing. Fixes included, also several enhancements. Index: /usr/src/cmd/xstr.c, 4.1 BSD Description: Several bugs here: 1) xstr cannot handle a string constant that extends over multiple lines by placing a "\" at the end of a line. (Causes a compiler error) Reference: The C Programming Language, page 181. 2) "\'" is treated as "'" in cc, but not in xstr. (ibid.) 3) "\a" is treated as "a" in cc, but not in xstr. (ibid.) 4) Strings are defined as 'a sequence of characters surrounded by double quotes, as in "..." ' (ibid.) xstr will truncate strings at the first embedded null in the string. Thus "a\0b" is the same as "a". While this is not strictly a bug, it does interfere with the program I was trying to use xstr on. (No, don't bother to ask why I have nulls embedded in strings... ) 5) xstr will take the valid C construct: char array[] = "This goes in it"; and change it to: char array[] = (&xstr[1234]); which causes a compiler error. This bug does NOT have a fix, but is mentioned for completeness. Enhancements: 1) "-x name" option to specify the name of the structure containing the extracted strings. This makes it possible to do xstr separately on several modules of a large program. Note: all files must use the same "-x" option, including the "xs.c" file. ("xstr" default) 2) "-o file" option to specify an output file other than x.c. Having "x.c" as the compiled file for several files in a module will break sdb ("x.c" default). 3) "-s file" option to specify a "strings" database other than the file "strings" ("strings" default) 4) "-a file" option to specify the name of the "xs.c" file. ("xs.c" default). Repeat-By: #include <stdio.h> /* * string1 & string2 are the same in some ways, string * constants containing nulls are not well defined. 
*/ char *string1 = "One\0I am!"; char *string2 = "One\0I'm not"; main() { /* should print "This is a string" */ printf( "This is \a string\n" ); /* should print "This is ' a string" */ printf( "This is \' a string\n" ); if (string1 == string2) printf( "The strings are equal\n" ); /* xstr */ else printf( "The strings are NOT equal\n" ); /* cc, & new xstr */ } #ifdef DONT_DEFINE /* The below cause compilation errors in xstr, both old and new */ char compile1[] = "This goes in an array"; char compile2[ 100 ] = "This goes in an array"; /* This causes a compile error in OLD xstr, no error in new */ char *compile3 = "This is the first part:\ and this is the second part of the string"; #endif DONT_DEFINE Fix: Bugs #2 & #3 were simple problems in the yankstr() routine. Bugs #1 & #4 have been fixed by having xstr NOT try to extract strings that either have embedded nulls or extend over more than one line. (doing a proper fix would take more time than I cared to spend on it). I have not attempted to fix bug #5. Possible approach: the command /* UNIQUE */ before a string constant will cause xstr to NOT extract the string. Diffs: (These diffs are against "xstr" as distributed with 4.1 BSD). 1c1 < static char *sccsid = "@(#)xstr.c 4.2 (Berkeley) 8/16/84"; --- > static char *sccsid = "@(#)xstr.c 4.2 (Berkeley) 5/7/81"; 12,23d11 < * < * Modified 16 August 1984 to allow specification of output < * file name and name of the "shared" string structure. < * Several bugs fixed: < * "\a" was not the same as "a" < * "\'" was not the same as "'" < * strings extending over more than one line caused cc errors < * strings were terminated by an embedded null. < * Not fixed: < * char array[] = "string"; will still cause cc errors. < * ios!daves (David B. 
Schnepper) < * 39,43c27 < char *strings = NULL; /* file to use as strings dbase */ < #define default_Strings "strings" < char *xstrName = "xstr"; /* variable where strings are stored */ < char *outputFile = "x.c"; /* output file name */ < char *stringName = "xs.c"; /* "strings" data base output name */ --- > char *strings = "strings"; 73,96d56 < case 's': < if (argc <= 0) usage(); < strings = *argv++; < argc--; < continue; < < case 'a': < if (argc <= 0) usage(); < stringName = *argv++; < argc--; < continue; < < case 'o': < if (argc <= 0) usage(); < outputFile = *argv++; < argc--; < continue; < < case 'x': < if (argc <= 0) usage(); < xstrName = *argv++; < argc--; < continue; < 98c58 < usage(); --- > fprintf(stderr, "usage: xstr [ -v ] [ -c ] [ - ] [ name ... ]\n"); 103,107c63 < < if (strings == NULL) < strings = default_Strings; < < if (cflg || ((argc == 0) && !readstd)) --- > if (cflg || argc == 0 && !readstd) 111d66 < 113,114c68,69 < if (freopen(outputFile, "w", stdout) == NULL) < perror(outputFile), exit(1); --- > if (freopen("x.c", "w", stdout) == NULL) > perror("x.c"), exit(1); 117c72 < process( outputFile ); --- > process("x.c"); 131,136d85 < usage() < { < fprintf(stderr, "usage: xstr [(-s|-o|-a) file]* [-x name] [ -v ] [ -c ] [ - ] [ name ... ]\n"); < exit( 1 ); < }; < 144,145d92 < register int inString = 0; /* inside of a non-yank-able string? 
*/ < int offset; 147c94 < printf("char\t%s[];\n", xstrName ); --- > printf("char\txstr[];\n"); 156,157d102 < if (lastchr( linebuf ) != '\n') < fprintf( stderr, "xstr: warning, line too long\n" ); 170,183c115 < if (inString) < { < inString = 0; < goto def; < } < /* see if the string can be yanked */ < offset = yankstr( &cp ); < if (offset != -1) < printf("(&%s[%d])", xstrName, (int) offset ); < else < { < inString = 1; < goto def; < }; --- > printf("(&xstr[%d])", (int) yankstr(&cp)); 187c119 < if (incomm || inString) --- > if (incomm) 197,198d128 < if (inString) < goto def; 205,206d134 < if (inString) < goto def; 222c150 < perror( outputFile ), onintr(); --- > perror("x.c"), onintr(); 247,248c175,176 < goto noCanDo; /* ignore multiline strings */ < for (tp = "''b\bt\tr\rn\nf\f\\\\\"\""; ch = *tp++; tp++) --- > continue; > for (tp = "b\bt\tr\rn\nf\f\\\\\"\""; ch = *tp++; tp++) 254,255c182,183 < goto gotc; /* ignore / if char not in set*/ < /* above, or if not digits */ --- > *dp++ = '\\'; > break; 268,269d195 < if (c == 0) < goto noCanDo; /* abort on strings with null */ 275,277d200 < noCanDo: < /* don't touch *cpp if can't store string */ < return( -1 ); 428c351 < xdotcf = fopen( stringName, "w"); --- > xdotcf = fopen("xs.c", "w"); 430,431c353,354 < perror( stringName ), exit(6); < fprintf(xdotcf, "char\t%s[] = {\n", xstrName ); --- > perror("xs.c"), exit(6); > fprintf(xdotcf, "char\txstr[] = {\n"); 503,504c426,427 < ignore(unlink( outputFile )); < ignore(unlink( stringName )); --- > ignore(unlink("x.c")); > ignore(unlink("xs.c"));