sutela@polaris.utu.fi (Kari Sutela) (01/07/91)
This is my version of the _cliparse() function for the Aztec C compiler. You need the original cliparse.c from the 5.0 distribution diskettes and the patch program to extract a working version. This version properly supports quoted tokens and preprocesses the escaped characters inside quoted tokens. Use any way you want, but redistribute as a patch only. I'll appreciate any comments, bug reports etc. Kari Sutela sutela@polaris.utu.fi # This is a shell archive. # Remove everything above and including the cut line. # Then run the rest of the file through sh. #----cut here-----cut here-----cut here-----cut here----# #!/bin/sh # shar: Shell Archiver # Run the following text with /bin/sh to create: # README # Makefile # cliparse.diff # myecho.c # testexecute.c # This archive created: Sun Jan 6 20:52:41 1991 sed 's/^X//' << \SHAR_EOF > README XThis is my version of cliparse.c which I wrote when I realized that Aztec's X_cliparse() doesn't properly support quoted argument tokens. For example, Xin a quoted token the first doublequote would end the token whether the Xquote was escaped (with `*') or not. Another disadvantage was the fact that X_cliparse() would always strip the surrounding doublequotes. This fact Xwould make it impossible for a `list'-like program to distinguish between Xargument tokens FILES (a keyword) and "FILES" (a filename). X XMy version strips the surrounding quotes only if the quotes were `required'. XQuotes are required if the quoted token has a space or an escaped character. XFor example, this version won't strip the quotes from "aToken". X XThis version also preparses the escaped characters. To my knowledge the Xstandard escape-sequences are: X X "*"" => '"' X "**" => '*' X "*n" => '\n' X "*N" => '\n' X "*e" => '\033' (the ASCII-escape) X "*E" => '\033' (the ASCII-escape) X "*x" => 'x' where x is any other character X XPlease, let me know if there are some escape-sequences which I have Xforgotten. 
X XI distribute my version of _cliparse() as a diff-file because it contains Xsome code which is copyrighted by Manx Software Systems. To extract the X`better' cliparse.c, you must first copy the Manx-supplied cliparse.c to Xthe directory where you put cliparse.diff. BTW, I used cliparse.c from the X5.0a distribution diskettes---I hope they haven't changed it since that Xdistribution. X XYou also need the "patch" program. When you have both cliparse.diff and the XManx-supplied cliparse.c in the same directory, type: X patch <cliparse.diff XNow you should have files cliparse.c (the `better' one) and cliparse.c.orig X(the old one). Just compile cliparse.c as usual and, when you want to parse Xthe CLI arguments correctly, link cliparse.o before the normal C library. XFor example, "ln whatever cliparse -lc". X XPLEASE, DON'T REDISTRIBUTE THE RESULTING cliparse.c. You may, however, Xredistribute the diff-file. X XThe new cliparse.c has some debugging statements which can be enabled by X#defining DEBUG. If you suspect a bug somewhere, you might try compiling Xwith "cc -dDEBUG cliparse". X XIf you notice any bugs or problems, please let me know. 
X X Kari Sutela (email: sutela@polaris.utu.fi) SHAR_EOF sed 's/^X//' << \SHAR_EOF > Makefile Xmyecho: myecho.o cliparse.o X ln myecho cliparse -lc X Xtestexecute: testexecute.o myecho X ln testexecute cliparse -lc X Xtest: c:echo myecho X c:echo "aaa aaa" X myecho "aaa aaa" X c:echo "aaa*"***naaa" X myecho "aaa*"***naaa" X c:echo "aaa*e[33mbbb*e[31mccc" X myecho "aaa*e[33mbbb*e[31mccc" X @c:echo "Here's the exception:" X c:echo "aaa" X myecho "aaa" X @c:echo "Doing invalid argument test:" X -c:echo "aaa"aaa X -myecho "aaa"aaa X Xtest2: testexecute myecho X testexecute "*"aaa aaa*"" X testexecute "*"*e[33maaa***"*e[31mbbb*"" X testexecute "*"*e[33maaa***"*e[31mbbb" X testexecute "*"aaa*"" SHAR_EOF sed 's/^X//' << \SHAR_EOF > cliparse.diff X*** cliparse.c Sat Jan 05 16:53:13 1991 X--- bcliparse.c Sun Jan 06 13:13:59 1991 X*************** X*** 16,17 **** X--- 16,48 ---- X X+ /* X+ * Patched by Kari Sutela to enable one to differentiate between X+ * aToken and "aToken". Also AmigaDOS escaped characters in quoted X+ * tokens are preparsed. X+ * X+ * To my knowledge the escaped characters are: X+ * '*"' ==> '"' X+ * '**' ==> '*' X+ * '*n' or '*N' ==> '\n' X+ * '*e' or '*E' ==> '\033' (escape) X+ * '*x' ==> 'x', where x is any other character. X+ * X+ * I use this `heuristic' to decide whether the quotes should be left X+ * around the token: if there was an obvious need to quote the token X+ * the quotes will be stripped. There is an obvious need if the token X+ * contains a space or an escaped character. Examples: X+ * "one token" ==> one token X+ * "oneToken" ==> "oneToken" X+ * "one*"Token" ==> one"Token X+ * "one*Token" ==> oneToken X+ * X+ * Compile normally and include the resulting object file in the ln-command X+ * line before `-lc'. Example: "ln whatever cliparse -lc". X+ * X+ * Use any way you want provided you don't distribute the source file X+ * (which contains code copyrighted by Manx Software Systems). You may X+ * redistribute the patch file however. 
X+ * X+ * Bug reports and comments to sutela@polaris.utu.fi X+ */ X+ X #include <libraries/dosextens.h> X*************** X*** 20,21 **** X--- 51,96 ---- X X+ #include <ctype.h> X+ /* BOOL is here */ X+ #include <exec/types.h> X+ X+ /* X+ * You might want to "cc -dDEBUG cliparse" if you suspect you have found X+ * a bug somewhere. X+ */ X+ #ifdef DEBUG X+ long Write(BPTR file, char *buffer, long length); X+ BPTR Output(void); X+ #define dMSG( str, val ) \ X+ { \ X+ char buf[256]; \ X+ \ X+ sprintf( &buf[0], "%s = %d\n", ((char *) str), (val) ); \ X+ Write( Output(), buf, strlen(buf) ); \ X+ } X+ #define wMSG( str, haddr, cval ) \ X+ { \ X+ char buf[256]; \ X+ \ X+ if(cval) \ X+ sprintf( &buf[0], "%s: wrote at addr %x, intval = %d, charval = %c\n",(str),(haddr),(int) (cval),(cval)); \ X+ else \ X+ sprintf( &buf[0], "%s: wrote at addr %x, intval = %d, null-character\n", (str),(haddr),(int) (cval)); \ X+ Write( Output(), buf, strlen(buf) ); \ X+ } X+ #define hMSG( str, val ) \ X+ { \ X+ char buf[256]; \ X+ \ X+ sprintf( &buf[0], "%s = %x\n", (str), (val) ); \ X+ Write(Output(),buf,strlen(buf)); \ X+ } X+ #define sMSG( str, strval ) \ X+ { \ X+ char buf[2000]; \ X+ \ X+ sprintf( &buf[0], "%s \"%s\"\n",(str),(strval) ); \ X+ Write(Output(),buf,strlen(buf)); \ X+ } X+ #endif X+ X extern int _argc, _arg_len; X*************** X*** 31,32 **** X--- 106,116 ---- X X+ #ifdef DEBUG X+ dMSG( "argument length (from alen)", alen ); X+ sMSG( "aptr contains", aptr ); X+ sMSG( "If launched with Aztec's Make, aptr seems to contain", X+ "unnecessary characters" X+ ); X+ sMSG( "Execute() seems to provide a correct argument line", "" ); X+ #endif X+ X if (pp->pr_CLI) { X*************** X*** 37,39 **** X cp = _detach_name; X! _arg_len = cp[0]+alen+2; X if ((_arg_lin = AllocMem((long)_arg_len, 0L)) == 0) X--- 121,161 ---- X cp = _detach_name; X! #ifdef DEBUG X! dMSG( "argument length (with strlen())", strlen(aptr) ); X! dMSG( "command name length (with strlen())", strlen(cp+1) ); X! 
dMSG( "command name length (from process struct)", cp[0] ); X! #endif X! X! /* X! * We must allocate a bit more memory to allow for `terse' closing X! * quotes such as "Token"AnotherToken. This string will be parsed X! * into two tokens: "Token" and AnotherToken. X! * X! * This isn't too accurate (can allocate a bit more than really needed). X! */ X! { X! int terseclose=0; X! char *tempcp = aptr; X! BOOL open = FALSE; X! X! while( c = *tempcp++ ) { X! if( open ) { X! if( c == '"' ) { X! if( !isspace(*tempcp) && *tempcp != '\0' ) X! terseclose++; X! open = FALSE; X! } X! } else { X! open = (c == '"'); X! } X! } X! X! /* X! * OK! We have calculated the extra allocation requirements. X! */ X! _arg_len = cp[0]+alen+2+terseclose; X! #ifdef DEBUG X! dMSG( "terseclose", terseclose ); X! dMSG( "total _arg_len", _arg_len ); X! #endif X! } X if ((_arg_lin = AllocMem((long)_arg_len, 0L)) == 0) X*************** X*** 40,47 **** X return; X c = cp[0]; X strncpy(_arg_lin, cp+1, (size_t)c); X strcpy(_arg_lin+c, " "); X! strncat(_arg_lin, aptr, (size_t)alen); X _arg_lin[c] = 0; X! for (_argc=1,aptr=cp=_arg_lin+c+1;;_argc++) { X while ((c=*cp) == ' ' || c == '\t' || c == '\f' || X--- 162,217 ---- X return; X+ #ifdef DEBUG X+ dMSG( "Allocated bytes", _arg_len ); X+ hMSG( "_arg_lin starts @", _arg_lin ); X+ hMSG( "_arg_lin last legal position @", _arg_lin+_arg_len-1 ); X+ #endif X c = cp[0]; X+ X+ /* X+ * First, copy the command name! X+ */ X strncpy(_arg_lin, cp+1, (size_t)c); X+ #ifdef DEBUG X+ dMSG( "copied bytes to _arg_lin", c ); X+ #endif X strcpy(_arg_lin+c, " "); X! #ifdef DEBUG X! hMSG( "Copied a blank to address", _arg_lin+c ); X! #endif X! /* X! * Then the rest of the command line! X! * Actually, I don't catenate the argument string after _arg_lin; X! * I just use aptr and write from it to _arg_lin one character X! * at a time. But I must overwrite the '\n' at aptr[alen-1] with X! * a '\0' as Aztec Make launches seem to provide some extra characters X! 
* (which I'd better not overwrite---who knows where they belong to). X! * The last `real' character at end of the command line always seems to X! * be a '\n'. X! */ X! aptr[(size_t)alen-1] = '\0'; X _arg_lin[c] = 0; X! X! /* X! * The arguments! X! * X! * Here cp points to the original argument string (initially was aptr) X! * and aptr is used to write the characters to the resulting command X! * line. cp is used to read, aptr to write. X! */ X! for (_argc=1,cp=aptr,aptr=_arg_lin+c+1;;_argc++) { X! X! /* X! * This variable will tell if there was a need to quote the token. X! */ X! BOOL required; X! X! #ifdef DEBUG X! dMSG( "Token #", _argc ); X! hMSG( "cp (read position)", cp ); X! hMSG( "aptr (write position)", aptr ); X! #endif X! X! /* X! * First, skip white space! X! */ X while ((c=*cp) == ' ' || c == '\t' || c == '\f' || X*************** X*** 49,50 **** X--- 219,227 ---- X cp++; X+ #ifdef DEBUG X+ hMSG( "Skipped white space: cp (read position)", cp ); X+ #endif X+ X+ /* X+ * Break if at end of command line. X+ */ X if (*cp < ' ') X*************** X*** 51,63 **** X break; X if (*cp == '"') { X! cp++; X while (c = *cp++) { X! *aptr++ = c; X! if (c == '"') { X! if (*cp == '"') X! cp++; X! else { X! aptr[-1] = 0; X! break; X } X } X--- 228,346 ---- X break; X+ X+ /* X+ * Copy to end of token. X+ */ X if (*cp == '"') { X! X! char *tempcp; X! BOOL escapenext; X! X! #ifdef DEBUG X! sMSG( "a quoted token starts", cp ); X! #endif X! X! /* X! * Don't advance, yet. First must find a space or an X! * escaped character! X! */ X! required = FALSE; X! escapenext = FALSE; X! for( tempcp = cp+1; c = *tempcp; tempcp++ ) { X! if(!required) X! required = isspace(c) || (c == '*'); X! if(!escapenext && c == '"') X! break; X! escapenext = (c == '*'); X! } X! #ifdef DEBUG X! dMSG( "required", required ); X! #endif X! X! /* X! * Now advance! X! * If there was a real need to quote the token, we don't X! * want to leave the quotes around the token. On the other X! 
* hand, if no reason was found, the quotes won't be stripped. X! */ X! if(required) X! cp++; X! else { X! #ifdef DEBUG X! wMSG( "not required",aptr,*cp); X! #endif X! *aptr++ = *cp++; X! } X! X! X! /* X! * Copy the token until the closing quote! X! */ X! escapenext = FALSE; X while (c = *cp++) { X! X! /* X! * Replace escaped characters! X! */ X! if(escapenext) { X! switch( c ) { X! case 'n': X! case 'N': X! #ifdef DEBUG X! wMSG( "an escaped NL", aptr, '\n' ); X! #endif X! *aptr++ = '\n'; X! break; X! case 'e': X! case 'E': X! #ifdef DEBUG X! wMSG( "an escaped escape", aptr, 'e' ); X! #endif X! *aptr++ = '\033'; X! break; X! default: X! #ifdef DEBUG X! wMSG( "an escaped character", aptr, c ); X! #endif X! *aptr++ = c; X } X+ escapenext = FALSE; X+ continue; X+ } else { X+ if( c != '*' ) { X+ #ifdef DEBUG X+ wMSG( "an ordinary character",aptr,c ); X+ #endif X+ *aptr++ = c; X+ escapenext = FALSE; X+ } else { X+ #ifdef DEBUG X+ sMSG( "begin escaped", "" ); X+ #endif X+ escapenext = TRUE; X+ continue; X+ } X+ } X+ X+ if (c == '"') X+ break; X+ X+ } /* while( c = *cp++) */ X+ X+ /* X+ * At the end of the token. Leave the closing quote if no X+ * real need was found. X+ */ X+ if( c == '"' ) { X+ if(required) { X+ #ifdef DEBUG X+ sMSG( "required quote", "" ); X+ wMSG( "strip it", aptr-1, 0 ); X+ #endif X+ aptr[-1] = 0; X+ } else { X+ #ifdef DEBUG X+ sMSG( "unnecessary quote", "" ); X+ wMSG( "Don't strip", aptr, 0 ); X+ #endif X+ *aptr++ = 0; X } X*************** X*** 64,65 **** X--- 347,349 ---- X } X+ X } X*************** X*** 66,70 **** X else { X while ((c=*cp++) && c != ' ' && c != '\t' && c != '\f' && X! c != '\r' && c != '\n') X *aptr++ = c; X *aptr++ = 0; X--- 350,364 ---- X else { X+ #ifdef DEBUG X+ sMSG( "An unquoted token starts", cp ); X+ #endif X while ((c=*cp++) && c != ' ' && c != '\t' && c != '\f' && X! c != '\r' && c != '\n') { X! #ifdef DEBUG X! wMSG( "copy until white space", aptr, c ); X! 
#endif X *aptr++ = c; X+ } X+ #ifdef DEBUG X+ wMSG( "terminate token", aptr, 0 ); X+ #endif X *aptr++ = 0; X*************** X*** 74,75 **** X--- 368,372 ---- X } X+ #ifdef DEBUG X+ wMSG( "terminate string", aptr, 0 ); X+ #endif X *aptr = 0; SHAR_EOF sed 's/^X//' << \SHAR_EOF > myecho.c X/* X * A test program for my cliparse.c --- this should behave exactly like X * the AmigaDOS echo-command. Of course, there's an exception: X * echo "oneToken" != myecho "oneToken" X * (the patched _cliparse() keeps the quotes around a quoted token that X * contains neither a space nor an escaped character). X * X * Kari Sutela X */ X#ifdef DEBUG Xextern int _arg_len; X#endif X Xint main(int ac, char *av[]) /* prints av[1] with puts() and returns 0 when ac == 2; otherwise prints an error message and returns 20 */ X{ X#ifdef DEBUG X printf( "BTW, _arg_len = %d\n", _arg_len ); X#endif X if( ac != 2 ) { X puts("Invalid argument to MYECHO"); X return 20; X } else { X puts(av[1]); X return 0; X } X} SHAR_EOF sed 's/^X//' << \SHAR_EOF > testexecute.c X#include <functions.h> X Xmain( int ac, char *av[] ) /* formats the command line "myecho <av[1]>" into buf and runs it with Execute(), ignoring the result; returns 20 when no argument is given, 0 otherwise. NOTE(review): the sprintf into buf[1000] is unbounded, so a sufficiently long av[1] would overflow buf -- confirm whether a length check is wanted. */ X{ X char buf[1000] = {'\0'}; X X if( ac >= 2 ) X sprintf( &buf[0], "%s %s", "myecho", av[1] ); X else X return 20; X (void) Execute(buf,0,0); X return 0; X} SHAR_EOF # End of shell archive exit 0 -- Kari Sutela sutela@polaris.utu.fi