sutela@polaris.utu.fi (Kari Sutela) (01/07/91)
This is my version of the _cliparse() function for the Aztec C compiler.
You need the original cliparse.c from the 5.0 distribution diskettes and
the patch program to extract a working version.
This version properly supports quoted tokens and preprocesses the escaped
characters inside quoted tokens.
Use it any way you want, but redistribute it as a patch only. I'd appreciate any
comments, bug reports, etc.
Kari Sutela sutela@polaris.utu.fi
# This is a shell archive.
# Remove everything above and including the cut line.
# Then run the rest of the file through sh.
#----cut here-----cut here-----cut here-----cut here----#
#!/bin/sh
# shar: Shell Archiver
# Run the following text with /bin/sh to create:
# README
# Makefile
# cliparse.diff
# myecho.c
# testexecute.c
# This archive created: Sun Jan 6 20:52:41 1991
sed 's/^X//' << \SHAR_EOF > README
XThis is my version of cliparse.c which I wrote when I realized that Aztec's
X_cliparse() doesn't properly support quoted argument tokens. For example,
Xin a quoted token the first doublequote would end the token whether the
Xquote was escaped (with `*') or not. Another disadvantage was the fact that
X_cliparse() would always strip the surrounding doublequotes. This fact
Xwould make it impossible for a `list'-like program to distinguish between
Xargument tokens FILES (a keyword) and "FILES" (a filename).
X
XMy version strips the surrounding quotes only if the quotes were `required'.
XQuotes are required if the quoted token has a space or an escaped character.
XFor example, this version won't strip the quotes from "aToken".
X
XThis version also preparses the escaped characters. To my knowledge the
Xstandard escape-sequences are:
X
X "*"" => '"'
X "**" => '*'
X "*n" => '\n'
X "*N" => '\n'
X "*e" => '\033' (the ASCII-escape)
X "*E" => '\033' (the ASCII-escape)
X "*x" => 'x' where x is any other character
X
XPlease let me know if there are any escape sequences which I have
Xforgotten.
X
XI distribute my version of _cliparse() as a diff-file because it contains
Xsome code which is copyright by Manx Software Systems. To extract the
X`better' cliparse.c, you first must copy the Manx-supplied cliparse.c to
Xthe directory where you put cliparse.diff. BTW, I used cliparse.c from the
X5.0a distribution diskettes---I hope they haven't changed it since that
Xdistribution.
X
XYou also need the "patch"-program. When you have both cliparse.diff and the
XManx-supplied cliparse.c in the same directory, type:
X patch <cliparse.diff
XNow you should have files cliparse.c (the `better' one) and cliparse.c.orig
X(the old one). Just compile cliparse.c as usual and when you want to parse
Xthe CLI-arguments correctly, link cliparse.o before the normal c-library.
XFor example, "ln whatever cliparse -lc".
X
XPLEASE, DON'T REDISTRIBUTE THE RESULTING cliparse.c. You may redistribute
Xthe diff-file however.
X
XThe new cliparse.c has some debugging statements which can be enabled by
X#defining DEBUG. If you suspect a bug somewhere, you might try compiling
Xwith "cc -dDEBUG cliparse".
X
XIf you notice any bugs or problems, please let me know.
X
X Kari Sutela (email: sutela@polaris.utu.fi)
SHAR_EOF
sed 's/^X//' << \SHAR_EOF > Makefile
X# Build and test rules for the patched cliparse.c.
X#
X# "ln" is the linker.  cliparse is named before -lc so that the patched
X# _cliparse() is linked ahead of the normal C library.
Xmyecho: myecho.o cliparse.o
X ln myecho cliparse -lc
X
X# testexecute hands its first argument on to myecho, so myecho must exist.
Xtestexecute: testexecute.o myecho
X ln testexecute cliparse -lc
X
X# Run the system echo (c:echo) and myecho on the same arguments so that the
X# two outputs can be compared.  The "aaa" case is the documented exception,
X# and the last pair deliberately passes an invalid argument.
X# NOTE(review): '@' and '-' are presumably the usual make prefixes (do not
X# echo the command / ignore a failing exit status) -- confirm for Aztec make.
Xtest: c:echo myecho
X c:echo "aaa aaa"
X myecho "aaa aaa"
X c:echo "aaa*"***naaa"
X myecho "aaa*"***naaa"
X c:echo "aaa*e[33mbbb*e[31mccc"
X myecho "aaa*e[33mbbb*e[31mccc"
X @c:echo "Here's the exception:"
X c:echo "aaa"
X myecho "aaa"
X @c:echo "Doing invalid argument test:"
X -c:echo "aaa"aaa
X -myecho "aaa"aaa
X
X# Run testexecute with quoted arguments that contain escaped characters
X# (*" and *e sequences).
Xtest2: testexecute myecho
X testexecute "*"aaa aaa*""
X testexecute "*"*e[33maaa***"*e[31mbbb*""
X testexecute "*"*e[33maaa***"*e[31mbbb"
X testexecute "*"aaa*""
SHAR_EOF
sed 's/^X//' << \SHAR_EOF > cliparse.diff
X*** cliparse.c Sat Jan 05 16:53:13 1991
X--- bcliparse.c Sun Jan 06 13:13:59 1991
X***************
X*** 16,17 ****
X--- 16,48 ----
X
X+ /*
X+ * Patched by Kari Sutela to enable one to differentiate between
X+ * aToken and "aToken". Also AmigaDOS escaped characters in quoted
X+ * tokens are preparsed.
X+ *
X+ * To my knowledge the escaped characters are:
X+ * '*"' ==> '"'
X+ * '**' ==> '*'
X+ * '*n' or '*N' ==> '\n'
X+ * '*e' or '*E' ==> '\033' (escape)
X+ * '*x' ==> 'x', where x is any other character.
X+ *
X+ * I use this `heuristic' to decide whether the quotes should be left
X+ * around the token: if there was an obvious need to quote the token
X+ * the quotes will be stripped. There is an obvious need if the token
X+ * contains a space or an escaped character. Examples:
X+ * "one token" ==> one token
X+ * "oneToken" ==> "oneToken"
X+ * "one*"Token" ==> one"Token
X+ * "one*Token" ==> oneToken
X+ *
X+ * Compile normally and include the resulting object file in the ln-command
X+ * line before `-lc'. Example: "ln whatever cliparse -lc".
X+ *
X+ * Use any way you want provided you don't distribute the source file
X+ * (which contains code copyrighted by Manx Software Systems). You may
X+ * redistribute the patch file however.
X+ *
X+ * Bug reports and comments to sutela@polaris.utu.fi
X+ */
X+
X #include <libraries/dosextens.h>
X***************
X*** 20,21 ****
X--- 51,96 ----
X
X+ #include <ctype.h>
X+ /* BOOL is here */
X+ #include <exec/types.h>
X+
X+ /*
X+ * You might want to "cc -dDEBUG cliparse" if you suspect you have found
X+ * a bug somewhere.
X+ */
X+ #ifdef DEBUG
X+ long Write(BPTR file, char *buffer, long length);
X+ BPTR Output(void);
X+ #define dMSG( str, val ) \
X+ { \
X+ char buf[256]; \
X+ \
X+ sprintf( &buf[0], "%s = %d\n", ((char *) str), (val) ); \
X+ Write( Output(), buf, strlen(buf) ); \
X+ }
X+ #define wMSG( str, haddr, cval ) \
X+ { \
X+ char buf[256]; \
X+ \
X+ if(cval) \
X+ sprintf( &buf[0], "%s: wrote at addr %x, intval = %d, charval = %c\n",(str),(haddr),(int) (cval),(cval)); \
X+ else \
X+ sprintf( &buf[0], "%s: wrote at addr %x, intval = %d, null-character\n", (str),(haddr),(int) (cval)); \
X+ Write( Output(), buf, strlen(buf) ); \
X+ }
X+ #define hMSG( str, val ) \
X+ { \
X+ char buf[256]; \
X+ \
X+ sprintf( &buf[0], "%s = %x\n", (str), (val) ); \
X+ Write(Output(),buf,strlen(buf)); \
X+ }
X+ #define sMSG( str, strval ) \
X+ { \
X+ char buf[2000]; \
X+ \
X+ sprintf( &buf[0], "%s \"%s\"\n",(str),(strval) ); \
X+ Write(Output(),buf,strlen(buf)); \
X+ }
X+ #endif
X+
X extern int _argc, _arg_len;
X***************
X*** 31,32 ****
X--- 106,116 ----
X
X+ #ifdef DEBUG
X+ dMSG( "argument length (from alen)", alen );
X+ sMSG( "aptr contains", aptr );
X+ sMSG( "If launched with Aztec's Make, aptr seems to contain",
X+ "unnecessary characters"
X+ );
X+ sMSG( "Execute() seems to provide a correct argument line", "" );
X+ #endif
X+
X if (pp->pr_CLI) {
X***************
X*** 37,39 ****
X cp = _detach_name;
X! _arg_len = cp[0]+alen+2;
X if ((_arg_lin = AllocMem((long)_arg_len, 0L)) == 0)
X--- 121,161 ----
X cp = _detach_name;
X! #ifdef DEBUG
X! dMSG( "argument length (with strlen())", strlen(aptr) );
X! dMSG( "command name length (with strlen())", strlen(cp+1) );
X! dMSG( "command name length (from process struct)", cp[0] );
X! #endif
X!
X! /*
X! * We must allocate a bit more memory to allow for `terse' closing
X! * quotes such as "Token"AnotherToken. This string will be parsed
X! * into two tokens: "Token" and AnotherToken.
X! *
X! * This isn't too accurate (can allocate a bit more than really needed).
X! */
X! {
X! int terseclose=0;
X! char *tempcp = aptr;
X! BOOL open = FALSE;
X!
X! while( c = *tempcp++ ) {
X! if( open ) {
X! if( c == '"' ) {
X! if( !isspace(*tempcp) && *tempcp != '\0' )
X! terseclose++;
X! open = FALSE;
X! }
X! } else {
X! open = (c == '"');
X! }
X! }
X!
X! /*
X! * OK! We have calculated the extra allocation requirements.
X! */
X! _arg_len = cp[0]+alen+2+terseclose;
X! #ifdef DEBUG
X! dMSG( "terseclose", terseclose );
X! dMSG( "total _arg_len", _arg_len );
X! #endif
X! }
X if ((_arg_lin = AllocMem((long)_arg_len, 0L)) == 0)
X***************
X*** 40,47 ****
X return;
X c = cp[0];
X strncpy(_arg_lin, cp+1, (size_t)c);
X strcpy(_arg_lin+c, " ");
X! strncat(_arg_lin, aptr, (size_t)alen);
X _arg_lin[c] = 0;
X! for (_argc=1,aptr=cp=_arg_lin+c+1;;_argc++) {
X while ((c=*cp) == ' ' || c == '\t' || c == '\f' ||
X--- 162,217 ----
X return;
X+ #ifdef DEBUG
X+ dMSG( "Allocated bytes", _arg_len );
X+ hMSG( "_arg_lin starts @", _arg_lin );
X+ hMSG( "_arg_lin last legal position @", _arg_lin+_arg_len-1 );
X+ #endif
X c = cp[0];
X+
X+ /*
X+ * First, copy the command name!
X+ */
X strncpy(_arg_lin, cp+1, (size_t)c);
X+ #ifdef DEBUG
X+ dMSG( "copied bytes to _arg_lin", c );
X+ #endif
X strcpy(_arg_lin+c, " ");
X! #ifdef DEBUG
X! hMSG( "Copied a blank to address", _arg_lin+c );
X! #endif
X! /*
X! * Then the rest of the command line!
X! * Actually, I don't catenate the argument string after _arg_lin;
X! * I just use aptr and write from it to _arg_lin one character
X! * at a time. But I must overwrite the '\n' at aptr[alen-1] with
X! * a '\0' as Aztec Make launches seem to provide some extra characters
X! * (which I'd better not overwrite---who knows where they belong to).
X! * The last `real' character at end of the command line always seems to
X! * be a '\n'.
X! */
X! aptr[(size_t)alen-1] = '\0';
X _arg_lin[c] = 0;
X!
X! /*
X! * The arguments!
X! *
X! * Here cp points to the original argument string (initially was aptr)
X! * and aptr is used to write the characters to the resulting command
X! * line. cp is used to read, aptr to write.
X! */
X! for (_argc=1,cp=aptr,aptr=_arg_lin+c+1;;_argc++) {
X!
X! /*
X! * This variable will tell if there was a need to quote the token.
X! */
X! BOOL required;
X!
X! #ifdef DEBUG
X! dMSG( "Token #", _argc );
X! hMSG( "cp (read position)", cp );
X! hMSG( "aptr (write position)", aptr );
X! #endif
X!
X! /*
X! * First, skip white space!
X! */
X while ((c=*cp) == ' ' || c == '\t' || c == '\f' ||
X***************
X*** 49,50 ****
X--- 219,227 ----
X cp++;
X+ #ifdef DEBUG
X+ hMSG( "Skipped white space: cp (read position)", cp );
X+ #endif
X+
X+ /*
X+ * Break if at end of command line.
X+ */
X if (*cp < ' ')
X***************
X*** 51,63 ****
X break;
X if (*cp == '"') {
X! cp++;
X while (c = *cp++) {
X! *aptr++ = c;
X! if (c == '"') {
X! if (*cp == '"')
X! cp++;
X! else {
X! aptr[-1] = 0;
X! break;
X }
X }
X--- 228,346 ----
X break;
X+
X+ /*
X+ * Copy to end of token.
X+ */
X if (*cp == '"') {
X!
X! char *tempcp;
X! BOOL escapenext;
X!
X! #ifdef DEBUG
X! sMSG( "a quoted token starts", cp );
X! #endif
X!
X! /*
X! * Don't advance, yet. First must find a space or an
X! * escaped character!
X! */
X! required = FALSE;
X! escapenext = FALSE;
X! for( tempcp = cp+1; c = *tempcp; tempcp++ ) {
X! if(!required)
X! required = isspace(c) || (c == '*');
X! if(!escapenext && c == '"')
X! break;
X! escapenext = (c == '*');
X! }
X! #ifdef DEBUG
X! dMSG( "required", required );
X! #endif
X!
X! /*
X! * Now advance!
X! * If there was a real need to quote the token, we don't
X! * want to leave the quotes around the token. On the other
X! * hand, if no reason was found, the quotes won't be stripped.
X! */
X! if(required)
X! cp++;
X! else {
X! #ifdef DEBUG
X! wMSG( "not required",aptr,*cp);
X! #endif
X! *aptr++ = *cp++;
X! }
X!
X!
X! /*
X! * Copy the token until the closing quote!
X! */
X! escapenext = FALSE;
X while (c = *cp++) {
X!
X! /*
X! * Replace escaped characters!
X! */
X! if(escapenext) {
X! switch( c ) {
X! case 'n':
X! case 'N':
X! #ifdef DEBUG
X! wMSG( "an escaped NL", aptr, '\n' );
X! #endif
X! *aptr++ = '\n';
X! break;
X! case 'e':
X! case 'E':
X! #ifdef DEBUG
X! wMSG( "an escaped escape", aptr, 'e' );
X! #endif
X! *aptr++ = '\033';
X! break;
X! default:
X! #ifdef DEBUG
X! wMSG( "an escaped character", aptr, c );
X! #endif
X! *aptr++ = c;
X }
X+ escapenext = FALSE;
X+ continue;
X+ } else {
X+ if( c != '*' ) {
X+ #ifdef DEBUG
X+ wMSG( "an ordinary character",aptr,c );
X+ #endif
X+ *aptr++ = c;
X+ escapenext = FALSE;
X+ } else {
X+ #ifdef DEBUG
X+ sMSG( "begin escaped", "" );
X+ #endif
X+ escapenext = TRUE;
X+ continue;
X+ }
X+ }
X+
X+ if (c == '"')
X+ break;
X+
X+ } /* while( c = *cp++) */
X+
X+ /*
X+ * At the end of the token. Leave the closing quote if no
X+ * real need was found.
X+ */
X+ if( c == '"' ) {
X+ if(required) {
X+ #ifdef DEBUG
X+ sMSG( "required quote", "" );
X+ wMSG( "strip it", aptr-1, 0 );
X+ #endif
X+ aptr[-1] = 0;
X+ } else {
X+ #ifdef DEBUG
X+ sMSG( "unnecessary quote", "" );
X+ wMSG( "Don't strip", aptr, 0 );
X+ #endif
X+ *aptr++ = 0;
X }
X***************
X*** 64,65 ****
X--- 347,349 ----
X }
X+
X }
X***************
X*** 66,70 ****
X else {
X while ((c=*cp++) && c != ' ' && c != '\t' && c != '\f' &&
X! c != '\r' && c != '\n')
X *aptr++ = c;
X *aptr++ = 0;
X--- 350,364 ----
X else {
X+ #ifdef DEBUG
X+ sMSG( "An unquoted token starts", cp );
X+ #endif
X while ((c=*cp++) && c != ' ' && c != '\t' && c != '\f' &&
X! c != '\r' && c != '\n') {
X! #ifdef DEBUG
X! wMSG( "copy until white space", aptr, c );
X! #endif
X *aptr++ = c;
X+ }
X+ #ifdef DEBUG
X+ wMSG( "terminate token", aptr, 0 );
X+ #endif
X *aptr++ = 0;
X***************
X*** 74,75 ****
X--- 368,372 ----
X }
X+ #ifdef DEBUG
X+ wMSG( "terminate string", aptr, 0 );
X+ #endif
X *aptr = 0;
SHAR_EOF
sed 's/^X//' << \SHAR_EOF > myecho.c
X/*
X * A test program for my cliparse.c --- this should behave exactly like
X * the AmigaDos echo-command. Of course, there's an execption:
X * echo "oneToken" != myecho "oneToken"
X *
X * Kari Sutela
X */
X#ifdef DEBUG
Xextern int _arg_len;
X#endif
X
Xint main(int ac, char *av[])
X{
X#ifdef DEBUG
X printf( "BTW, _arg_len = %d\n", _arg_len );
X#endif
X if( ac != 2 ) {
X puts("Invalid argument to MYECHO");
X return 20;
X } else {
X puts(av[1]);
X return 0;
X }
X}
SHAR_EOF
sed 's/^X//' << \SHAR_EOF > testexecute.c
X#include <stdio.h>
X#include <string.h>
X#include <functions.h>
X
X/*
X * Build the command line "myecho <av[1]>" and run it through Execute().
X *
X * Returns 20 when no argument was given or when the argument is too long
X * to fit in the command buffer; returns 0 otherwise.  The result of
X * Execute() is deliberately ignored.
X */
Xint main( int ac, char *av[] )
X{
X    static const char command[] = "myecho";
X    char buf[1000] = {'\0'};
X
X    if( ac < 2 )
X        return 20;
X
X    /*
X     * The buffer must hold the command name, one blank, the argument and
X     * the terminating '\0'.  sizeof(command) already counts one of the
X     * two extra bytes, so subtract one more for the other.
X     */
X    if( strlen(av[1]) > sizeof(buf) - sizeof(command) - 1 )
X        return 20;
X
X    sprintf( &buf[0], "%s %s", command, av[1] );
X    (void) Execute(buf,0,0);
X    return 0;
X}
SHAR_EOF
# End of shell archive
exit 0
--
Kari Sutela sutela@polaris.utu.fi