[alt.sources.amiga] _cliparse

sutela@polaris.utu.fi (Kari Sutela) (01/07/91)

This is my version of the _cliparse() function for the Aztec C compiler.
You need the original cliparse.c from the 5.0a distribution diskettes and
the patch program to extract a working version.

This version properly supports quoted tokens and preprocesses the escaped
characters inside quoted tokens.

Use any way you want, but redistribute as a patch only. I'll appreciate any
comments, bug reports etc.

Kari Sutela	sutela@polaris.utu.fi

#	This is a shell archive.
#	Remove everything above and including the cut line.
#	Then run the rest of the file through sh.
#----cut here-----cut here-----cut here-----cut here----#
#!/bin/sh
# shar:    Shell Archiver
#	Run the following text with /bin/sh to create:
#	README
#	Makefile
#	cliparse.diff
#	myecho.c
#	testexecute.c
# This archive created: Sun Jan  6 20:52:41 1991
sed 's/^X//' << \SHAR_EOF > README
XThis is my version of cliparse.c which I wrote when I realized that Aztec's
X_cliparse() doesn't properly support quoted argument tokens. For example,
Xin a quoted token the first doublequote would end the token whether the
Xquote was escaped (with `*') or not. Another disadvantage was the fact that
X_cliparse() would always strip the surrounding doublequotes. This fact
Xwould make it impossible for a `list'-like program to distinguish between
Xargument tokens FILES (a keyword) and "FILES" (a filename).
X
XMy version strips the surrounding quotes only if the quotes were `required'.
XQuotes are required if the quoted token has a space or an escaped character.
XFor example, this version won't strip the quotes from "aToken".
X
XThis version also preparses the escaped characters. To my knowledge the
Xstandard escape-sequences are:
X
X	"*""	=> '"'
X	"**"	=> '*'
X	"*n"	=> '\n'
X	"*N"	=> '\n'
X	"*e"	=> '\033' (the ASCII-escape)
X	"*E"	=> '\033' (the ASCII-escape)
X	"*x"	=> 'x' where x is any other character
X
XPlease, let me know if there are some escape-sequences which I have
Xforgotten.
X
XI distribute my version of _cliparse() as a diff-file because it contains
Xsome code which is copyrighted by Manx Software Systems. To extract the
X`better' cliparse.c, you first must copy the Manx-supplied cliparse.c to
Xthe directory where you put cliparse.diff. BTW, I used cliparse.c from the
X5.0a distribution diskettes---I hope they haven't changed it since that
Xdistribution.
X
XYou also need the "patch"-program. When you have both cliparse.diff and the
XManx-supplied cliparse.c in the same directory, type:
X	patch <cliparse.diff
XNow you should have files cliparse.c (the `better' one) and cliparse.c.orig
X(the old one). Just compile cliparse.c as usual and when you want to parse
Xthe CLI-arguments correctly, link cliparse.o before the normal c-library.
XFor example, "ln whatever cliparse -lc".
X
XPLEASE, DON'T REDISTRIBUTE THE RESULTING cliparse.c. You may redistribute
Xthe diff-file however.
X
XThe new cliparse.c has some debugging statements which can be enabled by
X#defining DEBUG. If you suspect a bug somewhere, you might try compiling
Xwith "cc -dDEBUG cliparse".
X
XIf you notice any bugs or problems, please, let me know.
X
X	Kari Sutela (email: sutela@polaris.utu.fi)
SHAR_EOF
sed 's/^X//' << \SHAR_EOF > Makefile
Xmyecho: myecho.o cliparse.o
X	ln myecho cliparse -lc
X
Xtestexecute: testexecute.o myecho
X	ln testexecute cliparse -lc
X
Xtest: c:echo myecho
X	c:echo "aaa aaa"
X	myecho "aaa aaa"
X	c:echo "aaa*"***naaa"
X	myecho "aaa*"***naaa"
X	c:echo "aaa*e[33mbbb*e[31mccc"
X	myecho "aaa*e[33mbbb*e[31mccc"
X	@c:echo "Here's the exception:"
X	c:echo "aaa"
X	myecho "aaa"
X	@c:echo "Doing invalid argument test:"
X	-c:echo "aaa"aaa
X	-myecho "aaa"aaa
X
Xtest2: testexecute myecho
X	testexecute "*"aaa aaa*""
X	testexecute "*"*e[33maaa***"*e[31mbbb*""
X	testexecute "*"*e[33maaa***"*e[31mbbb"
X	testexecute "*"aaa*""
SHAR_EOF
sed 's/^X//' << \SHAR_EOF > cliparse.diff
X*** cliparse.c	Sat Jan 05 16:53:13 1991
X--- bcliparse.c	Sun Jan 06 13:13:59 1991
X***************
X*** 16,17 ****
X--- 16,48 ----
X  
X+ /*
X+  * Patched by Kari Sutela to enable one to differentiate between
X+  * aToken and "aToken". Also AmigaDOS escaped characters in quoted
X+  * tokens are preparsed.
X+  *
X+  * To my knowledge the escaped characters are:
X+  *	'*"'         ==> '"'
X+  *	'**'         ==> '*'
X+  *	'*n' or '*N' ==> '\n'
X+  *	'*e' or '*E' ==> '\033' (escape)
X+  *	'*x'         ==> 'x', where x is any other character.
X+  *
X+  * I use this `heuristic' to decide whether the quotes should be left
X+  * around the token: if there was an obvious need to quote the token
X+  * the quotes will be stripped. There is an obvious need if the token
X+  * contains a space or an escaped character. Examples:
X+  *	"one token"  ==> one token
X+  *	"oneToken"   ==> "oneToken"
X+  *	"one*"Token" ==> one"Token
X+  *	"one*Token"  ==> oneToken
X+  *
X+  * Compile normally and include the resulting object file in the ln-command
X+  * line before `-lc'. Example: "ln whatever cliparse -lc".
X+  *
X+  * Use any way you want provided you don't distribute the source file
X+  * (which contains code copyrighted by Manx Software Systems). You may
X+  * redistribute the patch file however.
X+  *
X+  * Bug reports and comments to sutela@polaris.utu.fi
X+  */
X+ 
X  #include <libraries/dosextens.h>
X***************
X*** 20,21 ****
X--- 51,96 ----
X  
X+ #include <ctype.h>
X+ /* BOOL is here */
X+ #include <exec/types.h>
X+ 
X+ /*
X+  * You might want to "cc -dDEBUG cliparse" if you suspect you have found
X+  * a bug somewhere.
X+  */
X+ #ifdef DEBUG
X+ 	long Write(BPTR file, char *buffer, long length);
X+ 	BPTR Output(void);
X+ 	#define dMSG( str, val ) \
X+ 	{ \
X+ 		char buf[256]; \
X+ 	\
X+ 		sprintf( &buf[0], "%s = %d\n", ((char *) str), (val) ); \
X+ 		Write( Output(), buf, strlen(buf) ); \
X+ 	}
X+ 	#define wMSG( str, haddr, cval ) \
X+ 	{ \
X+ 		char buf[256]; \
X+ 	\
X+ 		if(cval) \
X+ 			sprintf( &buf[0], "%s: wrote at addr %x, intval = %d, charval = %c\n",(str),(haddr),(int) (cval),(cval)); \
X+ 		else \
X+ 			sprintf( &buf[0], "%s: wrote at addr %x, intval = %d, null-character\n", (str),(haddr),(int) (cval)); \
X+ 		Write( Output(), buf, strlen(buf) ); \
X+ 	}
X+ 	#define hMSG( str, val ) \
X+ 	{ \
X+ 		char buf[256]; \
X+ 	\
X+ 		sprintf( &buf[0], "%s = %x\n", (str), (val) ); \
X+ 		Write(Output(),buf,strlen(buf)); \
X+ 	}
X+ 	#define sMSG( str, strval ) \
X+ 	{ \
X+ 		char buf[2000]; \
X+ 	\
X+ 		sprintf( &buf[0], "%s \"%s\"\n",(str),(strval) ); \
X+ 		Write(Output(),buf,strlen(buf)); \
X+ 	}
X+ #endif
X+ 
X  extern int _argc, _arg_len;
X***************
X*** 31,32 ****
X--- 106,116 ----
X  
X+ #ifdef DEBUG
X+ 	dMSG( "argument length (from alen)", alen );
X+ 	sMSG( "aptr contains", aptr );
X+ 	sMSG( "If launched with Aztec's Make, aptr seems to contain",
X+ 		"unnecessary characters"
X+ 	);
X+ 	sMSG( "Execute() seems to provide a correct argument line", "" );
X+ #endif
X+ 
X  	if (pp->pr_CLI) {
X***************
X*** 37,39 ****
X  		cp = _detach_name;
X! 	_arg_len = cp[0]+alen+2;
X  	if ((_arg_lin = AllocMem((long)_arg_len, 0L)) == 0)
X--- 121,161 ----
X  		cp = _detach_name;
X! #ifdef DEBUG
X! 	dMSG( "argument length (with strlen())", strlen(aptr) );
X! 	dMSG( "command name length (with strlen())", strlen(cp+1) );
X! 	dMSG( "command name length (from process struct)", cp[0] );
X! #endif
X! 
X! 	/*
X! 	 * We must allocate a bit more memory to allow for `terse' closing
X! 	 * quotes such as "Token"AnotherToken. This string will be parsed
X! 	 * into two tokens: "Token" and AnotherToken.
X! 	 *
X! 	 * This isn't too accurate (can allocate a bit more than really needed).
X! 	 */
X! 	{
X! 		int terseclose=0;
X! 		char *tempcp = aptr;
X! 		BOOL open = FALSE;
X! 
X! 		while( c = *tempcp++ ) {
X! 			if( open ) {
X! 				if( c == '"' ) {
X! 					if( !isspace(*tempcp) && *tempcp != '\0' )
X! 						terseclose++;
X! 					open = FALSE;
X! 				}
X! 			} else {
X! 				open = (c == '"');
X! 			}
X! 		}
X! 
X! 		/*
X! 		 * OK! We have calculated the extra allocation requirements.
X! 		 */
X! 		_arg_len = cp[0]+alen+2+terseclose;
X! #ifdef DEBUG
X! 		dMSG( "terseclose", terseclose );
X! 		dMSG( "total _arg_len", _arg_len );
X! #endif
X! 	}
X  	if ((_arg_lin = AllocMem((long)_arg_len, 0L)) == 0)
X***************
X*** 40,47 ****
X  		return;
X  	c = cp[0];
X  	strncpy(_arg_lin, cp+1, (size_t)c);
X  	strcpy(_arg_lin+c, " ");
X! 	strncat(_arg_lin, aptr, (size_t)alen);
X  	_arg_lin[c] = 0;
X! 	for (_argc=1,aptr=cp=_arg_lin+c+1;;_argc++) {
X  		while ((c=*cp) == ' ' || c == '\t' || c == '\f' ||
X--- 162,217 ----
X  		return;
X+ #ifdef DEBUG
X+ 	dMSG( "Allocated bytes", _arg_len );
X+ 	hMSG( "_arg_lin starts @", _arg_lin );
X+ 	hMSG( "_arg_lin last legal position @", _arg_lin+_arg_len-1 );
X+ #endif
X  	c = cp[0];
X+ 
X+ 	/*
X+ 	 * First, copy the command name!
X+ 	 */
X  	strncpy(_arg_lin, cp+1, (size_t)c);
X+ #ifdef DEBUG
X+ 	dMSG( "copied bytes to _arg_lin", c );
X+ #endif
X  	strcpy(_arg_lin+c, " ");
X! #ifdef DEBUG
X! 	hMSG( "Copied a blank to address", _arg_lin+c );
X! #endif
X! 	/*
X! 	 * Then the rest of the command line!
X! 	 * Actually, I don't catenate the argument string after _arg_lin;
X! 	 * I just use aptr and write from it to _arg_lin one character
X! 	 * at a time. But I must overwrite the '\n' at aptr[alen-1] with
X! 	 * a '\0' as Aztec Make launches seem to provide some extra characters
X! 	 * (which I'd better not overwrite---who knows where they belong to).
X! 	 * The last `real' character at end of the command line always seems to
X! 	 * be a '\n'.
X! 	 */
X! 	aptr[(size_t)alen-1] = '\0';
X  	_arg_lin[c] = 0;
X! 
X! 	/*
X! 	 * The arguments!
X! 	 *
X! 	 * Here cp points to the original argument string (initially was aptr)
X! 	 * and aptr is used to write the characters to the resulting command
X! 	 * line. cp is used to read, aptr to write.
X! 	 */
X! 	for (_argc=1,cp=aptr,aptr=_arg_lin+c+1;;_argc++) {
X! 
X! 		/*
X! 		 * This variable will tell if there was a need to quote the token.
X! 		 */
X! 		BOOL required;
X! 
X! #ifdef DEBUG
X! 		dMSG( "Token #", _argc );
X! 		hMSG( "cp (read position)", cp );
X! 		hMSG( "aptr (write position)", aptr );
X! #endif
X! 
X! 		/*
X! 		 * First, skip white space!
X! 		 */
X  		while ((c=*cp) == ' ' || c == '\t' || c == '\f' ||
X***************
X*** 49,50 ****
X--- 219,227 ----
X  			cp++;
X+ #ifdef DEBUG
X+ 		hMSG( "Skipped white space: cp (read position)", cp );
X+ #endif
X+ 
X+ 		/*
X+ 		 * Break if at end of command line.
X+ 		 */
X  		if (*cp < ' ')
X***************
X*** 51,63 ****
X  			break;
X  		if (*cp == '"') {
X! 			cp++;
X  			while (c = *cp++) {
X! 				*aptr++ = c;
X! 				if (c == '"') {
X! 					if (*cp == '"')
X! 						cp++;
X! 					else {
X! 						aptr[-1] = 0;
X! 						break;
X  					}
X  				}
X--- 228,346 ----
X  			break;
X+ 
X+ 		/*
X+ 		 * Copy to end of token.
X+ 		 */
X  		if (*cp == '"') {
X! 
X! 			char *tempcp;
X! 			BOOL escapenext;
X! 
X! #ifdef DEBUG
X! 			sMSG( "a quoted token starts", cp );
X! #endif
X! 
X! 			/*
X! 			 * Don't advance, yet. First must find a space or an
X! 			 * escaped character!
X! 			 */
X! 			required = FALSE;
X! 			escapenext = FALSE;
X! 			for( tempcp = cp+1; c = *tempcp; tempcp++ ) {
X! 				if(!required)
X! 					required = isspace(c) || (c == '*');
X! 				if(!escapenext && c == '"')
X! 					break;
X! 				escapenext = (c == '*');
X! 			}
X! #ifdef DEBUG
X! 			dMSG( "required", required );
X! #endif
X! 
X! 			/*
X! 			 * Now advance!
X! 			 * If there was a real need to quote the token, we don't
X! 			 * want to leave the quotes around the token. On the other
X! 			 * hand, if no reason was found, the quotes won't be stripped.
X! 			 */
X! 			if(required)
X! 				cp++;
X! 			else {
X! #ifdef DEBUG
X! 				wMSG( "not required",aptr,*cp);
X! #endif
X! 				*aptr++ = *cp++;
X! 			}
X! 
X! 
X! 			/*
X! 			 * Copy the token until the closing quote!
X! 			 */
X! 			escapenext = FALSE;
X  			while (c = *cp++) {
X! 
X! 				/*
X! 				 * Replace escaped characters!
X! 				 */
X! 				if(escapenext) {
X! 					switch( c ) {
X! 						case 'n':
X! 						case 'N':
X! #ifdef DEBUG
X! 						wMSG( "an escaped NL", aptr, '\n' );
X! #endif
X! 							*aptr++ = '\n';
X! 							break;
X! 						case 'e':
X! 						case 'E':
X! #ifdef DEBUG
X! 						wMSG( "an escaped escape", aptr, 'e' );
X! #endif
X! 							*aptr++ = '\033';
X! 							break;
X! 						default:
X! #ifdef DEBUG
X! 						wMSG( "an escaped character", aptr, c );
X! #endif
X! 							*aptr++ = c;
X  					}
X+ 					escapenext = FALSE;
X+ 					continue;
X+ 				} else {
X+ 					if( c != '*' ) {
X+ #ifdef DEBUG
X+ 						wMSG( "an ordinary character",aptr,c );
X+ #endif
X+ 						*aptr++ = c;
X+ 						escapenext = FALSE;
X+ 					} else {
X+ #ifdef DEBUG
X+ 						sMSG( "begin escaped", "" );
X+ #endif
X+ 						escapenext = TRUE;
X+ 						continue;
X+ 					}
X+ 				}
X+ 
X+ 				if (c == '"')
X+ 					break;
X+ 
X+ 			} /* while( c = *cp++) */
X+ 
X+ 			/*
X+ 			 * At the end of the token. Leave the closing quote if no
X+ 			 * real need was found.
X+ 			 */
X+ 			if( c == '"' ) {
X+ 				if(required) {
X+ #ifdef DEBUG
X+ 					sMSG( "required quote", "" );
X+ 					wMSG( "strip it", aptr-1, 0 );
X+ #endif
X+ 					aptr[-1] = 0;
X+ 				} else {
X+ #ifdef DEBUG
X+ 					sMSG( "unnecessary quote", "" );
X+ 					wMSG( "Don't strip", aptr, 0 );
X+ #endif
X+ 					*aptr++ = 0;
X  				}
X***************
X*** 64,65 ****
X--- 347,349 ----
X  			}
X+ 
X  		}
X***************
X*** 66,70 ****
X  		else {
X  			while ((c=*cp++) && c != ' ' && c != '\t' && c != '\f' &&
X! 												c != '\r' && c != '\n')
X  				*aptr++ = c;
X  			*aptr++ = 0;
X--- 350,364 ----
X  		else {
X+ #ifdef DEBUG
X+ 			sMSG( "An unquoted token starts", cp );
X+ #endif
X  			while ((c=*cp++) && c != ' ' && c != '\t' && c != '\f' &&
X! 												c != '\r' && c != '\n') {
X! #ifdef DEBUG
X! 				wMSG( "copy until white space", aptr, c );
X! #endif
X  				*aptr++ = c;
X+ 			}
X+ #ifdef DEBUG
X+ 			wMSG( "terminate token", aptr, 0 );
X+ #endif
X  			*aptr++ = 0;
X***************
X*** 74,75 ****
X--- 368,372 ----
X  	}
X+ #ifdef DEBUG
X+ 	wMSG( "terminate string", aptr, 0 );
X+ #endif
X  	*aptr = 0;
SHAR_EOF
sed 's/^X//' << \SHAR_EOF > myecho.c
X/*
X * A test program for my cliparse.c --- this should behave exactly like
X * the AmigaDos echo-command. Of course, there's an exception:
X * echo "oneToken" != myecho "oneToken"
X *
X * Kari Sutela
X */
X#ifdef DEBUG
Xextern int _arg_len;
X#endif
X
Xint main(int ac, char *av[])
X{
X	/*
X	 * When built with DEBUG, first report the command line length that
X	 * _cliparse() stored in _arg_len.
X	 */
X#ifdef DEBUG
X	printf( "BTW, _arg_len = %d\n", _arg_len );
X#endif
X
X	/*
X	 * Exactly one argument token is accepted: print it followed by a
X	 * newline and finish with return code 0.
X	 */
X	if( ac == 2 ) {
X		puts(av[1]);
X		return 0;
X	}
X
X	/*
X	 * Any other token count (none, or more than one) is rejected with
X	 * return code 20.
X	 */
X	puts("Invalid argument to MYECHO");
X	return 20;
X}
SHAR_EOF
sed 's/^X//' << \SHAR_EOF > testexecute.c
X#include <functions.h>
X
Xmain( int ac, char *av[] )
X{
X	char buf[1000] = {'\0'};
X
X	if( ac >= 2 )
X		sprintf( &buf[0], "%s %s", "myecho", av[1] );
X	else
X		return 20;
X	(void) Execute(buf,0,0);
X	return 0;
X}
SHAR_EOF
#	End of shell archive
exit 0
-- 
Kari Sutela	sutela@polaris.utu.fi