[comp.sources.amiga] v02i012: bawk sources

jw@sics.sics.se.UUCP (08/09/87)
#	This is a shell archive.
#	Remove everything above and including the cut line.
#	Then run the rest of the file through sh.
#----cut here-----cut here-----cut here-----cut here----#
#!/bin/sh
# Xshar: Extended Shell Archiver.
# This is part  2 out of  2.
# This archive created: Sat Aug  8 19:32:40 1987
# By: Craig Norborg (Purdue University Computing Center)
#	Run the following text with /bin/sh to create:
#	bawk.c
#	bawkact.c
#	bawkdo.c
#	bawkpat.c
#	example2
#	tst1
cat << \SHAR_EOF > bawk.c
/*
 * Bawk main program
 */
#define MAIN 1
#include <stdio.h>
#include "bawk.h"

/*
 * When non-NULL, getcharacter() takes its input from this string (the
 * bawk program given on the command line) instead of from Fileptr.
 */
static char *pattern_arg = NULL; /* Command line bawk program pattern */
/* One-character pushback used while reading pattern_arg; 0 means empty */
static int ungetc_arg = 0;
/* Set by getline() when EOF ended a non-empty record; cleared by endfile() */
static char eof_seen = 0;
/* Number of field buffers parse() has allocated so far */
static int max_field_count = 0;
				
/*
 * Main program
 *
 * Usage: bawk { action | - | -f <actfile> } <file> ...
 *
 * The rules come from the first plain argument, from the file named
 * after "-f", or from standard input for a lone "-".  Every argument
 * after the rules is a text file to process ("-" meaning standard
 * input); if none is given, standard input is processed.
 */
main( argc, argv )
register int argc;
register char **argv;
{
	register char gotrules = 0, didfile = 0, getstdin = 0;
	register char rule_file_flag = 0;

	DBUG_ENTER("main");
	/*
	 * Initialize global variables:
	 */
	Beginact = 0;
	Endact = 0;
	Rules = 0;
	Rulep = 0;
	Filename = 0;
	Linecount = 0;
	Saw_break = 0;
	Stackptr = Stackbtm - 1;
	Stacktop = Stackbtm + MAXSTACKSZ;
	Nextvar = Vartab;
	init_pop_array();

	strcpy( Fieldsep, " \t" );
	strcpy( Recordsep, "\n" );

	/*
	 * Parse command line
	 */
	while ( --argc )
	{
		if ( **(++argv) == '-' )
		{
			/*
			 * Process dash options.
			 */
			switch ( tolower( argv[0][1] ) )
			{
			case '#':
				DBUG_PUSH(&argv[0][2]);
				continue;
			case 'f':
				/*
				 * "-f <actfile>" is only legal before the
				 * rules are known, and it needs a file name
				 * after it.  Here argc counts this argument
				 * plus those still to come, so a value below
				 * 2 means the name is missing; consuming it
				 * anyway would run argv off its end.
				 */
				if ( gotrules || argc < 2 )
					usage();
				else
				{
					rule_file_flag++;
					argv++;
					argc--;
				}
				break;
			case 0:
				/*
				 * A lone "-" means standard input, either as
				 * the rules file or as a text file.
				 */
				if(!gotrules)
					rule_file_flag++;
				getstdin++;
				break;
			default: usage();
			}
		}
		if ( gotrules )
		{
			/*
			 * Already read rules file - assume this is
			 * a text file for processing.  The BEGIN action
			 * runs once, before the first text file; a plain
			 * flag is used so that it cannot wrap around when
			 * many files are given.
			 */
			if ( !didfile )
			{
				didfile = 1;
				if ( Beginact )
					doaction( Beginact );
			}
			if ( getstdin )
			{
				getstdin--;
				newfile( 0 );
			}
			else
				newfile( *argv );
			process();
		}
		else
		{
			/*
			 * No rules yet: this argument is either a rules
			 * file (after -f or "-") or the program text itself.
			 */
			if(rule_file_flag) {
				if ( getstdin )
				{
					getstdin--;
					newfile( 0 );
				}
				else
					newfile( *argv );
			} else
				pattern_arg = *argv;
			compile();
			pattern_arg = NULL;
			gotrules = 1;
		}
	}
	if ( !gotrules )
		usage();

	if ( ! didfile )
	{
		/*
		 * Didn't process any files yet - process stdin.
		 */
		newfile( 0 );
		if ( Beginact )
			doaction( Beginact );
		process();
	}
	if ( Endact )
		doaction( Endact );
	DBUG_RETURN(0);
}

/*
 * Regular expression/action file compilation routines.
 */

/*
 * compile - read the bawk program from the current input (the file at
 * "Fileptr", or the command line string when pattern_arg is set) and
 * build the Rules chain plus the special Beginact/Endact parse trees.
 * Rules that end up without an action get a default "print the record"
 * action.  The input is closed with endfile() before returning.
 */
void compile()
{
	/*
	 * Compile regular expressions and C actions into Rules struct,
	 * reading from current input file "Fileptr".
	 */
	register int c;
	register EXPR_NODE *root;

	DBUG_ENTER("compile");

	while ( (c = getcharacter()) != -1 )
	{
		if ( c==' ' || c=='\t' || c=='\n' )
			/* swallow whitespace */
			;
		else if ( c=='#' )
		{
			/*
			 * Swallow comments
			 */
			while ( (c=getcharacter()) != -1 && c!='\n' )
				;
		}
		else if ( c=='{' )
		{
			DBUG_PRINT("compile",("action"));
			/*
			 * Compile the action string into a parse tree
			 */
			ungetcharacter( (char) '{' );

			/*
			 * The current rule already has an action, so this
			 * action starts a new, pattern-less rule.
			 */
			if ( Rulep && Rulep->action )
			{
				Rulep->nextrule = (RULE *) 
					get_clear_memory( sizeof( *Rulep ) );
				Rulep = Rulep->nextrule;
			}
			if ( !Rulep )
			{
				/*
				 * This is the first action encountered.
				 * Allocate the first Rules structure and
				 * initialize it
				 */
				Rules = Rulep = (RULE *)
					get_clear_memory( sizeof( *Rulep ) );
			}
			Rulep->action = act_compile( Workbuf );
		}
		else if ( c==',' )
		{
			DBUG_PRINT("compile",("stop pattern"));
			/*
			 * It's (hopefully) the second part of a two-part
			 * pattern string.  Swallow the comma and start
			 * compiling the stop pattern.
			 * NOTE(review): the dereferences of Rulep below rely
			 * on error() not returning for RE_ERROR - confirm
			 * that RE_ERROR is nonzero.
			 */
			if ( !Rulep || !Rulep->pattern.start )
				error( "stop pattern without a start",
					RE_ERROR );
			if ( Rulep->pattern.stop )
				error( "already have a stop pattern",
					RE_ERROR );
			Rulep->pattern.stop = pat_compile( Workbuf );
		}
		else
		{
			/*
			 * Assume it's a regular expression pattern
			 */
			DBUG_PRINT("compile",("start pattern"));

			ungetcharacter( (char) c );
			root = pat_compile( Workbuf );

			/*
			 * The first token left in Workbuf tells whether the
			 * "pattern" was really a BEGIN or END keyword.
			 */
			if ( *Workbuf == T_BEGIN )
			{
				/*
				 * Saw a "BEGIN" keyword - compile following
				 * action into special "Beginact" parse tree.
				 */
				Beginact = act_compile( Workbuf );
				continue;
			}
			if ( *Workbuf == T_END )
			{
				/*
				 * Saw an "END" keyword - compile following
				 * action into special "Endact" parse tree.
				 */
				Endact = act_compile( Workbuf );
				continue;
			}
			if ( Rulep )
			{
				/*
				 * Already saw a pattern/action - link in
				 * another Rules structure.
				 */
				Rulep->nextrule = (RULE *)
					get_clear_memory( sizeof( *Rulep ) );
				Rulep = Rulep->nextrule;
			}
			if ( !Rulep )
			{
				/*
				 * This is the first pattern encountered.
				 * Allocate the first Rules structure and
				 * initialize it
				 */
				Rules = Rulep = (RULE *) 
					get_clear_memory( sizeof( *Rulep ) );
			}
			if ( Rulep->pattern.start )
				error( "already have a start pattern",
					RE_ERROR );

			Rulep->pattern.start = root;
		}
	}
	/*
	 * Give every rule that has no action the default action.  The
	 * default is compiled by pointing pattern_arg at its source text,
	 * which makes getcharacter() read from that string.
	 * Rulep is left NULL when this loop finishes.
	 */
	for(Rulep = Rules; Rulep; Rulep = Rulep->nextrule)
	{
		if(!Rulep->action) {
			pattern_arg = "{printf \"%s\n\", $0}";
			Rulep->action = act_compile( Workbuf );
			pattern_arg = NULL;
		}
	}
	endfile();
	DBUG_VOID_RETURN;
}

/*
 * Text file main processing loop.
 */
void process()
{
	/*
	 * Consume the current input file ("Fileptr") one record at a
	 * time, split each record into fields and offer it to every rule
	 * of the Rules chain, in order.
	 */
	register int i;

	DBUG_ENTER("process");

	for ( Recordcount = 0; getline(); )
	{
		/*
		 * An empty record separator means multi-line records, so
		 * newline has to act as a field separator as well.
		 */
		if( *Recordsep == 0 )
			strcpy(Fieldsep," \t\n");
		Fieldcount = parse( Linebuf, Fields, Fieldsep );
		DBUG_PRINT("process",( "parsed %d words:", Fieldcount ));
		DBUG_EXECUTE("process",for(i=0; i<Fieldcount; ++i )DBUG_PRINT("process",("<%s>",Fields[i])););

		for ( Rulep = Rules; Rulep; Rulep = Rulep->nextrule )
		{
			if ( Rulep->pattern.start == 0 )
			{
				/*
				 * Rule without a pattern: the action applies
				 * to every record.
				 */
				doaction( Rulep->action );
				continue;
			}
			if ( Rulep->pattern.startseen )
			{
				/*
				 * Inside a start,stop range: run the action,
				 * then leave the range if this record matches
				 * the stop pattern.
				 */
				doaction( Rulep->action );
				if ( dopattern( Rulep->pattern.stop ) )
					Rulep->pattern.startseen = 0;
				continue;
			}
			if ( dopattern( Rulep->pattern.start ) )
			{
				/*
				 * The record matches the start pattern: run
				 * the action and, when a stop pattern exists,
				 * remember that a range has been entered.
				 */
				doaction( Rulep->action );
				if ( Rulep->pattern.stop )
					Rulep->pattern.startseen = 1;
			}
		}
	}
	DBUG_VOID_RETURN;
}

/*
 * Miscellaneous functions
 */
parse( str, wrdlst, delim )
register char *str;
char *wrdlst[];
char *delim;
{
	/*
	 * Split "str" into words and copy them into the buffers of
	 * "wrdlst".  Words are separated by runs of one or more of the
	 * characters in "delim".  The return value is the word count.
	 */
	register int wrdcnt;
	register char *dp, *out;
	char wrdbuf[ MAXLINELEN+1 ];

	DBUG_ENTER("parse");
	wrdcnt = 0;
	for (;;)
	{
		/*
		 * Step over any delimiters in front of the next word.
		 */
		for ( ; *str; str++ )
		{
			for ( dp = delim; *dp && *dp != *str; dp++ )
				;
			if ( !*dp )
				break;		/* not a delimiter */
		}
		if ( !*str )
			break;
		/*
		 * Gather the word up to the next delimiter or end of string.
		 */
		out = wrdbuf;
		for ( ; *str; str++ )
		{
			for ( dp = delim; *dp && *dp != *str; dp++ )
				;
			if ( *dp )
				break;		/* delimiter ends the word */
			*out++ = *str;
		}
		*out = 0;
		/*
		 * NOTE: every word gets a MAXLINELEN sized buffer, just in
		 * case the user wants to copy a larger string into a field.
		 * Buffers are allocated once and reused for later records.
		 */
		if ( wrdcnt == max_field_count )
		{
			wrdlst[ wrdcnt ] = getmemory( MAXLINELEN+1 );
			max_field_count++;
		}
		strcpy( wrdlst[ wrdcnt ], wrdbuf );
		wrdcnt++;
	}
	DBUG_RETURN(wrdcnt);
}

void unparse( wrdlst, wrdcnt, str, delim )
char *wrdlst[];
register int wrdcnt;
register char *str;
char *delim;
{
	/*
	 * Replace all the words in "str" with the words in "wrdlst",
	 * maintaining the same word seperation distance as found in
	 * the string.
	 * A "word" is a sequence of characters delimited by one or more
	 * of the characters found in the string "delim".
	 */
	register int wc;
	register char *sp, *cp, c;
	char strbuf[ MAXLINELEN+1 ], *start;

	DBUG_ENTER("unparse");
	wc = 0;		/* next word in "wrdlst" */
	sp = strbuf;	/* points to our local string */
	start = str;	/* save start address of "str" for later... */
	while ( *str )
	{
		/*
		 * Copy the field delimiters from the original string to
		 * our local version.
		 */
		while(c = *str++)
		{
			cp = delim;
			while(*cp && c != *cp)
				cp++;
			if(!*cp)
				break;
			*sp++ = c;
		}
		str--;
		if ( !*str )
			break;
		/*
		 * Skip over the field in the original string and...
		 */
		while(c = *str++)
		{
			cp = delim;
			while(*cp && c != *cp)
				cp++;
			if(*cp)
				break;
		}
		str--;
		if ( wc < wrdcnt )
		{
			/*
			 * ...copy in the field in the wordlist instead.
			 */
			cp = wrdlst[ wc++ ];
			while(*sp++ = *cp++);
			sp--;
		}
	}
	/*
	 * Tie off the local string, then copy it back to caller's string.
	 */
	*sp = 0;
	strcpy( start, strbuf );
	DBUG_VOID_RETURN;
}

/*
 * Allocate "len" bytes with malloc().  Failure is reported through
 * error() with MEM_ERROR; NULL is returned if error() comes back.
 */
char *
getmemory( len )
register unsigned len;
{
	register char *mem;

	DBUG_ENTER("getmemory");
	mem = malloc( len );
	if ( mem == NULL )
	{
		error( "out of memory", MEM_ERROR );
		DBUG_RETURN(NULL);
	}
	DBUG_RETURN(mem);
}

/*
 * Allocate "len" zero-filled bytes with calloc().  Failure is reported
 * through error() with MEM_ERROR; NULL is returned if error() comes back.
 */
char *
get_clear_memory( len )
register unsigned len;
{
	register char *cp;

	/* The trace tag used to be "getmemory", mislabelling debug output */
	DBUG_ENTER("get_clear_memory");
	if ( cp=calloc( 1, len ) )
		DBUG_RETURN(cp);
	error( "out of memory", MEM_ERROR );
	DBUG_RETURN(NULL);
}

/*
 * Allocate a parse tree node for "operator" with both children empty.
 */
EXPR_NODE *get_expr_node(operator)
char operator;
{
	register EXPR_NODE *fresh;

	DBUG_ENTER("get_expr_node");
	fresh = (EXPR_NODE *) getmemory(sizeof(*fresh));
	fresh->operator = operator;
	fresh->right = NULL;
	fresh->left = NULL;
	DBUG_PRINT("get_expr_node",("operator = '%s'",token_name[operator]));
	DBUG_RETURN(fresh);
}

/*
 * Make "s" the current input file, or standard input when "s" is NULL.
 * Resets Linecount and sets Filename and Fileptr.
 */
void newfile( s )
register char *s;
{
	DBUG_ENTER("newfile");
	Linecount = 0;
	Filename = s;
	if ( Filename == NULL )
	{
		/*
		 * No file name given - process standard input.
		 */
		Fileptr = stdin;
		Filename = "standard input";
		DBUG_VOID_RETURN;
	}
#ifdef BDS_C
	if ( fopen( s, Fileptr = Curfbuf ) == -1 )
		error( "file not found", FILE_ERROR );
#else
	Fileptr = fopen( s, "r" );
	if ( Fileptr == NULL )
		error( "file not found", FILE_ERROR );
#endif
	DBUG_VOID_RETURN;
}

getline()
{
	/*
	 * Read a record from current input file into Linebuf.
	 * Returns 1 when a record was read and 0 at end of file (the
	 * file is then closed through endfile()).  When EOF ends a
	 * non-empty record the record is returned first and eof_seen
	 * makes the next call report end of file.
	 */
	register int rtn, len = 0;
	register char *cp = Linebuf, *last_nl, *sep = Recordsep;

	DBUG_ENTER("getline");
	if(eof_seen)
	{
		endfile();
		DBUG_RETURN(0);
	}
	if(*sep)
	{
		/*
		 * NOTE(review): the inner loop only leaves itself, so it
		 * looks as if only the first character of Recordsep ever
		 * ends a record - confirm the intent.
		 */
		while((*cp++ = rtn = getcharacter()) != *sep++ && rtn != -1)
		{
			while(*sep)
			{
				if(rtn == *sep++)
					break;
			}
			if( ++len == MAXLINELEN )
				error("Input record too long", RECORD_ERROR);
			sep = Recordsep;
		}
	} else	/* Treat an empty line as record separator. */
	{
		while(1)
		{
			last_nl = cp;
			while((*cp++ = rtn = getcharacter()) != '\n' &&
			      rtn != -1)
			{
				/*
				 * "len" counts data characters only, but the
				 * newlines between the lines of the record
				 * are stored too.  Check the number of bytes
				 * actually stored, so that a record of many
				 * short lines cannot overrun the buffer.
				 * NOTE(review): assumes Linebuf holds
				 * MAXLINELEN+1 bytes - confirm in bawk.h.
				 */
				++len;
				if( (cp - Linebuf) >= MAXLINELEN )
					error("Input record too long",
					      RECORD_ERROR);
			}
			/* a line of length zero, or EOF, ends the record */
			if(((cp - last_nl) == 1) || (rtn == -1))
				break;
		}
	}
	/* overwrite the terminating character with the end of string */
	*(--cp) = 0;
	if ( rtn == -1 )
	{
		if(len)
			eof_seen = 1;
		else
		{
			endfile();
			DBUG_RETURN(0);
		}
	}
	++Recordcount;
	DBUG_RETURN(1);
}

int getcharacter()
{
	/*
	 * Fetch the next input character.  While pattern_arg is set the
	 * characters come from that string (a pushed-back character
	 * first), otherwise from the current input file.
	 * WARNING: your getc() must convert lines that end with CR+LF
	 * to LF and CP/M's EOF character (^Z) to a -1.
	 * Also, getc() must return a -1 when attempting to read from
	 * an unopened file.
	 */
	register int c;

	DBUG_ENTER("getcharacter");
	if ( pattern_arg == NULL )
	{
#ifdef BDS_C
		/*
		 * BDS C doesn't do CR+LF to LF and ^Z to -1 conversions
		 * <gag>
		 */
		if ( (c = getc( Fileptr )) == '\r' )
		{
			if ( (c = getc( Fileptr )) != '\n' )
			{
				ungetc( c );
				c = '\r';
			}
		}
		else if ( c == 26 )	/* ^Z */
			c = -1;
#else
		c = getc( Fileptr );
#endif

		/* lines are only counted for input read from a file */
		if ( c=='\n' )
			++Linecount;
	}
	else if ( ungetc_arg )
	{
		/* hand back the character saved by ungetcharacter() */
		c = ungetc_arg;
		ungetc_arg = 0;
	}
	else
		c = *pattern_arg ? *pattern_arg++ : EOF;
	DBUG_PRINT("getcharacter",("'%c'", c));
	DBUG_RETURN(c);
}

ungetcharacter( c )
register char c;
{
	/*
	 * Return a character to the input stream.
	 * Pushing back the first record separator character lowers the
	 * record counter, pushing back a newline lowers the line counter.
	 * While pattern_arg is set the character is kept in ungetc_arg,
	 * otherwise it goes back to the file through ungetc().
	 */
	DBUG_ENTER("ungetcharacter");
	if ( *Recordsep == c )
		Recordcount--;
	if ( '\n' == c )
		Linecount--;
	DBUG_PRINT("ungetcharacter",("'%c'", c));
	if ( pattern_arg == NULL )
		DBUG_RETURN(ungetc( c, Fileptr ));
	ungetc_arg = c;
	DBUG_RETURN(ungetc_arg);
}

/*
 * Finish with the current input: close it and reset the per-file state
 * (eof_seen, Filename, Linecount).
 */
void endfile()
{
	DBUG_ENTER("endfile");
	/*
	 * compile() calls this even when the program came from the
	 * command line, in which case no file was ever opened.  Passing
	 * a null stream to fclose() is not allowed, so skip the close.
	 */
	if ( Fileptr )
		fclose( Fileptr );
	eof_seen = 0;
	Filename = NULL;
	Linecount = 0;
	DBUG_VOID_RETURN;
}

/*
 * Print "s" on stderr, prefixed by the current file name and line
 * number when they are known.  A nonzero "severe" ends the program
 * with that value as exit status; zero returns to the caller.
 */
void error( s, severe )
register char *s;
register int severe;
{
	DBUG_ENTER("error");
	if ( Filename != 0 )
		fprintf( stderr, "%s:", Filename );
	if ( Linecount != 0 )
		fprintf( stderr, " line %d:", Linecount );
	fprintf( stderr, " %s\n", s );

	if ( severe != 0 )
		exit( severe );
	DBUG_VOID_RETURN;
}

void usage()
{
	DBUG_ENTER("usage");
	error( "Usage: bawk { action | - | -f <actfile> } <file> ...",
	       USAGE_ERROR );
	DBUG_VOID_RETURN;
}
SHAR_EOF
cat << \SHAR_EOF > bawkact.c
/*
 * Bawk C actions compiler
 */
#include <stdio.h>
#include "bawk.h"

/*
 * Tokenize an action from the current input into "actbuf" and parse the
 * tokens into a tree.  "Where" is set to ACTION first, since the lexer
 * consults it.
 */
EXPR_NODE *act_compile( actbuf )
register char	*actbuf;/* where tokenized actions are compiled into */
{
	register EXPR_NODE *tree;

	DBUG_ENTER("act_compile");
	Where = ACTION;
	stmt_lex( actbuf );
	/* restart the token reader at the front of the buffer */
	Actptr = actbuf;
	getoken();
	tree = stmt_parse();
	DBUG_RETURN(tree);
}

/*
 * Tokenize a pattern from the current input into "actbuf" and parse the
 * tokens into a tree.  "Where" is set to PATTERN first, which makes the
 * lexer stop in front of the '{' that opens the action.
 */
EXPR_NODE *pat_compile( actbuf )
register char	*actbuf;/* where tokenized actions are compiled into */
{
	register EXPR_NODE *tree;

	DBUG_ENTER("pat_compile");
	Where = PATTERN;
	stmt_lex( actbuf );
	/* restart the token reader at the front of the buffer */
	Actptr = actbuf;
	getoken();
	tree = stmt_parse();
	DBUG_RETURN(tree);
}

/*
 * stmt_lex - lexical analyzer for patterns and actions.
 *
 * Reads characters with getcharacter() and stores tokens in "actbuf".
 * Most tokens are a single byte.  T_FUNCTION and T_CONSTANT are followed
 * by an int written with storeint(), T_VARIABLE by a pointer written
 * with storeptr(), T_STRING by the NUL terminated string text and
 * T_REGEXP by the output of re_compile().  The token list always ends
 * with T_EOF.
 *
 * Lexing stops at end of input, at the '}' that brings the brace count
 * to zero, at a '{' while compiling a PATTERN, or at a ',' outside all
 * braces and parentheses; the last two characters are pushed back for
 * the caller.
 */
void stmt_lex( actbuf )
register char	*actbuf;/* where tokenized actions are compiled into */
{
	/*
	 * Read and tokenize C actions from current input file into the
	 * action buffer.  Strip out comments and whitespace in the
	 * process.
	 */
	register char *actptr,	/* actbuf pointer */
		*cp;		/* work pointer */
	char	buf[MAXLINELEN+1];/* string buffer */
	register int braces = 0,/* counts '{}' pairs - return when 0 */
		parens = 0,	/* counts '()' pairs */
		i,		/* temp */
		c,		/* current input character */
		finished = 0;

	DBUG_ENTER("stmt_lex");
	actptr = actbuf;
	while ( !finished && ((c = getcharacter()) != -1) )
	{
	    switch(c) {
		case ' ':
		case '\t':
		case '\n':
			/*
			 * Skip over spaces, tabs and newlines
			 */
			break;
		case '#':
			/*
			 * Skip comments.  Comments start with a '#' and
			 * end at the next newline.
			 */
			while ( (c = getcharacter()) != -1 && c!='\n' )
				;
			break;
		case '{':
			if ( Where==PATTERN )
			{
				/*
				 * We're compiling a pattern. The '{' marks
				 * the beginning of an action statement.
				 * Push the character back and return.
				 */
				ungetcharacter( (char) '{' );
				finished = 1;
			}
			else
			{
				/*
				 * We must be compiling an action statement.
				 * '{'s mark beginning of action or compound
				 * statements.
				 */
				++braces;
				*actptr++ = T_LBRACE;
			}
			break;
		case '}':
			/* the brace that closes the outermost block ends it */
			*actptr++ = T_RBRACE;
			finished = (! --braces );
			break;
		case '(':
			++parens;
			*actptr++ = T_LPAREN;
			break;
		case ')':
			if ( --parens < 0 )
				error( "mismatched '()'", ACT_ERROR );
			*actptr++ = T_RPAREN;
			break;
		case ',':
			if ( !braces && !parens )
			{
				/*
				 * Found a comma outside of any braces or
				 * parens - this must be a regular
				 * expression separator.
				 */
				ungetcharacter( (char) ',' );
				finished = 1;
			} else
				*actptr++ = T_COMMA;
			break;
		case '/':
			*actptr++ = T_DIV;
			break;
		case '@':
			/*
			 * Regular expression: the '@' is pushed back so that
			 * re_compile() sees it, and the pointer advances by
			 * the value re_compile() returns.
			 */
			*actptr++ = T_REGEXP;
			ungetcharacter( (char) c );
			actptr += re_compile( actptr );
			break;
		case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
		case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
		case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
		case 's': case 't': case 'u': case 'v': case 'w': case 'x':
		case 'y': case 'z':
		case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
		case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
		case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
		case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
		case 'Y': case 'Z':
		case '_':
			/*
			 * It's a symbol reference. Copy the symbol into
			 * string buffer.
			 * NOTE(review): nothing limits the copy to the size
			 * of "buf", so a symbol longer than MAXLINELEN
			 * characters would run past its end.
			 */
			cp = buf;
			do
				*cp++ = c;
			while ( (c=getcharacter()) != -1 &&
				(isalnum( c ) || (c == '_')));
			ungetcharacter( (char) c );
			*cp = 0;
			/*
			 * Check if a keyword, builtin function or variable.
			 */
			if ( c = iskeyword( buf ) )
				*actptr++ = c;
			else if ( i = isfunction( buf ) )
			{
				*actptr++ = T_FUNCTION;
				storeint( actptr, i );
				actptr += sizeof( i );
			}
			else
			{
				/*
				 * It's a symbol name.  Variables are entered
				 * in the symbol table the first time they
				 * are seen.
				 */
				*actptr++ = T_VARIABLE;
				if ( !(cp = (char *) findvar( buf )) )
					cp = (char *) addvar( buf );
				storeptr( actptr, cp );
				actptr += sizeof( cp );
			}
			break;
#ifdef QUOTE_STRING_HACK
		case '`':
#endif
		case '"':
			/*
			 * It's a string constant
			 */
			*actptr++ = T_STRING;
			actptr = str_compile( actptr, c );
			break;
		case '\'':
			/*
			 * It's a character constant.  Only the first
			 * character of the compiled text is kept.
			 */
			*actptr++ = T_CONSTANT;
			str_compile( buf, (char) '\'' );
			storeint( actptr, *buf );
			actptr += sizeof( i );
			break;
		case '0': case '1': case '2': case '3': case '4': case '5':
		case '6': case '7': case '8': case '9':
			/*
			 * It's a numeric constant (decimal digits only).
			 * NOTE(review): as for symbols, the digit copy is
			 * not limited to the size of "buf".
			 */
			*actptr++ = T_CONSTANT;
			cp = buf;
			do
				*cp++ = c;
			while ( (c=getcharacter()) != -1 && isdigit(c) );
			ungetcharacter( (char) c );
			*cp = 0;
			storeint( actptr, atoi( buf ) );
			actptr += sizeof( i );
			break;
		case '$':
			*actptr++ = T_DOLLAR;
			break;
		/*
		 * The operators below need one character of lookahead to
		 * tell "==" from "=", "<<" from "<=" from "<", and so on.
		 * A lookahead character that does not belong to the
		 * operator is pushed back.
		 */
		case '=':
			if ( (c=getcharacter()) == '=' )
				*actptr++ = T_EQ;
			else
			{
				ungetcharacter( (char) c );
				*actptr++ = T_ASSIGN;
			}
			break;
		case '!':
			if ( (c=getcharacter()) == '=' )
				*actptr++ = T_NE;
			else
			{
				ungetcharacter( (char) c );
				*actptr++ = T_LNOT;
			}
			break;
		case '<':
			if ( (c=getcharacter()) == '<' )
				*actptr++ = T_SHL;
			else if ( c == '=' )
				*actptr++ = T_LE;
			else
			{
				ungetcharacter( (char) c );
				*actptr++ = T_LT;
			}
			break;
		case '>':
			if ( (c=getcharacter()) == '>' )
				*actptr++ = T_SHR;
			else if ( c == '=' )
				*actptr++ = T_GE;
			else
			{
				ungetcharacter( (char) c );
				*actptr++ = T_GT;
			}
			break;
		case '&':
			if ( (c=getcharacter()) == '&' )
				*actptr++ = T_LAND;
			else
			{
				ungetcharacter( (char) c );
				*actptr++ = T_AND;
			}
			break;
		case '|':
			if ( (c=getcharacter()) == '|' )
				*actptr++ = T_LOR;
			else
			{
				ungetcharacter( (char) c );
				*actptr++ = T_OR;
			}
			break;
		case '+':
			if ( (c=getcharacter()) == '+' )
				*actptr++ = T_INCR;
			else
			{
				ungetcharacter( (char) c );
				*actptr++ = T_ADD;
			}
			break;
		case '-':
			if ( (c=getcharacter()) == '-' )
				*actptr++ = T_DECR;
			else
			{
				ungetcharacter( (char) c );
				*actptr++ = T_SUB;
			}
			break;
		case '[':
			*actptr++ = T_LBRACKET;
			break;
		case ']':
			*actptr++ = T_RBRACKET;
			break;
		case ';':
			*actptr++ = T_SEMICOLON;
			break;
		case '*':
			*actptr++ = T_MUL;
			break;
		case '%':
			*actptr++ = T_MOD;
			break;
		case '^':
			*actptr++ = T_XOR;
			break;
		case '~':
			*actptr++ = T_NOT;
			break;
		default:
			/*
			 * Bad character in input line
			 */
			error( "lexical error", ACT_ERROR );
	    }
	    /*
	     * NOTE(review): the limit is taken from Workbuf, not from
	     * "actbuf", and is only tested after the token has been
	     * stored - confirm that callers always pass Workbuf.
	     */
	    if ( actptr >= Workbuf + MAXWORKBUFLEN )
		error( "action too long", MEM_ERROR );
	}
	if ( braces || parens )
		error( "mismatched '{}' or '()'", ACT_ERROR );

	*actptr++ = T_EOF;

	DBUG_VOID_RETURN;
}

char *
str_compile( str, delim )
register char *str, delim;
{
	/*
	 * Compile a string from current input file into the given string
	 * buffer.  Stop when input character is the delimiter in "delim".
	 * Returns a pointer to the first character after the string's
	 * terminating NUL.  Running out of input before the delimiter is
	 * reported through error(); NULL is returned if error() comes
	 * back.
	 *
	 * Escapes: \b \n \t \f \r, and \ddd where the first digit is 0-3
	 * and the next two characters are always consumed as digits.
	 * NOTE(review): for any other escaped character (including '\\'
	 * and the delimiter) the character after the backslash is dropped
	 * and the one following it is stored instead; after
	 * backslash-newline two characters are dropped.  That looks
	 * unintended, but it is kept as found - confirm before changing.
	 */
	int tmpc;	/* can not be a register variable */
	register int c;
	register int i;
	char digits[4];	/* up to three octal digits and a NUL */
	char msg[32];	/* room for the "missing X delimiter" text */

	DBUG_ENTER("str_compile");
	while ( (c = getcharacter()) != -1 && c != delim)
	{
		if ( c  == '\\' )
		{
			switch ( c = getcharacter() )
			{
			case -1: goto err;
			case 'b': c = '\b'; break;
			case 'n': c = '\n'; break;
			case 't': c = '\t'; break;
			case 'f': c = '\f'; break;
			case 'r': c = '\r'; break;
			case '0':
			case '1':
			case '2':
			case '3':
				digits[0] = c;
				for ( i=1; i<3; i++ )
				{
					/*
					 * Test for end of input on the int
					 * before it is narrowed to a char.
					 */
					if ( (c = getcharacter()) == -1 )
						goto err;
					digits[i] = c;
				}
				digits[i] = 0;
				sscanf( digits, "%o", &tmpc );
				c = tmpc;
				break;
			case '\n':
				if ( getcharacter() == -1 )
					goto err;
				/* fall through */
			default:
				if ( (c = getcharacter()) == -1 )
					goto err;
			}
		}
		*str++ = c;
	}
	*str++ = 0;

	DBUG_RETURN(str);
err:
	/*
	 * The message needs about 20 bytes; it used to be formatted into
	 * the 4 byte digit buffer.
	 */
	sprintf( msg, "missing %c delimiter", delim );
	error( msg, 4 );
	DBUG_RETURN(NULL);
}

/*
 * Copy the bytes of the int "i" to "ip", which need not be aligned.
 */
void storeint( ip, i )
char *ip;
int i;
{
	char *from;

	DBUG_ENTER("storeint");
	from = (char *) &i;
	movmem(from, ip, sizeof(int));
	DBUG_VOID_RETURN;
}

/*
 * Copy the bytes of the pointer "p" to "pp", which need not be aligned.
 */
void storeptr( pp, p )
char *pp, *p;
{
	char *from;

	DBUG_ENTER("storeptr");
	from = (char *) &p;
	movmem(from, pp, sizeof(char *));
	DBUG_VOID_RETURN;
}

/*
 * Return the int whose bytes storeint() placed at "ip".
 */
int fetchint( ip )
register char *ip;
{
	int result;

	DBUG_ENTER("fetchint");
	movmem(ip, (char *) &result, sizeof(int));
	DBUG_RETURN(result);
}

/*
 * Return the pointer whose bytes storeptr() placed at "pp".
 */
char *
fetchptr( pp )
register char *pp;
{
	char *result;

	DBUG_ENTER("fetchptr");
	movmem(pp, (char *) &result, sizeof(char *));
	DBUG_RETURN(result);
}

#ifndef DBUG_OFF
/*
 * Printable token names, indexed by token code.  The table is only
 * compiled in when debugging is enabled; in this file it is used by the
 * DBUG_PRINT calls in get_expr_node() and getoken().
 * NOTE(review): the order presumably has to follow the token numbering
 * in bawk.h, with 0 unused - confirm when adding or moving tokens.
 */
char *token_name[] = {
0,
"CHAR",
"BOL",
"EOL",
"ANY",
"CLASS",
"NCLASS",
"STAR",
"PLUS",
"MINUS",
"ALPHA",
"DIGIT",
"NALPHA",
"PUNCT",
"RANGE",
"ENDPAT",
"T_STRING",
"T_DOLLAR",
"T_REGEXP",
"T_REGEXP_ARG",
"T_CONSTANT",
"T_VARIABLE",
"T_FUNCTION",
"T_SEMICOLON",
"T_EOF",
"T_LBRACE",
"T_RBRACE",
"T_LPAREN",
"T_RPAREN",
"T_LBRACKET",
"T_RBRACKET",
"T_COMMA",
"T_ASSIGN",
"T_STAR",
"T_MUL",
"T_DIV",
"T_MOD",
"T_ADD",
"T_UMINUS",
"T_SUB",
"T_SHL",
"T_SHR",
"T_LT",
"T_LE",
"T_GT",
"T_GE",
"T_EQ",
"T_NE",
"T_NOT",
"T_ADDROF",
"T_AND",
"T_XOR",
"T_OR",
"T_LNOT",
"T_LAND",
"T_LOR",
"T_INCR",
"T_DECR",
"T_POSTINCR",
"T_POSTDECR",
"T_IF",
"T_ELSE",
"T_WHILE",
"T_BREAK",
"T_CHAR",
"T_INT",
"T_BEGIN",
"T_END",
"T_NF",
"T_NR",
"T_FS",
"T_RS",
"T_FILENAME",
"T_STATEMENT",
"T_DECLARE",
"T_ARRAY_DECLARE"
};
#endif

/*
 * Read the next token at Actptr into "Token", leave its operand (if
 * any) in "Value" and move Actptr past both.  T_EOF is never stepped
 * over, so reading on keeps returning T_EOF.  Returns the token.
 */
char getoken()
{
	DBUG_ENTER("getoken");
	Token = *Actptr++;
	switch ( Token )
	{
	case T_STRING:
	case T_REGEXP:
		/* operand is NUL terminated text stored in the buffer */
		Value.dptr = Actptr;
		while ( *Actptr++ )
			;
		break;
	case T_VARIABLE:
		/* operand is a pointer written by storeptr() */
		Value.dptr = fetchptr( Actptr );
		Actptr += sizeof( char * );
		break;
	case T_FUNCTION:
	case T_CONSTANT:
		/* operand is an int written by storeint() */
		Value.ival = fetchint( Actptr );
		Actptr += sizeof( int );
		break;
	case T_EOF:
		Actptr--;
		Value.dptr = 0;
		break;
	default:
		Value.dptr = 0;
		break;
	}

	DBUG_PRINT("getoken",
	   ("Token='%s' (%d), Value=%d",token_name[Token],Token,Value.ival));
	DBUG_RETURN(Token);
}
SHAR_EOF
cat << \SHAR_EOF > bawkdo.c
/*
 * Bawk C actions interpreter
 */
#include <stdio.h>
#include "bawk.h"

/*
 * pop_array[tok] is nonzero when walk_tree() has to discard a value
 * from the stack after executing a statement whose root operator is
 * "tok" (see the T_STATEMENT case).
 */
static char pop_array[MAX_TOKEN + 1];

/*
 * Fill pop_array: every operator leaves a value to pop, except the
 * statement-like ones cleared below.
 * NOTE(review): T_WHILE keeps the default although the T_WHILE case of
 * walk_tree() pushes no result of its own - confirm that is intended.
 */
void init_pop_array()
{
	register int tok;

	DBUG_ENTER("init_pop_array");
	for ( tok = MAX_TOKEN; tok >= 0; tok-- )
		pop_array[tok] = 1;
	pop_array[T_STATEMENT] = 0;
	pop_array[T_IF] = 0;
	pop_array[T_DECLARE] = 0;
	pop_array[T_ARRAY_DECLARE] = 0;
	pop_array[T_BREAK] = 0;
	DBUG_VOID_RETURN;
}

/*
 * Evaluate the pattern tree "root" and return the integer it leaves on
 * the stack.
 */
int dopattern( root )
register EXPR_NODE *root;
{
	register int matched;

	DBUG_ENTER("dopattern");
	Where = PATTERN;
	walk_tree(root);
	matched = popint();
	DBUG_RETURN(matched);
}

/*
 * Execute the action tree "root"; nothing is taken off the stack here.
 */
void doaction( root )
register EXPR_NODE *root;
{
	DBUG_ENTER("doaction");

	Where = ACTION;		/* tell the interpreter what is running */
	walk_tree( root );

	DBUG_VOID_RETURN;
}

/*
 * walk_tree - interpret the parse tree rooted at "root".
 *
 * Expression nodes evaluate their operands recursively and leave one
 * result on the evaluation stack (pushint()/push()); statement nodes
 * (T_STATEMENT, T_IF, T_WHILE, T_BREAK, declarations) leave nothing.
 * Nothing is done for an empty tree or while Saw_break is set; T_BREAK
 * sets that flag and the enclosing T_WHILE clears it again.
 *
 * Guards: "$n" never indexes outside the fields of the current record
 * (an empty string is delivered when the record has no fields), and a
 * zero divisor in '/' or '%' is reported instead of being executed.
 */
void walk_tree(root)
register EXPR_NODE *root;
{
	register int ival;
	register int divisor;
	DATUM data;
	register VARIABLE *pvar;
	register VARDECL *pdecl;
	static char nofield[1];	/* "" - stands in for a missing field */

	DBUG_ENTER("walk_tree");
	if(Saw_break || !root)
		DBUG_VOID_RETURN;
	switch(root->operator) {
	case T_ASSIGN:
		walk_tree(root->left);
		walk_tree(root->right);
		assignment();
		break;
	case T_LOR:
		/* short circuit: the right side only runs when needed */
		walk_tree(root->left);
		if(popint())
			pushint(1);
		else {
			walk_tree(root->right);
			pushint(popint() != 0);
		}
		break;
	case T_LAND:
		walk_tree(root->left);
		if(!popint())
			pushint(0);
		else {
			walk_tree(root->right);
			pushint(popint() != 0);
		}
		break;
	/*
	 * Commutative operators pop both operands in one expression; the
	 * others save the left operand in "ival" first, so that the order
	 * of the operands is certain.
	 */
	case T_OR:
		walk_tree(root->left);
		walk_tree(root->right);
		pushint(popint() | popint());
		break;
	case T_AND:
		walk_tree(root->left);
		walk_tree(root->right);
		pushint(popint() & popint());
		break;
	case T_XOR:
		walk_tree(root->left);
		walk_tree(root->right);
		pushint(popint() ^ popint());
		break;
	case T_EQ:
		walk_tree(root->left);
		walk_tree(root->right);
		pushint(popint() == popint());
		break;
	case T_NE:
		walk_tree(root->left);
		walk_tree(root->right);
		pushint(popint() != popint());
		break;
	case T_LE:
		walk_tree(root->left);
		ival = popint();
		walk_tree(root->right);
		pushint(ival <= popint());
		break;
	case T_GE:
		walk_tree(root->left);
		ival = popint();
		walk_tree(root->right);
		pushint(ival >= popint());
		break;
	case T_LT:
		walk_tree(root->left);
		ival = popint();
		walk_tree(root->right);
		pushint(ival < popint());
		break;
	case T_GT:
		walk_tree(root->left);
		ival = popint();
		walk_tree(root->right);
		pushint(ival > popint());
		break;
	case T_SHL:
		walk_tree(root->left);
		ival = popint();
		walk_tree(root->right);
		pushint(ival << popint());
		break;
	case T_SHR:
		walk_tree(root->left);
		ival = popint();
		walk_tree(root->right);
		pushint(ival >> popint());
		break;
	case T_ADD:
		walk_tree(root->left);
		walk_tree(root->right);
		pushint(popint() + popint());
		break;
	case T_SUB:
		walk_tree(root->left);
		ival = popint();
		walk_tree(root->right);
		pushint(ival - popint());
		break;
	case T_MUL:
		walk_tree(root->left);
		ival = popint();
		walk_tree(root->right);
		pushint(ival * popint());
		break;
	case T_DIV:
		walk_tree(root->left);
		ival = popint();
		walk_tree(root->right);
		divisor = popint();
		if ( !divisor )
		{
			/*
			 * Dividing by zero is undefined; report it and, if
			 * error() returns, deliver 0 to keep the stack
			 * balanced.
			 */
			error( "division by zero", ACT_ERROR );
			pushint( 0 );
		}
		else
			pushint(ival / divisor);
		break;
	case T_MOD:
		walk_tree(root->left);
		ival = popint();
		walk_tree(root->right);
		divisor = popint();
		if ( !divisor )
		{
			error( "division by zero", ACT_ERROR );
			pushint( 0 );
		}
		else
			pushint(ival % divisor);
		break;
	case T_LNOT:
		walk_tree(root->left);
		pushint( ! popint() );
		break;
	case T_NOT:
		walk_tree(root->left);
		pushint( ~ popint() );
		break;
	case T_INCR:
		walk_tree(root->left);
		preincdec(1);
		break;
	case T_DECR:
		walk_tree(root->left);
		preincdec(-1);
		break;
	case T_DOLLAR:
		/*
		 * It's a reference to one (or all) of the words in Linebuf.
		 */
		walk_tree(root->left);
		if ( ival = popint() )
		{
			if ( Fieldcount < 1 )
			{
				/*
				 * The record has no fields (or no record has
				 * been read yet).  Clamping would index
				 * Fields[-1] or a buffer that was never
				 * allocated, so deliver an empty string.
				 */
				data.dptr = nofield;
			}
			else
			{
				if ( ival > Fieldcount )
					ival = Fieldcount;
				else if ( ival < 1 )
					ival = 1;
				data.dptr = Fields[ ival-1 ];
			}
		}
		else
		{
			/*
			 * Reconstitute the line buffer in case any of the
			 * fields have been changed.
			 */
			unparse( Fields, Fieldcount, Linebuf, Fieldsep );
			data.dptr = Linebuf;
		}
		/*
		 * $<expr>'s are treated the same as string constants:
		 */
		push( (char) 1, (char) ACTUAL, (char) BYTE, &data );
		break;
	case T_UMINUS:
		walk_tree(root->left);
		pushint( - popint() );
		break;
	case T_STAR:
		walk_tree(root->left);
		/*
		 * If item on stack is an LVALUE, do an extra level of
		 * indirection before changing it to an LVALUE.
		 */
		if ( Stackptr->lvalue )
			Stackptr->value.ptrptr = 
				(char **) *Stackptr->value.ptrptr;
		Stackptr->lvalue = 1;
		--Stackptr->class;
		break;
	case T_ADDROF:
		walk_tree(root->left);
		if ( Stackptr->lvalue )
			Stackptr->lvalue = 0;
		else
			error( "'&' operator needs an lvalue", ACT_ERROR );
		break;
	case T_CONSTANT:
		pushint(((DATUM *) (root->left))->ival);
		break;
	case T_FUNCTION:
		function(((DATUM *) (root->left))->ival, root->right);
		break;
	case T_REGEXP:
		/*
		 * Perform a match of the regular expression against input
		 * line.
		 */
		unparse( Fields, Fieldcount, Linebuf, Fieldsep );
		pushint( match( Linebuf, (char *) root->left ) );
		break;
	case T_REGEXP_ARG:
		/*
		 * A regular expression that is to be passed as a function
		 * argument.
		 */
		data.dptr = (char *) root->left;
		push( (char) 1, (char) ACTUAL, (char) BYTE, &data );
		break;
	case T_STRING:
		data.dptr = (char *) root->left;
		push( (char) 1, (char) ACTUAL, (char) BYTE, &data );
		break;
	case T_NF:
		pushint( Fieldcount );
		break;
	case T_NR:
		pushint( Recordcount );
		break;
	case T_FS:
		data.dptr = Fieldsep;
		push( (char) 1, (char) ACTUAL, (char) BYTE, &data );
		break;
	case T_RS:
		data.dptr = Recordsep;
		push( (char) 1, (char) ACTUAL, (char) BYTE, &data );
		break;
	case T_FILENAME:
		data.dptr = Filename;
		push( (char) 1, (char) ACTUAL, (char) BYTE, &data );
		break;
	case T_VARIABLE:
		pvar = (VARIABLE *) root->left;
		/*
		 * it's a plain variable. The way a variable is
		 * represented on the stack depends on its type:
		 *      lvalue class value.dptr
		 * vars:  1      0   address of var
		 * ptrs:  1      1   ptr to address of ptr
		 * array: 0      1   address of var
		 */
		if ( pvar->vclass && !pvar->vlen )
			/* it's a pointer */
			data.dptr = (char *) &pvar->vptr;
		else
			/* an array or simple variable */
			data.dptr = pvar->vptr;
		/*
		 * If it's an array it can't be used as an LVALUE.
		 */
		push( pvar->vclass, (char) !pvar->vlen, pvar->vsize, &data );
		break;
	case T_LBRACKET:
		walk_tree(root->left);
		if ( ! Stackptr->class )
			error( "'[]' needs an array or pointer", ACT_ERROR );
		/*
		 * compute the subscript
		 */
		walk_tree(root->right);
		ival = popint();
		/*
		 * compute the offset (subscript times WORD for int arrays)
		 * and then the effective address.
		 */
		ival *= Stackptr->size;
		if ( Stackptr->lvalue )
			/*
			 * It's a pointer - don't forget that the stack top
			 * item's value is the address of the pointer so we
			 * must do another level of indirection.
			 */
			Stackptr->value.dptr = *Stackptr->value.ptrptr+ival;
		else
			/*
			 * It's a plain array - the stack top item's value is
			 * the address of the first element in the array.
			 */
			Stackptr->value.dptr += ival;

		/*
		 * The stack top item now becomes an LVALUE, but we've
		 * reduced the indirection level.
		 */
		Stackptr->lvalue = 1;
		--Stackptr->class;
		break;
	case T_POSTINCR:
		walk_tree(root->left);
		postincdec(1);
		break;
	case T_POSTDECR:
		walk_tree(root->left);
		postincdec(-1);
		break;
	case T_STATEMENT:
		/*
		 * left is this statement, right the rest of the list.  A
		 * value left behind by an expression statement is
		 * discarded, as directed by pop_array.
		 */
		if(root->left) {
			walk_tree(root->left);
			if(pop_array[root->left->operator])
				popint();
		}
		walk_tree(root->right);
		break;
	case T_DECLARE:
		pdecl = (VARDECL *) root->left;
		pvar = pdecl->variable;
		if(pdecl->vsize != ((pvar->vlen ? pvar->vlen : 1)*
				    pvar->vsize)) {
			/*
			 * The amount of storage needed for the variable has
			 * changed.
			 */
			free(pvar->vptr);
			pvar->vptr = get_clear_memory(pdecl->vsize);
		}
		pvar->vclass = pdecl->vclass;
		pvar->vsize = pdecl->vsize;
		pvar->vlen = 0;
		walk_tree(root->right);
		break;
	case T_ARRAY_DECLARE:
		/* Compute the dimension */
		walk_tree(root->left->right);
		ival = popint();
		pdecl = (VARDECL *) root->left->left;
		pvar = pdecl->variable;
		/* reallocate only when the total size changes */
		if((ival*pdecl->vsize) != ((pvar->vlen ? pvar->vlen : 1)*
				    pvar->vsize)) {
			free(pvar->vptr);
			pvar->vptr = get_clear_memory(ival*pdecl->vsize);
		}
		pvar->vclass = pdecl->vclass;
		pvar->vsize = pdecl->vsize;
		pvar->vlen = ival;
		walk_tree(root->right);
		break;
	case T_IF:
		/*
		 * left->left is the condition, left->right the "then"
		 * part and right the "else" part.
		 */
		walk_tree(root->left->left);
		if(popint())
			walk_tree(root->left->right);
		else
			walk_tree(root->right);
		break;
	case T_WHILE:
		while( !Saw_break )
		{
			walk_tree(root->left);
			if( ! popint() )
				break;
			walk_tree(root->right);
		}
		/* a "break" only leaves the innermost loop */
		Saw_break = 0;
		break;
	case T_BREAK:
		Saw_break = 1;
		break;
	default:
		DBUG_PRINT("walk_tree",
			("decimal value of operator = %d",root->operator));
		error("internal error: parse tree node with unknown symbol",
		      ACT_ERROR);
	}
	DBUG_VOID_RETURN;
}

void preincdec(incr)
register int incr;
{
	/*
	 * Pre increment/decrement of the item on top of the expression
	 * stack.  "incr" is the signed step to apply.
	 *
	 * The top item must be an lvalue.  When its class is non-zero the
	 * step is first multiplied by the item's size; the result is then
	 * added to the storage reached through value.ptrptr.  The stack
	 * item itself is left untouched.
	 */
	DBUG_ENTER("preincdec");

	/* Reject anything that cannot be assigned to. */
	if ( ! Stackptr->lvalue )
	{
		error( "pre '++' or '--' needs an lvalue", ACT_ERROR );
		DBUG_VOID_RETURN;
	}

	/* Non-zero class: step by whole elements rather than by one. */
	if ( Stackptr->class != 0 )
		incr = incr * Stackptr->size;

	*Stackptr->value.ptrptr += incr;
	DBUG_VOID_RETURN;
}


void postincdec(incr)
register int incr;
{
	/*
	 * Post increment/decrement of the item on top of the expression
	 * stack.  "incr" is the signed step to apply.
	 *
	 * The value held before the update is captured, the underlying
	 * storage is stepped, and the captured value is then put back into
	 * the stack item with its lvalue flag cleared, so the expression
	 * yields the old value and can no longer be assigned to.
	 */
	register char **old;	/* value before the update */

	DBUG_ENTER("postincdec");

	/* Reject anything that cannot be assigned to. */
	if ( ! Stackptr->lvalue )
	{
		error( "post '++' or '--' needs an lvalue", ACT_ERROR );
		DBUG_VOID_RETURN;
	}

	if ( Stackptr->class )
	{
		/*
		 * Pointer case: remember where it pointed, then move it by
		 * "incr" elements.  With the old pointer stored in the item
		 * it looks like an array, e.g. after
		 *   char *cp;
		 *   cp++
		 * the result is not something one would assign to.
		 */
		old = (char **) *Stackptr->value.ptrptr;
		*Stackptr->value.ptrptr += incr * Stackptr->size;
	}
	else
	{
		/*
		 * Simple variable: remember its contents (read through dptr
		 * when the item is BYTE sized, through ptrptr otherwise),
		 * then step it.  With the old contents stored in the item
		 * it looks like a constant.
		 */
		if ( Stackptr->size != BYTE )
			old = (char **) *Stackptr->value.ptrptr;
		else
			old = (char **) *Stackptr->value.dptr;
		*Stackptr->value.ptrptr += incr;
	}

	/* Leave the pre-update value on the stack as a plain rvalue. */
	Stackptr->value.ptrptr = old;
	Stackptr->lvalue = 0;
	DBUG_VOID_RETURN;
}
SHAR_EOF
cat << \SHAR_EOF > bawkpat.c
/*
 * Bawk regular expression compiler/interpreter
 */
#include <stdio.h>
#include "bawk.h"
 
int re_compile( patbuf )
char	*patbuf;		/* where to put compiled pattern */
{
	/*
	 * Compile a regular expression from current input file
	 * into the given pattern buffer.
	 *
	 * The first character read is taken as the pattern delimiter;
	 * input is consumed up to the next occurrence of that delimiter
	 * (or until getcharacter() returns -1).  The compiled form is a
	 * sequence of opcodes closed by ENDPAT and a 0 byte.  A closure
	 * operator ('*', '+' or '-') is compiled as
	 *	<STAR|PLUS|MINUS> <element it applies to> ENDPAT
	 *
	 * Returns the number of bytes stored in patbuf.  The size of the
	 * buffer is not known here, so the caller must supply one that is
	 * large enough for the compiled pattern.
	 */
	register int c,		/* Current character         */
		o;		/* Opcode of last element    */
	register char delim,	/* pattern delimiter         */
		*patptr,	/* destination string pntr   */
		*lp,		/* Last pattern pointer      */
		*spp;		/* Save beginning of pattern */
	char *cclass();		/* Compile class routine     */
 
	DBUG_ENTER("re_compile");
	lp = patptr = patbuf;
	delim = getcharacter();

	while ( (c = getcharacter()) != -1 && c != delim )
	{
		/*
		 * STAR, PLUS and MINUS are special.
		 */
		if (c == '*' || c == '+' || c == '-') {
			/*
			 * A closure needs a preceding element, and that
			 * element may not be an anchor or another closure.
			 * Look at the opcode that STARTS the last element
			 * (*lp), not at the last byte stored: the last byte
			 * is a literal's character, a class member, or the
			 * ENDPAT closing an earlier closure, so testing it
			 * lets "a**" through (which makes the matcher loop
			 * on an empty sub-match) and wrongly rejects
			 * literals whose code equals an opcode.
			 */
			if (patptr == patbuf ||
				  (o = *lp) == BOL ||
				  o == EOL ||
				  o == STAR ||
				  o == PLUS ||
				  o == MINUS)
				error( "illegal occurrance op", RE_ERROR );
			/*
			 * Two bytes are appended: one is the ENDPAT that
			 * closes the sub-pattern, the other makes room for
			 * the one-byte shift below.
			 */
			*patptr++ = ENDPAT;
			*patptr++ = ENDPAT;
			spp = patptr;		/* Save pattern end     */
			while (--patptr > lp)	/* Move pattern down... */
				*patptr = patptr[-1];	/* one byte     */
			/* patptr == lp here: put the closure opcode in front */
			*patptr =   (c == '*') ? STAR :
				(c == '-') ? MINUS : PLUS;
			patptr = spp;		/* Restore pattern end  */
			continue;
		}
		/*
		 * All the rest.
		 */
		lp = patptr;			/* Remember start       */
		switch(c) {
 
		case '^':
			*patptr++ = BOL;
			break;
 
		case '$':
			*patptr++ = EOL;
			break;
 
		case '.':
			*patptr++ = ANY;
			break;
 
		case '[':
			patptr = cclass( patptr );
			break;
 
		case ':':
			/* ":x" selects a predefined character type */
			if ( (c=getcharacter()) != -1 )
			{
				switch( tolower( c ) )
				{
 
				case 'a':
					*patptr++ = ALPHA;
					break;
 
				case 'd':
					*patptr++ = DIGIT;
					break;
 
				case 'n':
					*patptr++ = NALPHA;
					break;
 
				case ' ':
					*patptr++ = PUNCT;
					break;
 
				default:
					error( "unknown ':' type", RE_ERROR );
 
				}
			}
			else
				error( "no ':' type", RE_ERROR );
 			break;

		case '\\':
			/* quoted character: take the next one literally */
			c = getcharacter();
			/* fall through */
 
		default:
			*patptr++ = CHAR;
			*patptr++ = c;
		}
	}
	*patptr++ = ENDPAT;
	*patptr++ = 0;			/* Terminate string     */

	DBUG_EXECUTE("re_match",re_debug(patbuf,patptr););
	DBUG_RETURN(patptr - patbuf);
}

#ifndef DBUG_OFF
re_debug(patbuf, patptr)
register char *patbuf, *patptr;
{
	/*
	 * Debugging aid: walk the compiled pattern bytes in
	 * [patbuf, patptr) and emit one DBUG_PRINT per byte under the
	 * "re_match" keyword.  Bytes equal to a known opcode are printed
	 * by name, anything else as a character.  For CLASS and NCLASS
	 * the byte that follows is printed as a number.
	 */
	register char *cur;	/* byte being reported */

	cur = patbuf;
	while ( cur < patptr )
	{
		switch ( *cur )
		{
		/* classes: the following byte is shown as a number */
		case CLASS:	DBUG_PRINT("re_match",("class(%d) ",*++cur)); break;
		case NCLASS:	DBUG_PRINT("re_match",("notclass(%d) ",*++cur)); break;
		case RANGE:	DBUG_PRINT("re_match",("range ")); break;

		/* closures */
		case STAR:	DBUG_PRINT("re_match",("star ")); break;
		case PLUS:	DBUG_PRINT("re_match",("plus ")); break;
		case MINUS:	DBUG_PRINT("re_match",("minus ")); break;

		/* anchors and single-character elements */
		case BOL:	DBUG_PRINT("re_match",("bol ")); break;
		case EOL:	DBUG_PRINT("re_match",("eol ")); break;
		case ANY:	DBUG_PRINT("re_match",("any ")); break;
		case CHAR:	DBUG_PRINT("re_match",("char ")); break;

		/* ':' character types */
		case ALPHA:	DBUG_PRINT("re_match",("alpha ")); break;
		case DIGIT:	DBUG_PRINT("re_match",("digit ")); break;
		case NALPHA:	DBUG_PRINT("re_match",("notalpha ")); break;
		case PUNCT:	DBUG_PRINT("re_match",("punct ")); break;

		case ENDPAT:	DBUG_PRINT("re_match",("endpat ")); break;

		/* not an opcode value: show the raw character */
		default:	DBUG_PRINT("re_match",("<%c> ", *cur)); break;
		}
		++cur;
	}
}
#endif

char *
cclass( patbuf )
register char	*patbuf;	/* destination pattern buffer */
{
	/*
	 * Compile a class (within []).  The opening '[' has already been
	 * consumed by the caller; input is read through the closing ']'.
	 *
	 * Layout written at patbuf:
	 *	<CLASS|NCLASS> <count> <members...>
	 * where <count> is the number of bytes from the count byte to the
	 * end of the class (so it includes the count byte itself), and a
	 * range "a-z" is stored as the three bytes RANGE 'a' 'z'.
	 *
	 * Returns a pointer to the first free byte after the class.
	 */
	register char *patptr,	/* destination pattern pointer */
		*cp;		/* Where the byte count lives  */
	register int c,		/* Current character */
		o;		/* Temp / lookahead  */

	DBUG_ENTER("cclass");
	patptr = patbuf;

	if ( (c = getcharacter()) == -1 )
		error( "class terminates badly", RE_ERROR );
	else if ( c == '^')
	{
		/*
		 * Class exclusion, for example: [^abc]
		 * Swallow the "^" and set token type to class exclusion.
		 */
		o = NCLASS;
	}
	else
	{
		/*
		 * Normal class, for example: [abc]
		 * push back the character and set token type to class
		 */
		ungetcharacter( (char) c );
		o = CLASS;
	}
	*patptr++ = o;

	cp = patptr;	/* remember where byte count is */
	*patptr++ = 0;	/* and initialize byte count */
	while ( (c = getcharacter()) != -1 && c!=']' )
	{
		o = getcharacter();		/* peek at next char */
		if (c == '\\')			/* Store quoted chars */
		{
			if ( o == -1) /* Gotta get something */
				error( "class terminates badly", RE_ERROR );
			*patptr++ = o;
		}
		else if ( c=='-' && (patptr-cp)>1 && o!=']' && o != -1 )
		{
			/*
			 * A '-' with a member before it and a real character
			 * after it: rewrite the previous member as a range.
			 */
			c = patptr[-1];		/* Range start     */
			patptr[-1] = RANGE;	/* Range signal    */
			*patptr++ = c;		/* Re-store start  */
			*patptr++ = o;		/* Store end char  */
		}
		else
		{
			*patptr++ = c;		/* Store normal char */
			ungetcharacter( (char) o );
		}
	}
	if (c != ']')
		error( "unterminated class", RE_ERROR );
	/* the count has to fit in the single count byte */
	if ( (c = (patptr - cp)) >= 256 )
		error( "class too large", RE_ERROR );
	/*
	 * The count includes the count byte, so it is never 0; a class
	 * with no members ("[]" or "[^]") has a count of exactly 1.
	 */
	if ( c <= 1 )
		error( "empty class", RE_ERROR );
	*cp = c;		/* fill in byte count */

	DBUG_RETURN(patptr);
}
 
int match( line, pattern )
char	*line;		/* line to match */
register char *pattern;	/* pattern to match */
{
	/*
	 * Count the matches of the compiled pattern in "line".
	 *
	 * pmatch() is tried at each position of the line in turn; after a
	 * successful match the scan resumes right behind the matched
	 * text, so matches do not overlap.  Returns the number of matches
	 * found (0 if none; an empty line is never scanned).
	 */
	register char *l;	/* Line pointer       */
	char	*pmatch();
	register char *next;	/* End of the match found at l */
	register int	matches;
 
	DBUG_ENTER("match");
	matches = 0;
	for (l = line; *l; l++)
	{
		if ( (next = pmatch(line, l, pattern)) != 0 )
		{
			/*
			 * Skip the matched text; the loop's l++ then lands
			 * on the first character behind it.  A zero-length
			 * match (next == l, e.g. from "^" or "x*") must not
			 * move l backwards, or the same position would be
			 * tried again forever - just let l++ step past it.
			 */
			if ( next > l )
				l = next - 1;
			++matches;
			DBUG_PRINT("match",("match found"));
		}
	}

	DBUG_RETURN(matches);
}
 
char *
pmatch(linestart, line, pattern)
char	*linestart;	 /* start of line to match */
char	*line;		 /* (partial) line to match      */
char	*pattern;	 /* (partial) pattern to match   */
{
	/*
	 * Try to match the compiled pattern at exactly the position
	 * "line" (no searching).  "linestart" is only used to decide
	 * whether BOL holds.  Closures recurse on their sub-pattern and
	 * on the remainder of the pattern.
	 *
	 * Returns a pointer to the first character behind the matched
	 * text (equal to "line" for an empty match), or 0 if the pattern
	 * does not match here.
	 */
	register char *l;/* Current line pointer         */
	register char *p;/* Current pattern pointer      */
	register char c; /* Current character            */
	register char *e;/* End for STAR and PLUS match  */
	register int op; /* Pattern operation            */
	register int n;	 /* Class counter                */
	char	*are;	 /* Start of STAR match          */
 
	DBUG_ENTER("pmatch");
	l = line;

	DBUG_PRINT("pmatch",("line: (%s)", line));

	p = pattern;
	while ((op = *p++) != ENDPAT) {

	DBUG_PRINT("pmatch",("byte[%d] = 0%o, '%c', op = 0%o",l-line, *l, *l, op));

		switch(op) {
 
		case CHAR:
			if ( *l++ != *p++)
				DBUG_RETURN(0);
			break;
 
		case BOL:
			if (l != linestart)
				DBUG_RETURN(0);
			break;
 
		case EOL:
			if (*l != '\0')
				DBUG_RETURN(0);
			break;
 
		case ANY:
			if (*l++ == '\0')
				DBUG_RETURN(0);
			break;
 
		case DIGIT:
			if ((c = *l++) < '0' || (c > '9'))
				DBUG_RETURN(0);
			break;
 
		case ALPHA:
			c = *l++;
			c = tolower( c );
			if (c < 'a' || c > 'z')
				DBUG_RETURN(0);
			break;
 
		case NALPHA:
			/* letters and digits are accepted */
			c = *l++;
			c = tolower( c );
			if (c >= 'a' && c <= 'z')
				break;
			else if (c < '0' || c > '9')
				DBUG_RETURN(0);
			break;
 
		case PUNCT:
			/* anything from 1 up to and including blank */
			c = *l++;
			if (c == 0 || c > ' ')
				DBUG_RETURN(0);
			break;
 
		case CLASS:
		case NCLASS:
			c = *l++;
			/*
			 * Neither kind of class may match the terminator.
			 * Without this an exclusion class accepts the NUL
			 * as "not in the class" and the line pointer runs
			 * off the end of the line.
			 */
			if (c == '\0')
				DBUG_RETURN(0);
			/* n counts the count byte plus all member bytes */
			n = *p++ & 0377;
			do {
				if (*p == RANGE) {
					/* RANGE lo hi: three bytes */
					p += 3;
					n -= 2;
					if (c >= p[-2] && c <= p[-1])
						break;
				}
				else if (c == *p++)
					break;
			} while (--n > 1);
			/* n <= 1 means every member was tried in vain */
			if ((op == CLASS) == (n <= 1))
				DBUG_RETURN(0);
			if (op == CLASS)
				p += n - 2;	/* skip untested members */
			break;
 
		case MINUS:
			e = pmatch(linestart,l,p);/* Look for a match    */
			while (*p++ != ENDPAT);	/* Skip over pattern   */
			if (e)			/* Got a match?        */
				l = e;		/* Yes, update string  */
			break;			/* Always succeeds     */
 
		case PLUS:			/* One or more ...     */
			if ((l = pmatch(linestart,l,p)) == 0)
				DBUG_RETURN(0);	/* Gotta have a match  */
			/* fall through */
		case STAR:			/* Zero or more ...    */
			are = l;		/* Remember line start */
			/*
			 * Stop as soon as the sub-pattern matches without
			 * consuming anything (e == l); repeating an empty
			 * match would never end.
			 */
			while (*l && (e = pmatch(linestart,l,p)) && e != l)
				l = e;		/* Get longest match   */
			while (*p++ != ENDPAT);	/* Skip over pattern   */
			while (l >= are) {	/* Try to match rest   */
				if (e = pmatch(linestart,l,p))
					DBUG_RETURN(e);
				--l;		/* Nope, try earlier   */
			}
			DBUG_RETURN(0);		/* Nothing else worked */
 
		default:
			fprintf( stderr, "bad op code %d\n", op );
			error( "can't happen -- match", RE_ERROR );
		}
	}
	DBUG_RETURN(l);
}

SHAR_EOF
cat << \SHAR_EOF > example2
BEGIN
{
	strcpy(RS,".");  # set record separator to a period
}
{
	if ( match( $1, @^[a-z]@ ) )
		*$1 = toupper( *$1 );  # first field starts with a lowercase letter - capitalize it
	printf( "%s\n", $0 );  # then print the result on a line of its own
}
SHAR_EOF
cat << \SHAR_EOF > tst1
@[(]@
{
	parens = parens + match( $0, @(@ );  # add the number of open parens matched
}
END
{
	printf("parens=%d\n", parens );  # report the total
}
SHAR_EOF