jw@sics.sics.se.UUCP (08/09/87)
# This is a shell archive.
# Remove everything above and including the cut line.
# Then run the rest of the file through sh.
#----cut here-----cut here-----cut here-----cut here----#
#!/bin/sh
# Xshar: Extended Shell Archiver.
# This is part 2 out of 2.
# This archive created: Sat Aug 8 19:32:40 1987
# By: Craig Norborg (Purdue University Computing Center)
# Run the following text with /bin/sh to create:
# bawk.c
# bawkact.c
# bawkdo.c
# bawkpat.c
# example2
# tst1
cat << \SHAR_EOF > bawk.c
/*
* Bawk main program
*/
#define MAIN 1
#include <stdio.h>
#include "bawk.h"
/*
 * Program text given on the command line.  While this is non-NULL,
 * getcharacter() reads from this string instead of from Fileptr.
 */
static char *pattern_arg = NULL; /* Command line bawk program pattern */
/*
 * One-character pushback slot used by ungetcharacter() while reading from
 * pattern_arg; 0 means "nothing pushed back".
 */
static int ungetc_arg = 0;
/*
 * Set by getline() when end of file is reached after a non-empty record;
 * the next getline() call then closes the file and returns 0.
 */
static char eof_seen = 0;
/*
 * Number of word-list slots for which parse() has already allocated a
 * buffer.
 */
static int max_field_count = 0;
/*
* Main program
*/
/*
 * main - program entry point.
 *
 * Initializes the interpreter globals, then walks the command line:
 *   -#<opts>      debug options, handed to DBUG_PUSH
 *   -f <actfile>  read the rules from <actfile>
 *   -             read from standard input (rules if none were read yet,
 *                 otherwise text)
 * The first non-option argument is taken as the program text when no rules
 * have been compiled yet; every later argument is a text file to process.
 * If no text file is named, standard input is processed.  The BEGIN action
 * runs before the first file, the END action after the last one.
 */
main( argc, argv )
register int argc;
register char **argv;
{
    register char gotrules = 0, didfile = 0, getstdin = 0;
    register char rule_file_flag = 0;
    DBUG_ENTER("main");
    /*
     * Initialize global variables:
     */
    Beginact = 0;
    Endact = 0;
    Rules = 0;
    Rulep = 0;
    Filename = 0;
    Linecount = 0;
    Saw_break = 0;
    Stackptr = Stackbtm - 1;
    Stacktop = Stackbtm + MAXSTACKSZ;
    Nextvar = Vartab;
    init_pop_array();
    strcpy( Fieldsep, " \t" );
    strcpy( Recordsep, "\n" );
    /*
     * Parse command line
     */
    while ( --argc )
    {
        if ( **(++argv) == '-' )
        {
            /*
             * Process dash options.
             */
            switch ( tolower( argv[0][1] ) )
            {
            case '#':
                DBUG_PUSH(&argv[0][2]);
                continue;
            case 'f':
                /*
                 * "-f" is only legal before the rules have been
                 * read and must be followed by a file name.
                 * argc counts the current argument too, so at
                 * least 2 must remain.  Without this check a
                 * trailing "-f" drove argc to 0 and the loop
                 * condition "--argc" then ran past the end of
                 * argv.
                 */
                if ( gotrules || argc < 2 )
                    usage();
                else
                {
                    rule_file_flag++;
                    argv++;
                    argc--;
                }
                break;
            case 0:
                /* A lone "-" stands for standard input. */
                if(!gotrules)
                    rule_file_flag++;
                getstdin++;
                break;
            default: usage();
            }
        }
        if ( gotrules )
        {
            /*
             * Already read rules file - assume this is
             * a text file for processing.
             */
            if ( ++didfile == 1 && Beginact )
                doaction( Beginact );
            if ( getstdin )
            {
                getstdin--;
                newfile( 0 );
            }
            else
                newfile( *argv );
            process();
        }
        else
        {
            /*
             * No rules yet: this argument supplies them, either as
             * a file (or stdin) or as literal program text.
             */
            if(rule_file_flag) {
                if ( getstdin )
                {
                    getstdin--;
                    newfile( 0 );
                }
                else
                    newfile( *argv );
            } else
                pattern_arg = *argv;
            compile();
            pattern_arg = NULL;
            gotrules = 1;
        }
    }
    if ( !gotrules )
        usage();
    if ( ! didfile )
    {
        /*
         * Didn't process any files yet - process stdin.
         */
        newfile( 0 );
        if ( Beginact )
            doaction( Beginact );
        process();
    }
    if ( Endact )
        doaction( Endact );
    DBUG_RETURN(0);
}
/*
* Regular expression/action file compilation routines.
*/
/*
 * compile - read the bawk program through getcharacter() and build the
 * rule list headed by "Rules", plus the special "Beginact" and "Endact"
 * action trees.  Every rule that ends up without an action is given a
 * default action that prints $0.  The input is closed with endfile()
 * when done.
 */
void compile()
{
/*
* Compile regular expressions and C actions into Rules struct,
* reading from the current input (getcharacter() decides whether that
* is the command line program text or the file at "Fileptr").
*/
register int c;
register EXPR_NODE *root;
DBUG_ENTER("compile");
while ( (c = getcharacter()) != -1 )
{
if ( c==' ' || c=='\t' || c=='\n' )
/* swallow whitespace */
;
else if ( c=='#' )
{
/*
* Swallow comments (from '#' to end of line)
*/
while ( (c=getcharacter()) != -1 && c!='\n' )
;
}
else if ( c=='{' )
{
DBUG_PRINT("compile",("action"));
/*
* Compile the action string into a parse tree.
* The brace is pushed back so that act_compile() sees it.
*/
ungetcharacter( (char) '{' );
if ( Rulep && Rulep->action )
{
/*
* The current rule already has an action, so this
* action starts a new, pattern-less rule.
*/
Rulep->nextrule = (RULE *)
get_clear_memory( sizeof( *Rulep ) );
Rulep = Rulep->nextrule;
}
if ( !Rulep )
{
/*
* This is the first action encountered.
* Allocate the first Rules structure and
* initialize it
*/
Rules = Rulep = (RULE *)
get_clear_memory( sizeof( *Rulep ) );
}
Rulep->action = act_compile( Workbuf );
}
else if ( c==',' )
{
DBUG_PRINT("compile",("stop pattern"));
/*
* It's (hopefully) the second part of a two-part
* pattern string. The comma has been swallowed; compile
* what follows as the stop pattern of the current rule.
*/
if ( !Rulep || !Rulep->pattern.start )
error( "stop pattern without a start",
RE_ERROR );
if ( Rulep->pattern.stop )
error( "already have a stop pattern",
RE_ERROR );
Rulep->pattern.stop = pat_compile( Workbuf );
}
else
{
/*
* Assume it's a regular expression pattern
*/
DBUG_PRINT("compile",("start pattern"));
ungetcharacter( (char) c );
root = pat_compile( Workbuf );
/*
* The first token left in Workbuf tells whether the
* "pattern" was really one of the BEGIN/END keywords.
*/
if ( *Workbuf == T_BEGIN )
{
/*
* Saw a "BEGIN" keyword - compile following
* action into special "Beginact" parse tree.
*/
Beginact = act_compile( Workbuf );
continue;
}
if ( *Workbuf == T_END )
{
/*
* Saw an "END" keyword - compile following
* action into special "Endact" parse tree.
*/
Endact = act_compile( Workbuf );
continue;
}
if ( Rulep )
{
/*
* Already saw a pattern/action - link in
* another Rules structure.
*/
Rulep->nextrule = (RULE *)
get_clear_memory( sizeof( *Rulep ) );
Rulep = Rulep->nextrule;
}
if ( !Rulep )
{
/*
* This is the first pattern encountered.
* Allocate the first Rules structure and
* initialize it
*/
Rules = Rulep = (RULE *)
get_clear_memory( sizeof( *Rulep ) );
}
if ( Rulep->pattern.start )
error( "already have a start pattern",
RE_ERROR );
Rulep->pattern.start = root;
}
}
/*
* Give every rule without an action the default "print the record"
* action.  The action text is compiled from memory by pointing
* pattern_arg at it, which makes getcharacter() read from the string.
*/
for(Rulep = Rules; Rulep; Rulep = Rulep->nextrule)
{
if(!Rulep->action) {
pattern_arg = "{printf \"%s\n\", $0}";
Rulep->action = act_compile( Workbuf );
pattern_arg = NULL;
}
}
/* Close the rules input and reset the per-file state. */
endfile();
DBUG_VOID_RETURN;
}
/*
* Text file main processing loop.
*/
/*
 * process - main loop over the records of the current input file.
 *
 * Each record delivered by getline() is split into fields, then every
 * rule in the Rules chain is tried against it in order:
 *   - a rule without a start pattern fires on every record;
 *   - a rule whose start pattern matches fires, and if it also has a
 *     stop pattern it keeps firing on every following record up to and
 *     including the one matching the stop pattern.
 */
void process()
{
    register int i;     /* index for the debug dump of the fields */
    DBUG_ENTER("process");
    Recordcount = 0;
    while ( getline() )
    {
        /*
         * An empty record separator means records are split at empty
         * lines, so newline becomes a field separator as well.
         */
        if ( *Recordsep == 0 )
            strcpy(Fieldsep," \t\n");
        Fieldcount = parse( Linebuf, Fields, Fieldsep );
        DBUG_PRINT("process",( "parsed %d words:", Fieldcount ));
        DBUG_EXECUTE("process",for(i=0; i<Fieldcount; ++i )DBUG_PRINT("process",("<%s>",Fields[i])););
        for ( Rulep = Rules; Rulep; Rulep = Rulep->nextrule )
        {
            if ( Rulep->pattern.start && !Rulep->pattern.startseen )
            {
                /*
                 * Rule is waiting for its start pattern.  On a
                 * match run the action and, when a stop pattern
                 * exists, remember that the range is now open.
                 */
                if ( !dopattern( Rulep->pattern.start ) )
                    continue;
                doaction( Rulep->action );
                if ( Rulep->pattern.stop )
                    Rulep->pattern.startseen = 1;
                continue;
            }
            /*
             * Either the rule has no pattern at all, or we are
             * inside an open start/stop range: run the action.
             */
            doaction( Rulep->action );
            /*
             * Inside a range, a match of the stop pattern closes it.
             */
            if ( Rulep->pattern.start && dopattern( Rulep->pattern.stop ) )
                Rulep->pattern.startseen = 0;
        }
    }
    DBUG_VOID_RETURN;
}
/*
* Miscellaneous functions
*/
/*
 * parse - split "str" into words and copy them into "wrdlst".
 *
 * A word is a maximal run of characters none of which occurs in "delim".
 * Slots of "wrdlst" that have never been used before get a freshly
 * allocated buffer of MAXLINELEN+1 bytes, so that a field can later be
 * overwritten with a longer string; already allocated slots are reused.
 * Returns the number of words found.
 */
parse( str, wrdlst, delim )
register char *str;
char *wrdlst[];
char *delim;
{
    register int nwords;
    register char *dp, *out;
    char word[ MAXLINELEN+1 ];
    DBUG_ENTER("parse");
    nwords = 0;
    for (;;)
    {
        /*
         * Step over any delimiters in front of the next word.
         */
        for ( ; *str; str++ )
        {
            for ( dp = delim; *dp && *dp != *str; dp++ )
                ;
            if ( !*dp )
                break;          /* not a delimiter */
        }
        if ( !*str )
            break;              /* nothing but delimiters left */
        /*
         * Collect the word up to the next delimiter or end of string.
         */
        out = word;
        for ( ; *str; str++ )
        {
            for ( dp = delim; *dp && *dp != *str; dp++ )
                ;
            if ( *dp )
                break;          /* hit a delimiter */
            *out++ = *str;
        }
        *out = 0;
        /*
         * First time this slot is needed: give it its own buffer.
         */
        if ( nwords == max_field_count )
        {
            wrdlst[ nwords ] = getmemory( MAXLINELEN+1 );
            max_field_count++;
        }
        strcpy( wrdlst[ nwords ], word );
        nwords++;
    }
    DBUG_RETURN(nwords);
}
/*
 * unparse - rebuild "str" from the words in "wrdlst".
 *
 * The delimiter runs of the original string are kept as they are; the
 * n-th word of the string is replaced by wrdlst[n-1] as long as n does not
 * exceed "wrdcnt" (words beyond that are dropped).  A "word" is a run of
 * characters none of which occurs in "delim".
 *
 * The rebuilt record is limited to MAXLINELEN characters: replaced fields
 * may be longer than the originals, and copying them unchecked overran the
 * local buffer (and the caller's).  Anything that does not fit is cut off.
 * NOTE(review): "str" is assumed to have room for MAXLINELEN+1 bytes, as
 * the original code assumed as well - confirm against the callers.
 */
void unparse( wrdlst, wrdcnt, str, delim )
char *wrdlst[];
register int wrdcnt;
register char *str;
char *delim;
{
    register int wc;
    register char *sp, *cp, c;
    char strbuf[ MAXLINELEN+1 ], *start, *limit;
    DBUG_ENTER("unparse");
    wc = 0;                         /* next word in "wrdlst" */
    sp = strbuf;                    /* write position in the local copy */
    limit = strbuf + MAXLINELEN;    /* last slot, reserved for the NUL */
    start = str;                    /* remember where to copy back to */
    while ( *str && sp < limit )
    {
        /*
         * Copy the field delimiters from the original string to
         * our local version.
         */
        while ( (c = *str) != 0 )
        {
            for ( cp = delim; *cp && c != *cp; cp++ )
                ;
            if ( !*cp )
                break;              /* start of a word */
            if ( sp < limit )
                *sp++ = c;
            str++;
        }
        if ( !*str )
            break;
        /*
         * Skip over the field in the original string and...
         */
        while ( (c = *str) != 0 )
        {
            for ( cp = delim; *cp && c != *cp; cp++ )
                ;
            if ( *cp )
                break;              /* end of the word */
            str++;
        }
        if ( wc < wrdcnt )
        {
            /*
             * ...copy in the field in the wordlist instead,
             * stopping when the local buffer is full.
             */
            for ( cp = wrdlst[ wc++ ]; *cp && sp < limit; cp++ )
                *sp++ = *cp;
        }
    }
    /*
     * Tie off the local string, then copy it back to caller's string.
     */
    *sp = 0;
    strcpy( start, strbuf );
    DBUG_VOID_RETURN;
}
/*
 * getmemory - allocate "len" bytes with malloc().
 * Reports "out of memory" through error() with MEM_ERROR when the
 * allocation fails; NULL is returned only if error() comes back.
 */
char *
getmemory( len )
register unsigned len;
{
    register char *block;
    DBUG_ENTER("getmemory");
    block = malloc( len );
    if ( !block )
    {
        error( "out of memory", MEM_ERROR );
        DBUG_RETURN(NULL);
    }
    DBUG_RETURN(block);
}
/*
 * get_clear_memory - allocate "len" zero-filled bytes with calloc().
 * Reports "out of memory" through error() with MEM_ERROR when the
 * allocation fails; NULL is returned only if error() comes back.
 */
char *
get_clear_memory( len )
register unsigned len;
{
    register char *cp;
    /* The trace tag used to read "getmemory", a copy-paste slip. */
    DBUG_ENTER("get_clear_memory");
    if ( cp=calloc( 1, len ) )
        DBUG_RETURN(cp);
    error( "out of memory", MEM_ERROR );
    DBUG_RETURN(NULL);
}
/*
 * get_expr_node - allocate a parse tree node for token "operator".
 * Both children start out as NULL.  The storage comes from getmemory().
 */
EXPR_NODE *get_expr_node(operator)
char operator;
{
    register EXPR_NODE *fresh;
    DBUG_ENTER("get_expr_node");
    fresh = (EXPR_NODE *) getmemory(sizeof(*fresh));
    fresh->right = NULL;
    fresh->left = NULL;
    fresh->operator = operator;
    DBUG_PRINT("get_expr_node",("operator = '%s'",token_name[operator]));
    DBUG_RETURN(fresh);
}
/*
 * newfile - make "s" the current input file.
 * A null "s" selects standard input, with "standard input" as the file
 * name.  The line counter is reset; a file that cannot be opened is
 * reported through error() with FILE_ERROR.
 */
void newfile( s )
register char *s;
{
    DBUG_ENTER("newfile");
    Linecount = 0;
    Filename = s;
    if ( !s )
    {
        /*
         * No file name given - process standard input.
         */
        Fileptr = stdin;
        Filename = "standard input";
        DBUG_VOID_RETURN;
    }
#ifdef BDS_C
    if ( fopen( s, Fileptr = Curfbuf ) == -1 )
#else
    if ( !(Fileptr = fopen( s, "r" )) )
#endif
        error( "file not found", FILE_ERROR );
    DBUG_VOID_RETURN;
}
/*
 * getline - read the next record from the current input into Linebuf.
 *
 * Returns 1 when a record was read (Recordcount is incremented) and 0 at
 * end of input, in which case the file has been closed with endfile().
 * With a non-empty Recordsep a record ends at the separator character;
 * with an empty Recordsep a record ends at an empty line.  A record that
 * reaches MAXLINELEN characters is reported through error() with
 * RECORD_ERROR.
 *
 * NOTE(review): POSIX <stdio.h> declares a different function called
 * getline - confirm the build does not expose that declaration.
 */
getline()
{
/*
* Read a record from current input file.
*/
register int rtn, len = 0;
register char *cp = Linebuf, *last_nl, *sep = Recordsep;
DBUG_ENTER("getline");
/*
* The previous call returned the last, unterminated record of the
* file; now report the end of file.
*/
if(eof_seen)
{
endfile();
DBUG_RETURN(0);
}
if(*sep)
{
/*
* Store characters until one equals the first character of
* Recordsep or end of file is reached.
* NOTE(review): the inner loop scans the remaining characters of
* Recordsep, but its "break" only leaves that inner loop, so as
* written only the first separator character ends a record -
* confirm whether that is intended.
*/
while((*cp++ = rtn = getcharacter()) != *sep++ && rtn != -1)
{
while(*sep)
{
if(rtn == *sep++)
break;
}
if( ++len == MAXLINELEN )
error("Input record too long", RECORD_ERROR);
sep = Recordsep;
}
} else /* Treat an empty line as record separator. */
{
while(1)
{
/* last_nl marks where the current line starts in Linebuf. */
last_nl = cp;
while((*cp++ = rtn = getcharacter()) != '\n' &&
rtn != -1)
{
if( ++len == MAXLINELEN )
error("Input record too long",
RECORD_ERROR);
}
/*
* Stop when the line just read holds nothing but its
* newline (an empty line), or at end of file.
*/
if(((cp - last_nl) == 1) || (rtn == -1))
break;
}
}
/* Overwrite the terminating character (separator or EOF) with NUL. */
*(--cp) = 0;
if ( rtn == -1 )
{
/*
* End of file: a non-empty partial record is still returned and
* the end of file is reported on the next call; otherwise finish
* right away.
*/
if(len)
eof_seen = 1;
else
{
endfile();
DBUG_RETURN(0);
}
}
++Recordcount;
DBUG_RETURN(1);
}
/*
 * getcharacter - fetch the next input character.
 *
 * While pattern_arg is set the characters come from that string (a
 * pending pushback in ungetc_arg first) and EOF is returned at its end.
 * Otherwise the character is read from Fileptr and Linecount is bumped
 * for every newline.
 *
 * WARNING: your getc() must convert lines that end with CR+LF
 * to LF and CP/M's EOF character (^Z) to a -1.
 * Also, getc() must return a -1 when attempting to read from
 * an unopened file.
 */
int getcharacter()
{
    register int ch;
    DBUG_ENTER("getcharacter");
    if ( !pattern_arg )
    {
#ifdef BDS_C
        /*
         * BDS C doesn't do CR+LF to LF and ^Z to -1 conversions
         * <gag>
         */
        ch = getc( Fileptr );
        if ( ch == '\r' )
        {
            ch = getc( Fileptr );
            if ( ch != '\n' )
            {
                /* lone CR: put the look-ahead back */
                ungetc( ch );
                ch = '\r';
            }
        }
        else if ( ch == 26 ) /* ^Z */
            ch = -1;
#else
        ch = getc( Fileptr );
#endif
        if ( ch == '\n' )
            ++Linecount;
    }
    else if ( ungetc_arg )
    {
        /* hand back the pushed-back character first */
        ch = ungetc_arg;
        ungetc_arg = 0;
    }
    else if ( *pattern_arg )
        ch = *pattern_arg++;
    else
        ch = EOF;
    DBUG_PRINT("getcharacter",("'%c'", ch));
    DBUG_RETURN(ch);
}
/*
 * ungetcharacter - push "c" back onto the input.
 *
 * Recordcount is decremented when "c" is the first record separator
 * character and Linecount when it is a newline.  While the input is the
 * command line program (pattern_arg set) the character is parked in
 * ungetc_arg, otherwise it goes back to Fileptr through ungetc().
 * Returns the parked character or the result of ungetc().
 */
ungetcharacter( c )
register char c;
{
    register int result;
    DBUG_ENTER("ungetcharacter");
    if ( c == *Recordsep )
        --Recordcount;
    if ( c=='\n' )
        --Linecount;
    DBUG_PRINT("ungetcharacter",("'%c'", c));
    if ( pattern_arg )
    {
        ungetc_arg = c;
        result = ungetc_arg;
    }
    else
        result = ungetc( c, Fileptr );
    DBUG_RETURN(result);
}
/*
 * endfile - close the current input file and reset the per-file state
 * (eof_seen, Filename, Linecount).
 *
 * compile() calls this even when the program text came from the command
 * line, in which case no file may ever have been opened.  Passing a null
 * stream to fclose() is undefined, so the close is skipped when Fileptr
 * is null.
 */
void endfile()
{
    DBUG_ENTER("endfile");
    if ( Fileptr )
        fclose( Fileptr );
    eof_seen = 0;
    Filename = NULL;
    Linecount = 0;
    DBUG_VOID_RETURN;
}
/*
 * error - print a diagnostic on stderr.
 *
 * The message "s" is prefixed with the current file name and line number
 * when those are known.  A non-zero "severe" ends the program with that
 * value as exit status; with zero the function returns to the caller.
 */
void error( s, severe )
register char *s;
register int severe;
{
    DBUG_ENTER("error");
    if ( Filename != 0 )
        fprintf( stderr, "%s:", Filename );
    if ( Linecount != 0 )
        fprintf( stderr, " line %d:", Linecount );
    fprintf( stderr, " %s\n", s );
    if ( severe == 0 )
        DBUG_VOID_RETURN;
    exit( severe );
}
void usage()
{
DBUG_ENTER("usage");
error( "Usage: bawk { action | - | -f <actfile> } <file> ...",
USAGE_ERROR );
DBUG_VOID_RETURN;
}
SHAR_EOF
cat << \SHAR_EOF > bawkact.c
/*
* Bawk C actions compiler
*/
#include <stdio.h>
#include "bawk.h"
/*
 * act_compile - compile one action from the current input.
 * The action is tokenized into "actbuf" by stmt_lex() in ACTION mode,
 * the token pointer is set to the start of the buffer and primed with
 * getoken(), and the tree built by stmt_parse() is returned.
 */
EXPR_NODE *act_compile( actbuf )
register char *actbuf;/* where tokenized actions are compiled into */
{
    register EXPR_NODE *tree;
    DBUG_ENTER("act_compile");
    Where = ACTION;
    stmt_lex( actbuf );
    Actptr = actbuf;    /* start reading tokens at the front */
    getoken();          /* prime the parser with the first token */
    tree = stmt_parse();
    DBUG_RETURN(tree);
}
/*
 * pat_compile - compile one pattern from the current input.
 * The pattern is tokenized into "actbuf" by stmt_lex() in PATTERN mode,
 * the token pointer is set to the start of the buffer and primed with
 * getoken(), and the tree built by stmt_parse() is returned.
 */
EXPR_NODE *pat_compile( actbuf )
register char *actbuf;/* where the tokenized pattern is compiled into */
{
    register EXPR_NODE *tree;
    DBUG_ENTER("pat_compile");
    Where = PATTERN;
    stmt_lex( actbuf );
    Actptr = actbuf;    /* start reading tokens at the front */
    getoken();          /* prime the parser with the first token */
    tree = stmt_parse();
    DBUG_RETURN(tree);
}
void stmt_lex( actbuf )
register char *actbuf;/* where tokenized actions are compiled into */
{
/*
* Read and tokenize C actions from current input file into the
* action buffer. Strip out comments and whitespace in the
* process.
*/
register char *actptr, /* actbuf pointer */
*cp; /* work pointer */
char buf[MAXLINELEN+1];/* string buffer */
register int braces = 0,/* counts '{}' pairs - return when 0 */
parens = 0, /* counts '()' pairs */
i, /* temp */
c, /* current input character */
finished = 0;
DBUG_ENTER("stmt_lex");
actptr = actbuf;
while ( !finished && ((c = getcharacter()) != -1) )
{
switch(c) {
case ' ':
case '\t':
case '\n':
/*
* Skip over spaces, tabs and newlines
*/
break;
case '#':
/*
* Skip comments. Comments start with a '#' and
* end at the next newline.
*/
while ( (c = getcharacter()) != -1 && c!='\n' )
;
break;
case '{':
if ( Where==PATTERN )
{
/*
* We're compiling a pattern. The '{' marks
* the beginning of an action statement.
* Push the character back and return.
*/
ungetcharacter( (char) '{' );
finished = 1;
}
else
{
/*
* We must be compiling an action statement.
* '{'s mark beginning of action or compound
* statements.
*/
++braces;
*actptr++ = T_LBRACE;
}
break;
case '}':
*actptr++ = T_RBRACE;
finished = (! --braces );
break;
case '(':
++parens;
*actptr++ = T_LPAREN;
break;
case ')':
if ( --parens < 0 )
error( "mismatched '()'", ACT_ERROR );
*actptr++ = T_RPAREN;
break;
case ',':
if ( !braces && !parens )
{
/*
* found a comma outside of any braces or
* parens - this must be a regular
* expression seperator.
*/
ungetcharacter( (char) ',' );
finished = 1;
} else
*actptr++ = T_COMMA;
break;
case '/':
*actptr++ = T_DIV;
break;
case '@':
*actptr++ = T_REGEXP;
ungetcharacter( (char) c );
actptr += re_compile( actptr );
break;
case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
case 's': case 't': case 'u': case 'v': case 'w': case 'x':
case 'y': case 'z':
case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
case 'Y': case 'Z':
case '_':
/*
* It's a symbol reference. Copy the symbol into
* string buffer.
*/
cp = buf;
do
*cp++ = c;
while ( (c=getcharacter()) != -1 &&
(isalnum( c ) || (c == '_')));
ungetcharacter( (char) c );
*cp = 0;
/*
* Check if a keyword, builtin function or variable.
*/
if ( c = iskeyword( buf ) )
*actptr++ = c;
else if ( i = isfunction( buf ) )
{
*actptr++ = T_FUNCTION;
storeint( actptr, i );
actptr += sizeof( i );
}
else
{
/*
* It's a symbol name.
*/
*actptr++ = T_VARIABLE;
if ( !(cp = (char *) findvar( buf )) )
cp = (char *) addvar( buf );
storeptr( actptr, cp );
actptr += sizeof( cp );
}
break;
#ifdef QUOTE_STRING_HACK
case '`':
#endif
case '"':
/*
* It's a string constant
*/
*actptr++ = T_STRING;
actptr = str_compile( actptr, c );
break;
case '\'':
/*
* It's a character constant
*/
*actptr++ = T_CONSTANT;
str_compile( buf, (char) '\'' );
storeint( actptr, *buf );
actptr += sizeof( i );
break;
case '0': case '1': case '2': case '3': case '4': case '5':
case '6': case '7': case '8': case '9':
/*
* It's a numeric constant
*/
*actptr++ = T_CONSTANT;
cp = buf;
do
*cp++ = c;
while ( (c=getcharacter()) != -1 && isdigit(c) );
ungetcharacter( (char) c );
*cp = 0;
storeint( actptr, atoi( buf ) );
actptr += sizeof( i );
break;
case '$':
*actptr++ = T_DOLLAR;
break;
case '=':
if ( (c=getcharacter()) == '=' )
*actptr++ = T_EQ;
else
{
ungetcharacter( (char) c );
*actptr++ = T_ASSIGN;
}
break;
case '!':
if ( (c=getcharacter()) == '=' )
*actptr++ = T_NE;
else
{
ungetcharacter( (char) c );
*actptr++ = T_LNOT;
}
break;
case '<':
if ( (c=getcharacter()) == '<' )
*actptr++ = T_SHL;
else if ( c == '=' )
*actptr++ = T_LE;
else
{
ungetcharacter( (char) c );
*actptr++ = T_LT;
}
break;
case '>':
if ( (c=getcharacter()) == '>' )
*actptr++ = T_SHR;
else if ( c == '=' )
*actptr++ = T_GE;
else
{
ungetcharacter( (char) c );
*actptr++ = T_GT;
}
break;
case '&':
if ( (c=getcharacter()) == '&' )
*actptr++ = T_LAND;
else
{
ungetcharacter( (char) c );
*actptr++ = T_AND;
}
break;
case '|':
if ( (c=getcharacter()) == '|' )
*actptr++ = T_LOR;
else
{
ungetcharacter( (char) c );
*actptr++ = T_OR;
}
break;
case '+':
if ( (c=getcharacter()) == '+' )
*actptr++ = T_INCR;
else
{
ungetcharacter( (char) c );
*actptr++ = T_ADD;
}
break;
case '-':
if ( (c=getcharacter()) == '-' )
*actptr++ = T_DECR;
else
{
ungetcharacter( (char) c );
*actptr++ = T_SUB;
}
break;
case '[':
*actptr++ = T_LBRACKET;
break;
case ']':
*actptr++ = T_RBRACKET;
break;
case ';':
*actptr++ = T_SEMICOLON;
break;
case '*':
*actptr++ = T_MUL;
break;
case '%':
*actptr++ = T_MOD;
break;
case '^':
*actptr++ = T_XOR;
break;
case '~':
*actptr++ = T_NOT;
break;
default:
/*
* Bad character in input line
*/
error( "lexical error", ACT_ERROR );
}
if ( actptr >= Workbuf + MAXWORKBUFLEN )
error( "action too long", MEM_ERROR );
}
if ( braces || parens )
error( "mismatched '{}' or '()'", ACT_ERROR );
*actptr++ = T_EOF;
DBUG_VOID_RETURN;
}
/*
 * str_compile - copy a string constant from the current input into "str".
 *
 * Reading stops at the delimiter "delim" (or at end of input); the string
 * is NUL terminated.  Returns a pointer to the first byte after the
 * stored NUL, or NULL if error() returns after a bad escape.
 *
 * Escapes: \b \n \t \f \r, and \ddd where the first digit is 0-3 and two
 * more characters are always consumed as octal digits.  Backslash-newline
 * is dropped (line continuation).  Any other escaped character stands for
 * itself, so \" and \\ work.
 *
 * Fixes over the previous version:
 *   - the error message was formatted into a 4-byte buffer;
 *   - the default escape case read one more character and stored that
 *     instead of the escaped one; backslash-newline swallowed two;
 *   - end of input inside an octal escape was tested through a char;
 *   - "%o" was scanned into an int rather than an unsigned.
 *
 * NOTE(review): the size of "str" is not known here, so the copy itself
 * is still unbounded - callers must supply a large enough buffer.
 */
char *
str_compile( str, delim )
register char *str, delim;
{
    unsigned tmpc;      /* sscanf target; cannot be a register variable */
    register int c, i;
    char digits[4];     /* three octal digits plus NUL */
    char msg[32];       /* "missing X delimiter" needs 20 bytes */
    DBUG_ENTER("str_compile");
    while ( (c = getcharacter()) != -1 && c != delim)
    {
        if ( c == '\\' )
        {
            switch ( c = getcharacter() )
            {
            case -1: goto err;
            case 'b': c = '\b'; break;
            case 'n': c = '\n'; break;
            case 't': c = '\t'; break;
            case 'f': c = '\f'; break;
            case 'r': c = '\r'; break;
            case '0':
            case '1':
            case '2':
            case '3':
                digits[0] = c;
                for ( i=1; i<3; i++ )
                {
                    /* test for EOF on the int, before narrowing */
                    if ( (c = getcharacter()) == -1 )
                        goto err;
                    digits[i] = c;
                }
                digits[3] = 0;
                tmpc = 0;
                sscanf( digits, "%o", &tmpc );
                c = tmpc;
                break;
            case '\n':
                /* line continuation: store nothing */
                continue;
            default:
                /* the escaped character stands for itself */
                break;
            }
        }
        *str++ = c;
    }
    *str++ = 0;
    DBUG_RETURN(str);
err:
    sprintf( msg, "missing %c delimiter", delim );
    error( msg, 4 );
    DBUG_RETURN(NULL);
}
/*
 * storeint - store the int "i" at the (possibly unaligned) address "ip".
 * The bytes are moved with movmem(), which is assumed to take
 * (source, destination, length) - fetchint() uses the reverse direction.
 */
void storeint( ip, i )
char *ip;
int i;
{
    char *from;
    DBUG_ENTER("storeint");
    from = (char *) &i;
    movmem(from, ip, sizeof(int));
    DBUG_VOID_RETURN;
}
/*
 * storeptr - store the pointer "p" at the (possibly unaligned) address
 * "pp".  The bytes are moved with movmem(), which is assumed to take
 * (source, destination, length) - fetchptr() uses the reverse direction.
 */
void storeptr( pp, p )
char *pp, *p;
{
    char *from;
    DBUG_ENTER("storeptr");
    from = (char *) &p;
    movmem(from, pp, sizeof(char *));
    DBUG_VOID_RETURN;
}
/*
 * fetchint - read back an int stored by storeint() at address "ip".
 * The bytes are moved into a local int with movmem().
 */
int fetchint( ip )
register char *ip;
{
    int value;
    char *into;
    DBUG_ENTER("fetchint");
    into = (char *) &value;
    movmem(ip, into, sizeof(int));
    DBUG_RETURN(value);
}
/*
 * fetchptr - read back a pointer stored by storeptr() at address "pp".
 * The bytes are moved into a local pointer with movmem().
 */
char *
fetchptr( pp )
register char *pp;
{
    char *value;
    char *into;
    DBUG_ENTER("fetchptr");
    into = (char *) &value;
    movmem(pp, into, sizeof(char *));
    DBUG_RETURN(value);
}
#ifndef DBUG_OFF
/*
 * Printable names of the token codes, indexed by token value.  Only used
 * for debug output (see the DBUG_PRINT calls in get_expr_node() and
 * getoken()), hence compiled only when DBUG is enabled.
 * NOTE(review): the order presumably has to mirror the numbering of the
 * token codes in bawk.h, with slot 0 unused - confirm against the header
 * whenever a token is added.
 */
char *token_name[] = {
0,
"CHAR",
"BOL",
"EOL",
"ANY",
"CLASS",
"NCLASS",
"STAR",
"PLUS",
"MINUS",
"ALPHA",
"DIGIT",
"NALPHA",
"PUNCT",
"RANGE",
"ENDPAT",
"T_STRING",
"T_DOLLAR",
"T_REGEXP",
"T_REGEXP_ARG",
"T_CONSTANT",
"T_VARIABLE",
"T_FUNCTION",
"T_SEMICOLON",
"T_EOF",
"T_LBRACE",
"T_RBRACE",
"T_LPAREN",
"T_RPAREN",
"T_LBRACKET",
"T_RBRACKET",
"T_COMMA",
"T_ASSIGN",
"T_STAR",
"T_MUL",
"T_DIV",
"T_MOD",
"T_ADD",
"T_UMINUS",
"T_SUB",
"T_SHL",
"T_SHR",
"T_LT",
"T_LE",
"T_GT",
"T_GE",
"T_EQ",
"T_NE",
"T_NOT",
"T_ADDROF",
"T_AND",
"T_XOR",
"T_OR",
"T_LNOT",
"T_LAND",
"T_LOR",
"T_INCR",
"T_DECR",
"T_POSTINCR",
"T_POSTDECR",
"T_IF",
"T_ELSE",
"T_WHILE",
"T_BREAK",
"T_CHAR",
"T_INT",
"T_BEGIN",
"T_END",
"T_NF",
"T_NR",
"T_FS",
"T_RS",
"T_FILENAME",
"T_STATEMENT",
"T_DECLARE",
"T_ARRAY_DECLARE"
};
#endif
/*
 * getoken - fetch the next token from the token buffer at Actptr.
 *
 * The token code is left in Token and returned; its operand, if any, is
 * left in Value and Actptr is moved past it:
 *   T_STRING, T_REGEXP     Value.dptr points at the inline data, which
 *                          ends with a NUL
 *   T_VARIABLE             Value.dptr is the stored pointer
 *   T_FUNCTION, T_CONSTANT Value.ival is the stored int
 * Every other token clears Value.dptr.  T_EOF is never consumed: Actptr
 * is moved back so that each further call returns T_EOF again.
 */
char getoken()
{
    DBUG_ENTER("getoken");
    Token = *Actptr++;
    if ( Token == T_STRING || Token == T_REGEXP )
    {
        Value.dptr = Actptr;
        Actptr += strlen( Actptr ) + 1;
    }
    else if ( Token == T_VARIABLE )
    {
        Value.dptr = fetchptr( Actptr );
        Actptr += sizeof( char * );
    }
    else if ( Token == T_FUNCTION || Token == T_CONSTANT )
    {
        Value.ival = fetchint( Actptr );
        Actptr += sizeof( int );
    }
    else
    {
        if ( Token == T_EOF )
            --Actptr;       /* stay on the end marker */
        Value.dptr = 0;
    }
    DBUG_PRINT("getoken",
        ("Token='%s' (%d), Value=%d",token_name[Token],Token,Value.ival));
    DBUG_RETURN(Token);
}
SHAR_EOF
cat << \SHAR_EOF > bawkdo.c
/*
* Bawk C actions interpreter
*/
#include <stdio.h>
#include "bawk.h"
static char pop_array[MAX_TOKEN + 1];
void init_pop_array()
{
register int i;
DBUG_ENTER("init_pop_array");
for(i = 0; i <= MAX_TOKEN; i++)
pop_array[i] = 1;
pop_array[T_STATEMENT] = pop_array[T_IF] = pop_array[T_DECLARE] = 0;
pop_array[T_DECLARE] = pop_array[T_ARRAY_DECLARE] = 0;
pop_array[T_BREAK] = 0;
DBUG_VOID_RETURN;
}
/*
 * dopattern - evaluate the pattern tree "root" in PATTERN mode and
 * return the integer it leaves on the stack.
 */
int dopattern( root )
register EXPR_NODE *root;
{
    register int verdict;
    DBUG_ENTER("dopattern");
    Where = PATTERN;
    walk_tree(root);
    verdict = popint();     /* result of the pattern expression */
    DBUG_RETURN(verdict);
}
/*
 * doaction - execute the action tree "root" in ACTION mode.
 * Nothing is returned; the tree is walked for its effects only.
 */
void doaction( root )
register EXPR_NODE *root;
{
    DBUG_ENTER("doaction");
    Where = ACTION;         /* tell the evaluator what it is running */
    walk_tree( root );
    DBUG_VOID_RETURN;
}
/*
 * walk_tree - evaluate the parse tree rooted at "root".
 *
 * Expression nodes evaluate their operands recursively and leave their
 * result on the evaluation stack; statement nodes (T_STATEMENT, T_IF,
 * T_WHILE, T_BREAK, T_DECLARE, T_ARRAY_DECLARE) are executed for their
 * effect.  A null tree is a no-op, and nothing is evaluated while
 * Saw_break is set, which is how "break" unwinds to the enclosing while.
 *
 * Fixes over the previous version:
 *   - "$n" with no fields in the current record indexed Fields[-1]; it
 *     now yields an empty string;
 *   - "/" and "%" with a zero right operand are reported through error()
 *     instead of dividing by zero.
 */
void walk_tree(root)
register EXPR_NODE *root;
{
    register int ival;
    register int rval;      /* right operand of '/' and '%' */
    register char *cp;      /* scan pointer for the "no fields" case */
    DATUM data;
    register VARIABLE *pvar;
    register VARDECL *pdecl;
    DBUG_ENTER("walk_tree");
    if(Saw_break || !root)
        DBUG_VOID_RETURN;
    switch(root->operator) {
    case T_ASSIGN:
        walk_tree(root->left);
        walk_tree(root->right);
        assignment();
        break;
    case T_LOR:
        /* short circuit: the right side only runs if the left is 0 */
        walk_tree(root->left);
        if(popint())
            pushint(1);
        else {
            walk_tree(root->right);
            pushint(popint() != 0);
        }
        break;
    case T_LAND:
        /* short circuit: the right side only runs if the left is not 0 */
        walk_tree(root->left);
        if(!popint())
            pushint(0);
        else {
            walk_tree(root->right);
            pushint(popint() != 0);
        }
        break;
    /*
     * Commutative operators: both operands are popped in one
     * expression, their order does not matter.
     */
    case T_OR:
        walk_tree(root->left);
        walk_tree(root->right);
        pushint(popint() | popint());
        break;
    case T_AND:
        walk_tree(root->left);
        walk_tree(root->right);
        pushint(popint() & popint());
        break;
    case T_XOR:
        walk_tree(root->left);
        walk_tree(root->right);
        pushint(popint() ^ popint());
        break;
    case T_EQ:
        walk_tree(root->left);
        walk_tree(root->right);
        pushint(popint() == popint());
        break;
    case T_NE:
        walk_tree(root->left);
        walk_tree(root->right);
        pushint(popint() != popint());
        break;
    /*
     * Non-commutative operators: the left operand is popped into
     * "ival" before the right one is evaluated.
     */
    case T_LE:
        walk_tree(root->left);
        ival = popint();
        walk_tree(root->right);
        pushint(ival <= popint());
        break;
    case T_GE:
        walk_tree(root->left);
        ival = popint();
        walk_tree(root->right);
        pushint(ival >= popint());
        break;
    case T_LT:
        walk_tree(root->left);
        ival = popint();
        walk_tree(root->right);
        pushint(ival < popint());
        break;
    case T_GT:
        walk_tree(root->left);
        ival = popint();
        walk_tree(root->right);
        pushint(ival > popint());
        break;
    case T_SHL:
        walk_tree(root->left);
        ival = popint();
        walk_tree(root->right);
        pushint(ival << popint());
        break;
    case T_SHR:
        walk_tree(root->left);
        ival = popint();
        walk_tree(root->right);
        pushint(ival >> popint());
        break;
    case T_ADD:
        walk_tree(root->left);
        walk_tree(root->right);
        pushint(popint() + popint());
        break;
    case T_SUB:
        walk_tree(root->left);
        ival = popint();
        walk_tree(root->right);
        pushint(ival - popint());
        break;
    case T_MUL:
        walk_tree(root->left);
        ival = popint();
        walk_tree(root->right);
        pushint(ival * popint());
        break;
    case T_DIV:
        walk_tree(root->left);
        ival = popint();
        walk_tree(root->right);
        rval = popint();
        if ( rval == 0 )
        {
            error( "division by zero", ACT_ERROR );
            pushint( 0 );   /* only reached if error() returns */
        }
        else
            pushint(ival / rval);
        break;
    case T_MOD:
        walk_tree(root->left);
        ival = popint();
        walk_tree(root->right);
        rval = popint();
        if ( rval == 0 )
        {
            error( "division by zero", ACT_ERROR );
            pushint( 0 );   /* only reached if error() returns */
        }
        else
            pushint(ival % rval);
        break;
    case T_LNOT:
        walk_tree(root->left);
        pushint( ! popint() );
        break;
    case T_NOT:
        walk_tree(root->left);
        pushint( ~ popint() );
        break;
    case T_INCR:
        walk_tree(root->left);
        preincdec(1);
        break;
    case T_DECR:
        walk_tree(root->left);
        preincdec(-1);
        break;
    case T_DOLLAR:
        /*
         * It's a reference to one (or all) of the words in Linebuf.
         */
        walk_tree(root->left);
        if ( ival = popint() )
        {
            if ( Fieldcount < 1 )
            {
                /*
                 * The record has no fields at all, so there is
                 * no Fields[] entry to refer to.  Use the NUL
                 * at the end of Linebuf as an empty field.
                 */
                for ( cp = Linebuf; *cp; ++cp )
                    ;
                data.dptr = cp;
            }
            else
            {
                /* clamp the field number to 1..Fieldcount */
                if ( ival > Fieldcount )
                    ival = Fieldcount;
                else if ( ival < 1 )
                    ival = 1;
                data.dptr = Fields[ ival-1 ];
            }
        }
        else
        {
            /*
             * Reconstitute the line buffer in case any of the
             * fields have been changed.
             */
            unparse( Fields, Fieldcount, Linebuf, Fieldsep );
            data.dptr = Linebuf;
        }
        /*
         * $<expr>'s are treated the same as string constants:
         */
        push( (char) 1, (char) ACTUAL, (char) BYTE, &data );
        break;
    case T_UMINUS:
        walk_tree(root->left);
        pushint( - popint() );
        break;
    case T_STAR:
        walk_tree(root->left);
        /*
         * If item on stack is an LVALUE, do an extra level of
         * indirection before changing it to an LVALUE.
         */
        if ( Stackptr->lvalue )
            Stackptr->value.ptrptr =
                (char **) *Stackptr->value.ptrptr;
        Stackptr->lvalue = 1;
        --Stackptr->class;
        break;
    case T_ADDROF:
        walk_tree(root->left);
        if ( Stackptr->lvalue )
            Stackptr->lvalue = 0;
        else
            error( "'&' operator needs an lvalue", ACT_ERROR );
        break;
    case T_CONSTANT:
        /* the left link of a constant node holds a DATUM, not a node */
        pushint(((DATUM *) (root->left))->ival);
        break;
    case T_FUNCTION:
        /* left: DATUM with the function number; right: the arguments */
        function(((DATUM *) (root->left))->ival, root->right);
        break;
    case T_REGEXP:
        /*
         * Perform a match of the regular expression against input
         * line.
         */
        unparse( Fields, Fieldcount, Linebuf, Fieldsep );
        pushint( match( Linebuf, (char *) root->left ) );
        break;
    case T_REGEXP_ARG:
        /*
         * A regular expression that is to be passed as a function
         * argument.
         */
        data.dptr = (char *) root->left;
        push( (char) 1, (char) ACTUAL, (char) BYTE, &data );
        break;
    case T_STRING:
        data.dptr = (char *) root->left;
        push( (char) 1, (char) ACTUAL, (char) BYTE, &data );
        break;
    case T_NF:
        pushint( Fieldcount );
        break;
    case T_NR:
        pushint( Recordcount );
        break;
    case T_FS:
        data.dptr = Fieldsep;
        push( (char) 1, (char) ACTUAL, (char) BYTE, &data );
        break;
    case T_RS:
        data.dptr = Recordsep;
        push( (char) 1, (char) ACTUAL, (char) BYTE, &data );
        break;
    case T_FILENAME:
        data.dptr = Filename;
        push( (char) 1, (char) ACTUAL, (char) BYTE, &data );
        break;
    case T_VARIABLE:
        pvar = (VARIABLE *) root->left;
        /*
         * it's a plain variable. The way a variable is
         * represented on the stack depends on its type:
         *        lvalue class value.dptr
         * vars:  1      0     address of var
         * ptrs:  1      1     ptr to address of ptr
         * array: 0      1     address of var
         */
        if ( pvar->vclass && !pvar->vlen )
            /* it's a pointer */
            data.dptr = (char *) &pvar->vptr;
        else
            /* an array or simple variable */
            data.dptr = pvar->vptr;
        /*
         * If it's an array it can't be used as an LVALUE.
         */
        push( pvar->vclass, (char) !pvar->vlen, pvar->vsize, &data );
        break;
    case T_LBRACKET:
        walk_tree(root->left);
        if ( ! Stackptr->class )
            error( "'[]' needs an array or pointer", ACT_ERROR );
        /*
         * compute the subscript
         */
        walk_tree(root->right);
        ival = popint();
        /*
         * compute the offset (subscript times WORD for int arrays)
         * and then the effective address.
         */
        ival *= Stackptr->size;
        if ( Stackptr->lvalue )
            /*
             * It's a pointer - don't forget that the stack top
             * item's value is the address of the pointer so we
             * must do another level of indirection.
             */
            Stackptr->value.dptr = *Stackptr->value.ptrptr+ival;
        else
            /*
             * It's a plain array - the stack top item's value is
             * the address of the first element in the array.
             */
            Stackptr->value.dptr += ival;
        /*
         * The stack top item now becomes an LVALUE, but we've
         * reduced the indirection level.
         */
        Stackptr->lvalue = 1;
        --Stackptr->class;
        break;
    case T_POSTINCR:
        walk_tree(root->left);
        postincdec(1);
        break;
    case T_POSTDECR:
        walk_tree(root->left);
        postincdec(-1);
        break;
    case T_STATEMENT:
        /*
         * left: this statement, right: the rest of the list.  A
         * value left behind by the statement is discarded.
         */
        if(root->left) {
            walk_tree(root->left);
            if(pop_array[root->left->operator])
                popint();
        }
        walk_tree(root->right);
        break;
    case T_DECLARE:
        pdecl = (VARDECL *) root->left;
        pvar = pdecl->variable;
        if(pdecl->vsize != ((pvar->vlen ? pvar->vlen : 1)*
            pvar->vsize)) {
            /*
             * The amount of storage needed for the variable has
             * changed.
             */
            free(pvar->vptr);
            pvar->vptr = get_clear_memory(pdecl->vsize);
        }
        pvar->vclass = pdecl->vclass;
        pvar->vsize = pdecl->vsize;
        pvar->vlen = 0;
        walk_tree(root->right);
        break;
    case T_ARRAY_DECLARE:
        /* Compute the dimension */
        walk_tree(root->left->right);
        ival = popint();
        pdecl = (VARDECL *) root->left->left;
        pvar = pdecl->variable;
        /* reallocate only when the total size really changes */
        if((ival*pdecl->vsize) != ((pvar->vlen ? pvar->vlen : 1)*
            pvar->vsize)) {
            free(pvar->vptr);
            pvar->vptr = get_clear_memory(ival*pdecl->vsize);
        }
        pvar->vclass = pdecl->vclass;
        pvar->vsize = pdecl->vsize;
        pvar->vlen = ival;
        walk_tree(root->right);
        break;
    case T_IF:
        /*
         * left->left: condition, left->right: "then" part,
         * right: "else" part (may be null).
         */
        walk_tree(root->left->left);
        if(popint())
            walk_tree(root->left->right);
        else
            walk_tree(root->right);
        break;
    case T_WHILE:
        /* left: condition, right: body */
        while( !Saw_break )
        {
            walk_tree(root->left);
            if( ! popint() )
                break;
            walk_tree(root->right);
        }
        /* a "break" only leaves the innermost loop */
        Saw_break = 0;
        break;
    case T_BREAK:
        Saw_break = 1;
        break;
    default:
        DBUG_PRINT("walk_tree",
            ("decimal value of operator = %d",root->operator));
        error("internal error: parse tree node with unknown symbol",
            ACT_ERROR);
    }
    DBUG_VOID_RETURN;
}
/*
 * preincdec - apply a prefix "++" (incr == 1) or "--" (incr == -1) to
 * the item on top of the stack.
 *
 * The item must be an lvalue.  For pointers (class non-zero) the step is
 * scaled by the element size.  The item stays on the stack, so the
 * updated object is what the enclosing expression sees.
 * NOTE(review): the update is always done through value.ptrptr, i.e. on
 * a pointer-sized object, whatever the size of the variable - confirm
 * this is safe for BYTE-sized variables.
 */
void preincdec(incr)
register int incr;
{
    DBUG_ENTER("preincdec");
    if ( !Stackptr->lvalue )
    {
        error( "pre '++' or '--' needs an lvalue", ACT_ERROR );
        DBUG_VOID_RETURN;
    }
    if ( Stackptr->class )
        incr *= Stackptr->size;     /* pointers step by element size */
    *Stackptr->value.ptrptr += incr;
    DBUG_VOID_RETURN;
}
/*
 * postincdec - apply a postfix "++" (incr == 1) or "--" (incr == -1) to
 * the item on top of the stack.
 *
 * The item must be an lvalue.  Its old value is saved, the object is
 * updated, and the stack item is replaced by the saved value with the
 * lvalue flag cleared.  A pointer thereby turns into something that
 * looks like an array and a simple variable into something that looks
 * like a constant; neither can be assigned to any more, which is fine
 * since "cp++ = value" makes little sense anyway.
 */
void postincdec(incr)
register int incr;
{
    register char **old;    /* value before the update */
    DBUG_ENTER("postincdec");
    if ( !Stackptr->lvalue )
    {
        error( "post '++' or '--' needs an lvalue", ACT_ERROR );
        DBUG_VOID_RETURN;
    }
    if ( Stackptr->class )
    {
        /*
         * A pointer: remember where it pointed, then move it by
         * whole elements.
         */
        old = (char **) *Stackptr->value.ptrptr;
        *Stackptr->value.ptrptr += incr * Stackptr->size;
    }
    else
    {
        /*
         * A simple variable: remember its value, read with the
         * width of the variable, then step it by one.
         */
        if ( Stackptr->size == BYTE )
            old = (char **) *Stackptr->value.dptr;
        else
            old = (char **) *Stackptr->value.ptrptr;
        *Stackptr->value.ptrptr += incr;
    }
    Stackptr->value.ptrptr = old;
    Stackptr->lvalue = 0;
    DBUG_VOID_RETURN;
}
SHAR_EOF
cat << \SHAR_EOF > bawkpat.c
/*
* Bawk regular expression compiler/interpreter
*/
#include <stdio.h>
#include "bawk.h"
/*
 * re_compile - compile a regular expression from the current input into
 * "patbuf".
 *
 * The first character read is taken as the pattern delimiter; the
 * expression runs up to the next occurrence of that character (or end of
 * input).  Returns the number of bytes written to "patbuf", including
 * the closing ENDPAT code and the NUL that follows it.
 * NOTE(review): nothing here limits how much is written to "patbuf" -
 * the caller must provide a large enough buffer.
 */
int re_compile( patbuf )
char *patbuf; /* where to put compiled pattern */
{
/*
* Compile a regular expression from current input file
* into the given pattern buffer.
*/
register int c, /* Current character */
o; /* Temp */
register char delim, /* pattern delimiter */
*patptr, /* destination string pntr */
*lp, /* Last pattern pointer */
*spp; /* Save beginning of pattern */
char *cclass(); /* Compile class routine */
DBUG_ENTER("re_compile");
lp = patptr = patbuf;
delim = getcharacter();
while ( (c = getcharacter()) != -1 && c != delim )
{
/*
* STAR, PLUS and MINUS are special.
*/
if (c == '*' || c == '+' || c == '-') {
/*
* An occurrence operator needs something to apply to: it may
* not come first, nor follow BOL, EOL or another operator.
*/
if (patptr == patbuf ||
(o=patptr[-1]) == BOL ||
o == EOL ||
o == STAR ||
o == PLUS ||
o == MINUS)
error( "illegal occurrance op", RE_ERROR );
/*
* Make room: two bytes are appended, then everything from the
* start of the last element (lp) is moved up by one byte.  The
* operator code goes in front of the element at lp and a
* single ENDPAT ends up right behind it.
*/
*patptr++ = ENDPAT;
*patptr++ = ENDPAT;
spp = patptr; /* Save pattern end */
while (--patptr > lp) /* Move pattern down... */
*patptr = patptr[-1]; /* one byte */
*patptr = (c == '*') ? STAR :
(c == '-') ? MINUS : PLUS;
patptr = spp; /* Restore pattern end */
continue;
}
/*
* All the rest.
*/
lp = patptr; /* Remember start */
switch(c) {
case '^':
*patptr++ = BOL;
break;
case '$':
*patptr++ = EOL;
break;
case '.':
*patptr++ = ANY;
break;
case '[':
patptr = cclass( patptr );
break;
case ':':
/*
* ":x" selects a character type: a = ALPHA, d = DIGIT,
* n = NALPHA, blank = PUNCT (letters in either case).
*/
if ( (c=getcharacter()) != -1 )
{
switch( tolower( c ) )
{
case 'a':
*patptr++ = ALPHA;
break;
case 'd':
*patptr++ = DIGIT;
break;
case 'n':
*patptr++ = NALPHA;
break;
case ' ':
*patptr++ = PUNCT;
break;
default:
error( "unknown ':' type", RE_ERROR );
}
}
else
error( "no ':' type", RE_ERROR );
break;
case '\\':
/* quoted character: take the next one literally (falls through) */
c = getcharacter();
default:
*patptr++ = CHAR;
*patptr++ = c;
}
}
*patptr++ = ENDPAT;
*patptr++ = 0; /* Terminate string */
DBUG_EXECUTE("re_match",re_debug(patbuf,patptr););
DBUG_RETURN(patptr - patbuf);
}
#ifndef DBUG_OFF
re_debug(patbuf, patptr)
register char *patbuf, *patptr;
{
	/*
	 * Debugging aid: step through the compiled pattern bytes from
	 * patbuf up to (not including) patptr and emit one DBUG_PRINT
	 * entry under the "re_match" keyword for each byte visited.
	 * For CLASS and NCLASS the following byte is printed as a number
	 * and skipped; any byte that is not a known opcode is printed as
	 * a character.
	 */
	register char *cur;	/* byte currently being reported */

	cur = patbuf;
	while ( cur < patptr )
	{
		switch ( *cur )
		{
		case CHAR:
			DBUG_PRINT("re_match",("char "));
			break;
		case BOL:
			DBUG_PRINT("re_match",("bol "));
			break;
		case EOL:
			DBUG_PRINT("re_match",("eol "));
			break;
		case ANY:
			DBUG_PRINT("re_match",("any "));
			break;
		case CLASS:
			DBUG_PRINT("re_match",("class(%d) ",*++cur));
			break;
		case NCLASS:
			DBUG_PRINT("re_match",("notclass(%d) ",*++cur));
			break;
		case STAR:
			DBUG_PRINT("re_match",("star "));
			break;
		case PLUS:
			DBUG_PRINT("re_match",("plus "));
			break;
		case MINUS:
			DBUG_PRINT("re_match",("minus "));
			break;
		case ALPHA:
			DBUG_PRINT("re_match",("alpha "));
			break;
		case DIGIT:
			DBUG_PRINT("re_match",("digit "));
			break;
		case NALPHA:
			DBUG_PRINT("re_match",("notalpha "));
			break;
		case PUNCT:
			DBUG_PRINT("re_match",("punct "));
			break;
		case RANGE:
			DBUG_PRINT("re_match",("range "));
			break;
		case ENDPAT:
			DBUG_PRINT("re_match",("endpat "));
			break;
		default:
			DBUG_PRINT("re_match",("<%c> ", *cur));
			break;
		}
		++cur;
	}
}
#endif
char *
cclass( patbuf )
register char *patbuf;	/* destination pattern buffer */
{
	/*
	 * Compile a character class (the text following a '[') from the
	 * current input into patbuf and return a pointer just past the
	 * bytes written.
	 *
	 * Layout produced: the opcode (CLASS, or NCLASS when the class
	 * starts with '^'), a count byte, then the members.  A member is
	 * either a single character or the triple RANGE,low,high.  The
	 * count is the number of bytes from the count byte to the end of
	 * the class, so it includes the count byte itself.
	 */
	register char *patptr,	/* destination pattern pointer */
		*cp;		/* where the byte count lives */
	register int c,		/* Current character */
		o;		/* Opcode, later the look-ahead char */

	DBUG_ENTER("cclass");
	patptr = patbuf;
	o = CLASS;		/* default, also used if input ended */
	if ( (c = getcharacter()) == -1 )
		error( "class terminates badly", RE_ERROR );
	else if ( c == '^' )
	{
		/*
		 * Class exclusion, for example: [^abc]
		 * Swallow the "^" and set token type to class exclusion.
		 */
		o = NCLASS;
	}
	else
	{
		/*
		 * Normal class, for example: [abc]
		 * Push the character back so the loop below sees it.
		 */
		ungetcharacter( (char) c );
	}
	*patptr++ = o;
	cp = patptr;		/* remember where byte count is */
	*patptr++ = 0;		/* and initialize byte count */
	while ( (c = getcharacter()) != -1 && c != ']' )
	{
		o = getcharacter();	/* peek at next char */
		if (c == '\\')		/* Store quoted chars */
		{
			if ( o == -1 )	/* Gotta get something */
				error( "class terminates badly", RE_ERROR );
			*patptr++ = o;
		}
		else if ( c == '-' && (patptr - cp) > 1 && o != ']' && o != -1 )
		{
			/*
			 * "x-y": turn the member already stored into
			 * the triple RANGE,x,y.
			 */
			c = patptr[-1];		/* Range start */
			patptr[-1] = RANGE;	/* Range signal */
			*patptr++ = c;		/* Re-store start */
			*patptr++ = o;		/* Store end char */
		}
		else
		{
			*patptr++ = c;		/* Store normal char */
			ungetcharacter( (char) o );
		}
	}
	if (c != ']')
		error( "unterminated class", RE_ERROR );
	if ( (c = (patptr - cp)) >= 256 )
		error( "class too large", RE_ERROR );
	/*
	 * The count includes the count byte, so a class with no members
	 * has a count of 1, never 0.
	 */
	if ( c <= 1 )
		error( "empty class", RE_ERROR );
	*cp = c;		/* fill in byte count */
	DBUG_RETURN(patptr);
}
int match( line, pattern )
char *line;		/* line to match */
register char *pattern;	/* pattern to match */
{
	/*
	 * Count the non-overlapping matches of the compiled pattern in
	 * line, scanning left to right.  Returns the number of matches
	 * found, so the result is non-zero exactly when the line matches.
	 * A match is attempted at each position that holds a character;
	 * an empty line is never tried.
	 *
	 * After a match the scan resumes at the end of the matched text.
	 * A zero-length match (e.g. a lone "^" or "x*") is counted once
	 * and the scan then moves on by one character, so the loop always
	 * makes progress and never steps back before the start of line.
	 */
	register char *l;	/* Line pointer */
	char *pmatch();
	register char *next;	/* end of the match found at l, or 0 */
	register int matches;

	DBUG_ENTER("match");
	matches = 0;
	l = line;
	while ( *l )
	{
		next = pmatch(line, l, pattern);
		if ( next == 0 )
		{
			++l;		/* no match here, try next position */
			continue;
		}
		++matches;
		DBUG_PRINT("match",("match found"));
		if ( next > l )
			l = next;	/* resume after the matched text */
		else
			++l;		/* empty match: force progress */
	}
	DBUG_RETURN(matches);
}
char *
pmatch(linestart, line, pattern)
char *linestart;	/* start of line to match */
char *line;		/* (partial) line to match */
char *pattern;		/* (partial) pattern to match */
{
	/*
	 * Try to match the compiled pattern against the text at line.
	 * Returns a pointer just past the matched text, or 0 when the
	 * pattern does not match at this position.  linestart is only
	 * used to decide whether BOL holds.
	 *
	 * Closures (MINUS, PLUS, STAR) are followed by a sub-pattern
	 * terminated by its own ENDPAT; they are handled by recursion.
	 * No single-character opcode is allowed to consume the NUL that
	 * terminates the line.
	 */
	register char *l;	/* Current line pointer */
	register char *p;	/* Current pattern pointer */
	register char c;	/* Current character */
	register char *e;	/* End for STAR and PLUS match */
	register int op;	/* Pattern operation */
	register int n;		/* Class counter */
	char *are;		/* Start of STAR match */

	DBUG_ENTER("pmatch");
	l = line;
	DBUG_PRINT("pmatch",("line: (%s)", line));
	p = pattern;
	while ((op = *p++) != ENDPAT) {
		DBUG_PRINT("pmatch",("byte[%d] = 0%o, '%c', op = 0%o",l-line, *l, *l, op));
		switch(op) {
		case CHAR:		/* literal character follows */
			if ( *l++ != *p++) {
				DBUG_RETURN(0);
			}
			break;
		case BOL:		/* only at the start of the line */
			if (l != linestart) {
				DBUG_RETURN(0);
			}
			break;
		case EOL:		/* only at the terminating NUL */
			if (*l != '\0') {
				DBUG_RETURN(0);
			}
			break;
		case ANY:		/* any character except the NUL */
			if (*l++ == '\0') {
				DBUG_RETURN(0);
			}
			break;
		case DIGIT:
			if ((c = *l++) < '0' || (c > '9')) {
				DBUG_RETURN(0);
			}
			break;
		case ALPHA:
			c = *l++;
			c = tolower( c );
			if (c < 'a' || c > 'z') {
				DBUG_RETURN(0);
			}
			break;
		case NALPHA:		/* a letter or a digit */
			c = *l++;
			c = tolower( c );
			if (c >= 'a' && c <= 'z')
				break;
			else if (c < '0' || c > '9') {
				DBUG_RETURN(0);
			}
			break;
		case PUNCT:		/* non-NUL character <= ' ' */
			c = *l++;
			if (c == 0 || c > ' ') {
				DBUG_RETURN(0);
			}
			break;
		case CLASS:
		case NCLASS:
			c = *l++;
			/*
			 * A class never matches the end of the line.
			 * Without this an exclusion class would accept
			 * the NUL and push l beyond the string.
			 */
			if (c == '\0') {
				DBUG_RETURN(0);
			}
			/*
			 * n is the stored count, which includes the count
			 * byte; the scan is finished when n drops to 1.
			 * Leaving the loop early (n > 1) means c was found.
			 */
			n = *p++ & 0377;
			do {
				if (*p == RANGE) {
					p += 3;
					n -= 2;
					if (c >= p[-2] && c <= p[-1])
						break;
				}
				else if (c == *p++)
					break;
			} while (--n > 1);
			if ((op == CLASS) == (n <= 1)) {
				DBUG_RETURN(0);
			}
			if (op == CLASS)
				p += n - 2;	/* skip unscanned members */
			break;
		case MINUS:		/* Zero or one ... */
			e = pmatch(linestart,l,p);/* Look for a match */
			while (*p++ != ENDPAT)	/* Skip over pattern */
				;
			if (e)			/* Got a match? */
				l = e;		/* Yes, update string */
			break;			/* Always succeeds */
		case PLUS:		/* One or more ... */
			if ((l = pmatch(linestart,l,p)) == 0) {
				DBUG_RETURN(0);	/* Gotta have a match */
			}
			/* FALLTHROUGH */
		case STAR:		/* Zero or more ... */
			are = l;		/* Remember line start */
			/*
			 * Get the longest match.  Stop as well when the
			 * sub-pattern matches without consuming anything,
			 * since repeating it could never make progress.
			 */
			while (*l && (e = pmatch(linestart,l,p)) && e != l)
				l = e;
			while (*p++ != ENDPAT)	/* Skip over pattern */
				;
			/*
			 * Try to match the rest of the pattern, giving
			 * back one character at a time down to are.  The
			 * pointer is never moved below are.
			 */
			for (;;) {
				if (e = pmatch(linestart,l,p)) {
					DBUG_RETURN(e);
				}
				if (l == are)
					break;
				--l;		/* Nope, try earlier */
			}
			DBUG_RETURN(0);		/* Nothing else worked */
		default:
			fprintf( stderr, "bad op code %d\n", op );
			error( "can't happen -- match", RE_ERROR );
		}
	}
	DBUG_RETURN(l);
}
SHAR_EOF
cat << \SHAR_EOF > example2
BEGIN
{
strcpy(RS,"."); # set record separator to a period
}
{
if ( match( $1, @^[a-z]@ ) )
*$1 = toupper( *$1 ); # $1 starts with a lower-case letter: upper-case its first character
printf( "%s\n", $0 ); # print the record followed by a newline
}
SHAR_EOF
cat << \SHAR_EOF > tst1
@[(]@
{
parens = parens + match( $0, @(@ ); # add the match() result for "(" on this record to the total
}
END
{
printf("parens=%d\n", parens ); # report the accumulated total
}
SHAR_EOF