jw@sics.sics.se.UUCP (08/09/87)
# This is a shell archive. # Remove everything above and including the cut line. # Then run the rest of the file through sh. #----cut here-----cut here-----cut here-----cut here----# #!/bin/sh # Xshar: Extended Shell Archiver. # This is part 2 out of 2. # This archive created: Sat Aug 8 19:32:40 1987 # By: Craig Norborg (Purdue University Computing Center) # Run the following text with /bin/sh to create: # bawk.c # bawkact.c # bawkdo.c # bawkpat.c # example2 # tst1 cat << \SHAR_EOF > bawk.c /* * Bawk main program */ #define MAIN 1 #include <stdio.h> #include "bawk.h" static char *pattern_arg = NULL; /* Command line bawk program pattern */ static int ungetc_arg = 0; static char eof_seen = 0; static int max_field_count = 0; /* * Main program */ main( argc, argv ) register int argc; register char **argv; { register char gotrules = 0, didfile = 0, getstdin = 0; register char rule_file_flag = 0; DBUG_ENTER("main"); /* * Initialize global variables: */ Beginact = 0; Endact = 0; Rules = 0; Rulep = 0; Filename = 0; Linecount = 0; Saw_break = 0; Stackptr = Stackbtm - 1; Stacktop = Stackbtm + MAXSTACKSZ; Nextvar = Vartab; init_pop_array(); strcpy( Fieldsep, " \t" ); strcpy( Recordsep, "\n" ); /* * Parse command line */ while ( --argc ) { if ( **(++argv) == '-' ) { /* * Process dash options. */ switch ( tolower( argv[0][1] ) ) { case '#': DBUG_PUSH(&argv[0][2]); continue; case 'f': if(!gotrules) { rule_file_flag++; argv++; argc--; } else usage(); break; case 0: if(!gotrules) rule_file_flag++; getstdin++; break; default: usage(); } } if ( gotrules ) { /* * Already read rules file - assume this is * is a text file for processing. */ if ( ++didfile == 1 && Beginact ) doaction( Beginact ); if ( getstdin ) { getstdin--; newfile( 0 ); } else newfile( *argv ); process(); } else { if(rule_file_flag) { if ( getstdin ) { getstdin--; newfile( 0 ); } else newfile( *argv ); } else pattern_arg = *argv; compile(); pattern_arg = NULL; gotrules = 1; } } if ( !gotrules ) usage(); if ( ! didfile ) { /* * Didn't process any files yet - process stdin. */ newfile( 0 ); if ( Beginact ) doaction( Beginact ); process(); } if ( Endact ) doaction( Endact ); DBUG_RETURN(0); } /* * Regular expression/action file compilation routines. */ void compile() { /* * Compile regular expressions and C actions into Rules struct, * reading from current input file "Fileptr". */ register int c; register EXPR_NODE *root; DBUG_ENTER("compile"); while ( (c = getcharacter()) != -1 ) { if ( c==' ' || c=='\t' || c=='\n' ) /* swallow whitespace */ ; else if ( c=='#' ) { /* * Swallow comments */ while ( (c=getcharacter()) != -1 && c!='\n' ) ; } else if ( c=='{' ) { DBUG_PRINT("compile",("action")); /* * Compile the action string into a parse tree */ ungetcharacter( (char) '{' ); if ( Rulep && Rulep->action ) { Rulep->nextrule = (RULE *) get_clear_memory( sizeof( *Rulep ) ); Rulep = Rulep->nextrule; } if ( !Rulep ) { /* * This is the first action encountered. * Allocate the first Rules structure and * initialize it */ Rules = Rulep = (RULE *) get_clear_memory( sizeof( *Rulep ) ); } Rulep->action = act_compile( Workbuf ); } else if ( c==',' ) { DBUG_PRINT("compile",("stop pattern")); /* * It's (hopefully) the second part of a two-part * pattern string. Swallow the comma and start * compiling an action string. */ if ( !Rulep || !Rulep->pattern.start ) error( "stop pattern without a start", RE_ERROR ); if ( Rulep->pattern.stop ) error( "already have a stop pattern", RE_ERROR ); Rulep->pattern.stop = pat_compile( Workbuf ); } else { /* * Assume it's a regular expression pattern */ DBUG_PRINT("compile",("start pattern")); ungetcharacter( (char) c ); root = pat_compile( Workbuf ); if ( *Workbuf == T_BEGIN ) { /* * Saw a "BEGIN" keyword - compile following * action into special "Beginact" parse tree. */ Beginact = act_compile( Workbuf ); continue; } if ( *Workbuf == T_END ) { /* * Saw an "END" keyword - compile following * action into special "Endact" parse tree. */ Endact = act_compile( Workbuf ); continue; } if ( Rulep ) { /* * Already saw a pattern/action - link in * another Rules structure. */ Rulep->nextrule = (RULE *) get_clear_memory( sizeof( *Rulep ) ); Rulep = Rulep->nextrule; } if ( !Rulep ) { /* * This is the first pattern encountered. * Allocate the first Rules structure and * initialize it */ Rules = Rulep = (RULE *) get_clear_memory( sizeof( *Rulep ) ); } if ( Rulep->pattern.start ) error( "already have a start pattern", RE_ERROR ); Rulep->pattern.start = root; } } for(Rulep = Rules; Rulep; Rulep = Rulep->nextrule) { if(!Rulep->action) { pattern_arg = "{printf \"%s\n\", $0}"; Rulep->action = act_compile( Workbuf ); pattern_arg = NULL; } } endfile(); DBUG_VOID_RETURN; } /* * Text file main processing loop. */ void process() { /* * Read a line at a time from current input file at "Fileptr", * then apply each rule in the Rules chain to the input line. */ register int i; DBUG_ENTER("process"); Recordcount = 0; while ( getline() ) { /* * Parse the input line. */ if(! *Recordsep ) strcpy(Fieldsep," \t\n"); Fieldcount = parse( Linebuf, Fields, Fieldsep ); DBUG_PRINT("process",( "parsed %d words:", Fieldcount )); DBUG_EXECUTE("process",for(i=0; i<Fieldcount; ++i )DBUG_PRINT("process",("<%s>",Fields[i]));); Rulep = Rules; while(Rulep) { if ( ! Rulep->pattern.start ) { /* * No pattern given - perform action on * every input line. */ doaction( Rulep->action ); } else if ( Rulep->pattern.startseen ) { /* * Start pattern already found - perform * action then check if line matches * stop pattern. */ doaction( Rulep->action ); if ( dopattern( Rulep->pattern.stop ) ) Rulep->pattern.startseen = 0; } else if ( dopattern( Rulep->pattern.start ) ) { /* * Matched start pattern - perform action. * If a stop pattern was given, set "start * pattern seen" flag and process every input * line until stop pattern found. */ doaction( Rulep->action ); if ( Rulep->pattern.stop ) Rulep->pattern.startseen = 1; } Rulep = Rulep->nextrule; } } DBUG_VOID_RETURN; } /* * Miscellaneous functions */ parse( str, wrdlst, delim ) register char *str; char *wrdlst[]; char *delim; { /* * Parse the string of words in "str" into the word list at "wrdlst". * A "word" is a sequence of characters delimited by one or more * of the characters found in the string "delim". * Returns the number of words parsed. */ register int wrdcnt; register char *cp, *wrdcp, c; char wrdbuf[ MAXLINELEN+1 ]; DBUG_ENTER("parse"); wrdcnt = 0; while ( *str ) { while(c = *str++) { cp = delim; while(*cp && c != *cp) cp++; if(! *cp) break; } str--; if ( !*str ) break; wrdcp = wrdbuf; while(c = *str++) { cp = delim; while(*cp && c != *cp) cp++; if(*cp) break; *wrdcp++ = c; } str--; *wrdcp = 0; /* * NOTE: allocate a MAXLINELEN sized buffer for every * word, just in case user wants to copy a larger string * into a field. */ if(wrdcnt == max_field_count) { wrdlst[ wrdcnt ] = getmemory( MAXLINELEN+1 ); max_field_count++; } strcpy( wrdlst[ wrdcnt++ ], wrdbuf ); } DBUG_RETURN(wrdcnt); } void unparse( wrdlst, wrdcnt, str, delim ) char *wrdlst[]; register int wrdcnt; register char *str; char *delim; { /* * Replace all the words in "str" with the words in "wrdlst", * maintaining the same word seperation distance as found in * the string. * A "word" is a sequence of characters delimited by one or more * of the characters found in the string "delim". */ register int wc; register char *sp, *cp, c; char strbuf[ MAXLINELEN+1 ], *start; DBUG_ENTER("unparse"); wc = 0; /* next word in "wrdlst" */ sp = strbuf; /* points to our local string */ start = str; /* save start address of "str" for later... */ while ( *str ) { /* * Copy the field delimiters from the original string to * our local version. */ while(c = *str++) { cp = delim; while(*cp && c != *cp) cp++; if(!*cp) break; *sp++ = c; } str--; if ( !*str ) break; /* * Skip over the field in the original string and... */ while(c = *str++) { cp = delim; while(*cp && c != *cp) cp++; if(*cp) break; } str--; if ( wc < wrdcnt ) { /* * ...copy in the field in the wordlist instead. */ cp = wrdlst[ wc++ ]; while(*sp++ = *cp++); sp--; } } /* * Tie off the local string, then copy it back to caller's string. */ *sp = 0; strcpy( start, strbuf ); DBUG_VOID_RETURN; } char * getmemory( len ) register unsigned len; { register char *cp; DBUG_ENTER("getmemory"); if ( cp=malloc( len ) ) DBUG_RETURN(cp); error( "out of memory", MEM_ERROR ); DBUG_RETURN(NULL); } char * get_clear_memory( len ) register unsigned len; { register char *cp; DBUG_ENTER("getmemory"); if ( cp=calloc( 1, len ) ) DBUG_RETURN(cp); error( "out of memory", MEM_ERROR ); DBUG_RETURN(NULL); } EXPR_NODE *get_expr_node(operator) char operator; { register EXPR_NODE *node; DBUG_ENTER("get_expr_node"); node = (EXPR_NODE *) getmemory(sizeof(EXPR_NODE)); node->left = node->right = NULL; node->operator = operator; DBUG_PRINT("get_expr_node",("operator = '%s'",token_name[operator])); DBUG_RETURN(node); } void newfile( s ) register char *s; { DBUG_ENTER("newfile"); Linecount = 0; if ( Filename = s ) { #ifdef BDS_C if ( fopen( s, Fileptr = Curfbuf ) == -1 ) #else if ( !(Fileptr = fopen( s, "r" )) ) #endif error( "file not found", FILE_ERROR ); } else { /* * No file name given - process standard input. */ Fileptr = stdin; Filename = "standard input"; } DBUG_VOID_RETURN; } getline() { /* * Read a record from current input file. */ register int rtn, len = 0; register char *cp = Linebuf, *last_nl, *sep = Recordsep; DBUG_ENTER("getline"); if(eof_seen) { endfile(); DBUG_RETURN(0); } if(*sep) { while((*cp++ = rtn = getcharacter()) != *sep++ && rtn != -1) { while(*sep) { if(rtn == *sep++) break; } if( ++len == MAXLINELEN ) error("Input record too long", RECORD_ERROR); sep = Recordsep; } } else /* Treat an empty line as record separator. */ { while(1) { last_nl = cp; while((*cp++ = rtn = getcharacter()) != '\n' && rtn != -1) { if( ++len == MAXLINELEN ) error("Input record too long", RECORD_ERROR); } if(((cp - last_nl) == 1) || (rtn == -1)) break; } } *(--cp) = 0; if ( rtn == -1 ) { if(len) eof_seen = 1; else { endfile(); DBUG_RETURN(0); } } ++Recordcount; DBUG_RETURN(1); } int getcharacter() { /* * Read a character from curren input file. * WARNING: your getc() must convert lines that end with CR+LF * to LF and CP/M's EOF character (^Z) to a -1. * Also, getc() must return a -1 when attempting to read from * an unopened file. */ register int c; DBUG_ENTER("getcharacter"); if(pattern_arg) { if(ungetc_arg) { c = ungetc_arg; ungetc_arg = 0; } else if(*pattern_arg) c = *pattern_arg++; else c = EOF; } else { #ifdef BDS_C /* * BDS C doesn't do CR+LF to LF and ^Z to -1 conversions * <gag> */ if ( (c = getc( Fileptr )) == '\r' ) { if ( (c = getc( Fileptr )) != '\n' ) { ungetc( c ); c = '\r'; } } else if ( c == 26 ) /* ^Z */ c = -1; #else c = getc( Fileptr ); #endif if ( c=='\n' ) ++Linecount; } DBUG_PRINT("getcharacter",("'%c'", c)); DBUG_RETURN(c); } ungetcharacter( c ) register char c; { /* * Push a character back into the input stream. * If the character is a record seperator, or a newline character, * the record and line counters are adjusted appropriately. */ DBUG_ENTER("ungetcharacter"); if ( c == *Recordsep ) --Recordcount; if ( c=='\n' ) --Linecount; DBUG_PRINT("ungetcharacter",("'%c'", c)); if(pattern_arg) DBUG_RETURN(ungetc_arg = c); DBUG_RETURN(ungetc( c, Fileptr )); } void endfile() { DBUG_ENTER("endfile"); fclose( Fileptr ); eof_seen = 0; Filename = NULL; Linecount = 0; DBUG_VOID_RETURN; } void error( s, severe ) register char *s; register int severe; { DBUG_ENTER("error"); if ( Filename ) fprintf( stderr, "%s:", Filename ); if ( Linecount ) fprintf( stderr, " line %d:", Linecount ); fprintf( stderr, " %s\n", s ); if ( severe ) exit( severe ); DBUG_VOID_RETURN; } void usage() { DBUG_ENTER("usage"); error( "Usage: bawk { action | - | -f <actfile> } <file> ...", USAGE_ERROR ); DBUG_VOID_RETURN; } SHAR_EOF cat << \SHAR_EOF > bawkact.c /* * Bawk C actions compiler */ #include <stdio.h> #include "bawk.h" EXPR_NODE *act_compile( actbuf ) register char *actbuf;/* where tokenized actions are compiled into */ { DBUG_ENTER("act_compile"); Where = ACTION; stmt_lex( actbuf ); Actptr = actbuf; getoken(); DBUG_RETURN(stmt_parse()); } EXPR_NODE *pat_compile( actbuf ) register char *actbuf;/* where tokenized actions are compiled into */ { DBUG_ENTER("pat_compile"); Where = PATTERN; stmt_lex( actbuf ); Actptr = actbuf; getoken(); DBUG_RETURN(stmt_parse()); } void stmt_lex( actbuf ) register char *actbuf;/* where tokenized actions are compiled into */ { /* * Read and tokenize C actions from current input file into the * action buffer. Strip out comments and whitespace in the * process. */ register char *actptr, /* actbuf pointer */ *cp; /* work pointer */ char buf[MAXLINELEN+1];/* string buffer */ register int braces = 0,/* counts '{}' pairs - return when 0 */ parens = 0, /* counts '()' pairs */ i, /* temp */ c, /* current input character */ finished = 0; DBUG_ENTER("stmt_lex"); actptr = actbuf; while ( !finished && ((c = getcharacter()) != -1) ) { switch(c) { case ' ': case '\t': case '\n': /* * Skip over spaces, tabs and newlines */ break; case '#': /* * Skip comments. Comments start with a '#' and * end at the next newline. */ while ( (c = getcharacter()) != -1 && c!='\n' ) ; break; case '{': if ( Where==PATTERN ) { /* * We're compiling a pattern. The '{' marks * the beginning of an action statement. * Push the character back and return. */ ungetcharacter( (char) '{' ); finished = 1; } else { /* * We must be compiling an action statement. * '{'s mark beginning of action or compound * statements. */ ++braces; *actptr++ = T_LBRACE; } break; case '}': *actptr++ = T_RBRACE; finished = (! --braces ); break; case '(': ++parens; *actptr++ = T_LPAREN; break; case ')': if ( --parens < 0 ) error( "mismatched '()'", ACT_ERROR ); *actptr++ = T_RPAREN; break; case ',': if ( !braces && !parens ) { /* * found a comma outside of any braces or * parens - this must be a regular * expression seperator. */ ungetcharacter( (char) ',' ); finished = 1; } else *actptr++ = T_COMMA; break; case '/': *actptr++ = T_DIV; break; case '@': *actptr++ = T_REGEXP; ungetcharacter( (char) c ); actptr += re_compile( actptr ); break; case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': case 'g': case 'h': case 'i': case 'j': case 'k': case 'l': case 'm': case 'n': case 'o': case 'p': case 'q': case 'r': case 's': case 't': case 'u': case 'v': case 'w': case 'x': case 'y': case 'z': case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': case 'G': case 'H': case 'I': case 'J': case 'K': case 'L': case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R': case 'S': case 'T': case 'U': case 'V': case 'W': case 'X': case 'Y': case 'Z': case '_': /* * It's a symbol reference. Copy the symbol into * string buffer. */ cp = buf; do *cp++ = c; while ( (c=getcharacter()) != -1 && (isalnum( c ) || (c == '_'))); ungetcharacter( (char) c ); *cp = 0; /* * Check if a keyword, builtin function or variable. */ if ( c = iskeyword( buf ) ) *actptr++ = c; else if ( i = isfunction( buf ) ) { *actptr++ = T_FUNCTION; storeint( actptr, i ); actptr += sizeof( i ); } else { /* * It's a symbol name. */ *actptr++ = T_VARIABLE; if ( !(cp = (char *) findvar( buf )) ) cp = (char *) addvar( buf ); storeptr( actptr, cp ); actptr += sizeof( cp ); } break; #ifdef QUOTE_STRING_HACK case '`': #endif case '"': /* * It's a string constant */ *actptr++ = T_STRING; actptr = str_compile( actptr, c ); break; case '\'': /* * It's a character constant */ *actptr++ = T_CONSTANT; str_compile( buf, (char) '\'' ); storeint( actptr, *buf ); actptr += sizeof( i ); break; case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': /* * It's a numeric constant */ *actptr++ = T_CONSTANT; cp = buf; do *cp++ = c; while ( (c=getcharacter()) != -1 && isdigit(c) ); ungetcharacter( (char) c ); *cp = 0; storeint( actptr, atoi( buf ) ); actptr += sizeof( i ); break; case '$': *actptr++ = T_DOLLAR; break; case '=': if ( (c=getcharacter()) == '=' ) *actptr++ = T_EQ; else { ungetcharacter( (char) c ); *actptr++ = T_ASSIGN; } break; case '!': if ( (c=getcharacter()) == '=' ) *actptr++ = T_NE; else { ungetcharacter( (char) c ); *actptr++ = T_LNOT; } break; case '<': if ( (c=getcharacter()) == '<' ) *actptr++ = T_SHL; else if ( c == '=' ) *actptr++ = T_LE; else { ungetcharacter( (char) c ); *actptr++ = T_LT; } break; case '>': if ( (c=getcharacter()) == '>' ) *actptr++ = T_SHR; else if ( c == '=' ) *actptr++ = T_GE; else { ungetcharacter( (char) c ); *actptr++ = T_GT; } break; case '&': if ( (c=getcharacter()) == '&' ) *actptr++ = T_LAND; else { ungetcharacter( (char) c ); *actptr++ = T_AND; } break; case '|': if ( (c=getcharacter()) == '|' ) *actptr++ = T_LOR; else { ungetcharacter( (char) c ); *actptr++ = T_OR; } break; case '+': if ( (c=getcharacter()) == '+' ) *actptr++ = T_INCR; else { ungetcharacter( (char) c ); *actptr++ = T_ADD; } break; case '-': if ( (c=getcharacter()) == '-' ) *actptr++ = T_DECR; else { ungetcharacter( (char) c ); *actptr++ = T_SUB; } break; case '[': *actptr++ = T_LBRACKET; break; case ']': *actptr++ = T_RBRACKET; break; case ';': *actptr++ = T_SEMICOLON; break; case '*': *actptr++ = T_MUL; break; case '%': *actptr++ = T_MOD; break; case '^': *actptr++ = T_XOR; break; case '~': *actptr++ = T_NOT; break; default: /* * Bad character in input line */ error( "lexical error", ACT_ERROR ); } if ( actptr >= Workbuf + MAXWORKBUFLEN ) error( "action too long", MEM_ERROR ); } if ( braces || parens ) error( "mismatched '{}' or '()'", ACT_ERROR ); *actptr++ = T_EOF; DBUG_VOID_RETURN; } char * str_compile( str, delim ) register char *str, delim; { /* * Compile a string from current input file into the given string * buffer. Stop when input character is the delimiter in "delim". * Returns a pointer to the first character after the string. */ int tmpc; /* can not be a register variable */ register int c; register char buf[4]; DBUG_ENTER("str_compile"); while ( (c = getcharacter()) != -1 && c != delim) { if ( c == '\\' ) { switch ( c = getcharacter() ) { case -1: goto err; case 'b': c = '\b'; break; case 'n': c = '\n'; break; case 't': c = '\t'; break; case 'f': c = '\f'; break; case 'r': c = '\r'; break; case '0': case '1': case '2': case '3': *buf = c; for ( c=1; c<3; c++ ) { if ( (buf[c]=getcharacter()) == -1 ) goto err; } buf[c] = 0; sscanf( buf, "%o", &tmpc ); c = tmpc; break; case '\n': if ( getcharacter() == -1 ) goto err; default: if ( (c = getcharacter()) == -1 ) goto err; } } *str++ = c; } *str++ = 0; DBUG_RETURN(str); err: sprintf( buf, "missing %c delimiter", delim ); error( buf, 4 ); DBUG_RETURN(NULL); } void storeint( ip, i ) char *ip; int i; { DBUG_ENTER("storeint"); movmem((char *) &i, ip, sizeof(i)); DBUG_VOID_RETURN; } void storeptr( pp, p ) char *pp, *p; { DBUG_ENTER("storeptr"); movmem((char *) &p, pp, sizeof(p)); DBUG_VOID_RETURN; } int fetchint( ip ) register char *ip; { int i; DBUG_ENTER("fetchint"); movmem(ip, (char *) &i, sizeof(i)); DBUG_RETURN(i); } char * fetchptr( pp ) register char *pp; { char *p; DBUG_ENTER("fetchptr"); movmem(pp, (char *) &p, sizeof(p)); DBUG_RETURN(p); } #ifndef DBUG_OFF char *token_name[] = { 0, "CHAR", "BOL", "EOL", "ANY", "CLASS", "NCLASS", "STAR", "PLUS", "MINUS", "ALPHA", "DIGIT", "NALPHA", "PUNCT", "RANGE", "ENDPAT", "T_STRING", "T_DOLLAR", "T_REGEXP", "T_REGEXP_ARG", "T_CONSTANT", "T_VARIABLE", "T_FUNCTION", "T_SEMICOLON", "T_EOF", "T_LBRACE", "T_RBRACE", "T_LPAREN", "T_RPAREN", "T_LBRACKET", "T_RBRACKET", "T_COMMA", "T_ASSIGN", "T_STAR", "T_MUL", "T_DIV", "T_MOD", "T_ADD", "T_UMINUS", "T_SUB", "T_SHL", "T_SHR", "T_LT", "T_LE", "T_GT", "T_GE", "T_EQ", "T_NE", "T_NOT", "T_ADDROF", "T_AND", "T_XOR", "T_OR", "T_LNOT", "T_LAND", "T_LOR", "T_INCR", "T_DECR", "T_POSTINCR", "T_POSTDECR", "T_IF", "T_ELSE", "T_WHILE", "T_BREAK", "T_CHAR", "T_INT", "T_BEGIN", "T_END", "T_NF", "T_NR", "T_FS", "T_RS", "T_FILENAME", "T_STATEMENT", "T_DECLARE", "T_ARRAY_DECLARE" }; #endif char getoken() { register char *cp; register int i; DBUG_ENTER("getoken"); switch ( Token = *Actptr++ ) { case T_STRING: case T_REGEXP: Value.dptr = Actptr; Actptr += strlen( Actptr ) + 1; break; case T_VARIABLE: Value.dptr = fetchptr( Actptr ); Actptr += sizeof( cp ); break; case T_FUNCTION: case T_CONSTANT: Value.ival = fetchint( Actptr ); Actptr += sizeof( i ); break; case T_EOF: --Actptr; default: Value.dptr = 0; } DBUG_PRINT("getoken", ("Token='%s' (%d), Value=%d",token_name[Token],Token,Value.ival)); DBUG_RETURN(Token); } SHAR_EOF cat << \SHAR_EOF > bawkdo.c /* * Bawk C actions interpreter */ #include <stdio.h> #include "bawk.h" static char pop_array[MAX_TOKEN + 1]; void init_pop_array() { register int i; DBUG_ENTER("init_pop_array"); for(i = 0; i <= MAX_TOKEN; i++) pop_array[i] = 1; pop_array[T_STATEMENT] = pop_array[T_IF] = pop_array[T_DECLARE] = 0; pop_array[T_DECLARE] = pop_array[T_ARRAY_DECLARE] = 0; pop_array[T_BREAK] = 0; DBUG_VOID_RETURN; } int dopattern( root ) register EXPR_NODE *root; { DBUG_ENTER("dopattern"); Where = PATTERN; walk_tree(root); DBUG_RETURN(popint()); } void doaction( root ) register EXPR_NODE *root; { DBUG_ENTER("doaction"); Where = ACTION; walk_tree(root); DBUG_VOID_RETURN; } void walk_tree(root) register EXPR_NODE *root; { register int ival; DATUM data; register VARIABLE *pvar; register VARDECL *pdecl; DBUG_ENTER("walk_tree"); if(Saw_break || !root) DBUG_VOID_RETURN; switch(root->operator) { case T_ASSIGN: walk_tree(root->left); walk_tree(root->right); assignment(); break; case T_LOR: walk_tree(root->left); if(popint()) pushint(1); else { walk_tree(root->right); pushint(popint() != 0); } break; case T_LAND: walk_tree(root->left); if(!popint()) pushint(0); else { walk_tree(root->right); pushint(popint() != 0); } break; case T_OR: walk_tree(root->left); walk_tree(root->right); pushint(popint() | popint()); break; case T_AND: walk_tree(root->left); walk_tree(root->right); pushint(popint() & popint()); break; case T_XOR: walk_tree(root->left); walk_tree(root->right); pushint(popint() ^ popint()); break; case T_EQ: walk_tree(root->left); walk_tree(root->right); pushint(popint() == popint()); break; case T_NE: walk_tree(root->left); walk_tree(root->right); pushint(popint() != popint()); break; case T_LE: walk_tree(root->left); ival = popint(); walk_tree(root->right); pushint(ival <= popint()); break; case T_GE: walk_tree(root->left); ival = popint(); walk_tree(root->right); pushint(ival >= popint()); break; case T_LT: walk_tree(root->left); ival = popint(); walk_tree(root->right); pushint(ival < popint()); break; case T_GT: walk_tree(root->left); ival = popint(); walk_tree(root->right); pushint(ival > popint()); break; case T_SHL: walk_tree(root->left); ival = popint(); walk_tree(root->right); pushint(ival << popint()); break; case T_SHR: walk_tree(root->left); ival = popint(); walk_tree(root->right); pushint(ival >> popint()); break; case T_ADD: walk_tree(root->left); walk_tree(root->right); pushint(popint() + popint()); break; case T_SUB: walk_tree(root->left); ival = popint(); walk_tree(root->right); pushint(ival - popint()); break; case T_MUL: walk_tree(root->left); ival = popint(); walk_tree(root->right); pushint(ival * popint()); break; case T_DIV: walk_tree(root->left); ival = popint(); walk_tree(root->right); pushint(ival / popint()); break; case T_MOD: walk_tree(root->left); ival = popint(); walk_tree(root->right); pushint(ival % popint()); break; case T_LNOT: walk_tree(root->left); pushint( ! popint() ); break; case T_NOT: walk_tree(root->left); pushint( ~ popint() ); break; case T_INCR: walk_tree(root->left); preincdec(1); break; case T_DECR: walk_tree(root->left); preincdec(-1); break; case T_DOLLAR: /* * It's a reference to one (or all) of the words in Linebuf. */ walk_tree(root->left); if ( ival = popint() ) { if ( ival > Fieldcount ) ival = Fieldcount; else if ( ival < 1 ) ival = 1; data.dptr = Fields[ ival-1 ]; } else { /* * Reconstitute the line buffer in case any of the * fields have been changed. */ unparse( Fields, Fieldcount, Linebuf, Fieldsep ); data.dptr = Linebuf; } /* * $<expr>'s are treated the same as string constants: */ push( (char) 1, (char) ACTUAL, (char) BYTE, &data ); break; case T_UMINUS: walk_tree(root->left); pushint( - popint() ); break; case T_STAR: walk_tree(root->left); /* * If item on stack is an LVALUE, do an extra level of * indirection before changing it to an LVALUE. */ if ( Stackptr->lvalue ) Stackptr->value.ptrptr = (char **) *Stackptr->value.ptrptr; Stackptr->lvalue = 1; --Stackptr->class; break; case T_ADDROF: walk_tree(root->left); if ( Stackptr->lvalue ) Stackptr->lvalue = 0; else error( "'&' operator needs an lvalue", ACT_ERROR ); break; case T_CONSTANT: pushint(((DATUM *) (root->left))->ival); break; case T_FUNCTION: function(((DATUM *) (root->left))->ival, root->right); break; case T_REGEXP: /* * Perform a match of the regular expression agains input * line. */ unparse( Fields, Fieldcount, Linebuf, Fieldsep ); pushint( match( Linebuf, (char *) root->left ) ); break; case T_REGEXP_ARG: /* * A regular expression that is to be passed as a function * argument. */ data.dptr = (char *) root->left; push( (char) 1, (char) ACTUAL, (char) BYTE, &data ); break; case T_STRING: data.dptr = (char *) root->left; push( (char) 1, (char) ACTUAL, (char) BYTE, &data ); break; case T_NF: pushint( Fieldcount ); break; case T_NR: pushint( Recordcount ); break; case T_FS: data.dptr = Fieldsep; push( (char) 1, (char) ACTUAL, (char) BYTE, &data ); break; case T_RS: data.dptr = Recordsep; push( (char) 1, (char) ACTUAL, (char) BYTE, &data ); break; case T_FILENAME: data.dptr = Filename; push( (char) 1, (char) ACTUAL, (char) BYTE, &data ); break; case T_VARIABLE: pvar = (VARIABLE *) root->left; /* * it's a plain variable. The way a variable is * represented on the stack depends on its type: * lvalue class value.dptr * vars: 1 0 address of var * ptrs: 1 1 ptr to address of ptr * array: 0 1 address of var */ if ( pvar->vclass && !pvar->vlen ) /* it's a pointer */ data.dptr = (char *) &pvar->vptr; else /* an array or simple variable */ data.dptr = pvar->vptr; /* * If it's an array it can't be used as an LVALUE. */ push( pvar->vclass, (char) !pvar->vlen, pvar->vsize, &data ); break; case T_LBRACKET: walk_tree(root->left); if ( ! Stackptr->class ) error( "'[]' needs an array or pointer", ACT_ERROR ); /* * compute the subscript */ walk_tree(root->right); ival = popint(); /* * compute the offset (subscript times WORD for int arrays) * and then the effective address. */ ival *= Stackptr->size; if ( Stackptr->lvalue ) /* * It's a pointer - don't forget that the stack top * item's value is the address of the pointer so we * must do another level of indirection. */ Stackptr->value.dptr = *Stackptr->value.ptrptr+ival; else /* * It's a plain array - the stack top item's value is * the address of the first element in the array. */ Stackptr->value.dptr += ival; /* * The stack top item now becomes an LVALUE, but we've * reduced the indirection level. */ Stackptr->lvalue = 1; --Stackptr->class; break; case T_POSTINCR: walk_tree(root->left); postincdec(1); break; case T_POSTDECR: walk_tree(root->left); postincdec(-1); break; case T_STATEMENT: if(root->left) { walk_tree(root->left); if(pop_array[root->left->operator]) popint(); } walk_tree(root->right); break; case T_DECLARE: pdecl = (VARDECL *) root->left; pvar = pdecl->variable; if(pdecl->vsize != ((pvar->vlen ? pvar->vlen : 1)* pvar->vsize)) { /* * The amount of storage needed for the variable has * changed. */ free(pvar->vptr); pvar->vptr = get_clear_memory(pdecl->vsize); } pvar->vclass = pdecl->vclass; pvar->vsize = pdecl->vsize; pvar->vlen = 0; walk_tree(root->right); break; case T_ARRAY_DECLARE: /* Compute the dimension */ walk_tree(root->left->right); ival = popint(); pdecl = (VARDECL *) root->left->left; pvar = pdecl->variable; if((ival*pdecl->vsize) != ((pvar->vlen ? pvar->vlen : 1)* pvar->vsize)) { free(pvar->vptr); pvar->vptr = get_clear_memory(ival*pdecl->vsize); } pvar->vclass = pdecl->vclass; pvar->vsize = pdecl->vsize; pvar->vlen = ival; walk_tree(root->right); break; case T_IF: walk_tree(root->left->left); if(popint()) walk_tree(root->left->right); else walk_tree(root->right); break; case T_WHILE: while( !Saw_break ) { walk_tree(root->left); if( ! popint() ) break; walk_tree(root->right); } Saw_break = 0; break; case T_BREAK: Saw_break = 1; break; default: DBUG_PRINT("walk_tree", ("decimal value of operator = %d",root->operator)); error("internal error: parse tree node with unknown symbol", ACT_ERROR); } DBUG_VOID_RETURN; } void preincdec(incr) register int incr; { /* * Pre increment/decrement */ DBUG_ENTER("preincdec"); if ( Stackptr->lvalue ) { if ( Stackptr->class ) incr *= Stackptr->size; *Stackptr->value.ptrptr += incr; } else error( "pre '++' or '--' needs an lvalue", ACT_ERROR ); DBUG_VOID_RETURN; } void postincdec(incr) register int incr; { /* * Post increment/decrement */ register char **pp; DBUG_ENTER("postincdec"); if ( Stackptr->lvalue ) { if ( Stackptr->class ) { /* * It's a pointer - save its old value then * increment/decrement the pointer. This makes the * item on top of the stack look like an array, which * means it can no longer be used as an LVALUE. This * doesn't really hurt, since it doesn't make much * sense to say: * char *cp; * cp++ = value; */ pp = (char **) *Stackptr->value.ptrptr; *Stackptr->value.ptrptr += incr * Stackptr->size; Stackptr->value.ptrptr = pp; } else { /* * It's a simple variable - save its old value then * increment/decrement the variable. This makes the * item on top of the stack look like a constant, * which means it can no longer be used as an LVALUE. * Same reasoning as above. */ if ( Stackptr->size == BYTE ) pp = (char **) *Stackptr->value.dptr; else pp = (char **) *Stackptr->value.ptrptr; *Stackptr->value.ptrptr += incr; Stackptr->value.ptrptr = pp; } Stackptr->lvalue = 0; } else error( "post '++' or '--' needs an lvalue", ACT_ERROR ); DBUG_VOID_RETURN; } SHAR_EOF cat << \SHAR_EOF > bawkpat.c /* * Bawk regular expression compiler/interpreter */ #include <stdio.h> #include "bawk.h" int re_compile( patbuf ) char *patbuf; /* where to put compiled pattern */ { /* * Compile a regular expression from current input file * into the given pattern buffer. */ register int c, /* Current character */ o; /* Temp */ register char delim, /* pattern delimiter */ *patptr, /* destination string pntr */ *lp, /* Last pattern pointer */ *spp; /* Save beginning of pattern */ char *cclass(); /* Compile class routine */ DBUG_ENTER("re_compile"); lp = patptr = patbuf; delim = getcharacter(); while ( (c = getcharacter()) != -1 && c != delim ) { /* * STAR, PLUS and MINUS are special. */ if (c == '*' || c == '+' || c == '-') { if (patptr == patbuf || (o=patptr[-1]) == BOL || o == EOL || o == STAR || o == PLUS || o == MINUS) error( "illegal occurrance op", RE_ERROR ); *patptr++ = ENDPAT; *patptr++ = ENDPAT; spp = patptr; /* Save pattern end */ while (--patptr > lp) /* Move pattern down... */ *patptr = patptr[-1]; /* one byte */ *patptr = (c == '*') ? STAR : (c == '-') ? MINUS : PLUS; patptr = spp; /* Restore pattern end */ continue; } /* * All the rest. */ lp = patptr; /* Remember start */ switch(c) { case '^': *patptr++ = BOL; break; case '$': *patptr++ = EOL; break; case '.': *patptr++ = ANY; break; case '[': patptr = cclass( patptr ); break; case ':': if ( (c=getcharacter()) != -1 ) { switch( tolower( c ) ) { case 'a': *patptr++ = ALPHA; break; case 'd': *patptr++ = DIGIT; break; case 'n': *patptr++ = NALPHA; break; case ' ': *patptr++ = PUNCT; break; default: error( "unknown ':' type", RE_ERROR ); } } else error( "no ':' type", RE_ERROR ); break; case '\\': c = getcharacter(); default: *patptr++ = CHAR; *patptr++ = c; } } *patptr++ = ENDPAT; *patptr++ = 0; /* Terminate string */ DBUG_EXECUTE("re_match",re_debug(patbuf,patptr);); DBUG_RETURN(patptr - patbuf); } #ifndef DBUG_OFF re_debug(patbuf, patptr) register char *patbuf, *patptr; { register char *lp; for ( lp=patbuf; lp<patptr; ++lp ) { switch ( *lp ) { case CHAR: DBUG_PRINT("re_match",("char ")); break; case BOL: DBUG_PRINT("re_match",("bol ")); break; case EOL: DBUG_PRINT("re_match",("eol ")); break; case ANY: DBUG_PRINT("re_match",("any ")); break; case CLASS: DBUG_PRINT("re_match",("class(%d) ",*++lp)); break; case NCLASS: DBUG_PRINT("re_match",("notclass(%d) ",*++lp)); break; case STAR: DBUG_PRINT("re_match",("star ")); break; case PLUS: DBUG_PRINT("re_match",("plus ")); break; case MINUS: DBUG_PRINT("re_match",("minus ")); break; case ALPHA: DBUG_PRINT("re_match",("alpha ")); break; case DIGIT: DBUG_PRINT("re_match",("digit ")); break; case NALPHA: DBUG_PRINT("re_match",("notalpha ")); break; case PUNCT: DBUG_PRINT("re_match",("punct ")); break; case RANGE: DBUG_PRINT("re_match",("range ")); break; case ENDPAT: DBUG_PRINT("re_match",("endpat ")); break; default: DBUG_PRINT("re_match",("<%c> ", *lp)); break; } } } #endif char * cclass( patbuf ) register char *patbuf; /* destination pattern buffer */ { /* * Compile a class (within []) */ register char *patptr, /* destination pattern pointer */ *cp; /* Pattern start */ register int c, /* Current character */ o; /* Temp */ DBUG_ENTER("cclass"); patptr = patbuf; if ( (c = getcharacter()) == -1 ) error( "class terminates badly", RE_ERROR ); else if ( c == '^') { /* * Class exclusion, for example: [^abc] * Swallow the "^" and set token type to class exclusion. */ o = NCLASS; } else { /* * Normal class, for example: [abc] * push back the character and set token type to class */ ungetcharacter( (char) c ); o = CLASS; } *patptr++ = o; cp = patptr; /* remember where byte count is */ *patptr++ = 0; /* and initialize byte count */ while ( (c = getcharacter()) != -1 && c!=']' ) { o = getcharacter(); /* peek at next char */ if (c == '\\') /* Store quoted chars */ { if ( o == -1) /* Gotta get something */ error( "class terminates badly", RE_ERROR ); *patptr++ = o; } else if ( c=='-' && (patptr-cp)>1 && o!=']' && o != -1 ) { c = patptr[-1]; /* Range start */ patptr[-1] = RANGE; /* Range signal */ *patptr++ = c; /* Re-store start */ *patptr++ = o; /* Store end char */ } else { *patptr++ = c; /* Store normal char */ ungetcharacter( (char) o ); } } if (c != ']') error( "unterminated class", RE_ERROR ); if ( (c = (patptr - cp)) >= 256 ) error( "class too large", RE_ERROR ); if ( c == 0 ) error( "empty class", RE_ERROR ); *cp = c; /* fill in byte count */ DBUG_RETURN(patptr); } int match( line, pattern ) char *line; /* line to match */ register char *pattern; /* pattern to match */ { /* * Match the current line (in Linebuf[]), return 1 if it does. */ register char *l; /* Line pointer */ char *pmatch(); register char *next; register int matches; DBUG_ENTER("match"); matches = 0; for (l = line; *l; l++) { if ( next = pmatch(line, l, pattern) ) { l = next - 1; ++matches; DBUG_PRINT("match",("match found")); } } DBUG_RETURN(matches); } char * pmatch(linestart, line, pattern) char *linestart; /* start of line to match */ char *line; /* (partial) line to match */ char *pattern; /* (partial) pattern to match */ { register char *l;/* Current line pointer */ register char *p;/* Current pattern pointer */ register char c; /* Current character */ register char *e;/* End for STAR and PLUS match */ register int op; /* Pattern operation */ register int n; /* Class counter */ char *are; /* Start of STAR match */ DBUG_ENTER("pmatch"); l = line; DBUG_PRINT("pmatch",("line: (%s)", line)); p = pattern; while ((op = *p++) != ENDPAT) { DBUG_PRINT("pmatch",("byte[%d] = 0%o, '%c', op = 0%o",l-line, *l, *l, op)); switch(op) { case CHAR: if ( *l++ != *p++) DBUG_RETURN(0); break; case BOL: if (l != linestart) DBUG_RETURN(0); break; case EOL: if (*l != '\0') DBUG_RETURN(0); break; case ANY: if (*l++ == '\0') DBUG_RETURN(0); break; case DIGIT: if ((c = *l++) < '0' || (c > '9')) DBUG_RETURN(0); break; case ALPHA: c = *l++; c = tolower( c ); if (c < 'a' || c > 'z') DBUG_RETURN(0); break; case NALPHA: c = *l++; c = tolower( c ); if (c >= 'a' && c <= 'z') break; else if (c < '0' || c > '9') DBUG_RETURN(0); break; case PUNCT: c = *l++; if (c == 0 || c > ' ') DBUG_RETURN(0); break; case CLASS: case NCLASS: c = *l++; n = *p++ & 0377; do { if (*p == RANGE) { p += 3; n -= 2; if (c >= p[-2] && c <= p[-1]) break; } else if (c == *p++) break; } while (--n > 1); if ((op == CLASS) == (n <= 1)) DBUG_RETURN(0); if (op == CLASS) p += n - 2; break; case MINUS: e = pmatch(linestart,l,p);/* Look for a match */ while (*p++ != ENDPAT); /* Skip over pattern */ if (e) /* Got a match? */ l = e; /* Yes, update string */ break; /* Always succeeds */ case PLUS: /* One or more ... */ if ((l = pmatch(linestart,l,p)) == 0) DBUG_RETURN(0); /* Gotta have a match */ case STAR: /* Zero or more ... */ are = l; /* Remember line start */ while (*l && (e = pmatch(linestart,l,p))) l = e; /* Get longest match */ while (*p++ != ENDPAT); /* Skip over pattern */ while (l >= are) { /* Try to match rest */ if (e = pmatch(linestart,l,p)) DBUG_RETURN(e); --l; /* Nope, try earlier */ } DBUG_RETURN(0); /* Nothing else worked */ default: fprintf( stderr, "bad op code %d\n", op ); error( "can't happen -- match", RE_ERROR ); } } DBUG_RETURN(l); } SHAR_EOF cat << \SHAR_EOF > example2 BEGIN { strcpy(RS,"."); # set record seperator to a period } { if ( match( $1, @^[a-z]@ ) ) *$1 = toupper( *$1 ); printf( "%s\n", $0 ); } SHAR_EOF cat << \SHAR_EOF > tst1 @[(]@ { parens = parens + match( $0, @(@ ); } END { printf("parens=%d\n", parens ); } SHAR_EOF