ast@cs.vu.nl (Andy Tanenbaum) (09/14/87)
: This is a shar archive. Extract with sh, not csh.
: This archive ends with exit, so do not worry about trailing junk.
: --------------------------- cut here --------------------------
PATH=/bin:/usr/bin
echo Extracting \b\a\w\k\.\c
sed 's/^X//' > \b\a\w\k\.\c << '+ END-OF-FILE '\b\a\w\k\.\c
X/*
X * Bawk main program
X */
X#define MAIN 1
X#include <stdio.h>
X#include "bawk.h"
X
X/*
X * Main program
X *
X * The first file argument is compiled as the rules file; every later
X * file argument is processed as text. A lone "-" stands for standard
X * input in either role. If no text file is named, standard input is
X * processed.
X */
Xmain(argc, argv)
X    int argc;
X    char **argv;
X{
X    char gotrules, didfile, getstdin;
X
X    getstdin =
X    didfile =
X    gotrules = 0;
X
X    /*
X     * Initialize global variables:
X     */
X    Beginact = (char *) 0;
X    Endact = (char *) 0;
X    Rules = (RULE *) 0;
X    Rulep = (RULE *) 0;
X#ifdef DEBUG
X    Debug = 0;
X#endif
X    Filename = (char *) 0;
X    Linecount = 0;
X    Saw_break = 0;
X
X    Stackptr = Stackbtm - 1;
X    Stacktop = Stackbtm + MAXSTACKSZ;
X    Nextvar = Vartab;
X
X    strcpy(Fieldsep, " \t");
X    strcpy(Recordsep, "\n");
X
X    /*
X     * Parse command line
X     */
X    while (--argc) {
X        if (**(++argv) == '-') {
X            /*
X             * Process dash options.
X             */
X            switch (tolower(*(++(*argv)))) {
X#ifdef DEBUG
X            case 'd':
X                ++Debug;
X                break;
X#endif
X            case 0:
X                /*
X                 * A lone "-" names standard input. argv is NOT
X                 * stepped back here: doing so made the next pass
X                 * revisit this (now empty) argument, fail to open
X                 * it, and never reach the last argument.
X                 */
X                ++getstdin;
X                goto dosomething;
X            default:
X                usage();
X            }
X        }
X        else {
X    dosomething:
X            if (gotrules) {
X                /*
X                 * Already read rules file - assume this is
X                 * a text file for processing.
X                 */
X                if (!didfile) {
X                    /* flag, not a counter: cannot wrap around */
X                    didfile = 1;
X                    if (Beginact)
X                        doaction(Beginact);
X                }
X                if (getstdin) {
X                    --getstdin;
X                    newfile((char *) 0);
X                }
X                else
X                    newfile(*argv);
X                process();
X            }
X            else {
X                /*
X                 * First file name argument on command line
X                 * is assumed to be a rules file - attempt to
X                 * compile it.
X                 */
X                if (getstdin) {
X                    --getstdin;
X                    newfile((char *) 0);
X                }
X                else
X                    newfile(*argv);
X                compile();
X                gotrules = 1;
X            }
X        }
X    }
X    if (!gotrules)
X        usage();
X
X    if (!didfile) {
X        /*
X         * Didn't process any files yet - process stdin.
X         */
X        newfile((char *) 0);
X        if (Beginact)
X            doaction(Beginact);
X        process();
X    }
X    if (Endact)
X        doaction(Endact);
X    exit(0);
X}
X
X/*
X * Regular expression/action file compilation routines.
X */
Xcompile()
X{
X    /*
X     * Compile regular expressions and C actions into Rules struct,
X     * reading from current input file "Fileptr". The input file is
X     * closed (endfile) once it has been read to the end.
X     */
X    int c, len;
X
X#ifdef DEBUG
X    if (Debug)
X        error("compiling...", 0);
X#endif
X
X    while ((c = getcharacter()) != -1) {
X        if (c == ' ' || c == '\t' || c == '\n')
X            /* swallow whitespace */
X            ;
X        else if (c == '#') {
X            /*
X             * Swallow comments
X             */
X            while ((c = getcharacter()) != -1 && c != '\n');
X        }
X        else if (c == '{') {
X#ifdef DEBUG
X            if (Debug)
X                error("action", 0);
X#endif
X            /*
X             * Compile (tokenize) the action string into our
X             * global work buffer, then allocate some memory for
X             * it and copy it over.
X             */
X            ungetcharacter('{');
X            len = act_compile(Workbuf);
X
X            if (Rulep && Rulep->action) {
X                /*
X                 * Current rule already has an action - this
X                 * action starts a new, pattern-less rule.
X                 */
X                Rulep->nextrule =
X                    (struct rule *) getmem(sizeof(*Rulep));
X                Rulep = Rulep->nextrule;
X                fillmem(Rulep, sizeof(*Rulep), 0);
X            }
X            if (!Rulep) {
X                /*
X                 * This is the first action encountered.
X                 * Allocate the first Rules structure and
X                 * initialize it
X                 */
X                Rules = Rulep =
X                    (RULE *) getmem(sizeof(*Rulep));
X                fillmem(Rulep, sizeof(*Rulep), 0);
X            }
X            Rulep->action = getmem(len);
X            movemem(Workbuf, Rulep->action, len);
X        }
X        else if (c == ',') {
X#ifdef DEBUG
X            if (Debug)
X                error("stop pattern", 0);
X#endif
X            /*
X             * It's (hopefully) the second part of a two-part
X             * pattern string. Swallow the comma and start
X             * compiling an action string.
X             */
X            if (!Rulep || !Rulep->pattern.start)
X                error("stop pattern without a start",
X                      RE_ERROR);
X            if (Rulep->pattern.stop)
X                error("already have a stop pattern",
X                      RE_ERROR);
X            len = pat_compile(Workbuf);
X            Rulep->pattern.stop = getmem(len);
X            movemem(Workbuf, Rulep->pattern.stop, len);
X        }
X        else {
X            /*
X             * Assume it's a regular expression pattern
X             */
X#ifdef DEBUG
X            if (Debug)
X                error("start pattern", 0);
X#endif
X
X            ungetcharacter(c);
X            len = pat_compile(Workbuf);
X
X            if (*Workbuf == T_BEGIN) {
X                /*
X                 * Saw a "BEGIN" keyword - compile following
X                 * action into special "Beginact" buffer.
X                 */
X                len = act_compile(Workbuf);
X                Beginact = getmem(len);
X                movemem(Workbuf, Beginact, len);
X                continue;
X            }
X            if (*Workbuf == T_END) {
X                /*
X                 * Saw an "END" keyword - compile following
X                 * action into special "Endact" buffer.
X                 */
X                len = act_compile(Workbuf);
X                Endact = getmem(len);
X                movemem(Workbuf, Endact, len);
X                continue;
X            }
X            if (Rulep) {
X                /*
X                 * Already saw a pattern/action - link in
X                 * another Rules structure.
X                 */
X                Rulep->nextrule =
X                    (struct rule *) getmem(sizeof(*Rulep));
X                Rulep = Rulep->nextrule;
X                fillmem(Rulep, sizeof(*Rulep), 0);
X            }
X            if (!Rulep) {
X                /*
X                 * This is the first pattern encountered.
X                 * Allocate the first Rules structure and
X                 * initialize it
X                 */
X                Rules = Rulep =
X                    (RULE *) getmem(sizeof(*Rulep));
X                fillmem(Rulep, sizeof(*Rulep), 0);
X            }
X            if (Rulep->pattern.start)
X                error("already have a start pattern",
X                      RE_ERROR);
X
X            Rulep->pattern.start = getmem(len);
X            movemem(Workbuf, Rulep->pattern.start, len);
X        }
X    }
X    endfile();
X}
X
X/*
X * Text file main processing loop.
X */
Xprocess()
X{
X    /*
X     * Read a line at a time from current input file at "Fileptr", then
X     * apply each rule in the Rules chain to the input line.
X     */
X    int i;
X
X#ifdef DEBUG
X    if (Debug)
X        error("processing...", 0);
X#endif
X
X    Recordcount = 0;
X
X    while (getline()) {
X        /*
X         * Parse the input line.
X         */
X        Fieldcount = parse(Linebuf, Fields, Fieldsep);
X#ifdef DEBUG
X        if (Debug > 1) {
X            printf("parsed %d words:\n", Fieldcount);
X            for (i = 0; i < Fieldcount; ++i)
X                printf("<%s>\n", Fields[i]);
X        }
X#endif
X
X        /*
X         * The chain is empty when the rules file held only BEGIN
X         * and/or END actions, so test the pointer before using it.
X         */
X        for (Rulep = Rules; Rulep; Rulep = Rulep->nextrule) {
X            if (!Rulep->pattern.start) {
X                /*
X                 * No pattern given - perform action on every
X                 * input line.
X                 */
X                doaction(Rulep->action);
X            }
X            else if (Rulep->pattern.startseen) {
X                /*
X                 * Start pattern already found - perform
X                 * action then check if line matches stop
X                 * pattern.
X                 */
X                doaction(Rulep->action);
X                if (dopattern(Rulep->pattern.stop))
X                    Rulep->pattern.startseen = 0;
X            }
X            else if (dopattern(Rulep->pattern.start)) {
X                /*
X                 * Matched start pattern - perform action. If
X                 * a stop pattern was given, set "start
X                 * pattern seen" flag and process every input
X                 * line until stop pattern found.
X                 */
X                doaction(Rulep->action);
X                if (Rulep->pattern.stop)
X                    Rulep->pattern.startseen = 1;
X            }
X        }
X
X        /*
X         * Release memory allocated by parse().
X         */
X        while (Fieldcount)
X            free(Fields[--Fieldcount]);
X    }
X}
X
X/*
X * Miscellaneous functions
X */
Xparse(str, wrdlst, delim)
X    char *str;
X    char *wrdlst[];
X    char *delim;
X{
X    /*
X     * Parse the string of words in "str" into the word list at "wrdlst".
X     * A "word" is a sequence of characters delimited by one or more of
X     * the characters found in the string "delim". Returns the number of
X     * words parsed. At most MAXWORDS words are stored, and a word longer
X     * than MAXLINELEN - 1 characters is truncated. CAUTION: the memory
X     * for the words in "wrdlst" is allocated by malloc() and should
X     * eventually be returned by free()...
X     */
X    int wrdcnt, wrdlen;
X    char wrdbuf[MAXLINELEN], c;
X
X    wrdcnt = 0;
X    while (*str && wrdcnt < MAXWORDS) {
X        while (instr(*str, delim))
X            ++str;
X        if (!*str)
X            break;
X        wrdlen = 0;
X        while ((c = *str) && !instr(c, delim)) {
X            if (wrdlen < MAXLINELEN - 1)
X                wrdbuf[wrdlen++] = c;
X            ++str;
X        }
X        wrdbuf[wrdlen++] = 0;
X        /*
X         * NOTE: allocate a MAXLINELEN sized buffer for every word,
X         * just in case user wants to copy a larger string into a
X         * field.
X         */
X        wrdlst[wrdcnt] = getmem(MAXLINELEN);
X        strcpy(wrdlst[wrdcnt++], wrdbuf);
X    }
X
X    return wrdcnt;
X}
X
Xunparse(wrdlst, wrdcnt, str, delim)
X    char *wrdlst[];
X    int wrdcnt;
X    char *str;
X    char *delim;
X{
X    /*
X     * Replace all the words in "str" with the words in "wrdlst",
X     * maintaining the same word separation distance as found in the
X     * string. A "word" is a sequence of characters delimited by one or
X     * more of the characters found in the string "delim". The rebuilt
X     * string is cut off at MAXLINELEN - 1 characters, the capacity of
X     * the local work buffer.
X     */
X    int wc;
X    char strbuf[MAXLINELEN], *sp, *wp, *start, *limit;
X
X    wc = 0;                             /* next word in "wrdlst" */
X    sp = strbuf;                        /* points to our local string */
X    limit = strbuf + MAXLINELEN - 1;    /* leave room for the NUL */
X    start = str;            /* save start address of "str" for later... */
X    while (*str) {
X        /*
X         * Copy the field delimiters from the original string to our
X         * local version.
X         */
X        while (instr(*str, delim)) {
X            if (sp < limit)
X                *sp++ = *str;
X            ++str;
X        }
X        if (!*str)
X            break;
X        /*
X         * Skip over the field in the original string and...
X         */
X        while (*str && !instr(*str, delim))
X            ++str;
X
X        if (wc < wrdcnt) {
X            /*
X             * ...copy in the field in the wordlist instead.
X             */
X            for (wp = wrdlst[wc++]; *wp; ++wp)
X                if (sp < limit)
X                    *sp++ = *wp;
X        }
X    }
X    /*
X     * Tie off the local string, then copy it back to caller's string.
X     */
X    *sp = 0;
X    strcpy(start, strbuf);
X}
X
X/*
X * Return 1 if character "c" occurs in string "s", else 0.
X */
Xinstr(c, s)
X    char c, *s;
X{
X    while (*s)
X        if (c == *s++)
X            return 1;
X    return 0;
X}
X
X/*
X * malloc() wrapper: returns "len" bytes or reports "out of memory"
X * through error(), which exits for a non-zero severity.
X */
Xchar *
Xgetmem(len)
X    unsigned len;
X{
X    char *cp, *malloc();
X
X    if (cp = malloc(len))
X        return cp;
X    error("out of memory", MEM_ERROR);
X    return (char *) 0;      /* not reached; keeps the return defined */
X}
X
X/*
X * Make "s" the current input file; a null "s" selects standard input.
X */
X/* char * */
Xnewfile(s)
X    char *s;
X{
X    Linecount = 0;
X    if (Filename = s) {
X#ifdef BDS_C
X        if (fopen(s, Fileptr = Curfbuf) == -1)
X#else
X        if (!(Fileptr = fopen(s, "r")))
X#endif
X            error("file not found", FILE_ERROR);
X    }
X    else {
X        /*
X         * No file name given - process standard input.
X         */
X        Fileptr = stdin;
X#ifndef BDS_C
X        /*
X         * Standard input may already have hit end-of-file (rules
X         * read from "-"); forget that so it can be read again.
X         */
X        clearerr(stdin);
X#endif
X        Filename = "standard input";
X    }
X}
X
Xgetline()
X{
X    /*
X     * Read a line of text from current input file. Strip off trailing
X     * record separator (newline). Returns 1 if a record was read into
X     * Linebuf and 0 at end of file. A record longer than
X     * MAXLINELEN - 1 characters is split; the rest is returned by the
X     * next call.
X     */
X    int rtn, len;
X
X    rtn = 0;
X    /* stop one short of the buffer size: the NUL needs a slot too */
X    for (len = 0; len < MAXLINELEN - 1; ++len) {
X        if ((rtn = getcharacter()) == *Recordsep || rtn == -1)
X            break;
X        Linebuf[len] = rtn;
X    }
X    Linebuf[len] = 0;
X
X    if (rtn == -1) {
X        if (len == 0) {
X            endfile();
X            return 0;
X        }
X        /*
X         * Last record had no trailing separator: hand it to the
X         * caller instead of dropping it. The next call sees end of
X         * file again and closes the input.
X         */
X        ++Recordcount;
X    }
X    return 1;
X}
X
Xgetcharacter()
X{
X    /*
X     * Read a character from current input file. WARNING: your getc() must
X     * convert lines that end with CR+LF to LF and CP/M's EOF character
X     * (^Z) to a -1. Also, getc() must return a -1 when attempting to
X     * read from an unopened file.
X     */
X    int c;
X
X#ifdef BDS_C
X    /*
X     * BDS C doesn't do CR+LF to LF and ^Z to -1 conversions <gag>
X     */
X    if ((c = getc(Fileptr)) == '\r') {
X        if ((c = getc(Fileptr)) != '\n') {
X            ungetc(c);
X            c = '\r';
X        }
X    }
X    else if (c == 26)       /* ^Z */
X        c = -1;
X#else
X    c = getc(Fileptr);
X#endif
X
X    if (c == *Recordsep)
X        ++Recordcount;
X    if (c == '\n')
X        ++Linecount;
X
X    return c;
X}
X
Xungetcharacter(c)
X{
X    /*
X     * Push a character back into the input stream. If the character is a
X     * record separator, or a newline character, the record and line
X     * counters are adjusted appropriately.
X     */
X    if (c == *Recordsep)
X        --Recordcount;
X    if (c == '\n')
X        --Linecount;
X    return ungetc(c, Fileptr);
X}
X
X/*
X * Finish with the current input file and reset the file name and line
X * counter used in error messages.
X */
Xendfile()
X{
X    /*
X     * Standard input is left open: it may be named again later on the
X     * command line (e.g. "bawk - prog.c -").
X     */
X    if (Fileptr != stdin)
X        fclose(Fileptr);
X    Filename = "";
X    Linecount = 0;
X}
X
X/*
X * Print message "s" on stderr, prefixed by the current file name and
X * line number when known. If "severe" is non-zero it is one of the
X * *_ERROR codes from bawk.h and becomes the exit status.
X */
Xerror(s, severe)
X    char *s;
X    int severe;
X{
X    if (Filename)
X        fprintf(stderr, "%s:", Filename);
X
X    if (Linecount)
X        fprintf(stderr, " line %d:", Linecount);
X
X    fprintf(stderr, " %s\n", s);
X    if (severe)
X        exit(severe);
X}
X
Xusage()
X{
X    error("Usage: bawk <actfile> [<file> ...]\n", USAGE_ERROR);
X}
X
X/*
X * Copy "count" bytes from "from" to "to", lowest address first.
X */
Xmovemem(from, to, count)
X    char *from, *to;
X    int count;
X{
X    while (count-- > 0)
X        *to++ = *from++;
X}
X
X
X/*
X * Compare at most "n" characters of "s" and "t". Returns 0 if they
X * match, otherwise the difference of the first mismatching pair.
X */
Xstrncmp(s, t, n)
X    char *s, *t;
X    int n;
X{
X    /* nothing to compare: equal by definition */
X    if (n <= 0)
X        return 0;
X    while (--n > 0 && *s && *t && *s == *t) {
X        ++s;
X        ++t;
X    }
X    if (*s || *t)
X        return *s - *t;
X    return 0;
X}
X
X/* Non-zero if "c" is a decimal digit. */
Xnum(c)
X    char c;
X{
X    return '0' <= c && c <= '9';
X}
X
X/* Non-zero if "c" is a letter or an underscore. */
Xalpha(c)
X    char c;
X{
X    return ('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z') || c == '_';
X}
X
X/* Non-zero if "c" is a letter, an underscore or a decimal digit. */
Xalphanum(c)
X    char c;
X{
X    return alpha(c) || num(c);
X}
X
X/*
X * Set "count" bytes starting at "array" to "value".
X */
Xfillmem(array, count, value)
X    char *array, value;
X    int count;
X{
X    while (count-- > 0)
X        *array++ = value;
X}
X
+ END-OF-FILE bawk.c
chmod 'u=rw,g=r,o=r' \b\a\w\k\.\c
# The checksum below was recorded for the original posting. The payload
# above has been re-indented and edited since, so a "Bad sum" report is
# expected until the value is regenerated.
set `sum \b\a\w\k\.\c`
sum=$1
case $sum in
30221) :;;
*) echo 'Bad sum in '\b\a\w\k\.\c >&2
esac
echo Extracting \b\a\w\k\.\d\o\c
sed 's/^X//' > \b\a\w\k\.\d\o\c << '+ END-OF-FILE '\b\a\w\k\.\d\o\c
XNAME
X
X    bawk - text processor
X
XSYNOPSIS
X
X    bawk rules [file] ...
X
XDESCRIPTION
X
X    Bawk is a text processing program that searches files for
X    specific patterns and performs "actions" for every occurrence
X    of these patterns. The patterns can be "regular expressions"
X    as used in the UNIX "ex" editor. The actions are expressed
X    using a subset of the "C" language.
X
X    The patterns and actions are usually placed in a "rules" file
X    whose name must be the first argument in the command line.
X    All other arguments are taken to be the names of text files on
X    which the rules are to be applied.
X    The special file name "-" may also be used anywhere on the
X    command line to take input from the standard input device.
X
X    The command:
X
X        bawk - prog.c - prog.h
X
X    would read the pattern and action rules from the standard
X    input, then apply them to the files "prog.c", the standard
X    input and "prog.h" in that order.
X
X    The general format of a rules file is:
X
X        <pattern> { <action> }
X        <pattern> { <action> }
X        ...
X
X    There may be any number of these <pattern> { <action> }
X    sequences in the rules file. Bawk reads a line of input from
X    the current input file and applies every <pattern> { <action> }
X    in sequence to the line.
X
X    If the <pattern> corresponding to any { <action> } is missing,
X    the action is applied to every line of input. The default
X    { <action> } is to print the matched input line.
X
XPATTERNS
X
X    The <pattern>'s may consist of any valid C expression. If the
X    <pattern> consists of two expressions separated by a comma, it
X    is taken to be a range and the <action> is performed on all
X    lines of input that match the range. <pattern>'s may contain
X    "regular expressions" delimited by an '@' symbol. Regular
X    expressions can be thought of as a generalized "wildcard"
X    string matching mechanism, similar to that used by many
X    operating systems to specify file names. Regular expressions
X    may contain any of the following characters:
X
X        x       An ordinary character (not mentioned below)
X                matches that character.
X        '\'     The backslash quotes any character.
X                "\$" matches a dollar-sign.
X        '^'     A circumflex at the beginning of an expression
X                matches the beginning of a line.
X        '$'     A dollar-sign at the end of an expression
X                matches the end of a line.
X        '.'     A period matches any single character except
X                newline.
X        ':x'    A colon matches a class of characters described
X                by the character following it:
X        ':a'    ":a" matches any alphabetic;
X        ':d'    ":d" matches digits;
X        ':n'    ":n" matches alphanumerics;
X        ': '    ": " matches spaces, tabs, and other control
X                characters, such as newline.
X        '*'     An expression followed by an asterisk matches
X                zero or more occurrences of that expression:
X                "fo*" matches "f", "fo", "foo", "fooo", etc.
X        '+'     An expression followed by a plus sign matches
X                one or more occurrences of that expression:
X                "fo+" matches "fo", "foo", "fooo", etc.
X        '-'     An expression followed by a minus sign
X                optionally matches the expression.
X        '[]'    A string enclosed in square brackets matches
X                any single character in that string, but no
X                others. If the first character in the string
X                is a circumflex, the expression matches any
X                character except newline and the characters in
X                the string. For example, "[xyz]" matches "xx"
X                and "zyx", while "[^xyz]" matches "abc" but not
X                "axb". A range of characters may be specified
X                by two characters separated by "-". Note that,
X                [a-z] matches alphabetics, while [z-a] never
X                matches.
X
X    For example, the following rules file would print every line
X    that contained a valid C identifier:
X
X        @[a-zA-Z][a-zA-Z0-9]@
X
X    And this rules file would print all lines between and including
X    the ones that contained the word "START" and "END":
X
X        @START@, @END@
X
XACTIONS
X
X    Actions are expressed as a subset of the C language. All
X    variables are global and default to int's if not formally
X    declared. Variable declarations may appear anywhere within
X    an action. Only char's and int's and pointers and arrays of
X    char and int are allowed. Bawk allows only decimal integer
X    constants to be used - no hex (0xnn) or octal (0nn). String
X    and character constants may contain all of the special C
X    escapes (\n, \r, etc.).
X
X    Bawk supports the "if", "else", "while" and "break" flow of
X    control constructs, which behave exactly as in C.
X
X    Also supported are the following unary and binary operators,
X    listed in order from highest to lowest precedence:
X
X        operator            type    associativity
X        () []               unary   left to right
X        ! ~ ++ -- - * &     unary   right to left
X        * / %               binary  left to right
X        + -                 binary  left to right
X        << >>               binary  left to right
X        < <= > >=           binary  left to right
X        == !=               binary  left to right
X        &                   binary  left to right
X        ^                   binary  left to right
X        |                   binary  left to right
X        &&                  binary  left to right
X        ||                  binary  left to right
X        =                   binary  right to left
X
X    Comments are introduced by a '#' symbol and are terminated by
X    the first newline character. The standard "/*" and "*/"
X    comment delimiters are not supported and will result in a
X    syntax error.
X
XFIELDS
X
X    When bawk reads a line from the current input file, the
X    record is automatically separated into "fields". A field is
X    simply a string of consecutive characters delimited by either
X    the beginning or end of line, or a "field separator" character.
X    Initially, the field separators are the space and tab character.
X    The special unary operator '$' is used to reference one of the
X    fields in the current input record (line). The fields are
X    numbered sequentially starting at 1. The expression "$0"
X    references the entire input line.
X
X    Similarly, the "record separator" is used to determine the end
X    of an input "line", initially the newline character.
X    The field and record separators may be changed programmatically
X    by one of the actions and will remain in effect until changed
X    again.
X
X    Fields behave exactly like strings; and can be used in the same
X    context as a character array. These "arrays" can be considered
X    to have been declared as:
X
X        char ($n)[ 128 ];
X
X    In other words, they are 128 bytes long. Notice that the
X    parentheses are necessary because the operators [] and $
X    associate from right to left; without them, the statement
X    would have parsed as:
X
X        char $(1[ 128 ]);
X
X    which is obviously ridiculous.
X
X    If the contents of one of these field arrays is altered, the
X    "$0" field will reflect this change. For example, this
X    expression:
X
X        *$4 = 'A';
X
X    will change the first character of the fourth field to an upper-
X    case letter 'A'. Then, when the following input line:
X
X        120 PRINT "Name address Zip"
X
X    is processed, it would be printed as:
X
X        120 PRINT "Name Address Zip"
X
X    Fields may also be modified with the strcpy() function (see
X    below). For example, the expression:
X
X        strcpy( $4, "Addr." );
X
X    applied to the same line above would yield:
X
X        120 PRINT "Name Addr. Zip"
X
XPREDEFINED VARIABLES
X
X    The following variables are pre-defined:
X
X        FS          Field separator (see below).
X        RS          Record separator (see below also).
X        NF          Number of fields in current input
X                    record (line).
X        NR          Number of records processed thus far.
X        FILENAME    Name of current input file.
X        BEGIN       A special <pattern> that matches the
X                    beginning of input text, before the
X                    first record is read.
X        END         A special <pattern> that matches the
X                    end of input text, after the last
X                    record has been read.
X
X    Bawk also provides some useful builtin functions for string
X    manipulation and printing:
X
X        printf(arg..)   Exactly the printf() function from C.
X        getline()       Reads the next record from the current
X                        input file and returns 0 on end of file.
X        nextfile()      Closes out the current input file and
X                        begins processing the next file in the
X                        list (if any).
X        strlen(s)       Returns the length of its string argument.
X        strcpy(s,t)     Copies the string "t" to the string "s".
X        strcmp(s,t)     Compares the "s" to "t" and returns 0 if
X                        they match.
X        toupper(c)      Returns its character argument converted
X                        to upper-case.
X        tolower(c)      Returns its character argument converted
X                        to lower-case.
X        match(s,@re@)   Compares the string "s" to the regular
X                        expression "re" and returns the number
X                        of matches found (zero if none).
X
XEXAMPLES
X
X    The following rules file will scan a C program, counting the
X    number of mismatched parentheses, brackets, and braces.
X
X        /[()\[\]{}]/
X        {
X            parens = parens + match( $0, @(@ );
X            parens = parens - match( $0, @)@ );
X            bracks = bracks + match( $0, @[@ );
X            bracks = bracks - match( $0, @]@ );
X            braces = braces + match( $0, @{@ );
X            braces = braces - match( $0, @}@ );
X        }
X        END { printf("parens=%d, brackets=%d, braces=%d\n",
X                     parens, bracks, braces );
X        }
X
X    This program will capitalize the first word in every sentence of
X    a document:
X
X        BEGIN
X        {
X            RS = '.'; # set record separator to a period
X        }
X        {
X            if ( match( $1, @^[a-z]@ ) )
X                *$1 = toupper( *$1 );
X            printf( "%s\n", $0 );
X        }
X
XLIMITATIONS
X
X    Bawk was originally written in BDS C, but every attempt was made
X    to keep the code as portable as possible. The program should
X    be compilable with any "standard" C compiler. On CP/M systems
X    compiled with BDS C, bawk takes up about 24K.
X
X    An input record may be no longer than 128 characters. If longer
X    records are encountered, they terminate prematurely and the
X    next record starts where the previous one was hacked off.
X
X    A single pattern or action statement may be no longer than about
X    4K characters, excluding comments and whitespace. Since the
X    program is semi-compiled the tokenized version will probably
X    wind up being smaller than the source code, so the 4K figure is
X    only approximate.
X
XAUTHOR
X
X    Bob Brodt
X    486 Linden Ave.
X    Bogota, NJ 07603
X
XACKNOWLEDGEMENTS
X
X    The concept for bawk (and 3/4 of the name!) was taken from
X    the program "awk" written by Alfred V. Aho, Brian W. Kernighan
X    and Peter J. Weinberger. My apologies for any irreverences.
X
X    The regular expression compiler/parser was borrowed from a
X    program called "grep" and has been highly modified. Grep is
X    distributed by the DEC Users Society (DECUS) and is Copyright
X    (C) 1980 by DECUS.
X    The author acknowledges DECUS with a nod of
X    thanks for giving their general permission and okey-dokey to
X    copy or modify the grep program.
X
X    UNIX is a trademark of AT&T Bell Labs.
+ END-OF-FILE bawk.doc
chmod 'u=rw,g=r,o=r' \b\a\w\k\.\d\o\c
# The checksum below was recorded for the original posting; the payload
# layout has been reconstructed since, so a "Bad sum" report is expected
# until the value is regenerated.
set `sum \b\a\w\k\.\d\o\c`
sum=$1
case $sum in
36437) :;;
*) echo 'Bad sum in '\b\a\w\k\.\d\o\c >&2
esac
echo Extracting \b\a\w\k\.\h
sed 's/^X//' > \b\a\w\k\.\h << '+ END-OF-FILE '\b\a\w\k\.\h
X#include <ctype.h>
X/*
X * Bawk constants and variable declarations.
X *
X * Exactly one source file defines MAIN before including this header;
X * there EXTERN expands to nothing and the globals below are defined.
X * Everywhere else they are declared "extern". (BDS C always gets the
X * empty form.)
X */
X
X#ifdef BDS_C
X#define EXTERN
X#else
X
X#ifdef MAIN
X#define EXTERN
X#else
X#define EXTERN extern
X#endif
X
X#endif
X
X
X#ifdef DEBUG
XEXTERN char Debug;              /* debug print flag */
X#endif
X
X/*
X * Table and buffer sizes
X */
X#define MAXLINELEN      128
X#define MAXWORDS        (MAXLINELEN/2)
X#define MAXWORKBUFLEN   4096
X#define MAXVARTABSZ     50
X#define MAXVARLEN       10
X#define MAXSTACKSZ      40
X
X
X/**********************************************************
X * Current Input File variables                           *
X **********************************************************/
X/*
X * Current Input File pointer:
X */
X#ifdef BDS_C
XEXTERN char *Fileptr, Curfbuf[BUFSIZ];
X#else
XEXTERN FILE *Fileptr;
X#endif
XEXTERN char *Filename;          /* current input file name */
XEXTERN int Linecount;           /* current input line number */
XEXTERN int Recordcount;         /* record count */
X/*
X * Working buffers.
X */
XEXTERN char Linebuf[MAXLINELEN];/* current input line buffer */
XEXTERN char *Fields[MAXWORDS];  /* malloc'd copies of the words in */
X                                /* Linebuf, filled in by parse() */
XEXTERN int Fieldcount;          /* and the # of words */
XEXTERN char Workbuf[MAXWORKBUFLEN];     /* work area for C action and */
X                                /* regular expression parsers */
X
X/**********************************************************
X * Regular Expression Parser variables                    *
X **********************************************************/
X/*
X * Tokens:
X */
X#define CHAR    1
X#define BOL     2
X#define EOL     3
X#define ANY     4
X#define CLASS   5
X#define NCLASS  6
X#define STAR    7
X#define PLUS    8
X#define MINUS   9
X#define ALPHA   10
X#define DIGIT   11
X#define NALPHA  12
X#define PUNCT   13
X#define RANGE   14
X#define ENDPAT  15
X
X
X/**********************************************************
X * C Actions Interpreter variables                        *
X **********************************************************/
X/*
X * Tokens:
X */
X#define T_STRING        'S'
X#define T_DOLLAR        '$'
X#define T_REGEXP        'r'
X#define T_CONSTANT      'C'
X#define T_VARIABLE      'V'
X#define T_FUNCTION      'F'
X#define T_SEMICOLON     ';'
X#define T_EOF           'Z'
X#define T_LBRACE        '{'
X#define T_RBRACE        '}'
X#define T_LPAREN        '('
X#define T_RPAREN        ')'
X#define T_LBRACKET      '['
X#define T_RBRACKET      ']'
X#define T_COMMA         ','
X#define T_ASSIGN        '='
X#define T_MUL           '*'
X#define T_DIV           '/'
X#define T_MOD           '%'
X#define T_ADD           '+'
X#define T_SUB           '-'
X#define T_SHL           'L'
X#define T_SHR           'R'
X#define T_LT            '<'
X#define T_LE            'l'
X#define T_GT            '>'
X#define T_GE            'g'
X#define T_EQ            'q'
X#define T_NE            'n'
X#define T_NOT           '~'
X#define T_AND           '&'
X#define T_XOR           '^'
X#define T_IOR           '|'
X#define T_LNOT          '!'
X#define T_LAND 'a' X#define T_LIOR 'o' X#define T_INCR 'p' X#define T_DECR 'm' X#define T_IF 'i' X#define T_ELSE 'e' X#define T_WHILE 'w' X#define T_BREAK 'b' X#define T_CHAR 'c' X#define T_INT 't' X#define T_BEGIN 'B' X#define T_END 'E' X#define T_NF 'f' X#define T_NR '#' X#define T_FS ' ' X#define T_RS '\n' X#define T_FILENAME 'z' X X#define PATTERN 'P' X#define ACTION 'A' X X/* X * Symbol Table values X */ X#define ACTUAL 0 X#define LVALUE 1 X#define BYTE 1 X#define WORD 2 X/* X * Symbol table X */ Xstruct variable { X char vname[MAXVARLEN]; X char vclass; X char vsize; X int vlen; X char *vptr; X}; X#define VARIABLE struct variable XEXTERN VARIABLE Vartab[MAXVARTABSZ], *Nextvar; X/* X * Value stack X */ Xunion datum { X int ival; X char *dptr; X char **ptrptr; X}; X#define DATUM union datum Xstruct item { X char class; X char lvalue; X char size; X DATUM value; X}; X#define ITEM struct item XEXTERN ITEM Stackbtm[MAXSTACKSZ], *Stackptr, *Stacktop; X/* X * Miscellaneous X */ XEXTERN char *Actptr; /* pointer into Workbuf during compilation */ XEXTERN char Token; /* current input token */ XEXTERN DATUM Value; /* and its value */ XEXTERN char Saw_break; /* set when break stmt seen */ XEXTERN char Where; /* indicates whether C stmt is a PATTERN or X * ACTION */ XEXTERN char Fieldsep[3]; /* field seperator */ XEXTERN char Recordsep[3]; /* record seperator */ XEXTERN char *Beginact; /* BEGINning of input actions */ XEXTERN char *Endact; /* END of input actions */ X X/********************************************************** X * Rules structure * X **********************************************************/ Xstruct rule { X struct { X char *start; /* C statements that match pattern start */ X char *stop; /* C statements that match pattern end */ X char startseen; /* set if both a start and stop pattern */ X /* given and if an input line matched the */ X /* start pattern */ X } pattern; X char *action; /* contains quasi-C statements of actions */ X struct rule 
*nextrule; /* pointer to next rule */ X}; X#define RULE struct rule XEXTERN RULE *Rules, /* rule structures linked list head */ X*Rulep; /* working pointer */ X X X/********************************************************** X * Miscellaneous * X **********************************************************/ X/* X * Error exit values (returned to command shell) X */ X#define USAGE_ERROR 1 X#define FILE_ERROR 2 X#define RE_ERROR 3 X#define ACT_ERROR 4 X#define MEM_ERROR 5 X/* X * Functions that return something special: X */ Xchar * Xstr_compile(), *getmem(), *cclass(), *pmatch(), *fetchptr(); Xchar *storeptr(); XVARIABLE * Xfindvar(), *addvar(), *decl(); + END-OF-FILE bawk.h chmod 'u=rw,g=r,o=r' \b\a\w\k\.\h set `sum \b\a\w\k\.\h` sum=$1 case $sum in 06605) :;; *) echo 'Bad sum in '\b\a\w\k\.\h >&2 esac echo Extracting \b\a\w\k\a\c\t\.\c sed 's/^X//' > \b\a\w\k\a\c\t\.\c << '+ END-OF-FILE '\b\a\w\k\a\c\t\.\c X/* X * Bawk C actions compiler X */ X#include <stdio.h> X#include "bawk.h" X Xact_compile(actbuf) X char *actbuf; /* where tokenized actions are compiled into */ X{ X Where = ACTION; X return stmt_compile(actbuf); X} X Xpat_compile(actbuf) X char *actbuf; /* where tokenized actions are compiled into */ X{ X Where = PATTERN; X return stmt_compile(actbuf); X} X Xstmt_compile(actbuf) X char *actbuf; /* where tokenized actions are compiled into */ X{ X /* X * Read and tokenize C actions from current input file into the X * action buffer. Strip out comments and whitespace in the process. X */ X char *actptr, /* actbuf pointer */ X *cp, /* work pointer */ X buf[MAXLINELEN]; /* string buffer */ X int braces, /* counts '{}' pairs - return when 0 */ X parens, /* counts '()' pairs */ X i, /* temp */ X c; /* current input character */ X X braces = parens = 0; X actptr = actbuf; X while ((c = getcharacter()) != -1) { X /* X * Skip over spaces, tabs and newlines X */ X if (c == ' ' || c == '\t' || c == '\n') X continue; X if (c == '#') { X /* X * Skip comments. 
Comments start with a '#' and end X * at the next newline. X */ X while ((c = getcharacter()) != -1 && c != '\n'); X continue; X } X X if (c == '{') { X if (Where == PATTERN) { X /* X * We're compiling a pattern. The '{' marks X * the beginning of an action statement. Push X * the character back and return. X */ X ungetcharacter('{'); X break; X } X else { X /* X * We must be compiling an action statement. X * '{'s mark beginning of action or compound X * statements. X */ X ++braces; X *actptr++ = T_LBRACE; X } X } X else if (c == '}') { X *actptr++ = T_RBRACE; X if (!--braces) X /* X * Found the end of the action string X */ X break; X } X else if (c == '(') { X ++parens; X *actptr++ = T_LPAREN; X } X else if (c == ')') { X if (--parens < 0) X error("mismatched '()'", ACT_ERROR); X *actptr++ = T_RPAREN; X } X else if (c == ',' && !braces && !parens && Where == PATTERN) { X /* X * found a comma outside of any braces or parens- X * this must be a regular expression seperator. X */ X ungetcharacter(','); X break; X } X X /* X * Check if it's a regular expression: X */ X else if (c == '/') { X /* X * A '/' inside a pattern string starts a regular X * expression. Inside action strings, a '/' is the X * division operator. X */ X if (Where == PATTERN) X goto dopattern; X else X *actptr++ = T_DIV; X } X else if (c == '@') { X dopattern: X /* X * Within action strings, only the '@' may be used to X * delimit regular expressions X */ X *actptr++ = T_REGEXP; X ungetcharacter(c); X actptr += re_compile(actptr); X } X X /* X * symbol, string or constant: X */ X else if (alpha(c)) { X /* X * It's a symbol reference. Copy the symbol into X * string buffer. X */ X cp = buf; X do X *cp++ = c; X while ((c = getcharacter()) != -1 && alphanum(c)); X ungetcharacter(c); X *cp = 0; X /* X * Check if a keyword, builtin function or variable. 
X */ X if (c = iskeyword(buf)) X *actptr++ = c; X else if (i = isfunction(buf)) { X *actptr++ = T_FUNCTION; X storeint(actptr, i); X actptr += sizeof(i); X } X else { X /* X * It's a symbol name. X */ X *actptr++ = T_VARIABLE; X if (!(cp = (char *) findvar(buf))) X cp = (char *) addvar(buf); X storeptr(actptr, cp); X actptr += sizeof(cp); X } X } X X else if (c == '"') { X /* X * It's a string constant X */ X *actptr++ = T_STRING; X actptr = str_compile(actptr, '"'); X } X else if (c == '\'') { X /* X * It's a character constant X */ X *actptr++ = T_CONSTANT; X str_compile(buf, '\''); X storeint(actptr, *buf); X actptr += sizeof(i); X } X X else if (num(c)) { X /* X * It's a numeric constant X */ X *actptr++ = T_CONSTANT; X cp = buf; X do X *cp++ = c; X while ((c = getcharacter()) != -1 && num(c)); X ungetcharacter(c); X *cp = 0; X storeint(actptr, atoi(buf)); X actptr += sizeof(i); X } X X /* X * unary operator: X */ X else if (c == '$') X *actptr++ = T_DOLLAR; X X /* X * or binary operator: X */ X else if (c == '=') { X if ((c = getcharacter()) == '=') X *actptr++ = T_EQ; X else { X ungetcharacter(c); X *actptr++ = T_ASSIGN; X } X } X X else if (c == '!') { X if ((c = getcharacter()) == '=') X *actptr++ = T_NE; X else { X ungetcharacter(c); X *actptr++ = T_LNOT; X } X } X X else if (c == '<') { X if ((c = getcharacter()) == '<') X *actptr++ = T_SHL; X else if (c == '=') X *actptr++ = T_LE; X else { X ungetcharacter(c); X *actptr++ = T_LT; X } X } X X else if (c == '>') { X if ((c = getcharacter()) == '>') X *actptr++ = T_SHR; X else if (c == '=') X *actptr++ = T_GE; X else { X ungetcharacter(c); X *actptr++ = T_GT; X } X } X X else if (c == '&') { X if ((c = getcharacter()) == '&') X *actptr++ = T_LAND; X else { X ungetcharacter(c); X *actptr++ = T_AND; X } X } X X else if (c == '|') { X if ((c = getcharacter()) == '|') X *actptr++ = T_LIOR; X else { X ungetcharacter(c); X *actptr++ = T_IOR; X } X } X else if (c == '+') { X if ((c = getcharacter()) == '+') X 
*actptr++ = T_INCR; X else { X ungetcharacter(c); X *actptr++ = T_ADD; X } X } X X else if (c == '-') { X if ((c = getcharacter()) == '-') X *actptr++ = T_DECR; X else { X ungetcharacter(c); X *actptr++ = T_SUB; X } X } X X /* X * punctuation X */ X else if (instr(c, "[](),;*/%+-^~")) X *actptr++ = c; X X else { X /* X * Bad character in input line X */ X error("lexical error", ACT_ERROR); X } X X if (actptr >= Workbuf + MAXWORKBUFLEN) X error("action too long", MEM_ERROR); X } X if (braces || parens) X error("mismatched '{}' or '()'", ACT_ERROR); X X *actptr++ = T_EOF; X X return actptr - actbuf; X} X Xchar * Xstr_compile(str, delim) X char *str, delim; X{ X /* X * Compile a string from current input file into the given string X * buffer. Stop when input character is the delimiter in "delim". X * Returns a pointer to the first character after the string. X */ X int c; X char buf[MAXLINELEN]; X X while ((c = getcharacter()) != -1 && c != delim) { X if (c == '\\') { X switch (c = getcharacter()) { X case -1: X goto err; X case 'b': X c = '\b'; X break; X case 'n': X c = '\n'; X break; X case 't': X c = '\t'; X break; X case 'f': X c = '\f'; X break; X case 'r': X c = '\r'; X break; X case '0': X case '1': X case '2': X case '3': X *buf = c; X for (c = 1; c < 3; ++c) { X if ((buf[c] = getcharacter()) == -1) X goto err; X } X buf[c] = 0; X sscanf(buf, "%o", &c); X break; X case '\n': X if (getcharacter() == -1) X goto err; X default: X if ((c = getcharacter()) == -1) X goto err; X } X } X *str++ = c; X } X *str++ = 0; X X return (str); Xerr: X sprintf(buf, "missing %c delimiter", delim); X error(buf, 4); X} X Xstoreint(ip, i) X int *ip, i; X{ X return *ip = i; X} X Xchar * Xstoreptr(pp, p) X char **pp, *p; X{ X return (*pp = p); X} X Xfetchint(ip) X int *ip; X{ X return *ip; X} X Xchar * Xfetchptr(pp) X char **pp; X{ X return *pp; X} X Xgetoken() X{ X char *cp; X int i; X X switch (Token = *Actptr++) { X case T_STRING: X case T_REGEXP: X Value.dptr = Actptr; X Actptr 
+= strlen(Actptr) + 1; X break; X case T_VARIABLE: X Value.dptr = fetchptr(Actptr); X Actptr += sizeof(cp); X break; X case T_FUNCTION: X case T_CONSTANT: X Value.ival = fetchint(Actptr); X Actptr += sizeof(i); X break; X case T_EOF: X --Actptr; X default: X Value.dptr = 0; X } X X#ifdef DEBUG X if (Debug > 1) X printf("Token='%c' (0x%x), Value=%d\n", X Token, Token, Value.ival); X#endif X X return Token; X} + END-OF-FILE bawkact.c chmod 'u=rw,g=r,o=r' \b\a\w\k\a\c\t\.\c set `sum \b\a\w\k\a\c\t\.\c` sum=$1 case $sum in 28797) :;; *) echo 'Bad sum in '\b\a\w\k\a\c\t\.\c >&2 esac exit 0