ast@cs.vu.nl (Andy Tanenbaum) (09/14/87)
: This is a shar archive. Extract with sh, not csh.
: This archive ends with exit, so do not worry about trailing junk.
: --------------------------- cut here --------------------------
PATH=/bin:/usr/bin
echo Extracting \b\a\w\k\.\c
sed 's/^X//' > \b\a\w\k\.\c << '+ END-OF-FILE '\b\a\w\k\.\c
X/*
X * Bawk main program
X */
X#define MAIN 1
X#include <stdio.h>
X#include "bawk.h"
X
X/*
X * Main program
X */
Xmain(argc, argv)
X    int argc;
X    char **argv;
X{
X    /*
X     * Program entry point.  The first file argument is taken to be the
X     * rules file and is compiled; every later file argument is a text
X     * file that the rules are applied to.  A lone "-" stands for the
X     * standard input in either position.  When no text file is named,
X     * the standard input is processed.  The BEGIN action runs before the
X     * first text file and the END action after the last one.
X     * Exits with status 0; usage() is called for an unknown option or
X     * when no rules file was given.
X     */
X    char gotrules, didfile, getstdin;
X
X    getstdin =
X    didfile =
X    gotrules = 0;
X
X    /*
X     * Initialize global variables:
X     */
X    Beginact = (char *) 0;
X    Endact = (char *) 0;
X    Rules = (RULE *) 0;
X    Rulep = (RULE *) 0;
X#ifdef DEBUG
X    Debug = 0;
X#endif
X    Filename = (char *) 0;
X    Linecount = 0;
X    Saw_break = 0;
X
X    /* Stackptr starts one below the bottom slot (empty stack). */
X    Stackptr = Stackbtm - 1;
X    Stacktop = Stackbtm + MAXSTACKSZ;
X    Nextvar = Vartab;
X
X    strcpy(Fieldsep, " \t");
X    strcpy(Recordsep, "\n");
X
X    /*
X     * Parse command line
X     */
X    while (--argc) {
X        if (**(++argv) == '-') {
X            /*
X             * Process dash options.  Note that *argv itself is
X             * advanced past the dash here.
X             */
X            switch (tolower((unsigned char) *(++(*argv)))) {
X#ifdef DEBUG
X            case 'd':
X                ++Debug;
X                break;
X#endif
X            case 0:
X                /*
X                 * A bare "-" means standard input.  argv must
X                 * stay where it is: stepping it back made the
X                 * next pass re-read this (now empty) argument
X                 * as a file name while argc kept counting
X                 * down, so the last real argument was lost.
X                 */
X                ++getstdin;
X                goto dosomething;
X            default:
X                usage();
X            }
X        }
X        else {
X    dosomething:
X            if (gotrules) {
X                /*
X                 * Already read rules file - assume this is
X                 * a text file for processing.
X                 */
X                if (++didfile == 1 && Beginact)
X                    doaction(Beginact);
X                if (getstdin) {
X                    --getstdin;
X                    newfile(0);
X                }
X                else
X                    newfile(*argv);
X                process();
X            }
X            else {
X                /*
X                 * First file name argument on command line
X                 * is assumed to be a rules file - attempt to
X                 * compile it.
X                 */
X                if (getstdin) {
X                    --getstdin;
X                    newfile(0);
X                }
X                else
X                    newfile(*argv);
X                compile();
X                gotrules = 1;
X            }
X        }
X    }
X    if (!gotrules)
X        usage();
X
X    if (!didfile) {
X        /*
X         * Didn't process any files yet - process stdin.
X         */
X        newfile(0);
X        if (Beginact)
X            doaction(Beginact);
X        process();
X    }
X    if (Endact)
X        doaction(Endact);
X    exit(0);
X}
X
X/*
X * Regular expression/action file compilation routines.
X */
Xcompile()
X{
X /*
X * Compile the rules file currently open on "Fileptr" into the Rules
X * linked list. Input is scanned one character at a time and
X * dispatched on the first significant character:
X * '#' starts a comment that runs to end of line,
X * '{' starts an action,
X * ',' introduces the stop pattern of a two-part pattern,
X * anything else is taken to start a pattern.
X * Patterns and actions are tokenized into the global Workbuf, then
X * copied into freshly allocated memory. BEGIN and END actions go to
X * Beginact / Endact instead of the rule chain. The input file is
X * closed (endfile) when end of file is reached.
X */
X int c, len;
X
X#ifdef DEBUG
X if (Debug)
X error("compiling...", 0);
X#endif
X
X while ((c = getcharacter()) != -1) {
X if (c == ' ' || c == '\t' || c == '\n')
X /* swallow whitespace */
X ;
X else if (c == '#') {
X /*
X * Swallow comments
X */
X while ((c = getcharacter()) != -1 && c != '\n');
X }
X else if (c == '{') {
X#ifdef DEBUG
X if (Debug)
X error("action", 0);
X#endif
X /*
X * Compile (tokenize) the action string into our
X * global work buffer, then allocate some memory for
X * it and copy it over.
X */
X ungetcharacter('{');
X len = act_compile(Workbuf);
X
X /*
X * The current rule already owns an action, so this
X * action has no pattern of its own: chain a new,
X * zero-filled rule for it.
X */
X if (Rulep && Rulep->action) {
X Rulep->nextrule =
X (struct rule *)getmem(sizeof(*Rulep));
X Rulep = Rulep->nextrule;
X fillmem(Rulep, sizeof(*Rulep), 0);
X }
X if (!Rulep) {
X /*
X * This is the first action encountered.
X * Allocate the first Rules structure and
X * initialize it
X */
X Rules = Rulep =
X (RULE *) getmem(sizeof(*Rulep));
X fillmem(Rulep, sizeof(*Rulep), 0);
X }
X Rulep->action = getmem(len);
X movemem(Workbuf, Rulep->action, len);
X }
X else if (c == ',') {
X#ifdef DEBUG
X if (Debug)
X error("stop pattern", 0);
X#endif
X /*
X * It's (hopefully) the second part of a two-part
X * pattern string. The comma has already been consumed;
X * compile the stop pattern that follows it.
X */
X /* RE_ERROR is non-zero, so error() does not return here */
X if (!Rulep || !Rulep->pattern.start)
X error("stop pattern without a start",
X RE_ERROR);
X if (Rulep->pattern.stop)
X error("already have a stop pattern",
X RE_ERROR);
X len = pat_compile(Workbuf);
X Rulep->pattern.stop = getmem(len);
X movemem(Workbuf, Rulep->pattern.stop, len);
X }
X else {
X /*
X * Assume it's a regular expression pattern
X */
X#ifdef DEBUG
X if (Debug)
X error("start pattern", 0);
X#endif
X
X ungetcharacter(c);
X len = pat_compile(Workbuf);
X
X if (*Workbuf == T_BEGIN) {
X /*
X * Saw a "BEGIN" keyword - compile following
X * action into special "Beginact" buffer.
X */
X len = act_compile(Workbuf);
X Beginact = getmem(len);
X movemem(Workbuf, Beginact, len);
X continue;
X }
X if (*Workbuf == T_END) {
X /*
X * Saw an "END" keyword - compile following
X * action into special "Endact" buffer.
X */
X len = act_compile(Workbuf);
X Endact = getmem(len);
X movemem(Workbuf, Endact, len);
X continue;
X }
X if (Rulep) {
X /*
X * Already saw a pattern/action - link in
X * another Rules structure.
X */
X Rulep->nextrule =
X (struct rule *) getmem(sizeof(*Rulep));
X Rulep = Rulep->nextrule;
X fillmem(Rulep, sizeof(*Rulep), 0);
X }
X if (!Rulep) {
X /*
X * This is the first pattern encountered.
X * Allocate the first Rules structure and
X * initialize it
X */
X Rules = Rulep =
X (RULE *) getmem(sizeof(*Rulep));
X fillmem(Rulep, sizeof(*Rulep), 0);
X }
X /*
X * NOTE(review): Rulep was zero-filled just above on
X * both paths, so this check looks unreachable.
X */
X if (Rulep->pattern.start)
X error("already have a start pattern",
X RE_ERROR);
X
X Rulep->pattern.start = getmem(len);
X movemem(Workbuf, Rulep->pattern.start, len);
X }
X }
X endfile();
X}
X
X/*
X * Text file main processing loop.
X */
Xprocess()
X{
X    /*
X     * Read one record at a time from the current input file "Fileptr",
X     * split it into fields, then apply each rule in the Rules chain to
X     * it.  Recordcount is reset to zero on entry.  The global Rulep is
X     * used as the chain cursor.
X     */
X#ifdef DEBUG
X    int i;
X
X    if (Debug)
X        error("processing...", 0);
X#endif
X
X    Recordcount = 0;
X
X    while (getline()) {
X        /*
X         * Parse the input line.
X         */
X        Fieldcount = parse(Linebuf, Fields, Fieldsep);
X#ifdef DEBUG
X        if (Debug > 1) {
X            printf("parsed %d words:\n", Fieldcount);
X            for (i = 0; i < Fieldcount; ++i)
X                printf("<%s>\n", Fields[i]);
X        }
X#endif
X
X        /*
X         * Rules is null when the rules file held nothing but
X         * BEGIN/END actions (or was empty), so the chain has to be
X         * tested before it is dereferenced.
X         */
X        for (Rulep = Rules; Rulep; Rulep = Rulep->nextrule) {
X            if (!Rulep->pattern.start) {
X                /*
X                 * No pattern given - perform action on every
X                 * input line.
X                 */
X                doaction(Rulep->action);
X            }
X            else if (Rulep->pattern.startseen) {
X                /*
X                 * Start pattern already found - perform
X                 * action then check if line matches stop
X                 * pattern.
X                 */
X                doaction(Rulep->action);
X                if (dopattern(Rulep->pattern.stop))
X                    Rulep->pattern.startseen = 0;
X            }
X            else if (dopattern(Rulep->pattern.start)) {
X                /*
X                 * Matched start pattern - perform action.  If
X                 * a stop pattern was given, set "start
X                 * pattern seen" flag and process every input
X                 * line until stop pattern found.
X                 */
X                doaction(Rulep->action);
X                if (Rulep->pattern.stop)
X                    Rulep->pattern.startseen = 1;
X            }
X        }
X
X        /*
X         * Release memory allocated by parse().
X         */
X        while (Fieldcount)
X            free(Fields[--Fieldcount]);
X    }
X}
X
X/*
X * Miscellaneous functions
X */
Xparse(str, wrdlst, delim)
X    char *str;
X    char *wrdlst[];
X    char *delim;
X{
X    /*
X     * Split "str" into words and store a private copy of each one in
X     * "wrdlst".  Words are runs of characters that do not appear in
X     * "delim"; any number of delimiter characters may separate them.
X     * Returns the number of words stored.  Every word lives in its own
X     * getmem(MAXLINELEN) buffer (full size so that a caller may later
X     * copy a longer string into the field) and must be released with
X     * free() by the caller.
X     */
X    char word[MAXLINELEN];
X    char *out;
X    int nwords;
X
X    for (nwords = 0; ; ++nwords) {
X        /* step over the delimiters in front of the next word */
X        while (instr(*str, delim))
X            ++str;
X        if (!*str)
X            break;
X
X        /* collect the word itself */
X        out = word;
X        while (*str && !instr(*str, delim))
X            *out++ = *str++;
X        *out = 0;
X
X        wrdlst[nwords] = getmem(MAXLINELEN);
X        strcpy(wrdlst[nwords], word);
X    }
X
X    return nwords;
X}
X
Xunparse(wrdlst, wrdcnt, str, delim)
X    char *wrdlst[];
X    int wrdcnt;
X    char *str;
X    char *delim;
X{
X    /*
X     * Replace all the words in "str" with the words in "wrdlst",
X     * maintaining the same word separation as found in the string.  A
X     * "word" is a sequence of characters delimited by one or more of
X     * the characters found in the string "delim".  Words in "str"
X     * beyond the first "wrdcnt" are dropped (their delimiters stay).
X     *
X     * The rebuilt line is cut off at MAXLINELEN - 1 characters: the
X     * replacement words may each be far longer than the words they
X     * replace, and without the limit they would overrun the local
X     * buffer.  NOTE(review): "str" is assumed to have room for
X     * MAXLINELEN bytes (as Linebuf does) - confirm against callers.
X     */
X    int wc;
X    char strbuf[MAXLINELEN], *sp, *wp, *start, *limit;
X
X    wc = 0;                             /* next word in "wrdlst" */
X    sp = strbuf;                        /* write position in local copy */
X    limit = strbuf + MAXLINELEN - 1;    /* last slot, kept for the NUL */
X    start = str;                        /* caller's buffer, for copy-back */
X    while (*str) {
X        /*
X         * Copy the field delimiters from the original string to our
X         * local version.
X         */
X        while (instr(*str, delim)) {
X            if (sp < limit)
X                *sp++ = *str;
X            ++str;
X        }
X        if (!*str)
X            break;
X        /*
X         * Skip over the field in the original string and...
X         */
X        while (*str && !instr(*str, delim))
X            ++str;
X
X        if (wc < wrdcnt) {
X            /*
X             * ...copy in the field in the wordlist instead.
X             */
X            for (wp = wrdlst[wc++]; *wp; ++wp)
X                if (sp < limit)
X                    *sp++ = *wp;
X        }
X    }
X    /*
X     * Tie off the local string, then copy it back to caller's string.
X     */
X    *sp = 0;
X    strcpy(start, strbuf);
X}
X
Xinstr(c, s)
X    char c, *s;
X{
X    /*
X     * Return 1 if the character "c" occurs in the string "s", else 0.
X     * The terminating NUL of "s" is never matched.
X     */
X    for (; *s; ++s)
X        if (*s == c)
X            return 1;
X    return 0;
X}
X
Xchar *
Xgetmem(len)
X    unsigned len;
X{
X    /*
X     * Allocate "len" bytes with malloc().  On failure "out of memory"
X     * is reported through error() with MEM_ERROR.  The function now
X     * always returns a defined value (null after a failure) instead of
X     * running off its end should error() ever return.
X     */
X    char *cp, *malloc();
X
X    if (!(cp = malloc(len)))
X        error("out of memory", MEM_ERROR);
X    return cp;
X}
X
X/* char * */
Xnewfile(s)
X    char *s;
X{
X    /*
X     * Make "s" the current input file: reset Linecount, record the name
X     * in Filename and open the file on Fileptr.  A null "s" selects the
X     * standard input, with Filename set to "standard input".  A file
X     * that cannot be opened is reported through error() with FILE_ERROR.
X     */
X    Linecount = 0;
X    Filename = s;
X
X    if (!s) {
X        /*
X         * No file name given - process standard input.
X         */
X        Fileptr = stdin;
X        Filename = "standard input";
X    }
X    else {
X#ifdef BDS_C
X        if (fopen(s, Fileptr = Curfbuf) == -1)
X#else
X        if (!(Fileptr = fopen(s, "r")))
X#endif
X            error("file not found", FILE_ERROR);
X    }
X}
X
Xgetline()
X{
X    /*
X     * Read one record from the current input file into Linebuf,
X     * dropping the trailing record separator (first character of
X     * Recordsep).  Returns 1 when a record was read, or 0 at end of
X     * file, in which case the file is closed through endfile().
X     *
X     * At most MAXLINELEN - 1 characters are stored so that the
X     * terminating NUL always fits inside Linebuf; an over-long record
X     * is cut there and the rest is returned by the next call.
X     */
X    int rtn, len;
X
X    rtn = 0;
X    for (len = 0; len < MAXLINELEN - 1; ++len) {
X        if ((rtn = getcharacter()) == *Recordsep || rtn == -1)
X            break;
X        Linebuf[len] = rtn;
X    }
X    Linebuf[len] = 0;
X
X    if (rtn == -1) {
X        endfile();
X        return 0;
X    }
X    return 1;
X}
X
Xgetcharacter()
X{
X    /*
X     * Fetch the next character from the current input file and keep the
X     * counters up to date: Recordcount is bumped when the character is
X     * the record separator, Linecount when it is a newline.  Returns
X     * the character, or whatever getc() returns at end of file (the
X     * callers test for -1).  WARNING: your getc() must convert lines
X     * that end with CR+LF to LF and CP/M's EOF character (^Z) to a -1.
X     * Also, getc() must return a -1 when attempting to read from an
X     * unopened file.
X     */
X    int ch;
X
X    ch = getc(Fileptr);
X#ifdef BDS_C
X    /*
X     * BDS C doesn't do CR+LF to LF and ^Z to -1 conversions <gag>
X     */
X    if (ch == 26)       /* ^Z */
X        ch = -1;
X    else if (ch == '\r') {
X        if ((ch = getc(Fileptr)) != '\n') {
X            ungetc(ch);
X            ch = '\r';
X        }
X    }
X#endif
X
X    if (ch == '\n')
X        ++Linecount;
X    if (ch == *Recordsep)
X        ++Recordcount;
X
X    return ch;
X}
X
Xungetcharacter(c)
X{
X    /*
X     * Return the character "c" to the current input stream, undoing the
X     * bookkeeping getcharacter() did for it: Linecount drops by one for
X     * a newline, Recordcount for the record separator.  The result of
X     * ungetc() is passed back.
X     */
X    if (c == '\n')
X        --Linecount;
X    if (c == *Recordsep)
X        --Recordcount;
X    return ungetc(c, Fileptr);
X}
X
Xendfile()
X{
X    /*
X     * Finish with the current input file: forget its name and line
X     * number, then close the stream.
X     */
X    Linecount = 0;
X    Filename = "";
X    fclose(Fileptr);
X}
X
Xerror(s, severe)
X    char *s;
X    int severe;
X{
X    /*
X     * Print the message "s" on stderr, prefixed by the current file
X     * name (when set) and line number (when non-zero).  A non-zero
X     * "severe" terminates the program; it is used as the exit status so
X     * the shell sees the distinct error codes defined in bawk.h
X     * (USAGE_ERROR ... MEM_ERROR) rather than a flat 1.  With "severe"
X     * zero the function simply returns.
X     */
X    if (Filename)
X        fprintf(stderr, "%s:", Filename);
X
X    if (Linecount)
X        fprintf(stderr, " line %d:", Linecount);
X
X    fprintf(stderr, " %s\n", s);
X    if (severe)
X        exit(severe);
X}
X
Xusage()
X{
X    /*
X     * Report the command synopsis through error().  USAGE_ERROR is
X     * non-zero, so error() ends the program.
X     */
X    static char synopsis[] = "Usage: bawk <actfile> [<file> ...]\n";
X
X    error(synopsis, USAGE_ERROR);
X}
X
Xmovemem(from, to, count)
X    char *from, *to;
X    int count;
X{
X    /*
X     * Copy "count" bytes from "from" to "to", lowest address first.
X     * Nothing is copied when "count" is zero or negative.
X     */
X    int i;
X
X    for (i = 0; i < count; ++i)
X        to[i] = from[i];
X}
X
X
Xstrncmp(s, t, n)
X    char *s, *t;
X    int n;
X{
X    /*
X     * Compare at most "n" characters of "s" and "t".  Returns zero when
X     * they agree over that span (or both end first), otherwise the
X     * difference of the first pair of characters that differ.  A count
X     * of zero or less compares nothing and yields zero; previously the
X     * first characters were compared even then.
X     */
X    if (n <= 0)
X        return 0;
X    /* stop on the last permitted character, on a mismatch, or at NUL */
X    while (--n > 0 && *s && *s == *t) {
X        ++s;
X        ++t;
X    }
X    return *s - *t;
X}
X
Xnum(c)
X    char c;
X{
X    /* Non-zero (1) when "c" is one of the decimal digits '0'..'9'. */
X    return c >= '0' && c <= '9';
X}
X
Xalpha(c)
X    char c;
X{
X    /*
X     * Returns 1 when "c" may start an identifier: an ASCII letter of
X     * either case or an underscore.  Returns 0 otherwise.
X     */
X    if (c == '_')
X        return 1;
X    if (c >= 'a' && c <= 'z')
X        return 1;
X    return c >= 'A' && c <= 'Z';
X}
X
Xalphanum(c)
X    char c;
X{
X    /*
X     * Returns 1 when "c" may appear inside an identifier: a digit, a
X     * letter or an underscore.  Returns 0 otherwise.
X     */
X    if (num(c))
X        return 1;
X    return alpha(c) != 0;
X}
X
Xfillmem(array, count, value)
X    char *array, value;
X    int count;
X{
X    /*
X     * Set the first "count" bytes of "array" to "value".  A count of
X     * zero or less leaves the array untouched.
X     */
X    int i;
X
X    for (i = 0; i < count; ++i)
X        array[i] = value;
X}
X
+ END-OF-FILE bawk.c
chmod 'u=rw,g=r,o=r' \b\a\w\k\.\c
set `sum \b\a\w\k\.\c`
sum=$1
case $sum in
30221) :;;
*) echo 'Bad sum in '\b\a\w\k\.\c >&2
esac
echo Extracting \b\a\w\k\.\d\o\c
sed 's/^X//' > \b\a\w\k\.\d\o\c << '+ END-OF-FILE '\b\a\w\k\.\d\o\c
XNAME
X
X bawk - text processor
X
XSYNOPSIS
X
X bawk rules [file] ...
X
XDESCRIPTION
X
X Bawk is a text processing program that searches files for
X specific patterns and performs "actions" for every occurrence
X of these patterns. The patterns can be "regular expressions"
X as used in the UNIX "ex" editor. The actions are expressed
X using a subset of the "C" language.
X
X The patterns and actions are usually placed in a "rules" file
X whose name must be the first argument in the command line.
X All other arguments are taken to be the names of text files on
X which the rules are to be applied.
X The special file name "-" may also be used anywhere on the
X command line to take input from the standard input device.
X
X The command:
X
X bawk - prog.c - prog.h
X
X would read the patterns and actions rules from the standard
X input, then apply them to the files "prog.c", the standard
X input and "prog.h" in that order.
X
X The general format of a rules file is:
X
X <pattern> { <action> }
X <pattern> { <action> }
X ...
X
X There may be any number of these <pattern> { <action> }
X sequences in the rules file. Bawk reads a line of input from
X the current input file and applies every <pattern> { <action> }
X in sequence to the line.
X
X If the <pattern> corresponding to any { <action> } is missing,
X the action is applied to every line of input. The default
X { <action> } is to print the matched input line.
X
XPATTERNS
X
X The <pattern>'s may consist of any valid C expression. If the
X <pattern> consists of two expressions separated by a comma, it
X is taken to be a range and the <action> is performed on all
X lines of input that match the range. <pattern>'s may contain
X "regular expressions" delimited by an '@' symbol. Regular
X expressions can be thought of as a generalized "wildcard"
X string matching mechanism, similar to that used by many
X operating systems to specify file names. Regular expressions
X may contain any of the following characters:
X
X x An ordinary character (not mentioned below)
X matches that character.
X '\' The backslash quotes any character.
X "\$" matches a dollar-sign.
X '^' A circumflex at the beginning of an expression
X matches the beginning of a line.
X '$' A dollar-sign at the end of an expression
X matches the end of a line.
X '.' A period matches any single character except
X newline.
X ':x' A colon matches a class of characters described
X by the character following it:
X ':a' ":a" matches any alphabetic;
X ':d' ":d" matches digits;
X ':n' ":n" matches alphanumerics;
X ': ' ": " matches spaces, tabs, and other control
X characters, such as newline.
X '*' An expression followed by an asterisk matches
X zero or more occurrences of that expression:
X "fo*" matches "f", "fo", "foo", "fooo", etc.
X '+' An expression followed by a plus sign matches
X one or more occurrences of that expression:
X "fo+" matches "fo", "foo", "fooo", etc.
X '-' An expression followed by a minus sign
X optionally matches the expression.
X '[]' A string enclosed in square brackets matches
X any single character in that string, but no
X others. If the first character in the string
X is a circumflex, the expression matches any
X character except newline and the characters in
X the string. For example, "[xyz]" matches "xx"
X and "zyx", while "[^xyz]" matches "abc" but not
X "axb". A range of characters may be specified
X by two characters separated by "-". Note that,
X [a-z] matches alphabetics, while [z-a] never
X matches.
X
X For example, the following rules file would print every line
X that contained a valid C identifier:
X
X @[a-zA-Z][a-zA-Z0-9]@
X
X And this rules file would print all lines between and including
X the ones that contained the word "START" and "END":
X
X @START@, @END@
X
XACTIONS
X
X Actions are expressed as a subset of the C language. All
X variables are global and default to int's if not formally
X declared. Variable declarations may appear anywhere within
X an action. Only char's and int's and pointers and arrays of
X char and int are allowed. Bawk allows only decimal integer
X constants to be used - no hex (0xnn) or octal (0nn). String
X and character constants may contain all of the special C
X escapes (\n, \r, etc.).
X
X Bawk supports the "if", "else", "while" and "break" flow of
X control constructs, which behave exactly as in C.
X
X Also supported are the following unary and binary operators,
X listed in order from highest to lowest precedence:
X
X operator type associativity
X () [] unary left to right
X ! ~ ++ -- - * & unary right to left
X * / % binary left to right
X + - binary left to right
X << >> binary left to right
X < <= > >= binary left to right
X == != binary left to right
X & binary left to right
X ^ binary left to right
X | binary left to right
X && binary left to right
X || binary left to right
X = binary right to left
X
X Comments are introduced by a '#' symbol and are terminated by
X the first newline character. The standard "/*" and "*/"
X comment delimiters are not supported and will result in a
X syntax error.
X
XFIELDS
X
X When bawk reads a line from the current input file, the
X record is automatically separated into "fields". A field is
X simply a string of consecutive characters delimited by either
X the beginning or end of line, or a "field separator" character.
X Initially, the field separators are the space and tab characters.
X The special unary operator '$' is used to reference one of the
X fields in the current input record (line). The fields are
X numbered sequentially starting at 1. The expression "$0"
X references the entire input line.
X
X Similarly, the "record separator" is used to determine the end
X of an input "line", initially the newline character.
X The field and record separators may be changed programmatically
X by one of the actions and will remain in effect until changed
X again.
X
X Fields behave exactly like strings; and can be used in the same
X context as a character array. These "arrays" can be considered
X to have been declared as:
X
X char ($n)[ 128 ];
X
X In other words, they are 128 bytes long. Notice that the
X parentheses are necessary because the operators [] and $
X associate from right to left; without them, the statement
X would have parsed as:
X
X char $(1[ 128 ]);
X
X which is obviously ridiculous.
X
X If the contents of one of these field arrays is altered, the
X "$0" field will reflect this change. For example, this
X expression:
X
X *$4 = 'A';
X
X will change the first character of the fourth field to an upper-
X case letter 'A'. Then, when the following input line:
X
X 120 PRINT "Name address Zip"
X
X is processed, it would be printed as:
X
X 120 PRINT "Name Address Zip"
X
X Fields may also be modified with the strcpy() function (see
X below). For example, the expression:
X
X strcpy( $4, "Addr." );
X
X applied to the same line above would yield:
X
X 120 PRINT "Name Addr. Zip"
X
XPREDEFINED VARIABLES
X
X The following variables are pre-defined:
X
X FS Field seperator (see below).
X RS Record seperator (see below also).
X NF Number of fields in current input
X record (line).
X NR Number of records processed thus far.
X FILENAME Name of current input file.
X BEGIN A special <pattern> that matches the
X beginning of input text, before the
X first record is read.
X END A special <pattern> that matches the
X end of input text, after the last
X record has been read.
X
X Bawk also provides some useful builtin functions for string
X manipulation and printing:
X
X printf(arg..) Exactly the printf() function from C.
X getline() Reads the next record from the current
X input file and returns 0 on end of file.
X nextfile() Closes out the current input file and
X begins processing the next file in the
X list (if any).
X strlen(s) Returns the length of its string argument.
X strcpy(s,t) Copies the string "t" to the string "s".
X strcmp(s,t) Compares the "s" to "t" and returns 0 if
X they match.
X toupper(c) Returns its character argument converted
X to upper-case.
X tolower(c) Returns its character argument converted
X to lower-case.
X match(s,@re@) Compares the string "s" to the regular
X expression "re" and returns the number
X of matches found (zero if none).
X
XEXAMPLES
X
X The following rules file will scan a C program, counting the
X number of mismatched parentheses, brackets, and braces.
X
X /[()\[\]{}]/
X {
X parens = parens + match( $0, @(@ );
X parens = parens - match( $0, @)@ );
X bracks = bracks + match( $0, @[@ );
X bracks = bracks - match( $0, @]@ );
X braces = braces + match( $0, @{@ );
X braces = braces - match( $0, @}@ );
X }
X END { printf("parens=%d, brackets=%d, braces=%d\n",
X parens, bracks, braces );
X }
X
X This program will capitalize the first word in every sentence of
X a document:
X
X BEGIN
X {
X RS = '.'; # set record seperator to a period
X }
X {
X if ( match( $1, @^[a-z]@ ) )
X *$1 = toupper( *$1 );
X printf( "%s\n", $0 );
X }
X
XLIMITATIONS
X
X Bawk was originally written in BDS C, but every attempt was made
X to keep the code as portable as possible. The program should
X be compilable with any "standard" C compiler. On CP/M systems
X compiled with BDS C, bawk takes up about 24K.
X
X An input record may be no longer than 128 characters. If longer
X records are encountered, they terminate prematurely and the
X next record starts where the previous one was hacked off.
X
X A single pattern or action statement may be no longer than about
X 4K characters, excluding comments and whitespace. Since the
X program is semi-compiled the tokenized version will probably
X wind up being smaller than the source code, so the 4K figure is
X only approximate.
X
XAUTHOR
X
X Bob Brodt
X 486 Linden Ave.
X Bogota, NJ 07603
X
XACKNOWLEDGEMENTS
X
X The concept for bawk (and 3/4 of the name!) was taken from
X the program "awk" written by Alfred V. Aho, Brian W. Kernighan
X and Peter J. Weinberger. My apologies for any irreverences.
X
X The regular expression compiler/parser was borrowed from a
X program called "grep" and has been highly modified. Grep is
X distributed by the DEC Users Society (DECUS) and is Copyright
X (C) 1980 by DECUS. The author acknowledges DECUS with a nod of
X thanks for giving their general permission and okey-dokey to
X copy or modify the grep program.
X
X UNIX is a trademark of AT&T Bell Labs.
+ END-OF-FILE bawk.doc
chmod 'u=rw,g=r,o=r' \b\a\w\k\.\d\o\c
set `sum \b\a\w\k\.\d\o\c`
sum=$1
case $sum in
36437) :;;
*) echo 'Bad sum in '\b\a\w\k\.\d\o\c >&2
esac
echo Extracting \b\a\w\k\.\h
sed 's/^X//' > \b\a\w\k\.\h << '+ END-OF-FILE '\b\a\w\k\.\h
X#include <ctype.h>
X/*
X * Bawk constants and variable declarations.
X */
X
X#ifdef BDS_C
X#define EXTERN
X#else
X
X#ifdef MAIN
X#define EXTERN
X#else
X#define EXTERN extern
X#endif
X
X#endif
X
X
X#ifdef DEBUG
XEXTERN char Debug; /* debug print flag */
X#endif
X
X/*
X * Table and buffer sizes
X */
X#define MAXLINELEN 128
X#define MAXWORDS (MAXLINELEN/2)
X#define MAXWORKBUFLEN 4096
X#define MAXVARTABSZ 50
X#define MAXVARLEN 10
X#define MAXSTACKSZ 40
X
X
X/**********************************************************
X * Current Input File variables *
X **********************************************************/
X/*
X * Current Input File pointer:
X */
X#ifdef BDS_C
XEXTERN char *Fileptr, Curfbuf[BUFSIZ];
X#else
XEXTERN FILE *Fileptr;
X#endif
XEXTERN char *Filename; /* current input file name */
XEXTERN int Linecount; /* current input line number */
XEXTERN int Recordcount; /* record count */
X/*
X * Working buffers.
X */
XEXTERN char Linebuf[MAXLINELEN];/* current input line buffer */
XEXTERN char *Fields[MAXWORDS]; /* pointers to the words in Linebuf */
XEXTERN int Fieldcount; /* and the # of words */
XEXTERN char Workbuf[MAXWORKBUFLEN]; /* work area for C action and */
X /* regular expression parsers */
X
X/**********************************************************
X * Regular Expression Parser variables *
X **********************************************************/
X/*
X * Tokens:
X */
X#define CHAR 1
X#define BOL 2
X#define EOL 3
X#define ANY 4
X#define CLASS 5
X#define NCLASS 6
X#define STAR 7
X#define PLUS 8
X#define MINUS 9
X#define ALPHA 10
X#define DIGIT 11
X#define NALPHA 12
X#define PUNCT 13
X#define RANGE 14
X#define ENDPAT 15
X
X
X/**********************************************************
X * C Actions Interpreter variables *
X **********************************************************/
X/*
X * Tokens:
X */
X#define T_STRING 'S'
X#define T_DOLLAR '$'
X#define T_REGEXP 'r'
X#define T_CONSTANT 'C'
X#define T_VARIABLE 'V'
X#define T_FUNCTION 'F'
X#define T_SEMICOLON ';'
X#define T_EOF 'Z'
X#define T_LBRACE '{'
X#define T_RBRACE '}'
X#define T_LPAREN '('
X#define T_RPAREN ')'
X#define T_LBRACKET '['
X#define T_RBRACKET ']'
X#define T_COMMA ','
X#define T_ASSIGN '='
X#define T_MUL '*'
X#define T_DIV '/'
X#define T_MOD '%'
X#define T_ADD '+'
X#define T_SUB '-'
X#define T_SHL 'L'
X#define T_SHR 'R'
X#define T_LT '<'
X#define T_LE 'l'
X#define T_GT '>'
X#define T_GE 'g'
X#define T_EQ 'q'
X#define T_NE 'n'
X#define T_NOT '~'
X#define T_AND '&'
X#define T_XOR '^'
X#define T_IOR '|'
X#define T_LNOT '!'
X#define T_LAND 'a'
X#define T_LIOR 'o'
X#define T_INCR 'p'
X#define T_DECR 'm'
X#define T_IF 'i'
X#define T_ELSE 'e'
X#define T_WHILE 'w'
X#define T_BREAK 'b'
X#define T_CHAR 'c'
X#define T_INT 't'
X#define T_BEGIN 'B'
X#define T_END 'E'
X#define T_NF 'f'
X#define T_NR '#'
X#define T_FS ' '
X#define T_RS '\n'
X#define T_FILENAME 'z'
X
X#define PATTERN 'P'
X#define ACTION 'A'
X
X/*
X * Symbol Table values
X */
X#define ACTUAL 0
X#define LVALUE 1
X#define BYTE 1
X#define WORD 2
X/*
X * Symbol table
X */
X/*
X * One entry of the symbol table Vartab.
X * NOTE(review): the field meanings below are inferred from the names
X * and from the ACTUAL/LVALUE and BYTE/WORD constants defined just
X * above; the code that fills them in is not in this file - confirm.
X */
Xstruct variable {
X char vname[MAXVARLEN]; /* variable name */
X char vclass; /* presumably ACTUAL or LVALUE - confirm */
X char vsize; /* presumably BYTE or WORD - confirm */
X int vlen; /* presumably the element count - confirm */
X char *vptr; /* presumably the variable's storage - confirm */
X};
X#define VARIABLE struct variable
XEXTERN VARIABLE Vartab[MAXVARTABSZ], *Nextvar;
X/*
X * Value stack
X */
X/*
X * A single value: an integer, a character pointer, or a pointer to a
X * character pointer, sharing the same storage.
X */
Xunion datum {
X int ival; /* integer value */
X char *dptr; /* pointer value */
X char **ptrptr; /* pointer-to-pointer value */
X};
X#define DATUM union datum
X/*
X * One slot of the value stack Stackbtm: a DATUM plus three descriptor
X * bytes. NOTE(review): the descriptor meanings are inferred from the
X * names and the constants above - confirm against the interpreter.
X */
Xstruct item {
X char class; /* presumably ACTUAL or LVALUE - confirm */
X char lvalue; /* presumably set when the item is assignable - confirm */
X char size; /* presumably BYTE or WORD - confirm */
X DATUM value; /* the value itself */
X};
X#define ITEM struct item
XEXTERN ITEM Stackbtm[MAXSTACKSZ], *Stackptr, *Stacktop;
X/*
X * Miscellaneous
X */
XEXTERN char *Actptr; /* pointer into Workbuf during compilation */
XEXTERN char Token; /* current input token */
XEXTERN DATUM Value; /* and its value */
XEXTERN char Saw_break; /* set when break stmt seen */
XEXTERN char Where; /* indicates whether C stmt is a PATTERN or
X * ACTION */
XEXTERN char Fieldsep[3]; /* field seperator */
XEXTERN char Recordsep[3]; /* record seperator */
XEXTERN char *Beginact; /* BEGINning of input actions */
XEXTERN char *Endact; /* END of input actions */
X
X/**********************************************************
X * Rules structure *
X **********************************************************/
X/*
X * One <pattern> { <action> } pair from the rules file. Rules are kept
X * in a singly linked list in the order they were compiled; a null
X * "pattern.start" means the action applies to every input record.
X */
Xstruct rule {
X struct {
X char *start; /* C statements that match pattern start */
X char *stop; /* C statements that match pattern end */
X char startseen; /* set if both a start and stop pattern */
X /* given and if an input line matched the */
X /* start pattern */
X } pattern;
X char *action; /* contains quasi-C statements of actions */
X struct rule *nextrule; /* pointer to next rule */
X};
X#define RULE struct rule
XEXTERN RULE *Rules, /* rule structures linked list head */
X*Rulep; /* working pointer */
X
X
X/**********************************************************
X * Miscellaneous *
X **********************************************************/
X/*
X * Error exit values (returned to command shell)
X */
X#define USAGE_ERROR 1
X#define FILE_ERROR 2
X#define RE_ERROR 3
X#define ACT_ERROR 4
X#define MEM_ERROR 5
X/*
X * Functions that return something special:
X */
Xchar *
Xstr_compile(), *getmem(), *cclass(), *pmatch(), *fetchptr();
Xchar *storeptr();
XVARIABLE *
Xfindvar(), *addvar(), *decl();
+ END-OF-FILE bawk.h
chmod 'u=rw,g=r,o=r' \b\a\w\k\.\h
set `sum \b\a\w\k\.\h`
sum=$1
case $sum in
06605) :;;
*) echo 'Bad sum in '\b\a\w\k\.\h >&2
esac
echo Extracting \b\a\w\k\a\c\t\.\c
sed 's/^X//' > \b\a\w\k\a\c\t\.\c << '+ END-OF-FILE '\b\a\w\k\a\c\t\.\c
X/*
X * Bawk C actions compiler
X */
X#include <stdio.h>
X#include "bawk.h"
X
Xact_compile(actbuf)
X    char *actbuf;       /* receives the tokenized action */
X{
X    /*
X     * Tokenize an action from the current input file into "actbuf".
X     * Where is set to ACTION first so that stmt_compile() treats '{'
X     * and '/' as action syntax.  Returns stmt_compile()'s result, the
X     * number of bytes stored.
X     */
X    int len;
X
X    Where = ACTION;
X    len = stmt_compile(actbuf);
X    return len;
X}
X
Xpat_compile(actbuf)
X    char *actbuf;       /* receives the tokenized pattern */
X{
X    /*
X     * Tokenize a pattern from the current input file into "actbuf".
X     * Where is set to PATTERN first so that stmt_compile() stops at a
X     * '{' or a top-level ',' and reads '/' as a regular expression
X     * delimiter.  Returns stmt_compile()'s result, the number of bytes
X     * stored.
X     */
X    int len;
X
X    Where = PATTERN;
X    len = stmt_compile(actbuf);
X    return len;
X}
X
Xstmt_compile(actbuf)
X char *actbuf; /* where tokenized actions are compiled into */
X{
X /*
X * Read and tokenize C actions from current input file into the
X * action buffer. Strip out comments and whitespace in the process.
X *
X * The global Where selects the mode: in PATTERN mode scanning stops
X * (with the character pushed back) at a '{' or at a ',' outside any
X * braces and parentheses; in ACTION mode it stops after the '}' that
X * closes the outermost '{'. Scanning also stops at end of file.
X * A T_EOF token is appended, and the number of bytes written to
X * "actbuf" (T_EOF included) is returned.
X *
X * Strings follow their token as NUL-terminated text; constants,
X * function numbers and variable pointers follow their token as raw
X * int / pointer values stored through storeint() / storeptr().
X */
X char *actptr, /* actbuf pointer */
X *cp, /* work pointer */
X buf[MAXLINELEN]; /* string buffer */
X int braces, /* counts '{}' pairs - return when 0 */
X parens, /* counts '()' pairs */
X i, /* temp */
X c; /* current input character */
X
X braces = parens = 0;
X actptr = actbuf;
X while ((c = getcharacter()) != -1) {
X /*
X * Skip over spaces, tabs and newlines
X */
X if (c == ' ' || c == '\t' || c == '\n')
X continue;
X if (c == '#') {
X /*
X * Skip comments. Comments start with a '#' and end
X * at the next newline.
X */
X while ((c = getcharacter()) != -1 && c != '\n');
X continue;
X }
X
X if (c == '{') {
X if (Where == PATTERN) {
X /*
X * We're compiling a pattern. The '{' marks
X * the beginning of an action statement. Push
X * the character back and return.
X */
X ungetcharacter('{');
X break;
X }
X else {
X /*
X * We must be compiling an action statement.
X * '{'s mark beginning of action or compound
X * statements.
X */
X ++braces;
X *actptr++ = T_LBRACE;
X }
X }
X else if (c == '}') {
X *actptr++ = T_RBRACE;
X if (!--braces)
X /*
X * Found the end of the action string
X */
X break;
X }
X else if (c == '(') {
X ++parens;
X *actptr++ = T_LPAREN;
X }
X else if (c == ')') {
X if (--parens < 0)
X error("mismatched '()'", ACT_ERROR);
X *actptr++ = T_RPAREN;
X }
X else if (c == ',' && !braces && !parens && Where == PATTERN) {
X /*
X * found a comma outside of any braces or parens-
X * this must be a regular expression separator.
X */
X ungetcharacter(',');
X break;
X }
X
X /*
X * Check if it's a regular expression:
X */
X else if (c == '/') {
X /*
X * A '/' inside a pattern string starts a regular
X * expression. Inside action strings, a '/' is the
X * division operator.
X */
X if (Where == PATTERN)
X goto dopattern;
X else
X *actptr++ = T_DIV;
X }
X else if (c == '@') {
X dopattern:
X /*
X * Within action strings, only the '@' may be used to
X * delimit regular expressions
X */
X *actptr++ = T_REGEXP;
X /* the delimiter is pushed back for re_compile() to read */
X ungetcharacter(c);
X actptr += re_compile(actptr);
X }
X
X /*
X * symbol, string or constant:
X */
X else if (alpha(c)) {
X /*
X * It's a symbol reference. Copy the symbol into
X * string buffer.
X * NOTE(review): the copy is not bounded by the size of
X * "buf".
X */
X cp = buf;
X do
X *cp++ = c;
X while ((c = getcharacter()) != -1 && alphanum(c));
X ungetcharacter(c);
X *cp = 0;
X /*
X * Check if a keyword, builtin function or variable.
X */
X if (c = iskeyword(buf))
X *actptr++ = c;
X else if (i = isfunction(buf)) {
X *actptr++ = T_FUNCTION;
X storeint(actptr, i);
X actptr += sizeof(i);
X }
X else {
X /*
X * It's a symbol name. Look it up, adding it to the
X * symbol table on first use.
X */
X *actptr++ = T_VARIABLE;
X if (!(cp = (char *) findvar(buf)))
X cp = (char *) addvar(buf);
X storeptr(actptr, cp);
X actptr += sizeof(cp);
X }
X }
X
X else if (c == '"') {
X /*
X * It's a string constant
X */
X *actptr++ = T_STRING;
X actptr = str_compile(actptr, '"');
X }
X else if (c == '\'') {
X /*
X * It's a character constant; only its first character
X * is kept, stored as an int.
X */
X *actptr++ = T_CONSTANT;
X str_compile(buf, '\'');
X storeint(actptr, *buf);
X actptr += sizeof(i);
X }
X
X else if (num(c)) {
X /*
X * It's a numeric constant
X */
X *actptr++ = T_CONSTANT;
X cp = buf;
X do
X *cp++ = c;
X while ((c = getcharacter()) != -1 && num(c));
X ungetcharacter(c);
X *cp = 0;
X storeint(actptr, atoi(buf));
X actptr += sizeof(i);
X }
X
X /*
X * unary operator:
X */
X else if (c == '$')
X *actptr++ = T_DOLLAR;
X
X /*
X * or binary operator. Two-character operators are recognized
X * by reading one character ahead and pushing it back when it
X * does not complete the operator.
X */
X else if (c == '=') {
X if ((c = getcharacter()) == '=')
X *actptr++ = T_EQ;
X else {
X ungetcharacter(c);
X *actptr++ = T_ASSIGN;
X }
X }
X
X else if (c == '!') {
X if ((c = getcharacter()) == '=')
X *actptr++ = T_NE;
X else {
X ungetcharacter(c);
X *actptr++ = T_LNOT;
X }
X }
X
X else if (c == '<') {
X if ((c = getcharacter()) == '<')
X *actptr++ = T_SHL;
X else if (c == '=')
X *actptr++ = T_LE;
X else {
X ungetcharacter(c);
X *actptr++ = T_LT;
X }
X }
X
X else if (c == '>') {
X if ((c = getcharacter()) == '>')
X *actptr++ = T_SHR;
X else if (c == '=')
X *actptr++ = T_GE;
X else {
X ungetcharacter(c);
X *actptr++ = T_GT;
X }
X }
X
X else if (c == '&') {
X if ((c = getcharacter()) == '&')
X *actptr++ = T_LAND;
X else {
X ungetcharacter(c);
X *actptr++ = T_AND;
X }
X }
X
X else if (c == '|') {
X if ((c = getcharacter()) == '|')
X *actptr++ = T_LIOR;
X else {
X ungetcharacter(c);
X *actptr++ = T_IOR;
X }
X }
X else if (c == '+') {
X if ((c = getcharacter()) == '+')
X *actptr++ = T_INCR;
X else {
X ungetcharacter(c);
X *actptr++ = T_ADD;
X }
X }
X
X else if (c == '-') {
X if ((c = getcharacter()) == '-')
X *actptr++ = T_DECR;
X else {
X ungetcharacter(c);
X *actptr++ = T_SUB;
X }
X }
X
X /*
X * punctuation: these characters are their own token codes
X */
X else if (instr(c, "[](),;*/%+-^~"))
X *actptr++ = c;
X
X else {
X /*
X * Bad character in input line
X */
X error("lexical error", ACT_ERROR);
X }
X
X /*
X * NOTE(review): the limit is checked after the token has been
X * stored, and against Workbuf rather than "actbuf".
X */
X if (actptr >= Workbuf + MAXWORKBUFLEN)
X error("action too long", MEM_ERROR);
X }
X if (braces || parens)
X error("mismatched '{}' or '()'", ACT_ERROR);
X
X *actptr++ = T_EOF;
X
X return actptr - actbuf;
X}
X
Xchar *
Xstr_compile(str, delim)
X    char *str, delim;
X{
X    /*
X     * Compile a string from current input file into the given string
X     * buffer.  Stop when input character is the delimiter in "delim".
X     * Returns a pointer to the first character after the stored string
X     * (i.e. just past its terminating NUL).  The size of "str" is not
X     * known here, so the caller must supply a large enough buffer.
X     *
X     * Escapes: \b \n \t \f \r give the usual control characters; \0 to
X     * \3 start an octal escape of one to three octal digits; a
X     * backslash followed by a newline continues the string on the next
X     * line; any other escaped character (\\, \", \', ...) stands for
X     * itself.  Reaching end of file before the delimiter is reported
X     * as "missing <delim> delimiter" with ACT_ERROR.
X     */
X    int c, ndigits, value;
X    char buf[MAXLINELEN];
X
X    while ((c = getcharacter()) != -1 && c != delim) {
X        if (c == '\\') {
X            switch (c = getcharacter()) {
X            case -1:
X                goto err;
X            case 'b':
X                c = '\b';
X                break;
X            case 'n':
X                c = '\n';
X                break;
X            case 't':
X                c = '\t';
X                break;
X            case 'f':
X                c = '\f';
X                break;
X            case 'r':
X                c = '\r';
X                break;
X            case '0':
X            case '1':
X            case '2':
X            case '3':
X                /*
X                 * Take up to two more octal digits; the first
X                 * character that is not one goes back to the
X                 * input so that '\0' and "\0x" work.
X                 */
X                value = c - '0';
X                for (ndigits = 1; ndigits < 3; ++ndigits) {
X                    c = getcharacter();
X                    if (c < '0' || c > '7') {
X                        ungetcharacter(c);
X                        break;
X                    }
X                    value = value * 8 + (c - '0');
X                }
X                c = value;
X                break;
X            case '\n':
X                /* line continuation: store nothing */
X                continue;
X            default:
X                /* the escaped character stands for itself */
X                break;
X            }
X        }
X        *str++ = c;
X    }
X    if (c == -1)
X        goto err;
X    *str++ = 0;
X
X    return (str);
Xerr:
X    sprintf(buf, "missing %c delimiter", delim);
X    error(buf, ACT_ERROR);
X    return (char *) 0;
X}
X
Xstoreint(ip, i)
X    int *ip, i;
X{
X    /*
X     * Store the int "i" at address "ip" and return it.  The target is
X     * a position inside the tokenized byte stream and need not be
X     * aligned for an int, so the value is copied byte by byte rather
X     * than assigned through the pointer.
X     */
X    char *dst = (char *) ip;
X    char *src = (char *) &i;
X    unsigned n;
X
X    for (n = 0; n < sizeof(i); ++n)
X        dst[n] = src[n];
X    return i;
X}
X
Xchar *
Xstoreptr(pp, p)
X    char **pp, *p;
X{
X    /*
X     * Store the pointer "p" at address "pp" and return it.  The target
X     * is a position inside the tokenized byte stream and need not be
X     * aligned for a pointer, so the value is copied byte by byte
X     * rather than assigned through "pp".
X     */
X    char *dst = (char *) pp;
X    char *src = (char *) &p;
X    unsigned n;
X
X    for (n = 0; n < sizeof(p); ++n)
X        dst[n] = src[n];
X    return (p);
X}
X
Xfetchint(ip)
X    int *ip;
X{
X    /*
X     * Return the int stored at address "ip".  The source is a position
X     * inside the tokenized byte stream and need not be aligned for an
X     * int, so the value is assembled byte by byte rather than read
X     * through the pointer.
X     */
X    int i;
X    char *src = (char *) ip;
X    char *dst = (char *) &i;
X    unsigned n;
X
X    for (n = 0; n < sizeof(i); ++n)
X        dst[n] = src[n];
X    return i;
X}
X
Xchar *
Xfetchptr(pp)
X    char **pp;
X{
X    /*
X     * Return the pointer stored at address "pp".  The source is a
X     * position inside the tokenized byte stream and need not be
X     * aligned for a pointer, so the value is assembled byte by byte
X     * rather than read through "pp".
X     */
X    char *p;
X    char *src = (char *) pp;
X    char *dst = (char *) &p;
X    unsigned n;
X
X    for (n = 0; n < sizeof(p); ++n)
X        dst[n] = src[n];
X    return p;
X}
X
Xgetoken()
X{
X    /*
X     * Fetch the next token of the tokenized stream at Actptr into the
X     * global Token and its operand, if any, into the global Value:
X     *   T_STRING, T_REGEXP     - Value.dptr points at the text held in
X     *                            the stream; Actptr skips it and its NUL
X     *   T_VARIABLE             - Value.dptr is the stored pointer
X     *   T_FUNCTION, T_CONSTANT - Value.ival is the stored int
X     *   anything else          - Value.dptr is cleared
X     * At T_EOF Actptr is left on the token, so further calls keep
X     * returning T_EOF.  Returns Token.
X     */
X    Token = *Actptr++;
X
X    if (Token == T_STRING || Token == T_REGEXP) {
X        Value.dptr = Actptr;
X        Actptr += strlen(Actptr) + 1;
X    }
X    else if (Token == T_VARIABLE) {
X        Value.dptr = fetchptr(Actptr);
X        Actptr += sizeof(char *);
X    }
X    else if (Token == T_FUNCTION || Token == T_CONSTANT) {
X        Value.ival = fetchint(Actptr);
X        Actptr += sizeof(int);
X    }
X    else {
X        if (Token == T_EOF)
X            --Actptr;
X        Value.dptr = 0;
X    }
X
X#ifdef DEBUG
X    if (Debug > 1)
X        printf("Token='%c' (0x%x), Value=%d\n",
X               Token, Token, Value.ival);
X#endif
X
X    return Token;
X}
+ END-OF-FILE bawkact.c
chmod 'u=rw,g=r,o=r' \b\a\w\k\a\c\t\.\c
set `sum \b\a\w\k\a\c\t\.\c`
sum=$1
case $sum in
28797) :;;
*) echo 'Bad sum in '\b\a\w\k\a\c\t\.\c >&2
esac
exit 0