brennan@ssc-vax.UUCP (Mike Brennan) (05/11/91)
------------------cut here---------------- case SC_SPACE : goto reswitch ; case SC_COMMENT : eat_comment() ; goto reswitch ; case SC_NL : lineno++ ; eat_nl() ; ct_ret(NL) ; case SC_ESCAPE : while ( scan_code[ c = next() ] == SC_SPACE ) ; if ( c == '\n') { token_lineno = ++lineno ; goto reswitch ; } if ( c == 0 ) ct_ret(EOF) ; un_next() ; yylval.ival = '\\' ; ct_ret(UNEXPECTED) ; case SC_SEMI_COLON : eat_nl() ; ct_ret(SEMI_COLON) ; case SC_LBRACE : eat_nl() ; brace_cnt++ ; ct_ret(LBRACE) ; case SC_PLUS : test2_ret('+', INC, '=', ADD_ASG, PLUS ) ; case SC_MINUS : test2_ret('-', DEC, '=', SUB_ASG, MINUS ) ; case SC_COMMA : eat_nl() ; ct_ret(COMMA) ; case SC_MUL : test1_ret('=', MUL_ASG, MUL) ; case SC_DIV : { int *p = can_precede_re ; do if ( *p == current_token ) ct_ret( collect_RE() ) ; while ( *p++ != -1 ) ; test1_ret( '=', DIV_ASG , DIV ) ; } case SC_MOD : test1_ret('=', MOD_ASG, MOD) ; case SC_POW : test1_ret('=' , POW_ASG, POW) ; case SC_LPAREN : paren_cnt++ ; ct_ret(LPAREN) ; case SC_RPAREN : if ( --paren_cnt < 0 ) { compile_error( "extra ')'" ) ; paren_cnt = 0 ; goto reswitch ; } ct_ret(RPAREN) ; case SC_LBOX : ct_ret(LBOX) ; case SC_RBOX : ct_ret(RBOX) ; case SC_MATCH : ct_ret(MATCH) ; case SC_EQUAL : test1_ret( '=', EQ, ASSIGN ) ; case SC_NOT : /* ! 
*/ test2_ret('=', NEQ, '~', NOT_MATCH, NOT ) ; case SC_LT : /* '<' */ if ( getline_flag ) { getline_flag = 0 ; ct_ret(IO_IN) ; } else { ct_ret( ifnext('=', LTE , LT) ) ; } case SC_GT : /* '>' */ if ( print_flag && paren_cnt == 0 ) { print_flag = 0 ; /* there are 3 types of IO_OUT -- build the error string in temp_buff */ temp_buff.string_buff[0] = '>' ; if ( next() == '>' ) { yylval.ival = F_APPEND ; temp_buff.string_buff[1] = '>' ; temp_buff.string_buff[2] = 0 ; } else { un_next() ; yylval.ival = F_TRUNC ; temp_buff.string_buff[1] = 0 ; } return current_token = IO_OUT ; } ct_ret( ifnext('=', GTE , GT) ) ; case SC_OR : if ( next() == '|' ) { eat_nl() ; ct_ret(brace_cnt?OR:P_OR) ; } else { un_next() ; if ( print_flag && paren_cnt == 0 ) { print_flag = 0 ; yylval.ival = PIPE_OUT; temp_buff.string_buff[0] = '|' ; temp_buff.string_buff[1] = 0 ; ct_ret(IO_OUT) ; } else ct_ret(PIPE) ; } case SC_AND : if ( next() == '&' ) { eat_nl() ; ct_ret(brace_cnt?AND:P_AND) ; } else { un_next() ; yylval.ival = '&' ; ct_ret(UNEXPECTED) ; } case SC_QMARK : ct_ret(QMARK) ; case SC_COLON : ct_ret(COLON) ; case SC_RBRACE : if ( --brace_cnt < 0 ) { compile_error("extra '}'" ) ; brace_cnt = 0 ; goto reswitch ; } if ( (c = current_token) == NL || c == SEMI_COLON || c == SC_FAKE_SEMI_COLON || c == RBRACE ) { eat_nl() ; ct_ret(RBRACE) ; } brace_cnt++ ; un_next() ; current_token = SC_FAKE_SEMI_COLON ; return SEMI_COLON ; case SC_DIGIT : case SC_DOT : { double d ; int flag ; if ( (d = collect_decimal(c, &flag)) == 0.0 ) if ( flag ) ct_ret(flag) ; else yylval.cp = &cell_zero ; else if ( d == 1.0 ) yylval.cp = &cell_one ; else { yylval.cp = new_CELL() ; yylval.cp->type = C_DOUBLE ; yylval.cp->dval = d ; } ct_ret( CONSTANT ) ; } case SC_DOLLAR : /* '$' */ { double d ; int flag ; while ( scan_code[c = next()] == SC_SPACE ) ; if ( scan_code[c] != SC_DIGIT && scan_code[c] != SC_DOT ) { un_next() ; ct_ret(DOLLAR) ; } /* compute field address at compile time */ if ( (d = collect_decimal(c, &flag)) == 
0.0 ) if ( flag ) ct_ret(flag) ; /* an error */ else yylval.cp = &field[0] ; else { int k = (int) d ; if ( k > MAX_FIELD ) { compile_error( "maximum field index(%d) exceeded" , k ) ; k = MAX_FIELD ; } else yylval.cp = &field[k] ; } ct_ret(FIELD) ; } case SC_DQUOTE : return current_token = collect_string() ; case SC_IDCHAR : /* collect an identifier */ { unsigned char *p = (unsigned char *)temp_buff.string_buff + 1 ; SYMTAB *stp ; temp_buff.string_buff[0] = c ; while ( (c = scan_code[ *p++ = next()]) == SC_IDCHAR || c == SC_DIGIT ) ; un_next() ; * --p = 0 ; switch( (stp = find(temp_buff.string_buff))->type ) { case ST_NONE : /* check for function call before defined */ if ( next() == '(' ) { stp->type = ST_FUNCT ; stp->stval.fbp = (FBLOCK *) zmalloc(sizeof(FBLOCK)) ; stp->stval.fbp->name = stp->name ; stp->stval.fbp->code = (INST *) 0 ; yylval.fbp = stp->stval.fbp ; current_token = FUNCT_ID ; } else { yylval.stp = stp ; current_token = ID ; } un_next() ; break ; case ST_VAR : case ST_ARRAY : case ST_LOCAL_NONE : case ST_LOCAL_VAR : case ST_LOCAL_ARRAY : yylval.stp = stp ; current_token = ID ; break ; case ST_FUNCT : yylval.fbp = stp->stval.fbp ; current_token = FUNCT_ID ; break ; case ST_KEYWORD : current_token = stp->stval.kw ; break ; case ST_BUILTIN : yylval.bip = stp->stval.bip ; current_token = BUILTIN ; break ; case ST_FIELD : yylval.cp = stp->stval.cp ; current_token = FIELD ; break ; case ST_LENGTH : { CELL *bi_length() ; static BI_REC length_bi_rec = { "length", bi_length, 1, 1 } ; while ( scan_code[ c = next() ] == SC_SPACE ) ; un_next() ; if ( c == '(' ) { yylval.bip = &length_bi_rec ; current_token = BUILTIN ; } else current_token = LENGTH ; } break ; default : bozo("find returned bad st type") ; } return current_token ; } case SC_UNEXPECTED : yylval.ival = c & 0xff ; ct_ret(UNEXPECTED) ; } return 0 ; /* never get here make lint happy */ } /* collect a decimal constant in temp_buff. 
Return the value and error conditions by reference */ static double collect_decimal(c, flag) int c ; int *flag ; { register unsigned char *p = (unsigned char*) temp_buff.string_buff + 1; unsigned char *endp ; double d ; *flag = 0 ; temp_buff.string_buff[0] = c ; if ( c == '.' ) { if ( scan_code[*p++ = next()] != SC_DIGIT ) { *flag = UNEXPECTED ; yylval.ival = '.' ; return 0.0 ; } } else { while ( scan_code[*p++ = next()] == SC_DIGIT ) ; if ( p[-1] != '.' ) { un_next() ; p-- ; } } /* get rest of digits after decimal point */ while ( scan_code[*p++ = next()] == SC_DIGIT ) ; /* check for exponent */ if ( p[-1] != 'e' && p[-1] != 'E' ) { un_next() ; * --p = 0 ; } else /* get the exponent */ if ( scan_code[*p = next()] != SC_DIGIT && *p != '-' && *p != '+' ) { *++p = 0 ; *flag = BAD_DECIMAL ; return 0.0 ; } else /* get the rest of the exponent */ { p++ ; while ( scan_code[*p++ = next()] == SC_DIGIT ) ; un_next() ; * --p = 0 ; } errno = 0 ; /* check for overflow/underflow */ d = strtod( temp_buff.string_buff, &endp ) ; if ( errno ) compile_error( "%s : decimal %sflow" , temp_buff.string_buff, d == 0.0 ? 
"under" : "over") ; if ( endp != p ) { *flag = BAD_DECIMAL ; return 0.0 ; } return d ; } /*---------- process escape characters ---------------*/ static char hex_val['f' - 'A' + 1] = { 10,11,12,13,14,15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 10,11,12,13,14,15 } ; #define isoctal(x) ((x)>='0'&&(x)<='7') #define hex_value(x) hex_val[(x)-'A'] #define ishex(x) (scan_code[x] == SC_DIGIT ||\ 'A' <= (x) && (x) <= 'f' && hex_value(x)) static int PROTO(octal, (char **)) ; static int PROTO(hex, (char **)) ; /* process one , two or three octal digits moving a pointer forward by reference */ static int octal( start_p ) char **start_p ; { register char *p = *start_p ; register unsigned x ; x = *p++ - '0' ; if ( isoctal(*p) ) { x = (x<<3) + *p++ - '0' ; if ( isoctal(*p) ) x = (x<<3) + *p++ - '0' ; } *start_p = p ; return x & 0xff ; } /* process one or two hex digits moving a pointer forward by reference */ static int hex( start_p ) unsigned char **start_p ; { register unsigned char *p = *start_p ; register unsigned x ; unsigned t ; if ( scan_code[*p] == SC_DIGIT ) x = *p++ - '0' ; else x = hex_value(*p++) ; if ( scan_code[*p] == SC_DIGIT ) x = (x<<4) + *p++ - '0' ; else if ( 'A' <= *p && *p <= 'f' && (t = hex_value(*p)) ) { x = (x<<4) + t ; p++ ; } *start_p = p ; return x ; } static char escape_test[] = "n\nt\tb\br\rf\fa\07v\013\\\\\"\"\'\'" ; /* process the escape characters in a string, in place . 
*/ static char *rm_escape(s) char *s ; { register char *p, *q ; char *t ; q = p = s ; while ( *p ) if ( *p == '\\' ) { if ( t = strchr(escape_test, * ++p) ) { p++ ; *q++ = t[1] ; } else if ( isoctal(*p) ) { t = p ; *q++ = octal(&t) ; p = t ; } else if ( *p == 'x' && ishex(*(unsigned char*)(p+1)) ) { t = p+1 ; *q++ = hex(&t) ; p = t ; } else /* not an escape sequence */ { *q++ = '\\' ; *q++ = *p++ ; } } else *q++ = *p++ ; *q = 0 ; return s ; } static int collect_string() { register unsigned char *p = (unsigned char *)temp_buff.string_buff ; int c ; int e_flag = 0 ; /* on if have an escape char */ while ( 1 ) switch( scan_code[ *p++ = next() ] ) { case SC_DQUOTE : /* done */ * --p = 0 ; goto out ; case SC_NL : p[-1] = 0 ; /* fall thru */ case 0 : /* unterminated string */ compile_error( "runaway string constant \"%.10s ..." , temp_buff.string_buff, token_lineno ) ; mawk_exit(1) ; case SC_ESCAPE : if ( (c = next()) == '\n' ) { p-- ; lineno++ ; } else if ( c == 0 ) un_next() ; else { *p++ = c ; e_flag = 1 ; } break ; default : break ; } out: yylval.cp = new_CELL() ; yylval.cp->type = C_STRING ; yylval.cp->ptr = (PTR) new_STRING( e_flag ? rm_escape( temp_buff.string_buff ) : temp_buff.string_buff ) ; return CONSTANT ; } static int collect_RE() { register unsigned char *p = (unsigned char*) temp_buff.string_buff ; int c ; STRING *sval ; while ( 1 ) switch( scan_code[ *p++ = next() ] ) { case SC_DIV : /* done */ * --p = 0 ; goto out ; case SC_NL : p[-1] = 0 ; /* fall thru */ case 0 : /* unterminated re */ compile_error( "runaway regular expression /%.10s ..." 
, temp_buff.string_buff, token_lineno ) ; mawk_exit(1) ; case SC_ESCAPE : switch( c = next() ) { case '/' : p[-1] = '/' ; break ; case '\n' : p-- ; break ; case 0 : un_next() ; break ; default : *p++ = c ; break ; } break ; } out: /* now we've got the RE, so compile it */ sval = new_STRING( temp_buff.string_buff ) ; yylval.cp = new_CELL() ; yylval.cp->type = C_RE ; yylval.cp->ptr = re_compile(sval) ; free_STRING(sval) ; return RE ; } @//E*O*F mawk0.97/scan.c// chmod u=rw,g=r,o=r mawk0.97/scan.c echo x - mawk0.97/scan.h sed 's/^@//' > "mawk0.97/scan.h" <<'@//E*O*F mawk0.97/scan.h//' /******************************************** scan.h copyright 1991, Michael D. Brennan This is a source file for mawk, an implementation of the Awk programming language as defined in Aho, Kernighan and Weinberger, The AWK Programming Language, Addison-Wesley, 1988. See the accompaning file, LIMITATIONS, for restrictions regarding modification and redistribution of this program in source or binary form. ********************************************/ /* $Log: scan.h,v $ * Revision 2.2 91/04/09 12:39:31 brennan * added static to funct decls to satisfy STARDENT compiler * * Revision 2.1 91/04/08 08:23:54 brennan * VERSION 0.97 * */ /* scan.h */ #ifndef SCAN_H_INCLUDED #define SCAN_H_INCLUDED 1 #include <stdio.h> #ifndef MAKESCAN #include "symtype.h" #include "parse.h" #endif extern char scan_code[256] ; /* the scan codes to compactify the main switch */ #define SC_SPACE 1 #define SC_NL 2 #define SC_SEMI_COLON 3 #define SC_FAKE_SEMI_COLON 4 #define SC_LBRACE 5 #define SC_RBRACE 6 #define SC_QMARK 7 #define SC_COLON 8 #define SC_OR 9 #define SC_AND 10 #define SC_PLUS 11 #define SC_MINUS 12 #define SC_MUL 13 #define SC_DIV 14 #define SC_MOD 15 #define SC_POW 16 #define SC_LPAREN 17 #define SC_RPAREN 18 #define SC_LBOX 19 #define SC_RBOX 20 #define SC_IDCHAR 21 #define SC_DIGIT 22 #define SC_DQUOTE 23 #define SC_ESCAPE 24 #define SC_COMMENT 25 #define SC_EQUAL 26 #define SC_NOT 27 #define SC_LT 
28 #define SC_GT 29 #define SC_COMMA 30 #define SC_DOT 31 #define SC_MATCH 32 #define SC_DOLLAR 33 #define SC_UNEXPECTED 34 #ifndef MAKESCAN /* global functions in scan.c */ void PROTO(scan_init, (int, char *) ) ; void PROTO(scan_cleanup, (void) ) ; void PROTO(eat_nl, (void) ) ; int PROTO(yylex, (void) ) ; extern YYSTYPE yylval ; #define ct_ret(x) return current_token = (x) #define next() (*buffp ? *buffp++ : slow_next()) #define un_next() buffp-- #define ifnext(c,x,y) (next()==c?x:(un_next(),y)) #define test1_ret(c,x,d) if ( next() == (c) ) ct_ret(x) ;\ else { un_next() ; ct_ret(d) ; } #define test2_ret(c1,x1,c2,x2,d) switch( next() )\ { case c1: ct_ret(x1) ;\ case c2: ct_ret(x2) ;\ default: un_next() ;\ ct_ret(d) ; } #endif /* ! MAKESCAN */ #endif @//E*O*F mawk0.97/scan.h// chmod u=rw,g=r,o=r mawk0.97/scan.h echo x - mawk0.97/scancode.c sed 's/^@//' > "mawk0.97/scancode.c" <<'@//E*O*F mawk0.97/scancode.c//' /* scancode.c */ char scan_code[256] = { 0,34,34,34,34,34,34,34,34, 1, 2, 1, 1, 1,34,34, 34,34,34,34,34,34,34,34,34,34,34,34,34,34,34,34, 1,27,23,25,33,15,10,34,17,18,13,11,30,12,31,14, 22,22,22,22,22,22,22,22,22,22, 8, 3,28,26,29, 7, 34,21,21,21,21,21,21,21,21,21,21,21,21,21,21,21, 21,21,21,21,21,21,21,21,21,21,21,19,24,20,16,21, 34,21,21,21,21,21,21,21,21,21,21,21,21,21,21,21, 21,21,21,21,21,21,21,21,21,21,21, 5, 9, 6,32,34, 34,34,34,34,34,34,34,34,34,34,34,34,34,34,34,34, 34,34,34,34,34,34,34,34,34,34,34,34,34,34,34,34, 34,34,34,34,34,34,34,34,34,34,34,34,34,34,34,34, 34,34,34,34,34,34,34,34,34,34,34,34,34,34,34,34, 34,34,34,34,34,34,34,34,34,34,34,34,34,34,34,34, 34,34,34,34,34,34,34,34,34,34,34,34,34,34,34,34, 34,34,34,34,34,34,34,34,34,34,34,34,34,34,34,34, 34,34,34,34,34,34,34,34,34,34,34,34,34,34,34,34 } ; @//E*O*F mawk0.97/scancode.c// chmod u=rw,g=r,o=r mawk0.97/scancode.c echo x - mawk0.97/sizes.h sed 's/^@//' > "mawk0.97/sizes.h" <<'@//E*O*F mawk0.97/sizes.h//' /******************************************** sizes.h copyright 1991, Michael D. 
Brennan

This is a source file for mawk, an implementation of
the Awk programming language as defined in
Aho, Kernighan and Weinberger, The AWK Programming Language,
Addison-Wesley, 1988.

See the accompaning file, LIMITATIONS, for restrictions
regarding modification and redistribution of this
program in source or binary form.
********************************************/

/* $Log:	sizes.h,v $
 * Revision 2.1  91/04/08  08:24:09  brennan
 * VERSION 0.97
 * 
*/

/*  sizes.h  */

#ifndef  SIZES_H
#define  SIZES_H

#define  HASH_PRIME  53
#define  A_HASH_PRIME 37

#if      SMALL_EVAL_STACK
/* allow some but not a lot of recursion */
#define  EVAL_STACK_SIZE   64
#else
#define  EVAL_STACK_SIZE  256
#endif

#define  MAX_COMPILE_ERRORS  5 /* quit if more than 4 errors */

#define  BUFFSZ         4096
  /* input buffer size */

#define  MAX_LOOP_DEPTH   20
  /* should never be exceeded, doesn't matter if it's too
     big (unless gross) because resources sized by it are freed */

#define  MAX_FIELD   100  /* biggest field number */
#define  SPRINTF_SZ  300  /* biggest sprintf string length */

/* the size of the temp buffer in front of main_buff:
   the larger of MAX_FIELD pointers and SPRINTF_SZ chars */
#define  PTR_SZ  sizeof(PTR)
#define  TEMP_BUFF_SZ  (MAX_FIELD*PTR_SZ > SPRINTF_SZ ?\
        MAX_FIELD*PTR_SZ : SPRINTF_SZ )

#define  PAGE_SZ    1024  /* max instructions for a block */

#endif   /* SIZES_H */
@//E*O*F mawk0.97/sizes.h//
chmod u=rw,g=r,o=r mawk0.97/sizes.h
echo x - mawk0.97/split.c
sed 's/^@//' > "mawk0.97/split.c" <<'@//E*O*F mawk0.97/split.c//'

/********************************************
split.c
copyright 1991, Michael D. Brennan

This is a source file for mawk, an implementation of
the Awk programming language as defined in
Aho, Kernighan and Weinberger, The AWK Programming Language,
Addison-Wesley, 1988.

See the accompaning file, LIMITATIONS, for restrictions
regarding modification and redistribution of this
program in source or binary form.
********************************************/ /* $Log: split.c,v $ * Revision 2.1 91/04/08 08:24:11 brennan * VERSION 0.97 * */ /* split.c */ #include "mawk.h" #include "symtype.h" #include "bi_vars.h" #include "bi_funct.h" #include "memory.h" #include "scan.h" #include "regexp.h" #include "field.h" #include <string.h> /* split string s on SPACE without changing s. load the pieces into STRINGS and ptrs into temp_buff.ptr_buff[] return the number of pieces */ int space_split( s ) register char *s ; { char *back = strchr(s,0) ; int i = 0 ; int len ; char *q ; STRING *sval ; while ( 1 ) { while ( scan_code[*(unsigned char*)s] == SC_SPACE ) s++ ; if ( *s == 0 ) break ; /* mark the front with q */ q = s++ ; *back = ' ' ; /* sentinal */ while ( scan_code[*(unsigned char*)s] != SC_SPACE ) s++ ; *back = 0 ; sval = (STRING *) (temp_buff.ptr_buff[i++] = (PTR) new_STRING((char *) 0, len = s - q )) ; (void) memcpy(sval->str, q, len) ; } if ( i > MAX_FIELD ) rt_overflow("maximum number of fields", MAX_FIELD) ; return i ; } char *re_pos_match(s, re, lenp) register char *s ; PTR re ; unsigned *lenp ; { while ( s = REmatch(s, re, lenp) ) if ( *lenp ) return s ; else if ( *s == 0 ) break ; else s++ ; return (char *) 0 ; } int re_split(s, re) char *s ; PTR re ; { register char *t ; int i = 0 ; unsigned mlen, len ; STRING *sval ; while ( t = re_pos_match(s, re, &mlen) ) { sval = (STRING*)(temp_buff.ptr_buff[i++] = (PTR) new_STRING( (char *)0, len = t-s) ) ; (void) memcpy(sval->str, s, len) ; s = t + mlen ; } temp_buff.ptr_buff[i++] = (PTR) new_STRING(s) ; if ( i > MAX_FIELD ) rt_overflow("maximum number of fields", MAX_FIELD) ; return i ; } /* split(s, X, r) split s into array X on r entry: sp[0] holds r sp[-1] pts at X sp[-2] holds s */ CELL *bi_split(sp) register CELL *sp ; { int cnt ; /* the number of pieces */ double dcnt ; /* double version of cnt */ ARRAY A ; CELL *cp ; char *ofmt ; if ( sp->type < C_RE ) cast_for_split(sp) ; /* can be C_RE, C_SPACE or C_SNULL */ sp -= 2 ; if ( 
sp->type < C_STRING ) cast1_to_s(sp) ; if ( string(sp)->len == 0 ) /* nothing to split */ { free_STRING( string(sp) ) ; sp->type = C_DOUBLE ; sp->dval = 0.0 ; return sp ; } switch ( (sp+2)->type ) { case C_RE : cnt = re_split(string(sp)->str, (sp+2)->ptr) ; break ; case C_SPACE : cnt = space_split(string(sp)->str) ; break ; /* this case could be done by C_RE, but very slowly. Since it is the common way to eliminate fields, we'll treat the special case for speed */ case C_SNULL : /* split on empty string */ cnt = 1 ; temp_buff.ptr_buff[0] = sp->ptr ; string(sp)->ref_cnt++ ; break ; default : bozo("bad splitting cell in bi_split") ; } /* now load the array */ free_STRING( string(sp) ) ; sp->type = C_DOUBLE ; sp->dval = dcnt = (double) cnt ; ofmt = string(field + OFMT)->str ; A = (ARRAY) (sp+1)->ptr ; while ( cnt ) { char xbuff[256] ; /* this big in case the user did something goofy with OFMT */ (void) sprintf(xbuff, ofmt, dcnt ) ; dcnt -= 1.0 ; cp = array_find( A, xbuff, 1) ; cell_destroy(cp) ; cp->ptr = temp_buff.ptr_buff[--cnt] ; cp->type = C_MBSTRN ; } return sp ; } @//E*O*F mawk0.97/split.c// chmod u=rw,g=r,o=r mawk0.97/split.c echo x - mawk0.97/symtype.h sed 's/^@//' > "mawk0.97/symtype.h" <<'@//E*O*F mawk0.97/symtype.h//' /******************************************** symtype.h copyright 1991, Michael D. Brennan This is a source file for mawk, an implementation of the Awk programming language as defined in Aho, Kernighan and Weinberger, The AWK Programming Language, Addison-Wesley, 1988. See the accompaning file, LIMITATIONS, for restrictions regarding modification and redistribution of this program in source or binary form. 
********************************************/

/*$Log:	symtype.h,v $
 * Revision 2.1  91/04/08  08:24:14  brennan
 * VERSION 0.97
 * 
*/

/* types related to symbols are defined here */

#ifndef  SYMTYPE_H
#define  SYMTYPE_H


/* struct to hold info about builtins */
typedef struct {
char *name ;
PF_CP  fp ;  /* ptr to function that does the builtin */
unsigned char min_args, max_args ;
/* info for parser to check correct number of arguments */
} BI_REC ;

/*---------------------------
   structures and types for arrays
 *--------------------------*/

/* array hash nodes */
typedef  struct anode {
struct anode *link ;
STRING *sval ;
CELL   *cp ;
}  ANODE, **ARRAY ;

/* note ARRAY is a ptr to a hash table */

CELL *PROTO(array_find, (ARRAY,void *, int) ) ;
int PROTO(array_test, (ARRAY, STRING *) ) ;
INST *PROTO(array_loop, (INST *, CELL *, CELL *) ) ;
void PROTO(array_delete, (ARRAY, STRING *) ) ;
CELL *PROTO(array_cat, (CELL *, int) ) ;
void PROTO(array_free, (ARRAY) ) ;

/* a new array is a zeroed table of A_HASH_PRIME ANODE pointers */
#define new_ARRAY()  (ARRAY)memset(zmalloc(A_HASH_PRIME *\
                        sizeof(ANODE*)), 0, A_HASH_PRIME*sizeof(ANODE*))

extern  ARRAY  Argv ;

/* for parsing  (i,j) in A  */
typedef  struct {
INST *start ;
int cnt ;
} ARG2_REC ;

/*------------------------
  user defined functions
  ------------------------*/

typedef  struct fblock {
char *name ;
INST *code  ;
unsigned short nargs ;
char *typev ;  /* array of size nargs holding types */
}  FBLOCK ;   /* function block */

void  PROTO(add_to_fdump_list, (FBLOCK *) ) ;
void  PROTO( fdump, (void) ) ;

/*-------------------------
  elements of the symbol table
  -----------------------*/

#define  ST_NONE 0
#define  ST_VAR   1
#define  ST_KEYWORD   2
#define  ST_BUILTIN 3 /* a pointer to a builtin record */
#define  ST_ARRAY   4 /* a void * ptr to a hash table */
#define  ST_FIELD   5  /* a cell ptr to a field */
#define  ST_FUNCT   6
#define  ST_LENGTH  7  /* length is special */
#define  ST_LOCAL_NONE  8
#define  ST_LOCAL_VAR   9
#define  ST_LOCAL_ARRAY 10

/* true for the three ST_LOCAL_ types, which are numbered last */
#define  is_local(stp)   ((stp)->type>=ST_LOCAL_NONE)

typedef  struct {
char *name ;
char type ;   /* one of the ST_ values above */
unsigned char offset ;  /* offset in stack frame for local vars */
union {
CELL *cp ;
int   kw ;
PF_CP fp ;
BI_REC *bip ;
ARRAY  array ;
FBLOCK  *fbp ;
} stval ;
}  SYMTAB ;


/*****************************
 structures for type checking function calls
 ******************************/

typedef  struct ca_rec  {
struct ca_rec  *link ;
short type ;
short arg_num ;  /* position in callee's stack */
/*---------  this data only set if we'll  need to patch -------*/
/* happens if argument is an ID or type ST_NONE or ST_LOCAL_NONE */

int call_offset ;
/* where the type is stored */
SYMTAB  *sym_p ;  /* if type is ST_NONE  */
char *type_p ;  /* if type  is ST_LOCAL_NONE */
}  CA_REC  ; /* call argument record */

/* type field of CA_REC matches with ST_ types */
#define   CA_EXPR       ST_LOCAL_VAR
#define   CA_ARRAY      ST_LOCAL_ARRAY

typedef  struct fcall {
struct fcall *link ;
FBLOCK  *callee ;
short   call_scope ;
FBLOCK  *call ;  /* only used if call_scope == SCOPE_FUNCT */
INST    *call_start ; /* computed later as code may be moved */
CA_REC  *arg_list ;
short   arg_cnt_checked ;
unsigned line_no ; /* for error messages */
} FCALL_REC ;

extern  FCALL_REC  *resolve_list ;

void PROTO(resolve_fcalls, (void) ) ;
void PROTO(check_fcall, (FBLOCK*,int,FBLOCK*,CA_REC*,unsigned) ) ;

/* hash.c */
unsigned  PROTO( hash, (char *) ) ;
SYMTAB *PROTO( insert, (char *) ) ;
SYMTAB *PROTO( find, (char *) ) ;
SYMTAB *PROTO( save_id, (char *) ) ;
void    PROTO( restore_ids, (void) ) ;

/* error.c */
void  PROTO(type_error, (SYMTAB *) ) ;

#endif  /* SYMTYPE_H */
@//E*O*F mawk0.97/symtype.h//
chmod u=rw,g=r,o=r mawk0.97/symtype.h
echo x - mawk0.97/types.h
sed 's/^@//' > "mawk0.97/types.h" <<'@//E*O*F mawk0.97/types.h//'

/********************************************
types.h
copyright 1991, Michael D. Brennan

This is a source file for mawk, an implementation of
the Awk programming language as defined in
Aho, Kernighan and Weinberger, The AWK Programming Language,
Addison-Wesley, 1988.

See the accompaning file, LIMITATIONS, for restrictions regarding modification and redistribution of this program in source or binary form. ********************************************/ /* $Log: types.h,v $ * Revision 2.1 91/04/08 08:24:15 brennan * VERSION 0.97 * */ /* types.h */ #ifndef TYPES_H #define TYPES_H #if HAVE_VOID_PTR typedef void *PTR ; #else typedef char *PTR ; #endif #include "sizes.h" /* CELL types */ #define C_NOINIT 0 #define C_DOUBLE 1 #define C_STRING 2 #define C_STRNUM 3 #define C_MBSTRN 4 /*could be STRNUM, has not been checked */ #define C_RE 5 #define C_SPACE 6 /* split on space */ #define C_SNULL 7 /* split on the empty string */ #define C_REPL 8 /* a replacement string '\&' changed to & */ #define C_REPLV 9 /* a vector replacement -- broken on & */ #define NUM_CELL_TYPES 10 /* these defines are used to check types for two CELLs which are adjacent in memory */ #define TWO_NOINITS (2*(1<<C_NOINIT)) #define TWO_DOUBLES (2*(1<<C_DOUBLE)) #define TWO_STRINGS (2*(1<<C_STRING)) #define TWO_STRNUMS (2*(1<<C_STRNUM)) #define TWO_MBSTRNS (2*(1<<C_MBSTRN)) #define NOINIT_AND_DOUBLE ((1<<C_NOINIT)+(1<<C_DOUBLE)) #define NOINIT_AND_STRING ((1<<C_NOINIT)+(1<<C_STRING)) #define NOINIT_AND_STRNUM ((1<<C_NOINIT)+(1<<C_STRNUM)) #define DOUBLE_AND_STRING ((1<<C_DOUBLE)+(1<<C_STRING)) #define DOUBLE_AND_STRNUM ((1<<C_STRNUM)+(1<<C_DOUBLE)) #define STRING_AND_STRNUM ((1<<C_STRING)+(1<<C_STRNUM)) #define NOINIT_AND_MBSTRN ((1<<C_NOINIT)+(1<<C_MBSTRN)) #define DOUBLE_AND_MBSTRN ((1<<C_DOUBLE)+(1<<C_MBSTRN)) #define STRING_AND_MBSTRN ((1<<C_STRING)+(1<<C_MBSTRN)) #define STRNUM_AND_MBSTRN ((1<<C_STRNUM)+(1<<C_MBSTRN)) typedef struct { unsigned short ref_cnt ; unsigned short len ; char str[4] ; } STRING ; typedef struct cell { short type ; short vcnt ; /* only used if type == C_REPLV */ PTR ptr ; double dval ; } CELL ; /* all builtins are passed the evaluation stack pointer and return its new value, here is the type */ #ifdef __STDC__ typedef CELL *(*PF_CP)(CELL 
*) ; #else typedef CELL *(*PF_CP)() ; #endif /* an element of code (instruction) */ typedef union { int op ; PTR ptr ; } INST ; /* a scratch buffer type */ union tbuff { PTR ptr_buff[MAX_FIELD] ; char string_buff[TEMP_BUFF_SZ + BUFFSZ + 1] ; } ; #endif @//E*O*F mawk0.97/types.h// chmod u=rw,g=r,o=r mawk0.97/types.h echo x - mawk0.97/zmalloc.c sed 's/^@//' > "mawk0.97/zmalloc.c" <<'@//E*O*F mawk0.97/zmalloc.c//' /******************************************** zmalloc.c copyright 1991, Michael D. Brennan This is a source file for mawk, an implementation of the Awk programming language as defined in Aho, Kernighan and Weinberger, The AWK Programming Language, Addison-Wesley, 1988. See the accompaning file, LIMITATIONS, for restrictions regarding modification and redistribution of this program in source or binary form. ********************************************/ /*$Log: zmalloc.c,v $ * Revision 2.2 91/04/09 12:39:45 brennan * added static to funct decls to satisfy STARDENT compiler * * Revision 2.1 91/04/08 08:24:17 brennan * VERSION 0.97 * */ /* zmalloc.c */ #include "mawk.h" #include "zmalloc.h" void PROTO( mawk_exit, (int) ) ; /* zmalloc() gets mem from malloc() in CHUNKS of 2048 bytes and cuts these blocks into smaller pieces that are multiples of eight bytes. When a piece is returned via zfree(), it goes on a linked linear list indexed by its size. The lists are an array, pool[]. E.g., if you ask for 22 bytes with p = zmalloc(22), you actually get a piece of size 24. When you free it with zfree(p,22) , it is added to the list at pool[2]. */ #define ZBLOCKSZ 8 #define ZSHIFT 3 #define POOLSZ 16 #define CHUNK 256 /* number of blocks to get from malloc */ static PTR PROTO( emalloc, (unsigned) ) ; void PROTO( errmsg, (int , char *, ...) 
) ; static PTR emalloc(size) unsigned size ; { PTR p ; if( !(p = malloc(size)) ) { errmsg(0, "out of memory") ; mawk_exit(1) ; } return p ; } typedef union zblock { char dummy[ZBLOCKSZ] ; union zblock *link ; } ZBLOCK ; /* ZBLOCKS of sizes 1, 2, ... 16 which is bytes of sizes 8, 16, ... , 128 are stored on the linked linear lists in pool[0], pool[1], ... , pool[15] */ static ZBLOCK *pool[POOLSZ] ; PTR zmalloc( size ) unsigned size ; { register unsigned blocks ; register ZBLOCK *p ; static unsigned amt_avail ; static ZBLOCK *avail ; if ( size > POOLSZ * ZBLOCKSZ ) return emalloc(size) ; blocks = (size >> ZSHIFT) + ((size & (ZBLOCKSZ-1)) != 0) ; if ( p = pool[blocks-1] ) { pool[blocks-1] = p->link ; return (PTR) p ; } if ( blocks > amt_avail ) { if ( amt_avail ) /* free avail */ { avail->link = pool[--amt_avail] ; pool[amt_avail] = avail ; } if ( !(avail = (ZBLOCK *) malloc(CHUNK*ZBLOCKSZ)) ) { /* if we get here, almost out of memory */ amt_avail = 0 ; return emalloc(size) ; } amt_avail = CHUNK ; } /* get p from the avail pile */ p = avail ; avail += blocks ; amt_avail -= blocks ; return (PTR) p ; } void zfree( p, size) register PTR p ; unsigned size ; { register int index ; ; if ( size > POOLSZ * ZBLOCKSZ ) free(p) ; else { index = (size >> ZSHIFT) + ((size & (ZBLOCKSZ-1)) != 0) - 1; ((ZBLOCK *) p)->link = pool[index] ; pool[index] = (ZBLOCK *) p ; } } PTR zrealloc( p, old_size, new_size ) register PTR p ; unsigned old_size, new_size ; { register PTR q ; (void) memcpy(q = zmalloc(new_size), p, old_size < new_size ? old_size : new_size) ; zfree(p, old_size) ; return q ; } @//E*O*F mawk0.97/zmalloc.c// chmod u=rw,g=r,o=r mawk0.97/zmalloc.c echo x - mawk0.97/zmalloc.h sed 's/^@//' > "mawk0.97/zmalloc.h" <<'@//E*O*F mawk0.97/zmalloc.h//' /******************************************** zmalloc.h copyright 1991, Michael D. 
Brennan

This is a source file for mawk, an implementation of
the Awk programming language as defined in
Aho, Kernighan and Weinberger, The AWK Programming Language,
Addison-Wesley, 1988.

See the accompaning file, LIMITATIONS, for restrictions
regarding modification and redistribution of this
program in source or binary form.
********************************************/

/*$Log:	zmalloc.h,v $
 * Revision 2.1  91/04/08  08:24:19  brennan
 * VERSION 0.97
 * 
*/

/* zmalloc.h */

#ifndef  ZMALLOC_H
#define  ZMALLOC_H

#ifdef   __STDC__
#include  <stdlib.h>
#include  <string.h>   /* memcpy() */

#else

/* pre-ANSI compilers: declare the library functions by hand */
PTR memcpy(), malloc(), realloc() ;
void free() ;
#endif


/* the size arguments are byte counts */
PTR  PROTO( zmalloc, (unsigned) ) ;
void PROTO( zfree, (PTR, unsigned) ) ;
PTR  PROTO( zrealloc , (PTR,unsigned,unsigned) ) ;


#endif  /* ZMALLOC_H */
@//E*O*F mawk0.97/zmalloc.h//
chmod u=rw,g=r,o=r mawk0.97/zmalloc.h
echo mkdir - mawk0.97/rexp
mkdir mawk0.97/rexp
chmod u=rwx,g=rx,o=rx mawk0.97/rexp
echo x - mawk0.97/rexp/Makefile
sed 's/^@//' > "mawk0.97/rexp/Makefile" <<'@//E*O*F mawk0.97/rexp/Makefile//'

####################################
# This is a makefile for mawk,
# an implementation of AWK (1988).
####################################
#
#
# This builds a regular expression library
# Remove the -DMAWK and the library has general use.
# (Even if left in, the diff is very small)
#

CFLAGS = -O -DMAWK

C=rexp.c rexp0.c rexp1.c rexp2.c rexp3.c rexpdb.c

regexp.a : $(C)
	rm -f *.o
	cc -c $(CFLAGS) $?
	ar r regexp.a *.o
	rm -f *.o
@//E*O*F mawk0.97/rexp/Makefile//
chmod u=rw,g=r,o=r mawk0.97/rexp/Makefile
echo x - mawk0.97/rexp/rexp.c
sed 's/^@//' > "mawk0.97/rexp/rexp.c" <<'@//E*O*F mawk0.97/rexp/rexp.c//'

/********************************************
rexp.c
copyright 1991, Michael D. Brennan

This is a source file for mawk an implementation of
the Awk programming language as defined in
Aho, Kernighan and Weinberger, The AWK Programming Language,
Addison-Wesley, 1988.

See the accompaning file, LIMITATIONS, for restrictions regarding modification and redistribution of this program in source or binary form. ********************************************/ /* rexp.c */ /* op precedence parser for regular expressions */ #include "rexp.h" /* static prototypes */ void PROTO( op_pop, (void) ) ; /* DATA */ int REerrno ; char *REerrlist[] = { (char *) 0 , /* 1 */ "missing '('", /* 2 */ "missing ')'", /* 3 */ "bad class -- [], [^] or [" , /* 4 */ "missing operand" , /* 5 */ "resource exhaustion -- regular expression too large", /* 6 */ "null regular expression" } ; /* E5 is very unlikely to occur */ /* This table drives the operator precedence parser */ static int table[8][8] = { /* 0 | CAT * + ? ( ) */ /* 0 */ 0, L, L, L, L, L, L, E1, /* | */ G, G, L, L, L, L, L, G, /* CAT*/ G, G, G, L, L, L, L, G, /* * */ G, G, G, G, G, G, E7, G, /* + */ G, G, G, G, G, G, E7, G, /* ? */ G, G, G, G, G, G, E7, G, /* ( */ E2, L, L, L, L, L, L, EQ, /* ) */ G , G, G, G, G, G, E7, G } ; /*==================================== THE STACKS ==========================*/ typedef struct { int token ; int prec ; } OP ; #define STACKSZ 96 /*--------------------------- m_ptr -> top filled slot on the m_stack op_ptr -> top filled slot on op_stack, initially this is only half filled with the token the precedence is added later *----------------------*/ static OP *op_stack, *op_limit, *op_ptr ; static MACHINE *m_stack, *m_limit, *m_ptr ; /* inline for speed on the m_stack */ #define m_pop() (m_ptr<m_stack?RE_error_trap(-E4): *m_ptr--) #define m_push(x) if(++m_ptr==m_limit) RE_error_trap(-E5);*m_ptr=(x) /*=======================*/ static jmp_buf err_buf ; /* used to trap on error */ MACHINE RE_error_trap(x) /* return is dummy to make macro OK */ int x ; { while ( m_ptr >= m_stack ) RE_free( m_ptr-- -> start ) ; RE_free(m_stack) ; RE_free(op_stack) ; REerrno = x ; longjmp(err_buf, 1 ) ; /* dummy return to make compiler happy */ return *m_stack ; } VOID *REcompile(re) char *re ; 
{ MACHINE m ; register int t ; RE_lex_init(re) ; if ( *re == 0 ) { STATE *p = (STATE *) RE_malloc( sizeof(STATE) ) ; p->type = M_ACCEPT ; return (VOID *) p ; } if ( setjmp(err_buf) ) return (VOID *) 0 ; /* global error trap */ /* initialize the stacks */ m_stack =(MACHINE *) RE_malloc(STACKSZ*sizeof(MACHINE)) ; m_ptr = m_stack - 1 ; m_limit = m_stack + STACKSZ ; op_ptr = op_stack = (OP *) RE_malloc(STACKSZ*sizeof(OP)) ; op_ptr->token = 0 ; op_limit = op_stack + STACKSZ ; t = RE_lex(&m) ; while( 1 ) { switch( t ) { case T_STR : case T_ANY : case T_U : case T_START : case T_END : case T_CLASS : m_push(m) ; break ; case 0 : /* end of reg expr */ if ( op_ptr -> token == 0 ) /* done */ { m = m_pop() ; if ( m_ptr < m_stack ) /* DONE !!! */ { free(m_stack) ; free(op_stack) ; return (VOID *) m.start ; } /* machines still on the stack */ RE_panic("values still on machine stack") ; } /* case 0 falls thru to default which is operator case */ default: if ( (op_ptr -> prec = table[op_ptr -> token][t]) == G ) { while ( op_ptr -> prec != L ) op_pop() ; continue ; } if ( op_ptr -> prec < 0 ) if ( op_ptr->prec == E7 ) RE_panic("parser returns E7") ; else RE_error_trap(-op_ptr->prec) ; if ( ++op_ptr == op_stack + STACKSZ ) /* stack overflow */ RE_error_trap(-E5) ; op_ptr -> token = t ; } t = RE_lex(&m) ; } } static void op_pop() { register int t ; MACHINE m, n ; if ( (t = op_ptr-- -> token) >= T_LP ) return ; /* nothing to do with '(' or ')' */ if ( t <= T_CAT ) /* binary operation */ n = m_pop() ; m = m_pop() ; switch( t ) { case T_CAT : RE_cat(&m, &n) ; break ; case T_OR : RE_or( &m, &n) ; break ; case T_STAR : RE_close( &m) ; break ; case T_PLUS : RE_poscl( &m ) ; break ; case T_Q : RE_01( &m ) ; break ; default : RE_panic("strange token popped from op_stack") ; } m_push(m) ; } /* getting here means a logic flaw or unforeseen case */ void RE_panic( s ) char *s ; { fprintf( stderr, "REcompile() - panic: %s\n", s) ; exit(100) ; } @//E*O*F mawk0.97/rexp/rexp.c// chmod u=rw,g=r,o=r 
mawk0.97/rexp/rexp.c
echo x - mawk0.97/rexp/rexp.h
sed 's/^@//' > "mawk0.97/rexp/rexp.h" <<'@//E*O*F mawk0.97/rexp/rexp.h//'
/********************************************
rexp.h
copyright 1991, Michael D. Brennan

This is a source file for mawk an implementation of
the Awk programming language as defined in
Aho, Kernighan and Weinberger, The AWK Programming Language,
Addison-Wesley, 1988.

See the accompanying file, LIMITATIONS, for restrictions
regarding modification and redistribution of this
program in source or binary form.
********************************************/

/*  rexp.h    */

/*  shared declarations for the regular expression package  */

#ifndef  REXP_H
#define  REXP_H

#include  <string.h>
#include  <stdio.h>
#include  <setjmp.h>

/* PROTO(name, args) gives a full prototype to an ANSI compiler and an
   old style declaration without parameters to anything else */
#ifndef   PROTO
#ifdef    __STDC__
#define  PROTO(name, args)   name  args
#else
#define  PROTO(name, args)   name()
#endif
#endif

/* VOID * is the generic pointer type: void * under ANSI, char * before */
#ifdef  __STDC__
#define  VOID   void
#include  <stdlib.h>
#else
#define  VOID   char
char *malloc(), *realloc() ;
void free() ;
#endif

/*  user can change this  */
/*  allocation hooks used throughout the package ; the defaults below
    are declared here and trap through RE_error_trap() on failure  */
#define  RE_malloc(x)     RE_xmalloc(x)
#define  RE_realloc(x,l)  RE_xrealloc(x,l)
#define  RE_free(x)       free(x)

VOID  *PROTO( RE_xmalloc, (unsigned) ) ;
VOID  *PROTO( RE_xrealloc, (void *,unsigned) ) ;

/*  finite machine  state types  */

#define  M_STR     	0
#define  M_CLASS   	1
#define  M_ANY     	2
#define  M_START   	3
#define  M_END     	4
#define  M_U       	5
#define  M_1J      	6
#define  M_2JA     	7
#define  M_2JB     	8
#define  M_ACCEPT  	9

/* flag values folded into a state type ; the scanner adds END_ON to
   the type of an operand that is followed by '$'.
   NOTE(review): U_ON is presumably added the same way for the
   "u flag" -- confirm against the matcher  */
#define  U_ON      	10

#define  U_OFF     0
#define  END_OFF   0
#define  END_ON    (2*U_ON)

typedef  unsigned char  BV[32] ;  /* bit vector : 32 bytes, one bit
				     for each of 256 character codes */

/*  one state of a compiled machine  */
typedef  struct
{ char type ;		/* one of the M_ values above, plus flags */
  unsigned char  len ;  /* used for M_STR  */
  union
   {
     char  *str  ;  /* string */
     BV    *bvp ;   /*  class  */
     int   jump ;
   }  data ;
}     STATE  ;

#define  STATESZ  (sizeof(STATE))

/*  a machine is an array of states ; start is the first state and
    stop the last one  */
typedef  struct
{ STATE  *start, *stop ; }   MACHINE ;

/*  tokens   */
#define  T_OR   1       /* | */
#define  T_CAT  2
#define  T_STAR 3       /* * */
#define  T_PLUS 4       /* + */
#define  T_Q    5       /* ? */
#define  T_LP   6       /* ( */
#define  T_RP   7       /* ) */
#define  T_START 8      /* ^ */
#define  T_END  9       /* $ */
#define  T_ANY  10      /* .
*/
#define  T_CLASS 11     /* starts with [ */
#define  T_SLASH 12     /* \ */
#define  T_CHAR  13     /* all the rest */
#define  T_STR   14
#define  T_U     15

/*  precedences and error codes  */
/*  L, EQ and G are the non-error entries of the parse table in
    rexp.c ; the E codes are negative so they can share the table and
    are negated to give the REerrno value  */
#define  L   0
#define  EQ  1
#define  G   2
#define  E1  (-1)
#define  E2  (-2)
#define  E3  (-3)
#define  E4  (-4)
#define  E5  (-5)
#define  E6  (-6)
#define  E7  (-7)

/* reported by the default allocators ; same value as -E5 */
#define  MEMORY_FAILURE      5

/* struct for the run time stack */
typedef struct {
STATE *m ;   /* save the machine ptr */
int    u ;   /* save the u_flag */
char  *s ;   /* save the active string ptr */
char  *ss ;  /* save the match start -- only used by REmatch */
}   RT_STATE ;   /* run time state */

/*  error  trap   */
extern int REerrno ;
MACHINE   PROTO(RE_error_trap, (int) ) ;

/*  builders for single operand machines  */
MACHINE   PROTO( RE_u, (void) ) ;
MACHINE   PROTO( RE_start, (void) ) ;
MACHINE   PROTO( RE_end, (void) ) ;
MACHINE   PROTO( RE_any, (void) ) ;
MACHINE   PROTO( RE_str, (char *, unsigned) ) ;
MACHINE   PROTO( RE_class, (BV *) ) ;

/*  operators : the result replaces the first argument  */
void      PROTO( RE_cat, (MACHINE *, MACHINE *) ) ;
void      PROTO( RE_or, (MACHINE *, MACHINE *) ) ;
void      PROTO( RE_close, (MACHINE *) ) ;
void      PROTO( RE_poscl, (MACHINE *) ) ;
void      PROTO( RE_01, (MACHINE *) ) ;
void      PROTO( RE_panic, (char *) ) ;
char     *PROTO( str_str, (char *, char *, unsigned) ) ;

/*  lexical scanner  */
void      PROTO( RE_lex_init , (char *) ) ;
int       PROTO( RE_lex , (MACHINE *) ) ;

/*  run time stack  */
void      PROTO( RE_run_stack_init, (void) ) ;
RT_STATE *PROTO( RE_new_run_stack, (void) ) ;

#endif   /* REXP_H  */
@//E*O*F mawk0.97/rexp/rexp.h//
chmod u=rw,g=r,o=r mawk0.97/rexp/rexp.h
echo x - mawk0.97/rexp/rexp0.c
sed 's/^@//' > "mawk0.97/rexp/rexp0.c" <<'@//E*O*F mawk0.97/rexp/rexp0.c//'
/********************************************
rexp0.c
copyright 1991, Michael D. Brennan

This is a source file for mawk an implementation of
the Awk programming language as defined in
Aho, Kernighan and Weinberger, The AWK Programming Language,
Addison-Wesley, 1988.

See the accompanying file, LIMITATIONS, for restrictions
regarding modification and redistribution of this
program in source or binary form.
********************************************/ /* rexp0.c */ /* lexical scanner */ #include "rexp.h" /* static functions */ static int PROTO( do_str, (int, char **, MACHINE *) ) ; static int PROTO( do_class, (char **, MACHINE *) ) ; static int PROTO( escape, (char **) ) ; static BV *PROTO( store_bvp, (BV *) ) ; static int PROTO( ctohex, (int) ) ; #ifndef EG /* if EG make next array visible */ static #endif char RE_char2token[ '|' + 1 ] = { 0,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13, 13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,9,13,13,13, 6,7,3,4,13,13,10,13,13,13,13,13,13,13,13,13,13,13,13,13,13, 13,13,5,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13, 13,13,13,13,13,13,13,13,13,13,11,12,13,8,13,13,13,13,13,13, 13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13, 13,13,13,13,1} ; #define char2token(x) ( (unsigned char)(x) > '|' ? T_CHAR : RE_char2token[x] ) #define NOT_STARTED (-1) static int prev ; static char *lp ; /* ptr to reg exp string */ static unsigned re_len ; void RE_lex_init( re ) char *re ; { lp = re ; re_len = strlen(re) + 1 ; prev = NOT_STARTED ; RE_run_stack_init() ; } int RE_lex( mp ) MACHINE *mp ; { register int c ; switch( c = char2token(*lp) ) { case T_OR : case T_PLUS : case T_STAR : case T_Q : case T_RP : lp++ ; return prev = c ; case T_SLASH : if ( lp[1] != 0 ) break ; /* else fall thru */ case 0 : return 0 ; case T_LP : switch( prev ) { case T_CHAR : case T_STR : case T_ANY : case T_CLASS : case T_START : case T_RP : case T_PLUS : case T_STAR : case T_Q : case T_U : return prev = T_CAT ; default : lp++ ; return prev = T_LP ; } } /* *lp is an operand, but implicit cat op is possible */ switch( prev ) { case NOT_STARTED : case T_OR : case T_LP : case T_CAT : switch( c ) { case T_ANY : { static plus_is_star_flag = 0 ; if ( * ++lp == '*' ) { lp++ ; *mp = RE_u() ; return prev = T_U ; } else if ( *lp == '+' ) if ( plus_is_star_flag ) { lp++ ; *mp = RE_u() ; plus_is_star_flag = 0 ; return prev = T_U ; } else { 
plus_is_star_flag = 1 ; lp-- ; *mp = RE_any() ; return prev = T_ANY ; } else { *mp = RE_any() ; prev = T_ANY ; } } break ; case T_SLASH : lp++ ; c = escape(&lp) ; prev = do_str(c, &lp, mp) ; break ; case T_CHAR : c = *lp++ ; prev = do_str(c, &lp, mp) ; break ; case T_CLASS : prev = do_class(&lp, mp) ; break ; case T_START : *mp = RE_start() ; lp++ ; prev = T_START ; break ; case T_END : lp++ ; *mp = RE_end() ; return prev = T_END ; default : RE_panic("bad switch in RE_lex") ; } break ; default : /* don't advance the pointer, return T_CAT */ return prev = T_CAT ; } /* check for end character */ if ( *lp == '$' ) { mp->start->type += END_ON ; lp++ ; } return prev ; } static int do_str( c, pp, mp) int c ; /* the first character */ char **pp ; /* where to put the re_char pointer on exit */ MACHINE *mp ; /* where to put the string machine */ { register char *p , *s ; char *str ; unsigned len ; p = *pp ; s = str = RE_malloc( re_len ) ; *s++ = c ; len = 1 ; while ( 1 ) { char *save ; switch( char2token(*p) ) { case T_CHAR : *s++ = *p++ ; break ; case T_SLASH : save = ++p ; *s++ = escape(&save) ; p = save ; break ; default : goto out ; } len++ ; } out: /* if len > 1 and we failed on a ? + or * , need to back up */ if ( len > 1 && (*p == '*' || *p == '+' || *p == '?' ) ) { len-- ; p-- ; s-- ; } *s = 0 ; *pp = p ; *mp = RE_str((char *) RE_realloc(str, len+1) , len) ; return T_STR ; } /*-------------------------------------------- BUILD A CHARACTER CLASS *---------------------------*/ #define on( b, x) ( (b)[(x)>>3] |= ( 1 << ((x)&7) )) static void PROTO(block_on, (BV,int,int) ) ; static void block_on( b, x, y) BV b ; int x, y ; /* must call with x<=y */ { int lo = x >> 3 ; int hi = y >> 3 ; int i, j, bit ; if ( lo == hi ) { j = x&7 ; bit = 1 << j ; i = (y&7) - j + 1 ; for ( ; i ; i-- , bit <<= 1 ) b[lo] |= bit ; } else { for ( i = lo + 1 ; i <= hi - 1 ; i++ ) b[i] = 0xff ; b[lo] |= ( 0xff << (x&7) ) ; b[hi] |= ~( 0xff << ((y&7)+1)) ; } } /* build a BV for a character class. 
*start points at the '[' on exit: *start points at the character after ']' mp points at a machine that recognizes the class */ static int do_class( start, mp) char **start ; MACHINE *mp ; { register char *p ; register BV *bvp ; int prev ; char *q , *t; int cnt ; int comp_flag ; p = (*start) + 1 ; if ( *p == ']' || *p == '^' && *(p+1) == ']' ) RE_error_trap(-E3) ; while ( 1 ) /* find the back of the class */ { if ( ! (q = strchr(p,']')) ) /* no closing bracket */ RE_error_trap(-E3) ; p = q-1 ; cnt = 0 ; while ( *p == '\\') { cnt++ ; p-- ; } if ( (cnt & 1) == 0 ) /* even number of \ */ break ; p = q+1 ; } /* q now pts at the back of the class */ p = (*start) + 1 ; *start = q + 1 ; bvp = (BV *) RE_malloc( sizeof(BV) ) ; (void) memset( bvp, 0, sizeof(BV) ) ; comp_flag = *p == '^' ? p++ , 1 : 0 ; prev = -1 ; /* indicates - cannot be part of a range */ while ( p < q ) { switch( *p ) { case '\\' : t = ++p ; prev = escape(&t) ; on(*bvp, prev) ; p = t ; continue ; case '-' : if ( prev == -1 || p+1 == q || prev > *(p+1) ) { prev = '-' ; on(*bvp, '-') ; } else { p++ ; block_on(*bvp, prev, *p) ; prev = -1 ; } break ; default : prev = *p ; on(*bvp, *p) ; break ; } p++ ; } if ( comp_flag ) for ( p = (char *) bvp ; p < (char *) bvp + sizeof(BV) ; p++) *p = ~*p ; /* make sure zero is off */ (*bvp)[0] &= 0xfe ; *mp = RE_class( store_bvp( bvp ) ) ; return T_CLASS ; } /* storage for bit vectors so they can be reused , stored in an unsorted linear array the array grows as needed */ #define BV_GROWTH 6 static BV *store_bvp( bvp ) BV *bvp ; { static BV **bv_base, **bv_limit ; static BV **bv_next ; /* next empty slot in the array */ register BV **p ; unsigned t ; if ( bv_next == bv_limit ) /* need to grow */ { if ( ! 
bv_base ) /* first growth */ { t = 0 ; bv_base = (BV**)RE_malloc(BV_GROWTH*sizeof(BV*)) ; } else { t = bv_next - bv_base ; bv_base = (BV**) RE_realloc(bv_base, (t+BV_GROWTH)*sizeof(BV*)) ; } bv_next = bv_base + t ; bv_limit = bv_next + BV_GROWTH ; } /* put bvp in bv_next as a sentinal */ *bv_next = bvp ; p = bv_base ; while ( memcmp(*p, bvp, sizeof(BV)) ) p++ ; if ( p == bv_next ) /* it is new */ bv_next++ ; else /* we already have it */ RE_free(bvp) ; return *p ; } /* ---------- convert escape sequences -------------*/ #define isoctal(x) ((x)>='0'&&(x)<='7') #define NOT_HEX 16 static char hex_val['f' - 'A' + 1] = { 10,11,12,13,14,15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 10,11,12,13,14,15 } ; /* interpret 1 character as hex */ static int ctohex( c ) register int c ; { int t ; if ( c >= '0' && c <= '9' ) return c - '0' ; if ( c >= 'A' && c <= 'f' && ( t = hex_val[c-'A'] )) return t ; return NOT_HEX ; } static char escape_test[] = "n\nt\tb\br\rf\fa\07v\013" ; /*----------------- return the char and move the pointer forward on entry *s -> at the character after the slash *-------------------*/ static int escape(start_p) char **start_p ; { register char *p = *start_p ; register unsigned x ; unsigned xx ; char *t ; if ( t = strchr(escape_test, *p) ) { *start_p = p + 1 ; return t[1] ; } if ( isoctal(*p) ) { x = *p++ - '0' ; if ( isoctal(*p) ) { x = (x<<3) + *p++ - '0' ; if ( isoctal(*p) ) x = (x<<3) + *p++ - '0' ; } *start_p = p ; return x & 0xff ; } if ( *p == 0 ) return 0 ; if ( *p++ == 'x' ) /* might be a hex digit */ { if ( (x = ctohex(*p)) == NOT_HEX ) { *start_p = p ; return 'x' ; } /* look for another hex digit */ if ( (xx = ctohex(* ++p)) != NOT_HEX ) { x = (x<<4) + xx ; p++ ; } *start_p = p ; return x ; } /* anything else \c -> c */ *start_p = p ; return p[-1] ; } @//E*O*F mawk0.97/rexp/rexp0.c// chmod u=rw,g=r,o=r mawk0.97/rexp/rexp0.c echo x - mawk0.97/rexp/rexp1.c sed 's/^@//' > "mawk0.97/rexp/rexp1.c" <<'@//E*O*F 
mawk0.97/rexp/rexp1.c//' /******************************************** rexp1.c copyright 1991, Michael D. Brennan This is a source file for mawk an implementation of the Awk programming language as defined in Aho, Kernighan and Weinberger, The AWK Programming Language, Addison-Wesley, 1988. See the accompaning file, LIMITATIONS, for restrictions regarding modification and redistribution of this program in source or binary form. ********************************************/ /* rexp1.c */ /* re machine operations */ #include "rexp.h" static MACHINE *PROTO( new_TWO , (int) ) ; static MACHINE *new_TWO(type) int type ; { static MACHINE x ; x.start = (STATE *) RE_malloc(2*STATESZ) ; x.stop = x.start + 1 ; x.start->type = type ; x.stop->type = M_ACCEPT ; return &x ; } ; /* build a machine that recognizes any */ MACHINE RE_any() { return * new_TWO(M_ANY) ; } /* build a machine that recognizes the start of string */ MACHINE RE_start() { return * new_TWO(M_START) ; } MACHINE RE_end() { return * new_TWO(M_END) ; } /* build a machine that recognizes a class */ MACHINE RE_class( bvp ) BV *bvp ; { register MACHINE *p = new_TWO(M_CLASS) ; p->start->data.bvp = bvp ; return *p ; } MACHINE RE_u() { return *new_TWO(M_U) ; } MACHINE RE_str( str, len) char *str ; unsigned len ; { register MACHINE *p = new_TWO(M_STR) ; p->start->len = len ; p->start->data.str = str ; return *p ; } /* replace m and n by a machine that recognizes mn */ void RE_cat( mp, np) MACHINE *mp, *np ; { unsigned sz1, sz2, sz ; sz1 = mp->stop - mp->start ; sz2 = np->stop - np->start + 1 ; sz = sz1 + sz2 ; mp->start = (STATE *) RE_realloc( mp->start, sz * STATESZ ) ; mp->stop = mp->start + (sz - 1) ; (void) memcpy( mp->start + sz1, np->start, sz2 * STATESZ ) ; RE_free( np->start ) ; } /* replace m by a machine that recognizes m|n */ void RE_or( mp, np) MACHINE *mp, *np ; { register STATE *p ; unsigned szm, szn ; szm = mp->stop - mp->start + 1 ; szn = np->stop - np->start + 1 ; p = (STATE *) RE_malloc( (szm+szn+1) * 
STATESZ ) ; (void) memcpy( p+1, mp->start, szm * STATESZ ) ; RE_free( mp->start) ; mp->start = p ; (mp->stop = p + szm + szn) -> type = M_ACCEPT ; p->type = M_2JA ; p->data.jump = szm+1 ; (void) memcpy( p + szm + 1 , np->start, szn * STATESZ) ; RE_free( np->start ) ; (p += szm)->type = M_1J ; p->data.jump = szn ; } /* UNARY OPERATIONS */ /* replace m by m* */ void RE_close( mp ) MACHINE *mp ; { register STATE *p ; unsigned sz ; sz = mp->stop - mp->start + 1 ; p = (STATE *) RE_malloc( (sz+2) * STATESZ ) ; (void) memcpy( p+1, mp->start, sz * STATESZ) ; RE_free( mp->start ) ; mp->start = p ; mp->stop = p + (sz+1) ; p->type = M_2JA ; p->data.jump = sz + 1 ; (p += sz) -> type = M_2JB ; p->data.jump = -(sz-1) ; (p+1)->type = M_ACCEPT ; } /* replace m by m+ (positive closure) */ void RE_poscl( mp ) MACHINE *mp ; { register STATE *p ; unsigned sz ; sz = mp->stop - mp->start + 1 ; mp->start = p = (STATE *) RE_realloc(mp->start , (sz+1) * STATESZ ) ; mp->stop = p + sz ; p += --sz ; p->type = M_2JB ; p->data.jump = -sz ; (p+1)->type = M_ACCEPT ; } /* replace m by m? (zero or one) */ void RE_01( mp ) MACHINE *mp ; { unsigned sz ; register STATE *p ; sz = mp->stop - mp->start + 1 ; p = (STATE *) RE_malloc( (sz+1) * STATESZ ) ; (void) memcpy( p+1, mp->start, sz * STATESZ) ; RE_free( mp->start ) ; mp->start = p ; mp->stop = p + sz ; p->type = M_2JB ; p->data.jump = sz ; } /*=================================== MEMORY ALLOCATION *==============================*/ VOID *RE_xmalloc( sz ) unsigned sz ; { register VOID *p ; if ( ! ( p = malloc(sz) ) ) RE_error_trap(MEMORY_FAILURE) ; return p ; } VOID *RE_xrealloc( p, sz) register VOID *p ; unsigned sz ; { if ( ! 
( p = realloc( p, sz) ) ) RE_error_trap(MEMORY_FAILURE) ; return p ; } @//E*O*F mawk0.97/rexp/rexp1.c// chmod u=rw,g=r,o=r mawk0.97/rexp/rexp1.c echo x - mawk0.97/rexp/rexp2.c sed 's/^@//' > "mawk0.97/rexp/rexp2.c" <<'@//E*O*F mawk0.97/rexp/rexp2.c//' /******************************************** rexp2.c copyright 1991, Michael D. Brennan This is a source file for mawk an implementation of the Awk programming language as defined in Aho, Kernighan and Weinberger, The AWK Programming Language, Addison-Wesley, 1988. See the accompaning file, LIMITATIONS, for restrictions regarding modification and redistribution of this program in source or binary form. ********************************************/ /* rexp2.c */ /* test a string against a machine */ #include "rexp.h" #include <string.h> /* statics */ static RT_STATE *PROTO(slow_push,(RT_STATE *,STATE*,char*,int)); /* check that a bit is on */ #define ison(b,x) ( (b)[(x)>>3] & ( 1 << ((x)&7) )) RT_STATE *RE_run_stack_base; RT_STATE *RE_run_stack_limit ; /* for statistics and debug */ static RT_STATE *stack_max ; void RE_run_stack_init() { if ( !RE_run_stack_base ) { RE_run_stack_base = (RT_STATE *) RE_malloc(sizeof(RT_STATE) * 16 ) ; RE_run_stack_limit = RE_run_stack_base + 16 ; stack_max = RE_run_stack_base-1 ; } } RT_STATE *RE_new_run_stack() { int oldsize = RE_run_stack_limit - RE_run_stack_base ; RE_run_stack_base = (RT_STATE *) RE_realloc( RE_run_stack_base , (oldsize+8) * sizeof(RT_STATE) ) ; RE_run_stack_limit = RE_run_stack_base + oldsize + 8 ; return stack_max = RE_run_stack_base + oldsize ; } static RT_STATE *slow_push(sp, m, s, u) RT_STATE *sp ; STATE *m ; char *s ; int u ; { if ( sp > stack_max ) if ( (stack_max = sp) == RE_run_stack_limit ) sp = RE_new_run_stack() ; sp->m = m ; sp->s = s ; sp->u = u ; return sp ; } #ifdef DEBUG void print_max_stack(f) FILE *f ; { fprintf(f, "stack_max = %d\n", stack_max-RE_run_stack_base+1) ; } #endif #ifdef DEBUG #define push(mx,sx,ux) stackp = slow_push(++stackp, mx, sx, 
ux) #else #define push(mx,sx,ux) if (++stackp == RE_run_stack_limit)\ stackp = slow_push(stackp,mx,sx,ux) ;\