brennan@ssc-vax.UUCP (Mike Brennan) (05/11/91)
------------------cut here---------------- scan_code['&'] = SC_AND ; scan_code['?'] = SC_QMARK ; scan_code[':'] = SC_COLON ; scan_code['['] = SC_LBOX ; scan_code[']'] = SC_RBOX ; scan_code['\\'] = SC_ESCAPE ; scan_code['.'] = SC_DOT ; scan_code['~'] = SC_MATCH ; scan_code['$'] = SC_DOLLAR ; for( p = scan_code + 'A' ; p <= scan_code + 'Z' ; p++ ) *p = *(p + 'a' - 'A') = SC_IDCHAR ; } void scan_print() { register char *p = scan_code ; register int c ; /* column */ register int r ; /* row */ printf("\n\n/* scancode.c */\n\n\n") ; printf( "char scan_code[256] = {\n" ) ; for( r = 1 ; r <= 16 ; r++) { for( c = 1 ; c <= 16 ; c++) { printf("%2d" , *p++) ; if ( r != 16 || c != 16 ) putchar(',') ; } putchar('\n') ; } printf("} ;\n") ; } main() { scan_init() ; scan_print() ; return 0 ; } @//E*O*F mawk0.97/makescan.c// chmod u=rw,g=r,o=r mawk0.97/makescan.c echo x - mawk0.97/matherr.c sed 's/^@//' > "mawk0.97/matherr.c" <<'@//E*O*F mawk0.97/matherr.c//' /******************************************** matherr.c copyright 1991, Michael D. Brennan This is a source file for mawk, an implementation of the Awk programming language as defined in Aho, Kernighan and Weinberger, The AWK Programming Language, Addison-Wesley, 1988. See the accompaning file, LIMITATIONS, for restrictions regarding modification and redistribution of this program in source or binary form. ********************************************/ /*$Log: matherr.c,v $ * Revision 2.1 91/04/08 08:23:31 brennan * VERSION 0.97 * */ #include "mawk.h" #include <math.h> #if FPE_TRAPS #include <signal.h> /* machine dependent changes might be needed here */ static void fpe_catch( signal, why) int signal, why ; { switch(why) { case FPE_ZERODIVIDE : rt_error("division by zero") ; case FPE_OVERFLOW : rt_error("floating point overflow") ; default : rt_error("floating point exception") ; } } void fpe_init() { (void) signal(SIGFPE, fpe_catch) ; } #else void fpe_init() { TURNOFF_FPE_TRAPS() ; } #endif #if HAVE_MATHERR #if ! FPE_TRAPS /* If we are not trapping math errors, we will shutup the library calls */ int matherr( e ) struct exception *e ; { return 1 ; } #else /* print error message and exit */ int matherr( e ) struct exception *e ; { char *error ; switch( e->type ) { case DOMAIN : case SING : error = "domain error" ; break ; case OVERFLOW : error = "overflow" ; break ; case TLOSS : case PLOSS : error = "loss of significance" ; break ; case UNDERFLOW : e->retval = 0.0 ; return 1 ; /* ignore it */ } if ( strcmp(e->name, "atan2") == 0 ) rt_error("atan2(%g,%g) : %s" , e->arg1, e->arg2, error ) ; else rt_error("%s(%g) : %s" , e->name, e->arg1, error) ; /* won't get here */ return 0 ; } #endif /* FPE_TRAPS */ #endif /* HAVE_MATHERR */ @//E*O*F mawk0.97/matherr.c// chmod u=rw,g=r,o=r mawk0.97/matherr.c echo x - mawk0.97/mawk.h sed 's/^@//' > "mawk0.97/mawk.h" <<'@//E*O*F mawk0.97/mawk.h//' /******************************************** mawk.h copyright 1991, Michael D. Brennan This is a source file for mawk, an implementation of the Awk programming language as defined in Aho, Kernighan and Weinberger, The AWK Programming Language, Addison-Wesley, 1988. See the accompaning file, LIMITATIONS, for restrictions regarding modification and redistribution of this program in source or binary form. ********************************************/ /* $Log: mawk.h,v $ * Revision 2.1 91/04/08 08:23:33 brennan * VERSION 0.97 * */ /* mawk.h */ #ifndef MAWK_H #define MAWK_H #include "machine.h" #ifdef DEBUG #define YYDEBUG 1 extern int yydebug ; /* print parse if on */ extern int dump_RE ; #endif extern int dump_code ; #ifdef __STDC__ #define PROTO(name, args) name args #undef HAVE_VOID_PTR #define HAVE_VOID_PTR 1 #else #define PROTO(name, args) name() #endif #include <stdio.h> #include <string.h> #include "types.h" /*---------------- * GLOBAL VARIABLES *----------------*/ /* some well known cells */ extern CELL cell_zero, cell_one ; extern STRING null_str ; /* a useful scratch area */ extern union tbuff temp_buff ; extern char *main_buff ; /* main file input buffer */ /* help with casts */ extern int pow2[] ; /* these are used by the parser, scanner and error messages from the compile */ extern int current_token ; extern unsigned token_lineno ; /* lineno of current token */ extern unsigned compile_error_count ; extern int paren_cnt, brace_cnt ; extern int print_flag, getline_flag ; /*---------*/ extern int errno ; extern char *progname ; /* for error messages */ /* macro to test the type of two adjacent cells */ #define TEST2(cp) (pow2[(cp)->type]+pow2[((cp)+1)->type]) /* macro to get at the string part of a CELL */ #define string(cp) ((STRING *)(cp)->ptr) #ifdef DEBUG #define cell_destroy(cp) DB_cell_destroy(cp) #else #define cell_destroy(cp) if ( (cp)->type >= C_STRING &&\ -- string(cp)->ref_cnt == 0 )\ zfree(string(cp),string(cp)->len+5);else #endif /* prototypes */ void PROTO( cast1_to_s, (CELL *) ) ; void PROTO( cast1_to_d, (CELL *) ) ; void PROTO( cast2_to_s, (CELL *) ) ; void PROTO( cast2_to_d, (CELL *) ) ; void PROTO( cast_to_RE, (CELL *) ) ; void PROTO( cast_for_split, (CELL *) ) ; void PROTO( check_strnum, (CELL *) ) ; void PROTO( cast_to_REPL, (CELL *) ) ; int PROTO( test, (CELL *) ) ; /* test for null non-null */ CELL *PROTO( cellcpy, (CELL *, CELL *) ) ; CELL *PROTO( repl_cpy, (CELL *, CELL *) ) ; void PROTO( DB_cell_destroy, (CELL *) ) ; void PROTO( overflow, (char *, unsigned) ) ; void PROTO( rt_overflow, (char *, unsigned) ) ; void PROTO( rt_error, ( char *, ...) ) ; void PROTO( mawk_exit, (int) ) ; void PROTO( da, (INST *, FILE *)) ; int PROTO( space_split, (char *) ) ; char *PROTO( str_str, (char*, char*, unsigned) ) ; int PROTO( re_split, (char *, PTR) ) ; char *PROTO( re_pos_match, (char *, PTR, unsigned *) ) ; void PROTO( exit, (int) ) ; int PROTO( close, (int) ) ; int PROTO( open, (char *,int, int) ) ; int PROTO( read, (int , PTR, unsigned) ) ; char *PROTO( getenv, (const char *) ) ; int PROTO ( parse, (void) ) ; int PROTO ( yylex, (void) ) ; int PROTO( yyparse, (void) ) ; void PROTO( yyerror, (char *) ) ; void PROTO( bozo, (char *) ) ; void PROTO( errmsg , (int, char*, ...) ) ; void PROTO( compile_error, ( char *, ...) ) ; INST *PROTO( execute, (INST *, CELL *, CELL *) ) ; char *PROTO( find_kw_str, (int) ) ; double strtod(), fmod() ; #endif /* MAWK_H */ @//E*O*F mawk0.97/mawk.h// chmod u=rw,g=r,o=r mawk0.97/mawk.h echo x - mawk0.97/memory.c sed 's/^@//' > "mawk0.97/memory.c" <<'@//E*O*F mawk0.97/memory.c//' /******************************************** memory.c copyright 1991, Michael D. Brennan This is a source file for mawk, an implementation of the Awk programming language as defined in Aho, Kernighan and Weinberger, The AWK Programming Language, Addison-Wesley, 1988. See the accompaning file, LIMITATIONS, for restrictions regarding modification and redistribution of this program in source or binary form. ********************************************/ /* $Log: memory.c,v $ * Revision 2.1 91/04/08 08:23:35 brennan * VERSION 0.97 * */ /* memory.c */ #include "mawk.h" #ifdef __TURBOC__ #define SUPPRESS_NEW_STRING_PROTO /* get compiler off our back on the definition of new_STRING() */ #pragma warn -pro #endif #include "memory.h" STRING null_str = {1, 0, "" } ; static STRING *char_string[127] ; /* slots for strings of one character "\01" thru "\177" */ STRING *new_STRING(s, xlen) char *s ; unsigned xlen ; /* WARNING: if s != NULL, don't access xlen because it won't be there */ { register STRING *p ; unsigned len ; if ( s ) switch( len = strlen(s) ) { case 0 : p = &null_str ; p->ref_cnt++ ; break ; case 1 : if ( *(unsigned char *)s < 128 ) { if ( p = char_string[*s-1] ) p->ref_cnt++ ; else { p = (STRING *) zmalloc(6) ; p->ref_cnt = 2 ; p->len = 1 ; p->str[0] = s[0] ; p->str[1] = 0 ; char_string[*s-1] = p ; } break ; /*case */ } /* else FALL THRU */ default : p = (STRING *) zmalloc(len+5) ; p->ref_cnt = 1 ; p->len = len ; (void) memcpy( p->str , s, len+1) ; break ; } else { p = (STRING *) zmalloc( xlen+5 ) ; p->ref_cnt = 1 ; p->len = xlen ; /* zero out the end marker */ p->str[xlen] = 0 ; } return p ; } #ifdef DEBUG void DB_free_STRING(sval) register STRING *sval ; { if ( -- sval->ref_cnt == 0 ) zfree(sval, sval->len+5) ; } #endif @//E*O*F mawk0.97/memory.c// chmod u=rw,g=r,o=r mawk0.97/memory.c echo x - mawk0.97/memory.h sed 's/^@//' > "mawk0.97/memory.h" <<'@//E*O*F mawk0.97/memory.h//' /******************************************** memory.h copyright 1991, Michael D. Brennan This is a source file for mawk, an implementation of the Awk programming language as defined in Aho, Kernighan and Weinberger, The AWK Programming Language, Addison-Wesley, 1988. See the accompaning file, LIMITATIONS, for restrictions regarding modification and redistribution of this program in source or binary form. ********************************************/ /* $Log: memory.h,v $ * Revision 2.1 91/04/08 08:23:37 brennan * VERSION 0.97 * */ /* memory.h */ #ifndef MEMORY_H #define MEMORY_H #include "zmalloc.h" #define new_CELL() (CELL *) zmalloc(sizeof(CELL)) #define free_CELL(p) zfree(p,sizeof(CELL)) #ifndef SUPPRESS_NEW_STRING_PROTO STRING *PROTO( new_STRING, (char *, ...) ) ; #endif #ifdef DEBUG void PROTO( DB_free_STRING , (STRING *) ) ; #define free_STRING(s) DB_free_STRING(s) #else #define free_STRING(sval) if ( -- (sval)->ref_cnt == 0 )\ zfree(sval, (sval)->len+5) ; else #endif #endif /* MEMORY_H */ @//E*O*F mawk0.97/memory.h// chmod u=rw,g=r,o=r mawk0.97/memory.h echo x - mawk0.97/parse.y sed 's/^@//' > "mawk0.97/parse.y" <<'@//E*O*F mawk0.97/parse.y//' /******************************************** parse.y copyright 1991, Michael D. Brennan This is a source file for mawk, an implementation of the Awk programming language as defined in Aho, Kernighan and Weinberger, The AWK Programming Language, Addison-Wesley, 1988. See the accompaning file, LIMITATIONS, for restrictions regarding modification and redistribution of this program in source or binary form. ********************************************/ /* $Log: parse.y,v $ * Revision 2.1 91/04/08 08:23:39 brennan * VERSION 0.97 * */ %{ #include <stdio.h> #include "mawk.h" #include "code.h" #include "symtype.h" #include "memory.h" #include "bi_funct.h" #include "bi_vars.h" #include "jmp.h" #include "field.h" #include "files.h" extern void PROTO( eat_nl, (void) ) ; static void PROTO( resize_fblock, (FBLOCK *, INST *) ) ; static void PROTO( code_array, (SYMTAB *) ) ; static void PROTO( code_call_id, (CA_REC *, SYMTAB *) ) ; static int PROTO( current_offset, (void) ) ; static int scope ; static FBLOCK *active_funct ; /* when scope is SCOPE_FUNCT */ #define code_address(x) if( is_local(x) )\ { code1(L_PUSHA) ; code1((x)->offset) ; }\ else code2(_PUSHA, (x)->stval.cp) %} %union{ CELL *cp ; SYMTAB *stp ; INST *start ; /* code starting address */ PF_CP fp ; /* ptr to a (print/printf) or (sub/gsub) function */ BI_REC *bip ; /* ptr to info about a builtin */ FBLOCK *fbp ; /* ptr to a function block */ ARG2_REC *arg2p ; CA_REC *ca_p ; int ival ; } /* two tokens to help with errors */ %token UNEXPECTED /* unexpected character */ %token BAD_DECIMAL %token NL %token SEMI_COLON %token LBRACE RBRACE %token LBOX RBOX %token COMMA %token <ival> IO_OUT /* > or output pipe */ %left P_OR %left P_AND %right ASSIGN ADD_ASG SUB_ASG MUL_ASG DIV_ASG MOD_ASG POW_ASG %right QMARK COLON %left OR %left AND %left IN %left MATCH NOT_MATCH %left EQ NEQ LT LTE GT GTE %left CAT %left GETLINE %left PLUS MINUS %left MUL DIV MOD %left NOT UMINUS %nonassoc IO_IN PIPE %right POW %left INC DEC /* ++ -- */ %left DOLLAR ID FIELD /* last two to remove a SR conflict with getline */ %right LPAREN RPAREN /* removes some SR conflicts */ %token <cp> CONSTANT RE %token <stp> ID %token <fbp> FUNCT_ID %token <bip> BUILTIN %token <cp> FIELD %token PRINT PRINTF SPLIT MATCH_FUNC SUB GSUB LENGTH /* keywords */ %token DO WHILE FOR BREAK CONTINUE IF ELSE IN %token DELETE BEGIN END EXIT NEXT RETURN FUNCTION %type <start> block block_or_newline %type <start> statement_list statement mark %type <start> pattern p_pattern %type <start> print_statement %type <ival> pr_args %type <arg2p> arg2 %type <start> builtin %type <start> getline_file %type <start> lvalue field fvalue %type <start> expr cat_expr p_expr re_or_expr sub_back %type <start> do_statement while_statement for_statement %type <start> if_statement if_else_statement %type <start> while_front if_front for_front %type <start> fexpr0 fexpr1 %type <start> array_loop array_loop_front %type <start> exit_statement return_statement %type <ival> arglist args %type <stp> id array %type <fp> print sub_or_gsub %type <fbp> funct_start funct_head %type <ca_p> call_args ca_front ca_back %type <ival> f_arglist f_args %% /* productions */ program : program_block | program program_block ; program_block : PA_block | function_def | error block { if (scope == SCOPE_FUNCT) { restore_ids() ; scope = SCOPE_MAIN ; } code_ptr = main_code_ptr ; } ; PA_block : block | BEGIN { main_code_ptr = code_ptr ; code_ptr = begin_code_ptr ; scope = SCOPE_BEGIN ; } block { begin_code_ptr = code_ptr ; code_ptr = main_code_ptr ; scope = SCOPE_MAIN ; } | END { main_code_ptr = code_ptr ; code_ptr = end_code_ptr ; scope = SCOPE_END ; } block { end_code_ptr = code_ptr ; code_ptr = main_code_ptr ; scope = SCOPE_MAIN ; } | pattern /* this works just like an if statement */ { code_jmp(_JZ, 0) ; } block_or_newline { patch_jmp( code_ptr ) ; } /* range pattern, see comment in execute.c near _RANGE */ | pattern COMMA { code_push($1, code_ptr - $1) ; code_ptr = $1 ; code1(_RANGE) ; code1(1) ; code_ptr += 3 ; code_ptr += code_pop(code_ptr) ; code1(_STOP0) ; $1[2].op = code_ptr - ($1+1) ; } pattern { code1(_STOP0) ; } block_or_newline { $1[3].op = $6 - ($1+1) ; $1[4].op = code_ptr - ($1+1) ; } ; pattern : expr %prec LPAREN | p_pattern /* these work just like short circuit booleans */ | pattern P_OR { code1(_DUP) ; code_jmp(_JNZ, 0) ; code1(_POP) ; } pattern { patch_jmp(code_ptr) ; } | pattern P_AND { code1(_DUP) ; code_jmp(_JZ, 0) ; code1(_POP) ; } pattern { patch_jmp(code_ptr) ; } ; /* we want the not (!) operator to apply to expr if possible and then to a pattern. Two types of pattern do it */ p_pattern : RE { $$ = code_ptr ; code2(_PUSHI, &field[0]) ; code2(_PUSHC, $1) ; code1(_MATCH) ; } | LPAREN pattern RPAREN { $$ = $2 ; } | NOT p_pattern { code1(_NOT) ; $$ = $2 ; } ; block : LBRACE statement_list RBRACE { $$ = $2 ; } | LBRACE error RBRACE { $$ = code_ptr ; /* does nothing won't be executed */ print_flag = getline_flag = paren_cnt = 0 ; yyerrok ; } ; block_or_newline : block | NL /* default print action */ { $$ = code_ptr ; code1(_PUSHINT) ; code1(0) ; code2(_PRINT, bi_print) ; } statement_list : statement | statement_list statement ; statement : block | expr separator { code1(_POP) ; } | /* empty */ separator { $$ = code_ptr ; } | error separator { $$ = code_ptr ; print_flag = getline_flag = 0 ; paren_cnt = 0 ; yyerrok ; } | print_statement | if_statement | if_else_statement | do_statement | while_statement | for_statement | array_loop | BREAK separator { $$ = code_ptr ; BC_insert('B', code_ptr) ; code2(_JMP, 0) /* don't use code_jmp ! */ ; } | CONTINUE separator { $$ = code_ptr ; BC_insert('C', code_ptr) ; code2(_JMP, 0) ; } | exit_statement | return_statement { if ( scope != SCOPE_FUNCT ) compile_error("return outside function body") ; } | NEXT separator { if ( scope != SCOPE_MAIN ) compile_error( "improper use of next" ) ; $$ = code_ptr ; code1(_NEXT) ; } ; separator : NL | SEMI_COLON ; expr : cat_expr | lvalue ASSIGN expr { code1(_ASSIGN) ; } | lvalue ADD_ASG expr { code1(_ADD_ASG) ; } | lvalue SUB_ASG expr { code1(_SUB_ASG) ; } | lvalue MUL_ASG expr { code1(_MUL_ASG) ; } | lvalue DIV_ASG expr { code1(_DIV_ASG) ; } | lvalue MOD_ASG expr { code1(_MOD_ASG) ; } | lvalue POW_ASG expr { code1(_POW_ASG) ; } | expr EQ expr { code1(_EQ) ; } | expr NEQ expr { code1(_NEQ) ; } | expr LT expr { code1(_LT) ; } | expr LTE expr { code1(_LTE) ; } | expr GT expr { code1(_GT) ; } | expr GTE expr { code1(_GTE) ; } | expr MATCH re_or_expr { code1(_MATCH) ; } | expr NOT_MATCH re_or_expr { code1(_MATCH) ; code1(_NOT) ; } /* short circuit boolean evaluation */ | expr OR { code1(_DUP) ; code_jmp(_JNZ, 0) ; code1(_POP) ; } expr { patch_jmp(code_ptr) ; code1(_TEST) ; } | expr AND { code1(_DUP) ; code_jmp(_JZ, 0) ; code1(_POP) ; } expr { patch_jmp(code_ptr) ; code1(_TEST) ; } | expr QMARK { code_jmp(_JZ, 0) ; } expr COLON { code_jmp(_JMP, 0) ; } expr { patch_jmp(code_ptr) ; patch_jmp($7) ; } ; cat_expr : p_expr %prec CAT | cat_expr p_expr %prec CAT { code1(_CAT) ; } ; p_expr : CONSTANT { $$ = code_ptr ; code2(_PUSHC, $1) ; } | lvalue %prec CAT /* removes lvalue (++|--) sr conflict */ { switch( code_ptr[-2].op ) { case _PUSHA : code_ptr[-2].op = _PUSHI ; break ; case AE_PUSHA : code_ptr[-2].op = AE_PUSHI ; break ; case L_PUSHA : code_ptr[-2].op = L_PUSHI ; break ; case LAE_PUSHA : code_ptr[-2].op = LAE_PUSHI ; break ; #ifdef DEBUG default : bozo("p_expr->lvalue") ; #endif } } | LPAREN expr RPAREN { $$ = $2 ; } ; p_expr : p_expr PLUS p_expr { code1(_ADD) ; } | p_expr MINUS p_expr { code1(_SUB) ; } | p_expr MUL p_expr { code1(_MUL) ; } | p_expr DIV p_expr { code1(_DIV) ; } | p_expr MOD p_expr { code1(_MOD) ; } | p_expr POW p_expr { code1(_POW) ; } | NOT p_expr { $$ = $2 ; code1(_NOT) ; } | PLUS p_expr %prec UMINUS { $$ = $2 ; code1(_UPLUS) ; } | MINUS p_expr %prec UMINUS { $$ = $2 ; code1(_UMINUS) ; } | builtin ; p_expr : lvalue INC { code1(_POST_INC ) ; } | lvalue DEC { code1(_POST_DEC) ; } | INC lvalue { $$ = $2 ; code1(_PRE_INC) ; } | DEC lvalue { $$ = $2 ; code1(_PRE_DEC) ; } ; p_expr : field INC { code1(F_POST_INC ) ; } | field DEC { code1(F_POST_DEC) ; } | INC field { $$ = $2 ; code1(F_PRE_INC) ; } | DEC field { $$ = $2 ; code1(F_PRE_DEC) ; } ; lvalue : id { $$ = code_ptr ; code_address($1) ; } | LPAREN lvalue RPAREN { $$ = $2 ; } ; id : ID { switch($1->type) { case ST_NONE : /* new id */ $1->type = ST_VAR ; $1->stval.cp = new_CELL() ; $1->stval.cp->type = C_NOINIT ; break ; case ST_LOCAL_NONE : $1->type = ST_LOCAL_VAR ; active_funct->typev[$1->offset] = ST_LOCAL_VAR ; break ; case ST_VAR : case ST_LOCAL_VAR : break ; default : type_error($1) ; break ; } } ; arglist : /* empty */ { $$ = 0 ; } | args ; args : expr %prec LPAREN { $$ = 1 ; } | args COMMA expr { $$ = $1 + 1 ; } ; builtin : BUILTIN mark LPAREN arglist RPAREN { BI_REC *p = $1 ; $$ = $2 ; if ( p-> min_args > $4 || p->max_args < $4 ) compile_error( "wrong number of arguments in call to %s" , p->name ) ; if ( p->min_args != p->max_args ) /* variable args */ code2(_PUSHINT , $4 ) ; code2(_BUILTIN , p->fp) ; } ; /* an empty production to store the code_ptr */ mark : /* empty */ { $$ = code_ptr ; } print_statement : print mark pr_args pr_direction separator { code2(_PRINT, $1) ; $$ = $2 ; if ( $1 == bi_printf && $3 == 0 ) compile_error("no arguments in call to printf") ; print_flag = 0 ; $$ = $2 ; } ; print : PRINT { $$ = bi_print ; print_flag = 1 ;} | PRINTF { $$ = bi_printf ; print_flag = 1 ; } ; pr_args : arglist { code1(_PUSHINT) ; code1($1) ; } | LPAREN arg2 RPAREN { $$ = $2->cnt ; zfree($2,sizeof(ARG2_REC)) ; code1(_PUSHINT) ; code1($$) ; } ; arg2 : expr COMMA expr { $$ = (ARG2_REC*) zmalloc(sizeof(ARG2_REC)) ; $$->start = $1 ; $$->cnt = 2 ; } | arg2 COMMA expr { $$ = $1 ; $$->cnt++ ; } ; pr_direction : /* empty */ | IO_OUT expr { code2(_PUSHINT, $1) ; } ; /* IF and IF-ELSE */ if_front : IF LPAREN expr RPAREN { $$ = $3 ; eat_nl() ; code_jmp(_JZ, 0) ; } ; if_statement : if_front statement { patch_jmp( code_ptr ) ; } ; else : ELSE { eat_nl() ; code_jmp(_JMP, 0) ; } ; if_else_statement : if_front statement else statement { patch_jmp(code_ptr) ; patch_jmp($4) ; } /* LOOPS */ do : DO { eat_nl() ; BC_new() ; } ; do_statement : do statement WHILE LPAREN expr RPAREN separator { $$ = $2 ; code_jmp(_JNZ, $2) ; BC_clear(code_ptr, $5) ; } ; while_front : WHILE LPAREN expr RPAREN { eat_nl() ; BC_new() ; code_push($3, code_ptr-$3) ; code_ptr = $$ = $3 ; code_jmp(_JMP,0) ; } ; while_statement : while_front statement { INST *c_addr = code_ptr ; /*continue address*/ patch_jmp( c_addr) ; code_ptr += code_pop(c_addr) ; code_jmp(_JNZ, $2) ; BC_clear(code_ptr, c_addr) ; } ; for_front : FOR LPAREN fexpr0 SEMI_COLON fexpr1 SEMI_COLON fexpr0 RPAREN { $$ = $3 ; eat_nl() ; BC_new() ; /* push fexpr2 and 3 */ code_push( $5, $7-$5) ; code_push( $7, code_ptr - $7) ; /* reset code_ptr */ code_ptr = $5 ; code_jmp(_JMP, 0) ; } ; for_statement : for_front statement { INST *c_addr = code_ptr ; unsigned len = code_pop(code_ptr) ; code_ptr += len ; patch_jmp(code_ptr) ; len = code_pop(code_ptr) ; code_ptr += len ; code_jmp(_JNZ, $2) ; BC_clear( code_ptr, c_addr) ; } ; fexpr0 : /* empty */ { $$ = code_ptr; } | expr { code1(_POP) ; } ; fexpr1 : /* empty */ { /* this will be wiped out when the jmp is coded */ $$ = code_ptr ; code2(_PUSHC, &cell_one) ; } | expr ; /* arrays */ array : ID { switch($1->type) { case ST_NONE : /* a new array */ $1->type = ST_ARRAY ; $1->stval.array = new_ARRAY() ; break ; case ST_ARRAY : case ST_LOCAL_ARRAY : break ; case ST_LOCAL_NONE : $1->type = ST_LOCAL_ARRAY ; active_funct->typev[$1->offset] = ST_LOCAL_ARRAY ; break ; default : type_error($1) ; break ; } } ; expr : expr IN array { code_array($3) ; code1(A_TEST) ; } | LPAREN arg2 RPAREN IN array { $$ = $2->start ; code1(A_CAT) ; code1($2->cnt) ; zfree($2, sizeof(ARG2_REC)) ; code_array($5) ; code1(A_TEST) ; } ; lvalue : array mark LBOX args RBOX { if ( $4 > 1 ) { code1(A_CAT) ; code1($4) ; } if( is_local($1) ) { code1(LAE_PUSHA) ; code1($1->offset) ; } else code2(AE_PUSHA, $1->stval.array) ; $$ = $2 ; } ; /* delete A[i] */ statement : DELETE array mark LBOX args RBOX separator { $$ = $3 ; if ( $5 > 1 ) { code1(A_CAT) ; code1($5) ; } code_array($2) ; code1(A_DEL) ; } ; /* for ( i in A ) statement */ array_loop_front : FOR LPAREN id IN array RPAREN { eat_nl() ; BC_new() ; $$ = code_ptr ; code_address($3) ; code_array($5) ; code1(A_LOOP) ; code1(_STOP) ; code1(0) ; /* put offset of following code here*/ } ; array_loop : array_loop_front statement { code1(_STOP) ; BC_clear( $2 - 2, code_ptr-1) ; $2[-1].op = code_ptr - & $2[-2] ; } ; /* fields */ field : FIELD { $$ = code_ptr ; code2(F_PUSHA, $1) ; } | DOLLAR p_expr { $$ = $2 ; code1( FE_PUSHA ) ; } | LPAREN field RPAREN { $$ = $2 ; } ; p_expr : field %prec CAT /* removes field (++|--) sr conflict */ { if ( code_ptr[-2].op == F_PUSHA ) code_ptr[-2].op = ((CELL *)code_ptr[-1].ptr == field || (CELL *)code_ptr[-1].ptr > field+NF ) ? _PUSHI : F_PUSHI ; else if ( code_ptr[-1].op == FE_PUSHA ) code_ptr[-1].op = FE_PUSHI ; else bozo("missing F(E)_PUSHA") ; } ; expr : field ASSIGN expr { code1(F_ASSIGN) ; } | field ADD_ASG expr { code1(F_ADD_ASG) ; } | field SUB_ASG expr { code1(F_SUB_ASG) ; } | field MUL_ASG expr { code1(F_MUL_ASG) ; } | field DIV_ASG expr { code1(F_DIV_ASG) ; } | field MOD_ASG expr { code1(F_MOD_ASG) ; } | field POW_ASG expr { code1(F_POW_ASG) ; } ; /* split is handled different than a builtin because it takes an array and optionally a regular expression as args */ p_expr : SPLIT LPAREN expr COMMA array RPAREN { $$ = $3 ; code_array($5) ; code2(_PUSHI, &fs_shadow) ; code2(_BUILTIN, bi_split) ; } | SPLIT LPAREN expr COMMA array COMMA { code_array($5) ; } split_back { $$ = $3 ; code2(_BUILTIN, bi_split) ; } ; /* split back is not the same as re_or_expr RPAREN because the action is cast_for_split() instead of cast_to_RE() */ split_back : expr RPAREN { if ( code_ptr[-2].op == _PUSHC && ((CELL *)code_ptr[-1].ptr)->type == C_STRING ) cast_for_split(code_ptr[-1].ptr) ; } | RE RPAREN { code2(_PUSHC, $1) ; } ; /* match(expr, RE) */ p_expr : MATCH_FUNC LPAREN expr COMMA re_or_expr RPAREN { $$ = $3 ; code2(_BUILTIN, bi_match) ; } ; re_or_expr : RE { $$ = code_ptr ; code2(_PUSHC, $1) ; } | expr %prec MATCH { if ( code_ptr[-2].op == _PUSHC && ((CELL *)code_ptr[-1].ptr)->type == C_STRING ) /* re compile now */ cast_to_RE((CELL *) code_ptr[-1].ptr) ; } ; /* length w/o an argument */ p_expr : LENGTH { $$ = code_ptr ; code2(_PUSHI, field) ; code2(_BUILTIN, bi_length) ; } ; exit_statement : EXIT separator { $$ = code_ptr ; code1(_EXIT0) ; } | EXIT expr separator { $$ = $2 ; code1(_EXIT) ; } return_statement : RETURN separator { $$ = code_ptr ; code1(_RET0) ; } | RETURN expr separator { $$ = $2 ; code1(_RET) ; } /* getline */ p_expr : getline %prec GETLINE { $$ = code_ptr ; code2(F_PUSHA, &field[0]) ; code1(_PUSHINT) ; code1(0) ; code2(_BUILTIN, bi_getline) ; getline_flag = 0 ; } | getline fvalue %prec GETLINE { $$ = $2 ; code1(_PUSHINT) ; code1(0) ; code2(_BUILTIN, bi_getline) ; getline_flag = 0 ; } | getline_file p_expr %prec IO_IN { code1(_PUSHINT) ; code1(F_IN) ; code2(_BUILTIN, bi_getline) ; /* getline_flag already off in yylex() */ } | p_expr PIPE GETLINE { code2(F_PUSHA, &field[0]) ; code1(_PUSHINT) ; code1(PIPE_IN) ; code2(_BUILTIN, bi_getline) ; } | p_expr PIPE GETLINE fvalue { code1(_PUSHINT) ; code1(PIPE_IN) ; code2(_BUILTIN, bi_getline) ; } ; getline : GETLINE { getline_flag = 1 ; } fvalue : lvalue | field ; getline_file : getline IO_IN { $$ = code_ptr ; code2(F_PUSHA, field+0) ; } | getline fvalue IO_IN { $$ = $2 ; } ; /*========================================== sub and gsub ==========================================*/ p_expr : sub_or_gsub LPAREN re_or_expr COMMA expr sub_back { if ( $6 - $5 == 2 && $5->op == _PUSHC && ((CELL *) $5[1].ptr)->type == C_STRING ) /* cast from STRING to REPL at compile time */ cast_to_REPL( (CELL *) $5[1].ptr ) ; code2(_BUILTIN, $1) ; $$ = $3 ; } sub_or_gsub : SUB { $$ = bi_sub ; } | GSUB { $$ = bi_gsub ; } ; sub_back : RPAREN /* substitute into $0 */ { $$ = code_ptr ; code2(F_PUSHA, &field[0]) ; } | COMMA fvalue RPAREN { $$ = $2 ; } ; /*================================================ user defined functions *=================================*/ function_def : funct_start block { resize_fblock($1, code_ptr) ; code_ptr = main_code_ptr ; scope = SCOPE_MAIN ; active_funct = (FBLOCK *) 0 ; restore_ids() ; } ; funct_start : funct_head LPAREN f_arglist RPAREN { eat_nl() ; scope = SCOPE_FUNCT ; active_funct = $1 ; main_code_ptr = code_ptr ; if ( $1->nargs = $3 ) $1->typev = (char *) memset( zmalloc($3), ST_LOCAL_NONE, $3) ; else $1->typev = (char *) 0 ; code_ptr = $1->code = (INST *) zmalloc(PAGE_SZ*sizeof(INST)) ; } ; funct_head : FUNCTION ID { FBLOCK *fbp ; if ( $2->type == ST_NONE ) { $2->type = ST_FUNCT ; fbp = $2->stval.fbp = (FBLOCK *) zmalloc(sizeof(FBLOCK)) ; fbp->name = $2->name ; } else { type_error( $2 ) ; /* this FBLOCK will not be put in the symbol table */ fbp = (FBLOCK*) zmalloc(sizeof(FBLOCK)) ; fbp->name = "" ; } $$ = fbp ; } | FUNCTION FUNCT_ID { $$ = $2 ; if ( $2->code ) compile_error("redefinition of %s" , $2->name) ; } ; f_arglist : /* empty */ { $$ = 0 ; } | f_args ; f_args : ID { $1 = save_id($1->name) ; $1->type = ST_LOCAL_NONE ; $1->offset = 0 ; $$ = 1 ; } | f_args COMMA ID { if ( is_local($3) ) compile_error("%s is duplicated in argument list", $3->name) ; else { $3 = save_id($3->name) ; $3->type = ST_LOCAL_NONE ; $3->offset = $1 ; $$ = $1 + 1 ; } } ; /* a call to a user defined function */ p_expr : FUNCT_ID mark call_args { $$ = $2 ; code2(_CALL, $1) ; if ( $3 ) code1($3->arg_num+1) ; else code1(0) ; check_fcall($1, scope, active_funct, $3, token_lineno) ; } ; call_args : LPAREN RPAREN { $$ = (CA_REC *) 0 ; } | ca_front ca_back { $$ = $2 ; $$->link = $1 ; $$->arg_num = $1 ? $1->arg_num+1 : 0 ; } ; /* The funny definition of ca_front with the COMMA bound to the ID is to force a shift to avoid a reduce/reduce conflict ID->id or ID->array Or to avoid a decision, if the type of the ID has not yet been determined */ ca_front : LPAREN { $$ = (CA_REC *) 0 ; } | ca_front expr COMMA { $$ = (CA_REC *) zmalloc(sizeof(CA_REC)) ; $$->link = $1 ; $$->type = CA_EXPR ; $$->arg_num = $1 ? $1->arg_num+1 : 0 ; } | ca_front ID COMMA { $$ = (CA_REC *) zmalloc(sizeof(CA_REC)) ; $$->link = $1 ; $$->arg_num = $1 ? $1->arg_num+1 : 0 ; code_call_id($$, $2) ; } ; ca_back : expr RPAREN { $$ = (CA_REC *) zmalloc(sizeof(CA_REC)) ; $$->type = CA_EXPR ; } | ID RPAREN { $$ = (CA_REC *) zmalloc(sizeof(CA_REC)) ; code_call_id($$, $1) ; } ; %% /* resize the code for a user function */ static void resize_fblock( fbp, code_ptr ) FBLOCK *fbp ; INST *code_ptr ; { int size ; code1(_RET0) ; /* make sure there is always a return statement */ if ( dump_code ) { code1(_HALT) ; /*stops da() */ add_to_fdump_list(fbp) ; } if ( (size = code_ptr - fbp->code) > PAGE_SZ-1 ) overflow("function code size", PAGE_SZ ) ; /* resize the code */ fbp->code = (INST*) zrealloc(fbp->code, PAGE_SZ*sizeof(INST), size * sizeof(INST) ) ; } static void code_array(p) register SYMTAB *p ; { if ( is_local(p) ) { code1(LA_PUSHA) ; code1(p->offset) ; } else code2(A_PUSHA, p->stval.array) ; } static int current_offset() { switch( scope ) { case SCOPE_MAIN : return code_ptr - main_start ; case SCOPE_BEGIN : return code_ptr - begin_start ; case SCOPE_END : return code_ptr - end_start ; case SCOPE_FUNCT : return code_ptr - active_funct->code ; } } static void code_call_id( p, ip ) register CA_REC *p ; register SYMTAB *ip ; { static CELL dummy ; switch( ip->type ) { case ST_VAR : p->type = CA_EXPR ; code2(_PUSHI, ip->stval.cp) ; break ; case ST_LOCAL_VAR : p->type = CA_EXPR ; code1(L_PUSHI) ; code1(ip->offset) ; break ; case ST_ARRAY : p->type = CA_ARRAY ; code2(A_PUSHA, ip->stval.array) ; break ; case ST_LOCAL_ARRAY : p->type = CA_ARRAY ; code1(LA_PUSHA) ; code1(ip->offset) ; break ; case ST_NONE : p->type = ST_NONE ; p->call_offset = current_offset() ; p->sym_p = ip ; code2(_PUSHI, &dummy) ; break ; case ST_LOCAL_NONE : p->type = ST_LOCAL_NONE ; p->call_offset = current_offset() ; p->type_p = & active_funct->typev[ip->offset] ; code1(L_PUSHI) ; code1(ip->offset) ; break ; #ifdef DEBUG default : bozo("code_call_id") ; #endif } } int parse() { int yy = yyparse() ; if ( resolve_list ) resolve_fcalls() ; return yy ; } @//E*O*F mawk0.97/parse.y// chmod u=rw,g=r,o=r mawk0.97/parse.y echo x - mawk0.97/print.c sed 's/^@//' > "mawk0.97/print.c" <<'@//E*O*F mawk0.97/print.c//' /******************************************** print.c copyright 1991, Michael D. Brennan This is a source file for mawk, an implementation of the Awk programming language as defined in Aho, Kernighan and Weinberger, The AWK Programming Language, Addison-Wesley, 1988. See the accompaning file, LIMITATIONS, for restrictions regarding modification and redistribution of this program in source or binary form. ********************************************/ /* $Log: print.c,v $ * Revision 2.2 91/04/09 12:39:23 brennan * added static to funct decls to satisfy STARDENT compiler * * Revision 2.1 91/04/08 08:23:43 brennan * VERSION 0.97 * */ #include "mawk.h" #include "bi_vars.h" #include "bi_funct.h" #include "memory.h" #include "field.h" #include "scan.h" #include "files.h" /* static functions */ static void PROTO( print_cell, (CELL *, FILE *) ) ; static void PROTO( do_printf, (FILE *, char *, unsigned, CELL *) ) ; static void PROTO( do_sprintf, (char *, unsigned, CELL *) ) ; static void print_cell(p, fp) register CELL *p ; register FILE *fp ; { register int len ; switch( p->type ) { case C_NOINIT : break ; case C_MBSTRN : case C_STRING : case C_STRNUM : switch( len = string(p)->len ) { case 0 : break ; case 1 : putc(string(p)->str[0],fp) ; break ; default : fwrite(string(p)->str, 1, len, fp) ; } break ; case C_DOUBLE : fprintf(fp, string(field + OFMT)->str, p->dval) ; break ; default : bozo("bad cell passed to print_cell") ; } } /* on entry to bi_print or bi_printf the stack is: sp[0] = an integer k if ( k < 0 ) output is to a file with name in sp[-1] { so open file and sp -= 2 } sp[0] = k >= 0 is the number of print args sp[-k] holds the first argument */ CELL *bi_print(sp) CELL *sp ; /* stack ptr passed in */ { register CELL *p ; register int k ; FILE *fp ; if ( (k = sp->type) < 0 ) { if ( (--sp)->type < C_STRING ) cast1_to_s(sp) ; fp = (FILE *) file_find( string(sp), k ) ; free_STRING(string(sp)) ; k = (--sp)->type ; } else fp = stdout ; if ( k ) { p = sp - k ; /* clear k variables off the stack */ sp = p - 1 ; while ( k-- > 1 ) { print_cell(p,fp) ; print_cell(bi_vars+OFS,fp) ; cell_destroy(p) ; p++ ; } print_cell(p, fp) ; cell_destroy(p) ; } else { sp-- ; print_cell( &field[0], fp ) ; } print_cell( bi_vars + ORS , fp) ; return sp ; } /* the contents of format are preserved */ static void do_printf( fp, format, argcnt, cp) FILE *fp ; char *format ; unsigned argcnt ; CELL *cp ; /* ptr to an array of arguments ( on the eval stack) */ { register char *q ; char save ; char *p = format ; while ( 1 ) { if ( ! (q = strchr(p, '%')) ) if ( argcnt == 0 ) { fputs(p, fp) ; return ; } else rt_error("too many arguments in call to printf(%s)", format ) ; if ( * ++q == '%' ) { fwrite( p, q-p, 1, fp) ; p = q+1 ; continue ; } if ( argcnt == 0 ) rt_error("too few arguments in call to printf(%s)", format) ; if ( *q == '-' ) q++ ; while ( scan_code[*(unsigned char*)q] == SC_DIGIT ) q++ ; if ( *q == '.' ) { q++ ; while ( scan_code[*(unsigned char*)q] == SC_DIGIT ) q++ ; } save = * ++q ; *q = 0 ; switch( q[-1] ) { case 'c' : case 'd' : case 'o' : case 'x' : if ( cp->type != C_DOUBLE ) cast1_to_d(cp) ; (void) fprintf(fp, p, (int) cp->dval) ; break ; case 'e' : case 'g' : case 'f' : if ( cp->type != C_DOUBLE ) cast1_to_d(cp) ; (void) fprintf(fp, p, cp->dval) ; break ; case 's' : if ( cp->type < C_STRING ) cast1_to_s(cp) ; (void) fprintf(fp, p, string(cp)->str) ; break ; default : rt_error("bad format string in call to printf(%s)", format) ; } *q = save ; p = q ; argcnt-- ; cp++ ; } } CELL *bi_printf(sp) register CELL *sp ; { register int k ; register CELL *p ; FILE *fp ; if ( (k = sp->type) < 0 ) { if ( (--sp)->type < C_STRING ) cast1_to_s(sp) ; fp = (FILE *) file_find( string(sp), k ) ; free_STRING(string(sp)) ; k = (--sp)->type ; } else fp = stdout ; sp -= k-- ; /* sp points at the format string */ if ( sp->type < C_STRING ) cast1_to_s(sp) ; do_printf(fp, string(sp)->str, k, sp+1) ; free_STRING(string(sp)) ; for ( p = sp+1 ; k-- ; p++ ) cell_destroy(p) ; return --sp ; } CELL *bi_sprintf(sp) CELL *sp ; { CELL *p ; int argcnt = sp->type ; void do_sprintf() ; sp -= argcnt-- ; /* sp points at the format string */ if ( sp->type < C_STRING ) cast1_to_s(sp) ; do_sprintf(string(sp)->str, argcnt, sp+1) ; free_STRING(string(sp)) ; for ( p = sp+1 ; argcnt-- ; p++ ) cell_destroy(p) ; sp->ptr = (PTR) new_STRING( temp_buff.string_buff ) ; return sp ; } /* the contents of format are preserved */ static void do_sprintf( format, argcnt, cp) char *format ; unsigned argcnt ; CELL *cp ; { register char *q ; char save ; char *p = format ; register char *target = temp_buff.string_buff ; *target = 0 ; while ( 1 ) { if ( ! (q = strchr(p, '%')) ) if ( argcnt == 0 ) { strcpy(target, p) ; /* check the result is not too large */ if ( main_buff[-1] != 0 ) { /* This may have damaged us -- try to croak out an error message and exit */ rt_overflow("sprintf buffer", TEMP_BUFF_SZ) ; } return ; } else rt_error("too many arguments in call to sprintf(%s)", format ) ; if ( * ++q == '%' ) { unsigned len ; (void) memcpy(target, p, len = q-p ) ; p = q + 1 ; *(target += len) = 0 ; continue ; } if ( argcnt == 0 ) rt_error("too few arguments in call to sprintf(%s)", format) ; if ( *q == '-' ) q++ ; while ( scan_code[*(unsigned char*)q] == SC_DIGIT ) q++ ; if ( *q == '.' ) { q++ ; while ( scan_code[*(unsigned char*)q] == SC_DIGIT ) q++ ; } save = * ++q ; *q = 0 ; switch( q[-1] ) { case 'c' : case 'd' : case 'o' : case 'x' : if ( cp->type != C_DOUBLE ) cast1_to_d(cp) ; (void) sprintf(target, p, (int) cp->dval ) ; target = strchr(target, 0) ; break ; case 'e' : case 'g' : case 'f' : if ( cp->type != C_DOUBLE ) cast1_to_d(cp) ; (void) sprintf(target, p, cp->dval ) ; target = strchr(target, 0) ; break ; case 's' : if ( cp->type < C_STRING ) cast1_to_s(cp) ; (void) sprintf(target, p, string(cp)->str ) ; target = strchr(target, 0) ; break ; default : rt_error("bad format string in call to sprintf(%s)", format) ; } *q = save ; p = q ; argcnt-- ; cp++ ; } } @//E*O*F mawk0.97/print.c// chmod u=rw,g=r,o=r mawk0.97/print.c echo x - mawk0.97/re_cmpl.c sed 's/^@//' > "mawk0.97/re_cmpl.c" <<'@//E*O*F mawk0.97/re_cmpl.c//' /******************************************** re_cmpl.c copyright 1991, Michael D. Brennan This is a source file for mawk, an implementation of the Awk programming language as defined in Aho, Kernighan and Weinberger, The AWK Programming Language, Addison-Wesley, 1988. See the accompaning file, LIMITATIONS, for restrictions regarding modification and redistribution of this program in source or binary form. ********************************************/ /* $Log: re_cmpl.c,v $ * Revision 2.1 91/04/08 08:23:45 brennan * VERSION 0.97 * */ /* re_cmpl.c */ #include "mawk.h" #include "memory.h" #include "scan.h" #include "regexp.h" #include "repl.h" #include <string.h> static CELL *PROTO( REPL_compile, (STRING *) ) ; typedef struct re_node { STRING *sval ; PTR re ; struct re_node *link ; } RE_NODE ; static RE_NODE *re_list ; /* a list of compiled regular expressions */ PTR re_compile( sval ) STRING *sval ; { register RE_NODE *p ; RE_NODE *q ; char *s ; /* search list */ s = sval->str ; p = re_list ; q = (RE_NODE *) 0 ; while ( p ) if ( strcmp(s, p->sval->str) == 0 ) /* found */ if ( !q ) /* already at front */ goto _return ; else /* delete from list for move to front */ { q->link = p->link ; goto found ; } else { q = p ; p = p->link ; } /* not found */ p = (RE_NODE *) zmalloc( sizeof(RE_NODE) ) ; p->sval = sval ; sval->ref_cnt++ ; if( !(p->re = REcompile(s)) ) { errmsg(0, "regular expression compile failed (%s)\n%s\n" , REerrlist[REerrno] , s) ; mawk_exit(1) ; } found : /* insert p at the front of the list */ p->link = re_list ; re_list = p ; _return : #ifdef DEBUG if ( dump_RE ) REmprint(p->re, stderr) ; #endif return p->re ; } /* this is only used by da() */ char *re_uncompile( m ) PTR m ; { register RE_NODE *p ; for( p = re_list ; p ; p = p->link ) if ( p->re == m ) return p->sval->str ; #ifdef DEBUG bozo("non compiled machine") ; #endif } /*=================================================*/ /* replacement operations */ /* create a replacement CELL from a STRING * */ static CELL *REPL_compile( sval ) STRING *sval ; { int i = 0 ; register char *p = sval->str ; register char *q ; char *xbuff ; CELL *cp ; q = xbuff = (char *) zmalloc( sval->len + 1 ) ; while ( 1 ) { switch( *p ) { case 0 : *q = 0 ; goto done ; case '\\': if ( p[1] == '&' ) { *q++ = '&' ; p += 2 ; continue ; } else break ; case '&': /* if empty we don't need to make a node */ if ( q != xbuff ) { *q = 0 ; temp_buff.ptr_buff[i++] = (PTR) new_STRING(xbuff) ; } /* and a null node for the '&' */ temp_buff.ptr_buff[i++] = (PTR) 0 ; /* reset */ p++ ; q = xbuff ; continue ; default : break ; } *q++ = *p++ ; } done : /* if we have one empty string it will get made now */ if ( q > xbuff || i == 0 ) temp_buff.ptr_buff[i++] = (PTR) new_STRING(xbuff) ; if ( i > MAX_FIELD ) overflow("replacement pieces", MAX_FIELD) ; cp = new_CELL() ; if ( i == 1 ) { cp->type = C_REPL ; cp->ptr = temp_buff.ptr_buff[0] ; } else { STRING **sp = (STRING**) (cp->ptr = zmalloc(sizeof(STRING *)*i)) ; int j = 0 ; while ( j < i ) *sp++ = (STRING *)temp_buff.ptr_buff[j++] ; cp->type = C_REPLV ; cp->vcnt = i ; } zfree(xbuff, sval->len+1) ; return cp ; } /* free memory used by a replacement CELL */ void repl_destroy( cp ) register CELL *cp ; { register STRING **p ; unsigned cnt ; if ( cp->type == C_REPL ) free_STRING(string(cp)) ; else /* an C_REPLV */ { p = (STRING **) cp->ptr ; for( cnt = cp->vcnt ; cnt ; cnt--) { if ( *p ) free_STRING( *p ) ; p++ ; } zfree( cp->ptr, cp->vcnt * sizeof(STRING *) ) ; } } /* copy a C_REPLV cell to another CELL */ CELL *replv_cpy( target, source ) CELL *target , *source ; { STRING **t, **s ; unsigned cnt ; target->type = C_REPLV ; target->vcnt = source->vcnt ; target->ptr = (PTR) zmalloc( target->vcnt * sizeof(STRING *) ) ; cnt = target->vcnt ; t = (STRING **) target->ptr ; s = (STRING **) source->ptr ; while ( cnt-- ) { if ( *t = *s++ ) (*t)->ref_cnt++ ; t++ ; } return target ; } /* here's our old friend linked linear list with move to the front for compilation of replacement CELLs */ typedef struct repl_node { struct repl_node *link ; STRING *sval ; /* the input */ CELL *cp ; /* the output */ } REPL_NODE ; static REPL_NODE *repl_list ; /* search the list (with move to the front) for a compiled separator. return a ptr to a CELL (C_REPL or C_REPLV) */ CELL *repl_compile( sval ) STRING *sval ; { register REPL_NODE *p ; REPL_NODE *q ; char *s ; /* search the list */ s = sval->str ; p = repl_list ; q = (REPL_NODE *) 0 ; while ( p ) if ( strcmp(s, p->sval->str) == 0 ) /* found */ if ( !q ) /* already at front */ return p->cp ; else /* delete from list for move to front */ { q->link = p->link ; goto found ; } else { q = p ; p = p->link ; } /* not found */ p = (REPL_NODE *) zmalloc( sizeof(REPL_NODE) ) ; p->sval = sval ; sval->ref_cnt++ ; p->cp = REPL_compile(sval) ; found : /* insert p at the front of the list */ p->link = repl_list ; repl_list = p ; return p->cp ; } /* return the string for a CELL or type REPL or REPLV, this is only used by da() */ char *repl_uncompile( cp ) CELL *cp ; { register REPL_NODE *p = repl_list ; if ( cp->type == C_REPL ) while ( p ) if ( p->cp->type == C_REPL && p->cp->ptr == cp->ptr ) return p->sval->str ; else p = p->link ; else while ( p ) if ( p->cp->type == C_REPLV && memcmp( cp->ptr, p->cp->ptr, cp->vcnt * sizeof(STRING*)) == 0 ) return p->sval->str ; else p = p->link ; bozo("unable to uncompile an repl") ; } /* convert a C_REPLV to C_REPL replacing the &s with sval */ CELL *replv_to_repl( cp, sval) CELL *cp ; STRING *sval ; { register STRING **p ; STRING **sblock = (STRING **) cp->ptr ; unsigned cnt , vcnt = cp->vcnt ; unsigned len ; char *target ; #ifdef DEBUG if ( cp->type != C_REPLV ) bozo("not replv") ; #endif p = sblock ; cnt = vcnt ; len = 0 ; while ( cnt-- ) if ( *p ) len += (*p++)->len ; else { *p++ = sval ; sval->ref_cnt++ ; len += sval->len ; } cp->type = C_REPL ; cp->ptr = (PTR) new_STRING((char *) 0, len) ; p = sblock ; cnt = vcnt ; target = string(cp)->str ; while ( cnt-- ) { (void) memcpy(target, (*p)->str, (*p)->len) ; target += (*p)->len ; free_STRING(*p) ; p++ ; } zfree( sblock, vcnt * sizeof(STRING *) ) ; return cp ; } @//E*O*F mawk0.97/re_cmpl.c// chmod u=rw,g=r,o=r mawk0.97/re_cmpl.c echo x - mawk0.97/regexp.h sed 's/^@//' > "mawk0.97/regexp.h" <<'@//E*O*F mawk0.97/regexp.h//' /******************************************** regexp.h copyright 1991, Michael D. Brennan This is a source file for mawk, an implementation of the Awk programming language as defined in Aho, Kernighan and Weinberger, The AWK Programming Language, Addison-Wesley, 1988. See the accompaning file, LIMITATIONS, for restrictions regarding modification and redistribution of this program in source or binary form. ********************************************/ /*$Log: regexp.h,v $ * Revision 2.1 91/04/08 08:23:47 brennan * VERSION 0.97 * */ #include <stdio.h> PTR PROTO( REcompile , (char *) ) ; int PROTO( REtest, (char *, PTR) ) ; char *PROTO( REmatch, (char *, PTR, unsigned *) ) ; void PROTO( REmprint, (PTR , FILE*) ) ; extern int REerrno ; extern char *REerrlist[] ; @//E*O*F mawk0.97/regexp.h// chmod u=rw,g=r,o=r mawk0.97/regexp.h echo x - mawk0.97/repl.h sed 's/^@//' > "mawk0.97/repl.h" <<'@//E*O*F mawk0.97/repl.h//' /******************************************** repl.h copyright 1991, Michael D. Brennan This is a source file for mawk, an implementation of the Awk programming language as defined in Aho, Kernighan and Weinberger, The AWK Programming Language, Addison-Wesley, 1988. See the accompaning file, LIMITATIONS, for restrictions regarding modification and redistribution of this program in source or binary form. ********************************************/ /*$Log: repl.h,v $ * Revision 2.1 91/04/08 08:23:49 brennan * VERSION 0.97 * */ /* repl.h */ #ifndef REPL_H #define REPL_H PTR PROTO( re_compile, (STRING *) ) ; char *PROTO( re_uncompile, (PTR) ) ; CELL *PROTO( repl_compile, (STRING *) ) ; char *PROTO( repl_uncompile, (CELL *) ) ; void PROTO( repl_destroy, (CELL *) ) ; CELL *PROTO( replv_cpy, (CELL *, CELL *) ) ; CELL *PROTO( replv_to_repl, (CELL *, STRING *) ) ; #endif @//E*O*F mawk0.97/repl.h// chmod u=rw,g=r,o=r mawk0.97/repl.h echo x - mawk0.97/scan.c sed 's/^@//' > "mawk0.97/scan.c" <<'@//E*O*F mawk0.97/scan.c//' /******************************************** scan.c copyright 1991, Michael D. Brennan This is a source file for mawk, an implementation of the Awk programming language as defined in Aho, Kernighan and Weinberger, The AWK Programming Language, Addison-Wesley, 1988. See the accompaning file, LIMITATIONS, for restrictions regarding modification and redistribution of this program in source or binary form. ********************************************/ /* $Log: scan.c,v $ * Revision 2.2 91/04/09 12:39:27 brennan * added static to funct decls to satisfy STARDENT compiler * * Revision 2.1 91/04/08 08:23:51 brennan * VERSION 0.97 * */ #include "mawk.h" #include "sizes.h" #include "scan.h" #include "memory.h" #include "field.h" #include "init.h" #include "fin.h" #include "repl.h" #include <fcntl.h> #include <string.h> #include "files.h" /* static functions */ static void PROTO(buff_create, (char *) ) ; static int PROTO(slow_next, (void) ) ; static void PROTO(eat_comment, (void) ) ; static double PROTO(collect_decimal, (int, int *) ) ; static int PROTO(collect_string, (void) ) ; static int PROTO(collect_RE, (void) ) ; static char *PROTO(rm_escape, (char *) ) ; /*----------------------------- program file management *----------------------------*/ static unsigned char *buffer ; static unsigned char *buffp ; /* unsigned so it works with 8 bit chars */ static int program_fd = -1 ; static int eof_flag ; static void buff_create(s) char *s ; { /* If program_fd == -1, program came from command line and s is it, else s is a filename */ if ( program_fd == -1 ) { buffer = buffp = (unsigned char *) s ; eof_flag = 1 ; } else /* s is a filename, open it */ { if ( s[0] == '-' && s[1] == 0 ) program_fd = 0 ; else if ( (program_fd = open(s, O_RDONLY, 0)) == -1 ) { errmsg( errno, "cannot open %s", s) ; mawk_exit(1) ; } buffp = buffer = (unsigned char *) zmalloc( BUFFSZ+1 ) ; eof_flag = fillbuff(program_fd, buffer, BUFFSZ) < BUFFSZ ; } } void scan_cleanup() { if ( program_fd >= 0 ) zfree(buffer, BUFFSZ+1) ; if ( program_fd > 0 ) (void) close(program_fd) ; scan_code['\n'] = SC_SPACE ; } void scan_init(flag, p) int flag ; /* on if program is from the command line */ char *p ; { if ( ! flag ) program_fd = 0 ; buff_create(p) ; eat_nl() ; /* scan to first token */ if ( next() == 0 ) { errmsg(0, "no program") ; mawk_exit(1) ; } un_next() ; } /*-------------------------------- global variables shared by yyparse() and yylex() *-------------------------------*/ int current_token = -1 ; unsigned token_lineno ; unsigned compile_error_count ; int paren_cnt ; int brace_cnt ; int print_flag ; /* changes meaning of '>' */ int getline_flag ; /* changes meaning of '<' */ extern YYSTYPE yylval ; /*---------------------------------------- file reading functions next() and un_next(c) are macros in scan.h *---------------------*/ static unsigned lineno = 1 ; /* used to help distinguish / as divide or start of RE */ static int can_precede_re[] = { MATCH, NOT_MATCH, COMMA, RBRACE, LPAREN, NOT, P_OR, P_AND, NL, -1 } ; /* read one character -- slowly */ static int slow_next() { if ( *buffp == 0 ) if ( !eof_flag ) { buffp = buffer ; eof_flag = fillbuff(program_fd, buffer,BUFFSZ) < BUFFSZ ; } return *buffp++ ; /* note can un_next() , eof which is zero */ } static void eat_comment() { register int c ; while ( (c = next()) != '\n' && scan_code[c] ) ; un_next() ; } void eat_nl() /* eat all space including newlines */ { while ( 1 ) switch( scan_code[next()] ) { case SC_COMMENT : eat_comment() ; break ; case SC_NL : lineno++ ; /* fall thru */ case SC_SPACE : break ; default : un_next() ; return ; } } int yylex() { register int c ; token_lineno = lineno ; reswitch: switch( scan_code[c = next()] ) { case 0 : /* if no terminator on the line put one */ if ( (c = current_token) == RBRACE || c == NL || c == SEMI_COLON ) ct_ret(EOF) ; else { un_next() ; ct_ret(NL) ; }