[alt.sources] mawk0.97.shar 4 of 6

brennan@ssc-vax.UUCP (Mike Brennan) (05/11/91)

------------------cut here----------------
  scan_code['&'] = SC_AND ;
  scan_code['?'] = SC_QMARK ;
  scan_code[':'] = SC_COLON ;
  scan_code['['] = SC_LBOX ;
  scan_code[']'] = SC_RBOX ;
  scan_code['\\'] = SC_ESCAPE ;
  scan_code['.'] = SC_DOT ;
  scan_code['~'] = SC_MATCH ;
  scan_code['$'] = SC_DOLLAR ;

  for( p = scan_code + 'A' ; p <= scan_code + 'Z' ; p++ )
       *p = *(p + 'a' - 'A') = SC_IDCHAR ;

}

void scan_print()
{ register char *p = scan_code ;
  register int c ; /* column */
  register int r ; /* row */

  printf("\n\n/* scancode.c */\n\n\n") ;
  printf( "char scan_code[256] = {\n" ) ;

  for( r = 1 ; r <= 16 ; r++)
  {
    for( c = 1 ; c <= 16 ; c++)
    {
      printf("%2d" , *p++) ;
      if ( r != 16 || c != 16 )  putchar(',') ;
    }
    putchar('\n') ;
  }

  printf("} ;\n") ;
}


main()
{
  scan_init() ; scan_print() ;
  return 0 ;
}
@//E*O*F mawk0.97/makescan.c//
chmod u=rw,g=r,o=r mawk0.97/makescan.c
 
echo x - mawk0.97/matherr.c
sed 's/^@//' > "mawk0.97/matherr.c" <<'@//E*O*F mawk0.97/matherr.c//'

/********************************************
matherr.c
copyright 1991, Michael D. Brennan

This is a source file for mawk, an implementation of
the Awk programming language as defined in
Aho, Kernighan and Weinberger, The AWK Programming Language,
Addison-Wesley, 1988.

See the accompaning file, LIMITATIONS, for restrictions
regarding modification and redistribution of this
program in source or binary form.
********************************************/

/*$Log:	matherr.c,v $
 * Revision 2.1  91/04/08  08:23:31  brennan
 * VERSION 0.97
 * 
*/

#include  "mawk.h"
#include  <math.h>

#if   FPE_TRAPS
#include <signal.h>

/* machine dependent changes might be needed here */

static void  fpe_catch( signal, why)
  int signal, why ;
{
  switch(why)
  {
    case FPE_ZERODIVIDE :
       rt_error("division by zero") ;

    case FPE_OVERFLOW  :
       rt_error("floating point overflow") ;

    default :
      rt_error("floating point exception") ;
  }
}

void   fpe_init()
{ (void) signal(SIGFPE, fpe_catch) ; }

#else

void  fpe_init()
{
  TURNOFF_FPE_TRAPS() ;
}
#endif

#if  HAVE_MATHERR

#if  ! FPE_TRAPS 

/* If we are not trapping math errors, we will shutup the library calls
*/

int  matherr( e )
  struct exception *e ;
{ return 1 ; } 

#else   /* print error message and exit */

int matherr( e )
  struct exception  *e ;
{ char *error ;

  switch( e->type )
  {
    case  DOMAIN :
    case  SING :
            error = "domain error" ;
            break ;

    case  OVERFLOW :
            error = "overflow" ;
            break ;

    case  TLOSS :
    case  PLOSS :
            error = "loss of significance" ;
            break ;

    case  UNDERFLOW :
            e->retval = 0.0 ;
            return  1 ;  /* ignore it */
  }

  if ( strcmp(e->name, "atan2") == 0 )
      rt_error("atan2(%g,%g) : %s" ,
         e->arg1, e->arg2, error ) ;
  else
      rt_error("%s(%g) : %s" , e->name, e->arg1, error) ;

  /* won't get here */
  return 0 ;
}
#endif   /* FPE_TRAPS */

#endif   /*  HAVE_MATHERR */
@//E*O*F mawk0.97/matherr.c//
chmod u=rw,g=r,o=r mawk0.97/matherr.c
 
echo x - mawk0.97/mawk.h
sed 's/^@//' > "mawk0.97/mawk.h" <<'@//E*O*F mawk0.97/mawk.h//'

/********************************************
mawk.h
copyright 1991, Michael D. Brennan

This is a source file for mawk, an implementation of
the Awk programming language as defined in
Aho, Kernighan and Weinberger, The AWK Programming Language,
Addison-Wesley, 1988.

See the accompaning file, LIMITATIONS, for restrictions
regarding modification and redistribution of this
program in source or binary form.
********************************************/


/*   $Log:	mawk.h,v $
 * Revision 2.1  91/04/08  08:23:33  brennan
 * VERSION 0.97
 * 
*/


/*  mawk.h  */

#ifndef  MAWK_H
#define  MAWK_H   

#include  "machine.h"

#ifdef   DEBUG
#define  YYDEBUG  1
extern  int   yydebug ;  /* print parse if on */
extern  int   dump_RE ;
#endif
extern  int   dump_code ;

#ifdef  __STDC__
#define  PROTO(name, args)   name  args
#undef   HAVE_VOID_PTR
#define  HAVE_VOID_PTR          1
#else
#define  PROTO(name, args)   name()
#endif 


#include <stdio.h>
#include <string.h>
#include "types.h"


/*----------------
 *  GLOBAL VARIABLES
 *----------------*/

/* some well known cells */
extern CELL cell_zero, cell_one ;
extern STRING  null_str ;
/* a useful scratch area */
extern union tbuff temp_buff ;
extern char *main_buff ; /* main file input buffer */

/* help with casts */
extern int pow2[] ;


 /* these are used by the parser, scanner and error messages
    from the compile  */

extern  int current_token ;
extern  unsigned  token_lineno ; /* lineno of current token */
extern  unsigned  compile_error_count ;
extern  int  paren_cnt, brace_cnt ;
extern  int  print_flag, getline_flag ;


/*---------*/

extern  int  errno ;     
extern  char *progname ; /* for error messages */

/* macro to test the type of two adjacent cells */
#define TEST2(cp)  (pow2[(cp)->type]+pow2[((cp)+1)->type])

/* macro to get at the string part of a CELL */
#define string(cp) ((STRING *)(cp)->ptr)

#ifdef   DEBUG
#define cell_destroy(cp)  DB_cell_destroy(cp)
#else

#define cell_destroy(cp)   if ( (cp)->type >= C_STRING &&\
                                -- string(cp)->ref_cnt == 0 )\
                                zfree(string(cp),string(cp)->len+5);else
#endif

/*  prototypes  */

void  PROTO( cast1_to_s, (CELL *) ) ;
void  PROTO( cast1_to_d, (CELL *) ) ;
void  PROTO( cast2_to_s, (CELL *) ) ;
void  PROTO( cast2_to_d, (CELL *) ) ;
void  PROTO( cast_to_RE, (CELL *) ) ;
void  PROTO( cast_for_split, (CELL *) ) ;
void  PROTO( check_strnum, (CELL *) ) ;
void  PROTO( cast_to_REPL, (CELL *) ) ;

int   PROTO( test, (CELL *) ) ; /* test for null non-null */
CELL *PROTO( cellcpy, (CELL *, CELL *) ) ;
CELL *PROTO( repl_cpy, (CELL *, CELL *) ) ;
void  PROTO( DB_cell_destroy, (CELL *) ) ;
void  PROTO( overflow, (char *, unsigned) ) ;
void  PROTO( rt_overflow, (char *, unsigned) ) ;
void  PROTO( rt_error, ( char *, ...) ) ;
void  PROTO( mawk_exit, (int) ) ;
void PROTO( da, (INST *, FILE *)) ;
int  PROTO( space_split, (char *) ) ;
char *PROTO( str_str, (char*, char*, unsigned) ) ;
int   PROTO( re_split, (char *, PTR) ) ;
char *PROTO( re_pos_match, (char *, PTR, unsigned *) ) ;

void  PROTO( exit, (int) ) ;
int   PROTO( close, (int) ) ;
int   PROTO( open, (char *,int, int) ) ;
int   PROTO( read, (int , PTR, unsigned) ) ;
char *PROTO( getenv, (const char *) ) ;

int  PROTO ( parse, (void) ) ;
int  PROTO ( yylex, (void) ) ;
int  PROTO( yyparse, (void) ) ;
void PROTO( yyerror, (char *) ) ;

void PROTO( bozo, (char *) ) ;
void PROTO( errmsg , (int, char*, ...) ) ;
void PROTO( compile_error, ( char *, ...) ) ;

INST *PROTO( execute, (INST *, CELL *, CELL *) ) ;
char *PROTO( find_kw_str, (int) ) ;

double strtod(), fmod() ;

#endif  /* MAWK_H */
@//E*O*F mawk0.97/mawk.h//
chmod u=rw,g=r,o=r mawk0.97/mawk.h
 
echo x - mawk0.97/memory.c
sed 's/^@//' > "mawk0.97/memory.c" <<'@//E*O*F mawk0.97/memory.c//'

/********************************************
memory.c
copyright 1991, Michael D. Brennan

This is a source file for mawk, an implementation of
the Awk programming language as defined in
Aho, Kernighan and Weinberger, The AWK Programming Language,
Addison-Wesley, 1988.

See the accompaning file, LIMITATIONS, for restrictions
regarding modification and redistribution of this
program in source or binary form.
********************************************/


/* $Log:	memory.c,v $
 * Revision 2.1  91/04/08  08:23:35  brennan
 * VERSION 0.97
 * 
*/


/* memory.c */

#include "mawk.h"

#ifdef  __TURBOC__
#define SUPPRESS_NEW_STRING_PROTO  /* get compiler off our back on
         the definition of new_STRING() */
#pragma  warn -pro
#endif

#include "memory.h"

STRING null_str = {1, 0, "" } ;

static STRING *char_string[127] ;
/* slots for strings of one character
   "\01" thru "\177"    */
  
STRING *new_STRING(s, xlen)   
  char *s ;  unsigned xlen ;
  /* WARNING: if s != NULL, don't access xlen
     because it won't be there   */
{ register STRING *p ;
  unsigned len ;

  if ( s )
        switch( len = strlen(s) )
        {
            case 0 : 
                p = &null_str  ; p->ref_cnt++ ;
                break ;

            case 1 :
                if ( *(unsigned char *)s < 128 )
                {   if ( p = char_string[*s-1] )
                        p->ref_cnt++ ;
                    else
                    { p = (STRING *) zmalloc(6) ;
                      p->ref_cnt = 2 ;  p->len = 1 ; 
                      p->str[0] = s[0] ;
                      p->str[1] = 0 ;
                      char_string[*s-1] = p ;
                    }

                    break ; /*case */
                }
                /* else FALL THRU */

            default :
                p = (STRING *) zmalloc(len+5) ;
                p->ref_cnt = 1 ; p->len = len ;
                (void) memcpy( p->str , s, len+1) ;
                break ;
        }
  else  
  { p = (STRING *) zmalloc( xlen+5 ) ;
    p->ref_cnt = 1 ; p->len = xlen ;
    /* zero out the end marker */
    p->str[xlen] = 0 ; 
  }

  return p ;
}


#ifdef   DEBUG

void  DB_free_STRING(sval)
  register STRING *sval ;
{ if ( -- sval->ref_cnt == 0 )  zfree(sval, sval->len+5) ; }

#endif
@//E*O*F mawk0.97/memory.c//
chmod u=rw,g=r,o=r mawk0.97/memory.c
 
echo x - mawk0.97/memory.h
sed 's/^@//' > "mawk0.97/memory.h" <<'@//E*O*F mawk0.97/memory.h//'

/********************************************
memory.h
copyright 1991, Michael D. Brennan

This is a source file for mawk, an implementation of
the Awk programming language as defined in
Aho, Kernighan and Weinberger, The AWK Programming Language,
Addison-Wesley, 1988.

See the accompaning file, LIMITATIONS, for restrictions
regarding modification and redistribution of this
program in source or binary form.
********************************************/


/* $Log:	memory.h,v $
 * Revision 2.1  91/04/08  08:23:37  brennan
 * VERSION 0.97
 * 
*/


/*  memory.h  */

#ifndef  MEMORY_H
#define  MEMORY_H

#include "zmalloc.h"

#define  new_CELL()  (CELL *) zmalloc(sizeof(CELL))
#define  free_CELL(p)  zfree(p,sizeof(CELL))

#ifndef  SUPPRESS_NEW_STRING_PROTO
STRING  *PROTO( new_STRING, (char *, ...) ) ;
#endif

#ifdef   DEBUG
void  PROTO( DB_free_STRING , (STRING *) ) ;

#define  free_STRING(s)  DB_free_STRING(s)

#else

#define  free_STRING(sval)   if ( -- (sval)->ref_cnt == 0 )\
                                zfree(sval, (sval)->len+5) ; else
#endif


#endif   /* MEMORY_H */
@//E*O*F mawk0.97/memory.h//
chmod u=rw,g=r,o=r mawk0.97/memory.h
 
echo x - mawk0.97/parse.y
sed 's/^@//' > "mawk0.97/parse.y" <<'@//E*O*F mawk0.97/parse.y//'

/********************************************
parse.y
copyright 1991, Michael D. Brennan

This is a source file for mawk, an implementation of
the Awk programming language as defined in
Aho, Kernighan and Weinberger, The AWK Programming Language,
Addison-Wesley, 1988.

See the accompaning file, LIMITATIONS, for restrictions
regarding modification and redistribution of this
program in source or binary form.
********************************************/

/* $Log:	parse.y,v $
 * Revision 2.1  91/04/08  08:23:39  brennan
 * VERSION 0.97
 * 
*/


%{
#include <stdio.h>
#include "mawk.h"
#include "code.h"
#include "symtype.h"
#include "memory.h"
#include "bi_funct.h"
#include "bi_vars.h"
#include "jmp.h"
#include "field.h"
#include "files.h"

extern void  PROTO( eat_nl, (void) ) ;
static void  PROTO( resize_fblock, (FBLOCK *, INST *) ) ;
static void  PROTO( code_array, (SYMTAB *) ) ;
static void  PROTO( code_call_id, (CA_REC *, SYMTAB *) ) ;
static int   PROTO( current_offset, (void) ) ;

static int scope ;
static FBLOCK *active_funct ;
      /* when scope is SCOPE_FUNCT  */

#define  code_address(x)  if( is_local(x) )\
                          { code1(L_PUSHA) ; code1((x)->offset) ; }\
                          else  code2(_PUSHA, (x)->stval.cp) 

%}

%union{
CELL *cp ;
SYMTAB *stp ;
INST  *start ; /* code starting address */
PF_CP  fp ;  /* ptr to a (print/printf) or (sub/gsub) function */
BI_REC *bip ; /* ptr to info about a builtin */
FBLOCK  *fbp  ; /* ptr to a function block */
ARG2_REC *arg2p ;
CA_REC   *ca_p  ;
int   ival ;
}

/*  two tokens to help with errors */
%token   UNEXPECTED   /* unexpected character */
%token   BAD_DECIMAL

%token   NL
%token   SEMI_COLON
%token   LBRACE  RBRACE
%token   LBOX     RBOX
%token   COMMA
%token   <ival> IO_OUT    /* > or output pipe */

%left   P_OR
%left   P_AND
%right  ASSIGN  ADD_ASG SUB_ASG MUL_ASG DIV_ASG MOD_ASG POW_ASG
%right  QMARK COLON
%left   OR
%left   AND
%left   IN
%left   MATCH  NOT_MATCH
%left   EQ  NEQ  LT LTE  GT  GTE
%left   CAT
%left   GETLINE
%left   PLUS      MINUS  
%left   MUL      DIV    MOD
%left   NOT   UMINUS
%nonassoc   IO_IN PIPE
%right  POW
%left   INC  DEC   /* ++ -- */
%left   DOLLAR    ID  FIELD  /* last two to remove a SR conflict
                                with getline */
%right  LPAREN   RPAREN     /* removes some SR conflicts */
%token  <cp>  CONSTANT  RE
%token  <stp> ID   
%token  <fbp> FUNCT_ID
%token  <bip> BUILTIN 
%token   <cp>  FIELD 

%token  PRINT PRINTF SPLIT MATCH_FUNC SUB GSUB LENGTH
/* keywords */
%token  DO WHILE FOR BREAK CONTINUE IF ELSE  IN
%token  DELETE  BEGIN  END  EXIT NEXT RETURN  FUNCTION

%type <start>  block  block_or_newline
%type <start>  statement_list statement mark
%type <start>  pattern  p_pattern
%type <start>  print_statement
%type <ival>   pr_args
%type <arg2p>  arg2  
%type <start>  builtin  
%type <start>  getline_file
%type <start>  lvalue field  fvalue
%type <start>  expr cat_expr p_expr  re_or_expr  sub_back
%type <start>  do_statement  while_statement  for_statement
%type <start>  if_statement if_else_statement
%type <start>  while_front  if_front  for_front
%type <start>  fexpr0 fexpr1
%type <start>  array_loop  array_loop_front
%type <start>  exit_statement  return_statement
%type <ival>   arglist args 
%type <stp>     id  array
%type <fp>     print   sub_or_gsub
%type <fbp>    funct_start funct_head
%type <ca_p>   call_args ca_front ca_back
%type <ival>   f_arglist f_args

%%
/*  productions  */

program :       program_block
        |       program  program_block 
        ;

program_block :  PA_block
              |  function_def
              |  error block
                 { if (scope == SCOPE_FUNCT)
                   { restore_ids() ; scope = SCOPE_MAIN ; }
                   code_ptr = main_code_ptr ;
                 }
              ;

PA_block  :  block

          |  BEGIN  
                { main_code_ptr = code_ptr ;
                  code_ptr = begin_code_ptr ; 
                  scope = SCOPE_BEGIN ;
                }

             block
                { begin_code_ptr = code_ptr ;
                  code_ptr = main_code_ptr ; 
                  scope = SCOPE_MAIN ;
                }

          |  END    
                { main_code_ptr = code_ptr ;
                  code_ptr = end_code_ptr ; 
                  scope = SCOPE_END ;
                }

             block
                { end_code_ptr = code_ptr ;
                  code_ptr = main_code_ptr ; 
                  scope = SCOPE_MAIN ;
                }

          |  pattern  /* this works just like an if statement */
             { code_jmp(_JZ, 0) ; }

             block_or_newline
             { patch_jmp( code_ptr ) ; }

    /* range pattern, see comment in execute.c near _RANGE */
          |  pattern COMMA 
             { code_push($1, code_ptr - $1) ;
               code_ptr = $1 ;
               code1(_RANGE) ; code1(1) ;
               code_ptr += 3 ;
               code_ptr += code_pop(code_ptr) ;
               code1(_STOP0) ;
               $1[2].op = code_ptr - ($1+1) ;
             }
             pattern
             { code1(_STOP0) ; }

             block_or_newline
             { $1[3].op = $6 - ($1+1) ;
               $1[4].op = code_ptr - ($1+1) ;
             }
          ;

pattern :  expr       %prec  LPAREN
        |  p_pattern

/*  these work just like short circuit booleans */
        |  pattern P_OR  
                { code1(_DUP) ;
                  code_jmp(_JNZ, 0) ;
                  code1(_POP) ;
                }
                pattern
                { patch_jmp(code_ptr) ; }

        |  pattern P_AND
                { code1(_DUP) ;
                  code_jmp(_JZ, 0) ;
                  code1(_POP) ;
                }
                pattern
                { patch_jmp(code_ptr) ; }
        ;

/* we want the not (!) operator to apply to expr if possible
   and then to a pattern.  Two types of pattern do it */

p_pattern  :  RE
              { $$ = code_ptr ;
                code2(_PUSHI, &field[0]) ;
                code2(_PUSHC, $1) ;
                code1(_MATCH) ;
              }

           |  LPAREN  pattern RPAREN
              { $$ = $2 ; }
           |  NOT  p_pattern
              { code1(_NOT) ; $$ = $2 ; }
           ;


block   :  LBRACE   statement_list  RBRACE
            { $$ = $2 ; }
        |  LBRACE   error  RBRACE 
            { $$ = code_ptr ; /* does nothing won't be executed */
              print_flag = getline_flag = paren_cnt = 0 ;
              yyerrok ; }
        ;

block_or_newline  :  block
                  |  NL     /* default print action */
                     { $$ = code_ptr ;
                       code1(_PUSHINT) ; code1(0) ;
                       code2(_PRINT, bi_print) ;
                     }

statement_list :  statement
        |  statement_list   statement
        ;


statement :  block
          |  expr   separator
             { code1(_POP) ; }
          |  /* empty */  separator
             { $$ = code_ptr ; }
          |  error  separator
              { $$ = code_ptr ;
                print_flag = getline_flag = 0 ;
                paren_cnt = 0 ;
                yyerrok ;
              }
          |  print_statement
          |  if_statement
          |  if_else_statement
          |  do_statement
          |  while_statement
          |  for_statement
          |  array_loop
          |  BREAK  separator
             { $$ = code_ptr ; BC_insert('B', code_ptr) ;
               code2(_JMP, 0) /* don't use code_jmp ! */ ; }
          |  CONTINUE  separator
             { $$ = code_ptr ; BC_insert('C', code_ptr) ;
               code2(_JMP, 0) ; }
          |  exit_statement
          |  return_statement
             { if ( scope != SCOPE_FUNCT )
                     compile_error("return outside function body") ;
             }
          |  NEXT  separator
              { if ( scope != SCOPE_MAIN )
                   compile_error( "improper use of next" ) ;
                $$ = code_ptr ; code1(_NEXT) ;
              }
          ;

separator  :  NL | SEMI_COLON
           ;

expr  :   cat_expr
      |   lvalue   ASSIGN   expr { code1(_ASSIGN) ; }
      |   lvalue   ADD_ASG  expr { code1(_ADD_ASG) ; }
      |   lvalue   SUB_ASG  expr { code1(_SUB_ASG) ; }
      |   lvalue   MUL_ASG  expr { code1(_MUL_ASG) ; }
      |   lvalue   DIV_ASG  expr { code1(_DIV_ASG) ; }
      |   lvalue   MOD_ASG  expr { code1(_MOD_ASG) ; }
      |   lvalue   POW_ASG  expr { code1(_POW_ASG) ; }
      |   expr EQ expr  { code1(_EQ) ; }
      |   expr NEQ expr { code1(_NEQ) ; }
      |   expr LT expr { code1(_LT) ; }
      |   expr LTE expr { code1(_LTE) ; }
      |   expr GT expr { code1(_GT) ; }
      |   expr GTE expr { code1(_GTE) ; }
      |   expr MATCH re_or_expr
          { code1(_MATCH) ; }
      |   expr NOT_MATCH  re_or_expr
          { code1(_MATCH) ; code1(_NOT) ; }

/* short circuit boolean evaluation */
      |   expr  OR
              { code1(_DUP) ;
                code_jmp(_JNZ, 0) ;
                code1(_POP) ;
              }
          expr
          { patch_jmp(code_ptr) ; code1(_TEST) ; }

      |   expr AND
              { code1(_DUP) ; code_jmp(_JZ, 0) ;
                code1(_POP) ; }
          expr
              { patch_jmp(code_ptr) ; code1(_TEST) ; }

      |  expr QMARK  { code_jmp(_JZ, 0) ; }
         expr COLON  { code_jmp(_JMP, 0) ; }
         expr
         { patch_jmp(code_ptr) ; patch_jmp($7) ; }
      ;

cat_expr :  p_expr             %prec CAT
         |  cat_expr  p_expr   %prec CAT 
            { code1(_CAT) ; }
         ;

p_expr  :   CONSTANT
          {  $$ = code_ptr ; code2(_PUSHC, $1) ; }
      |   lvalue  %prec CAT /* removes lvalue (++|--) sr conflict */
            { switch( code_ptr[-2].op )
              { case _PUSHA :
                      code_ptr[-2].op = _PUSHI ;
                      break ;
                case AE_PUSHA :
                      code_ptr[-2].op = AE_PUSHI ;
                      break ;
                case L_PUSHA :
                      code_ptr[-2].op = L_PUSHI ;
                      break ;
                case LAE_PUSHA :
                      code_ptr[-2].op = LAE_PUSHI ;
                      break ;
#ifdef  DEBUG
                default : bozo("p_expr->lvalue") ;
#endif
              }
            }
      |   LPAREN   expr  RPAREN
          { $$ = $2 ; }
      ;
p_expr  :   p_expr  PLUS   p_expr { code1(_ADD) ; } 
      |   p_expr MINUS  p_expr { code1(_SUB) ; }
      |   p_expr  MUL   p_expr { code1(_MUL) ; }
      |   p_expr  DIV  p_expr { code1(_DIV) ; }
      |   p_expr  MOD  p_expr { code1(_MOD) ; }
      |   p_expr  POW  p_expr { code1(_POW) ; }
      |   NOT  p_expr  
                { $$ = $2 ; code1(_NOT) ; }
      |   PLUS p_expr  %prec  UMINUS
                { $$ = $2 ; code1(_UPLUS) ; }
      |   MINUS p_expr %prec  UMINUS
                { $$ = $2 ; code1(_UMINUS) ; }
      |   builtin
      ;

p_expr  :  lvalue  INC   
        { code1(_POST_INC ) ; }
        |  lvalue  DEC  
        { code1(_POST_DEC) ; }
        |  INC  lvalue
        { $$ = $2 ; code1(_PRE_INC) ; }
        |  DEC  lvalue
        { $$ = $2 ; code1(_PRE_DEC) ; }
        ;

p_expr  :  field  INC   
        { code1(F_POST_INC ) ; }
        |  field  DEC  
        { code1(F_POST_DEC) ; }
        |  INC  field
        { $$ = $2 ; code1(F_PRE_INC) ; }
        |  DEC  field
        { $$ = $2 ; code1(F_PRE_DEC) ; }
        ;

lvalue :  id     
        { $$ = code_ptr ; code_address($1) ; }
       |  LPAREN  lvalue RPAREN
          { $$ = $2 ; }
       ;

id      :   ID  
            {
              switch($1->type)
              {
                case ST_NONE : /* new id */
                    $1->type = ST_VAR ;
                    $1->stval.cp = new_CELL() ;
                    $1->stval.cp->type = C_NOINIT ;
                    break ;

                case ST_LOCAL_NONE :
                    $1->type = ST_LOCAL_VAR ;
                    active_funct->typev[$1->offset] = ST_LOCAL_VAR ;
                    break ;

                case ST_VAR :
                case ST_LOCAL_VAR :  break ;

                default :
                    type_error($1) ;
                    break ;
              }
           }
        ;

arglist :  /* empty */
            { $$ = 0 ; }
        |  args
        ;

args    :  expr        %prec  LPAREN
            { $$ = 1 ; }
        |  args  COMMA  expr
            { $$ = $1 + 1 ; }
        ;

builtin :
        BUILTIN mark  LPAREN  arglist RPAREN
        { BI_REC *p = $1 ;
          $$ = $2 ;
          if ( p-> min_args > $4 || p->max_args < $4 )
            compile_error(
            "wrong number of arguments in call to %s" ,
            p->name ) ;
          if ( p->min_args != p->max_args ) /* variable args */
               code2(_PUSHINT , $4 ) ;
          code2(_BUILTIN , p->fp) ;
        }
        ;

/* an empty production to store the code_ptr */
mark : /* empty */
         { $$ = code_ptr ; }

print_statement : print mark pr_args pr_direction separator
            { code2(_PRINT, $1) ; $$ = $2 ;
              if ( $1 == bi_printf && $3 == 0 )
                    compile_error("no arguments in call to printf") ;
              print_flag = 0 ;
              $$ = $2 ;
            }
            ;

print   :  PRINT  { $$ = bi_print ; print_flag = 1 ;}
        |  PRINTF { $$ = bi_printf ; print_flag = 1 ; }
        ;

pr_args :  arglist { code1(_PUSHINT) ; code1($1) ; }
        |  LPAREN  arg2 RPAREN
           { $$ = $2->cnt ; zfree($2,sizeof(ARG2_REC)) ; 
             code1(_PUSHINT) ; code1($$) ; 
           }
        ;

arg2   :   expr  COMMA  expr
           { $$ = (ARG2_REC*) zmalloc(sizeof(ARG2_REC)) ;
             $$->start = $1 ;
             $$->cnt = 2 ;
           }
        |   arg2 COMMA  expr
            { $$ = $1 ; $$->cnt++ ; }
        ;

pr_direction : /* empty */
             |  IO_OUT  expr
                { code2(_PUSHINT, $1) ; }
             ;


/*  IF and IF-ELSE */

if_front :  IF LPAREN expr RPAREN
            {  $$ = $3 ; eat_nl() ; code_jmp(_JZ, 0) ; }
         ;

if_statement : if_front statement
                { patch_jmp( code_ptr ) ;  }
              ;

else    :  ELSE { eat_nl() ; code_jmp(_JMP, 0) ; }
        ;

if_else_statement :  if_front statement else statement
                { patch_jmp(code_ptr) ; patch_jmp($4) ; }


/*  LOOPS   */

do      :  DO
        { eat_nl() ; BC_new() ; }
        ;

do_statement : do statement WHILE LPAREN expr RPAREN separator
        { $$ = $2 ;
          code_jmp(_JNZ, $2) ; 
          BC_clear(code_ptr, $5) ; }
        ;

while_front :  WHILE LPAREN expr RPAREN
                { eat_nl() ; BC_new() ;
                  code_push($3, code_ptr-$3) ;
                  code_ptr = $$ = $3 ;
                  code_jmp(_JMP,0) ;
                }
            ;

while_statement :  while_front  statement
                { INST *c_addr = code_ptr ; /*continue address*/

                  patch_jmp( c_addr) ;
                  code_ptr += code_pop(c_addr) ;
                  code_jmp(_JNZ, $2) ;
                  BC_clear(code_ptr, c_addr) ;
                }
                ;

for_front  :  FOR LPAREN fexpr0 SEMI_COLON 
                         fexpr1 SEMI_COLON  fexpr0 RPAREN

              { $$ = $3 ; eat_nl() ; BC_new() ;
                /* push fexpr2 and 3 */
                code_push( $5, $7-$5) ;
                code_push( $7, code_ptr - $7) ;
                /* reset code_ptr */
                code_ptr = $5 ;
                code_jmp(_JMP, 0) ;
              }
           ;

for_statement  :  for_front  statement
              { INST *c_addr = code_ptr ;
                unsigned len = code_pop(code_ptr) ;

                code_ptr += len ;
                patch_jmp(code_ptr) ;
                len = code_pop(code_ptr) ;
                code_ptr += len ;
                code_jmp(_JNZ, $2) ;
                BC_clear( code_ptr, c_addr) ;
              }
              ;

fexpr0  :  /* empty */   { $$ = code_ptr; }
        |  expr   { code1(_POP) ; }
        ;

fexpr1  :  /*  empty */
            { /* this will be wiped out when the jmp is coded */
              $$ = code_ptr ; code2(_PUSHC, &cell_one) ; }
        |   expr
        ;

/* arrays  */

array   :   ID
            { switch($1->type)
              {
                case ST_NONE :  /* a new array */
                    $1->type = ST_ARRAY ;
                    $1->stval.array = new_ARRAY() ;
                    break ;

                case  ST_ARRAY :
                case  ST_LOCAL_ARRAY :
                    break ;

                case  ST_LOCAL_NONE  :
                    $1->type = ST_LOCAL_ARRAY ;
                    active_funct->typev[$1->offset] = ST_LOCAL_ARRAY ;
                    break ;

                default : type_error($1) ; break ;
              }
            }
        ;

expr    :  expr IN  array 
           { code_array($3) ; code1(A_TEST) ; }
        |  LPAREN arg2 RPAREN IN array
           { $$ = $2->start ;
             code1(A_CAT) ; code1($2->cnt) ;
             zfree($2, sizeof(ARG2_REC)) ;

             code_array($5) ;
             code1(A_TEST) ;
           }
        ;

lvalue  :  array mark LBOX  args  RBOX
           { 
             if ( $4 > 1 )
             { code1(A_CAT) ; code1($4) ; }
           
             if( is_local($1) )
             { code1(LAE_PUSHA) ; code1($1->offset) ; }
             else code2(AE_PUSHA, $1->stval.array) ;
             $$ = $2 ;
           }
        ;


/* delete A[i] */
statement :  DELETE  array mark LBOX args RBOX separator
             { 
               $$ = $3 ;
               if ( $5 > 1 ) { code1(A_CAT) ; code1($5) ; }
               code_array($2) ;
               code1(A_DEL) ;
             }

          ;

/*  for ( i in A )  statement */

array_loop_front :  FOR LPAREN id IN array RPAREN
                    { eat_nl() ; BC_new() ;
                      $$ = code_ptr ;

                      code_address($3) ;
                      code_array($5) ;
                      code1(A_LOOP) ; code1(_STOP) ;
                      code1(0) ; /* put offset of following code here*/
                    }
                 ;

array_loop :  array_loop_front  statement
              { code1(_STOP) ;  
                BC_clear( $2 - 2, code_ptr-1) ;
                $2[-1].op = code_ptr - & $2[-2] ;
              }
           ;

/*  fields   */

field   :  FIELD
           { $$ = code_ptr ; code2(F_PUSHA, $1) ; }
        |  DOLLAR  p_expr
           { $$ = $2 ; code1( FE_PUSHA ) ; }
        |  LPAREN  field  RPAREN
           { $$ = $2 ; }
        ;

p_expr   :  field   %prec CAT /* removes field (++|--) sr conflict */
           { if ( code_ptr[-2].op == F_PUSHA )
                   code_ptr[-2].op =  
                       ((CELL *)code_ptr[-1].ptr == field ||
                        (CELL *)code_ptr[-1].ptr >  field+NF )
                        ? _PUSHI : F_PUSHI ;
             else if ( code_ptr[-1].op == FE_PUSHA ) 
                   code_ptr[-1].op = FE_PUSHI ;
             else  bozo("missing F(E)_PUSHA") ;
           }
        ;

expr    :  field   ASSIGN   expr { code1(F_ASSIGN) ; }
        |  field   ADD_ASG  expr { code1(F_ADD_ASG) ; }
        |  field   SUB_ASG  expr { code1(F_SUB_ASG) ; }
        |  field   MUL_ASG  expr { code1(F_MUL_ASG) ; }
        |  field   DIV_ASG  expr { code1(F_DIV_ASG) ; }
        |  field   MOD_ASG  expr { code1(F_MOD_ASG) ; }
        |  field   POW_ASG  expr { code1(F_POW_ASG) ; }
        ;

/* split is handled different than a builtin because
   it takes an array and optionally a regular expression as args */

p_expr :  SPLIT LPAREN expr COMMA  array RPAREN
             { $$ = $3 ;
               code_array($5) ;
               code2(_PUSHI, &fs_shadow) ;
               code2(_BUILTIN, bi_split) ;
             }
          |  SPLIT LPAREN expr COMMA array COMMA
               { code_array($5) ; }
             split_back
             { $$ = $3 ; code2(_BUILTIN, bi_split) ; }
          ;

/* split back is not the same as
   re_or_expr RPAREN
   because the action is cast_for_split() instead
   of cast_to_RE()
*/

split_back :  expr RPAREN
             { 
               if ( code_ptr[-2].op == _PUSHC &&
                   ((CELL *)code_ptr[-1].ptr)->type == C_STRING )
                   cast_for_split(code_ptr[-1].ptr) ;
             }

           |  RE  RPAREN
             { code2(_PUSHC, $1) ; }
           ;


             

/*  match(expr, RE) */

p_expr : MATCH_FUNC LPAREN expr COMMA re_or_expr RPAREN
        { $$ = $3 ; code2(_BUILTIN, bi_match) ; }
     ;

re_or_expr  :   RE
                { $$ = code_ptr ;
                  code2(_PUSHC, $1) ;
                }
            |   expr    %prec  MATCH
                { if ( code_ptr[-2].op == _PUSHC &&
                       ((CELL *)code_ptr[-1].ptr)->type == C_STRING )
                     /* re compile now */
                     cast_to_RE((CELL *) code_ptr[-1].ptr) ;
                }
            ;

/* length w/o an argument */

p_expr :  LENGTH
          { $$ = code_ptr ;
            code2(_PUSHI, field) ;
            code2(_BUILTIN, bi_length) ;
          }
       ;

exit_statement :  EXIT   separator
                    { $$ = code_ptr ;
                      code1(_EXIT0) ; }
               |  EXIT   expr  separator
                    { $$ = $2 ; code1(_EXIT) ; }

return_statement :  RETURN   separator
                    { $$ = code_ptr ;
                      code1(_RET0) ; }
               |  RETURN   expr  separator
                    { $$ = $2 ; code1(_RET) ; }

/* getline */

p_expr :  getline      %prec  GETLINE
          { $$ = code_ptr ;
            code2(F_PUSHA, &field[0]) ;
            code1(_PUSHINT) ; code1(0) ; 
            code2(_BUILTIN, bi_getline) ;
            getline_flag = 0 ;
          }
       |  getline  fvalue     %prec  GETLINE
          { $$ = $2 ;
            code1(_PUSHINT) ; code1(0) ;
            code2(_BUILTIN, bi_getline) ;
            getline_flag = 0 ;
          }
       |  getline_file  p_expr    %prec IO_IN
          { code1(_PUSHINT) ; code1(F_IN) ;
            code2(_BUILTIN, bi_getline) ;
            /* getline_flag already off in yylex() */
          }
       |  p_expr PIPE GETLINE  
          { code2(F_PUSHA, &field[0]) ;
            code1(_PUSHINT) ; code1(PIPE_IN) ;
            code2(_BUILTIN, bi_getline) ;
          }
       |  p_expr PIPE GETLINE   fvalue
          { 
            code1(_PUSHINT) ; code1(PIPE_IN) ;
            code2(_BUILTIN, bi_getline) ;
          }
       ;

getline :   GETLINE  { getline_flag = 1 ; }

fvalue  :   lvalue  |  field  ;

getline_file  :  getline  IO_IN
                 { $$ = code_ptr ;
                   code2(F_PUSHA, field+0) ;
                 }
              |  getline fvalue IO_IN
                 { $$ = $2 ; }
              ;

/*==========================================
    sub and gsub  
  ==========================================*/

p_expr  :  sub_or_gsub LPAREN re_or_expr COMMA  expr  sub_back
           {
             if ( $6 - $5 == 2   &&
                  $5->op == _PUSHC  &&
                  ((CELL *) $5[1].ptr)->type == C_STRING )
             /* cast from STRING to REPL at compile time */
                 cast_to_REPL( (CELL *) $5[1].ptr ) ;

             code2(_BUILTIN, $1) ;
             $$ = $3 ;
           }

sub_or_gsub :  SUB  { $$ = bi_sub ; }
            |  GSUB { $$ = bi_gsub ; }
            ;

sub_back    :   RPAREN    /* substitute into $0  */
                { $$ = code_ptr ;
                  code2(F_PUSHA, &field[0]) ; 
                }

            |   COMMA fvalue  RPAREN
                { $$ = $2 ; }
            ;

/*================================================
    user defined functions
 *=================================*/

function_def  :  funct_start  block
                 { resize_fblock($1, code_ptr) ;
                   code_ptr = main_code_ptr ;
                   scope = SCOPE_MAIN ;
                   active_funct = (FBLOCK *) 0 ;
                   restore_ids() ;
                 }
              ;
                   

funct_start   :  funct_head  LPAREN  f_arglist  RPAREN
                 { eat_nl() ;
                   scope = SCOPE_FUNCT ;
                   active_funct = $1 ;
                   main_code_ptr = code_ptr ;

                   if ( $1->nargs = $3 )
                        $1->typev = (char *) memset(
                               zmalloc($3), ST_LOCAL_NONE, $3) ;
                   else $1->typev = (char *) 0 ;
                   code_ptr = $1->code = 
                       (INST *) zmalloc(PAGE_SZ*sizeof(INST)) ;
                 }
              ;
                  
funct_head    :  FUNCTION  ID
                 { FBLOCK  *fbp ;

                   if ( $2->type == ST_NONE )
                   {
                         $2->type = ST_FUNCT ;
                         fbp = $2->stval.fbp = 
                             (FBLOCK *) zmalloc(sizeof(FBLOCK)) ;
                         fbp->name = $2->name ;
                   }
                   else
                   {
                         type_error( $2 ) ;

                         /* this FBLOCK will not be put in
                            the symbol table */
                         fbp = (FBLOCK*) zmalloc(sizeof(FBLOCK)) ;
                         fbp->name = "" ;
                   }
                   $$ = fbp ;
                 }

              |  FUNCTION  FUNCT_ID
                 { $$ = $2 ; 
                   if ( $2->code ) 
                       compile_error("redefinition of %s" , $2->name) ;
                 }
              ;
                         
f_arglist  :  /* empty */ { $$ = 0 ; }
           |  f_args
           ;

f_args     :  ID
              { $1 = save_id($1->name) ;
                $1->type = ST_LOCAL_NONE ;
                $1->offset = 0 ;
                $$ = 1 ;
              }
           |  f_args  COMMA  ID
              { if ( is_local($3) ) 
                  compile_error("%s is duplicated in argument list",
                    $3->name) ;
                else
                { $3 = save_id($3->name) ;
                  $3->type = ST_LOCAL_NONE ;
                  $3->offset = $1 ;
                  $$ = $1 + 1 ;
                }
              }
           ;

/* a call to a user defined function */
             
p_expr  :  FUNCT_ID mark  call_args
           { $$ = $2 ;
             code2(_CALL, $1) ;

             if ( $3 )  code1($3->arg_num+1) ;
             else  code1(0) ;
               
             check_fcall($1, scope, active_funct, 
                         $3, token_lineno) ;
           }
        ;

call_args  :   LPAREN   RPAREN
               { $$ = (CA_REC *) 0 ; }
           |   ca_front  ca_back
               { $$ = $2 ;
                 $$->link = $1 ;
                 $$->arg_num = $1 ? $1->arg_num+1 : 0 ;
               }
           ;

/* The funny definition of ca_front with the COMMA bound to the ID is to
   force a shift to avoid a reduce/reduce conflict
   ID->id or ID->array

   Or to avoid a decision, if the type of the ID has not yet been
   determined
*/

ca_front   :  LPAREN
              { $$ = (CA_REC *) 0 ; }
           |  ca_front  expr   COMMA
              { $$ = (CA_REC *) zmalloc(sizeof(CA_REC)) ;
                $$->link = $1 ;
                $$->type = CA_EXPR  ;
                $$->arg_num = $1 ? $1->arg_num+1 : 0 ;
              }
           |  ca_front  ID   COMMA
              { $$ = (CA_REC *) zmalloc(sizeof(CA_REC)) ;
                $$->link = $1 ;
                $$->arg_num = $1 ? $1->arg_num+1 : 0 ;

                code_call_id($$, $2) ;
              }
           ;

ca_back    :  expr   RPAREN
              { $$ = (CA_REC *) zmalloc(sizeof(CA_REC)) ;
                $$->type = CA_EXPR ;
              }

           |  ID    RPAREN
              { $$ = (CA_REC *) zmalloc(sizeof(CA_REC)) ;
                code_call_id($$, $1) ;
              }
           ;


    

%%

/* resize the code for a user function */

static void  resize_fblock( fbp, code_ptr )
  FBLOCK *fbp ;
  INST *code_ptr ;
{ int size ;

  code1(_RET0) ; /* make sure there is always a return statement */

  if ( dump_code )  
  { code1(_HALT) ; /*stops da() */
    add_to_fdump_list(fbp) ;
  }

  if ( (size = code_ptr - fbp->code) > PAGE_SZ-1 )
        overflow("function code size", PAGE_SZ ) ;

  /* resize the code */
  fbp->code = (INST*) zrealloc(fbp->code, PAGE_SZ*sizeof(INST),
                       size * sizeof(INST) ) ;

}

static void code_array(p)
  register SYMTAB *p ;
{ if ( is_local(p) )
  { code1(LA_PUSHA) ; code1(p->offset) ; }
  else  code2(A_PUSHA, p->stval.array) ;
}

static  int  current_offset()
{
  switch( scope )
  { 
    case  SCOPE_MAIN :  return code_ptr - main_start ;
    case  SCOPE_BEGIN :  return code_ptr - begin_start ;
    case  SCOPE_END   :  return code_ptr - end_start ;
    case  SCOPE_FUNCT :  return code_ptr - active_funct->code ;
  }
}

static void  code_call_id( p, ip )
  register CA_REC *p ;
  register SYMTAB *ip ;
{ static CELL dummy ;

  switch( ip->type )
  {
    case  ST_VAR  :
            p->type = CA_EXPR ;
            code2(_PUSHI, ip->stval.cp) ;
            break ;

    case  ST_LOCAL_VAR  :
            p->type = CA_EXPR ;
            code1(L_PUSHI) ;
            code1(ip->offset) ;
            break ;

    case  ST_ARRAY  :
            p->type = CA_ARRAY ;
            code2(A_PUSHA, ip->stval.array) ;
            break ;

    case  ST_LOCAL_ARRAY :
            p->type = CA_ARRAY ;
            code1(LA_PUSHA) ;
            code1(ip->offset) ;
            break ;

    case  ST_NONE :
            p->type = ST_NONE ;
            p->call_offset = current_offset() ;
            p->sym_p = ip ;
            code2(_PUSHI, &dummy) ;
            break ;

    case  ST_LOCAL_NONE :
            p->type = ST_LOCAL_NONE ;
            p->call_offset = current_offset() ;
            p->type_p = & active_funct->typev[ip->offset] ;
            code1(L_PUSHI) ; 
            code1(ip->offset) ;
            break ;

  
#ifdef   DEBUG
    default :
            bozo("code_call_id") ;
#endif

  }
}

int parse()
{ int yy = yyparse() ;
  if ( resolve_list )  resolve_fcalls() ;
  return yy ;
}

@//E*O*F mawk0.97/parse.y//
chmod u=rw,g=r,o=r mawk0.97/parse.y
 
echo x - mawk0.97/print.c
sed 's/^@//' > "mawk0.97/print.c" <<'@//E*O*F mawk0.97/print.c//'

/********************************************
print.c
copyright 1991, Michael D. Brennan

This is a source file for mawk, an implementation of
the Awk programming language as defined in
Aho, Kernighan and Weinberger, The AWK Programming Language,
Addison-Wesley, 1988.

See the accompaning file, LIMITATIONS, for restrictions
regarding modification and redistribution of this
program in source or binary form.
********************************************/

/* $Log:	print.c,v $
 * Revision 2.2  91/04/09  12:39:23  brennan
 * added static to funct decls to satisfy STARDENT compiler
 * 
 * Revision 2.1  91/04/08  08:23:43  brennan
 * VERSION 0.97
 * 
*/

#include "mawk.h"
#include "bi_vars.h"
#include "bi_funct.h"
#include "memory.h"
#include "field.h"
#include "scan.h"
#include "files.h"

/*  static  functions */
static void  PROTO( print_cell, (CELL *, FILE *) ) ;
static void  PROTO( do_printf, (FILE *, char *, unsigned, CELL *) ) ;
static void  PROTO( do_sprintf, (char *, unsigned, CELL *) ) ;


static void print_cell(p, fp)
  register CELL *p ;
  register FILE *fp ;
{ register int len ;
  
  switch( p->type )
  {
    case C_NOINIT : break ;
    case C_MBSTRN :
    case C_STRING :
    case C_STRNUM :
        switch( len = string(p)->len )
        {
          case 0 :  break ;
          case 1 :
                    putc(string(p)->str[0],fp) ;
                    break ;

          default :
                    fwrite(string(p)->str, 1, len, fp) ;
        }
        break ;

    case C_DOUBLE :
        fprintf(fp, string(field + OFMT)->str, p->dval) ;
        break ;

    default :
        bozo("bad cell passed to print_cell") ;
  }
}

/* on entry to bi_print or bi_printf the stack is:

   sp[0] = an integer k
       if ( k < 0 )  output is to a file with name in sp[-1]
       { so open file and sp -= 2 }

   sp[0] = k >= 0 is the number of print args
   sp[-k]   holds the first argument 
*/

CELL *bi_print(sp)
  CELL *sp ; /* stack ptr passed in */
{ register CELL *p ;
  register int k ;
  FILE *fp ;

  if ( (k = sp->type) < 0 )
  { if ( (--sp)->type < C_STRING ) cast1_to_s(sp) ;
    fp = (FILE *) file_find( string(sp), k ) ;
    free_STRING(string(sp)) ;
    k = (--sp)->type ;
  }
  else  fp = stdout ;

  if ( k )  
  { p = sp - k ; /* clear k variables off the stack */
    sp = p - 1 ;
    while ( k-- > 1 ) 
    { print_cell(p,fp) ; print_cell(bi_vars+OFS,fp) ;
      cell_destroy(p) ; p++ ; }
    
    print_cell(p, fp) ;  cell_destroy(p) ;
  }
  else  
  { sp-- ;
    print_cell( &field[0], fp )  ; }

  print_cell( bi_vars + ORS , fp) ;
  return sp ;
}
  
/* the contents of format are preserved */
static void do_printf( fp, format, argcnt, cp)
  FILE *fp ;
  char *format ; unsigned argcnt ;
  CELL *cp ;  /* ptr to an array of arguments ( on the eval stack) */
{ register char *q ;
  char  save ;
  char *p = format ;

  while ( 1 )
  { if ( ! (q = strchr(p, '%'))  )
       if ( argcnt == 0 )
       { fputs(p, fp) ; return ; }
       else
         rt_error("too many arguments in call to printf(%s)", 
              format ) ; 

    if ( * ++q == '%' )
    { fwrite( p, q-p, 1, fp) ; p = q+1 ; continue ; }

    if ( argcnt == 0 )
        rt_error("too few arguments in call to printf(%s)", format) ; 

    if ( *q == '-' ) q++ ;
    while ( scan_code[*(unsigned char*)q] == SC_DIGIT )  q++ ;
    if ( *q == '.' )
    { q++ ;
      while ( scan_code[*(unsigned char*)q] == SC_DIGIT ) q++ ; }
    
    save = * ++q ;  *q = 0 ;
    switch( q[-1] )
    {
      case 'c' :  
      case 'd' :
      case 'o' :
      case 'x' :
            if ( cp->type != C_DOUBLE ) cast1_to_d(cp) ;
            (void) fprintf(fp, p, (int) cp->dval) ;
            break ;
      case 'e' :
      case 'g' :
      case 'f' :
            if ( cp->type != C_DOUBLE ) cast1_to_d(cp) ;
            (void) fprintf(fp, p, cp->dval) ;
            break ;
      case  's' :
            if ( cp->type < C_STRING ) cast1_to_s(cp) ;
            (void) fprintf(fp, p, string(cp)->str) ;
            break ;
      default :
            rt_error("bad format string in call to printf(%s)",
              format) ;
    }
    *q = save ; p = q ; argcnt-- ; cp++ ;
  }
}


CELL *bi_printf(sp)
  register CELL *sp ;
{ register int k ;
  register CELL *p ;
  FILE *fp ;

  if ( (k = sp->type) < 0 )
  { if ( (--sp)->type < C_STRING ) cast1_to_s(sp) ;
    fp = (FILE *) file_find( string(sp), k ) ;
    free_STRING(string(sp)) ;
    k = (--sp)->type ;
  }
  else  fp = stdout ;

  sp -= k-- ; /* sp points at the format string */
  if ( sp->type < C_STRING )  cast1_to_s(sp) ;
  do_printf(fp, string(sp)->str, k, sp+1) ;

  free_STRING(string(sp)) ;
  for ( p = sp+1 ; k-- ; p++ )  cell_destroy(p) ;
  return --sp ;
}

CELL *bi_sprintf(sp)
  CELL *sp ;
{ CELL *p ;
  int argcnt = sp->type ;
  void do_sprintf() ;

  sp -= argcnt-- ; /* sp points at the format string */
  if ( sp->type < C_STRING )  cast1_to_s(sp) ;
  do_sprintf(string(sp)->str, argcnt, sp+1) ;

  free_STRING(string(sp)) ;
  for ( p = sp+1 ; argcnt-- ; p++ )  cell_destroy(p) ;

  sp->ptr = (PTR) new_STRING( temp_buff.string_buff ) ;
  return sp ;
}


/* the contents of format are preserved */
static void do_sprintf( format, argcnt, cp)
  char *format ; 
  unsigned argcnt ;
  CELL *cp ;
{ register char *q ;
  char  save ;
  char *p = format ;
  register char *target = temp_buff.string_buff ;

  *target = 0 ;
  while ( 1 )
  { if ( ! (q = strchr(p, '%'))  )
       if ( argcnt == 0 )
       { strcpy(target, p) ; 
         /* check the result is not too large */
         if ( main_buff[-1] != 0 )
         { /* This may have damaged us -- try to croak out an error
              message and exit */
           rt_overflow("sprintf buffer", TEMP_BUFF_SZ) ;
         }
         return ; 
       }
       else
         rt_error("too many arguments in call to sprintf(%s)", 
             format ) ; 

    if ( * ++q == '%' )
    { unsigned len ;

      (void) memcpy(target, p, len = q-p ) ;
      p = q + 1 ; *(target += len) = 0 ;
      continue ;
    }

    if ( argcnt == 0 )
      rt_error("too few arguments in call to sprintf(%s)", format) ; 

    if ( *q == '-' ) q++ ;
    while ( scan_code[*(unsigned char*)q] == SC_DIGIT )  q++ ;
    if ( *q == '.' )
    { q++ ;
      while ( scan_code[*(unsigned char*)q] == SC_DIGIT ) q++ ; }
    
    save = * ++q ;  *q = 0 ;
    switch( q[-1] )
    {
      case 'c' :  
      case 'd' :
      case 'o' :
      case 'x' :
            if ( cp->type != C_DOUBLE ) cast1_to_d(cp) ;
            (void) sprintf(target, p, (int) cp->dval ) ;
            target = strchr(target, 0) ;
            break ;
      case 'e' :
      case 'g' :
      case 'f' :
            if ( cp->type != C_DOUBLE ) cast1_to_d(cp) ;
            (void) sprintf(target, p, cp->dval ) ;
            target = strchr(target, 0) ;
            break ;
      case  's' :
            if ( cp->type < C_STRING ) cast1_to_s(cp) ;
            (void) sprintf(target, p, string(cp)->str ) ;
            target = strchr(target, 0) ;
            break ;
      default :
            rt_error("bad format string in call to sprintf(%s)", 
                format) ;
    }
    *q = save ; p = q ; argcnt-- ; cp++ ;
  }
}

@//E*O*F mawk0.97/print.c//
chmod u=rw,g=r,o=r mawk0.97/print.c
 
echo x - mawk0.97/re_cmpl.c
sed 's/^@//' > "mawk0.97/re_cmpl.c" <<'@//E*O*F mawk0.97/re_cmpl.c//'

/********************************************
re_cmpl.c
copyright 1991, Michael D. Brennan

This is a source file for mawk, an implementation of
the Awk programming language as defined in
Aho, Kernighan and Weinberger, The AWK Programming Language,
Addison-Wesley, 1988.

See the accompaning file, LIMITATIONS, for restrictions
regarding modification and redistribution of this
program in source or binary form.
********************************************/

/* $Log:	re_cmpl.c,v $
 * Revision 2.1  91/04/08  08:23:45  brennan
 * VERSION 0.97
 * 
*/


/*  re_cmpl.c  */

#include "mawk.h"
#include "memory.h"
#include "scan.h"
#include "regexp.h"
#include "repl.h"
#include  <string.h>

static  CELL *PROTO( REPL_compile, (STRING *) ) ;

typedef struct re_node {
STRING  *sval ;
PTR     re ;
struct re_node *link ;
}  RE_NODE ;

static RE_NODE *re_list ;  /* a list of compiled regular expressions */


PTR re_compile( sval )
  STRING *sval ;
{ register RE_NODE *p ;
  RE_NODE *q ;
  char *s ;

  /* search list */
  s = sval->str ;
  p = re_list ;
  q = (RE_NODE *) 0 ;
  while ( p )
    if ( strcmp(s, p->sval->str) == 0 )  /* found */
        if ( !q ) /* already at front */  goto _return ;
        else /* delete from list for move to front */
        { q->link = p->link ; goto found ; }
    else
    { q = p ; p = p->link ; }

  /* not found */
  p = (RE_NODE *) zmalloc( sizeof(RE_NODE) ) ;
  p->sval = sval ;
  sval->ref_cnt++ ;
  if( !(p->re = REcompile(s)) )
  { errmsg(0, "regular expression compile failed (%s)\n%s\n" ,
               REerrlist[REerrno] , s) ;  mawk_exit(1) ; }

found :
/* insert p at the front of the list */
  p->link = re_list ; re_list = p ;

_return :
  
#ifdef  DEBUG
  if ( dump_RE )  REmprint(p->re, stderr) ;
#endif
  return p->re ;
}



/* this is only used by da() */

char *re_uncompile( m )
  PTR  m ;
{ register RE_NODE *p ;

  for( p = re_list ; p ; p = p->link )
        if ( p->re == m )  return  p->sval->str ;
#ifdef  DEBUG
  bozo("non compiled machine") ;
#endif
}
  


/*=================================================*/
/*  replacement  operations   */

/* create a replacement CELL from a STRING *  */

static CELL *REPL_compile( sval )
  STRING  *sval ;
{ int i = 0 ;
  register char *p = sval->str ;
  register char *q ;
  char *xbuff ;
  CELL *cp ;

  q = xbuff = (char *) zmalloc( sval->len + 1 ) ;

  while ( 1 )
  {
      switch( *p )
      {
        case  0  :  *q = 0 ;
                    goto  done  ;

        case  '\\':
                if ( p[1] == '&' )
                { *q++ = '&' ; p += 2 ; continue ; }
                else  break ;

        case  '&':
                /* if empty we don't need to make a node */
                if ( q != xbuff )
                { *q = 0 ;
                  temp_buff.ptr_buff[i++] = (PTR) new_STRING(xbuff) ;
                }
                /* and a null node for the '&'  */
                temp_buff.ptr_buff[i++] = (PTR) 0  ;
                /*  reset  */
                p++ ;  q = xbuff ;
                continue ;

        default :
                break ;
      }

      *q++ = *p++ ;
  }

done :   
  /* if we have one empty string it will get made now */
  if ( q > xbuff || i == 0 )
          temp_buff.ptr_buff[i++] = (PTR) new_STRING(xbuff) ;

  if ( i > MAX_FIELD )
      overflow("replacement pieces", MAX_FIELD) ;

  cp = new_CELL() ;
  if ( i == 1 )
  {
    cp->type = C_REPL ;
    cp->ptr = temp_buff.ptr_buff[0] ;
  }
  else
  {
    STRING **sp = (STRING**)
                  (cp->ptr = zmalloc(sizeof(STRING *)*i)) ;
    int j = 0 ;

    while ( j < i ) *sp++ = (STRING *)temp_buff.ptr_buff[j++] ;

    cp->type = C_REPLV ;
    cp->vcnt = i ;
  }
  zfree(xbuff, sval->len+1) ;
  return cp ;
}

/* free memory used by a replacement CELL  */

void  repl_destroy( cp )
  register CELL *cp ;
{ register STRING **p ;
  unsigned cnt ;

  if ( cp->type == C_REPL )  free_STRING(string(cp)) ;
  else  /* an C_REPLV  */
  {
    p = (STRING **) cp->ptr ;
    for( cnt = cp->vcnt ; cnt ; cnt--) 
    {
      if ( *p ) free_STRING( *p ) ;
      p++ ;
    }
    zfree( cp->ptr, cp->vcnt * sizeof(STRING *) ) ;
  }
}

/* copy a C_REPLV cell to another CELL */

CELL  *replv_cpy( target, source )
  CELL *target , *source ;
{ STRING **t, **s ;
  unsigned cnt ;

  target->type = C_REPLV ;
  target->vcnt = source->vcnt ;

  target->ptr = (PTR) zmalloc( target->vcnt * sizeof(STRING *) ) ;
  cnt = target->vcnt ;
  t = (STRING **) target->ptr ;
  s = (STRING **) source->ptr ;
  while ( cnt-- )
  { 
    if ( *t = *s++ )   (*t)->ref_cnt++ ;
    t++ ;
  }
  return target ;
}

/* here's our old friend linked linear list with move to the front
   for compilation of replacement CELLs  */

typedef  struct repl_node {
  struct repl_node  *link ;
  STRING  *sval  ;  /* the input */
  CELL    *cp ;  /* the output */
}  REPL_NODE  ;

static  REPL_NODE  *repl_list ;

/* search the list (with move to the front) for a compiled
   separator.
   return a ptr to a CELL (C_REPL or C_REPLV)
*/

CELL *repl_compile( sval )
  STRING *sval ;
{ register REPL_NODE *p ;
  REPL_NODE *q ;
  char *s ;

  /* search the list */
  s = sval->str ;
  p = repl_list ;
  q = (REPL_NODE *) 0 ;
  while ( p )
    if ( strcmp(s, p->sval->str) == 0 )  /* found */
        if ( !q ) /* already at front */  return p->cp ;
        else /* delete from list for move to front */
        { q->link = p->link ; goto found ; }
    else
    { q = p ; p = p->link ; }

  /* not found */
  p = (REPL_NODE *) zmalloc( sizeof(REPL_NODE) ) ;
  p->sval = sval ;
  sval->ref_cnt++ ;
  p->cp = REPL_compile(sval) ;

found :
/* insert p at the front of the list */
  p->link = repl_list ; repl_list = p ;
  return p->cp ;
}

/* return the string for a CELL or type REPL or REPLV,
   this is only used by da()  */

char *repl_uncompile( cp )
  CELL *cp ;
{
  register REPL_NODE *p = repl_list ;

  if ( cp->type == C_REPL )
    while ( p )
      if ( p->cp->type == C_REPL &&
           p->cp->ptr  == cp->ptr )   return p->sval->str ;
      else  p = p->link ;
  else
    while ( p )
      if ( p->cp->type == C_REPLV &&
           memcmp( cp->ptr, p->cp->ptr, cp->vcnt * sizeof(STRING*)) 
           == 0  )   return  p->sval->str ;
      else  p = p->link ;

  bozo("unable to uncompile an repl") ;
}

/*
  convert a C_REPLV to  C_REPL
     replacing the &s with sval
*/

CELL  *replv_to_repl( cp, sval)
  CELL *cp ; STRING *sval ;
{ register STRING **p ;
  STRING **sblock = (STRING **) cp->ptr ;
  unsigned cnt , vcnt = cp->vcnt ;
  unsigned len ;
  char *target ;

#ifdef  DEBUG
  if ( cp->type != C_REPLV ) bozo("not replv") ;
#endif

  p = sblock ; cnt = vcnt ; len = 0 ;
  while ( cnt-- )
      if ( *p )  len += (*p++)->len ;
      else
      { *p++ = sval ; sval->ref_cnt++ ; len += sval->len ; }

  cp->type = C_REPL ;
  cp->ptr = (PTR) new_STRING((char *) 0, len) ;

  p = sblock ; cnt = vcnt ; target = string(cp)->str ;
  while ( cnt-- )
  { (void) memcpy(target, (*p)->str, (*p)->len) ;
    target += (*p)->len ;
    free_STRING(*p) ;
    p++ ;
  }

  zfree( sblock, vcnt * sizeof(STRING *) ) ;
  return cp ;
}

@//E*O*F mawk0.97/re_cmpl.c//
chmod u=rw,g=r,o=r mawk0.97/re_cmpl.c
 
echo x - mawk0.97/regexp.h
sed 's/^@//' > "mawk0.97/regexp.h" <<'@//E*O*F mawk0.97/regexp.h//'

/********************************************
regexp.h
copyright 1991, Michael D. Brennan

This is a source file for mawk, an implementation of
the Awk programming language as defined in
Aho, Kernighan and Weinberger, The AWK Programming Language,
Addison-Wesley, 1988.

See the accompaning file, LIMITATIONS, for restrictions
regarding modification and redistribution of this
program in source or binary form.
********************************************/

/*$Log:	regexp.h,v $
 * Revision 2.1  91/04/08  08:23:47  brennan
 * VERSION 0.97
 * 
*/

#include <stdio.h>

PTR   PROTO( REcompile , (char *) ) ;
int   PROTO( REtest, (char *, PTR) ) ;
char *PROTO( REmatch, (char *, PTR, unsigned *) ) ;
void  PROTO( REmprint, (PTR , FILE*) ) ;

extern  int  REerrno ;
extern  char *REerrlist[] ;


@//E*O*F mawk0.97/regexp.h//
chmod u=rw,g=r,o=r mawk0.97/regexp.h
 
echo x - mawk0.97/repl.h
sed 's/^@//' > "mawk0.97/repl.h" <<'@//E*O*F mawk0.97/repl.h//'

/********************************************
repl.h
copyright 1991, Michael D. Brennan

This is a source file for mawk, an implementation of
the Awk programming language as defined in
Aho, Kernighan and Weinberger, The AWK Programming Language,
Addison-Wesley, 1988.

See the accompaning file, LIMITATIONS, for restrictions
regarding modification and redistribution of this
program in source or binary form.
********************************************/

/*$Log:	repl.h,v $
 * Revision 2.1  91/04/08  08:23:49  brennan
 * VERSION 0.97
 * 
*/

/* repl.h */

#ifndef  REPL_H
#define  REPL_H

PTR  PROTO( re_compile, (STRING *) ) ;
char *PROTO( re_uncompile, (PTR) ) ;


CELL *PROTO( repl_compile, (STRING *) ) ;
char *PROTO( repl_uncompile, (CELL *) ) ;
void  PROTO( repl_destroy, (CELL *) ) ;
CELL *PROTO( replv_cpy, (CELL *, CELL *) ) ;
CELL *PROTO( replv_to_repl, (CELL *, STRING *) ) ;

#endif
@//E*O*F mawk0.97/repl.h//
chmod u=rw,g=r,o=r mawk0.97/repl.h
 
echo x - mawk0.97/scan.c
sed 's/^@//' > "mawk0.97/scan.c" <<'@//E*O*F mawk0.97/scan.c//'

/********************************************
scan.c
copyright 1991, Michael D. Brennan

This is a source file for mawk, an implementation of
the Awk programming language as defined in
Aho, Kernighan and Weinberger, The AWK Programming Language,
Addison-Wesley, 1988.

See the accompaning file, LIMITATIONS, for restrictions
regarding modification and redistribution of this
program in source or binary form.
********************************************/


/* $Log:	scan.c,v $
 * Revision 2.2  91/04/09  12:39:27  brennan
 * added static to funct decls to satisfy STARDENT compiler
 * 
 * Revision 2.1  91/04/08  08:23:51  brennan
 * VERSION 0.97
 * 
*/


#include  "mawk.h"
#include  "sizes.h"
#include  "scan.h"
#include  "memory.h"
#include  "field.h"
#include  "init.h"
#include  "fin.h"
#include  "repl.h"
#include  <fcntl.h>
#include  <string.h>
#include  "files.h"


/* static functions */
static void PROTO(buff_create, (char *) ) ;
static int PROTO(slow_next, (void) ) ;
static void PROTO(eat_comment, (void) ) ;
static double PROTO(collect_decimal, (int, int *) ) ;
static int PROTO(collect_string, (void) ) ;
static int  PROTO(collect_RE, (void) ) ;
static char *PROTO(rm_escape, (char *) ) ;


/*-----------------------------
  program file management
 *----------------------------*/

static  unsigned char *buffer ;
static  unsigned char *buffp ;  
    /* unsigned so it works with 8 bit chars */
static  int  program_fd = -1  ; 
static  int  eof_flag ;


static void buff_create(s)
  char *s ;
{
  /* If program_fd == -1, program came from command line and s
     is it, else s is a filename */

  if ( program_fd == -1 )
  { buffer = buffp = (unsigned char *) s ; eof_flag = 1 ; }
  else /* s is a filename, open it */
  {
    if ( s[0] == '-' && s[1] == 0 ) program_fd = 0 ;
    else
    if ( (program_fd = open(s, O_RDONLY, 0)) == -1 )
    { errmsg( errno, "cannot open %s", s) ; mawk_exit(1) ; }

    buffp = buffer = (unsigned char *) zmalloc( BUFFSZ+1 ) ;

    eof_flag = fillbuff(program_fd, buffer, BUFFSZ) < BUFFSZ ;
  }
}

void scan_cleanup()
{ 
  if ( program_fd >= 0 ) zfree(buffer, BUFFSZ+1) ;
  if ( program_fd > 0 )  (void) close(program_fd) ;
  scan_code['\n'] = SC_SPACE ;
}


void  scan_init(flag, p)
  int flag ; /* on if program is from the command line */
  char *p ;
{ 
  if ( ! flag ) program_fd = 0 ;
  buff_create(p) ;

  eat_nl() ; /* scan to first token */
  if ( next() == 0 )  
  { errmsg(0, "no program") ; mawk_exit(1) ; }
  un_next() ;
}

/*--------------------------------
  global variables shared by yyparse() and yylex()
 *-------------------------------*/

int  current_token = -1 ; 
unsigned  token_lineno ;
unsigned  compile_error_count ;
int   paren_cnt ;
int   brace_cnt ;
int   print_flag ;  /* changes meaning of '>' */
int   getline_flag ; /* changes meaning of '<' */

extern  YYSTYPE  yylval ;

/*----------------------------------------
 file reading functions
 next() and un_next(c) are macros in scan.h

 *---------------------*/

static  unsigned lineno = 1 ;

/* used to help distinguish / as divide or start of RE  */

static int can_precede_re[] =
{ MATCH, NOT_MATCH, COMMA, RBRACE, 
LPAREN, NOT, P_OR, P_AND, NL,  -1 } ;

/* read one character -- slowly */
static int slow_next()
{ 
  if ( *buffp == 0  )
      if ( !eof_flag ) 
      { buffp = buffer ;
        eof_flag = fillbuff(program_fd, buffer,BUFFSZ) < BUFFSZ ;
      }

  return *buffp++ ; /* note can un_next() , eof which is zero */
}

static void eat_comment()
{ register int c ;

  while ( (c = next()) != '\n' && scan_code[c] ) ;
  un_next() ;
}

void eat_nl() /* eat all space including newlines */
{
  while ( 1 )
    switch( scan_code[next()] )
    { 
      case SC_COMMENT : 
         eat_comment() ;
         break ;
         
      case  SC_NL  :   lineno++ ;
      /* fall thru  */
      case  SC_SPACE  :   break ;
      default :  
          un_next() ; return ;
    }
}

int yylex()
{ 
  register int c ;

  token_lineno = lineno ;

reswitch:

    switch( scan_code[c = next()] )
    {
      case  0  :  /* if no terminator on the line put one */
          if ( (c = current_token) == RBRACE ||
                c == NL || c == SEMI_COLON ) ct_ret(EOF) ;
          else
          { un_next() ;  ct_ret(NL) ; }