[alt.sources] mawk0.97.shar 6 of 6

brennan@ssc-vax.UUCP (Mike Brennan) (05/11/91)

------------------cut here----------------
                          else\
                          { stackp->m=mx;stackp->s=sx;stackp->u=ux;}
#endif


/* Expands to the two case labels for state type x, one with U_OFF and one
   with U_ON added; the caller writes the trailing colon:  CASE_UANY(x) :  */
#define   CASE_UANY(x)  case  x + U_OFF :  case  x + U_ON


/* REtest -- test whether str contains a match for a compiled machine.
 *
 *   str      NUL-terminated string to examine
 *   machine  the compiled expression, an array of STATE passed as VOID *
 *
 * Returns 1 (non-zero) if a match is found, 0 otherwise.
 *
 * The machine is interpreted with an explicit backtrack stack: push()
 * saves an alternative (state, position, u_flag) and the code at `refill'
 * pops one whenever the current path fails; an empty stack means no match.
 * The switch dispatches on m->type (which already carries END_ON or
 * END_OFF) plus u_flag.  With u_flag == U_ON the cases search forward from
 * s for a place where the state matches; with U_OFF the state has to
 * match exactly at s.
 */
int  REtest( str, machine)
  char *str ;
  VOID *machine ;
{ register STATE *m = (STATE *) machine ;
  register char *s = str ;
  register RT_STATE *stackp ;
  int u_flag ;
  char *str_end ;   /* cached end of str, found on first need */
  char *ts ; /*convenient temps */
  STATE *tm ;

  /* handle the easy case quickly */
  if ( (m+1)->type == M_ACCEPT && m->type == M_STR )
        /* compare against null instead of casting the pointer to int:
           the cast can truncate a valid pointer to 0 */
        return  str_str(s, m->data.str, m->len) != (char *) 0 ;
  else
  { u_flag = U_ON ; str_end = (char *) 0 ;
    stackp = RE_run_stack_base - 1 ;
    goto  reswitch ;
  }

refill :
  /* current path failed: resume the most recently saved alternative */
  if ( stackp < RE_run_stack_base )  return  0 ;
  m = stackp->m ;
  s = stackp->s ;
  u_flag  = stackp-- -> u ;


reswitch  :

  switch( m->type + u_flag )
  {
    case M_STR + U_OFF + END_OFF :
            if ( strncmp(s, m->data.str, m->len) ) goto refill ;
            s += m->len ;  m++ ;
            goto reswitch ;

    case M_STR + U_OFF + END_ON :
            if ( strcmp(s, m->data.str) ) goto refill ;
            s += m->len ;  m++ ;
            goto reswitch ;

    case M_STR + U_ON + END_OFF :
            if ( !(s = str_str(s, m->data.str, m->len)) ) goto refill ;
            /* on backtrack, look for the next occurrence */
            push(m, s+1, U_ON) ;
            s += m->len ; m++ ; u_flag = U_OFF ;
            goto reswitch ;

    case M_STR + U_ON + END_ON :
            if ( !str_end )  str_end = strchr(s, 0) ;
            ts = str_end - m->len ;
            /* len+1 bytes so the terminating NUL is compared too */
            if (ts < s || memcmp(ts,m->data.str,m->len+1)) goto refill ; 
            s = str_end ; m++ ; u_flag = U_OFF ;
            goto reswitch ;

    case M_CLASS + U_OFF + END_OFF :
            if ( !ison(*m->data.bvp, s[0] ) )  goto refill ;
            s++ ; m++ ;
            goto reswitch ;

    case M_CLASS + U_OFF + END_ON :
            if ( s[1] || !ison(*m->data.bvp,s[0]) )  goto refill ;
            s++ ; m++ ;
            goto reswitch ;

    case M_CLASS + U_ON + END_OFF :
            while ( !ison(*m->data.bvp,s[0]) )
                if ( s[0] == 0 )  goto refill ;
                else  s++ ;
            s++ ;
            push(m, s, U_ON) ;
            m++ ; u_flag = U_OFF ;
            goto reswitch ;

    case M_CLASS + U_ON + END_ON :
            if ( ! str_end )  str_end = strchr(s,0) ;
            /* there must be a character at or after s to test;
               without this check str_end[-1] lies before s (and before
               the string itself when it is empty) */
            if ( s == str_end ||
                 ! ison(*m->data.bvp, str_end[-1]) ) goto refill ;
            s = str_end ; m++ ; u_flag = U_OFF ;
            goto reswitch ;

    case M_ANY + U_OFF + END_OFF :
            if ( s[0] == 0 )  goto refill ;
            s++ ; m++ ;
            goto  reswitch ;

    case M_ANY + U_OFF + END_ON :
            if ( s[0] == 0 || s[1] != 0 )  goto refill ;
            s++ ; m++ ;
            goto reswitch ;

    case M_ANY + U_ON + END_OFF :
            if ( s[0] == 0 )  goto refill ;
            s++ ; 
            push(m, s, U_ON) ;
            m++ ; u_flag = U_OFF ;
            goto  reswitch ;

    case M_ANY + U_ON + END_ON :
            if ( s[0] == 0 )  goto refill ;
            if ( ! str_end )  str_end = strchr(s,0) ;
            s = str_end ; m++ ; u_flag = U_OFF ;
            goto reswitch ;

    case  M_START + U_OFF + END_OFF :
    case  M_START + U_ON  + END_OFF :
            if ( s != str )  goto  refill ;
            m++ ;  u_flag = U_OFF ;
            goto  reswitch ;

    case  M_START + U_OFF + END_ON :
    case  M_START + U_ON  + END_ON :
            if ( s != str || s[0] != 0 )  goto  refill ;
            m++ ; u_flag = U_OFF ;
            goto  reswitch ;

    case  M_END + U_OFF  :
            if ( s[0]  != 0 )  goto  refill ;
            m++ ; goto reswitch ;

    case  M_END + U_ON :
            s = strchr(s, 0) ;
            m++ ; u_flag = U_OFF ;
            goto reswitch ;

    CASE_UANY(M_U) :
            u_flag = U_ON ; m++ ;
            goto reswitch ;

    CASE_UANY(M_1J) :
            m += m->data.jump ;
            goto reswitch ;

    CASE_UANY(M_2JA) : /* take the non jump branch */
            /* don't stack an ACCEPT */
            if ( (tm = m + m->data.jump)->type == M_ACCEPT ) return 1 ;
            push(tm, s, u_flag) ;
            m++ ;
            goto reswitch ;

    CASE_UANY(M_2JB) : /* take the jump branch */
            /* don't stack an ACCEPT */
            if ( (tm = m + 1)->type == M_ACCEPT ) return 1 ;
            push(tm, s, u_flag) ;
            m += m->data.jump ;
            goto reswitch ;

    CASE_UANY(M_ACCEPT) :
            return 1 ;

    default :
            RE_panic("unexpected case in REtest") ;
  }
}

  

#ifdef  MAWK

/* If machine p is nothing more than one literal string followed by ACCEPT,
   return that string and store its length through lenp.  Otherwise return
   a null pointer and leave *lenp alone. */
char *is_string_split( p, lenp )
  register STATE *p ;
  unsigned *lenp ;
{
  if ( p->type != M_STR || (p+1)->type != M_ACCEPT )
        return  (char *) 0 ;

  *lenp = p->len ;
  return  p->data.str ;
}
#else /* mawk provides its own str_str */

/* str_str -- find the first occurrence of key in target.
 *
 *   target  NUL-terminated string to search
 *   key     the bytes to look for; assumed to hold no NUL in its first
 *           klen bytes
 *   klen    number of bytes of key to match
 *
 * Returns a pointer to the first match inside target, or a null pointer
 * when there is none.  A key of length 0 never matches.
 */
char *str_str(target, key, klen)
  register char *target ;
  register char *key ;
  unsigned klen ;
{ int c = key[0] ;

  switch( klen )
  { case 0 :  return (char *) 0 ;
    case 1 :  return strchr(target, c) ;
    case 2 :  
              while ( (target = strchr(target, c)) != (char *) 0 )
                    if ( target[1] == key[1] ) return target ;
                    else target++ ;
              break ;

    default :
              /* first byte is located with strchr, the rest compared here */
              klen-- ; key++ ;
              while ( (target = strchr(target, c)) != (char *) 0 )
                    /* strncmp stops at target's NUL; memcmp would read
                       klen bytes even past the end of target */
                    if ( strncmp(target+1,key,klen) == 0 ) return target ;
                    else target++ ;
              break ;
  }
  return (char *) 0 ;
}
              

#endif  /* MAWK */
@//E*O*F mawk0.97/rexp/rexp2.c//
chmod u=rw,g=r,o=r mawk0.97/rexp/rexp2.c
 
echo x - mawk0.97/rexp/rexp3.c
sed 's/^@//' > "mawk0.97/rexp/rexp3.c" <<'@//E*O*F mawk0.97/rexp/rexp3.c//'

/********************************************
rexp3.c
copyright 1991, Michael D. Brennan

This is a source file for mawk an implementation of
the Awk programming language as defined in
Aho, Kernighan and Weinberger, The AWK Programming Language,
Addison-Wesley, 1988.

See the accompanying file, LIMITATIONS, for restrictions
regarding modification and redistribution of this
program in source or binary form.
********************************************/

/*  rexp3.c   */

/*  match a string against a machine   */

#include "rexp.h"
#include <string.h>


/*  check that a bit is on
    b is a bit vector indexed by character code, 8 codes per element.
    x is converted to unsigned char first so that a negative char value
    cannot produce a negative index  */
#define  ison(b,x) ( (b)[((unsigned char)(x))>>3] & \
                     ( 1 << (((unsigned char)(x))&7)  ))


extern RT_STATE *RE_run_stack_base; 
extern RT_STATE *RE_run_stack_limit ;
RT_STATE  *RE_new_run_stack() ;


/* push -- save a backtrack entry on the run stack: machine state mx,
   string position sx, match start ssx and u-flag ux.  When the stack
   pointer reaches RE_run_stack_limit it is replaced by the value returned
   from RE_new_run_stack().
   Wrapped in do { } while (0) so that  push(...) ;  is one statement and
   stays correct inside an unbraced if/else. */
#define  push(mx,sx,ssx,ux)   do { \
               if (++stackp == RE_run_stack_limit) \
                   stackp = RE_new_run_stack() ; \
               stackp->m = (mx) ; stackp->s = (sx) ; \
               stackp->ss = (ssx) ; stackp->u = (ux) ; \
             } while (0)


/* Expands to the two case labels for state type x, one with U_OFF and one
   with U_ON added; the caller writes the trailing colon:  CASE_UANY(x) :  */
#define   CASE_UANY(x)  case  x + U_OFF :  case  x + U_ON

/* returns start of first longest match and the length by
   reference.  If no match returns NULL and length zero */

char *REmatch(str, machine, lenp)
  char *str ;
  VOID   *machine ;
  unsigned *lenp ;
{ register STATE *m = (STATE *) machine ;
  register char *s = str ;
  char *ss ;
  register RT_STATE *stackp ;
  int u_flag ;
  char *str_end, *ts ;

  /* state of current best match stored here */
  char *cb_ss ;  /* the start */
  char *cb_e  ;  /* the end , pts at first char not matched */

  *lenp = 0 ;

  /* check for the easy case */
  if ( (m+1)->type == M_ACCEPT && m->type == M_STR )
  { if ( ts = str_str(s, m->data.str, m->len) ) *lenp = m->len ;
    return ts ;
  }
    
  u_flag = U_ON ; cb_ss = ss = str_end = (char *) 0 ;
  stackp = RE_run_stack_base - 1 ;
  goto  reswitch ;

refill :
  if ( stackp < RE_run_stack_base )  /* empty stack */
  { if ( cb_ss )  *lenp = cb_e - cb_ss ;
    return cb_ss ;
  }
  ss = stackp->ss ;
  s = stackp-- -> s ;
  if ( cb_ss )  /* does new state start too late ? */
      if ( ss )
      { if ( cb_ss < ss )  goto refill ; }
      else
      if ( cb_ss < s ) goto refill ;

  m = (stackp+1)->m ;
  u_flag  = (stackp+1)->u ;


reswitch  :

  switch( m->type + u_flag )
  {
    case M_STR + U_OFF + END_OFF :
            if ( strncmp(s, m->data.str, m->len) ) goto refill ;
	    if ( !ss )  
	        if ( cb_ss && s > cb_ss ) goto refill ;
		else ss = s ;
            s += m->len ;  m++ ;
            goto reswitch ;

    case M_STR + U_OFF + END_ON :
            if ( strcmp(s, m->data.str) ) goto refill ;
	    if ( !ss )  
	        if ( cb_ss && s > cb_ss ) goto refill ;
		else ss = s ;
            s += m->len ;  m++ ;
            goto reswitch ;

    case M_STR + U_ON + END_OFF :
            if ( !(s = str_str(s, m->data.str, m->len)) ) goto refill ;
            push(m, s+1,ss, U_ON) ;
	    if ( !ss )  
	        if ( cb_ss && s > cb_ss ) goto refill ;
		else ss = s ;
            s += m->len ; m++ ; u_flag = U_OFF ;
            goto reswitch ;

    case M_STR + U_ON + END_ON :
            if ( !str_end )  str_end = strchr(s, 0) ;
            ts = str_end - m->len ;
            if (ts < s || memcmp(ts,m->data.str,m->len+1)) goto refill ; 
	    if ( !ss )  
		if ( cb_ss && ts > cb_ss )  goto refill ;
		else  ss = ts ;
            s = str_end ; m++ ; u_flag = U_OFF ;
            goto reswitch ;

    case M_CLASS + U_OFF + END_OFF :
            if ( !ison(*m->data.bvp, s[0] ) )  goto refill ;
	    if ( !ss )
		if ( cb_ss && s > cb_ss )  goto refill ;
		else  ss = s ;
            s++ ; m++ ;
            goto reswitch ;

    case M_CLASS + U_OFF + END_ON :
            if ( s[1] || !ison(*m->data.bvp,s[0]) )  goto refill ;
	    if ( !ss )
		if ( cb_ss && s > cb_ss )  goto refill ;
		else  ss = s ;
            s++ ; m++ ;
            goto reswitch ;

    case M_CLASS + U_ON + END_OFF :
            while ( !ison(*m->data.bvp,s[0]) )
                if ( s[0] == 0 )  goto refill ;
                else  s++ ;

            s++ ;
            push(m, s, ss, U_ON) ;
	    if ( !ss )
		if ( cb_ss && s-1 > cb_ss )  goto refill ;
		else  ss = s-1 ;
            m++ ; u_flag = U_OFF ;
            goto reswitch ;

    case M_CLASS + U_ON + END_ON :
            if ( ! str_end )  str_end = strchr(s,0) ;
            if ( ! ison(*m->data.bvp, str_end[-1]) ) goto refill ;
	    if ( !ss )
		if ( cb_ss && str_end-1 > cb_ss )  goto refill ;
		else  ss = str_end-1 ;
            s = str_end ; m++ ; u_flag = U_OFF ;
            goto reswitch ;

    case M_ANY + U_OFF + END_OFF :
            if ( s[0] == 0 )  goto refill ;
	    if ( !ss )
		if ( cb_ss && s > cb_ss )  goto refill ;
		else ss = s ;
            s++ ; m++ ;
            goto  reswitch ;

    case M_ANY + U_OFF + END_ON :
            if ( s[0] == 0 || s[1] != 0 )  goto refill ;
	    if ( !ss )
		if ( cb_ss && s > cb_ss )  goto refill ;
		else ss = s ;
            s++ ; m++ ;
            goto reswitch ;

    case M_ANY + U_ON + END_OFF :
            if ( s[0] == 0 )  goto refill ;
            s++ ; 
            push(m, s, ss, U_ON) ;
	    if ( !ss )
		if ( cb_ss && s-1 > cb_ss )  goto refill ;
		else  ss = s-1 ;
            m++ ; u_flag = U_OFF ;
            goto  reswitch ;

    case M_ANY + U_ON + END_ON :
            if ( s[0] == 0 )  goto refill ;
            if ( ! str_end )  str_end = strchr(s,0) ;
	    if ( !ss )
		if ( cb_ss && str_end-1 > cb_ss )  goto refill ;
		else  ss = str_end - 1 ;
            s = str_end ; m++ ; u_flag = U_OFF ;
            goto reswitch ;

    case  M_START + U_OFF + END_OFF :
    case  M_START + U_ON  + END_OFF :
            if ( s != str )  goto  refill ;
	    ss = s ;
            m++ ;  u_flag = U_OFF ;
            goto  reswitch ;

    case  M_START + U_OFF + END_ON :
    case  M_START + U_ON  + END_ON :
            if ( s != str || s[0] != 0 )  goto  refill ;
	    ss = s ;
            m++ ; u_flag = U_OFF ;
            goto  reswitch ;

    case  M_END + U_OFF  :
            if ( s[0]  != 0 )  goto  refill ;
	    if ( !ss ) 
		if ( cb_ss && s > cb_ss )  goto refill ;
		else  ss = s ;
            m++ ; goto reswitch ;

    case  M_END + U_ON :
	    s = str_end ? str_end : (str_end =  strchr(s,0)) ;
	    if ( !ss ) 
		if ( cb_ss && s > cb_ss )  goto refill ;
		else  ss = s ;
            m++ ; u_flag = U_OFF ;
            goto reswitch ;

    CASE_UANY(M_U) :
	    if ( !ss ) 
		if ( cb_ss && s > cb_ss )  goto refill ;
		else  ss = s ;
            u_flag = U_ON ; m++ ;
            goto reswitch ;

    CASE_UANY(M_1J) :
            m += m->data.jump ;
            goto reswitch ;

    CASE_UANY(M_2JA) : /* take the non jump branch */
            push(m+m->data.jump, s, ss, u_flag) ;
            m++ ;
            goto reswitch ;

    CASE_UANY(M_2JB) : /* take the jump branch */
            push(m+1, s, ss, u_flag) ;
            m += m->data.jump ;
            goto reswitch ;

    case M_ACCEPT + U_OFF :
	    if ( !ss )  ss = s ;
	    if ( !cb_ss || ss < cb_ss || ss == cb_ss && s > cb_e )
	    { /* we have a new current best */
	      cb_ss = ss ; cb_e = s ;
	    }
	    goto  refill ;

    case  M_ACCEPT + U_ON :
	    if ( !ss )  ss = s ;
	    else
		s = str_end ? str_end : (str_end = strchr(s,0)) ;

	    if ( !cb_ss || ss < cb_ss || ss == cb_ss && s > cb_e )
	    { /* we have a new current best */
	      cb_ss = ss ; cb_e = s ;
	    }
	    goto  refill ;

    default :
            RE_panic("unexpected case in REmatch") ;
  }
}

@//E*O*F mawk0.97/rexp/rexp3.c//
chmod u=rw,g=r,o=r mawk0.97/rexp/rexp3.c
 
echo x - mawk0.97/rexp/rexpdb.c
sed 's/^@//' > "mawk0.97/rexp/rexpdb.c" <<'@//E*O*F mawk0.97/rexp/rexpdb.c//'

/********************************************
rexpdb.c
copyright 1991, Michael D. Brennan

This is a source file for mawk an implementation of
the Awk programming language as defined in
Aho, Kernighan and Weinberger, The AWK Programming Language,
Addison-Wesley, 1988.

See the accompanying file, LIMITATIONS, for restrictions
regarding modification and redistribution of this
program in source or binary form.
********************************************/


/* rexpdb.c */


#include "rexp.h"
#include <ctype.h>

/*  print a machine for debugging  */

/* printable names of the STATE types; REmprint indexes this table with the
   type value (after removing END_ON).
   NOTE(review): the order presumably mirrors the M_* numbering in rexp.h
   -- confirm when adding a state type */
static  char *xlat[] = {
"M_STR"  ,
"M_CLASS" ,
"M_ANY" ,
"M_START" ,
"M_END" ,
"M_U",
"M_1J" ,
"M_2JA" ,
"M_2JB" ,
"M_ACCEPT" } ;

/* REmprint -- write a listing of machine m to stream f, one state per line.
 *
 *   m   the machine, an array of STATE passed as VOID *
 *   f   output stream
 *
 * Each line shows the state name, its data (the string, the jump offset,
 * or the class bit vector in hex) and a trailing "$" when the state has
 * END_ON set.  Printing stops after the M_ACCEPT state, or with the
 * message "unknown STATE type" at the first state that has no name in
 * xlat[].
 *
 * The machine is only read: the END_ON bit is removed in a local copy of
 * the type, so nothing is left modified on the error return.
 */
void  REmprint(m, f)
  VOID *m ; FILE *f ;
{ register STATE *p = (STATE *) m ;
  char *end_on_string ;
  int type ;   /* p->type with END_ON removed */

  while ( 1 )
  { 
    type = p->type ;
    if ( type >= END_ON ) 
    { type -= END_ON ; end_on_string = "$" ; }
    else end_on_string = "" ;

    /* the last test keeps the xlat[] lookup inside the table */
    if ( type < 0 || type >= END_ON ||
         type >= (int) (sizeof(xlat)/sizeof(xlat[0])) )
    { fprintf(f, "unknown STATE type\n") ; return ; }

    fprintf(f, "%-10s" , xlat[type]) ;
    switch( type )
    {
     case M_STR : fprintf(f, "%s", p->data.str ) ;
                  break ;

     case M_1J:
     case M_2JA:  
     case M_2JB : fprintf(f, "%d", p->data.jump) ;
                 break ;
     case M_CLASS:
          { unsigned char *q = (unsigned char *) p->data.bvp ;
            unsigned char *r = q +  sizeof(BV) ;
            while ( q < r )  fprintf(f, "%x " , *q++) ;
          }
          break ;

     default :  /* the remaining types carry no data to print */
          break ;
    }
    fprintf(f, "%s\n" , end_on_string) ;
    /* tested on the stored type, so an ACCEPT carrying END_ON does not
       end the listing (same as before) */
    if ( p->type == M_ACCEPT )  return ;
    p++ ;
   }
}

@//E*O*F mawk0.97/rexp/rexpdb.c//
chmod u=rw,g=r,o=r mawk0.97/rexp/rexpdb.c
 
echo mkdir - mawk0.97/test
mkdir mawk0.97/test
chmod u=rwx,g=rx,o=rx mawk0.97/test
 
echo x - mawk0.97/test/README
sed 's/^@//' > "mawk0.97/test/README" <<'@//E*O*F mawk0.97/test/README//'

Some benchmarks of Mawk and other (new) awks are in the file
benchmarks.

Most of the programs are one-liners, but a few are interesting
and are duplicated in the examples directory.

To run these tests or others yourself, use the
shell script test.sh

----------------------------------------------------

One area mawk needs to improve is array[expr] access when
expr is numeric (see primes and qsort).
Mawk always converts numeric expressions to string for
array access which leads to a lot of redundant conversions.
Better array access will be added for release 1.0.

-------------------------------------------

Run wfrq0.awk or words0.awk to see how much using RS as a
regular expression can speed up some programs.

@//E*O*F mawk0.97/test/README//
chmod u=r,g=r,o=r mawk0.97/test/README
 
echo x - mawk0.97/test/benchmarks
sed 's/^@//' > "mawk0.97/test/benchmarks" <<'@//E*O*F mawk0.97/test/benchmarks//'
The following are some timing tests of Mawk versus three
other (new) awks.  Times are user + sys in seconds. First
col is mawk time, second col is other awk time and last
col is the ratio.


Mawk vs.  Awk  on Stardent 3000, SysV 3.0
    cat           4.0    4.8   1.20
    wc            8.1    6.1   0.75
    fields       20.8   26.3   1.26
    reg0          4.7    6.0   1.28
    reg1          5.6    6.0   1.07
    reg2         18.1    6.0   0.33
    loops         6.0   12.6   2.10
    words        18.1   18.4   1.02
    newton *      0.9    1.7   1.89
    concat       14.1   15.0   1.06
    primes *      2.6    3.1   1.19
    squeeze       5.3    2.9   0.55
    qsort         6.8   21.3   3.13
    wfrq          8.9   10.0   1.12
    deps **       3.1    5.2   1.68
			       1.15 #

Mawk vs.  Gawk 2.11.1  on Sun3, SunOS 4.0
    cat           6.1    8.1   1.33
    wc           12.6   40.6   3.22
    fields       35.7  117.6   3.29
    reg0          6.7   11.0   1.64
    reg1          8.5   12.6   1.48
    reg2         34.3   55.5   1.62
    loops        40.4  214.6   5.31
    words        31.3  110.8   3.54
    newton        6.7   25.4   3.79
    concat       20.9   65.7   3.14
    primes       38.8   28.3   0.73
    squeeze       2.2    4.6   2.09
    qsort        36.1   42.3   1.17
    wfrq         66.5  199.4   3.00
    deps         16.2   42.9   2.65
			       2.24 #

Mawk vs.  Nawk  on  VAX  3600  Ultrix 4.1
    cat           5.7    7.7   1.35
    wc           12.8   12.4   0.97
    fields       34.1   58.9   1.73
    reg0          7.1    8.6   1.21
    reg1          8.9   21.8   2.45
    reg2         36.7   58.4   1.59
    loops        30.5  117.7   3.86
    words        31.0   58.7   1.89
    newton        5.6   11.9   2.12
    concat       21.3   28.9   1.36
    primes       36.3   17.2   0.47
    squeeze       2.2    3.1   1.41
    qsort        39.3   29.5   0.75
    wfrq         76.2  173.9   2.28
    deps         18.1   32.4   1.79
			       1.50 #

* newton, primes and loops take no input.
Newton computed the square roots of 1 to 1000 by Newton's method
and primes was a sieve for primes < 5000.  Loops was three nested
loops 100 x 50 x 50 with a sum on the inside.

** deps input was *.c on mawk source

The other programs read a file of 20000+ C source lines.
The input files were blown up by a factor of 4 (80000+ lines) on the Stardent.

# geometric mean of col 3 --  (a1 * a2 * ... an ) ^ (1/n) .

@//E*O*F mawk0.97/test/benchmarks//
chmod u=r,g=r,o=r mawk0.97/test/benchmarks
 
echo x - mawk0.97/test/cat.awk
sed 's/^@//' > "mawk0.97/test/cat.awk" <<'@//E*O*F mawk0.97/test/cat.awk//'
# print every input record
{ print } 
@//E*O*F mawk0.97/test/cat.awk//
chmod u=r,g=r,o=r mawk0.97/test/cat.awk
 
echo x - mawk0.97/test/concat.awk
sed 's/^@//' > "mawk0.97/test/concat.awk" <<'@//E*O*F mawk0.97/test/concat.awk//'


# test concatenation
#

# print the last field, field number (NF+1)/2 and the first field,
# concatenated with nothing in between
{ print $NF  $( (NF+1)/2 )  $1 }
@//E*O*F mawk0.97/test/concat.awk//
chmod u=r,g=r,o=r mawk0.97/test/concat.awk
 
echo x - mawk0.97/test/fields.awk
sed 's/^@//' > "mawk0.97/test/fields.awk" <<'@//E*O*F mawk0.97/test/fields.awk//'

# print each field
# one per line
#

# a record with no fields (NF == 0) prints nothing
{ for(i=1;i<=NF;i++) print $i}
@//E*O*F mawk0.97/test/fields.awk//
chmod u=r,g=r,o=r mawk0.97/test/fields.awk
 
echo x - mawk0.97/test/loops.awk
sed 's/^@//' > "mawk0.97/test/loops.awk" <<'@//E*O*F mawk0.97/test/loops.awk//'

# test looping speed
#

BEGIN {

# for each i in 1..100 : j runs from i down to 0 and, for each j,
# k runs from 0 up to j ; the innermost body adds k + j + i to sum
for(i=1; i<=100 ; i++)
{ 
  j = i ;
  while ( j >= 0 ) 
  {
    k = 0 
    do  
    { sum += k + j + i
      k++
    }
    while ( k <= j )
    j--
  }
}

print sum
}
@//E*O*F mawk0.97/test/loops.awk//
chmod u=r,g=r,o=r mawk0.97/test/loops.awk
 
echo x - mawk0.97/test/newton.awk
sed 's/^@//' > "mawk0.97/test/newton.awk" <<'@//E*O*F mawk0.97/test/newton.awk//'


# compute square root by newton's method
#
# SQRT(x) -- approximate the square root of x
# NOTE: new and old are not in the parameter list, so they are globals
# NOTE: x == 0 makes the first step divide by zero (2*old is 0)
function SQRT(x)
{ new = x/2     # first estimate
  do  
  { old = new
    new = (old*old+x)/(2*old)    # Newton step for  t*t - x = 0
  }
  while ( abs(new-old) > 1e-6 )  # until two successive estimates agree

  return  (new+old)/2
}

# absolute value of x
function abs(x)
{ return x>=0?x:-x }

  
# no input is read: print i and SQRT(i) for i = 1 .. 1000
BEGIN {
  for(i = 1 ; i <= 1000 ; i++) print i, SQRT(i)
}
@//E*O*F mawk0.97/test/newton.awk//
chmod u=r,g=r,o=r mawk0.97/test/newton.awk
 
echo x - mawk0.97/test/primes.awk
sed 's/^@//' > "mawk0.97/test/primes.awk" <<'@//E*O*F mawk0.97/test/primes.awk//'

# find all primes 
#  <= ARGV[1]
#
BEGIN {  stop = ARGV[1]
   # prime[1..p_cnt] holds the odd primes found so far ; 2 is stored
   # in prime[0] only at the end, so it is never used as a trial divisor
   prime[ p_cnt = 1 ] =  3

# keep track of integer part of square root by adding
# odd integers 
#   root    = integer part of sqrt(test)
#   squares = (root+1)^2 , the value of test at which root next goes up
#   odd     = the odd number added to squares to reach the next square
   odd = test = 5
   root = 2
   squares = 9

   
while ( test <= stop )
{
   if ( test >= squares )
   { root++
     odd += 2
     squares += odd 
   }

   # trial division by the stored primes that are <= root
   flag = 1
   for ( i = 1 ; prime[i] <= root ; i++ )
   	if ( test % prime[i] == 0 )  #  not prime
	{ flag = 0 ; break }

   if ( flag )  prime[ ++p_cnt ] = test

   test += 2   # only odd candidates are tested
}

prime[0] = 2

# NOTE: 2 and 3 are printed whatever the value of stop
for(i = 0 ; i <= p_cnt ; i++ )  print prime[i]

}


     
@//E*O*F mawk0.97/test/primes.awk//
chmod u=r,g=r,o=r mawk0.97/test/primes.awk
 
echo x - mawk0.97/test/qsort.awk
sed 's/^@//' > "mawk0.97/test/qsort.awk" <<'@//E*O*F mawk0.97/test/qsort.awk//'


# qsort text files
#

# middle(x,y,z) -- the median of the three arguments
function middle(x,y,z)  #return middle of 3
{
  if ( x <= y )  
  { if ( z >= y )  return y    # x <= y <= z
    if ( z <  x )  return x    # z <  x <= y
    return z                   # x <= z <  y
  }

  # here y < x
  if ( z >= x )  return x      # y <  x <= z
  if ( z <  y )  return y      # z <  y <  x
  return z                     # y <= z <  x
}


# isort(A, n) -- insertion sort of A[1..n] into increasing order, in place
# i, j and hold are locals
function  isort(A , n,    i, j, hold)
{
  # if needed a sentinel at A[0] will be created
  # (the inner loop has no j > 1 test ; it reads A[0] when hold has to
  #  move all the way to the front)

  for( i = 2 ; i <= n ; i++)
  {
    hold = A[ j = i ]
    while ( A[j-1] > hold )
    { j-- ; A[j+1] = A[j] }   # shift the larger element one place up

    A[j] = hold
  }
}


# recursive quicksort
# qsort(A, left, right) -- partition A[left..right] around a pivot and
# recurse on each side that spans more than BLOCK (a global) positions.
# Shorter runs are left unsorted here ; the END rule calls isort() on the
# whole array afterwards.   i, j, pivot and hold are locals.
function  qsort(A, left, right    ,i , j, pivot, hold)
{
  
  # pivot is the median of the first, middle and last elements
  pivot = middle(A[left], A[int((left+right)/2)], A[right])

  i = left
  j = right

  while ( i <= j )
  {
    while ( A[i] < pivot )  i++ 
    while ( A[j] > pivot )  j--

    if ( i <= j )
    { hold = A[i]
      A[i++] = A[j]
      A[j--] = hold
    }
  }

  if ( j - left > BLOCK )  qsort(A,left,j)
  if ( right - i > BLOCK )  qsort(A,i,right)
}

# runs spanning BLOCK positions or fewer are left to the final isort()
BEGIN { BLOCK = 5 }


# collect every input line
{ line[NR] = $0 ""   # sort as string
}

END  {

  if ( NR > BLOCK )  qsort(line, 1, NR)

  # finishing pass over the whole array
  isort(line, NR)

  for(i = 1 ; i <= NR ; i++) print line[i]
}
  



    
@//E*O*F mawk0.97/test/qsort.awk//
chmod u=r,g=r,o=r mawk0.97/test/qsort.awk
 
echo x - mawk0.97/test/reg0.awk
sed 's/^@//' > "mawk0.97/test/reg0.awk" <<'@//E*O*F mawk0.97/test/reg0.awk//'

# count the records that contain the string "return"
/return/ {cnt++}  END{print cnt}
@//E*O*F mawk0.97/test/reg0.awk//
chmod u=r,g=r,o=r mawk0.97/test/reg0.awk
 
echo x - mawk0.97/test/reg1.awk
sed 's/^@//' > "mawk0.97/test/reg1.awk" <<'@//E*O*F mawk0.97/test/reg1.awk//'

# count the records that contain "return" or "switch"
/return|switch/ {cnt++}  END{print cnt}
@//E*O*F mawk0.97/test/reg1.awk//
chmod u=r,g=r,o=r mawk0.97/test/reg1.awk
 
echo x - mawk0.97/test/reg2.awk
sed 's/^@//' > "mawk0.97/test/reg2.awk" <<'@//E*O*F mawk0.97/test/reg2.awk//'

# count the records that contain  identifier [ anything ]  followed by
# optional blanks and an  =
/[A-Za-z_][A-Za-z0-9_]*\[.*\][ \t]*=/ {cnt++}  END{print cnt}
@//E*O*F mawk0.97/test/reg2.awk//
chmod u=r,g=r,o=r mawk0.97/test/reg2.awk
 
echo x - mawk0.97/test/sample
sed 's/^@//' > "mawk0.97/test/sample" <<'@//E*O*F mawk0.97/test/sample//'
dump  cat.awk  ../TB  mawk gawk awk
out   wc.awk ../TB  mawk gawk awk
dump  fields.awk ../TB mawk gawk awk
out  reg0.awk ../TB mawk gawk awk
out  reg1.awk ../TB mawk gawk awk
out  reg2.awk ../TB mawk gawk awk
out  words0.awk  ../TB  mawk gawk
out  words1.awk  ../TB  mawk
dump newton.awk  /dev/null  mawk  gawk
dump concat.awk  ../TB  mawk gawk awk
dump primes.awk  5000   mawk gawk
dump squeeze.awk  ../parse.y mawk gawk
dump qsort.awk  ../parse.y  mawk  gawk
@//E*O*F mawk0.97/test/sample//
chmod u=r,g=r,o=r mawk0.97/test/sample
 
echo x - mawk0.97/test/squeeze.awk
sed 's/^@//' > "mawk0.97/test/squeeze.awk" <<'@//E*O*F mawk0.97/test/squeeze.awk//'

# test gsub
#
# squeeze space to single space


# gsub() without a third argument works on $0
{ gsub( /[ \t]+/, " ") ; print }
@//E*O*F mawk0.97/test/squeeze.awk//
chmod u=r,g=r,o=r mawk0.97/test/squeeze.awk
 
echo x - mawk0.97/test/test.sh
sed 's/^@//' > "mawk0.97/test/test.sh" <<'@//E*O*F mawk0.97/test/test.sh//'
#! /bin/sh

###############
#  shell script for timing mawk and other awks
#
#  reads input file of the form
#  dump_or_not  program_file  input_file  list of awks
#
#  dump_or_not is the word  dump  to discard the program's output;
#  anything else lets the output through
#
#  usage:  test.sh < input_file
#


if [ $# != 0 ]
then
  name=`basename "$0"`
  echo "usage: $name < input_file"  1>&2
  exit 1
fi


while  read  direct program file  awk_list
do

echo 
echo

# "$direct" is quoted so that a blank input line does not make [ fail
# with a syntax error.
# $program and $file stay unquoted on purpose: an input_file such as
# *.c is expected to expand to several names.
if [ "$direct" = dump ]
then
for i in $awk_list
do
echo "$i -f $program $file"
/bin/time $i -f $program $file >/dev/null
done

else
for i in $awk_list
do
echo "$i -f $program $file"
/bin/time $i -f $program $file 
done
fi

done   2>&1    # send the timing to stdout
@//E*O*F mawk0.97/test/test.sh//
chmod u=rx,g=rx,o=rx mawk0.97/test/test.sh
 
echo x - mawk0.97/test/wc.awk
sed 's/^@//' > "mawk0.97/test/wc.awk" <<'@//E*O*F mawk0.97/test/wc.awk//'

# print the number of records and the total number of fields
{sum += NF}
END{ print NR, sum}
@//E*O*F mawk0.97/test/wc.awk//
chmod u=r,g=r,o=r mawk0.97/test/wc.awk
 
echo x - mawk0.97/test/wfrq.awk
sed 's/^@//' > "mawk0.97/test/wfrq.awk" <<'@//E*O*F mawk0.97/test/wfrq.awk//'


# wfrq.awk 
# find the twenty most frequent words in a document
# 
# counts words in   array   cnt[ word ]
#
# uses a heap to select the twenty most frequent
#
#


# fields are separated by runs of non-letters ; BIG is the sentinel count
BEGIN { FS = "[^a-zA-Z]+" ;  BIG = 999999 }

{ for( i = 1 ; i <= NF ; i++ )  cnt[$i]++ }

# the empty string is counted when a record starts or ends with a separator
END { delete  cnt[ "" ]

# load twenty values
# into the heap   word[1..20] and count[1..20]
#
#   heap condition --
#   count[i] <= count[2*i] and count[i] <= count[2*i+1]

j = 1

# remove twenty values from cnt[] , put in the heap

for( i in cnt )
{
  word[j] = i  ; count[j] = cnt[i] 
  delete cnt[i] ;
  if ( ++j == 21 )  break ;
}

# make some sentinels
# to stop down_heap()
# (down_heap(i) reads count[2*i] and count[2*i+1] , i.e. up to count[41])
#

for( i = j ; i < 43 ; i++ )  count[i] = BIG

h_empty = j  # save the first empty slot
# make a heap with the smallest in slot 1
for( i = h_empty - 1 ; i > 0 ; i-- )  down_heap(i) 

# examine the rest of the values
for ( i in cnt )
  if ( (j = cnt[i]) > count[1] )
  { # its bigger
    # take the smallest out of the heap and readjust
    word[1] = i ; count[1] = j
    down_heap(1)
  }

h_empty-- ;   # now the index of the last occupied slot

# what's left are the twenty largest
# smallest at the top
#
# buffer[] is filled from 20 downwards so that printing 1..20 lists
# the most frequent word first

i = 20
while ( h_empty > 1 )
{
  buffer[i--] = sprintf ("%3d %s"  , count[1], word[1])
  # move the last heap entry to the top and sift it down
  count[1] = count[h_empty] ; word[1] = word[h_empty]
  count[h_empty] = BIG
  down_heap(1)
  h_empty--
}
  buffer[i--] = sprintf ("%3d %s"  , count[1], word[1])

  # NOTE: twenty lines are always printed ; with fewer than twenty
  # distinct words the leading ones are empty
  for(j = 1 ; j <= 20 ; j++ )  print buffer[j]
}

# let the i th element drop to its correct position

# down_heap(i) -- sift entry i of the min-heap count[] / word[] down
# k is a local ; NOTE: hold is not in the parameter list, so it is global
# There is no bounds test: the loop relies on the BIG sentinels stored
# past the end of the heap.
function down_heap(i,       k) 
{
  while ( 1 )
  {
      # k = the child with the smaller count
      if ( count[2*i] <= count[2*i+1] )  k = 2*i
      else  k = 2*i + 1 

      if ( count[i] <= count[k] )  return

      # swap entry i with that child and continue from there
      hold = word[k] ; word[k] = word[i] ; word[i] = hold
      hold = count[k] ; count[k] = count[i] ; count[i] = hold
      i = k
   }
}

@//E*O*F mawk0.97/test/wfrq.awk//
chmod u=r,g=r,o=r mawk0.97/test/wfrq.awk
 
echo x - mawk0.97/test/wfrq0.awk
sed 's/^@//' > "mawk0.97/test/wfrq0.awk" <<'@//E*O*F mawk0.97/test/wfrq0.awk//'

#  this program is the same as wfrq.awk except it runs
#  about twice as fast because it uses RS as a regular 
#  expression
#

# down_heap(i) -- sift entry i of the min-heap count[] / word[] down
# k is a local ; NOTE: hold is not in the parameter list, so it is global
# There is no bounds test: the loop relies on the BIG sentinels stored
# past the end of the heap.
function down_heap(i,  k) 
{
  while ( 1 )
  {
      # k = the child with the smaller count
      if ( count[2*i] <= count[2*i+1] )  k = 2*i
      else  k = 2*i + 1 

      if ( count[i] <= count[k] )  return

      # swap entry i with that child and continue from there
      hold = word[k] ; word[k] = word[i] ; word[i] = hold
      hold = count[k] ; count[k] = count[i] ; count[i] = hold
      i = k
   }
}


# records are separated by runs of non-letters, so each record is one word
BEGIN { RS = "[^a-zA-Z]+" ;  BIG = 999999 }

{ cnt[$0]++ }

END { delete  cnt[ "" ]

# load twenty values
j = 1
for( i in cnt )
{
  word[j] = i  ; count[j] = cnt[i] 
  delete cnt[i] ;
  if ( ++j == 21 )  break ;
}

# make some sentinels
# (down_heap(i) reads count[2*i] and count[2*i+1] , i.e. up to count[41])
for( i = j ; i < 43 ; i++ )  count[i] = BIG

h_empty = j  # save the first empty slot
# make a heap with the smallest in slot 1
for( i = h_empty - 1 ; i > 0 ; i-- )  down_heap(i) 

# examine the rest of the values
for ( i in cnt )
  if ( (j = cnt[i]) > count[1] )
  { # its bigger
    # take the smallest out of the heap and readjust
    word[1] = i ; count[1] = j
    down_heap(1)
  }

h_empty-- ;   # now the index of the last occupied slot

# what's left are the twenty largest
# smallest at the top
#
# buffer[] is filled from 20 downwards so that printing 1..20 lists
# the most frequent word first

i = 20
while ( h_empty > 1 )
{
  buffer[i--] = sprintf ("%3d %s"  , count[1], word[1])
  # move the last heap entry to the top and sift it down
  count[1] = count[h_empty] ; word[1] = word[h_empty]
  count[h_empty] = BIG
  down_heap(1)
  h_empty--
}
  buffer[i--] = sprintf ("%3d %s"  , count[1], word[1])

  for(j = 1 ; j <= 20 ; j++ )  print buffer[j]
}
@//E*O*F mawk0.97/test/wfrq0.awk//
chmod u=r,g=r,o=r mawk0.97/test/wfrq0.awk
 
echo x - mawk0.97/test/words.awk
sed 's/^@//' > "mawk0.97/test/words.awk" <<'@//E*O*F mawk0.97/test/words.awk//'

# words.awk

# find real words
# i.e. contiguous letters
# and print how many there are in the input

BEGIN { FS = "[^A-Za-z]+" } # split fields on not letters

{
# $1 and $NF  might be  empty
# (when the record starts or ends with a separator) ;
# empty fields are not words, so they are taken off the count

  if ( NF > 0 )
  {
    cnt += NF

    if ( $NF == "" )  cnt--
    if ( NF > 1 && $1 == "" ) cnt--
  }
}

END { print cnt}


@//E*O*F mawk0.97/test/words.awk//
chmod u=r,g=r,o=r mawk0.97/test/words.awk
 
echo x - mawk0.97/test/words0.awk
sed 's/^@//' > "mawk0.97/test/words0.awk" <<'@//E*O*F mawk0.97/test/words0.awk//'


# use non letters as RS
#
#

# With RS matching runs of non-letters each record is one word, so the
# final NR is the word count.  The first record is read here in BEGIN ;
# if it is empty NR is reset to 0 so that it is not counted.
BEGIN { RS = "[^A-Za-z][^A-Za-z]*" 
    getline
    if ( $0 == "" )  NR = 0
}

END { print NR }
@//E*O*F mawk0.97/test/words0.awk//
chmod u=r,g=r,o=r mawk0.97/test/words0.awk
 
echo mkdir - mawk0.97/examples
mkdir mawk0.97/examples
chmod u=rwx,g=rx,o=rx mawk0.97/examples
 
echo x - mawk0.97/examples/decl.awk
sed 's/^@//' > "mawk0.97/examples/decl.awk" <<'@//E*O*F mawk0.97/examples/decl.awk//'

# parse a C declaration by recursive descent
# based on a C program in KR ANSI edition
#
# run on a C file it finds the declarations
#
# restrictions: one declaration per line
#               doesn't understand struct {...}
#               makes assumptions about type names
#
#
#  some awks need double escapes on strings used as
#  regular expressions.  If not run on mawk, use gdecl.awk


################################################
#   lexical scanner -- gobble()
#   input : string s -- treated as a regular expression
#   gobble eats SPACE, then eats longest match of s off front
#   of global variable line.
#   Cuts the matched part off of line
#


# gobble(s) -- returns the text matched at the front of the global line,
# or "" if s does not match there ; x is a local
function gobble(s,  x)  
{
  sub( /^ /, "", line)  # eat SPACE if any

  # surround s with parenthesis to make sure ^ acts on the
  # whole thing

  match(line, "^" "(" s ")")
  # on failure match() sets RLENGTH to -1 : x becomes "" and
  # substr(line, 0) leaves line as it was
  x = substr(line, 1, RLENGTH)
  line = substr(line, RLENGTH+1)
  return x 
}


# ptr_to(n) -- the phrase "pointer to" repeated int(n) times, separated
# by single spaces ; "" when int(n) is less than 1.   x is a local.
function ptr_to(n,  x)
{
  n = int(n)
  if ( n < 1 )  return ""

  for ( x = "pointer to" ; n > 1 ; n-- )
      x = x " pointer to"

  return x
}


#recursively get a decl
# returns an english description of the declaration or
# "" if not a C declaration.

# decl() consumes its input from the global line through gobble() ;
# x, t and ptr_part are locals
function  decl(   x, t, ptr_part)
{

  x = gobble("[* ]+")   # get list of *** ...
  gsub(/ /, "", x)   # remove all SPACES
  ptr_part = ptr_to( length(x) )   # one "pointer to" for each *

  # We expect to see either an identifier or '('
  #

  if ( gobble("\(") )
  { 
    # this is the recursive descent part
    # we expect to match a declaration and closing ')'
    # If not return "" to indicate  failure

      if ( (x = decl()) == "" || gobble( "\)" ) == "" ) return ""

  }
  else  #  expecting an identifier
  {
    if ( (x = gobble(id)) == "" )  return ""
    x = x ":"
  }

  # finally look for ()
  # or  [ opt_size ]
  # any number of them, in any order

  while ( 1 )
     if ( gobble( funct_mark ) )  x = x " function returning"
     else
     if ( t = gobble( array_mark ) )
     { gsub(/ /, "", t)
       x = x " array" t " of"
     }
     else  break


   # the pointer part goes last: the suffixes above bind tighter than *
   x = x " "  ptr_part
   return x
}
    

# regular expressions (held as strings) that are handed to gobble()
BEGIN { id = "[_A-Za-z][_A-Za-z0-9]*" 
        funct_mark = "\([ \t]*\)"
	array_mark = "\[[ \t]*[_A-Za-z0-9]*[ \t]*\]"

# I've assumed types are keywords or all CAPS or end in _t
# Other conventions could be added.

    type0 = "int|char|short|long|double|float|void" 
    type1 = "[_A-Z][_A-Z0-9]*"  #  types are CAPS
    type2 = "[_A-Za-z][_A-Za-z0-9]*_t"  # end in _t

    types = "(" type0 "|" type1 "|" type2 ")"
}


# for every input line: strip comments, squeeze white space, then try to
# read   [extern|static]  type  declarator  ;   and print a description
{   

    gsub( "/\*([^*]|\*[^/])*(\*/|$)" , " ") # remove comments
    gsub( /[ \t]+/, " ")  # squeeze white space to a single space


    line = $0

    scope = gobble( "extern|static" )

    if ( type = gobble("(struct|union|enum) ") )
    		type = type gobble(id)  #  get the tag
    else
    {

       type = gobble("(un)?signed ") gobble( types )

    }
    
    if ( ! type )  next   # no type name found: not a declaration
    
    if ( (x = decl()) && gobble( ";") )
    {
      x  =  x " " type
      if ( scope )  x = x " (" scope ")"
      gsub( /  +/, " ", x)  # squeeze runs of spaces in the description
      print x
    }

}




@//E*O*F mawk0.97/examples/decl.awk//
chmod u=rx,g=rx,o=rx mawk0.97/examples/decl.awk
 
echo x - mawk0.97/examples/deps.awk
sed 's/^@//' > "mawk0.97/examples/deps.awk" <<'@//E*O*F mawk0.97/examples/deps.awk//'

# find include dependencies in C source
#
# mawk -f deps.awk  C_source_files
#         -- prints a dependency list suitable for make
#         -- ignores   #include <   >
#


BEGIN {  stack_index = 0 # stack[] holds the input files

  # For every argument that ends in .c, follow its  #include "..."
  # lines (and those of the files it includes) and print one rule
  #       file.o : header header ...
  # Nothing is printed for a source file that includes nothing.
  # All the work is done here, so no main input is read.

  for(i = 1 ; i < ARGC ; i++)
  {
    file = ARGV[i]
    if ( file !~ /\.c$/ )  continue  # skip it
    outfile = substr(file, 1, length(file)-2) ".o"

    # INCLUDED[] stores the set of included files
    # -- start with the empty set
    for( j in INCLUDED ) delete INCLUDED[j]

    while ( 1 )
    {
        # The parentheses matter: an unparenthesised
        #     getline line < file <= 0
        # may be parsed with the comparison as part of the file
        # name expression.
        if ( (getline line < file) <= 0 )  # no open or EOF
        { close(file)
          if ( stack_index == 0 )  break # empty stack

          # resume the including file where we left it
          file = stack[ stack_index-- ]
          continue
        }

        # blanks are allowed before and after the '#' and before
        # the opening quote
        if ( line ~ /^[ \t]*#[ \t]*include[ \t]*".*"/ )
        {
          split(line, X, "\"")  # filename is in X[2]

          if ( X[2] in INCLUDED ) # we've already included it
                continue

          #push current file
          stack[ ++stack_index ] = file
          INCLUDED[ file = X[2] ] = ""
        }
    }  # end of while

   # test if INCLUDED is empty
   flag = 0 # on once the front is printed
   for( j in INCLUDED )
      if ( ! flag )
      { printf "%s : %s" , outfile, j ; flag = 1 }
      else  printf " %s" , j

   if ( flag )  print ""

  }# end of loop over files in ARGV[i]

}
@//E*O*F mawk0.97/examples/deps.awk//
chmod u=rx,g=rx,o=rx mawk0.97/examples/deps.awk
 
echo x - mawk0.97/examples/gdecl.awk
sed 's/^@//' > "mawk0.97/examples/gdecl.awk" <<'@//E*O*F mawk0.97/examples/gdecl.awk//'

# parse a C declaration by recursive descent
# 
#  decl.awk with extra escapes \

################################################
############################################


#   lexical scanner -- gobble()
#   input : string s -- treated as a regular expression
#   gobble eats SPACE, then eats longest match of s off front
#   of global variable line.
#   Cuts the matched part off of line
#


function gobble(s,  x)
{
  # Token reader.  Drops one leading SPACE from the global `line',
  # then removes and returns the longest prefix of `line' that
  # matches the regular expression held in string s.
  # Returns "" and leaves the rest of `line' alone when s does
  # not match at the front.

  sub( /^ /, "", line)

  # the parentheses make the ^ anchor apply to all of s,
  # alternatives included
  if ( match(line, "^(" s ")") )
  {
    x = substr(line, 1, RLENGTH)
    line = substr(line, RLENGTH+1)
  }
  else  x = ""

  return x
}


function ptr_to(n,  x)  # returns "pointer to" repeated n times
{
  # the copies are separated by single spaces;
  # the result is "" when int(n) is zero or negative
  x = ""
  for ( n = int(n) ; n > 0 ; n-- )
     x = (x == "") ? "pointer to" : x " pointer to"
  return x
}


#recursively get a decl
# returns an english description of the declaration or
# "" if not a C declaration.

function  decl(   x, t, ptr_part)
{
  # Parse one declarator off the front of the global `line'.
  # x, t and ptr_part are locals (extra-parameter idiom).
  # Returns the English text for the declarator, or "" when the
  # input does not look like a C declarator.

  # leading stars (spaces between them allowed) become the
  # "pointer to ..." phrase; it is appended last, after the
  # description of any () and [] suffixes
  x = gobble("[* ]+")
  gsub(/ /, "", x)
  ptr_part = ptr_to( length(x) )

  if ( gobble("\\(") == "" )
  {
    # no opening parenthesis, so an identifier must come next
    x = gobble(id)
    if ( x == "" )  return ""
    x = x ":"
  }
  else
  {
    # parenthesised declarator: recurse, then insist on the ')'
    x = decl()
    if ( x == "" )  return ""
    if ( gobble( "\\)" ) == "" )  return ""
  }

  # collect any trailing  ()  or  [ opt_size ]  suffixes, in order
  while ( 1 )
  {
    if ( gobble( funct_mark ) != "" )
    {
      x = x " function returning"
      continue
    }

    t = gobble( array_mark )
    if ( t == "" )  break

    gsub(/ /, "", t)   # "[ 10 ]" is reported as "[10]"
    x = x " array" t " of"
  }

  return x " "  ptr_part
}
    

BEGIN {
    # Regular expressions, kept as strings, that are handed to gobble()

    # a C identifier
    id = "[_A-Za-z][_A-Za-z0-9]*"

    # the declarator suffixes  ()  and  [ opt_size ]
    funct_mark = "\\([ \t]*\\)"
    array_mark = "\\[[ \t]*[_A-Za-z0-9]*[ \t]*\\]"

    # A type is taken to be a keyword, an all CAPS name, or a name
    # that ends in _t.  Other conventions could be added as further
    # alternatives.
    type0 = "int|char|short|long|double|float|void"
    type1 = "[_A-Z][_A-Z0-9]*"
    type2 = "[_A-Za-z][_A-Za-z0-9]*_t"

    types = sprintf("(%s|%s|%s)", type0, type1, type2)
}


{
    # Translate one input line: strip comments, normalise white space,
    # then try to read   [scope] type declarator ;   off the front.
    # Lines that do not start with a recognised type are skipped
    # silently, as are lines whose declarator cannot be parsed.

    # Remove comments; a comment still open at the end of the line is
    # removed up to the end of the line.  Runs of '*' are matched as a
    # unit so that a comment such as  /* a **/  ends where it should
    # instead of running on into the code behind it.
    gsub( /\/\*([^*]|\*+[^*\/])*(\*+\/|\**$)/ , " ")
    gsub( /[ \t]+/, " ")  # squeeze white space to a single space

    line = $0

    scope = gobble( "extern|static" )

    if ( type = gobble("(struct|union|enum) ") )
        type = type gobble(id)  #  get the tag
    else
        type = gobble("(un)?signed ") gobble( types )

    if ( ! type )  next

    if ( (x = decl()) && gobble( ";") )
    {
      x  =  x " " type
      if ( scope )  x = x " (" scope ")"
      gsub( /  +/, " ", x)  # an empty pointer part leaves two spaces
      print x
    }

}




@//E*O*F mawk0.97/examples/gdecl.awk//
chmod u=rx,g=rx,o=rx mawk0.97/examples/gdecl.awk
 
echo x - mawk0.97/examples/nocomment.awk
sed 's/^@//' > "mawk0.97/examples/nocomment.awk" <<'@//E*O*F mawk0.97/examples/nocomment.awk//'

# remove C comments
#

BEGIN {
 # Every complete C comment acts as a record separator, so the
 # records are the pieces of program text between comments.
 # (A regular expression as RS is an extension.)
 #
 # Runs of '*' are matched as a unit, so a comment that ends in
 # **/  stops there instead of running on to a later  */ .
 # The backslashes are doubled because this is a string: the regular
 # expression must receive  \*  whether or not the awk in use keeps
 # the backslash of an unknown escape sequence.
 RS = "/\\*([^*]|\\*+[^*/])*\\*+/"
 ORS = " "     # each comment is replaced by one space
 getline hold
}

{ # hold one record because we don't want ORS on the last
  # record

  print hold
  hold = $0
}

END { printf "%s", hold }
@//E*O*F mawk0.97/examples/nocomment.awk//
chmod u=r,g=r,o=r mawk0.97/examples/nocomment.awk
 
echo mkdir - mawk0.97/msdos
mkdir mawk0.97/msdos
chmod u=rwx,g=rx,o=rx mawk0.97/msdos
 
echo x - mawk0.97/msdos/INSTALL
sed 's/^@//' > "mawk0.97/msdos/INSTALL" <<'@//E*O*F mawk0.97/msdos/INSTALL//'

how to make mawk under MsDOS
---------------------------

Read the DOS section in the manual first.

In addition to a C compiler, you will need an
assembler and a yacc-compatible parser generator.

Assuming you keep the same directory structure:

1)  run  mklib.bat in this directory to create msdos.lib

2)  move the makefile and mawk.dep  to ..
    ( The supplied dos makefile is for Borland's make.  I used to
      make mawk with MS make -- which make you use is no big deal.)

3)  If you want a Unix style command line for mawk, you'll need to
    write a function called reargv(int *, char ***) which passes
    mawk the modified argc and argv.  Compile and add to
    msdos.lib.

    The supplied reargv.c works with POLYSHELL by Polytron; for a
    different shell you could use it as an example.

    If this made sense and all went well, go to machine.h
    and
      #define  HAVE_REARGV   1

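    If ! (sense || well ), don't worry: HAVE_REARGV is 0 by
    default.  (Note that the supplied makefile passes
    -DHAVE_REARGV=1 in its non-DEBUG CFLAGS; take that out if
    you have no reargv().)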

4)  YACC --
On the PC I use bison.1.14, it works fine and is easy to find.
Surely easier to get than mawk.  Unfortunately I cannot distribute
the generated parse.c file.

    If you use bison, I suggest #if 0  in parse.c
    on the part that grows the parser stack.
    Then you won't need alloca().
    ( For mawk programs if the parser stack needs to grow, something
      is fatally wrong.)
    Also unless you are debugging the grammar, you don't need
    the tables yyrhs[] or yyprhs[] -- cut them out and save some data
    space.

You can make parse.c under unix with bison or another yacc and
move it to dos.

5)  rand48.asm is hardwired for small model.  I've never made
a large model mawk, but as far as I can tell the only problem
is in rand48.asm.  The stack machine instructions (INST) assume a
pointer is a pointer is a pointer, so if you change to far data
pointers you'll also need to change to far code pointers.

Rand48 is overkill, you could use another random number generator
although the one supplied with your compiler is a near 
certainty to be poor.

On small model, you have about 40K of free data space which is plenty
except for programs that hold a medium or larger data file entirely
in memory.

==================================================================

The reason system() and pipes are missing is I haven't decided
entirely how to handle the runaway loop problem.  Dos makes
asynchronous termination of a program difficult, because DOS
itself might not be able to handle exit().

Hooking int 0x1a forces me to deal with cleanup and makes ^C
a pain.  What is the effect of my 0x1a on children? (That's why
no system() or pipes yet).  What do I have to check after a child
terminates, children can muck up the state of the parent process.


I have used mawk as is on DOS for about 6 months.  What's missing
I rarely use, except to emit error messages, and I use
errmsg() for that.  If I stall about fixing this stuff, DOS
might go away and I'll never have to deal with it.
@//E*O*F mawk0.97/msdos/INSTALL//
chmod u=r,g=r,o=r mawk0.97/msdos/INSTALL
 
echo x - mawk0.97/msdos/makefile
sed 's/^@//' > "mawk0.97/msdos/makefile" <<'@//E*O*F mawk0.97/msdos/makefile//'

#  this is a makefile for mawk 
#  using Borland's make v3.0
#  and CFLAGS for TurboC

@.SWAP

CFLAGS = -c -ms -f -wnod -wpro -A
LINKFLAGS = /c/m

!if $d(DEBUG)
CFLAGS = $(CFLAGS) -DDEBUG
LINKFLAGS = $(LINKFLAGS)/v
!else
#  optimize
CFLAGS = $(CFLAGS) -v- -O -DHAVE_REARGV=1
!endif


####################
#  user tuned macros
####################

# location of your C library
CLIB=c:\lib

# if you have a 80x87 coprocessor
#FLOATLIB=fp87
# otherwise
FLOATLIB=emu

#take this out if you don't need it, i.e.,
# if you have a reargv()
WILDARGS=\lib\wildargs.obj  #expand filenames

#  You'll need a yacc like parser
YACC = bison -dyv

######################  end of tuneable macros


OBS = parse.obj \
array.obj \
bi_funct.obj \
bi_vars.obj \
cast.obj \
code.obj \
da.obj \
error.obj \
execute.obj \
fcall.obj \
field.obj \
files.obj \
fin.obj \
hash.obj \
init.obj \
jmp.obj \
kw.obj \
main.obj \
matherr.obj \
memory.obj \
print.obj \
re_cmpl.obj \
scan.obj \
scancode.obj \
split.obj \
zmalloc.obj

# C sources of the regular expression library; rexp\rexp.lib is
# rebuilt whenever one of them changes
REXP_C = rexp\rexp.c \
rexp\rexp0.c \
rexp\rexp1.c \
rexp\rexp2.c  \
rexp\rexp3.c \
rexp\rexpdb.c


LIBS = msdos\msdos.lib \
rexp\rexp.lib \
$(CLIB)\$(FLOATLIB) \
$(CLIB)\maths \
$(CLIB)\cs

mawk.exe : $(OBS)  rexp\rexp.lib
	tlink $(LINKFLAGS) @&&!
	$(CLIB)\c0s $(WILDARGS) $(OBS)
	mawk,mawk
	$(LIBS)
!

rexp\rexp.lib : $(REXP_C)
	cd rexp
	del *.obj
	make

@.c.obj :
	tcc $(CFLAGS) $*.c 

parse.c :  parse.y
	$(YACC)  parse.y
	rename y_tab.c parse.c
	rename  y_tab.h parse.h

# scancode.c is generated: build the makescan helper, capture its
# output, then remove the helper again (del, as elsewhere in this
# makefile -- rm is not a DOS command)
scancode.c :  makescan.c  scan.h
	tcc makescan.c
	makescan.exe > scancode.c
	del makescan.obj
	del makescan.exe



# the dependencies of the obj's on h's
# you can make this with mawk -f deps.awk *.c
# ( deps.awk writes  .o  targets; they must be changed to  .obj
#   here, otherwise they name files this makefile never builds and
#   a changed header does not cause a recompile )

array.obj : bi_vars.h sizes.h zmalloc.h memory.h types.h machine.h mawk.h symtype.h
bi_funct.obj : fin.h bi_vars.h sizes.h memory.h zmalloc.h regexp.h types.h machine.h field.h repl.h files.h bi_funct.h mawk.h symtype.h init.h
bi_vars.obj : bi_vars.h sizes.h memory.h zmalloc.h types.h machine.h field.h mawk.h symtype.h init.h
cast.obj : parse.h sizes.h memory.h zmalloc.h types.h machine.h field.h scan.h repl.h mawk.h symtype.h
code.obj : sizes.h memory.h zmalloc.h types.h machine.h code.h mawk.h init.h
da.obj : sizes.h memory.h zmalloc.h types.h machine.h field.h repl.h code.h bi_funct.h mawk.h symtype.h
error.obj : parse.h bi_vars.h sizes.h types.h machine.h scan.h mawk.h symtype.h
execute.obj : sizes.h memory.h zmalloc.h regexp.h types.h machine.h field.h code.h repl.h bi_funct.h mawk.h symtype.h
fcall.obj : sizes.h memory.h zmalloc.h types.h machine.h code.h mawk.h symtype.h
field.obj : parse.h bi_vars.h sizes.h memory.h zmalloc.h regexp.h types.h machine.h field.h scan.h repl.h mawk.h symtype.h init.h
files.obj : fin.h sizes.h memory.h zmalloc.h types.h machine.h files.h mawk.h
fin.obj : parse.h fin.h bi_vars.h sizes.h memory.h zmalloc.h types.h machine.h field.h scan.h mawk.h symtype.h
hash.obj : sizes.h memory.h zmalloc.h types.h machine.h mawk.h symtype.h
init.obj : bi_vars.h sizes.h memory.h zmalloc.h types.h machine.h field.h code.h mawk.h symtype.h init.h
jmp.obj : sizes.h memory.h zmalloc.h types.h machine.h code.h jmp.h mawk.h init.h
kw.obj : parse.h sizes.h types.h machine.h mawk.h symtype.h init.h
main.obj : fin.h bi_vars.h sizes.h memory.h zmalloc.h types.h machine.h field.h code.h files.h mawk.h init.h
makescan.obj : parse.h scan.h symtype.h
matherr.obj : sizes.h types.h machine.h mawk.h
memory.obj : sizes.h memory.h zmalloc.h types.h machine.h mawk.h
parse.obj : bi_vars.h sizes.h memory.h zmalloc.h types.h machine.h field.h code.h files.h bi_funct.h mawk.h jmp.h symtype.h
print.obj : bi_vars.h parse.h sizes.h memory.h zmalloc.h types.h machine.h field.h scan.h files.h bi_funct.h mawk.h symtype.h
re_cmpl.obj : parse.h sizes.h memory.h zmalloc.h regexp.h types.h machine.h scan.h repl.h mawk.h symtype.h
scan.obj : parse.h fin.h sizes.h memory.h zmalloc.h types.h machine.h field.h scan.h repl.h files.h mawk.h symtype.h init.h
split.obj : bi_vars.h parse.h sizes.h memory.h zmalloc.h regexp.h types.h machine.h field.h scan.h bi_funct.h mawk.h symtype.h
zmalloc.obj : sizes.h zmalloc.h types.h machine.h mawk.h
@//E*O*F mawk0.97/msdos/makefile//
chmod u=r,g=r,o=r mawk0.97/msdos/makefile
 
echo x - mawk0.97/msdos/mklib.bat
sed 's/^@//' > "mawk0.97/msdos/mklib.bat" <<'@//E*O*F mawk0.97/msdos/mklib.bat//'

masm /mx /z  rand48.asm ;
tcc -c -f -O -v-  rand48_0.c

lib  msdos.lib  -+rand48.obj -+rand48_0.obj  ;



@//E*O*F mawk0.97/msdos/mklib.bat//
chmod u=r,g=r,o=r mawk0.97/msdos/mklib.bat
 
echo x - mawk0.97/msdos/rand48.asm
sed 's/^@//' > "mawk0.97/msdos/rand48.asm" <<'@//E*O*F mawk0.97/msdos/rand48.asm//'

;**************************************************
;  rand48.asm
;  PC versions of the rand48 family
;  1988 Michael D. Brennan
;  
;
;  rand48.asm  and rand48_0.c  are the source files;
;     mklib.bat adds them to msdos.lib  (see also rand48.h)
;**************************************************


public   _srand48, _seed48 , _lcong48
public	 _mrand48, _jrand48 ; uniform on [0,2^32)
public   _lrand48, _nrand48 ; uniform on [0,2^31)
public   _urand48, _vrand48 ; uniform on [0,2^16)

;  rand.asm

;  default multiplier, split into three 16 bit words
;  (standard_scale0 is the least significant: together 5DEECE66Dh),
;  and the default additive constant
standard_scale0 = 0e66dh
standard_scale1 = 0deech
standard_scale2 = 5h
standard_shift  = 0bh

@.model  small

@.data
;  NOTE: seed, scale and shift must stay adjacent and in this
;  order -- _lcong48 fills all seven words with a single block copy
;  that starts at seed.
prod   dw   3 dup(?)  ;  build  scale*seed here
seed   dw   3 dup(?)  ;  default seed storage for drand, lrand etc
scale  dw   standard_scale0
       dw   standard_scale1
       dw   standard_scale2
shift  dw   standard_shift
seedbuf	dw	3 dup(?)  ;  place old seed  for seed48()

@.code


;****************************************
;  rgen -- the basic linear congruence
;  call with  bx->the seed
;*************************************

rgen	proc	near
     ;   seed <- scale * seed + shift  with overflow
     ;
     ;   in:   bx -> three word seed, least significant word first
     ;   out:  the seed at [bx] is replaced by the next value;
     ;         bx itself is not modified
     ;   uses: ax, cx, dx, the flags and the scratch words in prod
     ;
     ;   The "i,j" notes below mean  scale word i  times  seed word j.
     ;   Only the low three words of the product are kept, so the
     ;   partial products 2,1  1,2  and 2,2 are never formed and the
     ;   high halves of 2,0  1,1  and 0,2 are dropped.

     ;  first get  scale*seed into prod

	mov	ax, scale   ;  0,0
	mul	word ptr [bx]
	mov	prod, ax
	mov	cx, dx     ;   save the overflow in cx

	mov	ax, scale+2    ; 1,0
	mul	word ptr [bx]
	add	ax, cx
	adc	dx, 0      ;   carry from the add goes into the high half
	mov	prod+2, ax
	mov	cx, dx

	mov	ax, scale+4   ;  2,0
	mul	word ptr [bx]
	add	ax, cx
	mov	prod+4, ax

	mov	ax, scale     ;  0,1
	mul	word ptr [bx+2]
	add	ax, prod+2
	adc	dx, 0
	mov	prod+2, ax
	mov	cx, dx

	mov	ax, scale+2   ;   1,1
	mul	word ptr [bx+2]
	add	ax, cx
	add	prod+4, ax

	mov	ax, scale   ;  0,2
	mul	word ptr [bx+4]
	add     ax, prod+4
	                 ;  move product to seed
	mov	word ptr [bx+4], ax
	mov	ax, prod+2
	mov	word ptr [bx+2], ax
	mov	ax, prod
	mov	word ptr [bx], ax
			  ;   add shift to seed
	mov	ax, shift
	add	word ptr [bx], ax
	adc	word ptr [bx+2], 0   ;  ripple the carry upward
	adc	word ptr [bx+4], 0

	ret
rgen	endp

set_scale	proc   near ;  set scale and shift to standard
	; called by _srand48 and _seed48; no registers are modified
	mov	scale, standard_scale0
	mov	scale+2, standard_scale1
	mov	scale+4, standard_scale2
	mov	shift, standard_shift
	ret
set_scale	endp

;  void  srand48(long seedval)

_srand48	proc	near
	; seed <- ( word at [bp+6] , word at [bp+4] , 330eh ), most
	; significant word first; also restores the standard scale
	; and shift
	push	bp
	mov	bp, sp
	mov	seed, 330eh ;  fixed low word of the seed
	mov	ax, [bp+4]  ;  presumably the low half of seedval
	mov	seed+2, ax
	mov	ax, [bp+6]  ;  presumably the high half of seedval
	mov	seed+4, ax
	call	set_scale
	pop	bp
	ret
_srand48	endp


;  short  *seed48( short  newseed[3] )

_seed48		proc   near
	; Saves the current seed in seedbuf, loads the three words that
	; the argument at [bp+4] points to as the new seed, restores the
	; standard scale and shift, and returns the offset of seedbuf
	; in ax.  seedbuf is overwritten by every call.
	push	bp
	mov	bp, sp
	push	si
	push	di
	push	ds
	pop	es          ;  es = ds : movsw stores through es:di
	                    ;  (es is not restored before returning)
	cld
	mov	di, offset seedbuf  ;  save old seed
	mov	si, offset seed
	mov	cx, 3
	rep	movsw
	mov	si, [bp+4]   ;  load the new seed
	mov	di, offset seed
	mov	cx, 3
	rep	movsw
	call	set_scale
	mov	ax, offset seedbuf
	pop	di
	pop	si
	pop	bp
	ret
_seed48	endp


;  long  mrand48()
;  long  jrand48(short seed[3])

_mrand48	proc	near
	; advance the default seed, then return its upper two words
	; in dx:ax (dx = most significant word)
	mov	bx, offset seed
	call	rgen
	mov	dx, seed+4
	mov	ax, seed+2
	ret
_mrand48	endp

_jrand48	proc	near
	; as _mrand48, but works on the seed that the argument
	; at [bp+4] points to
	push	bp
	mov	bp, sp
	mov	bx, [bp+4]
	call	rgen        ;  rgen leaves bx unchanged
	mov	dx, word ptr [bx+4]
	mov	ax, word ptr [bx+2]
	pop	bp
	ret
_jrand48	endp

_nrand48	proc 	near
	; advance the seed that the argument at [bp+4] points to and
	; return its upper two words, shifted right one bit, in dx:ax
	push	bp
	mov	bp, sp
	mov	bx, [bp+4]
	call	rgen        ;  rgen leaves bx unchanged
	mov	dx, [bx+4]
	mov	ax, [bx+2]
	shr	dx, 1       ;  32 bit logical shift right of dx:ax :
	rcr	ax, 1       ;  the bit leaving dx enters ax through carry
	pop	bp
	ret
_nrand48	endp

_lrand48	proc	near
	; advance the default seed and return its upper two words,
	; shifted right one bit, in dx:ax
	mov	bx, offset seed
	call	rgen
	mov	dx, seed+4
	mov	ax, seed+2
	shr     dx, 1       ;  32 bit logical shift right of dx:ax :
	rcr     ax, 1       ;  the bit leaving dx enters ax through carry
	ret
_lrand48	endp

_vrand48	proc   near
	; advance the seed that the argument at [bp+4] points to and
	; return its most significant word in ax
	push	bp
	mov	bp, sp
	mov	bx, [bp+4]
	call    rgen        ;  rgen leaves bx unchanged
	mov	ax, [bx+4]
	pop	bp
	ret
_vrand48	endp

_urand48	proc	near
	; advance the default seed and return its most significant
	; word in ax
	mov	bx, offset seed
	call	rgen
	mov	ax, seed+4
	ret
_urand48	endp

_lcong48	proc 	near
	; Copies seven words from the array that the argument at [bp+4]
	; points to over seed, scale and shift, in that order: words 0-2
	; become the seed, 3-5 the scale and word 6 the shift.  This
	; relies on the three variables being adjacent in the data
	; segment.
	push	bp
	mov	bp, sp
	push	si
	push	di
	push	ds
	pop	es          ;  es = ds : movsw stores through es:di
	                    ;  (es is not restored before returning)
	cld
	mov	si, [bp+4]
	mov	di, offset seed
	mov	cx, 7       ;  3 seed + 3 scale + 1 shift
	rep	movsw
	pop	di
	pop	si
	pop	bp
	ret
_lcong48	endp

end
@//E*O*F mawk0.97/msdos/rand48.asm//
chmod u=r,g=r,o=r mawk0.97/msdos/rand48.asm
 
echo x - mawk0.97/msdos/rand48.h
sed 's/^@//' > "mawk0.97/msdos/rand48.h" <<'@//E*O*F mawk0.97/msdos/rand48.h//'

/*  rand48.h  */
/*  see Unix drand48(3) for expansive discussion */

#ifndef  RAND48_H
#define  RAND48_H

/* the functions without an argument use the built-in default seed,
   their partners use the three word seed passed by the caller */

double  drand48(void) ;
double  erand48(unsigned short[3]) ;

long   mrand48(void) ;
long   jrand48(unsigned short[3]) ;
/* uniform on [-2^31, 2^31)  or
   [0,2^32)  depending on how you interpret the sign bit */

long  lrand48(void) ;
long  nrand48(unsigned short[3]) ;
/* uniform on [0,2^31)
   lrand48() == mrand48()>>1  with a logical (unsigned) shift */

unsigned  urand48(void) ;
unsigned  vrand48(unsigned short[3]) ;
/*  for 16bit machines uniform on [0,2^16)  */

/* SEEDING  */
void  srand48(long seedval) ;
unsigned short *seed48(unsigned short seedv[3]) ;
/* the routine exported by rand48.asm is  lcong48 ;  words 0-2 of
   the argument are the seed, 3-5 the multiplier, 6 the addend */
void  lcong48(unsigned short[7] ) ;


/* old style declarations; they repeat the prototypes above */
void  srand48() ;
long  mrand48(), jrand48(), lrand48(), nrand48() ;
unsigned urand48(), vrand48() ;
double  drand48(), erand48() ;

#endif  /* RAND48_H */
@//E*O*F mawk0.97/msdos/rand48.h//
chmod u=r,g=r,o=r mawk0.97/msdos/rand48.h
 
echo x - mawk0.97/msdos/rand48_0.c
sed 's/^@//' > "mawk0.97/msdos/rand48_0.c" <<'@//E*O*F mawk0.97/msdos/rand48_0.c//'


/*   rand48_0.c  -- the floating point members of the rand48 family  */

/* the generators come from rand48.asm; they are declared unsigned
   here so that the value is taken as lying in [0, 2^32) */
unsigned long  mrand48() ;
unsigned long  jrand48(short [3]) ;

/* mrand48() on the default seed, divided by 2^32 */
double  drand48()
{
  unsigned long  bits = mrand48() ;

  return  (double) bits / 4294967296.0 ;
}

/* jrand48() on the caller's seed x, divided by 2^32 */
double  erand48(short x[3])
{
  unsigned long  bits = jrand48(x) ;

  return  (double) bits / 4294967296.0 ;
}
@//E*O*F mawk0.97/msdos/rand48_0.c//
chmod u=r,g=r,o=r mawk0.97/msdos/rand48_0.c
 
echo x - mawk0.97/msdos/reargv.c
sed 's/^@//' > "mawk0.97/msdos/reargv.c" <<'@//E*O*F mawk0.97/msdos/reargv.c//'

/*  reargv.c
    --  set arguments via POLYSHELL
    --  no errors, don't change anything if
    --  it seems shell is not activated   */

char *strchr(), *getenv() ;

static  char *basename(char *s)
/* Strip path and extension, upcase the rest -- all done in place.
   Scans backwards from the end of s: a '.' is overwritten with a
   terminator, lower case letters are raised, and the scan stops
   just after the last '\\', ':' or '/'.
   Returns a pointer into s at the start of the stripped name. */
{
  char *end = strchr(s, 0) ;

  while ( end > s )
  {
    char c = end[-1] ;

    if ( c == '\\' || c == ':' || c == '/' )  return end ;

    if ( c == '.' )  end[-1] = 0 ;
    else
    if ( c >= 'a' && c <= 'z' )  end[-1] = c - 32 ;

    end-- ;
  }

  return  end ;
}

/*---------------------
  reargv  --  recompute  argc and argv for PolyShell
    if not under shell do nothing
 *-------------------------------  */

extern  char *progname ;
extern  unsigned char _osmajor ;

/* Replace *argcp / *argvp with the argument list that the shell
   left in the environment variable CMDLINE.

   CMDLINE is expected to hold the arguments separated by bytes of
   value 0xff, with one more 0xff at the very end.  The string
   returned by getenv() is cut up in place and the new argv points
   into it.  If CMDLINE is missing, empty, not terminated by 0xff,
   or names a different program than argv[0], only argv[0] and
   progname are touched.

   NOTE(review): malloc() and strcmp() are used without a
   declaration in this file; the vector is not released when the
   program names do not match. */
void  reargv(int *argcp , char ***argvp)
{ register char *p ;
  char **v , *q, *cmdline, **vx ;
  int cnt, cntx ;
  enum { ARG_SEP = 0xff } ;   /* byte that separates the arguments */

  if ( _osmajor == 2 )  /* ugh */
     (*argvp)[0] = progname ;
  else { (*argvp)[0] = basename( (*argvp)[0] ) ;
	 progname = (*argvp)[0] ; }

  if ( ! (cmdline = getenv("CMDLINE")) )  return ;

  /* an empty string has no last character to look at */
  if ( ! *cmdline )  return ;

  /* The bytes are compared as unsigned char: where plain char is
     signed, *q is -1 for a 0xff byte and never equals 0xff. */
  q = strchr(cmdline,0) - 1 ;
  if ( (unsigned char) *q != ARG_SEP )
      return ;  /*  shexpand set wrong */

  /* turn every separator into a terminator and count the pieces */
  for ( *q = 0, cnt = 1 , p = cmdline ; p < q ; p++ )
     if ( (unsigned char) *p == ARG_SEP ) { cnt++ ; *p = 0 ; }

  if ( ! (v = (char **) malloc((cnt+1)*sizeof(char*))) )
       return ;  /* shouldn't happen */

  /* point v[0] .. v[cnt-1] at the pieces, v[cnt] is the null end */
  p = cmdline ;
  vx = v ; cntx = cnt ;
  while ( cnt )
   { *v++ = p ;
     cnt-- ;
     while ( *p )  p++ ;
     p++ ;
   }
  *v = (char *) 0 ;
  v = vx ;

  v[0] = basename( v[0] ) ;
  if ( strcmp(v[0], (*argvp)[0]) )  return  ;/* running under command
	and sh earlier  */
  /* running under PolyShell  */
  *argcp = cntx ;  *argvp = v ;
  progname = v[0] ;
}
@//E*O*F mawk0.97/msdos/reargv.c//
chmod u=r,g=r,o=r mawk0.97/msdos/reargv.c
 
echo Inspecting for damage in transit...
# scratch files: $temp receives the expected wc table,
# $dtemp the differences found against the unpacked files
temp=/tmp/shar$$; dtemp=/tmp/.shar$$
# remove both scratch files on exit and on signals 1, 2, 3 and 15
trap "rm -f $temp $dtemp; exit" 0 1 2 3 15
cat > $temp <<\!!!
     110     194    1897 packing.list
      21      66     407 README
      38     203    1246 LIMITATIONS
      78     450    3560 Makefile
     639    3453   20668 mawk.manual
     235    1062    5676 array.c
     781    2886   17818 bi_funct.c
      68     279    1720 bi_funct.h
      87     284    1910 bi_vars.c
      61     193    1374 bi_vars.h
     354    1074    7389 cast.c
     102     361    2538 code.c
     141     407    3421 code.h
     386    1119    9666 da.c
     345    1188    7739 error.c
     927    3424   26735 execute.c
     365    1360    9733 fcall.c
     404    1378    9426 field.c
      65     199    1432 field.h
     299    1085    7060 files.c
      41     132    1006 files.h
     368    1464    8781 fin.c
      48     185    1167 fin.h
     169     622    3566 hash.c
     202     718    4475 init.c
      44     160    1143 init.h
     217     864    5679 jmp.c
      35     122     876 jmp.h
      83     270    1572 kw.c
     160     584    4357 machine.h
     151     453    3001 main.c
     107     378    2495 makescan.c
     106     285    2019 matherr.c
     142     567    3652 mawk.h
      95     312    2268 memory.c
      50     129    1088 memory.h
    1076    3677   30281 parse.y
     285    1176    7197 print.c
     321    1215    7100 re_cmpl.c
      32     112     793 regexp.h
      37     128     898 repl.h
     694    2586   18298 scan.c
     107     315    2906 scan.h
      23      36     832 scancode.c
      56     201    1446 sizes.h
     174     644    3952 split.c
     162     633    4106 symtype.h
     112     344    2840 types.h
     129     537    3196 zmalloc.c
      44     118     905 zmalloc.h
      26      72     446 Makefile
     186     815    5015 rexp.c
     153     565    3636 rexp.h
     423    1718   10188 rexp0.c
     183     708    3987 rexp1.c
     282    1139    7507 rexp2.c
     272    1335    7457 rexp3.c
      74     252    1649 rexpdb.c
      23     106     710 README
      72     343    2430 benchmarks
       1       3      11 cat.awk
       6      12      59 concat.awk
       6      13      69 fields.awk
      23      49     201 loops.awk
      22      53     286 newton.awk
      40     129     564 primes.awk
      78     234    1076 qsort.awk
       2       4      34 reg0.awk
       2       4      41 reg1.awk
       2       5      63 reg2.awk
      13      71     461 sample
       7      19      82 squeeze.awk
      43     111     621 test.sh
       3       7      33 wc.awk
      91     363    1765 wfrq.awk
      72     297    1433 wfrq0.awk
      23      64     275 words.awk
      12      29     127 words0.awk
     143     546    3030 decl.awk
      57     241    1284 deps.awk
     136     493    2776 gdecl.awk
      18      44     213 nocomment.awk
      80     522    3002 INSTALL
     137     553    4067 makefile
       8      16     107 mklib.bat
     227     620    3670 rand48.asm
      31     106     774 rand48.h
      12      33     231 rand48_0.c
      74     333    1727 reargv.c
   13839   51659  344417 total
!!!
wc mawk0.97/packing.list mawk0.97/README mawk0.97/LIMITATIONS \
 mawk0.97/Makefile mawk0.97/mawk.manual mawk0.97/array.c \
 mawk0.97/bi_funct.c mawk0.97/bi_funct.h mawk0.97/bi_vars.c \
 mawk0.97/bi_vars.h mawk0.97/cast.c mawk0.97/code.c \
 mawk0.97/code.h mawk0.97/da.c mawk0.97/error.c mawk0.97/execute.c \
 mawk0.97/fcall.c mawk0.97/field.c mawk0.97/field.h \
 mawk0.97/files.c mawk0.97/files.h mawk0.97/fin.c mawk0.97/fin.h \
 mawk0.97/hash.c mawk0.97/init.c mawk0.97/init.h mawk0.97/jmp.c \
 mawk0.97/jmp.h mawk0.97/kw.c mawk0.97/machine.h mawk0.97/main.c \
 mawk0.97/makescan.c mawk0.97/matherr.c mawk0.97/mawk.h \
 mawk0.97/memory.c mawk0.97/memory.h mawk0.97/parse.y \
 mawk0.97/print.c mawk0.97/re_cmpl.c mawk0.97/regexp.h \
 mawk0.97/repl.h mawk0.97/scan.c mawk0.97/scan.h mawk0.97/scancode.c \
 mawk0.97/sizes.h mawk0.97/split.c mawk0.97/symtype.h \
 mawk0.97/types.h mawk0.97/zmalloc.c mawk0.97/zmalloc.h \
 mawk0.97/rexp/Makefile mawk0.97/rexp/rexp.c mawk0.97/rexp/rexp.h \
 mawk0.97/rexp/rexp0.c mawk0.97/rexp/rexp1.c mawk0.97/rexp/rexp2.c \
 mawk0.97/rexp/rexp3.c mawk0.97/rexp/rexpdb.c mawk0.97/test/README \
 mawk0.97/test/benchmarks mawk0.97/test/cat.awk mawk0.97/test/concat.awk \
 mawk0.97/test/fields.awk mawk0.97/test/loops.awk mawk0.97/test/newton.awk \
 mawk0.97/test/primes.awk mawk0.97/test/qsort.awk mawk0.97/test/reg0.awk \
 mawk0.97/test/reg1.awk mawk0.97/test/reg2.awk mawk0.97/test/sample \
 mawk0.97/test/squeeze.awk mawk0.97/test/test.sh mawk0.97/test/wc.awk \
 mawk0.97/test/wfrq.awk mawk0.97/test/wfrq0.awk mawk0.97/test/words.awk \
 mawk0.97/test/words0.awk mawk0.97/examples/decl.awk \
 mawk0.97/examples/deps.awk mawk0.97/examples/gdecl.awk \
 mawk0.97/examples/nocomment.awk mawk0.97/msdos/INSTALL \
 mawk0.97/msdos/makefile mawk0.97/msdos/mklib.bat mawk0.97/msdos/rand48.asm \
 mawk0.97/msdos/rand48.h mawk0.97/msdos/rand48_0.c mawk0.97/msdos/reargv.c \
 | sed 's=[^ ]*/==' | diff -b $temp - >$dtemp
# $dtemp is non-empty only if diff reported a difference between
# the expected table and the wc output for the unpacked files
if [ -s $dtemp ]
then echo "Ouch [diff of wc output]:" ; cat $dtemp
else echo "No problems found."
fi
exit 0