brennan@ssc-vax.UUCP (Mike Brennan) (05/11/91)
------------------cut here---------------- else\ { stackp->m=mx;stackp->s=sx;stackp->u=ux;} #endif #define CASE_UANY(x) case x + U_OFF : case x + U_ON int REtest( str, machine) char *str ; VOID *machine ; { register STATE *m = (STATE *) machine ; register char *s = str ; register RT_STATE *stackp ; int u_flag ; char *str_end ; char *ts ; /*convenient temps */ STATE *tm ; /* handle the easy case quickly */ if ( (m+1)->type == M_ACCEPT && m->type == M_STR ) return (int ) str_str(s, m->data.str, m->len) ; else { u_flag = U_ON ; str_end = (char *) 0 ; stackp = RE_run_stack_base - 1 ; goto reswitch ; } refill : if ( stackp < RE_run_stack_base ) return 0 ; m = stackp->m ; s = stackp->s ; u_flag = stackp-- -> u ; reswitch : switch( m->type + u_flag ) { case M_STR + U_OFF + END_OFF : if ( strncmp(s, m->data.str, m->len) ) goto refill ; s += m->len ; m++ ; goto reswitch ; case M_STR + U_OFF + END_ON : if ( strcmp(s, m->data.str) ) goto refill ; s += m->len ; m++ ; goto reswitch ; case M_STR + U_ON + END_OFF : if ( !(s = str_str(s, m->data.str, m->len)) ) goto refill ; push(m, s+1, U_ON) ; s += m->len ; m++ ; u_flag = U_OFF ; goto reswitch ; case M_STR + U_ON + END_ON : if ( !str_end ) str_end = strchr(s, 0) ; ts = str_end - m->len ; if (ts < s || memcmp(ts,m->data.str,m->len+1)) goto refill ; s = str_end ; m++ ; u_flag = U_OFF ; goto reswitch ; case M_CLASS + U_OFF + END_OFF : if ( !ison(*m->data.bvp, s[0] ) ) goto refill ; s++ ; m++ ; goto reswitch ; case M_CLASS + U_OFF + END_ON : if ( s[1] || !ison(*m->data.bvp,s[0]) ) goto refill ; s++ ; m++ ; goto reswitch ; case M_CLASS + U_ON + END_OFF : while ( !ison(*m->data.bvp,s[0]) ) if ( s[0] == 0 ) goto refill ; else s++ ; s++ ; push(m, s, U_ON) ; m++ ; u_flag = U_OFF ; goto reswitch ; case M_CLASS + U_ON + END_ON : if ( ! str_end ) str_end = strchr(s,0) ; if ( ! 
ison(*m->data.bvp, str_end[-1]) ) goto refill ; s = str_end ; m++ ; u_flag = U_OFF ; goto reswitch ; case M_ANY + U_OFF + END_OFF : if ( s[0] == 0 ) goto refill ; s++ ; m++ ; goto reswitch ; case M_ANY + U_OFF + END_ON : if ( s[0] == 0 || s[1] != 0 ) goto refill ; s++ ; m++ ; goto reswitch ; case M_ANY + U_ON + END_OFF : if ( s[0] == 0 ) goto refill ; s++ ; push(m, s, U_ON) ; m++ ; u_flag = U_OFF ; goto reswitch ; case M_ANY + U_ON + END_ON : if ( s[0] == 0 ) goto refill ; if ( ! str_end ) str_end = strchr(s,0) ; s = str_end ; m++ ; u_flag = U_OFF ; goto reswitch ; case M_START + U_OFF + END_OFF : case M_START + U_ON + END_OFF : if ( s != str ) goto refill ; m++ ; u_flag = U_OFF ; goto reswitch ; case M_START + U_OFF + END_ON : case M_START + U_ON + END_ON : if ( s != str || s[0] != 0 ) goto refill ; m++ ; u_flag = U_OFF ; goto reswitch ; case M_END + U_OFF : if ( s[0] != 0 ) goto refill ; m++ ; goto reswitch ; case M_END + U_ON : s = strchr(s, 0) ; m++ ; u_flag = U_OFF ; goto reswitch ; CASE_UANY(M_U) : u_flag = U_ON ; m++ ; goto reswitch ; CASE_UANY(M_1J) : m += m->data.jump ; goto reswitch ; CASE_UANY(M_2JA) : /* take the non jump branch */ /* don't stack an ACCEPT */ if ( (tm = m + m->data.jump)->type == M_ACCEPT ) return 1 ; push(tm, s, u_flag) ; m++ ; goto reswitch ; CASE_UANY(M_2JB) : /* take the jump branch */ /* don't stack an ACCEPT */ if ( (tm = m + 1)->type == M_ACCEPT ) return 1 ; push(tm, s, u_flag) ; m += m->data.jump ; goto reswitch ; CASE_UANY(M_ACCEPT) : return 1 ; default : RE_panic("unexpected case in REtest") ; } } #ifdef MAWK char *is_string_split( p, lenp ) register STATE *p ; unsigned *lenp ; { if ( p[0].type == M_STR && p[1].type == M_ACCEPT ) { *lenp = p->len ; return p->data.str ; } else return (char *) 0 ; } #else /* mawk provides its own str_str */ char *str_str(target, key, klen) register char *target ; register char *key ; unsigned klen ; { int c = key[0] ; switch( klen ) { case 0 : return (char *) 0 ; case 1 : return strchr(target, 
c) ; case 2 : while ( target = strchr(target, c) ) if ( target[1] == key[1] ) return target ; else target++ ; break ; default : klen-- ; key++ ; while ( target = strchr(target, c) ) if ( memcmp(target+1,key,klen) == 0 ) return target ; else target++ ; break ; } return (char *) 0 ; } #endif /* MAWK */ @//E*O*F mawk0.97/rexp/rexp2.c// chmod u=rw,g=r,o=r mawk0.97/rexp/rexp2.c echo x - mawk0.97/rexp/rexp3.c sed 's/^@//' > "mawk0.97/rexp/rexp3.c" <<'@//E*O*F mawk0.97/rexp/rexp3.c//' /******************************************** rexp3.c copyright 1991, Michael D. Brennan This is a source file for mawk an implementation of the Awk programming language as defined in Aho, Kernighan and Weinberger, The AWK Programming Language, Addison-Wesley, 1988. See the accompaning file, LIMITATIONS, for restrictions regarding modification and redistribution of this program in source or binary form. ********************************************/ /* rexp3.c */ /* match a string against a machine */ #include "rexp.h" #include <string.h> /* check that a bit is on */ #define ison(b,x) ( (b)[(x)>>3] & ( 1 << ((x)&7) )) extern RT_STATE *RE_run_stack_base; extern RT_STATE *RE_run_stack_limit ; RT_STATE *RE_new_run_stack() ; #define push(mx,sx,ssx,ux) if (++stackp == RE_run_stack_limit)\ stackp = RE_new_run_stack() ;\ stackp->m=mx;stackp->s=sx;stackp->ss=ssx;stackp->u=ux; #define CASE_UANY(x) case x + U_OFF : case x + U_ON /* returns start of first longest match and the length by reference. 
If no match returns NULL and length zero */ char *REmatch(str, machine, lenp) char *str ; VOID *machine ; unsigned *lenp ; { register STATE *m = (STATE *) machine ; register char *s = str ; char *ss ; register RT_STATE *stackp ; int u_flag ; char *str_end, *ts ; /* state of current best match stored here */ char *cb_ss ; /* the start */ char *cb_e ; /* the end , pts at first char not matched */ *lenp = 0 ; /* check for the easy case */ if ( (m+1)->type == M_ACCEPT && m->type == M_STR ) { if ( ts = str_str(s, m->data.str, m->len) ) *lenp = m->len ; return ts ; } u_flag = U_ON ; cb_ss = ss = str_end = (char *) 0 ; stackp = RE_run_stack_base - 1 ; goto reswitch ; refill : if ( stackp < RE_run_stack_base ) /* empty stack */ { if ( cb_ss ) *lenp = cb_e - cb_ss ; return cb_ss ; } ss = stackp->ss ; s = stackp-- -> s ; if ( cb_ss ) /* does new state start too late ? */ if ( ss ) { if ( cb_ss < ss ) goto refill ; } else if ( cb_ss < s ) goto refill ; m = (stackp+1)->m ; u_flag = (stackp+1)->u ; reswitch : switch( m->type + u_flag ) { case M_STR + U_OFF + END_OFF : if ( strncmp(s, m->data.str, m->len) ) goto refill ; if ( !ss ) if ( cb_ss && s > cb_ss ) goto refill ; else ss = s ; s += m->len ; m++ ; goto reswitch ; case M_STR + U_OFF + END_ON : if ( strcmp(s, m->data.str) ) goto refill ; if ( !ss ) if ( cb_ss && s > cb_ss ) goto refill ; else ss = s ; s += m->len ; m++ ; goto reswitch ; case M_STR + U_ON + END_OFF : if ( !(s = str_str(s, m->data.str, m->len)) ) goto refill ; push(m, s+1,ss, U_ON) ; if ( !ss ) if ( cb_ss && s > cb_ss ) goto refill ; else ss = s ; s += m->len ; m++ ; u_flag = U_OFF ; goto reswitch ; case M_STR + U_ON + END_ON : if ( !str_end ) str_end = strchr(s, 0) ; ts = str_end - m->len ; if (ts < s || memcmp(ts,m->data.str,m->len+1)) goto refill ; if ( !ss ) if ( cb_ss && ts > cb_ss ) goto refill ; else ss = ts ; s = str_end ; m++ ; u_flag = U_OFF ; goto reswitch ; case M_CLASS + U_OFF + END_OFF : if ( !ison(*m->data.bvp, s[0] ) ) goto refill ; if ( !ss ) 
if ( cb_ss && s > cb_ss ) goto refill ; else ss = s ; s++ ; m++ ; goto reswitch ; case M_CLASS + U_OFF + END_ON : if ( s[1] || !ison(*m->data.bvp,s[0]) ) goto refill ; if ( !ss ) if ( cb_ss && s > cb_ss ) goto refill ; else ss = s ; s++ ; m++ ; goto reswitch ; case M_CLASS + U_ON + END_OFF : while ( !ison(*m->data.bvp,s[0]) ) if ( s[0] == 0 ) goto refill ; else s++ ; s++ ; push(m, s, ss, U_ON) ; if ( !ss ) if ( cb_ss && s-1 > cb_ss ) goto refill ; else ss = s-1 ; m++ ; u_flag = U_OFF ; goto reswitch ; case M_CLASS + U_ON + END_ON : if ( ! str_end ) str_end = strchr(s,0) ; if ( ! ison(*m->data.bvp, str_end[-1]) ) goto refill ; if ( !ss ) if ( cb_ss && str_end-1 > cb_ss ) goto refill ; else ss = str_end-1 ; s = str_end ; m++ ; u_flag = U_OFF ; goto reswitch ; case M_ANY + U_OFF + END_OFF : if ( s[0] == 0 ) goto refill ; if ( !ss ) if ( cb_ss && s > cb_ss ) goto refill ; else ss = s ; s++ ; m++ ; goto reswitch ; case M_ANY + U_OFF + END_ON : if ( s[0] == 0 || s[1] != 0 ) goto refill ; if ( !ss ) if ( cb_ss && s > cb_ss ) goto refill ; else ss = s ; s++ ; m++ ; goto reswitch ; case M_ANY + U_ON + END_OFF : if ( s[0] == 0 ) goto refill ; s++ ; push(m, s, ss, U_ON) ; if ( !ss ) if ( cb_ss && s-1 > cb_ss ) goto refill ; else ss = s-1 ; m++ ; u_flag = U_OFF ; goto reswitch ; case M_ANY + U_ON + END_ON : if ( s[0] == 0 ) goto refill ; if ( ! str_end ) str_end = strchr(s,0) ; if ( !ss ) if ( cb_ss && str_end-1 > cb_ss ) goto refill ; else ss = str_end - 1 ; s = str_end ; m++ ; u_flag = U_OFF ; goto reswitch ; case M_START + U_OFF + END_OFF : case M_START + U_ON + END_OFF : if ( s != str ) goto refill ; ss = s ; m++ ; u_flag = U_OFF ; goto reswitch ; case M_START + U_OFF + END_ON : case M_START + U_ON + END_ON : if ( s != str || s[0] != 0 ) goto refill ; ss = s ; m++ ; u_flag = U_OFF ; goto reswitch ; case M_END + U_OFF : if ( s[0] != 0 ) goto refill ; if ( !ss ) if ( cb_ss && s > cb_ss ) goto refill ; else ss = s ; m++ ; goto reswitch ; case M_END + U_ON : s = str_end ? 
str_end : (str_end = strchr(s,0)) ; if ( !ss ) if ( cb_ss && s > cb_ss ) goto refill ; else ss = s ; m++ ; u_flag = U_OFF ; goto reswitch ; CASE_UANY(M_U) : if ( !ss ) if ( cb_ss && s > cb_ss ) goto refill ; else ss = s ; u_flag = U_ON ; m++ ; goto reswitch ; CASE_UANY(M_1J) : m += m->data.jump ; goto reswitch ; CASE_UANY(M_2JA) : /* take the non jump branch */ push(m+m->data.jump, s, ss, u_flag) ; m++ ; goto reswitch ; CASE_UANY(M_2JB) : /* take the jump branch */ push(m+1, s, ss, u_flag) ; m += m->data.jump ; goto reswitch ; case M_ACCEPT + U_OFF : if ( !ss ) ss = s ; if ( !cb_ss || ss < cb_ss || ss == cb_ss && s > cb_e ) { /* we have a new current best */ cb_ss = ss ; cb_e = s ; } goto refill ; case M_ACCEPT + U_ON : if ( !ss ) ss = s ; else s = str_end ? str_end : (str_end = strchr(s,0)) ; if ( !cb_ss || ss < cb_ss || ss == cb_ss && s > cb_e ) { /* we have a new current best */ cb_ss = ss ; cb_e = s ; } goto refill ; default : RE_panic("unexpected case in REmatch") ; } } @//E*O*F mawk0.97/rexp/rexp3.c// chmod u=rw,g=r,o=r mawk0.97/rexp/rexp3.c echo x - mawk0.97/rexp/rexpdb.c sed 's/^@//' > "mawk0.97/rexp/rexpdb.c" <<'@//E*O*F mawk0.97/rexp/rexpdb.c//' /******************************************** rexpdb.c copyright 1991, Michael D. Brennan This is a source file for mawk an implementation of the Awk programming language as defined in Aho, Kernighan and Weinberger, The AWK Programming Language, Addison-Wesley, 1988. See the accompaning file, LIMITATIONS, for restrictions regarding modification and redistribution of this program in source or binary form. 
********************************************/ /* rexpdb.c */ #include "rexp.h" #include <ctype.h> /* print a machine for debugging */ static char *xlat[] = { "M_STR" , "M_CLASS" , "M_ANY" , "M_START" , "M_END" , "M_U", "M_1J" , "M_2JA" , "M_2JB" , "M_ACCEPT" } ; void REmprint(m, f) VOID *m ; FILE *f ; { register STATE *p = (STATE *) m ; char *end_on_string ; while ( 1 ) { if ( p->type >= END_ON ) { p->type -= END_ON ; end_on_string = "$" ; } else end_on_string = "" ; if ( p->type < 0 || p->type >= END_ON ) { fprintf(f, "unknown STATE type\n") ; return ; } fprintf(f, "%-10s" , xlat[p->type]) ; switch( p->type ) { case M_STR : fprintf(f, "%s", p->data.str ) ; break ; case M_1J: case M_2JA: case M_2JB : fprintf(f, "%d", p->data.jump) ; break ; case M_CLASS: { unsigned char *q = (unsigned char *) p->data.bvp ; unsigned char *r = q + sizeof(BV) ; while ( q < r ) fprintf(f, "%x " , *q++) ; } break ; } fprintf(f, "%s\n" , end_on_string) ; if ( end_on_string[0] ) p->type += END_ON ; if ( p->type == M_ACCEPT ) return ; p++ ; } } @//E*O*F mawk0.97/rexp/rexpdb.c// chmod u=rw,g=r,o=r mawk0.97/rexp/rexpdb.c echo mkdir - mawk0.97/test mkdir mawk0.97/test chmod u=rwx,g=rx,o=rx mawk0.97/test echo x - mawk0.97/test/README sed 's/^@//' > "mawk0.97/test/README" <<'@//E*O*F mawk0.97/test/README//' Some benchmarks of Mawk and other (new) awks are in the file benchmarks. Most of the programs are one liners, but a few are interesting and duplicated in examples directory. To run these tests or others yourself, use the shell script test.sh ---------------------------------------------------- One area mawk needs to improve is array[expr] access when expr is numeric (see primes and qsort). Mawk always converts numeric expressions to string for array access which leads to a lot of redundant conversions. Better array access will be added for release 1.0. 
------------------------------------------- run wfrq0.awk or words0.awk to see how much RS as a regular expression can speed up some programs @//E*O*F mawk0.97/test/README// chmod u=r,g=r,o=r mawk0.97/test/README echo x - mawk0.97/test/benchmarks sed 's/^@//' > "mawk0.97/test/benchmarks" <<'@//E*O*F mawk0.97/test/benchmarks//' The following are some timing tests of Mawk versus three other (new) awks. Times are user + sys in seconds. First col is mawk time, second col is other awk time and last col is the ratio. Mawk vs. Awk on Stardent 3000, SysV 3.0 cat 4.0 4.8 1.20 wc 8.1 6.1 0.75 fields 20.8 26.3 1.26 reg0 4.7 6.0 1.28 reg1 5.6 6.0 1.07 reg2 18.1 6.0 0.33 loops 6.0 12.6 2.10 words 18.1 18.4 1.02 newton * 0.9 1.7 1.89 concat 14.1 15.0 1.06 primes * 2.6 3.1 1.19 squeeze 5.3 2.9 0.55 qsort 6.8 21.3 3.13 wfrq 8.9 10.0 1.12 deps ** 3.1 5.2 1.68 1.15 # Mawk vs. Gawk 2.11.1 on Sun3, SunOS 4.0 cat 6.1 8.1 1.33 wc 12.6 40.6 3.22 fields 35.7 117.6 3.29 reg0 6.7 11.0 1.64 reg1 8.5 12.6 1.48 reg2 34.3 55.5 1.62 loops 40.4 214.6 5.31 words 31.3 110.8 3.54 newton 6.7 25.4 3.79 concat 20.9 65.7 3.14 primes 38.8 28.3 0.73 squeeze 2.2 4.6 2.09 qsort 36.1 42.3 1.17 wfrq 66.5 199.4 3.00 deps 16.2 42.9 2.65 2.24 # Mawk vs. Nawk on VAX 3600 Ultrix 4.1 cat 5.7 7.7 1.35 wc 12.8 12.4 0.97 fields 34.1 58.9 1.73 reg0 7.1 8.6 1.21 reg1 8.9 21.8 2.45 reg2 36.7 58.4 1.59 loops 30.5 117.7 3.86 words 31.0 58.7 1.89 newton 5.6 11.9 2.12 concat 21.3 28.9 1.36 primes 36.3 17.2 0.47 squeeze 2.2 3.1 1.41 qsort 39.3 29.5 0.75 wfrq 76.2 173.9 2.28 deps 18.1 32.4 1.79 1.50 # * newton , primes and loops take no input. Newton computed the square roots of 1 to 1000 by Newton's method and primes was a sieve for primes < 5000. Loops was three nested loops 100 x 50 x 50 with a sum on the inside. ** deps input was *.c on mawk source The other programs read a file of 20000+ C source lines. The input files were blown up by 4 (80000+) on the Stardent. # geometric mean of col 3 -- (a1 * a2 * ... an ) ^ (1/n) . 
@//E*O*F mawk0.97/test/benchmarks// chmod u=r,g=r,o=r mawk0.97/test/benchmarks echo x - mawk0.97/test/cat.awk sed 's/^@//' > "mawk0.97/test/cat.awk" <<'@//E*O*F mawk0.97/test/cat.awk//' { print } @//E*O*F mawk0.97/test/cat.awk// chmod u=r,g=r,o=r mawk0.97/test/cat.awk echo x - mawk0.97/test/concat.awk sed 's/^@//' > "mawk0.97/test/concat.awk" <<'@//E*O*F mawk0.97/test/concat.awk//' # test concatenation # { print $NF $( (NF+1)/2 ) $1 } @//E*O*F mawk0.97/test/concat.awk// chmod u=r,g=r,o=r mawk0.97/test/concat.awk echo x - mawk0.97/test/fields.awk sed 's/^@//' > "mawk0.97/test/fields.awk" <<'@//E*O*F mawk0.97/test/fields.awk//' # print each field # one per line # { for(i=1;i<=NF;i++) print $i} @//E*O*F mawk0.97/test/fields.awk// chmod u=r,g=r,o=r mawk0.97/test/fields.awk echo x - mawk0.97/test/loops.awk sed 's/^@//' > "mawk0.97/test/loops.awk" <<'@//E*O*F mawk0.97/test/loops.awk//' # test looping speed # BEGIN { for(i=1; i<=100 ; i++) { j = i ; while ( j >= 0 ) { k = 0 do { sum += k + j + i k++ } while ( k <= j ) j-- } } print sum } @//E*O*F mawk0.97/test/loops.awk// chmod u=r,g=r,o=r mawk0.97/test/loops.awk echo x - mawk0.97/test/newton.awk sed 's/^@//' > "mawk0.97/test/newton.awk" <<'@//E*O*F mawk0.97/test/newton.awk//' # compute square root by newton's method # function SQRT(x) { new = x/2 do { old = new new = (old*old+x)/(2*old) } while ( abs(new-old) > 1e-6 ) return (new+old)/2 } function abs(x) { return x>=0?x:-x } BEGIN { for(i = 1 ; i <= 1000 ; i++) print i, SQRT(i) } @//E*O*F mawk0.97/test/newton.awk// chmod u=r,g=r,o=r mawk0.97/test/newton.awk echo x - mawk0.97/test/primes.awk sed 's/^@//' > "mawk0.97/test/primes.awk" <<'@//E*O*F mawk0.97/test/primes.awk//' # find all primes # <= ARGV[1] # BEGIN { stop = ARGV[1] prime[ p_cnt = 1 ] = 3 # keep track of integer part of square root by adding # odd integers odd = test = 5 root = 2 squares = 9 while ( test <= stop ) { if ( test >= squares ) { root++ odd += 2 squares += odd } flag = 1 for ( i = 1 ; prime[i] <= 
root ; i++ ) if ( test % prime[i] == 0 ) # not prime { flag = 0 ; break } if ( flag ) prime[ ++p_cnt ] = test test += 2 } prime[0] = 2 for(i = 0 ; i <= p_cnt ; i++ ) print prime[i] } @//E*O*F mawk0.97/test/primes.awk// chmod u=r,g=r,o=r mawk0.97/test/primes.awk echo x - mawk0.97/test/qsort.awk sed 's/^@//' > "mawk0.97/test/qsort.awk" <<'@//E*O*F mawk0.97/test/qsort.awk//' # qsort text files # function middle(x,y,z) #return middle of 3 { if ( x <= y ) { if ( z >= y ) return y if ( z < x ) return x return z } if ( z >= x ) return x if ( z < y ) return y return z } function isort(A , n, i, j, hold) { # if needed a sentinal at A[0] will be created for( i = 2 ; i <= n ; i++) { hold = A[ j = i ] while ( A[j-1] > hold ) { j-- ; A[j+1] = A[j] } A[j] = hold } } # recursive quicksort function qsort(A, left, right ,i , j, pivot, hold) { pivot = middle(A[left], A[int((left+right)/2)], A[right]) i = left j = right while ( i <= j ) { while ( A[i] < pivot ) i++ while ( A[j] > pivot ) j-- if ( i <= j ) { hold = A[i] A[i++] = A[j] A[j--] = hold } } if ( j - left > BLOCK ) qsort(A,left,j) if ( right - i > BLOCK ) qsort(A,i,right) } BEGIN { BLOCK = 5 } { line[NR] = $0 "" # sort as string } END { if ( NR > BLOCK ) qsort(line, 1, NR) isort(line, NR) for(i = 1 ; i <= NR ; i++) print line[i] } @//E*O*F mawk0.97/test/qsort.awk// chmod u=r,g=r,o=r mawk0.97/test/qsort.awk echo x - mawk0.97/test/reg0.awk sed 's/^@//' > "mawk0.97/test/reg0.awk" <<'@//E*O*F mawk0.97/test/reg0.awk//' /return/ {cnt++} END{print cnt} @//E*O*F mawk0.97/test/reg0.awk// chmod u=r,g=r,o=r mawk0.97/test/reg0.awk echo x - mawk0.97/test/reg1.awk sed 's/^@//' > "mawk0.97/test/reg1.awk" <<'@//E*O*F mawk0.97/test/reg1.awk//' /return|switch/ {cnt++} END{print cnt} @//E*O*F mawk0.97/test/reg1.awk// chmod u=r,g=r,o=r mawk0.97/test/reg1.awk echo x - mawk0.97/test/reg2.awk sed 's/^@//' > "mawk0.97/test/reg2.awk" <<'@//E*O*F mawk0.97/test/reg2.awk//' /[A-Za-z_][A-Za-z0-9_]*\[.*\][ \t]*=/ {cnt++} END{print cnt} @//E*O*F 
mawk0.97/test/reg2.awk// chmod u=r,g=r,o=r mawk0.97/test/reg2.awk echo x - mawk0.97/test/sample sed 's/^@//' > "mawk0.97/test/sample" <<'@//E*O*F mawk0.97/test/sample//' dump cat.awk ../TB mawk gawk awk out wc.awk ../TB mawk gawk awk dump fields.awk ../TB mawk gawk awk out reg0.awk ../TB mawk gawk awk out reg1.awk ../TB mawk gawk awk out reg2.awk ../TB mawk gawk awk out words0.awk ../TB mawk gawk out words1.awk ../TB mawk dump newton.awk /dev/null mawk gawk dump concat.awk ../TB mawk gawk awk dump primes.awk 5000 mawk gawk dump squeeze.awk ../parse.y mawk gawk dump qsort.awk ../parse.y mawk gawk @//E*O*F mawk0.97/test/sample// chmod u=r,g=r,o=r mawk0.97/test/sample echo x - mawk0.97/test/squeeze.awk sed 's/^@//' > "mawk0.97/test/squeeze.awk" <<'@//E*O*F mawk0.97/test/squeeze.awk//' # test gsub # # squeeze space to single space { gsub( /[ \t]+/, " ") ; print } @//E*O*F mawk0.97/test/squeeze.awk// chmod u=r,g=r,o=r mawk0.97/test/squeeze.awk echo x - mawk0.97/test/test.sh sed 's/^@//' > "mawk0.97/test/test.sh" <<'@//E*O*F mawk0.97/test/test.sh//' #! 
/bin/sh ############### # shell script for timing mawk and other awks # # reads input file of the form # dump_or_not program_file input_file list of awks # # usage: test.sh < input_file # if [ $# != 0 ] then name=`basename $0` echo "usage: $name < input_file" 1>&2 exit 1 fi while read direct program file awk_list do echo echo if [ $direct = dump ] then for i in $awk_list do echo "$i -f $program $file" /bin/time $i -f $program $file >/dev/null done else for i in $awk_list do echo "$i -f $program $file" /bin/time $i -f $program $file done fi done 2>&1 # send the timing to stdout @//E*O*F mawk0.97/test/test.sh// chmod u=rx,g=rx,o=rx mawk0.97/test/test.sh echo x - mawk0.97/test/wc.awk sed 's/^@//' > "mawk0.97/test/wc.awk" <<'@//E*O*F mawk0.97/test/wc.awk//' {sum += NF} END{ print NR, sum} @//E*O*F mawk0.97/test/wc.awk// chmod u=r,g=r,o=r mawk0.97/test/wc.awk echo x - mawk0.97/test/wfrq.awk sed 's/^@//' > "mawk0.97/test/wfrq.awk" <<'@//E*O*F mawk0.97/test/wfrq.awk//' # wfrq.awk # find the twenty most frequent words in a document # # counts words in array cnt[ word ] # # uses a heap to select the twenty most frequent # # BEGIN { FS = "[^a-zA-Z]+" ; BIG = 999999 } { for( i = 1 ; i <= NF ; i++ ) cnt[$i]++ } END { delete cnt[ "" ] # load twenty values # into the heap word[1..20] and count[1..20] # # heap condition -- # count[i] <= count[2*i] and count[i] <= count[2*i+1] j = 1 # remove twenty values from cnt[] , put in the heap for( i in cnt ) { word[j] = i ; count[j] = cnt[i] delete cnt[i] ; if ( ++j == 21 ) break ; } # make some sentinals # to stop down_heap() # for( i = j ; i < 43 ; i++ ) count[i] = BIG h_empty = j # save the first empty slot # make a heap with the smallest in slot 1 for( i = h_empty - 1 ; i > 0 ; i-- ) down_heap(i) # examine the rest of the values for ( i in cnt ) if ( (j = cnt[i]) > count[1] ) { # its bigger # take the smallest out of the heap and readjust word[1] = i ; count[1] = j down_heap(1) } h_empty-- ; # what's left are the twenty largest # 
smallest at the top # i = 20 while ( h_empty > 1 ) { buffer[i--] = sprintf ("%3d %s" , count[1], word[1]) count[1] = count[h_empty] ; word[1] = word[h_empty] count[h_empty] = BIG down_heap(1) h_empty-- } buffer[i--] = sprintf ("%3d %s" , count[1], word[1]) for(j = 1 ; j <= 20 ; j++ ) print buffer[j] } # let the i th element drop to its correct position function down_heap(i, k) { while ( 1 ) { if ( count[2*i] <= count[2*i+1] ) k = 2*i else k = 2*i + 1 if ( count[i] <= count[k] ) return hold = word[k] ; word[k] = word[i] ; word[i] = hold hold = count[k] ; count[k] = count[i] ; count[i] = hold i = k } } @//E*O*F mawk0.97/test/wfrq.awk// chmod u=r,g=r,o=r mawk0.97/test/wfrq.awk echo x - mawk0.97/test/wfrq0.awk sed 's/^@//' > "mawk0.97/test/wfrq0.awk" <<'@//E*O*F mawk0.97/test/wfrq0.awk//' # this program is the same as wfrq.awk except it runs # about twice as fast because it uses RS as a regular # expression # function down_heap(i, k) { while ( 1 ) { if ( count[2*i] <= count[2*i+1] ) k = 2*i else k = 2*i + 1 if ( count[i] <= count[k] ) return hold = word[k] ; word[k] = word[i] ; word[i] = hold hold = count[k] ; count[k] = count[i] ; count[i] = hold i = k } } BEGIN { RS = "[^a-zA-Z]+" ; BIG = 999999 } { cnt[$0]++ } END { delete cnt[ "" ] # load twenty values j = 1 for( i in cnt ) { word[j] = i ; count[j] = cnt[i] delete cnt[i] ; if ( ++j == 21 ) break ; } # make some sentinals for( i = j ; i < 43 ; i++ ) count[i] = BIG h_empty = j # save the first empty slot # make a heap with the smallest in slot 1 for( i = h_empty - 1 ; i > 0 ; i-- ) down_heap(i) # examine the rest of the values for ( i in cnt ) if ( (j = cnt[i]) > count[1] ) { # its bigger # take the smallest out of the heap and readjust word[1] = i ; count[1] = j down_heap(1) } h_empty-- ; # what's left are the twenty largest # smallest at the top # i = 20 while ( h_empty > 1 ) { buffer[i--] = sprintf ("%3d %s" , count[1], word[1]) count[1] = count[h_empty] ; word[1] = word[h_empty] count[h_empty] = BIG down_heap(1) 
h_empty-- } buffer[i--] = sprintf ("%3d %s" , count[1], word[1]) for(j = 1 ; j <= 20 ; j++ ) print buffer[j] } @//E*O*F mawk0.97/test/wfrq0.awk// chmod u=r,g=r,o=r mawk0.97/test/wfrq0.awk echo x - mawk0.97/test/words.awk sed 's/^@//' > "mawk0.97/test/words.awk" <<'@//E*O*F mawk0.97/test/words.awk//' # words0.awk # find real words # i.e contigous letters BEGIN { FS = "[^A-Za-z]+" } # split fields on not letters { # $1 and $NF might be empty if ( NF > 0 ) { cnt += NF if ( $NF == "" ) cnt-- if ( NF > 1 && $1 == "" ) cnt-- } } END { print cnt} @//E*O*F mawk0.97/test/words.awk// chmod u=r,g=r,o=r mawk0.97/test/words.awk echo x - mawk0.97/test/words0.awk sed 's/^@//' > "mawk0.97/test/words0.awk" <<'@//E*O*F mawk0.97/test/words0.awk//' # use non letters as RS # # BEGIN { RS = "[^A-Za-z][^A-Za-z]*" getline if ( $0 == "" ) NR = 0 } END { print NR } @//E*O*F mawk0.97/test/words0.awk// chmod u=r,g=r,o=r mawk0.97/test/words0.awk echo mkdir - mawk0.97/examples mkdir mawk0.97/examples chmod u=rwx,g=rx,o=rx mawk0.97/examples echo x - mawk0.97/examples/decl.awk sed 's/^@//' > "mawk0.97/examples/decl.awk" <<'@//E*O*F mawk0.97/examples/decl.awk//' # parse a C declaration by recursive descent # based on a C program in KR ANSI edition # # run on a C file it finds the declarations # # restrictions: one declaration per line # doesn't understand struct {...} # makes assumptions about type names # # # some awks need double escapes on strings used as # regular expressions. If not run on mawk, use gdecl.awk ################################################ # lexical scanner -- gobble() # input : string s -- treated as a regular expression # gobble eats SPACE, then eats longest match of s off front # of global variable line. 
# Cuts the matched part off of line # function gobble(s, x) { sub( /^ /, "", line) # eat SPACE if any # surround s with parenthesis to make sure ^ acts on the # whole thing match(line, "^" "(" s ")") x = substr(line, 1, RLENGTH) line = substr(line, RLENGTH+1) return x } function ptr_to(n, x) # print "pointer to" , n times { n = int(n) if ( n <= 0 ) return "" x = "pointer to" ; n-- while ( n-- ) x = x " pointer to" return x } #recursively get a decl # returns an english description of the declaration or # "" if not a C declaration. function decl( x, t, ptr_part) { x = gobble("[* ]+") # get list of *** ... gsub(/ /, "", x) # remove all SPACES ptr_part = ptr_to( length(x) ) # We expect to see either an identifier or '(' # if ( gobble("\(") ) { # this is the recursive descent part # we expect to match a declaration and closing ')' # If not return "" to indicate failure if ( (x = decl()) == "" || gobble( "\)" ) == "" ) return "" } else # expecting an identifier { if ( (x = gobble(id)) == "" ) return "" x = x ":" } # finally look for () # or [ opt_size ] while ( 1 ) if ( gobble( funct_mark ) ) x = x " function returning" else if ( t = gobble( array_mark ) ) { gsub(/ /, "", t) x = x " array" t " of" } else break x = x " " ptr_part return x } BEGIN { id = "[_A-Za-z][_A-Za-z0-9]*" funct_mark = "\([ \t]*\)" array_mark = "\[[ \t]*[_A-Za-z0-9]*[ \t]*\]" # I've assumed types are keywords or all CAPS or end in _t # Other conventions could be added. type0 = "int|char|short|long|double|float|void" type1 = "[_A-Z][_A-Z0-9]*" # types are CAPS type2 = "[_A-Za-z][_A-Za-z0-9]*_t" # end in _t types = "(" type0 "|" type1 "|" type2 ")" } { gsub( "/\*([^*]|\*[^/])*(\*/|$)" , " ") # remove comments gsub( /[ \t]+/, " ") # squeeze white space to a single space line = $0 scope = gobble( "extern|static" ) if ( type = gobble("(struct|union|enum) ") ) type = type gobble(id) # get the tag else { type = gobble("(un)?signed ") gobble( types ) } if ( ! 
type ) next if ( (x = decl()) && gobble( ";") ) { x = x " " type if ( scope ) x = x " (" scope ")" gsub( / +/, " ", x) # print x } } @//E*O*F mawk0.97/examples/decl.awk// chmod u=rx,g=rx,o=rx mawk0.97/examples/decl.awk echo x - mawk0.97/examples/deps.awk sed 's/^@//' > "mawk0.97/examples/deps.awk" <<'@//E*O*F mawk0.97/examples/deps.awk//' # find include dependencies in C source # # mawk -f deps.awk C_source_files # -- prints a dependency list suitable for make # -- ignores #include < > # BEGIN { stack_index = 0 # stack[] holds the input files for(i = 1 ; i < ARGC ; i++) { file = ARGV[i] if ( file !~ /\.c$/ ) continue # skip it outfile = substr(file, 1, length(file)-2) ".o" # INCLUDED[] stores the set of included files # -- start with the empty set for( j in INCLUDED ) delete INCLUDED[j] while ( 1 ) { if ( getline line < file <= 0 ) # no open or EOF { close(file) if ( stack_index == 0 ) break # empty stack else { file = stack[ stack_index-- ] continue } } if ( line ~ /^#include[ \t]+".*"/ ) { split(line, X, "\"") # filename is in X[2] if ( X[2] in INCLUDED ) # we've already included it continue #push current file stack[ ++stack_index ] = file INCLUDED[ file = X[2] ] = "" } } # end of while # test if INCLUDED is empty flag = 0 # on once the front is printed for( j in INCLUDED ) if ( ! flag ) { printf "%s : %s" , outfile, j ; flag = 1 } else printf " %s" , j if ( flag ) print "" }# end of loop over files in ARGV[i] } @//E*O*F mawk0.97/examples/deps.awk// chmod u=rx,g=rx,o=rx mawk0.97/examples/deps.awk echo x - mawk0.97/examples/gdecl.awk sed 's/^@//' > "mawk0.97/examples/gdecl.awk" <<'@//E*O*F mawk0.97/examples/gdecl.awk//' # parse a C declaration by recursive descent # # decl.awk with extra escapes \ ################################################ ############################################ # lexical scanner -- gobble() # input : string s -- treated as a regular expression # gobble eats SPACE, then eats longest match of s off front # of global variable line. 
# Cuts the matched part off of line # function gobble(s, x) { sub( /^ /, "", line) # eat SPACE if any # surround s with parenthesis to make sure ^ acts on the # whole thing match(line, "^" "(" s ")") x = substr(line, 1, RLENGTH) line = substr(line, RLENGTH+1) return x } function ptr_to(n, x) # print "pointer to" , n times { n = int(n) if ( n <= 0 ) return "" x = "pointer to" ; n-- while ( n-- ) x = x " pointer to" return x } #recursively get a decl # returns an english description of the declaration or # "" if not a C declaration. function decl( x, t, ptr_part) { x = gobble("[* ]+") # get list of *** ... gsub(/ /, "", x) # remove all SPACES ptr_part = ptr_to( length(x) ) # We expect to see either an identifier or '(' # if ( gobble("\\(") ) { # this is the recursive descent part # we expect to match a declaration and closing ')' # If not return "" to indicate failure if ( (x = decl()) == "" || gobble( "\\)" ) == "" ) return "" } else # expecting an identifier { if ( (x = gobble(id)) == "" ) return "" x = x ":" } # finally look for () # or [ opt_size ] while ( 1 ) if ( gobble( funct_mark ) ) x = x " function returning" else if ( t = gobble( array_mark ) ) { gsub(/ /, "", t) x = x " array" t " of" } else break x = x " " ptr_part return x } BEGIN { id = "[_A-Za-z][_A-Za-z0-9]*" funct_mark = "\\([ \t]*\\)" array_mark = "\\[[ \t]*[_A-Za-z0-9]*[ \t]*\\]" # I've assumed types are keywords or all CAPS or end in _t # Other conventions could be added. type0 = "int|char|short|long|double|float|void" type1 = "[_A-Z][_A-Z0-9]*" # types are CAPS type2 = "[_A-Za-z][_A-Za-z0-9]*_t" # end in _t types = "(" type0 "|" type1 "|" type2 ")" } { gsub( /\/\*([^*]|\*[^\/])*(\*\/|$)/ , " ") # remove comments gsub( /[ \t]+/, " ") # squeeze white space to a single space line = $0 scope = gobble( "extern|static" ) if ( type = gobble("(struct|union|enum) ") ) type = type gobble(id) # get the tag else { type = gobble("(un)?signed ") gobble( types ) } if ( ! 
type ) next if ( (x = decl()) && gobble( ";") ) { x = x " " type if ( scope ) x = x " (" scope ")" gsub( / +/, " ", x) # print x } } @//E*O*F mawk0.97/examples/gdecl.awk// chmod u=rx,g=rx,o=rx mawk0.97/examples/gdecl.awk echo x - mawk0.97/examples/nocomment.awk sed 's/^@//' > "mawk0.97/examples/nocomment.awk" <<'@//E*O*F mawk0.97/examples/nocomment.awk//' # remove C comments # BEGIN { RS = "/\*([^*]|\*[^/])*\*/" ORS = " " getline hold } { # hold one record because we don't want ORS on the last # record print hold hold = $0 } END { printf "%s", hold } @//E*O*F mawk0.97/examples/nocomment.awk// chmod u=r,g=r,o=r mawk0.97/examples/nocomment.awk echo mkdir - mawk0.97/msdos mkdir mawk0.97/msdos chmod u=rwx,g=rx,o=rx mawk0.97/msdos echo x - mawk0.97/msdos/INSTALL sed 's/^@//' > "mawk0.97/msdos/INSTALL" <<'@//E*O*F mawk0.97/msdos/INSTALL//' how to make mawk under MsDOS --------------------------- Read the DOS section in the manual first. In addition to a C compiler, you will need an assembler and a yacc compatible parser generator. Assuming you keep the same directory structure: 1) run mklib.bat in this directory to create msdos.lib 2) move the makefile and mawk.dep to .. ( The supplied dos makefile is for Borland's make. I used to make mawk with MS make -- which make you use is no big deal.) 3) If you want a Unix style command line for mawk, you'll need to write a function called reargv(int *, char ***) which passes mawk the modified argc and argv. Compile and add to msdos.lib. The supplied reargv.c works with POLYSHELL by Polytron; for a different shell you could use it as an example. If this made sense and all went well, go to machine.h and #define HAVE_REARGV 1 If ! (sense || well ) , don't worry: HAVE_REARGV is 0 by default. 4) YACC -- On the PC I use bison.1.14; it works fine and is easy to find. Surely easier to get than mawk. Unfortunately I cannot distribute the generated parse.c file. 
If you use bison, I suggest putting #if 0 in parse.c around the part that grows the parser stack. Then you won't need alloca(). ( For mawk programs, if the parser stack needs to grow, something is fatally wrong.) Also, unless you are debugging the grammar, you don't need the tables yyrhs[] or yyprhs[] -- cut them out and save some data space. You can make parse.c under unix with bison or another yacc and move it to dos. 5) The rand48.asm is hardwired for small model. I've never made a large model mawk, but as far as I can tell the only problem is in rand48.asm. The stack machine instructions (INST) assume a pointer is a pointer is a pointer, so if you change to far data pointers you'll also need to change to far code pointers. Rand48 is overkill; you could use another random number generator, although the one supplied with your compiler is almost certain to be poor. On small model, you have about 40K of free data space, which is plenty except for programs that hold a medium or larger data file entirely in memory. ================================================================== The reason system() and pipes are missing is that I haven't entirely decided how to handle the runaway loop problem. DOS makes asynchronous termination of a program difficult, because DOS itself might not be able to handle exit(). Hooking int 0x1a forces me to deal with cleanup and makes ^C a pain. What is the effect of my 0x1a on children? (That's why there is no system() or pipes yet.) What do I have to check after a child terminates? Children can muck up the state of the parent process. I have used mawk as is on DOS for about 6 months; what's missing I rarely use except to emit error messages, and I use errmsg() for that. If I stall about fixing this stuff, DOS might go away and I'll never have to deal with it. 
@//E*O*F mawk0.97/msdos/INSTALL// chmod u=r,g=r,o=r mawk0.97/msdos/INSTALL echo x - mawk0.97/msdos/makefile sed 's/^@//' > "mawk0.97/msdos/makefile" <<'@//E*O*F mawk0.97/msdos/makefile//' # this is a makefile for mawk # using Borland's make v3.0 # and CFLAGS for TurboC @.SWAP CFLAGS = -c -ms -f -wnod -wpro -A LINKFLAGS = /c/m !if $d(DEBUG) CFLAGS = $(CFLAGS) -DDEBUG LINKFLAGS = $(LINKFLAGS)/v !else # optimize CFLAGS = $(CFLAGS) -v- -O -DHAVE_REARGV=1 !endif #################### # user tuned macros #################### # location of your C library CLIB=c:\lib # if you have a 80x87 coprocessor #FLOATLIB=fp87 # otherwise FLOATLIB=emu #take this out if you don't need it, i.e., # if you have a reargv() WILDARGS=\lib\wildargs.obj #expand filenames # You'll need a yacc like parser YACC = bison -dyv ###################### end of tuneable macros OBS = parse.obj \ array.obj \ bi_funct.obj \ bi_vars.obj \ cast.obj \ code.obj \ da.obj \ error.obj \ execute.obj \ fcall.obj \ field.obj \ files.obj \ fin.obj \ hash.obj \ init.obj \ jmp.obj \ kw.obj \ main.obj \ matherr.obj \ memory.obj \ print.obj \ re_cmpl.obj \ scan.obj \ scancode.obj \ split.obj \ zmalloc.obj REXP_C = rexp\rexp.c \ rexp\rexp0.c \ rexp\rexp1.c \ rexp\rexp2.c \ rexp\rexpdb.c LIBS = msdos\msdos.lib \ rexp\rexp.lib \ $(CLIB)\$(FLOATLIB) \ $(CLIB)\maths \ $(CLIB)\cs mawk.exe : $(OBS) rexp\rexp.lib tlink $(LINKFLAGS) @&&! $(CLIB)\c0s $(WILDARGS) $(OBS) mawk,mawk $(LIBS) ! 
# build the regular expression library with the makefile in rexp
# NOTE(review): nothing changes back to the parent directory after
# this rule -- confirm that your make restores the working directory.
rexp\rexp.lib :  $(REXP_C)
	cd  rexp
	del *.obj
	make

# implicit rule: compile a C file to an object file
@.c.obj :
	tcc $(CFLAGS) $*.c

# generate the parser; the yacc output files are renamed to parse.c
# and parse.h
parse.c : parse.y
	$(YACC) parse.y
	rename y_tab.c parse.c
	rename y_tab.h parse.h

# scancode.c is written by the helper program makescan, which is
# built, run and then removed again.  del is used for the cleanup,
# as in the rexp\rexp.lib rule, one file per command.
scancode.c :  makescan.c  scan.h
	tcc makescan.c
	makescan.exe > scancode.c
	del makescan.obj
	del makescan.exe

# the dependencies of the obj's on h's
# you can make this with mawk -f deps.awk *.c
# deps.awk names its targets file.o; they have to be file.obj here
# because the object files in OBS are *.obj, otherwise a changed
# header never causes a recompile.

array.obj : bi_vars.h sizes.h zmalloc.h memory.h types.h machine.h mawk.h symtype.h
bi_funct.obj : fin.h bi_vars.h sizes.h memory.h zmalloc.h regexp.h types.h machine.h field.h repl.h files.h bi_funct.h mawk.h symtype.h init.h
bi_vars.obj : bi_vars.h sizes.h memory.h zmalloc.h types.h machine.h field.h mawk.h symtype.h init.h
cast.obj : parse.h sizes.h memory.h zmalloc.h types.h machine.h field.h scan.h repl.h mawk.h symtype.h
code.obj : sizes.h memory.h zmalloc.h types.h machine.h code.h mawk.h init.h
da.obj : sizes.h memory.h zmalloc.h types.h machine.h field.h repl.h code.h bi_funct.h mawk.h symtype.h
error.obj : parse.h bi_vars.h sizes.h types.h machine.h scan.h mawk.h symtype.h
execute.obj : sizes.h memory.h zmalloc.h regexp.h types.h machine.h field.h code.h repl.h bi_funct.h mawk.h symtype.h
fcall.obj : sizes.h memory.h zmalloc.h types.h machine.h code.h mawk.h symtype.h
field.obj : parse.h bi_vars.h sizes.h memory.h zmalloc.h regexp.h types.h machine.h field.h scan.h repl.h mawk.h symtype.h init.h
files.obj : fin.h sizes.h memory.h zmalloc.h types.h machine.h files.h mawk.h
fin.obj : parse.h fin.h bi_vars.h sizes.h memory.h zmalloc.h types.h machine.h field.h scan.h mawk.h symtype.h
hash.obj : sizes.h memory.h zmalloc.h types.h machine.h mawk.h symtype.h
init.obj : bi_vars.h sizes.h memory.h zmalloc.h types.h machine.h field.h code.h mawk.h symtype.h init.h
jmp.obj : sizes.h memory.h zmalloc.h types.h machine.h code.h jmp.h mawk.h init.h
kw.obj : parse.h sizes.h types.h machine.h mawk.h symtype.h init.h
main.obj : fin.h bi_vars.h sizes.h memory.h zmalloc.h types.h machine.h field.h code.h files.h mawk.h init.h
makescan.obj : parse.h scan.h symtype.h
matherr.obj : sizes.h types.h machine.h mawk.h
memory.obj : sizes.h memory.h zmalloc.h types.h machine.h mawk.h
parse.obj : bi_vars.h sizes.h memory.h zmalloc.h types.h machine.h field.h code.h files.h bi_funct.h mawk.h jmp.h symtype.h
print.obj : bi_vars.h parse.h sizes.h memory.h zmalloc.h types.h machine.h field.h scan.h files.h bi_funct.h mawk.h symtype.h
re_cmpl.obj : parse.h sizes.h memory.h zmalloc.h regexp.h types.h machine.h scan.h repl.h mawk.h symtype.h
scan.obj : parse.h fin.h sizes.h memory.h zmalloc.h types.h machine.h field.h scan.h repl.h files.h mawk.h symtype.h init.h
split.obj : bi_vars.h parse.h sizes.h memory.h zmalloc.h regexp.h types.h machine.h field.h scan.h bi_funct.h mawk.h symtype.h
zmalloc.obj : sizes.h zmalloc.h types.h machine.h mawk.h
@//E*O*F mawk0.97/msdos/makefile//
chmod u=r,g=r,o=r mawk0.97/msdos/makefile
echo x - mawk0.97/msdos/mklib.bat sed 's/^@//' > "mawk0.97/msdos/mklib.bat" <<'@//E*O*F mawk0.97/msdos/mklib.bat//' masm /mx /z rand48.asm ; tcc -c -f -O -v- rand48_0.c lib msdos.lib -+rand48.obj -+rand48_0.obj ; @//E*O*F mawk0.97/msdos/mklib.bat// chmod u=r,g=r,o=r mawk0.97/msdos/mklib.bat echo x - mawk0.97/msdos/rand48.asm sed 's/^@//' > "mawk0.97/msdos/rand48.asm" <<'@//E*O*F mawk0.97/msdos/rand48.asm//' ;************************************************** ; rand.asm ; PC versions of the rand48 family ; 1988 Michael D. Brennan ; ; ; rand.asm and rand0.c are the source files ; for rand.lib (see also rand48.h) ;************************************************** public _srand48, _seed48 , _lcong48 public _mrand48, _jrand48 ; uniform on [0,2^32) public _lrand48, _nrand48 ; uniform on [0,2^31) public _urand48, _vrand48 ; uniform on [0,2^16) ; rand.asm standard_scale0 = 0e66dh standard_scale1 = 0deech standard_scale2 = 5h standard_shift = 0bh @.model small @.data prod dw 3 dup(?) ; build scale*seed here seed dw 3 dup(?) 
; default seed storage for drand, lrand etc
; scale (3 words) and shift (1 word) directly follow seed in the
; data segment
scale   dw  standard_scale0
        dw  standard_scale1
        dw  standard_scale2
shift   dw  standard_shift
seedbuf dw  3 dup(?)    ; place old seed for seed48()

@.code
;****************************************
;  rgen -- the basic linear congruence
;    call with  bx->the seed
;
;  The seed, scale and product are each three 16 bit words,
;  lowest word first.  On return the three words at [bx] hold
;  the new seed; ax, cx, dx and prod are overwritten, bx is not
;  changed.
;*************************************
rgen    proc    near
;  seed <- scale * seed + shift  with overflow
;  first get scale*seed into prod
;  the tags i,j below mean:  word i of scale  times  word j of seed
;  only the low three words of the product are kept
        mov     ax, scale       ; 0,0
        mul     word ptr [bx]
        mov     prod, ax
        mov     cx, dx          ; save the overflow in cx
        mov     ax, scale+2     ; 1,0
        mul     word ptr [bx]
        add     ax, cx
        adc     dx, 0
        mov     prod+2, ax
        mov     cx, dx
        mov     ax, scale+4     ; 2,0
        mul     word ptr [bx]
        add     ax, cx
        mov     prod+4, ax
        mov     ax, scale       ; 0,1
        mul     word ptr [bx+2]
        add     ax, prod+2
        adc     dx, 0
        mov     prod+2, ax
        mov     cx, dx
        mov     ax, scale+2     ; 1,1
        mul     word ptr [bx+2]
        add     ax, cx
        add     prod+4, ax
        mov     ax, scale       ; 0,2
        mul     word ptr [bx+4]
        add     ax, prod+4
;  move product to seed
;  (ax already holds the top word of the product)
        mov     word ptr [bx+4], ax
        mov     ax, prod+2
        mov     word ptr [bx+2], ax
        mov     ax, prod
        mov     word ptr [bx], ax
;  add shift to seed
;  the carry is propagated through the two upper words
        mov     ax, shift
        add     word ptr [bx], ax
        adc     word ptr [bx+2], 0
        adc     word ptr [bx+4], 0
        ret
rgen    endp

set_scale   proc    near    ; set scale and shift to standard
        mov     scale, standard_scale0
        mov     scale+2, standard_scale1
        mov     scale+4, standard_scale2
        mov     shift, standard_shift
        ret
set_scale   endp

;  void srand48(long seedval)
;  the low seed word is set to 330eh, the word at [bp+4] goes to
;  the middle seed word and the word at [bp+6] to the top seed
;  word; scale and shift are reset to the standard values
_srand48    proc    near
        push    bp
        mov     bp, sp
        mov     seed, 330eh     ;
        mov     ax, [bp+4]
        mov     seed+2, ax
        mov     ax, [bp+6]
        mov     seed+4, ax
        call    set_scale
        pop     bp
        ret
_srand48    endp

;  short *seed48( short newseed[3] )
;  copies the current seed to seedbuf, loads the three words that
;  [bp+4] points to as the new seed, resets scale and shift to the
;  standard values and returns the offset of seedbuf in ax
_seed48 proc    near
        push    bp
        mov     bp, sp
        push    si
        push    di
        push    ds              ; es <- ds, movsw stores through es:di
        pop     es
        cld
        mov     di, offset seedbuf  ; save old seed
        mov     si, offset seed
        mov     cx, 3
        rep     movsw
        mov     si, [bp+4]      ; load the new seed
        mov     di, offset seed
        mov     cx, 3
        rep     movsw
        call    set_scale
        mov     ax, offset seedbuf
        pop     di
        pop     si
        pop     bp
        ret
_seed48 endp

;  long mrand48()
;  long jrand48(short seed[3])
;  mrand48 steps the default seed and returns its two upper words
;  in dx:ax
_mrand48    proc    near
        mov     bx, offset seed
        call    rgen
        mov     dx, seed+4
        mov     ax, seed+2
        ret
_mrand48    endp

_jrand48    proc    near
        push    bp
        mov
bp, sp mov bx, [bp+4] call rgen mov dx, word ptr [bx+4] mov ax, word ptr [bx+2] pop bp ret _jrand48 endp _nrand48 proc near push bp mov bp, sp mov bx, [bp+4] call rgen mov dx, [bx+4] mov ax, [bx+2] shr dx, 1 rcr ax, 1 pop bp ret _nrand48 endp _lrand48 proc near mov bx, offset seed call rgen mov dx, seed+4 mov ax, seed+2 shr dx, 1 rcr ax, 1 ret _lrand48 endp _vrand48 proc near push bp mov bp, sp mov bx, [bp+4] call rgen mov ax, [bx+4] pop bp ret _vrand48 endp _urand48 proc near mov bx, offset seed call rgen mov ax, seed+4 ret _urand48 endp _lcong48 proc near push bp mov bp, sp push si push di push ds pop es cld mov si, [bp+4] mov di, offset seed mov cx, 7 rep movsw pop di pop si pop bp ret _lcong48 endp end @//E*O*F mawk0.97/msdos/rand48.asm// chmod u=r,g=r,o=r mawk0.97/msdos/rand48.asm echo x - mawk0.97/msdos/rand48.h sed 's/^@//' > "mawk0.97/msdos/rand48.h" <<'@//E*O*F mawk0.97/msdos/rand48.h//' /* rand48.h */ /* see Unix drand(3) for expansive discussion */ double drand48(void) ; double erand48(unsigned short[3]) ; long mrand48(void) ; long jrand48(unsigned short[3]) ; /* uniform on [-2^31, 2^31) or [0,2^32) depending on how you interpret the sign bit */ long lrand48(void) ; long nrand48(unsigned short[3]) ; /* uniform on [0,2^31) lrand48() == mrand48()>>1 */ unsigned urand48(void) ; unsigned vrand48(unsigned short[3]) ; /* for 16bit machines uniform on [0,2^16) */ /* SEEDING */ void srand48(long seedval) ; unsigned short *seed48(unsigned short seedv[3]) ; void lcong(unsigned short[7] ) ; void srand48() ; long mrand48(), jrand48(), lrand48(), nrand48() ; unsigned urand48(), vrand48() ; double drand48(), erand48() ; @//E*O*F mawk0.97/msdos/rand48.h// chmod u=r,g=r,o=r mawk0.97/msdos/rand48.h echo x - mawk0.97/msdos/rand48_0.c sed 's/^@//' > "mawk0.97/msdos/rand48_0.c" <<'@//E*O*F mawk0.97/msdos/rand48_0.c//' /* rand0.c */ unsigned long mrand48() ; unsigned long jrand48(short [3]) ; double drand48() { return (double) mrand48() / 4294967296.0 ; } double 
erand48(short x[3]) { return (double) jrand48(x) / 4294967296.0 ; } @//E*O*F mawk0.97/msdos/rand48_0.c// chmod u=r,g=r,o=r mawk0.97/msdos/rand48_0.c echo x - mawk0.97/msdos/reargv.c sed 's/^@//' > "mawk0.97/msdos/reargv.c" <<'@//E*O*F mawk0.97/msdos/reargv.c//' /* reargv.c -- set arguments via POLYSHELL -- no errors, don't change anything if -- it seems shell is not activated */ char *strchr(), *getenv() ; static char *basename(char *s) /* strip path and extension , upcase the rest */ { register char *p ; for ( p = strchr(s,0) ; p > s ; p-- ) switch( p[-1] ) { case '\\' : case ':' : case '/' : return p ; case '.' : p[-1] = 0 ; break ; default : if ( p[-1] >= 'a' && p[-1] <= 'z' ) p[-1] -= 32 ; break ; } return p ; } /*--------------------- reargv -- recompute argc and argv for PolyShell if not under shell do nothing *------------------------------- */ extern char *progname ; extern unsigned char _osmajor ; void reargv(int *argcp , char ***argvp) { register char *p ; char **v , *q, *cmdline, **vx ; int cnt, cntx ; if ( _osmajor == 2 ) /* ugh */ (*argvp)[0] = progname ; else { (*argvp)[0] = basename( (*argvp)[0] ) ; progname = (*argvp)[0] ; } if ( ! (cmdline = getenv("CMDLINE")) ) return ; if ( *(q = strchr(cmdline,0) - 1) != 0xff ) return ; /* shexpand set wrong */ for ( *q = 0, cnt = 1 , p = cmdline ; p < q ; p++ ) if ( *p == 0xff ) { cnt++ ; *p = 0 ; } if ( ! (v = (char **) malloc((cnt+1)*sizeof(char*))) ) return ; /* shouldn't happen */ p = cmdline ; vx = v ; cntx = cnt ; while ( cnt ) { *v++ = p ; cnt-- ; while ( *p ) p++ ; p++ ; } *v = (char *) 0 ; v = vx ; v[0] = basename( v[0] ) ; if ( strcmp(v[0], (*argvp)[0]) ) return ;/* running under command and sh earlier */ /* running under PolyShell */ *argcp = cntx ; *argvp = v ; progname = v[0] ; } @//E*O*F mawk0.97/msdos/reargv.c// chmod u=r,g=r,o=r mawk0.97/msdos/reargv.c echo Inspecting for damage in transit... temp=/tmp/shar$$; dtemp=/tmp/.shar$$ trap "rm -f $temp $dtemp; exit" 0 1 2 3 15 cat > $temp <<\!!! 
110 194 1897 packing.list 21 66 407 README 38 203 1246 LIMITATIONS 78 450 3560 Makefile 639 3453 20668 mawk.manual 235 1062 5676 array.c 781 2886 17818 bi_funct.c 68 279 1720 bi_funct.h 87 284 1910 bi_vars.c 61 193 1374 bi_vars.h 354 1074 7389 cast.c 102 361 2538 code.c 141 407 3421 code.h 386 1119 9666 da.c 345 1188 7739 error.c 927 3424 26735 execute.c 365 1360 9733 fcall.c 404 1378 9426 field.c 65 199 1432 field.h 299 1085 7060 files.c 41 132 1006 files.h 368 1464 8781 fin.c 48 185 1167 fin.h 169 622 3566 hash.c 202 718 4475 init.c 44 160 1143 init.h 217 864 5679 jmp.c 35 122 876 jmp.h 83 270 1572 kw.c 160 584 4357 machine.h 151 453 3001 main.c 107 378 2495 makescan.c 106 285 2019 matherr.c 142 567 3652 mawk.h 95 312 2268 memory.c 50 129 1088 memory.h 1076 3677 30281 parse.y 285 1176 7197 print.c 321 1215 7100 re_cmpl.c 32 112 793 regexp.h 37 128 898 repl.h 694 2586 18298 scan.c 107 315 2906 scan.h 23 36 832 scancode.c 56 201 1446 sizes.h 174 644 3952 split.c 162 633 4106 symtype.h 112 344 2840 types.h 129 537 3196 zmalloc.c 44 118 905 zmalloc.h 26 72 446 Makefile 186 815 5015 rexp.c 153 565 3636 rexp.h 423 1718 10188 rexp0.c 183 708 3987 rexp1.c 282 1139 7507 rexp2.c 272 1335 7457 rexp3.c 74 252 1649 rexpdb.c 23 106 710 README 72 343 2430 benchmarks 1 3 11 cat.awk 6 12 59 concat.awk 6 13 69 fields.awk 23 49 201 loops.awk 22 53 286 newton.awk 40 129 564 primes.awk 78 234 1076 qsort.awk 2 4 34 reg0.awk 2 4 41 reg1.awk 2 5 63 reg2.awk 13 71 461 sample 7 19 82 squeeze.awk 43 111 621 test.sh 3 7 33 wc.awk 91 363 1765 wfrq.awk 72 297 1433 wfrq0.awk 23 64 275 words.awk 12 29 127 words0.awk 143 546 3030 decl.awk 57 241 1284 deps.awk 136 493 2776 gdecl.awk 18 44 213 nocomment.awk 80 522 3002 INSTALL 137 553 4067 makefile 8 16 107 mklib.bat 227 620 3670 rand48.asm 31 106 774 rand48.h 12 33 231 rand48_0.c 74 333 1727 reargv.c 13839 51659 344417 total !!! 
wc mawk0.97/packing.list mawk0.97/README mawk0.97/LIMITATIONS \ mawk0.97/Makefile mawk0.97/mawk.manual mawk0.97/array.c \ mawk0.97/bi_funct.c mawk0.97/bi_funct.h mawk0.97/bi_vars.c \ mawk0.97/bi_vars.h mawk0.97/cast.c mawk0.97/code.c \ mawk0.97/code.h mawk0.97/da.c mawk0.97/error.c mawk0.97/execute.c \ mawk0.97/fcall.c mawk0.97/field.c mawk0.97/field.h \ mawk0.97/files.c mawk0.97/files.h mawk0.97/fin.c mawk0.97/fin.h \ mawk0.97/hash.c mawk0.97/init.c mawk0.97/init.h mawk0.97/jmp.c \ mawk0.97/jmp.h mawk0.97/kw.c mawk0.97/machine.h mawk0.97/main.c \ mawk0.97/makescan.c mawk0.97/matherr.c mawk0.97/mawk.h \ mawk0.97/memory.c mawk0.97/memory.h mawk0.97/parse.y \ mawk0.97/print.c mawk0.97/re_cmpl.c mawk0.97/regexp.h \ mawk0.97/repl.h mawk0.97/scan.c mawk0.97/scan.h mawk0.97/scancode.c \ mawk0.97/sizes.h mawk0.97/split.c mawk0.97/symtype.h \ mawk0.97/types.h mawk0.97/zmalloc.c mawk0.97/zmalloc.h \ mawk0.97/rexp/Makefile mawk0.97/rexp/rexp.c mawk0.97/rexp/rexp.h \ mawk0.97/rexp/rexp0.c mawk0.97/rexp/rexp1.c mawk0.97/rexp/rexp2.c \ mawk0.97/rexp/rexp3.c mawk0.97/rexp/rexpdb.c mawk0.97/test/README \ mawk0.97/test/benchmarks mawk0.97/test/cat.awk mawk0.97/test/concat.awk \ mawk0.97/test/fields.awk mawk0.97/test/loops.awk mawk0.97/test/newton.awk \ mawk0.97/test/primes.awk mawk0.97/test/qsort.awk mawk0.97/test/reg0.awk \ mawk0.97/test/reg1.awk mawk0.97/test/reg2.awk mawk0.97/test/sample \ mawk0.97/test/squeeze.awk mawk0.97/test/test.sh mawk0.97/test/wc.awk \ mawk0.97/test/wfrq.awk mawk0.97/test/wfrq0.awk mawk0.97/test/words.awk \ mawk0.97/test/words0.awk mawk0.97/examples/decl.awk \ mawk0.97/examples/deps.awk mawk0.97/examples/gdecl.awk \ mawk0.97/examples/nocomment.awk mawk0.97/msdos/INSTALL \ mawk0.97/msdos/makefile mawk0.97/msdos/mklib.bat mawk0.97/msdos/rand48.asm \ mawk0.97/msdos/rand48.h mawk0.97/msdos/rand48_0.c mawk0.97/msdos/reargv.c \ | sed 's=[^ ]*/==' | diff -b $temp - >$dtemp if [ -s $dtemp ] then echo "Ouch [diff of wc output]:" ; cat $dtemp else echo "No 
problems found." fi exit 0