amiga-request@abcfd20.larc.nasa.gov (Amiga Sources/Binaries Moderator) (08/20/90)
Submitted-by: loftus@wpllabs.uucp (William P Loftus) Posting-number: Volume 90, Issue 232 Archive-name: unix/flex-2.3/part05 #!/bin/sh # This is a shell archive. Remove anything before this line, then unpack # it by saving it into a file and typing "sh file". To overwrite existing # files, type "sh file -c". You can also feed this as standard input via # unshar, or by typing "sh <file", e.g.. If this archive is complete, you # will see the following message at the end: # "End of archive 5 (of 13)." # Contents: dfa.c flex.Doc # Wrapped by tadguy@abcfd20 on Sun Aug 19 18:41:44 1990 PATH=/bin:/usr/bin:/usr/ucb ; export PATH if test -f 'dfa.c' -a "${1}" != "-c" ; then echo shar: Will not clobber existing file \"'dfa.c'\" else echo shar: Extracting \"'dfa.c'\" \(26919 characters\) sed "s/^X//" >'dfa.c' <<'END_OF_FILE' X/* dfa - DFA construction routines */ X X/*- X * Copyright (c) 1990 The Regents of the University of California. X * All rights reserved. X * X * This code is derived from software contributed to Berkeley by X * Vern Paxson. X * X * The United States Government has rights in this work pursuant X * to contract no. DE-AC03-76SF00098 between the United States X * Department of Energy and the University of California. X * X * Redistribution and use in source and binary forms are permitted provided X * that: (1) source distributions retain this entire copyright notice and X * comment, and (2) distributions including binaries display the following X * acknowledgement: ``This product includes software developed by the X * University of California, Berkeley and its contributors'' in the X * documentation or other materials provided with the distribution and in X * all advertising materials mentioning features or use of this software. X * Neither the name of the University nor the names of its contributors may X * be used to endorse or promote products derived from this software without X * specific prior written permission. X * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED X * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF X * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. X */ X X#ifndef lint Xstatic char rcsid[] = X "@(#) $Header: /usr/fsys/odin/a/vern/flex/RCS/dfa.c,v 2.7 90/06/27 23:48:15 vern Exp $ (LBL)"; X#endif X X#include "flexdef.h" X X X/* declare functions that have forward references */ X Xvoid dump_associated_rules PROTO((FILE*, int)); Xvoid dump_transitions PROTO((FILE*, int[])); Xvoid sympartition PROTO((int[], int, int[], int[])); Xint symfollowset PROTO((int[], int, int, int[])); X X X/* check_for_backtracking - check a DFA state for backtracking X * X * synopsis X * int ds, state[numecs]; X * check_for_backtracking( ds, state ); X * X * ds is the number of the state to check and state[] is its out-transitions, X * indexed by equivalence class, and state_rules[] is the set of rules X * associated with this state X */ X Xvoid check_for_backtracking( ds, state ) Xint ds; Xint state[]; X X { X if ( (reject && ! dfaacc[ds].dfaacc_set) || ! dfaacc[ds].dfaacc_state ) X { /* state is non-accepting */ X ++num_backtracking; X X if ( backtrack_report ) X { X fprintf( backtrack_file, "State #%d is non-accepting -\n", ds ); X X /* identify the state */ X dump_associated_rules( backtrack_file, ds ); X X /* now identify it further using the out- and jam-transitions */ X dump_transitions( backtrack_file, state ); X X putc( '\n', backtrack_file ); X } X } X } X X X/* check_trailing_context - check to see if NFA state set constitutes X * "dangerous" trailing context X * X * synopsis X * int nfa_states[num_states+1], num_states; X * int accset[nacc+1], nacc; X * check_trailing_context( nfa_states, num_states, accset, nacc ); X * X * NOTES X * Trailing context is "dangerous" if both the head and the trailing X * part are of variable size \and/ there's a DFA state which contains X * both an accepting state for the head part of the rule and NFA states X * which occur after the beginning of the trailing context. X * When such a rule is matched, it's impossible to tell if having been X * in the DFA state indicates the beginning of the trailing context X * or further-along scanning of the pattern. In these cases, a warning X * message is issued. X * X * nfa_states[1 .. num_states] is the list of NFA states in the DFA. X * accset[1 .. nacc] is the list of accepting numbers for the DFA state. X */ X Xvoid check_trailing_context( nfa_states, num_states, accset, nacc ) Xint *nfa_states, num_states; Xint *accset; Xregister int nacc; X X { X register int i, j; X X for ( i = 1; i <= num_states; ++i ) X { X int ns = nfa_states[i]; X register int type = state_type[ns]; X register int ar = assoc_rule[ns]; X X if ( type == STATE_NORMAL || rule_type[ar] != RULE_VARIABLE ) X { /* do nothing */ X } X X else if ( type == STATE_TRAILING_CONTEXT ) X { X /* potential trouble. Scan set of accepting numbers for X * the one marking the end of the "head". We assume that X * this looping will be fairly cheap since it's rare that X * an accepting number set is large. X */ X for ( j = 1; j <= nacc; ++j ) X if ( accset[j] & YY_TRAILING_HEAD_MASK ) X { X fprintf( stderr, X "%s: Dangerous trailing context in rule at line %d\n", X program_name, rule_linenum[ar] ); X return; X } X } X } X } X X X/* dump_associated_rules - list the rules associated with a DFA state X * X * synopisis X * int ds; X * FILE *file; X * dump_associated_rules( file, ds ); X * X * goes through the set of NFA states associated with the DFA and X * extracts the first MAX_ASSOC_RULES unique rules, sorts them, X * and writes a report to the given file X */ X Xvoid dump_associated_rules( file, ds ) XFILE *file; Xint ds; X X { X register int i, j; X register int num_associated_rules = 0; X int rule_set[MAX_ASSOC_RULES + 1]; X int *dset = dss[ds]; X int size = dfasiz[ds]; X X for ( i = 1; i <= size; ++i ) X { X register rule_num = rule_linenum[assoc_rule[dset[i]]]; X X for ( j = 1; j <= num_associated_rules; ++j ) X if ( rule_num == rule_set[j] ) X break; X X if ( j > num_associated_rules ) X { /* new rule */ X if ( num_associated_rules < MAX_ASSOC_RULES ) X rule_set[++num_associated_rules] = rule_num; X } X } X X bubble( rule_set, num_associated_rules ); X X fprintf( file, " associated rule line numbers:" ); X X for ( i = 1; i <= num_associated_rules; ++i ) X { X if ( i % 8 == 1 ) X putc( '\n', file ); X X fprintf( file, "\t%d", rule_set[i] ); X } X X putc( '\n', file ); X } X X X/* dump_transitions - list the transitions associated with a DFA state X * X * synopisis X * int state[numecs]; X * FILE *file; X * dump_transitions( file, state ); X * X * goes through the set of out-transitions and lists them in human-readable X * form (i.e., not as equivalence classes); also lists jam transitions X * (i.e., all those which are not out-transitions, plus EOF). The dump X * is done to the given file. X */ X Xvoid dump_transitions( file, state ) XFILE *file; Xint state[]; X X { X register int i, ec; X int out_char_set[CSIZE]; X X for ( i = 0; i < csize; ++i ) X { X ec = abs( ecgroup[i] ); X out_char_set[i] = state[ec]; X } X X fprintf( file, " out-transitions: " ); X X list_character_set( file, out_char_set ); X X /* now invert the members of the set to get the jam transitions */ X for ( i = 0; i < csize; ++i ) X out_char_set[i] = ! out_char_set[i]; X X fprintf( file, "\n jam-transitions: EOF " ); X X list_character_set( file, out_char_set ); X X putc( '\n', file ); X } X X X/* epsclosure - construct the epsilon closure of a set of ndfa states X * X * synopsis X * int t[current_max_dfa_size], numstates, accset[num_rules + 1], nacc; X * int hashval; X * int *epsclosure(); X * t = epsclosure( t, &numstates, accset, &nacc, &hashval ); X * X * NOTES X * the epsilon closure is the set of all states reachable by an arbitrary X * number of epsilon transitions which themselves do not have epsilon X * transitions going out, unioned with the set of states which have non-null X * accepting numbers. t is an array of size numstates of nfa state numbers. X * Upon return, t holds the epsilon closure and numstates is updated. accset X * holds a list of the accepting numbers, and the size of accset is given X * by nacc. t may be subjected to reallocation if it is not large enough X * to hold the epsilon closure. X * X * hashval is the hash value for the dfa corresponding to the state set X */ X Xint *epsclosure( t, ns_addr, accset, nacc_addr, hv_addr ) Xint *t, *ns_addr, accset[], *nacc_addr, *hv_addr; X X { X register int stkpos, ns, tsp; X int numstates = *ns_addr, nacc, hashval, transsym, nfaccnum; X int stkend, nstate; X static int did_stk_init = false, *stk; X X#define MARK_STATE(state) \ X trans1[state] = trans1[state] - MARKER_DIFFERENCE; X X#define IS_MARKED(state) (trans1[state] < 0) X X#define UNMARK_STATE(state) \ X trans1[state] = trans1[state] + MARKER_DIFFERENCE; X X#define CHECK_ACCEPT(state) \ X { \ X nfaccnum = accptnum[state]; \ X if ( nfaccnum != NIL ) \ X accset[++nacc] = nfaccnum; \ X } X X#define DO_REALLOCATION \ X { \ X current_max_dfa_size += MAX_DFA_SIZE_INCREMENT; \ X ++num_reallocs; \ X t = reallocate_integer_array( t, current_max_dfa_size ); \ X stk = reallocate_integer_array( stk, current_max_dfa_size ); \ X } \ X X#define PUT_ON_STACK(state) \ X { \ X if ( ++stkend >= current_max_dfa_size ) \ X DO_REALLOCATION \ X stk[stkend] = state; \ X MARK_STATE(state) \ X } X X#define ADD_STATE(state) \ X { \ X if ( ++numstates >= current_max_dfa_size ) \ X DO_REALLOCATION \ X t[numstates] = state; \ X hashval = hashval + state; \ X } X X#define STACK_STATE(state) \ X { \ X PUT_ON_STACK(state) \ X CHECK_ACCEPT(state) \ X if ( nfaccnum != NIL || transchar[state] != SYM_EPSILON ) \ X ADD_STATE(state) \ X } X X if ( ! did_stk_init ) X { X stk = allocate_integer_array( current_max_dfa_size ); X did_stk_init = true; X } X X nacc = stkend = hashval = 0; X X for ( nstate = 1; nstate <= numstates; ++nstate ) X { X ns = t[nstate]; X X /* the state could be marked if we've already pushed it onto X * the stack X */ X if ( ! IS_MARKED(ns) ) X PUT_ON_STACK(ns) X X CHECK_ACCEPT(ns) X hashval = hashval + ns; X } X X for ( stkpos = 1; stkpos <= stkend; ++stkpos ) X { X ns = stk[stkpos]; X transsym = transchar[ns]; X X if ( transsym == SYM_EPSILON ) X { X tsp = trans1[ns] + MARKER_DIFFERENCE; X X if ( tsp != NO_TRANSITION ) X { X if ( ! IS_MARKED(tsp) ) X STACK_STATE(tsp) X X tsp = trans2[ns]; X X if ( tsp != NO_TRANSITION ) X if ( ! IS_MARKED(tsp) ) X STACK_STATE(tsp) X } X } X } X X /* clear out "visit" markers */ X X for ( stkpos = 1; stkpos <= stkend; ++stkpos ) X { X if ( IS_MARKED(stk[stkpos]) ) X { X UNMARK_STATE(stk[stkpos]) X } X else X flexfatal( "consistency check failed in epsclosure()" ); X } X X *ns_addr = numstates; X *hv_addr = hashval; X *nacc_addr = nacc; X X return ( t ); X } X X X/* increase_max_dfas - increase the maximum number of DFAs */ X Xvoid increase_max_dfas() X X { X current_max_dfas += MAX_DFAS_INCREMENT; X X ++num_reallocs; X X base = reallocate_integer_array( base, current_max_dfas ); X def = reallocate_integer_array( def, current_max_dfas ); X dfasiz = reallocate_integer_array( dfasiz, current_max_dfas ); X accsiz = reallocate_integer_array( accsiz, current_max_dfas ); X dhash = reallocate_integer_array( dhash, current_max_dfas ); X dss = reallocate_int_ptr_array( dss, current_max_dfas ); X dfaacc = reallocate_dfaacc_union( dfaacc, current_max_dfas ); X X if ( nultrans ) X nultrans = reallocate_integer_array( nultrans, current_max_dfas ); X } X X X/* ntod - convert an ndfa to a dfa X * X * synopsis X * ntod(); X * X * creates the dfa corresponding to the ndfa we've constructed. the X * dfa starts out in state #1. X */ X Xvoid ntod() X X { X int *accset, ds, nacc, newds; X int sym, hashval, numstates, dsize; X int num_full_table_rows; /* used only for -f */ X int *nset, *dset; X int targptr, totaltrans, i, comstate, comfreq, targ; X int *epsclosure(), snstods(), symlist[CSIZE + 1]; X int num_start_states; X int todo_head, todo_next; X X /* note that the following are indexed by *equivalence classes* X * and not by characters. Since equivalence classes are indexed X * beginning with 1, even if the scanner accepts NUL's, this X * means that (since every character is potentially in its own X * equivalence class) these arrays must have room for indices X * from 1 to CSIZE, so their size must be CSIZE + 1. X */ X int duplist[CSIZE + 1], state[CSIZE + 1]; X int targfreq[CSIZE + 1], targstate[CSIZE + 1]; X X /* this is so find_table_space(...) will know where to start looking in X * chk/nxt for unused records for space to put in the state X */ X if ( fullspd ) X firstfree = 0; X X accset = allocate_integer_array( num_rules + 1 ); X nset = allocate_integer_array( current_max_dfa_size ); X X /* the "todo" queue is represented by the head, which is the DFA X * state currently being processed, and the "next", which is the X * next DFA state number available (not in use). We depend on the X * fact that snstods() returns DFA's \in increasing order/, and thus X * need only know the bounds of the dfas to be processed. X */ X todo_head = todo_next = 0; X X for ( i = 0; i <= csize; ++i ) X { X duplist[i] = NIL; X symlist[i] = false; X } X X for ( i = 0; i <= num_rules; ++i ) X accset[i] = NIL; X X if ( trace ) X { X dumpnfa( scset[1] ); X fputs( "\n\nDFA Dump:\n\n", stderr ); X } X X inittbl(); X X /* check to see whether we should build a separate table for transitions X * on NUL characters. We don't do this for full-speed (-F) scanners, X * since for them we don't have a simple state number lying around with X * which to index the table. We also don't bother doing it for scanners X * unless (1) NUL is in its own equivalence class (indicated by a X * positive value of ecgroup[NUL]), (2) NUL's equilvalence class is X * the last equivalence class, and (3) the number of equivalence classes X * is the same as the number of characters. This latter case comes about X * when useecs is false or when its true but every character still X * manages to land in its own class (unlikely, but it's cheap to check X * for). If all these things are true then the character code needed X * to represent NUL's equivalence class for indexing the tables is X * going to take one more bit than the number of characters, and therefore X * we won't be assured of being able to fit it into a YY_CHAR variable. X * This rules out storing the transitions in a compressed table, since X * the code for interpreting them uses a YY_CHAR variable (perhaps it X * should just use an integer, though; this is worth pondering ... ###). X * X * Finally, for full tables, we want the number of entries in the X * table to be a power of two so the array references go fast (it X * will just take a shift to compute the major index). If encoding X * NUL's transitions in the table will spoil this, we give it its X * own table (note that this will be the case if we're not using X * equivalence classes). X */ X X /* note that the test for ecgroup[0] == numecs below accomplishes X * both (1) and (2) above X */ X if ( ! fullspd && ecgroup[0] == numecs ) X { /* NUL is alone in its equivalence class, which is the last one */ X int use_NUL_table = (numecs == csize); X X if ( fulltbl && ! use_NUL_table ) X { /* we still may want to use the table if numecs is a power of 2 */ X int power_of_two; X X for ( power_of_two = 1; power_of_two <= csize; power_of_two *= 2 ) X if ( numecs == power_of_two ) X { X use_NUL_table = true; X break; X } X } X X if ( use_NUL_table ) X nultrans = allocate_integer_array( current_max_dfas ); X /* from now on, nultrans != nil indicates that we're X * saving null transitions for later, separate encoding X */ X } X X X if ( fullspd ) X { X for ( i = 0; i <= numecs; ++i ) X state[i] = 0; X place_state( state, 0, 0 ); X } X X else if ( fulltbl ) X { X if ( nultrans ) X /* we won't be including NUL's transitions in the table, X * so build it for entries from 0 .. numecs - 1 X */ X num_full_table_rows = numecs; X X else X /* take into account the fact that we'll be including X * the NUL entries in the transition table. Build it X * from 0 .. numecs. X */ X num_full_table_rows = numecs + 1; X X /* declare it "short" because it's a real long-shot that that X * won't be large enough. X */ X printf( "static short int yy_nxt[][%d] =\n {\n", X /* '}' so vi doesn't get too confused */ X num_full_table_rows ); X X /* generate 0 entries for state #0 */ X for ( i = 0; i < num_full_table_rows; ++i ) X mk2data( 0 ); X X /* force ',' and dataflush() next call to mk2data */ X datapos = NUMDATAITEMS; X X /* force extra blank line next dataflush() */ X dataline = NUMDATALINES; X } X X /* create the first states */ X X num_start_states = lastsc * 2; X X for ( i = 1; i <= num_start_states; ++i ) X { X numstates = 1; X X /* for each start condition, make one state for the case when X * we're at the beginning of the line (the '%' operator) and X * one for the case when we're not X */ X if ( i % 2 == 1 ) X nset[numstates] = scset[(i / 2) + 1]; X else X nset[numstates] = mkbranch( scbol[i / 2], scset[i / 2] ); X X nset = epsclosure( nset, &numstates, accset, &nacc, &hashval ); X X if ( snstods( nset, numstates, accset, nacc, hashval, &ds ) ) X { X numas += nacc; X totnst += numstates; X ++todo_next; X X if ( variable_trailing_context_rules && nacc > 0 ) X check_trailing_context( nset, numstates, accset, nacc ); X } X } X X if ( ! fullspd ) X { X if ( ! snstods( nset, 0, accset, 0, 0, &end_of_buffer_state ) ) X flexfatal( "could not create unique end-of-buffer state" ); X X ++numas; X ++num_start_states; X ++todo_next; X } X X while ( todo_head < todo_next ) X { X targptr = 0; X totaltrans = 0; X X for ( i = 1; i <= numecs; ++i ) X state[i] = 0; X X ds = ++todo_head; X X dset = dss[ds]; X dsize = dfasiz[ds]; X X if ( trace ) X fprintf( stderr, "state # %d:\n", ds ); X X sympartition( dset, dsize, symlist, duplist ); X X for ( sym = 1; sym <= numecs; ++sym ) X { X if ( symlist[sym] ) X { X symlist[sym] = 0; X X if ( duplist[sym] == NIL ) X { /* symbol has unique out-transitions */ X numstates = symfollowset( dset, dsize, sym, nset ); X nset = epsclosure( nset, &numstates, accset, X &nacc, &hashval ); X X if ( snstods( nset, numstates, accset, X nacc, hashval, &newds ) ) X { X totnst = totnst + numstates; X ++todo_next; X numas += nacc; X X if ( variable_trailing_context_rules && nacc > 0 ) X check_trailing_context( nset, numstates, X accset, nacc ); X } X X state[sym] = newds; X X if ( trace ) X fprintf( stderr, "\t%d\t%d\n", sym, newds ); X X targfreq[++targptr] = 1; X targstate[targptr] = newds; X ++numuniq; X } X X else X { X /* sym's equivalence class has the same transitions X * as duplist(sym)'s equivalence class X */ X targ = state[duplist[sym]]; X state[sym] = targ; X X if ( trace ) X fprintf( stderr, "\t%d\t%d\n", sym, targ ); X X /* update frequency count for destination state */ X X i = 0; X while ( targstate[++i] != targ ) X ; X X ++targfreq[i]; X ++numdup; X } X X ++totaltrans; X duplist[sym] = NIL; X } X } X X numsnpairs = numsnpairs + totaltrans; X X if ( caseins && ! useecs ) X { X register int j; X X for ( i = 'A', j = 'a'; i <= 'Z'; ++i, ++j ) X state[i] = state[j]; X } X X if ( ds > num_start_states ) X check_for_backtracking( ds, state ); X X if ( nultrans ) X { X nultrans[ds] = state[NUL_ec]; X state[NUL_ec] = 0; /* remove transition */ X } X X if ( fulltbl ) X { X /* supply array's 0-element */ X if ( ds == end_of_buffer_state ) X mk2data( -end_of_buffer_state ); X else X mk2data( end_of_buffer_state ); X X for ( i = 1; i < num_full_table_rows; ++i ) X /* jams are marked by negative of state number */ X mk2data( state[i] ? state[i] : -ds ); X X /* force ',' and dataflush() next call to mk2data */ X datapos = NUMDATAITEMS; X X /* force extra blank line next dataflush() */ X dataline = NUMDATALINES; X } X X else if ( fullspd ) X place_state( state, ds, totaltrans ); X X else if ( ds == end_of_buffer_state ) X /* special case this state to make sure it does what it's X * supposed to, i.e., jam on end-of-buffer X */ X stack1( ds, 0, 0, JAMSTATE ); X X else /* normal, compressed state */ X { X /* determine which destination state is the most common, and X * how many transitions to it there are X */ X X comfreq = 0; X comstate = 0; X X for ( i = 1; i <= targptr; ++i ) X if ( targfreq[i] > comfreq ) X { X comfreq = targfreq[i]; X comstate = targstate[i]; X } X X bldtbl( state, ds, totaltrans, comstate, comfreq ); X } X } X X if ( fulltbl ) X dataend(); X X else if ( ! fullspd ) X { X cmptmps(); /* create compressed template entries */ X X /* create tables for all the states with only one out-transition */ X while ( onesp > 0 ) X { X mk1tbl( onestate[onesp], onesym[onesp], onenext[onesp], X onedef[onesp] ); X --onesp; X } X X mkdeftbl(); X } X } X X X/* snstods - converts a set of ndfa states into a dfa state X * X * synopsis X * int sns[numstates], numstates, newds, accset[num_rules + 1], nacc, hashval; X * int snstods(); X * is_new_state = snstods( sns, numstates, accset, nacc, hashval, &newds ); X * X * on return, the dfa state number is in newds. X */ X Xint snstods( sns, numstates, accset, nacc, hashval, newds_addr ) Xint sns[], numstates, accset[], nacc, hashval, *newds_addr; X X { X int didsort = 0; X register int i, j; X int newds, *oldsns; X X for ( i = 1; i <= lastdfa; ++i ) X if ( hashval == dhash[i] ) X { X if ( numstates == dfasiz[i] ) X { X oldsns = dss[i]; X X if ( ! didsort ) X { X /* we sort the states in sns so we can compare it to X * oldsns quickly. we use bubble because there probably X * aren't very many states X */ X bubble( sns, numstates ); X didsort = 1; X } X X for ( j = 1; j <= numstates; ++j ) X if ( sns[j] != oldsns[j] ) X break; X X if ( j > numstates ) X { X ++dfaeql; X *newds_addr = i; X return ( 0 ); X } X X ++hshcol; X } X X else X ++hshsave; X } X X /* make a new dfa */ X X if ( ++lastdfa >= current_max_dfas ) X increase_max_dfas(); X X newds = lastdfa; X X dss[newds] = (int *) malloc( (unsigned) ((numstates + 1) * sizeof( int )) ); X X if ( ! dss[newds] ) X flexfatal( "dynamic memory failure in snstods()" ); X X /* if we haven't already sorted the states in sns, we do so now, so that X * future comparisons with it can be made quickly X */ X X if ( ! didsort ) X bubble( sns, numstates ); X X for ( i = 1; i <= numstates; ++i ) X dss[newds][i] = sns[i]; X X dfasiz[newds] = numstates; X dhash[newds] = hashval; X X if ( nacc == 0 ) X { X if ( reject ) X dfaacc[newds].dfaacc_set = (int *) 0; X else X dfaacc[newds].dfaacc_state = 0; X X accsiz[newds] = 0; X } X X else if ( reject ) X { X /* we sort the accepting set in increasing order so the disambiguating X * rule that the first rule listed is considered match in the event of X * ties will work. We use a bubble sort since the list is probably X * quite small. X */ X X bubble( accset, nacc ); X X dfaacc[newds].dfaacc_set = X (int *) malloc( (unsigned) ((nacc + 1) * sizeof( int )) ); X X if ( ! dfaacc[newds].dfaacc_set ) X flexfatal( "dynamic memory failure in snstods()" ); X X /* save the accepting set for later */ X for ( i = 1; i <= nacc; ++i ) X dfaacc[newds].dfaacc_set[i] = accset[i]; X X accsiz[newds] = nacc; X } X X else X { /* find lowest numbered rule so the disambiguating rule will work */ X j = num_rules + 1; X X for ( i = 1; i <= nacc; ++i ) X if ( accset[i] < j ) X j = accset[i]; X X dfaacc[newds].dfaacc_state = j; X } X X *newds_addr = newds; X X return ( 1 ); X } X X X/* symfollowset - follow the symbol transitions one step X * X * synopsis X * int ds[current_max_dfa_size], dsize, transsym; X * int nset[current_max_dfa_size], numstates; X * numstates = symfollowset( ds, dsize, transsym, nset ); X */ X Xint symfollowset( ds, dsize, transsym, nset ) Xint ds[], dsize, transsym, nset[]; X X { X int ns, tsp, sym, i, j, lenccl, ch, numstates; X int ccllist; X X numstates = 0; X X for ( i = 1; i <= dsize; ++i ) X { /* for each nfa state ns in the state set of ds */ X ns = ds[i]; X sym = transchar[ns]; X tsp = trans1[ns]; X X if ( sym < 0 ) X { /* it's a character class */ X sym = -sym; X ccllist = cclmap[sym]; X lenccl = ccllen[sym]; X X if ( cclng[sym] ) X { X for ( j = 0; j < lenccl; ++j ) X { /* loop through negated character class */ X ch = ccltbl[ccllist + j]; X X if ( ch == 0 ) X ch = NUL_ec; X X if ( ch > transsym ) X break; /* transsym isn't in negated ccl */ X X else if ( ch == transsym ) X /* next 2 */ goto bottom; X } X X /* didn't find transsym in ccl */ X nset[++numstates] = tsp; X } X X else X for ( j = 0; j < lenccl; ++j ) X { X ch = ccltbl[ccllist + j]; X X if ( ch == 0 ) X ch = NUL_ec; X X if ( ch > transsym ) X break; X X else if ( ch == transsym ) X { X nset[++numstates] = tsp; X break; X } X } X } X X else if ( sym >= 'A' && sym <= 'Z' && caseins ) X flexfatal( "consistency check failed in symfollowset" ); X X else if ( sym == SYM_EPSILON ) X { /* do nothing */ X } X X else if ( abs( ecgroup[sym] ) == transsym ) X nset[++numstates] = tsp; X Xbottom: X ; X } X X return ( numstates ); X } X X X/* sympartition - partition characters with same out-transitions X * X * synopsis X * integer ds[current_max_dfa_size], numstates, duplist[numecs]; X * symlist[numecs]; X * sympartition( ds, numstates, symlist, duplist ); X */ X Xvoid sympartition( ds, numstates, symlist, duplist ) Xint ds[], numstates, duplist[]; Xint symlist[]; X X { X int tch, i, j, k, ns, dupfwd[CSIZE + 1], lenccl, cclp, ich; X X /* partitioning is done by creating equivalence classes for those X * characters which have out-transitions from the given state. Thus X * we are really creating equivalence classes of equivalence classes. X */ X X for ( i = 1; i <= numecs; ++i ) X { /* initialize equivalence class list */ X duplist[i] = i - 1; X dupfwd[i] = i + 1; X } X X duplist[1] = NIL; X dupfwd[numecs] = NIL; X X for ( i = 1; i <= numstates; ++i ) X { X ns = ds[i]; X tch = transchar[ns]; X X if ( tch != SYM_EPSILON ) X { X if ( tch < -lastccl || tch > csize ) X { X if ( tch > csize && tch <= CSIZE ) X flexerror( "scanner requires -8 flag" ); X X else X flexfatal( X "bad transition character detected in sympartition()" ); X } X X if ( tch >= 0 ) X { /* character transition */ X /* abs() needed for fake %t ec's */ X int ec = abs( ecgroup[tch] ); X X mkechar( ec, dupfwd, duplist ); X symlist[ec] = 1; X } X X else X { /* character class */ X tch = -tch; X X lenccl = ccllen[tch]; X cclp = cclmap[tch]; X mkeccl( ccltbl + cclp, lenccl, dupfwd, duplist, numecs, X NUL_ec ); X X if ( cclng[tch] ) X { X j = 0; X X for ( k = 0; k < lenccl; ++k ) X { X ich = ccltbl[cclp + k]; X X if ( ich == 0 ) X ich = NUL_ec; X X for ( ++j; j < ich; ++j ) X symlist[j] = 1; X } X X for ( ++j; j <= numecs; ++j ) X symlist[j] = 1; X } X X else X for ( k = 0; k < lenccl; ++k ) X { X ich = ccltbl[cclp + k]; X X if ( ich == 0 ) X ich = NUL_ec; X X symlist[ich] = 1; X } X } X } X } X } END_OF_FILE if test 26919 -ne `wc -c <'dfa.c'`; then echo shar: \"'dfa.c'\" unpacked with wrong size! fi # end of 'dfa.c' fi if test -f 'flex.Doc' -a "${1}" != "-c" ; then echo shar: Will not clobber existing file \"'flex.Doc'\" else echo shar: Extracting \"'flex.Doc'\" \(25372 characters\) sed "s/^X//" >'flex.Doc' <<'END_OF_FILE' X X X XFLEX(1) USER COMMANDS FLEX(1) X X X XNAME X flex - fast lexical analyzer generator X XSYNOPSIS X flex [-bcdfinpstvFILT8 -C[efmF] -Sskeleton] [filename ...] X XDESCRIPTION X flex is a tool for generating scanners: programs which X recognized lexical patterns in text. flex reads the given X input files, or its standard input if no file names are X given, for a description of a scanner to generate. The X description is in the form of pairs of regular expressions X and C code, called rules. flex generates as output a C X source file, lex.yy.c, which defines a routine yylex(). This X file is compiled and linked with the -lfl library to produce X an executable. When the executable is run, it analyzes its X input for occurrences of the regular expressions. Whenever X it finds one, it executes the corresponding C code. X X For full documentation, see flexdoc(1). This manual entry is X intended for use as a quick reference. X XOPTIONS X flex has the following options: X X -b Generate backtracking information to lex.backtrack. X This is a list of scanner states which require back- X tracking and the input characters on which they do so. X By adding rules one can remove backtracking states. If X all backtracking states are eliminated and -f or -F is X used, the generated scanner will run faster. X X -c is a do-nothing, deprecated option included for POSIX X compliance. X X NOTE: in previous releases of flex -c specified table- X compression options. This functionality is now given X by the -C flag. To ease the the impact of this change, X when flex encounters -c, it currently issues a warning X message and assumes that -C was desired instead. In X the future this "promotion" of -c to -C will go away in X the name of full POSIX compliance (unless the POSIX X meaning is removed first). X X -d makes the generated scanner run in debug mode. When- X ever a pattern is recognized and the global X yy_flex_debug is non-zero (which is the default), the X scanner will write to stderr a line of the form: X X --accepting rule at line 53 ("the matched text") X X The line number refers to the location of the rule in X X X XVersion 2.3 Last change: 26 May 1990 1 X X X X X X XFLEX(1) USER COMMANDS FLEX(1) X X X X the file defining the scanner (i.e., the file that was X fed to flex). Messages are also generated when the X scanner backtracks, accepts the default rule, reaches X the end of its input buffer (or encounters a NUL; the X two look the same as far as the scanner's concerned), X or reaches an end-of-file. X X -f specifies (take your pick) full table or fast scanner. X No table compression is done. The result is large but X fast. This option is equivalent to -Cf (see below). X X -i instructs flex to generate a case-insensitive scanner. X The case of letters given in the flex input patterns X will be ignored, and tokens in the input will be X matched regardless of case. The matched text given in X yytext will have the preserved case (i.e., it will not X be folded). X X -n is another do-nothing, deprecated option included only X for POSIX compliance. X X -p generates a performance report to stderr. The report X consists of comments regarding features of the flex X input file which will cause a loss of performance in X the resulting scanner. X X -s causes the default rule (that unmatched scanner input X is echoed to stdout) to be suppressed. If the scanner X encounters input that does not match any of its rules, X it aborts with an error. X X -t instructs flex to write the scanner it generates to X standard output instead of lex.yy.c. X X -v specifies that flex should write to stderr a summary of X statistics regarding the scanner it generates. X X -F specifies that the fast scanner table representation X should be used. This representation is about as fast X as the full table representation (-f), and for some X sets of patterns will be considerably smaller (and for X others, larger). See flexdoc(1) for details. X X This option is equivalent to -CF (see below). X X -I instructs flex to generate an interactive scanner, that X is, a scanner which stops immediately rather than look- X ing ahead if it knows that the currently scanned text X cannot be part of a longer rule's match. Again, see X flexdoc(1) for details. X X Note, -I cannot be used in conjunction with full or X X X XVersion 2.3 Last change: 26 May 1990 2 X X X X X X XFLEX(1) USER COMMANDS FLEX(1) X X X X fast tables, i.e., the -f, -F, -Cf, or -CF flags. X X -L instructs flex not to generate #line directives in X lex.yy.c. The default is to generate such directives so X error messages in the actions will be correctly located X with respect to the original flex input file, and not X to the fairly meaningless line numbers of lex.yy.c. X X -T makes flex run in trace mode. It will generate a lot X of messages to stdout concerning the form of the input X and the resultant non-deterministic and deterministic X finite automata. This option is mostly for use in X maintaining flex. X X -8 instructs flex to generate an 8-bit scanner. On some X sites, this is the default. On others, the default is X 7-bit characters. To see which is the case, check the X verbose (-v) output for "equivalence classes created". X If the denominator of the number shown is 128, then by X default flex is generating 7-bit characters. If it is X 256, then the default is 8-bit characters. X X -C[efmF] X controls the degree of table compression. X X -Ce directs flex to construct equivalence classes, X i.e., sets of characters which have identical lexical X properties. Equivalence classes usually give dramatic X reductions in the final table/object file sizes (typi- X cally a factor of 2-5) and are pretty cheap X performance-wise (one array look-up per character X scanned). X X -Cf specifies that the full scanner tables should be X generated - flex should not compress the tables by tak- X ing advantages of similar transition functions for dif- X ferent states. X X -CF specifies that the alternate fast scanner represen- X tation (described in flexdoc(1)) should be used. X X -Cm directs flex to construct meta-equivalence classes, X which are sets of equivalence classes (or characters, X if equivalence classes are not being used) that are X commonly used together. Meta-equivalence classes are X often a big win when using compressed tables, but they X have a moderate performance impact (one or two "if" X tests and one array look-up per character scanned). X X A lone -C specifies that the scanner tables should be X compressed but neither equivalence classes nor meta- X equivalence classes should be used. X X X XVersion 2.3 Last change: 26 May 1990 3 X X X X X X XFLEX(1) USER COMMANDS FLEX(1) X X X X The options -Cf or -CF and -Cm do not make sense X together - there is no opportunity for meta-equivalence X classes if the table is not being compressed. Other- X wise the options may be freely mixed. X X The default setting is -Cem, which specifies that flex X should generate equivalence classes and meta- X equivalence classes. This setting provides the highest X degree of table compression. You can trade off X faster-executing scanners at the cost of larger tables X with the following generally being true: X X slowest & smallest X -Cem X -Cm X -Ce X -C X -C{f,F}e X -C{f,F} X fastest & largest X X X -C options are not cumulative; whenever the flag is X encountered, the previous -C settings are forgotten. X X -Sskeleton_file X overrides the default skeleton file from which flex X constructs its scanners. You'll never need this option X unless you are doing flex maintenance or development. X XSUMMARY OF FLEX REGULAR EXPRESSIONS X The patterns in the input are written using an extended set X of regular expressions. These are: X X x match the character 'x' X . any character except newline X [xyz] a "character class"; in this case, the pattern X matches either an 'x', a 'y', or a 'z' X [abj-oZ] a "character class" with a range in it; matches X an 'a', a 'b', any letter from 'j' through 'o', X or a 'Z' X [^A-Z] a "negated character class", i.e., any character X but those in the class. In this case, any X character EXCEPT an uppercase letter. X [^A-Z\n] any character EXCEPT an uppercase letter or X a newline X r* zero or more r's, where r is any regular expression X r+ one or more r's X r? zero or one r's (that is, "an optional r") X r{2,5} anywhere from two to five r's X r{2,} two or more r's X r{4} exactly 4 r's X X X XVersion 2.3 Last change: 26 May 1990 4 X X X X X X XFLEX(1) USER COMMANDS FLEX(1) X X X X {name} the expansion of the "name" definition X (see above) X "[xyz]\"foo" X the literal string: [xyz]"foo X \X if X is an 'a', 'b', 'f', 'n', 'r', 't', or 'v', X then the ANSI-C interpretation of \x. X Otherwise, a literal 'X' (used to escape X operators such as '*') X \123 the character with octal value 123 X \x2a the character with hexadecimal value 2a X (r) match an r; parentheses are used to override X precedence (see below) X X X rs the regular expression r followed by the X regular expression s; called "concatenation" X X X r|s either an r or an s X X X r/s an r but only if it is followed by an s. The X s is not part of the matched text. This type X of pattern is called as "trailing context". X ^r an r, but only at the beginning of a line X r$ an r, but only at the end of a line. Equivalent X to "r/\n". X X X <s>r an r, but only in start condition s (see X below for discussion of start conditions) X <s1,s2,s3>r X same, but in any of start conditions s1, X s2, or s3 X X X <<EOF>> an end-of-file X <s1,s2><<EOF>> X an end-of-file when in start condition s1 or s2 X X The regular expressions listed above are grouped according X to precedence, from highest precedence at the top to lowest X at the bottom. Those grouped together have equal pre- X cedence. X X Some notes on patterns: X X - Negated character classes match newlines unless "\n" X (or an equivalent escape sequence) is one of the char- X acters explicitly present in the negated character X class (e.g., "[^A-Z\n]"). X X X X XVersion 2.3 Last change: 26 May 1990 5 X X X X X X XFLEX(1) USER COMMANDS FLEX(1) X X X X - A rule can have at most one instance of trailing con- X text (the '/' operator or the '$' operator). The start X condition, '^', and "<<EOF>>" patterns can only occur X at the beginning of a pattern, and, as well as with '/' X and '$', cannot be grouped inside parentheses. The X following are all illegal: X X foo/bar$ X foo|(bar$) X foo|^bar X <sc1>foo<sc2>bar X X XSUMMARY OF SPECIAL ACTIONS X In addition to arbitrary C code, the following can appear in X actions: X X - ECHO copies yytext to the scanner's output. X X - BEGIN followed by the name of a start condition places X the scanner in the corresponding start condition. X X - REJECT directs the scanner to proceed on to the "second X best" rule which matched the input (or a prefix of the X input). yytext and yyleng are set up appropriately. X Note that REJECT is a particularly expensive feature in X terms scanner performance; if it is used in any of the X scanner's actions it will slow down all of the X scanner's matching. Furthermore, REJECT cannot be used X with the -f or -F options. X X Note also that unlike the other special actions, REJECT X is a branch; code immediately following it in the X action will not be executed. X X - yymore() tells the scanner that the next time it X matches a rule, the corresponding token should be X appended onto the current value of yytext rather than X replacing it. X X - yyless(n) returns all but the first n characters of the X current token back to the input stream, where they will X be rescanned when the scanner looks for the next match. X yytext and yyleng are adjusted appropriately (e.g., X yyleng will now be equal to n ). X X - unput(c) puts the character c back onto the input X stream. It will be the next character scanned. X X - input() reads the next character from the input stream X (this routine is called yyinput() if the scanner is X compiled using C++). X X X XVersion 2.3 Last change: 26 May 1990 6 X X X X X X XFLEX(1) USER COMMANDS FLEX(1) X X X X - yyterminate() can be used in lieu of a return statement X in an action. It terminates the scanner and returns a X 0 to the scanner's caller, indicating "all done". X X By default, yyterminate() is also called when an end- X of-file is encountered. It is a macro and may be rede- X fined. X X - YY_NEW_FILE is an action available only in <<EOF>> X rules. It means "Okay, I've set up a new input file, X continue scanning". X X - yy_create_buffer( file, size ) takes a FILE pointer and X an integer size. It returns a YY_BUFFER_STATE handle to X a new input buffer large enough to accomodate size X characters and associated with the given file. When in X doubt, use YY_BUF_SIZE for the size. X X - yy_switch_to_buffer( new_buffer ) switches the X scanner's processing to scan for tokens from the given X buffer, which must be a YY_BUFFER_STATE. X X - yy_delete_buffer( buffer ) deletes the given buffer. X XVALUES AVAILABLE TO THE USER X - char *yytext holds the text of the current token. It X may not be modified. X X - int yyleng holds the length of the current token. It X may not be modified. X X - FILE *yyin is the file which by default flex reads X from. It may be redefined but doing so only makes X sense before scanning begins. Changing it in the mid- X dle of scanning will have unexpected results since flex X buffers its input. Once scanning terminates because an X end-of-file has been seen, void yyrestart( FILE X *new_file ) may be called to point yyin at the new X input file. X X - FILE *yyout is the file to which ECHO actions are done. X It can be reassigned by the user. X X - YY_CURRENT_BUFFER returns a YY_BUFFER_STATE handle to X the current buffer. X XMACROS THE USER CAN REDEFINE X - YY_DECL controls how the scanning routine is declared. X By default, it is "int yylex()", or, if prototypes are X being used, "int yylex(void)". This definition may be X changed by redefining the "YY_DECL" macro. Note that X if you give arguments to the scanning routine using a X X X XVersion 2.3 Last change: 26 May 1990 7 X X X X X X XFLEX(1) USER COMMANDS FLEX(1) X X X X K&R-style/non-prototyped function declaration, you must X terminate the definition with a semi-colon (;). X X - The nature of how the scanner gets its input can be X controlled by redefining the YY_INPUT macro. X YY_INPUT's calling sequence is X "YY_INPUT(buf,result,max_size)". Its action is to X place up to max_size characters in the character array X buf and return in the integer variable result either X the number of characters read or the constant YY_NULL X (0 on Unix systems) to indicate EOF. The default X YY_INPUT reads from the global file-pointer "yyin". A X sample redefinition of YY_INPUT (in the definitions X section of the input file): X X %{ X #undef YY_INPUT X #define YY_INPUT(buf,result,max_size) \ X { \ X int c = getchar(); \ X result = (c == EOF) ? YY_NULL : (buf[0] = c, 1); \ X } X %} X X X - When the scanner receives an end-of-file indication X from YY_INPUT, it then checks the yywrap() function. X If yywrap() returns false (zero), then it is assumed X that the function has gone ahead and set up yyin to X point to another input file, and scanning continues. X If it returns true (non-zero), then the scanner ter- X minates, returning 0 to its caller. X X The default yywrap() always returns 1. Presently, to X redefine it you must first "#undef yywrap", as it is X currently implemented as a macro. It is likely that X yywrap() will soon be defined to be a function rather X than a macro. X X - YY_USER_ACTION can be redefined to provide an action X which is always executed prior to the matched rule's X action. X X - The macro YY_USER_INIT may be redefined to provide an X action which is always executed before the first scan. X X - In the generated scanner, the actions are all gathered X in one large switch statement and separated using X YY_BREAK, which may be redefined. By default, it is X simply a "break", to separate each rule's action from X the following rule's. X X X X XVersion 2.3 Last change: 26 May 1990 8 X X X X X X XFLEX(1) USER COMMANDS FLEX(1) X X X XFILES X flex.skel X skeleton scanner. X X lex.yy.c X generated scanner (called lexyy.c on some systems). X X lex.backtrack X backtracking information for -b flag (called lex.bck on X some systems). X X -lfl library with which to link the scanners. X XSEE ALSO X flexdoc(1), lex(1), yacc(1), sed(1), awk(1). X X M. E. Lesk and E. Schmidt, LEX - Lexical Analyzer Generator X XDIAGNOSTICS X reject_used_but_not_detected undefined or X X yymore_used_but_not_detected undefined - These errors can X occur at compile time. They indicate that the scanner uses X REJECT or yymore() but that flex failed to notice the fact, X meaning that flex scanned the first two sections looking for X occurrences of these actions and failed to find any, but X somehow you snuck some in (via a #include file, for exam- X ple). Make an explicit reference to the action in your flex X input file. (Note that previously flex supported a X %used/%unused mechanism for dealing with this problem; this X feature is still supported but now deprecated, and will go X away soon unless the author hears from people who can argue X compellingly that they need it.) X X flex scanner jammed - a scanner compiled with -s has encoun- X tered an input string which wasn't matched by any of its X rules. X X flex input buffer overflowed - a scanner rule matched a X string long enough to overflow the scanner's internal input X buffer (16K bytes - controlled by YY_BUF_MAX in X "flex.skel"). X X scanner requires -8 flag - Your scanner specification X includes recognizing 8-bit characters and you did not X specify the -8 flag (and your site has not installed flex X with -8 as the default). X X fatal flex scanner internal error--end of buffer missed - X This can occur in an scanner which is reentered after a X long-jump has jumped out (or over) the scanner's activation X frame. Before reentering the scanner, use: X X X XVersion 2.3 Last change: 26 May 1990 9 X X X X X X XFLEX(1) USER COMMANDS FLEX(1) X X X X yyrestart( yyin ); X X X too many %t classes! - You managed to put every single char- X acter into its own %t class. flex requires that at least X one of the classes share characters. X XAUTHOR X Vern Paxson, with the help of many ideas and much inspira- X tion from Van Jacobson. Original version by Jef Poskanzer. X X See flexdoc(1) for additional credits and the address to X send comments to. X XDEFICIENCIES / BUGS X Some trailing context patterns cannot be properly matched X and generate warning messages ("Dangerous trailing con- X text"). These are patterns where the ending of the first X part of the rule matches the beginning of the second part, X such as "zx*/xy*", where the 'x*' matches the 'x' at the X beginning of the trailing context. (Note that the POSIX X draft states that the text matched by such patterns is unde- X fined.) X X For some trailing context rules, parts which are actually X fixed-length are not recognized as such, leading to the X abovementioned performance loss. In particular, parts using X '|' or {n} (such as "foo{3}") are always considered X variable-length. X X Combining trailing context with the special '|' action can X result in fixed trailing context being turned into the more X expensive variable trailing context. For example, this hap- X pens in the following example: X X %% X abc | X xyz/def X X X Use of unput() invalidates yytext and yyleng. X X Use of unput() to push back more text than was matched can X result in the pushed-back text matching a beginning-of-line X ('^') rule even though it didn't come at the beginning of X the line (though this is rare!). X X Pattern-matching of NUL's is substantially slower than X matching other characters. X X flex does not generate correct #line directives for code X internal to the scanner; thus, bugs in flex.skel yield bogus X X X XVersion 2.3 Last change: 26 May 1990 10 X X X X X X XFLEX(1) USER COMMANDS FLEX(1) X X X X line numbers. X X Due to both buffering of input and read-ahead, you cannot X intermix calls to <stdio.h> routines, such as, for example, X getchar(), with flex rules and expect it to work. Call X input() instead. X X The total table entries listed by the -v flag excludes the X number of table entries needed to determine what rule has X been matched. The number of entries is equal to the number X of DFA states if the scanner does not use REJECT, and some- X what greater than the number of states if it does. X X REJECT cannot be used with the -f or -F options. X X Some of the macros, such as yywrap(), may in the future X become functions which live in the -lfl library. This will X doubtless break a lot of code, but may be required for X POSIX-compliance. X X The flex internal algorithms need documentation. X X X X X X X X X X X X X X X X X X X X X X X X X X X X X X X X X X XVersion 2.3 Last change: 26 May 1990 11 X X X END_OF_FILE if test 25372 -ne `wc -c <'flex.Doc'`; then echo shar: \"'flex.Doc'\" unpacked with wrong size! fi # end of 'flex.Doc' fi echo shar: End of archive 5 \(of 13\). cp /dev/null ark5isdone MISSING="" for I in 1 2 3 4 5 6 7 8 9 10 11 12 13 ; do if test ! -f ark${I}isdone ; then MISSING="${MISSING} ${I}" fi done if test "${MISSING}" = "" ; then echo You have unpacked all 13 archives. rm -f ark[1-9]isdone ark[1-9][0-9]isdone else echo You still need to unpack the following archives: echo " " ${MISSING} fi ## End of shell archive. exit 0 -- Mail submissions (sources or binaries) to <amiga@uunet.uu.net>. Mail comments to the moderator at <amiga-request@uunet.uu.net>. Post requests for sources, and general discussion to comp.sys.amiga.