[comp.sources.unix] v14i083: Flex, a lex replacement, Part05/05

rsalz@uunet.uu.net (Rich Salz) (05/04/88)

Submitted-by: Vern Paxson <vern@lbl-csam.arpa>
Posting-number: Volume 14, Issue 83
Archive-name: flex/part05

#! /bin/sh
# This is a shell archive.  Remove anything before this line, then unpack
# it by saving it into a file and typing "sh file".  To overwrite existing
# files, type "sh file -c".  You can also feed this as standard input via
# unshar, or by typing "sh <file", e.g..  If this archive is complete, you
# will see the following message at the end:
#		"End of archive 5 (of 5)."
# Contents:  tblcmp.c
# Wrapped by rsalz@fig.bbn.com on Tue May  3 17:31:35 1988
PATH=/bin:/usr/bin:/usr/ucb ; export PATH
if test -f 'tblcmp.c' -a "${1}" != "-c" ; then 
  echo shar: Will not clobber existing file \"'tblcmp.c'\"
else
echo shar: Extracting \"'tblcmp.c'\" \(39351 characters\)
sed "s/^X//" >'tblcmp.c' <<'END_OF_FILE'
X/* tblcmp - table compression routines */
X
X/*
X * Copyright (c) 1987, the University of California
X * 
X * The United States Government has rights in this work pursuant to
X * contract no. DE-AC03-76SF00098 between the United States Department of
X * Energy and the University of California.
X * 
X * This program may be redistributed.  Enhancements and derivative works
X * may be created provided the new works, if made available to the general
X * public, are made available for use by anyone.
X */
X
X#include "flexdef.h"
X
X/* bldtbl - build table entries for dfa state
X *
X * synopsis
X *   int state[numecs], statenum, totaltrans, comstate, comfreq;
X *   bldtbl( state, statenum, totaltrans, comstate, comfreq );
X *
X * State is the statenum'th dfa state.  It is indexed by equivalence class and
X * gives the number of the state to enter for a given equivalence class.
X * totaltrans is the total number of transitions out of the state.  Comstate
X * is that state which is the destination of the most transitions out of State.
X * Comfreq is how many transitions there are out of State to Comstate.
X *
X * A note on terminology:
X *    "protos" are transition tables which have a high probability of
X * either being redundant (a state processed later will have an identical
X * transition table) or nearly redundant (a state processed later will have
X * many of the same out-transitions).  A "most recently used" queue of
X * protos is kept around with the hope that most states will find a proto
X * which is similar enough to be usable, and therefore compacting the
X * output tables.
X *    "templates" are a special type of proto.  If a transition table is
X * homogeneous or nearly homogeneous (all transitions go to the same
X * destination) then the odds are good that future states will also go
X * to the same destination state on basically the same character set.
X * These homogeneous states are so common when dealing with large rule
X * sets that they merit special attention.  If the transition table were
X * simply made into a proto, then (typically) each subsequent, similar
X * state will differ from the proto for two out-transitions.  One of these
X * out-transitions will be that character on which the proto does not go
X * to the common destination, and one will be that character on which the
X * state does not go to the common destination.  Templates, on the other
X * hand, go to the common state on EVERY transition character, and therefore
X * cost only one difference.
X */
X
bldtbl( state, statenum, totaltrans, comstate, comfreq )
int state[], statenum, totaltrans, comstate, comfreq;
X
X    {
X    /* extrct holds two candidate difference tables; extptr selects the one
X     * that currently contains the best result (see the long note below)
X     */
X    int extptr, extrct[2][CSIZE + 1];
X    /* mindiff/minprot: smallest difference count obtained so far and the
X     * proto that produced it; d: difference count of the proto being tried
X     */
X    int mindiff, minprot, i, d;
X    int checkcom;
X
X    /* If extptr is 0 then the first array of extrct holds the result of the
X     * "best difference" to date, which is those transitions which occur in
X     * "state" but not in the proto which, to date, has the fewest differences
X     * between itself and "state".  If extptr is 1 then the second array of
X     * extrct holds the best difference.  The two arrays are toggled
X     * between so that the best difference to date can be kept around and
X     * also a difference just created by checking against a candidate "best"
X     * proto.
X     */
X
X    extptr = 0;
X
X    /* if the state has too few out-transitions, don't bother trying to
X     * compact its tables
X     */
X
X    if ( (totaltrans * 100) < (numecs * PROTO_SIZE_PERCENTAGE) )
X	mkentry( state, numecs, statenum, JAMSTATE, totaltrans );
X
X    else
X	{
X	/* checkcom is true if we should only check "state" against
X	 * protos which have the same "comstate" value
X	 */
X
X	checkcom = comfreq * 100 > totaltrans * CHECK_COM_PERCENTAGE;
X
X	/* until a proto has actually been diffed, assume the worst case:
X	 * every one of the state's transitions differs
X	 */
X	minprot = firstprot;
X	mindiff = totaltrans;
X
X	if ( checkcom )
X	    {
X	    /* find first proto which has the same "comstate" */
X	    for ( i = firstprot; i != NIL; i = protnext[i] )
X		if ( protcomst[i] == comstate )
X		    {
X		    minprot = i;
X		    mindiff = tbldiff( state, minprot, extrct[extptr] );
X		    break;
X		    }
X	    }
X
X	else
X	    {
X	    /* since we've decided that the most common destination out
X	     * of "state" does not occur with a high enough frequency,
X	     * we set the "comstate" to zero, assuring that if this state
X	     * is entered into the proto list, it will not be considered
X	     * a template.
X	     */
X	    comstate = 0;
X
X	    if ( firstprot != NIL )
X		{
X		minprot = firstprot;
X		mindiff = tbldiff( state, minprot, extrct[extptr] );
X		}
X	    }
X
X	/* we now have the first interesting proto in "minprot".  If
X	 * it matches within the tolerances set for the first proto,
X	 * we don't want to bother scanning the rest of the proto list
X	 * to see if we have any other reasonable matches.
X	 */
X
X	if ( mindiff * 100 > totaltrans * FIRST_MATCH_DIFF_PERCENTAGE )
X	    { /* not a good enough match.  Scan the rest of the protos */
X	    /* the scan begins at minprot itself, so that proto is diffed
X	     * once more; because the test below is a strict "<" it cannot
X	     * displace its own earlier result
X	     */
X	    for ( i = minprot; i != NIL; i = protnext[i] )
X		{
X		/* diff into the array that is NOT holding the best result */
X		d = tbldiff( state, i, extrct[1 - extptr] );
X		if ( d < mindiff )
X		    {
X		    /* new best: swap the roles of the two arrays */
X		    extptr = 1 - extptr;
X		    mindiff = d;
X		    minprot = i;
X		    }
X		}
X	    }
X
X	/* check if the proto we've decided on as our best bet is close
X	 * enough to the state we want to match to be usable
X	 */
X
X	if ( mindiff * 100 > totaltrans * ACCEPTABLE_DIFF_PERCENTAGE )
X	    {
X	    /* no good.  If the state is homogeneous enough, we make a
X	     * template out of it.  Otherwise, we make a proto.
X	     */
X
X	    if ( comfreq * 100 >= totaltrans * TEMPLATE_SAME_PERCENTAGE )
X		mktemplate( state, statenum, comstate );
X
X	    else
X		{
X		/* the state becomes a proto and is stored in full, with
X		 * JAMSTATE as its default
X		 */
X		mkprot( state, statenum, comstate );
X		mkentry( state, numecs, statenum, JAMSTATE, totaltrans );
X		}
X	    }
X
X	else
X	    { /* use the proto */
X	    /* only the differences are stored; the proto's own state number
X	     * (prottbl[minprot]) is recorded as the default link
X	     */
X	    mkentry( extrct[extptr], numecs, statenum,
X		     prottbl[minprot], mindiff );
X
X	    /* if this state was sufficiently different from the proto
X	     * we built it from, make it, too, a proto
X	     */
X
X	    if ( mindiff * 100 >= totaltrans * NEW_PROTO_DIFF_PERCENTAGE )
X		mkprot( state, statenum, comstate );
X
X	    /* since mkprot added a new proto to the proto queue, it's possible
X	     * that "minprot" is no longer on the proto queue (if it happened
X	     * to have been the last entry, it would have been bumped off).
X	     * If it's not there, then the new proto took its physical place
X	     * (though logically the new proto is at the beginning of the
X	     * queue), so in that case the following call will do nothing.
X	     */
X
X	    mv2front( minprot );
X	    }
X	}
X    }
X
X
X/* cmptmps - compress template table entries
X *
X * synopsis
X *    cmptmps();
X *
X *  template tables are compressed by using the 'template equivalence
X *  classes', which are collections of transition character equivalence
X *  classes which always appear together in templates - really meta-equivalence
X *  classes.  until this point, the tables for templates have been stored
X *  up at the top end of the nxt array; they will now be compressed and have
X *  table entries made for them.
X */
X
cmptmps()
X
X    {
X    int rowstorage[CSIZE + 1];
X    register int *row = rowstorage, tmpl, ec;
X    int nonjam, target, slot;
X
X    /* record the pair count before any template row is compressed */
X    peakpairs = numtemps * numecs + tblend;
X
X    /* with meta-equivalence classes enabled, cre8ecs() builds them from
X     * the data gathered on template transitions and returns how many
X     * there are; otherwise template rows stay indexed by plain
X     * equivalence class
X     */
X    nummecs = usemecs ? cre8ecs( tecfwd, tecbck, numecs ) : numecs;
X
X    /* the template entries are placed after the jam state's slot, so make
X     * sure the per-state arrays are large enough for them
X     */
X    if ( lastdfa + numtemps + 1 >= current_max_dfas )
X	increase_max_dfas();
X
X    /* one pass per template: collect its row, then make its table entry */
X    for ( tmpl = 1; tmpl <= numtemps; ++tmpl )
X	{
X	nonjam = 0;	/* count of non-jam transitions out of this template */
X
X	for ( ec = 1; ec <= numecs; ++ec )
X	    {
X	    target = tnxt[numecs * tmpl + ec];
X
X	    /* choose the column this equivalence class is stored under.
X	     * When meta-equivalence classes are in use, only classes with
X	     * a positive tecbck[] entry contribute a column, indexed by
X	     * that entry; all other classes are skipped.
X	     */
X	    if ( ! usemecs )
X		slot = ec;
X
X	    else if ( tecbck[ec] > 0 )
X		slot = tecbck[ec];
X
X	    else
X		continue;
X
X	    row[slot] = target;
X
X	    if ( target > 0 )
X		++nonjam;
X	    }
X
X	/* the skeleton assumes (rather subtly) that with meta-equivalence
X	 * classes the def[] entry of every template is the jam template,
X	 * i.e., a template never defaults to another non-jam table entry
X	 * such as a second template
X	 */
X
X	/* "+ 1" leaves room for the jam-state after the last real state */
X	mkentry( row, nummecs, lastdfa + tmpl + 1, JAMSTATE, nonjam );
X	}
X    }
X
X
X
X/* expand_nxt_chk - expand the next check arrays */
X
expand_nxt_chk()
X
X    {
X    /* size of both arrays before growing; the new region starts here */
X    register int prev_size = current_max_xpairs;
X
X    current_max_xpairs = prev_size + MAX_XPAIRS_INCREMENT;
X
X    ++num_reallocs;
X
X    /* nxt and chk always grow together and share one size */
X    nxt = reallocate_integer_array( nxt, current_max_xpairs );
X    chk = reallocate_integer_array( chk, current_max_xpairs );
X
X    /* zero the newly added tail of chk only; a zero chk entry is what the
X     * rest of this file tests for when looking for an unused slot
X     */
X    bzero( (char *) &chk[prev_size], MAX_XPAIRS_INCREMENT * sizeof( int ) );
X    }
X
X
X/* find_table_space - finds a space in the table for a state to be placed
X *
X * synopsis
X *     int *state, numtrans, block_start;
X *     int find_table_space();
X *
X *     block_start = find_table_space( state, numtrans );
X *
X * State is the state to be added to the full speed transition table.
X * Numtrans is the number of out-transitions for the state.
X *
X * find_table_space() returns the position of the start of the first block (in
X * chk) able to accommodate the state
X *
X * In determining if a state will or will not fit, find_table_space() must take
X * into account the fact that an end-of-buffer state will be added at [0],
X * and an action number will be added in [-1].
X */
X
int find_table_space( state, numtrans )
int *state, numtrans;
X    
X    {
X    /* i is the candidate position being examined.  On success it is the
X     * returned block start: chk[i - 1] (action number) and chk[i]
X     * (end-of-buffer) are unused, and chk[i + ec] is unused for every
X     * equivalence class ec on which "state" has a transition.
X     *
X     * Side effects: may grow nxt/chk via expand_nxt_chk(), and updates
X     * firstfree when the search started from firstfree.
X     */
X    register int i;
X    register int *state_ptr, *chk_ptr;
X    register int *ptr_to_last_entry_in_state;
X
X    /* if there are too many out-transitions, put the state at the end of
X     * nxt and chk
X     */
X    if ( numtrans > MAX_XTIONS_FOR_FULL_INTERIOR_FIT )
X	{
X	/* if table is empty, return the first available spot in chk/nxt,
X	 * which should be 1
X	 */
X	if ( tblend < 2 )
X	    return ( 1 );
X
X	i = tblend - numecs;	/* start searching for table space near the
X				 * end of chk/nxt arrays
X				 */
X	}
X
X    else
X	i = firstfree;		/* start searching for table space from the
X				 * beginning (skipping only the elements
X				 * which will definitely not hold the new
X				 * state)
X				 */
X
X    while ( 1 )		/* loops until a space is found */
X	{
X	/* the fit test below reads chk[i + 1] .. chk[i + numecs], so the
X	 * highest index touched is i + numecs; it must be strictly less
X	 * than the array size.  (The test used to be ">", which allowed a
X	 * read one element past the end when i + numecs was exactly
X	 * current_max_xpairs.)
X	 */
X	while ( i + numecs >= current_max_xpairs )
X	    expand_nxt_chk();
X
X	/* loops until space for end-of-buffer and action number are found */
X	while ( 1 )
X	    {
X	    if ( chk[i - 1] == 0 )	/* check for action number space */
X		{
X		if ( chk[i] == 0 )	/* check for end-of-buffer space */
X		    break;
X
X		else
X		    i += 2;	/* chk[i] is taken, so position i + 1 would
X				 * fail its action-number test (which looks
X				 * at chk[i]); skip straight past it
X				 */
X		}
X
X	    else
X		++i;
X
X	    /* keep the same headroom for the position we just moved to */
X	    while ( i + numecs >= current_max_xpairs )
X		expand_nxt_chk();
X	    }
X
X	/* if we started search from the beginning, store the new firstfree for
X	 * the next call of find_table_space()
X	 */
X	if ( numtrans <= MAX_XTIONS_FOR_FULL_INTERIOR_FIT )
X	    firstfree = i + 1;
X
X	/* check to see if all elements in chk (and therefore nxt) that are
X	 * needed for the new state have not yet been taken
X	 */
X
X	state_ptr = &state[1];
X	ptr_to_last_entry_in_state = &chk[i + numecs + 1];	/* one past */
X
X	for ( chk_ptr = &chk[i + 1]; chk_ptr != ptr_to_last_entry_in_state;
X	      ++chk_ptr )
X	    if ( *(state_ptr++) != 0 && *chk_ptr != 0 )
X		break;	/* a needed slot is already occupied */
X
X	if ( chk_ptr == ptr_to_last_entry_in_state )
X	    return ( i );	/* scanned the whole block with no conflict */
X
X	else
X	    ++i;
X	}
X    }
X
X
X/* genctbl - generates full speed compressed transition table
X *
X * synopsis
X *     genctbl();
X */
X
genctbl()
X
X    {
X    register int i;
X
X    /* table of verify for transition and offset to next state */
X    printf( "static struct yy_trans_info yy_transition[%d] =\n",
X	    tblend + numecs + 1 );
X    printf( "    {\n" );
X    
X    /* We want the transition to be represented as the offset to the
X     * next state, not the actual state number, which is what it currently is.
X     * The offset is base[nxt[i]] - base[chk[i]].  That's just the
X     * difference between the starting points of the two involved states
X     * (to - from).
X     *
X     * first, though, we need to find some way to put in our end-of-buffer
X     * flags and states.  We do this by making a state with absolutely no
X     * transitions.  We put it at the end of the table.
X     */
X    /* at this point, we're guaranteed that there's enough room in nxt[]
X     * and chk[] to hold tblend + numecs entries.  We need just two slots.
X     * One for the action and one for the end-of-buffer transition.  We
X     * now *assume* that we're guaranteed the only character we'll try to
X     * index this nxt/chk pair with is EOB, i.e., 0, so we don't have to
X     * make sure there's room for jam entries for other characters.
X     */
X
X    /* the extra state is numbered lastdfa + 1; its action slot is at
X     * tblend + 1 and its end-of-buffer slot (its base) at tblend + 2
X     */
X    base[lastdfa + 1] = tblend + 2;
X    nxt[tblend + 1] = END_OF_BUFFER_ACTION;
X    chk[tblend + 1] = numecs + 1;
X    chk[tblend + 2] = 1; /* anything but EOB */
X    nxt[tblend + 2] = 0; /* so that "make test" won't show arb. differences */
X
X    /* make sure every state has an end-of-buffer transition and an action # */
X    for ( i = 0; i <= lastdfa; ++i )
X	{
X	/* tag the two reserved slots so the output loop below can
X	 * recognize them by their chk[] value
X	 */
X	chk[base[i]] = EOB_POSITION;
X	chk[base[i] - 1] = ACTION_POSITION;
X	nxt[base[i] - 1] = dfaacc[i].dfaacc_state;	/* action number */
X	}
X
X    /* states 0 .. lastsc * 2 (the ones listed in yy_state_ptr below) get
X     * DEFAULT_ACTION as their action number, replacing the value stored by
X     * the previous loop
X     */
X    for ( i = 0; i <= lastsc * 2; ++i )
X	nxt[base[i] - 1] = DEFAULT_ACTION;
X
X    dataline = 0;
X    datapos = 0;
X
X    /* emit one yy_trans_info entry per nxt/chk slot */
X    for ( i = 0; i <= tblend; ++i )
X	{
X	if ( chk[i] == EOB_POSITION )
X	    /* offset from this slot to the end-of-buffer state's base */
X	    transition_struct_out( 0, base[lastdfa + 1] - i );
X
X	else if ( chk[i] == ACTION_POSITION )
X	    transition_struct_out( 0, nxt[i] );
X
X	else if ( chk[i] > numecs || chk[i] == 0 )
X	    transition_struct_out( 0, 0 );		/* unused slot */
X
X	else	/* verify, transition */
X	    /* chk[i] is written out as the verify value and (i - chk[i]) is
X	     * used as the base of the state owning this slot -- presumably
X	     * chk[] holds the equivalence class in this mode; confirm
X	     * against place_state()
X	     */
X	    transition_struct_out( chk[i], base[nxt[i]] - (i - chk[i]) );
X	}
X
X
X    /* here's the final, end-of-buffer state */
X    transition_struct_out( chk[tblend + 1], nxt[tblend + 1] );
X    transition_struct_out( chk[tblend + 2], nxt[tblend + 2] );
X
X    printf( "    };\n" );
X    printf( "\n" );
X
X    /* table of pointers to start states */
X    printf( "static struct yy_trans_info *yy_state_ptr[%d] =\n",
X	lastsc * 2 + 1 );
X    printf( "    {\n" );
X
X    for ( i = 0; i <= lastsc * 2; ++i )
X	printf( "    &yy_transition[%d],\n", base[i] );
X
X    printf( "    };\n" );
X
X    /* the equivalence-class table is only needed when classes are in use */
X    if ( useecs )
X	genecs();
X    }
X
X
X/* gentabs - generate data statements for the transition tables
X *
X * synopsis
X *    gentabs();
X */
X
gentabs()
X
X    {
X    /* i, j, k: loop/index counters; accset/nacc: accepting set of the state
X     * being written and its size; acc_array: per-state value written to
X     * the ALIST array
X     */
X    int i, j, k, *accset, nacc, *acc_array;
X    /* NOTE(review): clower() is declared here but not called anywhere in
X     * this function
X     */
X    char clower();
X
X    /* *everything* is done in terms of arrays starting at 1, so provide
X     * a null entry for the zero element of all FTL arrays
X     */
X    static char ftl_long_decl[] = "static long int %c[%d] =\n    {   0,\n";
X    static char ftl_short_decl[] = "static short int %c[%d] =\n    {   0,\n";
X    static char ftl_char_decl[] = "static char %c[%d] =\n    {   0,\n";
X
X    /* NOTE(review): no matching release of acc_array is visible in this
X     * function -- confirm whether that is intentional
X     */
X    acc_array = allocate_integer_array( current_max_dfas );
X    nummt = 0;
X
X    if ( fulltbl )
X	jambase = lastdfa + 1;	/* home of "jam" pseudo-state */
X
X    printf( "#define YY_JAM %d\n", jamstate );
X    printf( "#define YY_JAM_BASE %d\n", jambase );
X
X    if ( usemecs )
X	printf( "#define YY_TEMPLATE %d\n", lastdfa + 2 );
X
X    if ( reject )
X	{
X	/* write out accepting list and pointer list
X	 * first we generate the ACCEPT array.  In the process, we compute
X	 * the indices that will go into the ALIST array, and save the
X	 * indices in the dfaacc array
X	 */
X
X	printf( accnum > 127 ? ftl_short_decl : ftl_char_decl,
X		ACCEPT, max( numas, 1 ) + 1 );
X
X	j = 1;	/* index into ACCEPT array */
X
X	for ( i = 1; i <= lastdfa; ++i )
X	    {
X	    /* where this state's accepting list starts in ACCEPT */
X	    acc_array[i] = j;
X
X	    if ( accsiz[i] != 0 )
X		{
X		accset = dfaacc[i].dfaacc_set;
X		nacc = accsiz[i];
X
X		if ( trace )
X		    fprintf( stderr, "state # %d accepts: ", i );
X
X		for ( k = 1; k <= nacc; ++k )
X		    {
X		    ++j;
X		    mkdata( accset[k] );
X
X		    if ( trace )
X			{
X			fprintf( stderr, "[%d]", accset[k] );
X
X			if ( k < nacc )
X			    fputs( ", ", stderr );
X			else
X			    putc( '\n', stderr );
X			}
X		    }
X		}
X	    }
X
X	/* add accepting number for the "jam" state */
X	/* (i is lastdfa + 1 here, left over from the loop above) */
X	acc_array[i] = j;
X
X	dataend();
X	}
X    
X    else
X	{
X	/* without "reject" each state has a single accepting number */
X	for ( i = 1; i <= lastdfa; ++i )
X	    acc_array[i] = dfaacc[i].dfaacc_state;
X	
X	acc_array[i] = 0; /* add (null) accepting number for jam state */
X	}
X
X    /* spit out ALIST array.  If we're doing "reject", it'll be pointers
X     * into the ACCEPT array.  Otherwise it's actual accepting numbers.
X     * In either case, we just dump the numbers.
X     */
X
X    /* "lastdfa + 2" is the size of ALIST; includes room for FTL arrays
X     * beginning at 0 and for "jam" state
X     */
X    k = lastdfa + 2;
X
X    if ( reject )
X	/* we put a "cap" on the table associating lists of accepting
X	 * numbers with state numbers.  This is needed because we tell
X	 * where the end of an accepting list is by looking at where
X	 * the list for the next state starts.
X	 */
X	++k;
X
X    printf( ((reject && numas > 126) || accnum > 127) ?
X	    ftl_short_decl : ftl_char_decl, ALIST, k );
X
X    /* set up default actions */
X    /* (this overwrites whatever was stored above for states 1 .. lastsc*2) */
X    for ( i = 1; i <= lastsc * 2; ++i )
X	acc_array[i] = DEFAULT_ACTION;
X
X    acc_array[end_of_buffer_state] = END_OF_BUFFER_ACTION;
X
X    for ( i = 1; i <= lastdfa; ++i )
X	{
X	mkdata( acc_array[i] );
X
X	if ( ! reject && trace && acc_array[i] )
X	    fprintf( stderr, "state # %d accepts: [%d]\n", i, acc_array[i] );
X	}
X
X    /* add entry for "jam" state */
X    mkdata( acc_array[i] );
X
X    if ( reject )
X	/* add "cap" for the list */
X	mkdata( acc_array[i] );
X
X    dataend();
X
X    if ( useecs )
X	genecs();
X
X    if ( usemecs )
X	{
X	/* write out meta-equivalence classes (used to index templates with) */
X
X	if ( trace )
X	    fputs( "\n\nMeta-Equivalence Classes:\n", stderr );
X
X	printf( ftl_char_decl, MATCHARRAY, numecs + 1 );
X
X	for ( i = 1; i <= numecs; ++i )
X	    {
X	    if ( trace )
X		fprintf( stderr, "%d = %d\n", i, abs( tecbck[i] ) );
X
X	    mkdata( abs( tecbck[i] ) );
X	    }
X
X	dataend();
X	}
X
X    /* the base/def/nxt/chk arrays are only written when full tables are
X     * not in use
X     */
X    if ( ! fulltbl )
X	{
X	/* real states plus templates; numtemps was also incremented once by
X	 * mkdeftbl() for the jam state
X	 */
X	int total_states = lastdfa + numtemps;
X
X	printf( tblend > MAX_SHORT ? ftl_long_decl : ftl_short_decl,
X		BASEARRAY, total_states + 1 );
X
X	for ( i = 1; i <= lastdfa; ++i )
X	    {
X	    register int d = def[i];
X
X	    /* replace the compile-time JAMSTATE marker with the jam
X	     * state's real base
X	     */
X	    if ( base[i] == JAMSTATE )
X		base[i] = jambase;
X
X	    if ( d == JAMSTATE )
X		def[i] = jamstate;
X
X	    else if ( d < 0 )
X		{
X		/* template reference */
X		/* template number -d becomes state number lastdfa - d + 1,
X		 * i.e. it follows the jam state at lastdfa + 1
X		 */
X		++tmpuses;
X		def[i] = lastdfa - d + 1;
X		}
X
X	    mkdata( base[i] );
X	    }
X
X	/* generate jam state's base index */
X	mkdata( base[i] );
X
X	/* remaining entries are the templates; each defaults to the jam
X	 * state
X	 */
X	for ( ++i /* skip jam state */; i <= total_states; ++i )
X	    {
X	    mkdata( base[i] );
X	    def[i] = jamstate;
X	    }
X
X	dataend();
X
X	printf( tblend > MAX_SHORT ? ftl_long_decl : ftl_short_decl,
X		DEFARRAY, total_states + 1 );
X
X	for ( i = 1; i <= total_states; ++i )
X	    mkdata( def[i] );
X
X	dataend();
X
X	printf( lastdfa > MAX_SHORT ? ftl_long_decl : ftl_short_decl,
X		NEXTARRAY, tblend + 1 );
X
X	for ( i = 1; i <= tblend; ++i )
X	    {
X	    if ( nxt[i] == 0 || chk[i] == 0 )
X		nxt[i] = jamstate;	/* new state is the JAM state */
X
X	    mkdata( nxt[i] );
X	    }
X
X	dataend();
X
X	printf( lastdfa > MAX_SHORT ? ftl_long_decl : ftl_short_decl,
X		CHECKARRAY, tblend + 1 );
X
X	for ( i = 1; i <= tblend; ++i )
X	    {
X	    /* nummt counts the unused (zero) chk entries */
X	    if ( chk[i] == 0 )
X		++nummt;
X
X	    mkdata( chk[i] );
X	    }
X
X	dataend();
X	}
X    }
X
X
X/* generate equivalence-class tables */
X
genecs()
X
X    {
X    register int ch, row;
X    static char ftl_char_decl[] = "static char %c[%d] =\n    {   0,\n";
X    int nrows;
X
X    printf( ftl_char_decl, ECARRAY, CSIZE + 1 );
X
X    /* emit the class of every character, normalizing ecgroup[] in place:
X     * for case-insensitive scanners an upper-case letter takes the entry
X     * of its lower-case form, and every entry is replaced by its
X     * absolute value
X     */
X    ch = 1;
X
X    while ( ch <= CSIZE )
X	{
X	if ( caseins && ch >= 'A' && ch <= 'Z' )
X	    ecgroup[ch] = ecgroup[clower( ch )];
X
X	ecgroup[ch] = abs( ecgroup[ch] );
X	mkdata( ecgroup[ch] );
X	++ch;
X	}
X
X    dataend();
X
X    /* everything below is the trace dump */
X    if ( ! trace )
X	return;
X
X    fputs( "\n\nEquivalence Classes:\n\n", stderr );
X
X    /* the dump is laid out in 8 columns: row r shows characters
X     * r, r + nrows, r + 2 * nrows, ...
X     */
X    nrows = (CSIZE + 1) / 8;
X
X    for ( row = 1; row <= nrows; ++row )
X	{
X	/* ch starts at row (>= 1) and only grows, so it is never below 1 */
X	for ( ch = row; ch <= CSIZE; ch += nrows )
X	    {
X	    if ( ch > 127 )
X		fprintf( stderr, "\nSomething Weird: %d = %d\n", ch,
X			 ecgroup[ch] );
X
X	    else if ( ch == 127 )
X		fprintf( stderr, "^@ = %-2d", ecgroup[ch] );
X
X	    else if ( ch >= 32 )
X		/* printable character, shown as itself */
X		fprintf( stderr, " %c = %-2d", ch, ecgroup[ch] );
X
X	    else
X		/* 1 .. 31: shown in caret notation (^A, ^B, ...) */
X		fprintf( stderr, "^%c = %-2d",
X			 'A' + ch - 1, ecgroup[ch] );
X
X	    putc( '\t', stderr );
X	    }
X
X	putc( '\n', stderr );
X	}
X    }
X
X
X/* inittbl - initialize transition tables
X *
X * synopsis
X *   inittbl();
X *
X * Initializes "firstfree" to be one beyond the end of the table.  Initializes
X * all "chk" entries to be zero.  Note that templates are built in their
X * own tbase/tdef tables.  They are shifted down to be contiguous
X * with the non-template entries during table generation.
X */
inittbl()
X
X    {
X    register int ec;
X
X    /* every chk entry starts out zero, i.e. unused */
X    bzero( (char *) chk, current_max_xpairs * sizeof( int ) );
X
X    numtemps = 0;
X    tblend = 0;
X    firstfree = 1;	/* one beyond the (empty) table's end */
X
X    if ( ! usemecs )
X	return;
X
X    /* Chain all equivalence classes 1 .. numecs into one doubly-linked
X     * list (tecfwd = forward links, tecbck = backward links), terminated
X     * by NIL at both ends.  This is the starting point for the
X     * meta-equivalence classes: sets of equivalence classes which all
X     * have identical transitions out of TEMPLATES.
X     */
X    tecbck[1] = NIL;
X
X    for ( ec = 1; ec < numecs; ++ec )
X	{
X	tecbck[ec + 1] = ec;
X	tecfwd[ec] = ec + 1;
X	}
X
X    tecfwd[numecs] = NIL;
X    }
X
X
X/* make_tables - generate transition tables
X *
X * synopsis
X *     make_tables();
X *
X * Generates transition tables and finishes generating output file
X */
X
make_tables()
X
X    {
X    /* The calls below alternate between copying the next section of the
X     * skeleton (skelout) and emitting generated material, so their order
X     * determines the layout of the output file.
X     */
X    if ( fullspd )
X	{ /* need to define YY_TRANS_OFFSET_TYPE as a size large
X	   * enough to hold the biggest offset
X	   */
X	/* same element count that genctbl() declares for yy_transition[] */
X	int total_table_size = tblend + numecs + 1;
X
X	printf( "#define YY_TRANS_OFFSET_TYPE %s\n",
X		total_table_size > MAX_SHORT ? "long" : "short" );
X	}
X    
X    /* both full-table variants get one extra skeleton section before the
X     * tables
X     */
X    if ( fullspd || fulltbl )
X	skelout();
X
X    /* compute the tables and copy them to output file */
X    if ( fullspd )
X	genctbl();
X
X    else
X	gentabs();
X
X    skelout();
X
X    /* close the action file and reopen it for reading from the start */
X    /* NOTE(review): the fopen() result is not checked here before
X     * action_out() is called -- confirm whether a failed reopen is handled
X     * elsewhere
X     */
X    (void) fclose( temp_action_file );
X    temp_action_file = fopen( action_file_name, "r" );
X
X    /* copy prolog from action_file to output file */
X    action_out();
X
X    skelout();
X
X    /* copy actions from action_file to output file */
X    action_out();
X
X    skelout();
X
X    /* copy remainder of input to output */
X
X    line_directive_out( stdout );
X    (void) flexscan(); /* copy remainder of input to output */
X    }
X
X
X/* mkdeftbl - make the default, "jam" table entries
X *
X * synopsis
X *   mkdeftbl();
X */
X
mkdeftbl()
X
X    {
X    int i;
X
X    /* the jam state is numbered just after the last real dfa state */
X    jamstate = lastdfa + 1;
X
X    /* The loop below writes nxt/chk[tblend + 1] .. [tblend + numecs], so
X     * the highest index touched is tblend + numecs and it must be strictly
X     * less than the array size.  (The test used to be ">", which allowed a
X     * write one element past the end when tblend + numecs was exactly
X     * current_max_xpairs.)
X     */
X    while ( tblend + numecs >= current_max_xpairs )
X	expand_nxt_chk();
X
X    /* one entry per equivalence class, each owned by the jam state and
X     * with a next-state of 0
X     */
X    for ( i = 1; i <= numecs; ++i )
X	{
X	nxt[tblend + i] = 0;
X	chk[tblend + i] = jamstate;
X	}
X
X    jambase = tblend;
X
X    base[jamstate] = jambase;
X
X    /* should generate a run-time array bounds check if
X     * ever used as a default
X     */
X    def[jamstate] = BAD_SUBSCRIPT;
X
X    tblend += numecs;
X
X    /* the jam entry is counted along with the templates */
X    ++numtemps;
X    }
X
X
X/* mkentry - create base/def and nxt/chk entries for transition array
X *
X * synopsis
X *   int state[numchars + 1], numchars, statenum, deflink, totaltrans;
X *   mkentry( state, numchars, statenum, deflink, totaltrans );
X *
X * "state" is a transition array "numchars" characters in size, "statenum"
X * is the offset to be used into the base/def tables, and "deflink" is the
X * entry to put in the "def" table entry.  If "deflink" is equal to
X * "JAMSTATE", then no attempt will be made to fit zero entries of "state"
X * (i.e., jam entries) into the table.  It is assumed that by linking to
X * "JAMSTATE" they will be taken care of.  In any case, entries in "state"
X * marking transitions to "SAME_TRANS" are treated as though they will be
X * taken care of by wherever "deflink" points.  "totaltrans" is the total
X * number of transitions out of the state.  If it is below a certain threshold,
X * the tables are searched for an interior spot that will accommodate the
X * state array.
X */
X
mkentry( state, numchars, statenum, deflink, totaltrans )
register int *state;
int numchars, statenum, deflink, totaltrans;
X
X    {
X    /* minec/maxec: first and last entries of "state" that must actually be
X     * stored; baseaddr: table position chosen for state[minec]
X     */
X    register int minec, maxec, i, baseaddr;
X    /* tblbase: value stored in base[] (position of a notional state[0]);
X     * tbllast: table position of state[maxec]
X     */
X    int tblbase, tbllast;
X
X    if ( totaltrans == 0 )
X	{ /* there are no out-transitions */
X	if ( deflink == JAMSTATE )
X	    base[statenum] = JAMSTATE;
X	else
X	    base[statenum] = 0;
X
X	def[statenum] = deflink;
X	return;
X	}
X
X    /* find the first entry that has to be stored: SAME_TRANS entries never
X     * are, and zero entries are skipped only when the default is JAMSTATE
X     */
X    for ( minec = 1; minec <= numchars; ++minec )
X	{
X	if ( state[minec] != SAME_TRANS )
X	    if ( state[minec] != 0 || deflink != JAMSTATE )
X		break;
X	}
X
X    if ( totaltrans == 1 )
X	{
X	/* there's only one out-transition.  Save it for later to fill
X	 * in holes in the tables.
X	 */
X	stack1( statenum, minec, state[minec], deflink );
X	return;
X	}
X
X    /* likewise find the last entry that has to be stored */
X    for ( maxec = numchars; maxec > 0; --maxec )
X	{
X	if ( state[maxec] != SAME_TRANS )
X	    if ( state[maxec] != 0 || deflink != JAMSTATE )
X		break;
X	}
X
X    /* Whether we try to fit the state table in the middle of the table
X     * entries we have already generated, or if we just take the state
X     * table at the end of the nxt/chk tables, we must make sure that we
X     * have a valid base address (i.e., non-negative).  Note that not only are
X     * negative base addresses dangerous at run-time (because indexing the
X     * next array with one and a low-valued character might generate an
X     * array-out-of-bounds error message), but at compile-time negative
X     * base addresses denote TEMPLATES.
X     */
X
X    /* find the first transition of state that we need to worry about. */
X    if ( totaltrans * 100 <= numchars * INTERIOR_FIT_PERCENTAGE )
X	{ /* attempt to squeeze it into the middle of the tables */
X	baseaddr = firstfree;
X
X	while ( baseaddr < minec )
X	    {
X	    /* using baseaddr would result in a negative base address below
X	     * find the next free slot
X	     */
X	    for ( ++baseaddr; chk[baseaddr] != 0; ++baseaddr )
X		;
X	    }
X
X	/* Make room for the whole span plus one spare slot above it.  The
X	 * spare slot is always unused, which is what stops the unbounded
X	 * "find next free slot" scan at the end of this routine from
X	 * running off the end of chk.
X	 */
X	while ( baseaddr + maxec - minec + 1 >= current_max_xpairs )
X	    expand_nxt_chk();
X
X	for ( i = minec; i <= maxec; ++i )
X	    if ( state[i] != SAME_TRANS )
X		if ( state[i] != 0 || deflink != JAMSTATE )
X		    if ( chk[baseaddr + i - minec] != 0 )
X			{ /* baseaddr unsuitable - find another */
X			for ( ++baseaddr;
X			      baseaddr < current_max_xpairs &&
X			      chk[baseaddr] != 0;
X			      ++baseaddr )
X			    ;
X
X			/* same headroom rule for the new candidate */
X			while ( baseaddr + maxec - minec + 1 >=
X				current_max_xpairs )
X			    expand_nxt_chk();
X
X			/* reset the loop counter so we'll start all
X			 * over again next time it's incremented
X			 */
X
X			i = minec - 1;
X			}
X	}
X
X    else
X	{
X	/* ensure that the base address we eventually generate is
X	 * non-negative
X	 */
X	baseaddr = max( tblend + 1, minec );
X	}
X
X    tblbase = baseaddr - minec;
X    tbllast = tblbase + maxec;
X
X    /* keep one unused slot above tbllast (see the note on headroom above);
X     * loop in case a single increment is not enough
X     */
X    while ( tbllast + 1 >= current_max_xpairs )
X	expand_nxt_chk();
X
X    base[statenum] = tblbase;
X    def[statenum] = deflink;
X
X    /* store exactly the entries selected by the same test used above */
X    for ( i = minec; i <= maxec; ++i )
X	if ( state[i] != SAME_TRANS )
X	    if ( state[i] != 0 || deflink != JAMSTATE )
X		{
X		nxt[tblbase + i] = state[i];
X		chk[tblbase + i] = statenum;
X		}
X
X    if ( baseaddr == firstfree )
X	/* find next free slot in tables */
X	for ( ++firstfree; chk[firstfree] != 0; ++firstfree )
X	    ;
X
X    tblend = max( tblend, tbllast );
X    }
X
X
X/* mk1tbl - create table entries for a state (or state fragment) which
X *            has only one out-transition
X *
X * synopsis
X *   int state, sym, onenxt, onedef;
X *   mk1tbl( state, sym, onenxt, onedef );
X */
X
mk1tbl( state, sym, onenxt, onedef )
int state, sym, onenxt, onedef;
X
X    {
X    /* the slot used must be at or above sym, so that the base computed
X     * below (slot - sym) cannot be negative
X     */
X    firstfree = max( firstfree, sym );
X
X    /* advance to the first unused slot, growing the tables whenever the
X     * scan reaches their end
X     */
X    for ( ; ; )
X	{
X	if ( chk[firstfree] == 0 )
X	    break;
X
X	++firstfree;
X
X	if ( firstfree >= current_max_xpairs )
X	    expand_nxt_chk();
X	}
X
X    /* store the state's single transition in that slot */
X    chk[firstfree] = state;
X    nxt[firstfree] = onenxt;
X    base[state] = firstfree - sym;
X    def[state] = onedef;
X
X    /* a slot at or below the current end of the table filled a hole;
X     * nothing more to update
X     */
X    if ( firstfree <= tblend )
X	return;
X
X    /* otherwise the table grew: the slot is the new end, and firstfree
X     * moves one past it
X     */
X    tblend = firstfree;
X    ++firstfree;
X
X    if ( firstfree >= current_max_xpairs )
X	expand_nxt_chk();
X    }
X
X
X/* mkprot - create new proto entry
X *
X * synopsis
X *   int state[], statenum, comstate;
X *   mkprot( state, statenum, comstate );
X */
X
mkprot( state, statenum, comstate )
int state[], statenum, comstate;
X
X    {
X    int ec, slot;
X    int *saved;
X
X    ++numprots;
X
X    if ( numprots < MSP && numecs * numprots < PROT_SAVE_SIZE )
X	/* still room: take the next unused slot */
X	slot = numprots;
X
X    else
X	{
X	/* no room left, so recycle the slot of the proto at the tail of
X	 * the queue and make its predecessor the new tail
X	 */
X	slot = lastprot;
X	lastprot = protprev[lastprot];
X	protnext[lastprot] = NIL;
X	}
X
X    /* link the slot in at the head of the queue */
X    protnext[slot] = firstprot;
X
X    if ( firstprot != NIL )
X	protprev[firstprot] = slot;
X
X    firstprot = slot;
X
X    /* remember which state this proto stands for and its common state */
X    prottbl[slot] = statenum;
X    protcomst[slot] = comstate;
X
X    /* keep a private copy of the transition table so that later states
X     * can be compared against it rapidly; slot s owns entries
X     * numecs * (s - 1) + 1 .. numecs * s of protsave
X     */
X    saved = &protsave[numecs * (slot - 1)];
X
X    for ( ec = 1; ec <= numecs; ++ec )
X	saved[ec] = state[ec];
X    }
X
X
X/* mktemplate - create a template entry based on a state, and connect the state
X *              to it
X *
X * synopsis
X *   int state[], statenum, comstate;
X *   mktemplate( state, statenum, comstate );
X */
X
mktemplate( state, statenum, comstate )
int state[], statenum, comstate;
X
X    {
X    int ec, ndiff, rowbase, diffs[CSIZE + 1];
X    char members[CSIZE + 1];	/* classes on which "state" has a transition */
X    int nmembers = 0;
X
X    ++numtemps;
X
X    /* the template's row is parked in tnxt[] at this offset until
X     * cmptmps() creates the final transition table
X     */
X    rowbase = numtemps * numecs;
X
X    if ( rowbase + numecs >= current_max_template_xpairs )
X	{
X	/* the row would not fit; grow tnxt */
X	current_max_template_xpairs += MAX_TEMPLATE_XPAIRS_INCREMENT;
X
X	++num_reallocs;
X
X	tnxt = reallocate_integer_array( tnxt, current_max_template_xpairs );
X	}
X
X    /* build the row: wherever "state" has any transition the template
X     * goes to comstate, and everywhere else it has none
X     */
X    for ( ec = 1; ec <= numecs; ++ec )
X	{
X	if ( state[ec] != 0 )
X	    {
X	    members[nmembers++] = ec;
X	    tnxt[rowbase + ec] = comstate;
X	    }
X
X	else
X	    tnxt[rowbase + ec] = 0;
X	}
X
X    /* feed the set of transition classes to the meta-equivalence class
X     * bookkeeping
X     */
X    if ( usemecs )
X	mkeccl( members, nmembers, tecfwd, tecbck, numecs );
X
X    /* enter the template as a proto under the negative number -numtemps */
X    mkprot( tnxt + rowbase, -numtemps, comstate );
X
X    /* mkprot puts new protos at the beginning of the proto queue, so
X     * firstprot is now the template just created
X     */
X
X    /* finally store the state itself as its differences from the template,
X     * with the template as its default
X     */
X    ndiff = tbldiff( state, firstprot, diffs );
X    mkentry( diffs, numecs, statenum, -numtemps, ndiff );
X    }
X
X
X/* mv2front - move proto queue element to front of queue
X *
X * synopsis
X *   int qelm;
X *   mv2front( qelm );
X */
X
mv2front( qelm )
int qelm;
X
X    {
X    int before, after;
X
X    /* already at the head of the queue: nothing to do */
X    if ( firstprot == qelm )
X	return;
X
X    before = protprev[qelm];
X    after = protnext[qelm];
X
X    /* if the tail is being moved, its predecessor becomes the tail */
X    if ( qelm == lastprot )
X	lastprot = before;
X
X    /* unlink qelm by joining its two neighbours */
X    protnext[before] = after;
X
X    if ( after != NIL )
X	protprev[after] = before;
X
X    /* relink qelm ahead of the old head */
X    protprev[qelm] = NIL;
X    protnext[qelm] = firstprot;
X    protprev[firstprot] = qelm;
X    firstprot = qelm;
X    }
X
X
X/* ntod - convert an ndfa to a dfa
X *
X * synopsis
X *    ntod();
X *
X *  creates the dfa corresponding to the ndfa we've constructed.  the
X *  dfa starts out in state #1.
X *
X *  The conversion is driven by a work queue kept in todo[], with todo_head
X *  as the read index and todo_next as the write index.  The queue is seeded
X *  with two dfa states per start condition, plus an end-of-buffer state
X *  when fulltbl is set.  Each state taken off the queue has its
X *  out-transitions worked out one equivalence class at a time; whenever
X *  snstods() returns non-zero for a target (treated here as "this is a new
X *  dfa state") the target is appended to the queue so that it is processed
X *  in its turn.
X *
X *  symlist[] and duplist[] are cleared once up front and are reset entry by
X *  entry as each class is consumed, so they are clean again when the next
X *  state is partitioned.
X *
X *  When caseins is set and useecs is not, the entries for 'A' through 'Z'
X *  are overwritten with those for 'a' through 'z' before the row is used.
X *
X *  What happens to a finished state[] row depends on the table mode:
X *    fulltbl   - the row is written out at once through mk2data()
X *    fullspd   - the row is handed to place_state()
X *    otherwise - the most frequent target state is determined and the row
X *                is handed to bldtbl()
X *
X *  Once the queue is empty, fulltbl mode finishes with dataend(); the other
X *  modes call cmptmps(), run mk1tbl() over everything still sitting on the
X *  one-transition stack, and then call mkdeftbl().
X *
X *  NOTE(review): ADD_QUEUE_ELEMENT wraps its index back to 0 on reaching
X *  current_max_dfas, whereas NEXT_QUEUE_ELEMENT takes its index modulo
X *  current_max_dfas + 1.  The two only agree as long as the queue never
X *  actually wraps around - confirm that this is guaranteed.
X *
X *  NOTE(review): accset and nset are allocated here and are not released
X *  anywhere in this function.
X */
ntod()
X
X    {
X    int *accset, ds, nacc, newds;
X    int duplist[CSIZE + 1], sym, hashval, numstates, dsize;
X    int targfreq[CSIZE + 1], targstate[CSIZE + 1], state[CSIZE + 1];
X    int *nset, *dset;
X    int targptr, totaltrans, i, comstate, comfreq, targ;
X    int *epsclosure(), snstods(), symlist[CSIZE + 1];
X
X    /* this is so find_table_space(...) will know where to start looking in
X     * chk/nxt for unused records for space to put in the state
X     */
X    if ( fullspd )
X	firstfree = 0;
X
X    accset = allocate_integer_array( accnum + 1 );
X    nset = allocate_integer_array( current_max_dfa_size );
X
X    todo_head = todo_next = 0;
X
X#define ADD_QUEUE_ELEMENT(element) \
X	if ( ++element >= current_max_dfas ) \
X	    { /* check for queue overflowing */ \
X	    if ( todo_head == 0 ) \
X		increase_max_dfas(); \
X	    else \
X		element = 0; \
X	    }
X
X#define NEXT_QUEUE_ELEMENT(element) ((element + 1) % (current_max_dfas + 1))
X
X    for ( i = 0; i <= CSIZE; ++i )
X	{
X	duplist[i] = NIL;
X	symlist[i] = false;
X	}
X
X    for ( i = 0; i <= accnum; ++i )
X	accset[i] = NIL;
X
X    if ( trace )
X	{
X	dumpnfa( scset[1] );
X	fputs( "\n\nDFA Dump:\n\n", stderr );
X	}
X
X    inittbl();
X
X    if ( fullspd )
X	{
X	for ( i = 0; i <= numecs; ++i )
X	    state[i] = 0;
X	place_state( state, 0, 0 );
X	}
X
X    if ( fulltbl )
X	{
X	/* declare it "short" because it's a real long-shot that that
X	 * won't be large enough
X	 */
X	printf( "static short int %c[][%d] =\n    {\n", NEXTARRAY,
X		numecs + 1 ); /* '}' so vi doesn't get too confused */
X
X	/* generate 0 entries for state #0 */
X	for ( i = 0; i <= numecs; ++i )
X	    mk2data( 0 );
X
X	/* force ',' and dataflush() next call to mk2data */
X	datapos = NUMDATAITEMS;
X
X	/* force extra blank line next dataflush() */
X	dataline = NUMDATALINES;
X	}
X
X    /* create the first states: two per start condition, with odd values
X     * of i giving the ordinary start state and even values of i giving the
X     * beginning-of-line one
X     */
X
X    for ( i = 1; i <= lastsc * 2; ++i )
X	{
X	numstates = 1;
X
X	/* for each start condition, make one state for the case when
X	 * we're at the beginning of the line (the '^' operator) and
X	 * one for the case when we're not
X	 */
X	if ( i % 2 == 1 )
X	    nset[numstates] = scset[(i / 2) + 1];
X	else
X	    nset[numstates] = mkbranch( scbol[i / 2], scset[i / 2] );
X
X	nset = epsclosure( nset, &numstates, accset, &nacc, &hashval );
X
X	if ( snstods( nset, numstates, accset, nacc, hashval, &ds ) )
X	    {
X	    numas = numas + nacc;
X	    totnst = totnst + numstates;
X
X	    todo[todo_next] = ds;
X	    ADD_QUEUE_ELEMENT(todo_next);
X	    }
X	}
X
X    if ( fulltbl )
X	{
X	if ( ! snstods( nset, 0, accset, 0, 0, &end_of_buffer_state ) )
X	    flexfatal( "could not create unique end-of-buffer state" );
X
X	numas += 1;
X
X	todo[todo_next] = end_of_buffer_state;
X	ADD_QUEUE_ELEMENT(todo_next);
X	}
X
X    while ( todo_head != todo_next )
X	{
X	targptr = 0;
X	totaltrans = 0;
X
X	for ( i = 1; i <= numecs; ++i )
X	    state[i] = 0;
X
X	ds = todo[todo_head];
X	todo_head = NEXT_QUEUE_ELEMENT(todo_head);
X
X	dset = dss[ds];
X	dsize = dfasiz[ds];
X
X	if ( trace )
X	    fprintf( stderr, "state # %d:\n", ds );
X
X	sympartition( dset, dsize, symlist, duplist );
X
X	for ( sym = 1; sym <= numecs; ++sym )
X	    {
X	    if ( symlist[sym] )
X		{
X		symlist[sym] = 0;
X
X		if ( duplist[sym] == NIL )
X		    { /* symbol has unique out-transitions */
X		    numstates = symfollowset( dset, dsize, sym, nset );
X		    nset = epsclosure( nset, &numstates, accset,
X				       &nacc, &hashval );
X
X		    if ( snstods( nset, numstates, accset,
X				  nacc, hashval, &newds ) )
X			{
X			totnst = totnst + numstates;
X			todo[todo_next] = newds;
X			ADD_QUEUE_ELEMENT(todo_next);
X			numas = numas + nacc;
X			}
X
X		    state[sym] = newds;
X
X		    if ( trace )
X			fprintf( stderr, "\t%d\t%d\n", sym, newds );
X
X		    targfreq[++targptr] = 1;
X		    targstate[targptr] = newds;
X		    ++numuniq;
X		    }
X
X		else
X		    {
X		    /* sym's equivalence class has the same transitions
X		     * as duplist(sym)'s equivalence class
X		     */
X		    targ = state[duplist[sym]];
X		    state[sym] = targ;
X
X		    if ( trace )
X			fprintf( stderr, "\t%d\t%d\n", sym, targ );
X
X		    /* update frequency count for destination state.  The
X		     * search below has no upper bound; presumably targ was
X		     * already entered in targstate[] when the class named by
X		     * duplist[sym] was handled - confirm against
X		     * sympartition()
X		     */
X
X		    i = 0;
X		    while ( targstate[++i] != targ )
X			;
X
X		    ++targfreq[i];
X		    ++numdup;
X		    }
X
X		++totaltrans;
X		duplist[sym] = NIL;
X		}
X	    }
X
X	numsnpairs = numsnpairs + totaltrans;
X
X	if ( caseins && ! useecs )
X	    {
X	    register int j;
X
X	    for ( i = 'A', j = 'a'; i <= 'Z'; ++i, ++j )
X		state[i] = state[j];
X	    }
X
X	if ( fulltbl )
X	    {
X	    /* supply array's 0-element: it leads to end_of_buffer_state in
X	     * every row except the end-of-buffer state's own, where it is 0
X	     */
X	    if ( ds == end_of_buffer_state )
X		mk2data( 0 );
X	    else
X		mk2data( end_of_buffer_state );
X
X	    for ( i = 1; i <= numecs; ++i )
X		mk2data( state[i] );
X
X	    /* force ',' and dataflush() next call to mk2data */
X	    datapos = NUMDATAITEMS;
X
X	    /* force extra blank line next dataflush() */
X	    dataline = NUMDATALINES;
X	    }
X
X        else if ( fullspd )
X	    place_state( state, ds, totaltrans );
X
X	else
X	    {
X	    /* determine which destination state is the most common, and
X	     * how many transitions to it there are
X	     */
X
X	    comfreq = 0;
X	    comstate = 0;
X
X	    for ( i = 1; i <= targptr; ++i )
X		if ( targfreq[i] > comfreq )
X		    {
X		    comfreq = targfreq[i];
X		    comstate = targstate[i];
X		    }
X
X	    bldtbl( state, ds, totaltrans, comstate, comfreq );
X	    }
X	}
X
X    if ( fulltbl )
X	dataend();
X
X    else
X	{
X	cmptmps();  /* create compressed template entries */
X
X	/* create tables for all the states with only one out-transition */
X	while ( onesp > 0 )
X	    {
X	    mk1tbl( onestate[onesp], onesym[onesp], onenext[onesp],
X		    onedef[onesp] );
X	    --onesp;
X	    }
X
X	mkdeftbl();
X	}
X    
X    }
X
X
X/* place_state - enter a state into the full speed transition table
X *
X * synopsis
X *     int *state, statenum, transnum;
X *     place_state( state, statenum, transnum );
X *
X * "state" holds the transitions of the statenum'th state: indexed by
X * equivalence class, each entry is the number of the state to enter on
X * that class.  transnum is the number of out-transitions the state has.
X * A position is obtained from find_table_space(), recorded in
X * base[statenum], and the non-zero transitions are copied into chk/nxt
X * starting there.  tblend is raised if the state reaches past it.
X */
X
place_state( state, statenum, transnum )
int *state, statenum, transnum;
X
X    {
X    register int ec;
X    int position = find_table_space( state, transnum );
X    int last;
X
X    /* base[] records where each state starts */
X    base[statenum] = position;
X
X    /* the slot just before the state holds the action number marker and
X     * the state's first slot holds the end-of-buffer marker; both are set
X     * non-zero so that find_table_space() sees these chk/nxt positions as
X     * taken and does not give them to another state
X     */
X    chk[position - 1] = 1;
X    chk[position] = 1;
X
X    /* copy every non-zero transition into chk and nxt */
X    for ( ec = 1; ec <= numecs; ++ec )
X	{
X	if ( state[ec] == 0 )
X	    continue;
X
X	chk[position + ec] = ec;
X	nxt[position + ec] = state[ec];
X	}
X
X    last = position + numecs;
X
X    if ( last > tblend )
X	tblend = last;
X    }
X
X
X/* stack1 - save states with only one out-transition to be processed later
X *
X * synopsis
X *   int statenum, sym, nextstate, deflink;
X *   stack1( statenum, sym, nextstate, deflink );
X *
X * if there's room for another state on the "one-transition" stack, the
X * state is pushed onto it, to be processed later by mk1tbl.  If there's
X * no room, we process the sucker right now.
X *
X * The stack is 1-based: onesp is incremented before the entry is stored,
X * so slot 0 of onestate[], onesym[], onenext[] and onedef[] is never
X * written here.
X */
X
stack1( statenum, sym, nextstate, deflink )
int statenum, sym, nextstate, deflink;
X
X    {
X    /* a push stores at index onesp + 1, so the stack counts as full once
X     * onesp reaches ONE_STACK_SIZE - 1; testing against ONE_STACK_SIZE
X     * itself would permit a store at index ONE_STACK_SIZE.
X     * NOTE(review): this assumes the four one*[] arrays are declared with
X     * ONE_STACK_SIZE elements - confirm against their declarations
X     */
X    if ( onesp >= ONE_STACK_SIZE - 1 )
X	mk1tbl( statenum, sym, nextstate, deflink );
X
X    else
X	{
X	++onesp;
X	onestate[onesp] = statenum;
X	onesym[onesp] = sym;
X	onenext[onesp] = nextstate;
X	onedef[onesp] = deflink;
X	}
X    }
X
X
X/* tbldiff - work out how a state table differs from a proto
X *
X * synopsis
X *   int state[], pr, ext[];
X *   int tbldiff, numdifferences;
X *   numdifferences = tbldiff( state, pr, ext )
X *
X * "state" is compared, for equivalence classes 1 through numecs, against
X * the saved table of proto number "pr"; "pr" selects the proto's row in
X * the protsave[] save area.  For each class, "ext" receives SAME_TRANS if
X * the two tables agree and the entry from "state" if they do not.  The
X * function value is the number of classes on which they disagree, i.e.
X * numecs minus the number of SAME_TRANS entries written to "ext".
X */
X
int tbldiff( state, pr, ext )
int state[], pr, ext[];
X
X    {
X    register int ec;
X    register int *proto = &protsave[numecs * (pr - 1)];
X    register int ndiff = 0;
X
X    for ( ec = 1; ec <= numecs; ++ec )
X	{
X	if ( proto[ec] == state[ec] )
X	    ext[ec] = SAME_TRANS;
X	else
X	    {
X	    ext[ec] = state[ec];
X	    ++ndiff;
X	    }
X	}
X
X    return ( ndiff );
X    }
END_OF_FILE
if test 39351 -ne `wc -c <'tblcmp.c'`; then
    echo shar: \"'tblcmp.c'\" unpacked with wrong size!
fi
# end of 'tblcmp.c'
fi
echo shar: End of archive 5 \(of 5\).
# Mark part 5 as unpacked, then report which parts (if any) are still missing.
cp /dev/null ark5isdone
MISSING=""
for I in 1 2 3 4 5
do
    test -f ark${I}isdone || MISSING="${MISSING} ${I}"
done
if test -n "${MISSING}"
then
    echo You still need to unpack the following archives:
    echo "        " ${MISSING}
else
    echo You have unpacked all 5 archives.
    rm -f ark[1-9]isdone
fi
##  End of shell archive.
exit 0
-- 
Please send comp.sources.unix-related mail to rsalz@uunet.uu.net.