[comp.sources.unix] v19i057: Flex, a fast LEX replacement, Part03/07

rsalz@uunet.uu.net (Rich Salz) (06/23/89)

Submitted-by: Vern Paxson <vern@csam.lbl.gov>
Posting-number: Volume 19, Issue 57
Archive-name: flex2/part03

#! /bin/sh
# This is a shell archive.  Remove anything before this line, then unpack
# it by saving it into a file and typing "sh file".  To overwrite existing
# files, type "sh file -c".  You can also feed this as standard input via
# unshar, or by typing "sh <file", e.g..  If this archive is complete, you
# will see the following message at the end:
#		"End of archive 3 (of 7)."
# Contents:  flex/main.c flex/nfa.c
# Wrapped by rsalz@prune.bbn.com on Thu Jun 22 19:01:46 1989
PATH=/bin:/usr/bin:/usr/ucb ; export PATH
if test -f 'flex/main.c' -a "${1}" != "-c" ; then 
  echo shar: Will not clobber existing file \"'flex/main.c'\"
else
echo shar: Extracting \"'flex/main.c'\" \(16556 characters\)
sed "s/^X//" >'flex/main.c' <<'END_OF_FILE'
X/* flex - tool to generate fast lexical analyzers
X *
X *
X * Copyright (c) 1989 The Regents of the University of California.
X * All rights reserved.
X *
X * This code is derived from software contributed to Berkeley by
X * Vern Paxson.
X * 
X * The United States Government has rights in this work pursuant to
X * contract no. DE-AC03-76SF00098 between the United States Department of
X * Energy and the University of California.
X *
X * Redistribution and use in source and binary forms are permitted
X * provided that the above copyright notice and this paragraph are
X * duplicated in all such forms and that any documentation,
X * advertising materials, and other materials related to such
X * distribution and use acknowledge that the software was developed
X * by the University of California, Berkeley.  The name of the
X * University may not be used to endorse or promote products derived
X * from this software without specific prior written permission.
X * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR
X * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
X * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
X *
X */
X
X#ifndef lint
X
Xstatic char copyright[] =
X    "@(#) Copyright (c) 1989 The Regents of the University of California.\n";
Xstatic char CR_continuation[] = "@(#) All rights reserved.\n";
X
Xstatic char rcsid[] =
X    "@(#) $Header: main.c,v 2.2 89/06/20 16:36:26 vern Exp $ (LBL)";
X
X#endif
X
X
X#include "flexdef.h"
X
Xstatic char flex_version[] = "2.1 (beta)";
X
X
X/* these globals are all defined and commented in flexdef.h */
Xint printstats, syntaxerror, eofseen, ddebug, trace, spprdflt;
Xint interactive, caseins, useecs, fulltbl, usemecs;
Xint fullspd, gen_line_dirs, performance_report, backtrack_report;
Xint yymore_used, reject, real_reject, continued_action;
Xint yymore_really_used, reject_really_used;
Xint datapos, dataline, linenum;
XFILE *skelfile = NULL;
Xchar *infilename = NULL;
Xint onestate[ONE_STACK_SIZE], onesym[ONE_STACK_SIZE];
Xint onenext[ONE_STACK_SIZE], onedef[ONE_STACK_SIZE], onesp;
Xint current_mns, num_rules, current_max_rules, lastnfa;
Xint *firstst, *lastst, *finalst, *transchar, *trans1, *trans2;
Xint *accptnum, *assoc_rule, *state_type, *rule_type, *rule_linenum;
Xint current_state_type;
Xint variable_trailing_context_rules;
Xint numtemps, numprots, protprev[MSP], protnext[MSP], prottbl[MSP];
Xint protcomst[MSP], firstprot, lastprot, protsave[PROT_SAVE_SIZE];
Xint numecs, nextecm[CSIZE + 1], ecgroup[CSIZE + 1], nummecs, tecfwd[CSIZE + 1];
Xint tecbck[CSIZE + 1];
Xint lastsc, current_max_scs, *scset, *scbol, *scxclu, *sceof, *actvsc;
Xchar **scname;
Xint current_max_dfa_size, current_max_xpairs;
Xint current_max_template_xpairs, current_max_dfas;
Xint lastdfa, *nxt, *chk, *tnxt;
Xint *base, *def, tblend, firstfree, **dss, *dfasiz;
Xunion dfaacc_union *dfaacc;
Xint *accsiz, *dhash, numas;
Xint numsnpairs, jambase, jamstate;
Xint lastccl, current_maxccls, *cclmap, *ccllen, *cclng, cclreuse;
Xint current_max_ccl_tbl_size;
Xchar *ccltbl;
Xchar *starttime, *endtime, nmstr[MAXLINE];
Xint sectnum, nummt, hshcol, dfaeql, numeps, eps2, num_reallocs;
Xint tmpuses, totnst, peakpairs, numuniq, numdup, hshsave;
Xint num_backtracking, bol_needed;
XFILE *temp_action_file;
XFILE *backtrack_file;
Xint end_of_buffer_state;
X#ifndef SHORT_FILE_NAMES
Xchar action_file_name[] = "/tmp/flexXXXXXX";
X#else
Xchar action_file_name[] = "flexXXXXXX.tmp";
X#endif
X
X#ifndef SHORT_FILE_NAMES
Xstatic char outfile[] = "lex.yy.c";
X#else
Xstatic char outfile[] = "lexyy.c";
X#endif
Xstatic int outfile_created = 0;
X
X
X/* flex - main program
X *
X * synopsis (from the shell)
X *    flex [-v] [file ...]
X */
X
Xmain( argc, argv )
Xint argc;
Xchar **argv;
X
X    {
X    flexinit( argc, argv );
X
X    readin();
X
X    if ( syntaxerror )
X	flexend( 1 );
X
X    if ( yymore_really_used == REALLY_USED )
X	yymore_used = true;
X    else if ( yymore_really_used == REALLY_NOT_USED )
X	yymore_used = false;
X
X    if ( reject_really_used == REALLY_USED )
X	reject = true;
X    else if ( reject_really_used == REALLY_NOT_USED )
X	reject = false;
X
X    if ( performance_report )
X	{
X	if ( yymore_used )
X	    fprintf( stderr,
X		     "yymore() entails a minor performance penalty\n" );
X
X	if ( interactive )
X	    fprintf( stderr,
X		 "-I (interactive) entails a minor performance penalty\n" );
X
X	if ( reject )
X	    fprintf( stderr,
X		     "REJECT entails a large performance penalty\n" );
X
X	if ( variable_trailing_context_rules )
X	    fprintf( stderr,
X"Variable trailing context rules entail a large performance penalty\n" );
X	}
X
X    if ( reject )
X	real_reject = true;
X
X    if ( variable_trailing_context_rules )
X	reject = true;
X
X    if ( (fulltbl || fullspd) && reject )
X	{
X	if ( real_reject )
X	    flexerror( "REJECT cannot be used with -f or -F" );
X	else
X	    flexerror(
X	"variable trailing context rules cannot be used with -f or -F" );
X	}
X
X    /* convert the ndfa to a dfa */
X    ntod();
X
X    /* generate the C state transition tables from the DFA */
X    make_tables();
X
X    /* note, flexend does not return.  It exits with its argument as status. */
X
X    flexend( 0 );
X
X    /*NOTREACHED*/
X    }
X
X
X/* flexend - terminate flex
X *
X * synopsis
X *    int status;
X *    flexend( status );
X *
X *    status is exit status.
X *
X * note
X *    This routine does not return.
X */
X
Xflexend( status )
Xint status;
X
X    {
X    int tblsiz;
X    char *flex_gettime();
X
X    if ( skelfile != NULL )
X	(void) fclose( skelfile );
X
X    if ( temp_action_file )
X	{
X	(void) fclose( temp_action_file );
X	(void) unlink( action_file_name );
X	}
X
X    if ( status != 0 && outfile_created )
X	{
X	(void) fclose( stdout );
X	(void) unlink( outfile );
X	}
X
X    if ( backtrack_report )
X	{
X	if ( num_backtracking == 0 )
X	    fprintf( backtrack_file, "No backtracking.\n" );
X	else if ( fullspd || fulltbl )
X	    fprintf( backtrack_file,
X		     "%d backtracking (non-accepting) states.\n",
X		     num_backtracking );
X	else
X	    fprintf( backtrack_file, "Compressed tables always backtrack.\n" );
X
X	(void) fclose( backtrack_file );
X	}
X
X    if ( printstats )
X	{
X	endtime = flex_gettime();
X
X	fprintf( stderr, "flex version %s usage statistics:\n", flex_version );
X	fprintf( stderr, "  started at %s, finished at %s\n",
X		 starttime, endtime );
X
X	fprintf( stderr, "  %d/%d NFA states\n", lastnfa, current_mns );
X	fprintf( stderr, "  %d/%d DFA states (%d words)\n", lastdfa,
X			 current_max_dfas, totnst );
X	fprintf( stderr, "  %d rules\n", num_rules - 1 /* - 1 for def. rule */ );
X
X	if ( num_backtracking == 0 )
X	    fprintf( stderr, "  No backtracking\n" );
X	else if ( fullspd || fulltbl )
X	    fprintf( stderr, "  %d backtracking (non-accepting) states\n",
X		     num_backtracking );
X	else
X	    fprintf( stderr, "  compressed tables always backtrack\n" );
X
X	if ( bol_needed )
X	    fprintf( stderr, "  Beginning-of-line patterns used\n" );
X
X	fprintf( stderr, "  %d/%d start conditions\n", lastsc,
X			 current_max_scs );
X	fprintf( stderr, "  %d epsilon states, %d double epsilon states\n",
X		 numeps, eps2 );
X
X	if ( lastccl == 0 )
X	    fprintf( stderr, "  no character classes\n" );
X	else
X	    fprintf( stderr,
X	"  %d/%d character classes needed %d/%d words of storage, %d reused\n",
X		     lastccl, current_maxccls,
X		     cclmap[lastccl] + ccllen[lastccl],
X		     current_max_ccl_tbl_size, cclreuse );
X
X	fprintf( stderr, "  %d state/nextstate pairs created\n", numsnpairs );
X	fprintf( stderr, "  %d/%d unique/duplicate transitions\n",
X		 numuniq, numdup );
X
X	if ( fulltbl )
X	    {
X	    tblsiz = lastdfa * numecs;
X	    fprintf( stderr, "  %d table entries\n", tblsiz );
X	    }
X
X	else
X	    {
X	    tblsiz = 2 * (lastdfa + numtemps) + 2 * tblend;
X
X	    fprintf( stderr, "  %d/%d base-def entries created\n",
X		     lastdfa + numtemps, current_max_dfas );
X	    fprintf( stderr, "  %d/%d (peak %d) nxt-chk entries created\n",
X		     tblend, current_max_xpairs, peakpairs );
X	    fprintf( stderr,
X		     "  %d/%d (peak %d) template nxt-chk entries created\n",
X		     numtemps * nummecs, current_max_template_xpairs,
X		     numtemps * numecs );
X	    fprintf( stderr, "  %d empty table entries\n", nummt );
X	    fprintf( stderr, "  %d protos created\n", numprots );
X	    fprintf( stderr, "  %d templates created, %d uses\n",
X		     numtemps, tmpuses );
X	    }
X
X	if ( useecs )
X	    {
X	    tblsiz = tblsiz + CSIZE;
X	    fprintf( stderr, "  %d/%d equivalence classes created\n",
X		     numecs, CSIZE );
X	    }
X
X	if ( usemecs )
X	    {
X	    tblsiz = tblsiz + numecs;
X	    fprintf( stderr, "  %d/%d meta-equivalence classes created\n",
X		     nummecs, CSIZE );
X	    }
X
X	fprintf( stderr, "  %d (%d saved) hash collisions, %d DFAs equal\n",
X		 hshcol, hshsave, dfaeql );
X	fprintf( stderr, "  %d sets of reallocations needed\n", num_reallocs );
X	fprintf( stderr, "  %d total table entries needed\n", tblsiz );
X	}
X
X#ifndef VMS
X    exit( status );
X#else
X    exit( status + 1 );
X#endif
X    }
X
X
X/* flexinit - initialize flex
X *
X * synopsis
X *    int argc;
X *    char **argv;
X *    flexinit( argc, argv );
X */
X
Xflexinit( argc, argv )
Xint argc;
Xchar **argv;
X
X    {
X    int i, sawcmpflag, use_stdout;
X    char *arg, *skelname = NULL, *flex_gettime(), clower(), *mktemp();
X
X    printstats = syntaxerror = trace = spprdflt = interactive = caseins = false;
X    backtrack_report = performance_report = ddebug = fulltbl = fullspd = false;
X    yymore_used = continued_action = reject = false;
X    yymore_really_used = reject_really_used = false;
X    gen_line_dirs = usemecs = useecs = true;
X
X    sawcmpflag = false;
X    use_stdout = false;
X
X    /* read flags */
X    for ( --argc, ++argv; argc ; --argc, ++argv )
X	{
X	if ( argv[0][0] != '-' || argv[0][1] == '\0' )
X	    break;
X
X	arg = argv[0];
X
X	for ( i = 1; arg[i] != '\0'; ++i )
X	    switch ( arg[i] )
X		{
X		case 'b':
X		    backtrack_report = true;
X		    break;
X
X		case 'c':
X		    if ( i != 1 )
X			flexerror( "-c flag must be given separately" );
X
X		    if ( ! sawcmpflag )
X			{
X			useecs = false;
X			usemecs = false;
X			fulltbl = false;
X			sawcmpflag = true;
X			}
X
X		    for ( ++i; arg[i] != '\0'; ++i )
X			switch ( clower( arg[i] ) )
X			    {
X			    case 'e':
X				useecs = true;
X				break;
X
X			    case 'F':
X				fullspd = true;
X				break;
X
X			    case 'f':
X				fulltbl = true;
X				break;
X
X			    case 'm':
X				usemecs = true;
X				break;
X
X			    default:
X				lerrif( "unknown -c option %c",
X					(int) arg[i] );
X				break;
X			    }
X		    
X		    goto get_next_arg;
X
X		case 'd':
X		    ddebug = true;
X		    break;
X
X		case 'f':
X		    useecs = usemecs = false;
X		    fulltbl = true;
X		    break;
X
X		case 'F':
X		    useecs = usemecs = false;
X		    fullspd = true;
X		    break;
X
X		case 'I':
X		    interactive = true;
X		    break;
X
X		case 'i':
X		    caseins = true;
X		    break;
X
X		case 'L':
X		    gen_line_dirs = false;
X		    break;
X
X		case 'p':
X		    performance_report = true;
X		    break;
X
X		case 'S':
X		    if ( i != 1 )
X			flexerror( "-S flag must be given separately" );
X
X		    skelname = arg + i + 1;
X		    goto get_next_arg;
X
X		case 's':
X		    spprdflt = true;
X		    break;
X
X		case 't':
X		    use_stdout = true;
X		    break;
X
X		case 'T':
X		    trace = true;
X		    break;
X
X		case 'v':
X		    printstats = true;
X		    break;
X
X		default:
X		    lerrif( "unknown flag %c", (int) arg[i] );
X		    break;
X		}
X
Xget_next_arg: /* used by -c and -S flags in lieu of a "continue 2" control */
X	;
X	}
X
X    if ( (fulltbl || fullspd) && usemecs )
X	flexerror( "full table and -cm don't make sense together" );
X
X    if ( (fulltbl || fullspd) && interactive )
X	flexerror( "full table and -I are (currently) incompatible" );
X
X    if ( fulltbl && fullspd )
X	flexerror( "full table and -F are mutually exclusive" );
X
X    if ( ! skelname )
X	{
X	static char skeleton_name_storage[400];
X
X	skelname = skeleton_name_storage;
X	(void) strcpy( skelname, DEFAULT_SKELETON_FILE );
X	}
X
X    if ( ! use_stdout )
X	{
X	FILE *prev_stdout = freopen( outfile, "w", stdout );
X
X	if ( prev_stdout == NULL )
X	    flexerror( "could not create lex.yy.c" );
X
X	outfile_created = 1;
X	}
X
X    if ( argc )
X	{
X	if ( argc > 1 )
X	    flexerror( "extraneous argument(s) given" );
X
X	yyin = fopen( infilename = argv[0], "r" );
X
X	if ( yyin == NULL )
X	    lerrsf( "can't open %s", argv[0] );
X	}
X
X    else
X	yyin = stdin;
X
X    if ( backtrack_report )
X	{
X#ifndef SHORT_FILE_NAMES
X	backtrack_file = fopen( "lex.backtrack", "w" );
X#else
X	backtrack_file = fopen( "lex.bck", "w" );
X#endif
X
X	if ( backtrack_file == NULL )
X	    flexerror( "could not create lex.backtrack" );
X	}
X
X    else
X	backtrack_file = NULL;
X
X
X    lastccl = 0;
X    lastsc = 0;
X
X    /* initialize the statistics */
X    starttime = flex_gettime();
X
X    if ( (skelfile = fopen( skelname, "r" )) == NULL )
X	lerrsf( "can't open skeleton file %s", skelname );
X
X    (void) mktemp( action_file_name );
X
X    if ( (temp_action_file = fopen( action_file_name, "w" )) == NULL )
X	lerrsf( "can't open temporary action file %s", action_file_name );
X
X    lastdfa = lastnfa = num_rules = numas = numsnpairs = tmpuses = 0;
X    numecs = numeps = eps2 = num_reallocs = hshcol = dfaeql = totnst = 0;
X    numuniq = numdup = hshsave = eofseen = datapos = dataline = 0;
X    num_backtracking = onesp = numprots = 0;
X    variable_trailing_context_rules = bol_needed = false;
X
X    linenum = sectnum = 1;
X    firstprot = NIL;
X
X    /* used in mkprot() so that the first proto goes in slot 1
X     * of the proto queue
X     */
X    lastprot = 1;
X
X    if ( useecs )
X	{
X	/* set up doubly-linked equivalence classes */
X	ecgroup[1] = NIL;
X
X	for ( i = 2; i <= CSIZE; ++i )
X	    {
X	    ecgroup[i] = i - 1;
X	    nextecm[i - 1] = i;
X	    }
X
X	nextecm[CSIZE] = NIL;
X	}
X
X    else
X	{ /* put everything in its own equivalence class */
X	for ( i = 1; i <= CSIZE; ++i )
X	    {
X	    ecgroup[i] = i;
X	    nextecm[i] = BAD_SUBSCRIPT;	/* to catch errors */
X	    }
X	}
X
X    set_up_initial_allocations();
X    }
X
X
X/* readin - read in the rules section of the input file(s)
X *
X * synopsis
X *    readin();
X */
X
Xreadin()
X
X    {
X    if ( ddebug )
X	puts( "#define FLEX_DEBUG" );
X
X    if ( fulltbl )
X	puts( "#define FLEX_FULL_TABLE" );
X    else if ( fullspd )
X	puts( "#define FLEX_FAST_COMPRESSED" );
X    else
X	puts( "#define FLEX_COMPRESSED" );
X
X    skelout();
X
X    line_directive_out( stdout );
X
X    if ( yyparse() )
X	lerrif( "fatal parse error at line %d", linenum );
X
X    if ( useecs )
X	{
X	numecs = cre8ecs( nextecm, ecgroup, CSIZE );
X	ccl2ecl();
X	}
X
X    else
X	numecs = CSIZE;
X
X    }
X
X
X
X/* set_up_initial_allocations - allocate memory for internal tables */
X
Xset_up_initial_allocations()
X
X    {
X    current_mns = INITIAL_MNS;
X    firstst = allocate_integer_array( current_mns );
X    lastst = allocate_integer_array( current_mns );
X    finalst = allocate_integer_array( current_mns );
X    transchar = allocate_integer_array( current_mns );
X    trans1 = allocate_integer_array( current_mns );
X    trans2 = allocate_integer_array( current_mns );
X    accptnum = allocate_integer_array( current_mns );
X    assoc_rule = allocate_integer_array( current_mns );
X    state_type = allocate_integer_array( current_mns );
X
X    current_max_rules = INITIAL_MAX_RULES;
X    rule_type = allocate_integer_array( current_max_rules );
X    rule_linenum = allocate_integer_array( current_max_rules );
X
X    current_max_scs = INITIAL_MAX_SCS;
X    scset = allocate_integer_array( current_max_scs );
X    scbol = allocate_integer_array( current_max_scs );
X    scxclu = allocate_integer_array( current_max_scs );
X    sceof = allocate_integer_array( current_max_scs );
X    scname = allocate_char_ptr_array( current_max_scs );
X    actvsc = allocate_integer_array( current_max_scs );
X
X    current_maxccls = INITIAL_MAX_CCLS;
X    cclmap = allocate_integer_array( current_maxccls );
X    ccllen = allocate_integer_array( current_maxccls );
X    cclng = allocate_integer_array( current_maxccls );
X
X    current_max_ccl_tbl_size = INITIAL_MAX_CCL_TBL_SIZE;
X    ccltbl = allocate_character_array( current_max_ccl_tbl_size );
X
X    current_max_dfa_size = INITIAL_MAX_DFA_SIZE;
X
X    current_max_xpairs = INITIAL_MAX_XPAIRS;
X    nxt = allocate_integer_array( current_max_xpairs );
X    chk = allocate_integer_array( current_max_xpairs );
X
X    current_max_template_xpairs = INITIAL_MAX_TEMPLATE_XPAIRS;
X    tnxt = allocate_integer_array( current_max_template_xpairs );
X
X    current_max_dfas = INITIAL_MAX_DFAS;
X    base = allocate_integer_array( current_max_dfas );
X    def = allocate_integer_array( current_max_dfas );
X    dfasiz = allocate_integer_array( current_max_dfas );
X    accsiz = allocate_integer_array( current_max_dfas );
X    dhash = allocate_integer_array( current_max_dfas );
X    dss = allocate_int_ptr_array( current_max_dfas );
X    dfaacc = allocate_dfaacc_union( current_max_dfas );
X    }
END_OF_FILE
if test 16556 -ne `wc -c <'flex/main.c'`; then
    echo shar: \"'flex/main.c'\" unpacked with wrong size!
fi
# end of 'flex/main.c'
fi
if test -f 'flex/nfa.c' -a "${1}" != "-c" ; then 
  echo shar: Will not clobber existing file \"'flex/nfa.c'\"
else
echo shar: Extracting \"'flex/nfa.c'\" \(17293 characters\)
sed "s/^X//" >'flex/nfa.c' <<'END_OF_FILE'
X/* nfa - NFA construction routines */
X
X/*
X * Copyright (c) 1989 The Regents of the University of California.
X * All rights reserved.
X *
X * This code is derived from software contributed to Berkeley by
X * Vern Paxson.
X * 
X * The United States Government has rights in this work pursuant to
X * contract no. DE-AC03-76SF00098 between the United States Department of
X * Energy and the University of California.
X *
X * Redistribution and use in source and binary forms are permitted
X * provided that the above copyright notice and this paragraph are
X * duplicated in all such forms and that any documentation,
X * advertising materials, and other materials related to such
X * distribution and use acknowledge that the software was developed
X * by the University of California, Berkeley.  The name of the
X * University may not be used to endorse or promote products derived
X * from this software without specific prior written permission.
X * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR
X * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
X * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
X */
X
X#ifndef lint
X
Xstatic char copyright[] =
X    "@(#) Copyright (c) 1989 The Regents of the University of California.\n";
Xstatic char CR_continuation[] = "@(#) All rights reserved.\n";
X
Xstatic char rcsid[] =
X    "@(#) $Header: nfa.c,v 2.0 89/06/20 15:50:05 vern Locked $ (LBL)";
X
X#endif
X
X#include "flexdef.h"
X
X/* add_accept - add an accepting state to a machine
X *
X * synopsis
X *
X *   add_accept( mach, accepting_number );
X *
X * accepting_number becomes mach's accepting number.
X */
X
Xadd_accept( mach, accepting_number )
Xint mach;
X
X    {
X    /* hang the accepting number off an epsilon state.  if it is associated
X     * with a state that has a non-epsilon out-transition, then the state
X     * will accept BEFORE it makes that transition, i.e., one character
X     * too soon
X     */
X
X    if ( transchar[finalst[mach]] == SYM_EPSILON )
X	accptnum[finalst[mach]] = accepting_number;
X
X    else
X	{
X	int astate = mkstate( SYM_EPSILON );
X	accptnum[astate] = accepting_number;
X	mach = link_machines( mach, astate );
X	}
X    }
X
X
X/* copysingl - make a given number of copies of a singleton machine
X *
X * synopsis
X *
X *   newsng = copysingl( singl, num );
X *
X *     newsng - a new singleton composed of num copies of singl
X *     singl  - a singleton machine
X *     num    - the number of copies of singl to be present in newsng
X */
X
Xint copysingl( singl, num )
Xint singl, num;
X
X    {
X    int copy, i;
X
X    copy = mkstate( SYM_EPSILON );
X
X    for ( i = 1; i <= num; ++i )
X	copy = link_machines( copy, dupmachine( singl ) );
X
X    return ( copy );
X    }
X
X
X/* dumpnfa - debugging routine to write out an nfa
X *
X * synopsis
X *    int state1;
X *    dumpnfa( state1 );
X */
X
Xdumpnfa( state1 )
Xint state1;
X
X    {
X    int sym, tsp1, tsp2, anum, ns;
X
X    fprintf( stderr, "\n\n********** beginning dump of nfa with start state %d\n",
X	     state1 );
X
X    /* we probably should loop starting at firstst[state1] and going to
X     * lastst[state1], but they're not maintained properly when we "or"
X     * all of the rules together.  So we use our knowledge that the machine
X     * starts at state 1 and ends at lastnfa.
X     */
X
X    /* for ( ns = firstst[state1]; ns <= lastst[state1]; ++ns ) */
X    for ( ns = 1; ns <= lastnfa; ++ns )
X	{
X	fprintf( stderr, "state # %4d\t", ns );
X
X	sym = transchar[ns];
X	tsp1 = trans1[ns];
X	tsp2 = trans2[ns];
X	anum = accptnum[ns];
X
X	fprintf( stderr, "%3d:  %4d, %4d", sym, tsp1, tsp2 );
X
X	if ( anum != NIL )
X	    fprintf( stderr, "  [%d]", anum );
X
X	fprintf( stderr, "\n" );
X	}
X
X    fprintf( stderr, "********** end of dump\n" );
X    }
X
X
X/* dupmachine - make a duplicate of a given machine
X *
X * synopsis
X *
X *   copy = dupmachine( mach );
X *
X *     copy - holds duplicate of mach
X *     mach - machine to be duplicated
X *
X * note that the copy of mach is NOT an exact duplicate; rather, all the
X * transition states values are adjusted so that the copy is self-contained,
X * as the original should have been.
X *
X * also note that the original MUST be contiguous, with its low and high
X * states accessible by the arrays firstst and lastst
X */
X
Xint dupmachine( mach )
Xint mach;
X
X    {
X    int i, init, state_offset;
X    int state = 0;
X    int last = lastst[mach];
X
X    for ( i = firstst[mach]; i <= last; ++i )
X	{
X	state = mkstate( transchar[i] );
X
X	if ( trans1[i] != NO_TRANSITION )
X	    {
X	    mkxtion( finalst[state], trans1[i] + state - i );
X
X	    if ( transchar[i] == SYM_EPSILON && trans2[i] != NO_TRANSITION )
X		mkxtion( finalst[state], trans2[i] + state - i );
X	    }
X
X	accptnum[state] = accptnum[i];
X	}
X
X    if ( state == 0 )
X	flexfatal( "empty machine in dupmachine()" );
X
X    state_offset = state - i + 1;
X
X    init = mach + state_offset;
X    firstst[init] = firstst[mach] + state_offset;
X    finalst[init] = finalst[mach] + state_offset;
X    lastst[init] = lastst[mach] + state_offset;
X
X    return ( init );
X    }
X
X/* finish_rule - finish up the processing for a rule
X *
X * synopsis
X *
X *   finish_rule( mach, variable_trail_rule, headcnt, trailcnt );
X *
X * An accepting number is added to the given machine.  If variable_trail_rule
X * is true then the rule has trailing context and both the head and trail
X * are variable size.  Otherwise if headcnt or trailcnt is non-zero then
X * the machine recognizes a pattern with trailing context and headcnt is
X * the number of characters in the matched part of the pattern, or zero
X * if the matched part has variable length.  trailcnt is the number of
X * trailing context characters in the pattern, or zero if the trailing
X * context has variable length.
X */
X
Xfinish_rule( mach, variable_trail_rule, headcnt, trailcnt )
Xint mach, variable_trail_rule, headcnt, trailcnt;
X
X    {
X    add_accept( mach, num_rules );
X
X    /* we did this in new_rule(), but it often gets the wrong
X     * number because we do it before we start parsing the current rule
X     */
X    rule_linenum[num_rules] = linenum;
X
X    fprintf( temp_action_file, "case %d:\n", num_rules );
X
X    if ( variable_trail_rule )
X	{
X	rule_type[num_rules] = RULE_VARIABLE;
X
X	if ( performance_report )
X	    fprintf( stderr, "Variable trailing context rule at line %d\n",
X		     rule_linenum[num_rules] );
X
X	variable_trailing_context_rules = true;
X	}
X
X    else
X	{
X	rule_type[num_rules] = RULE_NORMAL;
X
X	if ( headcnt > 0 || trailcnt > 0 )
X	    {
X	    /* do trailing context magic to not match the trailing characters */
X	    char *scanner_cp = "yy_c_buf_p = yy_cp";
X	    char *scanner_bp = "yy_bp";
X
X	    fprintf( temp_action_file,
X	"*yy_cp = yy_hold_char; /* undo effects of setting up yytext */\n" );
X
X	    if ( headcnt > 0 )
X		{
X		if ( headcnt > 0 )
X		    fprintf( temp_action_file, "%s = %s + %d;\n",
X			     scanner_cp, scanner_bp, headcnt );
X
X		else
X		    fprintf( temp_action_file, "%s = %s;\n",
X			     scanner_cp, scanner_bp );
X		}
X
X	    else
X		fprintf( temp_action_file,
X			 "%s -= %d;\n", scanner_cp, trailcnt );
X	
X	    fprintf( temp_action_file,
X		     "YY_DO_BEFORE_ACTION; /* set up yytext again */\n" );
X	    }
X	}
X
X    line_directive_out( temp_action_file );
X    }
X
X
X/* link_machines - connect two machines together
X *
X * synopsis
X *
X *   new = link_machines( first, last );
X *
X *     new    - a machine constructed by connecting first to last
X *     first  - the machine whose successor is to be last
X *     last   - the machine whose predecessor is to be first
X *
X * note: this routine concatenates the machine first with the machine
X *  last to produce a machine new which will pattern-match first first
X *  and then last, and will fail if either of the sub-patterns fails.
X *  FIRST is set to new by the operation.  last is unmolested.
X */
X
Xint link_machines( first, last )
Xint first, last;
X
X    {
X    if ( first == NIL )
X	return ( last );
X
X    else if ( last == NIL )
X	return ( first );
X
X    else
X	{
X	mkxtion( finalst[first], last );
X	finalst[first] = finalst[last];
X	lastst[first] = max( lastst[first], lastst[last] );
X	firstst[first] = min( firstst[first], firstst[last] );
X
X	return ( first );
X	}
X    }
X
X
X/* mark_beginning_as_normal - mark each "beginning" state in a machine
X *                            as being a "normal" (i.e., not trailing context-
X *                            associated) states
X *
X * synopsis
X *
X *   mark_beginning_as_normal( mach )
X *
X *     mach - machine to mark
X *
X * The "beginning" states are the epsilon closure of the first state
X */
X
Xmark_beginning_as_normal( mach )
Xregister int mach;
X
X    {
X    switch ( state_type[mach] )
X	{
X	case STATE_NORMAL:
X	    /* oh, we've already visited here */
X	    return;
X
X	case STATE_TRAILING_CONTEXT:
X	    state_type[mach] = STATE_NORMAL;
X
X	    if ( transchar[mach] == SYM_EPSILON )
X		{
X		if ( trans1[mach] != NO_TRANSITION )
X		    mark_beginning_as_normal( trans1[mach] );
X
X		if ( trans2[mach] != NO_TRANSITION )
X		    mark_beginning_as_normal( trans2[mach] );
X		}
X	    break;
X
X	default:
X	    flexerror( "bad state type in mark_beginning_as_normal()" );
X	    break;
X	}
X    }
X
X
X/* mkbranch - make a machine that branches to two machines
X *
X * synopsis
X *
X *   branch = mkbranch( first, second );
X *
X *     branch - a machine which matches either first's pattern or second's
X *     first, second - machines whose patterns are to be or'ed (the | operator)
X *
X * note that first and second are NEITHER destroyed by the operation.  Also,
X * the resulting machine CANNOT be used with any other "mk" operation except
X * more mkbranch's.  Compare with mkor()
X */
X
Xint mkbranch( first, second )
Xint first, second;
X
X    {
X    int eps;
X
X    if ( first == NO_TRANSITION )
X	return ( second );
X
X    else if ( second == NO_TRANSITION )
X	return ( first );
X
X    eps = mkstate( SYM_EPSILON );
X
X    mkxtion( eps, first );
X    mkxtion( eps, second );
X
X    return ( eps );
X    }
X
X
X/* mkclos - convert a machine into a closure
X *
X * synopsis
X *   new = mkclos( state );
X *
X *     new - a new state which matches the closure of "state"
X */
X
Xint mkclos( state )
Xint state;
X
X    {
X    return ( mkopt( mkposcl( state ) ) );
X    }
X
X
X/* mkopt - make a machine optional
X *
X * synopsis
X *
X *   new = mkopt( mach );
X *
X *     new  - a machine which optionally matches whatever mach matched
X *     mach - the machine to make optional
X *
X * notes:
X *     1. mach must be the last machine created
X *     2. mach is destroyed by the call
X */
X
Xint mkopt( mach )
Xint mach;
X
X    {
X    int eps;
X
X    if ( ! SUPER_FREE_EPSILON(finalst[mach]) )
X	{
X	eps = mkstate( SYM_EPSILON );
X	mach = link_machines( mach, eps );
X	}
X
X    /* can't skimp on the following if FREE_EPSILON(mach) is true because
X     * some state interior to "mach" might point back to the beginning
X     * for a closure
X     */
X    eps = mkstate( SYM_EPSILON );
X    mach = link_machines( eps, mach );
X
X    mkxtion( mach, finalst[mach] );
X
X    return ( mach );
X    }
X
X
X/* mkor - make a machine that matches either one of two machines
X *
X * synopsis
X *
X *   new = mkor( first, second );
X *
X *     new - a machine which matches either first's pattern or second's
X *     first, second - machines whose patterns are to be or'ed (the | operator)
X *
X * note that first and second are both destroyed by the operation
X * the code is rather convoluted because an attempt is made to minimize
X * the number of epsilon states needed
X */
X
Xint mkor( first, second )
Xint first, second;
X
X    {
X    int eps, orend;
X
X    if ( first == NIL )
X	return ( second );
X
X    else if ( second == NIL )
X	return ( first );
X
X    else
X	{
X	/* see comment in mkopt() about why we can't use the first state
X	 * of "first" or "second" if they satisfy "FREE_EPSILON"
X	 */
X	eps = mkstate( SYM_EPSILON );
X
X	first = link_machines( eps, first );
X
X	mkxtion( first, second );
X
X	if ( SUPER_FREE_EPSILON(finalst[first]) &&
X	     accptnum[finalst[first]] == NIL )
X	    {
X	    orend = finalst[first];
X	    mkxtion( finalst[second], orend );
X	    }
X
X	else if ( SUPER_FREE_EPSILON(finalst[second]) &&
X		  accptnum[finalst[second]] == NIL )
X	    {
X	    orend = finalst[second];
X	    mkxtion( finalst[first], orend );
X	    }
X
X	else
X	    {
X	    eps = mkstate( SYM_EPSILON );
X
X	    first = link_machines( first, eps );
X	    orend = finalst[first];
X
X	    mkxtion( finalst[second], orend );
X	    }
X	}
X
X    finalst[first] = orend;
X    return ( first );
X    }
X
X
X/* mkposcl - convert a machine into a positive closure
X *
X * synopsis
X *   new = mkposcl( state );
X *
X *    new - a machine matching the positive closure of "state"
X */
X
Xint mkposcl( state )
Xint state;
X
X    {
X    int eps;
X
X    if ( SUPER_FREE_EPSILON(finalst[state]) )
X	{
X	mkxtion( finalst[state], state );
X	return ( state );
X	}
X
X    else
X	{
X	eps = mkstate( SYM_EPSILON );
X	mkxtion( eps, state );
X	return ( link_machines( state, eps ) );
X	}
X    }
X
X
X/* mkrep - make a replicated machine
X *
X * synopsis
X *   new = mkrep( mach, lb, ub );
X *
X *    new - a machine that matches whatever "mach" matched from "lb"
X *          number of times to "ub" number of times
X *
X * note
X *   if "ub" is INFINITY then "new" matches "lb" or more occurrences of "mach"
X */
X
Xint mkrep( mach, lb, ub )
Xint mach, lb, ub;
X
X    {
X    int base_mach, tail, copy, i;
X
X    base_mach = copysingl( mach, lb - 1 );
X
X    if ( ub == INFINITY )
X	{
X	copy = dupmachine( mach );
X	mach = link_machines( mach,
X			      link_machines( base_mach, mkclos( copy ) ) );
X	}
X
X    else
X	{
X	tail = mkstate( SYM_EPSILON );
X
X	for ( i = lb; i < ub; ++i )
X	    {
X	    copy = dupmachine( mach );
X	    tail = mkopt( link_machines( copy, tail ) );
X	    }
X
X	mach = link_machines( mach, link_machines( base_mach, tail ) );
X	}
X
X    return ( mach );
X    }
X
X
X/* mkstate - create a state with a transition on a given symbol
X *
X * synopsis
X *
X *   state = mkstate( sym );
X *
X *     state - a new state matching sym
X *     sym   - the symbol the new state is to have an out-transition on
X *
X * note that this routine makes new states in ascending order through the
X * state array (and increments LASTNFA accordingly).  The routine DUPMACHINE
X * relies on machines being made in ascending order and that they are
X * CONTIGUOUS.  Change it and you will have to rewrite DUPMACHINE (kludge
X * that it admittedly is)
X */
X
Xint mkstate( sym )
Xint sym;
X
X    {
X    if ( ++lastnfa >= current_mns )
X	{
X	if ( (current_mns += MNS_INCREMENT) >= MAXIMUM_MNS )
X	    lerrif( "input rules are too complicated (>= %d NFA states)",
X		    current_mns );
X	
X	++num_reallocs;
X
X	firstst = reallocate_integer_array( firstst, current_mns );
X	lastst = reallocate_integer_array( lastst, current_mns );
X	finalst = reallocate_integer_array( finalst, current_mns );
X	transchar = reallocate_integer_array( transchar, current_mns );
X	trans1 = reallocate_integer_array( trans1, current_mns );
X	trans2 = reallocate_integer_array( trans2, current_mns );
X	accptnum = reallocate_integer_array( accptnum, current_mns );
X	assoc_rule = reallocate_integer_array( assoc_rule, current_mns );
X	state_type = reallocate_integer_array( state_type, current_mns );
X	}
X
X    firstst[lastnfa] = lastnfa;
X    finalst[lastnfa] = lastnfa;
X    lastst[lastnfa] = lastnfa;
X    transchar[lastnfa] = sym;
X    trans1[lastnfa] = NO_TRANSITION;
X    trans2[lastnfa] = NO_TRANSITION;
X    accptnum[lastnfa] = NIL;
X    assoc_rule[lastnfa] = num_rules;
X    state_type[lastnfa] = current_state_type;
X
X    /* fix up equivalence classes base on this transition.  Note that any
X     * character which has its own transition gets its own equivalence class.
X     * Thus only characters which are only in character classes have a chance
X     * at being in the same equivalence class.  E.g. "a|b" puts 'a' and 'b'
X     * into two different equivalence classes.  "[ab]" puts them in the same
X     * equivalence class (barring other differences elsewhere in the input).
X     */
X
X    if ( sym < 0 )
X	{
X	/* we don't have to update the equivalence classes since that was
X	 * already done when the ccl was created for the first time
X	 */
X	}
X
X    else if ( sym == SYM_EPSILON )
X	++numeps;
X
X    else
X	{
X	if ( useecs )
X	    mkechar( sym, nextecm, ecgroup );
X	}
X
X    return ( lastnfa );
X    }
X
X
X/* mkxtion - make a transition from one state to another
X *
X * synopsis
X *
X *   mkxtion( statefrom, stateto );
X *
X *     statefrom - the state from which the transition is to be made
X *     stateto   - the state to which the transition is to be made
X */
X
Xmkxtion( statefrom, stateto )
Xint statefrom, stateto;
X
X    {
X    if ( trans1[statefrom] == NO_TRANSITION )
X	trans1[statefrom] = stateto;
X
X    else if ( (transchar[statefrom] != SYM_EPSILON) ||
X	      (trans2[statefrom] != NO_TRANSITION) )
X	flexfatal( "found too many transitions in mkxtion()" );
X
X    else
X	{ /* second out-transition for an epsilon state */
X	++eps2;
X	trans2[statefrom] = stateto;
X	}
X    }
X
X/* new_rule - initialize for a new rule
X *
X * synopsis
X *
X *   new_rule();
X *
X * the global num_rules is incremented and the any corresponding dynamic
X * arrays (such as rule_type[]) are grown as needed.
X */
X
Xnew_rule()
X
X    {
X    if ( ++num_rules >= current_max_rules )
X	{
X	++num_reallocs;
X	current_max_rules += MAX_RULES_INCREMENT;
X	rule_type = reallocate_integer_array( rule_type, current_max_rules );
X	rule_linenum =
X	    reallocate_integer_array( rule_linenum, current_max_rules );
X	}
X
X    if ( num_rules > MAX_RULE )
X	lerrif( "too many rules (> %d)!", MAX_RULE );
X
X    rule_linenum[num_rules] = linenum;
X    }
END_OF_FILE
if test 17293 -ne `wc -c <'flex/nfa.c'`; then
    echo shar: \"'flex/nfa.c'\" unpacked with wrong size!
fi
# end of 'flex/nfa.c'
fi
echo shar: End of archive 3 \(of 7\).
cp /dev/null ark3isdone
MISSING=""
for I in 1 2 3 4 5 6 7 ; do
    if test ! -f ark${I}isdone ; then
	MISSING="${MISSING} ${I}"
    fi
done
if test "${MISSING}" = "" ; then
    echo You have unpacked all 7 archives.
    rm -f ark[1-9]isdone
else
    echo You still need to unpack the following archives:
    echo "        " ${MISSING}
fi
##  End of shell archive.
exit 0
-- 
Please send comp.sources.unix-related mail to rsalz@uunet.uu.net.
Use a domain-based address or give alternate paths, or you may lose out.