[mod.sources] v06i051: Cross-reference for Yacc

sources-request@mirror.UUCP (07/14/86)

Submitted by: Cathy Taylor <seismo!mcvax!comp.lancs.ac.uk!cathy>
Mod.sources: Volume 6, Issue 51
Archive-name: yyref

[  I changed the name to yyref, added a Makefile, and broke up the
   mail message I received into a README and a manpage.  --r$ ]

The following source forms the xref program for syntactically correct
Yacc files.

All tokens and non-terminals in the grammar are located, together with
a list of the line numbers at which they occur. The output is a
numbered listing of the Yacc grammar up to the second '%%' or EOF,
followed by the xref table. Should a syntax error occur before normal
termination, the grammar listing ends and as much of the table as has
been constructed is printed. All comments /* .. */ , %{ .. %} and
actions { .. } are ignored, although they appear in the listing.

#!/bin/sh
# This is a shell archive.  Remove anything before this line,
# then unpack it by saving it in a file and typing "sh file".
# Contents:  Makefile README yyref.1 yyref-grammer yyref-lex
#	yyref-line.h
 
# Unpack the Makefile.  The grammar prerequisite is spelled "yyref-grammer"
# because that is the name under which this archive unpacks the yacc source;
# make would otherwise stop on a prerequisite that does not exist.
# y.tab.c pulls in lex.yy.c with #include, so only y.tab.c is compiled.
echo x - Makefile
sed 's/^XX//' > "Makefile" <<'@//E*O*F Makefile//'

XXCFLAGS=
XXyyref:	yyref-lex yyref-grammer
XX	lex yyref-lex
XX	yacc -vd yyref-grammer
XX	cc $(CFLAGS) y.tab.c -o yyref

XXclean:
XX	rm -f lex.yy.c y.tab.? y.output yyref
@//E*O*F Makefile//
chmod u=rw,g=rw,o=rw Makefile
 
echo x - README
sed 's/^XX//' > "README" <<'@//E*O*F README//'
XXA COUPLE OF NOTES ON CONFIGURATION PARAMETERS:
XX(See the code section of yyref-grammar)
XX	Table data is stored in "table[]", which holds MAXIDENTS terminal/non-
XXterminal names, each shorter than MAXCHARS characters. For each name, up to
XXMAXDEFS defining (lhs of rule) occurrences and up to MAXOCCS other (rhs of
XXrule) occurrences can be held. Each identifier which may be the start rule is
XXflagged with the message held in "start_maybe[]" - likewise all possible
XXtokens are accompanied by "token_maybe[]". All defining occurrences are
XXprinted following "declared_at_mark[]", and all other occurrences following
XX"occurs_at_mark[]".

@//E*O*F README//
chmod u=rw,g=rw,o=rw README
 
echo x - yyref.1
sed 's/^XX//' > "yyref.1" <<'@//E*O*F yyref.1//'
XX.TH YYREF 1 LOCAL
XX.SH NAME
XXyyref \- generate cross\-reference for YACC input
XX.SH SYNOPSIS
XX.B yyref
XX[ < inputfile ]
XX.SH DESCRIPTION
XX.I Yyref
XXgenerates cross\-references for
XX.IR yacc (1)
XXinput files.
XXIt reads source from standard input, and upon EOF or seeing the second
XX.B %%
XXthat marks the end of the rules portion, generates a cross\-reference
XXlist of all parser tokens.
XX.PP
XXIn case of syntax errors in the input,
XX.I yyref
XXtries to generate as much information as it can.
XX.PP
XXThe output consists of a numbered listing of the header and rules part,
XXfollowed by the cross\-reference:
XX.RS
XX.nf
XX   1 :	%{
XX   2 :	# include <ctype.h>
XX   3 :	%}
XX   4 :	
XX   5 :	
XX   6 :		/*******************************************************\
XX   7 :		*							*
XX   8 :		*	Date : Fri Jul  4 00:50:04 BST 1986		*
XX   9 :		*							*
XX  10 :		\*******************************************************/
XX  11 :	
XX  12 :	%token	IDENTIFIER START_TOKEN NUMBER
XX  13 :	
XX  14 :	%start	small
XX  15 :	
XX  16 :	%%
XX  17 :	
XX  18 :	small
XX  19 :		:	start middle
XX  20 :				{
XX  21 :					printf("\n\nMiddle");
XX  22 :					yyclearin;
XX  23 :					return(0);
XX  24 :				}
XX  25 :			end postamble
XX  26 :		;
XX  27 :	
XX  28 :	start
XX  29 :		:	/* empty */
XX  30 :		|	START_TOKEN
XX  31 :		;
XX  32 :	
XX  33 :	middle
XX  34 :		:
XX  35 :		;
XX  36 :	
XX  37 :	middle
XX  38 :		:	MID_TOKEN
XX  39 :		|	/* empty */
XX  40 :		;
XX  41 :	
XX  42 :	%%


XX'small' -
XX~~~~~~~		*18 , never occurs on rhs of rule - start rule?
XX'start' -
XX~~~~~~~		*28 , 19
XX'middle' -
XX~~~~~~~		*33 , *37, 19
XX'end' -
XX~~~~~~~		is not declared - token??, 25
XX'postamble' -
XX~~~~~~~		is not declared - token??, 25
XX'START_TOKEN' -
XX~~~~~~~		is not declared - token??, 30
XX'MID_TOKEN' -
XX~~~~~~~		is not declared - token??, 38

XX	End of X-ref
XX	~~~~~~~~~~~~
XX.fi
XX.RE
XX.SH BUGS
XXShould be able to understand the
XX.I %token
XXand similar directives.
@//E*O*F yyref.1//
chmod u=rw,g=rw,o=rw yyref.1
 
echo x - yyref-grammer
sed 's/^XX//' > "yyref-grammer" <<'@//E*O*F yyref-grammer//'
XX%{

XX# include <ctype.h>
XX# include <stdio.h>

XX%}


XX	/*******************************************************\
XX	*							*
XX	*	X_reference program for YACC files		*
XX	*	~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~		*
XX	*							*
XX	*	Cathy Taylor,					*
XX	*	c/o Department of Computing,			*
XX	*	University of Lancaster,			*
XX	*	Bailrigg, Lancaster, England.			*
XX	*	Date : Fri Jul  4 00:50:04 BST 1986		*
XX	*							*
XX	\*******************************************************/


XX	/***********************************************\
XX	*						*
XX	*	Yacc Input Syntax			*
XX	*	~~~~~~~~~~~~~~~~~			*
XX	*						*
XX	*	Adapted from the document		*
XX	*	'YACC - Yet Another Compiler Compiler'	*
XX	*		by				*
XX	*	   S. C. Johnson			*
XX	*						*
XX	*	Date: Tue Jul  1 02:40:18 BST 1986	*
XX	*						*
XX	\***********************************************/


XX%token	IDENTIFIER CHARACTER NUMBER
XX%token	LEFT RIGHT NONASSOC TOKEN PREC TYPE START UNION
XX%token	PER PERCURL ACT
XX%token	COLON SEMICOLON COMMA OR LESS GREATER

XX%start	spec

XX%%

XXspec
XX	:	defs PER rules tail
XX			{
XX				printf("\n\n");
XX				yyclearin;
XX				return(0);
XX			}
XX	;

XXtail
XX	:	/* empty */
XX	|	PER 
XX	;

XXdefs
XX	:	/* empty */
XX	|	def_bk
XX	;

XXdef_bk
XX	:	def
XX	|	def_bk def
XX	;

XXdef
XX	:	START IDENTIFIER
XX	|	UNION ACT	/* the braced body of %union is scanned as ACT */
XX	|	PERCURL
XX	|	rword tag nlist
XX	;

XXrword
XX	:	TOKEN
XX	|	LEFT
XX	|	RIGHT
XX	|	NONASSOC
XX	|	TYPE
XX	;

XXtag
XX	:	/* empty */
XX	|	LESS IDENTIFIER GREATER
XX	;

XXnlist
XX	:	nmno
XX	|	nlist opt_comma nmno
XX	;

XXopt_comma
XX	:	/* empty */
XX	|	COMMA
XX	;

XXnmno
XX	:	IDENTIFIER opt_num
XX	|	CHARACTER opt_num	/* quoted literal, e.g. in a %left list */
XX	;

XXopt_num
XX	:	/* empty */
XX	|	NUMBER
XX	;
XXrules
XX	:	rule
XX	|	rules rule
XX	;

XXrule
XX	:	IDENTIFIER
XX		{
XX			yyaction(ON_C_IDENT,line);
XX		}
XX		COLON body SEMICOLON
XX	;

XXbody
XX	:	body_block
XX	|	body OR body_block
XX	;

XXbody_block
XX	:	/* empty */
XX	|	body_block body_entity
XX	;

XXbody_entity
XX	:	opt_prec id_ent
XX	|	ACT
XX	;

XXid_ent
XX	:	IDENTIFIER
XX		{
XX			yyaction(ON_IDENT,line);
XX		}
XX	|	CHARACTER
XX	;

XXopt_prec
XX	:	/* empty */
XX	|	PREC
XX	;


XX%%

XX# include	<stdio.h>
XX# include	"lex.yy.c"
XX# include	"yyref-line.h"

XX#define	ON_C_IDENT	000
XX#define	ON_IDENT	001

XX#define	MAXIDENTS	1000
XX#define	MAXCHARS	100
XX#define	MAXDEFS		20
XX#define	MAXOCCS		1000

XXstruct	IREC {
XX		char	ident[MAXCHARS];
XX		int	desc[MAXDEFS];
XX		int	nextdesc;
XX		int	occ[MAXOCCS];
XX		int	nextocc;
XX		} table[MAXIDENTS];


XX/*
XX*	Record one occurrence of the identifier currently held in yytext.
XX*
XX*	action	ON_C_IDENT for a defining occurrence (name before the
XX*		colon), ON_IDENT for any other occurrence.
XX*	ln	line number to record.
XX*
XX*	Returns 0 on success, -1 if the action is unknown or the
XX*	occurrence could not be stored.  Capacity problems are reported
XX*	on stderr so that the listing on stdout is left intact.
XX*/
XXyyaction (action,ln)
XXint	action;
XXint	ln;
XX{
XX	int	id;
XX
XX	/*
XX	*	A name of MAXCHARS or more characters would overrun
XX	*	table[].ident when copied, so it is not stored at all.
XX	*/
XX	if ( yyleng >= MAXCHARS )
XX	{
XX		fprintf (stderr, "yyaction: identifier too long, ignored\n");
XX		return (-1);
XX	}
XX
XX	/*
XX	*	Linear search.  The last slot is never filled: its empty
XX	*	name is the terminator that every scan of the table
XX	*	relies on.
XX	*/
XX	id = 0;
XX	while (	id < MAXIDENTS - 1 &&
XX		strcmp(table[id].ident,yytext) != 0 &&
XX		strcmp(table[id].ident,"") != 0 )
XX		id++;
XX
XX	if ( strcmp(table[id].ident, yytext) != 0 )
XX	{
XX
XX	/*
XX	*	New name to be stored.  No distinction is made here
XX	*	between tokens and non-terminals.
XX	*/
XX
XX		if ( id >= MAXIDENTS - 1 )
XX		{
XX			fprintf (stderr, "yyaction: identifier table full\n");
XX			return (-1);
XX		}
XX		strcpy(table[id].ident,yytext);
XX		table[id].nextdesc = 0;
XX		table[id].nextocc = 0;
XX	} /* fi */
XX
XX	switch (action) {
XX	case ON_C_IDENT:
XX
XX	/*
XX	*	Add to list of definition lines.
XX	*/
XX
XX		if ( table[id].nextdesc >= MAXDEFS )
XX		{
XX			fprintf (stderr, "yyaction: too many definitions of %s\n", yytext);
XX			return (-1);
XX		}
XX		table[id].desc[table[id].nextdesc++] = ln;
XX		break;
XX
XX	case ON_IDENT:
XX
XX	/*
XX	*	Add to list of occurrence lines.
XX	*/
XX
XX		if ( table[id].nextocc >= MAXOCCS )
XX		{
XX			fprintf (stderr, "yyaction: too many occurrences of %s\n", yytext);
XX			return (-1);
XX		}
XX		table[id].occ[table[id].nextocc++] = ln;
XX		break;
XX
XX	default		:
XX		fprintf (stdout, "yyaction: invalid action\n");
XX		return (-1);
XX		} /* hctiws */
XX	return (0);
XX} /* corp */

XXnline(ln)
XXint	ln;
XX{
XX	printf("%4d :\t",ln);
XX}


XX	char	declared_at_mark[] = "*";
XX	char	occurs_at_mark[] = "";
XX	char	token_maybe[] = "is not declared - token??";
XX	char	start_maybe[] = "never occurs on rhs of rule - start rule?";
XX	
XX/*
XX*	Strings for output
XX*/

XX/*
XX*	Print the numbered listing while parsing standard input, then
XX*	print the cross-reference table.  The table is printed even if
XX*	the parse stopped early.  Always returns 0.
XX*/
XXmain ()
XX{
XX	int	ind,id;
XX
XX	/*
XX	*	An empty name marks the end of the used part of the table.
XX	*/
XX	strcpy(table[0].ident,"");
XX
XX	line = 0;
XX	nline(++line);
XX
XX	yyparse ();
XX
XX	for ( id = 0; id < MAXIDENTS && strcmp(table[id].ident,"") != 0; id++ )
XX	{
XX		printf("\n'%s' -\n~~~~~~~\t\t",table[id].ident);
XX		if (table[id].nextdesc == 0 )
XX		    printf("%s",token_maybe);
XX		else
XX		{
XX		    /*
XX		    *	The first definition has no leading comma and
XX		    *	is followed by a space; the rest are comma
XX		    *	separated.
XX		    */
XX		    printf("%s%d ",declared_at_mark,table[id].desc[0]);
XX		    for ( ind=1; ind < table[id].nextdesc ; ind++)
XX			printf(", %s%d",declared_at_mark,table[id].desc[ind]);
XX		}
XX		/*
XX		*	Test the count, not the contents of the first slot.
XX		*/
XX		if (table[id].nextocc == 0)
XX		    printf(", %s",start_maybe);
XX		else
XX		{
XX		    for ( ind = 0; ind < table[id].nextocc ; ind++ )
XX			printf(", %s%d",occurs_at_mark,table[id].occ[ind]);
XX		}
XX	}
XX	printf("\n\n\tEnd of X-ref\n\t~~~~~~~~~~~~\n");
XX	return (0);
XX} /* niam */

XXyyerror(mess)
XXchar	*mess;
XX{
XX	printf("\n\t%s\n",mess);
XX} /* corp */
@//E*O*F yyref-grammer//
chmod u=rw,g=rw,o=rw yyref-grammer
 
echo x - yyref-lex
sed 's/^XX//' > "yyref-lex" <<'@//E*O*F yyref-lex//'

XX	/*******************************************************\
XX	*							*
XX	*	X_ref for YACC - LEX file			*
XX	*	~~~~~~~~~~~~~~~~~~~~~~~~~			*
XX	*	Date: Tue Jul  1 03:36:21 BST 1986		*
XX	*							*
XX	\*******************************************************/

XX%{

XX# include	<stdio.h>
XX# include	"yyref-line.h"

XX#define		TRUE 1
XX#define 		FALSE 0

XXint	recognised;
XXchar	c;

XX%}

XX	/* abbreviations */

XXdigit		[0-9]
XXu_case		[A-Z]
XXl_case		[a-z]
XXid_char		[A-Za-z0-9_]
XXletter		[A-Za-z]
XXwhite		[\t ]


XX%%

XX"/*"			{
XX				/*
XX				*	Echo a C comment up to and including
XX				*	its closing delimiter.  No token is
XX				*	returned.  End of input also ends the
XX				*	scan, so an unterminated comment
XX				*	cannot hang the scanner.
XX				*/
XX				int	ch;
XX				int	star;
XX
XX				ECHO;
XX				star = FALSE;
XX				for (;;)
XX				{
XX					ch = nextchar();
XX					if (ch == 0 || ch == EOF)
XX						break;
XX					if (star == TRUE && ch == '\/')
XX						break;
XX					star = (ch == '*') ? TRUE : FALSE;
XX				}
XX			}
XX"%{"			{
XX				/*
XX				*	Echo a literal C block up to and
XX				*	including its closing percent-brace
XX				*	pair, then return PERCURL.  End of
XX				*	input also ends the scan, so an
XX				*	unterminated block cannot hang the
XX				*	scanner.
XX				*/
XX				int	ch;
XX				int	pct;
XX
XX				ECHO;
XX				pct = FALSE;
XX				for (;;)
XX				{
XX					ch = nextchar();
XX					if (ch == 0 || ch == EOF)
XX						break;
XX					if (pct == TRUE && ch == '\}')
XX						break;
XX					pct = (ch == '\%') ? TRUE : FALSE;
XX				}
XX				return(PERCURL);
XX			}
XX"{"			{
XX				/*
XX				*	Echo a yacc action and return ACT.
XX				*	The text is read by hand rather than
XX				*	matched by a pattern, since a long
XX				*	action may overrun the lex buffer.
XX				*	Nested braces are counted; braces
XX				*	inside C strings, character constants
XX				*	and comments are not.  A quote is
XX				*	dropped at an unescaped newline.  End
XX				*	of input also ends the action, so a
XX				*	missing close brace cannot hang the
XX				*	scanner.
XX				*/
XX				int	ch, prev;
XX				int	depth, quote, incomm;
XX
XX				ECHO;
XX				depth = 1;
XX				quote = 0;
XX				incomm = FALSE;
XX				prev = 0;
XX				while (depth > 0)
XX				{
XX					ch = nextchar();
XX					if (ch == 0 || ch == EOF)
XX						break;
XX					if (incomm == TRUE)
XX					{
XX						if (prev == '*' && ch == '\/')
XX						{
XX							incomm = FALSE;
XX							ch = 0;
XX						}
XX					}
XX					else if (quote != 0)
XX					{
XX						if (ch == '\\')
XX							nextchar();
XX						else if (ch == quote || ch == '\n')
XX							quote = 0;
XX					}
XX					else if (prev == '\/' && ch == '*')
XX					{
XX						incomm = TRUE;
XX						ch = 0;
XX					}
XX					else if (ch == '"' || ch == '\'')
XX						quote = ch;
XX					else if (ch == '\{')
XX						depth++;
XX					else if (ch == '\}')
XX						depth--;
XX					prev = ch;
XX				}
XX				return(ACT);
XX			}
XX{letter}{id_char}*	{
XX				ECHO;
XX				return(IDENTIFIER);
XX			}
XX"'"\\?[^']+"'"		{
XX				ECHO;
XX				return(CHARACTER);
XX			}
XX{white}+		{	
XX				ECHO;
XX			}
XX{digit}+		{
XX				ECHO;
XX				return(NUMBER);
XX			}
XX"%"{white}*"left"	{
XX				ECHO;
XX				return(LEFT);
XX			}
XX"%"{white}*"right"	{
XX				ECHO;
XX				return(RIGHT);
XX			}
XX"%"{white}*"nonassoc"	{
XX				ECHO;
XX				return(NONASSOC);
XX			}
XX"%"{white}*"token"	{
XX				ECHO;
XX				return(TOKEN);
XX			}
XX"%"{white}*"prec"	{
XX				ECHO;
XX				return(PREC);
XX			}
XX"%"{white}*"type"	{
XX				ECHO;
XX				return(TYPE);
XX			}
XX"%"{white}*"start"	{
XX				ECHO;
XX				return(START);
XX			}
XX"%"{white}*"union"	{
XX				ECHO;
XX				return(UNION);
XX			}
XX"%%"			{
XX				ECHO;
XX				return(PER);
XX			}
XX":"			{
XX				ECHO;
XX				return(COLON);
XX			}
XX";"			{
XX				ECHO;
XX				return(SEMICOLON);
XX			}
XX","			{
XX				ECHO;
XX				return(COMMA);
XX			}
XX"|"			{
XX				ECHO;
XX				return(OR);
XX			}
XX"<"			{
XX				ECHO;
XX				return(LESS);
XX			}
XX">"			{
XX				ECHO;
XX				return(GREATER);
XX			}
XX"\n"			{
XX				ECHO;
XX				nline(++line);
XX			}

XX%%

XXyywrap()
XX{
XX	/* wrap-up procedure */
XX	return(1);
XX}

XX/*
XX*	Read one character with input(), echo it to the listing and
XX*	start a new numbered line after a newline.  Returns the value
XX*	from input() unchanged.  The end-of-input value (0 or EOF) is
XX*	returned without being echoed; it is held in an int so that it
XX*	is not altered by conversion to char.
XX*/
XXnextchar()
XX{
XX	int	c;
XX
XX	c = input();
XX	if (c == 0 || c == EOF)
XX		return(c);
XX	printf("%c",c);
XX	if (c == '\n')
XX		nline(++line);
XX	return(c);
XX}
@//E*O*F yyref-lex//
chmod u=rw,g=rw,o=rw yyref-lex
 
echo x - yyref-line.h
sed 's/^XX//' > "yyref-line.h" <<'@//E*O*F yyref-line.h//'

XX	int	line;
@//E*O*F yyref-line.h//
chmod u=rw,g=rw,o=rw yyref-line.h
 
# Compare wc(1) figures for the unpacked files with those recorded when the
# archive was built, and show any mismatch as a diff.
echo Inspecting for damage in transit...
temp=/tmp/sharin$$
dtemp=/tmp/sharout$$
# Remove both scratch files on exit and on signals 1, 2, 3 and 15.
trap "rm -f $temp $dtemp; exit" 0 1 2 3 15
cat > $temp <<\!!!
       9      21     157 Makefile
      10      87     601 README
      91     330    1806 yyref.1
     286     591    4433 yyref-grammer
     173     293    2323 yyref-lex
       2       2      12 yyref-line.h
     571    1324    9332 total
!!!
# The sed strips any directory prefix from the names wc prints; diff -b
# ignores differences in the amount of white space.
wc  Makefile README yyref.1 yyref-grammer yyref-lex yyref-line.h |
	sed 's=[^ ]*/==' |
	diff -b $temp - >$dtemp
if [ -s $dtemp ]
then
	echo "Ouch [diff of wc output]:"
	cat $dtemp
else
	echo "No problems found."
fi
exit 0