[net.sources] improved version of cpr

dvadura@watdaisy.UUCP (Dennis Vadura) (10/07/86)

I have changed cpr to know about more functions than the previous versions.
It is no longer necessary to start the function name at the start of a line
in order for cpr to recognize it as a function.

so things like:

struct foo *f()...	causes f to be found as a function name

where before you had to say

struct foo *
f()

The second major change is the addition of the -pnum option, where num is a
number from 0 to 16 indicating the portion of the page measured from the top
within which a new function may begin before a form feed is generated.
That is, if you say -p12 (the default) then 12/16 == 3/4, which indicates
a new function will be placed on this page as long as we can start it within
the top 75% of the page.

Thus -p0 is a new page for each new function found, and -p16 is no formfeeds.

NOTE:  The program still has trouble with #ifdef ...{ .. #else ... { ...
       #endif, as this causes the brace nesting level to be thrown off.

--------------------------X Cut Here X---------------------------------------

# This is a shell archive.  Remove anything before this line,
# then unpack it by saving it in a file and typing "sh file".
#
# Wrapped by watdaisy!dvadura on Tue Oct  7 09:40:08 EDT 1986
# Contents:  cpr.c
 
echo x - cpr.c
sed 's/^@//' > "cpr.c" <<'@//E*O*F cpr.c//'
/*
 *	This program prints the files named in its argument list, preceding
 *	the output with a table of contents. Each file is assumed to be C
 *	source code (but doesn't have to be) in that the program searches
 *	for the beginning and end of functions. Function names are added to
 *	the table of contents, provided the declaration starts at the
 *	beginning of a line and the opening parenthesis is on the same line
 *	as the name. The function name in the output is double struck.
 *
 *	By default blank space is inserted after every closing '}'
 *	character. Thus functions and structure declarations are nicely
 *	isolated in the output. The only drawback to this is that structure
 *	initialization tables sometimes produce lots of white space.
 *	The "-r" option removes this space, or changes it to the indicated
 *	length.
 *
 *	The option "-l" indicates that the following argument is to be
 *	the page length used for output (changing the page length hasn't been
 *	tested much).
 *
 *	The option "-s" indicates that the table of contents should be sorted
 *	by function name within each file.
 *
 *	The option "-n" indicates that output lines should be numbered with
 *	the corresponding line number from the input file.
 *
 *	The option "-p" indicates what proportion of the page in steps of 16
 *	should be used for deciding if a new function needs a new page.
 *	That is -p12 (the default) indicates that if a function starts
 *	within the top 12/16 (3/4) of the page then do it, otherwise put it
 *	on a new page.  Thus the higher the number (up to 16) the closer to
 *	the bottom of the page will functions be started. -p0 says put each
 *	func on a new page.
 *
 *	Try it! You'll like it. (I call it cpr.c)
 *
 *	Written by:
 *		Paul Breslin
 *		Human Computing Resources Corp.
 *		10 St. Mary St.
 *		Toronto, Ontario
 *		Canada, M4Y 1P9
 *
 *		-- ...!decvax!utcsrgv!hcr!phb
 *
 *      Sorting and standard input reading from:
 *		Rick Wise, CALCULON Corp., Rockville, MD.
 *		-- ...!decvax!harpo!seismo!rlgvax!cvl!umcp-cs!cal-unix!wise
 *
 *	File modified time,
 *	numbered output,
 *	optional white space,
 *	improved function start tests from:
 *		David Wasley, U.C.Berkeley
 *		-- ...!ucbvax!topaz.dlw
 *	Modified the -r to leave variable amounts of space
 *		Patrick Powell, U. Waterloo
 *
 *      Changed handling of form feeds to start a new page AND print heading:
 *		Terry Doner, U of Waterloo
 *
 *	Fixed up to locate more functions, and added -p option
 *		Dennis Vadura, U of Waterloo
 *
 *		It will find things like  struct foo *f()...
 *		but not things like 	int
 *					f
 *					()...
 *		ie. the constraint is that the () must appear on the same line
 *		as the function name.
 */
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/wait.h>
#include <ctype.h>
#include <errno.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <unistd.h>
extern int errno;	/* system error number */
extern char *sys_errlist[];	/* system error messages, indexed by errno */

#define BP		0xC		/* Form feed			*/

#define TOC_SIZE	4096		/* Capacity of the table of contents */

/* Entry kinds passed to AddToTableOfContents(). */
#define	NEWFILE		1
#define NEWFUNCTION	2

FILE	*File;				/* Input file being listed	*/

int	Braces;				/* Keeps track of brace depth	*/
int	LineNumber;			/* Lines output on current page	*/
int	PageNumber = 0;			/* Current page; -1 while the
					 * table of contents is printed	*/
int	PageLength = 66;		/* -l <len> Normal paper length	*/
int	PagePart = 12;			/* -p<num> sixteenths of the page
					 * in which a function may start*/
int	PageEnd;			/* Accounts for space at bottom	*/
int	SawFunction;			/* Set by LooksLikeFunction(),
					 * cleared by NewFunction()	*/
int	InComment;			/* Scanner is inside a comment	*/
int	InString;			/* Scanner is inside a string	*/

long	FileLineNumber;			/* Input file line number	*/

char	*ProgName;			/* argv[0], for messages	*/
char	Today[30];			/* ctime() text of the run time	*/
char	*Name;				/* Current file name		*/

char	FunctionName[80];		/* Name found by
					 * LooksLikeFunction()		*/
char	FileDate[24];			/* Last modified time of file	*/

char	SortFlag;			/* -s == sort table of contents	*/
char	NumberFlag;			/* -n == output line numbers	*/
int	Space_to_leave = 5;		/* -r<number> space to leave	*/
int	TabWidth = 0;			/* -t <number> width of tabs;
					 * 0 pads with blanks only	*/

/*
 * main -- parse the options, list each input and emit the result.
 *
 * Options are recognised only before the first file name.  Every named
 * file (the standard input when no name is given, or for the name "-")
 * is listed into a temporary file while the table of contents is
 * collected; the table of contents then goes to a second temporary file
 * and both are finally copied to the real standard output.
 *
 * "-t" and "-l" take their value from the following argument; a missing
 * value or a non-positive page length is a usage error.
 */
main(argc, argv)
char	**argv;
  {
	register int	i;
	char		*ctime();
	time_t		thetime, time();
	char		*parm;
	int		c;

	ProgName = argv[0];
	thetime	 = time((time_t *)0);
	strcpy(Today,ctime(&thetime));

	for( i=1; i < argc; ++i )
	  {
		/* The first non-option, or a lone "-", ends the options. */
		if( argv[i][0] != '-'
		||  argv[i][1] == '\0' ) break;

		parm = argv[i];
		while( (c = *++parm) != '\0' ) switch( c ){
		    case 't':
			/*
			 * The value is the NEXT argument: make sure there
			 * is one before handing it to atoi().
			 */
			if( ++i >= argc ) Usage();
			TabWidth = atoi(argv[i]);
			if( TabWidth < 0 )
				TabWidth = 0;
			break;

		    case 'l':
			if( ++i >= argc ) Usage();
			PageLength = atoi(argv[i]);
			/* A page without lines would page-break forever. */
			if( PageLength <= 0 ) Usage();
			break;

		    case 's':
			++SortFlag;
			break;

		    case 'n':
			++NumberFlag;
			break;

		    case 'r':
			/* Count is glued to the flag; "-r" alone means none. */
			Space_to_leave = 0;
			if( isdigit( (unsigned char)parm[1] )
				&&( c = atoi( parm+1 )) > 0 )
				Space_to_leave = c;
			/* The rest of this argument has been consumed. */
			parm += strlen(parm) - 1;
			break;

		    case 'p':
			if( isdigit( (unsigned char)parm[1] )
				&&( c = atoi( parm+1 )) >= 0 )
				PagePart = (c <= 16) ? c : 16;
			parm += strlen(parm) - 1;
			break;

		    default:
			Usage();
			break;
		  }
	  }
	/* Leave two lines free at the bottom of normal pages, one on short ones. */
	PageEnd = PageLength - ((PageLength > 30) ? 2 : 1);

	StartTempFile();

	if( i == argc )
	  {				/* no file names */
		File = stdin;
		Name = "Standard Input";
		List();
	  }

	for(; i < argc; ++i )
	  {
		if( strcmp(argv[i], "-") == 0 )
		  {
			File = stdin;
			Name = "Standard Input";
		  }
		else
		  {
			if( (File = fopen( Name = argv[i], "r" )) == NULL )
			  {
				fprintf(stderr,"%s: Can't open file '%s': %s\n",
					ProgName, Name, strerror(errno) );
				continue;
			  }
		  }
		List();
		if( File != stdin ) fclose(File);
	  }

	/* Finish the last listing page before the table of contents. */
	if( PageNumber > 1 || LineNumber > 0 )
		putchar(BP);
	EndTempFile();

	DumpTableOfContents();
	DumpTempFiles();
	Done();
	return(0);
  }

/*
 * Usage -- write the command synopsis, prefixed with the program name,
 * to the standard error and terminate with exit status 1.
 */
Usage()
{
	(void) fprintf(
	    stderr,
	    "Usage: %s [-n] [-t tabwidth] [-p[num]] [-r[num]] [-s] [-l pagelength] [files] [-]\n",
	    ProgName);

	exit(1);
}

int	SaveOut;	/* dup() of the original standard output descriptor */
char	*TempName;	/* temp file receiving the listings; removed by Done() */
char	*Temp2Name;	/* temp file receiving the table of contents; removed by Done() */

/*
 * StartTempFile -- redirect the standard output into a new temporary
 * file that collects the listings.
 *
 * Installs Done() as the handler for the cleanup signals, saves a
 * duplicate of the real standard output in SaveOut, creates the
 * temporary file and reopens stdout onto it.  TempName is set only once
 * the file exists, so Done() never removes a file we did not create.
 * Exits with status 1 on any failure.
 */
StartTempFile()
  {
	int		Done();
	/* Writable template: mkstemp() overwrites the X's in place. */
	static char	listing[] = "/tmp/cprXXXXXX";
	int		fd;

	CatchSignalsPlease(Done);

	if( (SaveOut = dup(1)) < 0 )
	  {
		fprintf(stderr, "%s: Can't save standard output: %s\n",
			ProgName, strerror(errno));
		exit(1);
	  }

	/* mkstemp() creates the file atomically, unlike mktemp(). */
	if( (fd = mkstemp(listing)) < 0 )
	  {
		fprintf(stderr, "%s: Can't open temp file '%s': %s\n", ProgName,
			listing, strerror(errno));
		exit(1);
	  }
	close(fd);
	TempName = listing;

	if( freopen(TempName, "w", stdout) == NULL )
	  {
		fprintf(stderr, "%s: Can't open temp file '%s': %s\n", ProgName,
			TempName, strerror(errno));
		unlink(TempName);
		exit(1);
	  }
  }

EndTempFile()
  {
	Temp2Name = mktemp("/tmp/CPRXXXXXX");
	if( freopen(Temp2Name, "w", stdout) == NULL )
	  {
		fprintf(stderr, "%s: Can't open temp file '%s': %s\n", ProgName,
			Temp2Name, sys_errlist[errno]);
		exit(1);
	  }
  }

/*
 * DumpTempFiles -- copy the table of contents and then the listings to
 * the original standard output.
 *
 * Closes the stdio stream on the second temporary file, puts the saved
 * descriptor back on descriptor 1 and runs /bin/cat on both files in a
 * child process, waiting for it to finish.
 */
DumpTempFiles()
  {
	register int	pid, w;

	/* A failed close means buffered output was lost (e.g. disk full). */
	if( fclose(stdout) == EOF )
		fprintf(stderr, "%s: error writing temp file: %s\n",
			ProgName, strerror(errno));

	/* dup2() names descriptor 1 explicitly instead of hoping it is
	 * the lowest free one. */
	if( dup2(SaveOut, 1) < 0 )
	  {
		fprintf(stderr, "%s: Can't restore standard output: %s\n",
			ProgName, strerror(errno));
		return;
	  }

	while( (pid = fork()) < 0 ) sleep(1);
	if( pid )
		while ((w = wait((int *)0)) != pid  &&  w != -1);
	else
	  {
		CatchSignalsPlease(SIG_DFL);

		/* The argument list must end with a null POINTER. */
		execl( "/bin/cat", "cat", Temp2Name, TempName, (char *)0 );
		fprintf(stderr, "%s: exec of /bin/cat failed: %s\n", ProgName,
			strerror(errno));
		/* Failure status, without flushing the parent's stdio state. */
		_exit(1);
	  }
  }

/*
 * Done -- final cleanup, also installed as the signal handler.
 * Ignores the cleanup signals from here on, removes whichever temporary
 * files have been named, and exits with status 0.
 */
Done()
{
	CatchSignalsPlease(SIG_IGN);

	if (TempName != 0) {
		unlink(TempName);
	}
	if (Temp2Name != 0) {
		unlink(Temp2Name);
	}

	exit(0);
}

/*
 * CatchSignalsPlease -- set the disposition of SIGINT, SIGQUIT and SIGHUP
 * (in that order) to `action', except that a signal which was being
 * ignored on entry is left ignored.
 */
CatchSignalsPlease(action)
int	(*action)();
{
	static int	watched[] = { SIGINT, SIGQUIT, SIGHUP };
	unsigned	k;

	for (k = 0; k < sizeof(watched) / sizeof(watched[0]); ++k) {
		/* Probing with SIG_IGN reveals the previous disposition. */
		if (signal(watched[k], SIG_IGN) == SIG_IGN)
			continue;
		signal(watched[k], (void (*)(int))action);
	}
}

/*
 * List -- print the file open on `File', one input line at a time.
 *
 * Starts a new file (header and table-of-contents entry), resets the
 * scanner state, and for each input line: counts it, leaves space or
 * starts a page after a closed top-level block, starts a page when the
 * current one is full or the line is a lone form feed, prints the
 * optional line number, records a function if the line looks like the
 * start of one, and finally echoes the line through PutLine().
 *
 * An input line longer than the buffer arrives in several pieces; only
 * the first piece is counted, numbered and tested for a function, the
 * others are just echoed.
 */
List()
  {
	register int	bp;		/* a top-level '}' closed on this line	*/
	register int	lineStart;	/* next piece begins a new input line	*/
	register int	len;
	char		buffer[256];

	NewFile();
	bp = Braces = 0;
	InString = InComment = 0;		/* reset for new file -DV */
	SawFunction = 0;
	lineStart = 1;
	while( fgets(buffer, sizeof(buffer), File) != NULL )
	  {
		if( lineStart )
		  {
			++FileLineNumber;
			if( bp )  NewFunction();
			bp = 0;

			if( ++LineNumber >= PageEnd ) NewPage();

			/* NOTE(review): the form feed itself is still echoed
			 * by PutLine() below, after the new header -- confirm
			 * that this is the intended output. */
			if( buffer[0] == '\f'
			 && buffer[1] == '\n'
			 && buffer[2] == '\0' ) NewPage();

			if( NumberFlag )
			  {
				if( buffer[0] == '\n' )
					printf("        ");
				else
					printf("%6ld  ", FileLineNumber);
			  }
			if( (Braces == 0) &&  LooksLikeFunction(buffer) )
				AddToTableOfContents(NEWFUNCTION);
		  }

		if( PutLine(buffer) ) bp = 1;

		/*
		 * Only a piece that filled the whole buffer without reaching
		 * the newline is continued by the next read.
		 */
		len = strlen(buffer);
		lineStart = ( len < (int)sizeof(buffer) - 1
			   || buffer[len-1] == '\n' );
	  }
  }

/*
 * PutLine -- echo one line to the standard output while tracking braces,
 * comments, strings and character constants.
 *
 * l	the NUL-terminated text to print.
 *
 * Updates Braces, InComment and InString (the latter two carry over to
 * the next line).  Returns 1 if a '}' on this line brought the brace
 * depth back to zero, otherwise 0.
 */
PutLine(l)
register char	*l;
  {
	extern   char	*EndComment();
	extern   char	*EndString();
	register char	c;
	int		bp;
	char		*save;

	bp = 0;
	for( save = l; (c = *l) != '\0'; ++l )
		if( InComment )
			l = EndComment(l);
		else if( InString )
			l = EndString(l);
		else
			switch(c)
			  {
			    case '{':
				++Braces;
				break;

			    case '}':
				if( --Braces == 0 )
					bp = 1;
				break;

			    case '\'':
				/* Skip the character constant, honouring escapes. */
				for( ++l; *l && *l != '\''; ++l )
				    if( *l == '\\' && *(l+1) ) ++l;
				/*
				 * No closing quote: step back so the outer
				 * "++l" lands on the terminator instead of
				 * running past the end of the line.
				 */
				if( *l == '\0' ) --l;
				break;

			    case '"':
				InString = 1;
				break;

			    case '/':
				if( *(l+1) == '*' )
				  {
					InComment = 1;
					++l;
				  }
				break;

			    default:
				break;
			  }
	fputs(save, stdout);
	return(bp);
  }

/*
 * EndComment -- scan forward from p for the end of a comment.
 *
 * If the closing star-slash pair is found, InComment is cleared and a
 * pointer to its slash is returned.  Otherwise the result points at the
 * last character before the terminating NUL.
 */
char *
EndComment(p)
register char	*p;
{
	for (; *p != '\0'; ++p) {
		if (p[0] == '*' && p[1] == '/') {
			InComment = 0;
			return (p + 1);
		}
	}
	return (p - 1);
}

/*
 * EndString -- scan forward from p for the end of a string constant.
 *
 * A backslash hides the character that follows it.  If an unescaped
 * double quote is found, InString is cleared and a pointer to that quote
 * is returned.  Otherwise the result points at the last character before
 * the terminating NUL.
 */
char *
EndString(p)
register char	*p;
{
	for (; *p != '\0'; ++p) {
		if (*p == '\\') {
			/* Step over the escaped character, if there is one. */
			if (p[1] != '\0')
				++p;
		} else if (*p == '"') {
			InString = 0;
			return (p);
		}
	}
	return (p - 1);
}

/*
 * NewFunction -- separate the block that just ended from what follows.
 *
 * Does nothing unless spacing is enabled (Space_to_leave > 0) and a
 * function has been seen since the last call.  If the gap would end
 * below the PagePart/16 mark of the page a new page is started,
 * otherwise Space_to_leave blank lines are printed and counted.
 */
NewFunction()
{
	int	gap;

	if (Space_to_leave > 0 && SawFunction) {
		if (LineNumber + Space_to_leave <= (PageLength * PagePart / 16)) {
			for (gap = Space_to_leave; gap > 0; --gap)
				putchar('\n');
			LineNumber += Space_to_leave;
		} else {
			NewPage();
		}
		SawFunction = 0;
	}
}

#define HEADER_SIZE 3

/*
 * NewPage -- eject the page and print the heading of the next one.
 * The page number advances only while it is non-negative; a negative
 * number is left untouched.
 */
NewPage()
{
	if (!(PageNumber < 0)) {
		PageNumber++;
	}

	putchar(BP);
	LineNumber = 0;
	PutHeader();
}

/*
 * PutHeader -- print the three-line page heading (blank line, title
 * line, blank line) and count those lines in LineNumber.
 *
 * The title line shows Name overstruck twice.  On numbered pages
 * (PageNumber > 0) it continues with the file date and the page number;
 * otherwise it continues with today's date.
 */
PutHeader()
  {
	register int	l, j;

	putchar('\n');
	++LineNumber;

	/* Each character is printed three times over itself. */
	l = strlen(Name);
	for( j=0; j < l; ++j )
		printf("%c\b%c\b%c", Name[j], Name[j], Name[j]);

	if( PageNumber > 0 )
	  {
		printf("  %.17s", FileDate);
		GoToColumn(l+19, 70);
		printf("Page:%4d\n\n", PageNumber);
	  }
	else
	  {
		GoToColumn(l, 55);
		/*
		 * Today may still end with the newline ctime() appends;
		 * printing it would add a fourth, uncounted header line.
		 */
		printf("%.*s\n\n", (int)strcspn(Today, "\n"), Today);
	  }
	LineNumber += 2;
  }

/*
 * GoToColumn -- advance the output from column `from' to column `to'.
 *
 * from	current column (callers pass a non-negative value)
 * to	column to reach; nothing is printed when from >= to
 *
 * With a tab width set, tabs are used for as long as a whole tab stop
 * fits and blanks fill the remainder; otherwise only blanks are used.
 */
GoToColumn(from, to)
register int	from, to;
  {
	if( from >= to ) return;

	if( TabWidth > 0 ){
		/*
		 * Round down to the previous tab stop.  The remainder works
		 * for every width, not only for powers of two.
		 */
		from -= from % TabWidth;
		for( ; (from + TabWidth) <= to; from += TabWidth )
			putchar('\t');
	}
	for( ; from < to; from++ )
		putchar(' ');
  }

/* True for characters that may appear in an identifier. */
#define isidchr(c)	(isalnum((unsigned char)(c)) || ((c) == '_'))

/* This used to incorrectly identify a declaration such as
 *     int (*name[])() = { initializers ... }
 * as a function.  It also picked up this in an assembler file:
 *     #define MACRO(x) stuff
 *     MACRO(x):
 * Fixed both of these.   -IAN!
 */
/*
 * LooksLikeFunction -- decide whether the line at s starts a function
 * definition.
 *
 * The line must begin in column one with up to four identifiers (each
 * optionally preceded by stars) followed by '(' not followed by '*',
 * the parentheses must balance on this line, and no ';' or ':' may
 * appear before a '{' or the end of the line.
 *
 * On success the last identifier is left in FunctionName (truncated to
 * fit), the name is printed at its column followed by backspaces so the
 * caller's echo of the line overstrikes it, SawFunction is set and 1 is
 * returned.  Otherwise 0 is returned; FunctionName may have been
 * overwritten.
 */
LooksLikeFunction(s)
register char	*s;
  {
	register char	*p;
	register int	i;
	char		*save;
	char		*name;		/* start of the last identifier	*/
	char		*nameEnd;	/* one past its last character	*/

	if( InComment || InString ) return(0);

	save = s;
	name = nameEnd = s;

	i = 0;
	do
	  {
		p = FunctionName;

		while( *s == '*' ) ++s;
		if( (*s != '_') && !isalpha((unsigned char)*s) ) return(0);

		/* Copy what fits; keep scanning past over-long names. */
		for( name = s; isidchr(*s); ++s )
			if( p < FunctionName + sizeof(FunctionName) - 1 )
				*p++ = *s;
		*p = '\0';
		nameEnd = s;

		while( (*s == ' ') || (*s == '\t') ) ++s;
		i++;
	  }
	while ( *s && *s != '(' && i < 4 );

	if( *s != '(' || *(s+1) == '*' ) return(0);

	/* Find the parenthesis matching the one at s. */
	for (i = 0; *s; s++)
	  {
		if( *s == '(' )
		  {
			++i;
			continue;
		  }
		if( *s == ')' ) --i;
		if( i == 0 ) break;
	  }
	if( !*s ) return(0);

	/* A ';' or ':' before any '{' means declaration, call or label. */
	while( *s )
	  {
		if( *s == '{') break;
		if( *s == ';' || *s == ':' ) return(0);
		++s;
	  }

	/*
	 * This will cause the function name part of the line to
	 * be double struck.  Note that this assumes the name and the opening
	 * parentheses are on the same line...
	 */
	for( i=0; save != name; save++, i++ ) putchar(' ');
	for( ; name != nameEnd; name++, i++ ) putchar( *name );
	while( i-- > 0 ) putchar('\b');

	SawFunction = 1;
	return(1);
  }

static char	*Toc[TOC_SIZE];		/* entry text; file entries start with '\n' */
static int	TocPages[TOC_SIZE];	/* page number stored for function entries */
static int	TocCount;		/* count of entries recorded */

/*
 * AddToTableOfContents -- record a table-of-contents entry.
 *
 * type	NEWFILE for the current file, anything else for the function
 *	whose name is in FunctionName.
 *
 * Once the table is full a warning is printed (once) and further
 * entries are dropped.  TocCount never exceeds TOC_SIZE, so it always
 * remains a valid bound for the table arrays.
 */
AddToTableOfContents(type)
int	type;
  {
	static int	warned;		/* overflow already reported */

	if( TocCount >= TOC_SIZE )
	  {
		if( !warned )
		  {
			fprintf(stderr, "%s: More than %d Table of contents entries; others ignored.\n",
				ProgName, TOC_SIZE);
			warned = 1;
		  }
		return;
	  }

	if( type == NEWFILE )
		AddFile();
	else
		AddFunction();
  }

AddFunction()
  {
	register int	l;
	register char	*p;

	/* This heuristic stops multiple occurrences of a function,
	 * selected by #ifdefs, to all end up many times over in the
	 * Table of Contents.  One only needs to see it once.  -IAN!
	 */
	if( TocCount > 0 && TocPages[TocCount-1] == PageNumber
	    && strcmp(Toc[TocCount-1],FunctionName) == 0 )
		return;
	l = strlen(FunctionName);
	p = Toc[TocCount] = (char *)malloc(l+1);
	strcpy(p, FunctionName);
	TocPages[TocCount] = PageNumber;
	++TocCount;
  }

AddFile()
  {
	register int	i, l;
	register int	len;
	char		temp[20];

	len = strlen(Name) + 20;
	len = (len < 130) ? 130 : len;
	Toc[TocCount] = (char *)malloc(len);
	sprintf(Toc[TocCount], "\n    File: %s ", Name);
	l = strlen(Toc[TocCount]);
	if( l < 64 )
	  {
		if( TabWidth > 0 ){
			i = ((64 - l) / TabWidth) + 1;
			while( i-- > 0 )
				Toc[TocCount][l++] = '\t';
		}
		else{
			while( l < 64 )
				Toc[TocCount][l++] = ' ';
		}
		Toc[TocCount][l++] = '\0';
	  }
	sprintf(temp, "  Page %4d\n", PageNumber);
	strcat(Toc[TocCount], temp);
	++TocCount;
  }

/*
 * NewFile -- begin the listing of the file in `File'/`Name': restart the
 * input line count, fetch the file's date, start a new page and record
 * the file in the table of contents under that page.
 */
NewFile()
{
	FileLineNumber = 0;

	/* The date must be known before the page header is printed. */
	GetFileTime();
	NewPage();

	/* The entry picks up the page number NewPage() just assigned. */
	AddToTableOfContents(NEWFILE);
}

/*
 * GetFileTime -- set FileDate to the date shown in page headers, in the
 * form "Mmm dd hh:mm yyyy" (17 characters).
 *
 * The modification time of `File' is used; for the standard input, or
 * when the time cannot be obtained, the time in Today is used instead.
 */
GetFileTime()
  {
	struct stat	st;
	char		*stamp;
	char		*text;

	stamp = Today;
	if( File != stdin && fstat(fileno(File), &st) == 0
	 && (text = ctime(&st.st_mtime)) != NULL )
		stamp = text;

	/* Skip the day of the week: "Mmm dd hh:mm:ss yyyy". */
	strncpy(FileDate, stamp + 4, 20);

	/*
	 * Drop the seconds by sliding " yyyy" over ":ss".  Source and
	 * destination overlap, so memmove() must be used.
	 */
	memmove(&FileDate[12], &FileDate[15], 5);
	FileDate[17] = '\0';
  }

/*
 * DumpTableOfContents -- print the collected table of contents on pages
 * of its own, optionally sorted by function name within each file.
 *
 * File entries (text starting with a newline) are printed as stored;
 * function entries are printed indented, dot-filled to a fixed width and
 * followed by their page number.  Sets Name, PageNumber and LineNumber
 * for the page headers of the table itself.
 */
DumpTableOfContents()
  {
	register int	i, j;
	register char	*entry;
	int     	index[TOC_SIZE];

	/* Never trust a count larger than the arrays it indexes. */
	if( TocCount > TOC_SIZE ) TocCount = TOC_SIZE;
	if( TocCount == 0 ) return;

	for (i = 0; i < TocCount; i++) index[i] = i;
	if( SortFlag )
		SortTable(index);

	Name = "Table of  Contents";

	/* A negative page number selects the dated, unnumbered header. */
	PageNumber = -1;
	LineNumber = 0;
	NewPage();

	for( i=0; i < TocCount; ++i )
	  {
		entry = Toc[index[i]];
		if( entry[0] == '\n' )
		  {
			/* Keep a file heading together with its first entries. */
			if( (LineNumber + 5) >= PageEnd ) NewPage();

			printf("%s", entry);
			LineNumber += 2;
			continue;
		  }
		if( ++LineNumber >= PageEnd ) NewPage();

		printf("        %s ", entry);
		for( j=strlen(entry); j < 48; ++j ) putchar('.');
		printf(" %4d\n", TocPages[index[i]]);
	  }
  }

/*
 * SortTable -- order the table-of-contents indices by entry text.
 *
 * index	array of TocCount positions into Toc, rearranged in place.
 *
 * Neighbours are exchanged repeatedly until a full pass makes no swap.
 * A pair that includes a file entry (text starting with a newline) is
 * never exchanged, so functions stay under their own file.
 */
SortTable(index)
register int	*index;
{
	register int	k, held, swapped;
	register char	*first, *second;

	for (swapped = 1; swapped; ) {
		swapped = 0;
		for (k = 0; k + 1 < TocCount; k++) {
			first  = Toc[index[k]];
			second = Toc[index[k + 1]];

			/* don't sort across file names */
			if (first[0] == '\n' || second[0] == '\n')
				continue;
			if (strcmp(first, second) <= 0)
				continue;

			held         = index[k];
			index[k]     = index[k + 1];
			index[k + 1] = held;
			swapped      = 1;
		}
	}
}


@//E*O*F cpr.c//
# Give the extracted file its permissions.
chmod u=rw,g=r,o=r cpr.c
 
echo Inspecting for damage in transit...
# Scratch files: the expected wc output and the diff against the actual one.
temp=/tmp/shar$$; dtemp=/tmp/.shar$$
# Remove the scratch files on exit and on signals 1, 2, 3 and 15.
trap "rm -f $temp $dtemp; exit" 0 1 2 3 15
# Expected line, word and character counts of cpr.c.
cat > $temp <<\!!!
     735    2282   14610 cpr.c
!!!
# Compare the counts, ignoring differences in blank space.
wc  cpr.c | sed 's=[^ ]*/==' | diff -b $temp - >$dtemp
# A non-empty diff means the counts differ.
if [ -s $dtemp ]
then echo "Ouch [diff of wc output]:" ; cat $dtemp
else echo "No problems found."
fi
exit 0

the top 3/4 of the page the function can be placed there
-- 
--------------------------------------------------------------------------------
Dennis Vadura, Computer Science Dept., University of Waterloo

UUCP:  {ihnp4|allegra|utzoo|utcsri}!watmath!watdaisy!dvadura
================================================================================