[comp.sources.wanted] C program source/comment counter wanted

dan@dyndata.UUCP (Dan Everhart) (01/03/90)

I'd like to find a program which will analyze a C source file, and
report the number of lines of executable code, and the number of lines
of comments.  This would provide one piece of information for
productivity measurements.  If you can help, please email.  Thanks.

-- 
Dan Everhart // Dynamic Data & Electronics // 7107 179th St SW
		Edmonds, WA 98020
                UUCP: {sun,microsoft,uw-beaver}!fluke!dyndata!dan             
                "...it's turtles all the way down."

mem@zinn.MV.COM (Mark E. Mallett) (01/09/90)

In article <256@dyndata.UUCP> dan@dyndata.UUCP (Dan Everhart) writes:
>I'd like to find a program which will analyze a C source file, and
>report the number of lines of executable code, and the number of lines
>of comments.  This would provide one piece of information for
>productivity measurements.  If you can help, please email.  Thanks.

Here's a program that does that, more or less.  It's a wc-style
program that knows how to remove comments and blank lines of a few
different styles (C, C++, and a couple of assembly formats).  For
instance,

	xwc -sC files..

strips C-style comments.  There's no documentation, but there is a usage
message that lists the arguments it takes.

-mm-


/* wc.c - yet another implementation of the traditional word counter.

	May 31 1988, Mark E. Mallett

	Dec 7 1988 mem add argument to -s to indicate commenting
		   style; add commenting styles C_CPP, C_ASTAR, C_ASEMI.

*/

#include <stdio.h>
#include <ctype.h>

#ifdef	OS_MSDOS
#include <sys/types.h>
#include <dos.h>
#endif	/* OS_MSDOS */

/* Boolean and NUL constants, defined only if nothing earlier defined them */
#ifndef	FALSE
#define	FALSE	0
#define	TRUE	1
#endif	/* FALSE */

#ifndef	NUL
#define	NUL	'\0'
#endif	/* NUL */

/* Per-platform input setup.  OPENMODE is the fopen() mode used by wcfn().
   On MSDOS files are opened in binary mode and xgetc is left undefined
   here, so the xgetc() function at the end of the file (guarded by
   "#ifndef xgetc") gets compiled.  Elsewhere xgetc is simply getc. */
#ifdef	OS_MSDOS
#define	OPENMODE	"rb"
#else
#define	OPENMODE	"r"
#define	xgetc(a)	getc(a)
#endif	/* OS_MSDOS */

/* Old-style declaration, for libraries whose stdio.h does not declare it */
extern	FILE	*fopen();

/* Option flags.  ccF, lcF and wcF are set by -c, -l and -w in main();
   if none of the three is given, main() turns all three on. */
static	char	bolF = TRUE;		/* beginning-of-line flag; wc() */
					/*  consults it for the ASEMI */
					/*  '*' comment rule */
static	char	ccF;			/* Print char count */
static	char	lcF;			/* Print line count */
static	char	srcF = FALSE;		/* Source mode (set by -s) */
static	char	wcF;			/* Print word count */

/* Running totals over all files; wc() adds each file's counts to these
   and main() prints them when more than one file was processed. */
static	long	Charttl;		/* Characters counted */
static	int	Filettl;		/* Files processed (calls to wc()) */
static	long	Lignttl;		/* Lines ignored */
static	long	Linettl;		/* Lines counted */
static	long	Wordttl;		/* Words counted */

static enum {				/* Commenting style */
		C_C,			/* C */
		C_CPP,			/* C++ */
		C_ASTAR,		/* Assembly with * comments */
		C_ASEMI			/* Assembly with ; comments */
	    }
	    	Cmtstyle = C_C;		/* Default to C */


/* main( argc, argv )

	Parse the command line options, then count each named file, or
	standard input when no file names are given.  A totals line is
	printed when more than one file was counted.

Accepts :

	argc		Number of command line arguments
	argv		Command line arguments

Returns :

	< value >	0 after all files have been tried.  A bad or
			incomplete option prints the usage text and
			exits with status 1.

*/

int
main( argc, argv )
	int		argc;		/* # of command line arguments */
	char		**argv;		/* command line args */
{
	int             i;              /* Index of current argument */
	int             n;              /* Index holding option's arg */
	int             c;              /* Option character */
	char            *aptr;          /* Option's argument, or NULL */

    /* Look at the command line arguments */
    for( i = 1; i < argc; ++i ) {
	if ( argv[i][0] != '-' )	/* If not option */
	    break;			/*  done with options */

	/* Get option char; the ctype macros want an unsigned char value */
	c = (unsigned char)argv[i][1];
	if ( isupper( c ) )		/* Be paranoid about tolower macro */
	    c = tolower( c );		/*  don't care about arg case */

	/* Find where arg is, if any.  A bare "-" ends right after the
	   dash, so argv[i][2] may only be looked at when an option
	   character is actually present. */
	n = i;				/* Where to set i if arg used */
	aptr = NULL;
	if ( c != NUL ) {
	    if ( argv[i][2] != NUL )	/* Arg attached to option */
		aptr = &argv[i][2];
	    else if ( i < argc-1 ) {	/* Use next */
		n = i+1;
		aptr = argv[n];
	    }
	}

	/* Process the option character */
	switch( c ) {
	    case 'c':			/* Print char count */
		ccF = TRUE;
		break;

	    case 'l':			/* Print line count */
		lcF = TRUE;
		break;

	    case 's':			/* Enable source mode */
		if ( aptr == NULL ) {	/* -s needs a style name */
		    usage();
		    exit( 1 );
		}

		if ( nccmp( aptr, "c" ) == 0 )
		    Cmtstyle = C_C;
		else if ( nccmp( aptr, "c++" ) == 0 )
		    Cmtstyle = C_CPP;
		else if ( ( nccmp( aptr, "asemi" ) == 0 ) ||
			  ( strcmp( aptr, "34010" ) == 0 ) )
		    Cmtstyle = C_ASEMI;
		else if ( ( nccmp( aptr, "astar" ) == 0 ) ||
			  ( strcmp( aptr, "68020" ) == 0 ) )
		    Cmtstyle = C_ASTAR;
		else {			/* Unknown style name */
		    usage();
		    exit( 1 );
		}
		i = n;			/* Skip over argument */
		srcF = TRUE;		/* Set source mode */
		break;

	    case 'w':			/* Print word count */
		wcF = TRUE;
		break;

	    default:			/* Unknown option, or bare "-" */
		usage();
		exit( 1 );
	}
    }

    /* Done with options -- do files */
    if ( (!lcF) && (!wcF) && (!ccF) )
	lcF = wcF = ccF = TRUE;

    if ( i == argc )			/* No filenames given? */
	wc( stdin, "" );
    else {
	for( ; i < argc; ++i ) {

#ifdef	OS_MSDOS
	    msdowildfn( argv[i] );	/* Expand wildcards */
#else
	    wcfn( argv[i] );		/* Do word count */
#endif	/* OS_MSDOS */
	}
    }

    /* Print summary if more than one file processed */
    if ( Filettl > 1 ) {
	wcsep();			/* Separator bars */
	wcprint( Lignttl, Linettl, Wordttl, Charttl, NULL );
    }

    return( 0 );
}
/*

*//* usage()

	Write the command-line help text to standard output.

Accepts :


Returns :

*/

usage()
{
	static char	*helptext[] = {	/* Help lines, NULL terminated */
	    "usage:  wc [-c] [-l] [-w] [-s type] files...\n",
	    "        -c to count characters\n",
	    "        -l to count lines\n",
	    "        -w to count words\n",
	    "        -s type to strip blank lines & comments, type is:\n",
	    "            C       for C style commenting\n",
	    "            C++     for C++ style\n",
	    "            ASEMI   for assembly with semicolon commenting\n",
	    "                    (also 34010)\n",
	    "            ASTAR   for assembly with asterisk commenting\n",
	    "                    (also 68020)\n",
	    "\n",
	    "If no options are given, default is -c -l -w\n",
	    NULL
	};
	char		**lineP;	/* Walks the help lines */

    for( lineP = &helptext[0]; *lineP != NULL; ++lineP )
	fputs( *lineP, stdout );
}
/*

*//* msdowildfn( ifnm )

	Wildcard file search, for MS-DOS.

Accepts :

	ifnm		Name (possibly wildcarded) of file

Returns :

	< wcfn called for each matching file >

Notes :

	A name that would not fit the local path buffers is reported
	on stderr and skipped, rather than being copied past the end
	of the buffer.

*/

#ifdef	OS_MSDOS
msdowildfn( ifnm )
	char		*ifnm;
{
	int		i;
	int		sts;
	struct find_t	eachfile;
	char		path[100];
	char		fullname[100];

    /* First find the drive/directory part of the filename (everything
       up to and including the last ':' or '\'); it is put back in
       front of every name the search hands us. */
    path[0] = NUL;
    for( i = strlen( ifnm )-1; i >= 0; --i )
	if ( ( ifnm[i] == ':' ) || ( ifnm[i] == '\\' ) ) {
	    ++i;			/* Prefix length, separator included */
	    if ( i >= (int)sizeof(path) ) {
		fprintf( stderr, "Path too long: %s\n", ifnm );
		return;
	    }
	    strncpy( &path[0], ifnm, i );
	    path[i] = NUL;		/* strncpy does not terminate */
	    break;
	}

    if ( ( sts = _dos_findfirst( ifnm, _A_NORMAL, &eachfile ) ) != 0 ) {
	fprintf( stderr, "No such file: %s\n", ifnm );
	return;
    }

    while( sts == 0 ) {
	/* Only build the full name when prefix + name + NUL fits */
	if ( strlen( &path[0] ) + strlen( &eachfile.name[0] )
						>= sizeof(fullname) )
	    fprintf( stderr, "Name too long: %s%s\n",
			&path[0], &eachfile.name[0] );
	else {
	    sprintf( &fullname[0], "%s%s", &path[0], &eachfile.name[0] );
	    wcfn( &fullname[0] );
	}
	sts = _dos_findnext( &eachfile );
    }
}
#endif	/* OS_MSDOS */
/*

*//* wcfn( fnP )

	Open a file by name and run the word count on it.

Accepts :

	fnP		Ptr to filename

Returns :

	< nothing >	If the file can not be opened a message is
			written to stderr and the file is skipped.

*/

wcfn( fnP )
	char		*fnP;		/* Name of file */
{
	FILE		*inP;		/* Stream opened on the file */

    inP = fopen( fnP, OPENMODE );
    if ( inP != NULL ) {
	wc( inP, fnP );			/* Count it, */
	fclose( inP );			/*  then release the stream */
    }
    else
	fprintf( stderr, "\"%s\" - can not open.\n", fnP );
}
/*

*//* wc( fP, fnP )

	Do word-count for file: count its lines, words and characters,
	print the result line, and add the counts to the running totals.

	In source mode (srcF set) comments of the current Cmtstyle are
	skipped, and lines left with nothing but blanks are tallied as
	"ignored" instead of being counted.  Text inside quoted strings
	is never examined for comments.

Accepts :

	fP		File ptr for open file
	fnP		Name of the file

Returns :

	< nothing >	Counts are printed through wcprint() and added
			to Lignttl, Linettl, Wordttl and Charttl.
			Warnings about an unterminated comment or
			string follow the printed line.

*/

wc( fP, fnP )
register FILE		*fP;
	char		*fnP;
{
register char		inword;		/* Inside a word */
register char		midline;	/* Chars counted since last newline */
	char		instr;		/* Quote char of open string, or 0 */
	char		escaped;	/* Previous char was an active '\' */
	char		nonblank;	/* Line holds something non-blank */
	char		badcmt;		/* Hit EOF inside a comment */
register int		c;		/* Current character */
	int		nextc;		/* Lookahead after a '/' */
register long		charC;
	long		lignC;
	long		lineC;
	long		wordC;

    charC = lineC = wordC = lignC = 0;
    escaped = nonblank = badcmt = FALSE;
    instr = 0;

    ++Filettl;
    midline = inword = FALSE;
    bolF = TRUE;

    while( ( c = xgetc( fP ) ) != EOF ) {
	if ( srcF ) {			/* If filtering source */
	    if ( instr == 0 ) {
		/* Check for comment according to current style */
		if ( Cmtstyle == C_ASTAR ) {
		    if ( c == '*' ) {	/* Comment to end of line */
			skipeol( fP );	/* Skip to line end */
			continue;	/* Try next line */
		    }
		}
		else if ( Cmtstyle == C_ASEMI ) {
		    /* ';' anywhere, or '*' in the first column */
		    if ( ( c == ';' ) ||
			 ( bolF && ( c == '*' ) ) ) {
			skipeol( fP );
			continue;
		    }
		}
		else if ( ( Cmtstyle == C_C ) || ( Cmtstyle == C_CPP ) ) {
		    while ( c == '/' ) {
			/* A '/' opens a comment only together with
			   the character that follows it. */
			nextc = xgetc( fP );

			if ( ( nextc == '/' ) && ( Cmtstyle == C_CPP ) )
			    skipeol( fP );	/* C++ style: rest of line */

			else if ( nextc == '*' ) {
			    /* Skip to end of comment, tallying the
			       lines it swallows as ignored. */
			    for( c = xgetc( fP ); c != EOF ; ) {

				if ( c == '\n' )
				    ++lignC;

				if ( c == '*' ) {
				    if ( ( c = xgetc( fP ) ) == '/' )
					break;
				    /* else recheck c: handles "**" */
				}
				else
				    c = xgetc( fP );
			    }

			    if ( c == EOF ) {
				badcmt = TRUE;	/* Warn after the counts */
				break;
			    }
			}

			else {
			    /* Just a slash (division, etc.): put the
			       lookahead back and count the '/' like
			       any other character. */
			    if ( nextc != EOF )
				ungetc( nextc, fP );
			    break;
			}

			/* Pick up the char following the comment; it
			   may start another comment. */
			c = xgetc( fP );
		    }
		}

		if ( c == EOF )
		    break;

		/* Check for string */
		if ( ( c == '"' ) || ( c == '\'' ) )
		    instr = c;
	    }

	    else {
		/* In string -- check for termination */
		if ( !escaped && ( c == instr ) )
		    instr = 0;
	    }

	    /* Check for escape character */
	    if ( c == '\\' )
		escaped = !escaped;
	    else
		escaped = FALSE;
	}


	++charC;
	if ( c == '\n' ) {
	    if ( !srcF || nonblank || instr )
		++lineC;
	    else {
		++lignC;		/* Blank line in source mode: */
		--charC;		/*  its newline is not counted */
	    }
	    inword = midline = nonblank = FALSE;
	    bolF = TRUE;		/* Next char starts a line */
	}
	else {
	    midline = TRUE;
	    bolF = FALSE;
	    if ( ( c == ' ' ) || ( c == '\011' ) ||
		 ( c == '\r' ) || ( c == '\014' ) )
		inword = FALSE;
	    else {
		nonblank = TRUE;
		if ( !inword ) {
		    ++wordC;
		    inword = TRUE;
		}
	    }
	}
    }

    /* Check for unterminated last line */
    if ( midline ) {
	if ( !srcF || nonblank || instr )
	    ++lineC;
	else
	    ++lignC;
    }

    wcprint( lignC, lineC, wordC, charC, fnP );
    Lignttl += lignC;
    Linettl += lineC;
    Wordttl += wordC;
    Charttl += charC;

    /* The "^---" in the warnings points up at the line just printed */
    if ( badcmt )
	printf( "     ^--- warning: unterminated comment.\n" );

    if ( instr )
	printf( "     ^--- warning: unterminated string.\n" );

}
/*

*//* skipeol( fP )

	Discard input up to the end of the current line.  The newline
	itself is pushed back so the caller reads it next; at end of
	file nothing is pushed back.

Accepts :

	fP		File pointer

Returns :

*/

skipeol( fP )
	FILE		*fP;		/* Ptr to file var for input */
{
	int		ch;		/* Character just read */

    do {
	ch = xgetc( fP );
    } while( ( ch != '\n' ) && ( ch != EOF ) );

    if ( ch == '\n' )			/* Leave EOL for the caller */
	ungetc( ch, fP );
}
/*

*//* wcprint( lignC, lineC, wordC, charC, fnP )

	Print one line of counts.  A column appears only when its flag
	is set: ignored lines (srcF), lines (lcF), words (wcF) and
	characters (ccF), in that order, followed by the file name.

Accepts :

	lignC		number of lines ignored
	lineC		number of lines
	wordC		number of words
	charC		number of chars
	fnP		Filename, if any (NULL if no name).

Returns :

*/

wcprint( lignC, lineC, wordC, charC, fnP )
	long		lignC;		/* Number of lines ignored */
	long		lineC;		/* Number of lines */
	long		wordC;		/* Number of words */
	long		charC;		/* Number of characters */
	char		*fnP;		/* Name of file, or NULL */
{
	long		value[4];	/* Column values, in print order */
	char		shown[4];	/* Whether each column is printed */
	int		col;

    shown[0] = srcF;	value[0] = lignC;
    shown[1] = lcF;	value[1] = lineC;
    shown[2] = wcF;	value[2] = wordC;
    shown[3] = ccF;	value[3] = charC;

    for( col = 0; col < 4; ++col )
	if ( shown[col] )
	    printf( "%12ld", value[col] );

    if ( fnP != NULL )
	printf( "    %s", fnP );
    printf( "\n" );
}
/*

*//* wcsep()

	Print separator bars for totals: one bar for every column
	that wcprint() shows (srcF, lcF, wcF, ccF), then a newline.

Accepts :


Returns :

*/

wcsep()
{
	char		shown[4];	/* Whether each column is printed */
	int		col;

    shown[0] = srcF;
    shown[1] = lcF;
    shown[2] = wcF;
    shown[3] = ccF;

    for( col = 0; col < 4; ++col )
	if ( shown[col] )
	    printf( "    --------" );
    printf( "\n" );
}
/*

*//* nccmp( s1, s2 )

	Compare two strings like strcmp, ignoring case

Accepts :

	s1		First string
	s2		Second string

Returns :

	< value >	-1, 0 or 1 as s1 sorts before, equal to, or
			after s2 once both are folded to upper case.
			Characters compare as unsigned values, the
			way strcmp orders them.

*/

int
nccmp( s1, s2 )
register const char	*s1;		/* First string */
register const char	*s2;		/* Second string */
{
register int		c1;		/* Char from first string */
register int		c2;		/* Char from second string */

    for( ; ; ) {
	/* Fetch through unsigned char: the ctype macros are only
	   defined for unsigned char values (and EOF), and a plain
	   char may be negative. */
	c1 = (unsigned char)*s1++;
	if ( islower( c1 ) )
	    c1 = toupper( c1 );

	c2 = (unsigned char)*s2++;
	if ( islower( c2 ) )
	    c2 = toupper( c2 );

	if ( c1 < c2 )
	    return( -1 );
	if ( c1 > c2 )
	    return ( 1 );
	if ( c1 == NUL )		/* Equal, and both strings ended */
	    return( 0 );
    }
}
/*

*//* xgetc( fP )

	Replacement for getc on systems such as MSDOS where a
	control-Z byte marks the end of a text file: a control-Z read
	from the stream is reported as EOF.  Compiled only when xgetc
	has not already been defined as a macro.

Accepts :

	fP		File stream ptr


Returns :

	< value >	Character value, or EOF

*/

#ifndef	xgetc			/* May be a macro */

int
xgetc( fP )
	FILE		*fP;		/* Ptr to file stream */
{
	int		ch;		/* Character from the stream */

    ch = getc( fP );
    return( ( ch == '\032' ) ? EOF : ch );	/* control-Z means EOF */
}

#endif	/* xgetc */
-- 
Mark E. Mallett  Zinn Computer Co/ PO Box 4188/ Manchester NH/ 03103 
Bus. Phone: 603 645 5069    Home: 603 424 8129     BIX: mmallett
uucp: mem@zinn.MV.COM  (  ...{decvax|elrond|harvard}!zinn!mem   )
Northern MA and Southern NH consultants:  Ask (in mail!) about MV.COM

kaleb@mars.jpl.nasa.gov (Kaleb Keithley) (01/10/90)

In article <664@zinn.MV.COM> mem@zinn.MV.COM (Mark E. Mallett) writes:
>In article <256@dyndata.UUCP> dan@dyndata.UUCP (Dan Everhart) writes:
>>I'd like to find a program which will analyze a C source file, and
>>report the number of lines of executable code, and the number of lines
>>of comments.  This would provide one piece of information for
>>productivity measurements.  If you can help, please email.  Thanks.
>

I know managers love to count lines of code as a productivity measure; but
if my worth is a function of lines of code, then I can generate lots of
lines of code (that don't do much.)  I'm not the first, and I won't be the
last, to say that "fewer (and better) lines of code cost more than more
lines of code!"

On the other hand, if you use utilities like this for statistical purposes,
i.e. estimating lines of code for new projects, comment to source ratios,
and the like, then the tool has greater chances for providing useful 
information.

I'll get off my soapbox now, thanks for listening.
Chewey, get us outta here!
                 
kaleb@mars.jpl.nasa.gov             (818)354-8771
Kaleb Keithley