[gnu.emacs.bug] etags for C++

jjc@UUNET.UU.NET (James Clark) (06/27/89)

I have made a first attempt at modifying etags so that it works
properly with C++. The heuristic used should also work with C programs
that use ANSI function definitions.

It looks for C++ functions in files with a suffix of `.C' or `.cc' and,
if the `-p' option is given, also in files with a suffix of `.c' or `.h'.
The `-p' option is necessary because most people use `.h' for their C++
header files, and these often contain function definitions.

It doesn't work too well in ctags mode, because there will typically
be many duplicate tags, especially if functions are defined within
classes. This could be fixed with more work, but I have no use for it,
so I haven't done it.

There is doubtless plenty of scope for refinement of the heuristic
used to locate the function definitions; I would welcome suggestions.

James Clark
jjc@jclark.uucp

*** /u/local/emacs/etc/etags.c	Thu Jan 26 14:25:58 1989
--- etags.c	Tue Jun 27 09:52:13 1989
***************
*** 188,193 ****
--- 188,194 ----
  int	vflag = 0;		/* -v: create vgrind style index output */
  int	xflag = 0;		/* -x: create cxref style output */
  int	eflag = 0;		/* -e: emacs style output */
+ int	pflag = 0;		/* -p: assume *.[ch] files are C++ */
  
  /* Name this program was invoked with.  */
  char *progname;
***************
*** 204,209 ****
--- 205,212 ----
  char *concat ();
  void initbuffer ();
  long readline ();
+ char *malloc(), *realloc();
+ char *xmalloc(), *xrealloc();
  
  /* A `struct linebuffer' is a structure which holds a line of text.
   `readline' reads a line from a stream into a linebuffer
***************
*** 308,313 ****
--- 311,319 ----
  		}
  	      outfile = av[1];
  	      goto end_loop;
+ 	    case 'p':
+ 	      pflag++;
+ 	      break;
  	    case 't':
  	      tflag++;
  	      break;
***************
*** 341,349 ****
      {
      usage:
  #ifdef VMS
!       fprintf (stderr, "Usage: %s [-aetwvx] [-f outfile] file ...\n", progname);
  #else
!       fprintf (stderr, "Usage: %s [-BFaetuwvx] [-f outfile] file ...\n", progname);
  #endif
        exit(BAD);
      }
--- 347,355 ----
      {
      usage:
  #ifdef VMS
!       fprintf (stderr, "Usage: %s [-aeptwvx] [-f outfile] file ...\n", progname);
  #else
!       fprintf (stderr, "Usage: %s [-BFaeptuwvx] [-f outfile] file ...\n", progname);
  #endif
        exit(BAD);
      }
***************
*** 533,538 ****
--- 539,552 ----
        fclose(inf);
        return;
      }
+   if (cp && (!strcmp (cp + 1, "C")
+ 	     || !strcmp (cp + 1, "cc")
+ 	     || (pflag && (!strcmp (cp + 1, "c") || !strcmp (cp + 1, "h")))))
+     {
+       Cplus_funcs(inf);
+       fclose(inf);
+       return;
+     }
    /* if not a .c or .h or .y file, try fortran */
    if (cp && (cp[1] != 'c' && cp[1] != 'h' && cp[1] != 'y')
        && cp[2] == '\0')
***************
*** 636,642 ****
  	  if (!wflag)
  	    {
  	      fprintf(stderr,"%s: Duplicate entry in file %s, line %d: %s\n",
! 		      progname, node->file,lineno,node->name);
  	      fprintf(stderr,"Second entry ignored\n");
  	    }
  	  return;
--- 650,656 ----
  	  if (!wflag)
  	    {
  	      fprintf(stderr,"%s: Duplicate entry in file %s, line %d: %s\n",
! 		      progname, node->file,node->lno,node->name);
  	      fprintf(stderr,"Second entry ignored\n");
  	    }
  	  return;
***************
*** 1071,1076 ****
--- 1085,1344 ----
    fseek (inf, saveftell, 0);
  }
  
+ /* C++ parsing */
+ 
+ /*
+ 
+ We look for
+ 
+    an identifier (other than `if', `while' and `switch')
+    at the outermost parenthesis level, followed by
+ 
+    a left parenthesis, followed by
+ 
+    a sequence of tokens not including a semicolon, followed by
+ 
+    a left brace.
+ 
+ This won't find 
+ 
+ - old-style function definitions
+ 
+ - operator function definitions
+ 
+ - functions such as
+ 
+      int (*foo())() {}
+ 
+ Note that it will find functions defined within classes.
+ 
+ */
+ 
+ /* Compare a null-terminated string S with the first N chars of P.
+    Unlike strncmp, the result is 0 only if S is exactly N chars long:
+      strmemcmp("foobar", "foo", 3) == 1
+      strncmp("foobar", "foo", 3) == 0 */
+ 
+ static int strmemcmp(s, p, n)
+      char *s, *p;
+      int n;
+ {
+   for (; *s && n > 0; s++, p++, n--)
+     if (*s != *p)
+       return *s - *p;
+   return n ? -1 : (*s ? 1 : 0);
+ }
+ 
+ Cplus_funcs ()
+ {
+   enum { 
+     BOL,			/* beginning of line */
+     START,			/* start of a token */
+     IN_CHAR, 
+     IN_CHAR_HAD_BACKSLASH,
+     IN_STRING,
+     IN_STRING_HAD_BACKSLASH,
+     IN_COMMENT,
+     IN_COMMENT_HAD_ASTERISK,
+     HAD_SLASH,
+     IN_EOL_COMMENT,		/* in a // comment */
+     IN_PREPROC,
+     IN_PREPROC_HAD_BACKSLASH,
+   } state = BOL;		/* current lexical state */
+   char *lp;
+   int plevel = 0;		/* parenthesis level */
+   int lineno = 0;
+   int charno = 0;
+   struct {
+     char *name;
+     char *line;
+     int endpos;
+     int lineno;
+     long charno;
+   } entry;			/* a possible function definition */
+   char c = '\0';
+   entry.name = 0;
+   while (!feof (inf))
+     {
+       if (c == '\0') {
+ 	CNL;
+       }
+       c = *lp;
+       if (c != '\0')
+ 	++lp;
+       
+       switch (state)
+ 	{
+ 	case IN_PREPROC:
+ 	  if (c == '\0')
+ 	    state = BOL;
+ 	  else if (c == '\\')
+ 	    state = IN_PREPROC_HAD_BACKSLASH;
+ 	  break;
+ 	case IN_PREPROC_HAD_BACKSLASH:
+ 	  if (c != '\\')
+ 	    state = IN_PREPROC;
+ 	  break;
+ 	case HAD_SLASH:
+ 	  if (c == '/') {
+ 	    state = IN_EOL_COMMENT;
+ 	    break;
+ 	  }
+ 	  if (c == '*') {
+ 	    state = IN_COMMENT;
+ 	    break;
+ 	  }
+ 	  state = START;
+ 	  /* fall through */
+ 	case BOL:
+ 	case START:
+ 	  switch (c)
+ 	    {
+ 	    case '#':
+ 	      if (state == BOL) {
+ 		while (isspace(*lp))
+ 		  ++lp;
+ 		if (strncmp(lp, "define", 6) == 0) {
+ 		  lp += 6;
+ 		  while (isspace(*lp))
+ 		    lp++;
+ 		  if (isalpha(*lp) || *lp == '_') {
+ 		    char *id_start = lp;
+ 		    do {
+ 		      ++lp;
+ 		    } while (isalnum(*lp) || *lp == '_');
+ 		    id_start = savenstr(id_start, lp - id_start);
+ 		    pfnote(id_start, 1, lb.buffer, lp - lb.buffer, 
+ 			   lineno, linecharno);
+ 		    free(id_start);
+ 		  }
+ 		}
+ 		state = IN_PREPROC;
+ 	      }
+ 	      break;
+ 	    case '\0':
+ 	      state = BOL;
+ 	      break;
+ 	    case '/':
+ 	      state = HAD_SLASH;
+ 	      break;
+ 	    case '"':
+ 	      state = IN_STRING;
+ 	      break;
+ 	    case '\'':
+ 	      state = IN_CHAR;
+ 	      break;
+ 	    case '{':
+ 	      if (entry.name) {
+ 		/* now we know it was a function definition */
+ 		pfnote(entry.name, 1, entry.line, entry.endpos, 
+ 		       entry.lineno, entry.charno);
+ 		free(entry.name);
+ 		free(entry.line);
+ 		entry.name = entry.line = 0;
+ 	      }
+ 	      break;
+ 	    case ';':
+ 	      if (entry.name) {
+ 		/* it wasn't a function definition after all */
+ 		free(entry.name);
+ 		free(entry.line);
+ 		entry.name = entry.line = 0;
+ 	      }
+ 	      break;
+ 	    case '(':
+ 	      plevel += 1;
+ 	      break;
+ 	    case ')':
+ 	      plevel -= 1;
+ 	      break;
+ 	    default:
+ 	      if (isalpha(c) || c == '_') {
+ 		char *id_start = lp - 1;
+ 		while (isalnum(*lp) || *lp == '_')
+ 		  lp++;
+ 		if (!eflag) {
+ 		  char *q = lp;
+ 		  while (isspace(*q))
+ 		    q++;
+ 		  if (q[0] == ':' && q[1] == ':') {
+ 		    q += 2;
+ 		    while (isspace(*q))
+ 		      q++;
+ 		    if (isalpha(*q) || *q == '_') {
+ 		      do {
+ 			++q;
+ 		      } while (isalnum(*q) || *q == '_');
+ 		      lp = q;
+ 		    }
+ 		  }
+ 		}
+ 		if (entry.name == 0
+ 		    && plevel == 0
+ 		    && strmemcmp("while", id_start, lp - id_start)
+ 		    && strmemcmp("if", id_start, lp - id_start)
+ 		    && strmemcmp("switch", id_start, lp - id_start)) {
+ 		  char *p;
+ 		  for (p = lp; isspace(*p); p++)
+ 		    ;
+ 		  if (*p == '(') {
+ 		    /* This is a possible function definition. We won't
+ 		       know for sure until we see left brace, which may
+ 		       be many lines later. So save away the information
+ 		       we need. */
+ 		    entry.line = savestr(lb.buffer);
+ 		    entry.endpos = lp - lb.buffer;
+ 		    entry.lineno = lineno;
+ 		    entry.charno = linecharno;
+ 		    entry.name = (char *)xmalloc(lp - id_start + 1);
+ 		    strncpy(entry.name, id_start, lp - id_start);
+ 		    entry.name[lp - id_start] = '\0';
+ 		  }
+ 		}
+ 	      }
+ 	      break;
+ 	    }
+ 	  if (state == BOL && !isspace(c) && c != '\0')
+ 	    state = START;
+ 	  break;
+ 	case IN_CHAR:
+ 	  if (c == '\'')
+ 	    state = START;
+ 	  else if (c == '\\')
+ 	    state = IN_CHAR_HAD_BACKSLASH;
+ 	  break;
+ 	case IN_CHAR_HAD_BACKSLASH:
+ 	  state = IN_CHAR;
+ 	  break;
+ 	case IN_STRING:
+ 	  if (c == '"')
+ 	    state = START;
+ 	  else if (c == '\\')
+ 	    state = IN_STRING_HAD_BACKSLASH;
+ 	  break;
+ 	case IN_STRING_HAD_BACKSLASH:
+ 	  state = IN_STRING;
+ 	  break;
+ 	case IN_COMMENT:
+ 	  if (c == '*')
+ 	    state = IN_COMMENT_HAD_ASTERISK;
+ 	  break;
+ 	case IN_COMMENT_HAD_ASTERISK:
+ 	  if (c == '/')
+ 	    state = START;
+ 	  else if (c != '*')
+ 	    state = IN_COMMENT;
+ 	  break;
+ 	case IN_EOL_COMMENT:
+ 	  if (c == '\0')
+ 	    state = BOL;
+ 	  break;
+ 	default:
+ 	  abort();
+ 	}
+     }
+ }
+ 
  /* Fortran parsing */
  
  char	*dbp;
***************
*** 1667,1689 ****
  
  /* Like malloc but get fatal error if memory is exhausted.  */
  
! int
  xmalloc (size)
       int size;
  {
!   int result = malloc (size);
    if (!result)
      fatal ("virtual memory exhausted", 0);
    return result;
  }
  
! int
  xrealloc (ptr, size)
       char *ptr;
       int size;
  {
!   int result = realloc (ptr, size);
    if (!result)
      fatal ("virtual memory exhausted");
    return result;
  }
--- 1935,1958 ----
  
  /* Like malloc but get fatal error if memory is exhausted.  */
  
! char *
  xmalloc (size)
       int size;
  {
!   char *result = malloc (size);
    if (!result)
      fatal ("virtual memory exhausted", 0);
    return result;
  }
  
! char *
  xrealloc (ptr, size)
       char *ptr;
       int size;
  {
!   char *result = realloc (ptr, size);
    if (!result)
      fatal ("virtual memory exhausted");
    return result;
  }
+