tcjones@watdragon.waterloo.edu (Crocodile Dundee) (11/09/87)
Here is an awk script that complements ctags. It will produce a tags file which has tags for all #define's and also for the first occurrence of all (well almost) identifier names. It is a little simple-minded about the way it collects identifier names, the cases that are not handled are documented in the code. Steve Hayman & Terry Jones ------------------------------------------------------------------------------- Department Of Computer Science, University Of Waterloo Waterloo Ontario Canada N2L 3G1 {ihnp4,allegra,decvax,utzoo,utcsri,clyde}!watmath!watdragon!tcjones tcjones@dragon.waterloo.{cdn,edu} tcjones@WATER.bitnet tcjones%watdragon@waterloo.csnet [from oz, tcjones@dragon.waterloo.cdn@munnari] ------------------------------------------------------------------------------- #!/bin/sh # This is a shell archive. Remove anything before this line, # then unpack it by saving it in a file and typing "sh file". # # Wrapped by watdragon!tcjones on Sat Nov 7 04:39:02 EST 1987 # Contents: mytags echo x - mytags sed 's/^@//' > "mytags" <<'@//E*O*F mytags//' #!/bin/sh PATH=/bin:/usr/ucb:/usr/bin # # usage: mytags [source-files] # Enhanced version of ctags. # Merge standard "ctags" and create extra tags from #define statements # and declarations. # # Declaration cases not handled: # ============================== # # # - Repeated identifier names. # ========================== # Only the first instance will be tagged. # Be careful about ^]'ing to tags that are in functions... you may # not get what you want. Worse, you might get put into another file # without getting what you want. You can always get back with ^^ # # # - Lines of declarations that are continued with a comma e.g. # ========================================================== # int fred, harry, joe, # mike, dick; # Will not try to produce tags for mike or dick. # # # - Declaration lines that do not start with a type name e.g. # ========================================================= # /* silly comment in the way */ int fred; # Will not tag fred. # # # - Declarations in comment blocks will be tagged e.g. # ================================================== # /* start of comment # int fred; # int harry; # end of comment */ # Will produce tags for fred and harry (if they don't already exist). # # # Run ctags, create extra tags, sort. # # Note that vi searches in NOMAGIC mode, meaning # only ^ and $ have any effect. Thus we have # to escape these, and /\, but nothing else. # (Note also that due to a bug in vi you get left in # nomagic mode if the pattern isn't found) # # Steve Hayman (MFCF) # Terry Jones (F.U.N. Corporation) 18/10/87 # #------------------------------------------------------------------------------- # Department Of Computer Science, University Of Waterloo # Waterloo Ontario Canada N2L 3G1 # #{ihnp4,allegra,decvax,utzoo,utcsri,clyde}!watmath!watdragon!tcjones #tcjones@dragon.waterloo.{cdn,edu} tcjones@WATER.bitnet #tcjones%watdragon@waterloo.csnet [from oz, tcjones@dragon.waterloo.cdn@munnari] #------------------------------------------------------------------------------- # # if [ $# -eq 0 ] then echo usage: `basename $0` files exit 1 fi # # Make the standard tags file with ctags. # ctags -w -t $* # # Do the additional tags # awk ' # # Initialise a few handy-dandy associative arrays. # BEGIN { keywd["char"]++ keywd["int"]++ keywd["long"]++ keywd["double"]++ keywd["float"]++ keywd["short"]++ keywd["register"]++ keywd["static"]++ keywd["void"]++ keywd["unsigned"]++ follow["["]++; follow["="]++ follow[";"]++ } # # The #define grabber. # NF > 0 && /^#[ ]*define/ { total_tokens++ if ($1 == "#") token = $3 else token = $2 # # Careful with macro functions. # if ( i = index(token, "(") ) token = substr(token, 1, i - 1) # # Set up these tags for later output (see END clause). # patterns[total_tokens] = $0 files[total_tokens] = FILENAME tags[total_tokens] = token next } # # The declaration grabber. # # # Make sure we have some fields and that the first is a type name. # Could check that NF>1 but for declarations like int*fred; # NF > 0 && keywd[$1] == 1 { # # If the last field is a keyword then we must have something like # # unsigned int # silly() # # And so we should just continue to the next line # (We could probably do a getline before the next, but then again # they might just have a #define there... who knows? who cares?) # if ( keywd[$NF] ) next # # Check to find the first word on the line that is not in the keywd # array. This must (famous last words) be the identifier we want. # for ( i = 2; i <= NF; i++ ) { if( keywd[$i] == 0 ) break } # # Get the tail of the line, starting from the first identifier. # spot = index($0, $i) line = substr($0, spot, length - spot + 1) # # Strip trailing characters from line like ; and = and [ if present # # *Dont* break out of the for loop once you have found one as # this will make the order of their declaration in the START # clause important. Anyway, it is not clear who would come # first out of = and [ # # We do this here since we want a line such as # # char *fred="this is fred" /* comment about fred the char* */ # # to be cut off at the "=" instead of processing each of the ten fields # *fred="this, is, fred", /*, comment, about, fred, the, char* and */ # to see if it they are identifiers. This way we process only "*fred". # since the line gets chopped off at the "=". # # (Dont take "," out at this stage, since we are going to split on ",") # for ( f in follow ) { if ( j = index(line, f) ) { # god knows why i have to do this fred = substr(line, 1, j - 1) line = fred } } # # Split the line that remains on commas. # total_ids = split(line, identifiers, ",") # # Process each of the identifiers. # for ( i = 1; i <= total_ids; i++ ) { token = identifiers[i] if ( length( token ) == 0 ) continue # # If there is a "(" present then this must be a function name # as in # # int silly() # # so we just continue. # if ( index(token, "(") ) continue # # Strip off leading white space and * characters. # while ( (first = substr(token, 1, 1) ) == "*" || \ first == " " || first == " " ) token = substr(token, 2, length(token) - 1) # # Otherwise lets assume we have an identifier. # Check to see that it is not already in existence, if it is # then its too bad for the user, well throw this one away. # # (one alternative would be to prepend the function name (if there # is one) to the identifier name). But this is messy and probably # would never get used anyway. # if ( identifiers[ token ] == 1 ) continue identifiers[ token ] = 1 total_tokens++ # # And finally set up the arrays for later use. # patterns[total_tokens] = $0 tags[total_tokens] = token files[total_tokens] = FILENAME } } # # Finally, process all of the tags array. # # The search pattern is the entire line. Print a line that looks like # # token <tab> filename <tab> /<appropriately-escaped-pattern>/ # END { for ( tok in patterns ) { pattern = patterns[tok] file = files[tok] tag = tags[tok] printf "%s\t%s\t/^", tag, file for ( i = 1; i <= length(pattern); i++ ) { if( index("^$/\\", c = substr(pattern,i,1)) ) printf "\\" printf "%s", c } printf "$/\n" } } # # Send all of this into sort, merging the tags we created with ctags # ' $* | sort -u -o tags - tags @//E*O*F mytags// chmod u=rwx,g=rx,o=rx mytags exit 0