[net.sources] mk - extract command lines from files

mcg@omepd (Steven McGeady) (01/20/87)

With reference to the recent discussion of ways to embed command lines in
nroff/troff files, I wrote this program the last time this subject came up.
I post it here in hopes that it will be of use to someone.  Old timers on
the network may remember that this was first posted about 4 years ago.

S. McGeady

P.S. - I just noticed that my address on the man page is incorrect.  The
address and other information in the README file is correct - many things
change in 4 years.

----------------------------------------------------------------------------
# This is a shell archive.  Remove anything before this line, then
# unpack it by saving it in a file and typing "sh file".  (Files
# unpacked will be owned by you and have default permissions.)
#
# This archive contains:
# README mk.1 mk.c

echo x - README
cat > "README" << '//E*O*F README//'
README: Tue Jan 20 11:49:03 PST 1987

This is a program which reads the first part of a file and looks for a tag
of the form:

	$Compile:   cc -o %F -O %f&
	$Compile (OPTION): cc -o %F -DLOG -O %f&

The '%' fields are filled out with various things (part or all of the file
name, shell variables, other things), and the command is executed.  This
string can be embedded in a file at any point, to wit, in an nroff/troff
file in a line like:

	.\" $Compile: tbl %f | pic | eqn | troff -ms 

or in a C file:

	/* $Compile: $(CC) -c -DFOO=1 -DBAR=2 %f */

or in any other kind of file that supports a comment notion
(awk scripts, etc.)

In its simplest form, the invocation is

	mk file

There are options to select alternate '$Compile' lines, or to set
variables to be expanded in the lines, etc.

This program was written in 1983 in response to a similar set of
queries on the network about embedding commands in nroff sources,
while the author was at Tektronix, Inc.

The program is copyrighted, but as this (and the previous) posting
are 'publishing' of the program, it may be used without restriction
for personal use.  It may not be redistributed for profit.  In the
unlikely event that you would like to do so, you may call or write
to me:

	S. McGeady
	3714 SE 26th Ave.
	Portland, OR 97202
	(503) 235-2462 (h)
	(503) 681-4393 (w)

	tektronix!psu-cs!omepd!mcg
	uoregon!omepd!mcg


//E*O*F README//

echo x - mk.1
cat > "mk.1" << '//E*O*F mk.1//'
.TH MK 1
\" $Compile: nroff -man -e %'FLAGS' %f
.SH NAME
mk \- detect compilation directives in files
.br
compile \- detect compilation directives in files
.SH SYNOPSIS
.B mk
[
.BI -m mark
] [
.BI -d submark
] [
.BI -D def
] [
.B -n
] [
.B -s
]
file ...
.SH DESCRIPTION
.I Mk
(formerly known as
.I compile),
is a utility for detecting compilation directives within files.
.I Mk
searches through the first block of the named file(s), searching
for a marker, by default "$Compile:". When this marker
is detected, the portion of the line after the colon and up to a newline
(or an occurrence of two sequential unescaped dollar signs ('$')) is
executed by the shell via
.I system(3).
.PP
The strings that are substituted are:
.sp
.RS
.ns
.TP 8
.B %f
\- the full name of the file specified on the command line.
.br
.sp
.ns
.TP 8
.B %d
\- the
.I directory
part of the specified file, including the final '/'.
.br
.sp
.ns
.TP 8
.B %x
\- the
.I extension
on the specified file, if any. This is the
string following the
.I last
occurrence of a period ('.').
.br
.sp
.ns
.TP 8
.B %p
\- the
.I prefix
part of the specified file. This is the string which follows the final
slash and precedes the first period. (This is useful for SCCS files.)
.br
.sp
.ns
.TP 8
.B %F
\- the filename\-only part of the specified file. This 
does not include the path, prefix, or extension parts.
.br
.sp
.ns
.TP 8
.B %'name'
\- the
.I name
is substituted with the value (if any) specified by the
.B -D
switches on the command line. Double-quotes may be substituted for the
single quotes.
.br
.sp
.ns
.TP 8
.B %{name}
\- the
.I name
is substituted with the value (if any) of that variable in the user's
.I environment.
.RE
.PP
Also, some C-like escape sequences are substituted:
.sp
.RS
.ns
.TP 8
.B \en
\- newline
.br
.ns
.TP 8
.B \et
\- tab
.br
.ns
.TP 8
.B \e\e
\- backslash
.br
.ns
.TP 8
.B \e<nnn>
\- the character whose octal value is <nnn>
.br
.RE
.PP
Several switches modify the behavior of
.I mk :
.sp
.RS
.ns
.TP 12
.BI -m mark
\- specifies that the following argument should replace
the default mark string ("Compile"). Note that the replacement does
not include either the leading dollar\-sign or the trailing colon.
These are constant parts of the marker.
.br
.sp
.ns
.TP 12
.BI -d submark
\- specifies that the next argument is a
.I submark
that should be matched in addition to the specified (or defaulted)
.I mark.
The full mark searched for is then "$mark (submark):". Whitespace is
ignored. A command line including "-d DEBUG" would match
"$Compile (DEBUG):". Submarks in the file are ignored (do not take part
in matching) if no submarker is specified.
.br
.sp
.ns
.TP 12
.BI -D def
\- this argument specifies that the next argument is a
.I definition
specification. It takes the form
.I name=value.
When the substitution string "%'name'" is detected, the assigned
.I value
will be substituted, if it is defined, otherwise the empty string
will be substituted.
.br
.sp
.ns
.TP 12
.B -n
\- this flag specifies that the resulting command(s) should not be
executed, but only printed on the standard output (like
.I make (1)).
.br
.sp
.ns
.TP 12
.B -s
\- this flag indicates
.I silent
operation. Executed commands are not printed (like
.I make (1)).
.RE
.sp
.SH EXAMPLES
.I Mk
would most commonly be used to produce input for the shell.
The following lines might occur in a C program source file:
.sp
.RS
.nf
/*
 * $Compile: cc %'CFLAGS' -o %F -DFOO=1 %f
 * $Compile (DEBUG): cc %'CFLAGS' -o %F -DDEBUG=1 %f
 */
.fi
.RE
.sp
If the file were called "foo.c",
.I mk,
invoked as
.sp
.RS
.nf
mk foo.c
.fi
.RE
.sp
would execute the command:
.sp
.RS
.nf
cc  -o foo -DFOO=1 foo.c
.fi
.RE
.sp
With an invocation like
.sp
.RS
.nf
mk -dDEBUG -DCFLAGS="-g" foo.c
.fi
.RE
.sp
the command
.sp
.RS
.nf
cc -g -o foo -DDEBUG=1 foo.c
.fi
.RE
.sp
would be executed.
.I Mk
is in no way limited to "compiling" source language programs. It can
be used on
.I nroff
source by adding a line near the top of an
.I nroff
source file, e.g.
.sp
.RS
.nf
\e"  $Compile: nroff -ms -rO8 %f >%F.out
.fi
.RE
.sp
.SH BUGS
Extensions and prefixes are rather rigidly
defined. These bugs will be fixed with future extensions. See
the comment at the beginning of the source code for other
planned enhancements.
.SH AUTHOR
S. McGeady
.br
Tektronix, Inc.
.br
(503) 685-2555
.br
stevenm@tektronix		(CSNET)
.br
stevenm.tektronix@rand-relay	(ARPA)
.br
decvax!teklabs!stevenm		(UUCP)
.br
ucbvax!teklabs!stevenm		(UUCP)
.br
zehntel!tektronix!stevenm	(UUCP)

//E*O*F mk.1//

echo x - mk.c
cat > "mk.c" << '//E*O*F mk.c//'
/*
 * mk: detect a compilation command in a file, and execute it
 *	(formerly called 'compile')
 *
 * Usage: mk [-m marker] [-d submarker] [-D defn] [-s] [-n] file ...
 *
 * example marker line:
 *
 *	$Compile:   cc -o %F -O %f&
 *	$Compile (TEKECS): cc -o %F -DLOG -O %f&
 *	$Compile (DEBUG): cc -o %F -g -DDEBUG %f&
 *	
 *
 * this program searches for the first occurrence of a marker (DEFLTMARK)
 * in the first block of the named file(s), grabs the line on which
 * the marker occurs, performs some filename substitutions on the line,
 * and executes the line (typically a shell command line) via system(3),
 * or just prints it on the stdout when -n is given.
 *
 * this program currently makes the following substitutions:
 *
 *	%f	- full name, as spec'd on command line
 *	%F	- non-prefix, non-extension part of filename
 *		  (e.g.) 'foo' in 's.foo.c'
 *	%p	- prefix
 *	%x	- extension - this is the string following the LAST '.'
 *	%d	- directory part of filename
 *	%'name'	- the value defined for "name"
 *	%{name}	- the value of "name" from the environment
 *	\n	- newline
 *	\t	- tab
 *	\nnn	- (nnn = octal number) character escape
 *
 * command-line switches:
 *
 *	-n	- don't execute, just print (a la 'make')
 *	-s	- silent (a la 'make')
 *	-Dfoo=x	- define a variable which can be expanded by %'foo' or %"foo"
 *	-m mark	- specify alternate marker
 *	-d submark - select marker option	(e.g. compile -d DEBUG ...)
 *			$Compile (DEBUG): ...
 *			$Compile (PDP11): ...
 *
 * planned additions:
 *	%#[1..n]- the n'th character of the filename
 *	%P	- current working directory
 *	%r	- comma-extension (e.g. 'foo.c,v')
 *	-p	- a switch to turn off prefix processing
 *	-x c	- specify alternate extension delimiter (instead of '.')
 *
 *
 *	(c) Copyright 1983, Steven McGeady
 *
 *	This program may be redistributed to other computer sites, but
 *	not for profit, and providing that this notice remains intact.
 *
 *	All bug fixes and improvements should be mailed to the author.
 *
 *
 * Author:
 *	S. McGeady
 */


static char *SCCSid = "@(#)mk.c	1.4	mk - S. McGeady";

#include <stdio.h>
#include <ctype.h>


extern char *strcpy();
extern char *rindex();
extern char *index();
extern char *getenv();

extern char *translit();	/* forward reference */
extern char *valof();		/* forward reference */

/* #define	DEBUG(fmt, lst)	fprintf(stderr, fmt, lst); */

/* the backslash character, written as an octal character constant */
#define	BACKSL	'\134'
/* number of slots in nmlist[] */
#define	NUMDEFS	25
/* size of the marker-line and command buffers */
#define	MAXLIN	BUFSIZ
#define	TRUE	1
#define	FALSE	0

/* a marker is LEADCHAR, the mark string, an optional "(submark)", then ':' */
#define	LEADCHAR	'$'
#define	DEFLTMARK	"Compile"
char *markstr	= DEFLTMARK;	/* mark string searched for; replaced by -m */
char *submark	= NULL;		/* submark given with -d, NULL if none */

char *myname;		/* last path component of argv[0]; prefixes messages */
char *curfile;		/* file currently being processed; expanded by %f etc. */
int silent	= FALSE;	/* set by -s */
int exec	= TRUE;		/* cleared by -n */

/* one -D definition: stored by define(), looked up by valof() */
struct names {
	char	*nm_name;
	char	*nm_value;
} nmlist[NUMDEFS];

/*
 * optval: fetch the value that belongs to the switch letter at p.
 *
 * The value is the remainder of the same argument when there is one
 * ("-mMark"), otherwise the next command-line argument ("-m Mark"), in
 * which case *ip is advanced to that argument.  Returns NULL when the
 * command line is exhausted; *ip is then equal to argc.
 */
static char *
optval(p, ip, argc, argv)
register char *p;
int *ip;
int argc;
char **argv;
{
	if (p[1] != '\0') {
		return(p + 1);
	}
	if (++*ip < argc) {
		return(argv[*ip]);
	}
	return(NULL);
}

/*
 * main: process switches and files in command-line order.
 *
 * Switches take effect for the files that follow them.  For each file
 * the marker line is located with find(), expanded with translit() and
 * then either executed with system(3) or, under -n, printed on stdout.
 *
 * The exit status is the number of failures: files that could not be
 * opened, files without a marker, commands that returned a non-zero
 * status, and malformed switches.
 */
int
main(argc, argv)
int argc;
char **argv;
{
	char buf[MAXLIN];
	char combuf[MAXLIN];
	register char *p;
	char *val;
	int i;			/* not 'register': optval() needs its address */
	register FILE *fin;
	int retval = 0;

	if ((myname = rindex(argv[0], '/')) == NULL) {
		myname = argv[0];
	} else {
		myname++;
	}

#ifdef LOG
	log(argc, argv);
#endif

	if (argc < 2) {
		usage();
		exit(1);
	}

	/*
	 * main loop, process files
	 */

	for (i = 1; i < argc; i++) {
		if ((argv[i][0] == '-') && (argv[i][1] != '\0')) {
			/* flags may be clustered, e.g. "-ns" or "-nmMark" */
			for (p = &(argv[i][1]); *p != '\0'; p++) {
				if (*p == 'n') {	/* don't execute */
					exec = FALSE;
					continue;
				}
				if (*p == 's') {	/* act silently */
					silent = TRUE;
					continue;
				}
				switch (*p) {
				case 'm':	/* select alternate marker */
					if ((val = optval(p, &i, argc, argv)) != NULL) {
						markstr = val;
					} else {
						error("no marker specified after -m");
						retval++;
					}
					break;
				case 'd':	/* submarker selection */
					if ((val = optval(p, &i, argc, argv)) != NULL) {
						submark = val;
					} else {
						error("no submarker specified after -d");
						retval++;
					}
					break;
				case 'D':	/* define */
					if ((val = optval(p, &i, argc, argv)) != NULL) {
						define(val);
					} else {
						error("no definition after -D");
						retval++;
					}
					break;
				default:	/* ??? */
					error("unrecognized switch -%c", *p);
					retval++;
					break;
				}
				/* a valued or unknown switch ends this argument */
				break;
			}
			continue;
		}
		curfile = argv[i];
		if (strcmp(curfile, "-") == 0) {	/* '-' indicate stdin */
			fin = stdin;
		} else if ((fin=fopen(curfile, "r")) == NULL) {
			error("cannot open %s", curfile);
			retval++;
			continue;
		}
		if (!find(buf, LEADCHAR, markstr, submark, fin)) {
			if (submark == NULL) {
				error("no marker \"%c%s:\" in %s",LEADCHAR,markstr,curfile);
			} else {
				error("no marker \"%c%s (%s):\" in %s",
					LEADCHAR,markstr,submark,curfile);
			}
			retval++;
		} else {
			translit(combuf, buf);
			if (exec) {
				if (!silent) fprintf(stderr, "+ %s\n", combuf);
				fflush(stderr);
				/* a failing command must show in our exit status */
				if (system(combuf) != 0) {
					retval++;
				}
			} else if (!silent) {
				printf("%s\n", combuf);
				fflush(stdout);
			}
		}
		if (fin != stdin) fclose(fin);
	}
	exit(retval);
}

/*
 * matchmark: test whether 'text' (the characters that follow a lead
 * character) begins a marker line, and if so extract its command.
 *
 * A marker is the mark string, optionally followed by "(submark)", then
 * a ':'.  When smark is non-empty the "(submark)" must be present
 * between the mark and the colon and must match exactly; when smark is
 * NULL or empty any submark in the file is ignored.
 *
 * On a match the command (leading white space skipped, cut at the first
 * doubled lead character or at the newline) is copied to bp and TRUE is
 * returned; 'text' is modified in the process.  Otherwise FALSE is
 * returned and neither 'text' nor bp is touched.
 */
static int
matchmark(text, lead, mark, smark, bp)
char *text;
int lead;
char *mark;
char *smark;
char *bp;
{
	register char *p;
	register char *xp;
	register char *colon;
	int len;

	len = strlen(mark);
	if (strncmp(mark, text, len) != 0) {
		return(FALSE);
	}
	p = text + len;
	if ((colon = index(p, ':')) == NULL) {
		return(FALSE);
	}
	if (smark != NULL && *smark != '\0') {
		/* a '(' beyond the colon belongs to the command, not the marker */
		if ((xp = index(p, '(')) == NULL || xp > colon) {
			return(FALSE);
		}
		len = strlen(smark);
		if (strncmp(xp + 1, smark, len) != 0 || xp[len + 1] != ')') {
			return(FALSE);	/* submarker compare fails */
		}
		/* the command starts after the ':' that follows the ')' */
		if ((colon = index(xp + len + 2, ':')) == NULL) {
			return(FALSE);
		}
	}

	p = colon + 1;
	while (isspace(*p & 0377)) {	/* skip leading spaces */
		p++;
	}
	/* terminate line on a doubled lead character ('$$') or '\n' */
	for (xp = p; (xp = index(xp, lead)) != NULL; xp++) {
		if (xp[1] == lead) {
			*xp = '\0';
			break;
		}
	}
	if ((xp = rindex(p, '\n')) != NULL) {
		*xp = '\0';
	}
	strcpy(bp, p);
	return(TRUE);
}

/*
 * find: search the beginning of stream f for the first marker line and
 * copy its command text to bp.
 *
 *	bp	- receives the command; must hold at least MAXLIN characters
 *	lead	- character that introduces a marker ('$')
 *	mark	- mark string that must follow the lead character
 *	smark	- required submark, or NULL/"" to accept any marker
 *	f	- stream to read
 *
 * Characters are read one at a time until a lead character is seen; the
 * rest of that line is then read and every lead character on it is
 * tried as the start of a marker, so that a line such as
 * "$Header$ $Compile: ..." is recognized.  The search gives up at end
 * of file or after BUFSIZ characters have been examined outside of such
 * lines.
 *
 * Returns TRUE if a marker was found, FALSE otherwise.
 */
int
find(bp, lead, mark, smark, f)
char *bp;
char lead;
char *mark;
char *smark;
register FILE *f;
{
	char buf[MAXLIN];
	register int i;
	register int c;
	register char *p;

	for (i = 0; i < BUFSIZ; i++) {
		if ((c = getc(f)) == EOF) {
			break;
		}
		if (c != lead) {
			continue;
		}
		if (fgets(buf, MAXLIN, f) == NULL) {
			break;
		}
		p = buf;
		do {
			if (matchmark(p, lead, mark, smark, bp)) {
				return(TRUE);
			}
			/* try again after the next lead character on this line */
			if ((p = index(p, lead)) != NULL) {
				p++;
			}
		} while (p != NULL);
	}
	return(FALSE);
}

/*
 * copystr: append the NUL-terminated string s at dst and return a
 * pointer to the NUL that now terminates dst.
 */
static char *
copystr(dst, s)
register char *dst;
register char *s;
{
	while ((*dst = *s++) != '\0') {
		dst++;
	}
	return(dst);
}

/*
 * copyrange: append the characters from 'from' up to (not including)
 * 'to' at dst, terminate dst, and return a pointer to its NUL.
 */
static char *
copyrange(dst, from, to)
register char *dst;
register char *from;
register char *to;
{
	while (from < to) {
		*dst++ = *from++;
	}
	*dst = '\0';
	return(dst);
}

/*
 * basepart: return the part of path that follows its final '/', or
 * path itself when it contains no '/'.
 */
static char *
basepart(path)
char *path;
{
	register char *slash;

	slash = rindex(path, '/');
	return(slash != NULL ? slash + 1 : path);
}

/*
 * translit: expand the marker-line text src into the command dst.
 *
 * '%' codes (file-name parts are taken from the global curfile):
 *	%f	- curfile as given
 *	%F	- base name (after the final '/') up to its last '.'
 *	%x	- extension: what follows the last '.' of the base name
 *	%p	- prefix: the base name up to its first '.'
 *	%d	- directory part, including the final '/'
 *	%'name'	- (or %"name") value defined with -D, via valof()
 *	%{name}	- value of the environment variable, empty if unset
 *	%c	- any other character c is copied through, so %% yields %
 * %x and %p expand to nothing when the base name has no '.', %d when
 * curfile has no '/'.  An unterminated %'name or %{name loses only its
 * two-character introducer; the text after it is copied normally.
 *
 * '\' escapes:
 *	\n \t \\ - newline, tab, backslash
 *	\nnn	- the character with octal value nnn (one to three octal
 *		  digits; one redundant leading zero, \0nnn, is tolerated)
 *	\c	- any other character c: both characters are dropped
 *
 * A '%' or '\' that is the last character of src is copied literally.
 *
 * src is modified (name terminators are overwritten with NULs).  dst is
 * not bounds-checked: the caller must supply a buffer large enough for
 * the expanded text.
 *
 * Returns a pointer to the NUL that terminates dst.
 */
char *
translit(dst, src)
register char *dst;
register char *src;
{
	register char *tp;
	register char *xp;
	register int c;
	register int i;
	register int num;

	while (*src != '\0') {
		if (*src == '%') {
			if ((c = *++src) == '\0') {
				*dst++ = '%';	/* lone '%' at end of line */
				break;
			}
			src++;		/* src now follows the code character */
			switch (c) {
			case '\'':	/* %'name' == define */
			case '"':
				if ((xp = index(src, c)) != NULL) {
					*xp = '\0';
					dst = copystr(dst, valof(src));
					src = xp + 1;
				}
				break;

			case '{':	/* %{name} == get name from env */
				if ((xp = index(src, '}')) != NULL) {
					*xp = '\0';
					if ((tp = getenv(src)) != NULL) {
						dst = copystr(dst, tp);
					}
					src = xp + 1;
				}
				break;

			case 'f':	/* full filename */
				dst = copystr(dst, curfile);
				break;

			case 'F':	/* file part only */
				tp = basepart(curfile);
				if ((xp = rindex(tp, '.')) != NULL) {
					dst = copyrange(dst, tp, xp);
				} else {
					dst = copystr(dst, tp);
				}
				break;

			case 'x':	/* . extension */
				/* look only in the base name: "dir.d/file" has none */
				if ((xp = rindex(basepart(curfile), '.')) != NULL) {
					dst = copystr(dst, xp + 1);
				}
				break;

			case 'p':	/* prefix */
				tp = basepart(curfile);
				if ((xp = index(tp, '.')) != NULL) {
					dst = copyrange(dst, tp, xp);
				}
				break;

			case 'd':	/* directory part */
				dst = copyrange(dst, curfile, basepart(curfile));
				break;

			default:	/* unrecognized chars are copied thru */
				*dst++ = c;
				break;
			}		/* end of % codes switch */
		} else if (*src == BACKSL) {
			if ((c = *++src) == '\0') {
				*dst++ = BACKSL;	/* lone '\' at end of line */
				break;
			}
			src++;		/* src now follows the escape character */
			switch (c) {
			case 'n':	/* newline */
				*dst++ = '\n';
				break;

			case 't':	/* tab */
				*dst++ = '\t';
				break;

			case BACKSL:
				*dst++ = BACKSL;
				break;

			case '0': case '1': case '2': case '3':
			case '4': case '5': case '6': case '7':
				/* accept C-style "\0nnn": skip one redundant zero */
				if (c == '0' && *src == '0') {
					src++;
				}
				num = c - '0';
				for (i = 1; i < 3 && *src >= '0' && *src <= '7'; i++) {
					num = num * 8 + (*src++ - '0');
				}
				*dst++ = num;
				break;

			default:	/* unknown escapes are dropped */
				break;
			}		/* end of backslash codes switch */
		} else {
			*dst++ = *src++;
		}
	}

	*dst = '\0';
	return(dst);
}

static int nmcur = 0;		/* number of nmlist[] entries in use */

/*
 * define: record a -D definition.
 *
 * str has the form "name=value"; a bare "name" defines name as the
 * empty string.  str is split in place at the first '=' and the table
 * keeps pointers into it, so str must outlive the table (it is normally
 * an element of argv).  Defining a name a second time replaces its
 * value.
 *
 * The table has a fixed size; a new name that does not fit is reported
 * on stderr and ignored rather than written past the end of nmlist[].
 */
define(str)
char *str;
{
	register int i;
	register char *value;

	/*fprintf(stderr, "define('%s')\n", str);/*DBG*/

	if ((value = index(str, '=')) != NULL) {
		*value++ = '\0';
	} else {
		value = "";
	}

	/* redefinition: replace the value in the existing slot */
	for (i = 0; i < nmcur; i++) {
		if (strcmp(str, nmlist[i].nm_name) == 0) {
			nmlist[i].nm_value = value;
			return;
		}
	}

	if (nmcur >= (int)(sizeof(nmlist) / sizeof(nmlist[0]))) {
		fprintf(stderr, "mk: too many definitions, \"%s\" ignored\n", str);
		fflush(stderr);
		return;
	}
	nmlist[nmcur].nm_name = str;
	nmlist[nmcur].nm_value = value;
	nmcur++;
	return;
}

/*
 * valof: look up the value recorded for the name str.
 *
 * Scans the entries of nmlist[] that are in use, in order, and returns
 * the value of the first one whose name equals str.  An unknown name
 * yields the empty string, never NULL.
 */
char *
valof(str)
char *str;
{
	register int slot;

	/*fprintf(stderr, "valof('%s')\n", str);/*DBG*/

	slot = 0;
	while (slot < nmcur) {
		if (strcmp(nmlist[slot].nm_name, str) != 0) {
			slot++;
			continue;
		}
		return(nmlist[slot].nm_value);
	}
	return("");
}

/*
 * error: write "<myname>: <message>" and a newline on stderr.
 *
 * fmt is a printf-style format; up to seven arguments are handed on to
 * fprintf unchanged.  Unused trailing arguments are simply never
 * consumed by the format.  The stream is flushed before returning.
 */
/*VARARGS*/
error(fmt, a1, a2, a3, a4, a5, a6, a7)
char *fmt, *a1, *a2, *a3, *a4, *a5, *a6, *a7;
{
	register FILE *out;

	out = stderr;
	fprintf(out, "%s: ", myname);
	fprintf(out, fmt, a1, a2, a3, a4, a5, a6, a7);
	putc('\n', out);
	fflush(out);
}

/*
 * usage: print the command synopsis, prefixed with the name the program
 * was invoked under, on stderr.
 */
usage()
{
	static char synopsis[] =
		"Usage: %s [-m marker] [-d submarker] [-D defn] [-s] [-n] file ...\n";

	fprintf(stderr, synopsis, myname);
}

#ifdef LOG

#define	LOGFILE	"/cc/stevenm/tmp/mk.log"
extern char *ctime();

/*
 * log: append a record of this invocation to LOGFILE.
 *
 * The record has the form "user: arg0 arg1 ... (date)".  The user name
 * is the first field of the password entry returned by getpw() for the
 * real uid; when no usable entry is available the record is written
 * under the name "?".  Invocations by the author ("stevenm" or "mcg")
 * are not logged.  Logging is best-effort: nothing is reported if
 * LOGFILE cannot be opened.
 */
log(ac, av)
int ac;
char **av;
{
	long t;
	register char *p, *q;
	char pwbuf[MAXLIN];
	FILE *flog;

	/* getpw() can fail and leave pwbuf unset; don't dereference blindly */
	if (getpw(getuid(), pwbuf) != 0 || (p = index(pwbuf, ':')) == NULL) {
		strcpy(pwbuf, "?");
	} else {
		*p = '\0';
	}
	/* don't log the author */
	if (!strcmp("stevenm", pwbuf) || !strcmp("mcg", pwbuf)) {
		return;
	}
	if ((flog = fopen(LOGFILE, "a")) != NULL) {
		fprintf(flog, "%s: ", pwbuf);
		while(ac--) {
			fprintf(flog, "%s ", *av++);
		}
		time(&t);
		if ((q = ctime(&t)) == NULL) {
			q = "";
		} else if ((p = index(q, '\n')) != NULL) {
			*p = '\0';	/* drop ctime's trailing newline */
		}
		fprintf(flog, "(%s)\n", q);
		fclose(flog);
	}
}

#endif
//E*O*F mk.c//

echo Possible errors detected by \'wc\' [hopefully none]:
temp=/tmp/shar$$
trap "rm -f $temp; exit" 0 1 2 3 15
cat > $temp <<\!!!
      49     250    1383 README
     263     825    4493 mk.1
     584    1768   11371 mk.c
     896    2843   17247 total
!!!
wc  README mk.1 mk.c | sed 's=[^ ]*/==' | diff -b $temp -
exit 0