[comp.os.minix] symbol table support on executable files

veench@cs.vu.nl (Veen van CH) (02/19/88)

In the following shar file are three C-programs and a header file.
The header file is far from complete and only contains the parts I needed
to write these programs.

The first program, ast, adds a symbol table to an executable file
and uses the symbol.out file produced when compiling with the -s option:

	cc -s files.c libs.a > symbol.out

Nm displays the symbol table added to an executable file.
Strip removes the symbol table from an executable file.

				Dick van Veen
#! /bin/sh
# This is a shell archive, meaning:
# 1. Remove everything above the #! /bin/sh line.
# 2. Save the resulting text in a file.
# 3. Execute the file with /bin/sh (not csh) to create:
#	a.out.h
#	ast.c
#	nm.c
#	strip.c
# This archive created: Fri Feb 19 13:51:14 1988
export PATH; PATH=/bin:/usr/bin:$PATH
if test -f 'a.out.h'
then
	echo shar: "will not over-write existing file 'a.out.h'"
else
cat << \SHAR_EOF > 'a.out.h'
/* NOTE: this file is a temporary header file and should be replaced
 *	 by a complete one.
 */
/* a.out header file */

/* Header found at the start of an executable (a.out) file.
 * Check a_magic with BADMAG() before relying on any other field.
 * The file-offset macros further down are all derived from a_hdrlen
 * and the segment sizes stored here.
 */
struct	exec {			/* a.out header */
	unsigned char	a_magic[2];	/* magic number */
	unsigned char	a_flags;	/* flags, see below */
	unsigned char	a_cpu;		/* cpu id */
	unsigned char	a_hdrlen;	/* length of header */
	unsigned char	a_unused;	/* reserved for future use */
	unsigned short	a_version;	/* version stamp */	
				/* not used */
	long		a_text;		/* size of text segment in bytes */
	long		a_data;		/* size of data segment in bytes */
	long		a_bss;		/* size of bss segment in bytes */
	long		a_no_entry;	/* in fact: entry point, a_entry */
	long		a_total;	/* total memory allocated */
	long		a_syms;		/* size of symbol table */
				/* SHORT FORM ENDS HERE */
	/* The two fields below are only consulted when A_HASRELS() is true. */
	long		a_trsize;	/* text relocation size */
	long		a_drsize;	/* data relocation size */
};

/* The two magic bytes expected in a_magic[0] and a_magic[1]. */
#define A_MAGIC0	(unsigned char) 0x01
#define A_MAGIC1	(unsigned char) 0x03
/* BADMAG(X): non-zero when header X does not carry both magic bytes. */
#define BADMAG(X)	((X).a_magic[0] != A_MAGIC0 ||\
			 (X).a_magic[1] != A_MAGIC1)

/* CPU Id of TARGET machine */
	/* byte order coded in low order two bits */
#define A_NONE	0x00	/* unknown */
#define A_I8086	0x04	/* intel i8086/8088 */
#define A_M68K	0x0B	/* motorola m68000 */
#define A_NS16K	0x0C	/* national semiconductor 16032 */

/* The argument is parenthesized so that a compound expression such as
 * A_BLR(a | b) is masked as a whole; '&' binds tighter than '|'.
 */
#define A_BLR(cputype)	(((cputype)&0x01)!=0) /* TRUE if bytes left-to-right */
#define A_WLR(cputype)	(((cputype)&0x02)!=0) /* TRUE if words left-to-right */

/* flags: bit values stored in the a_flags field of struct exec */
#define A_EXEC	0x10	/* executable */
#define A_SEP	0x20	/* separate I/D */
#define A_PURE	0x40	/* pure text */		/* not used */
#define A_TOVLY	0x80	/* text overlay */	/* not used */

/* offsets of various things: */
/* Every macro takes a struct exec (not a pointer) and yields a file
 * offset or a truth value computed from the header fields only.
 */
#define A_MINHDR	32	/* size of the short form of the header */
/* struct exec has no text-offset field, so the text segment always
 * starts directly after the header.
 */
#define	A_TEXTPOS(X)	((long)(X).a_hdrlen)
#define A_DATAPOS(X)	(A_TEXTPOS(X) + (X).a_text)
#define	A_HASRELS(X)	((X).a_hdrlen > (unsigned char) A_MINHDR)
#define A_HASEXT(X)	((X).a_hdrlen > (unsigned char) (A_MINHDR +  8))
#define A_HASLNS(X)	((X).a_hdrlen > (unsigned char) (A_MINHDR + 16))
#define A_HASTOFF(X)	((X).a_hdrlen > (unsigned char) (A_MINHDR + 24))
#define A_TRELPOS(X)	(A_DATAPOS(X) + (X).a_data)
#define A_DRELPOS(X)	(A_TRELPOS(X) + (X).a_trsize)
/* The symbol table follows the relocation info; the relocation sizes
 * are only trusted when the header is long enough to contain them.
 */
#define A_SYMPOS(X)	(A_TRELPOS(X) + (A_HASRELS(X) ? \
				((X).a_trsize + (X).a_drsize) : 0))

/* One relocation record. r_symndx holds either one of the S_* segment
 * codes or an external symbol number; r_type holds one of the R_* values.
 */
struct reloc {
	long		r_vaddr;	/* virtual address of reference */
	unsigned short	r_symndx;	/* internal segnum or extern symbol num */
	unsigned short	r_type;		/* relocation type */
};

/* r_type values: */
#define R_ABBS		0
#define R_RELLBYTE	2
#define R_PCRBYTE	3
#define R_RELWORD	4
#define R_PCRWORD	5
#define R_RELLONG	6
#define R_PCRLONG	7
#define R_REL3BYTE	8
#define R_KBRANCHE	9

/* r_symndx for internal segments */
/* Negative numbers cast to unsigned short, i.e. the topmost values of
 * that type.
 */
#define S_ABS		((unsigned short)-1)
#define S_TEXT		((unsigned short)-2)
#define S_DATA		((unsigned short)-3)
#define S_BSS		((unsigned short)-4)

/* One symbol table entry as stored in the executable file.
 * n_name is a fixed 8-byte field: shorter names are NUL padded, a name
 * of exactly 8 characters has no terminating NUL.
 * n_sclass combines a section code (mask N_SECT) with a storage class
 * (mask N_CLASS).
 */
struct nlist {			/* symbol table entry */
	char	 	n_name[8];	/* symbol name */
	long	 	n_value;	/* value */
	unsigned char	n_sclass;	/* storage class */
	unsigned char	n_numaux;	/* number of auxiliary entries */
						/* not used */
	unsigned short	n_type;		/* language base and derived type */
						/* not used */
};

/* low bits of storage class (section) */
/* Extract with (n_sclass & N_SECT) before comparing with the codes below. */
#define	N_SECT		  07	/* section mask */
#define N_UNDF		  00	/* undefined */
#define N_ABS		  01	/* absolute */
#define N_TEXT		  02	/* text */
#define N_DATA		  03	/* data */
#define	N_BSS		  04	/* bss */
#define N_COMM		  05	/* (common) */

/* high bits of storage class */
#define N_CLASS		0370	/* storage class mask */
/* NOTE(review): C_NULL is defined without a value, so it expands to
 * nothing and cannot be used in an expression; presumably 0000 was
 * intended -- confirm before using it.
 */
#define C_NULL
#define C_EXT		0020	/* external symbol */
#define C_STAT		0030	/* static */
	/* there are many others, but they are not supported */
SHAR_EOF
fi
if test -f 'ast.c'
then
	echo shar: "will not over-write existing file 'ast.c'"
else
cat << \SHAR_EOF > 'ast.c'
/* ast - add symbol table.	Author: Dick van Veen, veench@cs.vu.nl */

#include <a.out.h>
#include <stdio.h>

/*
 * Since the a.out file in MINIX does not contain any symbol table,
 * we use the symbol table produced with the -s option of asld.
 *
 * Read symbol table in memory, remove compiler generated labels,
 * sort the labels and add it to the a.out file.
 *
 * When finally there comes a real as and ld, we may also get
 * a symbol table in the a.out file, and we can forget this program.
 *
 */

/*
 * ast [flags] [file] [symbolfile]
 *
 * flags:
 *	-x	do not preserve local symbols
 *	-X	preserve local symbols except for those whose names begin
 *		with 'I'; these are compiler generated.
 *
 *	-	when no symbol file is present, symbol.out is assumed.
 *	-	when no file is present, a.out is assumed.
 *	-	when one file name is present it must be the executable file
 *	-	just one flag may be present.
 *
 */

#define A_OUT		"a.out"		/* executable used when none is named */
#define SYMBOL_FILE	"symbol.out"	/* contains symbol table */
#define LINE_LENGTH	24		/* size of the line buffer below */

/* Results of transform_line(); it returns 0 for a line holding a symbol. */
#define WORTH_LESS	1		/* lines contain no symbol */
#define LAST_LINE	2		/* end of file reached */

struct exec header;			/* header info of a.out file */

int x_flag;				/* -x: drop every local symbol */
int X_flag;				/* -X: drop local symbols starting with 'I' */
int o_flag;				/* not referenced by the code in this file */

char *s_file, *o_file;			/* names of files used by ast */
FILE *s_fd, *o_fd;			/* stdio streams of those files */
int nr_symbols;				/* number of symbols added */
char buffer[LINE_LENGTH];		/* contains line of symbol file */

char io_buf[BUFSIZ];			/* handed to setbuf() for the symbol file */
unsigned int get_value();		/* forward declaration */

main(argc, argv)
int argc;
char **argv;
{
	extern FILE *fopen();

	argv++;
	if (*argv != NULL && **argv == '-') {
		*argv += 1;
		if (**argv == 'x') x_flag = 1;
		else if (**argv == 'X') X_flag = 1;
		else {
			fprintf(stderr, "illegal flag: -%c\n", **argv);
			Exit(-1);
		}
		argv++;
	}
	if (*argv != NULL) {
		o_file = *argv;
		argv++;
	}
	if (*argv != NULL) {
		s_file = *argv;
		argv++;
	}
	if (*argv != NULL) {
		fprintf(stderr, "Usage: ast [-{x,X}] [file] [symbolfile]\n");
		Exit(-1);
	}
	if (o_file == NULL) o_file = A_OUT;
	o_fd = fopen(o_file, "a");
	if (o_fd == NULL) {
		fprintf(stderr, "can't open %s\n", o_file);
		Exit(-1);
	}
	if (s_file == NULL) s_file = SYMBOL_FILE;
	s_fd = fopen(s_file, "r");
	if (s_fd == NULL) {
		fprintf(stderr, "can't open %s\n", s_file);
		Exit(-1);
	}
	setbuf(s_fd, io_buf);
	ast(s_fd, o_fd);
	Exit(0);
}

/* Leave the program with status val, calling _cleanup() first.
 * _cleanup() is not defined in this file; presumably it is the stdio
 * routine that flushes the open streams -- confirm against the library.
 */
Exit(val)
int val;
{
	_cleanup();
	exit(val);
}

ast(s_fd, o_fd)
FILE *s_fd, *o_fd;
{
	struct nlist symbol;
	int line_type;

	do_header();
	for(;;) {
		read_line(s_fd, buffer);
		line_type = transform_line(buffer, &symbol);
		if (line_type == WORTH_LESS) continue;
		if (line_type == LAST_LINE) break;
		save_line(o_fd, &symbol);
	}
	redo_header(o_fd);
}

#ifndef LINE_LENGTH
#define LINE_LENGTH	24		/* normally defined at the top of this file */
#endif

/* Read one line of fd into buffer.
 *
 * buffer must have room for LINE_LENGTH bytes.  At most LINE_LENGTH - 2
 * characters of the line are stored; the rest of an over-long line is
 * read and discarded.  The stored text is followed by "\n\0" when the
 * line ended with a newline and by "\0\0" when end of file was hit, so
 * an empty buffer (buffer[0] == '\0') means that the input is exhausted.
 * Always returns 0.
 */
int read_line(fd, buffer)
FILE *fd;
char *buffer;
{
	int ch;			/* int, so that EOF differs from every byte */
	int room;		/* characters that still fit in buffer */

	room = LINE_LENGTH - 2;	/* keep two bytes for the terminator pair */
	ch = fgetc(fd);
	while (ch != '\n' && ch != EOF) {
		if (room > 0) {
			*buffer++ = ch;
			room--;
		}
		ch = fgetc(fd);
	}
	*buffer = (ch == EOF) ? '\0' : '\n';
	buffer[1] = '\0';
	return(0);
}

/* Convert one line of the symbol file into a symbol table entry.
 *
 * A symbol line looks like "<type> <4 hex digits> <name>": the type
 * letter selects the section (a, u, t, d, b) and is written in upper
 * case for an external symbol.
 *
 * Returns 0 when *symbol was filled in, WORTH_LESS when the line holds
 * no symbol and LAST_LINE when the buffer is empty (end of input).
 * A malformed symbol line ends the program through Exit(-1).
 */
transform_line(buffer, symbol)
char *buffer;
struct nlist *symbol;
{
	switch(*buffer) {
	case 'a':	/* absolute symbol */
		symbol->n_sclass = N_ABS;
		break;
	case 'A':
		symbol->n_sclass = N_ABS | C_EXT;
		break;
	case 'u':	/* undefined symbol */
		symbol->n_sclass = N_UNDF;
		break;
	case 'U':
		symbol->n_sclass = N_UNDF | C_EXT;
		break;
	case 't':	/* text symbol */
		symbol->n_sclass = N_TEXT;
		break;
	case 'T':
		symbol->n_sclass = N_TEXT | C_EXT;
		break;
	case 'd':	/* data symbol */
		symbol->n_sclass = N_DATA;
		break;		/* a local symbol must not become external */
	case 'D':
		symbol->n_sclass = N_DATA | C_EXT;
		break;
	case 'b':	/* bss symbol */
		symbol->n_sclass = N_BSS;
		break;		/* a local symbol must not become external */
	case 'B':
		symbol->n_sclass = N_BSS | C_EXT;
		break;
	case '\0':	/* reached end of file */
		return(LAST_LINE);
	default:	/* one of first two lines */
		return(WORTH_LESS);
	}

	/* The entry is written to the file as a whole, so the unused
	 * fields must not be left holding whatever was in memory.
	 */
	symbol->n_numaux = 0;
	symbol->n_type = 0;

	if (buffer[1] != ' ') {
		fprintf(stderr, "illegal file format\n");
		Exit(-1);
	}
	symbol->n_value = get_value(buffer + 2);

	if (buffer[6] != ' ') {
		fprintf(stderr, "illegal file format\n");
		Exit(-1);
	}
	get_name(buffer + 7, symbol->n_name);
	return(0);	/* yeah, found a symbol */
}

/* Append *symbol to fd and count it in nr_symbols, unless the flags
 * say otherwise: with -x every local symbol is dropped, with -X only
 * the local symbols whose name starts with 'I'.
 * A write error is reported and ends the program through Exit(-1).
 */
save_line(fd, symbol)
FILE *fd;
struct nlist *symbol;
{
	int is_local;

	is_local = (symbol->n_sclass & C_EXT) == 0;
	if (is_local && x_flag) return;
	if (is_local && X_flag && symbol->n_name[0] == 'I') return;

	if (fwrite(symbol, sizeof(struct nlist), 1, fd) == 1) {
		nr_symbols++;
		return;
	}
	fprintf(stderr, "can't write %s\n", o_file);
	Exit(-1);
}

/* Return the value of the four hexadecimal digits at string, most
 * significant digit first.  get_bits() validates each digit.
 */
unsigned get_value(string)
char *string;
{
	unsigned result;
	char *end;

	result = 0;
	for (end = string + 4; string != end; string++)
		result = (result << 4) | get_bits(*string);
	return(result);
}

/* Return the value (0..15) of hexadecimal digit ch; both upper and
 * lower case letters are accepted.  Any other character is reported
 * as a format error and ends the program through Exit(-1).
 */
get_bits(ch)
char ch;
{
	int digit;

	if ('0' <= ch && ch <= '9')
		digit = ch - '0';
	else if ('A' <= ch && ch <= 'F')
		digit = ch - 'A' + 10;
	else if ('a' <= ch && ch <= 'f')
		digit = ch - 'a' + 10;
	else {
		fprintf(stderr, "illegal file format\n");
		Exit(-1);
	}
	return (digit);
}

/* Copy a symbol name from str1 into the 8-byte field str2.
 *
 * The name ends at a newline, at a NUL (the last line of a file may
 * lack its newline) or after 8 characters, whichever comes first.
 * The remainder of the field is filled with NULs; a name of 8
 * characters is stored without a terminator.  Always returns 0.
 */
int get_name(str1, str2)
register char *str1, *str2;
{
	int count;

	count = 0;
	while (count < 8 && *str1 != '\n' && *str1 != '\0') {
		*str2++ = *str1++;
		count++;
	}
	for (; count < 8; count++)
		*str2++ = '\0';
	return(0);
}

/* Load the header of the executable o_file into `header', reject the
 * file when it is too short, has the wrong magic number or already
 * carries a symbol table, and move the output stream o_fd to the
 * place where the symbol table belongs.  Also resets nr_symbols.
 * Every rejection is reported and ends the program through Exit(-1).
 */
do_header()
{
	int fd, got;

	nr_symbols = 0;

	/* The header is fetched through a descriptor of its own, which is
	 * not needed any more once the bytes are in memory.
	 */
	fd = open(o_file, 0);
	got = read(fd, &header, sizeof(struct exec));
	close(fd);

	if (got != sizeof(struct exec)) {
		fprintf(stderr, "%s: no executable file\n", o_file);
		Exit(-1);
	}
	if (BADMAG(header)) {
		fprintf(stderr, "%s: bad header\n", o_file);
		Exit(-1);
	}
	if (header.a_syms != 0L) {
		fprintf(stderr, "%s: symbol table is installed\n", o_file);
		Exit(-1);
	}
	fseek(o_fd, A_SYMPOS(header), 0);
}

/* Store the size of the symbol table that was written in the header
 * and rewrite the header at the start of fd.  A write error is
 * reported and ends the program through Exit(-1).
 */
redo_header(fd)
FILE *fd;
{
	header.a_syms = nr_symbols * sizeof(struct nlist);

	/* NOTE(review): fd was opened in "a" mode by main(); this rewrite
	 * assumes the library honours the seek for the next write -- confirm.
	 */
	fseek(fd, 0L, 0);
	if (fwrite(&header, sizeof(header), 1, fd) == 1) return;

	fprintf(stderr, "%s: can't write\n", o_file);
	Exit(-1);
}

SHAR_EOF
fi
if test -f 'nm.c'
then
	echo shar: "will not over-write existing file 'nm.c'"
else
cat << \SHAR_EOF > 'nm.c'
/* nm - print name list.	Author: Dick van Veen, veench@cs.vu.nl */

#include <a.out.h>
#include <stdio.h>

/*
 * Read the name list in memory, sort it, and print it.
 *
 */

/*
 * nm [-gnopru] [file] ...
 *
 * flags:
 *	-g	print only external symbols.
 *	-n	sort numerically rather than alphabetically.
 *	-o	prepend file name to each line rather than only once.
 *	-p	don't sort, print in symbol-table order.
 *	-r	sort in reverse order.
 *	-u	print only undefined symbols.
 *
 *	-	when no file name is present, a.out is assumed.
 *
 *	NOTE:	no archives are supported because assembly files don't
 *		have symbol tables.
 *
 */

#define A_OUT		"a.out"		/* file listed when none is named */

/* One variable per command line flag; set to 1 by main(). */
int g_flag;				/* -g: external symbols only */
int n_flag;				/* -n: sort numerically */
int o_flag;				/* -o: prefix every line with the file name */
int p_flag;				/* -p: do not sort */
int r_flag;				/* -r: reverse the sort order */
int u_flag;				/* -u: undefined symbols only */

char io_buf[BUFSIZ];			/* io buffer, handed to setbuf() in main() */
struct exec header;			/* header of a.out file */
int stbl_elems;				/* #elements in symbol table */

/* Entry point of nm: collect the flags from every leading argument
 * that starts with '-', then list the symbols of each remaining
 * argument, or of a.out when there is none.  An unknown flag is
 * reported and ends the program through Exit(-1).
 */
main(argc, argv)
int argc;
char **argv;
{
	argv++;
	while (*argv != 0 && **argv == '-') {
		*argv += 1;		/* step over the '-' */
		while (**argv != '\0') {
			switch (**argv) {
			case 'g':
				g_flag = 1;
				break;
			case 'n':
				n_flag = 1;
				break;
			case 'o':
				o_flag = 1;
				break;
			case 'p':
				p_flag = 1;
				break;
			case 'r':
				r_flag = 1;
				break;
			case 'u':
				u_flag = 1;
				break;
			default:
				fprintf(stderr, "illegal flag: -%c\n", **argv);
				Exit(-1);
			}
			*argv += 1;
		}
		argv++;
	}
	/* The listing is printed on stdout and nothing is ever read from
	 * stdin, so stdout is the stream that gets the buffer.
	 */
	setbuf(stdout, io_buf);
	if (*argv == 0) nm(A_OUT);
	else while (*argv != 0) {
		nm(*argv);
		argv++;
	}
	Exit(0);
}

/* Leave the program with status val, calling _cleanup() first.
 * _cleanup() is not defined in this file; presumably it is the stdio
 * routine that flushes the open streams -- confirm against the library.
 */
Exit(val)
int val;
{
	_cleanup();
	exit(val);
}

/* Comparison routine handed to qsort() by nm().
 *
 * With -n the entries are ordered by section, then by value, then by
 * name; otherwise by name and then by value.  With -r the result is
 * negated.  Returns a negative, zero or positive number in the manner
 * of strncmp().
 */
nm_sort(stbl1, stbl2)
struct nlist *stbl1, *stbl2;
{
	int cmp;
	int sect1, sect2;

	/* primary key */
	cmp = 0;
	if (n_flag) {
		sect1 = stbl1->n_sclass & N_SECT;
		sect2 = stbl2->n_sclass & N_SECT;
		if (sect1 != sect2) cmp = (sect1 < sect2) ? -1 : 1;
	} else {
		cmp = strncmp(stbl1->n_name, stbl2->n_name, 8);
	}

	/* ties are broken by value, and for -n finally by name */
	if (cmp == 0) {
		if (stbl1->n_value != stbl2->n_value)
			cmp = (stbl1->n_value < stbl2->n_value) ? -1 : 1;
		else if (n_flag)
			cmp = strncmp(stbl1->n_name, stbl2->n_name, 8);
	}

	if (r_flag) cmp = -cmp;		/* reverse sort */
	return(cmp);
}

/* Print the symbol table of one executable file.
 *
 * The table is read into memory in one piece, sorted with nm_sort()
 * unless -p was given, and printed by nm_print().  Every failure is
 * reported on stderr and makes the function return without printing;
 * the descriptor and the table are released on every path.
 */
nm(file)
char *file;
{
	struct nlist *stbl;
	int fd;
	int size;			/* size of the table in bytes */

	fd = open(file, 0);
	if (fd == -1) {
		fprintf(stderr, "can't open %s\n", file);
		return;
	}

	if (read_header(fd)) {
		fprintf(stderr, "%s: no executable file\n", file);
		close(fd);
		return;
	}

	size = (int) (header.a_syms & 0xFFFF);
	stbl = (struct nlist *) malloc(size);
	if (stbl == NULL) {
		fprintf(stderr, "%s: can't allocate symbol table\n", file);
		close(fd);
		return;
	}
	if (read(fd, stbl, size) != size) {
		fprintf(stderr, "%s: can't read symbol table\n", file);
		free(stbl);
		close(fd);
		return;
	}
	close(fd);			/* everything needed is in memory now */

	stbl_elems = (int) header.a_syms/sizeof(struct nlist);
	if (!p_flag) qsort(stbl, stbl_elems, sizeof(struct nlist), nm_sort);
	nm_print(file, stbl);
	free(stbl);
}

/* Read the header of the file open on fd into `header' and move fd to
 * the start of the symbol table.
 * Returns 0 on success and 1 when the file is too short or does not
 * carry the magic number.
 */
read_header(fd)
int fd;
{
	int got;

	got = read(fd, &header, sizeof(struct exec));
	if (got != sizeof(struct exec) || BADMAG(header)) return(1);

	lseek(fd, A_SYMPOS(header), 0);
	return(0);
}

/* Print the stbl_elems entries of stbl, one per line, as
 * "<value> <type> <name>".
 *
 * The type letter is a, t, d, b or u for the section and is printed in
 * upper case for an external symbol.  With -o every line is prefixed
 * with the file name, otherwise the name is printed once as a heading.
 * -g restricts the output to external symbols and -u to undefined
 * ones.
 */
nm_print(file, stbl)
char *file;
register struct nlist *stbl;
{
	struct nlist *last;
	char name[9];			/* n_name plus a guaranteed NUL */
	int n_sclass;
	char type;

	name[8] = '\0';
	if (!o_flag) printf("%s:\n", file);
	for (last = &stbl[stbl_elems]; stbl != last; stbl++) {
		if (g_flag && !(stbl->n_sclass & C_EXT)) continue;
		/* '!=' binds tighter than '&': the mask needs parentheses */
		if (u_flag && (stbl->n_sclass & N_SECT) != N_UNDF) continue;

		n_sclass = stbl->n_sclass & N_SECT;
		if (n_sclass == N_ABS) type = 'a';
		else if (n_sclass == N_TEXT) type = 't';
		else if (n_sclass == N_DATA) type = 'd';
		else if (n_sclass == N_BSS) type = 'b';
		else type = 'u';
		if (stbl->n_sclass & C_EXT) type += 'A' -'a';
		strncpy(name, stbl->n_name, 8);
		/* n_value is a long while %X expects an unsigned int, so
		 * the value is converted explicitly.
		 */
		if (o_flag) printf("%s:%04X %c %s\n", file,
				(unsigned) stbl->n_value, type, name);
		else printf("%04X %c %s\n", (unsigned) stbl->n_value,
				type, name);
	}
}
SHAR_EOF
fi
if test -f 'strip.c'
then
	echo shar: "will not over-write existing file 'strip.c'"
else
cat << \SHAR_EOF > 'strip.c'
/* strip - remove symbols.	Author: Dick van Veen, veench@cs.vu.nl */

#include <a.out.h>
#include <stdio.h>
#include <stat.h>

/*
 * strip [file] ...
 *
 *	-	when no file is present, a.out is assumed.
 *
 */

#define A_OUT		"a.out"		/* file stripped when none is named */
#define NAME_LENGTH	128		/* max file path name */

char buffer[BUFSIZ];			/* used to copy executable */
char new_file[NAME_LENGTH];		/* contains name of temporary */
struct exec header;			/* header of the file being stripped */

/* Entry point of strip: strip every file named on the command line,
 * or a.out when no file is named.  The exit status is always 0.
 */
main(argc, argv)
int argc;
char **argv;
{
	char **arg;

	arg = argv + 1;
	if (*arg == NULL) {
		strip(A_OUT);
		exit(0);
	}
	for (; *arg != NULL; arg++)
		strip(*arg);
	exit(0);
}

/* Remove the symbol table from one executable file.
 *
 * The header (with a_syms cleared) and the text and data segments are
 * copied to a temporary file in the same directory, which then takes
 * the place of the original and gets the original's mode.  A file
 * without a symbol table is left alone.  Every failure is reported on
 * stderr and makes the function return.
 */
strip(file)
char *file;
{
	int fd, new_fd;
	struct stat buf;

	fd = open(file, 0);
	if (fd == -1) {
		fprintf(stderr, "can't open %s\n", file);
		return;
	}
	if (read_header(fd)) {
		fprintf(stderr, "%s: not an executable file\n", file);
		close(fd);
		return;
	}
	if (header.a_syms == 0L) {
		close(fd);		/* no symbol table present */
		return;
	}
	header.a_syms = 0L;		/* remove table size */
	/* The mode is restored at the end; without it the file would be
	 * left with whatever mode the temporary was created with.
	 */
	if (fstat(fd, &buf) == -1) {
		fprintf(stderr, "can't stat %s\n", file);
		close(fd);
		return;
	}
	new_fd = make_tmp(new_file, file);
	if (new_fd == -1) {
		fprintf(stderr, "can't create temporary file\n");
		close(fd);
		return;
	}
	if (write_header(new_fd)) {
		fprintf(stderr, "%s: can't write temporary file\n", new_file);
		unlink(new_file);
		close(fd);
		close(new_fd);
		return;
	}
	if (copy_file(fd, new_fd, header.a_text + header.a_data)) {
		fprintf(stderr, "can't copy %s\n", file);
		unlink(new_file);
		close(fd);
		close(new_fd);
		return;
	}
	close(fd);
	close(new_fd);
	if (unlink(file) == -1) {
		fprintf(stderr, "can't unlink %s\n", file);
		unlink(new_file);
		return;
	}
	/* The original is gone at this point: when the link fails the
	 * temporary is the only copy left and must not be removed.
	 */
	if (link(new_file, file) == -1) {
		fprintf(stderr, "can't link %s to %s\n", new_file, file);
		return;
	}
	unlink(new_file);
	chmod(file, buf.st_mode);
}

/* Read the header of the file open on fd into `header'.
 *
 * The header has a variable size: the short form is read first to
 * check the magic number and to learn the real length, then the whole
 * header is read again from the start of the file.  On success fd is
 * left just behind the header.
 * Returns 0 on success and 1 when the file is not an executable or its
 * header is larger than struct exec.
 */
read_header(fd)
int fd;
{
	int want, got;

	got = read(fd, &header, A_MINHDR);
	if (got != A_MINHDR || BADMAG(header) ||
	    header.a_hdrlen > sizeof(struct exec))
		return(1);

	lseek(fd, 0L, 0);		/* variable size header */
	want = (int) header.a_hdrlen;
	got = read(fd, &header, want);
	return(got != (int) header.a_hdrlen ? 1 : 0);
}

/* Write the a_hdrlen bytes of `header' at the start of fd.
 * Returns 0 on success and 1 when not all bytes could be written.
 */
write_header(fd)
int fd;
{
	int len;

	len = (int) header.a_hdrlen;
	lseek(fd, 0L, 0);
	return(write(fd, &header, len) == len ? 0 : 1);
}

/* Create a temporary file in the directory of `name'.
 *
 * The name of the temporary is built in new_name, which must have room
 * for NAME_LENGTH bytes: the directory part of `name' followed by a
 * name made by mktemp() from "XXXXXX".
 * Returns the descriptor given by creat(), or -1 when a name does not
 * fit in NAME_LENGTH bytes or the file cannot be created.
 */
int make_tmp(new_name, name)
char *new_name, *name;
{
	int len;
	char *nameptr;			/* start of the file name component */
	extern char *rindex();

	len = strlen(name);
	if (len + 1 > NAME_LENGTH) return(-1);
	strcpy(new_name, name);
	nameptr = rindex(new_name, '/');
	if (nameptr == NULL) nameptr = new_name;	/* no directory part */
	else nameptr++;					/* keep the '/' */
	/* directory part + "XXXXXX" + NUL must fit in new_name */
	if ((nameptr - new_name) + 6 + 1 > NAME_LENGTH) return(-1);
	strcpy(nameptr, "XXXXXX");
	mktemp(new_name);
	return(creat(new_name, 0777));
}

/* Copy size bytes from fd1 to fd2, starting at the current position
 * of both descriptors and going through the global `buffer'.
 * Returns 0 when all bytes were copied and 1 on a read error, a write
 * error or when fd1 ends before size bytes were seen.
 */
copy_file(fd1, fd2, size)
int fd1, fd2;
long size;
{
	long left;			/* bytes still to be copied */
	int length;

	for (left = size; left > 0; left -= length) {
		/* compare as long, so a large remainder is never
		 * squeezed into an int first
		 */
		if (left > (long) sizeof(buffer)) length = (int) sizeof(buffer);
		else length = (int) left;
		length = read(fd1, buffer, length);
		if (length <= 0) return(1);	/* error or early end of file */
		if (write(fd2, buffer, length) != length) return(1);
	}
	return(0);
}
SHAR_EOF
fi
exit 0
#	End of shell archive