[comp.sys.atari.st] Object formats

leo@philmds.UUCP (Leo de Wit) (09/03/88)

Hi folks!
I'd like to extend my disassembler (one of my 'current programming
projects' 8-) with an option to disassemble object files (you know,
those files left by your compiler to be linked into an executable
program).

What I would like to know:
a) How many different formats exist? The ones I know of are TOS
format (extension .o) and GST format (extension .bin). Are there any
(many ?) others?
b) What do these formats look like? I've been fiddling with dumps of
GST objects and I can recognize certain patterns, but this seems a hard
way to go.  Any official documentation? (must be, since for instance
Lattice uses a GST linker for their objects).
c) Libraries: for the GST linker a library is just a file that is a
concatenation of object files; does anyone know of more clever linkers
(those that can handle random access libraries)? I think the speed of
the GST linker leaves something to be desired (perhaps I should wait for the GNU
stuff?).

E-mail preferably; I'll summarize. But if you think you're a guru on
this topic, feel free to teach us poor earthly creatures 8-).

            Leo.

leo@philmds.UUCP (Leo de Wit) (09/24/88)

Hi folks. Here are the collected responses to my question about object
formats. Since there seem to be so many around, I'm thinking about a
new solution to allow my disassembler to crunch object files.
Nevertheless, this info seems very useful, so here it is for you all to
enjoy. Many thanks to those who have spent time in sending me a
contribution. As for Edgar Roeder, I hope he will do both John Waldron
and me a great pleasure by sending the better linker for the GST-BIN
format (posting to one of the newsgroups would be great).

And here we go then (LIFO order) ...


******** From mcvax!stag!ardvar!krs (Kent Schumacher):

---------------------------  filefmt.h -------------------------

/*	Laser C object file format definitions
*/

#define  LMAGIC		0x0107	/*	magic number of a Laser C object file	*/

/*	Header prepended to each Laser object file.
	Eight consecutive long fields: the magic number, the sizes of the
	text, data, bss and symbol table parts, the entry point, and the
	sizes of the text and data relocation information.
*/
typedef struct {
	long				a_magic;	/* magic number						*/
	long				a_text;		/* size of text segment				*/
	long				a_data;		/* size of initialized data			*/
	long				a_bss;		/* size of uninitialized data		*/
	long				a_syms;		/* size of symbol table				*/
	long				a_entry;	/* entry point						*/
	long				a_trsize;	/* size of text relocation			*/
	long				a_drsize;	/* size of data relocation			*/
	}
	exec;

/*	Format of a relocation datum.
	r_info packs several sub-fields into one unsigned long; they are
	extracted with the r_symbolnum/r_pcrel/r_length/r_extern macros.
*/
typedef struct {
	long				r_address;		/* address which is relocated		*/
	unsigned long		r_info;			/* r_symbolnum, r_pcrel, r_length,	*/
										/* r_extern.						*/
	}
	reloc_info;

/*	NOTE:  If r_extern is zero, then r_address is actually an N_TYPE,
		and no symbol entry is present for the relocation.
*/

/*	Fields for r_info (above)
	Each macro extracts one bit-field from an r_info value:
		bits 8-31	r_symbolnum
		bit  7		r_pcrel
		bits 5-6	r_length
		bit  4		r_extern
	The argument is parenthesized so that an expression argument
	(for example  a | b) is shifted as a whole rather than having the
	shift bind to its last operand only.
*/
#define  r_symbolnum(x)	(((x)>>8) & 0xffffffL)
#define  r_pcrel(x)		(((x)>>7) & 0x1L)
#define  r_length(x)	(((x)>>5) & 0x3L)
#define  r_extern(x)	(((x)>>4) & 0x1L)

/*	Symbol table entry
	NOTE(review): n_name is declared as a pointer but documented as an
	index into the string table -- presumably the value stored in the
	file is an offset, not an address; confirm before dereferencing.
*/
typedef struct {
	char					*n_name;	/* index into string table			*/
	char					n_type;		/* type flag, i.e. N_TEXT etc		*/
	char					n_other;	/* unused							*/
	char					n_desc;		/* currently not used				*/
	long					n_value;	/* value of this sym				*/
	}
	nlist;

/*	Values for n_type (above).
	The segment codes are even numbers; N_EXT is bit 0 and is or'ed
	onto a segment code, and N_TYPE masks the segment code back out.
*/
#define  N_UNDF		0x00	/*	undefined symbol					*/
#define  N_ABS		0x02	/*	absolute							*/
#define  N_TEXT		0x04	/*	text								*/
#define  N_DATA		0x06	/*	data								*/
#define  N_BSS		0x08	/*	bss									*/

#define  N_EXT		0x01	/*	external flag, combined with the above by or	*/
#define  N_TYPE		0x1e	/*	mask selecting the type bits		*/

/*	Following the relocation information is a long word (32-bit)
	which tells the length of the string table which follows.
	The length includes the four bytes of the long word (it
	includes its own size).  Strings are zero (0) terminated.
*/

/*	GEMDOS executable file format
*/

/*  CP/M-68K header
    NOTE(review): the magic number is a 16-bit value (0x601A), so this
    layout presumably assumes a 16-bit int and a 32-bit long -- confirm
    for the compiler in use.
    NOTE(review): the Alcyon description of what appears to be the same
    header, later in this file, places the reserved long before the
    entry point and describes the last word as a relocation-bits flag
    rather than a size -- confirm the order of c_entry/c_res and the
    meaning of c_reloc against the GEMDOS documentation.
*/
typedef struct {
    int        c_magic;    /* magic number (0x601A)       */
    long       c_text;     /* size of text segment        */
    long       c_data;     /* size of initialized data    */
    long       c_bss;      /* size of uninitialized data  */
    long       c_syms;     /* size of symbol table        */
    long       c_entry;    /* entry point                 */
    long       c_res;      /* reserved, always zero       */
    int        c_reloc;    /* size of data relocation     */
    } header;

/*  Symbol table entry
    name is a fixed 8-character field; type is an OR of the CP/M-68K
    symbol type flags defined in this file.
*/
typedef struct {
    char       name[8];    /* Symbol name                 */
    int        type;       /* Type (e.g. DEFINED|TEX_REL) */
    long       value;      /* Symbol value                */
    } symbol;

/*  CP/M-68K symbol type flags; each one is a single bit
*/
#define DEFINED       0x8000 /* symbol has a definition    */
#define EQUATED       0x4000 /* symbol is an equate        */
#define GLOBAL        0x2000 /* symbol is global           */
#define EQU_REG       0x1000 /* symbol is a register       */
#define EXTERNAL      0x0800 /* reference is external      */
#define DAT_REL       0x0400 /* refers to the Data segment */
#define TEX_REL       0x0200 /* refers to the Text segment */
#define BSS_REL       0x0100 /* refers to the Bss segment  */

/* The above values may be OR'd together to indicate
	symbol type.

	One word (16-bit) of relocation information exists for each
	word of TEXT and DATA.  The type of relocation is indicated
	in bits 0-2 of the word.  If the relocation is an external
	reference, the remaining bits (15-3) form an index into the
	symbol table, thus indicating the name of the external
	reference.
*/

/*  CP/M-68K relocation codes, held in bits 0-2 of a relocation word
*/
#define NO_RELOC      0      /* nothing to relocate        */
#define DATA_BASED    1      /* relative to Data segment   */
#define TEXT_BASED    2      /* relative to Text segment   */
#define BSS_BASED     3      /* relative to Bss segment    */
#define UNDEF_SYMBOL  4      /* symbolic reference         */
#define LONG_REF      5      /* next relocation is long    */
#define PC_RELATIVE   6      /* PC relative reference      */
#define INSTRUCTION   7      /* word is an instruction     */

/*	The file format output by the linker (GEMDOS) is identical to the
	DRI object file format excepting the relocation information.  The
	GEMDOS loader will only relocate 32-bit references.  GEMDOS
	relocation information consists of a long (32-bit) word, indicating
	the offset into the program of the first long word to be relocated,
	followed by a series of relocation bytes (8-bit).  These bytes
	indicate the distance from the last offset relocated to the current
	offset to be relocated.  If a relocation byte is equal to 254, the
	last offset is incremented, but no relocation is done.  A
	relocation byte of zero means end-of-relocation-information.
*/

--------------------------------- EOF -----------------------------

I haven't really looked at what I've sent you. If you have some
questions I can probably answer them by looking them up in the
Laser C manual.  Hope it's some help...


******** From mcvax!cs.tcd.ie!jwaldron (John Waldron)

	I am interested in any information on good linkers for the GST
object format. I use Lattice C 3.04 to compile a very large program and
linking it is slow. Also setting the -debug option causes a bus error.
I have a specification of the GST object format. It is in the manual of
an early version of the metacomco assembler. Using this I have written
a program which can list the modules in a library. It can also update a
particular module. However the program has no concept of references to
other modules. This means that the whole object file is input to the link,
instead of being able to use the library command in the linker to input
the modules needed only.


******** From mcvax!STONY-BROOK.SCRC.Symbolics.COM!jrd (John R. Dunning)
    []...
Ummm, unless there's a truly unbelievable coincidence, .o can mean more
than one format.  GAS produces the format described in gnu-out.h, in the
gcc stuff.  It's for sure different than TOS format, but also called .o.
    []...
I've got some doc somewhere on GNU format; I'll try to dig it up.  I
think it's identical to that used on Sun 2's (and 3's?).  It's fairly
complicated, especially when you throw in all the symbol info used by
GDB.
    []...
Well, the GNU linker is happy to do random access when linking.  It's
pretty fast (tho the first time I ran it I thought my hard disk was
having a seizure; 28ms avg seek time, and LD was running it as fast as
it would go ! )

The format of OLB files as used by GNU is something I whipped up (see
ar.h).  In general, there's a header for the whole library, and a
per-element header that describes each elt; ie date, size etc.  The elts
are just object files.
    []...
Well, if you arrange to disassemble GNU object files, it's trivial to
hack library elements.  The GNU stuff comes with a module called
ARSCAN.C that you build into any program that wants to grok library
elements.  You tell it which elt you want, and it finds it and sets you
up with a handle that you can then read from just as you would read from
an ordinary file handle.


******** From mcvax!sbsvax!roeder (Edgar Roeder)

In the German magazine ST-Computer (I think 3/88) they have published a better
linker for the GST-BIN format. And of course there they described the exact
format. I have the source on disk and can e-mail or post it.


******** From mcvax!market.alliant.com!rosenkra (Bill Rosenkranz)

here is the format for alcyon .o and archive (ar68) files as best as i can
figure it out.


archives (alcyon):
------------------
each file starts with the magic number 0xFF65. it appears only once (first
2 bytes of file). each file is headed by:

	/* header that precedes each file stored in an Alcyon (ar68) archive */
	struct arfhdr 
	{
		char	ar_name[12];	/* name of the archived file */
		long	ar_dum1;	/* probably date (always 0x00000000) */
		int	ar_dum2;	/* probably uid (always 0x0000) */
		int	ar_dum3;	/* probably gid (always 0x0000) */
		int	ar_mode;	/* most always 0x0000, not used */
		long	ar_size;	/* presumably the size of the file that follows */
		int	ar_dum4;	/* always 0x0000 (prob. ARFMAG) */
	};

the "ar_dumn" entries always seem to be 0x0000 (in every archive i have). they
probably contain date (long), uid (0x0000), gid (0x0000), and mode (0x0000)
but are never used by ar68 (seemingly). i do have some atari-supplied archive
with the mode field non-zero, but it is always rw-rw-rw- (0x01B6) and never
really used by ar68.

following the header for each file is the actual .o (or any other file) file.

archives (MWC):
---------------
each file starts with the magic number 0177535. it appears only once (first
2 bytes of file...i don't know what format it is, probably a binary 16-bit
word). each file is headed by:

	/* header that precedes each file stored in an MWC archive */
	struct arfhdr 
	{
		char	ar_name[14];	/* name of the archived file */
		long	ar_date;	/* time inserted */
		short	ar_gid;		/* group id */
		short	ar_uid;		/* user id */
		short	ar_mode;	/* mode */
		long	ar_size;	/* number of bytes of file data after this header */
	};

immediately following the header for each file is the actual ar_size bytes of
the .o (or any other file) file.


object (.o) mods (alcyon):
--------------------------
these seem to have the same format as .prg files. the general layout is:

	header
	text
	data
	symbols
	fixup (relocation) bits

the header struct looks like this:

	/*
	 *	header at the start of an Alcyon .o file; the text segment
	 *	follows it directly.
	 *	NOTE(review): SHSIZE (28) matches this layout only with a
	 *	2-byte int and 4-byte long -- confirm for the compiler in use.
	 */
	struct exec
	{
		int		a_magic;	/* "magic" number */
		long		a_text;		/* size of text segment */
		long		a_data;		/* size of data segment */
		long		a_bss;		/* size of bss segment */
		long		a_syms;		/* size of symbol table */
	       	long		a_resv1;	/* (Reserved, always 0) */
		long		a_beg;		/* start of text/prog exec */
		int		a_relbits;	/* rel bits flag (0 if yes) */
						/* text segment follows im- */
						/* mediately after this */

	};

	#define	SHSIZE		28		/* size of the header */
	#define AMAGIC		0x601A		/* magic, type a: contiguous */
	#define BMAGIC		0x601B 		/* magic, type b: noncontiguous */

symbols look like this:

	/* one entry of the Alcyon symbol table */
	struct nlist
	{
		char	n_name[8];		/* symbol name */
		int	n_type;			/* symbol type */
		long	n_value;		/* symbol value */
	
	};

	/*
	 *	values found in the n_type field of a symbol
	 */
	#define	N_TYPE		0xFF00		/* mask covering the type bits */
	#define N_DEFINED	0x8000		/* defined */
	#define N_EQUATED	0x4000		/* equated */
	#define N_GLOBAL	0x2000		/* global */
	#define N_EQREG		0x1000		/* equated register */
	#define N_EXTERN	0x0800		/* external */
	#define N_DATAREL	0x0400		/* data relative */
	#define N_TEXTREL	0x0200		/* text relative */
	#define N_BSSREL	0x0100		/* bss relative */
	#define	N_UNDF		0x0000		/* undefined */
	#define	N_FN		0x0001		/* file name symbol */

relocation bits exist if the header relbits value is 0. if non zero, they
do not exist. one word of reloc info exists for each word in the text segm.
(extern references and addr constants).

each rel word is 16 bits. LS bits (0-2) are:

	00	no rel info, reference is absolute
	01	ref relative to base addr of data segment
	02	ref relative to base addr of text segment
	03	ref relative to base addr of bss
	04	references undefined symbol
	05	ref upper word of a longword; next rel word holds value
		determining whether ref is absolute or dependent on base addr
		of text or data seg or the bss
	06	16-bit PC-relative ref
	07	indicates 1st word of instruction, which does not require
		relocation info

remaining bits (3-15) not used unless prog ref to extern. in that case, these
are index into symbol tbl. entries in symbol tbl are sequentially numbered
starting with 0.

here is a small example:

	/*------------------------------*/
	/*	wait_ms			*/
	/*------------------------------*/

	/* number of inner-loop iterations spent per requested millisecond */
	#define MSLOOP		125

	wait_ms (ms)
	int	ms;
	{

	/*
	 *	wait prescribed number of milliseconds (approx).
	 *	inner loop takes about 1 ms on normal ST.
	 *	returns at once when ms is zero or negative.
	 */

		int	i;

		if (ms <= 0)
			return;

		/* busy-wait: one pass of the empty inner loop per millisecond */
		for ( ; ms > 0; ms--)
			for (i = MSLOOP; i > 0; i--)
				;

		return;
	}

a dump of the .o file is:


file: WAIT_MS.O   size: 298

rec# ro  filoff   hex dump                                 ascii dump
0000 00 (000000): 601A 0000 002C 0000 0000 0000 0000 0000 *`....,..........*
0000 10 (000010): 00B6 0000 0000 0000 0000 0000 4E56 FFFA *............NV..*
0000 20 (000020): 4A6E 0008 6F1E 6016 3D7C 007D FFFE 6004 *Jn..o.`.=|.}..`.*
0000 30 (000030): 536E FFFE 4A6E FFFE 6EF6 536E 0008 4A6E *Sn..Jn..n.Sn..Jn*
0000 40 (000040): 0008 6EE4 4E5E 4E75 5F77 6169 745F 6D73 *..n.N^Nu_wait_ms*
0000 50 (000050): A200 0000 0000 7E7E 7761 6974 5F6D 8200 *......~~wait_m..*
0000 60 (000060): 0000 0000 7E6D 7300 0000 0000 C000 0000 *....~ms.........*
0000 70 (000070): 0008 7E69 0000 0000 0000 C000 FFFF FFFE *..~i............*
0001 00 (000080): 4C31 0000 0000 0000 8200 0000 0028 4C34 *L1...........(L4*
0001 10 (000090): 0000 0000 0000 8200 0000 0022 4C35 0000 *..........."L5..*
0001 20 (0000A0): 0000 0000 8200 0000 000C 4C38 0000 0000 *..........L8....*
0001 30 (0000B0): 0000 8200 0000 0018 4C39 0000 0000 0000 *........L9......*
0001 40 (0000C0): 8200 0000 0014 4C37 0000 0000 0000 8200 *......L7........*
0001 50 (0000D0): 0000 0014 4C36 0000 0000 0000 8200 0000 *....L6..........*
0001 60 (0000E0): 001E 4C33 0000 0000 0000 8200 0000 001E *..L3............*
0001 70 (0000F0): 4C32 0000 0000 0000 8200 0000 0028 0007 *L2...........(..*
0002 00 (000100): 0000 0007 0000 0007 0007 0007 0000 0000 *................*
0002 10 (000110): 0007 0007 0000 0007 0000 0007 0007 0000 *................*
0002 20 (000120): 0007 0000 0007 0007 0007                *................*


object (.o) mods (MWC):
-----------------------

these are slightly different but i do not have the format.


********  That's all, folks!

                 Leo.