[comp.unix.microport] disassembly enhancing script

imdave@cbnewsc.ATT.COM (david.e.bodenstab) (12/05/89)

Since Microport is no longer in business, and support is nonexistent, one
might consider disassembling the kernel or other .o's to

	1. figure out what is going on
	2. fix it

The disassembler `dis' is pretty good, but it doesn't take advantage
of all the information in the .o file.  So, here is a quick-but-dirty
script that crunches the output of `dis' and `dump' to produce a
more useful disassembly listing.

There are some obvious improvements that could be made:

	+ relocation entries referring to .text or .data should be
		tagged with the actual label or variable name
	+ labels for jump targets should be inserted

If anyone feels so inclined to make any improvements, it would be nice
if you could either post them or send them to me.  Please keep in
mind that, although the script could probably be simplified using
perl or new awk, my 286 system doesn't have these tools.  So, please
don't do something that would prevent the improvements from running
on a 286 system.  Thanks.

By the way, does anyone know of a .s to .c translator...?  :-)


Dave Bodenstab
(708)9798114
...att!iwtmx!imdave


#! /bin/sh
# This is a shell archive, meaning:
# 1. Remove everything above the #! /bin/sh line.
# 2. Save the resulting text in a file.
# 3. Execute the file with /bin/sh (not csh) to create the files:
#	:disassemble
# This archive created: Mon Dec  4 15:08:36 1989
# Put /bin at the front of the search path for the commands used below.
export PATH; PATH=/bin:$PATH
# Refuse to replace a file of the same name that is already present.
if test -f ':disassemble'
then
	echo shar: will not over-write existing file "':disassemble'"
else
# Strip the leading X guard from every archived line and store the result.
sed 's/^X//' << \SHAR_EOF > ':disassemble'
X#
X# :disassemble object.o
X#
X# Writes an annotated disassembly of object.o to standard output by
X# combining the listings produced by dis and dump.  Progress messages
X# go to standard error.  Exit status is 1 for a usage error or when
X# the run is cut short by a signal.
X#
X
XDIS=dis
XDUMP=dump
X# Common prefix of the scratch awk programs.  The process id keeps
X# concurrent runs apart.
XTMP=/tmp/$$a
X
Xif [ $# -lt 1 ]
Xthen
X    echo Usage: $0 objectfile >&2
X    exit 1
Xfi
X
X# Remove the scratch awk programs whenever the script ends, without
X# touching the exit status, so that a successful run reports success.
X# A hangup, interrupt or terminate signal ends the run with status 1,
X# which in turn fires the exit trap.
Xtrap 'rm -f $TMP*' 0
Xtrap 'exit 1' 1 2 15
X
X#
X# Create awk programs
X#
X
Xecho Building awk\(1\) programs . . . >&2
X
X#
X# awk program to massage `dump -vt' output (symbol table)
X#
X# result is:
X#   TEXT <offset> L|<type>|<symbol>
X#   DATA <offset> L|<type>|<symbol>
X#   BSS <offset> L|<type>|<symbol>
X#   BSS <offset> L|<type>|.bss <bss size>
X#   COM <offset> L|<type>|<symbol>
X#
Xcat >$TMP.t <<'TAC'
XBEGIN			    {
X# Every m1/m2 symbol entry becomes one line of the form
X#   <SECTION> <decimal value> L|<class>|<name>
X# where <class> is "static" or ".".  The .bss section symbol is held
X# back until the a3 auxiliary entry on the next line supplies its size.
X# Hex values are read as 0x followed by any number of digits in either
X# case; a character that is not a hex digit counts as 0.
X			    lhex = "0123456789abcdef"
X			    uhex = "0123456789ABCDEF"
X			    m1bss = -1
X			    }
X/^\[[0-9]*\][ 	]*a3/	    {
X#[Index] a3 Scnlen Nreloc Nlinno
X#  $1    $2   $3     $4     $5
X# Only the auxiliary entry directly after the .bss symbol is used; its
X# Scnlen is printed as the bss size.
X			    if ( NR == m1bss + 1 )
X				{
X				x = 0
X				n = length( $3 )
X				for( i = 3; i <= n; ++i )
X				    {
X				    c = substr( $3, i, 1 )
X				    d = index( lhex, c )
X				    if ( d == 0 ) d = index( uhex, c )
X				    if ( d == 0 ) d = 1
X				    x = 16 * x + d - 1
X				    }
X
X				print "BSS",sbss,"L|" type "|.bss " x
X				}
X			    }
X/^\[[0-9]*\][ 	]*m[12]/    {
X#[Index] m1 Name   Value Scnum Type Sclass Numaux Name
X#[Index] m2 Offset Value Scnum Type Sclass Numaux Name
X#  $1    $2  $3     $4    $5    $6    $7     $8    $9
X			    x = 0
X			    n = length( $4 )
X			    for( i = 3; i <= n; ++i )
X				{
X				c = substr( $4, i, 1 )
X				d = index( lhex, c )
X				if ( d == 0 ) d = index( uhex, c )
X				if ( d == 0 ) d = 1
X				x = 16 * x + d - 1
X				}
X
X			    if ( $7 == "static" )
X				type = $7
X			    else
X				type = "."
X
X# Scnum selects the output tag: 0 with a non-zero value is COM,
X# 1 is TEXT, 2 is DATA and 3 is BSS.
X			    if ( $5 == 0 )
X				{
X				if ( x != 0 )
X				    print "COM",x,"L|" type "|" $9
X				}
X			    else if ( $5 == 1 )
X				print "TEXT",x,"L|" type "|" $9
X			    else if ( $5 == 2 )
X				print "DATA",x,"L|" type "|" $9
X			    else if ( $5 == 3 )
X				{
X				if ( $9 == ".bss" )
X				    {
X				    m1bss = NR
X				    sbss = x
X				    }
X				else
X				    print "BSS",x,"L|" type "|" $9
X				}
X			    }
XTAC
X
X#
X# awk program to massage `dump -vs -n .data' output (data section)
X#
X# result is:
X#   <offset> S|0x??
X#   <offset> S|0x??|?
X#
Xcat >$TMP.d <<'TAC'
XBEGIN	    {
X# Turns the hex dump of the .data section into one line per byte:
X#   <offset> S|0x??        byte without a text form
X#   <offset> S|0x??|<c>    byte with a printable or escaped text form
X# locctr, assigned on the command line, is the offset of the first byte.
X# Everything up to and including the ".data:" heading is ignored.
X	    y = 0
X
X# A[] maps a two-digit upper-case hex code to its text form.  Codes
X# 0x20-0x7E are filled from the string below, which lists them in
X# ASCII order; 0x7F lies past its end and so gets the empty string.
X	    hexdig = "0123456789ABCDEF"
X	    ascii = " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~"
X	    n = 0
X	    for( hi = 2; hi <= 7; ++hi )
X		for( lo = 0; lo <= 15; ++lo )
X		    {
X		    ++n
X		    A[substr( hexdig, hi + 1, 1 ) substr( hexdig, lo + 1, 1 )] = substr( ascii, n, 1 )
X		    }
X
X# Codes shown as a C-style escape (0x0C is form feed, hence \f).
X	    A["08"] = "\\b"
X	    A["09"] = "\\t"
X	    A["0A"] = "\\n"
X	    A["0B"] = "\\v"
X	    A["0C"] = "\\f"
X	    A["0D"] = "\\r"
X	    A["27"] = "\\'"
X	    A["5C"] = "\\\\"
X# "|" separates the fields of the intermediate files, so 0x7C is
X# deliberately left without a text form.
X	    A["7C"] = ""
X	    }
X/^\.data:/  {
X	    y = 1
X	    next
X	    }
X	    {
X	    if ( y == 0 )
X		next
X
X# Each field is a run of two-digit hex bytes (two of them in a
X# four-digit word); emit them left to right.
X	    for( i = 1; i <= NF; ++i )
X		for( j = 1; j < length($i); j += 2 )
X		    {
X		    b = substr( $i, j, 2 )
X		    c = A[b ""]
X		    if ( c == "" )
X			print locctr,"S|0x" b
X		    else
X			print locctr,"S|0x" b "|" c
X
X		    locctr += 1
X		    }
X	    }
XTAC
X
X#
X# awk program to massage `dump -vr' output (relocation table)
X#
X# result is:
X#   <offset> Z|<type of reference>
X#
Xcat >$TMP.r <<'TAC'
XBEGIN	    {
X# Reduces the relocation listing to "<decimal vaddr> Z|<reference>".
X# A DIR16 entry is held back in px/p3/p4 until the next entry is seen:
X# when that one is a SEG12 entry exactly two bytes further on, the pair
X# is reported as a single "dword ptr" at the DIR16 address; otherwise
X# the DIR16 entry is reported as "offset".  SEG12 entries on their own
X# are reported as "segment".  Entries of any other type are remembered
X# but never printed.
X# Vaddr is read as 0x followed by any number of hex digits in either
X# case; a character that is not a hex digit counts as 0.
X	    lhex = "0123456789abcdef"
X	    uhex = "0123456789ABCDEF"
X	    px = -1
X	    p3 = ""
X	    p4 = ""
X	    }
X/^	0x/ {
X# Vaddr Symndx Type Name
X#  $1     $2    $3   $4
X	    x = 0
X	    n = length( $1 )
X	    for( i = 3; i <= n; ++i )
X		{
X		c = substr( $1, i, 1 )
X		d = index( lhex, c )
X		if ( d == 0 ) d = index( uhex, c )
X		if ( d == 0 ) d = 1
X		x = 16 * x + d - 1
X		}
X
X	    if ( $3 == "SEG12" )
X		{
X		if ( p3 == "DIR16" )
X		    {
X		    if ( px+2 == x )
X			print px,"Z|dword ptr " $4
X		    else
X			{
X			print px,"Z|offset " p4
X			print x,"Z|segment " $4
X			}
X
X		    px = -1
X		    p3 = ""
X		    }
X		else
X		    print x,"Z|segment " $4
X		}
X	    else
X		{
X# Flush a DIR16 entry that turned out to have no SEG12 partner.
X		if ( p3 == "DIR16" )
X		    print px,"Z|offset " p4
X
X		px = x
X		p3 = $3
X		p4 = $4
X		}
X	    }
XEND	    {
X	    if ( p3 == "DIR16" )
X		print px,"Z|offset " p4
X	    }
XTAC
X
X#
X# awk program to massage `dis' output (disassembly)
X#
X# result is:
X#   <offset> S|<disassembly text>
X#
Xcat >$TMP.s <<'TAC'
XBEGIN	{
X# Input records look like "<hex address>|<instruction text>".  Each is
X# rewritten as "<decimal address> S|<instruction text>".  Only the
X# lower-case digits 0-9a-f carry a value; any other character in the
X# address counts as 0.
X	FS="|"
X	hexdig = "0123456789abcdef"
X	}
X	{
X	addr = $1
X	len = length( addr )
X	val = 0
X	pos = 1
X	while ( pos <= len )
X	    {
X	    d = index( hexdig, substr( addr, pos, 1 ) )
X	    if ( d > 0 )
X		val = 16 * val + d - 1
X	    else
X		val = 16 * val
X	    ++pos
X	    }
X
X	print val,"S|" $2
X	}
XTAC
X
X#
X# awk program to merge symbol, relocation and disassembly for text
X#
X# result is:
X#   <offset> L|<name> ()
X#   <offset> L|static <name> ()
X#   <offset> S|<disassembly text>|<offset> <type of reference>
X#
Xcat >$TMP.m1 <<'TAC'
XBEGIN	{
X# Input is the offset-ordered mix of label (L), instruction (S) and
X# relocation (Z) records.  A label is printed as a function heading;
X# an instruction is kept in "pending" so that relocation records that
X# follow can be appended to it as "|<hex offset>=<reference>".
X# Reading stops at the first record at or beyond etext, which is
X# assigned on the command line.
X	FS = "|"
X	pending = ""
X	}
X	{
X	n = length( $1 )
X	kind = substr( $1, n, 1 )
X	addr = substr( $1, 1, n - 2 ) + 0
X
X	if ( addr >= etext+0 )
X	    exit
X
X	if ( kind != "L" && kind != "S" )
X	    {
X	    pending = pending "|" sprintf( "%x=%s", addr, $2 )
X	    next
X	    }
X
X# A label or a new instruction completes the instruction held so far.
X	if ( pending != "" )
X	    print pending
X
X	if ( kind == "S" )
X	    pending = addr " S|" $2
X	else
X	    {
X	    pending = ""
X	    prefix = ""
X	    if ( $2 == "static" )
X		prefix = "static "
X	    print addr,"L|" prefix $3 "()"
X	    }
X	}
XEND	{
X	if ( pending != "" )
X	    print pending
X	}
XTAC
X
X#
X# awk program to merge symbol, relocation and disassembly for data
X#
X# result is:
X#   <offset> L|char <name> ={
X#   <offset> L|static char <name> ={
X#   <offset> S|<disassembly text>|<type of reference>
X#
Xcat >$TMP.m2 <<'TAC'
XBEGIN	{
X	FS = "|"
X	hex = ""
X	lasthex = ""
X	reference = ""
X	}
X	{
X	o = substr( $1, 1, length($1)-2 ) + 0
X	t = substr( $1, length($1), 1 )
X
X	if ( o < sdata+0 )
X	    next
X
X	if ( t == "L" )
X#       <offset> L | <storage class> | <name>
X	    {
X	    if ( hex != "" )
X		{
X		if ( lasthex != "" )
X		    {
X		    if ( c == "" )
X			{
X			if ( lastc == "" )
X			    {
X			    if ( reference == "" )
X				print offset,"S|" hex " " lasthex
X			    else
X				print offset,"S|" hex " " lasthex "|" reference
X			    }
X			else
X			    {
X			    if ( reference == "" )
X				print offset,"S|" hex
X			    else
X				print offset,"S|" hex "|" reference
X
X			    print lasto,"S|" lasthex "|\"" lastc "\""
X			    }
X			}
X		    else
X			{
X			if ( lastc == "" )
X			    {
X			    print offset,"S|" hex "|\"" c "\"" reference
X			    print lasto,"S|" lasthex
X			    }
X			else
X			    print offset,"S|" hex " " lasthex "|\"" c lastc "\"" reference
X			}
X		    }
X		else
X		    {
X		    if ( c == "" )
X			{
X			if ( reference == "" )
X			    print offset,"S|" hex
X			else
X			    print offset,"S|" hex "|" reference
X			}
X		    else
X			print offset,"S|" hex "|\"" c "\"" reference
X		    }
X		}
X
X	    hex = ""
X	    lasthex = ""
X	    reference = ""
X
X	    if ( $2 == "static" )
X		print o,"L|static char " $3 " ={"
X	    else    
X		print o,"L|char " $3 " ={"
X
X	    next
X	    }
X
X	if ( t == "S" )
X	    {
X	    if ( NF == 2 )
X		a3 = ""
X	    else
X		a3 = $3
X
X	    if ( hex == "" )
X		{
X		offset = o;
X		hex = $2;
X		c = a3
X		count = 1
X		}
X	    else
X		{
X		if ( lasthex != "" )
X		    {
X		    if ( c == "" )
X			{
X			if ( lastc == "" && count++ < 4 )
X			    hex = hex " " lasthex
X			else
X			    {
X			    if ( reference == "" )
X				print offset, "S|" hex
X			    else
X				print offset, "S|" hex "|" reference
X
X			    offset = lasto
X			    hex = lasthex
X			    c = lastc
X			    count = 1
X			    reference = ""
X			    }
X			}
X		    else
X			{
X			if ( lastc == "" || count >= 4 )
X			    {
X			    print offset, "S|" hex "|\"" c "\"" reference
X
X			    offset = lasto
X			    hex = lasthex
X			    c = lastc
X			    count = 1
X			    reference = ""
X			    }
X			else
X			    {
X			    hex = hex " " lasthex
X			    c = c lastc
X			    ++count
X			    }
X			}
X		    }
X
X		lasto = o
X		lasthex = $2
X		lastc = a3
X		}
X	    }
X	else
X#       <offset> Z | <type of reference>
X	    {
X	    if ( lasthex != "" )
X		{
X		if ( c == "" )
X		    if ( reference == "" )
X			print offset,"S|" hex
X		    else
X			print offset,"S|" hex "|" reference
X		else
X		    print offset,"S|" hex "|\"" c "\"" reference
X
X		offset = lasto
X		hex = lasthex
X		c = lastc
X
X		lasthex = ""
X		}
X
X	    reference = "|" $2
X	    }
X	}
XEND	{
X	if ( hex != "" )
X	    {
X	    if ( lasthex != "" )
X		{
X		if ( c == "" )
X		    {
X		    if ( lastc == "" )
X			{
X			if ( reference == "" )
X			    print offset,"S|" hex " " lasthex
X			else
X			    print offset,"S|" hex " " lasthex "|" reference
X			}
X		    else
X			{
X			if ( reference == "" )
X			    print offset,"S|" hex
X			else
X			    print offset,"S|" hex "|" reference
X
X			print lasto,"S|" lasthex "|\"" lastc "\""
X			}
X		    }
X		else
X		    {
X		    if ( lastc == "" )
X			{
X			print offset,"S|" hex "|\"" c "\"" reference
X			print lasto,"S|" lasthex
X			}
X		    else
X			print offset,"S|" hex " " lasthex "|\"" c lastc "\"" reference
X		    }
X		}
X	    else
X		{
X		if ( c == "" )
X		    {
X		    if ( reference == "" )
X			print offset,"S|" hex
X		    else
X			print offset,"S|" hex "|" reference
X		    }
X		else
X		    print offset,"S|" hex "|\"" c "\"" reference
X		}
X	    }
X	}
XTAC
X
X#
X# awk program to put text section back together
X#
Xcat >$TMP.f1 <<'TAC'
XBEGIN	{
X# Lays out the merged text records.  A label record opens a new
X# "<heading> {" group, closing the previous one first; every other
X# record becomes a tab-indented "<hex offset>: <text>" line with any
X# further fields appended after tabs.
X	FS = "|"
X	opened = 0
X	}
X$1 ~ /L$/ {
X	if ( opened )
X	    printf "}\n\n"
X	printf "%s {\n", $2
X	opened = 1
X	next
X	}
X	{
X	addr = substr( $1, 1, length($1)-2 ) + 0
X	line = sprintf( "\t%4x: %s", addr, $2 )
X
X	for( k = 3; k <= NF; ++k )
X	    line = line "\t" $k
X
X	print line
X	}
XEND	{
X	if ( opened )
X	    print "}"
X	}
XTAC
X
X#
X# awk program to put data section back together
X#
Xcat >$TMP.f2 <<'TAC'
XBEGIN	{
X# Lays out the merged data records.  A label record prints its heading
X# after a blank line, closing the previous group with "};" first; every
X# other record becomes a tab-indented "<hex offset>: <text>" line with
X# any further fields appended after tabs.
X	FS = "|"
X	closer = ""
X	}
X$1 ~ /L$/ {
X	printf "%s\n%s\n", closer, $2
X	closer = "};\n"
X	next
X	}
X	{
X	addr = substr( $1, 1, length($1)-2 ) + 0
X	line = sprintf( "\t%4x: %s", addr, $2 )
X
X	for( k = 3; k <= NF; ++k )
X	    line = line "\t" $k
X
X	print line
X	}
XEND	{
X	if ( closer != "" )
X	    print "};"
X	}
XTAC
X
X#
X# awk program to put bss section back together
X#
Xcat >$TMP.f3 <<'TAC'
XBEGIN	{
X# Prints one "char <name>[<size>];" declaration per bss symbol, followed
X# by its offset in hex.  Input records are "<offset> L|<class>|<name>".
X# A size is the distance to the next symbol; for the last symbol it is
X# the distance to the end of the section, sbss + bssize, both assigned
X# on the command line.  "static" is printed only for symbols whose
X# class field says so.
X	FS = "|"
X	lastn = ""
X	lastclass = ""
X	}
X	{
X	o = substr( $1, 1, length($1)-2 ) + 0
X
X	if ( lastn != "" )
X	    printf "\n%schar %s[%d];\n\t%4x:\n", lastclass, lastn, o - lasto, lasto
X
X	lasto = o
X	lastn = $3
X	if ( $2 == "static" )
X	    lastclass = "static "
X	else
X	    lastclass = ""
X	}
XEND	{
X	if ( lastn != "" )
X	    printf "\n%schar %s[%d];\n\t%4x:\n", lastclass, lastn, (sbss + bssize) - lasto, lasto
X	}
XTAC
X
X#
X# awk program to put common section back together
X#
Xcat >$TMP.f4 <<'TAC'
XBEGIN	{
X# Prints "char <name>[<n>];" after a blank line for every record of
X# the form "<n> L|<class>|<name>".
X	FS = "|"
X	}
X	{
X	tag = $1
X	size = substr( tag, 1, length(tag)-2 ) + 0
X
X	printf "\n"
X	printf "char %s[%d];\n", $3, size
X	}
XTAC
X
X
X#
X# Begin...
X#
X# Runs dis and dump over the object file, converts each listing with
X# the awk programs written above, merges the pieces by offset and
X# sends the annotated listing to standard output.  The intermediate
X# files name.S, name.T, name.D and name.R are kept in the current
X# directory while the script runs.  Backquotes and the old sort key
X# syntax are kept on purpose for the old Bourne shell and sort.
X#
Xobject=$1
Xname=`basename "$object" .o`
X
X# Stop early rather than push empty listings through every stage.
Xif [ ! -r "$object" ]
Xthen
X    echo "$0: cannot read $object" >&2
X    exit 1
Xfi
X
X# From here on a signal must also take the intermediate files with it.
X# The exit that follows still fires any exit trap that is already set.
Xtrap 'rm -f "$name".[DRST]; exit 1' 1 2 15
X
Xecho Dis-assembling . . . >&2
X$DIS "$object" |
X    sed -e 's/^[ 	]*//' -e 's/: */|/' |
X    fgrep '|' |
X    awk -f $TMP.s - >"$name.S"
X
Xecho Dumping symbol table . . . >&2
X$DUMP -vt "$object" |
X    awk -f $TMP.t - |
X    sort -n +1 -2 >"$name.T"
X
Xecho Extracting .data origin, and .bss origin and size . . . >&2
X# Pick the section symbols themselves, not any name that merely
X# contains the section name.
Xset -- `grep '^DATA .*|\.data$' "$name.T"`
Xdata=$2
X
Xset -- `grep '^BSS .*|\.bss ' "$name.T"`
Xbss=$2
Xbssize=$4
X
Xecho Dumping .data section . . . >&2
X$DUMP -vs -n .data "$object" |
X    awk -f $TMP.d locctr=$data - >"$name.D"
X
Xecho Dumping relocation table . . . >&2
X$DUMP -vr "$object" |
X    awk -f $TMP.r - >"$name.R"
X
X# put everything back together
X# Each section is selected by the tag at the start of the line, so a
X# symbol whose name happens to contain TEXT, DATA, BSS or COM cannot
X# leak into another section.
X{
X    # merge symbols, relocation and disassembly for text section
X    echo Crunching text section . . . >&2
X    sed -n 's/^TEXT //p' "$name.T" |
X	fgrep -v .text |
X	sort -m -b +0n -1 +1 -1.1 - "$name.S" "$name.R" |
X	awk -f $TMP.m1 etext=$data - |
X	awk -f $TMP.f1 -
X
X    # merge symbols, relocation and data for data section
X    echo Crunching data section . . . >&2
X    sed -n 's/^DATA //p' "$name.T" |
X	fgrep -v .data |
X	sort -m -b +0n -1 +1 -1.1 - "$name.D" "$name.R" |
X	awk -f $TMP.m2 sdata=$data - |
X	awk -f $TMP.f2 -
X
X    echo Crunching bss section . . . >&2
X    sed -n 's/^BSS //p' "$name.T" |
X	fgrep -v .bss |
X	awk -f $TMP.f3 sbss=$bss bssize=$bssize -
X
X    echo Crunching bss \(common\) section . . . >&2
X    sed -n 's/^COM //p' "$name.T" |
X	awk -f $TMP.f4 -
X} |
X    newform -i4,35,55,75 -o-0
X
Xecho Done! >&2
X
Xrm -f "$name".[DRST]
Xexit 0
SHAR_EOF
# Make the freshly extracted script executable.
chmod +x ':disassemble'
fi # end of overwriting check
#	End of shell archive
exit 0