[unix-pc.sources] undump: create an executable from a.out + core dump

brant@manta.UUCP (Brant Cheikes) (06/02/88)

Here's an interesting little hack.  I managed to infer enough of the
core file structure to get undump working.  This program takes an
executable image in COFF together with a core dump and creates a new
executable with all the static data initialized.  This is useful when
building systems like Gnu Emacs and TeX/LaTeX, since they often take a
non-negligible amount of time to initialize themselves.  The idea is
to run the target program, let it initialize itself, and then cause it
to core dump.  Undump will take the result and put together a new
a.out.  It is assumed that the original program tests some internal
flag indicating whether initialization has taken place.  This version
properly handles executables linked with the shared library (unlike
Gnu Emacs unexec).  One item on the to-do list: I didn't make an
effort to get undump to properly transfer debugging information, thus
the resulting executable looks as if it were run through strip(1).

It's probably not something that lots of folks have been dying for,
but hey, the price is right.  Questions, bugs, etc., to me.

								Brant
----- cut here
echo x - Makefile
sed 's/^X//' >Makefile <<'*-*-END-of-Makefile-*-*'
X# Makefile for undump, UNIXpc version.  MAKEINC must name the directory
X# holding Makepre.h and Makepost.h; LFLAGS = -s strips the linked binary.
X
Xinclude $(MAKEINC)/Makepre.h
X
XBINDIR=/usr/local/bin
XCFLAGS = -O -DUNIXPC
XLFLAGS = -s
X# Debugging alternative to the CFLAGS line above: CFLAGS= -g -DUNIXPC
X
Xundump: undump.o
X# Alternative link step through the compiler driver: $(CC) -o undump undump.o
X	$(LD) $(SHAREDLIB) $(LFLAGS) -o undump undump.o
X
Xundump.o: undump.c
X	$(CC) $(CFLAGS) -c undump.c
X
Xclean:
X	rm -f undump.o
X
Xinstall: undump
X	cp undump $(BINDIR)
X
Xinclude $(MAKEINC)/Makepost.h
*-*-END-of-Makefile-*-*
echo x - undump.c
sed 's/^X//' >undump.c <<'*-*-END-of-undump.c-*-*'
X/*
X * undump - resurrect a core file into a runnable program.
X *
X *	for UNIX System V on a 3Bx
X *	that uses the Common Object File Format
X *
X * Author:
X *	Lou Salkind
X *	New York University
X *	Tue Mar  3 13:18:25 EST 1987
X *
X * Adapted from:
X *	Spencer Thomas's undump and the file unexec.c in GNU emacs
X *
X * Modified:
X *	Brant Cheikes
X *	University of Pennsylvania
X *	Department of Computer and Information Science
X *	ARPA: brant@linc.cis.upenn.edu, UUCP: ...drexel!manta!brant
X *	Wed Jun  1 00:57:16 EDT 1988
X *	Patched to make it fly on an AT&T UNIXpc (aka PC7300, 3b1).
X *	Bug: undump currently won't dump out line numbers, symbol
X *	table entries, or the string table.  Thus the resulting
X *	a.out looks like the result of an ld with the -s (strip)
X *	option.
X */
X
X#include <sys/param.h>
X#include <sys/types.h>
X#include <sys/signal.h>
X#include <sys/user.h>
X
X#include <stdio.h>
X#include <sys/stat.h>
X
X#include <aouthdr.h>
X#include <filehdr.h>
X#include <scnhdr.h>
X#ifndef UNIXPC
X#include <syms.h>
X#endif /* not UNIXPC */
X
X#define	PAGE_SIZE	NBPC	/* size of a page in bytes */
X
Xstruct filehdr fh;		/* file header */
XAOUTHDR aout;			/* optional header */
Xstruct scnhdr tsc;		/* text section header */
Xstruct scnhdr dsc;		/* data section header */
Xstruct scnhdr bsc;		/* bss section header */
X#ifdef UNIXPC
Xstruct scnhdr lsc;		/* shared lib section header */
X#endif /* UNIXPC */
X
Xlong text_scnptr;		/* file offset of the raw .text data (tsc.s_scnptr) */
Xlong data_scnptr;		/* file offset of the raw .data data in the new a.out */
X#ifndef UNIXPC
Xlong bias;			/* amount added to f_symptr and line number pointers */
Xlong lnnoptr;			/* .text s_lnnoptr as read from the original a.out */
Xlong symlocptr;			/* f_symptr as read from the original a.out */
X#endif /* not UNIXPC */
X
Xmain(argc, argv)	/* usage: undump new [a.out [core]]
X			 *
X			 * Reads the headers of the original a.out and the
X			 * user structure from the core file, then writes
X			 * "new": adjusted headers, the text section, and the
X			 * data area taken from the core dump.  The result is
X			 * marked executable via mark_x().  Failures of the
X			 * checked I/O calls are reported through Perror(),
X			 * which exits with status 1; success exits with 0.
X			 */
X     char **argv;
X{
X  FILE *afp, *cfp, *nfp;	/* original a.out, core file, new executable */
X  struct user u;		/* user structure read from the core file */
X  long off, size;		/* core offset of memory image; new .data size */
X  struct scnhdr sc;		/* scratch header used while scanning sections */
X  int i;
X  char *a_out_name = "a.out";	/* default when argv[2] is absent */
X  char *core_name = "core";	/* default when argv[3] is absent */
X  char *new_name;
X  
X  if (argc < 2 || argc > 4) {
X    fprintf(stderr,"Usage: %s new [a.out [core]]\n", argv[0]);
X    exit(1);
X  }
X  new_name = argv[1];
X  if (argc > 2)
X    a_out_name = argv[2];
X  if (argc > 3)
X    core_name = argv[3];
X  afp = fopen(a_out_name, "r");
X  if (afp == 0)
X    Perror(a_out_name);
X  cfp = fopen(core_name, "r");
X  if (cfp == 0)
X    Perror(core_name);
X  nfp = fopen(new_name, "w");	/* NOTE(review): output is created before the inputs are validated */
X  if (nfp == 0)
X    Perror(new_name);
X  
X  /*
X   * Read in the file header and optional file header from
X   * the a.out file.
X   */
X  if (fread(&fh, sizeof(fh), 1, afp) != 1)
X    Perror("fh read");
X  if (fread(&aout, sizeof(aout), 1, afp) != 1)
X    Perror("aout read");
X  
X  /*
X   * Read in the various section headers.  Only .text, .data, .bss
X   * (and .lib when built with UNIXPC) are kept; a header with any
X   * other name is read and discarded.
X   */
X  for (i = 0; i < fh.f_nscns; i++) {
X    if (fread(&sc, sizeof(sc), 1, afp) != 1)
X      Perror("scnhdr read");
X    if (strcmp(sc.s_name, ".text") == 0) {
X      tsc = sc;
X    } else if (strcmp(sc.s_name, ".data") == 0) {
X      dsc = sc;
X    } else if (strcmp(sc.s_name, ".bss") == 0) {
X      bsc = sc;
X#ifdef UNIXPC
X    } else if (strcmp(sc.s_name, ".lib") == 0) {
X      lsc = sc;
X#endif /* UNIXPC */
X    }
X  }
X  
X#ifdef UNIXPC
X  /*
X   * The kernel stack (I think) takes up the first U_OFFSET
X   * bytes of the core file, followed by the user structure.
X   * Skip past that junk and get down to the nitty-gritty.
X   * NOTE(review): the fseek result is not checked.
X   */
X  fseek(cfp, (long)U_OFFSET, 0);
X#endif /* UNIXPC */
X  
X  /*
X   * Read in the user structure from the core file and perform
X   * a sanity check: the text, data and bss sizes recorded in the
X   * user structure must equal those in the a.out optional header.
X   */
X  if (fread(&u, sizeof(u), 1, cfp) != 1)
X    Perror("core read");
X  if (u.u_exdata.ux_tsize != aout.tsize ||
X      u.u_exdata.ux_dsize != aout.dsize ||
X      u.u_exdata.ux_bsize != aout.bsize) {
X    fprintf(stderr,"mismatch between %s and %s sizes\n",a_out_name, core_name);
X    exit(1);
X  }
X  
X  /*
X   * off = offset to start of actual mem dump data in core file.
X   * size = size of new .data section.
X   * NOTE(review): presumably USIZE is the u-area size in pages and
X   * u_dsize the data size in pages -- confirm against <sys/param.h>
X   * and <sys/user.h>.
X   */
X  off = USIZE*PAGE_SIZE;
X  size = u.u_dsize * PAGE_SIZE;
X  
X  fh.f_flags |= F_RELFLG | F_EXEC;	/* flag meanings are defined in <filehdr.h> */
X  /*
X   * .data section size of new a.out = data size at time of
X   * core dump.  .bss size is now zero (no uninitialized data).
X   */
X  aout.dsize = size;
X  aout.bsize = 0;
X  
X  /*
X   * text size same as text size of a.out.
X   */
X  tsc.s_size = aout.tsize;
X#ifndef UNIXPC
X  /*
X   * This recomputation of the text section's file offset (text placed
X   * immediately after the headers) is compiled out for UNIXPC.  There
X   * the scnptr of the text section stored in the original a.out is
X   * used unmodified: nothing done here needs to alter its starting
X   * point, and it arguably SHOULDN'T be altered, since certain kinds
X   * of files require special alignments of text and data sections,
X   * both in memory and in the file.  If it ain't broke, don't fix it.
X   */
X  tsc.s_scnptr = sizeof(fh) + sizeof(aout);
X  tsc.s_scnptr += fh.f_nscns * sizeof (struct scnhdr);
X#endif /* not UNIXPC */
X  text_scnptr = tsc.s_scnptr;
X
X#ifndef UNIXPC
X  lnnoptr = tsc.s_lnnoptr;	/* remember the ORIGINAL offsets; both are adjusted below */
X  symlocptr = fh.f_symptr;
X#endif /* not UNIXPC */
X  
X#ifndef UNIXPC
X  /*
X   * Compiled out for UNIXPC, on the assumption that the .text
X   * section size never grows: the .data section's start point has
X   * not moved, so its paddr and vaddr need no tweaking there.
X   * Other builds reset both to aout.data_start.
X   */
X  dsc.s_paddr = dsc.s_vaddr = aout.data_start;
X#endif /* not UNIXPC */
X  
X  /*
X   * New .data size is computed from core dump.
X   * Initialize pointer to raw .data section in the file.
X   */
X  dsc.s_size = size;
X  dsc.s_scnptr = tsc.s_scnptr + tsc.s_size;
X#ifdef UNIXPC
X  /*
X   * Magic 0413 files require that the .data section be aligned
X   * on a 1024 byte boundary IN THE FILE.
X   * NOTE(review): the expression adds 0x400, not 0x3ff, so an offset
X   * that is already a multiple of 1024 is advanced by a further 1024
X   * bytes -- confirm this is intended.
X   */
X  if (u.u_exdata.ux_mag == 0413)
X    dsc.s_scnptr = (dsc.s_scnptr + 0x400) & ~0x3ff;
X#endif /* UNIXPC */
X  data_scnptr = dsc.s_scnptr;
X  
X  /*
X   * Tweak .bss header values: it now starts right after the new
X   * .data, has size zero and no raw data in the file.
X   */
X  bsc.s_paddr = bsc.s_vaddr = aout.data_start + aout.dsize;
X  bsc.s_size = aout.bsize;
X  bsc.s_scnptr = 0L;
X  
X#ifdef UNIXPC
X  /*
X   * Adjust the physical/virtual addresses of the shared lib,
X   * if present.  Basically, since the .bss section has just
X   * disappeared, the shlib and .bss end up with the same addresses.
X   * NOTE(review): "present" is decided by f_nscns > 3, not by having
X   * seen a section named .lib.  fh is written out with f_nscns
X   * unchanged while at most four section headers are written, so an
X   * input with more than four sections would get a header count that
X   * does not match.
X   */
X  if (fh.f_nscns > 3) {
X    lsc.s_paddr = lsc.s_vaddr = aout.data_start + aout.dsize;
X    lsc.s_size = lsc.s_scnptr = 0L;
X  }
X#endif /* UNIXPC */
X  
X#ifndef UNIXPC
X  bias = dsc.s_scnptr + dsc.s_size - lnnoptr;	/* new end of .data minus old line number offset */
X  if (fh.f_symptr > 0L)
X    fh.f_symptr += bias;
X  if (tsc.s_lnnoptr > 0L)
X    tsc.s_lnnoptr += bias;	/* NOTE(review): when lnnoptr is 0, bias is an absolute offset, not a displacement */
X#endif /* not UNIXPC */
X  
X  /*
X   * Write out the file header, optional file header, and the
X   * various section headers.
X   */
X  if (fwrite(&fh, sizeof(fh), 1, nfp) != 1)
X    Perror("fh write");
X  if (fwrite(&aout, sizeof(aout), 1, nfp) != 1)
X    Perror("aout write");
X  if (fwrite(&tsc, sizeof(tsc), 1, nfp) != 1)
X    Perror("ts hdr write");
X  if (fwrite(&dsc, sizeof(dsc), 1, nfp) != 1)
X    Perror("ds hdr write");
X  if (fwrite(&bsc, sizeof(bsc), 1, nfp) != 1)
X    Perror("bs hdr write");
X#ifdef UNIXPC
X  /*
X   * Dump out shared lib section header, if it was present
X   * in the old a.out.
X   */
X  if (fh.f_nscns > 3)
X    if (fwrite(&lsc, sizeof(lsc), 1, nfp) != 1)
X      Perror("lib hdr write");
X#endif /* UNIXPC */
X  
X  /*
X   * Hokay, set position in core file to right after user structure.
X   * Position in new a.out to where text_scnptr says.
X   * NOTE(review): neither fseek result is checked.
X   */
X  fseek(cfp, off, 0);
X  fseek(nfp, text_scnptr, 0);
X#ifdef UNIXPC
X  /*
X   * If we have a 0407 file, then we have to copy the text from
X   * the core file.  If not, we copy the text from the original
X   * a.out.  In the 0407 case cfp is left just past the text, so
X   * the data copy below continues from there; otherwise cfp is
X   * still at off.
X   */
X  if (u.u_exdata.ux_mag == 0407)
X    copy(cfp, nfp, aout.tsize);
X  else {
X    fseek(afp, text_scnptr, 0);
X    copy(afp, nfp, aout.tsize);
X  }
X#else
X  copy(afp, nfp, aout.tsize);	/* afp is not repositioned: it is still just past the section headers */
X#endif
X  fseek(nfp, data_scnptr, 0);
X  copy(cfp, nfp, size);		/* new .data contents come from the core image */
X#ifndef UNIXPC
X  copy_syms(afp, nfp);
X#endif /* not UNIXPC */
X  fclose(nfp);			/* NOTE(review): result not checked, so a failed final flush goes unnoticed */
X  fclose(afp);
X  fclose(cfp);
X  mark_x(new_name);
X  exit(0);
X}
X
X#ifndef UNIXPC
X/*
X * copy_syms - copy line numbers, symbol table and string table from
X * the original a.out (afp) to the new executable (nfp).
X *
X * Uses the globals set up by main(): lnnoptr and symlocptr are the
X * ORIGINAL file offsets of the line numbers and of the symbol table,
X * fh.f_symptr is the already relocated symbol table offset, and bias
X * is the amount by which line number pointers have moved.  main()
X * calls this with nfp positioned at the end of the new .data section,
X * which is where the line numbers are written.
X *
X * All errors are reported through Perror(), which does not return.
X */
Xcopy_syms(afp, nfp)
X     register FILE *afp, *nfp;
X{
X  char page[BUFSIZ];
X  register int n;
X  register int nsyms;
X  register int naux;
X  struct syment symentry;
X  AUXENT auxentry;
X  
X  /* if there are line numbers, copy them */
X  if (lnnoptr) {
X    if (fseek(afp, lnnoptr, 0) == -1)
X      Perror("ln fseek");
X    copy(afp, nfp, symlocptr - lnnoptr);
X  }
X  
X  /*
X   * A stripped a.out has no symbol table (f_symptr is 0).  Seeking
X   * nfp to offset 0 and copying the remainder of afp would overwrite
X   * the headers that have just been written, so stop here.
X   */
X  if (symlocptr <= 0L)
X    return;
X  
X  /*
X   * Position both files explicitly.  Without line numbers afp would
X   * otherwise still be wherever the previous copy left it, not at
X   * the symbol table.
X   */
X  if (fseek(afp, symlocptr, 0) == -1)
X    Perror("symtbl fseek");
X  if (fseek(nfp, fh.f_symptr, 0) == -1)
X    Perror("symtbl fseek");
X  
X  /* now write the symbol table; f_nsyms counts aux entries too */
X  for (nsyms = 0; nsyms < fh.f_nsyms; nsyms++) {
X    if (fread(&symentry, SYMESZ, 1, afp) != 1)
X      Perror("sym fread");
X    if (fwrite(&symentry, SYMESZ, 1, nfp) != 1)
X      Perror("sym fwrite");
X    /*
X     * Copy every aux entry belonging to this symbol so that an aux
X     * entry is never mistaken for a symbol.  Only the first aux entry
X     * of a function definition carries a line number pointer, which
X     * must be adjusted by bias.
X     */
X    for (naux = 0; naux < symentry.n_numaux; naux++) {
X      if (fread(&auxentry, AUXESZ, 1, afp) != 1)
X        Perror("aux fread");
X      nsyms++;
X      if (naux == 0 && ISFCN (symentry.n_type))
X        auxentry.x_sym.x_fcnary.x_fcn.x_lnnoptr += bias;
X      if (fwrite(&auxentry, AUXESZ, 1, nfp) != 1)
X        Perror("aux fwrite");
X    }
X  }
X  
X  /* finally write the string table, if any: everything up to EOF */
X  while ((n = fread(page, 1, sizeof(page), afp)) > 0) {
X    if (fwrite(page, 1, n, nfp) != n)
X      Perror("sym write");
X  }
X  /* fread never returns a negative count; ask the stream instead */
X  if (ferror(afp))
X    Perror("sym read");
X}
X#endif /* not UNIXPC */
X
X/*
X * After successfully building the new a.out, mark it executable.
X *
X * name is the path of the file just written.  Execute permission is
X * added for user, group and other, except where the process umask
X * masks it off.  If the file cannot be stat'ed, a warning is printed
X * and the mode is set to 0777 filtered through the umask, which is
X * the value the warning announces.  Exits through Perror() if the
X * chmod fails.
X */
Xmark_x(name)
X     char *name;
X{
X  struct stat sbuf;
X  int um;
X  
X  /*
X   * The umask can only be read by setting it, so install a throwaway
X   * value and restore the old one at once.  The throwaway value is
X   * octal 0777; a decimal 777 is a different, meaningless mask.
X   */
X  um = umask(0777);
X  umask(um);
X  if (stat(name, &sbuf) == -1) {
X    perror ("Can't stat new a.out");
X    fprintf(stderr, "Setting protection to %o\n", 0777 & ~um);
X    sbuf.st_mode = 0777 & ~um;
X  }
X  sbuf.st_mode |= 0111 & ~um;
X  /* pass only the permission bits to chmod, not the file type bits */
X  if (chmod(name, sbuf.st_mode & 07777) == -1)
X    Perror("Couldn't mark new a.out executable");
X  
X}
X
Xcopy(a, b, size)
X     register FILE *a, *b;
X     long size;
X{
X  char buf[BUFSIZ];
X  register int i, n;
X  
X  while (size > 0) {
X    i = size;
X    if (i > sizeof(buf))
X      i = sizeof(buf);
X    if ((n = fread(buf, 1, i, a)) <= 0)
X      Perror("copy read");
X    if (fwrite(buf, 1, n, b) != n)
X      Perror("copy write");
X    size -= n;
X  }
X}
X
XPerror(s)
X     char *s;
X{
X  perror(s);
X  exit(1);
X}
*-*-END-of-undump.c-*-*
echo x - undump.1
sed 's/^X//' >undump.1 <<'*-*-END-of-undump.1-*-*'
X.TH UNDUMP 1 "University of Utah"
X.SH NAME
Xundump \- convert a core dump to an executable a.out file
X.SH SYNOPSIS
Xundump new-a.out-file [old-a.out-file] [core-file]
X.SH DESCRIPTION
XUndump takes a core dump file and the executable "a.out" file which
Xcaused it and produces a new executable file with all static
Xvariables initialized to the values they held at the time of the
Xcore dump.  It is primarily useful for programs which take a long time
Xto initialize themselves, e.g., Emacs.  The idea is to go through all of
Xthe initializations and then create a core dump (e.g., with the abort()
Xcall).  One then uses undump to make a new executable file in which all of
Xthat initialization is already done.  This usually implies the use of a
Xglobal flag variable which says whether or not initialization has been done.
X.PP
XUndump's arguments, old-a.out-file and core-file, default to "a.out" and
X"core", respectively.
X.PP
XA few things to keep in mind about undump:
X.IP
XIt doesn't preserve open files.
X.IP
XThe program will be re-entered at the beginning of main(), not at the point
Xwhere the core dump occurred.
X.SH BUGS
XProbably should have an option to not require
Xold-a.out-file if the core came from a 407 file.
X.PP
XThe UNIXpc version of undump strips all line number, symbol table and
Xstring table information from the resulting executable.
X
*-*-END-of-undump.1-*-*
exit
----- cut here
-- 
Brant Cheikes
University of Pennsylvania
Department of Computer and Information Science
ARPA: brant@linc.cis.upenn.edu, UUCP: ...drexel!manta!brant