brnstnd@kramden.acf.nyu.edu (Dan Bernstein) (10/11/90)
This is a beta release of pmckpt, the poor man's checkpointer. The idea of a checkpointer is to save the state of a running process in a file, so that you can restore the process later where it left off. pmckpt is a cooperative checkpointer: you have to add checkpointing to your program explicitly. Like every utility in the Poor Man's Series, pmckpt includes some feature missing from all the competition: in this case, it doesn't use setjmp() or longjmp() when it's restoring the instruction pointer, so your compiler can put variables and intermediate values into registers without any risk of destroying the values of those variables. It also doesn't try to read the infinite variety of core file formats. As checkpointers go, pmckpt is quite portable. It should be possible to make this version of pmckpt work on practically any UNIX system, from the heights of BSD to the depths of System V. But no guarantees. In particular, you may have to modify the ustate manipulations in pmckpt.c. Send comments to me or to alt.sources.d. ---Dan #! /bin/sh # This is a shell archive. Remove anything before this line, then unpack # it by saving it into a file and typing "sh file". To overwrite existing # files, type "sh file -c". You can also feed this as standard input via # unshar, or by typing "sh <file", e.g.. If this archive is complete, you # will see the following message at the end: # "End of shell archive." 
# Contents: Makefile README UAREA argv0.c checkpoint ckptcvt pmceot.c # pmckpt.c pmckpt.h test.c # Wrapped by brnstnd@kramden on Thu Oct 11 03:39:47 1990 PATH=/bin:/usr/bin:/usr/ucb ; export PATH if test -f 'Makefile' -a "${1}" != "-c" ; then echo shar: Will not clobber existing file \"'Makefile'\" else echo shar: Extracting \"'Makefile'\" \(454 characters\) sed "s/^X//" >'Makefile' <<'END_OF_FILE' XCC=cc XCCOPTS=-g X Xall: pmceot.o pmckpt.o argv0 X Xshar: pmckpt.shar X Xpmceot.o: pmceot.c pmckpt.h X $(CC) $(CCOPTS) -c pmceot.c X Xpmckpt.o: pmckpt.c pmckpt.h X $(CC) $(CCOPTS) -c pmckpt.c X Xargv0: argv0.c X $(CC) $(CCOPTS) -o argv0 argv0.c X Xpmckpt.shar: Makefile README UAREA argv0.c checkpoint ckptcvt pmceot.c pmckpt.c pmckpt.h test.c X shar Makefile README UAREA argv0.c checkpoint ckptcvt pmceot.c pmckpt.c pmckpt.h test.c > pmckpt.shar X chmod 400 pmckpt.shar END_OF_FILE if test 454 -ne `wc -c <'Makefile'`; then echo shar: \"'Makefile'\" unpacked with wrong size! fi # end of 'Makefile' fi if test -f 'README' -a "${1}" != "-c" ; then echo shar: Will not clobber existing file \"'README'\" else echo shar: Extracting \"'README'\" \(6528 characters\) sed "s/^X//" >'README' <<'END_OF_FILE' Xpmckpt 0.95, 10/10/90. XPlaced into the public domain by Daniel J. Bernstein. XComments to him at brnstnd@nyu.edu. Please let him know if pmckpt works Xon your machine, and what applications you might have found for it. X X XThis is a beta release of pmckpt, the poor man's checkpointer. X XThe idea of a checkpointer is to save the state of a running process in Xa file, so that you can restore the process later where it left off. Xpmckpt is a cooperative checkpointer: you have to add checkpointing to Xyour program explicitly. You can't just checkpoint a random executable Xwithout the source code. X XCheckpointing has many uses. One is to have processes survive a crash. XYou just checkpoint the process periodically, and restart it if the Xsystem crashes. 
Another is to avoid long, complex initializations on Xstartup; if you checkpoint after initialization, you won't have to waste Xthe time again. Another is to transfer running programs between Xcomputers with the same architecture. X Xpmckpt is (should be, at least) much more portable than other available Xcheckpointing systems, including undump and Condor. It just saves the Xdata, stack, and heap (allocated memory) to a file. It doesn't try to Xread the infinite variety of core file formats. Most importantly, it Xdoesn't use setjmp() or longjmp(), so your compiler can put variables Xand intermediate values into registers without any risk of destroying Xthe values of those variables (as longjmp() usually does). It handles Xstacks in either direction. It also restores file positions and signal Xhandlers. X X XIn any routine where you want to allow checkpointing: X X Put a CKPT VARS flush left on a line by itself at the top, right after X the {. X X Put a CKPT TOPS flush left on a line by itself after all the variables. X (Actually, you can have statements above CKPT TOPS; see below.) X X Put a CKPT POINT x y flush left on a line by itself anywhere that you X want to allow a checkpoint. x is declared as a variable, and y is X declared as a label, so be careful to use unique names. X X Put a CKPT BOTS flush left on a line by itself right before the final }. X XYou must #include "pmckpt.h" at the top of the file. This may seem like Xa lot of work for a checkpointer, but remember that the most advanced Xcontrol structure used by pmckpt is goto. A setjmp()-based system may be Xeasier to use, but it'll also lose variable values when you least expect Xit. X X XIMPORTANT RULE: X XYou *MUST* have a CKPT POINT immediately before calling any subroutine Xthat's checkpointed. (See the main() call of sub1(), line 41 of test.c.) XThere must be absolutely no side effects (and maybe no computation at Xall, depending on your compiler) between the CKPT POINT line and the Xcall. 
You may have to rewrite the function call to achieve this. X X XYou schedule a checkpoint by calling ckpt_schedule(). At the next CKPT XPOINT, your program will save some crucial information, followed by its Xdata, heap, and stack, to the CHECKPOINT file. To set this filename to Xthe value of the CKPTFN environment variable instead, call ckpt_init(). X(This also sets a temporary file name, default CHECKPOINT.TEMP, to the Xvalue of CKPTFNTEMP. The temporary file is used to ensure atomic Xcheckpointing.) test.c shows how you can schedule a checkpoint on any Xinterrupt. In practice you probably want to checkpoint at regular Xintervals, with whatever your system uses for a timer. X X XTo run your program starting from the checkpoint, run X X % checkpoint prog CHECKPOINT X Xwhere prog is the program name and CHECKPOINT is the checkpoint file Xname. If you're lucky, everything will work. X XYou can have statements after the variables and before CKPT TOPS. These Xstatements form the ``preamble.'' They're executed every time the Xfunction is entered, whether in normal execution or as part of a Xrestore. You should be very careful with statements in a preamble, as Xyou will lose any variable values set in a preamble during a restore. XOne safe use of the preamble is at the top of main(), to open files X(perhaps passed as arguments to the program) in a fixed order. In fact, Xif you don't do this, files opened within the program won't be reopened Xon a restore. X X XTo compile a pmckpt program, such as test.c, run the following: X X % ckptcvt < test.c > tmp.c X % cc -c tmp.c X % cc -o tmp pmckpt.o tmp.o pmceot.o X Xtmp can be any name. Make sure that pmckpt.o comes before all other .o's Xloaded (except maybe crt0---though this probably leads to bugs), and Xpmceot.o comes after all .o's loaded. You shouldn't have to worry about Xdynamic loading on Suns, or other weird schemes; pmckpt is pretty Xportable, for a checkpointer. 
X XFinally, to complete our outside-in tour of pmckpt, you have to compile Xthe pmckpt library itself before using it in programs as above. To do Xthis, edit the options in Makefile and type ``make''. X X XTo test pmckpt, compile test.c into tmp by the above instructions. Run Xtmp. You should see 14 lines of output. Run it again; a CKPT POINT comes Xafter each output, so if you type ^C, the program will save state after Xthe next output in CHECKPOINT. Try typing ^C at any moment. If you run X% checkpoint tmp CHECKPOINT, the program should restart from where it Xleft off. ^C works after a restore too, so a single program can Xcheckpoint and restart any number of times. X XFor a more sophisticated test, try redirecting the output of tmp to a Xfile. tail -f file & to see what's happening. Type ^C after a few Xseconds; after a few seconds more, to simulate a system crash, interrupt Xthe process with ^\ (or whatever the tty's quit key is). tmp will dump Xcore. Kill the tail, and restore tmp from CHECKPOINT, redirecting output Xto the same file with >>. When it finishes, look at the file. tmp should Xhave moved back to the location of the first checkpoint, so that output Xbetween the checkpoint and the ``crash'' won't have been written twice. XIn other words, you shouldn't be able to tell from looking at the file Xthat tmp had crashed at all. X XTry running the last test again, but don't kill the tail. What should Xhappen is that what shows up on your tty is the correct output in order, Xand what ends up in foo is the correct output in order---even though Xsome output is written between the last checkpoint and the crash! The Xreason that tail doesn't write anything twice to your tty is that it Xdoesn't go backwards in the file when pmckpt does. X X XInternally, pmckpt wends its way down through the saved process stack to Xget to where it was before. Run a post-ckptcvt program through cpp if Xyou want to see what's going on. 
END_OF_FILE if test 6528 -ne `wc -c <'README'`; then echo shar: \"'README'\" unpacked with wrong size! fi # end of 'README' fi if test -f 'UAREA' -a "${1}" != "-c" ; then echo shar: Will not clobber existing file \"'UAREA'\" else echo shar: Extracting \"'UAREA'\" \(517 characters\) sed "s/^X//" >'UAREA' <<'END_OF_FILE' XEverything in the u area could be saved. X XWhat we actually save: X open file descriptor positions X signal handlers X (uid) X XEasy to restore from the outside: X open file descriptor state (fcntl, ioctl, etc.) X cwd X umask X args, environment (don't know about environ) X Xsecurity: X pid X ppid X pgid X uid, euid X gid, egid X access groups X root dir X Xtoo system dependent: X shm X sem X signal stack X signal mask X trace flag X ctty X interval timers X resource limits X Xsimply can't be restored: X rusage X profil END_OF_FILE if test 517 -ne `wc -c <'UAREA'`; then echo shar: \"'UAREA'\" unpacked with wrong size! fi # end of 'UAREA' fi if test -f 'argv0.c' -a "${1}" != "-c" ; then echo shar: Will not clobber existing file \"'argv0.c'\" else echo shar: Extracting \"'argv0.c'\" \(233 characters\) sed "s/^X//" >'argv0.c' <<'END_OF_FILE' X#include <stdio.h> X Xmain(argc,argv) Xint argc; Xchar *argv[]; X{ X if (argc < 3) X { fputs("Usage: argv0 realname program [ arg ... ]\n",stderr); exit(1); } X execvp(argv[1],argv + 2); X perror("argv0: fatal: cannot execute"); X exit(4); X} END_OF_FILE if test 233 -ne `wc -c <'argv0.c'`; then echo shar: \"'argv0.c'\" unpacked with wrong size! fi # end of 'argv0.c' fi if test -f 'checkpoint' -a "${1}" != "-c" ; then echo shar: Will not clobber existing file \"'checkpoint'\" else echo shar: Extracting \"'checkpoint'\" \(57 characters\) sed "s/^X//" >'checkpoint' <<'END_OF_FILE' X#!/bin/sh Xi="$1" Xshift 1 Xexec argv0 "$i" CHECKPOINT "$@" END_OF_FILE if test 57 -ne `wc -c <'checkpoint'`; then echo shar: \"'checkpoint'\" unpacked with wrong size! 
fi chmod +x 'checkpoint' # end of 'checkpoint' fi if test -f 'ckptcvt' -a "${1}" != "-c" ; then echo shar: Will not clobber existing file \"'ckptcvt'\" else echo shar: Extracting \"'ckptcvt'\" \(689 characters\) sed "s/^X//" >'ckptcvt' <<'END_OF_FILE' X#!/bin/sh Xawk '{ X if ($1 == "CKPT") X { X if ($2 == "VARS") X { print " CKPT_VAR1"; X print "#undef CKPT_HEAD"; X print "#define CKPT_HEAD CKPT_head"; X print " CKPT_VAR4"; } X else if ($2 == "TOPS") X { print " CKPT_TOP1"; X print "#undef CKPT_SPOT"; X print "#define CKPT_SPOT CKPT_spot"; X print " CKPT_TOP4" } X else if ($2 == "POINT") X { print " CKPT_1("$3")"; X print "#undef CKPT_HEAD"; X print "#define CKPT_HEAD "$3; X print " CKPT_4("$4")"; X print "#undef CKPT_SPOT"; X print "#define CKPT_SPOT "$4; X print " CKPT_7" } X else if ($2 == "BOTS") X { print " CKPT_BOT1" } X } X else X print X}' END_OF_FILE if test 689 -ne `wc -c <'ckptcvt'`; then echo shar: \"'ckptcvt'\" unpacked with wrong size! fi chmod +x 'ckptcvt' # end of 'ckptcvt' fi if test -f 'pmceot.c' -a "${1}" != "-c" ; then echo shar: Will not clobber existing file \"'pmceot.c'\" else echo shar: Extracting \"'pmceot.c'\" \(112 characters\) sed "s/^X//" >'pmceot.c' <<'END_OF_FILE' X#include "pmckpt.h" X Xstatic char ckpt_eot; /* uninitialized */ X Xvoid ckpt_set(s) Xchar **s; X{ X *s = &ckpt_eot; X} END_OF_FILE if test 112 -ne `wc -c <'pmceot.c'`; then echo shar: \"'pmceot.c'\" unpacked with wrong size! 
fi # end of 'pmceot.c' fi if test -f 'pmckpt.c' -a "${1}" != "-c" ; then echo shar: Will not clobber existing file \"'pmckpt.c'\" else echo shar: Extracting \"'pmckpt.c'\" \(5122 characters\) sed "s/^X//" >'pmckpt.c' <<'END_OF_FILE' X#include <sys/types.h> X#include <sys/file.h> X#include <sys/param.h> X#include <signal.h> X#include "pmckpt.h" X#undef main X Xextern char *getenv(); Xextern char *sbrk(); Xextern ckpt_main(); Xextern long tell(); Xextern long lseek(); Xextern int brk(); X X#define NUMFDS NOFILE /* portable */ X#define NUMSIGS 32 /* XXX */ X#define SIGRET void X X/* XXX: t for text was an original misnomer. Should have been d for data. */ X/* XXX: Don't need the end of stack pointer. */ X/* XXX: Need BSD timer routines to signal through ckpt_point. */ X X/* XXX: Must initialize all these variables. We assume that the loader */ X/* puts symbols in order, with initialized data before uninitialized data. */ X Xstatic int fd = -1; /* must be before sot */ Xstatic long ckpt_diskbase = 1; Xstatic char *ckpt_stackbase = ""; Xstatic char *ckpt_eotptr = ""; /* end of text pointer */ Xstatic char *ckpt_sosptr = ""; /* start of stack pointer */ Xstatic char *ckpt_eosptr = ""; /* end of stack pointer */ Xstatic char *ckpt_origbrk = ""; Xstatic char *ckpt_newbrk = ""; Xchar *ckpt_restore = (char *) 0; Xint ckpt_point = -1; Xstatic char ckpt_sot = 'x'; /* start of text */ Xstatic char *ckpt_fn = "CHECKPOINT"; /* after sot: should be restored */ Xstatic char *ckpt_fntemp = "CHECKPOINT.TEMP"; X Xstatic struct /* restored automatically with the rest of the data */ X { X /* We should but don't make a lot of effort to restore uids. */ X /* It just wouldn't be portable between BSD and System V if we did. 
*/ X int uid; X long fpos[NUMFDS]; X SIGRET (*handler[NUMSIGS])(); X } Xustate; X Xstatic void get_ustate() X{ X int i; X X ustate.uid = getuid(); X for (i = 0;i < NUMFDS;i++) X ustate.fpos[i] = tell(i); X for (i = 0;i < NUMSIGS;i++) X { X /* We assume that no signal arrives in this period. That's stupid. */ X ustate.handler[i] = signal(i,SIG_IGN); X (void) signal(i,ustate.handler[i]); X } X} X Xstatic void set_ustate() X{ X int i; X X (void) setuid(ustate.uid); /*XXX*/ X for (i = 0;i < NUMFDS;i++) X if (ustate.fpos[i] != -1) X (void) lseek(i,ustate.fpos[i],L_SET); /* XXX: what if fails? */ X for (i = 0;i < NUMSIGS;i++) X { X /* XXX: race, race, glorious race */ X (void) signal(i,ustate.handler[i]); X } X} X X#define horribly 1 X Xstatic void die(e) Xint e; X{ X exit(e); /*XXX*/ X} X Xmain(argc,argv,envp) Xint argc; Xchar *argv[]; Xchar *envp[]; X{ X char ckpt_sos; /* start of stack */ X X ckpt_point = 0; X ckpt_origbrk = sbrk(0); X ckpt_sosptr = &ckpt_sos; X ckpt_set(&ckpt_eotptr); X if (!strcmp(argv[0],"CHECKPOINT")) X { X if ((fd = open(argv[1],O_RDONLY)) == -1) die(horribly); X argv += 2; X if (read(fd,&ckpt_restore,sizeof(ckpt_restore)) == -1) die(horribly); X if (read(fd,&ckpt_eosptr,sizeof(ckpt_eosptr)) == -1) die(horribly); X if (read(fd,&ckpt_newbrk,sizeof(ckpt_newbrk)) == -1) die(horribly); X ckpt_data_restore(); X ckpt_heap_restore(); X ckpt_stackbase = ckpt_sosptr; X if (ckpt_sosptr - ckpt_eosptr > 0) X ckpt_diskbase = lseek(fd,0L,L_XTND); X else X ckpt_diskbase = tell(fd); X } X return ckpt_main(argc,argv,envp); X} X Xvoid ckpt_data_restore() X{ X /* read in information between &sot and eotptr */ X if (read(fd,&ckpt_sot,ckpt_eotptr - &ckpt_sot) == -1) die(horribly); X} X Xvoid ckpt_heap_restore() X{ X (void) brk(ckpt_newbrk); X if (read(fd,ckpt_origbrk,ckpt_newbrk - ckpt_origbrk) == -1) die(horribly); X} X Xvoid ckpt_increment(left,right) Xchar *left; Xchar *right; X{ X if (right - left > 0) X { X if (lseek(fd,ckpt_diskbase + (left - 
ckpt_stackbase),L_SET) == -1) X die(horribly); X if (read(fd,left,right - left) == -1) X die(horribly); X } X else X { X /* character at ckpt_stackbase - x is at ckpt_diskbase + x */ X if (lseek(fd,ckpt_diskbase - (ckpt_stackbase - right),L_SET) == -1) X die(horribly); X if (read(fd,right,left - right) == -1) X die(horribly); X } X} X Xvoid ckpt_stack_restore() X{ X /* read in any final information between sosptr and ckpt_eosptr */ X (void) close(fd); X set_ustate(); X return; X} X X/* Write the checkpoint to ckpt_fntemp, then rename it over ckpt_fn. */ X/* On any failed or short write, close fd and keep the old checkpoint. */ Xvoid ckpt_save(restore,eosptr) Xchar *restore; Xchar *eosptr; X{ X get_ustate(); X if ((fd = open(ckpt_fntemp,O_RDWR | O_CREAT | O_TRUNC,0600)) == -1) return; X if (write(fd,&restore,sizeof(restore)) != sizeof(restore)) goto fail; X if (write(fd,&eosptr,sizeof(eosptr)) != sizeof(eosptr)) goto fail; X ckpt_newbrk = sbrk(0); X if (write(fd,&ckpt_newbrk,sizeof(ckpt_newbrk)) != sizeof(ckpt_newbrk)) goto fail; X if (write(fd,&ckpt_sot,ckpt_eotptr - &ckpt_sot) != ckpt_eotptr - &ckpt_sot) goto fail; X if (write(fd,ckpt_origbrk,ckpt_newbrk - ckpt_origbrk) != ckpt_newbrk - ckpt_origbrk) goto fail; X if (eosptr - ckpt_sosptr > 0) X { if (write(fd,ckpt_sosptr,eosptr - ckpt_sosptr) != eosptr - ckpt_sosptr) goto fail; } X else X { if (write(fd,eosptr,ckpt_sosptr - eosptr) != ckpt_sosptr - eosptr) goto fail; } X if (close(fd) == -1) return; X (void) rename(ckpt_fntemp,ckpt_fn); X return; Xfail: X (void) close(fd); X} X Xvoid ckpt_schedule() X{ X ckpt_point = 1; X} X Xvoid ckpt_init() X{ X if (getenv("CKPTFNTEMP")) X ckpt_fntemp = getenv("CKPTFNTEMP"); X if (getenv("CKPTFN")) X ckpt_fn = getenv("CKPTFN"); X} END_OF_FILE if test 5122 -ne `wc -c <'pmckpt.c'`; then echo shar: \"'pmckpt.c'\" unpacked with wrong size! 
fi # end of 'pmckpt.c' fi if test -f 'pmckpt.h' -a "${1}" != "-c" ; then echo shar: Will not clobber existing file \"'pmckpt.h'\" else echo shar: Extracting \"'pmckpt.h'\" \(1242 characters\) sed "s/^X//" >'pmckpt.h' <<'END_OF_FILE' X#ifndef PMCKPT_H X#define PMCKPT_H X/* Declarations for the pmckpt library and the CKPT_* macros emitted by ckptcvt. */ X Xextern void ckpt_stack_restore(); Xextern void ckpt_data_restore(); Xextern void ckpt_heap_restore(); Xextern void ckpt_save(); Xextern void ckpt_increment(); Xextern void ckpt_schedule(); Xextern void ckpt_init(); Xextern void ckpt_set(); Xextern char *ckpt_restore; Xextern int ckpt_point; X X#define CKPT_HEAD ckpt_failure X#define CKPT_SPOT ckpt_failure X X#define main ckpt_main X X#define CKPT_VAR1 static char CKPT_head; char CKPT_mark; X#define CKPT_VAR4 static char *CKPT_pos; { X X#define CKPT_POINT CKPT_pos = &CKPT_HEAD; if (ckpt_point) { char CKPT_duh; ckpt_save(&CKPT_HEAD,&CKPT_duh); ckpt_point = 0; } X X#define CKPT_TOP1 if (ckpt_restore) { char CKPT_duh; ckpt_increment(&CKPT_mark,&CKPT_duh); goto CKPT_final; CKPT_spot: ckpt_stack_restore(); ckpt_restore = (char *) 0; X#define CKPT_TOP4 } else { CKPT_POINT } X X#define CKPT_1(foo) { static char foo; if (0) { X#define CKPT_4(bar) bar: if (CKPT_pos != &CKPT_HEAD) goto CKPT_SPOT; X#define CKPT_7 if (ckpt_restore == &CKPT_HEAD) { ckpt_stack_restore(); ckpt_restore = (char *) 0; } } CKPT_POINT } X X#define CKPT_BOT1 } if (0) { CKPT_final: goto CKPT_SPOT; } X X#endif /* PMCKPT_H */ END_OF_FILE if test 1242 -ne `wc -c <'pmckpt.h'`; then echo shar: \"'pmckpt.h'\" unpacked with wrong size! 
fi # end of 'pmckpt.h' fi if test -f 'test.c' -a "${1}" != "-c" ; then echo shar: Will not clobber existing file \"'test.c'\" else echo shar: Extracting \"'test.c'\" \(728 characters\) sed "s/^X//" >'test.c' <<'END_OF_FILE' X#include <signal.h> X#include <malloc.h> X#include <stdio.h> X#include "pmckpt.h" X Xchar *foostr = "hmm"; X Xsub1(i) Xint i; X{ XCKPT VARS X float x = i * 3.14; X int k = 1094795586; XCKPT TOPS X for (k = 1;k < 3;k++) X { X sleep(1); X printf("%d %g %d\n",i,x,k); X fflush(stdout); X x += k; XCKPT POINT boom box X } XCKPT BOTS X} X Xmain() X{ XCKPT VARS X int i = 1094795585; X int j = 1094795585; X XCKPT TOPS X X signal(SIGINT,ckpt_schedule); X foostr = malloc(150); X strcpy(foostr,"yo, dude."); X X for (i = 0;i < 4;i++) X { XCKPT POINT head1 spot1 X sub1(i); X sleep(1); X printf("%d %s\n",i,foostr); X fflush(stdout); X } X X for (j = 0;j < 2;j++) X { XCKPT POINT head2 spot2 X sleep(1); X printf("%d\n",j); X fflush(stdout); X } X XCKPT BOTS X} END_OF_FILE if test 728 -ne `wc -c <'test.c'`; then echo shar: \"'test.c'\" unpacked with wrong size! fi # end of 'test.c' fi echo shar: End of shell archive. exit 0