koreth@ssyx.ucsc.edu (Steven Grimm) (02/25/89)
Submitted-by: saj@chinet.chi.il.us (Stephen Jacobs) Posting-number: Volume 2, Issue 15 Archive-name: sed/part01 This is the first of 2 parts of source/text and one part of binary of sed working on the Atari ST. The executable was compiled with Mark Williams C, v3.06. Due to the limitations on distribution mentioned in sed.man, I think sed.man should not be arc-ed together with the executable, but should only be part of the source distribution. [That's what I've done. -sg] It is an implementation of the Unix utility of the same name, and that even though it's named sed.ttp it really shouldn't be run from the desktop because the input is case sensitive. #! /bin/sh # This is a shell archive. Remove anything before this line, then unpack # it by saving it into a file and typing "sh file". To overwrite existing # files, type "sh file -c". You can also feed this as standard input via # unshar, or by typing "sh <file", e.g.. If this archive is complete, you # will see the following message at the end: # "End of shell archive." 
# Contents: ctrans debug.h pascal sajnote.sed sed.h sed.man sedcomp.c # Wrapped by saj@chinet on Sun Jan 29 23:52:01 1989 PATH=/bin:/usr/bin:/usr/ucb ; export PATH if test -f ctrans -a "${1}" != "-c" ; then echo shar: Will not over-write existing file \"ctrans\" else echo shar: Extracting \"ctrans\" \(1134 characters\) sed "s/^X//" >ctrans <<'END_OF_ctrans' X# First, change over comment delimiters X /{/s//\/* /g X /}/s// *\//g X# Then the block start and end X /begin/s//{/g X /end/s//}/g X# Stash away assignment ops and the relationals with = in them X /:=/s//::/g X />=/s//>:/g X /<=/s//<:/g X# Remaining Pascal = are C == X /=/s//==/g X# Now convert to C assignment syntax and restore relationals X /::/s//=/g X />:/s//>=/g X /<:/s//<=/g X# Now convert Pascal's not-equal X /<>/s//!=/g X# Eliminate unused thens X / then/s///g X# Convert to C logical operators X / or /s// || /g X / and /s// && /g X# Convert modulo operator X / mod /s// % /g X# Now convert procedure syntax X /procedure/s/);$/)/ X /procedure/s//void/ X# So C won't think it's a declare X /function/s/function \(.*\): *\(.*\);/function \2 \1/ X /function/s/: [a-zA-Z0-9]+// X /function +/s/// X# First hack at changing strings X /'/s//"/g X# Turn space pairs to tabs X /^ /s/ / /g X# Change standard types X /integer/s//int/g X /integer/s//bool/g X /cycle/s//continue/g END_OF_ctrans if test 1134 -ne `wc -c <ctrans`; then echo shar: \"ctrans\" unpacked with wrong size! fi # end of overwriting check fi if test -f debug.h -a "${1}" != "-c" ; then echo shar: Will not over-write existing file \"debug.h\" else echo shar: Extracting \"debug.h\" \(348 characters\) sed "s/^X//" >debug.h <<'END_OF_debug.h' X#define PASS(msgstring)/* printf(msgstring), printf ("\n")*/ X X/* This debug header was added by Stephen Jacobs, and isn't part of GNU X*sed. It may be hacked omitted or replaced at will. 
The only other X*thing I've done is fix a compiler dependency and an inappropriate declaration, X*which are marked in the source files with my initials, SAJ. X*/ END_OF_debug.h if test 348 -ne `wc -c <debug.h`; then echo shar: \"debug.h\" unpacked with wrong size! fi # end of overwriting check fi if test -f pascal -a "${1}" != "-c" ; then echo shar: Will not over-write existing file \"pascal\" else echo shar: Extracting \"pascal\" \(111 characters\) sed "s/^X//" >pascal <<'END_OF_pascal' X{ this is a test } Xx := y Xbegin Xend Xif (x>=y) Xx<>y Xx or y Xprocedure foo(); Xfunction fie(abc,def,gef): integer; END_OF_pascal if test 111 -ne `wc -c <pascal`; then echo shar: \"pascal\" unpacked with wrong size! fi # end of overwriting check fi if test -f sajnote.sed -a "${1}" != "-c" ; then echo shar: Will not over-write existing file \"sajnote.sed\" else echo shar: Extracting \"sajnote.sed\" \(1952 characters\) sed "s/^X//" >sajnote.sed <<'END_OF_sajnote.sed' XAfter putting an unreasonable amount of effort into debugging this beast, I Xfeel entitled to add a few words to what's already in the .man file. X XThe main reason I wanted sed was to help distinguish defining and referencing Xdeclarations of external variables in C. Andy Tanenbaum got me started on Xthis in his Operating Systems, Design and Implementation (the MINIX book). XAnyway, I adopted his idea of a pre-processor macro that expands to a Xgrammatically proper referencing declaration everywhere except in one file, Xwhere it is redefined to expand to a proper defining declaration. This does Xnot serve for initialized external variables, however. 
My solution follows: X XThe macros EXTERN and INITIALIZE are defined as X X#define EXTERN extern X#define INITIALIZE(type, name, value) extern type name X Xexcept in one file, which says X X#undef EXTERN X#undef INITIALIZE X#define EXTERN X#define INITIALIZE(type, name, value) type name = (value) X XThis allows things like X XINITIALIZE(double, imagintvl, -.0125); XEXTERN double reintmp, imintmp; XEXTERN struct complex upleft, center; XINITIALIZE(int, drawarray[4], ({0, 9, 0, 0})); X XFine so far. But that initializer {0, 9, 0, 0} really does need to be Xenclosed in parentheses, because only parentheses can protect a comma from Xbeing taken as a token delimiter by the C pre-processor. Similarly, Xthe INITIALIZE macro really should put the initializer in parentheses Xto avoid unpleasant surprises. Imagine my surprise when C compilers Xrejected "int drawarray[4] = (({0, 9, 0, 0}));". It seems that the syntax Xof a multi-value initializer requires " = <optional whitespace> {". Damn. X XSed to the rescue!! My makefile for a complex program now includes X X Xexpanded.c: defines.c X cc -E defines.c > $(TMPDIR)\expanded X sed -e '/=/s/(\([{"].*\));/\1;/' $(TMPDIR)\expanded > $(TMPDIR)\filtered X sed -e '/=/s/(\([{"].*\));/\1;/' $(TMPDIR)\filtered > expanded.c X XTa daaa! X Stephen Jacobs (saj@chinet.chi.il.us) END_OF_sajnote.sed if test 1952 -ne `wc -c <sajnote.sed`; then echo shar: \"sajnote.sed\" unpacked with wrong size! 
fi # end of overwriting check fi if test -f sed.h -a "${1}" != "-c" ; then echo shar: Will not over-write existing file \"sed.h\" else echo shar: Extracting \"sed.h\" \(4015 characters\) sed "s/^X//" >sed.h <<'END_OF_sed.h' X/* sed.h -- types and constants for the stream editor */ X X/* data area sizes used by both modules */ X#define MAXBUF 4000 /* current line buffer size */ X#define MAXAPPENDS 20 /* maximum number of appends */ X#define MAXTAGS 9 /* tagged patterns are \1 to \9 */ X X/* constants for compiled-command representation */ X#define EQCMD 0x01 /* = -- print current line number */ X#define ACMD 0x02 /* a -- append text after current line */ X#define BCMD 0x03 /* b -- branch to label */ X#define CCMD 0x04 /* c -- change current line */ X#define DCMD 0x05 /* d -- delete all of pattern space */ X#define CDCMD 0x06 /* D -- delete first line of pattern space */ X#define GCMD 0x07 /* g -- copy hold space to pattern space */ X#define CGCMD 0x08 /* G -- append hold space to pattern space */ X#define HCMD 0x09 /* h -- copy pattern space to hold space */ X#define CHCMD 0x0A /* H -- append pattern space to hold space */ X#define ICMD 0x0B /* i -- insert text before current line */ X#define LCMD 0x0C /* l -- print pattern space in escaped form */ X#define NCMD 0x0D /* n -- get next line into pattern space */ X#define CNCMD 0x0E /* N -- append next line to pattern space */ X#define PCMD 0x0F /* p -- print pattern space to output */ X#define CPCMD 0x10 /* P -- print first line of pattern space */ X#define QCMD 0x11 /* q -- exit the stream editor */ X#define RCMD 0x12 /* r -- read in a file after current line */ X#define SCMD 0x13 /* s -- regular-expression substitute */ X#define TCMD 0x14 /* t -- branch on last substitute successful */ X#define CTCMD 0x15 /* T -- branch on last substitute failed */ X#define WCMD 0x16 /* w -- write pattern space to file */ X#define CWCMD 0x17 /* W -- write first line of pattern space */ X#define XCMD 0x18 /* x -- exchange pattern and 
hold spaces */ X#define YCMD 0x19 /* y -- transliterate text */ X Xstruct cmd_t /* compiled-command representation */ X{ X char *addr1; /* first address for command */ X char *addr2; /* second address for command */ X union X { X char *lhs; /* s command lhs */ X struct cmd_t *link; /* label link */ X } u; X char command; /* command code */ X char *rhs; /* s command replacement string */ X FILE *fout; /* associated output file descriptor */ X struct X { X unsigned allbut : 1; /* was negation specified? */ X unsigned global : 1; /* was g postfix specified? */ X unsigned print : 2; /* was p or P postfix specified? */ X unsigned inrange : 1; /* in an address range? */ X } flags; X}; Xtypedef struct cmd_t sedcmd; /* use this name for declarations */ X X#define BAD ((char *) -1) /* guaranteed not a string ptr */ X X X/* address and regular expression compiled-form markers */ X#define STAR 1 /* marker for Kleene star */ X#define CCHR 2 /* non-newline character to be matched follows */ X#define CDOT 4 /* dot wild-card marker */ X#define CCL 6 /* character class follows */ X#define CNL 8 /* match line start */ X#define CDOL 10 /* match line end */ X#define CBRA 12 /* tagged pattern start marker */ X#define CKET 14 /* tagged pattern end marker */ X#define CBACK 16 /* backslash-digit pair marker */ X#define CLNUM 18 /* numeric-address index follows */ X#define CEND 20 /* symbol for end-of-source */ X#define CEOF 22 /* end-of-field mark */ X X/* sed.h ends here */ END_OF_sed.h if test 4015 -ne `wc -c <sed.h`; then echo shar: \"sed.h\" unpacked with wrong size! fi # end of overwriting check fi if test -f sed.man -a "${1}" != "-c" ; then echo shar: Will not over-write existing file \"sed.man\" else echo shar: Extracting \"sed.man\" \(9360 characters\) sed "s/^X//" >sed.man <<'END_OF_sed.man' XNAME X sed - the stream editor X X XSYNOPSIS X sed [-n] [-g] [-e script ] [-f sfile ] [ file ] ... 
X X XDESCRIPTION X Sed copies the named files (standard input default) to the standard Xoutput, edited according to a script of commands. X An -e option supplies a single edit command from the next argument; Xif there are several of these they are executed in the order in which Xthey appear. If there is just one -e option and no -f 's, the -e flag Xmay be omitted. X An -f option causes commands to be taken from the file "sfile"; if Xthere are several of these they are executed in the order in which Xthey appear; -e and -f commands may be mixed. X The -g option causes sed to act as though every substitute command Xin the script has a g suffix. X The -n option suppresses the default output. X X A script consists of commands, one per line, of the following form: X X[address [, address] ] function [arguments] X X Normally sed cyclically copies a line of input into a current text Xbuffer, then applies all commands whose addresses select the buffer in Xsequence, then copies the buffer to standard output and clears it. X The -n option suppresses normal output (so that only p and w output Xis done). Also, some commands (n, N) do their own line reads, and some Xothers (d, D) cause all commands following in the script to be skipped X(the D command also suppresses the clearing of the current text buffer Xthat would normally occur before the next cycle). X X It is also helpful to know that there's a second buffer (called the X'hold space') that can be copied or appended to or from or swapped with Xthe current text buffer. X X An address is: a decimal numeral (which matches the line it numbers Xwhere line numbers start at 1 and run cumulatively across files), or a X`$' that addresses the last line of input, or a context address, which Xis a `/regular expression/', in the style of ed (1) modified thus: X X(1) The escape sequence `\n' matches a newline embedded in the buffer, X and `\t' matches a tab. X X(2) A command line with no addresses selects every buffer. 
X X(3) A command line with one address selects every buffer that matches X that address. X X(4) A command line with two addresses selects the inclusive range from X the first input buffer that matches the first address through the X next input buffer that matches the second. (If the second address X is a number less than or equal to the line number first selected, X only one line is selected.) Once the second address is matched sed X starts looking for the first one again; thus, any number of these X ranges will be matched. X X The negation operator '!' can prefix a command to apply it to every Xline not selected by the address(es). X X In the following list of functions, the maximum number of addresses Xpermitted for each function is indicated in parentheses. X An argument denoted "text" consists of one or more lines, with all Xbut the last ending with `\' to hide the newline. X Backslashes in text are treated like backslashes in the replacement Xstring of an `s' command and may be used to protect initial whitespace X(blanks and tabs) against the stripping that is done on every line of Xthe script. X X An argument denoted "rfile" or "wfile" must be last on the command Xline. Each wfile is created before processing begins. There can be at Xmost 10 distinct wfile arguments. X Xa "text" (1) X Append. Place text on output before reading the next input line. X Xb "label" (2) X Branch to the `:' command bearing the label. If no label is given, Xbranch to the end of the script. X Xc "text" (2) X Change. Delete the current text buffer. With 0 or 1 address, or at Xthe end of a 2-address range, place text on the output. Start the next Xcycle. X Xd (2) X Delete the current text buffer. Start the next cycle. X XD (2) X Delete the first line of the current text buffer (all chars up to the Xfirst newline). Start the next cycle. X Xg (2) X Replace the contents of the current text buffer with the contents of Xthe hold space. 
X XG (2) X Append the contents of the hold space to the current text buffer. X Xh (2) X Copy the current text buffer into the hold space. X XH (2) X Append a copy of the current text buffer to the hold space. X Xi "text" (1) X Insert. Place text on the standard output. X Xl (2) X List. Sends the pattern space to standard output. A "w" option may Xfollow as in the s command below. Non-printable characters expand to: X \b -- backspace (ASCII 08) X \t -- tab (ASCII 09) X \n -- newline (ASCII 10) X \r -- return (ASCII 13) X \e -- escape (ASCII 27) X \xx -- the ASCII character corresponding to 2 hex digits xx. X Xn (2) X Copy the current text buffer to standard output. Read the next line Xof input into it. X XN (2) X Append the next line of input to the current text buffer, inserting Xan embedded newline between the two. The current line number changes. X Xp (2) X Print. Copy the current text buffer to the standard output. X XP (2) X Copy the first line of the current text buffer (all chars up to the Xfirst newline) to standard output. X Xq (1) X Quit. Branch to the end of the script. Do not start a new cycle. X Xr "rfile" (1) X Read the contents of rfile. Place them on the output before reading Xthe next input line. X Xs /regular expression/replacement/flags (2) X Substitute the replacement for instances of the regular expression Xin the current text buffer. Any character may be used instead of `/'. XFor a fuller description see ed (1). X Flags is zero or more of the following: X Xg -- Global. Substitute for all nonoverlapping instances of the string X rather than just the first one. X Xp -- Print the pattern space if a replacement was made. X Xw -- Write. Append the current text buffer to a file argument as in a X w command if a replacement is made. Standard output is used if no X file argument is given. X Xt "label" (2) X Branch-if-test. 
Branch to the : command with the given label if any Xsubstitutes have been made since the most recent read of an input line Xor execution of a `t' or `T'. If no label is given, branch to the end Xof the script. X XT "label" (2) X Branch-on-error. Branch to the : command with the given label if no Xsubstitutes have succeeded since the last input line or t or T command. XBranch to the end of the script if no label is given. X Xw "wfile" (2) X Write. Append the current text buffer to wfile. X XW "wfile" (2) X Write first line. Append first line of the current text buffer Xto wfile. X Xx (2) X Exchange the contents of the current text buffer and hold space. X Xy /string1/string2/ (2) X Translate. Replace each occurrence of a character in string1 with Xthe corresponding character in string2. The lengths of these strings Xmust be equal. X X! "command" (2) X All-but. Apply the function (or group, if function is `{') only to Xlines not selected by the address(es). X X: "label" (0) X This command does nothing but hold a label for `b', `t' and `T' commands Xto branch to. X X= (1) X Place the current line number on the standard output as a line. X X{ (2) X Execute the following commands through a matching `}' only when the Xcurrent line matches the address or address range given. X X An empty command is ignored. X X XPORTABILITY X This tool was reverse-engineered from BSD 4.1 UNIX sed, and (as far Xas the author's knowledge and tests can determine) is compatible with Xit. All documented features of BSD 4.1 sed are supported. X X One undocumented feature (a leading 'n' in the first comment having Xthe same effect as an -n command-line option) has been omitted. X XThe following bugs and limitations have been fixed: X * There is no hidden length limit (40 in BSD sed) on w file names. X * There is no limit (8 in BSD sed) on the length of labels. X * The exchange command now works for long pattern and hold spaces. 
X XThe following enhancements to existing commands have been made: X * a, i commands don't insist on a leading backslash-\n in the text. X * r, w commands don't insist on whitespace before the filename. X * The g, p and P options on s commands may be given in any order. X XSome enhancements to regular-expression syntax have been made: X * \t is recognized in REs (and elsewhere) as an escape for tab. X * In an RE, + calls for 1..n repeats of the previous pattern. X XThe following are completely new features: X * The l command (list, undocumented and weaker in BSD) X * The W command (write first line of pattern space to file). X * The T command (branch on last substitute failed). X * Trailing comments are now allowed on command lines. X X In addition, sed's error messages have been made more specific and Xinformative. X X The implementation is also significantly smaller and faster than XBSD 4.1 sed. It uses only the standard I/O library and exit. X X XNOTE X This is a freeware component of the GNU operating system. The user Xis hereby granted permission to use, modify, reproduce and distribute Xit subject to the following conditions: X 1. The authorship notice appearing in each source file may not be Xaltered or deleted. X 2. The object form may not be distributed without source. X X XSEE ALSO X ed(1), grep(1), awk(1), lex(1), regexp(5) END_OF_sed.man if test 9360 -ne `wc -c <sed.man`; then echo shar: \"sed.man\" unpacked with wrong size! 
fi # end of overwriting check fi if test -f sedcomp.c -a "${1}" != "-c" ; then echo shar: Will not over-write existing file \"sedcomp.c\" else echo shar: Extracting \"sedcomp.c\" \(37223 characters\) sed "s/^X//" >sedcomp.c <<'END_OF_sedcomp.c' X#if 0 /* SAJ */ X#include "compiler.h" X#ifdef LATTICE X#define void int X#endif X#endif /* SAJ */ X X#include "debug.h" X/* sedcomp.c -- stream editor main and compilation phase X X The stream editor compiles its command input (from files or -e options) Xinto an internal form using compile() then executes the compiled form using Xexecute(). Main() just initializes data structures, interprets command line Xoptions, and calls compile() and execute() in appropriate sequence. X The data structure produced by compile() is an array of compiled-command Xstructures (type sedcmd). These contain several pointers into pool[], the Xregular-expression and text-data pool, plus a command code and g & p flags. XIn the special case that the command is a label the struct will hold a ptr Xinto the labels array labels[] during most of the compile, until resolve() Xresolves references at the end. X The operation of execute() is described in its source module. X X==== Written for the GNU operating system by Eric S. 
Raymond ==== */ X X#include <stdio.h> /* uses getc, fprintf, fopen, fclose */ X#include "sed.h" /* command type struct and name defines */ X X/* imported functions */ Xextern int strcmp(); /* test strings for equality */ Xextern void execute(); /* execute compiled command */ X X/***** public stuff ******/ X X#define MAXCMDS 200 /* maximum number of compiled commands */ X#define MAXLINES 256 /* max # numeric addresses to compile */ X X/* main data areas */ Xchar linebuf[MAXBUF+1]; /* current-line buffer */ Xsedcmd cmds[MAXCMDS+1]; /* hold compiled commands */ Xlong linenum[MAXLINES]; /* numeric-addresses table */ X X/* miscellaneous shared variables */ Xint nflag; /* -n option flag */ Xint eargc; /* scratch copy of argument count */ Xsedcmd *pending = NULL; /* next command to be executed */ Xchar bits[] = {1,2,4,8,16,32,64,128}; X X/***** module common stuff *****/ X X#define POOLSIZE 10000 /* size of string-pool space */ X#define WFILES 10 /* max # w output files that can be compiled */ X#define RELIMIT 256 /* max chars in compiled RE */ X#define MAXDEPTH 20 /* maximum {}-nesting level */ X#define MAXLABS 50 /* max # of labels that can be handled */ X X#define SKIPWS(pc) while ((*pc==' ') || (*pc=='\t')) pc++ X#define ABORT(msg) (fprintf(stderr, msg, linebuf), exit(2)) X#define IFEQ(x, v) if (*x == v) x++ , /* do expression */ X X/* error messages */ Xstatic char AGMSG[] = "sed: garbled address %s\n"; Xstatic char CGMSG[] = "sed: garbled command %s\n"; Xstatic char TMTXT[] = "sed: too much text: %s\n"; Xstatic char AD1NG[] = "sed: no addresses allowed for %s\n"; Xstatic char AD2NG[] = "sed: only one address allowed for %s\n"; Xstatic char TMCDS[] = "sed: too many commands, last was %s\n"; Xstatic char COCFI[] = "sed: cannot open command-file %s\n"; Xstatic char UFLAG[] = "sed: unknown flag %c\n"; Xstatic char COOFI[] = "sed: cannot open %s\n"; Xstatic char CCOFI[] = "sed: cannot create %s\n"; Xstatic char ULABL[] = "sed: undefined label %s\n"; Xstatic char TMLBR[] 
= "sed: too many {'s\n"; Xstatic char FRENL[] = "sed: first RE must be non-null\n"; Xstatic char NSCAX[] = "sed: no such command as %s\n"; Xstatic char TMRBR[] = "sed: too many }'s\n"; Xstatic char DLABL[] = "sed: duplicate label %s\n"; Xstatic char TMLAB[] = "sed: too many labels: %s\n"; Xstatic char TMWFI[] = "sed: too many w files\n"; Xstatic char REITL[] = "sed: RE too long: %s\n"; Xstatic char TMLNR[] = "sed: too many line numbers\n"; Xstatic char TRAIL[] = "sed: command \"%s\" has trailing garbage\n"; X Xtypedef struct /* represent a command label */ X{ X char *name; /* the label name */ X sedcmd *last; /* it's on the label search list */ X sedcmd *address; /* pointer to the cmd it labels */ X} Xlabel; X X/* label handling */ Xstatic label labels[MAXLABS]; /* here's the label table */ Xstatic label *lab = labels + 1; /* pointer to current label */ Xstatic label *lablst = labels; /* header for search list */ X X/* string pool for regular expressions, append text, etc. etc. */ Xstatic char pool[POOLSIZE]; /* the pool */ Xstatic char *fp = pool; /* current pool pointer */ Xstatic char *poolend = pool + POOLSIZE; /* pointer past pool end */ X X/* compilation state */ Xstatic FILE *cmdf = NULL; /* current command source */ Xstatic char *cp = linebuf; /* compile pointer */ Xstatic sedcmd *cmdp = cmds; /* current compiled-cmd ptr */ Xstatic char *lastre = NULL; /* old RE pointer */ Xstatic int bdepth = 0; /* current {}-nesting level */ Xstatic int bcount = 0; /* # tagged patterns in current RE */ Xstatic char **eargv; /* scratch copy of argument list */ X X/* compilation flags */ Xstatic int eflag; /* -e option flag */ Xstatic int gflag; /* -g option flag */ X X Xmain(argc, argv) X/* main sequence of the stream editor */ Xint argc; Xchar *argv[]; X{ X void compile(), resolve(); X X eargc = argc; /* set local copy of argument count */ X eargv = argv; /* set local copy of argument list */ X cmdp->addr1 = pool; /* 1st addr expand will be at pool start */ X if (eargc == 
1) X exit(0); /* exit immediately if no arguments */ XPASS("main(): setup"); X /* scan through the arguments, interpreting each one */ X while ((--eargc > 0) && (**++eargv == '-')) X switch (eargv[0][1]) X { X case 'e': X eflag++; compile(); /* compile with e flag on */ X eflag = 0; X continue; /* get another argument */ X case 'f': X if (eargc-- <= 0) /* barf if no -f file */ X exit(2); X if ((cmdf = fopen(*++eargv, "r")) == NULL) X { X fprintf(stderr, COCFI, *eargv); X exit(2); X } X compile(); /* file is O.K., compile it */ X fclose(cmdf); X continue; /* go back for another argument */ X case 'g': X gflag++; /* set global flag on all s cmds */ X continue; X case 'n': X nflag++; /* no print except on p flag or w */ X continue; X default: X fprintf(stdout, UFLAG, eargv[0][1]); X continue; X } X XPASS("main(): argscan"); X X if (cmdp == cmds) /* no commands have been compiled */ X { X eargv--; eargc++; X eflag++; compile(); eflag = 0; X eargv++; eargc--; X } X X if (bdepth) /* we have unbalanced squigglies */ X ABORT(TMLBR); X X lablst->address = cmdp; /* set up header of label linked list */ X resolve(); /* resolve label table indirections */ XPASS("main(): resolve"); X if (eargc <= 0) /* if there were no -e commands */ X execute(NULL); /* execute commands from stdin only */ X else while(--eargc>=0) /* else execute only -e commands */ X execute(*eargv++); XPASS("main(): end & exit OK"); X exit(0); /* everything was O.K. 
if we got here */ X} X X X#define H 0x80 /* 128 bit, on if there's really code for command */ X#define LOWCMD 56 /* = '8', lowest char indexed in cmdmask */ X X/* indirect through this to get command internal code, if it exists */ Xstatic char cmdmask[] = X{ X 0, 0, H, 0, 0, H+EQCMD,0, 0, X 0, 0, 0, 0, H+CDCMD,0, 0, CGCMD, X CHCMD, 0, 0, 0, 0, 0, CNCMD, 0, X CPCMD, 0, 0, 0, H+CTCMD,0, 0, H+CWCMD, X 0, 0, 0, 0, 0, 0, 0, 0, X 0, H+ACMD, H+BCMD, H+CCMD, DCMD, 0, 0, GCMD, X HCMD, H+ICMD, 0, 0, H+LCMD, 0, NCMD, 0, X PCMD, H+QCMD, H+RCMD, H+SCMD, H+TCMD, 0, 0, H+WCMD, X XCMD, H+YCMD, 0, H+BCMD, 0, H, 0, 0, X}; X Xstatic void compile() X/* precompile sed commands out of a file */ X{ X char ccode, *address(); X XPASS("compile(): entry"); X X for(;;) /* main compilation loop */ X { X if (*cp != ';') /* get a new command line */ X if (cmdline(cp = linebuf) < 0) X break; X SKIPWS(cp); X if (*cp=='\0' || *cp=='#') /* a comment */ X continue; X if (*cp == ';') /* ; separates cmds */ X { X cp++; X continue; X } X X /* compile first address */ X if (fp > poolend) X ABORT(TMTXT); X else if ((fp = address(cmdp->addr1 = fp)) == BAD) X ABORT(AGMSG); X X if (fp == cmdp->addr1) /* if empty RE was found */ X { X if (lastre) /* if there was previous RE */ X cmdp->addr1 = lastre; /* use it */ X else X ABORT(FRENL); X } X else if (fp == NULL) /* if fp was NULL */ X { X fp = cmdp->addr1; /* use current pool location */ X cmdp->addr1 = NULL; X } X else X { X lastre = cmdp->addr1; X if (*cp == ',' || *cp == ';') /* there's 2nd addr */ X { X cp++; X if (fp > poolend) ABORT(TMTXT); X fp = address(cmdp->addr2 = fp); X if (fp == BAD || fp == NULL) ABORT(AGMSG); X if (fp == cmdp->addr2) X cmdp->addr2 = lastre; X else X lastre = cmdp->addr2; X } X else X cmdp->addr2 = NULL; /* no 2nd address */ X } X if (fp > poolend) ABORT(TMTXT); X X SKIPWS(cp); /* discard whitespace after address */ X IFEQ(cp, '!') cmdp->flags.allbut = 1; X X SKIPWS(cp); /* get cmd char, range-check it */ X if ((*cp < LOWCMD) || 
(*cp > '~') X || ((ccode = cmdmask[*cp - LOWCMD]) == 0)) X ABORT(NSCAX); X X cmdp->command = ccode & ~H; /* fill in command value */ X if ((ccode & H) == 0) /* if no compile-time code */ X cp++; /* discard command char */ X else if (cmdcomp(*cp++)) /* execute it; if ret = 1 */ X continue; /* skip next line read */ X X if (++cmdp >= cmds + MAXCMDS) ABORT(TMCDS); X X SKIPWS(cp); /* look for trailing stuff */ X if (*cp != '\0') X if (*++cp == ';') X continue; X else if (cp[-1] != '#') X ABORT(TRAIL); X } X} X Xstatic int cmdcomp(cchar) X/* compile a single command */ Xregister char cchar; /* character name of command */ X{ X char *gettext(), *rhscomp(), *recomp(), *ycomp(); X static sedcmd **cmpstk[MAXDEPTH]; /* current cmd stack for {} */ X static char *fname[WFILES]; /* w file name pointers */ X static FILE *fout[WFILES]={stdout}; /* w file file ptrs */ X static int nwfiles = 1; /* count of open w files */ X int i; /* indexing dummy used in w */ X sedcmd *sp1, *sp2; /* temps for label searches */ X label *lpt, *search(); /* ditto, and the searcher */ X char redelim; /* current RE delimiter */ X X switch(cchar) X { X case '{': /* start command group */ X cmdp->flags.allbut = !cmdp->flags.allbut; X cmpstk[bdepth++] = &(cmdp->u.link); X if (++cmdp >= cmds + MAXCMDS) ABORT(TMCDS); X if (*cp == '\0') *cp = ';'; /* get next cmd w/o lineread */ X return(1); X X case '}': /* end command group */ X if (cmdp->addr1) ABORT(AD1NG); /* no addresses allowed */ X if (--bdepth < 0) ABORT(TMRBR); /* too many right braces */ X *cmpstk[bdepth] = cmdp; /* set the jump address */ X return(1); X X case '=': /* print current source line number */ X case 'q': /* exit the stream editor */ X if (cmdp->addr2) ABORT(AD2NG); X break; X X case ':': /* label declaration */ X if (cmdp->addr1) ABORT(AD1NG); /* no addresses allowed */ X fp = gettext(lab->name = fp); /* get the label name */ X if (lpt = search(lab)) /* does it have a double? 
*/ X { X if (lpt->address) ABORT(DLABL); /* yes, abort */ X } X else /* check that it doesn't overflow label table */ X { X lab->last = NULL; X lpt = lab; X if (++lab >= labels + MAXLABS) ABORT(TMLAB); X } X lpt->address = cmdp; X return(1); X X case 'b': /* branch command */ X case 't': /* branch-on-succeed command */ X case 'T': /* branch-on-fail command */ X SKIPWS(cp); X if (*cp == '\0') /* if branch is to start of cmds... */ X { X /* add current command to end of label last */ X if (sp1 = lablst->last) X { X while(sp2 = sp1->u.link) X sp1 = sp2; X sp1->u.link = cmdp; X } X else /* lablst->last == NULL */ X lablst->last = cmdp; X break; X } X fp = gettext(lab->name = fp); /* else get label into pool */ X if (lpt = search(lab)) /* enter branch to it */ X { X if (lpt->address) X cmdp->u.link = lpt->address; X else X { X sp1 = lpt->last; X while(sp2 = sp1->u.link) X sp1 = sp2; X sp1->u.link = cmdp; X } X } X else /* matching named label not found */ X { X lab->last = cmdp; /* add the new label */ X lab->address = NULL; /* it's forward of here */ X if (++lab >= labels + MAXLABS) /* overflow if last */ X ABORT(TMLAB); X } X break; X X case 'a': /* append text */ X case 'i': /* insert text */ X case 'r': /* read file into stream */ X if (cmdp->addr2) ABORT(AD2NG); X case 'c': /* change text */ X if ((*cp == '\\') && (*++cp == '\n')) cp++; X fp = gettext(cmdp->u.lhs = fp); X break; X X case 'D': /* delete current line in hold space */ X cmdp->u.link = cmds; X break; X X case 's': /* substitute regular expression */ X redelim = *cp++; /* get delimiter from 1st ch */ X if ((fp = recomp(cmdp->u.lhs = fp, redelim)) == BAD) X ABORT(CGMSG); X if (fp == cmdp->u.lhs) /* if compiled RE zero len */ X cmdp->u.lhs = lastre; /* use the previous one */ X else /* otherwise */ X lastre = cmdp->u.lhs; /* save the one just found */ X if ((cmdp->rhs = fp) > poolend) ABORT(TMTXT); X if ((fp = rhscomp(cmdp->rhs, redelim)) == BAD) ABORT(CGMSG); X if (gflag) cmdp->flags.global++; X while 
(*cp == 'g' || *cp == 'p' || *cp == 'P') X { X IFEQ(cp, 'g') cmdp->flags.global++; X IFEQ(cp, 'p') cmdp->flags.print = 1; X IFEQ(cp, 'P') cmdp->flags.print = 2; X } X X case 'l': /* list pattern space */ X if (*cp == 'w') X cp++; /* and execute a w command! */ X else X break; /* s or l is done */ X X case 'w': /* write-pattern-space command */ X case 'W': /* write-first-line command */ X if (nwfiles >= WFILES) ABORT(TMWFI); X fp=gettext(fname[nwfiles]=fp); /* filename will be in pool */ X for(i = nwfiles-1; i >= 0; i--) /* match it in table */ X if (strcmp(fname[nwfiles], fname[i]) == 0) X { X cmdp->fout = fout[i]; X return(0); X } X /* if didn't find one, open new out file */ X if ((cmdp->fout = fopen(fname[nwfiles], "w")) == NULL) X { X fprintf(stderr, CCOFI, fname[nwfiles]); X exit(2); X } X fout[nwfiles++] = cmdp->fout; X break; X X case 'y': /* transliterate text */ X fp = ycomp(cmdp->u.lhs = fp, *cp++); /* compile translit */ X if (fp == BAD) ABORT(CGMSG); /* fail on bad form */ X if (fp > poolend) ABORT(TMTXT); /* fail on overflow */ X break; X } X return(0); /* succeeded in interpreting one command */ X} X Xstatic char *rhscomp(rhsp, delim) /* uses bcount */ X/* generate replacement string for substitute command right hand side */ Xregister char *rhsp; /* place to compile expression to */ Xregister char delim; /* regular-expression end-mark to look for */ X{ X register char *p = cp; /* strictly for speed */ X X for(;;) X if ((*rhsp = *p++) == '\\') /* copy; if it's a \, */ X { X *rhsp = *p++; /* copy escaped char */ X /* check validity of pattern tag */ X if (*rhsp > bcount + '0' && *rhsp <= '9') X return(BAD); X *rhsp++ |= 0x80; /* mark the good ones */ X continue; X } X else if (*rhsp == delim) /* found RE end, hooray... */ X { X *rhsp++ = '\0'; /* cap the expression string */ X cp = p; X return(rhsp); /* pt at 1 past the RE */ X } X else if (*rhsp++ == '\0') /* last ch not RE end, help! 
*/ X return(BAD); X} X Xstatic char *recomp(expbuf, redelim) /* uses cp, bcount */ X/* compile a regular expression to internal form */ Xchar *expbuf; /* place to compile it to */ Xchar redelim; /* RE end-marker to look for */ X{ X register char *ep = expbuf; /* current-compiled-char pointer */ X register char *sp = cp; /* source-character ptr */ X register int c; /* current-character pointer */ X char negclass; /* all-but flag */ X char *lastep; /* ptr to last expr compiled */ X char *svclass; /* start of current char class */ X char brnest[MAXTAGS]; /* bracket-nesting array */ X char *brnestp; /* ptr to current bracket-nest */ X char *pp; /* scratch pointer */ X int classct; /* class element count */ X int tags; /* # of closed tags */ X X if (*cp == redelim) /* if first char is RE endmarker */ X return(cp++, expbuf); /* leave existing RE unchanged */ X X lastep = NULL; /* there's no previous RE */ X brnestp = brnest; /* initialize ptr to brnest array */ X tags = bcount = 0; /* initialize counters */ X X if (*ep++ = (*sp == '^')) /* check for start-of-line syntax */ X sp++; X X for (;;) X { X if (ep >= expbuf + RELIMIT) /* match is too large */ X return(cp = sp, BAD); X if ((c = *sp++) == redelim) /* found the end of the RE */ X { X cp = sp; X if (brnestp != brnest) /* \(, \) unbalanced */ X return(BAD); X *ep++ = CEOF; /* write end-of-pattern mark */ X return(ep); /* return ptr to compiled RE */ X } X if ((c != '*') && (c != '+')) /* if we're a postfix op */ X lastep = ep; /* get ready to match last */ X X switch (c) X { X case '\\': X if ((c = *sp++) == '(') /* start tagged section */ X { X if (bcount >= MAXTAGS) X return(cp = sp, BAD); X *brnestp++ = bcount; /* update tag stack */ X *ep++ = CBRA; /* enter tag-start */ X *ep++ = bcount++; /* bump tag count */ X continue; X } X else if (c == ')') /* end tagged section */ X { X if (brnestp <= brnest) /* extra \) */ X return(cp = sp, BAD); X *ep++ = CKET; /* enter end-of-tag */ X *ep++ = *--brnestp; /* pop tag 
stack */ X tags++; /* count closed tags */ X continue; X } X else if (c >= '1' && c <= '9') /* tag use */ X { X if ((c -= '1') >= tags) /* too few */ X return(BAD); X *ep++ = CBACK; /* enter tag mark */ X *ep++ = c; /* and the number */ X continue; X } X else if (c == '\n') /* escaped newline no good */ X return(cp = sp, BAD); X else if (c == 'n') /* match a newline */ X c = '\n'; X else if (c == 't') /* match a tab */ X c = '\t'; X else X goto defchar; /* else match \c */ X X case '\0': /* ignore nuls */ X continue; X X case '\n': /* trailing pattern delimiter is missing */ X return(cp = sp, BAD); X X case '.': /* match any char except newline */ X *ep++ = CDOT; X continue; X X case '+': /* 1 to n repeats of previous pattern */ X if (lastep == NULL) /* if + not first on line */ X goto defchar; /* match a literal + */ X if (*lastep == CKET) /* can't iterate a tag */ X return(cp = sp, BAD); X pp = ep; /* else save old ep */ X while (lastep < pp) /* so we can blt the pattern */ X *ep++ = *lastep++; X *lastep |= STAR; /* flag the copy */ X continue; X X case '*': /* 0..n repeats of previous pattern */ X if (lastep == NULL) /* if * isn't first on line */ X goto defchar; /* match a literal * */ X if (*lastep == CKET) /* can't iterate a tag */ X return(cp = sp, BAD); X *lastep |= STAR; /* flag previous pattern */ X continue; X X case '$': /* match only end-of-line */ X if (*sp != redelim) /* if we're not at end of RE */ X goto defchar; /* match a literal $ */ X *ep++ = CDOL; /* insert end-symbol mark */ X continue; X X case '[': /* begin character set pattern */ X if (ep + 17 >= expbuf + RELIMIT) X ABORT(REITL); X *ep++ = CCL; /* insert class mark */ X if (negclass = ((c = *sp++) == '^')) X c = *sp++; X svclass = sp; /* save ptr to class start */ X do { X if (c == '\0') ABORT(CGMSG); X X /* handle character ranges */ X if (c == '-' && sp > svclass && *sp != ']') X for (c = sp[-2]; c < *sp; c++) X ep[c >> 3] |= bits[c & 7]; X X /* handle escape sequences in sets */ X if 
(c == '\\') X if ((c = *sp++) == 'n') X c = '\n'; X else if (c == 't') X c = '\t'; X X /* enter (possibly translated) char in set */ X ep[c >> 3] |= bits[c & 7]; X } while X ((c = *sp++) != ']'); X X /* invert the bitmask if all-but was specified */ X if (negclass) X for(classct = 0; classct < 16; classct++) X ep[classct] ^= 0xFF; X ep[0] &= 0xFE; /* never match ASCII 0 */ X ep += 16; /* advance ep past set mask */ X continue; X X defchar: /* match literal character */ X default: /* which is what we'd do by default */ X *ep++ = CCHR; /* insert character mark */ X *ep++ = c; X } X } X} X Xstatic int cmdline(cbuf) /* uses eflag, eargc, cmdf */ X/* read next command from -e argument or command file */ Xregister char *cbuf; X{ X register int inc; /* not char because must hold EOF */ X X cbuf--; /* so pre-increment points us at cbuf */ X X /* e command flag is on */ X if (eflag) X { X register char *p; /* ptr to current -e argument */ X static char *savep; /* saves previous value of p */ X X if (eflag > 0) /* there are pending -e arguments */ X { X eflag = -1; X if (eargc-- <= 0) X exit(2); /* if no arguments, barf */ X X /* else transcribe next e argument into cbuf */ X p = *++eargv; X while(*++cbuf = *p++) X if (*cbuf == '\\') X { X if ((*++cbuf = *p++) == '\0') X return(savep = NULL, -1); X else X continue; X } X else if (*cbuf == '\n') /* end of 1 cmd line */ X { X *cbuf = '\0'; X return(savep = p, 1); X /* we'll be back for the rest... 
*/ X } X X /* found end-of-string; can advance to next argument */ X return(savep = NULL, 1); X } X X if ((p = savep) == NULL) X return(-1); X X while(*++cbuf = *p++) X if (*cbuf == '\\') X { X if ((*++cbuf = *p++) == '0') X return(savep = NULL, -1); X else X continue; X } X else if (*cbuf == '\n') X { X *cbuf = '\0'; X return(savep = p, 1); X } X X return(savep = NULL, 1); X } X X /* if no -e flag read from command file descriptor */ X while((inc = getc(cmdf)) != EOF) /* get next char */ X if ((*++cbuf = inc) == '\\') /* if it's escape */ X *++cbuf = inc = getc(cmdf); /* get next char */ X else if (*cbuf == '\n') /* end on newline */ X return(*cbuf = '\0', 1); /* cap the string */ X X return(*++cbuf = '\0', -1); /* end-of-file, no more chars */ X} X Xstatic char *address(expbuf) /* uses cp, linenum */ X/* expand an address at *cp... into expbuf, return ptr at following char */ Xregister char *expbuf; X{ X static int numl = 0; /* current ind in addr-number table */ X register char *rcp; /* temp compile ptr for forwd look */ X long lno; /* computed value of numeric address */ X X if (*cp == '$') /* end-of-source address */ X { X *expbuf++ = CEND; /* write symbolic end address */ X *expbuf++ = CEOF; /* and the end-of-address mark (!) */ X cp++; /* go to next source character */ X return(expbuf); /* we're done */ X } X if (*cp == '/') /* start of regular-expression match */ X return(recomp(expbuf, *cp++)); /* compile the RE */ X X rcp = cp; lno = 0; /* now handle a numeric address */ X while(*rcp >= '0' && *rcp <= '9') /* collect digits */ X lno = lno*10 + *rcp++ - '0'; /* compute their value */ X X if (rcp > cp) /* if we caught a number... 
*/ X { X *expbuf++ = CLNUM; /* put a numeric-address marker */ X *expbuf++ = numl; /* and the address table index */ X linenum[numl++] = lno; /* and set the table entry */ X if (numl >= MAXLINES) /* oh-oh, address table overflow */ X ABORT(TMLNR); /* abort with error message */ X *expbuf++ = CEOF; /* write the end-of-address marker */ X cp = rcp; /* point compile past the address */ X return(expbuf); /* we're done */ X } X X return(NULL); /* no legal address was found */ X} X Xstatic char *gettext(txp) /* uses global cp */ X/* accept multiline input from *cp..., discarding leading whitespace */ Xregister char *txp; /* where to put the text */ X{ X register char *p = cp; /* this is for speed */ X X SKIPWS(p); /* discard whitespace */ X do { X if ((*txp = *p++) == '\\') /* handle escapes */ X *txp = *p++; X if (*txp == '\0') /* we're at end of input */ X return(cp = --p, ++txp); X else if (*txp == '\n') /* also SKIPWS after newline */ X SKIPWS(p); X } while X (txp++); /* keep going till we find that nul */ X} X Xstatic label *search(ptr) /* uses global lablst */ X/* find the label matching *ptr, return NULL if none */ Xregister label *ptr; X{ X register label *rp; X for(rp = lablst; rp < ptr; rp++) X if (strcmp(rp->name, ptr->name) == 0) X return(rp); X return(NULL); X} X Xstatic void resolve() /* uses global lablst */ X/* write label links into the compiled-command space */ X{ X register label *lptr; X register sedcmd *rptr, *trptr; X X /* loop through the label table */ X for(lptr = lablst; lptr < lab; lptr++) X if (lptr->address == NULL) /* barf if not defined */ X { X fprintf(stderr, ULABL, lptr->name); X exit(2); X } X else if (lptr->last) /* if last is non-null */ X { X rptr = lptr->last; /* chase it */ X while(trptr = rptr->u.link) /* resolve refs */ X { X rptr->u.link = lptr->address; X rptr = trptr; X } X rptr->u.link = lptr->address; X } X} X Xstatic char *ycomp(ep, delim) X/* compile a y (transliterate) command */ Xregister char *ep; /* where to compile to 
*/ Xchar delim; /* end delimiter to look for */ X{ X register char c, *tp, *sp; X X /* scan the 'from' section for invalid chars */ X for(sp = tp = cp; *tp != delim; tp++) X { X if (*tp == '\\') X tp++; X if ((*tp == '\n') || (*tp == '\0')) X return(BAD); X } X tp++; /* tp now points at first char of 'to' section */ X X /* now rescan the 'from' section */ X while((c = *sp++ & 0x7F) != delim) X { X if (c == '\\' && *sp == 'n') X { X sp++; X c = '\n'; X } X if ((ep[c] = *tp++) == '\\' && *tp == 'n') X { X ep[c] = '\n'; X tp++; X } X if ((ep[c] == delim) || (ep[c] == '\0')) X return(BAD); X } X X if (*tp != delim) /* 'to', 'from' parts have unequal lengths */ X return(BAD); X X cp = ++tp; /* point compile ptr past translit */ X X for(c = 0; c < 128; c++) /* fill in self-map entries in table */ X if (ep[c] == 0) X ep[c] = c; X X return(ep + 0x80); /* return first free location past table end */ X} X X/* sedcomp.c ends here */ END_OF_sedcomp.c if test 37223 -ne `wc -c <sedcomp.c`; then echo shar: \"sedcomp.c\" unpacked with wrong size! fi # end of overwriting check fi echo shar: End of shell archive. exit 0