[pe.cust.sources] UNIX Clinic by Axel Schreiner - 1/85

carl@nrcaero.UUCP (Carl P. Swail) (03/21/85)

: '!/bin/sh'
: 'This is a shell archive, meaning:'
: '1. Remove everything above the #!/bin/sh line.'
: '2. Save the resulting text in a file.'
: '3. Execute the file with /bin/sh (not csh) to create the files:'
: '      1.85'
: 'This archive created: Wed Mar 20 14:47:24 1985'
export PATH; PATH=/bin:$PATH
echo shar: extracting "'1.85'" '(24548 characters)'
if test -f '1.85'
then
	echo shar: over-writing existing file "'1.85'"
fi
sed 's/^X//' << \SHAR_EOF > '1.85'
X.ds Sp \fIUNIX clinic\fR
X.TL
X\*(Sp 1/85
X.ND "\*(Sp 1/85"
X.PP
XThis column first appears in the German quarterly
X.I unix/mail
X(Hanser Verlag, Munich, Germany).
XIt is copyrighted: \(co 1985 by Axel T. Schreiner, Ulm, West Germany.
XIt may be reproduced
Xas long as the copyright notice is included
Xand reference is made to the original publication.
X.PP
XThe column attempts to discuss typical approaches
Xto problem solving using the
X.UX
Xsystem.
XIt emphasizes what the author considers to be
Xgood programming pratices and appropriate choice of tools.
X.SH
X/usr/games
X.SH
XSignal Puzzle
X.PP
XThe signal puzzle presented in the last issue (4/84)
Xis really an example for process communication
Xby means of signals:
X.ne  30
X.DS
X#include <signal.h>
X
Xcatch() {}
X
Xmain()
X{       int vater, ich, sohn;
X
X        signal(16, catch);
X        for (vater = 0; ich = getpid(); vater = ich)
X                switch (sohn = fork()) {
X                default:
X                        pause();
X                case -1:
X                        signal(SIGALRM, catch);
X                        alarm(2);
X                        getchar();
X                        alarm(0);
X                        if (vater)
X                        {       signal(16, catch);
X                                kill(vater, 16);
X                                pause();
X                        }
X                        kill(sohn, 16);
X                        wait(0);
X                        exit(0);
X                case 0:
X                        ;
X                }
X}
X.DE
X.PP
XThe following diagram shows what actions each individual process performs:
X.ne  38
X.DS
Xfirst process          | intermediate processes |           last process
X-----------------------+------------------------+-----------------------
Xsignal(16, catch)      |                        |
Xvater = 0              |                        |
Xich = getpid()         |                        |
Xswitch (sohn = fork()) |                        |
Xcase 0:                |                        |
X    -------------------> vater = ich            |
X                       | ich = getpid()         |
X                       | switch (sohn = fork()) |
X                       | case -1:               |
X                       |     -------------------> signal(SIGALRM, catch)
X                       |                        | alarm(2)
X                       |                        | getchar()
X                       |                        | alarm(0)
X                       | default:               | assert(vater != 0)
X                       |     pause()            | signal(16, catch)
X                       |         <~~~~~~~~~~~~~~~ kill(vater, 16)
X                       | signal(SIGALRM, catch) |
X                       | alarm(2)               |
X                       | getchar()              |
X                       | alarm(0)               |
Xdefault:               | assert(vater != 0)     |
X    pause()            | signal(16, catch)      |
X        <~~~~~~~~~~~~~~~ kill(vater, 16)        |
Xsignal(SIGALRM, catch) |                        |
Xalarm(2)               |                        |
Xgetchar()              |                        |
Xalarm(0)               |                        |
Xassert(vater == 0)     | pause()                |
Xkill(sohn, 16) ~~~~~~~~~~~~>                    | pause()
X                       | kill(sohn, 16) ~~~~~~~~~~~~>
X                       |                        | kill(-1, 16) == -1
X                       | wait(0)                | wait(0) == -1
Xwait(0)                |    <~~~~~~~~~~~~~~~~~~~~ exit(0)
X    <~~~~~~~~~~~~~~~~~~~ exit(0)
Xexit(0)
X.DE
X.PP
XFirst a sequence of processes is created.
XEach process,
Xonce it has successfully created a successor,
Xissues a
X.B pause (2)
Xsystem call and waits for things (i.e., signals) to come.
X.PP
XThe last process in the sequence cannot
X.B fork (2)
Xanymore and therefore skips
X.B pause (2).
XUsing
X.B getchar(),
Xthe process attempts to read a character
X(more precisely, a linefeed)
Xfrom standard input.
X.PP
XThis may take a while.
XUsing
X.B alarm (2),
Xthe process therefore arranges
Xthat it will receive a
X.B SIGALRM
Xsignal after two seconds.
X.B catch()
Xis necessary,
Xso that the signal is recognized but does not terminate the process.
XIf the signal arrives
X.I before
Xa linefeed is available,
Xthe
X.B read (2)
Xsystem call issued by
X.B getchar()
Xwill be interrupted,
Xi.e.,
X.B getchar()
Xis aborted.
X.PP
XOnly a few system calls can be aborted by means of signals.
X.B read ,
Xfor example,
Xcan only be aborted
Xif it is directed to a terminal.
XA
X.B read
Xcall to a magnetic tape drive can normally not even be interrupted
Xif the drive is defective.
XThis explains why processes can remain in a system
Xand can no longer be killed
Xas a result of hardware defects.
X.PP
XIf a system call is aborted by means of a signal,
Xthe
X.B "extern int"
Xvariable
X.B errno
Xcontains the value
X.B EINTR ;
Xin this example this could be used to determine
Xwhether the
X.B read
Xsystem call has failed.
X.B alarm(0)
Xwill make sure in any case
Xthat a
X.B SIGALRM
Xsignal cannot occur later.
X.PP
XIn all processes but the first,
X.B vater
Xis the process number of the predecessor in the sequence
Xand therefore non-zero.
XBackwards along the sequence of processes,
Xfrom successor to predecessor,
Xsignal 16 will now be passed.
X.B pause (2)
Xis another system call,
Xwhich is aborted when a signal arrives.
XActually,
X.B pause (2)
Xis used specifically to suspend a process until it receives a signal.
X.PP
XEach process in turn waits for input
Xor for a time interval to expire
Xand then sends signal 16 to its predecessor.
XIn the first process,
X.B vater
Xis zero and signal 16 therefore will not be propagated further.
X.PP
X.B sohn
Xin all processes contains the value returned by
X.B fork (2),
Xi.e.,
Xit normally contains the process number of the successor process.
XNow signal 16 is passed along the sequence from first to last process.
X.PP
XOnce a process has sent signal 16 to its successor,
Xit executes a
X.B wait (2)
Xsystem call,
Xi.e.,
Xit will wait for the termination of its successor.
XIn the last process,
X.B sohn
Xhas the value
X.B -1 .
XNormal users are not allowed to use
X.B -1
Xas a process number for
X.B kill (2),
X.FS
XThe super user can use this special value to send a signal
Xto all processes in the system
Xand thus to arrange for peace and quiet at will...
X.FE
Xtherefore signal 16 is
X.I not
Xpassed on in the last process.
XAdditionally,
Xthis process does not have descendants.
XTherefore the
X.B wait (2)
Xsystem call will fail in this process as well.
X.PP
XThe last process in the sequence thus reaches
X.B exit
Xand satisfies the
X.B wait
Xcondition in its predecessor.
XOur sequence of processes finally terminates
Xin reverse order.
X.PP
XOf course,
Xthe example is not a structured program.
XAfter all,
X.B switch
Xis but a
X.B goto
Xin disguise!
XThe example was constructed to demonstrate
Xthat several processes may share a single terminal
Xin an organized fashion.
X.B alarm (2)
Xcan be used to let other processes proceed
Xeven if no input is presented at the terminal.
X.SH
X/bin/lex
X.SH
XPatterns, ready to wear
X.PP
X.I lex
Xis a very useful tool to quickly generate programs for text manipulation.
X.I lex
Xpatterns are usually easy to write.
XHere we will discuss a few useful patterns
Xto deal with some programming language features.
X.FS
XThe examples were inspired by a book on
X.I lex
Xand
X.I yacc
Xwhich will be published early in 1985 by Hanser Verlag and Prentice-Hall
X[Schreiner/Friedman
X.I
XIntroduction to Compiler Construction \(em A UNIX Tutorial\fR].
X.FE
XFor starters,
Xhere is a
X.I lex
Xprogram to extract comments from Pascal programs:
X.ne  10
X.DS
X%{
X#define	YYLMAX	512	/* length of yytext[] */
X%}
X
X%%
X
X"{"[^}]*"}"		ECHO;
X\&.			|
X\en			;
X.DE
X.LP
XIn Pascal
Xa comment is enclosed in braces;
Xsuch comments cannot be nested.
X.B [^}]*
Xdescribes an arbitrarily long sequence of arbitrary characters,
Xwith the exception of
X.B } ,
Xof course.
XA comment consists of such a sequence surrounded by braces.
X.B ECHO
Xfinally is an action defined by
X.I lex
Xwhich is equivalent to
X.B "fputs(yytext, stdout)" ,
Xi.e.,
Xthe text described by the pattern will be output.
XThe other patterns,
Xtogether with an empty action,
Xserve to ignore all other characters.
X.PP
XThese applications critically depend on the input buffer
X.B yytext
Xbeing large enough.
XThe example demonstrates,
Xhow we can influence the size of this buffer:
Xas we can easily check in the program generated by
X.I lex
Xor in its model in
X.I /usr/lib/lex/ncform ,
X.B yytext
Xis defined with the length
X.B YYLMAX .
XThe definition of this constant can easily be overridden if necessary.
X.PP
XUnfortunately,
Xcomments in Pascal may also be surrounded by
X.B (*
Xand
X.B *) .
XA pattern for this type of comment is more complicated:
X.ne  11
X.DS
Xanf	("(*")
Xlk	("("*)
Xbel	([^*)])
Xnend	([^*]")"|"*"[^)])
Xast	("*"*)
Xend	("*)")
X
X%%
X
X{anf}{lk}({bel}|{nend})*{ast}{end}	;
X.DE
X.LP
XThis program eliminates comments,
Xsince a
X.I lex
Xprogram will copy all characters
Xwhich are not recognized by any pattern.
XBy the way,
Xjust like definitions for the C preprocessor,
X.I lex
Xdefinitions for text replacement
Xare best enclosed in parentheses
Xto avoid mistakes.
X.PP
XThe pattern above is composed of parts
Xwhich have been defined individually
Xto be more easily understood:
X.B {anf}
Xand
X.B {end}
Xdescribe the delimeters for a comment in Pascal.
X.B ({bel}|{nend})
Xwill recognize one character,
Xor a pair of characters,
Xwhich cannot be part of the terminating delimeter
X.B *)
Xat the end of the comment.
X.B {ast}
Xand
X.B {lk}
Xdeal with certain pathological cases:
X.B {ast}
Xtakes care of recognizing
X.B (***)
Xas a comment,
X.B {lk}
Xattends to
X.B (*(*) .
X.B {lk}
Xis not really necessary to recognize a Pascal comment,
Xbut it is required for the analogous pattern for a C comment (why?!).
X.PP
XFor an easier problem,
Xlet us look at patterns for strings in both languages:
X.ne   2
X.DS
X\e'([^'\en]|\e'\e')+\e'
X.DE
X.LP
XIn Pascal,
Xa string is limited to one line,
Xit must contain at least one character,
Xand a quote must be represented as a sequence of two quotes.
X.ne   2
X.DS
X\e"([^"\en]|\e\e["\en])*\e"
X.DE
X.LP
XLinefeeds or
X\fB\&"\fR
Xcan appear in a string in C,
Xas long as they are escaped with
X.B \e .
XFollowing
X.B \e ,
Xarbitrary other characters may of course be used
X(how does the pattern permit this?!).
XA string may also be empty.
X.PP
XThe last example shows
Xthat with
X.I lex
Xeven floating point constants are easy to recognize:
X.ne  13
X.DS
Xdigits		([0-9]+)
Xperiod		(".")
Xsign		([+-]?)
Xexponent	([eE]{sign}{digits})
X
X%%
X
X{sign}{digits}({period}{digits}?)?{exponent}?	|
X{sign}{digits}?{period}{digits}{exponent}?	ECHO;
X
X\&.	|
X\en	;
X.DE
X.LP
XThese patterns are suitable as a filter for
X.B atof (3).
X.SH
XExpress filters
X.SH
X/usr/ucb/num
X.PP
X.I lex
Xcan be used to develop filters at a moments notice:
X.ne   5
X.DS
X%%
X
X\en	ECHO;
X^.*$	printf("%d\et%s", yylineno, yytext);
X.DE
X.LP
X.B yylineno
Xis maintained by
X.I lex
Xand contains the number of the current input line.
XThe program above will show the line number and a tab character
Xpreceding each non-empty line.
X.PP
XOne could assume that a linefeed need not be recognized separately
Xand emitted with
X.B ECHO .
XIf this line is removed from the
X.I lex
Xprogram,
Xempty lines will also be numbered,
Xand the numbers then will be wrong (why?!)!
X.PP
XThe following, much more elegant solution
Xdoes not limit the input line length:
X.ne   4
X.DS
X%%
X
X^.	printf("%d\et%c", yylineno, yytext[0]);
X.DE
X.PP
XIf we also want to number blank lines
Xwe can try the following:
X.ne   4
X.DS
X%%
X
X^.*\en	printf("%d\t%s", yylineno-1, yytext);
X.DE
X.LP
X.B yylineno-1
Xhas to be specified,
Xsince
X.B yylineno
Xis incremented when the linefeed is
X.I read .
X.PP
XHere, too, should be an easier way:
X.ne   4
X.DS
X%%
X
X^	printf("%d\et", yylineno);
X.DE
X.LP
XAt least my version of
X.I lex
Xdecided to junk this idea without further comment.
XShould
X.I lex
Xthink this solution to be too simplistic?
X.SH
X/bin/split
X.PP
XA text could be split into several files as follows:
XAssume that following a line like
X\fB.di\ \fIfilename\fR
Xtext should be diverted to the indicated file.
XIf no
X.I filename
Xis specified,
Xthe previous output file should be restored.
XDiversions thus may be nested.
X.ne  17
X.DS
X%{
Xstatic divert();
X%}
X
X%%
X
X^".di".*\en	{ yytext[yyleng-1] = '\e0'; divert(yytext+3); }
X
X%%
X
X#include <ctype.h>
X
Xstatic struct stack {
X	FILE * fp;
X	struct stack * prev;
X	} * stack;
X.DE
X.ne  27
X.DS
Xstatic divert(s)
X	register char * s;
X{	register struct stack * sp;
X
X	while (isspace(*s))
X		++ s;
X	if (*s)
X		if (sp = calloc(1, sizeof(struct stack)))
X		{	sp->fp = yyout;
X			if (yyout = fopen(s, "w"))
X				sp->prev = stack, stack = sp;
X			else
X			{	perror(s);
X				yyout = sp->fp;
X				cfree(sp);
X			}
X		}
X		else
X			fputs("no room\en", stderr);
X	else if (sp = stack)
X	{	fclose(yyout), yyout = sp->fp;
X		stack = sp->prev, cfree(sp);
X	}
X	else
X		fputs(".di??\en", stderr);
X}
X.DE
X.LP
XThe solution, of course, depends on a stack of
X.B FILE
Xpointers for output,
Xand on the fact that
X.I lex
Xuses the
X.B FILE
Xpointer
X.B yyout
Xfor output.
X.FS
X.I stdio.h
Xis always included by
X.I lex .
X.FE
X.PP
XIt should be clear,
Xby the way,
Xthat this simple program solves a problem
Xwhich cannot be solved as easily or generally with
X.I sed
Xor
X.I awk :
Xthose programs do limit the number of output files.
XIf a file needs to be split into more then ten separate files,
Xthe solution shown here is both,
Xflexible,
Xand the only one feasible.
X.SH
X/usr/ucb/soelim
X.PP
XThe following filter copies text
Xand replaces a line with \fB.so\ \fIfilename\fR
Xby the contents of the indicated file.
XInclusions may be nested.
X.ne  17
X.DS
X%{
Xstatic include();
X%}
X
X%%
X
X^".so".*\en	{ yytext[yyleng-1] = '\e0'; include(yytext+3); }
X
X%%
X
X#include <ctype.h>
X
Xstatic struct stack {
X	FILE * fp;
X	struct stack * prev;
X	} * stack;
X.DE
X.ne  21
X.DS
Xstatic include(s)
X	register char * s;
X{	register struct stack * sp =
X		calloc(1, sizeof(struct stack));
X
X	while (isspace(*s))
X		++ s;
X	if (sp)
X	{	sp->fp = yyin;
X		if (yyin = fopen(s, "r"))
X			sp->prev = stack, stack = sp;
X		else
X		{	perror(s);
X			yyin = sp->fp;
X			cfree(sp);
X		}
X	}
X	else
X		fputs("no room\en", stderr);
X}
X.DE
X.ne  11
X.DS
Xint yywrap()
X{	register struct stack * sp = stack;
X
X	if (sp)
X	{	fclose(yyin), yyin = sp->fp;
X		stack = sp->prev, cfree(sp);
X		return 0;
X	}
X	return 1;
X}
X.DE
X.PP
XAgain,
Xthe solution depends on a stack of
X.B FILE
Xpointers.
X.B yyin
Xis the
X.B FILE
Xpointer which
X.I lex
Xuses for input.
X.B yywrap()
Xis called by
X.I lex
Xat the end of file on
X.B yyin ;
Xif this function returns zero,
X.I lex
Xassumes that more input is available on
X.B yyin .
X.SH
X/usr/bin/led
X.SH
XRapid prototyping
X.PP
X.I lex
Xis quite suitable to construct prototypes of programs
Xwhere patterns are used as
X.I input .
XAs an example,
Xwe will look at the relevant parts of an interactive system
Xfor text corrections,
Xwhich were developed in a day,
Xthanks to
X.I lex
Xand
X.I curses ,
Xthe (shaky) screen management software from Berkeley.
X.PP
XA text is to be corrected,
Xwhere parts to be corrected may contain linefeeds.
XThe corrections,
Xhowever,
Xshould first be presented in context at the terminal;
Xthe user may then accept the correction,
Xreject it,
Xterminate the procedure,
Xaccept all further corrections implicitly, etc.
XPatterns are used to identify the places to be corrected.
XThe replacement text is a string
Xwhich does not need to reference parts of the patterns explicitly.
X.PP
X.I ed ,
X.I sed ,
Xor
X.I awk
Xcould not be used in this case,
Xsince they are strongly line oriented.
XTo start by implementing pattern recognition did not seem appealing.
XEven if Berkeley's
X.B regex (3)
Xhad been available,
Xi.e., pattern recognition in the style of
X.I ed
Xpackaged as subroutines,
Xan efficient search would still have been quite difficult to implement.
X.PP
XIt seemed preferable to describe the corrections as a
X.I lex
Xprogram and to implement a suitable function library
Xwhich would only have to take care of performing the actual correction.
XMore precisely,
Xthe following program would replace
X.B ue
Xin a text by an
X.I nroff
Xstring
X.B \e*u
Xthus introducing an
X.I umlaut
X\fB\*u\fR
Xwhere appropriate:
X.ne  15
X.DS
X%{
X#undef	input
X#undef	output
X#undef	unput
X#undef	ECHO
X#define	ECHO	pass()
X%}
X
X%%
X
Xneue		|
X[Qq]ue		|
Xeventuell	ECHO;
Xue		new("\e\e*u");
X.DE
X.LP
XThe definitions at the beginning would normally be obtained
Xfrom a header file.
X.PP
XNot every
X.B ue
Xin German must really be replaced by an umlaut.
XThe patterns therefore specify first the exceptions
Xwhich are
X.I not
Xto be corrected,
Xand then the actual corrections.
X.B pass()
Xis used if
X.B yytext
Xcan be emitted unchanged.
X.B new(s)
Xwill have to arrange for
X.B s
Xto be offered as a correction for
X.B yytext ,
Xand to be emitted as the user requests.
X.PP
XOur library must therefore contain the functions
X.B new()
Xand
X.B pass() .
XAdditionally,
Xwe offer the corrections in context,
Xe.g.,
Xfrom the preceding to the following linefeed.
X.PP
XTo show context,
Xwe have to plug into input and output of the routine
Xgenerated by
X.I lex .
XThis is possible and permitted
X\(em we may replace the routines
X.B input() ,
X.B output() ,
Xand
X.B unput() .
X.I lex
Xdefines these routines as macros
Xwhich we eliminate as shown above.
X.PP
X.B unput()
Xworks like
X.B ungetc() ,
Xbut it has to be able to push many characters back into the input:
X.ne   8
X.DS
Xstatic char unbufbuf[BUFSIZ], * unbuf = unbufbuf;
X
Xunput(ch)
X	register char ch;
X{
X	return * unbuf++ = ch;
X}
X.DE
X.LP
XThe buffer
X.B unbufbuf
Xmust not overflow.
XOur sketch of the program,
Xhowever,
Xomits security measures to concentrate on the essential aspects.
X.PP
X.B input()
Xmust deliver a single input character to
X.I lex .
XOur
X.B input()
Xfunction does deliver only a single character,
Xbut we read a line at a time
Xand dynamically save the input lines in a buffer queue:
X.ne  38
X.DS
X#define	make(n,x) ((x *) calloc(n, sizeof(x)))
X
Xstatic struct save {
X	struct save * s_next;	/* chain first->...->last */
X	char * s_old;		/* original (dynamic) */
X	char ** s_new;		/* correction */
X	} * first, * last;
X
Xstatic save(s)			/* save line in queue */
X	register char * s;
X{	register struct save * p;
X	register int len = strlen(s);
X
X	p = make(1, struct save);
X	p->s_old = strcpy(make(len + 1, char), s);
X	p->s_new = make(len + 1, char *);
X	if (! first)
X		first = p;
X	if (last)
X		last->s_next = p;
X	last = p;
X}
X
Xint input()
X{	static char buf[BUFSIZ],	/* line buffer */
X		* cp;			/* current character */
X
X	if (unbuf > unbufbuf)
X		return * --unbuf;
X	if (! cp			/* no buffer */
X	    || ! *cp)			/* end of buffer */
X		if (fgets(buf, sizeof buf, stdin))
X			save(cp = buf);	/* new line */
X		else
X			return 0;	/* end of file */
X	return *cp++;
X}
X.DE
X.PP
X.B output()
Xfinally is the principal output function.
Xhere we will have to emit the buffer queue filled by
X.B input()
Xand insert the corrections.
XIn order for
X.B new()
Xto add the corrections at the proper position,
Xwe keep track of
Xhow far a line has already been processed by
X.I lex :
X.ne  18
X.DS
Xstatic int offset;	/* first->[0..offset-1] is done */
X
Xoutput(ch)
X	register char ch;
X{
X	if (ch == '\n')
X		unsave(), offset = 0;
X	else
X		++ offset;
X}
X
Xpass()				/* emit yytext */
X{	register char ch, * s = yytext;
X
X	while (ch = *s++)
X		output(ch);
X}
X.DE
X.PP
XOur technique only works,
Xif
X.B output()
Xemits exactly those characters
Xwhich
X.B input()
Xhas read.
XThe design would be more resilient,
Xif only
X.B pass()
Xwere made available to the user.
XOn the other hand,
Xwe did want to show an
X.B output()
Xfunction!
X.PP
X.B unsave()
Xwill emit the buffer at
X.B first
Xand will free its dynamically acquired memory.
XLet us,
Xhowever,
Xfirst look at inserting a correction:
X.ne  10
X.DS
Xnew(s)
X	register char * s;
X{
X	show(offset, yytext);
X	show(offset, s);
X	if (ask())
X		change(offset, s);
X	pass();
X}
X.DE
X.PP
X.B show()
Xis used to suggest a correction.
X.B ask()
Xmanages the inquiry at the terminal.
XIf the user agrees,
X.B change()
Xwill insert the correction.
XIn either case
Xthe input contained in
X.B yytext
Xhas then been processed.
X.PP
XWe will first look at
X.B change()
Xto understand what
X.B show()
Xmust show and what
X.B unsave()
Xmust emit:
X.ne  17
X.DS
X#define	CHANGED	((char *) -1)
X
Xstatic change(offset, new)
X	int offset;
X	char * new;
X{	register int i = offset;
X	register char * old = yytext;
X	register struct save * sp = first;
X
X	while (*old)
X	{	sp->s_new[i++] = CHANGED;
X		if (*old++ == '\n')
X			sp = sp->s_next, i = 0;
X	}
X	first->s_new[offset] = strsave(new);
X}
X.DE
X.PP
X.B offset
Xmarks the point in the buffer at
X.B first
Xfrom where
X.B yytext
Xis to be replaced by
X.B new .
X.B offset
Xcan only point into the buffer at
X.B first ,
Xotherwise that buffer would already have been emitted.
XThe correction is marked in
X.B s_new
Xand is dynamically saved there.
XFor each linefeed in
X.B yytext
Xwe will have to move on to the next buffer.
X.PP
X.B show()
Xis now easy to construct:
X.ne  23
X.DS
Xstatic show(offset, new)
X	int offset;
X	char * new;
X{	register int i;
X	register char * old = yytext;
X	register struct save * sp = first;
X
X	for (i = 0; i < offset; ++ i)
X		if (! sp->s_new[i])
X			putc(sp->s_old[i], stderr);
X		else if (sp->s_new[i] != CHANGED)
X			fputs(sp->s_new[i], stderr);
X	if (new)
X		fputs(new, stderr);
X	while (*old)
X		if (*old++ == '\n')
X			sp = sp->s_next, i = 0;
X		else
X			++ i;
X	if (sp && sp->s_old[i])
X		fputs(sp->s_old + i, stderr);
X}
X.DE
X.LP
XUp to
X.B offset
Xwe emit either corrections from
X.B s_new
Xor the original characters from
X.B s_old
Xin the buffer at
X.B first .
XThe suggested new text, if any, follows.
XThe original text is passed just as it was in
X.B change() .
XFinally we show the rest of the buffer.
X.PP
XWe pass up the function
X.B ask() ,
Xas well as solving the problem that original
Xand suggested correction should be shown at separate
Xpreferably fixed positions on the terminal,
Xto avoid tiring the user.
X.PP
X.B unsave() ,
Xour last function,
Xmust write the buffer at
X.B first
Xinto a file.
XSince we have shown the change on
X.B stderr
Xwe can assume
Xthat the output file is connected to
X.B stdout :
X.ne  20
X.DS
Xstatic unsave()
X{	register int i;
X	register struct save * sp = first;
X
X	for (i = 0; sp->s_old[i]; ++ i)
X		if (! sp->s_new[i])
X			putchar(sp->s_old[i]);
X		else if (sp->s_new[i] != CHANGED)
X		{	fputs(sp->s_new[i], stdout);
X			cfree(sp->s_new[i]);
X		}
X	if (sp == last)
X		first = last = (struct save *) 0;
X	else
X		first = sp->s_next;
X	cfree(sp->s_old);
X	cfree(sp->s_new);
X	cfree(sp);
X}
X.DE
X.LP
XMemory for the buffer and for its corrections can be released.
X.PP
XSince
X.I lex
Xtakes care of pattern recognition,
Xa protype of
X.I led
Xis easy to develop.
X.I led
Xis even efficient to use,
Xif few corrections are to be applied to a lot of text.
XThe prototype is less suitable,
Xif corrections are changed quickly and are applied to small texts,
Xsince each group of corrections has to be compiled using
X.I lex
Xand
X.I cc .
X.I lex
Xis used to advantage here,
Xto study the method and the correction rules needed for actual applications,
Xwithout having to invest a lot of effort initially.
XIf the technique turns out to be useful (as it did for us),
Xfurther efforts are then justified \(em
X.I
Xmake it work, then make it work faster
X.R
Xis a UNIX adage.
X.SH
XTree puzzle
X.PP
XThe program sketched below
Xdeposits words in a binary tree
Xso that the words appear sorted when the tree is traversed in
X.I inorder .
X.ne   5
X.DS
Xstatic struct word {
X	char * text;
X	struct word * l, * r;
X	} * word;
X.DE
X.ne  12
X.DS
Xstatic struct word * new(cp)
X	register char * cp;
X{	register struct word * new =
X		calloc(1, sizeof(struct word));
X
X	if (new)
X	{	new->text = strsave(cp);
X		return new;
X	}
X	fputs("no room\en", stderr), exit(1);
X}
X.DE
X.ne  21
X.DS
Xstruct word * enter(cp)
X	register char * cp;
X{	register struct word * w;
X	register int c;
X
X	if (! (w = word))
X		return word = new(cp);
X	for (;;)
X		if ((c = strcmp(cp, w->text)) == 0)
X			return w;
X		else if (c < 0)
X			if (w->l)
X				w = w->l;
X			else
X				return w->l = new(cp);
X		else if (w->r)
X			w = w->r;
X		else
X			return w->r = new(cp);
X}
X.DE
X.ne  10
X.DS
Xvisit(p)
X	register struct word * p;
X{
X	if (p)
X	{	visit(p->l);
X		puts(p->text);
X		visit(p->r);
X	}
X}
X.DE
X.LP
X.B enter()
Xis used to add a word to the tree,
Xit returns a pointer to the new entry.
XWith
X.B visit(word)
Xthe stored words are recursively emitted in inorder;
X.B word
Xshould be the first value returned by
X.B enter() .
X.PP
XIf we process a
X.I sorted
Xlist of words using
X.B enter() ,
Xa binary will still be constructed,
Xexcept that the tree deteriorates into a linear list.
XAs a puzzle until next time the following question remains:
Xhow should
X.B enter()
Xbe replaced if we know that the input is sorted,
Xand if we want to construct a binary tree of minimal height,
Xi.e.,
Xif we want to prevent that the tree deteriorates into a linear list?
X
SHAR_EOF
if test 24548 -ne "`wc -c '1.85'`"
then
	echo shar: error transmitting "'1.85'" '(should have been 24548 characters)'
fi
: '       End of shell archive '
exit 0
-- 

Carl Swail      Mail: National Research Council of Canada
		      Building U-66, Montreal Road
		      Ottawa, Ontario, Canada K1A 0R6
		Phone: (613) 998-3408
USENET:
{pesnta,lsuc}!nrcaero!carl
{cornell,uw-beaver}!utcsrgv!dciem!nrcaero!carl
{allegra,decvax,duke,floyd,ihnp4,linus}!utzoo!dciem!nrcaero!carl