[net.sources] PASCAL Program Formatter

oz@utcsstat.UUCP (Ozan Yigit) (08/12/84)

The following is a *VERY PORTABLE* pascal program formatter in
pascal. It has appeared in a DECUS Languages SIG tape many moons
ago, along with the swedish pascal compiler. It compiles and
works beautifully under 4.2BSD, VAX/VMS and I am sure it would port
without any problems to many other systems. My apologies for
not providing a proper man page. 

				Oz (the wizard of one thing or another..)
				Dept. of Computer Science
				York University

----------------------------- RIP -----------------------------

 program pascalformatter (input, output);

{
| **                Pascal Program Formatter                       **
| **                                                               **
| **   by J. E. Crider, Shell Oil Company, Houston, Texas 77025    **
| **                                                               **
| **   Copyright (c) 1980 by Shell Oil Company.  Permission to     **
| **   copy, modify, and distribute, but not for profit, is        **
| **   hereby granted, provided that this note is included.        **
|
|   This portable program formats Pascal programs and acceptable
|   program fragments according to structured formatting principles
|   [SIGPLAN Notices, Vol. 13, No. 11, Nov. 1978, pp. 15-22].
|   The actions of the program are as follows:
|
|   PREPARATION:  For each structured statement that controls a
|      structured statement, the program converts the controlled
|      statement into a compound statement.  The inserted BEGIN/END
|      pair are in capital letters.  A null statement (with semicolon)
|      is inserted before the last END symbol of each program/
|      procedure/function, if needed.  The semicolon forces the END
|      symbol to appear on a line by itself.
|
|   FORMATTING:  Each structured statement that controls a simple
|      statement is placed on a single line, as if it were a simple
|      statement.  Otherwise, each structured statement is formatted
|      in the following pattern (with indentation "indent"):
|
|            XXXXXX header XXXXXXXX
|               XXXXXXXXXXXXXXXXXX
|               XXXXX body XXXXXX
|               XXXXXXXXXXXXXXXXXX
|
|      where the header is one of:
|
|            while <expression> do begin
|            for <control variable> := <for list> do begin
|            with <record variable list> do begin
|            repeat
|            if <expression> then begin
|            else if <expression> then begin
|            else begin
|            case <expression> of
|            <case label list>: begin
|
|      and the last line either begins with UNTIL or ends with END.
|      Other program parts are formatted similarly.  The headers are:
|
|            <program/procedure/function heading>;
|            label
|            const
|            type
|            var
|            begin
|            (various for records and record variants)
|
|   COMMENTS:  Each comment that starts before or on a specified
|      column on an input line (program constant "commthresh") is
|      copied without shifting or reformatting.  Each comment that
|      starts after "commthresh" is reformatted and left-justified
|      following the aligned comment base column ("alcommbase").
|
|   LABELS:  Each statement label is justified to the left margin and
|      is placed on a line by itself.
|
|   SPACES AND BLANK LINES:  Spaces not at line breaks are copied from
|      the input.  Blank lines are copied from the input if they appear
|      between statements (or appropriate declaration units).  A blank
|      line is inserted above each significant part of each program/
|      procedure/function if one is not already there.
|
|   CONTINUATION:  Lines that are too long for an output line are
|      continued with additional indentation ("contindent").
|
|   INPUT FORM:  The program expects as input a program or program
|      fragment in Standard Pascal.  A program fragment is acceptable
|      if it consists of a sequence of (one or more) properly ordered
|      program parts; examples are:  a statement part (that is, a
|      compound statement), or a TYPE part and a VAR part followed by
|      procedure declarations.  If the program fragment is in serious
|      error, then the program may copy the remainder of the input file
|      to the output file without significant modification.  Error
|      messages may be inserted into the output file as comments.
|}

    const
       maxrwlen = 10;              { size of reserved word strings }
       ordminchar = 32;            { ord of lowest char in char set }
       ordmaxchar = 126;           { ord of highest char in char set }
 {  Although this program uses the ASCII character set, conversion to
    most other character sets should be straightforward. }

 {  The following parameters may be adjusted for the installation: }
       maxinlen = 255;             { maximum width of input line + 1 }
       maxoutlen = 72;             { maximum width of output line }
       initmargin = 1;             { initial value of output margin }
       commthresh = 4;             { column threshhold in input for
                                      comments to be aligned }
       alcommbase = 35;            { aligned comments in output start
                                      AFTER this column }
       indent = 3;                 { RECOMMENDED indentation increment }
       contindent = 5;             { continuation indentation, >indent }
       endspaces = 3;              { number of spaces to precede 'END' }
       commindent = 3;             { comment continuation indentation }

    type
       natural = 0..maxint;
       inrange = 0..maxinlen;
       outrange = 0..maxoutlen;

       errortype = (longline, noendcomm, notquote, longword, notdo,
            notof, notend, notthen, notbegin, notuntil, notsemicolon,
            notcolon, notparen, noeof);

       chartype = (illegal, special, chapostrophe, chleftparen,
            chrightparen, chperiod, digit, chcolon, chsemicolon,
            chlessthan, chgreaterthan, letter, chleftbrace);

                                   { for reserved word recognition }
       resword = (                 { reserved words ordered by length }
            rwif, rwdo, rwof, rwto, rwin, rwor,
                                   { length: 2 }
            rwend, rwfor, rwvar, rwdiv, rwmod, rwset, rwand, rwnot,
            rwnil,                 { length: 3 }
            rwthen, rwelse, rwwith, rwgoto, rwcase, rwtype, rwfile,
                                   { length: 4 }
            rwbegin, rwuntil, rwwhile, rwarray, rwconst, rwlabel,
                                   { length: 5 }
            rwrepeat, rwrecord, rwdownto, rwpacked,
                                   { length: 6 }
            rwprogram,             { length: 7 }
            rwfunction,            { length: 8 }
            rwprocedure,           { length: 9 }
            rwx);                  { length: 10 for table sentinel }
       rwstring = packed array [1..maxrwlen] of char;

       firstclass = (              { class of word if on new line }
            newclause,             { start of new clause }
            continue,              { continuation of clause }
            alcomm,                { start of aligned comment }
            contalcomm,            { continuation of aligned comment }
            uncomm,                { start of unaligned comment }
            contuncomm,            { continuation of unaligned comment }
            stmtlabel);            { statement label }
       wordtype = record           { data record for word }
          whenfirst: firstclass;   { class of word if on new line }
          puncfollows: boolean;    { to reduce dangling punctuation }
          blanklncount: natural;   { number of preceding blank lines }
          spaces: integer;         { number of spaces preceding word }
          base: -9..maxinlen;      { inline.buf[base] precedes word }
          size: inrange   end;     { length of word in inline.buf }

       symboltype = (              { symbols for syntax analysis }
            semicolon, sybegin, syend,
                                   { three insertable symbols first }
            syif, sydo, syof, sythen, syelse, sygoto, sycase, syuntil,
            syrepeat, syrecord, forwhilewith, progprocfunc, declarator,
            otherword, othersym, leftparen, rightparen, period,
            sysubrange, intconst, colon, ident, comment, syeof);
       inserttype = semicolon..syend;
       symbolset = set of symboltype;
 { *** NOTE: set size of 0..26 REQUIRED for symbolset! }

    var
       inline: record              { input line data }
          endoffile: boolean;      { end of file on input? }
          ch: char;                { current char, buf[index] }
          index: inrange;          { subscript of current char }
          len: natural;            { length of input line in buf }
                                   { string ';BEGINEND' in buf[-8..0] }
          buf: array [-8..maxinlen] of char   end;
       outline: record             { output line data }
          blanklns: natural;       { number of preceding blank lines }
          len: outrange;           { number of chars in buf }
          buf: array [1..maxoutlen] of char   end;
       word: wordtype;             { current word }
       margin: outrange;           { left margin }
       lnpending: boolean;         { new line before next symbol? }
       symbol: symboltype;         { current symbol }

  { Structured Constants }
       headersyms: symbolset;      { headers for program parts }
       strucsyms: symbolset;       { symbols that begin structured
                                      statements }
       stmtbeginsyms: symbolset;   { symbols that begin statements }
       stmtendsyms: symbolset;     { symbols that follow statements }
       stopsyms: symbolset;        { symbols that stop expression scan }
       recendsyms: symbolset;      { symbols that stop record scan }
       datawords: symbolset;       { to reduce dangling punctuation }
       newword: array [inserttype] of wordtype;
       instring: packed array [1..9] of char;
       firstrw: array [1..maxrwlen] of resword;
       rwword: array [rwif..rwprocedure] of rwstring;
       rwsy: array [rwif..rwprocedure] of symboltype;
       charclass: array [char] of chartype;
  { above is portable form; possible ASCII form is: }
  {    charclass: array [' '..'~'] of chartype;     }
       symbolclass: array [chartype] of symboltype;

    function capital (ch: char): char;
                                   { capitalize char if lower-case
                                      letter }
                                   { !!! implementation-dependent! }

       const
          lettercasediff = 32;     { ASCII character set }

       begin
          if (ch < 'a') or (ch > 'z') then capital := ch
          else capital := chr (ord (ch) - lettercasediff);
          end;                     { capital }

    procedure strucconsts;         { establish values of structured
                                      constants }

       var
          i: ordminchar..ordmaxchar;
                                   { loop index }
          ch: char;                { loop index }

       procedure buildinsert (symbol: inserttype;
            inclass: firstclass;
            inpuncfollows: boolean;
            inspaces, inbase: integer;
            insize: inrange);

          begin
             with newword[symbol] do begin
                whenfirst := inclass;
                puncfollows := inpuncfollows;
                blanklncount := 0;
                spaces := inspaces;
                base := inbase;
                size := insize   end;
             end;                  { buildinsert }

       procedure buildrw (rw: resword;
            symword: rwstring;
            symbol: symboltype);

          begin
             rwword[rw] := symword;{ reserved word string }
             rwsy[rw] := symbol;   { map to symbol }
             end;                  { buildrw }

       begin                       { strucconsts }
                                   { symbol sets for syntax analysis }
          headersyms := [progprocfunc, declarator, sybegin, syeof];
          strucsyms := [sycase, syrepeat, syif, forwhilewith];
          stmtbeginsyms := strucsyms + [sybegin, ident, sygoto];
          stmtendsyms := [semicolon, syend, syuntil, syelse, syeof];
          stopsyms := headersyms + strucsyms + stmtendsyms + [sygoto];
          recendsyms := [rightparen, syend, syeof];

          datawords := [otherword, intconst, ident, syend];

                                   { words for insertable symbols }
          buildinsert (semicolon, continue, false, 0, -9, 1);
          buildinsert (sybegin, continue, false, 1, -8, 5);
          buildinsert (syend, newclause, true, endspaces, -3, 3);
          instring := ';BEGINEND';

                                   { constants for recognizing reserved
                                      words }
          firstrw[1] := rwif;      { length: 1 }
          firstrw[2] := rwif;      { length: 2 }
          buildrw (rwif, 'IF        ', syif);
          buildrw (rwdo, 'DO        ', sydo);
          buildrw (rwof, 'OF        ', syof);
          buildrw (rwto, 'TO        ', othersym);
          buildrw (rwin, 'IN        ', othersym);
          buildrw (rwor, 'OR        ', othersym);
          firstrw[3] := rwend;     { length: 3 }
          buildrw (rwend, 'END       ', syend);
          buildrw (rwfor, 'FOR       ', forwhilewith);
          buildrw (rwvar, 'VAR       ', declarator);
          buildrw (rwdiv, 'DIV       ', othersym);
          buildrw (rwmod, 'MOD       ', othersym);
          buildrw (rwset, 'SET       ', othersym);
          buildrw (rwand, 'AND       ', othersym);
          buildrw (rwnot, 'NOT       ', othersym);
          buildrw (rwnil, 'NIL       ', otherword);
          firstrw[4] := rwthen;    { length: 4 }
          buildrw (rwthen, 'THEN      ', sythen);
          buildrw (rwelse, 'ELSE      ', syelse);
          buildrw (rwwith, 'WITH      ', forwhilewith);
          buildrw (rwgoto, 'GOTO      ', sygoto);
          buildrw (rwcase, 'CASE      ', sycase);
          buildrw (rwtype, 'TYPE      ', declarator);
          buildrw (rwfile, 'FILE      ', othersym);
          firstrw[5] := rwbegin;   { length: 5 }
          buildrw (rwbegin, 'BEGIN     ', sybegin);
          buildrw (rwuntil, 'UNTIL     ', syuntil);
          buildrw (rwwhile, 'WHILE     ', forwhilewith);
          buildrw (rwarray, 'ARRAY     ', othersym);
          buildrw (rwconst, 'CONST     ', declarator);
          buildrw (rwlabel, 'LABEL     ', declarator);
          firstrw[6] := rwrepeat;  { length: 6 }
          buildrw (rwrepeat, 'REPEAT    ', syrepeat);
          buildrw (rwrecord, 'RECORD    ', syrecord);
          buildrw (rwdownto, 'DOWNTO    ', othersym);
          buildrw (rwpacked, 'PACKED    ', othersym);
          firstrw[7] := rwprogram; { length: 7 }
          buildrw (rwprogram, 'PROGRAM   ', progprocfunc);
          firstrw[8] := rwfunction;{ length: 8 }
          buildrw (rwfunction, 'FUNCTION  ', progprocfunc);
          firstrw[9] := rwprocedure;
                                   { length: 9 }
          buildrw (rwprocedure, 'PROCEDURE ', progprocfunc);
          firstrw[10] := rwx;      { length: 10 for table sentinel }

                                   { constants for lexical scan }
          for i := ordminchar to ordmaxchar do begin
             charclass[chr (i)] := illegal   end;
          for ch := 'a' to 'z' do begin
                                   { !!! implementation-dependent!  (but
                                      can be replaced with 52 explicit
                                      assignments) }
             charclass[ch] := letter;
             charclass[capital (ch)] := letter   end;
          for ch := '0' to '9' do charclass[ch] := digit;
          charclass[' '] := special;
          charclass['$'] := special;
          charclass[''''] := chapostrophe;
          charclass['('] := chleftparen;
          charclass[')'] := chrightparen;
          charclass['*'] := special;
          charclass['+'] := special;
          charclass['-'] := special;
          charclass['.'] := chperiod;
          charclass['/'] := special;
          charclass[':'] := chcolon;
          charclass[';'] := chsemicolon;
          charclass['<'] := chlessthan;
          charclass['='] := special;
          charclass['>'] := chgreaterthan;
          charclass['@'] := special;
          charclass['['] := special;
          charclass[']'] := special;
          charclass['^'] := special;
          charclass['{'] := chleftbrace;
          symbolclass[illegal] := othersym;
          symbolclass[special] := othersym;
          symbolclass[chapostrophe] := otherword;
          symbolclass[chleftparen] := leftparen;
          symbolclass[chrightparen] := rightparen;
          symbolclass[chperiod] := period;
          symbolclass[digit] := intconst;
          symbolclass[chcolon] := colon;
          symbolclass[chsemicolon] := semicolon;
          symbolclass[chlessthan] := othersym;
          symbolclass[chgreaterthan] := othersym;
          symbolclass[letter] := ident;
          symbolclass[chleftbrace] := comment;

          end;                     { strucconsts }

{ writeline/writeerror/readline convert between files and lines. }

    procedure writeline;           { write buffer into output file }

       var
          i: outrange;             { loop index }

       begin
          with outline do begin
             while blanklns > 0 do begin
                writeln (output);
                blanklns := blanklns - 1   end;
             if len > 0 then begin
                for i := 1 to len do write (output, buf[i]);
                writeln (output);
                len := 0   end   end;
          end;                     { writeline }

    procedure writeerror (error: errortype);
                                   { report error to output }

       var
          i, ix: inrange;          { loop index, limit }

       begin
          writeline;
          write (output, ' (*  !!! error, ');
          case error of
             longline:     write (output, 'shorter line');
             noendcomm:    write (output, 'end of comment');
             notquote:     write (output, 'final "''" on line');
             longword:     write (output, 'shorter word');
             notdo:        write (output, '"do"');
             notof:        write (output, '"of"');
             notend:       write (output, '"end"');
             notthen:      write (output, '"then"');
             notbegin:     write (output, '"begin"');
             notuntil:     write (output, '"until"');
             notsemicolon: write (output, '";"');
             notcolon:     write (output, '":"');
             notparen:     write (output, '")"');
             noeof:        write (output, 'end of file')   end;
          write (output, ' expected');
          if error >= longword then begin
             write (output, ', not "');
             with inline, word do begin
                if size > maxrwlen then ix := maxrwlen
                else ix := size;
                for i := 1 to ix do write (output, buf[base + i])   end;
             write (output, '"')   end;
          if error = noeof then write (output, ', FORMATTING STOPS');
          writeln (output, ' !!!  *)');
          end;                     { writeerror }

    procedure readline;            { read line into input buffer }

       var
          c: char;                 { input character }
          nonblank: boolean;       { is char other than space? }

       begin
          with inline do begin
             len := 0;
             if eof (input) then endoffile := true
             else begin            { get next line }
                while not eoln (input) do begin
                   read (input, c);
                   if c < ' ' then begin
                                   { convert ASCII control chars (except
                                      leading form feed) to spaces }
                      if c = chr (9) then begin
                                   { ASCII tab char }
                         c := ' '; { add last space at end }
                         while len mod 8 <> 7 do begin
                            len := len + 1;
                            if len < maxinlen then buf[len] := c   end;
                         end       { end tab handling }
                      else if (c <> chr (12)) or (len > 0) then c :=
                           ' ';
                      end;         { end ASCII control char conversion }
                   len := len + 1;
                   if len < maxinlen then buf[len] := c   end;
                readln (input);
                if len >= maxinlen then begin
                                   { input line too long }
                   writeerror (longline);
                   len := maxinlen - 1   end;
                nonblank := false;
                repeat             { trim line }
                   if len = 0 then nonblank := true
                   else if buf[len] <> ' ' then nonblank := true
                   else len := len - 1
                   until nonblank   end;
             len := len + 1;       { add exactly ONE trailing blank }
             buf[len] := ' ';
             index := 0   end;
          end;                     { readline }

{ startword/finishword/copyword convert between lines and words.
   auxiliary procedures getchar/nextchar precede. }

    procedure getchar;             { get next char from input buffer }

       begin
          with inline do begin
             index := index + 1;
             ch := buf[index]   end;
          end;                     { getchar }

    function nextchar: char;       { look at next char in input buffer }

       begin
          with inline do nextchar := buf[index + 1];
          end;                     { nextchar }

    procedure startword (startclass: firstclass);
                                   { note beginning of word, and count
                                      preceding lines and spaces }

       var
          first: boolean;          { is word the first on input line? }

       begin
          first := false;
          with inline, word do begin
             whenfirst := startclass;
             blanklncount := 0;
             while (index >= len) and not endoffile do begin
                if len = 1 then blanklncount := blanklncount + 1;
                if startclass = contuncomm then writeline
                else first := true;
                readline;          { with exactly ONE trailing blank }
                getchar;
{ ASCII:        if ch = chr (12) then begin
                                   [ ASCII form feed char ]
                   writeline;
                   writeln (output, chr (12));
                   blanklncount := 0;
                   getchar   end;  [ end ASCII form feed handling }
                end;
             spaces := 0;          { count leading spaces }
             if not endoffile then begin
                while ch = ' ' do begin
                   spaces := spaces + 1;
                   getchar   end   end;
             if first then spaces := 1;
             base := index - 1   end;
          end;                     { startword }

    procedure finishword;          { note end of word }

       begin
          with inline, word do begin
             puncfollows := (symbol in datawords) and (ch <> ' ');
             size := index - base - 1   end;
          end;                     { finishword }

    procedure copyword (newline: boolean;
         word: wordtype);          { copy word from input buffer into
                                      output buffer }

       var
          i: integer;              { outline.len excess, loop index }

       begin
          with word, outline do begin
             i := maxoutlen - len - spaces - size;
             if newline or (i < 0) or ((i = 0) and puncfollows) then
                  writeline;
             if len = 0 then begin { first word on output line }
                blanklns := blanklncount;
                case whenfirst of  { update LOCAL word.spaces }
                   newclause:  spaces := margin;
                   continue:   spaces := margin + contindent;
                   alcomm:     spaces := alcommbase;
                   contalcomm: spaces := alcommbase + commindent;
                   uncomm:     spaces := base;
                   contuncomm: ;   { spaces := spaces }
                   stmtlabel:  spaces := initmargin   end;
                if spaces + size > maxoutlen then begin
                   spaces := maxoutlen - size;
                                   { reduce spaces }
                   if spaces < 0 then begin
                      writeerror (longword);
                      size := maxoutlen;
                      spaces := 0   end   end   end;
             for i := 1 to spaces do begin
                                   { put out spaces }
                len := len + 1;
                buf[len] := ' '   end;
             for i := 1 to size do begin
                                   { copy actual word }
                len := len + 1;
                buf[len] := inline.buf[base + i]   end   end;
          end;                     { copyword }

{ docomment/copysymbol/insert/getsymbol/findsymbol convert between
   words and symbols. }

    procedure docomment;           { copy aligned or unaligned comment }

       procedure copycomment (commclass: firstclass;
            commbase: inrange);    { copy words of comment }

          var
             endcomment: boolean;  { end of comment? }

          begin
             with word do begin    { copy comment begin symbol }
                whenfirst := commclass;
                spaces := commbase - outline.len;
                copyword ((spaces < 0) or (blanklncount > 0), word)
                end;
             commclass := succ (commclass);
             with inline do begin
                repeat             { loop for successive words }
                   startword (commclass);
                   endcomment := endoffile;
                                   { premature end? }
                   if endcomment then writeerror (noendcomm)
                   else begin
                      repeat
                         if ch = '*' then begin
                            getchar;
                            if ch = ')' then begin
                               endcomment := true;
                               getchar   end   end
                         else if ch = '}' then begin
                            endcomment := true;
                            getchar   end
                         else getchar
                         until (ch = ' ') or endcomment   end;
                   finishword;
                   copyword (false, word)
                   until endcomment   end;
             end;                  { copycomment }

       begin                       { docomment }
          if word.base < commthresh then begin
                                   { copy comment without alignment }
             copycomment (uncomm, word.base)   end
          else begin               { align and format comment }
             copycomment (alcomm, alcommbase)   end;
          end;                     { docomment }

    procedure copysymbol (symbol: symboltype;
         word: wordtype);          { copy word(s) of symbol }

       begin
          if symbol = comment then begin
             docomment;            { NOTE: docomment uses global word! }
             lnpending := true   end
          else if symbol = semicolon then begin
             copyword (false, word);
             lnpending := true   end
          else begin
             copyword (lnpending, word);
             lnpending := false   end;
          end;                     { copysymbol }

    procedure insert (newsymbol: inserttype);
                                   { copy word for inserted symbol into
                                      output buffer }

       begin
          copysymbol (newsymbol, newword[newsymbol]);
          end;                     { insert }

    procedure getsymbol;           { get next non-comment symbol }

       procedure findsymbol;       { find next symbol in input buffer }

          var
             chclass: chartype;    { classification of leading char }

          procedure checkresword;  { check if current identifier is
                                      reserved word/symbol }

             var
                rw, rwbeyond: resword;
                                   { loop index, limit }
                symword: rwstring; { copy of symbol word }
                i: 1..maxrwlen;    { loop index }

             begin
                with word, inline do begin
                   size := index - base - 1;
                   if size < maxrwlen then begin
                      symword := '          ';
                      for i := 1 to size do symword[i] := capital (buf[
                           base + i]);
                      rw := firstrw[size];
                      rwbeyond := firstrw[size + 1];
                      symbol := semicolon;
                      repeat
                         if rw >= rwbeyond then symbol := ident
                         else if symword = rwword[rw] then symbol :=
                              rwsy[rw]
                         else rw := succ (rw)
                         until symbol <> semicolon;
                      if symbol = syend then begin
                         if spaces < endspaces then spaces := endspaces;
                         whenfirst := newclause   end   end   end;
                end;               { checkresword }

          procedure getname;

             begin
                while charclass[inline.ch] in [letter, digit] do
                     getchar;
                checkresword;
                end;               { getname }

          procedure getnumber;

             begin
                with inline do begin
                   while charclass[ch] = digit do getchar;
                   if ch = '.' then begin
                                   { thanks to A.H.J.Sale, watch for
                                      '..' }
                      if charclass[nextchar] = digit then begin
                                   { NOTE: nextchar is a function! }
                         symbol := otherword;
                         getchar;
                         while charclass[ch] = digit do getchar   end
                      end;
                   if capital (ch) = 'E' then begin
                      symbol := otherword;
                      getchar;
                      if (ch = '+') or (ch = '-') then getchar;
                      while charclass[ch] = digit do getchar   end
                   end;
                end;               { getnumber }

          procedure getstringliteral;

             var
                endstring: boolean;{ end of string literal? }

             begin
                with inline do begin
                   endstring := false;
                   repeat
                      if ch = '''' then begin
                         getchar;
                         if ch = '''' then getchar
                         else endstring := true   end
                      else if index >= len then begin
                                   { error, final "'" not on line }
                         writeerror (notquote);
                         symbol := syeof;
                         endstring := true   end
                      else getchar
                      until endstring   end;
                end;               { getstringliteral }

          begin                    { findsymbol }
             startword (continue);
             with inline do begin
                if endoffile then symbol := syeof
                else begin
                   chclass := charclass[ch];
                   symbol := symbolclass[chclass];
                   getchar;        { second char }
                   case chclass of
                      chsemicolon, chrightparen, chleftbrace, special,
                           illegal:   ;
                      letter:  getname;
                      digit:  getnumber;
                      chapostrophe:  getstringliteral;
                      chcolon:  begin
                         if ch = '=' then begin
                            symbol := othersym;
                            getchar   end   end;
                      chlessthan:  begin
                         if (ch = '=') or (ch = '>') then getchar   end;
                      chgreaterthan:  begin
                         if ch = '=' then getchar   end;
                      chleftparen:  begin
                         if ch = '*' then begin
                            symbol := comment;
                            getchar   end   end;
                      chperiod:  begin
                         if ch = '.' then begin
                            symbol := sysubrange;
                            getchar   end   end   end   end   end;
             finishword;
             end;                  { findsymbol }

       begin                       { getsymbol }
          repeat
             copysymbol (symbol, word);
                                   { copy word for symbol to output }
             findsymbol            { get next symbol }
             until symbol <> comment;
          end;                     { getsymbol }

{ block performs recursive-descent syntax analysis with symbols,
   adjusting margin, lnpending, word.whenfirst, and
   word.blanklncount.  auxiliary procedures precede. }

    procedure startclause;         { (this may be a simple clause, or
                                      the start of a header) }

       begin
          word.whenfirst := newclause;
          lnpending := true;
          end;                     { startclause }

    procedure passsemicolons;      { pass consecutive semicolons }

       begin
          while symbol = semicolon do begin
             getsymbol;
             startclause   end;    { new line after ';' }
          end;                     { passsemicolons }

    procedure startpart;           { start program part }

       begin
          with word do begin
             if blanklncount = 0 then blanklncount := 1   end;
          startclause;
          end;                     { startpart }

    procedure startbody;           { finish header, start body of
                                      structure }

       begin
          passsemicolons;
          margin := margin + indent;
          startclause;
          end;                     { startbody }

    procedure finishbody;

       begin
          margin := margin - indent;
          end;                     { finishbody }

    procedure passphrase (finalsymbol: symboltype);
                                   { process symbols until significant
                                      symbol encountered }

       var
          endsyms: symbolset;      { complete set of stopping symbols }

       begin
          if symbol <> syeof then begin
             endsyms := stopsyms + [finalsymbol];
             repeat
                getsymbol
                until symbol in endsyms   end;
          end;                     { passphrase }

    procedure expect (expectedsym: symboltype;
         error: errortype;
         syms: symbolset);

       begin
          if symbol = expectedsym then getsymbol
          else begin
             writeerror (error);
             while not (symbol in [expectedsym] + syms) do getsymbol;
             if symbol = expectedsym then getsymbol   end;
          end;                     { expect }

    procedure dolabel;             { process statement label }

       var
          nextfirst: firstclass;   { (pass whenfirst to statement) }

       begin
          with word do begin
             nextfirst := whenfirst;
             whenfirst := stmtlabel;
             lnpending := true;
             getsymbol;
             expect (colon, notcolon, stopsyms);
             whenfirst := nextfirst;
             lnpending := true   end;
          end;                     { dolabel }

    procedure block;               { process block }

       procedure heading;          { process heading for program,
                                      procedure, or function }

          procedure matchparens;   { process parentheses in heading }

             begin
                getsymbol;
                while not (symbol in recendsyms) do begin
                   if symbol = leftparen then matchparens
                   else getsymbol   end;
                expect (rightparen, notparen, stopsyms + recendsyms);
                end;               { matchparens }

          begin                    { heading }
             getsymbol;
             passphrase (leftparen);
             if symbol = leftparen then matchparens;
             if symbol = colon then passphrase (semicolon);
             expect (semicolon, notsemicolon, stopsyms);
             end;                  { heading }

       procedure statement;        { process statement }

          forward;

       procedure stmtlist;         { process sequence of statements }

          begin
             repeat
                statement;
                passsemicolons
                until symbol in stmtendsyms;
             end;                  { stmtlist }

       procedure compoundstmt (    { process compound statement }
            stmtpart: boolean);    { statement part of block? }

          begin
             getsymbol;
             startbody;            { new line, indent after 'BEGIN' }
             stmtlist;
             if stmtpart and not lnpending then insert (semicolon);
             expect (syend, notend, stmtendsyms);
             finishbody;           { left-indent after 'END' }
             end;                  { compoundstmt }

       procedure statement;        { process statement }

          procedure checkcompound; { if structured then force compound }

             begin
                if symbol = intconst then dolabel;
                if symbol in strucsyms then begin
                                   { force compound }
                   insert (sybegin);
                   startbody;      { new line, indent after 'BEGIN' }
                   statement;
                   insert (syend);
                   finishbody   end{ left-indent after 'END' }
                else statement;
                end;               { checkcompound }

          procedure ifstmt;        { process if statement }

             begin
                passphrase (sythen);
                expect (sythen, notthen, stopsyms);
                checkcompound;
                if symbol = syelse then begin
                   startclause;    { new line before 'ELSE' }
                   getsymbol;
                   if symbol = syif then ifstmt
                   else checkcompound   end;
                end;               { ifstmt }

          procedure repeatstmt;    { process repeat statement }

             begin
                getsymbol;
                startbody;         { new line, indent after 'REPEAT' }
                stmtlist;
                startclause;       { new line before 'UNTIL' }
                expect (syuntil, notuntil, stmtendsyms);
                passphrase (semicolon);
                finishbody;        { left-ident after 'UNTIL' }
                end;               { repeatstmt }

          procedure fwwstmt;       { process for, while, or with
                                      statement }

             begin
                passphrase (sydo);
                expect (sydo, notdo, stopsyms);
                checkcompound;
                end;               { fwwstmt }

          procedure casestmt;      { process case statement }

             begin
                passphrase (syof);
                expect (syof, notof, stopsyms);
                startbody;         { new line, indent after 'OF' }
                repeat
                   passphrase (colon);
                   expect (colon, notcolon, stopsyms);
                   checkcompound;
                   passsemicolons
                   until symbol in stopsyms;
                expect (syend, notend, stmtendsyms);
                finishbody;        { left-indent after 'END' }
                end;               { casestmt }

          begin                    { statement }
             if symbol = intconst then dolabel;
             if symbol in stmtbeginsyms then begin
                case symbol of
                   sybegin:       compoundstmt (false);
                   sycase:        casestmt;
                   syif:          ifstmt;
                   syrepeat:      repeatstmt;
                   forwhilewith:  fwwstmt;
                   ident, sygoto: passphrase (semicolon)   end   end;
             if not (symbol in stmtendsyms) then begin
                writeerror (notsemicolon);
                                   { ';' expected }
                passphrase (semicolon)   end;
             end;                  { statement }

       procedure passfields (forvariant: boolean);

          forward;

       procedure dorecord;         { process record declaration }

          begin
             getsymbol;
             startbody;
             passfields (false);
             expect (syend, notend, recendsyms);
             finishbody;
             end;                  { dorecord }

       procedure dovariant;        { process (case) variant part }

          begin
             passphrase (syof);
             expect (syof, notof, stopsyms);
             startbody;
             passfields (true);
             finishbody;
             end;                  { dovariant }

       procedure doparens (forvariant: boolean);
                                   { process parentheses in record }

          begin
             getsymbol;
             if forvariant then startbody;
             passfields (false);
             lnpending := false;   { for empty field list }
             expect (rightparen, notparen, recendsyms);
             if forvariant then finishbody;
             end;                  { doparens }

       procedure passfields;       { process declarations }
 {     procedure passfields (forvariant: boolean); }

          begin                    { passfields }
             while not (symbol in recendsyms) do begin
                if symbol = semicolon then passsemicolons
                else if symbol = syrecord then dorecord
                else if symbol = sycase then dovariant
                else if symbol = leftparen then doparens (forvariant)
                else getsymbol   end;
             end;                  { passfields }

       begin                       { block }
          while symbol = declarator do begin
             startpart;            { label, const, type, var }
             getsymbol;
             startbody;
             repeat
                passphrase (syrecord);
                if symbol = syrecord then dorecord;
                if symbol = semicolon then passsemicolons
                until symbol in headersyms;
             finishbody   end;
          while symbol = progprocfunc do begin
             startpart;            { program, procedure, function }
             heading;
             startbody;
             if symbol in headersyms then block
             else if symbol = ident then begin
                startpart;         { directive: forward, etc. }
                passphrase (semicolon);
                passsemicolons   end
             else writeerror (notbegin);
             finishbody   end;
          if symbol = sybegin then begin
             startpart;            { statement part }
             compoundstmt (true);
             if symbol in [sysubrange, period] then symbol := semicolon;
                                   { treat final period as semicolon }
             passsemicolons   end;
          end;                     { block }

    procedure copyrem;             { copy remainder of input }

       begin
          writeerror (noeof);
          with inline do begin
             repeat
                copyword (false, word);
                startword (contuncomm);
                if not endoffile then begin
                   repeat
                      getchar
                      until ch = ' '   end;
                finishword;
                until endoffile   end;
          end;                     { copyrem }

    procedure initialize;          { initialize global variables }

       var
          i: 1..9;                 { loop index }

       begin
          with inline do begin
             for i := 1 to 9 do buf[i - 9] := instring[i];
                                   { string ';BEGINEND' in buf[-8..0] }
             endoffile := false;
             ch := ' ';
             index := 0;
             len := 0   end;
          with outline do begin
             blanklns := 0;
             len := 0   end;
          with word do begin
             whenfirst := contuncomm;
             puncfollows := false;
             blanklncount := 0;
             spaces := 0;
             base := 0;
             size := 0   end;
          margin := initmargin;
          lnpending := false;
          symbol := othersym;
          end;                     { initialize }

    begin                          { pascalformatter }
       strucconsts;
       initialize;
 {  ***************  Files may be opened here. }
       getsymbol;
       block;
       if not inline.endoffile then copyrem;
       writeline;
       end                         { pascalformatter } .