[comp.lang.scheme] SUMMARY--reindentation and pretty-printing for Scheme

dak@sq.sq.com (David A Keldsen) (06/12/90)

OK, here's the promised summary.  Thanks to all the folks who replied
(quickly, too!).

Fast overview:  
Emacs and vi can re-indent; emacs can do it in batch mode; fools lisp
has pretty-printing code.  A standalone re-indenter in C by
wri!henry@uunet.uu.net is posted as a follow-up to this posting, in
comp.lang.scheme only.

Detailed summary:
John R. Ellis (ellis@src.dec.com) points out that my initial request is
ambiguous.  Pretty-printers and a re-indenters are quite different
beasts.  A *pretty printer* is either a print procedure for an internal
form, or is a code reformatter that is permitted to re-arrange the
code.  A *re-indenter* merely re-adjusts the indentation, and is not
permitted to change the line splits.

mob@media-lab.media.mit.edu, alms@cambridge.apple.com (Andrew L. M. Shalit),
ted@NMSU.Edu, rees@parc.xerox.com, vladimir@Eng.Sun.COM (Vladimir G. Ivanovic) 
and Keiji Kanazawa <kgk@cs.brown.edu> all suggested Emacs.
(This was before my revised specification that the code be free of
encumbrances, small, and portable).  For those free of these
specifications, it was also noted that GNU emacs can be fired up in
batch mode, for use as a stand-alone indenter.

Tim Bradshaw <tim@cstr.edinburgh.ac.uk> offered the suggestion that in
Common Lisp, it is easy to redefine the read table to keep comments (as
asked in my second request).  This isn't in the standard for Scheme,
but it is supported in some implementations.  donc@vaxa.isi.edu (Don
Cohen) also suggested the readtable solution.

J. A. Durieux <xerox@cs.vu.nl> and Jeff Mantei <uiucdcs!cs.uiuc.edu!mantei> 
both pointed out that vi has a lisp mode (:set lisp), and that =<addr>
will lisp-indent the specified lines.  =% was suggested as being
especially useful; it re-indents to the matching parenthesis.  (Note
also that vi will start up in lisp mode, with auto-indent set, and
show-matching-parentheses, if you use the -l option).

Ozan Yigit <oz@nexus.yorku.ca> noted that the fools lisp distribution
by Jonathan Lee (jonathan@scam.berkeley.edu) contains some nice
pretty-printing code.

And finally, wri!henry@uunet.uu.net sent the stand-alone re-indentation
code, which I am posting as a follow-up to this article, in
comp.lang.scheme only.
-- 
// David A. 'Dak' Keldsen:  dak@sq.com or utai[.toronto.edu]!sq!dak
// "I have heard the mermaids singing, each to each."  -- T.S.Eliot

dak@sq.sq.com (David A Keldsen) (06/12/90)

And here's the promised stand-alone indenter (in C).
----------------------------------------------------------------------
From:	wri!henry@uunet.uu.net
Received: by WRI.com (3.2/SMI-3.0DEV3)
	id AA28825; Fri, 8 Jun 90 18:19:25 CDT
Message-Id: <9006082319.AA28825@WRI.com>
Date:	Fri, 8 Jun 90 19:19:24 EDT
To:	dak@sq.com

Here is a very simple indenter that I use all the  time.   Note,  it  doesn't
pretty print, just indent.  I find that that is exactly what I want.

/*
 * Indent a lisp (scheme) program.
 * The only transformations performed are:
 *	Leading white space is replaced with 4 n spaces where n is the
 *		nesting level.
 *	Open and close square brackets (``['' and ``]'') are paired together
 *		and the latter are replaced with the correct number of close
 *		parens (``)'').
 */
#include <stdio.h>
#include <assert.h>



/*
 * The following definitions make C more amenable to a purist.
 */
#define	bool	char			/* boolean type */
#define	uint	unsigned int		/* short names for unsigned types */
#define	ulong	unsigned long
#define	uchar	unsigned char
#define	ushort	unsigned short int
#define	not	!			/* logical negation operator */
#define	and	&&			/* logical conjunction */
#define	or	||			/* logical disjunction */
#define	TRUE	(0 == 0)
#define	FALSE	(not TRUE)
#define	loop	while (TRUE)		/* loop until break */
#define	EOS	'\0'			/* end-of-string char */
#define	NULL	0			/* invalid pointer */

#define	cardof(a)	(sizeof(a) / sizeof(*(a)))
#define	endof(a)	((a) + cardof(a))
#define	bitsof(a)	(sizeof(a) * 8)


/*
 * Function declarations that should be in stdio.h.
 */
extern char	*malloc(),
		*realloc();


#define	ISPACE	4			/* distance to indent for each level */
#define	TAB	8			/* distance between tab stops */
#define	MAXSQ	32			/* maximum nesting of ``['' and ``]'' */


/*
 * Note, an entry in sqs records the level BEFORE the cooresponding ``[''
 * was seen.
 */
static int	level,			/* current ``('' level */
		sqs[MAXSQ],		/* stack of un-matched ``['' */
		*sqtop	= sqs - 1;	/* top of sqs stack */


extern int	main();
static void	die(),
		scan(),
		doline(),
		docomment(),
		dostring();
static int	doocto();
static void	uplevel(),
		downlevel();
static int	skipws();
static void	putws();


int
main()
{
	scan();
	return (0);
}


static void
die(fmt, arg)
char	*fmt;
int	arg;
{
	fflush(stdout);
	fprintf(stderr, fmt, arg);
	fprintf(stderr, "\n");
	exit(1);
}


static void
scan()
{
	int	ch;

	loop
		switch (ch = skipws()) {
		case '\n':
			putchar('\n');
			continue;
		case EOF:
			if (level != 0)
				die("%d missing ``)''.", level);
			return;
		default:
			putws(ISPACE * level);
			doline(ch);
			putchar('\n');
		}
}


static void
doline(ch)
int	ch;
{
	loop {
		switch (ch) {
		case '(':
			uplevel(ch);
			break;
		case ')':
			downlevel(ch);
			break;
		case '[':
			uplevel(ch);
			ch = '(';
			break;
		case ']':
			downlevel(ch);
			ch = ')';
			break;
		case ';':
			docomment(ch);
			return;
		case '"':
			dostring(ch);
			ch = getchar();
			continue;
		case '#':
			ch = doocto(ch);
			continue;
		case '\n':
		case EOF:
			return;
		}
		putchar(ch);
		ch = getchar();
	}
}


static void
docomment(ch)
int	ch;
{
	assert(ch == ';');
	do {
		putchar(ch);
		ch = getchar();
	} while (ch != '\n' && ch != EOF);
}


static void
dostring(ch)
int	ch;
{
	static bool	warned	= FALSE;

	assert(ch == '\"');
	loop {
		putchar(ch);
		ch = getchar();
		switch (ch) {
		case EOF:
			putchar('\n');
			die("Unterminated string");
		case '\n':
			if (not warned) {
				warned = TRUE;
				fprintf(stderr,
					"Warning: new-line in string\n");
			}
			break;
		case '"':
			putchar(ch);
			return;
		case '\\':
			putchar(ch);
			ch = getchar();
			if (ch == EOF) {
				putchar('\n');
				die("Unterminted string");
			}
			break;
		}
	}
}


static int
doocto(ch)
int	ch;
{
	assert(ch == '#');
	putchar(ch);
	ch = getchar();
	switch (ch) {
	case '\\':
		putchar(ch);
		ch = getchar();
		if (ch == EOF) {
			putchar('\n');
			die("Unterminated character constant");
		}
		putchar(ch);
		return (getchar());
	default:
		return (ch);
	}
}


static void
uplevel(ch)
int	ch;
{
	switch (ch) {
	case '(':
		++level;
		break;
	case '[':
		if (++sqtop == endof(sqs)) {
			putchar('\n');
			die("[ ... ] too deeply nested");
		}
		*sqtop = level++;
		break;
	default:
		assert(FALSE);
	}
}


static void
downlevel(ch)
int	ch;
{
	switch (ch) {
	case ')':
		--level;
		if ((sqtop >= sqs)
		and (level <= *sqtop)) {
			putchar('\n');
			die("Unmatched ``[''.");
		}
		if (level < 0) {
			putchar('\n');
			die("Too many ``)''.");
		}
		break;
	case ']':
		if (sqtop < sqs) {
			putchar('\n');
			die("Too many ``]''.");
		}
		assert(*sqtop >= 0);
		if (--level != *sqtop) {
			assert(level >= *sqtop);
			putchar('\n');
			die("``]'' seen but need %d ``)'' first",
				level - *sqtop);
		}
		--sqtop;
		break;
	default:
		assert(FALSE);
	}
}


static int
skipws()
{
	int	ch;

	loop
		switch (ch = getchar()) {
		case ' ':
		case '\t':
			break;
		default:
			return (ch);
		}
}


static void
putws(len)
int	len;
{
	assert(len >= 0);
	for (; len >= TAB; len -= TAB)
		putchar('\t');
	for (; len != 0; --len)
		putchar(' ');
}
-- 
// David A. 'Dak' Keldsen:  dak@sq.com or utai[.toronto.edu]!sq!dak
// "I have heard the mermaids singing, each to each."  -- T.S.Eliot