[net.micro.cbm] untokenize.c plus commentary/one request

grwalter@watmath.UUCP (Fred Walter) (07/31/86)
I recently needed to do a diff on some huge source files on my C128.

Rather than write a program to do this, and burn out my drives with all the
accessing, I uploaded the sources to my UNIX account and did the diff
there. However, before I did the diff I needed to get the sources
untokenized (they were PAL sources - regular BASIC files). The following
program that I wrote (and included here) will untokenize BASIC sources,
given to it from stdin, and send a readable version to stdout.

I know that this is not net.sources, BUT since this is relevant only to
people with commodore computers, I felt it would be better to post the
entire thing here, rather than just a notice here, and the body to
net.sources (and make all non-commodore people who read net.sources have
to learn how to use the 'n' key :-) ). So send flames to /dev/null.

The program will compile with just
	cc untokenize.c -o untokenize.

Another reason that this program might be useful, is that one can upload
huge sources/programs, and get a cross-reference listing of what routines
are in what source files, and whether or not they are actually used anymore.
This is useful when the sources fill a regular 1541 diskette.

One note : some of the control characters are different (the disable/enable
case switching characters) from the C64 to the C128, but aside from this one
should get correct listings using this program on C64 source listings.

				fred

UUCP  : {allegra|clyde|linus|decvax|utzoo|ihnp4}!watmath!grwalter
CSNET : grwalter%watmath@waterloo.csnet
ARPA  : grwalter%watmath%waterloo.csnet@csnet-relay.arpa

P.S. I have the sources to the 6502 assembler written in C (for C-Power).
These will also generate a cross-assembler (for 6502) which can be used to
assemble those sources you just uploaded and untokenized (with just a
little format conversion). Thus tremendously speeding up the edit/assemble
cycle (well, down-loading does take some time ... even at 1200 baud :-( ). And
letting one use all the UNIX tools while editing source files.

Unfortunately, my copy of the sources has some errors in it, and rather
than hack at it (with it having who knows how many un-obvious errors in it), I
was wondering if anybody on the net has sources that will actually
compile. If so, I would appreciate a copy.

If anybody makes any improvements to the code below, I would appreciate a copy
of the new version.

---------------this-is-a-cut-line----------so-cut-here-!!!!!-------------------
/*
 * untokenize.c 
 *
 * copyright 1986 by gottfried robert walter
 *
 * last date of modification = july 29, 1986
 *
 * this program takes a tokenized commodore basic program from stdin
 * and outputs to stdout an untokenized listing
 */

#include <stdio.h>


/*
 * ascii[] - text printed for each of the 256 possible character codes.
 *
 * indexed directly by the byte value read from the tokenized program.
 * codes that have a plain printable equivalent map to that character;
 * everything else maps to a bracketed mnemonic such as "[CLR]" or "[161]".
 *
 * things worth knowing when reading the table:
 *   - codes 65-90 print as lower case letters, codes 97-122 and 193-218
 *     print as upper case letters.
 *   - codes 192-223 carry the same strings as codes 96-127.
 *   - codes 224-254 carry the same strings as codes 160-190 (so code 225
 *     prints "[161]", and so on); code 255 prints "[PI]".
 *   - the "(80)" suffix presumably marks codes that only act on the 80
 *     column screen - TODO confirm.
 *
 * the entries are string literals and must never be written through,
 * hence the const qualifiers.
 */
const char	*const ascii[256] = {
	/* 0 - 15 */
	"[CONTROL_@]", "[CONTROL_a]", "[UNDERLINE(80)]", "[STOP]",
	"[CONTROL_d]", "[WHITE]", "[CONTROL_f]", "[BELL]",
	"[CONTROL_h]", "[TAB]", "[LINEFEED]",
	"[ENABLE_CASE_SWITCH]",		/* c64 ^i */
	"[DISABLE_CASE_SWITCH]",	/* c64 ^h */
	"[RETURN]", "[LOWER/UPPER_CASE_MODE]", "[FLASH ON(80)]",

	/* 16 - 31 */
	"[CONTROL_p]", "[DOWN]", "[RVS_ON]", "[HOME]",
	"[DEL]", "[CONTROL_u]", "[CONTROL_v]", "[CONTROL_w]",
	"[TAB_SET/CLEAR]", "[CONTROL_y]", "[CONTROL_z]", "[ESCAPE]",
	"[RED]", "[RIGHT]", "[GREEN]", "[BLUE]",

	/* 32 - 47 (code 34 is the double quote) */
	" ", "!", "\042", "#", "$", "%", "&", "'",
	"(", ")", "*", "+", ",", "-", ".", "/",

	/* 48 - 63 */
	"0", "1", "2", "3", "4", "5", "6", "7",
	"8", "9", ":", ";", "<", "=", ">", "?",

	/* 64 - 79 */
	"@", "a", "b", "c", "d", "e", "f", "g",
	"h", "i", "j", "k", "l", "m", "n", "o",

	/* 80 - 95 */
	"p", "q", "r", "s", "t", "u", "v", "w",
	"x", "y", "z", "[", "\\", "]", "^", "[BACK_ARROW]",

	/* 96 - 111 */
	"[SHIFT_@]", "A", "B", "C", "D", "E", "F", "G",
	"H", "I", "J", "K", "L", "M", "N", "O",

	/* 112 - 127 */
	"P", "Q", "R", "S", "T", "U", "V", "W",
	"X", "Y", "Z", "[SHIFT_+]", "[C=-]", "[SHIFT_-]", "[PI]", "[C=*]",

	/* 128 - 143 */
	"[CONTROL_SHIFT_@]", "[ORANGE]", "[CONTROL_B]", "[RUN]",
	"[HELP]", "[F1]", "[F3]", "[F5]",
	"[F7]", "[F2]", "[F4]", "[F6]",
	"[F8]", "[SHIFT_RETURN]", "[UPPER_CASE/GRAPHICS_MODE]", "[CONTROL_O]",

	/* 144 - 159 */
	"[BLACK]", "[UP]", "[RVS_OFF]", "[CLR]",
	"[INST]", "[BROWN]", "[LIGHT_RED]", "[DARK_GRAY]",
	"[GRAY]", "[LIGHT_GREEN]", "[LIGHT_BLUE]", "[LIGHT_GRAY]",
	"[PURPLE]", "[LEFT]", "[YELLOW]", "[CYAN]",

	/* 160 - 175 */
	"[SHIFT_SPACE]", "[161]", "[162]", "[163]",
	"[164]", "[165]", "[166]", "[167]",
	"[168]", "[169]", "[170]", "[171]",
	"[172]", "[173]", "[174]", "[175]",

	/* 176 - 191 */
	"[176]", "[177]", "[178]", "[179]",
	"[180]", "[181]", "[182]", "[183]",
	"[184]", "[185]", "[186]", "[187]",
	"[188]", "[189]", "[190]", "[191]",

	/* 192 - 207 (same strings as 96 - 111) */
	"[SHIFT_@]", "A", "B", "C", "D", "E", "F", "G",
	"H", "I", "J", "K", "L", "M", "N", "O",

	/* 208 - 223 (same strings as 112 - 127) */
	"P", "Q", "R", "S", "T", "U", "V", "W",
	"X", "Y", "Z", "[SHIFT_+]", "[C=-]", "[SHIFT_-]", "[PI]", "[C=*]",

	/* 224 - 239 (same strings as 160 - 175) */
	"[SHIFT_SPACE]", "[161]", "[162]", "[163]",
	"[164]", "[165]", "[166]", "[167]",
	"[168]", "[169]", "[170]", "[171]",
	"[172]", "[173]", "[174]", "[175]",

	/* 240 - 255 (same strings as 176 - 190, then pi) */
	"[176]", "[177]", "[178]", "[179]",
	"[180]", "[181]", "[182]", "[183]",
	"[184]", "[185]", "[186]", "[187]",
	"[188]", "[189]", "[190]", "[PI]"
};

/*
 * token0[] - keyword text for the single byte tokens 0x80 - 0xff.
 *
 * indexed by (token byte - 0x80).  the entries for 0xce and 0xfe are
 * placeholders: those two bytes are prefixes whose following byte is
 * looked up in token1[] and token2[] respectively.
 *
 * the entries are string literals and must never be written through,
 * hence the const qualifiers.
 */
const char	*const token0[128] = {
	/* 0x80 - c64 compatible tokens */
	"end", "for", "next", "data", "input#", "input", "dim", "read",
	/* 0x88 */
	"let", "goto", "run", "if", "restore", "gosub", "return", "rem",
	/* 0x90 */
	"stop", "on", "wait", "load", "save", "verify", "def", "poke",
	/* 0x98 */
	"print#", "print", "cont", "list", "clr", "cmd", "sys", "open",
	/* 0xa0 */
	"close", "get", "new", "tab(", "to", "fn", "spc(", "then",
	/* 0xa8 */
	"not", "step", "+", "-", "*", "/", "^", "and",
	/* 0xb0 */
	"or", ">", "=", "<", "sgn", "int", "abs", "usr",
	/* 0xb8 */
	"fre", "pos", "sqr", "rnd", "log", "exp", "cos", "sin",
	/* 0xc0 */
	"tan", "atn", "peek", "len", "str$", "val", "asc", "chr$",
	/* 0xc8 */
	"left$", "right$", "mid$", "go",
	/* 0xcc - new functions */
	"rgr", "rclr",
	"bunch of tokens - see token1",		/* 0xce prefix */
	"joy",
	/* 0xd0 */
	"rdot", "dec", "hex$", "err$", "instr",
	/* 0xd5 - new commands */
	"else", "resume", "trap",
	/* 0xd8 */
	"tron", "troff", "sound", "vol", "auto", "pudef", "graphic", "paint",
	/* 0xe0 */
	"char", "box", "circle", "gshape", "sshape", "draw", "locate", "color",
	/* 0xe8 */
	"scnclr", "scale", "help", "do", "loop", "exit", "directory", "dsave",
	/* 0xf0 */
	"dload", "header", "scratch", "collect", "copy", "rename", "backup", "delete",
	/* 0xf8 */
	"renumber", "key", "monitor",
	/* 0xfb - language elements */
	"using", "until", "while",
	"bunch of tokens - see token2",		/* 0xfe prefix */
	"pi"
};

/*
 * token1[] - keyword text for the two byte tokens introduced by 0xce.
 *
 * indexed by the byte that follows the 0xce prefix (0x00 - 0x0a).
 * entry 0 doubles as the text printed for any out of range second byte.
 *
 * the entries are string literals and must never be written through,
 * hence the const qualifiers.
 */
const char	*const token1[11] = {
	/* 0x00 */	"illegal token", "illegal token",
	/* 0x02 */	"pot", "bump", "pen",
	/* 0x05 */	"rsppos", "rsprite", "rspcolor",
	/* 0x08 */	"xor", "rwindow", "pointer"
};

/*
 * token2[] - keyword text for the two byte tokens introduced by 0xfe.
 *
 * indexed by the byte that follows the 0xfe prefix (0x00 - 0x26).
 * entry 0 doubles as the text printed for any out of range second byte.
 *
 * the entries are string literals and must never be written through,
 * hence the const qualifiers.
 */
const char	*const token2[39] = {
	/* 0x00 */	"illegal token", "illegal token", "bank", "filter",
	/* 0x04 */	"play", "tempo", "movspr", "sprite",
	/* 0x08 */	"sprcolor", "rreg", "envelope", "sleep",
	/* 0x0c */	"catalog", "dopen", "append", "dclose",
	/* 0x10 */	"bsave", "bload", "record", "concat",
	/* 0x14 */	"dverify", "dclear", "sprsav", "collision",
	/* 0x18 */	"begin", "bend", "window", "boot",
	/* 0x1c */	"width", "sprdef", "quit", "stash",
	/* 0x20 */	"illegal token", "fetch", "illegal token", "swap",
	/* 0x24 */	"off", "fast", "slow"
};

/*
 * main - read a tokenized commodore basic program from stdin and write
 * an untokenized listing to stdout.
 *
 * input layout, as consumed below:
 *   2 bytes	load address (read and discarded)
 *   then, for every program line:
 *     2 bytes	line link; a link of 0,0 marks the end of the program
 *     2 bytes	line number, low byte first
 *     n bytes	line text, terminated by a 0 byte
 *
 * each line is written as the line number (width 5), a space, the
 * expanded text and a newline.
 *
 * end of file or a read error anywhere in the input simply ends the
 * listing; a line that is cut short is printed as far as it was read.
 *
 * always returns 0.
 */
int main(void)
{
	int		quotemode, c, lo, hi;

	(void)getchar();	/* load address - value is not needed */
	(void)getchar();

	for (;;) {

		lo = getchar();	/* line link */
		hi = getchar();
		if (lo == EOF || hi == EOF || (lo|hi) == 0)
			break;

		lo = getchar();	/* line number */
		hi = getchar();
		if (lo == EOF || hi == EOF)
			break;	/* truncated input: don't print a bogus line number */
		(void)printf("%5d ", hi*256+lo);

		/* every line starts outside of a quoted string */
		quotemode = 0;

		/*
		 * testing against EOF (rather than calling feof) also stops
		 * on read errors, and guarantees that c is 0..255 whenever
		 * it is used as a table index below.
		 */
		while ((c = getchar()) != EOF && c != '\0') {

			/* inside quotes bytes are characters, never tokens */
			if (quotemode != 0) {
				(void)fputs(ascii[c], stdout);
				if (c == 34)	/* closing quote */
					quotemode = 0;
				continue;
			}

			if (c == 34) {		/* opening quote */
				(void)fputs(ascii[c], stdout);
				quotemode = 1;
				continue;
			}

			if (c < 0x80)
				(void)fputs(ascii[c], stdout);

			else if (c == 0xce) {
				/*
				 * two byte token; a missing (EOF) or out of
				 * range second byte prints as entry 0,
				 * "illegal token"
				 */
				c = getchar();
				if (c < 0 || c > 0x0a)
					c = 0;
				(void)fputs(token1[c], stdout);
			}

			else if (c == 0xfe) {
				/* two byte token, handled like 0xce above */
				c = getchar();
				if (c < 0 || c > 0x26)
					c = 0;
				(void)fputs(token2[c], stdout);
			}

			else
				(void)fputs(token0[c-0x80], stdout);
		}

		(void)putchar('\n');
	}

	return 0;
}