grwalter@watmath.UUCP (Fred Walter) (07/31/86)
I recently needed to do a diff on some huge source files on my C128. Rather than write a program to do this, and burn out my drives with all the accessing, I uploaded the sources to my UNIX account and did the diff there. However, before I did the diff I needed to get the sources untokenized (they were PAL sources - regular BASIC files). The following program that I wrote (and have included here) will untokenize BASIC sources, given to it from stdin, and send a readable version to stdout. I know that this is not net.sources, BUT since this is relevant only to people with Commodore computers, I felt it would be better to post the entire thing here, rather than just a notice here, and the body to net.sources (and make all non-Commodore people who read net.sources have to learn how to use the 'n' key :-) ). So send flames to /dev/null. The program will compile with just cc untokenize.c -o untokenize. Another reason that this program might be useful is that one can upload huge sources/programs, and get a cross-reference listing of what routines are in what source files, and whether or not they are actually used anymore. This is useful when the sources fill a regular 1541 diskette. One note : some of the control characters are different (the disable/enable case switching characters) from the C64 to the C128, but aside from this one should get correct listings using this program on C64 source listings. fred UUCP : {allegra|clyde|linus|decvax|utzoo|ihnp4}!watmath!grwalter CSNET : grwalter%watmath@waterloo.csnet ARPA : grwalter%watmath%waterloo.csnet@csnet-relay.arpa P.S. I have the sources to the 6502 assembler written in C (for C-Power). These will also generate a cross-assembler (for 6502) which can be used to assemble those sources you just uploaded and untokenized (with just a little format conversion). Thus tremendously speeding up the edit/assemble cycle (well, down-loading does take some time ... even at 1200 baud :-( ).
And letting one use all the UNIX tools while editting source files. Unfortunitely, my copy of the sources has some errors in it, and rather then hack at it (with it having who knows how many un-obvious errors in it), I was wondering if anybody on the net has sources that will actually compile. If so, I would appreciate a copy. If anybody makes any improvements to the code below, I would appreciate a copy of the new version. ---------------this-is-a-cut-line----------so-cut-here-!!!!!------------------- /* * untokenize.c * * copyright 1986 by gottfried robert walter * * last date of modification = july 29, 1986 * * this program takes a tokenized commodore basic program from stdin * and outputs to stdout an untokenized listing */ #include <stdio.h> char *ascii[256] = { "[CONTROL_@]", "[CONTROL_a]", "[UNDERLINE(80)]", "[STOP]", "[CONTROL_d]", "[WHITE]", "[CONTROL_f]", "[BELL]", "[CONTROL_h]", "[TAB]", "[LINEFEED]", "[ENABLE_CASE_SWITCH]", /* c64 ^i */ "[DISABLE_CASE_SWITCH]", /* c64 ^h */ "[RETURN]", "[LOWER/UPPER_CASE_MODE]", "[FLASH ON(80)]", "[CONTROL_p]", "[DOWN]", "[RVS_ON]", "[HOME]", "[DEL]", "[CONTROL_u]", "[CONTROL_v]", "[CONTROL_w]", "[TAB_SET/CLEAR]", "[CONTROL_y]", "[CONTROL_z]", "[ESCAPE]", "[RED]", "[RIGHT]", "[GREEN]", "[BLUE]", " ", "!", "\042", "#", "$", "%", "&", "'", "(", ")", "*", "+", ",", "-", ".", "/", "0", "1", "2", "3", "4", "5", "6", "7", "8", "9", ":", ";", "<", "=", ">", "?", "@", "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m", "n", "o", "p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z", "[", "\\", "]", "^", "[BACK_ARROW]", "[SHIFT_@]", "A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K", "L", "M", "N", "O", "P", "Q", "R", "S", "T", "U", "V", "W", "X", "Y", "Z", "[SHIFT_+]", "[C=-]", "[SHIFT_-]", "[PI]", "[C=*]", "[CONTROL_SHIFT_@]", "[ORANGE]", "[CONTROL_B]", "[RUN]", "[HELP]", "[F1]", "[F3]", "[F5]", "[F7]", "[F2]", "[F4]", "[F6]", "[F8]", "[SHIFT_RETURN]", "[UPPER_CASE/GRAPHICS_MODE]", "[CONTROL_O]", "[BLACK]", 
"[UP]", "[RVS_OFF]", "[CLR]", "[INST]", "[BROWN]", "[LIGHT_RED]", "[DARK_GRAY]", "[GRAY]", "[LIGHT_GREEN]", "[LIGHT_BLUE]", "[LIGHT_GRAY]", "[PURPLE]", "[LEFT]", "[YELLOW]", "[CYAN]", "[SHIFT_SPACE]", "[161]", "[162]", "[163]", "[164]", "[165]", "[166]", "[167]", "[168]", "[169]", "[170]", "[171]", "[172]", "[173]", "[174]", "[175]", "[176]", "[177]", "[178]", "[179]", "[180]", "[181]", "[182]", "[183]", "[184]", "[185]", "[186]", "[187]", "[188]", "[189]", "[190]", "[191]", "[SHIFT_@]", "A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K", "L", "M", "N", "O", "P", "Q", "R", "S", "T", "U", "V", "W", "X", "Y", "Z", "[SHIFT_+]", "[C=-]", "[SHIFT_-]", "[PI]", "[C=*]", "[SHIFT_SPACE]", "[161]", "[162]", "[163]", "[164]", "[165]", "[166]", "[167]", "[168]", "[169]", "[170]", "[171]", "[172]", "[173]", "[174]", "[175]", "[176]", "[177]", "[178]", "[179]", "[180]", "[181]", "[182]", "[183]", "[184]", "[185]", "[186]", "[187]", "[188]", "[189]", "[190]", "[PI]" }; char *token0[128] = { "end", /* c64 compatible tokens */ "for", "next", "data", "input#", "input", "dim", "read", "let", "goto", "run", "if", "restore", "gosub", "return", "rem", "stop", "on", "wait", "load", "save", "verify", "def", "poke", "print#", "print", "cont", "list", "clr", "cmd", "sys", "open", "close", "get", "new", "tab(", "to", "fn", "spc(", "then", "not", "step", "+", "-", "*", "/", "^", "and", "or", ">", "=", "<", "sgn", "int", "abs", "usr", "fre", "pos", "sqr", "rnd", "log", "exp", "cos", "sin", "tan", "atn", "peek", "len", "str$", "val", "asc", "chr$", "left$", "right$", "mid$", "go", "rgr", /* new functions */ "rclr", "bunch of tokens - see token1", "joy", "rdot", "dec", "hex$", "err$", "instr", "else", /* new commands */ "resume", "trap", "tron", "troff", "sound", "vol", "auto", "pudef", "graphic", "paint", "char", "box", "circle", "gshape", "sshape", "draw", "locate", "color", "scnclr", "scale", "help", "do", "loop", "exit", "directory", "dsave", "dload", "header", "scratch", "collect", 
"copy", "rename", "backup", "delete", "renumber", "key", "monitor", "using", /* language elements */ "until", "while", "bunch of tokens - see token2", "pi" }; char *token1[11] = { "illegal token", "illegal token", "pot", "bump", "pen", "rsppos", "rsprite", "rspcolor", "xor", "rwindow", "pointer" }; char *token2[39] = { "illegal token", "illegal token", "bank", "filter", "play", "tempo", "movspr", "sprite", "sprcolor", "rreg", "envelope", "sleep", "catalog", "dopen", "append", "dclose", "bsave", "bload", "record", "concat", "dverify", "dclear", "sprsav", "collision", "begin", "bend", "window", "boot", "width", "sprdef", "quit", "stash", "illegal token", "fetch", "illegal token", "swap", "off", "fast", "slow" }; main() { int quotemode, c, lo, hi; lo = getchar(); /* load address */ hi = getchar(); for(;;) { if (feof(stdin)) break; quotemode = 0; lo = getchar(); /* line link */ hi = getchar(); if ((lo|hi) == 0) break; lo = getchar(); /* line number */ hi = getchar(); (void)printf("%5d ", hi*256+lo); while ((c = getchar()) != '\0') { if (feof(stdin)) break; if (quotemode != 0) { (void)printf("%s", ascii[c]); if (c == 34) quotemode = 0; continue; } if (c == 34) { (void)printf("%s", ascii[c]); quotemode = 1; continue; } if ( c < 0X80) (void)printf("%s", ascii[c]); else if (c == 0Xce) { c = getchar(); if (c > 0X0a) c = 0; (void)printf("%s", token1[c]); } else if (c == 0Xfe) { c = getchar(); if (c > 0X26) c = 0; (void)printf("%s", token2[c]); } else (void)printf("%s", token0[c-0X80]); } (void)printf("\n"); } (void)exit(0); }