linhart@cream.rutgers.edu (Mike Threepoint) (04/02/91)
I previously posted this program over spring break, then received mail informing me that some people with short mail spools missed it. I had made some portability fixes anyway, so here's Release 3. This program extracts and translates the vocabulary list from the Infocom adventure game data files. The data files should be the same format on any system. The program should compile with only minor tweaks on any ANSI C compiler. Thanks to everyone who responded, especially to the two hackers who wrote me about their C programs for ZIL interpreters, a project I myself was planning to undertake. (And may still, since neither is complete to the point of supporting the opcodes in the newer Interactive Fiction Plus games, or the ones with Visiclues.) Rather than expose them to a flood of unexpected email, I'll let them post about their respective projects if they wish. Enjoy... <-- snip, snip #define INFO "\ vocab -- A data dumper\n\ Copyleft (c) 1991 by Mike Threepoint. All rights reversed.\n\ Release 3 / Serial number 910401\n\ \n\ Display a vocabulary list of an Infocom(tm) adventure game data file.\n\ \n\ Usage: vocab [-1] [-w] [-#] [-f] file...\n\ \n\ \t-1 one-column list\n\ \t-w wide list (default)\n\ \t-# toggle word numbers\n\ \t-f toggle flags\n\ \n" /* Now you can: * make sure you've seen every last Encyclopedia Frobizzica entry, * learned every spell in the Enchanter series, * know all the magic wand's F-words, * discover obscure synonyms (like calling the Zorkian elvish sword * Glamdring, the dragon Smaug, and the robot R2D2) * learn trivia about the game's internal operations (like the internal * `intnum' noun in several games, used when you type a number) * play with curious debugging commands hidden in some games (Stu Galley's * works are good for this) * * I doubt Infocom's employees will complain, either of them. Alas, Infocom. * I wore a black armband when you went under. If only you'd stayed solvent. * (At least till I could buy Sherlock and Arthur! Can't purchase them * anywhere anymore...) * * Email correspondence to linhart@remus.rutgers.edu. * * Disclaimer: This program works for me, at the moment. I've never seen * any Infocom source code(*), and nobody within the company * told me any technical details. I'm just an independent * public domain software author. If I-Need-Magic sues, I'll * cheerfully turn over all zero profits I made on this program. * * * (Well, maybe one function. I noticed the Beyond Zork MS-DOS interpreter * was in MSC, so I mailed them a MSC function to get the screen size from * the BIOS instead of the stupid SETUP.EXE method, so the interpreter * could figure out when my VGA was in 50 line mode. Some time later, a * new text game was released, with VisiClues. I started it in 50-line * mode, but the screen was reset to 25-line CGA color mode. And then the * text ran off the bottom of the screen and scrolled as if it were still * 50 lines long. I'd mail another helpful letter, but it's too late now.) */ #define MAXCOL 79 #if !defined(__STDC__) && !defined(__TURBOC__) #error This is an ANSI-complaint. It looks like you are not ANSI-compliant. #endif #include <stdio.h> #include <stdlib.h> #include <stdarg.h> #include <string.h> #include <ctype.h> #define S_BLANK 1 #define S_MACRO1 2 #define S_MACRO2 3 #define S_MACRO3 4 #define S_CAPS 5 #define S_PUNC 6 #define S_FILLER 6 #define S_OFF 7 struct text { unsigned ch3 : 5; unsigned ch2 : 5; unsigned ch1 : 5; unsigned last : 1; }; /* the 5-bit character set */ const char err_chars[7] = { /* null thrown in for string handling */ '\0', /* special codes above */ ' ', '1', '2', '3', '^', '@', /* followed by: */ }; typedef const char alfabet[26]; alfabet lower = { 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z' }, upper = { 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z' }, punct = { '\0' /* ASCII literal */, '\n', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '.', ',', '!', '?', '_', '#', '\'', '\"', '/', '\\', '-', ':', '(', ')' }; typedef unsigned char byte; typedef unsigned z_word; struct info_header { byte z_version; byte flags; z_word release; z_word resident_size; z_word game_offset; z_word vocab_offset; z_word object_offset; z_word variable_offset; z_word save_area_size; z_word script_status; char rev_date[6]; z_word macro_offset; z_word verify_length; z_word verify_checksum; z_word reserved[17]; }; FILE *infile; short column = 0, columns = 0; char show_flags = 0, numbers = 0, did_file = 0; unsigned unzword ( z_word z ) { byte *zp = (byte *)&z; return (zp[0] << 8) + zp[1]; } void newline ( void ) { putchar('\n'); column = 0; } char * expand ( struct text *chars ) { static char buf[4] = {0, 0, 0, 0}; buf[0] = chars->ch1 + 1; buf[1] = chars->ch2 + 1; buf[2] = chars->ch3 + 1; return buf; } char * decode ( char *s ) { int len = strlen(s); static char new[MAXCOL+1]; unsigned newlen = 0; while (s[len-1] == S_FILLER) s[--len] = '\0'; while (*s) { switch (*s) { case S_MACRO1: case S_MACRO2: case S_MACRO3: /* shouldn't appear in vocabulary list */ new [ newlen++ ] = err_chars[*s]; break; case S_CAPS: if (*(s+1) >= S_OFF) new [ newlen++ ] = upper[*++s - S_OFF]; else new [ newlen++ ] = err_chars[S_CAPS]; break; case S_PUNC: if (*(s+1) >= S_OFF) if (*++s == S_OFF) { new [ newlen ] = ((*++s - 1) & 0x03) << 5; new [ newlen++ ] += *++s - 1; } else new [ newlen++ ] = punct[*s - S_OFF]; else new [ newlen++ ] = err_chars[S_PUNC]; break; case S_BLANK: new [ newlen++ ] = ' '; break; default: new [ newlen++ ] = lower[*s - S_OFF]; } s++; } new [ newlen ] = '\0'; return new; } void disp_ch ( char x ) { putchar(x); column++; } void disp_str ( char *fmt, ... ) { va_list argptr; static char buf[16]; short len; va_start(argptr, fmt); vsprintf(buf, fmt, argptr); va_end(argptr); len = strlen(buf); printf(buf); column += len; } void disp_bits ( char c ) { unsigned b; disp_ch(' '); for (b = 0x80; b; b >>= 1) disp_ch(c & b ? '1' : '0'); } void error ( char *fmt, ... ) { va_list argptr; fprintf(stderr, "\nError: "); va_start(argptr, fmt); vfprintf(stderr, fmt, argptr); va_end(argptr); exit(1); } void read_error ( void ) { error("Can't read file at offset %04X.\n", ftell(infile)); } void dump_vocab ( unsigned long pos ) { register unsigned count = 0, index; unsigned words; int vocab_entry_size; byte letters_per_word, zwords_per_word; short entry_width, entries_per_line; char format[sizeof("%%-%ds")]; char * buf; #ifdef DEBUG printf("Vocabulary table at offset %04X\n", pos); #endif if (fseek(infile, pos, SEEK_SET) != 0) error("Can't seek offset %04X.\n", pos); /* skip leading info */ if ((pos = getc(infile)) == EOF) read_error(); if (fseek(infile, pos, SEEK_CUR) != 0) error("Can't skip %ld bytes from offset %04X.\n", pos, ftell(infile)); if ((vocab_entry_size = getc(infile)) == EOF) read_error(); if (fread(&words, sizeof(words), 1, infile) < 1) read_error(); words = unzword(words); if (!numbers) printf("%u vocabulary entries\n", words); letters_per_word = (vocab_entry_size - 3) / 2 * 3; zwords_per_word = letters_per_word / 3; entry_width = letters_per_word + 2; if (numbers) entry_width += 5; if (show_flags) entry_width += 3 * (columns == 1 ? 8 : 2) + 3; entries_per_line = columns ? columns : (MAXCOL + 2) / entry_width; buf = malloc(letters_per_word + 1); sprintf(format, "%%-%ds", letters_per_word); while ( count < words ) { byte flags[3]; ++count; if (numbers) disp_str("%04d ", count); for (index = 0; index < zwords_per_word; index++) { unsigned z; if (fread(&z, sizeof(z), 1, infile) < 1) read_error(); z = unzword(z); if (index) strcat(buf, expand((struct text *)&z)); else strcpy(buf, expand((struct text *)&z)); } disp_str(format, decode(buf)); if (fread(flags, sizeof(char), 3, infile) < 3) read_error(); if (show_flags) if (columns == 1) { disp_ch(' '); disp_bits(flags[1]); disp_bits(flags[2]); disp_bits(flags[3]); } else disp_str(" %02x %02x %02x", flags[1], flags[2], flags[3]); if (entries_per_line > 1 && (count % entries_per_line)) disp_str(" "); else newline(); } free(buf); if (column) newline(); } void frob_file ( const char *filename ) { struct info_header header; if((infile = fopen(filename, "rb")) == NULL) error("Can't open file \"%s\".\n", filename); printf("%s:\n", filename); if (fread(&header, sizeof(header), 1, infile) < 1) read_error(); printf("Release %u / Serial number %.6s\n", unzword(header.release), &header.rev_date[0]); dump_vocab(unzword(header.vocab_offset)); fclose(infile); } #ifndef LINT const char sccsid[] = "@(#) " __FILE__ " by Mike Threepoint compiled " __DATE__; #endif void info ( void ) { puts(INFO); exit(0); } void parse_opt ( char p ) { switch (p) { case 'w': columns = 0; break; case '#': case 'n': numbers = !numbers; break; case 'f': case 'b': show_flags = !show_flags; break; case 'h': case '?': info(); break; default: if (isdigit(p)) columns = (p - '0'); } } void parse ( char *parm ) { switch (*parm) { case '/': parse_opt(*++parm); break; case '-': while (*++parm) parse_opt(*parm); break; default: if (did_file) newline(); frob_file(parm); did_file = 1; } } int main ( const unsigned argc, char *argv[] ) { if (argc > 1) { register count; if (strcmp(argv[1], "?") == 0) info(); for (count = 1; count < argc; count++) parse(argv[count]); if (did_file) return 0; } info(); return 1; }