[misc.misc] Infocom vocabulary lister source, Release 3

linhart@cream.rutgers.edu (Mike Threepoint) (04/02/91)

I previously posted this program over spring break, then received mail
informing me that some people with short mail spools missed it.  I had
made some portability fixes anyway, so here's Release 3.

This program extracts and translates the vocabulary list from the
Infocom adventure game data files.  The data files should be the same
format on any system.  The program should compile with only minor
tweaks on any ANSI C compiler.

Thanks to everyone who responded, especially to the two hackers who
wrote me about their C programs for ZIL interpreters, a project I
myself was planning to undertake.  (And may still, since neither is
complete to the point of supporting the opcodes in the newer
Interactive Fiction Plus games, or the ones with Visiclues.)  Rather
than expose them to a flood of unexpected email, I'll let them post
about their respective projects if they wish.

Enjoy...

<-- snip, snip
/*
 * Banner and usage text shown to the user (written with puts() by info()).
 * The whole text is one string literal built with backslash-newline
 * continuations, so no comment may be placed inside it.
 */
#define INFO "\
vocab -- A data dumper\n\
Copyleft (c) 1991 by Mike Threepoint.  All rights reversed.\n\
Release 3 / Serial number 910401\n\
\n\
Display a vocabulary list of an Infocom(tm) adventure game data file.\n\
\n\
Usage:  vocab [-1] [-w] [-#] [-f] file...\n\
\n\
\t-1   one-column list\n\
\t-w   wide list (default)\n\
\t-#   toggle word numbers\n\
\t-f   toggle flags\n\
\n"

/* Now you can:
 *    make sure you've seen every last Encyclopedia Frobozzica entry,
 *    learned every spell in the Enchanter series,
 *    know all the magic wand's F-words,
 *    discover obscure synonyms (like calling the Zorkian elvish sword
 *	 Glamdring, the dragon Smaug, and the robot R2D2)
 *    learn trivia about the game's internal operations (like the internal
 *	 `intnum' noun in several games, used when you type a number)
 *    play with curious debugging commands hidden in some games (Stu Galley's
 *	 works are good for this)
 *
 * I doubt Infocom's employees will complain, either of them.  Alas, Infocom.
 * I wore a black armband when you went under.	If only you'd stayed solvent.
 * (At least till I could buy Sherlock and Arthur!  Can't purchase them
 * anywhere anymore...)
 *
 * Email correspondence to linhart@remus.rutgers.edu.
 *
 * Disclaimer:	This program works for me, at the moment.  I've never seen
 *		any Infocom source code(*), and nobody within the company
 *		told me any technical details.	I'm just an independent
 *		public domain software author.	If I-Need-Magic sues, I'll
 *		cheerfully turn over all zero profits I made on this program.
 *
 * * (Well, maybe one function.  I noticed the Beyond Zork MS-DOS interpreter
 *    was in MSC, so I mailed them a MSC function to get the screen size from
 *    the BIOS instead of the stupid SETUP.EXE method, so the interpreter
 *    could figure out when my VGA was in 50 line mode.  Some time later, a
 *    new text game was released, with VisiClues.  I started it in 50-line
 *    mode, but the screen was reset to 25-line CGA color mode.  And then the
 *    text ran off the bottom of the screen and scrolled as if it were still
 *    50 lines long.  I'd mail another helpful letter, but it's too late now.)
 */

/*
 * Widest output line, in characters.  Sizes decode()'s result buffer and
 * drives the entries-per-line calculation in dump_vocab().
 */
#define MAXCOL 79

/*
 * Refuse to build on compilers that advertise neither ISO/ANSI C
 * (__STDC__) nor Turbo C (__TURBOC__): the code below relies on
 * prototypes, <stdarg.h> and string-literal concatenation.
 */
#if !defined(__STDC__) && !defined(__TURBOC__)
#error This is an ANSI-complaint.  It looks like you are not ANSI-compliant.
#endif

#include <stdio.h>
#include <stdlib.h>
#include <stdarg.h>
#include <string.h>
#include <ctype.h>

/*
 * Character codes as seen by decode(): each 5-bit character value plus 1
 * (expand() adds the 1 so that no code is ever the C string terminator).
 *
 *   S_BLANK    decoded as a space
 *   S_MACRO1-3 not expected in a vocabulary list; decode() emits the
 *              matching marker from err_chars[]
 *   S_CAPS     the following code selects a letter from upper[]
 *   S_PUNC     the following code selects a character from punct[]
 *   S_FILLER   padding stripped from the end of a word by decode();
 *              deliberately the same value as S_PUNC
 *   S_OFF      first code that indexes the 26-entry alphabet tables
 *              (table index = code - S_OFF)
 */
#define S_BLANK 	1
#define S_MACRO1	2
#define S_MACRO2	3
#define S_MACRO3	4
#define S_CAPS		5
#define S_PUNC		6
#define S_FILLER	6
#define S_OFF		7

/*
 * Three 5-bit characters and a 1-bit flag, 16 bits in total.
 * expand() reads ch1, ch2, ch3 as the first, second and third character.
 * The fields are declared in the order ch3, ch2, ch1, last; how that maps
 * onto the bits of a word is implementation-defined (bit-field allocation
 * order is left to the compiler by the C standard).
 */
struct text {
	unsigned ch3  : 5;	/* third character */
	unsigned ch2  : 5;	/* second character */
	unsigned ch1  : 5;	/* first character */
	unsigned last : 1;	/* not read anywhere in this file; presumably an
				   end-of-text marker -- TODO confirm */
};

/* the 5-bit character set */
/*
 * Marker characters indexed directly by the S_* codes below S_OFF:
 * [S_BLANK] ' ', [S_MACRO1..3] '1' '2' '3', [S_CAPS] '^', [S_PUNC] '@'.
 * decode() emits one of these when a special code appears where it cannot
 * be interpreted.
 */
const char err_chars[7] = {
	/* null thrown in for string handling */
	'\0',
	/* special codes above */
	' ', '1', '2', '3', '^', '@',
	/* followed by: */
	/* (codes from S_OFF upward are looked up in the alphabet tables) */
};

/*
 * An alphabet is exactly 26 characters, indexed by (code - S_OFF).
 * The arrays are NOT NUL-terminated: each holds 26 characters and nothing
 * else, so never treat them as C strings.
 */
typedef const char alfabet[26];

/* default alphabet: lower-case letters */
alfabet lower = "abcdefghijklmnopqrstuvwxyz";

/* alphabet selected by S_CAPS: upper-case letters */
alfabet upper = "ABCDEFGHIJKLMNOPQRSTUVWXYZ";

/* alphabet selected by S_PUNC */
alfabet punct = {
	'\0',	/* slot 0 is a placeholder: it introduces an ASCII literal */
	'\n',
	'0', '1', '2', '3', '4',
	'5', '6', '7', '8', '9',
	'.', ',', '!', '?', '_', '#', '\'',
	'\"', '/', '\\', '-', ':', '(', ')'
};

/* one raw byte of the data file */
typedef unsigned char	byte;
/*
 * One 16-bit word as stored in the data file.  This must be a 2-byte type:
 * struct info_header is filled by a single fread() and only matches the
 * file layout when every z_word occupies exactly two bytes.  Plain
 * `unsigned' is 4 bytes on most current compilers, so use unsigned short.
 */
typedef unsigned short	z_word;

/*
 * Image of the fixed header at the start of a data file.  With 2-byte
 * z_words and no padding the members add up to 64 bytes.  Multi-byte
 * fields are in file byte order and must go through unzword() before use.
 * Only release, vocab_offset and rev_date are used by this program; the
 * meanings noted for the other members are inferred from their names.
 */
struct info_header {
        byte    z_version;
        byte    flags;
	z_word	release;		/* printed as "Release" */
	z_word	resident_size;
	z_word	game_offset;
	z_word	vocab_offset;		/* file offset of the vocabulary table */
	z_word	object_offset;
	z_word	variable_offset;
	z_word	save_area_size;
	z_word	script_status;
	char	rev_date[6];		/* printed as "Serial number"; not NUL-terminated */
	z_word	macro_offset;
	z_word	verify_length;
	z_word	verify_checksum;
	z_word	reserved[17];
};

/* data file currently being processed */
FILE	*infile;
/* column:  characters written so far on the current output line
 * columns: entries per line requested with a digit option; 0 = fit to MAXCOL */
short   column = 0,
        columns = 0;
/* show_flags: print the three flag bytes of each entry
 * numbers:    prefix each entry with its ordinal number
 * did_file:   set once at least one file has been dumped */
char    show_flags = 0,
	numbers = 0,
	did_file = 0;

/*
 * Convert a word held in file byte order to a host integer.
 * The first byte of z's in-memory representation becomes the high-order
 * eight bits of the result and the second byte the low-order eight bits.
 */
unsigned
unzword ( z_word z )
{
	const byte *raw = (const byte *)&z;
	const byte hi = raw[0];
	const byte lo = raw[1];

	return (hi << 8) | lo;
}

/* Finish the current output line and restart the column count. */
void
newline ( void )
{
	column = 0;
	fputc('\n', stdout);
}

/*
 * Unpack the three 5-bit characters of *chars into a 3-character string.
 * Each value is raised by one so it can never be the string terminator;
 * the result therefore uses the S_* code numbering.
 *
 * Returns a pointer to a static buffer that the next call overwrites.
 */
char *
expand ( struct text *chars )
{
	/* static storage starts zeroed; the 4th byte stays the terminator */
	static char triple[4];

	triple[2] = chars->ch3 + 1;
	triple[1] = chars->ch2 + 1;
	triple[0] = chars->ch1 + 1;

	return triple;
}

/* True when code selects one of the 26 slots of an alphabet table. */
static int
decode_in_alphabet ( int code )
{
	return code >= S_OFF && code < S_OFF + (int)sizeof(lower);
}

/*
 * Translate a string of S_* codes (as produced by expand()) into text.
 *
 * s is modified in place: trailing S_FILLER codes are cut off.
 * Returns a pointer to a static buffer of at most MAXCOL characters that
 * the next call overwrites; longer results are truncated.
 *
 * Codes that cannot be interpreted are rendered with the marker from
 * err_chars[] (or '?' for a code outside every table) instead of indexing
 * outside the alphabet tables.
 */
char *
decode ( char *s )
{
	static char	out[MAXCOL+1];
	size_t		len = strlen(s);
	unsigned	outlen = 0;

	/* the len > 0 test keeps an empty or all-filler string from
	   making us look at s[-1] */
	while (len > 0 && s[len-1] == S_FILLER)
		s[--len] = '\0';

	/* every pass stores exactly one character, so stopping at MAXCOL
	   leaves room for the terminator */
	while (*s && outlen < MAXCOL) {
		switch (*s) {
			case S_MACRO1:
			case S_MACRO2:
			case S_MACRO3:
				/* shouldn't appear in vocabulary list */
				out[outlen++] = err_chars[(int)*s];
				break;
			case S_CAPS:
				if (decode_in_alphabet(s[1]))
					out[outlen++] = upper[*++s - S_OFF];
				else
					out[outlen++] = err_chars[S_CAPS];
				break;
			case S_PUNC:
				if (s[1] == S_OFF) {
					/* ASCII literal: the next two codes carry
					   the high 2 and low 5 bits */
					if (s[2] && s[3]) {
						out[outlen] = ((s[2] - 1) & 0x03) << 5;
						out[outlen++] += s[3] - 1;
						s += 3;
					} else {
						/* literal cut short by the end of the
						   word: flag it and stop on the last
						   real code so s++ lands on the NUL */
						out[outlen++] = err_chars[S_PUNC];
						s += s[2] ? 2 : 1;
					}
				} else if (decode_in_alphabet(s[1]))
					out[outlen++] = punct[*++s - S_OFF];
				else
					out[outlen++] = err_chars[S_PUNC];
				break;
			case S_BLANK:
				out[outlen++] = ' ';
				break;
			default:
				if (decode_in_alphabet(*s))
					out[outlen++] = lower[*s - S_OFF];
				else
					out[outlen++] = '?';
				break;
		}
		s++;
	}

	out[outlen] = '\0';

	return out;
}

/* Write one character to stdout and count it towards the current column. */
void
disp_ch ( char x )
{
	column += 1;
	fputc(x, stdout);
}

/*
 * printf-style output to stdout that also advances the column counter by
 * the number of characters written.
 *
 * The text is formatted straight to stdout with vprintf().  There is no
 * intermediate fixed-size buffer to overflow, and the formatted result is
 * never re-used as a format string, so a '%' in the data is printed as is.
 */
void
disp_str ( char *fmt, ... )
{
	va_list 	argptr;
	int		written;

	va_start(argptr, fmt);
	written = vprintf(fmt, argptr);
	va_end(argptr);

	/* vprintf returns a negative value on output error */
	if (written > 0)
		column += written;
}

/*
 * Print a space followed by the eight bits of c, most significant first,
 * as '0' and '1' characters.
 */
void
disp_bits ( char c )
{
	const unsigned	value = (unsigned char)c;
	int		bit;

	disp_ch(' ');
	for (bit = 7; bit >= 0; bit--) {
		if ((value >> bit) & 1u)
			disp_ch('1');
		else
			disp_ch('0');
	}
}

/*
 * Report a fatal error and terminate.
 * Writes "\nError: " and the printf-style message to stderr, then exits
 * with status 1.  Never returns.
 */
void
error ( char *fmt, ... )
{
	va_list 	args;

	fputs("\nError: ", stderr);

	va_start(args, fmt);
	vfprintf(stderr, fmt, args);
	va_end(args);

	exit(1);
}

void
read_error ( void )
{
	error("Can't read file at offset %04X.\n", ftell(infile));
}

void
dump_vocab ( unsigned long pos )
{
	register unsigned	count = 0, index;
	unsigned		words;
	int			vocab_entry_size;
	byte			letters_per_word,
				zwords_per_word;
        short                   entry_width,
                                entries_per_line;
	char			format[sizeof("%%-%ds")];
	char *			buf;

#ifdef DEBUG
	printf("Vocabulary table at offset %04X\n", pos);
#endif

	if (fseek(infile, pos, SEEK_SET) != 0)
		error("Can't seek offset %04X.\n", pos);

	/* skip leading info */
	if ((pos = getc(infile)) == EOF)
		read_error();

	if (fseek(infile, pos, SEEK_CUR) != 0)
		error("Can't skip %ld bytes from offset %04X.\n", pos, ftell(infile));

	if ((vocab_entry_size = getc(infile)) == EOF)
		read_error();

	if (fread(&words, sizeof(words), 1, infile) < 1)
		read_error();
	words = unzword(words);
	if (!numbers)
                printf("%u vocabulary entries\n", words);

	letters_per_word = (vocab_entry_size - 3) / 2 * 3;
	zwords_per_word = letters_per_word / 3;

        entry_width = letters_per_word + 2;
        if (numbers)
                entry_width += 5;
        if (show_flags)
                entry_width += 3 * (columns == 1 ? 8 : 2) + 3;
        entries_per_line = columns ? columns : (MAXCOL + 2) / entry_width;
        
	buf = malloc(letters_per_word + 1);
	sprintf(format, "%%-%ds", letters_per_word);

	while ( count < words ) {
		byte	flags[3];

		++count;
		if (numbers)
			disp_str("%04d ", count);

		for (index = 0; index < zwords_per_word; index++) {
			unsigned	z;

			if (fread(&z, sizeof(z), 1, infile) < 1)
				read_error();
			z = unzword(z);
			if (index)
                                strcat(buf, expand((struct text *)&z));
			else
                                strcpy(buf, expand((struct text *)&z));
		}

                disp_str(format, decode(buf));

		if (fread(flags, sizeof(char), 3, infile) < 3)
			read_error();

                if (show_flags)
                        if (columns == 1) {
				disp_ch(' ');
				disp_bits(flags[1]);
				disp_bits(flags[2]);
				disp_bits(flags[3]);
                        } else
                                disp_str("  %02x %02x %02x", flags[1], flags[2], flags[3]);

                if (entries_per_line > 1 && (count % entries_per_line))
			disp_str("  ");
		else
			newline();
	}

	free(buf);

	if (column)
		newline();
}

/*
 * Open the data file `filename', print its release and serial number, and
 * list its vocabulary.  Any failure ends the program through error().
 *
 * The 64-byte header is read as raw bytes and the fields are picked out by
 * offset, so the result does not depend on the host's integer size, byte
 * order or structure padding.  The offsets are those of struct info_header
 * laid out with 2-byte words and no padding.
 */
void
frob_file ( const char *filename )
{
	enum {
		HDR_SIZE    = 64,
		HDR_RELEASE = 2,	/* 16-bit release number */
		HDR_VOCAB   = 8,	/* 16-bit vocabulary table offset */
		HDR_SERIAL  = 18	/* 6 characters, not NUL-terminated */
	};
	unsigned char	header[HDR_SIZE];
	unsigned	release, vocab_offset;

	if((infile = fopen(filename, "rb")) == NULL)
		error("Can't open file \"%s\".\n", filename);

	printf("%s:\n", filename);

	if (fread(header, 1, sizeof(header), infile) < sizeof(header))
		read_error();

	/* words are stored high byte first */
	release      = ((unsigned)header[HDR_RELEASE] << 8) | header[HDR_RELEASE + 1];
	vocab_offset = ((unsigned)header[HDR_VOCAB]   << 8) | header[HDR_VOCAB + 1];

	printf("Release %u / Serial number %.6s\n", release, (const char *)&header[HDR_SERIAL]);

	dump_vocab(vocab_offset);

	fclose(infile);
}

/*
 * Identification string compiled into the object file: source file name,
 * author and build date.  The "@(#)" prefix is the marker the SCCS what(1)
 * utility searches for.  Left out when LINT is defined.
 */
#ifndef LINT
const char sccsid[] = "@(#) " __FILE__ " by Mike Threepoint compiled " __DATE__;
#endif

/*
 * Print the banner and usage text (INFO plus a final newline) on stdout
 * and exit with status 0.  Never returns.
 */
void
info ( void )
{
	fputs(INFO, stdout);
	fputc('\n', stdout);
	exit(0);
}

/*
 * Apply one option letter.
 *
 *   w        wide list: fit as many entries per line as MAXCOL allows
 *   # or n   toggle word numbers
 *   f or b   toggle display of the flag bytes
 *   h or ?   print the usage text and exit
 *   0-9      set the number of entries per line (0 is the same as w)
 *
 * Any other character is ignored.
 */
void
parse_opt ( char p )
{
	switch (p) {
		case 'w':
                        columns = 0;
			break;
		case '#':
		case 'n':
			numbers = !numbers;
			break;
                case 'f':
                case 'b':
                        show_flags = !show_flags;
			break;
                case 'h':
		case '?':
			info();
                        break;
                default:
			/* <ctype.h> functions need an unsigned char value;
			   a negative plain char is undefined behaviour */
                        if (isdigit((unsigned char)p))
                                columns = (p - '0');
			break;
	}
}

/*
 * Handle one command-line argument.
 *
 *   -abc   every character after the dash is an option letter
 *   /x     DOS-style switch: exactly one option letter after the slash
 *   other  a data file to dump (a blank line separates successive files)
 *
 * Only the two-character form "/x" (or a lone "/") is taken as a switch.
 * A longer argument that starts with '/' is an absolute path on Unix-like
 * systems and is opened as a file rather than being swallowed as an
 * option.
 */
void
parse ( char *parm )
{
	if (*parm == '-') {
		while (*++parm) parse_opt(*parm);
		return;
	}

	if (parm[0] == '/' && (parm[1] == '\0' || parm[2] == '\0')) {
		parse_opt(parm[1]);
		return;
	}

	if (did_file) newline();
	frob_file(parm);
	did_file = 1;
}

/*
 * Program entry: treat every argument as an option or a data file.
 * Returns 0 once at least one file has been dumped.  Otherwise the usage
 * text is printed through info().
 *
 * argc is declared as plain int, the only type the C standard guarantees
 * for main's first parameter, and the loop counter has an explicit type
 * (implicit int was removed from the language in C99).
 */
int
main ( int argc, char *argv[] )
{
	if (argc > 1) {
		int	count;

		if (strcmp(argv[1], "?") == 0)
			info();

		for (count = 1; count < argc; count++)
			parse(argv[count]);

		if (did_file) return 0;
	}

	/* NOTE(review): info() calls exit(0), so the return below is never
	   reached and a run without any input file still reports success. */
	info();
	return 1;
}