[comp.sys.handhelds] CHASM v0.1 - Chip 48 Assembler

steve@gumby.Altos.COM (Steve Scherf) (04/12/91)

This file contains the source code to an easy-to-use assembler for Chip
48 v2.25. Apologies to net.bandwidth.watchers about this very long posting,
but I have spent a lot of effort on this program and want the world to use it.

The shar archive begins after the virtual perforation.

--------------------------- 8< Cut Here >8 -----------------------------
#! /bin/sh
# This is a shell archive, meaning:
# 1. Remove everything above the #! /bin/sh line.
# 2. Save the resulting text in a file.
# 3. Execute the file with /bin/sh (not csh) to create:
#	chasm.c
#	chasm.h
#	chasm.doc
#	grammar.y
#	lex.c
#	link.c
#	Makefile
#	README
#	sample.ch
# This archive created: Thu Apr 11 15:57:01 1991
export PATH; PATH=/bin:/usr/bin:$PATH
echo shar: "extracting 'chasm.c'" '(2235 characters)'
if test -f 'chasm.c'
then
	echo shar: "will not over-write existing file 'chasm.c'"
else
cat << \SHAR_EOF > 'chasm.c'
/*
 *	chasm.c - main module
 *
 *	@(#)chasm.c	1.1 91/04/10
 *
 *	Copyright (c) 1991 Steve Scherf
 *
 *	Author:	Steve Scherf
 *	Date:	Wed Apr 10 22:53:11 PDT 1991
 *
 */

#include <stdio.h>
#include <string.h>
#include "chasm.h"

char *whatstr = "@(#) chasm v0.1	Copyright (c) 1991 Steve Scherf";

int aflag;			/* true if ascii output */
int oflag;			/* true if redfined output filename */

char *infile;
char *outfile;
char *cout	= "c.out";
char *cascii	= "c.ascii";
char *ctmp	= "c.tmp";
char *xstrrchr();

FILE *ifp;
FILE *ofp;


/*
 * main - parse the command line, assemble the input file and install
 * the result under the output file name.
 *
 * Usage: chasm [-a] [-o output_file] input_file
 *
 * The last argument is always taken as the input file.  Output is first
 * written to the temporary file (ctmp) and only linked to its final
 * name when every check has passed, so a failed assembly never leaves a
 * partial output file behind.
 *
 * Exits with status 0 on success and 1 on any error.
 */
main(argc, argv)
int argc;
char **argv;
{
	int i;		/* number of extra arguments eaten by the current option */
	char *name;

	name = *argv;
	infile = argv[argc-1];

	if(!--argc)
		usage(name);

	for(argv++; *argv && argc > 1; argv += 1 + i, argc -= 1 + i) {
		if(**argv != '-')
			usage(name);
		i = 0;
		while(*(++*argv))
			switch(**argv) {
				case 'a':
					aflag++;
					break;
				case 'o':
					if(!(outfile = *(argv+1)))
						usage(name);
					oflag++;
					i++;
					break;
				default:
					usage(name);
			}
	}

	/*
	 * Exactly one argument, the input file, must be left over.  If
	 * "-o" swallowed the last argument ("chasm -o file") the input
	 * and output names would be the same file, and the unlink of the
	 * output name below would destroy the source.
	 */
	if(argc != 1)
		usage(name);

	if(!oflag)
		if(aflag)
			outfile = cascii;
		else
			outfile = cout;

	if((ifp = fopen(infile, "r")) == 0) {
		fprintf(stderr, "%s: cannot open %s\n", name, infile);
		exit(1);
	}

	if((ofp = fopen(ctmp, "w")) == 0) {
		fprintf(stderr, "%s: cannot open %s\n", name, ctmp);
		exit(1);
	}

	init_links();
	yyparse();
	chk_var();

	/* any failed check, parse error or output error discards the result */
	if(chk_size() || (chk_val() + chk_lab()) || pars_err ||
	    !out_inst(ofp, aflag)) {
		if(unlink(ctmp))
			perror(ctmp);
		printf("No output written to %s\n", outfile);
		exit(1);
	}

	/* replace any previous output file with the freshly written one */
	if(sunlink(outfile))
		exit(1);

	if(link(ctmp, outfile)) {
		fprintf(stderr, "Cannot link %s to %s\n", ctmp, outfile);
		sunlink(ctmp);
		exit(1);
	}

	sunlink(ctmp);

	/* report success explicitly instead of falling off the end of main */
	exit(0);
}


/* remove file n if access() can find it; returns 0 on success, -1 on error */
sunlink(n)
char *n;
{
	/* nothing to remove when the existence check fails */
	if(access(n, 0))
		return 0;

	if(unlink(n) == 0)
		return 0;

	perror(n);
	return -1;
}


/* print the usage line, naming the program by its basename, and exit(1) */
usage(name)
char *name;
{
	char *base;

	/* strip any leading directory components from the program name */
	base = xstrrchr(name, '/');
	base = base ? base + 1 : name;

	fprintf(stderr, "Usage: %s [-a] [-o output_file] input_file\n", base);
	exit(1);
}


/*
 * return a pointer to the last occurrence of c in s, or null if c does
 * not occur; the terminating null character is never matched
 */
char *
xstrrchr(s, c)
register char *s;
register char c;
{
	register char *last;

	for(last = 0; *s != '\0'; s++)
		if(*s == c)
			last = s;

	return last;
}
SHAR_EOF
fi
echo shar: "extracting 'chasm.h'" '(1913 characters)'
if test -f 'chasm.h'
then
	echo shar: "will not over-write existing file 'chasm.h'"
else
cat << \SHAR_EOF > 'chasm.h'
/*
 *	chasm.h - chasm header file
 *
 *	@(#)chasm.h	1.1 91/04/10
 *
 *	Copyright (c) 1991 Steve Scherf
 *
 *	Author:	Steve Scherf
 *	Date:	Wed Apr 10 22:53:11 PDT 1991
 *
 */

#include <stdio.h>

/* external definitions */
extern FILE *ifp;
extern int pars_err;
extern int add_lab();

#define	LOADADR	0x200	/* binaries are loaded at this address */
#define MAXADDR 0xFFF	/* maximum addressible memory location */

/* input token definitions */
#define  STRING	257	/* string token */
#define  REG	258	/* register token */
#define  VAR	259	/* variable token */
#define  CONST	260	/* long constant token */
#define  LABEL	261	/* label token */
#define  IBASE	262	/* first instruction */

/* instruction token definitions */
#define  ADD	0	/* add */
#define  AND	1	/* bitwise and */
#define  BCD	2	/* store BCD representation of a number */
#define  CLD	3	/* clear display */
#define  DATA	4	/* store numeric data in memory (pseudo-instruction) */
#define  DEF	5	/* define a constant (pseudo-instruction) */
#define  DMP	6	/* dump registers */
#define  DSP	7	/* display sprite at coordinates */
#define  INK	8	/* input keystroke */
#define  JMP	9	/* jump to instruction */
#define  JOF	10	/* jump to instruction at offset */
#define  JSR	11	/* jump to subroutine */
#define  MEM	12	/* allocate memory (pseudo instruction) */
#define  MOV	13	/* store a value */
#define  OR	14	/* bitwise or */
#define  RES	15	/* restore registers */
#define  RET	16	/* return from subroutine */
#define  RND	17	/* random number */
#define  SAR	18	/* subtract and replace */
#define  SEQ	19	/* skip if equal */
#define  SHL	20	/* bitwise left shift */
#define  SHR	21	/* bitwise right shift */
#define  SIP	22	/* skip if key pressed */
#define  SNE	23	/* skip if not equal */
#define  SNP	24	/* skip if not pressed */
#define  SSC	25	/* set sprite character */
#define  SUB	26	/* subtract */
#define  XOR	27	/* bitwise exclusive or */
SHAR_EOF
fi
echo shar: "extracting 'chasm.doc'" '(11452 characters)'
if test -f 'chasm.doc'
then
	echo shar: "will not over-write existing file 'chasm.doc'"
else
cat << \SHAR_EOF > 'chasm.doc'

				C H A S M

			 Chip 48 v2.25 assembler

		 Written by Steve Scherf, steve@Altos.COM

		     Copyright (c) 1991 Steve Scherf


This documentation and the accompanying source code may be freely distributed
with the following restrictions:

	- All files must be distributed intact and unaltered. Any fixes or
	  patches to any of the files must be distributed as separate
	  information, but may be included with the original files.

	- No person other than the author may sell this software or
	  distribute it with any product which is not in the public domain
	  or is intended for sale.

	- This software is provided as-is, and is not promised to perform
	  reliably. The user of this software does so at his own risk.

	- Possession of this software implies knowledge and acceptance of
	  these copyright restrictions.


The following is the list of files that are included in this distribution:

	chasm.c
	chasm.doc
	chasm.h
	grammar.y
	lex.c
	link.c
	Makefile
	README
	sample.ch


Chasm v0.1

The purpose of this program is to aid in the creation of programs for
the Chip 48 v2.25 virtual machine, written by Andreas Gustafsson
(gson@niksula.hut.fi). This document assumes a fundamental understanding of
Chip 48. For a description of the Chip 48 program, please refer to the
documentation which is available from a variety of ftp sites and mail servers.


HOW TO USE "CHASM":

Usage:	chasm [-a] [-o output_file] input_file

The "input_file" must be a text file consisting of legal "chasm" instructions.
The instructions in this file will be converted to a binary suitable for
execution by the Chip 48 v2.25 virtual machine. The "-a" option causes output
to be in the form of ascii hex characters rather than in an executable binary
format. This is useful for debugging, as it provides a human-readable output
file. The "-o" option forces the output to be placed in the filename specified
by "output_file". If an error occurs during assembly, no output file is
generated.

Files created by "chasm" are:

	c.out - The default name of the executable binary produced by "chasm".
	c.ascii - The default name of the ascii hex file produced by "chasm".
	c.tmp - A temporary file used during assembly.


INSTRUCTION DESCRIPTIONS:

The following text describes the usage and syntax of the "chasm" instruction
set. The descriptions are all in the following format:

MNEMONIC:	what the mnemonic stands for

	syntax rules

	Explanation of the instruction.

In the syntax rules:

"constant" is a value represented by a number, a single-quoted character, or
	a label. Numbers may be hex, decimal or octal. These types are
	specified as they are in the C language; i.e. hex numbers are
	preceded by 0x, octal numbers are preceded by a zero and decimal
	numbers have no preceding characters. Single-quoted characters
	represent the ascii value of that character. Special characters may
	be escaped as in C.

	Examples:

		0xA9	- hex A9 or decimal 169
		075	- octal 75 or decimal 61
		32	- decimal 32
		'c'	- decimal 99

	Special escape characters are:

		'\t'	- tab or decimal 9
		'\b'	- backspace or decimal 8
		'\r'	- carriage return or decimal 13
		'\n'	- newline or decimal 10
		'\0'	- null or decimal 0
		'\\'	- backslash or decimal 92

	Any escaped character that is not in the above list is treated as if
	it were not escaped.

	A special form of constant is a string. As in many languages, a string
	is a series of characters preceded and ended by double quotes. The
	special escape characters defined above are also valid within strings.
	Strings may only be used in the data instruction and have the same
	effect as a series of constants.

	Example:

		"abcd"

		is the same as

		'a', 'b', 'c', 'd'

		or

		97, 98, 99, 100

"label" is a word not beginning with a digit and composed of alphanumeric
	characters and/or the underbar ("_") character.

"register" is a "V" followed by a single hex digit. This corresponds to one
	of the 16 virtual machine registers.

"dtimer", "stimer", and "I" correspond to the special predefined variables
	delay_timer, sound_timer, and I respectively. For explanations of
	these variables, see the documentation for Chip 48 v2.25.


In the explanation of the instructions, "arg1", "arg2" and "arg3" refer to the
first, second and third arguments to the instruction respectively.

Instructions that are marked with an asterisk are pseudo-instructions.

ADD:	add

	add constant, register
	add register, register
	add register, I

	Add arg1 and arg2 and put the result in arg2. In the second case
	VF is true if a carry occurred.

AND:	and

	and register, register

	Store the bitwise and of arg1 and arg2 in arg2.  VF may change.

BCD:	binary coded decimal

	bcd register

	Store the three byte binary coded decimal representation of arg1 in
	the memory address pointed to by I. (I ... I+2)

CLD:	clear display

	cld

	Clear the display.

DATA: *	data

	data datalist

	Store a list of one-byte constants in memory, where datalist is a
	list of constants and/or strings separated by commas. Examples:

		data 'a', 'b', 'c'
		data 0xAB, "hello", 56, 0123, xyz

	In the latter example, xyz is a label that must be defined somewhere
	within the program. The data list may span more than one line.

DEF: *	define

	def label, constant

	Assign a value to a label. Example:

		def xyz, 0xFF

DMP:	dump

	dmp register

	Dump registers V0 through arg1 to the memory address pointed to by I.

DSP:	display sprite

	dsp constant, register, register

	Display sprite pointed to by I. The sprite is taken to be arg1 bytes
	long and is displayed at coordinates arg2, arg3. VF is true if a
	collision occurred with another sprite.

INK:	input key

	ink register

	Get keypress and store hex value in arg1. Execution is suspended
	until a key is pressed. The speaker beeps when a key is pressed.

JMP:	jump

	jmp constant

	Cause execution to branch to the instruction at address arg1.

JOF:	jump to offset

	jof constant

	Cause execution to branch to the instruction at address arg1 plus
	the value in V0. This is good for branch tables. Offsets are easy
	to compute since all true instructions are two bytes; pseudo-
	instructions are as follows: "mem" takes exactly as many bytes as
	specified; "data" takes one byte per element in the data list, and
	one byte per character in a string; "def" takes no memory. Keep in
	mind that if an odd number of bytes is specified with "mem" or "data",
	a pad byte may be added before the next instruction to ensure that
	it is on an even address boundary.

JSR:	jump to subroutine

	jsr constant

	Cause execution to branch to the instruction at address arg1. Execution
	returns to the branch point when a "ret" instruction is executed.

MEM: *	memory

	mem constant

	Allocate arg1 bytes of memory. Memory is all zeros.

MOV:	move

	mov register, register
	mov constant, register
	mov constant, I
	mov register, {stimer, dtimer}
	mov dtimer, register

	Store value of arg1 in arg2. In the first case VF may change.

OR:	or

	or register, register

	Store the bitwise or of arg1 and arg2 in arg2.  VF may change.

RES:	restore

	res register

	Restore registers V0 through arg1 from the memory address pointed to
	by I.

RET:	return

	ret

	Cause execution to branch to the instruction following the last
	"jsr" instruction executed.

RND:	random

	rnd constant, register

	Generate a random number and bitwise and it with arg1. The result
	is stored in arg2.

SAR:	subtract and replace

	sar register, register

	Subtract arg2 from arg1 and put the result in arg2; VF equals
	not borrow.


SEQ:	skip if equal

	seq constant, register
	seq register, register

	Skip the next instruction if arg1 equals arg2.

SHL:	shift left
	
	shl register

	Shift arg1 left one bit; VF equals carry.

SHR:	shift right

	shr register

	Shift arg1 right one bit; VF equals carry.

SIP:	skip if pressed

	sip register

	Skip the next instruction if the key represented by the hex value
	arg1 is pressed.

SNE:	skip if not equal

	sne constant, register
	sne register, register

	Skip the next instruction if arg1 does not equal arg2.

SNP:	skip if not pressed

	snp register

	Skip the next instruction if the key represented by the hex value
	arg1 is not pressed.

SSC:	set sprite character

	ssc register

	Store in I the address of the 5 byte font sprite for the hex
	character arg1.

SUB:	subtract

	sub register, register

	Subtract arg1 from arg2 and put the result in arg2; VF equals
	not borrow.

XOR:	exclusive or

	xor register, register

	Store the bitwise exclusive or of arg1 and arg2 in arg2. VF may change.


NOTES:

1]	Labels may be used before they are defined, as long as they are defined
	somewhere within the file. The only exception is the "mem"
	pseudo-instruction; labels used in this instruction must be defined
	in advance.

2]	In addition to the "def" instruction, a label may be defined by
	putting it on a line by itself followed by a colon. This will
	set the value of the label to the address of the instruction following
	the label definition.

	Example:

		add V0, V1
		xyz:
		mov 0xFF, V3

	In this code fragment, xyz is set to the address of the "mov"
	instruction. This label is suitable for use in a "jmp", "jsr" or
	"jof" instruction.

3]	The maximum size for a constant is 0xFFF. Some instructions require
	a constant to be smaller. The general rule is: addresses must be
	<= 0xFFF, numerical constants must be <= 0xFF, and sprite sizes
	must be <= 0xF. Using a value that is too large will generate an
	assembler error.

4]	Since both data and instructions share common space, care must be
	taken to ensure that program execution does not fall into addresses
	containing data. This will result in program failure unless the data
	resembles legal instructions. Data space is allocated with the "data"
	and "mem" pseudo-instructions.

5]	Instructions must occupy a single line, except for the "data"
	instruction.

6]	Blank lines and white space are ignored.

7]	Instructions, registers, and numerical constants may be specified in
	either upper or lower case.

8]	Comments are delineated by either the ";" or the "/" character.
	Any text after either of these two characters is ignored by the
	assembler until a newline is reached, so comments must appear after
	instructions.


ERROR MESSAGES:

Error messages generated by the assembler are accompanied by an input file
line number indicating which line in the input file had the error.
Below is a list of possible assembler error messages and their meanings.

"label multiply defined" - A label has been defined more than once.
	
"constant must be forward declared" - A previously undefined label has been
	used in a "mem" instruction.

"illegal variable usage" - A special variable has been used as an argument
	to an instruction that does not allow that variable.

"duplicate label" - More than one "line label" in a row has been defined.

"constant out of range" - A constant on the specified line is larger than 0xFFF.

"Error - program too large for address space" - The binary that would be
	generated from the input file is larger than the maximum legal size.
	The maximum addressable memory location is 0xFFF, but the first 0x200
	bytes of address space are reserved. Thus, the maximum binary size is
	0xE00 bytes.

"Error - undefined labels:" - The list of labels following this message have
	not been defined.

"Error - constant size:" - The list of line numbers following this message
	specifies instructions that have constants that are illegally large.
SHAR_EOF
fi
echo shar: "extracting 'grammar.y'" '(4647 characters)'
if test -f 'grammar.y'
then
	echo shar: "will not over-write existing file 'grammar.y'"
else
cat << \SHAR_EOF > 'grammar.y'
/*
 *	grammar.y - yacc parser
 *
 *	@(#)grammar.y	1.1 91/04/10
 *
 *	Copyright (c) 1991 Steve Scherf
 *
 *	Author:	Steve Scherf
 *	Date:	Wed Apr 10 22:53:11 PDT 1991
 *
 */

%{
#include <stdio.h>

extern char *vartab[];
extern int bcnt;
extern int svin;
int lineno = 1;
int pars_err;

%}

/* input token definitions */

%token  STRING
%token  REG
%token  VAR
%token  CONST
%token  LABEL

/* instruction token definitions */
%token  ADD
%token  AND
%token  BCD
%token  CLD
%token  DATA
%token  DEF
%token  DMP
%token  DSP
%token  INK
%token  JMP
%token  JOF
%token  JSR
%token  MEM
%token  MOV
%token  OR
%token  RES
%token  RET
%token  RND
%token  SAR
%token  SEQ
%token  SHL
%token  SHR
%token  SIP
%token  SNE
%token  SNP
%token  SSC
%token  SUB
%token  XOR


%%	/* rules section */

line	: /* empty */
	| line inst '\n'
		{
			lineno++;
		}

	| line error '\n'
		{
			lineno++;
			yyerrok;
		}
	;

inst	: /* empty */
	| ADD const ',' REG
		{
			add_inst(lineno, 0x7, $4, 1, 0x0, 0, $2, 2);
		}

	| ADD REG ',' REG
		{
			add_inst(lineno, 0x8, $4, 1, $2, 1, 0x4, 1);
		}

	| ADD REG ',' VAR
		{
			if($4 == 2)
				add_inst(lineno, 0xF, $2, 1, 0x0, 0, 0x1E, 2);
			else
				yyerror("illegal variable usage");
		}

	| AND REG ',' REG
		{
			/*
			 * 8XY2: X is the destination (arg2) and Y the
			 * source (arg1), the same operand layout as the
			 * OR and XOR rules.
			 */
			add_inst(lineno, 0x8, $4, 1, $2, 1, 0x2, 1);
		}

	| BCD REG
		{
			add_inst(lineno, 0xF, $2, 1, 0x0, 0, 0x33, 2);
		}

	| CLD
		{
			add_inst(lineno, 0x0, 0x0, 0, 0x0, 0, 0x0E0, 3);
		}

	| DATA dlist
		{
			add_str_list();
		}

	| DEF LABEL ',' const
		{
			register int i;

			i = add_lab($2);
			lineno++;
			add_val(i, $4);
			lineno--;
		}

	| DMP REG
		{
			add_inst(lineno, 0xF, $2, 1, 0x0, 0, 0x55, 2);
		}

	| DSP const ',' REG ',' REG
		{
			add_inst(lineno, 0xD, $4, 1, $6, 1, $2, 1);
		}

	| INK REG
		{
			add_inst(lineno, 0xF, $2, 1, 0x0, 0, 0x0A, 2);
		}

	| JMP const
		{
			add_inst(lineno, 0x1, 0x0, 0, 0x0, 0, $2, 3);
		}

	| JOF const
		{
			add_inst(lineno, 0xB, 0x0, 0, 0x0, 0, $2, 3);
		}

	| JSR const
		{
			add_inst(lineno, 0x2, 0x0, 0, 0x0, 0, $2, 3);
		}

	| MEM const
		{
			add_pad($2);
		}

	| MOV const ',' REG
		{
			add_inst(lineno, 0x6, $4, 1, 0x0, 0, $2, 2);
		}

	| MOV REG ',' REG
		{
			add_inst(lineno, 0x8, $4, 1, $2, 1, 0x0, 1);
		}

	| MOV const ',' VAR
		{
			if($4 == 2)
				add_inst(lineno, 0xA, 0x0, 0, 0x0, 0, $2, 3);
			else
				yyerror("illegal variable usage");
		}

	| MOV REG ',' VAR
		{
			switch($4) {
			    case 0:
				add_inst(lineno, 0xF, $2, 1, 0x0, 0, 0x15, 2);
				break;
			    case 1:
				add_inst(lineno, 0xF, $2, 1, 0x0, 0, 0x18, 2);
				break;
			    case 2:
				yyerror("illegal variable usage");
				break;
			}
		}

	| MOV VAR ',' REG
		{
			if($2 == 0)
				add_inst(lineno, 0xF, $4, 1, 0x0, 0, 0x07, 2);
			else
				yyerror("illegal variable usage");
		}

	| OR REG ',' REG
		{
			add_inst(lineno, 0x8, $4, 1, $2, 1, 0x1, 1);
		}

	| RES REG
		{
			add_inst(lineno, 0xF, $2, 1, 0x0, 0, 0x65, 2);
		}

	| RET
		{
			add_inst(lineno, 0x0, 0x0, 0, 0x0, 0, 0x0EE, 3);
		}

	| RND const ',' REG
		{
			add_inst(lineno, 0xC, $4, 1, 0x0, 0, $2, 2);
		}

	| SAR REG ',' REG
		{
			add_inst(lineno, 0x8, $4, 1, $2, 1, 0x7, 1);
		}

	| SEQ REG ',' REG
		{
			add_inst(lineno, 0x5, $2, 1, $4, 1, 0x0, 1);
		}

	| SEQ const ',' REG
		{
			add_inst(lineno, 0x3, $4, 1, 0x0, 0, $2, 2);
		}

	| SHL REG
		{
			add_inst(lineno, 0x8, $2, 1, 0x0, 0, 0x0E, 2);
		}

	| SHR REG
		{
			add_inst(lineno, 0x8, $2, 1, 0x0, 0, 0x06, 2);
		}

	| SIP REG
		{
			add_inst(lineno, 0xE, $2, 1, 0x0, 0, 0x9E, 2);
		}

	| SNE const ',' REG
		{
			add_inst(lineno, 0x4, $4, 1, 0x0, 0, $2, 2);
		}

	| SNE REG ',' REG
		{
			add_inst(lineno, 0x9, $2, 1, $4, 1, 0x0, 1);
		}

	| SNP REG
		{
			add_inst(lineno, 0xE, $2, 1, 0x0, 0, 0xA1, 2);
		}

	| SSC REG
		{
			add_inst(lineno, 0xF, $2, 1, 0x0, 0, 0x29, 2);
		}

	| SUB REG ',' REG
		{
			add_inst(lineno, 0x8, $4, 1, $2, 1, 0x5, 1);
		}

	| XOR REG ',' REG
		{
			add_inst(lineno, 0x8, $4, 1, $2, 1, 0x3, 1);
		}

	| LABEL ':'
		{
			if(svin != -1) {
				add_val(svin, 0);
				yyerror("duplicate label");
			}
			svin = add_lab($1);
		}
	;

dlist	: dlent
	| dlist ',' dlent
	| dlist ',' newln dlent
	;

dlent	: const
		{
			add_con(lineno, $1);
			$$ = $1;
		}

	| STRING
		{
			add_str(lineno, (char *)$1);
			$$ = $1;
		}
	;

/*
 * const - a numeric constant or a label reference.
 *
 * Plain numbers are passed through and must lie in 0 .. 0xFFF.  A label
 * is turned into a reference that is always greater than 0xFFF: its
 * index in the label list, plus one, is stored above the low 12 bits.
 * The link.c routines recover the index with (value >> 12) - 1.
 */
const	: CONST
		{
			/* out of range numbers are reported and replaced by 0 */
			if($1 > 0xFFF || $1 < 0) {
				yyerror("constant out of range");
				$$ = 0;
			}
			else
				$$ = $1;
		}

	| LABEL
		{
			register int i;

			/* add_lab returns the index of the new or existing label */
			i = add_lab($1);
			$$ = ((i + 1) << 12) + 0xFFF;
		}

newln	: '\n'
		{
			lineno++;
		}

	| newln '\n'
		{
			lineno++;
		}
	;

%%	/* C code */

/* report error s against the current input line and flag the parse as failed */
yyerror(s)
char *s;
{
	pars_err = 1;
	printf("line %d: %s\n", lineno, s);
}
SHAR_EOF
fi
echo shar: "extracting 'lex.c'" '(5351 characters)'
if test -f 'lex.c'
then
	echo shar: "will not over-write existing file 'lex.c'"
else
cat << \SHAR_EOF > 'lex.c'
/*
 *	lex.c - lexical analyzer
 *
 *	@(#)lex.c	1.1 91/04/10
 *
 *	Copyright (c) 1991 Steve Scherf
 *
 *	Author:	Steve Scherf
 *	Date:	Wed Apr 10 22:53:11 PDT 1991
 *
 */

#include <stdio.h>
#include <ctype.h>
#include "chasm.h"

extern int yylval;

char *insttab[] = {
	"add", "and", "bcd", "cld", "data", "def", "dmp", "dsp", "ink",
	"jmp", "jof", "jsr", "mem", "mov", "or", "res", "ret", "rnd",
	"sar", "seq", "shl", "shr", "sip", "sne", "snp", "ssc", "sub", "xor"
};

int ninst = sizeof(insttab) / sizeof(char *);

char *vartab[] = {
	"dtimer",
	"stimer",
	"i"
};

int nvar = sizeof(vartab) / sizeof(char *);


char getch();
char getcconst();
char getslash();
char getslashval();
char getqchar();
char *gettoken();
int getinstr();
int getstr();
int getconst();
int getreg();
int gethex();
int getoct();
int getdec();
int getvar();


/*
 * lexical analyzer - classify the next input token for the parser.
 *
 * Returns 0 at end of input, the character itself for one-character
 * tokens, STRING/CONST/VAR/REG/LABEL with the token value in yylval, or
 * IBASE plus the instruction table index for a mnemonic.  Anything that
 * cannot be classified yields 1, which matches no token in the grammar.
 */
yylex()
{
	/*
	 * Label text is handed to the parser by address, so it must stay
	 * valid after this function returns: the buffers are static, not
	 * automatic.  Two buffers are used alternately because a rule such
	 * as "def label, label" needs the first label's text again after
	 * the second label has been scanned.
	 */
	static char lbl[2][512];
	static int cur;
	register int i;
	register char *s;

	s = gettoken();

	if(!s)
		return 0;

	if(*s == '"')
		if((yylval = (int)getstr()) != 0)
			return STRING;
		else
			return 1;

	if(*s == '\'')
		if((yylval = getcconst()) != -1)
			return CONST;
		else
			return 1;

	if(isctok(*s))
		return *s;

	/* mnemonics, special variables and registers take priority over labels */
	if((i = getinstr(s)) >= 0)
		return(i + IBASE);

	if((yylval = getvar(s)) != -1)
		return VAR;

	if((yylval = getreg(s)) != -1)
		return REG;

	if((yylval = getconst(s)) != -1)
		return CONST;

	if(islabel(s) != 0) {
		cur = !cur;
		strcpy(lbl[cur], s);
		/* yylval is an int, so the pointer is passed through a cast */
		yylval = (int)lbl[cur];
		return LABEL;
	}

	return 1;
}


/*
 * read one token from input
 *
 * Returns a pointer to a static buffer holding the token text, or null
 * at end of input.  A token is either a single one-character token (see
 * isctok) or a run of characters ended by white space, ',' or ':'.  The
 * buffer is overwritten by the next call.
 */
char *
gettoken()
{
	register char *p;
	static char l[512];
	/* one-character token that ended the previous word, not yet returned */
	static char ctok = 0;

	/* hand out the delimiter saved by the previous call first */
	if(ctok) {
		l[0] = ctok;
		l[1] = '\0';
		ctok = 0;
		return l;
	}

	/* skip white space, but stop at a newline since it is a token itself */
	p = l;
	while((*p = getch()) != EOF && isspace(*p) && !isctok(*p))
		continue;

	/* NOTE: *p is a char, so this test compares a char against EOF */
	if(*p == EOF)
		return 0;

	if(isctok(*p)) {
		*(++p) = '\0';
		return l;
	}

	/*
	 * collect the rest of the word; when the buffer fills up the loop
	 * stops and the character read last is overwritten by the terminator
	 */
	p++;
	while((*p = getch()) != EOF && !isspace(*p) && *p != ',' && *p != ':' &&
	    p < l + sizeof(l) - 1)
		p++;
	/* remember a delimiter that is a token so the next call returns it */
	if(isctok(*p))
		ctok = *p;
	*p = '\0';

	return l;
}


/*
 * get one character, ignoring comments
 *
 * A ';' or '/' starts a comment that runs to the end of the line; the
 * whole comment is reported to the caller as a single newline.  All
 * other characters, including the end-of-file indication from getc(),
 * are returned converted to char.
 */
char
getch()
{
	register int c;

	c = getc(ifp);
	if(c != ';' && c != '/')
		return c;

	/*
	 * Discard the comment.  The EOF test keeps a comment on an
	 * unterminated last line from looping forever; the newline is
	 * still returned so that the line is ended normally, and the
	 * following call reports the end of file.
	 */
	while((c = getc(ifp)) != '\n' && c != EOF)
		continue;
	return '\n';
}


/* return 1 if c is a token all by itself (, newline " ' or :), else 0 */
isctok(c)
register c;
{
	switch(c) {
	    case ',':
	    case '\n':
	    case '"':
	    case '\'':
	    case ':':
		return 1;
	}

	return 0;
}


/*
 * read the remainder of a single-quoted character constant (the opening
 * quote has already been consumed); returns the character value, or -1
 * if the constant is malformed.  A newline that cuts the constant short
 * is pushed back so the parser still sees the end of the line.
 */
char
getcconst()
{
	char val, term;

	val = getc(ifp);
	if(val == '\n') {
		ungetc(val, ifp);
		return -1;
	}

	/* only a raw newline is rejected above; an escaped one is a value */
	if(val == '\\' && (val = getslash()) == EOF)
		return -1;

	term = getc(ifp);
	if(term == '\'')
		return val;

	if(term == '\n')
		ungetc(term, ifp);

	return -1;
}


/*
 * read the body of a double-quoted string (the opening quote has been
 * consumed) into a static buffer, translating backslash escapes; returns
 * the buffer address cast to int, or 0 if the string is not closed by a
 * double quote before the end of the line, the input or the buffer
 */
int
getstr()
{
	register char *p;
	static char l[512];

	for(p = l; ; p++) {
		*p = getc(ifp);
		if(*p == EOF || *p == '\n' || *p == '"' ||
		    p >= l + sizeof(l) - 1)
			break;

		if(*p == '\\' && (*p = getslash()) == EOF)
			return 0;
	}

	/* leave the newline for the parser; the string itself is in error */
	if(*p == '\n') {
		ungetc('\n', ifp);
		return 0;
	}

	if(*p != '"')
		return 0;

	*p = '\0';

	return((int)l);
}


/*
 * read the character following a backslash and return the character the
 * escape stands for; unknown escapes stand for themselves.  A newline
 * is pushed back and reported as EOF, as is the end of the input.
 */
char
getslash()
{
	register char c;

	c = getc(ifp);
	if(c == '\n') {
		ungetc(c, ifp);
		return EOF;
	}

	if(c == 'b')
		return '\b';
	if(c == 'n')
		return '\n';
	if(c == 'r')
		return '\r';
	if(c == 't')
		return '\t';
	if(c == '0')
		return '\0';
	if(c == EOF)
		return EOF;

	return c;
}


/*
 * return 1 if s does not start with a digit and contains nothing but
 * letters, digits and underbars, otherwise 0
 */
islabel(s)
char *s;
{
	register char *p;

	if(isdigit(*s))
		return 0;

	for(p = s; *p != '\0'; p++)
		if(!isalnum(*p) && *p != '_')
			return 0;

	return 1;
}


/*
 * look s up in the instruction table, accepting each letter in either
 * case; returns the table index, or -1 if s is not a mnemonic
 */
int
getinstr(s)
char *s;
{
	register char *p1, *p2;
	register int i;

	for(i = 0; i < ninst; i++) {
		p1 = s;
		p2 = insttab[i];

		/* advance while both strings continue and the letters agree */
		while(*p1 && *p2 && (*p1 == *p2 || *p1 == toupper(*p2))) {
			p1++;
			p2++;
		}

		/* a match only if both strings ended together */
		if(!*p1 && !*p2)
			return i;
	}

	return -1;
}


/*
 * if s is a register, return its number
 *
 * A register is a 'V' or 'v' followed by exactly one hex digit.
 * Returns the register number 0x0 .. 0xF, or -1 if s is not a register.
 */
int
getreg(s)
register char *s;
{
	register int x;

	/*
	 * s[1] must be checked before s[2]: for the one-character token
	 * "v", s[2] lies beyond the terminator and holds whatever was left
	 * in the buffer, and an empty digit string converts to 0, so a
	 * bare "v" could otherwise be taken for register V0.
	 */
	if((s[0] == 'V' || s[0] == 'v') && s[1] != '\0' && s[2] == '\0') {
		x = gethex(s+1);
		if(x >= 0x0 && x <= 0xF)
			return x;
	}

	return -1;
}


/*
 * convert s to an integer, choosing the base from its prefix as in C:
 * "0x" or "0X" is hex, any other leading '0' is octal, everything else
 * is decimal; returns -1 if s is not a number in that base
 */
int
getconst(s)
register char *s;
{
	if(s[0] != '0')
		return(getdec(s));

	if(s[1] == 'x' || s[1] == 'X')
		return(gethex(s+2));

	return(getoct(s+1));
}


/*
 * convert the hex digit string s to an integer; returns -1 if s holds
 * anything but hex digits.  Upper case digits are folded to lower case
 * in place, so s is modified.  An empty string converts to 0.
 */
int
gethex(s)
register char *s;
{
	register int n;

	n = 0;
	while(*s) {
		if(!isxdigit(*s))
			return -1;

		n <<= 4;
		if(isdigit(*s))
			n = n + *s - '0';
		else {
			if(isupper(*s))
				*s = tolower(*s);
			n = n + *s - 'a' + 10;
		}
		s++;
	}

	return n;
}


/*
 * convert the octal digit string s to an integer; returns -1 if s holds
 * anything but the digits 0 through 7.  An empty string converts to 0.
 */
int
getoct(s)
register char *s;
{
	register int n;

	n = 0;
	while(*s) {
		if(!isdigit(*s) || *s > '7')
			return -1;

		n <<= 3;
		n = n + *s - '0';
		s++;
	}

	return n;
}


/*
 * convert the decimal digit string s to an integer; returns -1 if s
 * holds anything but digits.  An empty string converts to 0.
 */
int
getdec(s)
register char *s;
{
	register int n;

	n = 0;
	while(*s) {
		if(!isdigit(*s))
			return -1;

		n *= 10;
		n = n + *s - '0';
		s++;
	}

	return n;
}


/*
 * look s up in the table of special variables, accepting each letter in
 * either case; returns the table index, or -1 if s is not a variable
 */
int
getvar(s)
char *s;
{
	register char *name;
	register int i, k;

	for(i = 0; i < nvar; i++) {
		name = vartab[i];

		for(k = 0; s[k] && name[k]; k++)
			if(s[k] != name[k] && s[k] != toupper(name[k]))
				break;

		/* a match only if both strings ended together */
		if(s[k] == '\0' && name[k] == '\0')
			return i;
	}

	return -1;
}
SHAR_EOF
fi
echo shar: "extracting 'link.c'" '(8096 characters)'
if test -f 'link.c'
then
	echo shar: "will not over-write existing file 'link.c'"
else
cat << \SHAR_EOF > 'link.c'
/*
 *	link.c - linked list routines
 *
 *	@(#)link.c	1.1 91/04/10
 *
 *	Copyright (c) 1991 Steve Scherf
 *
 *	Author:	Steve Scherf
 *	Date:	Wed Apr 10 22:53:11 PDT 1991
 *
 */

#include <stdio.h>
#include "chasm.h"

#define T_INST	0	/* instruction type link */
#define T_CON	1	/* constant type link */
#define T_STR	2	/* string type link */
#define T_PAD	3	/* pad type link */


/*
 * One node type serves all three lists kept by this module: labels,
 * instructions and data constants.  Which member of the union is in use
 * depends on the list the node is on; the fields are reached through
 * the l_* macros defined below the structure.
 */
struct link {
	struct link *l_next;	/* next node on the label or instruction list */
	int l_type;		/* one of the T_* link types */
	int l_n;		/* input line number (l_line) or pad size (l_pad) */
	union {
		/* label node */
		struct {
			char *l_l;	/* label name (l_lab) */
			int l_v;	/* value assigned to the label (l_val) */
			int l_va;	/* nonzero once a value is assigned (l_valid) */
		} l;
		/* instruction node */
		struct {
			int l_i[4];	/* the four operands a0 .. a3 (l_op) */
			int l_s[3];	/* the sizes s1 .. s3 given for a1 .. a3 (l_sz) */
		} i;
		/* data list node */
		struct {
			struct link *l_c;	/* next entry of the data list (l_cp) */
			int l_con;		/* constant (l_const) or string length (l_len) */
			unsigned char *l_s;	/* string text (l_str) */
		} c;
	} u;
};

#define	l_line	l_n
#define	l_pad	l_n
#define	l_lab	u.l.l_l
#define l_val	u.l.l_v
#define l_valid	u.l.l_va
#define l_op	u.i.l_i
#define l_sz	u.i.l_s
#define	l_cp	u.c.l_c
#define	l_const	u.c.l_con
#define	l_len	u.c.l_con
#define	l_str	u.c.l_s


struct link *lh;	/* head of linked-list of labels */
struct link *lt;	/* tail of linked-list of labels */
struct link *ih;	/* head of linked-list of instructions */
struct link *it;	/* tail of linked-list of instructions */
struct link *ch;	/* head of linked-list of constants */
struct link *ct;	/* tail of linked-list of constants */

struct link *get_lab_in();
struct link *smalloc();

int bcnt = LOADADR;	/* output byte count */
int svin = -1;		/* saved index of label to be defined */

extern int lineno;


/*
 * set up linked lists: the label and instruction lists each start as a
 * header node that points to itself, the constant list starts out empty
 */
init_links()
{
	lt = lh = smalloc(sizeof(struct link));
	lh->l_next = lh;

	it = ih = smalloc(sizeof(struct link));
	ih->l_next = ih;

	ct = ch = 0;
}


/*
 * return the index of label lab in the label list; a label that is not
 * on the list yet is appended, with no value assigned, and gets the
 * next free index
 */
int
add_lab(lab)
char *lab;
{
	register struct link *l;
	register int i;

	/* look for an existing entry, counting positions as we go */
	i = 0;
	l = lh->l_next;
	while(l != lh) {
		if(strcmp(lab, l->l_lab) == 0)
			return i;
		i++;
		l = l->l_next;
	}

	/* not found: build a new entry holding a private copy of the name */
	l = smalloc(sizeof(struct link));
	l->l_lab = (char *)smalloc(strlen(lab)+1);
	strcpy(l->l_lab, lab);
	l->l_val = 0;
	l->l_valid = 0;

	/* append it at the tail of the circular list */
	l->l_next = lh;
	lt->l_next = l;
	lt = l;

	return i;
}


/*
 * assign val to the label with index in; a label that already has a
 * value keeps it and is reported as multiply defined against the line
 * before the current one
 */
add_val(in, val)
int in, val;
{
	register int i;
	register struct link *l;

	/* walk to the in'th entry after the list header */
	l = lh->l_next;
	for(i = in; i > 0; i--)
		l = l->l_next;

	if(!l->l_valid) {
		l->l_valid = 1;
		l->l_val = val;
	}
	else {
		lineno--;
		yyerror("label multiply defined");
		lineno++;
	}
}


/* return the label list entry with index in (0 is the first label) */
struct link *
get_lab_in(in)
register int in;
{
	register struct link *l;

	l = lh->l_next;
	while(in) {
		l = l->l_next;
		in--;
	}

	return l;
}


/*
 * check the size of the potential binary: steps bcnt back by one and
 * returns 1, after printing an error, if the result is above MAXADDR;
 * returns 0 otherwise
 */
chk_size()
{
	bcnt--;
	if(bcnt <= MAXADDR)
		return 0;

	printf("Error - program too large for address space ");
	printf("(0x%X > 0x%3X)\n", bcnt, MAXADDR);
	return 1;
}


/*
 * list every label that was used but never given a value; returns
 * nonzero if any such label has been reported
 */
chk_lab()
{
	static int first = 1;	/* cleared once the heading has been printed */
	register struct link *l;

	for(l = lh->l_next; l != lh; l = l->l_next) {
		if(l->l_valid)
			continue;

		if(first) {
			printf("Error - undefined labels:\n");
			first = 0;
		}
		printf("\t%s\n", l->l_lab);
	}

	return(!first);
}


/* check the value of constants to make sure they are in bounds */
chk_val()
{
	register struct link *l, *lp;
	register int x;
	struct link *lp2;
	int i, y, err = 0;

	for(l = ih->l_next; l != ih; l = l->l_next)
		switch(l->l_type) {
		    case T_CON:
			for(lp = l->l_cp; lp; lp = lp->l_cp) {
				x = lp->l_const;
				if(x > 0xFFF) {
					lp2 = get_lab_in((x >> 12)-1);
					x = lp2->l_const;
				}
					
				if(x > 0xFF) {
					print_ctl(lp->l_line, !err);
					err = 1;
					break;
				}
			}
			break;

		    case T_INST:
			for(i = 0; i < 3; i++) {
				if(l->l_op[i+1] > 0xFFF) {
					lp = get_lab_in((l->l_op[i+1] >> 12)-1);
					x = lp->l_val;
				}
				else
					x = l->l_op[i+1];

				y = l->l_sz[i];
				if(y < 0)
					y++;

				if(x >= (1 << (y * 4))) {
					print_ctl(l->l_line, !err);
					err = 1;
					break;
				}
			}
			break;

		    default:
			break;
		}

	return err;
}


/*
 * if a line label is still waiting for the instruction that follows it,
 * give it the current byte count as its value
 */
chk_var()
{
	int pending;

	pending = svin;
	if(pending != -1)
		add_val(pending, bcnt);
}


/*
 * append a two byte instruction to the instruction list
 *
 * line is the input line number, a0 .. a3 are the operands and s1 .. s3
 * the sizes given for a1 .. a3.  A pending line label is defined as the
 * (even) address of the instruction, and a one byte pad is inserted
 * first if the byte count is odd.
 */
add_inst(line, a0, a1, s1, a2, s2, a3, s3)
int line, a0, a1, s1, a2, s2, a3, s3;
{
	register struct link *l;
	int odd;

	/* forget any data list entries that were never turned into a list */
	ct = ch;

	odd = bcnt % 2;

	if(svin != -1) {
		add_val(svin, bcnt + odd);
		svin = -1;
	}

	if(odd)
		add_pad(1);

	l = smalloc(sizeof(struct link));
	l->l_type = T_INST;
	l->l_line = line;

	l->l_op[0] = a0;
	l->l_op[1] = a1;
	l->l_sz[0] = s1;
	l->l_op[2] = a2;
	l->l_sz[1] = s2;
	l->l_op[3] = a3;
	l->l_sz[2] = s3;

	/* append at the tail of the circular instruction list */
	l->l_next = ih;
	it->l_next = l;
	it = l;

	bcnt += 2;
}


/*
 * append constant c, found on input line line, to the data list that is
 * being collected; the list header is created on first use and entries
 * left over from an earlier list are reused before new ones are made
 */
add_con(line, c)
int line, c;
{
	if(ch == 0) {
		ct = ch = smalloc(sizeof(struct link));
		ch->l_cp = 0;
	}

	/* make sure there is an entry after the current tail, then step to it */
	if(ct->l_cp == 0) {
		ct->l_cp = smalloc(sizeof(struct link));
		ct->l_cp->l_cp = 0;
	}
	ct = ct->l_cp;

	ct->l_const = c;
	ct->l_line = line;
	ct->l_type = T_CON;
}


/* add a string to linked list of constants */
add_str(line, s)
int line;
char *s;
{
	int i;

	if(!ch) {
		ch = smalloc(sizeof(struct link));
		ch->l_cp = 0;
		ct = ch;
	}

	if(!ct->l_cp) {
		ct->l_cp = smalloc(sizeof(struct link));
		ct = ct->l_cp;
		ct->l_cp = 0;
	}
	else
		ct = ct->l_cp;

	i = strlen(s);
	ct->l_len = i;
	ct->l_str = (unsigned char *)smalloc(++i);
	strcpy(ct->l_str, s);

	ct->l_type = T_STR;
	ct->l_line = line;
}


/*
 * Append a pad directive of n bytes to the instruction list and advance
 * the byte count by the same amount.
 *
 *	n	pad length, or (when above 0xFFF) a reference: (n >> 12) - 1
 *		is handed to get_lab_in() and the entry's l_val is used as
 *		the length. The entry must already be valid; otherwise an
 *		error is raised through yyerror() and the pad is 0 bytes.
 *
 * A label waiting for its address (svin) is bound to the byte count as
 * it stands before the pad.
 */
add_pad(n)
int n;
{
	register struct link *node;

	if(svin != -1) {
		add_val(svin, bcnt);
		svin = -1;
	}

	/* resolve a symbolic length */
	if(n > 0xFFF) {
		node = get_lab_in((n >> 12) - 1);
		if(node->l_valid)
			n = node->l_val;
		else {
			yyerror("constant must be forward declared");
			n = 0;
		}
	}

	/* rewind the constant-list tail to its head */
	ct = ch;
	bcnt += n;

	/* append the pad at the tail of the circular instruction list */
	node = smalloc(sizeof(struct link));
	node->l_pad = n;
	node->l_type = T_PAD;
	node->l_next = ih;
	it->l_next = node;
	it = node;
}


/* add a constant list directive to the linked list of instructions */
add_str_list()
{
	register struct link *l;
	register int i;

	if(ch == ct)
		return;

	if(svin != -1) {
		add_val(svin, bcnt);
		svin = -1;
	}

	for(l = ch->l_cp, i = 0; l; l = l->l_cp)
		if(l->l_type == T_STR)
			i += l->l_len;
		else
			i++;

	bcnt += i;

	ch->l_type = T_CON;
	ch->l_next = ih;
	it->l_next = ch;
	it = ch;
	ch = ct->l_cp;
	ct->l_cp = 0;
	ct = ch;
}


/*
 * Write the value v to fp as one byte: two upper-case hex digits when
 * asc is nonzero, a single raw byte (v truncated to unsigned char)
 * otherwise. Returns 1 on success, 0 if the expected amount was not
 * written.
 */
static int
put_octet(fp, asc, v)
FILE *fp;
int asc, v;
{
	unsigned char c;

	if(asc)
		return(fprintf(fp, "%.2X", v) == 2);

	c = v;
	return(fwrite(&c, 1, 1, fp) == 1);
}


/*
 * Write the whole instruction list to fp.
 *
 *	fp	output stream
 *	asc	nonzero for hex text, zero for raw bytes
 *
 * T_CON items emit every node on their l_cp chain: strings byte by byte
 * up to the terminating NUL, other nodes as one byte. T_INST items emit
 * one 16-bit word, high byte first, built from l_op[0] in the top nibble
 * and l_op[1..3] in the nibbles below it. T_PAD items emit l_pad zero
 * bytes. Operand and constant values above 0xFFF are references:
 * (value >> 12) - 1 is handed to get_lab_in() and the entry's l_val is
 * emitted instead.
 *
 * Returns 1 on success, 0 as soon as a write comes up short.
 */
out_inst(fp, asc)
FILE *fp;
int asc;
{
	register struct link *item, *con, *ref;
	register int v;
	int i, word;
	unsigned char *s;

	for(item = ih->l_next; item != ih; item = item->l_next) {
		if(item->l_type == T_CON) {
			for(con = item->l_cp; con; con = con->l_cp) {
				if(con->l_type == T_STR) {
					for(s = con->l_str; *s; s++)
						if(!put_octet(fp, asc, *s))
							return 0;
					continue;
				}

				v = con->l_const;
				if(v > 0xFFF) {
					ref = get_lab_in((v >> 12) - 1);
					v = ref->l_val;
				}

				if(!put_octet(fp, asc, v))
					return 0;
			}
		}
		else if(item->l_type == T_INST) {
			word = item->l_op[0] << 12;

			/* operands 1..3 land at bit offsets 8, 4 and 0 */
			for(i = 1; i < 4; i++) {
				v = item->l_op[i];
				if(v > 0xFFF) {
					ref = get_lab_in((v >> 12) - 1);
					v = ref->l_val;
				}
				word |= v << (12 - (i * 4));
			}

			if(asc) {
				if(fprintf(fp, "%.4X", word) != 4)
					return 0;
			}
			else {
				/* high byte first */
				if(!put_octet(fp, 0, word >> 8))
					return 0;
				if(!put_octet(fp, 0, word & 0xFF))
					return 0;
			}
		}
		else if(item->l_type == T_PAD) {
			for(i = 0; i < item->l_pad; i++)
				if(!put_octet(fp, asc, 0))
					return 0;
		}
	}

	return 1;
}


/*
 * Print one entry of the constant-size error list.
 *
 *	line	source line number of the offending item
 *	first	nonzero to print the section header before the entry
 */
print_ctl(line, first)
int line, first;
{
	if(first != 0)
		fputs("Error - constant size:\n", stdout);

	printf("\tline %d\n", line);
}


/*
 * "Safe" malloc: allocate size bytes and return the block. If malloc()
 * fails, report it with perror() and terminate with exit status 1, so
 * callers never see a null pointer. The memory is not cleared.
 */
struct link *
smalloc(size)
int size;
{
	struct link *mem;

	mem = (struct link *)malloc(size);
	if(mem)
		return mem;

	perror("malloc");
	exit(1);
	/* NOTREACHED */
}
SHAR_EOF
fi
echo shar: "extracting 'Makefile'" '(537 characters)'
if test -f 'Makefile'
then
	echo shar: "will not over-write existing file 'Makefile'"
else
cat << \SHAR_EOF > 'Makefile'
#
#	Makefile - chasm makefile
#
#	@(#)Makefile	1.1 91/04/10
#
#	Copyright (c) 1991 Steve Scherf
#
#	Author:	Steve Scherf
#	Date:	Wed Apr 10 22:53:11 PDT 1991
#

# Inputs and outputs. y.tab.o is built from y.tab.c, which yacc
# generates from $(GRM).
SRC = chasm.c lex.c link.c
OBJ = chasm.o lex.o link.o y.tab.o
GRM = grammar.y
HDR = chasm.h
BIN = chasm
MAK = Makefile
RDM = README
DOC = chasm.doc
SMP = sample.ch

# Every file that belongs to the distribution; used by "wc" and "shar".
FILES = $(SRC) $(GRM) $(HDR) $(DOC) $(MAK) $(RDM) $(SMP)

CFLAGS = -O

# Link the assembler. mcs is a System V utility (-d removes the binary's
# comment section); the leading "-" keeps the build from failing on
# systems that do not have it.
$(BIN): $(OBJ)
	cc $(CFLAGS) -o $(BIN) $(OBJ)
	-mcs -d $(BIN)

lex.o: $(HDR)
link.o: $(HDR)
chasm.o: $(HDR)

# Regenerate the parser whenever the grammar changes.
y.tab.c: $(GRM)
	yacc $?

# Line/word/character counts for the distribution files.
wc:
	wc $(FILES)

# Pack the distribution into a shell archive.
shar:
	shar $(FILES) > chasm.shar
SHAR_EOF
fi
echo shar: "extracting 'README'" '(2027 characters)'
if test -f 'README'
then
	echo shar: "will not over-write existing file 'README'"
else
cat << \SHAR_EOF > 'README'
/*
 *	README - chasm README file
 *
 *	@(#)README	1.1 91/04/10
 *
 *	Copyright (c) 1991 Steve Scherf
 *
 *	Author:	Steve Scherf
 *	Date:	Wed Apr 10 22:53:11 PDT 1991
 *
 */

Chasm v0.1

Since you're reading this, you've most likely unshar-ed the chasm package.
The unpacked files should include:

	chasm.c
	chasm.doc
	chasm.h
	grammar.y
	lex.c
	link.c
	Makefile
	README
	sample.ch

Chasm is written for System V Unix, but should run under BSD and other
operating systems with little or no modification. To compile, merely type
"make". The result should be the successful compilation of a binary called
"chasm". This program was written mostly as an exercise in learning yacc
and general assembler construction, and as a result is probably a little
over-complex. There are many features that are not really necessary, but
are useful and might come in handy. Chasm really does make it easy to write
Chip 48 programs!

At this point I have extensively tested chasm to make sure it generates the
opcodes laid out in the original Chip 48 v2.25 document. Each instruction
does indeed produce the proper opcode. However, I have only written a few
programs with it and have not actually tested every instruction in the context
of a program. Therefore, it is possible that some instructions may not work
properly if the documentation is erroneous; nevertheless, I believe that chasm
is fairly bug-free.

Chasm is simple to use. Simply type "chasm filename" where "filename" is
a file containing chasm assembly instructions. If chasm successfully
assembles the file, the result should be a file called "c.out" which contains
an executable Chip 48 binary. This file can be downloaded to the HP 48
via kermit (make sure the mode is set to binary).

Included is a sample chasm file called sample.ch. Try assembling and
running it!

If you have any questions, comments or bugs to report, don't hesitate to send
email. I can't promise that I will respond very quickly since I am often
very busy, but I will try.

Steve Scherf
steve@Altos.COM
SHAR_EOF
fi
echo shar: "extracting 'sample.ch'" '(2060 characters)'
if test -f 'sample.ch'
then
	echo shar: "will not over-write existing file 'sample.ch'"
else
cat << \SHAR_EOF > 'sample.ch'
/ This is a sample chasm file. It doesn't do an awful lot, but makes
/ a good example. Keep in mind that this is pretty much of a hack.
/ The only key used by the program is "+".

/ Registers used:
/	V0 - scratch
/	V1 - keypress flag
/	V2 - key comparison
/	V3 - sprite x
/	V4 - sprite y
/	V5 - sad/smile flag
/	V6 - scratch
/	V7 - scratch

/ Program entry. The first instruction jumps over the credit string that
/ follows it, so the string bytes are never executed.
	jmp start			/ jump over the data
	data "By Steve Scherf"		/ this shows up for all to see

/ One-time setup: clear the screen, load the initial register values (see
/ the register table at the top of the file) and draw the first sprite.
start:
	cld				/ blank the screen
	mov 0, V1			/ initialize keypress flag
	mov 0xF, V2			/ this is the plus key
	mov 32, V3			/ initialize x coord
	mov 16, V4			/ initialize y coord
	mov 0, V5			/ initialize smiley flag
	mov sprite, I			/ set pointer to sprite
	dsp 6, V3, V4			/ display the sprite
/ Main loop: poll the keyboard on every pass and read the delay timer.
/ Presumably sne skips the following jsr when V0 is not 0, so the sprite
/ moves only once the timer has run down - confirm against chasm.doc.
loop:
	jsr get_key			/ check for keyboard input
	mov dtimer, V0			/ copy dtimer to V0
	sne 0, V0			/ is it time to move sprite?
	jsr move_sprite			/ do so if it is time
	jmp loop			/ go back to top

/ don't fall through here

/ get_key - sample the key whose code is in V2 and react to a new press.
/	Reads:	V2 (key code), V1 (keypress flag left by the previous call)
/	Writes:	V1 (new keypress flag), V6, V7; on a new press also V5
/		(toggled), stimer and I
/ Returns early unless the key is down now and the previous flag was not 1,
/ so holding the key does not retrigger the swap.
get_key:
	mov 0, V7			/ clear V7
	snp V2				/ check for the "+" key
	mov 1, V7			/ set V7 to true if pressed
	mov V1, V6			/ set V6 to old value of keypress flag
	mov V7, V1			/ set keypress flag accordingly

	seq 1, V7			/ is the key pressed?
	ret				/ return if not
	sne 1, V6			/ if V6 is true, key has not been lifted
	ret				/ return if not lifted

	mov 1, V6			/ V6 = 1, the value loaded into stimer
	mov V6, stimer			/ beep for 1/60th second
	mov 1, V6			/ V6 = 1 again, used as the xor mask
	xor V6, V5			/ toggle smile/sad flag
	dsp 6, V3, V4			/ clear the old sprite
	mov sprite, I			/ point I to the new sprite
	mov 6, V6			/ 6 = number of bytes in one sprite
	seq 0, V5			/ is this the second sprite?
	add V6, I			/ point I to the second sprite
	dsp 6, V3, V4			/ display the new sprite
	ret

/ move_sprite - redraw the sprite at its next position and restart the
/ delay timer.
/	Reads:	V3, V4 (current coords); I is used as it stands for both
/		dsp calls
/	Writes:	V3 (x + 1), V4 (y + 255), V0, dtimer
move_sprite:
	dsp 6, V3, V4			/ clear the old sprite
	add 1, V3			/ compute new coords
	add 255, V4			/ this is the same as subtracting 1
	dsp 6, V3, V4			/ display the sprite at new coords
	mov 15, V0			/ we can only set timer from a register
	mov V0, dtimer			/ set timer for 1/4 second
	ret

/ Sprite bitmaps: two 6-byte images stored back to back, one per data
/ line. get_key reaches the second image by adding 6 to I.
sprite:
	data 0x66, 0x66, 0x00, 0x00, 0x42, 0x3C		/ smiley sprite
	data 0x66, 0x66, 0x00, 0x00, 0x3C, 0x42		/ sad sprite
SHAR_EOF
fi
exit 0
#	End of shell archive
-- 
Steve Scherf
steve@Altos.COM    ...!{sun|sco|pyramid|amdahl|uunet}!altos!steve

These opinions are solely mine, but others may share them if they like.