gjc@mitech.COM (02/07/90)
Archive-name: siod-cgol/05-Feb-90 Original-posting-by: gjc@mitech.COM Original-subject: CGOL revisited: A Pratt parser for SIOD (scheme in one defun) Reposted-by: emv@math.lsa.umich.edu (Edward Vielmetti) [This is an experimental alt.sources re-posting from the newsgroup(s) comp.lang.scheme. Comments on this service to emv@math.lsa.umich.edu (Edward Vielmetti).] From time to time people ask me for CGOL, so I've decided to make the technology more available by consing up something that will run in SIOD version 2.3 directly, without the need to bootstrap the thing as with the original CGOL implementation. Please forgive me if the code looks like Maclisp. The values for the binding powers are a tricky area, and these are somewhat like what is documented for Macsyma. ---- START OF PRATT.SCM ---- ;; -*-mode:lisp-*- ;; ;; A simple Pratt-Parser for SIOD: 2-FEB-90, George Carrette, GJC@PARADIGM.COM ;; ;; COPYRIGHT (c) 1990 BY ;; PARADIGM ASSOCIATES INCORPORATED, CAMBRIDGE, MASSACHUSETTS. ;; See the source file SLIB.C for more information. ;; ;; ;; Based on a theory of parsing presented in: ;; ;; Pratt, Vaughan R., ``Top Down Operator Precedence,'' ;; ACM Symposium on Principles of Programming Languages ;; Boston, MA; October, 1973. ;; ;; The following terms may be useful in deciphering this code: ;; NUD -- NUll left Denotation (op has nothing to its left (prefix)) ;; LED -- LEft Denotation (op has something to left (postfix or infix)) ;; LBP -- Left Binding Power (the stickiness to the left) ;; RBP -- Right Binding Power (the stickiness to the right) ;; ;; ;; Example calls ;; ;; (pl '(f [ a ] = a + b / c)) => (= (f a) (+ a (/ b c))) ;; ;; (pl '(if g [ a COMMA b ] then a > b else k * c + a * b)) ;; => (if (g a b) (> a b) (+ (* k c) (* a b))) ;; ;; Notes: ;; ;; This code must be used with siod.scm loaded, in siod version 2.3 ;; ;; For practical use you will want to write some code to ;; break up input into tokens. (defvar *eof* (list '*eof*)) ;; (defun pl (l) ;; parse a list of tokens (setq l (append l '($))) (toplevel-parse (lambda (op arg) (cond ((eq op 'peek) (if l (car l) *eof*)) ((eq op 'get) (if l (pop l) *eof*)) ((eq op 'unget) (push arg l)))))) (defun peek-token (stream) (stream 'peek nil)) (defun read-token (stream) (stream 'get nil)) (defun unread-token (x stream) (stream 'unget x)) (defun toplevel-parse (stream) (if (eq *eof* (peek-token stream)) (read-token stream) (parse -1 stream))) (defun get (sym key) ;; symbolconc takes the place of an explicit hash-table (setq sym (symbolconc sym '+INTERNAL-PLIST)) (and (symbol-bound? sym) (cdr (assq key (symbol-value sym))))) (defun putprop (sym val key) (set-cdr! (let ((cell (symbolconc sym '+INTERNAL-PLIST))) (or (assq key (if (symbol-bound? cell) (symbol-value cell) (set-symbol-value! cell nil))) (car (set-symbol-value! cell (cons (list key) (symbol-value cell)))))) val)) (defun plist (sym) (setq sym (symbolconc sym '+INTERNAL-PLIST)) (and (symbol-bound? sym) (symbol-value sym))) (defun value-if-symbol (x) (if (symbol? x) (symbol-value x) x)) (defun nudcall (token stream) (if (symbol? token) (if (get token 'nud) ((value-if-symbol (get token 'nud)) token stream) (if (get token 'led) (error 'not-a-prefix-operator token) token) token) token)) (defun ledcall (token left stream) ((value-if-symbol (or (and (symbol? token) (get token 'led)) (error 'not-an-infix-operator token))) token left stream)) (defun lbp (token) (or (and (symbol? token) (get token 'lbp)) 200)) (defun rbp (token) (or (and (symbol? token) (get token 'rbp)) 200)) (defvar *parse-debug* nil) (defun parse (rbp-level stream) (if *parse-debug* (print `(parse ,rbp-level))) (defun parse-loop (translation) (if (< rbp-level (lbp (peek-token stream))) (parse-loop (ledcall (read-token stream) translation stream)) (begin (if *parse-debug* (print translation)) translation))) (parse-loop (nudcall (read-token stream) stream))) (defun header (token) (or (get token 'header) token)) (defun parse-prefix (token stream) (list (header token) (parse (rbp token) stream))) (defun parse-infix (token left stream) (list (header token) left (parse (rbp token) stream))) (defun parse-nary (token left stream) (cons (header token) (cons left (prsnary token stream)))) (defun parse-matchfix (token left stream) (cons (header token) (prsmatch (or (get token 'match) token) stream))) (defun prsnary (token stream) (defun loop (l) (if (eq? token (peek-token stream)) (begin (read-token stream) (loop (cons (parse (rbp token) stream) l))) (reverse l))) (loop (list (parse (rbp token) stream)))) (defun prsmatch (token stream) (if (eq? token (peek-token stream)) (begin (read-token stream) nil) (begin (defun loop (l) (if (eq? token (peek-token stream)) (begin (read-token stream) (reverse l)) (if (eq? 'COMMA (peek-token stream)) (begin (read-token stream) (loop (cons (parse 10 stream) l))) (error 'comma-or-match-not-found (read-token stream))))) (loop (list (parse 10 stream)))))) (defun delim-err (token stream) (error 'illegal-use-of-delimiter token)) (defun erb-error (token left stream) (error 'too-many token)) (defun premterm-err (token stream) (error 'premature-termination-of-input token)) (defmac (defprops form) (defun loop (l result) (if (null? l) `(begin ,@result) (loop (cddr l) `((putprop ',(cadr form) ',(cadr l) ',(car l)) ,@result)))) (loop (cddr form) nil)) (defprops $ lbp -1 nud premterm-err) (defprops COMMA lbp 10 nud delim-err) (defprops ] nud delim-err led erb-err lbp 5) (defprops [ nud open-paren-nud led open-paren-led lbp 200) (defprops if nud if-nud rbp 45) (defprops then nud delim-err lbp 5 rbp 25) (defprops else nud delim-err lbp 5 rbp 25) (defprops - nud parse-prefix led parse-nary lbp 100 rbp 100) (defprops + nud parse-prefix led parse-nary lbp 100 rbp 100) (defprops * led parse-nary lbp 120) (defprops = led parse-infix lbp 80 rbp 80) (defprops ** lbp 140 rbp 139 led parse-infix) (defprops := led parse-infix lbp 80 rbp 80) (defprops / led parse-infix lbp 120 rbp 120) (defprops > led parse-infix lbp 80 rbp 80) (defprops < led parse-infix lbp 80 rbp 80) (defprops >= led parse-infix lbp 80 rbp 80) (defprops <= led parse-infix lbp 80 rbp 80) (defprops not nud parse-prefix lbp 70 rbp 70) (defprops and led parse-nary lbp 65) (defprops or led parse-nary lbp 60) (defun open-paren-nud (token stream) (if (eq (peek-token stream) ']) nil (let ((right (prsmatch '] stream))) (if (cdr right) (cons 'sequence right) (car right))))) (defun open-paren-led (token left stream) (cons (header left) (prsmatch '] stream))) (defun if-nud (token stream) (define pred (parse (rbp token) stream)) (define then (if (eq? (peek-token stream) 'then) (parse (rbp (read-token stream)) stream) (error 'missing-then))) (if (eq? (peek-token stream) 'else) `(if ,pred ,then ,(parse (rbp (read-token stream)) stream)) `(if ,pred ,then))) ---- END OF PRATT.SCM ----