(in-package :amlparser) ;;(eval-when (load) ;; (export '(meanings parse-from-string) :amlparser) ;;(export '(mreadl mreadlist p pc rc matrt breakup smd oplist)) ;; ) (defun amlparser::simp(x)(user::simp x)) (defun user::meanings() (amlparser::meanings)) (defvar moretoks nil) ;; TO DO: ;; special function names, special variable names, ;; Sample inputs and correct output: ;; (x^2sin2x)/(p^2-sinx)^2 ;; => ((/ (* (^ |x| 2) (|sin| (* |2| |x|))) (^ (- (^ |p| 2) (|sin| |x|)) 2))) ;; x^2ysinaxcosby ;; => ((* (* (* (^ |x| 2) |y|) (|sin| (* |a| |x|))) (|cos| (* |b| |y|)))) ;; Equations that WILL NOT parse correctly: ;; ;; [:integral:](dx/x)sqrt(x) ;; (the differential must be in the rightmost factor in the integrand, ;; i.e. this could be written [:integral:]sqrt(x)(dx/x) (defparameter *ambigmath* '((S -> (S EQUALS EXPR) infix-qfuncall) (S -> (S LT EXPR) infix-qfuncall) (S -> (S GT EXPR) infix-qfuncall) (S -> (S LE EXPR) infix-qfuncall) (S -> (S GE EXPR) infix-qfuncall) (S -> (EXPR) identity) (EXPR -> (EXPR PLUS TERM) infix-qfuncall) (EXPR -> (EXPR MINUS TERM) infix-qfuncall) (EXPR -> (TERM) identity) ;; TERMs are products of the various subspecies of TERM. ;; Note that multiplication without the * has higher precedence than ;; either multiplication with * or division. (TERM -> (TERM TIMES ANY_TERM) infix-qfuncall) (TERM -> (TERM DIV ANY_TERM) infix-qfuncall) (TERM -> (ANY_TERM) identity) ;; ANY_TERM rewrites to any one of the various TERM types (ANY_TERM -> (LARA_TERM) identity) (ANY_TERM -> (LARF_TERM) identity) (ANY_TERM -> (LARN_TERM) identity) (ANY_TERM -> (MINUS ANY_TERM) negate) ;; LARA_TERMs can be multiplied on the left or on the right by anything ;; (vars or other sorts of terms) (LARA_TERM -> (LARA_TERM LARA_FACTOR) makeprod) (LARA_TERM -> (LARA_FACTOR) identity) ;; If we multiply a LARF_TERM on the right by a factor, the result ;; is a LARA_TERM because it can now be multiplied by VAR on the right (LARA_TERM -> (LARF_TERM DELIMITED_NONVAR_FACTOR) makeprod) ;; Factors in products. (LARA_FACTOR -> (LARA_FACTOR EXP LEFT_DELIMITED_ITEM) make-expt) (LARA_FACTOR -> (LARA_FACTOR FACTORIAL) factorial) (LARA_FACTOR -> (DELIMITED_FACTOR) identity) ;; These are the factors in a product that are well-delimited, by ;; parenthases or otherwise. I split it up into these two nonterminals ;; because this is needed to make FUNAPP_NOPARENS work. (DELIMITED_FACTOR -> (DELIMITED_NONVAR_FACTOR) identity) (DELIMITED_FACTOR -> (VAR) identity) (DELIMITED_FACTOR -> (NUMBER) identity) ;; Doesn't include variables and numbers, which are also delimited (DELIMITED_NONVAR_FACTOR -> (LPAREN EXPR RPAREN) parenfact) (DELIMITED_NONVAR_FACTOR -> (FUNAPP_PARENS) identity) (DELIMITED_NONVAR_FACTOR -> (SQRT LPAREN EXPR RPAREN) funapp-parens-onearg) (DELIMITED_NONVAR_FACTOR -> (DELIMITED_NONVAR_FACTOR FACTORIAL) factorial) ;; These are the items where the left side is ;; well-delimited, but the right side may not be. For example, ;; functions applications where the argument is not enclosed in ;; parenthases, like sinx. I say this is not right-delimited ;; because we cannot see where the function ends without processing ;; the expression to the right (eg, in sinxyf(x), what is inside the ;; sine?) ;; Note that All factors in the language are, in fact, left-delimited (LEFT_DELIMITED_ITEM -> (DELIMITED_NONVAR_FACTOR) identity) (LEFT_DELIMITED_ITEM -> (LARF_FACTOR) identity) (LEFT_DELIMITED_ITEM -> (VAR) identity) (LEFT_DELIMITED_ITEM -> (NUMBER) identity) (LEFT_DELIMITED_ITEM -> (MINUS LEFT_DELIMITED_ITEM) negate) ;; LARF_TERM can be multiplied by anything on the left but only ;; by other LARFs (produces another LARF) and FACTORs ;; (produces a FACTOR) on the right unless there is an asterisk. ;; This is because, for example, sinxy does not mean (sinx)*y ;; but sinxcosx does mean (sinx)*(cosx) (LARF_TERM -> (LARA_TERM LARF_FACTOR) makeprod) (LARF_TERM -> (LARF_TERM LARF_FACTOR) makeprod) (LARF_TERM -> (LARF_FACTOR) identity) (LARF_FACTOR -> (FUNAPP_NOPARENS) identity) ;; I could do this more simply by allowing exprlists of one element; ;; however, this makes every expression reduce to both expr and (expr), which ;; is annoying (more ambiguity) (FUNAPP_PARENS -> (NAME LPAREN EXPRLIST RPAREN) funapp-parens) (FUNAPP_PARENS -> (NAME LPAREN EXPR RPAREN) funapp-parens-onearg) (EXPRLIST -> (EXPR COMMA EXPR) make-1-3-exprlist) (EXPRLIST -> (EXPR COMMA EXPR COMMA EXPRLIST) extend-1-3-exprlist-5) (EXPRLIST -> (EXPR COMMA EXPRLIST) extend-1-exprlist-3) (FUNAPP_NOPARENS -> (FUN FUNARG_NOPARENS) makeoneargfun) (FUNAPP_NOPARENS -> (FUN EXP LEFT_DELIMITED_ITEM FUNARG_NOPARENS) expfun) (FUNARG_NOPARENS -> (VAR_PROD) identity) (FUNARG_NOPARENS -> (LARA_FACTOR) identity) (FUNARG_NOPARENS -> (LEFT_DELIMITED_ITEM) identity) (FUNARG_NOPARENS -> (MINUS FUNARG_NOPARENS) negate) ;; VAR-PROD is a product of several variables, with no asterisk (VAR_PROD -> (VAR_PROD VAR_POWER) makeprod) (VAR_PROD -> (VAR_POWER) identity) ;; This is a variable or number, possibly raised to a power (VAR_POWER -> (VAR EXP LEFT_DELIMITED_ITEM) make-expt) (VAR_POWER -> (NUMBER EXP LEFT_DELIMTIED_ITEM) make-expt) (VAR_POWER -> (VAR) identity) (VAR_POWER -> (NUMBER) identity) (LARN_TERM -> (LARF_TERM LARN_FACTOR) makeprod) (LARN_TERM -> (LARA_TERM LARN_FACTOR) makeprod) (LARN_TERM -> (LARN_FACTOR) identity) (LARN_FACTOR -> (SUMMATION) identity) (LARN_FACTOR -> (INTEGRATION) identity) ;; This handles expressions like integral x+f(x)dx (INTEGRATION -> (INTEGRAL EXPR DIFFERENTIAL) integrate) ;; This handles expressions like integral dx/x (INTEGRATION -> (INTEGRAL INTEGRAND) integrate-integrand) ;; Parsing the integrand: ;; we want to include only terms divisible by a differential (i.e., ;; (xdx)/(sinx) is legal but sinx + dx is not). This could be done by ;; allowing any term in the integrand then using sematics to extract a ;; differential if possible. Instead, I allow only terms whose rightmost ;; factor is a fraction with a differential in the numerator ;; Note that this hack results in ambiguous parses with respect to the ;; associativity of multiplication. I rely on the simplifier to discard ;; the unwanted parses by grouping multiplications together. ;;(INTEGRAND -> (INTEGRAND DIV ANY_TERM) quotient-make-integrand) ;;(INTEGRAND -> (TERM INTEGRAND) product-make-integrand) ;;(INTEGRAND -> (DIFFERENTIAL) differential-make-integrand) ;;(INTEGRAND -> (LPAREN INTEGRAND RPAREN) parenfact) (INTEGRAND -> (INTEGRAND DIV ANY_TERM) quotient-make-integrand) (INTEGRAND -> (TERM DIFFERENTIAL) product-make-integrand) (INTEGRAND -> (DIFFERENTIAL) differential-make-integrand) (INTEGRAND -> (TERM DIFFNTL_FACTOR) product-extend-integrand) (INTEGRAND -> (DIFFNTL_FACTOR) identity) (DIFFNTL_FACTOR -> (LPAREN INTEGRAND RPAREN) parenfact) ;; notation for definite integral sign: [:integral lbound, ubound:] (INTEGRAL -> (LBRACK COLON SYM_INTEGRAL EXPR COMMA EXPR COLON RBRACK) list) ;; indefinite integral sign (INTEGRAL -> (LBRACK COLON SYM_INTEGRAL COLON RBRACK) list) ;; Differentials, such as dx (DIFFERENTIAL -> (DIFFOP VAR) list) ;; Sigma notation for sums (SUMMATION -> (SIGMA TERM) make-summation) ;; This is the notaion I use for the big-sigma summation notation: ;; [: sum, lowerbound, upperbound :] (SIGMA -> (LBRACK COLON SYM_SUM S COMMA S COLON RBRACK) list) ;; Might need these later ;;(STRING -> (STRING NAME) makestring) ;;(STRING -> (NAME) symbol-name) ;; VAR can be any symbol that is not a known function or a number. This ;; is built into the parser (see the lexical-rules function below) ;; Notation for subscripted variables: a[i] (VAR -> (VAR LBRACK EXPR RBRACK) make-subscripted-var) ;; Square roots of numbers should be treated as numbers (NUMBER -> (SQRT LPAREN NUMBER RPAREN) funapp-parens-onearg) (LPAREN -> \( \() (RPAREN -> \) \)) (LBRACK -> \[ \[) (RBRACK -> \] \]) (COLON -> \: \:) (PLUS -> + +) (TIMES -> * *) (MINUS -> - -) (DIV -> / /) (COMMA -> \, \,) (FACTORIAL -> ! !) (EXP -> ^ ^) (DIFFOP -> D D) (DIFFOP -> |d| |d|) ;; Need these rules to make sure d can be a variable as well as the diffop (VAR -> D D) (VAR -> |d| |d|) (EQUALS -> = =) (LT -> < <) (GT -> > >) (LE -> (LT EQUALS) lesign) (GE -> (GT EQUALS) gesign) (SYM_SUM -> SUM SUM) (SYM_INTEGRAL -> INTEGRAL INTEGRAL) (SQRT -> SQRT SQRT) )) (defun factorial (arg fact) (list fact arg)) (defun negate (minus arg) (list minus arg)) (defun make-expt (a caret b) (declare (ignore caret)) (list 'expt a b)) (defun funapp-parens (f lparen args rparen) (cons f args)) (defun funapp-parens-onearg (f lparen arg rparen) (list f arg)) (defun integrate (integral expr differential) (if (= (length integral) 8) (let ((lbound (nth 3 integral)) (ubound (nth 5 integral))) ;; Definite integral (list 'integrate expr (cadr differential) lbound ubound)) (list 'integrate expr (cadr differential)))) ;; Indefinite integral ;; For the less-nice integrals, where the differential is not on the far right of ;; the expression (defun integrate-integrand (integral integrand) (integrate integral (car integrand) (list 'D (cadr integrand)))) (defun make-summation (sigma-list exp) (let ((lbound (nth 3 sigma-list)) (ubound (nth 5 sigma-list))) (list 'sum exp lbound ubound))) (defun expfun (fun caret exponent arg) (list 'expt (list fun arg) exponent)) (defun makestring (str name) (concatenate 'string str (symbol-name name))) (defun make-subscripted-var (var lbrack expr rbrack) (list 'subscript var expr)) (defun make-1-3-exprlist (e1 comma e2) (list e1 e2)) (defun extend-1-3-exprlist-5 (e1 comma1 e2 comma2 elist) (cons e1 (cons e2 elist))) (defun extend-1-exprlist-3 (e comma elist) (cons e elist)) (defun differential-make-integrand (differential) (list 1 (cadr differential))) (defun quotient-make-integrand (integrand div term) (list (list div (car integrand) term) (cadr integrand))) (defun product-extend-integrand (term integrand) (list (list '* term (car integrand)) (cadr integrand))) (defun product-make-integrand (term differential) (list term (cadr differential))) (defun lesign (lt eq) '<=) (defun gesign (gt eq) '>=) (defun lexical-rules (word) "Return a list of rules with word on the right hand side." ;; specially hacked for *mathgram* so that if "foo" is on oplist then ;; fun -> foo is a legit rule. ;; if "foo" is NOT on oplist then ;; var -> is a legit rule. ;; no overlaps except if you put specifics into the grammar! ;; Changed so name -> foo is legitimate for anything ;; var -> foo is legitimate for anything not in oplist ;; fun -> foo is legitimate for anything in oplist ;; 10/18/99 (or (find-all word *grammar* :key #'rule-rhs :test #'equal) (cond ((numberp word) `((number -> ,word ,word))) ((not (isopsym word)) `((name -> ,word ,word) (var -> ,word ,word))) ;; add rules fun -> op (t `((name -> ,word ,word) (fun -> ,word ,word)))) )) ;; (mapcar #'(lambda (cat) `(,cat -> ,word ,word)) *open-categories*) ;***RJF 5/10/99 ;; Changed to simplify expressions before removing duplicates ;; 11/8/99 (RW) (defun meanings (&optional words) "Return all possible meanings of a phrase. Throw away the syntactic part." ;;******* (tmap #'unpack (remove-duplicates (mapcar #'simp (mapcar #'tree-sem (parser words))) :test #'equal))) (defparameter onechartoks t) ;; the following list should include all operators known. They should ;; be sorted so that the longest prefix comes first. That is, cosh before cos. ;; in Here's a few ;; Changed all the function names to uppercase ;; GRW, 2/14/2000 (defparameter oplist '("COSH" "COS" "SINH" "SIN" "TANH" "TAN" "ACOSH" "ACOS" "ARCCOSH" "ARCCOS" "ASINH" "ASIN" "ARCSINH" "ARCSIN" "ATANH" "ATAN" "ARCTANH" "ARCTAN" "SUM" "SQRT" "INTEGRAL" "LN" "LOG" "COT" "EXP" "CSCH" "CSC" "COTH" "COT" "SECH" "SEC" ;; etc for other spellings cosech, cotanh ?, also capitalized? )) ;; put symbols where we can get them fast. (defparameter oplistsyms (mapc #'(lambda (z)(setf (get (intern z :amlparser) ;;WARNING 'mathop) t)) oplist)) ;; string-downcase added to ensure that uppercase operators are recognized ;; RW 12/2/99 ;; Changed to string-upcase, so operators are always in uppercase. ;; GRW 2/14/2000 (defun breakup (s2 oplist) (dolist (s oplist (values (subseq s2 0 1)(subseq s2 1))) (if (equal 0 (search s (string-upcase s2) :start1 0 :start2 0)) (return (values s (subseq s2 (length s))))))) ;; simpsimp functions ;; ;; These functions extend the simplifying abilities of Prof. Fateman's ;; simplifier ;; parse-from-string: Parses a string and returns equivalent lisp code (defun parse-from-string (str) (with-input-from-string (*standard-input* str) (meanings))) (in-package :amlparser) ;;; -*- Mode: Lisp; Syntax: Common-Lisp -*- ;;;; Some of this code is from Paradigms of AI Programming ;;;; Those portions copyright (c) 1991 Peter Norvig ;;;; CFG parser based on Norvig's syntax2.lisp: The PSG-based natural language parser. ;;;; This version handles semantics as described in Section 19.5. ;;;; Includes *grammar5* *grammar6* and *mathgram*. USE one of these.; ;;;; Must also read in auxmacs.lisp auxfns.lisp to get memoization. ;(load "auxmacs") (load "auxfns") ;; Lisp-matlab (Lmat) parser (just the lexical part, for now) ;;(c) copyright 1990, 1999 by Richard J. Fateman ;; This started as Lmath, a Mathematica parser written in Lisp. ;; it was converted to a MATLAB language parser, starting 5/6/99 by RJF ;; it can also be used as a lexical analyzer for tokens read by ;; the CFG parser based on Norvig's syntax2.cl file ;(declaim (optimize (speed 3)(safety 0))) ;;(eval-when (compile) (load "mat")); ;; get all the symbols from this file ;;(in-package :mat) (defvar matbuffer nil) (defvar stream T) ;; if needed ;; The first section consists of readtable hacking for matlab parser. ;; We set up a separate readtable for ;; matlab input, and utilize it when scanning. ;; We use lisp atoms to store information on tokens. (defvar matrt (copy-readtable nil)) ;;(setf (readtable-case matrt) :preserve) ;preserve the case of symbols read in ;; I don't like the following choice, but the common lisp ;; standards committee majority make it very difficult ;; to have both cases work. (setf (readtable-case matrt) :upcase) ;don't preserve the case of symbols read in ;; debugging flags (setq *print-level* nil *print-length* nil *print-pretty* t) (defun pc()(peek-char nil stream nil #\newline)) (defun rc()(read-char stream)) (defun char-to-int (c) ;; return the integer 0-9 corresponding to ;; the character c, #\0 - #\9 ;; will not work in larger bases though.. (let ((h (char-int c))) (cond ((< h 48)(- h 7)) ;; #\A=17 ((< h 58) (- h 48)) ; #\0 is 48 in ascii. (t (- h 87)) ; #\a=97 ))) (defun collect-integer (val r) (cond ((eql (pc) #\newline) val) ((digit-char-p (pc) r) ;r is radix. Matlab is always 10, I guess (collect-integer (+ (char-to-int (rc))(* r val)) r)) (t val))) ;; to test scanner, try typing ;; (mreadl) (defun single-macro-character (stream char) (declare (ignore stream)) (intern (string char) :amlparser ;;; WARNING )) (mapc #'(lambda(r)(set-macro-character r #'single-macro-character nil matrt)) (coerce "+-*/\^&[]()=,%!:{}" 'list)) ;(set-syntax-from-char #\' #\" matrt nil) ;matlab uses ' instead of " (set-macro-character #\< #'(lambda (stream char) (declare (ignore char)) (case (pc) (#\newline '<) (#\= (rc) '<=) (t '<))) nil matrt) ;; the apostrophe is used for string delimiter and transpose. to ;; figure out its use we look at the previous token and if it is ;; an operator the apostrophe is an open-quote. (set-macro-character #\' #'(lambda (stream char) (declare (ignore char)) (cond ((or (null prev) (mattop prev)) (readquote nil)) ; ='Hello a String' (t 'transpose)) ; A' ) nil matrt) (defun mattop(x) (get x 'matop)) ;;; read a string until a "'" or eof (defun readquote (s) (let ((r (read-char stream nil 'eof))) (cond ((eql r 'eof) (coerce (nreverse (cons r s)) 'string)) ((eql r #\' )(coerce (nreverse s) 'string)) (t (readquote (cons r s)))))) (set-macro-character #\~ #'(lambda (stream char) (declare (ignore char)) (case (pc) (#\newline '~) (#\= (rc) '~=) (t '~))) nil matrt) (set-macro-character #\> #'(lambda (stream char) (declare (ignore char)) (case (pc) (#\newline '>) (#\= (rc) '>=) (t '>))) nil matrt) (set-macro-character #\= #'(lambda(stream char) (declare (ignore char)) (case (pc) (#\newline '|=|) (#\= (rc) '|==|) (t '|=|))) nil matrt) ;; 3 dots, ... means throw out rest of line and #\newline. read from next line (set-macro-character #\. #'(lambda (stream char) (declare (ignore char)) ;; (format t "got a .") (case (pc) (#\newline '|.|) (#\. (rc) ;; (format t "got another .") (case (pc) (#\newline '|..|) (#\. ; ... ;;(format t "got a third .") (loop (if (eql (rc) #\newline) (return (rt)))) ;; get rid of rest of line ) (t '|..|))) (#\/ (rc) 'array-rightdiv) (#\* (rc) 'array-mult) (#\\ (rc) 'array-leftdiv) (#\^ (rc) 'array-power) (#\' (rc) 'array-transpose) (t '|.|))) nil matrt) (set-macro-character #\newline #'(lambda(stream char) (declare (ignore char)) 'e-o-l) nil matrt) (defun makemt(x)(setf (get x 'matop) t)) (makemt '\.) (makemt '\;) (mapc #'makemt '(= * ^ & + - [ ] > >= < <= ! ~ ~= / )) (mapc #'makemt '(|.| |..| |(| |)|)) (makemt '|\\|) (makemt '|'|) (makemt '|,|) (defun commentskip (stream char) (declare (ignore c)) (loop (if (eql(rc) #\newline) (return(mread1))))) (set-macro-character #\% #'commentskip nil matrt) ;) ;;; end of the lexical analysis part #| The Falcon project at UIUC also wrote a parser for Matlab, with the goal of generating very fast Fortran 90 code. Among the problematical issues indicated, is the Matlab view of symbols that might be functions or variable or both. e.g. i is a function that returns sqrt(-1) unless it is first assigned a value e.g. i=5 Approximation to the rules: (see Luiz deRose PhD, UIUC, 1996) Initially assign an identifier a variable / function meaning by: 1. all built-in names of functions are functions. 2. all m-file names are functions. (ie.. k.m means k is a function) 3. every other identifier is an unknown. Then scan the program. If an identifier first appears (in lexical order) on the rhs of an assignment, it is a function. If it appears on the lhs, it is (also) a variable. Some identifiers can be multiple-use. |# ;;---------------------------------------------------------- ;;; The Parser ;; You can use (p) to try out the parser by typing in from the ;; keyboard. It sets up the readtable and calls parse-comp. ;; Reading from lines is set up so that if a sentence ends at ;; an end-of-line, the parse is completed. Otherwise, the e-o-l ;; is absorbed and the reading continued. A continuation line ;; can be forced by a \. (This is Matematica's usual operation) (defvar interactive t) ; t means 2 eol's ends a command. not for files. ;; ps will read from a Matlab stream // print to std output ;; e.g. (ps (open "foo.text")) (defun ps(stream &aux (interactive nil) res (*readtable* matrt) (matbuffer nil) (prev nil) z) (rt) (loop (setq res (catch 'endofparse(parse-comp t))) ;; end=t means a #\newline will end expr. (print (cond ;((null res) (return 'done)) ((eq #\newline (pc)) (rc) res) ;; proper ending ((setq z(rt)) (cond ((equal z 'e-o-l)) ;;may also be proper ending (t(format t "~%garbage at end of expression:~s~%" z ))) res))))) (defun psm ;; (meval (parse ( stream-from-file))) (stream &aux (interactive nil) res (*readtable* matrt)(matbuffer nil)z) (rt) (loop (setq res (catch 'endofparse(parse-comp t))) ;; end=t means a #\newline will end expr. (print (cond ;((null res) (return 'done)) ((eq #\newline (pc)) (rc) res) ;; proper ending ((setq z(meval(rt))) ;;; call meval on stuff read in. (cond ((equal z 'e-o-l)) ;;may also be proper ending (t(format t "~%garbage at end of expression:~s~%" z ))) res))))) ;;mreadl is a debugging loop that just reads lexemes until it reads eol (defvar next nil) (defvar prev nil) (defun mreadl(&aux (stream *standard-input* ) next (*readtable* matrt) (prev nil)) (loop (setq next (mread1)) (when (eq next 'e-o-l) (return 'done)) (setf prev next) (print next))) (defun mreadlist() (let ((stream *standard-input* ) (next nil) (*readtable* matrt) (prev nil) (moretoks nil) (tokens nil)) (declare (special moretoks)) (loop (setq next (mread1)) (if (eq next 'e-o-l) (return (nreverse tokens))) (cond ((and onechartoks ;; NIL if we want to allow xcoshx as just one var (setf moretoks (stringmathdecomp next))) ;; xcoshx = x cosh x ? (dolist (i moretoks) (push i tokens) (setf prev i))) (t (setf prev next) (push next tokens)))) )) (defun stringmathdecomp(s) (cond ((null (symbolp s))nil) ; numbers (t(setf s (smd(symbol-name s)))))) ; s= |xcoshx| ==> "xcoshx" (defun smd(s) (cond ((= 0 (length s))nil) (t(multiple-value-bind (first rest) (breakup s oplist) (cons (intern first :amlparser ;;; WARNING )(smd rest)))))) ;; the following list should include all operators known. They should ;; be sorted so that the longest prefix comes first. That is, cosh before cos. ;; in Here's a few ;; put symbols where we can get them fast. (setf oplistsyms (mapc #'(lambda (z)(setf (get (intern z :amlparser) ;;;WARNING 'mathop) t)) oplist)) (defun isopsym(r)(if (symbolp r)(get r 'mathop) nil)) ;; same except this reads from file named filename (defun mreadf ( filename &aux next (*readtable* matrt) (prev nil)) (with-open-file (stream filename :direction :input) (declare (special stream)) (loop (setq next (mread1)) (when (eq (peek-char t stream nil 'eof ) 'eof) (return 'done)) (cond (onechartoks ;; NIL if we want to allow xcoshx as just one var (setf moretoks (stringmathdecomp next)) ;; xcoshx = x cosh x ? (dolist (i moretoks) (print i) (setf prev i))) (t (setf prev next) (print next )))))) (defmacro rt()`(cond((null matbuffer)(mread1)) (t (prog1 matbuffer (setq matbuffer nil ))))) (defmacro eolp(end) ;;used all over to see if we've reached an end of line `(and ,end (eq 'e-o-l (peek-token)))) ;; this function reads a token. Although it looks like it ;; just reads a lisp s-expression or number, it uses a different ;; read-table. If mread1 encounters a #\newline, it returns the ;; atom e-o-l, as specified in the read-table. (defun mread1() ;; (format t "~% next char = ~s" (pc)) (cond ((member (pc)'( #\space #\tab #\page) :test #'char=) (rc)(mread1)) ((digit-char-p (pc));; next character is a digit 0-9 (collect-integer (char-to-int(read-char stream)) 10)) ;radix 10 default (t (intern (or(read-preserving-whitespace stream nil 'e-o-l) 'False) :amlparser) ;;;;;;; WARNING. TRICKY... ;; nil reads as False ))) (defun p (&optional(stream *standard-input*) &aux (interactive t) res (*readtable* matrt) (matbuffer nil) (prev nil)) ; (rt) ;;get something in matbuffer (setq res (catch 'endofparse (parse-comp t))) ;; end=t means a #\newline will end expr. (cond((eq matbuffer 'e-o-l) (if res res 'Null)) ;; proper ending (t (format t "~%Unexpected token at end of expression:~s~%" matbuffer) res))) (defun pt ( parseprog &optional(stream *standard-input*) ;parse test &aux (interactive t) res (*readtable* matrt) (matbuffer nil) (prev nil)) ; (rt) ;;get something in matbuffer (setq res (catch 'endofparse (funcall parseprog t))) ;; end=t means a #\newline will end expr. (cond((eq matbuffer 'e-o-l) (if res res 'Null)) ;; proper ending (t (format t "~%Unexpected token at end of expression:~s~%" matbuffer) res))) (defun peek-token() (cond(matbuffer) (t (setq matbuffer(mread1))))) ;;(use-package :mat) ;; Here is where Norvig's code begins (defvar *grammar* "The grammar used by GENERATE.") (defstruct (rule (:type list)) lhs -> rhs sem) (defstruct (tree (:type list) (:include rule) (:copier nil) (:constructor new-tree (lhs sem rhs)))) (defstruct (parse) "A parse tree and a remainder." tree rem) (defun parse-lhs (parse) (tree-lhs (parse-tree parse))) (defun rules-starting-with (cat) "Return a list of rules where cat starts the rhs." (find-all cat *grammar* :key #'(lambda (rule) (first-or-nil (rule-rhs rule))))) (defun complete-parses (parses) "Those parses that are complete (have no remainder)." (find-all-if #'null parses :key #'parse-rem)) (defun append1 (items item) "Add item to end of list of items." (append items (list item))) (defun parser (&optional words) "Return all complete parses of a list of words." (clear-memoize 'parse) ;*** (if (null words)(setf words (mreadlist))) ;; read from input (mapcar #'parse-tree (complete-parses (parse words)))) (defun use (grammar) "Switch to a new grammar." (clear-memoize 'rules-starting-with) (clear-memoize 'lexical-rules) (length (setf *grammar* grammar))) (defparameter *open-categories* '(N V A ;Name ) "Categories to consider for unknown words") (defun parse (words) "Bottom-up parse, returning all parses of any prefix of words. This version has semantics." (unless (null words) (mapcan #'(lambda (rule) (extend-parse (rule-lhs rule) (rule-sem rule) ;*** (list (first words)) (rest words) nil)) (lexical-rules (first words))))) (defun extend-parse (lhs sem rhs rem needed) ;*** "Look for the categories needed to complete the parse. This version has semantics." (if (null needed) ;; If nothing is needed, return this parse and upward extensions, ;; unless the semantics fails (let ((parse (make-parse :tree (new-tree lhs sem rhs) :rem rem))) (unless (null (apply-semantics (parse-tree parse))) ;*** (cons parse (mapcan #'(lambda (rule) (extend-parse (rule-lhs rule) (rule-sem rule) ;*** (list (parse-tree parse)) rem (rest (rule-rhs rule)))) (rules-starting-with lhs))))) ;; otherwise try to extend rightward (mapcan #'(lambda (p) (if (eq (parse-lhs p) (first needed)) (extend-parse lhs sem (append1 rhs (parse-tree p)) ;*** (parse-rem p) (rest needed)))) (parse rem)))) (defun apply-semantics (tree) "For terminal nodes, just fetch the semantics. Otherwise, apply the sem function to its constituents." (if (terminal-tree-p tree) (tree-sem tree) (setf (tree-sem tree) (apply (tree-sem tree) (mapcar #'tree-sem (tree-rhs tree)))))) (defun terminal-tree-p (tree) "Does this tree have a single word on the rhs?" (and (length=1 (tree-rhs tree)) (atom (first (tree-rhs tree))))) (memoize 'lexical-rules) (memoize 'rules-starting-with) (memoize 'parse :test #'eq) ;;;; Grammars (defun integers (start end) "A list of all the integers in the range [start...end] inclusive." (if (> start end) nil (cons start (integers (+ start 1) end)))) (defun infix-funcall (arg1 function arg2) "Apply the function to the two arguments" (funcall function arg1 arg2)) (defun infix-qfuncall (arg1 function arg2) "Apply the function to the two arguments" (list function arg1 arg2)) (defun union* (x y) (if (null (intersection x y)) (append x y))) (defun set-diff (x y) (if (subsetp y x) (set-difference x y))) ;; end of Norvig's code ;; RJF did this below. ;;; this definitely has bugs in it. *mathgram2* is better. (defun equalsign(x e y)(list 'setf x y)) (defun comma(x e y)(cons x y)) (defun \, (x y)(cons x y)) (defun aster(x e y)(list e x y)) (defun plu(x e y)(list e x y)) (defun makeprod(x y)(list '* x y)) (defun makepower(x arrow y)(list 'expt x y)) (defun brackfun(a b c d)`(aref ,a ,@c)) (defun parenfact(a b c)b) (defun makeoneargfun (f x)(list f x)) ; (function ->(FUN LPAR exprlist RPAR ) funapp) ;;function call (defun funapp(a b c d) (cons a c )) ;; b(c)^c could be b * c^c or (b(c))^c or b (c^c) if we don't know if b is fun or var. ;;[1] USER(314): (m) ;;a=sin(c)^c ;; is this ambiguous?? ;;((SETF |a| (|sin| (EXPT |c| |c|))) ;; or ... (SETF |a| (EXPT (|sin| |c|) |c|))) answer 7/21/99 RJF ;; but a=b(c)^c is not. ;; nor is ;; a=sin c^c (defun tmap(f x) ;; map f over the tree x (cond ((null x)nil) ((consp x)(cons (tmap f (car x)) (tmap f (cdr x)))) (t (funcall f x)))) (defun unpack(x) (if (symbolp x) (intern (symbol-name x)) x)) ;; to remove ucky package qualifiers from an answer, try ;; (tmap #'unpack answer) (use *ambigmath*)