(* ******************************************************************** * Postfix arithmetic parser/evaluator * CMSC 330 example, based on project 5 starter code * 20091105 * ********************************************************************) #load "str.cma" type ast = Num of int | App of string * ast * ast let rec unparse = function (* simple version *) Num n -> string_of_int n | App (o, x, y) -> "(" ^ String.concat " " [o; unparse x; unparse y] ^ ")" let rec unparse = function (* literal version *) | Num n -> "Num(" ^ (string_of_int n) ^ ")" | App (o, x, y) -> "App(" ^ (String.concat ", " [("'" ^ o ^ "'"); unparse x; unparse y]) ^ ")" (************************************************************************) (* Lexing, same as scheme.ml starter code *) let rec regex_or = function [] -> "" | (r::[]) -> r | (r::rs) -> r ^ "\\|" ^ (regex_or rs) let token_re = Str.regexp (regex_or ["("; ")"; "[0-9]+"; "[*+/-]"]) let whitespace_re = Str.regexp "[ \t\n]" exception Lex_error of int let tokenize s = let rec tokenize' pos s = if pos >= String.length s then [] else begin if (Str.string_match token_re s pos) then let token = Str.matched_string s in let new_pos = Str.match_end () in (token)::(tokenize' new_pos s) else if (Str.string_match whitespace_re s pos) then tokenize' (Str.match_end ()) s else raise (Lex_error pos) end in tokenize' 0 s (************************************************************************) (* Parsing *) let t_oparen = "^\\((\\)$" (* "(" *) let t_cparen = "^\\()\\)$" (* ")" *) let t_num = "^\\([0-9]+\\)$" (* "0-9" *) let t_op = "^\\([*+/-]\\)$" (* "*|+|-|/" *) let tokens = ref [] (* store of unparsed tokens *) (* look ahead one token, return true if that token matches regexp x *) let p_lookahead x = match !tokens with h::t -> Str.string_match (Str.regexp x) h 0 | [] -> failwith "Lookahead passed end of tokens" (* match the next token with regexp x, return the first capture group *) let p_match x = match !tokens with h::t -> if(Str.string_match (Str.regexp x) h 0) then (tokens := t; Str.matched_group 1 h) else failwith "Match error (2)" | [] -> failwith "Match error (1)" (* Arithmetic expression grammar: E -> ( A ) | n A -> o E E o -> + | - | * | / n -> 0...9 *) let parse l = let rec parse_expr () = (* E (expressions) *) if(p_lookahead t_num) then ( (* E -> n *) Num(int_of_string (p_match t_num)) ) else if(p_lookahead t_oparen) then ( (* E -> ( A ) *) ignore (p_match t_oparen); (* "(" *) let v = parse_application () in (* A *) ignore (p_match t_cparen); (* ")" *) v ) else failwith ("Parse error: unknown lookahead token") and parse_application () = (* A (application) *) if(p_lookahead t_op) then ( (* A -> o E E *) let o = p_match t_op in (* +|-|*|/ *) let x = parse_expr () in (* E *) let y = parse_expr () in (* E *) App(o, x, y) ) else failwith ("Parse error: unknown lookahead token") in tokens := l; parse_expr () (************************************************************************) (* Evaluator *) type value = int let string_of_value x = string_of_int x (* simple evaluator *) let rec eval x = match x with Num n -> n | App(o, x, y) -> (match o with "+" -> (eval x) + (eval y) | "-" -> (eval x) - (eval y) | "*" -> (eval x) * (eval y) | "/" -> (eval x) / (eval y) | _ -> failwith ("unknown op: " ^ o)) (* can't happen *)