On this page:
18.1 Scaling up with syntax
18.2 The Loot+   interpreter
18.3 A bit more sugar
18.4 Exceptional behavior
18.5 Exceptional transformation
18.6 Quotation
18.7 Pattern matching
7.4

18 Mug: matching, throwing, quoting

    18.1 Scaling up with syntax

    18.2 The Loot+ interpreter

    18.3 A bit more sugar

    18.4 Exceptional behavior

    18.5 Exceptional transformation

    18.6 Quotation

    18.7 Pattern matching

18.1 Scaling up with syntax

We have developed a small, but representative functional programming language. But there’s still a long way to go from our Loot language to the kind of constructs we expect in a modern, expressive programming language. In particular, there’s a fairly large gap between Loot and the subset of Racket we’ve explored so far in this class.

For example, our programs have made extensive use of pattern matching, quotation, quasi-quotation, and lots of built-in functions. In this section, we’ll examine how to scale Loot up to a language that’s nicer to program in. As we’ll see, much of this can be accomplished without extending the compiler. Rather we can explain these language features by elaboration of fancier language syntax into the existing core forms.

In this chapter, we’ll explore several ideas at the level of an interpreter, but the techniques should work just as well for the compiler.

18.2 The Loot+ interpreter

Let us start with an interprter for the Loot language, plus all of the extensions considered in the various assignments up through Assignment 7.

mug/interp-env.rkt

  #lang racket
  (provide (all-defined-out))
  (require (only-in "syntax.rkt" prim?))
   
  ;; type Expr =
  ;; | Integer
  ;; | Boolean
  ;; | Character
  ;; | String
  ;; | Symbol
  ;; | Variable
  ;; | '()
  ;; | `(box ,Expr)
  ;; | `(if ,Expr ,Expr ,Expr)
  ;; | `(let ,(List Variable Expr) ... ,Expr)
  ;; | `(letrec ,(List Variable Lambda) ... ,Expr)
  ;; | `(λ ,Bindings ,Expr)
  ;; | `(apply ,Expr ,Expr)
  ;; | `(,Prim ,Expr ...)
  ;; | `(,Expr ,Expr ...)
   
  ;; type Value =
  ;; | Integer
  ;; | Boolean
  ;; | Character
  ;; | String
  ;; | Symbol
  ;; | '()
  ;; | (Box Value)
  ;; | (Cons Value Value)
  ;; | Function
   
  ;; type Function =
  ;; | (Values ... -> Answer)
   
  ;; type Answer = Value | 'err
   
  ;; type REnv = (Listof (List Variable Value))
   
  ;; Expr REnv -> Answer
  (define (interp-env e r)
    (match e
      ;; produce fresh strings each time a literal is eval'd
      [(? string? s) (string-copy s)]
      [(? value? v) v]
      [''() '()]
      [`',(? symbol? s) s]    
      [`(if ,e0 ,e1 ,e2)
       (match (interp-env e0 r)
         ['err 'err]
         [v
          (if v
              (interp-env e1 r)
              (interp-env e2 r))])]
      [(? symbol? x)
       (lookup r x)]
      [`(let (,`(,xs ,es) ...) ,e)
       (match (interp-env* es r)
         ['err 'err]
         [vs
          (interp-env e (append (zip xs vs) r))])]
      [`(letrec (,`(,xs ,es) ...) ,e)
       (letrec ((r* (λ ()
                      (append
                       (zip xs
                            (map (λ (l) (λ vs (apply (interp-env l (r*)) vs)))
                                 es))
                       r))))
         (interp-env e (r*)))]
      [`(λ (,xs ...) ,e)
       (λ vs
         (if (= (length vs) (length xs))
             (interp-env e (append (zip xs vs) r))
             'err))]
      [`(λ (,xs ... . ,x) ,e)
       (λ vs
         (if (>= (length vs) (length xs))
             (interp-env e (append (zip/remainder xs vs x) r))
             'err))]
      [`(apply ,e0 ,e1)
       (let ((v0 (interp-env e0 r))
             (vs (interp-env e1 r)))
         (if (list? vs)
             (apply v0 vs)
             'err))]
      [`(,(? prim? p) ,es ...)
       (let ((as (interp-env* es r)))
         (interp-prim p as))]
      [`(,e ,es ...)
       (match (interp-env* (cons e es) r)
         [(list f vs ...)
          (if (procedure? f)
              (apply f vs)
              'err)]
         ['err 'err])]
      [_ 'err]))
   
  ;; (Listof Expr) REnv (Listof Defn) -> (Listof Value) | 'err
  (define (interp-env* es r)
    (match es
      ['() '()]
      [(cons e es)
       (match (interp-env e r)
         ['err 'err]
         [v (cons v (interp-env* es r))])]))
   
  ;; Any -> Boolean
  (define (value? x)
    (or (integer? x)
        (boolean? x)
        (char? x)
        (string? x)))
   
  ;; Prim (Listof Answer) -> Answer
  (define (interp-prim p as)
    (match (cons p as)
      [(list p (? value?) ... 'err _ ...) 'err]
      [(list '- (? integer? i0)) (- i0)]
      [(list '- (? integer? i0) (? integer? i1)) (- i0 i1)]
      [(list 'abs (? integer? i0)) (abs i0)]
      [(list 'add1 (? integer? i0)) (+ i0 1)]
      [(list 'sub1 (? integer? i0)) (- i0 1)]
      [(list 'zero? (? integer? i0)) (zero? i0)]
      [(list 'char? v0) (char? v0)]
      [(list 'integer? v0) (integer? v0)]
      [(list 'boolean? v0) (boolean? v0)]
      [(list 'integer->char (? codepoint? i0)) (integer->char i0)]
      [(list 'char->integer (? char? c)) (char->integer c)]
      [(list '+ (? integer? i0) (? integer? i1)) (+ i0 i1)]
      [(list 'cons v0 v1) (cons v0 v1)]
      [(list 'car (? cons? v0)) (car v0)]
      [(list 'cdr (? cons? v0)) (cdr v0)]
      [(list 'string? v0) (string? v0)]
      [(list 'box? v0) (box? v0)]
      [(list 'empty? v0) (empty? v0)]
      [(list 'cons? v0) (cons? v0)]
      [(list 'cons v0 v1) (cons v0 v1)]
      [(list 'box v0) (box v0)]
      [(list 'unbox (? box? v0)) (unbox v0)]
      [(list 'string-length (? string? v0)) (string-length v0)]
      [(list 'make-string (? natural? v0) (? char? v1)) (make-string v0 v1)]
      [(list 'string-ref (? string? v0) (? natural? v1))
       (if (< v1 (string-length v0))
           (string-ref v0 v1)
           'err)]
      [(list '= (? integer? v0) (? integer? v1)) (= v0 v1)]
      [(list '< (? integer? v0) (? integer? v1)) (< v0 v1)]
      [(list '<= (? integer? v0) (? integer? v1)) (<= v0 v1)]
      [(list 'char=? (? char? v0) (? char? v1)) (char=? v0 v1)]
      [(list 'boolean=? (? boolean? v0) (? boolean? v1)) (boolean=? v0 v1)]
      [(list 'eq? v0 v1) (eq? v0 v1)]
      [(list 'gensym) (gensym)]
      [(list 'symbol? v0) (symbol? v0)]
      [(list 'procedure? v0) (procedure? v0)]      
      [_ 'err]))
   
  ;; REnv Variable -> Answer
  (define (lookup env x)
    (match env
      ['() 'err]
      [(cons (list y v) env)
       (match (symbol=? x y)
         [#t v]
         [#f (lookup env x)])]))
   
  ;; REnv Variable Value -> Value
  (define (ext r x v)
    (cons (list x v) r))
   
  ;; Any -> Boolean
  (define (codepoint? x)
    (and (integer? x)
         (<= 0 x #x10FFFF)
         (not (<= #xD800 x #xDFFF))))
   
  ;; (Listof A) (Listof B) -> (Listof (List A B))
  (define (zip xs ys)
    (match* (xs ys)
      [('() '()) '()]
      [((cons x xs) (cons y ys))
       (cons (list x y) (zip xs ys))]))
   
  ;; like zip but ys can be longer and remainder is associated with r
  (define (zip/remainder xs ys r)
    (match* (xs ys)
      [('() ys) (list (list r ys))]
      [((cons x xs) (cons y ys))
       (cons (list x y) (zip/remainder xs ys r))]))
   
      
   
18.3 A bit more sugar

As we saw in Loot, we can consider syntaxtic extensions of language that elaborate into the core Expr form of a language. We saw this with the define-form that we rewrote into letrec. We can consider further extensions such as and, or, and even cond.

Here are functions for transforming each of these forms into simpler forms:

Examples

> (define (cond->if c)
    (match c
      [`(cond (else ,e)) e]
      [`(cond (,c ,e) . ,r)
       `(if ,c ,e (cond ,@r))]))
> (define (and->if c)
    (match c
      [`(and) #t]
      [`(and ,e) e]
      [`(and ,e . ,r)
       `(if ,e (and ,@r) #f)]))
> (define (or->if c)
    (match c
      [`(or) #f]
      [`(or ,e) e]
      [`(or ,e . ,r)
       (let ((x (gensym)))
         `(let ((,x ,e))
            (if ,x ,x (or ,@r))))]))

Note that these functions do not necessarily eliminate all cond, and, or or forms, but rather eliminate one occurrence, potentially creating a new occurrence within a subexpression:

Examples

> (cond->if '(cond [(even? x) 8] [else 9]))

'(if (even? x) 8 (cond (else 9)))

> (cond->if '(cond [else 9]))

9

> (and->if '(and))

#t

> (and->if '(and 8))

8

> (and->if '(and 8 9))

'(if 8 (and 9) #f)

> (or->if '(or))

#f

> (or->if '(or 8))

8

> (or->if '(or 8 9))

'(let ((g8240 8)) (if g8240 g8240 (or 9)))

The idea is that another function will drive the repeated use of these functions until all these extended forms are eliminated.

You may wonder why the or elaboration is complicated by the let-binding. Consider a potential simpler approach:

Examples

> (define (or->if-simple c)
    (match c
      [`(or) #f]
      [`(or ,e) e]
      [`(or ,e . ,r)
       `(if ,e ,e (or ,@r))]))

But compare the elaboration of the following exmample:

Examples

> (or->if-simple '(or (some-expensive-function) #t))

'(if (some-expensive-function) (some-expensive-function) (or #t))

> (or->if '(or (some-expensive-function) #t))

'(let ((g8244 (some-expensive-function))) (if g8244 g8244 (or #t)))

The second program is much more efficient. Moreover, if some-expensive-function had side-effects, the first program would duplicate them, thereby changing the program’s intended behavior.

We can incorporate these new functions into the desugar function, which will transform extended programs into “core” expressions:

Examples

; Expr+ -> Expr
> (define (desugar e+)
    (match e+
      [`(begin ,@(list `(define (,fs . ,xss) ,es) ...) ,e)
       `(letrec ,(map (λ (f xs e) `(,f (λ ,xs ,(desugar e)))) fs xss es)
          ,(desugar e))]
      [(? symbol? x)         x]
      [(? imm? i)            i]
      [`',(? symbol? s)      `',s]
      [`(,(? prim? p) . ,es) `(,p ,@(map desugar es))]
      [`(if ,e0 ,e1 ,e2)     `(if ,(desugar e0) ,(desugar e1) ,(desugar e2))]
      [`(let ((,x ,e0)) ,e1) `(let ((,x ,(desugar e0))) ,(desugar e1))]
      [`(letrec ,bs ,e0)
       `(letrec ,(map (λ (b) (list (first b) (desugar (second b)))) bs)
          ,(desugar e0))]
      [`(λ ,xs ,e0)          `(λ ,xs ,(desugar e0))]
      [`(cond . ,_)          (desugar (cond->if e+))]
      [`(and . ,_)           (desugar (and->if e+))]
      [`(or . ,_)            (desugar (or->if e+))]
      [`(,e . ,es)           `(,(desugar e) ,@(map desugar es))]))

Note how a cond, and, or or form are transformed and then desugared again. This will take care of eliminating any derived forms introduced by the transformation, which is useful so that derived forms can be defined in terms of other derived forms, including itself!

Examples

> (desugar '(cond [(even? x) 8] [else 9]))

'(if (even? x) 8 9)

> (desugar '(cond [else 9]))

9

> (desugar '(and))

#t

> (desugar '(and 8))

8

> (desugar '(and 8 9))

'(if 8 9 #f)

> (desugar '(or))

#f

> (desugar '(or 8))

8

> (desugar '(or 8 9))

'(let ((g8260 8)) (if g8260 g8260 9))

Derived forms that can be elaborated away by rewriting into more primitive forms are sometimes called syntactic sugar since they are not fundamental but “sweeten” the experience of writing programs with useful shorthands. We call the elaboration function desugar to indicate that it is eliminating the syntactic sugar.

18.4 Exceptional behavior

To see an example of taking the idea of program transformation as a method for implementing language features, let’s consider the case of exceptions and exception handlers, a common feature of modern high-level languages.

Consider the following program for computing the product of all the elements in a binary tree:

Examples

; BT -> Number
; Multiply all the numbers in given binary tree
> (define (prod bt)
    (match bt
      ['leaf 1]
      [`(node ,v ,l ,r) (* v (* (prod l) (prod r)))]))
> (prod 'leaf)

1

> (prod '(node 8 leaf leaf))

8

> (prod '(node 8 (node 2 leaf leaf) (node 4 leaf leaf)))

64

Now consider the work done in an example such as this:

Examples

> (prod '(node 9 (node 0 leaf leaf) (node 4 (node 2 leaf leaf) (node 3 leaf leaf))))

0

From a quick scan of the elements, we know the answer is 0 without doing any arithmetic. But the prod function will do a bunch of multiplication to actually figure this out.

To see, let’s use a helper function to replace * that prints every it multiplies two numbers:

Examples

; Number Number -> Number
> (define (mult x y)
    (printf "mult: ~a x ~a\n" x y)
    (* x y))
; BT -> Number
; Multiply all the numbers in given binary tree
> (define (prod bt)
    (match bt
      ['leaf 1]
      [`(node ,v ,l ,r) (mult v (mult (prod l) (prod r)))]))
> (prod '(node 9 (node 0 leaf leaf) (node 4 (node 2 leaf leaf) (node 3 leaf leaf))))

mult: 1 x 1

mult: 0 x 1

mult: 1 x 1

mult: 2 x 1

mult: 1 x 1

mult: 3 x 1

mult: 2 x 3

mult: 4 x 6

mult: 0 x 24

mult: 9 x 0

0

This could potentially be bad if the tree were quite large.

How can we do better? One option is to detect if the value at a node is zero and simply avoid recurring on the left and right subtrees at that point:

Examples

; BT -> Number
; Multiply all the numbers in given binary tree
> (define (prod bt)
    (match bt
      ['leaf 1]
      [`(node ,v ,l ,r)
       (if (zero? v)
           0
           (mult v (mult (prod l) (prod r))))]))

Does this help our answer? Only slightly:

Examples

> (prod '(node 9 (node 0 leaf leaf) (node 4 (node 2 leaf leaf) (node 3 leaf leaf))))

mult: 1 x 1

mult: 2 x 1

mult: 1 x 1

mult: 3 x 1

mult: 2 x 3

mult: 4 x 6

mult: 0 x 24

mult: 9 x 0

0

Why?

The problem is that you may encounter the zero element deep within a tree. At that point you not only want to avoid doing the multiplication of subtrees, but also of the elements surrounding the zero. But we seemingly don’t have control over the context surrounding the node with a zero in it, just the subtrees. What can we do?

One option, if the language provides it, is to raise an exception, signalling that a zero element has been found. An outer function can catch that exception and produce zero. Such a program will avoid doing any multiplication in case there’s a zero in the tree.

Racket comes with an exception mechanism that uses raise to signal an exception, which is propagated to the nearest enclosing exception handler. If there is no such handler, an uncaught exception error occurs.

Examples

> (raise 5)

uncaught exception: 5

> (mult (raise 5) 2)

uncaught exception: 5

> (mult (raise (mult 5 3)) 2)

mult: 5 x 3

uncaught exception: 15

The general form of an exception handler uses the with-handlers form that includes a series of predicates and handler expressions. We’ll consider a simpler form called catch that unconditionally catches any exception throw and handles it with a function that takes the raised value as an argument. It can be expressed in terms of the more sophisticated with-handlers form:

Examples

> (define-syntax-rule (catch e f)
    (with-handlers ([(λ (x) #t) f]) e))
> (catch (raise 5) (λ (x) x))

5

> (catch (mult (raise 5) 2) (λ (x) x))

5

> (catch (mult (raise (mult 5 3)) 2) (λ (x) x))

mult: 5 x 3

15

> (catch (mult (mult 5 3) 2) (λ (x) x))

mult: 5 x 3

mult: 15 x 2

30

> (catch (mult (mult 5 3) 2) (λ (x) (mult x x)))

mult: 5 x 3

mult: 15 x 2

30

> (catch (mult (raise (mult 5 3)) 2) (λ (x) (mult x x)))

mult: 5 x 3

mult: 15 x 15

225

Now we can solve our problem:

Examples

; BT -> Number
; Multiply all the numbers in given binary tree
> (define (prod bt)
    (catch (prod/r bt) (λ (x) 0)))
; BT -> Number
; Throws: 0
> (define (prod/r bt)
    (match bt
      ['leaf 1]
      [`(node ,v ,l ,r)
       (if (zero? v)
           (raise 0)
           (mult v (mult (prod/r l) (prod/r r))))]))
> (prod '(node 9 (node 0 leaf leaf) (node 4 (node 2 leaf leaf) (node 3 leaf leaf))))

0

(This code is a bit problematic for reasons that are beside the point of this section, but... the problem is this will catch any exception, including things like system signals, out of memory exceptions, etc. A better solution would have the handler check that the exception value was 0 and re-raise it if not. That way it doesn’t “mask” any other exceptions.)

This code works great for our purposes, but what if the language didn’t provide an exception handling mechanism? Could we achieve the same effect without relying on exceptions?

One solution is to re-write the program in what’s called continuation passing style (CPS). Continuation passing style makes explicit what is implicit in the recursive calls to prod in our original program, which is that after recursively computing the product of the subtree, we have to do more work such as another recursive call and multiplication. By making this work explicit, we gain control over it and have the option to do things like throw away this work.

Here is the basic idea. We will write a version of prod that takes an additional argument which represents “the work to be done after this function call completes.” It will take a single argument, a number, which is the result of this function call, and it will produce some final result for the computation (in this case, a number).

In general, we want (k (prod bt))(prod/k bt k) for all functions k and binary trees bt.

Starting from the spec, we have:

Examples

; BT (Number -> Number) -> Number
> (define (prod/k bt k)
    (k (prod bt)))

We can unroll the definition of prod:

Examples

> (define (prod/k bt k)
    (match bt
      ['leaf (k 1)]
      [`(node ,v ,l ,r)
       (k (mult v (mult (prod l) (prod r))))]))

Now we’d like to replace the calls to prod with calls to prod/k, which we can do by recognizing the work to be done around the call to prod and placing it in the continuation argument to prod/k. Let’s do the first call:

Examples

> (define (prod/k bt k)
    (match bt
      ['leaf (k 1)]
      [`(node ,v ,l ,r)
       (prod/k l (λ (pl)
                   (k (mult v (mult pl (prod r))))))]))

Doing this again, we get:

Examples

> (define (prod/k bt k)
    (match bt
      ['leaf (k 1)]
      [`(node ,v ,l ,r)
       (prod/k l (λ (pl)
                   (prod/k r (λ (pr)
                               (k (mult v (mult pl pr)))))))]))

Now we have a definition of prod/k that is independent of prod that satisfies the spec we started with.

A couple of things to note:

We can recreate the original function by giving the appropriate initial continuation:

Examples

; BT -> Number
> (define (prod bt)
    (prod/k bt (λ (x) x)))

Now, this code doesn’t do anything smart on zero elements; it does exactly the same multiplications our first program does:

Examples

> (prod '(node 9 (node 0 leaf leaf) (node 4 (node 2 leaf leaf) (node 3 leaf leaf))))

mult: 1 x 1

mult: 0 x 1

mult: 1 x 1

mult: 2 x 1

mult: 1 x 1

mult: 3 x 1

mult: 2 x 3

mult: 4 x 6

mult: 0 x 24

mult: 9 x 0

0

However, with a small tweak, we can get the behavior of the exception-handling code.

Consider this definition:

Examples

; BT (Number -> Number) -> Number
> (define (prod/k bt k)
    (match bt
      ['leaf (k 1)]
      [`(node ,v ,l ,r)
       (if (zero? v)
           0
           (prod/k l (λ (pl)
                      (prod/k r (λ (pr)
                                  (k (mult v (mult pl pr))))))))]))
; BT -> Number
> (define (prod bt)
    (prod/k bt (λ (x) x)))

Notice that this program, when the value in a node is zero, immediately returns 0. It does not do any of the work represented by k. It does something akin to raising an exception: it blows off all the work of the surround context and returns a value to the “handler” (in this case, prod).

Returning to our example, we can see that no multiplications occur:

Examples

> (prod '(node 9 (node 0 leaf leaf) (node 4 (node 2 leaf leaf) (node 3 leaf leaf))))

0

We’ve now achieved our original goal without the use of exception handlers. We achieved this by rewriting our program to make explicit the work that remains to do, giving us the ability to avoid doing it when necessary. This is a slighly simplified version of the general exception handling transformation, which we will look at next, since there’s only a single handler and all it does it produce 0. But, the by-hand transformation we did provides a useful blueprint for how can generally transform programs that use exception handling into ones that don’t.

18.5 Exceptional transformation

Let’s consider a very small subset of expressions, extended with raise and catch, and see how we can transform away those added mechanisms:

; An Expr is one of:
; - Integer
; - Variable
; - (if ,Expr ,Expr ,Expr)
; - (,Prim1 ,Expr)
; - (,Prim2 ,Expr ,Expr)
; - (raise ,Expr)
; - (catch ,Expr (λ (,Variable) ,Expr))

Here is the basic idea of the transformation, we transform every expression into a function of two arguments. The two arguments represent the two ways an expression may produce results: either by returning normally or by raising an exception.

So for example, if the original expression were 1, we’d want the transformed program to be

'(λ (retn raze) (retn 1))

Why? Because 1 just produces 1; it can’t possibly raise an exception. So given the two ways of producing a value, we choose the ret way and “return” by apply retn to the value we want to return: 1.

Suppose the original expression is (raise 1). Then we want to produce:

'(λ (retn raze) (raze 1))

This is choosing to not return a value, but rather “raise” an exception by calling the raze function.

This is a lot like the by-hand transformation we did, except we now have two continuations: one to represent work to do after returning (normally) and one for work to do after raising an exception.

At the top-level, to run an expression we simply plug in appropriate definitions for retn and raze. The retn function should just produce the result, i.e. it should be (λ (x) x), while raze should signal an uncaught exception. Since our language has such a simple model of errors, we’ll just cause an error to occur, i.e. (λ (x) (add1 #f)). Let’s try our examples.

Examples

> (interp-env '((λ (retn raze) (retn 1)) (λ (x) x) (λ (x) (add1 #f))) '())

1

> (interp-env '((λ (retn raze) (raze 1)) (λ (x) x) (λ (x) (add1 #f))) '())

'err

What about something like (add1 e)?

Well if e returns normally, then the whole thing should produce one more than that value. If e raises an exception, then (add1 e) should raise that exception.

Suppose t where the transformed version of e, which means it is a function of two parameters: what to do if e returns and what to do if e raises.

Then the transformation of (add1 e) is
(λ (retn raze)
  (t (λ (x) (retn (add1 x))) (λ (x) (raze x))))

This can be simplified slightly by observing that (λ (x) (raze x)) is equal to raze:
(λ (retn raze)
  (t (λ (x) (retn (add1 x))) raze))

How about something like (catch e0 (λ (x) e1))? If e0 produces a value normally, then the whole expression produces that value normally. However if e0 raises an expression then the whole expression produces whatever e1 with x bound to the raised value produces. Let t0 and t1 be the transformed versions of e0 and e1. Then transformation of the whole expressions should be

(λ (retn raze)
  (t0 retn (λ (x) (t1 retn raze))))

One thing to notice here is that we are running t0 with a raze function that, if called, will run t1 normally.

Guided by the examples, let’s define the transformation (note: we have to take care of avoiding unintended variable capture):

Examples

; Expr -> Expr
> (define (exn-transform e)
    (match e
      [(? integer? i) `(λ (retn raze) (retn ,i))]
      [(? symbol? x)
       (let ((retn (gensym 'retn))
             (raze (gensym 'raze)))
         `(λ (,retn ,raze) (,retn ,x)))]
      [`(if ,e0 ,e1 ,e2)
       (let ((t0 (exn-transform e0))
             (t1 (exn-transform e1))
             (t2 (exn-transform e2))
             (retn (gensym 'retn))
             (raze (gensym 'raze)))
         `(λ (,retn ,raze)
            (,t0
             (λ (x)
               ((if x ,t1 ,t2) ,retn ,raze))
             ,raze)))]
      [`(,(? prim? p) ,e0)
       (let ((t0 (exn-transform e0))
             (retn (gensym 'retn))
             (raze (gensym 'raze)))
         `(λ (,retn ,raze)
            (,t0 (λ (v) (,retn (,p v)))
                 ,raze)))]
      [`(,(? prim? p) ,e0 ,e1)
       (let ((t0 (exn-transform e0))
             (t1 (exn-transform e1))
             (retn (gensym 'retn))
             (raze (gensym 'raze))
             (v0 (gensym 'v0)))
         `(λ (,retn ,raze)
            (,t0 (λ (,v0)
                   (,t1 (λ (v1) (,retn (,p v0 v1)))
                        ,raze))
                 ,raze)))]
      [`(raise ,e)
       (let ((t (exn-transform e))
             (retn (gensym 'retn))
             (raze (gensym 'raze)))
         `(λ (,retn ,raze)
            (,t ,raze ,raze)))]
      [`(catch ,e0 (λ (,x) ,e1))
       (let ((t0 (exn-transform e0))
             (t1 (exn-transform e1))
             (retn (gensym 'retn))
             (raze (gensym 'raze)))
  
         `(λ (,retn ,raze)
            (,t0 ,retn
                 (λ (,x)
                   (,t1 ,retn ,raze)))))]))

Here’s what the transformation looks like on examples:

Examples

> (exn-transform '1)

'(λ (retn raze) (retn 1))

> (exn-transform '(raise 1))

'(λ (retn8311 raze8312) ((λ (retn raze) (retn 1)) raze8312 raze8312))

> (exn-transform '(catch (raise 1) (λ (x) x)))

'(λ (retn8317 raze8318) ((λ (retn8313 raze8314) ((λ (retn raze) (retn 1)) raze8314 raze8314)) retn8317 (x) ((λ (retn8315 raze8316) (retn8315 x)) retn8317 raze8318))))

> (exn-transform '(catch (raise 1) (λ (x) (add1 x))))

'(λ (retn8325 raze8326) ((λ (retn8319 raze8320) ((λ (retn raze) (retn 1)) raze8320 raze8320)) retn8325 (x) ((λ (retn8323 raze8324) ((λ (retn8321 raze8322) (retn8321 x)) (v) (retn8323 (add1 v))) raze8324)) retn8325 raze8326))))

> (exn-transform '(catch (add1 (raise 1)) (λ (x) 1)))

'(λ (retn8331 raze8332) ((λ (retn8329 raze8330) ((λ (retn8327 raze8328) ((λ (retn raze) (retn 1)) raze8328 raze8328)) (v) (retn8329 (add1 v))) raze8330)) retn8331 (x) ((λ (retn raze) (retn 1)) retn8331 raze8332))))

> (exn-transform '(catch (add1 (raise 1)) (λ (x) (raise x))))

'(λ (retn8341 raze8342) ((λ (retn8335 raze8336) ((λ (retn8333 raze8334) ((λ (retn raze) (retn 1)) raze8334 raze8334)) (v) (retn8335 (add1 v))) raze8336)) retn8341 (x) ((λ (retn8339 raze8340) ((λ (retn8337 raze8338) (retn8337 x)) raze8340 raze8340)) retn8341 raze8342))))

Now let’s give it a spin:

Examples

> (define (run e)
    (interp-env `(,(exn-transform e) (λ (x) x) (λ (x) (add1 #f))) '()))
> (run '1)

1

> (run '(raise 1))

'err

> (run '(catch (raise 1) (λ (x) x)))

1

> (run '(catch (raise 1) (λ (x) (add1 x))))

2

> (run '(catch (add1 (raise 1)) (λ (x) 1)))

1

> (run '(catch (add1 (raise 1)) (λ (x) (raise x))))

'err

> (run '(if (raise 0) 1 2))

'err

> (run '(if (zero? 0) (raise 1) 2))

'err

18.6 Quotation

We have seen how to interpret limited uses of quote, such as in '() and 'x, i.e. the empty list and symbols.

But we’ve also been using using quote more generally where we can write down an arbitrary constant s-expression within a quote:

Examples

> '#t

#t

> '5

5

> '(1 2 3)

'(1 2 3)

> '(add1 x)

'(add1 x)

> '(car '(1 2 3))

'(car '(1 2 3))

> '(((1) 2) 3)

'(((1) 2) 3)

> '(1 . 2)

'(1 . 2)

> '("asdf" fred ((one)))

'("asdf" fred ((one)))

We can understand the more general quote form as a shorthand for expressions that construct an equivalent list to the one denoted by the s-expression.

For example,
  • '1 is shorthand for 1,

  • '(1 . 2) is shorthand for (cons '1 '2), which is shorthand for (cons 1 2),

  • '(1 2 3) is shorthand for (cons '1 '(2 3)), which is shorthand for (cons 1 (cons '2 '(3))), which is shorthand for (cons 1 (cons 2 (cons '3 '()))), which is shorthand for (cons 1 (cons 2 (cons 3 '()))),

  • '() is as simple as possible (the empty list),

  • 'x is as simple as possible (a symbol), and

  • 5 is as simple as possible.

Guided by these examples, we can write a function that transforms the s-expression inside of a quote into an equivalent expression that only uses quote for constructing symbols and the empty list:

Examples

; S-Expr -> Expr
; Produce an expression that evaluates to given s-expression, without
; use of quote (except for symbols and empty list)
> (define (quote->expr d)
    (match d
      [(? boolean?) d]
      [(? integer?) d]
      [(? string?) d]
      [(? char?) d]
      [(? symbol?) (list 'quote d)]
      [(cons x y) (list 'cons (quote->expr x) (quote->expr y))]
      ['() ''()]))
> (quote->expr 'x)

''x

> (quote->expr 5)

5

> (quote->expr "Fred")

"Fred"

> (quote->expr '(1 . 2))

'(cons 1 2)

> (quote->expr '(1 2 3))

'(cons 1 (cons 2 (cons 3 '())))

> (quote->expr '(car '(1 2 3)))

'(cons 'car (cons (cons 'quote (cons (cons 1 (cons 2 (cons 3 '()))) '())) '()))

> (quote->expr '(((1) 2) 3))

'(cons (cons (cons 1 '()) (cons 2 '())) (cons 3 '()))

> (quote->expr '(1 . 2))

'(cons 1 2)

> (quote->expr '("asdf" fred ((one))))

'(cons "asdf" (cons 'fred (cons (cons (cons 'one '()) '()) '())))

We can now incorporate this into desugar to eliminate uses of compound-data quote:

Examples

; Expr+ -> Expr
> (define (desugar e+)
    (match e+
      [`(begin ,@(list `(define (,fs . ,xss) ,es) ...) ,e)
       `(letrec ,(map (λ (f xs e) `(,f (λ ,xs ,(desugar e)))) fs xss es)
          ,(desugar e))]
      [(? symbol? x)         x]
      [(? imm? i)            i]
      [`',(? symbol? s)      `',s]
      [`',d                  (quote->expr d)]
      [`(,(? prim? p) . ,es) `(,p ,@(map desugar es))]
      [`(if ,e0 ,e1 ,e2)     `(if ,(desugar e0) ,(desugar e1) ,(desugar e2))]
      [`(let ((,x ,e0)) ,e1) `(let ((,x ,(desugar e0))) ,(desugar e1))]
      [`(letrec ,bs ,e0)
       `(letrec ,(map (λ (b) (list (first b) (desugar (second b)))) bs)
          ,(desugar e0))]
      [`(λ ,xs ,e0)          `(λ ,xs ,(desugar e0))]
      [`(cond . ,_)          (desugar (cond->if e+))]
      [`(and . ,_)           (desugar (and->if e+))]
      [`(or . ,_)            (desugar (or->if e+))]
      [`(,e . ,es)           `(,(desugar e) ,@(map desugar es))]))

And now we can desugar programs such as these:

Examples

> (desugar '(map f '(1 2 3)))

'(map f (cons 1 (cons 2 (cons 3 '()))))

> (desugar '(map f '(and 1 2)))

'(map f (cons 'and (cons 1 (cons 2 '()))))

> (desugar '(if x '(1 . 2) 3))

'(if x (cons 1 2) 3)

And our prior interpterter will work just fine on these programs:

Examples

> (interp-env (desugar '(map f '(1 2 3))) `((map ,map) (f ,add1)))

'(2 3 4)

> (interp-env (desugar '(map f '(and 1 2))) `((map ,map) (f ,identity)))

'(and 1 2)

> (interp-env (desugar '(if x '(1 . 2) 3)) `((x #t)))

'(1 . 2)

And:

Examples

> (interp-env (desugar ''(((1) 2) 3)) '())

'(((1) 2) 3)

> (interp-env (desugar ''(1 . 2)) '())

'(1 . 2)

> (interp-env (desugar ''("asdf" fred ((one)))) '())

'("asdf" fred ((one)))

18.7 Pattern matching

One of the most ubiquitous language features we’ve used, but not implemented, is pattern matching with the match form.

Pattern matching too can be seen as syntactic sugar since it’s easy to imagine how you could rewrite uses of match into equivalent expressions that didn’t involve match.

For example, consider the program:

; BT -> Number
; Multiply all the numbers in given binary tree
(define (prod bt)
  (match bt
    ['leaf 1]
    [`(node ,v ,l ,r) (* v (* (prod l) (prod r)))]))

An alternative, equivalent, formulation that doesn’t use match is the following:

; BT -> Number
; Multiply all the numbers in given binary tree
(define (prod bt)
  (cond
    [(eq? 'leaf bt) 1]
    [(and (list? bt)
          (= 4 (length bt))
          (eq? 'node (first bt)))
     (let ((v (second bt))
           (l (third bt))
           (r (fourth bt)))
       (* v (* (prod l) (prod r))))]
    ; corresponds to a match failure
    [else (add1 #f)]))

This code is less nice to read and write, but essentially does the same thing the pattern-matching code does.

In this example, each match-clause becomes a cond-clause. The question-part of each cond-clause is an expression that determines whether the corresponding pattern-part of the match-clause matches. The answer-part of each cond-clause corresponds to the expression-part of the match-clause, with an add let-form that destructures the scrutinee and binds the pattern variables of the pattern-part.

Let’s consider the following extension to the grammar of Expr+ to include a simplified version of the pattern matchin form we’ve been using:

; type Expr+ =
; ....
; | Match
 
; type Match = (match ,Expr+ ,(list Pat Expr+) ...)
 
; type Pat =
; | #t
; | #f
; | Integer
; | String
; | Variable
; | _
; | '()
; | (quote ,Symbol)
; | (cons ,Pat ,Pat)
; | (list ,Pat ...)
; | (? ,Expr ,Pat ...)

A match form consists of an expression to match against, sometimes callsed the scrutinee, followed by some number of pattern-matching clauses; each one consists of a pattern and expression to evaluate should the pattern match the scrutinee’s value.

Here a pattern can either be a literal boolean, integer, string, empty list, or symbol, or a pattern variable, which matches anything and binds the value to the variable, a “wildcard” which matches anything and binds nothing, a cons pattern which matches pairs of things that match the subpatterns, list pattern which matches lists of a fixed-size where elements matches the subpatterns, or a ? pattern which matches if the results of evaluated the first subexpression applied to scrutinee produces true and all of the subpatterns match.

This doesn’t include the quasiquote-patterns we used above, but still this is a useful subset of pattern matching and allows us to write programs such as:

; BT -> Number
; Multiply all the numbers in given binary tree
(define (prod bt)
  (match bt
    ['leaf 1]
    [(list 'node v l r) (* v (* (prod l) (prod r)))]))

As alluded to above, each pattern plays two roles: it used to determine whether the scrutinee matches the pattern, and it used to bind variable names (in the scope of the RHS expression) to sub-parts of the scrutinee when it does match.

We can write two helper functions to accomplish each of these tasks:
  • rewrite patterns into Boolean valued expressions that answer whether the pattern matches the scrutinee,

  • rewrite pattern and RHS in to expressions in which the pattern variables of pattern are bound to the appropriately deconstructed parts of the scrutinee.

Assume: the scrutinee is a variable. (It’s easy to establish this assumption in general.)

We need two functions:

; Pat Variable -> Expr
; Produces an expression determining if p matches v
(define (pat-match p v) ...)
 
; Pat Variable Expr -> Expr
; Produce an expression that deconstructs v and binds pattern variables
; of p in scope of e.
; ASSUME: v matches p
(define (pat-bind p v e) ...)

Let’s turn to pat-match first.

Suppose the pattern is a literal #t. When does v match it? When v is eq? to #t.

So an expression that produces true when this pattern matches is (eq? #t v).

Handling #f, integers, characters, symbols, and the empty list is similar.

What about variables? Suppose the pattern is x. When does v match it? Always. So #t is an expression that produces true with this pattern matches.

Wildcards are the same.

What about when the pattern is a cons-pattern? Suppose the pattern is (cons p1 p2) for some patterns p1 and p2. When does v match (cons p1 p2)? When v is a pair and (car v) matches p1 and (cdr v) matches p2.

A list pattern is similar, except that the scrunity must be a list with as many elements as there are patterns, and the elements must match the corresponding subpattern.

What about a ? pattern? When does v match it? Suppose the pattern is (? even?). When does v match it? When (even? v) is true. If the pattern had a non-empty list of sub-patterns they would all need to match v, too.

We can now formulate a defintion of pat-match:

; Pat Variable -> Expr
; Produces an expression determining if p matches v
(define (pat-match p v)
  (match p
    [#t `(eq? #t ,v)]
    [#f `(eq? #f ,v)]
    [(? integer? i) `(eq? ,i ,v)]
    [(? string? s)
     `(and (string? ,v)
           (string=? ,s ,v))]
    [(list 'quote '()) `(eq? '() ,v)]
    [(? symbol?) #t]
    [(list 'quote (? symbol? s)) `(eq? ,v ',s)]
    [(list 'cons p1 p2)
     (let ((v1 (gensym))
           (v2 (gensym)))
       `(and (cons? ,v)
             (let ((,v1 (car ,v))
                   (,v2 (cdr ,v)))
               (and ,(pat-match p1 v1)
                    ,(pat-match p2 v2)))))]
    [(cons 'list ps)
     `(and (list? ,v)
           (= (length ,v) ,(length ps))
           ,(pat-match-list ps v))]
    [(cons '? (cons e ps))
     `(and (,e ,v)
           ,(pats-match ps v))]))

The list-pattern case relies on a helper function pat-match-list and the ?-pattern case relies on pats-match, both defined below:

; (Listof Pat) Variable -> Expr
; Produces an expression determining if every ps matches x
(define (pats-match ps v)
  (match ps
    ['() #t]
    [(cons p ps)
     `(and ,(pat-match p v)
           ,(pats-match ps v))]))
 
; (Listof Pat) Variable -> Expr
; Produces an expression determining if each ps matches each element of list v
(define (pat-match-list ps v)
  (match ps
    ['() #t]
    [(cons p ps)
     (let ((v1 (gensym))
           (v2 (gensym)))
       `(let ((,v1 (car ,v))
              (,v2 (cdr ,v)))
          (and ,(pat-match p v1)
               ,(pat-match-list ps v2))))]))

Here are some examples:

Examples

> (pat-match 'leaf 'bt)

#t

> (pat-match '(list 'node v l r) 'bt)

'(and (list? bt) (= (length bt) 4) (let ((g8408 (car bt)) (g8409 (cdr bt))) (and (eq? g8408 'node) (let ((g8410 (car g8409)) (g8411 (cdr g8409))) (and #t (let ((g8412 (car g8411)) (g8413 (cdr g8411))) (and #t (let ((g8414 (car g8413)) (g8415 (cdr g8413))) (and #t #t)))))))))

> (pat-match '(list 'node (? even? v) l r) 'bt)

'(and (list? bt) (= (length bt) 4) (let ((g8416 (car bt)) (g8417 (cdr bt))) (and (eq? g8416 'node) (let ((g8418 (car g8417)) (g8419 (cdr g8417))) (and (and (even? g8418) (and #t #t)) (let ((g8420 (car g8419)) (g8421 (cdr g8419))) (and #t (let ((g8422 (car g8421)) (g8423 (cdr g8421))) (and #t #t)))))))))

These aren’t very readable programs that emerge, however, we check that they’re doing the right thing. Note that the elaboration requires a few functions to be available, such as list?, and length. We make these available in an initial environment:

Examples

> (define env0
    `((length ,length)
      (list? ,list?)))
> (interp-env (desugar `(let ((bt 'leaf)) ,(pat-match 'leaf 'bt))) env0)

#t

> (interp-env (desugar `(let ((bt 'leaf)) ,(pat-match 8 'bt))) env0)

#f

> (interp-env (desugar
               `(let ((bt '(node 1 leaf leaf)))
                 ,(pat-match '(list 'node v l r) 'bt)))
              env0)

#t

> (interp-env (desugar
               `(let ((bt '(node 1 leaf leaf)))
                 ,(pat-match '(list 'node (? zero?) l r) 'bt)))
              env0)

#f

> (interp-env (desugar
               `(let ((bt '(node 0 leaf leaf)))
                 ,(pat-match '(list 'node (? zero?) l r) 'bt)))
              env0)

#t

Now moving on to pat-bind, it follows a similar structure, but we always assume the given pattern matches the scrutinee. The addition Expr argument represents the right-hand-side expression of the match-clause. The idea is that the pattern variables of p are bound to sub-parts of v in e.

; Pat Variable Expr -> Expr
; Produce an expression that deconstructs v and binds pattern variables
; of p in scope of e.
; ASSUME: v matches p
(define (pat-bind p v e)
  (match p
    [#t e]
    [#f e]
    [(? integer?) e]
    [(? string?) e]
    [(list 'quote '()) e]
    ['_ e]
    [(? symbol? x) `(let ((,x ,v)) ,e)]
    [(list 'quote (? symbol?)) e]
    [(list 'cons p1 p2)
     (let ((v1 (gensym))
           (v2 (gensym)))
       `(let ((,v1 (car ,v))
              (,v2 (cdr ,v)))
          ,(pat-bind p1 v1
                     (pat-bind p2 v2 e))))]
    [(cons 'list ps)
     (pat-bind-list ps v e)]
    [(cons '? (cons _ ps))
     (pats-bind ps v e)]))

Here are some examples:

Examples

> (pat-bind 'leaf 'bt 'bt)

'(let ((leaf bt)) bt)

> (pat-bind '(list 'node v l r) 'bt 'v)

'(let ((g8448 (car bt)) (g8449 (cdr bt))) (let ((g8450 (car g8449)) (g8451 (cdr g8449))) (let ((v g8450)) (let ((g8452 (car g8451)) (g8453 (cdr g8451))) (let ((l g8452)) (let ((g8454 (car g8453)) (g8455 (cdr g8453))) (let ((r g8454)) v)))))))

> (pat-bind '(list 'node (? even? v) l r) 'bt 'v)

'(let ((g8456 (car bt)) (g8457 (cdr bt))) (let ((g8458 (car g8457)) (g8459 (cdr g8457))) (let ((v g8458)) (let ((g8460 (car g8459)) (g8461 (cdr g8459))) (let ((l g8460)) (let ((g8462 (car g8461)) (g8463 (cdr g8461))) (let ((r g8462)) v)))))))

These are tough to read, but we can confirm what they compute:

Examples

> (interp-env (desugar
               `(let ((bt '(node 0 leaf leaf)))
                  ,(pat-bind '(list 'node (? zero? z) l r) 'bt 'z)))
              '())

0

Putting the pieces together, we can now write a match->cond function that rewrites a match-expression into a cond-expression:

; Match -> Expr
; Rewrite match expression into an equivalent cond expression
(define (match->cond m)
  (match m
    [(cons 'match (cons e mcs))
     (let ((x (gensym)))
       `(let ((,x ,e))
          (cond ,@(map (λ (mc)
                         (match mc
                           [(list p e)
                            (list (pat-match p x) (pat-bind p x e))]))
                       mcs)
                ; fall through to error
                [else (add1 #f)])))]))

Examples

> (match->cond '(match '(node 2 leaf leaf)
                  ['leaf 0]
                  [(list 'node v l r) v]))

'(let ((g8472 '(node 2 leaf leaf))) (cond ((eq? g8472 'leaf) 0) ((and (list? g8472) (= (length g8472) 4) (let ((g8473 (car g8472)) (g8474 (cdr g8472))) (and (eq? g8473 'node) (let ((g8475 (car g8474)) (g8476 (cdr g8474))) (and #t (let ((g8477 (car g8476)) (g8478 (cdr g8476))) (and #t (let ((g8479 (car g8478)) (g8480 (cdr g8478))) (and #t #t))))))))) (let ((g8481 (car g8472)) (g8482 (cdr g8472))) (let ((g8483 (car g8482)) (g8484 (cdr g8482))) (let ((v g8483)) (let ((g8485 (car g8484)) (g8486 (cdr g8484))) (let ((l g8485)) (let ((g8487 (car g8486)) (g8488 (cdr g8486))) (let ((r g8487)) v)))))))) (else (car '()))))

Finally, we can incorporate match->cond into desugar:

Examples

; Expr+ -> Expr
(define (desugar e+)
  (match e+
    [`(begin ,@(list `(define (,fs . ,xss) ,es) ...) ,e)
     `(letrec ,(map (λ (f xs e) `(,f (λ ,xs ,(desugar e)))) fs xss es)
        ,(desugar e))]
    [(? symbol? x)         x]
    [(? imm? i)            i]
    [`',(? symbol? s)      `',s]
    [`',d                  (quote->expr d)]
    [`(,(? prim? p) . ,es) `(,p ,@(map desugar es))]
    [`(if ,e0 ,e1 ,e2)     `(if ,(desugar e0) ,(desugar e1) ,(desugar e2))]
    [`(let ((,x ,e0)) ,e1) `(let ((,x ,(desugar e0))) ,(desugar e1))]
    [`(letrec ,bs ,e0)
     `(letrec ,(map (λ (b) (list (first b) (desugar (second b)))) bs)
        ,(desugar e0))]
    [`(λ ,xs ,e0)          `(λ ,xs ,(desugar e0))]
    [`(cond . ,_)          (desugar (cond->if e+))]
    [`(and . ,_)           (desugar (and->if e+))]
    [`(or . ,_)            (desugar (or->if e+))]
    [`(match . ,_)         (desugar (match->cond e+))]  ; new
    [`(,e . ,es)           `(,(desugar e) ,@(map desugar es))]))

Now we can interpret programs such as this:

Examples

> (interp-env
   (desugar
    '(begin (define (prod bt)
              (match bt
                ['leaf 1]
                [(list 'node v l r)
                 (* v (* (prod l) (prod r)))]))
  
            (prod '(node 3 (node 4 leaf leaf) leaf))))
   `((* ,*) (list? ,list?) (length ,length)))

12