On this page:
14.1 Functions
14.2 An Interpreter for Functions
14.3 Conventions of Calling
14.4 Compiling Function Calls and Definitions
14.5 On Names and Labels
14.6 A Compiler for Iniquity
8.1

14 Iniquity: function definitions and calls

    14.1 Functions

    14.2 An Interpreter for Functions

    14.3 Conventions of Calling

    14.4 Compiling Function Calls and Definitions

    14.5 On Names and Labels

    14.6 A Compiler for Iniquity

14.1 Functions

With Hustle: heaps and lists, we removed a major computational shortcoming by adding the ability to use inductively defined data. Doing so gives programmers the ability to represent arbitrarily large pieces of information.

And yet, the language remains hamstrung. It has no mechanism to compute with such data. Sure, a programmer could compute the sum of the first n elements of a list, for some fixed n. But the size of this program would be proportional to the size of n. Want to compute the sum of a billion element list? You’ll need (at least) a billion expressions. Want to compute the sum of a larger list? Write a longer program! But if you want to compute the sum of any list, regardless of its size? You’ll need an arbitrarily long program. Of course programs are always of some fixed size, since after all, you have to write them down and at some point you have to stop writing. This means the expressiveness of our language is still severely restricted.

The solution is to bring in the computational analog of inductive data. When you have arbitrarily large data, you need arbitrarily long running computations to process them. Crucially, these arbitrarily long running computations need to be described by finite sized programs. The analog of inductive data are recursive functions.

So let’s now remove the computational shackles by incorporating functions, and in particular, recursive functions, which will allow us to compute over arbitrarily large data with finite-sized programs.

Let’s call it Iniquity.

We will extend the syntax by introducing a new syntactic category of programs, which consist of a sequence of function definitions followed by an expression:

(define (f0 x00 ...) e0)
(define (f1 x10 ...) e1)
...
e

And the syntax of expressions will be extended to include function calls:

(fi e0 ...)

where fi is one of the function names defined in the program.

Note that functions can have any number of parameters and, symmetrically, calls can have any number of arguments. A program consists of zero or more function definitions followed by an expression.

An example concrete Iniquity program is:

iniquity/example/len.rkt

  #lang racket
   
  ;; Compute the length of the list
  (define (len xs)
    (if (empty? xs)
        0
        (add1 (len (cdr xs)))))
   
  (len (cons "a" (cons "b" (cons "c" '()))))
   

To represent these kinds of programs, we extend the definition of ASTs as follows:

iniquity/ast.rkt

  #lang racket
  (provide (all-defined-out))
   
  ;; type Prog = (Prog (Listof Defn) Expr)
  (struct Prog (ds e) #:prefab)
   
  ;; type Defn = (Defn Id (Listof Id) Expr)
  (struct Defn (f xs e) #:prefab)
   
  ;; type Expr = (Eof)
  ;;           | (Empty)
  ;;           | (Int Integer)
  ;;           | (Bool Boolean)
  ;;           | (Char Character)
  ;;           | (Str String)
  ;;           | (Prim0 Op0)
  ;;           | (Prim1 Op1 Expr)
  ;;           | (Prim2 Op2 Expr Expr)
  ;;           | (Prim3 Op3 Expr Expr Expr)
  ;;           | (If Expr Expr Expr)
  ;;           | (Begin Expr Expr)
  ;;           | (Let Id Expr Expr)
  ;;           | (Var Id)
  ;;           | (App Id (Listof Expr))
  ;; type Id   = Symbol
  ;; type Op0  = 'read-byte
  ;; type Op1  = 'add1 | 'sub1 | 'zero?
  ;;           | 'char? | 'integer->char | 'char->integer
  ;;           | 'write-byte | 'eof-object?
  ;;           | 'box | 'car | 'cdr | 'unbox
  ;;           | 'empty? | 'cons? | 'box?
  ;;           | 'vector? | vector-length
  ;;           | 'string? | string-length
  ;; type Op2  = '+ | '- | '< | '=
  ;;           | 'cons
  ;;           | 'make-vector | 'vector-ref
  ;;           | 'make-string | 'string-ref
  ;; type Op3  = 'vector-set!
  (struct Eof   ()           #:prefab)
  (struct Empty ()           #:prefab)
  (struct Int   (i)          #:prefab)
  (struct Bool  (b)          #:prefab)
  (struct Char  (c)          #:prefab)
  (struct Str   (s)          #:prefab)
  (struct Prim0 (p)          #:prefab)
  (struct Prim1 (p e)        #:prefab)
  (struct Prim2 (p e1 e2)    #:prefab)
  (struct Prim3 (p e1 e2 e3) #:prefab)
  (struct If    (e1 e2 e3)   #:prefab)
  (struct Begin (e1 e2)      #:prefab)
  (struct Let   (x e1 e2)    #:prefab)
  (struct Var   (x)          #:prefab)
  (struct App   (f es)       #:prefab)
   

The parser will need to be updated to parse programs, not just expressions. Since a program is a sequence of forms, we will assume the reader will read in all of these forms and construct a list of the elements. So the program parser parse takes a list of s-expressions. There is also a new parse for function definitions, parse-definition. The parser for expressions parse-e is updated to include function applications.

iniquity/parse.rkt

  #lang racket
  (provide parse parse-define parse-e)
  (require "ast.rkt")
   
  ;; [Listof S-Expr] -> Prog
  (define (parse s)
    (match s
      [(cons (and (cons 'define _) d) s)
       (match (parse s)
         [(Prog ds e)
          (Prog (cons (parse-define d) ds) e)])]
      [(cons e '()) (Prog '() (parse-e e))]
      [_ (error "program parse error")]))
   
  ;; S-Expr -> Defn
  (define (parse-define s)
    (match s
      [(list 'define (list-rest (? symbol? f) xs) e)
       (if (andmap symbol? xs)
           (Defn f xs (parse-e e))
           (error "parse definition error"))]
      [_ (error "Parse defn error" s)]))
   
  ;; S-Expr -> Expr
  (define (parse-e s)
    (match s
      [(? integer?)                  (Int s)]
      [(? boolean?)                  (Bool s)]
      [(? char?)                     (Char s)]
      [(? string?)                   (Str s)]
      ['eof                          (Eof)]
      [(? symbol?)                   (Var s)]
      [(list 'quote (list))          (Empty)]
      [(list (? (op? op0) p0))       (Prim0 p0)]
      [(list (? (op? op1) p1) e)     (Prim1 p1 (parse-e e))]
      [(list (? (op? op2) p2) e1 e2) (Prim2 p2 (parse-e e1) (parse-e e2))]
      [(list (? (op? op3) p3) e1 e2 e3)
       (Prim3 p3 (parse-e e1) (parse-e e2) (parse-e e3))]
      [(list 'begin e1 e2)
       (Begin (parse-e e1) (parse-e e2))]
      [(list 'if e1 e2 e3)
       (If (parse-e e1) (parse-e e2) (parse-e e3))]
      [(list 'let (list (list (? symbol? x) e1)) e2)
       (Let x (parse-e e1) (parse-e e2))]
      [(cons (? symbol? f) es)
       (App f (map parse-e es))]
      [_ (error "Parse error" s)]))
   
  (define op0
    '(read-byte peek-byte void))
   
  (define op1
    '(add1 sub1 zero? char? write-byte eof-object?
           integer->char char->integer
           box unbox empty? cons? box? car cdr
           vector? vector-length string? string-length))
  (define op2
    '(+ - < = cons eq? make-vector vector-ref make-string string-ref))
  (define op3
    '(vector-set!))
   
  (define (op? ops)
    (λ (x)
      (and (symbol? x)
           (memq x ops))))
   

Because of the change from a program being a single expression to a sequence, we have to update the utilities that read program files, i.e. interp-file.rkt and compile-file.rkt:

iniquity/interp-file.rkt

  #lang racket
  (provide main)
  (require "parse.rkt" "interp.rkt" "read-all.rkt")
   
  ;; String -> Void
  ;; Parse and interpret contents of given filename,
  ;; print result on stdout
  (define (main fn)
    (let ((p (open-input-file fn)))
      (begin
        (read-line p) ; ignore #lang racket line
        (let ((r (interp (parse (read-all p)))))
          (unless (void? r)
            (println r)))
        (close-input-port p))))
   

iniquity/compile-file.rkt

  #lang racket
  (provide main)
  (require "parse.rkt" "compile.rkt" "read-all.rkt" a86/printer)
   
  ;; String -> Void
  ;; Compile contents of given file name,
  ;; emit asm code on stdout
  (define (main fn)
    (let ((p (open-input-file fn)))
      (begin
        (read-line p) ; ignore #lang racket line
        (displayln (asm-string (compile (parse (read-all p)))))
        (close-input-port p))))
   

14.2 An Interpreter for Functions

Writing an interpreter for Iniquity is not too hard. The main idea is that the interpretation of expression is now parameterized by a set of function definitions from the program. It serves as a second kind of environment that gets passed around and is used to resolve function definitions when interpreting function calls.

The way a function call is interpreted is to first interpret all of the arguments, building up a list of results. Then the definition of the function being called is looked up. If the function has the same number of parameters as there are arguments in the call, the body of the function is interpreted in an enviorment that maps each parameter to to the corresponding argument. That’s it.

iniquity/interp.rkt

  #lang racket
  (provide interp interp-env)
  (require "ast.rkt"
           "env.rkt"
           "interp-prims.rkt")
   
  ;; type Answer = Value | 'err
   
  ;; type Value =
  ;; | Integer
  ;; | Boolean
  ;; | Character
  ;; | Eof
  ;; | Void
  ;; | '()
  ;; | (cons Value Value)
  ;; | (box Value)
  ;; | (vector Value ...)
  ;; | (string Char ...)
   
  ;; type REnv = (Listof (List Id Value))
  ;; type Defns = (Listof Defn)
   
  ;; Prog -> Answer
  (define (interp p)
    (match p
      [(Prog ds e)
       (interp-env e '() ds)]))
   
  ;; Expr Env Defns -> Answer
  (define (interp-env e r ds)
    (match e
      [(Int i)  i]
      [(Bool b) b]
      [(Char c) c]
      [(Eof)    eof]
      [(Empty)  '()]
      [(Var x)  (lookup r x)]
      [(Str s)  s]
      [(Prim0 'void) (void)]
      [(Prim0 'read-byte) (read-byte)]
      [(Prim0 'peek-byte) (peek-byte)]
      [(Prim1 p e)
       (match (interp-env e r ds)
         ['err 'err]
         [v (interp-prim1 p v)])]
      [(Prim2 p e1 e2)
       (match (interp-env e1 r ds)
         ['err 'err]
         [v1 (match (interp-env e2 r ds)
               ['err 'err]
               [v2 (interp-prim2 p v1 v2)])])]
      [(Prim3 p e1 e2 e3)
       (match (interp-env e1 r ds)
         ['err 'err]
         [v1 (match (interp-env e2 r ds)
               ['err 'err]
               [v2 (match (interp-env e3 r ds)
                     ['err 'err]
                     [v3 (interp-prim3 p v1 v2 v3)])])])]
      [(If p e1 e2)
       (match (interp-env p r ds)
         ['err 'err]
         [v
          (if v
              (interp-env e1 r ds)
              (interp-env e2 r ds))])]
      [(Begin e1 e2)
       (match (interp-env e1 r ds)
         ['err 'err]
         [_    (interp-env e2 r ds)])]
      [(Let x e1 e2)
       (match (interp-env e1 r ds)
         ['err 'err]
         [v (interp-env e2 (ext r x v) ds)])]
      [(App f es)
       (match (interp-env* es r ds)
         ['err 'err]
         [vs
          (match (defns-lookup ds f)
            [(Defn f xs e)
             ; check arity matches
             (if (= (length xs) (length vs))
                 (interp-env e (zip xs vs) ds)
                 'err)])])]))
   
  ;; (Listof Expr) REnv Defns -> (Listof Value) | 'err
  (define (interp-env* es r ds)
    (match es
      ['() '()]
      [(cons e es)
       (match (interp-env e r ds)
         ['err 'err]
         [v (match (interp-env* es r ds)
              ['err 'err]
              [vs (cons v vs)])])]))
   
  ;; Defns Symbol -> Defn
  (define (defns-lookup ds f)
    (findf (match-lambda [(Defn g _ _) (eq? f g)])
           ds))
   
  (define (zip xs ys)
    (match* (xs ys)
      [('() '()) '()]
      [((cons x xs) (cons y ys))
       (cons (list x y)
             (zip xs ys))]))
   

A couple of things to note:

We can try it out:

Examples

> (interp
   (parse
    '[(define (double x) (+ x x))
      (double 5)]))

10

We can see it works with recursive functions, too. Here’s a recursive function for computing triangular numbers:

Examples

> (interp
    (parse
     '[(define (tri x)
         (if (zero? x)
             0
             (+ x (tri (sub1 x)))))
  
       (tri 9)]))

45

We can even define mutually recursive functions such as even? and odd?:

Examples

> (interp
    (parse
     '[(define (even? x)
         (if (zero? x)
             #t
             (odd? (sub1 x))))
  
       (define (odd? x)
         (if (zero? x)
             #f
             (even? (sub1 x))))
       (even? 101)]))

#f

And the utility for interpreting programs in files works as well:

shell

> racket -t interp-file.rkt -m example/len.rkt
3

14.3 Conventions of Calling

We’ve seen how to make calls in assembly already and our compiler emits code to call functions defined in C in our runtime such write_byte and read_byte. Let’s review the basics.

Suppose we want a function that does the same thing as (define (dbl x) (+ x x)). We can implement it in assembly with a labelled block of code:

(seq (Label 'dbl)
     (Mov 'rax (Offset 0 'rsp))
     (Add 'rax 'rax)
     (Ret))

This function expects its argument to be available as the first position on the stack. That’s different from the calling convention defined by the System V ABI and used to call C code, but we can make our conventions for our language, so long as we’re mindful of respecting the System V ABI when interacting with code generated by other compilers (e.g. gcc).

So under a calling convention in which arguments are passed on the stack, a caller should push a value for the argument before calling the function and then pop it off after the function call returns:

(seq (%%% "Calling dbl(5)")
     (Mov 'rax 5)
     (Push 'rax)
     (Call 'dbl)
     ; rax holds 10 now
     ; pop the argument
     (Add rsp 8))

This almost works, but has a crucial flaw. The problem is that Call is an instruction that pushes on the stack. It pushes the return address, i.e. the location of the instruction the function should return to when it’s done, which will be located in (Offset 'rsp 0) when control jumps to (Label 'dbl). That means that the argument will be in (Offset 'rsp 8).

So we can touch-up the example as follows and it will work:

Examples

> (asm-interp
    (seq (Global 'entry)
         (Label 'entry)
         (%%% "Calling dbl(5)")
         (Mov 'rax 5)
         (Push 'rax)
         (Call 'dbl)
         ; rax holds 10 now
         ; pop the argument
         (Add rsp 8)
         (Ret)
  
         (Label 'dbl)
         (Mov 'rax (Offset 'rsp 8))
         (Add 'rax 'rax)
         (Ret)))

10

One of the unfortunate things about this set-up is that the code for dbl has to “skip” past the return pointer on the stack to access the arguments.

Think for a moment about a call in Racket:

(define (dbl x)
  (+ x x))
 
(dbl 5)

Once the function call has fully evaluated it’s arguments (in this case the argument is a literal, so it’s already evaluated), then it should evaluate the body of the called function in an environment in which the parameter (here: x) is bound to the argument (5), hence (dbl 5) is equivalent to:

(let ((x 5))
  (+ x x))

The problem with this perspective on function calls is that it doesn’t work well with the Call instruction pushing the return pointer on as the top frame of the stack before jumping to the function body. In the let-expression, x occurs at lexical address 0, but because of the return address being on the stack, the value of x is really at (Offset 'rsp 8).

We can fix this, but let’s recall that Call can be expressed in terms of more primitive instructions: all a call is doing is computing the return address—the location of the instruction following the call—pushing that address on the stack, and then jumping to the given label.

We can do this ourselves, although we will need to use a new instruction: Lea:

(seq (%%% "Calling dbl(5)")
     (Mov 'rax 5)
     (Push 'rax)
     ; Call 'dbl but without using Call
     (let ((rp (gensym)))
       (seq (Lea 'rax rp)
            (Push 'rax)
            (Jmp 'dbl)
            (Label rp)))
     ; rax holds 10 now
     ; pop the argument
     (Add rsp 8)
     (Ret))

The Lea instruction is the “load effective address” instruction; it can compute the location of a given label. Here we are labelling the spot immediately after the jump to dbl, which is where we’d like the function call to return to.

We can verify this works just like before:

Examples

> (asm-interp
    (seq (Global 'entry)
         (Label 'entry)
         (%%% "Calling dbl(5)")
         (Mov 'rax 5)
         (Push 'rax)
         ; Call but without using Call
         (let ((rp (gensym)))
           (seq (Lea 'rax rp)
                (Push 'rax)
                (Jmp 'dbl)
                (Label rp)))
         ; rax holds 10 now
         ; pop the argument
         (Add rsp 8)
         (Ret)
  
         (Label 'dbl)
         (Mov 'rax (Offset 'rsp 8))
         (Add 'rax 'rax)
         (Ret)))

10

What’s nice about expressing things in their more primitive form is we can now change the way in which calls are made. For example, we can now push the address on the stack before the arguments:

(seq (%%% "Calling dbl(5)")
     ; Call 'dbl but without using Call
     (let ((rp (gensym)))
       (seq (Lea 'rax rp)
            (Push 'rax)    ; push return address
            (Mov 'rax 5)
            (Push 'rax)    ; *then* push argument
            (Jmp 'dbl)
            (Label rp)))
     ; rax holds 10 now
     ; pop the argument
     (Add rsp 8)
     (Ret))

This way the called function can fetch variable bindings by their lexical address, i.e. x will be at (Offset rsp 0).

The problem now is that the called function doesn’t have the return address at the top off the stack when it does its Ret, rather it has the value of its argument.

But the function knows how many arguments it takes and these arguments will be popped by the caller as soon as the function returns, so here’s an idea: let’s have the called function pop the arguments off. (Note that this is just like how let works: it pops its bindings off after the body is done.) After the arguments are popped, where is the return address on the stack? (Offset 'rsp 0). So after the arguments are popped, (Ret) works as expected.

Here’s a complete version where the caller no longer pops the arguments but instead leaves it up to the function:

Examples

> (asm-interp
    (seq (Global 'entry)
         (Label 'entry)
         (%%% "Calling dbl(5)")
         ; Call but without using Call
         (let ((rp (gensym)))
           (seq (Lea 'rax rp)
                (Push 'rax)    ; push return address
                (Mov 'rax 5)
                (Push 'rax)    ; *then* push argument
                (Jmp 'dbl)
                (Label rp)))
         ; rax holds 10 now
         ; no need to pop argument
         (Ret)
  
         (Label 'dbl)
         (Mov 'rax (Offset 'rsp 0)) ; x is at offset 0 now
         (Add 'rax 'rax)
         (Add 'rsp 8)               ; pop argument off
         (Ret)))

10

It works as expected.

Let’s use this as the basis of our calling convention.

A function call should:

The call will jump to the return address with all of these item popped off the stack.

A function should:

You may notice that things will go wrong if a call pushes a number of arguments that doesn’t match the number of parameters to the function, e.g. compiling something like:

(define (dbl x)
  (+ x x))
 
(dbl 1 2 3)

In Iniquity, it’s possible to statically determine whether the function call’s number of arguments match the function’s number of parameters and we can consider mismatches as syntax errors (and thus our compiler need not worry about this happening). In more expressive languages, this won’t be the case, but we can consider how to check that these two numbers match at run-time. For now, let’s not worry about it.

14.4 Compiling Function Calls and Definitions

With our calling convention in place, it’s pretty easy to compile function definitions and function calls. A function definition:

(define (f x ...)
  e)

Should be compiled as:

(seq (Label f)
     (compile-e e (list x ...))
     (Add 'rsp (* 8 (length (list x ....))))
     (Ret))

This creates a label based on the function’s name. The body of the function is compiled in an environment in which all of the parameters are bound. After the body executes, all of the arguments are popped from the stack, leaving the return address at the top of the stack, at which point the function returns.

For a function call:

(f e0 ...)

We can uses the following helper for compiling a sequence of expressions and pushing their values on the stack:

; [Listof Expr] CEnv -> Asm
(define (compile-es es c)
  (match es
    ['() '()]
    [(cons e es)
     (seq (compile-e e c)
          (Push rax)
          (compile-es es (cons #f c)))]))

Using this, the call can be compiled as:

(let ((r (gensym 'ret)))
  (seq (Lea rax r)
       (Push rax)
       (compile-es es (cons #f c))
       (Jmp (symbol->label f))
       (Label r)))

Notice that we compile es in a static environment that is one frame larger than that of the call because we have pushed the return address on the stack and need to adjust the offsets of variable references in es.

It’s convenient that we evaluate es, saving the results to the stack, which is just where they need to be in order to make the function call. There is a subtle problem with this code though: compile-es generates code to execute the expression in es from left to right, pushing to the stack along the way. Thus the last argument will be the first element of the stack and the first argument will be the furthest element. That suggests we should compile the body of a function with its parameter list reversed so that the last parameter is at offset 0 and its first parameter is as (sub1 n) where n is the number of parameters. Touching up the code, we compile function definitions as:

(seq (Label f)
     (compile-e e (reverse (list x ...)))
     (Add 'rsp (* 8 (length (list x ....))))
     (Ret))

Now writing the complete definitions for compile-define and compile-app, we have:

; Defn -> Asm
(define (compile-define d)
  (match d
    [(Defn f xs e)
     (seq (Label f)
          (compile-e e (reverse xs))
          (Add rsp (* 8 (length xs)))
          (Ret))]))
 
; Id [Listof Expr] CEnv -> Asm
(define (compile-app f es c)
  (let ((r (gensym 'ret)))
    (seq (Lea rax r)
         (Push rax)
         (compile-es es (cons #f c))
         (Jmp (symbol->label f))
         (Label r))))
14.5 On Names and Labels

There is one final wrinkle, which is that identifiers in our language include many things which are not valid labels for the Nasm assembler. Hence compiling a function like:

(define (^weird% x) x)

will cause the assembler to reject the emitted code since '^weird% is not a valid label name. Labels must consist only of letters, numbers, _, $, ?, @, ~, and ?.

We solve this problem by using a function that maps arbitrary Racket symbols to valid Nasm labels (represented as symbols). The function has the property distinct symbols always map to distinct labels.

Examples

> (symbol->label '^weird%)

'label__weird__c3e020e4e5471e4

Using this function, we can touch up our code:

; Defn -> Asm
(define (compile-define d)
  (match d
    [(Defn f xs e)
     (seq (Label (symbol->label f))
          (compile-e e (reverse xs))
          (Add rsp (* 8 (length xs)))
          (Ret))]))
14.6 A Compiler for Iniquity

The last piece of the puzzle is the function for emitting code for a complete program:

; Prog -> Asm
(define (compile p)
  (match p
    [(Prog ds e)
     (prog (externs)
           (Global 'entry)
           (Label 'entry)
           (Mov rbx rdi) ; recv heap pointer
           (compile-e e '())
           (Ret)
           (compile-defines ds)
           (Label 'raise_error_align)
           (Sub rsp 8)
           (Jmp 'raise_error))]))

It relies on a helper compile-defines for compiling each function definition and flattening the assembly instructions into a single list:

; [Listof Defn] -> Asm
(define (compile-defines ds)
  (match ds
    ['() (seq)]
    [(cons d ds)
     (seq (compile-define d)
          (compile-defines ds))]))

Here’s an example of the code this compiler emits:

Examples

> (displayln
   (asm-string
    (compile
     (parse '[(define (double x) (+ x x)) (double 5)]))))

        global entry

        default rel

        section .text

        extern peek_byte

        extern read_byte

        extern write_byte

        extern raise_error

        global entry

entry:

        mov rbx, rdi

        lea rax, [rel ret6765]

        push rax

        mov rax, 80

        push rax

        jmp label_double_6334fa372629b92

ret6765:

        ret

label_double_6334fa372629b92:

        mov rax, [rsp + 0]

        push rax

        mov rax, [rsp + 8]

        pop r8

        mov r9, r8

        and r9, 15

        cmp r9, 0

        jne raise_error_align

        mov r9, rax

        and r9, 15

        cmp r9, 0

        jne raise_error_align

        add rax, r8

        add rsp, 8

        ret

raise_error_align:

        mov r15, rsp

        and r15, 8

        sub rsp, r15

        call raise_error

And we can confirm running the code produces results consistent with the interpreter:

Examples

> (current-objs '("runtime.o"))
> (define (run p)
    (unload/free (asm-interp (compile (parse p)))))
> (run '[(define (double x) (+ x x))
         (double 5)])

10

> (run '[(define (tri x)
           (if (zero? x)
               0
               (+ x (tri (sub1 x)))))
         (tri 9)])

45

> (run '[(define (even? x)
           (if (zero? x)
               #t
               (odd? (sub1 x))))
         (define (odd? x)
           (if (zero? x)
               #f
               (even? (sub1 x))))
         (even? 101)])

#f

The complete compiler code:

iniquity/compile.rkt

  #lang racket
  (provide (all-defined-out))
  (require "ast.rkt" "types.rkt" "compile-ops.rkt" a86/ast)
   
  ;; Registers used
  (define rax 'rax) ; return
  (define rbx 'rbx) ; heap
  (define rsp 'rsp) ; stack
  (define rdi 'rdi) ; arg
   
  ;; type CEnv = [Listof Variable]
   
  ;; Prog -> Asm
  (define (compile p)
    (match p
      [(Prog ds e)  
       (prog (externs)
             (Global 'entry)
             (Label 'entry)
             (Mov rbx rdi)   ; recv heap pointer
             (compile-e e '())
             (Ret)
             (compile-defines ds)
             (Label 'raise_error_align)
             pad-stack
             (Call 'raise_error))]))
   
  (define (externs)
    (seq (Extern 'peek_byte)
         (Extern 'read_byte)
         (Extern 'write_byte)
         (Extern 'raise_error)))
   
  ;; [Listof Defn] -> Asm
  (define (compile-defines ds)
    (match ds
      ['() (seq)]
      [(cons d ds)
       (seq (compile-define d)
            (compile-defines ds))]))
   
  ;; Defn -> Asm
  (define (compile-define d)
    (match d
      [(Defn f xs e)
       (seq (Label (symbol->label f))
            (compile-e e (reverse xs))
            (Add rsp (* 8 (length xs))) ; pop args
            (Ret))]))
   
  ;; Expr CEnv -> Asm
  (define (compile-e e c)
    (match e
      [(Int i)            (compile-value i)]
      [(Bool b)           (compile-value b)]
      [(Char c)           (compile-value c)]
      [(Eof)              (compile-value eof)]
      [(Empty)            (compile-value '())]
      [(Var x)            (compile-variable x c)]
      [(Str s)            (compile-string s)]
      [(Prim0 p)          (compile-prim0 p c)]
      [(Prim1 p e)        (compile-prim1 p e c)]
      [(Prim2 p e1 e2)    (compile-prim2 p e1 e2 c)]
      [(Prim3 p e1 e2 e3) (compile-prim3 p e1 e2 e3 c)]
      [(If e1 e2 e3)      (compile-if e1 e2 e3 c)]
      [(Begin e1 e2)      (compile-begin e1 e2 c)]
      [(Let x e1 e2)      (compile-let x e1 e2 c)]
      [(App f es)         (compile-app f es c)]))
   
  ;; Value -> Asm
  (define (compile-value v)
    (seq (Mov rax (imm->bits v))))
   
  ;; Id CEnv -> Asm
  (define (compile-variable x c)
    (let ((i (lookup x c)))
      (seq (Mov rax (Offset rsp i)))))
   
  ;; String -> Asm
  (define (compile-string s)
    (let ((len (string-length s)))
      (if (zero? len)
          (seq (Mov rax type-str))
          (seq (Mov rax len)
               (Mov (Offset rbx 0) rax)
               (compile-string-chars (string->list s) 8)
               (Mov rax rbx)
               (Or rax type-str)
               (Add rbx
                    (+ 8 (* 4 (if (odd? len) (add1 len) len))))))))
   
  ;; [Listof Char] Integer -> Asm
  (define (compile-string-chars cs i)
    (match cs
      ['() (seq)]
      [(cons c cs)
       (seq (Mov rax (char->integer c))
            (Mov (Offset rbx i) 'eax)
            (compile-string-chars cs (+ 4 i)))]))
   
  ;; Op0 CEnv -> Asm
  (define (compile-prim0 p c)
    (compile-op0 p))
   
  ;; Op1 Expr CEnv -> Asm
  (define (compile-prim1 p e c)
    (seq (compile-e e c)
         (compile-op1 p)))
   
  ;; Op2 Expr Expr CEnv -> Asm
  (define (compile-prim2 p e1 e2 c)
    (seq (compile-e e1 c)
         (Push rax)
         (compile-e e2 (cons #f c))
         (compile-op2 p)))
   
  ;; Op3 Expr Expr Expr CEnv -> Asm
  (define (compile-prim3 p e1 e2 e3 c)
    (seq (compile-e e1 c)
         (Push rax)
         (compile-e e2 (cons #f c))
         (Push rax)
         (compile-e e3 (cons #f (cons #f c)))
         (compile-op3 p)))
   
  ;; Expr Expr Expr CEnv -> Asm
  (define (compile-if e1 e2 e3 c)
    (let ((l1 (gensym 'if))
          (l2 (gensym 'if)))
      (seq (compile-e e1 c)
           (Cmp rax val-false)
           (Je l1)
           (compile-e e2 c)
           (Jmp l2)
           (Label l1)
           (compile-e e3 c)
           (Label l2))))
   
  ;; Expr Expr CEnv -> Asm
  (define (compile-begin e1 e2 c)
    (seq (compile-e e1 c)
         (compile-e e2 c)))
   
  ;; Id Expr Expr CEnv -> Asm
  (define (compile-let x e1 e2 c)
    (seq (compile-e e1 c)
         (Push rax)
         (compile-e e2 (cons x c))
         (Add rsp 8)))
   
  ;; Id [Listof Expr] CEnv -> Asm
  ;; The return address is placed above the arguments, so callee pops
  ;; arguments and return address is next frame
  (define (compile-app f es c)
    (let ((r (gensym 'ret)))
      (seq (Lea rax r)
           (Push rax)
           (compile-es es (cons #f c))
           (Jmp (symbol->label f))
           (Label r))))
   
  ;; [Listof Expr] CEnv -> Asm
  (define (compile-es es c)
    (match es
      ['() '()]
      [(cons e es)
       (seq (compile-e e c)
            (Push rax)
            (compile-es es (cons #f c)))]))
   
  ;; Id CEnv -> Integer
  (define (lookup x cenv)
    (match cenv
      ['() (error "undefined variable:" x)]
      [(cons y rest)
       (match (eq? x y)
         [#t 0]
         [#f (+ 8 (lookup x rest))])]))
   
  ;; Symbol -> Label
  ;; Produce a symbol that is a valid Nasm label
  (define (symbol->label s)
    (string->symbol
     (string-append
      "label_"
      (list->string
       (map (λ (c)
              (if (or (char<=? #\a c #\z)
                      (char<=? #\A c #\Z)
                      (char<=? #\0 c #\9)
                      (memq c '(#\_ #\$ #\# #\@ #\~ #\. #\?)))
                  c
                  #\_))
           (string->list (symbol->string s))))
      "_"
      (number->string (eq-hash-code s) 16))))