# The Mu computer's level-2 language, also called Mu. # http://akkartik.name/post/mu-2019-2 # # To run: # $ ./ntranslate init.linux 0*.subx apps/mu.subx # == Goals # 1. Be memory safe. It should be impossible to corrupt the heap, or to create # a bad pointer. (Requires strong type safety.) # 2. Do as little as possible to achieve goal 1. # - runtime checks to avoid complex static analysis # - minimize impedance mismatch between source language and SubX target # == Language description # A program is a sequence of function definitions. # # Function example: # fn foo n: int -> result/eax: int { # ... # } # # Functions consist of a name, optional inputs, optional outputs and a block. # # Function inputs and outputs are variables. All variables have a type and # storage specifier. They can be placed either in memory (on the stack) or in # one of 6 named registers. # eax ecx edx ebx esi edi # Variables in registers must be primitive 32-bit types. # Variables not explicitly placed in a register are on the stack. # Variables in registers need not have a name; in that case you refer to them # directly by the register name. # # Function inputs are always passed in memory (on the stack), while outputs # are always returned in registers. # # Blocks mostly consist of statements. # # Statements mostly consist of a name, optional inputs and optional outputs. # # Statement inputs are variables or literals. Variables need to specify type # (and storage) the first time they're mentioned but not later. # # Statement outputs, like function outputs, must be variables in registers. # # Statement names must be either primitives or user-defined functions. # # Primitives can write to any register. # User-defined functions only write to hard-coded registers. Outputs of each # call must have the same registers as in the function definition. # # There are some other statement types: # - blocks. Multiple statements surrounded by '{...}' and optionally # prefixed with a label name and ':' # - { # ... # } # - foo: { # ... # } # # - variable definitions on the stack. E.g.: # - var foo: int # - var bar: (array int 3) # There's no initializer; variables are automatically initialized. # # - variables definitions in a register. E.g.: # - var foo/eax : int <- add bar 1 # The initializer is mandatory and must be a valid instruction that writes # a single output to the right register. In practice registers will # usually be either initialized by primitives or copied from eax. # - var eax : int <- foo bar quux # var floo/ecx : int <- copy eax # # Still todo: # global variables # heap allocations (planned name: 'handle') # user-defined types: 'type' for structs, 'choice' for unions # short-lived 'address' type for efficiently writing inside nested structs # # Formal types: # A program is a linked list of functions # A function contains: # name: string # inouts: linked list of vars <-- 'inouts' is more precise than 'inputs' # data: (address var) # next: (address list) # outputs: linked list of vars # data: (address var) # next: (address list) # body: block # A var-type contains: # name: string # type: s-expression of type ids # Statements are not yet fully designed. # statement = var definition or simple statement or block # simple statement: # name: string # inouts: linked list of vars # outputs: linked list of vars # block = linked list of statements # == Translation # Now that we know what the language looks like in the large, let's think # about how translation happens from the bottom up. The interplay between # variable scopes and statements using variables is the most complex aspect of # translation. # # Assume that we maintain a 'functions' list while parsing source code. And a # 'primitives' list is a global constant. Both these contain enough information # to perform type-checking on function calls or primitive statements, respectively. # # Defining variables pushes them on a stack with the current block depth and # enough information about their location (stack offset or register id). # Starting a block increments the current block id. # Each statement now has enough information to emit code for it. # Ending a block is where the magic happens: # pop all variables at the current block depth # emit code to restore all register variables introduced at the current depth # emit code to clean up all stack variables at the current depth (just increment esp) # decrement the current block depth # # One additional check we'll need is to ensure that a variable in a register # isn't shadowed by a different one. That may be worth a separate data # structure but for now repeatedly scanning the var stack should suffice. # # Formal types: # functions, primitives: linked list of info # name: string # inouts: linked list of vars # outputs: linked list of vars # live-vars: stack of vars # var: # name: string # type: s-expression? Just a type id for now. # block: int # stack-offset: int (added to ebp) # register: string # either usual register names # or '*' to indicate any register # At most one of stack-offset or register-index must be non-zero. # A register of '*' designates a variable _template_. Only legal in formal # parameters for primitives. # == Compiling a single instruction # Determine the function or primitive being called. # If no matches, show all functions/primitives with the same name, along # with reasons they don't match. (type and storage checking) # It must be a function if: # #outputs > 1, or # #inouts > 2, or # #inouts + #outputs > 2 # If it's a function, emit: # (low-level-name ...) # Otherwise (it's a primitive): # assert(#inouts <= 2 && #outs <= 1 && (#inouts + #outs) <= 2) # emit opcode # emit-rm32(inout[0]) # if out[0] exists: emit-r32(out[0]) # else if inout[1] is a literal: emit-imm32(inout[1]) # else: emit-rm32(inout[1]) # emit-rm32 and emit-r32 should check that the variable they intend is still # available in the register. # == Emitting a block # Emit block name if necessary # Emit '{' # When you encounter a statement, emit it as above # When you encounter a variable declaration # emit any code needed for it (bzeros) # push it on the var stack # update register dict if necessary # When you encounter '}' # While popping variables off the var stack until block id changes # Emit code needed to clean up the stack # either increment esp # or pop into appropriate register # The rest is straightforward. == data Program: # (address function) 0/imm32 Function-name: 0/imm32 Function-subx-name: 4/imm32 Function-inouts: # (address list var) 8/imm32 Function-outputs: # (address list var) 0xc/imm32 Function-body: # (address block) 0x10/imm32 Function-next: # (address function) 0x14/imm32 Function-size: 0x18/imm32/24 Stmt-name: 0/imm32 Stmt-inouts: 4/imm32 Stmt-outputs: 8/imm32 Stmt-next: 0xc/imm32 Stmt-size: 0x10/imm32 Var-name: 0/imm32 Var-type: 4/imm32 Var-block: 8/imm32 Var-stack-offset: 0xc/imm32 Var-register-index: 0x10/imm32 Var-size: 0x14/imm32 == code Entry: # . prologue 89/<- %ebp 4/r32/esp (new-segment Heap-size Heap) # if (argv[1] == "test') run-tests() { # if (argc <= 1) break 81 7/subop/compare *ebp 1/imm32 7e/jump-if-lesser-or-equal break/disp8 # if (argv[1] != "test") break (kernel-string-equal? *(ebp+8) "test") # => eax 3d/compare-eax-and 0/imm32 74/jump-if-equal break/disp8 # (run-tests) # syscall(exit, *Num-test-failures) 8b/-> *Num-test-failures 3/r32/ebx eb/jump $mu-main:end/disp8 } # otherwise convert Stdin (convert-mu Stdin Stdout) (flush Stdout) # syscall(exit, 0) bb/copy-to-ebx 0/imm32 $mu-main:end: b8/copy-to-eax 1/imm32/exit cd/syscall 0x80/imm8 convert-mu: # in : (address buffered-file), out : (address buffered-file) # . prologue 55/push-ebp 89/<- %ebp 4/r32/esp # (parse-mu *(ebp+8)) (check-mu-types) (emit-subx *(ebp+0xc)) $convert-mu:end: # . epilogue 89/<- %esp 5/r32/ebp 5d/pop-to-ebp c3/return test-convert-empty-input: # empty input => empty output # . prologue 55/push-ebp 89/<- %ebp 4/r32/esp # setup (clear-stream _test-input-stream) (clear-stream _test-input-buffered-file->buffer) (clear-stream _test-output-stream) (clear-stream _test-output-buffered-file->buffer) # (convert-mu _test-input-buffered-file _test-output-buffered-file) (flush _test-output-buffered-file) (check-stream-equal _test-output-stream "" "F - test-convert-empty-input") # . epilogue 89/<- %esp 5/r32/ebp 5d/pop-to-ebp c3/return test-convert-function-skeleton: # empty function decl => function prologue and epilogue # fn foo { # } # => # foo: # # . prologue # 55/push-ebp # 89/<- %ebp 4/r32/esp # # . epilogue # 89/<- %esp 5/r32/ebp # 5d/pop-to-ebp # c3/return # . prologue 55/push-ebp 89/<- %ebp 4/r32/esp # setup (clear-stream _test-input-stream) (clear-stream _test-input-buffered-file->buffer) (clear-stream _test-output-stream) (clear-stream _test-output-buffered-file->buffer) # (write _test-input-stream "fn foo {\n") (write _test-input-stream "}\n") # convert (convert-mu _test-input-buffered-file _test-output-buffered-file) (flush _test-output-buffered-file) #? # dump _test-output-stream {{{ #? (write 2 "^") #? (write-stream 2 _test-output-stream) #? (write 2 "$\n") #? (rewind-stream _test-output-stream) #? # }}} # check output (check-next-stream-line-equal _test-output-stream "foo:" "F - test-convert-function-skeleton/0") (check-next-stream-line-equal _test-output-stream "# . prologue" "F - test-convert-function-skeleton/1") (check-next-stream-line-equal _test-output-stream "55/push-ebp" "F - test-convert-function-skeleton/2") (check-next-stream-line-equal _test-output-stream "89/<- %ebp 4/r32/esp" "F - test-convert-function-skeleton/3") (check-next-stream-line-equal _test-output-stream "# . epilogue" "F - test-convert-function-skeleton/4") (check-next-stream-line-equal _test-output-stream "89/<- %esp 5/r32/ebp" "F - test-convert-function-skeleton/5") (check-next-stream-line-equal _test-output-stream "5d/pop-to-ebp" "F - test-convert-function-skeleton/6") (check-next-stream-line-equal _test-output-stream "c3/return" "F - test-convert-function-skeleton/7") # . epilogue 89/<- %esp 5/r32/ebp 5d/pop-to-ebp c3/return test-convert-multiple-function-skeletons: # multiple functions correctly organized into a linked list # fn foo { # } # fn bar { # } # => # foo: # # . prologue # 55/push-ebp # 89/<- %ebp 4/r32/esp # # . epilogue # 89/<- %esp 5/r32/ebp # 5d/pop-to-ebp # c3/return # bar: # # . prologue # 55/push-ebp # 89/<- %ebp 4/r32/esp # # . epilogue # 89/<- %esp 5/r32/ebp # 5d/pop-to-ebp # c3/return # . prologue 55/push-ebp 89/<- %ebp 4/r32/esp # setup (clear-stream _test-input-stream) (clear-stream _test-input-buffered-file->buffer) (clear-stream _test-output-stream) (clear-stream _test-output-buffered-file->buffer) # (write _test-input-stream "fn foo {\n") (write _test-input-stream "}\n") (write _test-input-stream "fn bar {\n") (write _test-input-stream "}\n") # convert (convert-mu _test-input-buffered-file _test-output-buffered-file) (flush _test-output-buffered-file) #? # dump _test-output-stream {{{ #? (write 2 "^") #? (write-stream 2 _test-output-stream) #? (write 2 "$\n") #? (rewind-stream _test-output-stream) #? # }}} # check first function (check-next-stream-line-equal _test-output-stream "foo:" "F - test-convert-multiple-function-skeletons/0") (check-next-stream-line-equal _test-output-stream "# . prologue" "F - test-convert-multiple-function-skeletons/1") (check-next-stream-line-equal _test-output-stream "55/push-ebp" "F - test-convert-multiple-function-skeletons/2") (check-next-stream-line-equal _test-output-stream "89/<- %ebp 4/r32/esp" "F - test-convert-multiple-function-skeletons/3") (check-next-stream-line-equal _test-output-stream "# . epilogue" "F - test-convert-multiple-function-skeletons/4") (check-next-stream-line-equal _test-output-stream "89/<- %esp 5/r32/ebp" "F - test-convert-multiple-function-skeletons/5") (check-next-stream-line-equal _test-output-stream "5d/pop-to-ebp" "F - test-convert-multiple-function-skeletons/6") (check-next-stream-line-equal _test-output-stream "c3/return" "F - test-convert-multiple-function-skeletons/7") # check second function (check-next-stream-line-equal _test-output-stream "bar:" "F - test-convert-multiple-function-skeletons/10") (check-next-stream-line-equal _test-output-stream "# . prologue" "F - test-convert-multiple-function-skeletons/11") (check-next-stream-line-equal _test-output-stream "55/push-ebp" "F - test-convert-multiple-function-skeletons/12") (check-next-stream-line-equal _test-output-stream "89/<- %ebp 4/r32/esp" "F - test-convert-multiple-function-skeletons/13") (check-next-stream-line-equal _test-output-stream "# . epilogue" "F - test-convert-multiple-function-skeletons/14") (check-next-stream-line-equal _test-output-stream "89/<- %esp 5/r32/ebp" "F - test-convert-multiple-function-skeletons/15") (check-next-stream-line-equal _test-output-stream "5d/pop-to-ebp" "F - test-convert-multiple-function-skeletons/16") (check-next-stream-line-equal _test-output-stream "c3/return" "F - test-convert-multiple-function-skeletons/17") # . epilogue 89/<- %esp 5/r32/ebp 5d/pop-to-ebp c3/return test-convert-function-with-arg: # function with one arg and a copy instruction # fn foo n : int -> result/eax : int { # result <- copy n # } # => # foo: # # . prologue # 55/push-ebp # 89/<- %ebp 4/r32/esp # { # # result <- copy n # 8b/-> *(ebp+8) 0/r32/eax # } # # . epilogue # 89/<- %esp 5/r32/ebp # 5d/pop-to-ebp # c3/return # . prologue 55/push-ebp 89/<- %ebp 4/r32/esp # setup (clear-stream _test-input-stream) (clear-stream _test-input-buffered-file->buffer) (clear-stream _test-output-stream) (clear-stream _test-output-buffered-file->buffer) # (write _test-input-stream "fn foo {\n") (write _test-input-stream "}\n") # convert (convert-mu _test-input-buffered-file _test-output-buffered-file) (flush _test-output-buffered-file) #? # dump _test-output-stream {{{ #? (write 2 "^") #? (write-stream 2 _test-output-stream) #? (write 2 "$\n") #? (rewind-stream _test-output-stream) #? # }}} # check output (check-next-stream-line-equal _test-output-stream "foo:" "F - test-convert-function-skeleton/0") (check-next-stream-line-equal _test-output-stream "# . prologue" "F - test-convert-function-skeleton/1") (check-next-stream-line-equal _test-output-stream "55/push-ebp" "F - test-convert-function-skeleton/2") (check-next-stream-line-equal _test-output-stream "89/<- %ebp 4/r32/esp" "F - test-convert-function-skeleton/3") (check-next-stream-line-equal _test-output-stream "# . epilogue" "F - test-convert-function-skeleton/4") (check-next-stream-line-equal _test-output-stream "89/<- %esp 5/r32/ebp" "F - test-convert-function-skeleton/5") (check-next-stream-line-equal _test-output-stream "5d/pop-to-ebp" "F - test-convert-function-skeleton/6") (check-next-stream-line-equal _test-output-stream "c3/return" "F - test-convert-function-skeleton/7") # . epilogue 89/<- %esp 5/r32/ebp 5d/pop-to-ebp c3/return parse-mu: # in : (address buffered-file) # pseudocode # var curr-function = Program # var line : (stream byte 512) # var word-slice : slice # while true # line loop # clear-stream(line) # read-line-buffered(in, line) # if (line->write == 0) break # end of file # while true # word loop # word-slice = next-word-or-string(line) # if slice-empty?(word-slice) # end of line # break # else if slice-starts-with?(word-slice, "#") # comment # break # end of line # else if slice-equal(word-slice, "fn") # var new-function : (address function) = new function # populate-mu-function(in, new-function) # *curr-function = new-function # curr-function = &new-function->next # else # abort() # # . prologue 55/push-ebp 89/<- %ebp 4/r32/esp # . save registers 50/push-eax 51/push-ecx 52/push-edx 57/push-edi # var line/ecx : (stream byte 512) 81 5/subop/subtract %esp 0x200/imm32 68/push 0x200/imm32/length 68/push 0/imm32/read 68/push 0/imm32/write 89/<- %ecx 4/r32/esp # var word-slice/edx : slice 68/push 0/imm32/end 68/push 0/imm32/start 89/<- %edx 4/r32/esp # var curr-function/edi : (address function) = Program bf/copy-to-edi Program/imm32 { $parse-mu:line-loop: (clear-stream %ecx) (read-line-buffered *(ebp+8) %ecx) # if (line->write == 0) break 81 7/subop/compare *ecx 0/imm32 0f 84/jump-if-equal break/disp32 #? # dump line {{{ #? (write 2 "line: ^") #? (write-stream 2 %ecx) #? (write 2 "$\n") #? (rewind-stream %ecx) #? # }}} { # word loop $parse-mu:word-loop: (next-word-or-string %ecx %edx) # if slice-empty?(word-slice) break (slice-empty? %edx) 3d/compare-eax-and 0/imm32 0f 85/jump-if-not-equal break/disp32 # if (*word-slice->start == "#") break # . eax = *word-slice->start 8b/-> *edx 0/r32/eax 8a/copy-byte *eax 0/r32/AL 81 4/subop/and %eax 0xff/imm32 # . if (eax == '#') break 3d/compare-eax-and 0x23/imm32/hash 0f 84/jump-if-equal break/disp32 # if (slice-equal?(word-slice, "fn")) parse a function { (slice-equal? %edx "fn") 3d/compare-eax-and 0/imm32 0f 84/jump-if-equal break/disp32 # var new-function/eax : (address function) = populate-mu-function() (allocate Heap *Function-size) # => eax (populate-mu-function-header %ecx %eax) (populate-mu-function-body *(ebp+8) %eax) # *curr-function = new-function 89/<- *edi 0/r32/eax # curr-function = &new-function->next 8d/address-> *(eax+0x10) 7/r32/edi e9/jump $parse-mu:word-loop/disp32 } # otherwise abort e9/jump $parse-mu:abort/disp32 } # end word loop e9/jump loop/disp32 } # end line loop $parse-mu:end: # . reclaim locals 81 0/subop/add %esp 0x214/imm32 # . restore registers 5f/pop-to-edi 5a/pop-to-edx 59/pop-to-ecx 58/pop-to-eax # . epilogue 89/<- %esp 5/r32/ebp 5d/pop-to-ebp c3/return $parse-mu:abort: # error("unexpected top-level command: " word-slice "\n") (write-buffered Stderr "unexpected top-level command: ") (write-buffered Stderr %edx) (write-buffered Stderr "\n") (flush Stderr) # . syscall(exit, 1) bb/copy-to-ebx 1/imm32 b8/copy-to-eax 1/imm32/exit cd/syscall 0x80/imm8 # never gets here # errors considered: # fn foo { { # fn foo { } # fn foo { } { # fn foo # no block populate-mu-function-header: # first-line : (address stream byte), out : (address function) # . prologue 55/push-ebp 89/<- %ebp 4/r32/esp # . save registers 50/push-eax 51/push-ecx 57/push-edi # edi = out 8b/-> *(ebp+0xc) 7/r32/edi # var word-slice/ecx : slice 68/push 0/imm32/end 68/push 0/imm32/start 89/<- %ecx 4/r32/esp # save function name (next-word *(ebp+8) %ecx) (slice-to-string Heap %ecx) # => eax 89/<- *edi 0/r32/eax # assert that next token is '{' (next-word *(ebp+8) %ecx) (slice-equal? %ecx "{") 3d/compare-eax-and 0/imm32 74/jump-if-equal $populate-mu-function-header:abort/disp8 # assert that there's no further token { # word-slice = next-word(line) (next-word *(ebp+8) %ecx) # if (word-slice == '') break (slice-empty? %ecx) 3d/compare-eax-and 0/imm32 75/jump-if-not-equal break/disp8 # if (slice-starts-with?(word-slice, "#")) break # . eax = *word-slice->start 8b/-> *edx 0/r32/eax 8a/copy-byte *eax 0/r32/AL 81 4/subop/and %eax 0xff/imm32 # . if (eax == '#') break 3d/compare-eax-and 0x23/imm32/hash 74/jump-if-equal break/disp8 # otherwise abort eb/jump $populate-mu-function-header:abort/disp8 } $populate-mu-function-header:end: # . reclaim locals 81 0/subop/add %esp 8/imm32 # . restore registers 5f/pop-to-edi 59/pop-to-ecx 58/pop-to-eax # . epilogue 89/<- %esp 5/r32/ebp 5d/pop-to-ebp c3/return $populate-mu-function-header:abort: # error("function header not in form 'fn {'") (write-buffered Stderr "function header not in form 'fn {' -- '") (rewind-stream *(ebp+8)) (write-stream 2 *(ebp+8)) (write-buffered Stderr "'\n") (flush Stderr) # . syscall(exit, 1) bb/copy-to-ebx 1/imm32 b8/copy-to-eax 1/imm32/exit cd/syscall 0x80/imm8 # never gets here # errors considered: # { abc populate-mu-function-body: # in : (address buffered-file), out : (address function) # . prologue 55/push-ebp 89/<- %ebp 4/r32/esp # . save registers 50/push-eax 51/push-ecx 52/push-edx 53/push-ebx # var line/ecx : (stream byte 512) 81 5/subop/subtract %esp 0x200/imm32 68/push 0x200/imm32/length 68/push 0/imm32/read 68/push 0/imm32/write 89/<- %ecx 4/r32/esp # var word-slice/edx : slice 68/push 0/imm32/end 68/push 0/imm32/start 89/<- %edx 4/r32/esp # var open-curly-count/ebx : int = 1 bb/copy-to-ebx 1/imm32 { # line loop $populate-mu-function-body:line-loop: # if (open-curly-count == 0) break 81 7/subop/compare %ebx 0/imm32 0f 84/jump-if-equal break/disp32 # line = read-line-buffered(in) (clear-stream %ecx) (read-line-buffered *(ebp+8) %ecx) # if (line->write == 0) break 81 7/subop/compare *ecx 0/imm32 0f 84/jump-if-equal break/disp32 # word-slice = next-word(line) (next-word %ecx %edx) # if slice-empty?(word-slice) continue (slice-empty? %ecx) 3d/compare-eax-and 0/imm32 75/jump-if-not-equal loop/disp8 # if (slice-starts-with?(word-slice, '#') continue # . eax = *word-slice->start 8b/-> *edx 0/r32/eax 8a/copy-byte *eax 0/r32/AL 81 4/subop/and %eax 0xff/imm32 # . if (eax == '#') continue 3d/compare-eax-and 0x23/imm32/hash 74/jump-if-equal loop/disp8 { # if slice-equal?(word-slice, "{") ++open-curly-count { (slice-equal? %ecx "{") 3d/compare-eax-and 0/imm32 74/jump-if-equal break/disp8 43/increment-ebx eb/jump $curly-found:end/disp8 } # else if slice-equal?(word-slice, "}") --open-curly-count { (slice-equal? %ecx "}") 3d/compare-eax-and 0/imm32 74/jump-if-equal break/disp8 4b/decrement-ebx eb/jump $curly-found:end/disp8 } # else break eb/jump $populate-mu-function-body:end/disp8 } # - check for invalid tokens after curly $curly-found:end: # second-word-slice = next-word(line) (next-word %ecx %edx) # if slice-empty?(second-word-slice) continue (slice-empty? %ecx) 3d/compare-eax-and 0/imm32 0f 85/jump-if-not-equal loop/disp32 # if (slice-starts-with?(second-word-slice, '#') continue # . eax = *second-word-slice->start 8b/-> *edx 0/r32/eax 8a/copy-byte *eax 0/r32/AL 81 4/subop/and %eax 0xff/imm32 # . if (eax == '#') continue 3d/compare-eax-and 0x23/imm32/hash 0f 84/jump-if-equal loop/disp32 # abort eb/jump $populate-mu-function-body:abort/disp8 } # end line loop $populate-mu-function-body:end: # . reclaim locals 81 0/subop/add %esp 0x214/imm32 # . restore registers 5b/pop-to-ebx 5a/pop-to-edx 59/pop-to-ecx 58/pop-to-eax # . epilogue 89/<- %esp 5/r32/ebp 5d/pop-to-ebp c3/return $populate-mu-function-body:abort: # error("'{' or '}' should be on its own line, but got '") (write-buffered Stderr "'{' or '}' should be on its own line, but got '") (rewind-stream %ecx) (write-stream 2 %ecx) (write-buffered Stderr "'\n") (flush Stderr) # . syscall(exit, 1) bb/copy-to-ebx 1/imm32 b8/copy-to-eax 1/imm32/exit cd/syscall 0x80/imm8 # never gets here check-mu-types: # . prologue 55/push-ebp 89/<- %ebp 4/r32/esp # $check-types:end: # . epilogue 89/<- %esp 5/r32/ebp 5d/pop-to-ebp c3/return emit-subx: # out : (address buffered-file) # . prologue 55/push-ebp 89/<- %ebp 4/r32/esp # . save registers 50/push-eax 51/push-ecx 57/push-edi # edi = out 8b/-> *(ebp+8) 7/r32/edi # var curr/ecx : (address function) = Program 8b/-> *Program 1/r32/ecx { # if (curr == NULL) break 81 7/subop/compare %ecx 0/imm32 0f 84/jump-if-equal break/disp32 (emit-subx-function %edi %ecx) # curr = curr->next 8b/-> *(ecx+0x10) 1/r32/ecx e9/jump loop/disp32 } $emit-subx:end: # . restore registers 5f/pop-to-edi 59/pop-to-ecx 58/pop-to-eax # . epilogue 89/<- %esp 5/r32/ebp 5d/pop-to-ebp c3/return # == Emitting a function # Emit function header # Emit function prologue # Translate function body # Emit function epilogue emit-subx-function: # out : (address buffered-file), f : (address function) # . prologue 55/push-ebp 89/<- %ebp 4/r32/esp # . save registers 50/push-eax 51/push-ecx 57/push-edi # edi = out 8b/-> *(ebp+8) 7/r32/edi # ecx = f 8b/-> *(ebp+0xc) 1/r32/ecx # (write-buffered %edi *ecx) (write-buffered %edi ":\n") (emit-subx-prologue %edi) (emit-subx-block %edi *(ecx+4)) # TODO: offset (emit-subx-epilogue %edi) $emit-subx-function:end: # . restore registers 5f/pop-to-edi 59/pop-to-ecx 58/pop-to-eax # . epilogue 89/<- %esp 5/r32/ebp 5d/pop-to-ebp c3/return emit-subx-block: # out : (address buffered-file), block : (address block) # . prologue 55/push-ebp 89/<- %ebp 4/r32/esp # $emit-subx-block:end: # . epilogue 89/<- %esp 5/r32/ebp 5d/pop-to-ebp c3/return emit-subx-statement: # out : (address buffered-file), stmt : (address statement), vars : (stack var), primitives : (address opcode-info), functions : (address function) # . prologue 55/push-ebp 89/<- %ebp 4/r32/esp # . save registers 50/push-eax 51/push-ecx # if stmt matches a primitive, emit it { (find-matching-function *(ebp+0x14) *(ebp+0xc)) 3d/compare-eax-and 0/imm32 74/jump-if-equal break/disp8 (emit-subx-primitive *(ebp+8) *(ebp+0xc) *(ebp+0x10) %eax) e9/jump $emit-subx-statement:end/disp32 } # else if stmt matches a function, emit a call to it { (find-matching-function *(ebp+0x18) *(ebp+0xc)) 3d/compare-eax-and 0/imm32 74/jump-if-equal break/disp8 (emit-subx-call *(ebp+8) *(ebp+0xc) *(ebp+0x10) %eax) e9/jump $emit-subx-statement:end/disp32 } # else abort e9/jump $emit-subx-statement:abort/disp32 $emit-subx-statement:end: # . restore registers 59/pop-to-ecx 58/pop-to-eax # . epilogue 89/<- %esp 5/r32/ebp 5d/pop-to-ebp c3/return $emit-subx-statement:abort: # error("couldn't translate '" stmt "'\n") (write-buffered Stderr "couldn't translate '") #? (emit-string Stderr *(ebp+0xc)) # TODO (write-buffered Stderr "'\n") (flush Stderr) # . syscall(exit, 1) bb/copy-to-ebx 1/imm32 b8/copy-to-eax 1/imm32/exit cd/syscall 0x80/imm8 # never gets here emit-subx-primitive: # out : (address buffered-file), stmt : (address statement), vars : (address variable), primitive : (address function) # . prologue 55/push-ebp 89/<- %ebp 4/r32/esp # . save registers 50/push-eax 51/push-ecx # - emit primitive name 8b/-> *(ebp+0x14) 1/r32/ecx (write-buffered *(ebp+8) *(ecx+4)) # Function-subx-name # - emit arguments # var curr/ecx : (list var) = stmt->inouts 8b/-> *(ebp+0xc) 1/r32/ecx 8b/-> *(ecx+4) 1/r32/ecx # Stmt-inouts { # if (curr == null) break 81 7/subop/compare %ecx 0/imm32 74/jump-if-equal break/disp8 # (emit-subx-call-operand *(ebp+8) *ecx) # curr = curr->next 8b/-> *(ecx+4) 1/r32/ecx } $emit-subx-primitive:end: # . restore registers 59/pop-to-ecx 58/pop-to-eax # . epilogue 89/<- %esp 5/r32/ebp 5d/pop-to-ebp c3/return emit-subx-call: # out : (address buffered-file), stmt : (address statement), vars : (address variable), callee : (address function) # . prologue 55/push-ebp 89/<- %ebp 4/r32/esp # . save registers 50/push-eax 51/push-ecx # (write-buffered *(ebp+8) "(") # - emit function name 8b/-> *(ebp+0x14) 1/r32/ecx (write-buffered *(ebp+8) *(ecx+4)) # Function-subx-name # - emit arguments # var curr/ecx : (list var) = stmt->inouts 8b/-> *(ebp+0xc) 1/r32/ecx 8b/-> *(ecx+4) 1/r32/ecx # Stmt-inouts { # if (curr == null) break 81 7/subop/compare %ecx 0/imm32 74/jump-if-equal break/disp8 # (emit-subx-call-operand *(ebp+8) *ecx) # curr = curr->next 8b/-> *(ecx+4) 1/r32/ecx } # (write-buffered *(ebp+8) ")") $emit-subx-call:end: # . restore registers 59/pop-to-ecx 58/pop-to-eax # . epilogue 89/<- %esp 5/r32/ebp 5d/pop-to-ebp c3/return emit-subx-call-operand: # out : (address buffered-file), operand : (address variable) # . prologue 55/push-ebp 89/<- %ebp 4/r32/esp # . save registers 50/push-eax # (write-buffered *(ebp+8) Space) (write-buffered *(ebp+8) "*(ebp+") 8b/-> *(ebp+0xc) 0/r32/eax (print-int32-buffered *(ebp+8) *(eax+0xc)) # Var-stack-offset (write-buffered *(ebp+8) ")") $emit-subx-call-operand:end: # . restore registers 58/pop-to-eax # . epilogue 89/<- %esp 5/r32/ebp 5d/pop-to-ebp c3/return find-matching-function: # functions : (address function), stmt : (address statement) -> result/eax : (address function) # . prologue 55/push-ebp 89/<- %ebp 4/r32/esp # . save registers 51/push-ecx # var curr/ecx : (address function) = functions 8b/-> *(ebp+8) 1/r32/ecx { # if (curr == null) break 81 7/subop/compare %ecx 0/imm32 74/jump-if-equal break/disp8 # if match(curr, stmt) return curr { (mu-stmt-matches-function? *(ebp+0xc) %ecx) # => eax 3d/compare-eax-and 0/imm32 74/jump-if-equal break/disp8 89/<- %eax 1/r32/ecx eb/jump $find-matching-function:end/disp8 } # curr = curr->next 8b/-> *(ecx+0x10) 1/r32/ecx eb/jump loop/disp8 } # return null b8/copy-to-eax 0/imm32 $find-matching-function:end: # . restore registers 59/pop-to-ecx # . epilogue 89/<- %esp 5/r32/ebp 5d/pop-to-ebp c3/return mu-stmt-matches-function?: # stmt : (address statement), primitive : (address opcode-info) => result/eax : boolean # . prologue 55/push-ebp 89/<- %ebp 4/r32/esp # . save registers 51/push-ecx # return primitive->name == stmt->operation 8b/-> *(ebp+8) 1/r32/ecx 8b/-> *(ebp+0xc) 0/r32/eax (string-equal? *ecx *eax) # => eax $mu-stmt-matches-function?:end: # . restore registers 59/pop-to-ecx # . epilogue 89/<- %esp 5/r32/ebp 5d/pop-to-ebp c3/return test-emit-subx-statement-primitive: # Primitive operation on a variable on the stack. # increment foo # => # ff 0/subop/increment *(ebp-8) # # There's a variable on the var stack as follows: # name: 'foo' # type: int # location: -8 (negative numbers are on the stack; # 0-7 are in registers; # higher positive numbers are invalid) # # There's a primitive with this info: # name: 'increment' # inout: int/mem # value: 'ff 0/subop/increment' # # There's nothing in functions. # # . prologue 55/push-ebp 89/<- %ebp 4/r32/esp # setup (clear-stream _test-output-stream) (clear-stream _test-output-buffered-file->buffer) # var-foo/ecx : var 68/push 0/imm32/no-register 68/push -8/imm32/stack-offset 68/push 1/imm32/block-depth 68/push 1/imm32/type-int 68/push "foo"/imm32 89/<- %ecx 4/r32/esp # vars/edx : (stack 1) 51/push-ecx/var-foo 68/push 1/imm32/data-length 68/push 1/imm32/top 89/<- %edx 4/r32/esp # operand/esi : (list var) 68/push 0/imm32/next 51/push-ecx/var-foo 89/<- %esi 4/r32/esp # stmt/esi : statement 68/push 0/imm32/next 68/push 0/imm32/outputs 56/push-esi/operands 68/push "increment"/imm32/operation 89/<- %esi 4/r32/esp # primitives/ebx : function 68/push 0/imm32/next 68/push 0/imm32/body 68/push 0/imm32/outputs 51/push-ecx/inouts # hack; in practice we won't have the same var in function definition and call 68/push "ff 0/subop/increment"/imm32/subx-name 68/push "increment"/imm32/name 89/<- %ebx 4/r32/esp # convert (emit-subx-statement _test-output-buffered-file %esi %edx %ebx 0) (flush _test-output-buffered-file) #? # dump _test-output-stream {{{ #? (write 2 "^") #? (write-stream 2 _test-output-stream) #? (write 2 "$\n") #? (rewind-stream _test-output-stream) #? # }}} # check output (check-next-stream-line-equal _test-output-stream "ff 0/subop/increment *(ebp+0xfffffff8)" "F - test-emit-subx-statement-primitive/0") # . reclaim locals 81 0/subop/add %esp 0x48/imm32 # . epilogue 89/<- %esp 5/r32/ebp 5d/pop-to-ebp c3/return test-emit-subx-statement-function-call: # Call a function on a variable on the stack. # f foo # => # (f2 *(ebp-8)) # (Changing the function name supports overloading in general, but here it # just serves to help disambiguate things.) # # There's a variable on the var stack as follows: # name: 'foo' # type: int # location: -8 (negative numbers are on the stack; # 0-7 are in registers; # higher positive numbers are invalid) # # There's nothing in primitives. # # There's a function with this info: # name: 'f' # inout: int/mem # value: 'f2' # # . prologue 55/push-ebp 89/<- %ebp 4/r32/esp # setup (clear-stream _test-output-stream) (clear-stream _test-output-buffered-file->buffer) # var-foo/ecx : var 68/push 0/imm32/no-register 68/push -8/imm32/stack-offset 68/push 0/imm32/block-depth 68/push 1/imm32/type-int 68/push "foo"/imm32 89/<- %ecx 4/r32/esp # vars/edx = (stack 1) 51/push-ecx/var-foo 68/push 1/imm32/data-length 68/push 1/imm32/top 89/<- %edx 4/r32/esp # operands/esi : (list var) 68/push 0/imm32/next 51/push-ecx/var-foo 89/<- %esi 4/r32/esp # stmt/esi : statement 68/push 0/imm32/next 68/push 0/imm32/outputs 56/push-esi/inouts 68/push "f"/imm32/operation 89/<- %esi 4/r32/esp # functions/ebx : function 68/push 0/imm32/next 68/push 0/imm32/body 68/push 0/imm32/outputs 51/push-ecx/inouts # hack; in practice we won't have the same var in function definition and call 68/push "f2"/imm32/subx-name 68/push "f"/imm32/name 89/<- %ebx 4/r32/esp # convert (emit-subx-statement _test-output-buffered-file %esi %edx 0 %ebx) (flush _test-output-buffered-file) #? # dump _test-output-stream {{{ #? (write 2 "^") #? (write-stream 2 _test-output-stream) #? (write 2 "$\n") #? (rewind-stream _test-output-stream) #? # }}} # check output (check-next-stream-line-equal _test-output-stream "(f2 *(ebp+0xfffffff8))" "F - test-emit-subx-statement-function-call/0") # . reclaim locals 81 0/subop/add %esp 0x3c/imm32 # . epilogue 89/<- %esp 5/r32/ebp 5d/pop-to-ebp c3/return emit-subx-prologue: # out : (address buffered-file) # . prologue 55/push-ebp 89/<- %ebp 4/r32/esp # (write-buffered *(ebp+8) "# . prologue\n") (write-buffered *(ebp+8) "55/push-ebp\n") (write-buffered *(ebp+8) "89/<- %ebp 4/r32/esp\n") $emit-subx-prologue:end: # . epilogue 89/<- %esp 5/r32/ebp 5d/pop-to-ebp c3/return emit-subx-epilogue: # out : (address buffered-file) # . prologue 55/push-ebp 89/<- %ebp 4/r32/esp # (write-buffered *(ebp+8) "# . epilogue\n") (write-buffered *(ebp+8) "89/<- %esp 5/r32/ebp\n") (write-buffered *(ebp+8) "5d/pop-to-ebp\n") (write-buffered *(ebp+8) "c3/return\n") $emit-subx-epilogue:end: # . epilogue 89/<- %esp 5/r32/ebp 5d/pop-to-ebp c3/return