https://github.com/akkartik/mu/blob/master/apps/mu.subx
   1 # The Mu computer's level-2 language, also called Mu.
   2 # http://akkartik.name/post/mu-2019-2
   3 #
   4 # To run:
   5 #   $ ./ntranslate init.linux 0*.subx apps/mu.subx
   6 
   7 # == Goals
   8 # 1. Be memory safe. It should be impossible to corrupt the heap, or to create
   9 # a bad pointer. (Requires strong type safety.)
  10 # 2. Do as little as possible to achieve goal 1.
  11 #   - runtime checks to avoid complex static analysis
  12 #   - minimize impedance mismatch between source language and SubX target
  13 
  14 # == Language description
  15 # A program is a sequence of function definitions.
  16 #
  17 # Function example:
  18 #   fn foo n: int -> result/eax: int {
  19 #     ...
  20 #   }
  21 #
  22 # Functions consist of a name, optional inputs, optional outputs and a block.
  23 #
  24 # Function inputs and outputs are variables. All variables have a type and
  25 # storage specifier. They can be placed either in memory (on the stack) or in
  26 # one of 6 named registers.
  27 #   eax ecx edx ebx esi edi
  28 # Variables in registers must be primitive 32-bit types.
  29 # Variables not explicitly placed in a register are on the stack.
  30 # Variables in registers need not have a name; in that case you refer to them
  31 # directly by the register name.
  32 #
  33 # Function inputs are always passed in memory (on the stack), while outputs
  34 # are always returned in registers.
  35 #
  36 # Blocks mostly consist of statements.
  37 #
  38 # Statements mostly consist of a name, optional inputs and optional outputs.
  39 #
  40 # Statement inputs are variables or literals. Variables need to specify type
  41 # (and storage) the first time they're mentioned but not later.
  42 #
  43 # Statement outputs, like function outputs, must be variables in registers.
  44 #
  45 # Statement names must be either primitives or user-defined functions.
  46 #
  47 # Primitives can write to any register.
  48 # User-defined functions only write to hard-coded registers. Outputs of each
  49 # call must have the same registers as in the function definition.
  50 #
  51 # There are some other statement types:
  52 #   - blocks. Multiple statements surrounded by '{...}' and optionally
  53 #     prefixed with a label name and ':'
  54 #       - {
  55 #           ...
  56 #         }
  57 #       - foo: {
  58 #           ...
  59 #         }
  60 #
  61 #   - variable definitions on the stack. E.g.:
  62 #       - var foo: int
  63 #       - var bar: (array int 3)
  64 #     There's no initializer; variables are automatically initialized.
  65 #
  66 #   - variables definitions in a register. E.g.:
  67 #       - var foo/eax : int <- add bar 1
  68 #     The initializer is mandatory and must be a valid instruction that writes
  69 #     a single output to the right register. In practice registers will
  70 #     usually be either initialized by primitives or copied from eax.
  71 #       - var eax : int <- foo bar quux
  72 #         var floo/ecx : int <- copy eax
  73 #
  74 # Still todo:
  75 #   global variables
  76 #   heap allocations (planned name: 'handle')
  77 #   user-defined types: 'type' for structs, 'choice' for unions
  78 #   short-lived 'address' type for efficiently writing inside nested structs
  79 #
  80 # Formal types:
  81 #   A program is a linked list of functions
  82 #   A function contains:
  83 #     name: string
  84 #     inouts: linked list of vars  <-- 'inouts' is more precise than 'inputs'
  85 #       data: (address var)
  86 #       next: (address list)
  87 #     outputs: linked list of vars
  88 #       data: (address var)
  89 #       next: (address list)
  90 #     body: block
  91 #   A var-type contains:
  92 #     name: string
  93 #     type: s-expression of type ids
  94 #   Statements are not yet fully designed.
  95 #   statement = var definition or simple statement or block
  96 #   simple statement:
  97 #     name: string
  98 #     inouts: linked list of vars
  99 #     outputs: linked list of vars
 100 #   block = linked list of statements
 101 
 102 # == Translation
 103 # Now that we know what the language looks like in the large, let's think
 104 # about how translation happens from the bottom up. The interplay between
 105 # variable scopes and statements using variables is the most complex aspect of
 106 # translation.
 107 #
 108 # Assume that we maintain a 'functions' list while parsing source code. And a
 109 # 'primitives' list is a global constant. Both these contain enough information
 110 # to perform type-checking on function calls or primitive statements, respectively.
 111 #
 112 # Defining variables pushes them on a stack with the current block depth and
 113 # enough information about their location (stack offset or register id).
 114 # Starting a block increments the current block id.
 115 # Each statement now has enough information to emit code for it.
 116 # Ending a block is where the magic happens:
 117 #   pop all variables at the current block depth
 118 #   emit code to restore all register variables introduced at the current depth
 119 #   emit code to clean up all stack variables at the current depth (just increment esp)
 120 #   decrement the current block depth
 121 #
 122 # One additional check we'll need is to ensure that a variable in a register
 123 # isn't shadowed by a different one. That may be worth a separate data
 124 # structure but for now repeatedly scanning the var stack should suffice.
 125 #
 126 # Formal types:
 127 #   functions, primitives: linked list of info
 128 #     name: string
 129 #     inouts: linked list of vars
 130 #     outputs: linked list of vars
 131 #   live-vars: stack of vars
 132 #   var:
 133 #     name: string
 134 #     type: s-expression? Just a type id for now.
 135 #     block: int
 136 #     stack-offset: int  (added to ebp)
 137 #     register-index: int
 138 #       0-7 with usual meanings
 139 #       8: any register
 140 #   At most one of stack-offset or register-index must be non-zero.
 141 #   When both are zero the variable is in eax (no way to refer to return
 142 #   address at ebp).
 143 #   A register-index of 8 designates a variable _template_. Only legal in
 144 #   formal parameters for primitives.
 145 
 146 # == Compiling a single instruction
 147 # Determine the function or primitive being called.
 148 #   If no matches, show all functions/primitives with the same name, along
 149 #   with reasons they don't match. (type and storage checking)
 150 #   It must be a function if:
 151 #     #outputs > 1, or
 152 #     #inouts > 2, or
 153 #     #inouts + #outputs > 2
 154 # If it's a function, emit:
 155 #   (low-level-name <rm32 or imm32>...)
 156 # Otherwise (it's a primitive):
 157 #   assert(#inouts <= 2 && #outs <= 1 && (#inouts + #outs) <= 2)
 158 #   emit opcode
 159 #   emit-rm32(inout[0])
 160 #   if out[0] exists: emit-r32(out[0])
 161 #   else if inout[1] is a literal: emit-imm32(inout[1])
 162 #   else: emit-rm32(inout[1])
 163 
 164 # emit-rm32 and emit-r32 should check that the variable they intend is still
 165 # available in the register.
 166 
 167 # == Emitting a block
 168 # Emit block name if necessary
 169 # Emit '{'
 170 # When you encounter a statement, emit it as above
 171 # When you encounter a variable declaration
 172 #   emit any code needed for it (bzeros)
 173 #   push it on the var stack
 174 #   update register dict if necessary
 175 # When you encounter '}'
 176 #   While popping variables off the var stack until block id changes
 177 #     Emit code needed to clean up the stack
 178 #       either increment esp
 179 #       or pop into appropriate register
 180 
 181 # The rest is straightforward.
 182 
 183 == data
 184 
 185 Program:  # (address function)
 186   0/imm32
 187 
 188 Function-name:
 189   0/imm32
 190 Function-subx-name:
 191   4/imm32
 192 Function-inouts:  # (address list var)
 193   8/imm32
 194 Function-outputs:  # (address list var)
 195   0xc/imm32
 196 Function-body:  # (address block)
 197   0x10/imm32
 198 Function-next:  # (address function)
 199   0x14/imm32
 200 Function-size:
 201   0x18/imm32/24
 202 
 203 Stmt-name:
 204   0/imm32
 205 Stmt-inouts:
 206   4/imm32
 207 Stmt-outputs:
 208   8/imm32
 209 Stmt-next:
 210   0xc/imm32
 211 Stmt-size:
 212   0x10/imm32
 213 
 214 Var-name:
 215   0/imm32
 216 Var-type:
 217   4/imm32
 218 Var-block:
 219   8/imm32
 220 Var-stack-offset:
 221   0xc/imm32
 222 Var-register-index:
 223   0x10/imm32
 224 Var-size:
 225   0x14/imm32
 226 
 227 == code
 228 
 229 Entry:
 230     # . prologue
 231     89/<- %ebp 4/r32/esp
 232     (new-segment Heap-size Heap)
 233     # if (argv[1] == "test') run-tests()
 234     {
 235       # if (argc <= 1) break
 236       81 7/subop/compare *ebp 1/imm32
 237       7e/jump-if-lesser-or-equal break/disp8
 238       # if (argv[1] != "test") break
 239       (kernel-string-equal? *(ebp+8) "test")  # => eax
 240       3d/compare-eax-and 0/imm32
 241       74/jump-if-equal break/disp8
 242       #
 243       (run-tests)
 244       # syscall(exit, *Num-test-failures)
 245       8b/-> *Num-test-failures 3/r32/ebx
 246       eb/jump $mu-main:end/disp8
 247     }
 248     # otherwise convert Stdin
 249     (convert-mu Stdin Stdout)
 250     (flush Stdout)
 251     # syscall(exit, 0)
 252     bb/copy-to-ebx 0/imm32
 253 $mu-main:end:
 254     b8/copy-to-eax 1/imm32/exit
 255     cd/syscall 0x80/imm8
 256 
 257 convert-mu:  # in : (address buffered-file), out : (address buffered-file)
 258     # . prologue
 259     55/push-ebp
 260     89/<- %ebp 4/r32/esp
 261     #
 262     (parse-mu *(ebp+8))
 263     (check-mu-types)
 264     (emit-subx *(ebp+0xc))
 265 $convert-mu:end:
 266     # . epilogue
 267     89/<- %esp 5/r32/ebp
 268     5d/pop-to-ebp
 269     c3/return
 270 
 271 test-convert-empty-input:
 272     # empty input => empty output
 273     # . prologue
 274     55/push-ebp
 275     89/<- %ebp 4/r32/esp
 276     # setup
 277     (clear-stream _test-input-stream)
 278     (clear-stream _test-input-buffered-file->buffer)
 279     (clear-stream _test-output-stream)
 280     (clear-stream _test-output-buffered-file->buffer)
 281     #
 282     (convert-mu _test-input-buffered-file _test-output-buffered-file)
 283     (flush _test-output-buffered-file)
 284     (check-stream-equal _test-output-stream "" "F - test-convert-empty-input")
 285     # . epilogue
 286     89/<- %esp 5/r32/ebp
 287     5d/pop-to-ebp
 288     c3/return
 289 
 290 test-convert-function-skeleton:
 291     # empty function decl => function prologue and epilogue
 292     #   fn foo {
 293     #   }
 294     # =>
 295     #   foo:
 296     #     # . prologue
 297     #     55/push-ebp
 298     #     89/<- %ebp 4/r32/esp
 299     #     # . epilogue
 300     #     89/<- %esp 5/r32/ebp
 301     #     5d/pop-to-ebp
 302     #     c3/return
 303     # . prologue
 304     55/push-ebp
 305     89/<- %ebp 4/r32/esp
 306     # setup
 307     (clear-stream _test-input-stream)
 308     (clear-stream _test-input-buffered-file->buffer)
 309     (clear-stream _test-output-stream)
 310     (clear-stream _test-output-buffered-file->buffer)
 311     #
 312     (write _test-input-stream "fn foo {\n")
 313     (write _test-input-stream "}\n")
 314     # convert
 315     (convert-mu _test-input-buffered-file _test-output-buffered-file)
 316     (flush _test-output-buffered-file)
 317 +--  6 lines: #?     # dump _test-output-stream --------------------------------------------------------------------------------------------------------------
 323     # check output
 324     (check-next-stream-line-equal _test-output-stream "foo:"                  "F - test-convert-function-skeleton/0")
 325     (check-next-stream-line-equal _test-output-stream "# . prologue"          "F - test-convert-function-skeleton/1")
 326     (check-next-stream-line-equal _test-output-stream "55/push-ebp"           "F - test-convert-function-skeleton/2")
 327     (check-next-stream-line-equal _test-output-stream "89/<- %ebp 4/r32/esp"  "F - test-convert-function-skeleton/3")
 328     (check-next-stream-line-equal _test-output-stream "# . epilogue"          "F - test-convert-function-skeleton/4")
 329     (check-next-stream-line-equal _test-output-stream "89/<- %esp 5/r32/ebp"  "F - test-convert-function-skeleton/5")
 330     (check-next-stream-line-equal _test-output-stream "5d/pop-to-ebp"         "F - test-convert-function-skeleton/6")
 331     (check-next-stream-line-equal _test-output-stream "c3/return"             "F - test-convert-function-skeleton/7")
 332     # . epilogue
 333     89/<- %esp 5/r32/ebp
 334     5d/pop-to-ebp
 335     c3/return
 336 
 337 test-convert-multiple-function-skeletons:
 338     # multiple functions correctly organized into a linked list
 339     #   fn foo {
 340     #   }
 341     #   fn bar {
 342     #   }
 343     # =>
 344     #   foo:
 345     #     # . prologue
 346     #     55/push-ebp
 347     #     89/<- %ebp 4/r32/esp
 348     #     # . epilogue
 349     #     89/<- %esp 5/r32/ebp
 350     #     5d/pop-to-ebp
 351     #     c3/return
 352     #   bar:
 353     #     # . prologue
 354     #     55/push-ebp
 355     #     89/<- %ebp 4/r32/esp
 356     #     # . epilogue
 357     #     89/<- %esp 5/r32/ebp
 358     #     5d/pop-to-ebp
 359     #     c3/return
 360     # . prologue
 361     55/push-ebp
 362     89/<- %ebp 4/r32/esp
 363     # setup
 364     (clear-stream _test-input-stream)
 365     (clear-stream _test-input-buffered-file->buffer)
 366     (clear-stream _test-output-stream)
 367     (clear-stream _test-output-buffered-file->buffer)
 368     #
 369     (write _test-input-stream "fn foo {\n")
 370     (write _test-input-stream "}\n")
 371     (write _test-input-stream "fn bar {\n")
 372     (write _test-input-stream "}\n")
 373     # convert
 374     (convert-mu _test-input-buffered-file _test-output-buffered-file)
 375     (flush _test-output-buffered-file)
 376 +--  6 lines: #?     # dump _test-output-stream --------------------------------------------------------------------------------------------------------------
 382     # check first function
 383     (check-next-stream-line-equal _test-output-stream "foo:"                  "F - test-convert-multiple-function-skeletons/0")
 384     (check-next-stream-line-equal _test-output-stream "# . prologue"          "F - test-convert-multiple-function-skeletons/1")
 385     (check-next-stream-line-equal _test-output-stream "55/push-ebp"           "F - test-convert-multiple-function-skeletons/2")
 386     (check-next-stream-line-equal _test-output-stream "89/<- %ebp 4/r32/esp"  "F - test-convert-multiple-function-skeletons/3")
 387     (check-next-stream-line-equal _test-output-stream "# . epilogue"          "F - test-convert-multiple-function-skeletons/4")
 388     (check-next-stream-line-equal _test-output-stream "89/<- %esp 5/r32/ebp"  "F - test-convert-multiple-function-skeletons/5")
 389     (check-next-stream-line-equal _test-output-stream "5d/pop-to-ebp"         "F - test-convert-multiple-function-skeletons/6")
 390     (check-next-stream-line-equal _test-output-stream "c3/return"             "F - test-convert-multiple-function-skeletons/7")
 391     # check second function
 392     (check-next-stream-line-equal _test-output-stream "bar:"                  "F - test-convert-multiple-function-skeletons/10")
 393     (check-next-stream-line-equal _test-output-stream "# . prologue"          "F - test-convert-multiple-function-skeletons/11")
 394     (check-next-stream-line-equal _test-output-stream "55/push-ebp"           "F - test-convert-multiple-function-skeletons/12")
 395     (check-next-stream-line-equal _test-output-stream "89/<- %ebp 4/r32/esp"  "F - test-convert-multiple-function-skeletons/13")
 396     (check-next-stream-line-equal _test-output-stream "# . epilogue"          "F - test-convert-multiple-function-skeletons/14")
 397     (check-next-stream-line-equal _test-output-stream "89/<- %esp 5/r32/ebp"  "F - test-convert-multiple-function-skeletons/15")
 398     (check-next-stream-line-equal _test-output-stream "5d/pop-to-ebp"         "F - test-convert-multiple-function-skeletons/16")
 399     (check-next-stream-line-equal _test-output-stream "c3/return"             "F - test-convert-multiple-function-skeletons/17")
 400     # . epilogue
 401     89/<- %esp 5/r32/ebp
 402     5d/pop-to-ebp
 403     c3/return
 404 
 405 test-convert-function-with-arg:
 406     # function with one arg and a copy instruction
 407     #   fn foo n : int -> result/eax : int {
 408     #     result <- copy n
 409     #   }
 410     # =>
 411     #   foo:
 412     #     # . prologue
 413     #     55/push-ebp
 414     #     89/<- %ebp 4/r32/esp
 415     #     {
 416     #     # result <- copy n
 417     #     8b/-> *(ebp+8) 0/r32/eax
 418     #     }
 419     #     # . epilogue
 420     #     89/<- %esp 5/r32/ebp
 421     #     5d/pop-to-ebp
 422     #     c3/return
 423     # . prologue
 424     55/push-ebp
 425     89/<- %ebp 4/r32/esp
 426     # setup
 427     (clear-stream _test-input-stream)
 428     (clear-stream _test-input-buffered-file->buffer)
 429     (clear-stream _test-output-stream)
 430     (clear-stream _test-output-buffered-file->buffer)
 431     #
 432     (write _test-input-stream "fn foo {\n")
 433     (write _test-input-stream "}\n")
 434     # convert
 435     (convert-mu _test-input-buffered-file _test-output-buffered-file)
 436     (flush _test-output-buffered-file)
 437 +--  6 lines: #?     # dump _test-output-stream --------------------------------------------------------------------------------------------------------------
 443     # check output
 444     (check-next-stream-line-equal _test-output-stream "foo:"                  "F - test-convert-function-skeleton/0")
 445     (check-next-stream-line-equal _test-output-stream "# . prologue"          "F - test-convert-function-skeleton/1")
 446     (check-next-stream-line-equal _test-output-stream "55/push-ebp"           "F - test-convert-function-skeleton/2")
 447     (check-next-stream-line-equal _test-output-stream "89/<- %ebp 4/r32/esp"  "F - test-convert-function-skeleton/3")
 448     (check-next-stream-line-equal _test-output-stream "# . epilogue"          "F - test-convert-function-skeleton/4")
 449     (check-next-stream-line-equal _test-output-stream "89/<- %esp 5/r32/ebp"  "F - test-convert-function-skeleton/5")
 450     (check-next-stream-line-equal _test-output-stream "5d/pop-to-ebp"         "F - test-convert-function-skeleton/6")
 451     (check-next-stream-line-equal _test-output-stream "c3/return"             "F - test-convert-function-skeleton/7")
 452     # . epilogue
 453     89/<- %esp 5/r32/ebp
 454     5d/pop-to-ebp
 455     c3/return
 456 
 457 parse-mu:  # in : (address buffered-file)
 458     # pseudocode
 459     #   var curr-function = Program
 460     #   var line : (stream byte 512)
 461     #   var word-slice : slice
 462     #   while true                                  # line loop
 463     #     clear-stream(line)
 464     #     read-line-buffered(in, line)
 465     #     if (line->write == 0) break               # end of file
 466     #     while true                                # word loop
 467     #       word-slice = next-word-or-string(line)
 468     #       if slice-empty?(word-slice)             # end of line
 469     #         break
 470     #       else if slice-starts-with?(word-slice, "#")  # comment
 471     #         break                                 # end of line
 472     #       else if slice-equal(word-slice, "fn")
 473     #         var new-function : (address function) = new function
 474     #         populate-mu-function(in, new-function)
 475     #         *curr-function = new-function
 476     #         curr-function = &new-function->next
 477     #       else
 478     #         abort()
 479     #
 480     # . prologue
 481     55/push-ebp
 482     89/<- %ebp 4/r32/esp
 483     # . save registers
 484     50/push-eax
 485     51/push-ecx
 486     52/push-edx
 487     57/push-edi
 488     # var line/ecx : (stream byte 512)
 489     81 5/subop/subtract %esp 0x200/imm32
 490     68/push 0x200/imm32/length
 491     68/push 0/imm32/read
 492     68/push 0/imm32/write
 493     89/<- %ecx 4/r32/esp
 494     # var word-slice/edx : slice
 495     68/push 0/imm32/end
 496     68/push 0/imm32/start
 497     89/<- %edx 4/r32/esp
 498     # var curr-function/edi : (address function) = Program
 499     bf/copy-to-edi Program/imm32
 500     {
 501 $parse-mu:line-loop:
 502       (clear-stream %ecx)
 503       (read-line-buffered *(ebp+8) %ecx)
 504       # if (line->write == 0) break
 505       81 7/subop/compare *ecx 0/imm32
 506       0f 84/jump-if-equal break/disp32
 507 +--  6 lines: #?       # dump line ---------------------------------------------------------------------------------------------------------------------------
 513       { # word loop
 514 $parse-mu:word-loop:
 515         (next-word-or-string %ecx %edx)
 516         # if slice-empty?(word-slice) break
 517         (slice-empty? %edx)
 518         3d/compare-eax-and 0/imm32
 519         0f 85/jump-if-not-equal break/disp32
 520         # if (*word-slice->start == "#") break
 521         # . eax = *word-slice->start
 522         8b/-> *edx 0/r32/eax
 523         8a/copy-byte *eax 0/r32/AL
 524         81 4/subop/and %eax 0xff/imm32
 525         # . if (eax == '#') break
 526         3d/compare-eax-and 0x23/imm32/hash
 527         0f 84/jump-if-equal break/disp32
 528         # if (slice-equal?(word-slice, "fn")) parse a function
 529         {
 530           (slice-equal? %edx "fn")
 531           3d/compare-eax-and 0/imm32
 532           0f 84/jump-if-equal break/disp32
 533           # var new-function/eax : (address function) = populate-mu-function()
 534           (allocate Heap *Function-size)  # => eax
 535           (populate-mu-function-header %ecx %eax)
 536           (populate-mu-function-body *(ebp+8) %eax)
 537           # *curr-function = new-function
 538           89/<- *edi 0/r32/eax
 539           # curr-function = &new-function->next
 540           8d/address-> *(eax+0x10) 7/r32/edi
 541           e9/jump $parse-mu:word-loop/disp32
 542         }
 543         # otherwise abort
 544         e9/jump $parse-mu:abort/disp32
 545       } # end word loop
 546       e9/jump loop/disp32
 547     } # end line loop
 548 $parse-mu:end:
 549     # . reclaim locals
 550     81 0/subop/add %esp 0x214/imm32
 551     # . restore registers
 552     5f/pop-to-edi
 553     5a/pop-to-edx
 554     59/pop-to-ecx
 555     58/pop-to-eax
 556     # . epilogue
 557     89/<- %esp 5/r32/ebp
 558     5d/pop-to-ebp
 559     c3/return
 560 
 561 $parse-mu:abort:
 562     # error("unexpected top-level command: " word-slice "\n")
 563     (write-buffered Stderr "unexpected top-level command: ")
 564     (write-buffered Stderr %edx)
 565     (write-buffered Stderr "\n")
 566     (flush Stderr)
 567     # . syscall(exit, 1)
 568     bb/copy-to-ebx  1/imm32
 569     b8/copy-to-eax  1/imm32/exit
 570     cd/syscall  0x80/imm8
 571     # never gets here
 572 
 573 # errors considered:
 574 #   fn foo { {
 575 #   fn foo { }
 576 #   fn foo { } {
 577 #   fn foo  # no block
 578 populate-mu-function-header:  # first-line : (address stream byte), out : (address function)
 579     # . prologue
 580     55/push-ebp
 581     89/<- %ebp 4/r32/esp
 582     # . save registers
 583     50/push-eax
 584     51/push-ecx
 585     57/push-edi
 586     # edi = out
 587     8b/-> *(ebp+0xc) 7/r32/edi
 588     # var word-slice/ecx : slice
 589     68/push 0/imm32/end
 590     68/push 0/imm32/start
 591     89/<- %ecx 4/r32/esp
 592     # save function name
 593     (next-word *(ebp+8) %ecx)
 594     (slice-to-string Heap %ecx)  # => eax
 595     89/<- *edi 0/r32/eax
 596     # assert that next token is '{'
 597     (next-word *(ebp+8) %ecx)
 598     (slice-equal? %ecx "{")
 599     3d/compare-eax-and 0/imm32
 600     74/jump-if-equal $populate-mu-function-header:abort/disp8
 601     # assert that there's no further token
 602     {
 603       # word-slice = next-word(line)
 604       (next-word *(ebp+8) %ecx)
 605       # if (word-slice == '') break
 606       (slice-empty? %ecx)
 607       3d/compare-eax-and 0/imm32
 608       75/jump-if-not-equal break/disp8
 609       # if (slice-starts-with?(word-slice, "#")) break
 610       # . eax = *word-slice->start
 611       8b/-> *edx 0/r32/eax
 612       8a/copy-byte *eax 0/r32/AL
 613       81 4/subop/and %eax 0xff/imm32
 614       # . if (eax == '#') break
 615       3d/compare-eax-and 0x23/imm32/hash
 616       74/jump-if-equal break/disp8
 617       # otherwise abort
 618       eb/jump $populate-mu-function-header:abort/disp8
 619     }
 620 $populate-mu-function-header:end:
 621     # . reclaim locals
 622     81 0/subop/add %esp 8/imm32
 623     # . restore registers
 624     5f/pop-to-edi
 625     59/pop-to-ecx
 626     58/pop-to-eax
 627     # . epilogue
 628     89/<- %esp 5/r32/ebp
 629     5d/pop-to-ebp
 630     c3/return
 631 
 632 $populate-mu-function-header:abort:
 633     # error("function header not in form 'fn <name> {'")
 634     (write-buffered Stderr "function header not in form 'fn <name> {' -- '")
 635     (rewind-stream *(ebp+8))
 636     (write-stream 2 *(ebp+8))
 637     (write-buffered Stderr "'\n")
 638     (flush Stderr)
 639     # . syscall(exit, 1)
 640     bb/copy-to-ebx  1/imm32
 641     b8/copy-to-eax  1/imm32/exit
 642     cd/syscall  0x80/imm8
 643     # never gets here
 644 
 645 # errors considered:
 646 #   { abc
 647 populate-mu-function-body:  # in : (address buffered-file), out : (address function)
 648     # . prologue
 649     55/push-ebp
 650     89/<- %ebp 4/r32/esp
 651     # . save registers
 652     50/push-eax
 653     51/push-ecx
 654     52/push-edx
 655     53/push-ebx
 656     # var line/ecx : (stream byte 512)
 657     81 5/subop/subtract %esp 0x200/imm32
 658     68/push 0x200/imm32/length
 659     68/push 0/imm32/read
 660     68/push 0/imm32/write
 661     89/<- %ecx 4/r32/esp
 662     # var word-slice/edx : slice
 663     68/push 0/imm32/end
 664     68/push 0/imm32/start
 665     89/<- %edx 4/r32/esp
 666     # var open-curly-count/ebx : int = 1
 667     bb/copy-to-ebx 1/imm32
 668     { # line loop
 669 $populate-mu-function-body:line-loop:
 670       # if (open-curly-count == 0) break
 671       81 7/subop/compare %ebx 0/imm32
 672       0f 84/jump-if-equal break/disp32
 673       # line = read-line-buffered(in)
 674       (clear-stream %ecx)
 675       (read-line-buffered *(ebp+8) %ecx)
 676       # if (line->write == 0) break
 677       81 7/subop/compare *ecx 0/imm32
 678       0f 84/jump-if-equal break/disp32
 679       # word-slice = next-word(line)
 680       (next-word %ecx %edx)
 681       # if slice-empty?(word-slice) continue
 682       (slice-empty? %ecx)
 683       3d/compare-eax-and 0/imm32
 684       75/jump-if-not-equal loop/disp8
 685       # if (slice-starts-with?(word-slice, '#') continue
 686       # . eax = *word-slice->start
 687       8b/-> *edx 0/r32/eax
 688       8a/copy-byte *eax 0/r32/AL
 689       81 4/subop/and %eax 0xff/imm32
 690       # . if (eax == '#') continue
 691       3d/compare-eax-and 0x23/imm32/hash
 692       74/jump-if-equal loop/disp8
 693       {
 694         # if slice-equal?(word-slice, "{") ++open-curly-count
 695         {
 696           (slice-equal? %ecx "{")
 697           3d/compare-eax-and 0/imm32
 698           74/jump-if-equal break/disp8
 699           43/increment-ebx
 700           eb/jump $curly-found:end/disp8
 701         }
 702         # else if slice-equal?(word-slice, "}") --open-curly-count
 703         {
 704           (slice-equal? %ecx "}")
 705           3d/compare-eax-and 0/imm32
 706           74/jump-if-equal break/disp8
 707           4b/decrement-ebx
 708           eb/jump $curly-found:end/disp8
 709         }
 710         # else break
 711         eb/jump $populate-mu-function-body:end/disp8
 712       }
 713       # - check for invalid tokens after curly
 714 $curly-found:end:
 715       # second-word-slice = next-word(line)
 716       (next-word %ecx %edx)
 717       # if slice-empty?(second-word-slice) continue
 718       (slice-empty? %ecx)
 719       3d/compare-eax-and 0/imm32
 720       0f 85/jump-if-not-equal loop/disp32
 721       # if (slice-starts-with?(second-word-slice, '#') continue
 722       # . eax = *second-word-slice->start
 723       8b/-> *edx 0/r32/eax
 724       8a/copy-byte *eax 0/r32/AL
 725       81 4/subop/and %eax 0xff/imm32
 726       # . if (eax == '#') continue
 727       3d/compare-eax-and 0x23/imm32/hash
 728       0f 84/jump-if-equal loop/disp32
 729       # abort
 730       eb/jump $populate-mu-function-body:abort/disp8
 731     } # end line loop
 732 $populate-mu-function-body:end:
 733     # . reclaim locals
 734     81 0/subop/add %esp 0x214/imm32
 735     # . restore registers
 736     5b/pop-to-ebx
 737     5a/pop-to-edx
 738     59/pop-to-ecx
 739     58/pop-to-eax
 740     # . epilogue
 741     89/<- %esp 5/r32/ebp
 742     5d/pop-to-ebp
 743     c3/return
 744 
 745 $populate-mu-function-body:abort:
 746     # error("'{' or '}' should be on its own line, but got '")
 747     (write-buffered Stderr "'{' or '}' should be on its own line, but got '")
 748     (rewind-stream %ecx)
 749     (write-stream 2 %ecx)
 750     (write-buffered Stderr "'\n")
 751     (flush Stderr)
 752     # . syscall(exit, 1)
 753     bb/copy-to-ebx  1/imm32
 754     b8/copy-to-eax  1/imm32/exit
 755     cd/syscall  0x80/imm8
 756     # never gets here
 757 
 758 check-mu-types:
 759     # . prologue
 760     55/push-ebp
 761     89/<- %ebp 4/r32/esp
 762     #
 763 $check-types:end:
 764     # . epilogue
 765     89/<- %esp 5/r32/ebp
 766     5d/pop-to-ebp
 767     c3/return
 768 
 769 emit-subx:  # out : (address buffered-file)
 770     # . prologue
 771     55/push-ebp
 772     89/<- %ebp 4/r32/esp
 773     # . save registers
 774     50/push-eax
 775     51/push-ecx
 776     57/push-edi
 777     # edi = out
 778     8b/-> *(ebp+8) 7/r32/edi
 779     # var curr/ecx : (address function) = Program
 780     8b/-> *Program 1/r32/ecx
 781     {
 782       # if (curr == NULL) break
 783       81 7/subop/compare %ecx 0/imm32
 784       0f 84/jump-if-equal break/disp32
 785       (emit-subx-function %edi %ecx)
 786       # curr = curr->next
 787       8b/-> *(ecx+0x10) 1/r32/ecx
 788       e9/jump loop/disp32
 789     }
 790 $emit-subx:end:
 791     # . restore registers
 792     5f/pop-to-edi
 793     59/pop-to-ecx
 794     58/pop-to-eax
 795     # . epilogue
 796     89/<- %esp 5/r32/ebp
 797     5d/pop-to-ebp
 798     c3/return
 799 
 800 # == Emitting a function
 801 # Emit function header
 802 # Emit function prologue
 803 # Translate function body
 804 # Emit function epilogue
 805 
 806 emit-subx-function:  # out : (address buffered-file), f : (address function)
 807     # . prologue
 808     55/push-ebp
 809     89/<- %ebp 4/r32/esp
 810     # . save registers
 811     50/push-eax
 812     51/push-ecx
 813     57/push-edi
 814     # edi = out
 815     8b/-> *(ebp+8) 7/r32/edi
 816     # ecx = f
 817     8b/-> *(ebp+0xc) 1/r32/ecx
 818     #
 819     (write-buffered %edi *ecx)
 820     (write-buffered %edi ":\n")
 821     (emit-subx-prologue %edi)
 822     (emit-subx-block %edi *(ecx+4))  # TODO: offset
 823     (emit-subx-epilogue %edi)
 824 $emit-subx-function:end:
 825     # . restore registers
 826     5f/pop-to-edi
 827     59/pop-to-ecx
 828     58/pop-to-eax
 829     # . epilogue
 830     89/<- %esp 5/r32/ebp
 831     5d/pop-to-ebp
 832     c3/return
 833 
 834 emit-subx-block:  # out : (address buffered-file), block : (address block)
 835     # . prologue
 836     55/push-ebp
 837     89/<- %ebp 4/r32/esp
 838     #
 839 $emit-subx-block:end:
 840     # . epilogue
 841     89/<- %esp 5/r32/ebp
 842     5d/pop-to-ebp
 843     c3/return
 844 
 845 emit-subx-statement:  # out : (address buffered-file), stmt : (address statement), vars : (stack var), primitives : (address opcode-info), functions : (address function)
 846     # . prologue
 847     55/push-ebp
 848     89/<- %ebp 4/r32/esp
 849     # . save registers
 850     50/push-eax
 851     51/push-ecx
 852     # if stmt matches a primitive, emit it
 853     {
 854       (find-matching-function *(ebp+0x14) *(ebp+0xc))
 855       3d/compare-eax-and 0/imm32
 856       74/jump-if-equal break/disp8
 857       (emit-subx-primitive *(ebp+8) *(ebp+0xc) *(ebp+0x10) %eax)
 858       e9/jump $emit-subx-statement:end/disp32
 859     }
 860     # else if stmt matches a function, emit a call to it
 861     {
 862       (find-matching-function *(ebp+0x18) *(ebp+0xc))
 863       3d/compare-eax-and 0/imm32
 864       74/jump-if-equal break/disp8
 865       (emit-subx-call *(ebp+8) *(ebp+0xc) *(ebp+0x10) %eax)
 866       e9/jump $emit-subx-statement:end/disp32
 867     }
 868     # else abort
 869     e9/jump $emit-subx-statement:abort/disp32
 870 $emit-subx-statement:end:
 871     # . restore registers
 872     59/pop-to-ecx
 873     58/pop-to-eax
 874     # . epilogue
 875     89/<- %esp 5/r32/ebp
 876     5d/pop-to-ebp
 877     c3/return
 878 
 879 $emit-subx-statement:abort:
 880     # error("couldn't translate '" stmt "'\n")
 881     (write-buffered Stderr "couldn't translate '")
 882 #?     (emit-string Stderr *(ebp+0xc))  # TODO
 883     (write-buffered Stderr "'\n")
 884     (flush Stderr)
 885     # . syscall(exit, 1)
 886     bb/copy-to-ebx  1/imm32
 887     b8/copy-to-eax  1/imm32/exit
 888     cd/syscall  0x80/imm8
 889     # never gets here
 890 
 891 emit-subx-primitive:  # out : (address buffered-file), stmt : (address statement), vars : (address variable), primitive : (address function)
 892     # . prologue
 893     55/push-ebp
 894     89/<- %ebp 4/r32/esp
 895     # . save registers
 896     50/push-eax
 897     51/push-ecx
 898     # - emit primitive name
 899     8b/-> *(ebp+0x14) 1/r32/ecx
 900     (write-buffered *(ebp+8) *(ecx+4))  # Function-subx-name
 901     # - emit arguments
 902     # var curr/ecx : (list var) = stmt->inouts
 903     8b/-> *(ebp+0xc) 1/r32/ecx
 904     8b/-> *(ecx+4) 1/r32/ecx  # Stmt-inouts
 905     {
 906       # if (curr == null) break
 907       81 7/subop/compare %ecx 0/imm32
 908       74/jump-if-equal break/disp8
 909       #
 910       (emit-subx-call-operand *(ebp+8) *ecx)
 911       # curr = curr->next
 912       8b/-> *(ecx+4) 1/r32/ecx
 913     }
 914 $emit-subx-primitive:end:
 915     # . restore registers
 916     59/pop-to-ecx
 917     58/pop-to-eax
 918     # . epilogue
 919     89/<- %esp 5/r32/ebp
 920     5d/pop-to-ebp
 921     c3/return
 922 
 923 emit-subx-call:  # out : (address buffered-file), stmt : (address statement), vars : (address variable), callee : (address function)
 924     # . prologue
 925     55/push-ebp
 926     89/<- %ebp 4/r32/esp
 927     # . save registers
 928     50/push-eax
 929     51/push-ecx
 930     #
 931     (write-buffered *(ebp+8) "(")
 932     # - emit function name
 933     8b/-> *(ebp+0x14) 1/r32/ecx
 934     (write-buffered *(ebp+8) *(ecx+4))  # Function-subx-name
 935     # - emit arguments
 936     # var curr/ecx : (list var) = stmt->inouts
 937     8b/-> *(ebp+0xc) 1/r32/ecx
 938     8b/-> *(ecx+4) 1/r32/ecx  # Stmt-inouts
 939     {
 940       # if (curr == null) break
 941       81 7/subop/compare %ecx 0/imm32
 942       74/jump-if-equal break/disp8
 943       #
 944       (emit-subx-call-operand *(ebp+8) *ecx)
 945       # curr = curr->next
 946       8b/-> *(ecx+4) 1/r32/ecx
 947     }
 948     #
 949     (write-buffered *(ebp+8) ")")
 950 $emit-subx-call:end:
 951     # . restore registers
 952     59/pop-to-ecx
 953     58/pop-to-eax
 954     # . epilogue
 955     89/<- %esp 5/r32/ebp
 956     5d/pop-to-ebp
 957     c3/return
 958 
 959 emit-subx-call-operand:  # out : (address buffered-file), operand : (address variable)
 960     # . prologue
 961     55/push-ebp
 962     89/<- %ebp 4/r32/esp
 963     # . save registers
 964     50/push-eax
 965     #
 966     (write-buffered *(ebp+8) Space)
 967     (write-buffered *(ebp+8) "*(ebp+")
 968     8b/-> *(ebp+0xc) 0/r32/eax
 969     (print-int32-buffered *(ebp+8) *(eax+0xc))  # Var-stack-offset
 970     (write-buffered *(ebp+8) ")")
 971 $emit-subx-call-operand:end:
 972     # . restore registers
 973     58/pop-to-eax
 974     # . epilogue
 975     89/<- %esp 5/r32/ebp
 976     5d/pop-to-ebp
 977     c3/return
 978 
 979 find-matching-function:  # functions : (address function), stmt : (address statement) -> result/eax : (address function)
 980     # . prologue
 981     55/push-ebp
 982     89/<- %ebp 4/r32/esp
 983     # . save registers
 984     51/push-ecx
 985     # var curr/ecx : (address function) = functions
 986     8b/-> *(ebp+8) 1/r32/ecx
 987     {
 988       # if (curr == null) break
 989       81 7/subop/compare %ecx 0/imm32
 990       74/jump-if-equal break/disp8
 991       # if match(curr, stmt) return curr
 992       {
 993         (mu-stmt-matches-function? *(ebp+0xc) %ecx)  # => eax
 994         3d/compare-eax-and 0/imm32
 995         74/jump-if-equal break/disp8
 996         89/<- %eax 1/r32/ecx
 997         eb/jump $find-matching-function:end/disp8
 998       }
 999       # curr = curr->next
1000       8b/-> *(ecx+0x10) 1/r32/ecx
1001       eb/jump loop/disp8
1002     }
1003     # return null
1004     b8/copy-to-eax 0/imm32
1005 $find-matching-function:end:
1006     # . restore registers
1007     59/pop-to-ecx
1008     # . epilogue
1009     89/<- %esp 5/r32/ebp
1010     5d/pop-to-ebp
1011     c3/return
1012 
1013 mu-stmt-matches-function?:  # stmt : (address statement), primitive : (address opcode-info) => result/eax : boolean
1014     # . prologue
1015     55/push-ebp
1016     89/<- %ebp 4/r32/esp
1017     # . save registers
1018     51/push-ecx
1019     # return primitive->name == stmt->operation
1020     8b/-> *(ebp+8) 1/r32/ecx
1021     8b/-> *(ebp+0xc) 0/r32/eax
1022     (string-equal? *ecx *eax)  # => eax
1023 $mu-stmt-matches-function?:end:
1024     # . restore registers
1025     59/pop-to-ecx
1026     # . epilogue
1027     89/<- %esp 5/r32/ebp
1028     5d/pop-to-ebp
1029     c3/return
1030 
1031 test-emit-subx-statement-primitive:
1032     # Primitive operation on a variable on the stack.
1033     #   increment foo
1034     # =>
1035     #   ff 0/subop/increment *(ebp-8)
1036     #
1037     # There's a variable on the var stack as follows:
1038     #   name: 'foo'
1039     #   type: int
1040     #   location: -8  (negative numbers are on the stack;
1041     #                   0-7 are in registers;
1042     #                   higher positive numbers are invalid)
1043     #
1044     # There's a primitive with this info:
1045     #   name: 'increment'
1046     #   inout: int/mem
1047     #   value: 'ff 0/subop/increment'
1048     #
1049     # There's nothing in functions.
1050     #
1051     # . prologue
1052     55/push-ebp
1053     89/<- %ebp 4/r32/esp
1054     # setup
1055     (clear-stream _test-output-stream)
1056     (clear-stream _test-output-buffered-file->buffer)
1057     # var-foo/ecx : var
1058     68/push 0/imm32/no-register
1059     68/push -8/imm32/stack-offset
1060     68/push 1/imm32/block-depth
1061     68/push 1/imm32/type-int
1062     68/push "foo"/imm32
1063     89/<- %ecx 4/r32/esp
1064     # vars/edx : (stack 1)
1065     51/push-ecx/var-foo
1066     68/push 1/imm32/data-length
1067     68/push 1/imm32/top
1068     89/<- %edx 4/r32/esp
1069     # operand/esi : (list var)
1070     68/push 0/imm32/next
1071     51/push-ecx/var-foo
1072     89/<- %esi 4/r32/esp
1073     # stmt/esi : statement
1074     68/push 0/imm32/next
1075     68/push 0/imm32/outputs
1076     56/push-esi/operands
1077     68/push "increment"/imm32/operation
1078     89/<- %esi 4/r32/esp
1079     # primitives/ebx : function
1080     68/push 0/imm32/next
1081     68/push 0/imm32/body
1082     68/push 0/imm32/outputs
1083     51/push-ecx/inouts  # hack; in practice we won't have the same var in function definition and call
1084     68/push "ff 0/subop/increment"/imm32/subx-name
1085     68/push "increment"/imm32/name
1086     89/<- %ebx 4/r32/esp
1087     # convert
1088     (emit-subx-statement _test-output-buffered-file %esi %edx %ebx 0)
1089     (flush _test-output-buffered-file)
1090 +--  6 lines: #?     # dump _test-output-stream --------------------------------------------------------------------------------------------------------------
1096     # check output
1097     (check-next-stream-line-equal _test-output-stream "ff 0/subop/increment *(ebp+0xfffffff8)" "F - test-emit-subx-statement-primitive/0")
1098     # . reclaim locals
1099     81 0/subop/add %esp 0x48/imm32
1100     # . epilogue
1101     89/<- %esp 5/r32/ebp
1102     5d/pop-to-ebp
1103     c3/return
1104 
1105 test-emit-subx-statement-function-call:
1106     # Call a function on a variable on the stack.
1107     #   f foo
1108     # =>
1109     #   (f2 *(ebp-8))
1110     # (Changing the function name supports overloading in general, but here it
1111     # just serves to help disambiguate things.)
1112     #
1113     # There's a variable on the var stack as follows:
1114     #   name: 'foo'
1115     #   type: int
1116     #   location: -8  (negative numbers are on the stack;
1117     #                   0-7 are in registers;
1118     #                   higher positive numbers are invalid)
1119     #
1120     # There's nothing in primitives.
1121     #
1122     # There's a function with this info:
1123     #   name: 'f'
1124     #   inout: int/mem
1125     #   value: 'f2'
1126     #
1127     # . prologue
1128     55/push-ebp
1129     89/<- %ebp 4/r32/esp
1130     # setup
1131     (clear-stream _test-output-stream)
1132     (clear-stream _test-output-buffered-file->buffer)
1133     # var-foo/ecx : var
1134     68/push 0/imm32/no-register
1135     68/push -8/imm32/stack-offset
1136     68/push 0/imm32/block-depth
1137     68/push 1/imm32/type-int
1138     68/push "foo"/imm32
1139     89/<- %ecx 4/r32/esp
1140     # vars/edx = (stack 1)
1141     51/push-ecx/var-foo
1142     68/push 1/imm32/data-length
1143     68/push 1/imm32/top
1144     89/<- %edx 4/r32/esp
1145     # operands/esi : (list var)
1146     68/push 0/imm32/next
1147     51/push-ecx/var-foo
1148     89/<- %esi 4/r32/esp
1149     # stmt/esi : statement
1150     68/push 0/imm32/next
1151     68/push 0/imm32/outputs
1152     56/push-esi/inouts
1153     68/push "f"/imm32/operation
1154     89/<- %esi 4/r32/esp
1155     # functions/ebx : function
1156     68/push 0/imm32/next
1157     68/push 0/imm32/body
1158     68/push 0/imm32/outputs
1159     51/push-ecx/inouts  # hack; in practice we won't have the same var in function definition and call
1160     68/push "f2"/imm32/subx-name
1161     68/push "f"/imm32/name
1162     89/<- %ebx 4/r32/esp
1163     # convert
1164     (emit-subx-statement _test-output-buffered-file %esi %edx 0 %ebx)
1165     (flush _test-output-buffered-file)
1166 +--  6 lines: #?     # dump _test-output-stream --------------------------------------------------------------------------------------------------------------
1172     # check output
1173     (check-next-stream-line-equal _test-output-stream "(f2 *(ebp+0xfffffff8))" "F - test-emit-subx-statement-function-call/0")
1174     # . reclaim locals
1175     81 0/subop/add %esp 0x3c/imm32
1176     # . epilogue
1177     89/<- %esp 5/r32/ebp
1178     5d/pop-to-ebp
1179     c3/return
1180 
1181 emit-subx-prologue:  # out : (address buffered-file)
1182     # . prologue
1183     55/push-ebp
1184     89/<- %ebp 4/r32/esp
1185     #
1186     (write-buffered *(ebp+8) "# . prologue\n")
1187     (write-buffered *(ebp+8) "55/push-ebp\n")
1188     (write-buffered *(ebp+8) "89/<- %ebp 4/r32/esp\n")
1189 $emit-subx-prologue:end:
1190     # . epilogue
1191     89/<- %esp 5/r32/ebp
1192     5d/pop-to-ebp
1193     c3/return
1194 
1195 emit-subx-epilogue:  # out : (address buffered-file)
1196     # . prologue
1197     55/push-ebp
1198     89/<- %ebp 4/r32/esp
1199     #
1200     (write-buffered *(ebp+8) "# . epilogue\n")
1201     (write-buffered *(ebp+8) "89/<- %esp 5/r32/ebp\n")
1202     (write-buffered *(ebp+8) "5d/pop-to-ebp\n")
1203     (write-buffered *(ebp+8) "c3/return\n")
1204 $emit-subx-epilogue:end:
1205     # . epilogue
1206     89/<- %esp 5/r32/ebp
1207     5d/pop-to-ebp
1208     c3/return