https://github.com/akkartik/mu/blob/master/apps/mu.subx
   1 # The Mu computer's level-2 language, also called Mu.
   2 # http://akkartik.name/post/mu-2019-2
   3 #
   4 # To run:
   5 #   $ ./ntranslate init.linux 0*.subx apps/mu.subx
   6 
   7 # == Goals
   8 # 1. Be memory safe. It should be impossible to corrupt the heap, or to create
   9 # a bad pointer. (Requires strong type safety.)
  10 # 2. Do as little as possible to achieve goal 1.
  11 #   - runtime checks to avoid complex static analysis
  12 #   - minimize impedance mismatch between source language and SubX target
  13 
  14 # == Language description
  15 #
  16 # A program is a sequence of function definitions.
  17 #
  18 # Function example:
  19 #   fn foo n: int -> result/eax: int {
  20 #     ...
  21 #   }
  22 #
  23 # Functions consist of a name, optional inputs, optional outputs and a block.
  24 #
  25 # Function inputs and outputs are variables. All variables have a type and
  26 # storage specifier. They can be placed either in memory (on the stack) or in
  27 # one of 6 named registers.
  28 #   eax ecx edx ebx esi edi
  29 # Variables in registers must be primitive 32-bit types.
  30 # Variables not explicitly placed in a register are on the stack.
  31 # Variables in registers need not have a name; in that case you refer to them
  32 # directly by the register name.
  33 #
  34 # Function inputs are always passed in memory (on the stack), while outputs
  35 # are always returned in registers.
  36 #
  37 # Blocks mostly consist of statements.
  38 #
  39 # Statements mostly consist of a name, optional inputs and optional outputs.
  40 #
  41 # Statement inputs are variables or literals. Variables need to specify type
  42 # (and storage) the first time they're mentioned but not later.
  43 #
  44 # Statement outputs, like function outputs, must be variables in registers.
  45 #
  46 # Statement names must be either primitives or user-defined functions.
  47 #
  48 # Primitives can write to any register.
  49 # User-defined functions only write to hard-coded registers. Outputs of each
  50 # call must have the same registers as in the function definition.
  51 #
  52 # There are some other statement types:
  53 #   - blocks. Multiple statements surrounded by '{...}' and optionally
  54 #     prefixed with a label name and ':'
  55 #       - {
  56 #           ...
  57 #         }
  58 #       - foo: {
  59 #           ...
  60 #         }
  61 #
  62 #   - variable definitions on the stack. E.g.:
  63 #       - var foo: int
  64 #       - var bar: (array int 3)
  65 #     There's no initializer; variables are automatically initialized.
  66 #
  67 #   - variable definitions in a register. E.g.:
  68 #       - var foo/eax : int <- add bar 1
  69 #     The initializer is mandatory and must be a valid instruction that writes
  70 #     a single output to the right register. In practice registers will
  71 #     usually be either initialized by primitives or copied from eax.
  72 #       - var eax : int <- foo bar quux
  73 #         var floo/ecx : int <- copy eax
  74 #
  75 # Still todo:
  76 #   global variables
  77 #   heap allocations (planned name: 'handle')
  78 #   user-defined types: 'type' for structs, 'choice' for unions
  79 #   short-lived 'address' type for efficiently writing inside nested structs
  80 
  81 # Now that we know what the language looks like in the large, let's think
  82 # about how translation happens from the bottom up. The interplay between
  83 # variable scopes and statements using variables is the most complex aspect of
  84 # translation.
  85 #
  86 # Assume that we maintain a 'functions' list while parsing source code. And a
  87 # 'primitives' list is a global constant. Both these contain enough information
  88 # to perform type-checking on function calls or primitive statements, respectively.
  89 #
  90 # Defining variables pushes them on a stack with the current block depth and
  91 # enough information about their location (stack offset or register id).
  92 # Starting a block increments the current block id.
  93 # Each statement now has enough information to emit code for it.
  94 # Ending a block is where the magic happens:
  95 #   pop all variables at the current block depth
  96 #   emit code to restore all register variables introduced at the current depth
  97 #   emit code to clean up all stack variables at the current depth (just increment esp)
  98 #   decrement the current block depth
  99 #
 100 # One additional check we'll need is to ensure that a variable in a register
 101 # isn't shadowed by a different one. That may be worth a separate data
 102 # structure but for now repeatedly scanning the var stack should suffice.
 103 
 104 # == Book-keeping while emitting code for a single statement
 105 # Immutable data:
 106 #   function info
 107 #
 108 # Mutable data:
 109 #   stack: variables currently in scope
 110 #     block id
 111 #     type
 112 #     ebp offset for function | register id
 113 #   dict: register -> var
 114 
 115 # == Compiling a single instruction
 116 # Determine the function or primitive being called.
 117 #   If no matches, show all functions/primitives with the same name, along
 118 #   with reasons they don't match. (type and storage checking)
 119 #   It must be a function if:
 120 #     #outputs > 1, or
 121 #     #inouts > 2, or
 122 #     #inouts + #outputs > 2
 123 # If it's a function, emit:
 124 #   (low-level-name <rm32 or imm32>...)
 125 # Otherwise (it's a primitive):
 126 #   assert(#inouts <= 2 && #outs <= 1 && (#inouts + #outs) <= 2)
 127 #   emit opcode
 128 #   emit-rm32(inout[0])
 129 #   if out[0] exists: emit-r32(out[0])
 130 #   else if inout[1] is a literal: emit-imm32(inout[1])
 131 #   else: emit-rm32(inout[1])
 132 
 133 # emit-rm32 and emit-r32 should check that the variable they intend is still
 134 # available in the register.
 135 
 136 # == Emitting a block
 137 # Emit block name if necessary
 138 # Emit '{'
 139 # When you encounter a statement, emit it as above
 140 # When you encounter a variable declaration
 141 #   emit any code needed for it (bzeros)
 142 #   push it on the var stack
 143 #   update register dict if necessary
 144 # When you encounter '}'
 145 #   While popping variables off the var stack until block id changes
 146 #     Emit code needed to clean up the stack
 147 #       either increment esp
 148 #       or pop into appropriate register
 149 #   TODO: how to update the register dict? does it need to be a stack as well?
 150 
 151 # The rest is straightforward.
 152 
 153 # A sketch of planned data structures. Still highly speculative.
 154 == data
 155 
 156 # A program is currently a linked list of functions.
     # Program holds the address of the first function in that list; it starts
     # out as 0 (null). parse-mu appends to the list through this slot and
     # emit-subx walks the list starting from it.
 157 Program:  # (address function)
 158   0/imm32
 159 
 160 # A function consists of:
 161 #   name: (address string)
 162 #   inputs: (address var-type)  # tbd
 163 #   outputs: (address var-type)  # tbd
 164 #   body: (address block)
 165 #   next: (address function)
     # The constants below imply five 4-byte fields laid out in the order above:
     # 'next' (the fifth field) is at offset 0x10 and the record is 0x14 bytes.
 166 Function-next:
 167   0x10/imm32
 168 Function-size:
 169   0x14/imm32/20
 170 
 171 # A block is a list of statements:
 172 #     statements: (address statement)
 173 
 174 # A statement can be either a regular statement consisting of:
 175 #     name: (address string)
 176 #     inputs: (address var)
 177 #     outputs: (address var-r)
 178 # or a variable declaration on the stack:
 179 #     name: (address string)
 180 #     type: (address type-sexpr)
 181 # or a regular statement writing to a single new variable in a register:
 182 #     name: (address string)
 183 #     inputs: (address var)
 184 #     output: var-r
 185 # or a block of statements:
 186 #     statements: (address statement)
 187 
 188 # Kinds of local variable declarations:
 189 #   var f : (array foo 10)
 190 #   var f/ecx : int <- copy 0
 191 # Variables live in either the stack or a register.
 192 # Variables in the stack are auto-initialized.
 193 #   (This is non-trivial for arrays, and arrays inside structs... We'll see.)
 194 # Variables in register need a real instruction.
 195 
 196 # var is a variable declaration. e.g. `foo: (array int 3)`
 197 #   name: (address string)
 198 #   type: (address type-sexpr)
 199 
 200 # var-r is a variable declaration in a register. e.g. `foo/eax: (array int 3)`
 201 #   name: (address string)
 202 #   type: (address type-sexpr)
 203 #   reg: int [0..7]
 204 
 205 # type-sexpr is a tree of type identifiers. e.g. (array (address int) 3)
 206 # either
 207 #   id: type-identifier
 208 # or
 209 #   car: (address type-sexpr)
 210 #   cdr: (address type-sexpr)
 211 
 212 == code
 213 
 214 Entry:
         # Program entry point. With "test" as the first command-line argument,
         # run the test suite and exit with the number of test failures as the
         # status. Otherwise translate Mu source on Stdin to SubX on Stdout and
         # exit with status 0.
 215     # . prologue
 216     89/<- %ebp 4/r32/esp
 217     (new-segment Heap-size Heap)
 218     # if (argv[1] == "test") run-tests()
 219     {
 220       # if (argc <= 1) break
 221       81 7/subop/compare *ebp 1/imm32
 222       7e/jump-if-lesser-or-equal break/disp8
 223       # if (argv[1] != "test") break
 224       (kernel-string-equal? *(ebp+8) "test")  # => eax
 225       3d/compare-eax-and 0/imm32
 226       74/jump-if-equal break/disp8
 227       #
 228       (run-tests)
 229       # syscall(exit, *Num-test-failures)
 230       8b/-> *Num-test-failures 3/r32/ebx
 231       eb/jump $mu-main:end/disp8
 232     }
 233     # otherwise convert Stdin
 234     (convert-mu Stdin Stdout)
 235     (flush Stdout)
 236     # syscall(exit, 0)
 237     bb/copy-to-ebx 0/imm32
 238 $mu-main:end:
         # both paths arrive here with the exit status already in ebx
 239     b8/copy-to-eax 1/imm32/exit
 240     cd/syscall 0x80/imm8
 241 
 242 convert-mu:  # in : (address buffered-file), out : (address buffered-file)
         # Top-level driver: parse Mu source from 'in' into the global Program
         # list, run the type-checking pass, then emit SubX for every parsed
         # function to 'out'. The caller is responsible for flushing 'out'.
 243     # . prologue
 244     55/push-ebp
 245     89/<- %ebp 4/r32/esp
 246     #
 247     (parse-mu *(ebp+8))
 248     (check-mu-types)
 249     (emit-subx *(ebp+0xc))
 250 $convert-mu:end:
 251     # . epilogue
 252     89/<- %esp 5/r32/ebp
 253     5d/pop-to-ebp
 254     c3/return
 255 
 256 test-convert-empty-input:
 257     # empty input => empty output
         # Runs convert-mu on an input stream with nothing written to it and
         # checks that the flushed output stream is the empty string.
 258     # . prologue
 259     55/push-ebp
 260     89/<- %ebp 4/r32/esp
 261     # setup
 262     (clear-stream _test-input-stream)
 263     (clear-stream _test-input-buffered-file->buffer)
 264     (clear-stream _test-output-stream)
 265     (clear-stream _test-output-buffered-file->buffer)
 266     #
 267     (convert-mu _test-input-buffered-file _test-output-buffered-file)
 268     (flush _test-output-buffered-file)
 269     (check-stream-equal _test-output-stream "" "F - test-convert-empty-input")
 270     # . epilogue
 271     89/<- %esp 5/r32/ebp
 272     5d/pop-to-ebp
 273     c3/return
 274 
 275 test-convert-function-skeleton:
 276     # empty function decl => function prologue and epilogue
 277     #   fn foo {
 278     #   }
 279     # =>
 280     #   foo:
 281     #     # . prologue
 282     #     55/push-ebp
 283     #     89/<- %ebp 4/r32/esp
 284     #     # . epilogue
 285     #     89/<- %esp 5/r32/ebp
 286     #     5d/pop-to-ebp
 287     #     c3/return
 288     # . prologue
 289     55/push-ebp
 290     89/<- %ebp 4/r32/esp
 291     # setup
 292     (clear-stream _test-input-stream)
 293     (clear-stream _test-input-buffered-file->buffer)
 294     (clear-stream _test-output-stream)
 295     (clear-stream _test-output-buffered-file->buffer)
 296     # write the two-line source for an empty function 'foo'
 297     (write _test-input-stream "fn foo {\n")
 298     (write _test-input-stream "}\n")
 299     # convert
 300     (convert-mu _test-input-buffered-file _test-output-buffered-file)
 301     (flush _test-output-buffered-file)
 302 +--  6 lines: #?     # dump _test-output-stream --------------------------------------------------------------------------------------------------------------
 308     # check output
         # each check consumes the next line of output; the /N suffix in the
         # failure message identifies which expected line did not match
 309     (check-next-stream-line-equal _test-output-stream "foo:"                  "F - test-convert-function-skeleton/0")
 310     (check-next-stream-line-equal _test-output-stream "# . prologue"          "F - test-convert-function-skeleton/1")
 311     (check-next-stream-line-equal _test-output-stream "55/push-ebp"           "F - test-convert-function-skeleton/2")
 312     (check-next-stream-line-equal _test-output-stream "89/<- %ebp 4/r32/esp"  "F - test-convert-function-skeleton/3")
 313     (check-next-stream-line-equal _test-output-stream "# . epilogue"          "F - test-convert-function-skeleton/4")
 314     (check-next-stream-line-equal _test-output-stream "89/<- %esp 5/r32/ebp"  "F - test-convert-function-skeleton/5")
 315     (check-next-stream-line-equal _test-output-stream "5d/pop-to-ebp"         "F - test-convert-function-skeleton/6")
 316     (check-next-stream-line-equal _test-output-stream "c3/return"             "F - test-convert-function-skeleton/7")
 317     # . epilogue
 318     89/<- %esp 5/r32/ebp
 319     5d/pop-to-ebp
 320     c3/return
 321 
 322 test-convert-multiple-function-skeletons:
 323     # multiple functions correctly organized into a linked list
 324     #   fn foo {
 325     #   }
 326     #   fn bar {
 327     #   }
 328     # =>
 329     #   foo:
 330     #     # . prologue
 331     #     55/push-ebp
 332     #     89/<- %ebp 4/r32/esp
 333     #     # . epilogue
 334     #     89/<- %esp 5/r32/ebp
 335     #     5d/pop-to-ebp
 336     #     c3/return
 337     #   bar:
 338     #     # . prologue
 339     #     55/push-ebp
 340     #     89/<- %ebp 4/r32/esp
 341     #     # . epilogue
 342     #     89/<- %esp 5/r32/ebp
 343     #     5d/pop-to-ebp
 344     #     c3/return
 345     # . prologue
 346     55/push-ebp
 347     89/<- %ebp 4/r32/esp
 348     # setup
 349     (clear-stream _test-input-stream)
 350     (clear-stream _test-input-buffered-file->buffer)
 351     (clear-stream _test-output-stream)
 352     (clear-stream _test-output-buffered-file->buffer)
 353     # write two empty functions, 'foo' followed by 'bar'
 354     (write _test-input-stream "fn foo {\n")
 355     (write _test-input-stream "}\n")
 356     (write _test-input-stream "fn bar {\n")
 357     (write _test-input-stream "}\n")
 358     # convert
 359     (convert-mu _test-input-buffered-file _test-output-buffered-file)
 360     (flush _test-output-buffered-file)
 361 +--  6 lines: #?     # dump _test-output-stream --------------------------------------------------------------------------------------------------------------
 367     # check first function
         # 'foo' must be emitted before 'bar', i.e. in source order
 368     (check-next-stream-line-equal _test-output-stream "foo:"                  "F - test-convert-multiple-function-skeletons/0")
 369     (check-next-stream-line-equal _test-output-stream "# . prologue"          "F - test-convert-multiple-function-skeletons/1")
 370     (check-next-stream-line-equal _test-output-stream "55/push-ebp"           "F - test-convert-multiple-function-skeletons/2")
 371     (check-next-stream-line-equal _test-output-stream "89/<- %ebp 4/r32/esp"  "F - test-convert-multiple-function-skeletons/3")
 372     (check-next-stream-line-equal _test-output-stream "# . epilogue"          "F - test-convert-multiple-function-skeletons/4")
 373     (check-next-stream-line-equal _test-output-stream "89/<- %esp 5/r32/ebp"  "F - test-convert-multiple-function-skeletons/5")
 374     (check-next-stream-line-equal _test-output-stream "5d/pop-to-ebp"         "F - test-convert-multiple-function-skeletons/6")
 375     (check-next-stream-line-equal _test-output-stream "c3/return"             "F - test-convert-multiple-function-skeletons/7")
 376     # check second function
         # failure ids for the second function start at /10
 377     (check-next-stream-line-equal _test-output-stream "bar:"                  "F - test-convert-multiple-function-skeletons/10")
 378     (check-next-stream-line-equal _test-output-stream "# . prologue"          "F - test-convert-multiple-function-skeletons/11")
 379     (check-next-stream-line-equal _test-output-stream "55/push-ebp"           "F - test-convert-multiple-function-skeletons/12")
 380     (check-next-stream-line-equal _test-output-stream "89/<- %ebp 4/r32/esp"  "F - test-convert-multiple-function-skeletons/13")
 381     (check-next-stream-line-equal _test-output-stream "# . epilogue"          "F - test-convert-multiple-function-skeletons/14")
 382     (check-next-stream-line-equal _test-output-stream "89/<- %esp 5/r32/ebp"  "F - test-convert-multiple-function-skeletons/15")
 383     (check-next-stream-line-equal _test-output-stream "5d/pop-to-ebp"         "F - test-convert-multiple-function-skeletons/16")
 384     (check-next-stream-line-equal _test-output-stream "c3/return"             "F - test-convert-multiple-function-skeletons/17")
 385     # . epilogue
 386     89/<- %esp 5/r32/ebp
 387     5d/pop-to-ebp
 388     c3/return
 389 
 390 test-convert-function-with-arg:
 391     # function with one arg and a copy instruction
 392     #   fn foo n : int -> result/eax : int {
 393     #     result <- copy n
 394     #   }
 395     # =>
 396     #   foo:
 397     #     # . prologue
 398     #     55/push-ebp
 399     #     89/<- %ebp 4/r32/esp
 400     #     {
 401     #     # result <- copy n
 402     #     8b/-> *(ebp+8) 0/r32/eax
 403     #     }
 404     #     # . epilogue
 405     #     89/<- %esp 5/r32/ebp
 406     #     5d/pop-to-ebp
 407     #     c3/return
         #
         # TODO: the translation described above is the goal, not what is tested
         # yet. The input written below is still the bare 'fn foo {' skeleton
         # (populate-mu-function-header aborts on anything other than
         # 'fn <name> {'), so the checks only cover prologue and epilogue.
 408     # . prologue
 409     55/push-ebp
 410     89/<- %ebp 4/r32/esp
 411     # setup
 412     (clear-stream _test-input-stream)
 413     (clear-stream _test-input-buffered-file->buffer)
 414     (clear-stream _test-output-stream)
 415     (clear-stream _test-output-buffered-file->buffer)
 416     #
 417     (write _test-input-stream "fn foo {\n")
 418     (write _test-input-stream "}\n")
 419     # convert
 420     (convert-mu _test-input-buffered-file _test-output-buffered-file)
 421     (flush _test-output-buffered-file)
 422 +--  6 lines: #?     # dump _test-output-stream --------------------------------------------------------------------------------------------------------------
 428     # check output
         # failure messages carry this test's own name so a failure here is not
         # mistaken for one in test-convert-function-skeleton
 429     (check-next-stream-line-equal _test-output-stream "foo:"                  "F - test-convert-function-with-arg/0")
 430     (check-next-stream-line-equal _test-output-stream "# . prologue"          "F - test-convert-function-with-arg/1")
 431     (check-next-stream-line-equal _test-output-stream "55/push-ebp"           "F - test-convert-function-with-arg/2")
 432     (check-next-stream-line-equal _test-output-stream "89/<- %ebp 4/r32/esp"  "F - test-convert-function-with-arg/3")
 433     (check-next-stream-line-equal _test-output-stream "# . epilogue"          "F - test-convert-function-with-arg/4")
 434     (check-next-stream-line-equal _test-output-stream "89/<- %esp 5/r32/ebp"  "F - test-convert-function-with-arg/5")
 435     (check-next-stream-line-equal _test-output-stream "5d/pop-to-ebp"         "F - test-convert-function-with-arg/6")
 436     (check-next-stream-line-equal _test-output-stream "c3/return"             "F - test-convert-function-with-arg/7")
 437     # . epilogue
 438     89/<- %esp 5/r32/ebp
 439     5d/pop-to-ebp
 440     c3/return
 441 
 442 parse-mu:  # in : (address buffered-file)
         # Read Mu source from 'in' one line at a time and append a function
         # record to the global Program list for every top-level 'fn'.
         # Empty lines and lines whose first word starts with '#' are skipped.
         # Any other top-level word prints an error to Stderr and exits with
         # status 1.
 443     # pseudocode
 444     #   var curr-function = Program
 445     #   var line : (stream byte 512)
 446     #   var word-slice : slice
 447     #   while true                                  # line loop
 448     #     clear-stream(line)
 449     #     read-line-buffered(in, line)
 450     #     if (line->write == 0) break               # end of file
 451     #     while true                                # word loop
 452     #       word-slice = next-word-or-string(line)
 453     #       if slice-empty?(word-slice)             # end of line
 454     #         break
 455     #       else if slice-starts-with?(word-slice, "#")  # comment
 456     #         break                                 # end of line
 457     #       else if slice-equal(word-slice, "fn")
 458     #         var new-function : (address function) = new function
 459     #         populate-mu-function-header(line, new-function)
         #         populate-mu-function-body(in, new-function)
 460     #         *curr-function = new-function
 461     #         curr-function = &new-function->next
 462     #       else
 463     #         abort()
 464     #
 465     # . prologue
 466     55/push-ebp
 467     89/<- %ebp 4/r32/esp
 468     # . save registers
 469     50/push-eax
 470     51/push-ecx
 471     52/push-edx
 472     57/push-edi
 473     # var line/ecx : (stream byte 512)
         # 0x200 bytes of data below a 12-byte header (write, read, length)
 474     81 5/subop/subtract %esp 0x200/imm32
 475     68/push 0x200/imm32/length
 476     68/push 0/imm32/read
 477     68/push 0/imm32/write
 478     89/<- %ecx 4/r32/esp
 479     # var word-slice/edx : slice
 480     68/push 0/imm32/end
 481     68/push 0/imm32/start
 482     89/<- %edx 4/r32/esp
 483     # var curr-function/edi : (address function) = Program
         # edi always holds the address of the slot the next function gets stored in
 484     bf/copy-to-edi Program/imm32
 485     {
 486 $parse-mu:line-loop:
 487       (clear-stream %ecx)
 488       (read-line-buffered *(ebp+8) %ecx)
 489       # if (line->write == 0) break
 490       81 7/subop/compare *ecx 0/imm32
 491       0f 84/jump-if-equal break/disp32
 492 +--  6 lines: #?       # dump line ---------------------------------------------------------------------------------------------------------------------------
 498       { # word loop
 499 $parse-mu:word-loop:
 500         (next-word-or-string %ecx %edx)
 501         # if slice-empty?(word-slice) break
 502         (slice-empty? %edx)
 503         3d/compare-eax-and 0/imm32
 504         0f 85/jump-if-not-equal break/disp32
 505         # if (*word-slice->start == "#") break
 506         # . eax = *word-slice->start
 507         8b/-> *edx 0/r32/eax
 508         8a/copy-byte *eax 0/r32/AL
 509         81 4/subop/and %eax 0xff/imm32
 510         # . if (eax == '#') break
 511         3d/compare-eax-and 0x23/imm32/hash
 512         0f 84/jump-if-equal break/disp32
 513         # if (slice-equal?(word-slice, "fn")) parse a function
 514         {
 515           (slice-equal? %edx "fn")
 516           3d/compare-eax-and 0/imm32
 517           0f 84/jump-if-equal break/disp32
 518           # var new-function/eax : (address function) = new function
               # NOTE(review): new-function->next is never written here; this
               # presumably relies on allocate returning zeroed memory so the
               # list stays null-terminated -- TODO confirm
 519           (allocate Heap *Function-size)  # => eax
               # the rest of the current line is the header; the body is read
               # from the following lines of 'in'
 520           (populate-mu-function-header %ecx %eax)
 521           (populate-mu-function-body *(ebp+8) %eax)
 522           # *curr-function = new-function
 523           89/<- *edi 0/r32/eax
 524           # curr-function = &new-function->next
 525           8d/address-> *(eax+0x10) 7/r32/edi
 526           e9/jump $parse-mu:word-loop/disp32
 527         }
 528         # otherwise abort
 529         e9/jump $parse-mu:abort/disp32
 530       } # end word loop
 531       e9/jump loop/disp32
 532     } # end line loop
 533 $parse-mu:end:
 534     # . reclaim locals
         # 0x214 = 0x200 (line data) + 0xc (line header) + 8 (word-slice)
 535     81 0/subop/add %esp 0x214/imm32
 536     # . restore registers
 537     5f/pop-to-edi
 538     5a/pop-to-edx
 539     59/pop-to-ecx
 540     58/pop-to-eax
 541     # . epilogue
 542     89/<- %esp 5/r32/ebp
 543     5d/pop-to-ebp
 544     c3/return
 545 
 546 $parse-mu:abort:
 547     # error("unexpected top-level command: " word-slice "\n")
 548     (write-buffered Stderr "unexpected top-level command: ")
         # word-slice (edx) is a slice, not a string, so it needs the
         # slice-aware writer
 549     (write-slice-buffered Stderr %edx)
 550     (write-buffered Stderr "\n")
 551     (flush Stderr)
 552     # . syscall(exit, 1)
 553     bb/copy-to-ebx  1/imm32
 554     b8/copy-to-eax  1/imm32/exit
 555     cd/syscall  0x80/imm8
 556     # never gets here
 557 
 558 # errors considered:
 559 #   fn foo { {
 560 #   fn foo { }
 561 #   fn foo { } {
 562 #   fn foo  # no block
 563 populate-mu-function-header:  # first-line : (address stream byte), out : (address function)
         # Parse the rest of a 'fn' line. The next word becomes the function's
         # name (copied to the heap and stored in the first field of 'out'),
         # the word after it must be '{', and nothing but a '#' comment may
         # follow. Anything else prints the offending line to stderr and
         # exits with status 1.
 564     # . prologue
 565     55/push-ebp
 566     89/<- %ebp 4/r32/esp
 567     # . save registers
 568     50/push-eax
 569     51/push-ecx
 570     57/push-edi
 571     # edi = out
 572     8b/-> *(ebp+0xc) 7/r32/edi
 573     # var word-slice/ecx : slice
 574     68/push 0/imm32/end
 575     68/push 0/imm32/start
 576     89/<- %ecx 4/r32/esp
 577     # save function name
 578     (next-word *(ebp+8) %ecx)
 579     (slice-to-string Heap %ecx)  # => eax
 580     89/<- *edi 0/r32/eax
 581     # assert that next token is '{'
 582     (next-word *(ebp+8) %ecx)
 583     (slice-equal? %ecx "{")
 584     3d/compare-eax-and 0/imm32
 585     74/jump-if-equal $populate-mu-function-header:abort/disp8
 586     # assert that there's no further token
 587     {
 588       # word-slice = next-word(line)
 589       (next-word *(ebp+8) %ecx)
 590       # if (word-slice == '') break
 591       (slice-empty? %ecx)
 592       3d/compare-eax-and 0/imm32
 593       75/jump-if-not-equal break/disp8
 594       # if (slice-starts-with?(word-slice, "#")) break
 595       # . eax = *word-slice->start
           # word-slice lives in ecx in this function; edx is not set up here
 596       8b/-> *ecx 0/r32/eax
 597       8a/copy-byte *eax 0/r32/AL
 598       81 4/subop/and %eax 0xff/imm32
 599       # . if (eax == '#') break
 600       3d/compare-eax-and 0x23/imm32/hash
 601       74/jump-if-equal break/disp8
 602       # otherwise abort
 603       eb/jump $populate-mu-function-header:abort/disp8
 604     }
 605 $populate-mu-function-header:end:
 606     # . reclaim locals
 607     81 0/subop/add %esp 8/imm32
 608     # . restore registers
 609     5f/pop-to-edi
 610     59/pop-to-ecx
 611     58/pop-to-eax
 612     # . epilogue
 613     89/<- %esp 5/r32/ebp
 614     5d/pop-to-ebp
 615     c3/return
 616 
 617 $populate-mu-function-header:abort:
 618     # error("function header not in form 'fn <name> {'")
 619     (write-buffered Stderr "function header not in form 'fn <name> {' -- '")
         # the line is written straight to fd 2 below, bypassing the Stderr
         # buffer; flush first so the pieces of the message stay in order
         (flush Stderr)
 620     (rewind-stream *(ebp+8))
 621     (write-stream 2 *(ebp+8))
 622     (write-buffered Stderr "'\n")
 623     (flush Stderr)
 624     # . syscall(exit, 1)
 625     bb/copy-to-ebx  1/imm32
 626     b8/copy-to-eax  1/imm32/exit
 627     cd/syscall  0x80/imm8
 628     # never gets here
 629 
 630 # errors considered:
 631 #   { abc
 632 populate-mu-function-body:  # in : (address buffered-file), out : (address function)
         # Consume the lines of a function body from 'in', tracking curly-brace
         # nesting. The opening '{' was already consumed with the header, so the
         # count starts at 1 and reading stops once it drops back to 0 (or at
         # end of file). A '{' or '}' must be alone on its line, optionally
         # followed by a '#' comment; otherwise print the line to stderr and
         # exit with status 1. A line starting with any other word ends the body.
         # 'out' is not written yet.
 633     # . prologue
 634     55/push-ebp
 635     89/<- %ebp 4/r32/esp
 636     # . save registers
 637     50/push-eax
 638     51/push-ecx
 639     52/push-edx
 640     53/push-ebx
 641     # var line/ecx : (stream byte 512)
 642     81 5/subop/subtract %esp 0x200/imm32
 643     68/push 0x200/imm32/length
 644     68/push 0/imm32/read
 645     68/push 0/imm32/write
 646     89/<- %ecx 4/r32/esp
 647     # var word-slice/edx : slice
 648     68/push 0/imm32/end
 649     68/push 0/imm32/start
 650     89/<- %edx 4/r32/esp
 651     # var open-curly-count/ebx : int = 1
 652     bb/copy-to-ebx 1/imm32
 653     { # line loop
 654 $populate-mu-function-body:line-loop:
 655       # if (open-curly-count == 0) break
 656       81 7/subop/compare %ebx 0/imm32
 657       0f 84/jump-if-equal break/disp32
 658       # line = read-line-buffered(in)
 659       (clear-stream %ecx)
 660       (read-line-buffered *(ebp+8) %ecx)
 661       # if (line->write == 0) break
 662       81 7/subop/compare *ecx 0/imm32
 663       0f 84/jump-if-equal break/disp32
 664       # word-slice = next-word(line)
 665       (next-word %ecx %edx)
 666       # if slice-empty?(word-slice) continue
           # (the slice is in edx; ecx is the line stream)
 667       (slice-empty? %edx)
 668       3d/compare-eax-and 0/imm32
 669       75/jump-if-not-equal loop/disp8
 670       # if (slice-starts-with?(word-slice, '#') continue
 671       # . eax = *word-slice->start
 672       8b/-> *edx 0/r32/eax
 673       8a/copy-byte *eax 0/r32/AL
 674       81 4/subop/and %eax 0xff/imm32
 675       # . if (eax == '#') continue
 676       3d/compare-eax-and 0x23/imm32/hash
 677       74/jump-if-equal loop/disp8
 678       {
 679         # if slice-equal?(word-slice, "{") ++open-curly-count
 680         {
 681           (slice-equal? %edx "{")
 682           3d/compare-eax-and 0/imm32
 683           74/jump-if-equal break/disp8
 684           43/increment-ebx
 685           eb/jump $curly-found:end/disp8
 686         }
 687         # else if slice-equal?(word-slice, "}") --open-curly-count
 688         {
 689           (slice-equal? %edx "}")
 690           3d/compare-eax-and 0/imm32
 691           74/jump-if-equal break/disp8
 692           4b/decrement-ebx
 693           eb/jump $curly-found:end/disp8
 694         }
 695         # else break
             # any other first word ends the body and returns to the caller
 696         eb/jump $populate-mu-function-body:end/disp8
 697       }
 698       # - check for invalid tokens after curly
 699 $curly-found:end:
 700       # second-word-slice = next-word(line)
           # (reuses the word-slice storage in edx)
 701       (next-word %ecx %edx)
 702       # if slice-empty?(second-word-slice) continue
 703       (slice-empty? %edx)
 704       3d/compare-eax-and 0/imm32
 705       0f 85/jump-if-not-equal loop/disp32
 706       # if (slice-starts-with?(second-word-slice, '#') continue
 707       # . eax = *second-word-slice->start
 708       8b/-> *edx 0/r32/eax
 709       8a/copy-byte *eax 0/r32/AL
 710       81 4/subop/and %eax 0xff/imm32
 711       # . if (eax == '#') continue
 712       3d/compare-eax-and 0x23/imm32/hash
 713       0f 84/jump-if-equal loop/disp32
 714       # abort
 715       eb/jump $populate-mu-function-body:abort/disp8
 716     } # end line loop
 717 $populate-mu-function-body:end:
 718     # . reclaim locals
         # 0x214 = 0x200 (line data) + 0xc (line header) + 8 (word-slice)
 719     81 0/subop/add %esp 0x214/imm32
 720     # . restore registers
 721     5b/pop-to-ebx
 722     5a/pop-to-edx
 723     59/pop-to-ecx
 724     58/pop-to-eax
 725     # . epilogue
 726     89/<- %esp 5/r32/ebp
 727     5d/pop-to-ebp
 728     c3/return
 729 
 730 $populate-mu-function-body:abort:
 731     # error("'{' or '}' should be on its own line, but got '")
 732     (write-buffered Stderr "'{' or '}' should be on its own line, but got '")
         # the line is written straight to fd 2 below, bypassing the Stderr
         # buffer; flush first so the pieces of the message stay in order
         (flush Stderr)
 733     (rewind-stream %ecx)
 734     (write-stream 2 %ecx)
 735     (write-buffered Stderr "'\n")
 736     (flush Stderr)
 737     # . syscall(exit, 1)
 738     bb/copy-to-ebx  1/imm32
 739     b8/copy-to-eax  1/imm32/exit
 740     cd/syscall  0x80/imm8
 741     # never gets here
 742 
 743 check-mu-types:
         # Type-checking pass over the parsed program. Currently a placeholder:
         # it takes no arguments, performs no checks and returns immediately.
 744     # . prologue
 745     55/push-ebp
 746     89/<- %ebp 4/r32/esp
 747     #
 748 $check-types:end:
 749     # . epilogue
 750     89/<- %esp 5/r32/ebp
 751     5d/pop-to-ebp
 752     c3/return
 753 
 754 emit-subx:  # out : (address buffered-file)
         # Walk the linked list of functions starting at *Program and emit SubX
         # for each one to 'out', in list order. Emits nothing if the list is
         # empty.
 755     # . prologue
 756     55/push-ebp
 757     89/<- %ebp 4/r32/esp
 758     # . save registers
 759     50/push-eax
 760     51/push-ecx
 761     57/push-edi
 762     # edi = out
 763     8b/-> *(ebp+8) 7/r32/edi
 764     # var curr/ecx : (address function) = Program
         # (loads the contents of Program, i.e. the first function, not its address)
 765     8b/-> *Program 1/r32/ecx
 766     {
 767       # if (curr == NULL) break
 768       81 7/subop/compare %ecx 0/imm32
 769       0f 84/jump-if-equal break/disp32
 770       (emit-subx-function %edi %ecx)
 771       # curr = curr->next
           # 0x10 is the same value as the Function-next constant
 772       8b/-> *(ecx+0x10) 1/r32/ecx
 773       e9/jump loop/disp32
 774     }
 775 $emit-subx:end:
 776     # . restore registers
 777     5f/pop-to-edi
 778     59/pop-to-ecx
 779     58/pop-to-eax
 780     # . epilogue
 781     89/<- %esp 5/r32/ebp
 782     5d/pop-to-ebp
 783     c3/return
 784 
 785 # == Emitting a function
 786 # Emit function header
 787 # Emit function prologue
 788 # Translate function body
 789 # Emit function epilogue
 790 
 791 emit-subx-function:  # out : (address buffered-file), f : (address function)
         # Emit one function to 'out': its name followed by ":\n", then the
         # prologue, the translated body block and the epilogue.
 792     # . prologue
 793     55/push-ebp
 794     89/<- %ebp 4/r32/esp
 795     # . save registers
 796     50/push-eax
 797     51/push-ecx
 798     57/push-edi
 799     # edi = out
 800     8b/-> *(ebp+8) 7/r32/edi
 801     # ecx = f
 802     8b/-> *(ebp+0xc) 1/r32/ecx
 803     #
         # f->name is the first field of the function record
 804     (write-buffered %edi *ecx)
 805     (write-buffered %edi ":\n")
 806     (emit-subx-prologue %edi)
         # f->body: the record is name, inputs, outputs, body, next (4 bytes
         # each, with next at 0x10), so body is at offset 0xc; offset 4 would
         # be the inputs slot
 807     (emit-subx-block %edi *(ecx+0xc))
 808     (emit-subx-epilogue %edi)
 809 $emit-subx-function:end:
 810     # . restore registers
 811     5f/pop-to-edi
 812     59/pop-to-ecx
 813     58/pop-to-eax
 814     # . epilogue
 815     89/<- %esp 5/r32/ebp
 816     5d/pop-to-ebp
 817     c3/return
 818 
 819 emit-subx-block:  # out : (address buffered-file), block : (address block)
         # Translate a block of statements to 'out'. Currently a placeholder:
         # neither argument is read and nothing is emitted.
 820     # . prologue
 821     55/push-ebp
 822     89/<- %ebp 4/r32/esp
 823     #
 824 $emit-subx-block:end:
 825     # . epilogue
 826     89/<- %esp 5/r32/ebp
 827     5d/pop-to-ebp
 828     c3/return
 829 
 830 emit-subx-statement:  # out : (address buffered-file), stmt : (address statement), vars : (address variable), regs : (address array (address variable)), primitives : (address opcode-info), functions : (address function)
 831     # . prologue
 832     55/push-ebp
 833     89/<- %ebp 4/r32/esp
 834     # . save registers
 835     50/push-eax
 836     51/push-ecx
 837     # search the primitives list for one matching stmt: var curr/ecx : (address primitive) = primitives
 838     8b/-> *(ebp+0x18) 1/r32/ecx  # primitives is the 5th argument
 839     {
 840       # if (curr == null) abort  -- list exhausted (or empty) without a match
 841       81 7/subop/compare %ecx 0/imm32
 842       0f 84/jump-if-equal $emit-subx-statement:abort/disp32
 843       # if (match(curr, stmt)) break
 844       (mu-stmt-matches-primitive? *(ebp+0xc) %ecx)  # => eax
 845       3d/compare-eax-and 0/imm32
 846       75/jump-if-not-equal break/disp8
 847       # TODO: emit code for stmt according to curr and vars (nothing is written to out yet, even on a match)
 848       # curr = curr->next
 849       8b/-> *(ecx+0x10) 1/r32/ecx
 850       e9/jump loop/disp32
 851     }
 852 $emit-subx-statement:end:
 853     # . restore registers
 854     59/pop-to-ecx
 855     58/pop-to-eax
 856     # . epilogue
 857     89/<- %esp 5/r32/ebp
 858     5d/pop-to-ebp
 859     c3/return
 860 
 861 $emit-subx-statement:abort:
 862     # error("couldn't translate '" stmt "'\n"), then exit; stmt itself is not printed while the line below stays disabled
 863     (write-buffered Stderr "couldn't translate '")
 864 #?     (emit-string Stderr *(ebp+0xc))  # TODO
 865     (write-buffered Stderr "'\n")
 866     (flush Stderr)
 867     # . syscall(exit, 1)
 868     bb/copy-to-ebx  1/imm32
 869     b8/copy-to-eax  1/imm32/exit
 870     cd/syscall  0x80/imm8
 871     # never gets here
 872 
 873 mu-stmt-matches-primitive?:  # stmt : (address statement), primitive : (address opcode-info) => result/eax : boolean
 874     # . prologue
 875     55/push-ebp
 876     89/<- %ebp 4/r32/esp
 877     # . save registers (eax carries the result, so only ecx is saved)
 878     51/push-ecx
 879     # result = string-equal?(stmt->operation, primitive->name); both are the first word of their record
 880     8b/-> *(ebp+0xc) 1/r32/ecx  # ecx = primitive
 881     8b/-> *(ebp+8) 0/r32/eax  # eax = stmt
 882     (string-equal? *eax *ecx)  # => eax
 883 $mu-stmt-matches-primitive?:end:
 884     # . restore registers
 885     59/pop-to-ecx
 886     # . epilogue
 887     89/<- %esp 5/r32/ebp
 888     5d/pop-to-ebp
 889     c3/return
 890 
 891 test-emit-subx-statement-primitive:
 892     # Primitive operation on a variable on the stack.
 893     #   increment foo
 894     # =>
 895     #   ff 0/subop/increment *(ebp-8)
 896     #
 897     # There's a variable on the var stack as follows:
 898     #   name: 'foo'
 899     #   type: int
 900     #   location: -8  (negative numbers are on the stack;
 901     #                   0-7 are in registers;
 902     #                   higher positive numbers are invalid)
 903     #
 904     # There's nothing in registers.
 905     #
 906     # There's a primitive with this info:
 907     #   name: 'increment'
 908     #   inout: int/mem
 909     #   value: 'ff 0/subop/increment'
 910     #
 911     # There's nothing in functions.
 912     # NOTE(review): ecx, edx and ebx are overwritten below and never restored by this test.
 913     # . prologue
 914     55/push-ebp
 915     89/<- %ebp 4/r32/esp
 916     # setup: clear the output stream and the buffered file's buffer
 917     (clear-stream _test-output-stream)
 918     (clear-stream _test-output-buffered-file->buffer)
 919     # . ecx = vars: one variable record, pushed last field first, so memory reads {name, type, stack-offset, next}
 920     68/push 0/imm32/next
 921     68/push -8/imm32/stack-offset
 922     68/push 0/imm32/int  # TODO
 923     68/push "foo"/imm32
 924     89/<- %ecx 4/r32/esp
 925     # . edx = operand: a list node {var, next} pointing at the record for foo
 926     68/push 0/imm32/next
 927     51/push-ecx/var-foo
 928     89/<- %edx 4/r32/esp
 929     # . edx = stmt: {operation, operand list, outputs, next}; the operand pointer is pushed before edx is overwritten
 930     68/push 0/imm32/next
 931     68/push 0/imm32/outputs
 932     52/push-edx/operand
 933     68/push "increment"/imm32/operation
 934     89/<- %edx 4/r32/esp
 935     # . ebx = primitives: one record {name, storage, type, value, next}
 936     68/push 0/imm32/next
 937     68/push "ff 0/subop/increment"/imm32
 938     68/push 0/imm32/type-int
 939     68/push 0/imm32/storage-memory
 940     68/push "increment"/imm32/name
 941     89/<- %ebx 4/r32/esp
 942     # convert (args: out, stmt, vars, regs = none, primitives, functions = none)
 943     (emit-subx-statement _test-output-buffered-file %edx %ecx 0 %ebx 0)
 944     (flush _test-output-buffered-file)
 945 +--  6 lines: #?     # dump _test-output-stream --------------------------------------------------------------------------------------------------------------
 951     # check output (NOTE(review): emit-subx-statement has no code that writes to out yet, so this check presumably cannot pass until it does)
 952     (check-next-stream-line-equal _test-output-stream "ff 0/subop/increment *(ebp-8)" "F - test-emit-subx-statement-primitive/0")
 953     # . reclaim locals: 15 words were pushed above (4 + 2 + 4 + 5) = 0x3c bytes
 954     81 0/subop/add %esp 0x3c/imm32
 955     # . epilogue
 956     89/<- %esp 5/r32/ebp
 957     5d/pop-to-ebp
 958     c3/return
 959 
 960 test-emit-subx-statement-function-call:
 961     # Call a function on a variable on the stack.
 962     #   f var
 963     # =>
 964     #   (f2 *(ebp-8))
 965     # (Changing the function name just to help disambiguate things.)
 966     #
 967     # There's a variable on the var stack as follows:
 968     #   name: 'var'
 969     #   type: int
 970     #   location: -8  (negative numbers are on the stack;
 971     #                   0-7 are in registers;
 972     #                   higher positive numbers are invalid)
 973     #
 974     # There's nothing in registers.
 975     #
 976     # There's nothing in primitives.
 977     #
 978     # There's a function with this info:
 979     #   name: 'f'
 980     #   inout: int/mem
 981     #   value: 'f2'
 982     # NOTE(review): ecx, edx and ebx are overwritten below and never restored by this test.
 983     # . prologue
 984     55/push-ebp
 985     89/<- %ebp 4/r32/esp
 986     # setup: clear the output stream and the buffered file's buffer
 987     (clear-stream _test-output-stream)
 988     (clear-stream _test-output-buffered-file->buffer)
 989     # . ecx = vars: one variable record, pushed last field first, so memory reads {name, type, stack-offset, next}
 990     68/push 0/imm32/next
 991     68/push -8/imm32/stack-offset
 992     68/push 0/imm32/int  # TODO
 993     68/push "var"/imm32
 994     89/<- %ecx 4/r32/esp
 995     # . edx = operand: a list node {var, next} pointing at the record for var
 996     68/push 0/imm32/next
 997     51/push-ecx/var
 998     89/<- %edx 4/r32/esp
 999     # . edx = stmt: {operation, operand list, outputs, next}; the operand pointer is pushed before edx is overwritten
1000     68/push 0/imm32/next
1001     68/push 0/imm32/outputs
1002     52/push-edx/operand
1003     68/push "f"/imm32/operation
1004     89/<- %edx 4/r32/esp
1005     # . ebx = functions: one record laid out here as {name, storage, type, value, next}
1006     68/push 0/imm32/next
1007     68/push "f2"/imm32
1008     68/push 0/imm32/type-int
1009     68/push 0/imm32/storage-memory
1010     68/push "f"/imm32/name
1011     89/<- %ebx 4/r32/esp
1012     # convert (args: out, stmt, vars, regs = none, primitives = none, functions); NOTE(review): emit-subx-statement never reads its functions argument yet
1013     (emit-subx-statement _test-output-buffered-file %edx %ecx 0 0 %ebx)
1014     (flush _test-output-buffered-file)
1015 +--  6 lines: #?     # dump _test-output-stream --------------------------------------------------------------------------------------------------------------
1021     # check output (NOTE(review): the header comment shows '(f2 *(ebp-8))' with parentheses but the string checked here has none -- confirm which is intended)
1022     (check-next-stream-line-equal _test-output-stream "f2 *(ebp-8)" "F - test-emit-subx-statement-function-call/0")
1023     # . reclaim locals: 15 words were pushed above (4 + 2 + 4 + 5) = 0x3c bytes
1024     81 0/subop/add %esp 0x3c/imm32
1025     # . epilogue
1026     89/<- %esp 5/r32/ebp
1027     5d/pop-to-ebp
1028     c3/return
1029 
1030 emit-subx-prologue:  # out : (address buffered-file)
1031     # . prologue
1032     55/push-ebp
1033     89/<- %ebp 4/r32/esp
1034     # write a "# . prologue" comment line, then the push-ebp and ebp<-esp instruction lines, to out
1035     (write-buffered *(ebp+8) "# . prologue\n")
1036     (write-buffered *(ebp+8) "55/push-ebp\n")
1037     (write-buffered *(ebp+8) "89/<- %ebp 4/r32/esp\n")
1038 $emit-subx-prologue:end:
1039     # . epilogue
1040     89/<- %esp 5/r32/ebp
1041     5d/pop-to-ebp
1042     c3/return
1043 
1044 emit-subx-epilogue:  # out : (address buffered-file)
1045     # . prologue
1046     55/push-ebp
1047     89/<- %ebp 4/r32/esp
1048     # write a "# . epilogue" comment line, then the esp<-ebp, pop-ebp and return instruction lines, to out
1049     (write-buffered *(ebp+8) "# . epilogue\n")
1050     (write-buffered *(ebp+8) "89/<- %esp 5/r32/ebp\n")
1051     (write-buffered *(ebp+8) "5d/pop-to-ebp\n")
1052     (write-buffered *(ebp+8) "c3/return\n")
1053 $emit-subx-epilogue:end:
1054     # . epilogue
1055     89/<- %esp 5/r32/ebp
1056     5d/pop-to-ebp
1057     c3/return