https://github.com/akkartik/mu/blob/master/apps/mu.subx
   1 # The Mu computer's level-2 language, also called Mu.
   2 # http://akkartik.name/post/mu-2019-2
   3 #
   4 # To run:
   5 #   $ ./ntranslate init.linux 0*.subx apps/mu.subx
   6 
   7 # == Goals
   8 # 1. Be memory safe. It should be impossible to corrupt the heap, or to create
   9 # a bad pointer. (Requires strong type safety.)
  10 # 2. Do as little as possible to achieve goal 1.
  11 #   - runtime checks to avoid complex static analysis
  12 #   - minimize impedance mismatch between source language and SubX target
  13 
  14 # == Language description
  15 #
  16 # A program is a sequence of function definitions.
  17 #
  18 # Function example:
  19 #   fn foo n: int -> result/eax: int {
  20 #     ...
  21 #   }
  22 #
  23 # Functions consist of a name, optional inputs, optional outputs and a block.
  24 #
  25 # Function inputs and outputs are variables. All variables have a type and
  26 # storage specifier. They can be placed either in memory (on the stack) or in
  27 # one of 6 named registers.
  28 #   eax ecx edx ebx esi edi
  29 # Variables in registers must be primitive 32-bit types.
  30 # Variables not explicitly placed in a register are on the stack.
  31 # Variables in registers need not have a name; in that case you refer to them
  32 # directly by the register name.
  33 #
  34 # Function inputs are always passed in memory (on the stack), while outputs
  35 # are always returned in registers.
  36 #
  37 # Blocks mostly consist of statements.
  38 #
  39 # Statements mostly consist of a name, optional inputs and optional outputs.
  40 #
  41 # Statement inputs are variables or literals. Variables need to specify type
  42 # (and storage) the first time they're mentioned but not later.
  43 #
  44 # Statement outputs, like function outputs, must be variables in registers.
  45 #
  46 # Statement names must be either primitives or user-defined functions.
  47 #
  48 # Primitives can write to any register.
  49 # User-defined functions only write to hard-coded registers. Outputs of each
  50 # call must have the same registers as in the function definition.
  51 #
  52 # There are some other statement types:
  53 #   - blocks. Multiple statements surrounded by '{...}' and optionally
  54 #     prefixed with a label name and ':'
  55 #       - {
  56 #           ...
  57 #         }
  58 #       - foo: {
  59 #           ...
  60 #         }
  61 #
  62 #   - variable definitions on the stack. E.g.:
  63 #       - var foo: int
  64 #       - var bar: (array int 3)
  65 #     There's no initializer; variables are automatically initialized.
  66 #
  67 #   - variable definitions in a register. E.g.:
  68 #       - var foo/eax : int <- add bar 1
  69 #     The initializer is mandatory and must be a valid instruction that writes
  70 #     a single output to the right register. In practice registers will
  71 #     usually be either initialized by primitives or copied from eax.
  72 #       - var eax : int <- foo bar quux
  73 #         var floo/ecx : int <- copy eax
  74 #
  75 # Still todo:
  76 #   global variables
  77 #   heap allocations (planned name: 'handle')
  78 #   user-defined types: 'type' for structs, 'choice' for unions
  79 #   short-lived 'address' type for efficiently writing inside nested structs
  80 
  81 # Now that we know what the language looks like in the large, let's think
  82 # about how translation happens from the bottom up. The interplay between
  83 # variable scopes and statements using variables is the most complex aspect of
  84 # translation.
  85 #
  86 # Assume that we maintain a 'functions' list while parsing source code. And a
  87 # 'primitives' list is a global constant. Both these contain enough information
  88 # to perform type-checking on function calls or primitive statements, respectively.
  89 #
  90 # Defining variables pushes them on a stack with the current block depth and
  91 # enough information about their location (stack offset or register id).
  92 # Starting a block increments the current block depth.
  93 # Each statement now has enough information to emit code for it.
  94 # Ending a block is where the magic happens:
  95 #   pop all variables at the current block depth
  96 #   emit code to restore all register variables introduced at the current depth
  97 #   emit code to clean up all stack variables at the current depth (just increment esp)
  98 #   decrement the current block depth
  99 #
 100 # One additional check we'll need is to ensure that a variable in a register
 101 # isn't shadowed by a different one. That may be worth a separate data
 102 # structure but for now repeatedly scanning the var stack should suffice.
 103 
 104 # == Compiling a single instruction
 105 # Determine the function or primitive being called.
 106 #   If no matches, show all functions/primitives with the same name, along
 107 #   with reasons they don't match. (type and storage checking)
 108 #   It must be a function if:
 109 #     #outputs > 1, or
 110 #     #inouts > 2, or
 111 #     #inouts + #outputs > 2
 112 # If it's a function, emit:
 113 #   (low-level-name <rm32 or imm32>...)
 114 # Otherwise (it's a primitive):
 115 #   assert(#inouts <= 2 && #outs <= 1 && (#inouts + #outs) <= 2)
 116 #   emit opcode
 117 #   emit-rm32(inout[0])
 118 #   if out[0] exists: emit-r32(out[0])
 119 #   else if inout[1] is a literal: emit-imm32(inout[1])
 120 #   else: emit-rm32(inout[1])
 121 
 122 # emit-rm32 and emit-r32 should check that the variable they intend is still
 123 # available in the register.
 124 
 125 # == Emitting a block
 126 # Emit block name if necessary
 127 # Emit '{'
 128 # When you encounter a statement, emit it as above
 129 # When you encounter a variable declaration
 130 #   emit any code needed for it (bzeros)
 131 #   push it on the var stack
 132 #   update register dict if necessary
 133 # When you encounter '}'
 134 #   While popping variables off the var stack until block id changes
 135 #     Emit code needed to clean up the stack
 136 #       either increment esp
 137 #       or pop into appropriate register
 138 #   TODO: how to update the register dict? does it need to be a stack as well?
 139 
 140 # The rest is straightforward.
 141 
 142 # A sketch of planned data structures. Still highly speculative.
 143 == data
 144 
 145 # A program is currently a linked list of functions
     # Program holds the address of the first function, or 0 while the list is
     # empty. parse-mu appends to the list; emit-subx walks it.
 146 Program:  # (address function)
 147   0/imm32
 148 
 149 # A function consists of (offsets assume 4-byte fields in this order -- TODO confirm once inputs/outputs are settled):
 150 #   name: (address string)  # offset 0
 151 #   inputs: (address var-type)  # tbd; offset 4
 152 #   outputs: (address var-type)  # tbd; offset 8
 153 #   body: (address block)  # offset 0xc
 154 #   next: (address function)  # offset 0x10
     # Offset of the 'next' field within a function.
 155 Function-next:
 156   0x10/imm32
     # Size of a function in bytes; parse-mu passes this to 'allocate'.
 157 Function-size:
 158   0x14/imm32/20
 159 
 160 # A block is a list of statements:
 161 #     statements: (address statement)
 162 
 163 # A statement can be either a regular statement consisting of:
 164 #     name: (address string)
 165 #     inputs: (address var)
 166 #     outputs: (address var-r)
 167 # or a variable declaration on the stack:
 168 #     name: (address string)
 169 #     type: (address type-sexpr)
 170 # or a regular statement writing to a single new variable in a register:
 171 #     name: (address string)
 172 #     inputs: (address var)
 173 #     output: var-r
 174 # or a block of statements:
 175 #     statements: (address statement)
 176 
 177 # Kinds of local variable declarations:
 178 #   var f : (array foo 10)
 179 #   var f/ecx : int <- copy 0
 180 # Variables live in either the stack or a register.
 181 # Variables in the stack are auto-initialized.
 182 #   (This is non-trivial for arrays, and arrays inside structs... We'll see.)
 183 # Variables in register need a real instruction.
 184 
 185 # var is a variable declaration. e.g. `foo: (array int 3)`
 186 #   name: (address string)
 187 #   type: (address type-sexpr)
 188 
 189 # var-r is a variable declaration in a register. e.g. `foo/eax: (array int 3)`
 190 #   name: (address string)
 191 #   type: (address type-sexpr)
 192 #   reg: int [0..7]
 193 
 194 # type-sexpr is a tree of type identifiers. e.g. (array (address int) 3)
 195 # either
 196 #   id: type-identifier
 197 # or
 198 #   car: (address type-sexpr)
 199 #   cdr: (address type-sexpr)
 200 
 201 == code
 202 
 203 Entry:
         # Program entry point.
         #   - if the first command-line argument is "test": run all tests and
         #     exit with *Num-test-failures as the status
         #   - otherwise: translate the Mu program on Stdin to SubX on Stdout and
         #     exit with status 0
 204     # . prologue
 205     89/<- %ebp 4/r32/esp
 206     (new-segment Heap-size Heap)
 207     # if (argv[1] == "test") run-tests()
 208     {
 209       # if (argc <= 1) break
 210       81 7/subop/compare *ebp 1/imm32
 211       7e/jump-if-lesser-or-equal break/disp8
 212       # if (argv[1] != "test") break
 213       (kernel-string-equal? *(ebp+8) "test")  # => eax
 214       3d/compare-eax-and 0/imm32
 215       74/jump-if-equal break/disp8
 216       #
 217       (run-tests)
 218       # syscall(exit, *Num-test-failures)
 219       8b/-> *Num-test-failures 3/r32/ebx
 220       eb/jump $mu-main:end/disp8
 221     }
 222     # otherwise convert Stdin
 223     (convert-mu Stdin Stdout)
 224     (flush Stdout)
 225     # syscall(exit, 0)
 226     bb/copy-to-ebx 0/imm32
 227 $mu-main:end:
         # both paths arrive here with the exit status already in ebx
 228     b8/copy-to-eax 1/imm32/exit
 229     cd/syscall 0x80/imm8
 230 
 231 convert-mu:  # in : (address buffered-file), out : (address buffered-file)
         # Translate a Mu program read from 'in' into SubX written to 'out', in
         # three passes: parse into the global Program list, check types, emit.
         # 'out' is not flushed here; the callers in this file flush it themselves.
 232     # . prologue
 233     55/push-ebp
 234     89/<- %ebp 4/r32/esp
 235     #
 236     (parse-mu *(ebp+8))
 237     (check-mu-types)
 238     (emit-subx *(ebp+0xc))
 239 $convert-mu:end:
 240     # . epilogue
 241     89/<- %esp 5/r32/ebp
 242     5d/pop-to-ebp
 243     c3/return
 244 
 245 test-convert-empty-input:
 246     # empty input => empty output
 247     # . prologue
 248     55/push-ebp
 249     89/<- %ebp 4/r32/esp
 250     # setup: clear the test streams and the buffers of the files wrapping them
 251     (clear-stream _test-input-stream)
 252     (clear-stream _test-input-buffered-file->buffer)
 253     (clear-stream _test-output-stream)
 254     (clear-stream _test-output-buffered-file->buffer)
 255     # convert without writing anything to the input, then expect no output
 256     (convert-mu _test-input-buffered-file _test-output-buffered-file)
 257     (flush _test-output-buffered-file)
 258     (check-stream-equal _test-output-stream "" "F - test-convert-empty-input")
 259     # . epilogue
 260     89/<- %esp 5/r32/ebp
 261     5d/pop-to-ebp
 262     c3/return
 263 
 263 test-convert-function-skeleton:
 264     # empty function decl => function prologue and epilogue
 265     #   fn foo {
 266     #   }
 267     # =>
 268     #   foo:
 269     #     # . prologue
 270     #     55/push-ebp
 271     #     89/<- %ebp 4/r32/esp
 272     #     # . epilogue
 273     #     89/<- %esp 5/r32/ebp
 274     #     5d/pop-to-ebp
 275     #     c3/return
 276     # . prologue
 277     55/push-ebp
 278     89/<- %ebp 4/r32/esp
 279     # setup: clear the test streams and the buffers of the files wrapping them
 280     (clear-stream _test-input-stream)
 281     (clear-stream _test-input-buffered-file->buffer)
 282     (clear-stream _test-output-stream)
 283     (clear-stream _test-output-buffered-file->buffer)
 284     # input program
 285     (write _test-input-stream "fn foo {\n")
 286     (write _test-input-stream "}\n")
 287     # convert
 288     (convert-mu _test-input-buffered-file _test-output-buffered-file)
 289     (flush _test-output-buffered-file)
 290 +--  6 lines: #?     # dump _test-output-stream --------------------------------------------------------------------------------------------------------------
 296     # check output, one line at a time; the /N suffix identifies the failing line
 297     (check-next-stream-line-equal _test-output-stream "foo:"                  "F - test-convert-function-skeleton/0")
 298     (check-next-stream-line-equal _test-output-stream "# . prologue"          "F - test-convert-function-skeleton/1")
 299     (check-next-stream-line-equal _test-output-stream "55/push-ebp"           "F - test-convert-function-skeleton/2")
 300     (check-next-stream-line-equal _test-output-stream "89/<- %ebp 4/r32/esp"  "F - test-convert-function-skeleton/3")
 301     (check-next-stream-line-equal _test-output-stream "# . epilogue"          "F - test-convert-function-skeleton/4")
 302     (check-next-stream-line-equal _test-output-stream "89/<- %esp 5/r32/ebp"  "F - test-convert-function-skeleton/5")
 303     (check-next-stream-line-equal _test-output-stream "5d/pop-to-ebp"         "F - test-convert-function-skeleton/6")
 304     (check-next-stream-line-equal _test-output-stream "c3/return"             "F - test-convert-function-skeleton/7")
 305     # . epilogue
 306     89/<- %esp 5/r32/ebp
 307     5d/pop-to-ebp
 308     c3/return
 310 
 311 test-convert-multiple-function-skeletons:
 312     # multiple functions correctly organized into a linked list
 313     #   fn foo {
 314     #   }
 315     #   fn bar {
 316     #   }
 317     # =>
 318     #   foo:
 319     #     # . prologue
 320     #     55/push-ebp
 321     #     89/<- %ebp 4/r32/esp
 322     #     # . epilogue
 323     #     89/<- %esp 5/r32/ebp
 324     #     5d/pop-to-ebp
 325     #     c3/return
 326     #   bar:
 327     #     # . prologue
 328     #     55/push-ebp
 329     #     89/<- %ebp 4/r32/esp
 330     #     # . epilogue
 331     #     89/<- %esp 5/r32/ebp
 332     #     5d/pop-to-ebp
 333     #     c3/return
 334     # . prologue
 335     55/push-ebp
 336     89/<- %ebp 4/r32/esp
 337     # setup: clear the test streams and the buffers of the files wrapping them
 338     (clear-stream _test-input-stream)
 339     (clear-stream _test-input-buffered-file->buffer)
 340     (clear-stream _test-output-stream)
 341     (clear-stream _test-output-buffered-file->buffer)
 342     # input program: two empty functions
 343     (write _test-input-stream "fn foo {\n")
 344     (write _test-input-stream "}\n")
 345     (write _test-input-stream "fn bar {\n")
 346     (write _test-input-stream "}\n")
 347     # convert
 348     (convert-mu _test-input-buffered-file _test-output-buffered-file)
 349     (flush _test-output-buffered-file)
 350 +--  6 lines: #?     # dump _test-output-stream --------------------------------------------------------------------------------------------------------------
 356     # check first function (failure ids /0../7)
 357     (check-next-stream-line-equal _test-output-stream "foo:"                  "F - test-convert-multiple-function-skeletons/0")
 358     (check-next-stream-line-equal _test-output-stream "# . prologue"          "F - test-convert-multiple-function-skeletons/1")
 359     (check-next-stream-line-equal _test-output-stream "55/push-ebp"           "F - test-convert-multiple-function-skeletons/2")
 360     (check-next-stream-line-equal _test-output-stream "89/<- %ebp 4/r32/esp"  "F - test-convert-multiple-function-skeletons/3")
 361     (check-next-stream-line-equal _test-output-stream "# . epilogue"          "F - test-convert-multiple-function-skeletons/4")
 362     (check-next-stream-line-equal _test-output-stream "89/<- %esp 5/r32/ebp"  "F - test-convert-multiple-function-skeletons/5")
 363     (check-next-stream-line-equal _test-output-stream "5d/pop-to-ebp"         "F - test-convert-multiple-function-skeletons/6")
 364     (check-next-stream-line-equal _test-output-stream "c3/return"             "F - test-convert-multiple-function-skeletons/7")
 365     # check second function (failure ids /10../17)
 366     (check-next-stream-line-equal _test-output-stream "bar:"                  "F - test-convert-multiple-function-skeletons/10")
 367     (check-next-stream-line-equal _test-output-stream "# . prologue"          "F - test-convert-multiple-function-skeletons/11")
 368     (check-next-stream-line-equal _test-output-stream "55/push-ebp"           "F - test-convert-multiple-function-skeletons/12")
 369     (check-next-stream-line-equal _test-output-stream "89/<- %ebp 4/r32/esp"  "F - test-convert-multiple-function-skeletons/13")
 370     (check-next-stream-line-equal _test-output-stream "# . epilogue"          "F - test-convert-multiple-function-skeletons/14")
 371     (check-next-stream-line-equal _test-output-stream "89/<- %esp 5/r32/ebp"  "F - test-convert-multiple-function-skeletons/15")
 372     (check-next-stream-line-equal _test-output-stream "5d/pop-to-ebp"         "F - test-convert-multiple-function-skeletons/16")
 373     (check-next-stream-line-equal _test-output-stream "c3/return"             "F - test-convert-multiple-function-skeletons/17")
 374     # . epilogue
 375     89/<- %esp 5/r32/ebp
 376     5d/pop-to-ebp
 377     c3/return
 378 
 379 test-convert-function-with-arg:
 380     # function with one arg and a copy instruction
 381     #   fn foo n : int -> result/eax : int {
 382     #     result <- copy n
 383     #   }
 384     # =>
 385     #   foo:
 386     #     # . prologue
 387     #     55/push-ebp
 388     #     89/<- %ebp 4/r32/esp
 389     #     {
 390     #     # result <- copy n
 391     #     8b/-> *(ebp+8) 0/r32/eax
 392     #     }
 393     #     # . epilogue
 394     #     89/<- %esp 5/r32/ebp
 395     #     5d/pop-to-ebp
 396     #     c3/return
         #
         # TODO: the input and the checks below are still those of an empty
         # function; switch them to the program above once arguments and
         # statements are translated. Failure messages carry this test's own name
         # so that a failure here isn't blamed on test-convert-function-skeleton.
 397     # . prologue
 398     55/push-ebp
 399     89/<- %ebp 4/r32/esp
 400     # setup: clear the test streams and the buffers of the files wrapping them
 401     (clear-stream _test-input-stream)
 402     (clear-stream _test-input-buffered-file->buffer)
 403     (clear-stream _test-output-stream)
 404     (clear-stream _test-output-buffered-file->buffer)
 405     # input program (placeholder; see TODO above)
 406     (write _test-input-stream "fn foo {\n")
 407     (write _test-input-stream "}\n")
 408     # convert
 409     (convert-mu _test-input-buffered-file _test-output-buffered-file)
 410     (flush _test-output-buffered-file)
 411 +--  6 lines: #?     # dump _test-output-stream --------------------------------------------------------------------------------------------------------------
 417     # check output
 418     (check-next-stream-line-equal _test-output-stream "foo:"                  "F - test-convert-function-with-arg/0")
 419     (check-next-stream-line-equal _test-output-stream "# . prologue"          "F - test-convert-function-with-arg/1")
 420     (check-next-stream-line-equal _test-output-stream "55/push-ebp"           "F - test-convert-function-with-arg/2")
 421     (check-next-stream-line-equal _test-output-stream "89/<- %ebp 4/r32/esp"  "F - test-convert-function-with-arg/3")
 422     (check-next-stream-line-equal _test-output-stream "# . epilogue"          "F - test-convert-function-with-arg/4")
 423     (check-next-stream-line-equal _test-output-stream "89/<- %esp 5/r32/ebp"  "F - test-convert-function-with-arg/5")
 424     (check-next-stream-line-equal _test-output-stream "5d/pop-to-ebp"         "F - test-convert-function-with-arg/6")
 425     (check-next-stream-line-equal _test-output-stream "c3/return"             "F - test-convert-function-with-arg/7")
 426     # . epilogue
 427     89/<- %esp 5/r32/ebp
 428     5d/pop-to-ebp
 429     c3/return
 430 
 431 parse-mu:  # in : (address buffered-file)
         # Read Mu source from 'in' and append one function to the global Program
         # list for every top-level 'fn'. Blank lines and '#' comments are skipped;
         # any other top-level word prints an error to Stderr and exits with status 1.
         #
 432     # pseudocode
 433     #   var curr-function = Program
 434     #   var line : (stream byte 512)
 435     #   var word-slice : slice
 436     #   while true                                  # line loop
 437     #     clear-stream(line)
 438     #     read-line-buffered(in, line)
 439     #     if (line->write == 0) break               # end of file
 440     #     while true                                # word loop
 441     #       word-slice = next-word-or-string(line)
 442     #       if slice-empty?(word-slice)             # end of line
 443     #         break
 444     #       else if slice-starts-with?(word-slice, "#")  # comment
 445     #         break                                 # end of line
 446     #       else if slice-equal(word-slice, "fn")
 447     #         var new-function : (address function) = new function
 448     #         populate-mu-function-header(line, new-function)
         #         populate-mu-function-body(in, new-function)
 449     #         *curr-function = new-function
 450     #         curr-function = &new-function->next
 451     #       else
 452     #         abort()
 453     #
 454     # . prologue
 455     55/push-ebp
 456     89/<- %ebp 4/r32/esp
 457     # . save registers
 458     50/push-eax
 459     51/push-ecx
 460     52/push-edx
 461     57/push-edi
 462     # var line/ecx : (stream byte 512)
 463     81 5/subop/subtract %esp 0x200/imm32
 464     68/push 0x200/imm32/length
 465     68/push 0/imm32/read
 466     68/push 0/imm32/write
 467     89/<- %ecx 4/r32/esp
 468     # var word-slice/edx : slice
 469     68/push 0/imm32/end
 470     68/push 0/imm32/start
 471     89/<- %edx 4/r32/esp
 472     # var curr-function/edi : (address function) = Program
         # edi always points at the slot that should receive the next function:
         # first Program itself, later the 'next' field of the last function parsed
 473     bf/copy-to-edi Program/imm32
 474     {
 475 $parse-mu:line-loop:
 476       (clear-stream %ecx)
 477       (read-line-buffered *(ebp+8) %ecx)
 478       # if (line->write == 0) break
 479       81 7/subop/compare *ecx 0/imm32
 480       0f 84/jump-if-equal break/disp32
 481 +--  6 lines: #?       # dump line ---------------------------------------------------------------------------------------------------------------------------
 487       { # word loop
 488 $parse-mu:word-loop:
 489         (next-word-or-string %ecx %edx)
 490         # if slice-empty?(word-slice) break
 491         (slice-empty? %edx)
 492         3d/compare-eax-and 0/imm32
 493         0f 85/jump-if-not-equal break/disp32
 494         # if (*word-slice->start == "#") break
 495         # . eax = *word-slice->start
 496         8b/-> *edx 0/r32/eax
 497         8a/copy-byte *eax 0/r32/AL
 498         81 4/subop/and %eax 0xff/imm32
 499         # . if (eax == '#') break
 500         3d/compare-eax-and 0x23/imm32/hash
 501         0f 84/jump-if-equal break/disp32
 502         # if (slice-equal?(word-slice, "fn")) parse a function
 503         {
 504           (slice-equal? %edx "fn")
 505           3d/compare-eax-and 0/imm32
 506           0f 84/jump-if-equal break/disp32
 507           # var new-function/eax : (address function) = allocate(Heap, Function-size)
 508           (allocate Heap *Function-size)  # => eax
               # the header comes from the rest of the current line; the body is
               # read from the following lines of 'in'
 509           (populate-mu-function-header %ecx %eax)
 510           (populate-mu-function-body *(ebp+8) %eax)
 511           # *curr-function = new-function
 512           89/<- *edi 0/r32/eax
 513           # curr-function = &new-function->next
 514           8d/address-> *(eax+0x10) 7/r32/edi
 515           e9/jump $parse-mu:word-loop/disp32
 516         }
 517         # otherwise abort
 518         e9/jump $parse-mu:abort/disp32
 519       } # end word loop
 520       e9/jump loop/disp32
 521     } # end line loop
 522 $parse-mu:end:
 523     # . reclaim locals
         # 0x214 = 0x200 (line data) + 0xc (stream header) + 8 (word-slice)
 524     81 0/subop/add %esp 0x214/imm32
 525     # . restore registers
 526     5f/pop-to-edi
 527     5a/pop-to-edx
 528     59/pop-to-ecx
 529     58/pop-to-eax
 530     # . epilogue
 531     89/<- %esp 5/r32/ebp
 532     5d/pop-to-ebp
 533     c3/return
 534 
 535 $parse-mu:abort:
 536     # error("unexpected top-level command: " word-slice "\n")
 537     (write-buffered Stderr "unexpected top-level command: ")
         # word-slice (edx) is a slice, not a string, so it needs the slice writer
 538     (write-slice-buffered Stderr %edx)
 539     (write-buffered Stderr "\n")
 540     (flush Stderr)
 541     # . syscall(exit, 1)
 542     bb/copy-to-ebx  1/imm32
 543     b8/copy-to-eax  1/imm32/exit
 544     cd/syscall  0x80/imm8
 545     # never gets here
 546 
 547 # errors considered:
 548 #   fn foo { {
 549 #   fn foo { }
 550 #   fn foo { } {
 551 #   fn foo  # no block
 552 populate-mu-function-header:  # first-line : (address stream byte), out : (address function)
         # Parse the rest of a 'fn' line (the caller has already consumed 'fn'):
         # save the function name in out->name, then require '{' followed by
         # nothing but an optional '#' comment. Anything else prints an error to
         # Stderr and exits with status 1.
 553     # . prologue
 554     55/push-ebp
 555     89/<- %ebp 4/r32/esp
 556     # . save registers
 557     50/push-eax
 558     51/push-ecx
 559     57/push-edi
 560     # edi = out
 561     8b/-> *(ebp+0xc) 7/r32/edi
 562     # var word-slice/ecx : slice
 563     68/push 0/imm32/end
 564     68/push 0/imm32/start
 565     89/<- %ecx 4/r32/esp
 566     # save function name
 567     (next-word *(ebp+8) %ecx)
 568     (slice-to-string Heap %ecx)  # => eax
 569     89/<- *edi 0/r32/eax
 570     # assert that next token is '{'
 571     (next-word *(ebp+8) %ecx)
 572     (slice-equal? %ecx "{")
 573     3d/compare-eax-and 0/imm32
 574     74/jump-if-equal $populate-mu-function-header:abort/disp8
 575     # assert that there's no further token
 576     {
 577       # word-slice = next-word(line)
 578       (next-word *(ebp+8) %ecx)
 579       # if (word-slice == '') break
 580       (slice-empty? %ecx)
 581       3d/compare-eax-and 0/imm32
 582       75/jump-if-not-equal break/disp8
 583       # if (slice-starts-with?(word-slice, "#")) break
 584       # . eax = *word-slice->start
           # word-slice lives in ecx in this function; edx is never set here
 585       8b/-> *ecx 0/r32/eax
 586       8a/copy-byte *eax 0/r32/AL
 587       81 4/subop/and %eax 0xff/imm32
 588       # . if (eax == '#') break
 589       3d/compare-eax-and 0x23/imm32/hash
 590       74/jump-if-equal break/disp8
 591       # otherwise abort
 592       eb/jump $populate-mu-function-header:abort/disp8
 593     }
 594 $populate-mu-function-header:end:
 595     # . reclaim locals
 596     81 0/subop/add %esp 8/imm32
 597     # . restore registers
 598     5f/pop-to-edi
 599     59/pop-to-ecx
 600     58/pop-to-eax
 601     # . epilogue
 602     89/<- %esp 5/r32/ebp
 603     5d/pop-to-ebp
 604     c3/return
 605 
 606 $populate-mu-function-header:abort:
 607     # error("function header not in form 'fn <name> {'")
 608     (write-buffered Stderr "function header not in form 'fn <name> {' -- '")
         # write-stream below goes straight to file descriptor 2, so push out the
         # buffered prefix first to keep the message in order
         (flush Stderr)
 609     (rewind-stream *(ebp+8))
 610     (write-stream 2 *(ebp+8))
 611     (write-buffered Stderr "'\n")
 612     (flush Stderr)
 613     # . syscall(exit, 1)
 614     bb/copy-to-ebx  1/imm32
 615     b8/copy-to-eax  1/imm32/exit
 616     cd/syscall  0x80/imm8
 617     # never gets here
 618 
 619 # errors considered:
 620 #   { abc
 621 populate-mu-function-body:  # in : (address buffered-file), out : (address function)
         # Consume the lines of a function body from 'in', tracking curly nesting
         # (the '{' on the header line counts as 1). Stops when the nesting drops
         # to 0, at end of file, or at the first line that starts with anything
         # other than '{', '}' or a comment. A '{' or '}' followed by anything but
         # a comment prints an error to Stderr and exits with status 1.
         # Nothing is stored in 'out' yet.
 622     # . prologue
 623     55/push-ebp
 624     89/<- %ebp 4/r32/esp
 625     # . save registers
 626     50/push-eax
 627     51/push-ecx
 628     52/push-edx
 629     53/push-ebx
 630     # var line/ecx : (stream byte 512)
 631     81 5/subop/subtract %esp 0x200/imm32
 632     68/push 0x200/imm32/length
 633     68/push 0/imm32/read
 634     68/push 0/imm32/write
 635     89/<- %ecx 4/r32/esp
 636     # var word-slice/edx : slice
 637     68/push 0/imm32/end
 638     68/push 0/imm32/start
 639     89/<- %edx 4/r32/esp
 640     # var open-curly-count/ebx : int = 1
 641     bb/copy-to-ebx 1/imm32
 642     { # line loop
 643 $populate-mu-function-body:line-loop:
 644       # if (open-curly-count == 0) break
 645       81 7/subop/compare %ebx 0/imm32
 646       0f 84/jump-if-equal break/disp32
 647       # line = read-line-buffered(in)
 648       (clear-stream %ecx)
 649       (read-line-buffered *(ebp+8) %ecx)
 650       # if (line->write == 0) break
 651       81 7/subop/compare *ecx 0/imm32
 652       0f 84/jump-if-equal break/disp32
 653       # word-slice = next-word(line)
 654       (next-word %ecx %edx)
 655       # if slice-empty?(word-slice) continue
           # (the slice is in edx; ecx is the line stream)
 656       (slice-empty? %edx)
 657       3d/compare-eax-and 0/imm32
 658       75/jump-if-not-equal loop/disp8
 659       # if (slice-starts-with?(word-slice, '#') continue
 660       # . eax = *word-slice->start
 661       8b/-> *edx 0/r32/eax
 662       8a/copy-byte *eax 0/r32/AL
 663       81 4/subop/and %eax 0xff/imm32
 664       # . if (eax == '#') continue
 665       3d/compare-eax-and 0x23/imm32/hash
 666       74/jump-if-equal loop/disp8
 667       {
 668         # if slice-equal?(word-slice, "{") ++open-curly-count
 669         {
 670           (slice-equal? %edx "{")
 671           3d/compare-eax-and 0/imm32
 672           74/jump-if-equal break/disp8
 673           43/increment-ebx
 674           eb/jump $curly-found:end/disp8
 675         }
 676         # else if slice-equal?(word-slice, "}") --open-curly-count
 677         {
 678           (slice-equal? %edx "}")
 679           3d/compare-eax-and 0/imm32
 680           74/jump-if-equal break/disp8
 681           4b/decrement-ebx
 682           eb/jump $curly-found:end/disp8
 683         }
 684         # else break (any other first word ends body parsing)
 685         eb/jump $populate-mu-function-body:end/disp8
 686       }
 687       # - check for invalid tokens after curly
 688 $curly-found:end:
 689       # second-word-slice = next-word(line)
 690       (next-word %ecx %edx)
 691       # if slice-empty?(second-word-slice) continue
 692       (slice-empty? %edx)
 693       3d/compare-eax-and 0/imm32
 694       0f 85/jump-if-not-equal loop/disp32
 695       # if (slice-starts-with?(second-word-slice, '#') continue
 696       # . eax = *second-word-slice->start
 697       8b/-> *edx 0/r32/eax
 698       8a/copy-byte *eax 0/r32/AL
 699       81 4/subop/and %eax 0xff/imm32
 700       # . if (eax == '#') continue
 701       3d/compare-eax-and 0x23/imm32/hash
 702       0f 84/jump-if-equal loop/disp32
 703       # abort
 704       eb/jump $populate-mu-function-body:abort/disp8
 705     } # end line loop
 706 $populate-mu-function-body:end:
 707     # . reclaim locals
         # 0x214 = 0x200 (line data) + 0xc (stream header) + 8 (word-slice)
 708     81 0/subop/add %esp 0x214/imm32
 709     # . restore registers
 710     5b/pop-to-ebx
 711     5a/pop-to-edx
 712     59/pop-to-ecx
 713     58/pop-to-eax
 714     # . epilogue
 715     89/<- %esp 5/r32/ebp
 716     5d/pop-to-ebp
 717     c3/return
 718 
 719 $populate-mu-function-body:abort:
 720     # error("'{' or '}' should be on its own line, but got '")
 721     (write-buffered Stderr "'{' or '}' should be on its own line, but got '")
         # write-stream below goes straight to file descriptor 2, so push out the
         # buffered prefix first to keep the message in order
         (flush Stderr)
 722     (rewind-stream %ecx)
 723     (write-stream 2 %ecx)
 724     (write-buffered Stderr "'\n")
 725     (flush Stderr)
 726     # . syscall(exit, 1)
 727     bb/copy-to-ebx  1/imm32
 728     b8/copy-to-eax  1/imm32/exit
 729     cd/syscall  0x80/imm8
 730     # never gets here
 731 
 732 check-mu-types:
         # Type-checking pass, run by convert-mu between parsing and emitting.
         # Currently a placeholder: it takes no arguments and does nothing.
 733     # . prologue
 734     55/push-ebp
 735     89/<- %ebp 4/r32/esp
 736     #
 737 $check-mu-types:end:
 738     # . epilogue
 739     89/<- %esp 5/r32/ebp
 740     5d/pop-to-ebp
 741     c3/return
 742 
 743 emit-subx:  # out : (address buffered-file)
         # Walk the global Program list and emit SubX for each function to 'out',
         # in list order.
 744     # . prologue
 745     55/push-ebp
 746     89/<- %ebp 4/r32/esp
 747     # . save registers
 748     50/push-eax
 749     51/push-ecx
 750     57/push-edi
 751     # edi = out
 752     8b/-> *(ebp+8) 7/r32/edi
 753     # var curr/ecx : (address function) = Program
         # (loads the value stored at Program, i.e. the first function or null)
 754     8b/-> *Program 1/r32/ecx
 755     {
 756       # if (curr == NULL) break
 757       81 7/subop/compare %ecx 0/imm32
 758       0f 84/jump-if-equal break/disp32
 759       (emit-subx-function %edi %ecx)
 760       # curr = curr->next
           # (0x10 is the same value as Function-next)
 761       8b/-> *(ecx+0x10) 1/r32/ecx
 762       e9/jump loop/disp32
 763     }
 764 $emit-subx:end:
 765     # . restore registers
 766     5f/pop-to-edi
 767     59/pop-to-ecx
 768     58/pop-to-eax
 769     # . epilogue
 770     89/<- %esp 5/r32/ebp
 771     5d/pop-to-ebp
 772     c3/return
 773 
 774 # == Emitting a function
 775 # Emit function header
 776 # Emit function prologue
 777 # Translate function body
 778 # Emit function epilogue
 779 
 780 emit-subx-function:  # out : (address buffered-file), f : (address function)
         # Emit one function to 'out': its name as a label, then the prologue, the
         # translated body and the epilogue.
 781     # . prologue
 782     55/push-ebp
 783     89/<- %ebp 4/r32/esp
 784     # . save registers
 785     50/push-eax
 786     51/push-ecx
 787     57/push-edi
 788     # edi = out
 789     8b/-> *(ebp+8) 7/r32/edi
 790     # ecx = f
 791     8b/-> *(ebp+0xc) 1/r32/ecx
 792     # label: f->name (offset 0) followed by ':'
 793     (write-buffered %edi *ecx)
 794     (write-buffered %edi ":\n")
 795     (emit-subx-prologue %edi)
         # f->body is at offset 0xc in the function layout (name, inputs, outputs,
         # body, next); offset 4 would pass f->inputs instead
 796     (emit-subx-block %edi *(ecx+0xc))  # f->body
 797     (emit-subx-epilogue %edi)
 798 $emit-subx-function:end:
 799     # . restore registers
 800     5f/pop-to-edi
 801     59/pop-to-ecx
 802     58/pop-to-eax
 803     # . epilogue
 804     89/<- %esp 5/r32/ebp
 805     5d/pop-to-ebp
 806     c3/return
 807 
 808 emit-subx-block:  # out : (address buffered-file), block : (address block)
         # Placeholder for translating a block of statements to 'out'.
         # Currently emits nothing and doesn't read either argument.
 809     # . prologue
 810     55/push-ebp
 811     89/<- %ebp 4/r32/esp
 812     #
 813 $emit-subx-block:end:
 814     # . epilogue
 815     89/<- %esp 5/r32/ebp
 816     5d/pop-to-ebp
 817     c3/return
 818 
 819 emit-subx-statement:  # out : (address buffered-file), stmt : (address statement), vars : (address variable), regs : (address array (address variable)), primitives : (address opcode-info), functions : (address function)
 820     # . prologue
 821     55/push-ebp
 822     89/<- %ebp 4/r32/esp
 823     # . save registers
 824     50/push-eax
 825     51/push-ecx
 826     # var curr/ecx : (address primitive) = primitives  (walks the list looking for one that matches stmt; out, vars, regs and functions are not read yet)
 827     8b/-> *(ebp+0x18) 1/r32/ecx
 828     {
 829       # if (curr == null) abort  -- list exhausted without a match; test curr itself so a null entry is never passed on and dereferenced
 830       81 7/subop/compare %ecx 0/imm32
 831       0f 84/jump-if-equal $emit-subx-statement:abort/disp32
 832       # if (match(curr, stmt)) break
 833       (mu-stmt-matches-primitive? *(ebp+0xc) %ecx)  # => eax
 834       3d/compare-eax-and 0/imm32
 835       75/jump-if-not-equal break/disp8
 836       # TODO: emit code for stmt according to curr and vars (nothing is emitted yet)
 837       # curr = curr->next  (next pointer is at offset 0x10 of a primitive)
 838       8b/-> *(ecx+0x10) 1/r32/ecx
 839       e9/jump loop/disp32
 840     }
 841 $emit-subx-statement:end:
 842     # . restore registers
 843     59/pop-to-ecx
 844     58/pop-to-eax
 845     # . epilogue
 846     89/<- %esp 5/r32/ebp
 847     5d/pop-to-ebp
 848     c3/return
 849 
 850 $emit-subx-statement:abort:
 851     # error("couldn't translate '" stmt "'\n")  -- stmt itself is not printed yet (see the disabled call below)
 852     (write-buffered Stderr "couldn't translate '")
 853 #?     (emit-string Stderr *(ebp+0xc))  # TODO
 854     (write-buffered Stderr "'\n")
 855     (flush Stderr)
 856     # . syscall(exit, 1)
 857     bb/copy-to-ebx  1/imm32  # exit status
 858     b8/copy-to-eax  1/imm32/exit
 859     cd/syscall  0x80/imm8
 860     # never gets here
 861 
 862 mu-stmt-matches-primitive?:  # stmt : (address statement), primitive : (address opcode-info) => result/eax : boolean
 863     # . prologue
 864     55/push-ebp
 865     89/<- %ebp 4/r32/esp
 866     # . save registers
 867     51/push-ecx
 868     # return primitive->name == stmt->operation  (only the strings at offset 0 of each record are compared; operands are not checked)
 869     8b/-> *(ebp+8) 1/r32/ecx  # ecx = stmt
 870     8b/-> *(ebp+0xc) 0/r32/eax  # eax = primitive; must be non-null since it is dereferenced below
 871     (string-equal? *ecx *eax)  # => eax
 872 $mu-stmt-matches-primitive?:end:
 873     # . restore registers  (eax is left alone: it carries the result)
 874     59/pop-to-ecx
 875     # . epilogue
 876     89/<- %esp 5/r32/ebp
 877     5d/pop-to-ebp
 878     c3/return
 879 
 880 test-emit-subx-statement-primitive:
 881     # Primitive operation on a variable on the stack.
 882     #   increment foo
 883     # =>
 884     #   ff 0/subop/increment *(ebp-8)
 885     #
 886     # There's a variable on the var stack as follows:
 887     #   name: 'foo'
 888     #   type: int
 889     #   location: -8  (negative numbers are on the stack;
 890     #                   0-7 are in registers;
 891     #                   higher positive numbers are invalid)
 892     #
 893     # There's nothing in registers.
 894     #
 895     # There's a primitive with this info:
 896     #   name: 'increment'
 897     #   inout: int/mem
 898     #   value: 'ff 0/subop/increment'
 899     #
 900     # There's nothing in functions.
 901     #
 902     # . prologue
 903     55/push-ebp
 904     89/<- %ebp 4/r32/esp
 905     # setup
 906     (clear-stream _test-output-stream)
 907     (clear-stream _test-output-buffered-file->buffer)
 908     # . ecx = vars  (one 4-word record built on the stack; fields are pushed last-to-first, so the name ends up at offset 0)
 909     68/push 0/imm32/next
 910     68/push -8/imm32/stack-offset
 911     68/push 0/imm32/int  # TODO
 912     68/push "foo"/imm32
 913     89/<- %ecx 4/r32/esp
 914     # . edx = operand  (2-word list node referring to var foo)
 915     68/push 0/imm32/next
 916     51/push-ecx/var-foo
 917     89/<- %edx 4/r32/esp
 918     # . edx = stmt  (4-word record; edx is reused, the operand node stays reachable through the stmt)
 919     68/push 0/imm32/next
 920     68/push 0/imm32/outputs
 921     52/push-edx/operand
 922     68/push "increment"/imm32/operation
 923     89/<- %edx 4/r32/esp
 924     # . ebx = primitives  (a single 5-word record, so its 'next' field is at offset 0x10)
 925     68/push 0/imm32/next
 926     68/push "ff 0/subop/increment"/imm32
 927     68/push 0/imm32/type-int
 928     68/push 0/imm32/storage-memory
 929     68/push "increment"/imm32/name
 930     89/<- %ebx 4/r32/esp
 931     # convert  (args: out, stmt, vars, regs = none, primitives, functions = none)
 932     (emit-subx-statement _test-output-buffered-file %edx %ecx 0 %ebx 0)
 933     (flush _test-output-buffered-file)
 934 +--  6 lines: #?     # dump _test-output-stream --------------------------------------------------------------------------------------------------------------
 940     # check output
 941     (check-next-stream-line-equal _test-output-stream "ff 0/subop/increment *(ebp-8)" "F - test-emit-subx-statement-primitive/0")
 942     # . reclaim locals  (15 words pushed above = 0x3c bytes)
 943     81 0/subop/add %esp 0x3c/imm32
 944     # . epilogue
 945     89/<- %esp 5/r32/ebp
 946     5d/pop-to-ebp
 947     c3/return
 948 
 949 test-emit-subx-statement-function-call:
 950     # Call a function on a variable on the stack.
 951     #   f var
 952     # =>
 953     #   (f2 *(ebp-8))
 954     # (Changing the function name just to help disambiguate things.)
 955     #
 956     # There's a variable on the var stack as follows:
 957     #   name: 'var'
 958     #   type: int
 959     #   location: -8  (negative numbers are on the stack;
 960     #                   0-7 are in registers;
 961     #                   higher positive numbers are invalid)
 962     #
 963     # There's nothing in registers.
 964     #
 965     # There's nothing in primitives.
 966     #
 967     # There's a function with this info:
 968     #   name: 'f'
 969     #   inout: int/mem
 970     #   value: 'f2'
 971     #
 972     # . prologue
 973     55/push-ebp
 974     89/<- %ebp 4/r32/esp
 975     # setup
 976     (clear-stream _test-output-stream)
 977     (clear-stream _test-output-buffered-file->buffer)
 978     # . ecx = vars  (one 4-word record built on the stack; fields are pushed last-to-first, so the name ends up at offset 0)
 979     68/push 0/imm32/next
 980     68/push -8/imm32/stack-offset
 981     68/push 0/imm32/int  # TODO
 982     68/push "var"/imm32
 983     89/<- %ecx 4/r32/esp
 984     # . edx = operand  (2-word list node referring to var)
 985     68/push 0/imm32/next
 986     51/push-ecx/var
 987     89/<- %edx 4/r32/esp
 988     # . edx = stmt  (4-word record; edx is reused, the operand node stays reachable through the stmt)
 989     68/push 0/imm32/next
 990     68/push 0/imm32/outputs
 991     52/push-edx/operand
 992     68/push "f"/imm32/operation
 993     89/<- %edx 4/r32/esp
 994     # . ebx = functions  (a single 5-word record)
 995     68/push 0/imm32/next
 996     68/push "f2"/imm32
 997     68/push 0/imm32/type-int
 998     68/push 0/imm32/storage-memory
 999     68/push "f"/imm32/name
1000     89/<- %ebx 4/r32/esp
1001     # convert  (args: out, stmt, vars, regs = none, primitives = none, functions)
1002     (emit-subx-statement _test-output-buffered-file %edx %ecx 0 0 %ebx)
1003     (flush _test-output-buffered-file)
1004 +--  6 lines: #?     # dump _test-output-stream --------------------------------------------------------------------------------------------------------------
1010     # check output  (a call must be emitted in parenthesized form, as described at the top of this test)
1011     (check-next-stream-line-equal _test-output-stream "(f2 *(ebp-8))" "F - test-emit-subx-statement-function-call/0")
1012     # . reclaim locals  (15 words pushed above = 0x3c bytes)
1013     81 0/subop/add %esp 0x3c/imm32
1014     # . epilogue
1015     89/<- %esp 5/r32/ebp
1016     5d/pop-to-ebp
1017     c3/return
1018 
1019 emit-subx-prologue:  # out : (address buffered-file)
1020     # . prologue
1021     55/push-ebp
1022     89/<- %ebp 4/r32/esp
1023     # write a fixed 3-line function prologue to out: a comment line, then the two instructions that set up ebp
1024     (write-buffered *(ebp+8) "# . prologue\n")
1025     (write-buffered *(ebp+8) "55/push-ebp\n")
1026     (write-buffered *(ebp+8) "89/<- %ebp 4/r32/esp\n")
1027 $emit-subx-prologue:end:
1028     # . epilogue
1029     89/<- %esp 5/r32/ebp
1030     5d/pop-to-ebp
1031     c3/return
1032 
1033 emit-subx-epilogue:  # out : (address buffered-file)
1034     # . prologue
1035     55/push-ebp
1036     89/<- %ebp 4/r32/esp
1037     # write a fixed 4-line function epilogue to out: a comment line, then restore esp, pop ebp and return
1038     (write-buffered *(ebp+8) "# . epilogue\n")
1039     (write-buffered *(ebp+8) "89/<- %esp 5/r32/ebp\n")
1040     (write-buffered *(ebp+8) "5d/pop-to-ebp\n")
1041     (write-buffered *(ebp+8) "c3/return\n")
1042 $emit-subx-epilogue:end:
1043     # . epilogue
1044     89/<- %esp 5/r32/ebp
1045     5d/pop-to-ebp
1046     c3/return