From 3a74e0e646c26e5f73478815b32b4e7ce12d4a06 Mon Sep 17 00:00:00 2001 From: Kartik Agaram Date: Sat, 9 Nov 2019 09:02:24 -0800 Subject: 5730 --- html/apps/mu.subx.html | 1634 ++++++++++++++++++++++++++++++------------------ 1 file changed, 1028 insertions(+), 606 deletions(-) (limited to 'html') diff --git a/html/apps/mu.subx.html b/html/apps/mu.subx.html index a1500a8a..acd0ccb0 100644 --- a/html/apps/mu.subx.html +++ b/html/apps/mu.subx.html @@ -18,6 +18,7 @@ a { color:inherit; } .subxFunction { color: #af5f00; text-decoration: underline; } .LineNr { } .subxS1Comment { color: #0000af; } +.CommentedCode { color: #8a8a8a; } .SpecialChar { color: #d70000; } .Constant { color: #008787; } .Folded { color: #080808; background-color: #949494; } @@ -58,612 +59,1033 @@ if ('onhashchange' in window) { https://github.com/akkartik/mu/blob/master/apps/mu.subx
-  1 # The Mu computer's level-2 language, also called Mu.
-  2 # http://akkartik.name/post/mu-2019-2
-  3 #
-  4 # To run:
-  5 #   $ ./ntranslate init.linux 0*.subx apps/mu.subx
-  6 
-  7 # A sketch of planned data structures. Still highly speculative.
-  8 == data
-  9 
- 10 # A program is currently a linked list of functions
- 11 Program:  # (address function)
- 12   0/imm32
- 13 
- 14 # A function consists of:
- 15 #   name: (address string)
- 16 #   inputs: (address var-type)  # tbd
- 17 #   outputs: (address var-type)  # tbd
- 18 #   body: (address block)
- 19 #   next: (address function)
- 20 Function-next:
- 21   0x10/imm32
- 22 Function-size:
- 23   0x14/imm32/20
- 24 
- 25 # A block is a list of statements:
- 26 #     statements: (address statement)
- 27 
- 28 # A statement can be either a regular statement consisting of:
- 29 #     name: (address string)
- 30 #     inputs: (address var)
- 31 #     outputs: (address var-r)
- 32 # or a variable declaration on the stack:
- 33 #     name: (address string)
- 34 #     type: (address type-sexpr)
- 35 # or a regular statement writing to a single new variable in a register:
- 36 #     name: (address string)
- 37 #     inputs: (address var)
- 38 #     output: var-r
- 39 # or a block of statements:
- 40 #     statements: (address statement)
- 41 
- 42 # Kinds of local variable declarations:
- 43 #   var f : (array foo 10)
- 44 #   var f/ecx : int <- copy 0
- 45 # Variables live in either the stack or a register.
- 46 # Variables in the stack are auto-initialized.
- 47 #   (This is non-trivial for arrays, and arrays inside structs... We'll see.)
- 48 # Variables in register need a real instruction.
- 49 
- 50 # var is a variable declaration. e.g. `foo: (array int 3)`
- 51 #   name: (address string)
- 52 #   type: (address type-sexpr)
- 53 
- 54 # var-r is a variable declaration in a register. e.g. `foo/eax: (array int 3)`
- 55 #   name: (address string)
- 56 #   type: (address type-sexpr)
- 57 #   reg: int [0..7]
- 58 
- 59 # type-sexpr is a tree of type identifiers. e.g. (array (address int) 3)
- 60 # either
- 61 #   id: type-identifier
- 62 # or
- 63 #   car: (address type-sexpr)
- 64 #   cdr: (address type-sexpr)
- 65 
- 66 # Still todo:
- 67 #   global variables
- 68 #   heap allocations (planned name: 'handle')
- 69 #   user-defined types: 'type' for structs, 'choice' for unions
- 70 #   short-lived 'address' type for efficiently writing inside nested structs
- 71 
- 72 == code
- 73 
- 74 Entry:
- 75     # . prologue
- 76     89/<- %ebp 4/r32/esp
- 77     (new-segment Heap-size Heap)
- 78     # if (argv[1] == "test") run-tests()
- 79     {
- 80       # if (argc <= 1) break
- 81       81 7/subop/compare *ebp 1/imm32
- 82       7e/jump-if-lesser-or-equal break/disp8
- 83       # if (argv[1] != "test") break
- 84       (kernel-string-equal? *(ebp+8) "test")  # => eax
- 85       3d/compare-eax-and 0/imm32
- 86       74/jump-if-equal break/disp8
- 87       # run the tests instead of converting Stdin
- 88       (run-tests)
- 89       # syscall(exit, *Num-test-failures)
- 90       8b/-> *Num-test-failures 3/r32/ebx
- 91       eb/jump $mu-main:end/disp8
- 92     }
- 93     # otherwise convert Stdin
- 94     (convert-mu Stdin Stdout)
- 95     (flush Stdout)
- 96     # syscall(exit, 0)
- 97     bb/copy-to-ebx 0/imm32
- 98 $mu-main:end:
- 99     b8/copy-to-eax 1/imm32/exit
-100     cd/syscall 0x80/imm8
-101 
-102 convert-mu:  # in : (address buffered-file), out : (address buffered-file)
-103     # . prologue
-104     55/push-ebp
-105     89/<- %ebp 4/r32/esp
-106     #
-107     (parse-mu *(ebp+8))
-108     (check-mu-types)
-109     (emit-subx *(ebp+0xc))
-110 $convert-mu:end:
-111     # . epilogue
-112     89/<- %esp 5/r32/ebp
-113     5d/pop-to-ebp
-114     c3/return
-115 
-116 test-convert-empty-input:
-117     # empty input => empty output
-118     # . prologue
-119     55/push-ebp
-120     89/<- %ebp 4/r32/esp
-121     # setup
-122     (clear-stream _test-input-stream)
-123     (clear-stream _test-input-buffered-file->buffer)
-124     (clear-stream _test-output-stream)
-125     (clear-stream _test-output-buffered-file->buffer)
-126     #
-127     (convert-mu _test-input-buffered-file _test-output-buffered-file)
-128     (flush _test-output-buffered-file)
-129     (check-stream-equal _test-output-stream "" "F - test-convert-empty-input")
-130     # . epilogue
-131     89/<- %esp 5/r32/ebp
-132     5d/pop-to-ebp
-133     c3/return
-134 
-135 test-convert-function-skeleton:
-136     # empty function decl => function prologue and epilogue
-137     #   fn foo {
-138     #   }
-139     # =>
-140     #   foo:
-141     #     # . prologue
-142     #     55/push-ebp
-143     #     89/<- %ebp 4/r32/esp
-144     #     # . epilogue
-145     #     89/<- %esp 5/r32/ebp
-146     #     5d/pop-to-ebp
-147     #     c3/return
-148     # . prologue
-149     55/push-ebp
-150     89/<- %ebp 4/r32/esp
-151     # setup
-152     (clear-stream _test-input-stream)
-153     (clear-stream _test-input-buffered-file->buffer)
-154     (clear-stream _test-output-stream)
-155     (clear-stream _test-output-buffered-file->buffer)
-156     #
-157     (write _test-input-stream "fn foo {\n")
-158     (write _test-input-stream "}\n")
-159     # convert
-160     (convert-mu _test-input-buffered-file _test-output-buffered-file)
-161     (flush _test-output-buffered-file)
-162 +--  6 lines: #?     # dump _test-output-stream --------------------------------------------------------------------------------------------------------------
-168     # check output
-169     (check-next-stream-line-equal _test-output-stream "foo:"                  "F - test-convert-function-skeleton/0")
-170     (check-next-stream-line-equal _test-output-stream "# . prologue"          "F - test-convert-function-skeleton/1")
-171     (check-next-stream-line-equal _test-output-stream "55/push-ebp"           "F - test-convert-function-skeleton/2")
-172     (check-next-stream-line-equal _test-output-stream "89/<- %ebp 4/r32/esp"  "F - test-convert-function-skeleton/3")
-173     (check-next-stream-line-equal _test-output-stream "# . epilogue"          "F - test-convert-function-skeleton/4")
-174     (check-next-stream-line-equal _test-output-stream "89/<- %esp 5/r32/ebp"  "F - test-convert-function-skeleton/5")
-175     (check-next-stream-line-equal _test-output-stream "5d/pop-to-ebp"         "F - test-convert-function-skeleton/6")
-176     (check-next-stream-line-equal _test-output-stream "c3/return"             "F - test-convert-function-skeleton/7")
-177     # . epilogue
-178     89/<- %esp 5/r32/ebp
-179     5d/pop-to-ebp
-180     c3/return
-181 
-182 test-convert-multiple-function-skeletons:
-183     # multiple functions correctly organized into a linked list
-184     #   fn foo {
-185     #   }
-186     #   fn bar {
-187     #   }
-188     # =>
-189     #   foo:
-190     #     # . prologue
-191     #     55/push-ebp
-192     #     89/<- %ebp 4/r32/esp
-193     #     # . epilogue
-194     #     89/<- %esp 5/r32/ebp
-195     #     5d/pop-to-ebp
-196     #     c3/return
-197     #   bar:
-198     #     # . prologue
-199     #     55/push-ebp
-200     #     89/<- %ebp 4/r32/esp
-201     #     # . epilogue
-202     #     89/<- %esp 5/r32/ebp
-203     #     5d/pop-to-ebp
-204     #     c3/return
-205     # . prologue
-206     55/push-ebp
-207     89/<- %ebp 4/r32/esp
-208     # setup
-209     (clear-stream _test-input-stream)
-210     (clear-stream _test-input-buffered-file->buffer)
-211     (clear-stream _test-output-stream)
-212     (clear-stream _test-output-buffered-file->buffer)
-213     #
-214     (write _test-input-stream "fn foo {\n")
-215     (write _test-input-stream "}\n")
-216     (write _test-input-stream "fn bar {\n")
-217     (write _test-input-stream "}\n")
-218     # convert
-219     (convert-mu _test-input-buffered-file _test-output-buffered-file)
-220     (flush _test-output-buffered-file)
-221 +--  6 lines: #?     # dump _test-output-stream --------------------------------------------------------------------------------------------------------------
-227     # check first function
-228     (check-next-stream-line-equal _test-output-stream "foo:"                  "F - test-convert-multiple-function-skeletons/0")
-229     (check-next-stream-line-equal _test-output-stream "# . prologue"          "F - test-convert-multiple-function-skeletons/1")
-230     (check-next-stream-line-equal _test-output-stream "55/push-ebp"           "F - test-convert-multiple-function-skeletons/2")
-231     (check-next-stream-line-equal _test-output-stream "89/<- %ebp 4/r32/esp"  "F - test-convert-multiple-function-skeletons/3")
-232     (check-next-stream-line-equal _test-output-stream "# . epilogue"          "F - test-convert-multiple-function-skeletons/4")
-233     (check-next-stream-line-equal _test-output-stream "89/<- %esp 5/r32/ebp"  "F - test-convert-multiple-function-skeletons/5")
-234     (check-next-stream-line-equal _test-output-stream "5d/pop-to-ebp"         "F - test-convert-multiple-function-skeletons/6")
-235     (check-next-stream-line-equal _test-output-stream "c3/return"             "F - test-convert-multiple-function-skeletons/7")
-236     # check second function
-237     (check-next-stream-line-equal _test-output-stream "bar:"                  "F - test-convert-multiple-function-skeletons/10")
-238     (check-next-stream-line-equal _test-output-stream "# . prologue"          "F - test-convert-multiple-function-skeletons/11")
-239     (check-next-stream-line-equal _test-output-stream "55/push-ebp"           "F - test-convert-multiple-function-skeletons/12")
-240     (check-next-stream-line-equal _test-output-stream "89/<- %ebp 4/r32/esp"  "F - test-convert-multiple-function-skeletons/13")
-241     (check-next-stream-line-equal _test-output-stream "# . epilogue"          "F - test-convert-multiple-function-skeletons/14")
-242     (check-next-stream-line-equal _test-output-stream "89/<- %esp 5/r32/ebp"  "F - test-convert-multiple-function-skeletons/15")
-243     (check-next-stream-line-equal _test-output-stream "5d/pop-to-ebp"         "F - test-convert-multiple-function-skeletons/16")
-244     (check-next-stream-line-equal _test-output-stream "c3/return"             "F - test-convert-multiple-function-skeletons/17")
-245     # . epilogue
-246     89/<- %esp 5/r32/ebp
-247     5d/pop-to-ebp
-248     c3/return
-249 
-250 parse-mu:  # in : (address buffered-file)
-251     # pseudocode
-252     #   var curr-function = Program
-253     #   var line : (stream byte 512)
-254     #   var word-slice : slice
-255     #   while true                                  # line loop
-256     #     clear-stream(line)
-257     #     read-line-buffered(in, line)
-258     #     if (line->write == 0) break               # end of file
-259     #     while true                                # word loop
-260     #       word-slice = next-word-or-string(line)
-261     #       if slice-empty?(word-slice)             # end of line
-262     #         break
-263     #       else if slice-starts-with?(word-slice, "#")  # comment
-264     #         break                                 # end of line
-265     #       else if slice-equal(word-slice, "fn")
-266     #         var new-function : (address function) = new function
-267     #         populate-mu-function(in, new-function)
-268     #         *curr-function = new-function
-269     #         curr-function = &new-function->next
-270     #       else
-271     #         abort()
-272     #
-273     # . prologue
-274     55/push-ebp
-275     89/<- %ebp 4/r32/esp
-276     # . save registers
-277     50/push-eax
-278     51/push-ecx
-279     52/push-edx
-280     57/push-edi
-281     # var line/ecx : (stream byte 512)
-282     81 5/subop/subtract %esp 0x200/imm32
-283     68/push 0x200/imm32/length
-284     68/push 0/imm32/read
-285     68/push 0/imm32/write
-286     89/<- %ecx 4/r32/esp
-287     # var word-slice/edx : slice
-288     68/push 0/imm32/end
-289     68/push 0/imm32/start
-290     89/<- %edx 4/r32/esp
-291     # var curr-function/edi : (address function) = Program
-292     bf/copy-to-edi Program/imm32
-293     {
-294 $parse-mu:line-loop:
-295       (clear-stream %ecx)
-296       (read-line-buffered *(ebp+8) %ecx)
-297       # if (line->write == 0) break
-298       81 7/subop/compare *ecx 0/imm32
-299       0f 84/jump-if-equal break/disp32
-300 +--  6 lines: #?       # dump line ---------------------------------------------------------------------------------------------------------------------------
-306       { # word loop
-307 $parse-mu:word-loop:
-308         (next-word-or-string %ecx %edx)
-309         # if slice-empty?(word-slice) break
-310         (slice-empty? %edx)
-311         3d/compare-eax-and 0/imm32
-312         0f 85/jump-if-not-equal break/disp32
-313         # if (*word-slice->start == "#") break
-314         # . eax = *word-slice->start
-315         8b/-> *edx 0/r32/eax
-316         8a/copy-byte *eax 0/r32/AL
-317         81 4/subop/and %eax 0xff/imm32
-318         # . if (eax == '#') break
-319         3d/compare-eax-and 0x23/imm32/hash
-320         0f 84/jump-if-equal break/disp32
-321         # if (slice-equal?(word-slice, "fn")) parse a function
-322         {
-323           (slice-equal? %edx "fn")
-324           3d/compare-eax-and 0/imm32
-325           0f 84/jump-if-equal break/disp32
-326           # var new-function/eax : (address function) = populate-mu-function()
-327           (allocate Heap *Function-size)  # => eax
-328           (populate-mu-function-header %ecx %eax)
-329           (populate-mu-function-body *(ebp+8) %eax)
-330           # *curr-function = new-function
-331           89/<- *edi 0/r32/eax
-332           # curr-function = &new-function->next
-333           8d/address-> *(eax+0x10) 7/r32/edi
-334           e9/jump $parse-mu:word-loop/disp32
-335         }
-336         # otherwise abort
-337         e9/jump $parse-mu:abort/disp32
-338       } # end word loop
-339       e9/jump loop/disp32
-340     } # end line loop
-341 $parse-mu:end:
-342     # . reclaim locals
-343     81 0/subop/add %esp 0x214/imm32
-344     # . restore registers
-345     5f/pop-to-edi
-346     5a/pop-to-edx
-347     59/pop-to-ecx
-348     58/pop-to-eax
-349     # . epilogue
-350     89/<- %esp 5/r32/ebp
-351     5d/pop-to-ebp
-352     c3/return
-353 
-354 $parse-mu:abort:
-355     # error("unexpected top-level command: " word-slice "\n")
-356     (write-buffered Stderr "unexpected top-level command: ")
-357     (write-buffered Stderr %edx)
-358     (write-buffered Stderr "\n")
-359     (flush Stderr)
-360     # . syscall(exit, 1)
-361     bb/copy-to-ebx  1/imm32
-362     b8/copy-to-eax  1/imm32/exit
-363     cd/syscall  0x80/imm8
-364     # never gets here
-365 
-366 # errors considered:
-367 #   fn foo { {
-368 #   fn foo { }
-369 #   fn foo { } {
-370 #   fn foo  # no block
-371 populate-mu-function-header:  # first-line : (address stream byte), out : (address function)
-372     # . prologue
-373     55/push-ebp
-374     89/<- %ebp 4/r32/esp
-375     # . save registers
-376     50/push-eax
-377     51/push-ecx
-378     57/push-edi
-379     # edi = out
-380     8b/-> *(ebp+0xc) 7/r32/edi
-381     # var word-slice/ecx : slice
-382     68/push 0/imm32/end
-383     68/push 0/imm32/start
-384     89/<- %ecx 4/r32/esp
-385     # out->name = slice-to-string(next-word(first-line))
-386     (next-word *(ebp+8) %ecx)
-387     (slice-to-string Heap %ecx)  # => eax
-388     89/<- *edi 0/r32/eax
-389     # assert that next token is '{'
-390     (next-word *(ebp+8) %ecx)
-391     (slice-equal? %ecx "{")
-392     3d/compare-eax-and 0/imm32
-393     74/jump-if-equal $populate-mu-function-header:abort/disp8
-394     # assert that there's no further token (a trailing comment is ok)
-395     {
-396       # word-slice = next-word(line)
-397       (next-word *(ebp+8) %ecx)
-398       # if (word-slice == '') break
-399       (slice-empty? %ecx)
-400       3d/compare-eax-and 0/imm32
-401       75/jump-if-not-equal break/disp8
-402       # if (slice-starts-with?(word-slice, "#")) break
-403       # . eax = *word-slice->start  (word-slice is in ecx in this function)
-404       8b/-> *ecx 0/r32/eax
-405       8a/copy-byte *eax 0/r32/AL
-406       81 4/subop/and %eax 0xff/imm32
-407       # . if (eax == '#') break
-408       3d/compare-eax-and 0x23/imm32/hash
-409       74/jump-if-equal break/disp8
-410       # otherwise abort
-411       eb/jump $populate-mu-function-header:abort/disp8
-412     }
-413 $populate-mu-function-header:end:
-414     # . reclaim locals (word-slice: 8 bytes)
-415     81 0/subop/add %esp 8/imm32
-416     # . restore registers
-417     5f/pop-to-edi
-418     59/pop-to-ecx
-419     58/pop-to-eax
-420     # . epilogue
-421     89/<- %esp 5/r32/ebp
-422     5d/pop-to-ebp
-423     c3/return
-424 
-425 $populate-mu-function-header:abort:
-426     # error("function header not in form 'fn <name> {'")
-427     (write-buffered Stderr "function header not in form 'fn <name> {' -- '")
-428     (rewind-stream *(ebp+8))
-429     (write-stream 2 *(ebp+8))
-430     (write-buffered Stderr "'\n")
-431     (flush Stderr)
-432     # . syscall(exit, 1)
-433     bb/copy-to-ebx  1/imm32
-434     b8/copy-to-eax  1/imm32/exit
-435     cd/syscall  0x80/imm8
-436     # never gets here
-437 
-438 # errors considered:
-439 #   { abc
-440 populate-mu-function-body:  # in : (address buffered-file), out : (address function)
-441     # . prologue
-442     55/push-ebp
-443     89/<- %ebp 4/r32/esp
-444     # . save registers
-445     50/push-eax
-446     51/push-ecx
-447     52/push-edx
-448     53/push-ebx
-449     # var line/ecx : (stream byte 512)
-450     81 5/subop/subtract %esp 0x200/imm32
-451     68/push 0x200/imm32/length
-452     68/push 0/imm32/read
-453     68/push 0/imm32/write
-454     89/<- %ecx 4/r32/esp
-455     # var word-slice/edx : slice
-456     68/push 0/imm32/end
-457     68/push 0/imm32/start
-458     89/<- %edx 4/r32/esp
-459     # var open-curly-count/ebx : int = 1  (the '{' on the header line)
-460     bb/copy-to-ebx 1/imm32
-461     { # line loop
-462 $populate-mu-function-body:line-loop:
-463       # if (open-curly-count == 0) break
-464       81 7/subop/compare %ebx 0/imm32
-465       0f 84/jump-if-equal break/disp32
-466       # line = read-line-buffered(in)
-467       (clear-stream %ecx)
-468       (read-line-buffered *(ebp+8) %ecx)
-469       # if (line->write == 0) break
-470       81 7/subop/compare *ecx 0/imm32
-471       0f 84/jump-if-equal break/disp32
-472       # word-slice = next-word(line)
-473       (next-word %ecx %edx)
-474       # if slice-empty?(word-slice) continue
-475       (slice-empty? %edx)
-476       3d/compare-eax-and 0/imm32
-477       75/jump-if-not-equal loop/disp8
-478       # if (slice-starts-with?(word-slice, '#') continue
-479       # . eax = *word-slice->start
-480       8b/-> *edx 0/r32/eax
-481       8a/copy-byte *eax 0/r32/AL
-482       81 4/subop/and %eax 0xff/imm32
-483       # . if (eax == '#') continue
-484       3d/compare-eax-and 0x23/imm32/hash
-485       74/jump-if-equal loop/disp8
-486       {
-487         # if slice-equal?(word-slice, "{") ++open-curly-count
-488         {
-489           (slice-equal? %edx "{")
-490           3d/compare-eax-and 0/imm32
-491           74/jump-if-equal break/disp8
-492           43/increment-ebx
-493           eb/jump $curly-found:end/disp8
-494         }
-495         # else if slice-equal?(word-slice, "}") --open-curly-count
-496         {
-497           (slice-equal? %edx "}")
-498           3d/compare-eax-and 0/imm32
-499           74/jump-if-equal break/disp8
-500           4b/decrement-ebx
-501           eb/jump $curly-found:end/disp8
-502         }
-503         # else break
-504         eb/jump $populate-mu-function-body:end/disp8
-505       }
-506       # - check for invalid tokens after curly
-507 $curly-found:end:
-508       # second-word-slice = next-word(line)
-509       (next-word %ecx %edx)
-510       # if slice-empty?(second-word-slice) continue
-511       (slice-empty? %edx)
-512       3d/compare-eax-and 0/imm32
-513       0f 85/jump-if-not-equal loop/disp32
-514       # if (slice-starts-with?(second-word-slice, '#') continue
-515       # . eax = *second-word-slice->start
-516       8b/-> *edx 0/r32/eax
-517       8a/copy-byte *eax 0/r32/AL
-518       81 4/subop/and %eax 0xff/imm32
-519       # . if (eax == '#') continue
-520       3d/compare-eax-and 0x23/imm32/hash
-521       0f 84/jump-if-equal loop/disp32
-522       # abort
-523       eb/jump $populate-mu-function-body:abort/disp8
-524     } # end line loop
-525 $populate-mu-function-body:end:
-526     # . reclaim locals (line: 0x200 + 12 bytes, word-slice: 8 bytes)
-527     81 0/subop/add %esp 0x214/imm32
-528     # . restore registers
-529     5b/pop-to-ebx
-530     5a/pop-to-edx
-531     59/pop-to-ecx
-532     58/pop-to-eax
-533     # . epilogue
-534     89/<- %esp 5/r32/ebp
-535     5d/pop-to-ebp
-536     c3/return
-537 
-538 $populate-mu-function-body:abort:
-539     # error("'{' or '}' should be on its own line, but got '")
-540     (write-buffered Stderr "'{' or '}' should be on its own line, but got '")
-541     (rewind-stream %ecx)
-542     (write-stream 2 %ecx)
-543     (write-buffered Stderr "'\n")
-544     (flush Stderr)
-545     # . syscall(exit, 1)
-546     bb/copy-to-ebx  1/imm32
-547     b8/copy-to-eax  1/imm32/exit
-548     cd/syscall  0x80/imm8
-549     # never gets here
-550 
-551 check-mu-types:
-552     # . prologue
-553     55/push-ebp
-554     89/<- %ebp 4/r32/esp
-555     #
-556 $check-types:end:
-557     # . epilogue
-558     89/<- %esp 5/r32/ebp
-559     5d/pop-to-ebp
-560     c3/return
-561 
-562 emit-subx:  # out : (address buffered-file)
-563     # . prologue
-564     55/push-ebp
-565     89/<- %ebp 4/r32/esp
-566     # . save registers
-567     50/push-eax
-568     51/push-ecx
-569     57/push-edi
-570     # edi = out
-571     8b/-> *(ebp+8) 7/r32/edi
-572     # var curr/ecx : (address function) = Program
-573     8b/-> *Program 1/r32/ecx
-574     {
-575       # if (curr == NULL) break
-576       81 7/subop/compare %ecx 0/imm32
-577       0f 84/jump-if-equal break/disp32
-578       (write-buffered %edi *ecx)
-579       (write-buffered %edi ":\n")
-580       (emit-subx-prologue %edi)
-581       (emit-subx-epilogue %edi)
-582       # curr = curr->next
-583       8b/-> *(ecx+0x10) 1/r32/ecx
-584       e9/jump loop/disp32
-585     }
-586 $emit-subx:end:
-587     # . restore registers
-588     5f/pop-to-edi
-589     59/pop-to-ecx
-590     58/pop-to-eax
-591     # . epilogue
-592     89/<- %esp 5/r32/ebp
-593     5d/pop-to-ebp
-594     c3/return
-595 
-596 emit-subx-prologue:  # out : (address buffered-file)
-597     # . prologue
-598     55/push-ebp
-599     89/<- %ebp 4/r32/esp
-600     #
-601     (write-buffered *(ebp+8) "# . prologue\n")
-602     (write-buffered *(ebp+8) "55/push-ebp\n")
-603     (write-buffered *(ebp+8) "89/<- %ebp 4/r32/esp\n")
-604     # . epilogue
-605     89/<- %esp 5/r32/ebp
-606     5d/pop-to-ebp
-607     c3/return
-608 
-609 emit-subx-epilogue:  # out : (address buffered-file)
-610     # . prologue
-611     55/push-ebp
-612     89/<- %ebp 4/r32/esp
-613     #
-614     (write-buffered *(ebp+8) "# . epilogue\n")
-615     (write-buffered *(ebp+8) "89/<- %esp 5/r32/ebp\n")
-616     (write-buffered *(ebp+8) "5d/pop-to-ebp\n")
-617     (write-buffered *(ebp+8) "c3/return\n")
-618     # . epilogue
-619     89/<- %esp 5/r32/ebp
-620     5d/pop-to-ebp
-621     c3/return
+   1 # The Mu computer's level-2 language, also called Mu.
+   2 # http://akkartik.name/post/mu-2019-2
+   3 #
+   4 # To run:
+   5 #   $ ./ntranslate init.linux 0*.subx apps/mu.subx
+   6 
+   7 # == Goals
+   8 # 1. Be memory safe. It should be impossible to corrupt the heap, or to create
+   9 # a bad pointer. (Requires strong type safety.)
+  10 # 2. Do as little as possible to achieve goal 1.
+  11 #   - runtime checks to avoid complex static analysis
+  12 #   - minimize impedance mismatch between source language and SubX target
+  13 
+  14 # == Language description
+  15 #
+  16 # A program is a sequence of function definitions.
+  17 #
+  18 # Function example:
+  19 #   fn foo n: int -> result/eax: int {
+  20 #     ...
+  21 #   }
+  22 #
+  23 # Functions consist of a name, optional inputs, optional outputs and a block.
+  24 #
+  25 # Function inputs and outputs are variables. All variables have a type and
+  26 # storage specifier. They can be placed either in memory (on the stack) or in
+  27 # one of 6 named registers.
+  28 #   eax ecx edx ebx esi edi
+  29 # Variables in registers must be primitive 32-bit types.
+  30 # Variables not explicitly placed in a register are on the stack.
+  31 # Variables in registers need not have a name; in that case you refer to them
+  32 # directly by the register name.
+  33 #
+  34 # Function inputs are always passed in memory (on the stack), while outputs
+  35 # are always returned in registers.
+  36 #
+  37 # Blocks mostly consist of statements.
+  38 #
+  39 # Statements mostly consist of a name, optional inputs and optional outputs.
+  40 #
+  41 # Statement inputs are variables or literals. Variables need to specify type
+  42 # (and storage) the first time they're mentioned but not later.
+  43 #
+  44 # Statement outputs, like function outputs, must be variables in registers.
+  45 #
+  46 # Statement names must be either primitives or user-defined functions.
+  47 #
+  48 # Primitives can write to any register.
+  49 # User-defined functions only write to hard-coded registers. Outputs of each
+  50 # call must have the same registers as in the function definition.
+  51 #
+  52 # There are some other statement types:
+  53 #   - blocks. Multiple statements surrounded by '{...}' and optionally
+  54 #     prefixed with a label name and ':'
+  55 #       - {
+  56 #           ...
+  57 #         }
+  58 #       - foo: {
+  59 #           ...
+  60 #         }
+  61 #
+  62 #   - variable definitions on the stack. E.g.:
+  63 #       - var foo: int
+  64 #       - var bar: (array int 3)
+  65 #     There's no initializer; variables are automatically initialized.
+  66 #
+  67 #   - variable definitions in a register. E.g.:
+  68 #       - var foo/eax : int <- add bar 1
+  69 #     The initializer is mandatory and must be a valid instruction that writes
+  70 #     a single output to the right register. In practice registers will
+  71 #     usually be either initialized by primitives or copied from eax.
+  72 #       - var eax : int <- foo bar quux
+  73 #         var floo/ecx : int <- copy eax
+  74 #
+  75 # Still todo:
+  76 #   global variables
+  77 #   heap allocations (planned name: 'handle')
+  78 #   user-defined types: 'type' for structs, 'choice' for unions
+  79 #   short-lived 'address' type for efficiently writing inside nested structs
+  80 
+  81 # Now that we know what the language looks like in the large, let's think
+  82 # about how translation happens from the bottom up. The interplay between
+  83 # variable scopes and statements using variables is the most complex aspect of
+  84 # translation.
+  85 #
+  86 # Assume that we maintain a 'functions' list while parsing source code. And a
+  87 # 'primitives' list is a global constant. Both these contain enough information
+  88 # to perform type-checking on function calls or primitive statements, respectively.
+  89 #
+  90 # Defining variables pushes them on a stack with the current block depth and
+  91 # enough information about their location (stack offset or register id).
+  92 # Starting a block increments the current block id.
+  93 # Each statement now has enough information to emit code for it.
+  94 # Ending a block is where the magic happens:
+  95 #   pop all variables at the current block depth
+  96 #   emit code to restore all register variables introduced at the current depth
+  97 #   emit code to clean up all stack variables at the current depth (just increment esp)
+  98 #   decrement the current block depth
+  99 #
+ 100 # One additional check we'll need is to ensure that a variable in a register
+ 101 # isn't shadowed by a different one. That may be worth a separate data
+ 102 # structure but for now repeatedly scanning the var stack should suffice.
+ 103 
+ 104 # == Book-keeping while emitting code for a single statement
+ 105 # Immutable data:
+ 106 #   function info
+ 107 #
+ 108 # Mutable data:
+ 109 #   stack: variables currently in scope
+ 110 #     block id
+ 111 #     type
+ 112 #     ebp offset for function | register id
+ 113 #   dict: register -> var
+ 114 
+ 115 # == Compiling a single instruction
+ 116 # Determine the function or primitive being called.
+ 117 #   If no matches, show all functions/primitives with the same name, along
+ 118 #   with reasons they don't match. (type and storage checking)
+ 119 #   It must be a function if:
+ 120 #     #outputs > 1, or
+ 121 #     #inouts > 2, or
+ 122 #     #inouts + #outputs > 2
+ 123 # If it's a function, emit:
+ 124 #   (low-level-name <rm32 or imm32>...)
+ 125 # Otherwise (it's a primitive):
+ 126 #   assert(#inouts <= 2 && #outs <= 1 && (#inouts + #outs) <= 2)
+ 127 #   emit opcode
+ 128 #   emit-rm32(inout[0])
+ 129 #   if out[0] exists: emit-r32(out[0])
+ 130 #   else if inout[1] is a literal: emit-imm32(inout[1])
+ 131 #   else: emit-rm32(inout[1])
+ 132 
+ 133 # emit-rm32 and emit-r32 should check that the variable they intend is still
+ 134 # available in the register.
+ 135 
+ 136 # == Emitting a block
+ 137 # Emit block name if necessary
+ 138 # Emit '{'
+ 139 # When you encounter a statement, emit it as above
+ 140 # When you encounter a variable declaration
+ 141 #   emit any code needed for it (bzeros)
+ 142 #   push it on the var stack
+ 143 #   update register dict if necessary
+ 144 # When you encounter '}'
+ 145 #   While popping variables off the var stack until block id changes
+ 146 #     Emit code needed to clean up the stack
+ 147 #       either increment esp
+ 148 #       or pop into appropriate register
+ 149 #   TODO: how to update the register dict? does it need to be a stack as well?
+ 150 
+ 151 # The rest is straightforward.
+ 152 
+ 153 # A sketch of planned data structures. Still highly speculative.
+ 154 == data
+ 155 
+ 156 # A program is currently a linked list of functions
+ 157 Program:  # (address function)
+ 158   0/imm32
+ 159 
+ 160 # A function consists of:
+ 161 #   name: (address string)
+ 162 #   inputs: (address var-type)  # tbd
+ 163 #   outputs: (address var-type)  # tbd
+ 164 #   body: (address block)
+ 165 #   next: (address function)
+ 166 Function-next:
+ 167   0x10/imm32
+ 168 Function-size:
+ 169   0x14/imm32/20
+ 170 
+ 171 # A block is a list of statements:
+ 172 #     statements: (address statement)
+ 173 
+ 174 # A statement can be either a regular statement consisting of:
+ 175 #     name: (address string)
+ 176 #     inputs: (address var)
+ 177 #     outputs: (address var-r)
+ 178 # or a variable declaration on the stack:
+ 179 #     name: (address string)
+ 180 #     type: (address type-sexpr)
+ 181 # or a regular statement writing to a single new variable in a register:
+ 182 #     name: (address string)
+ 183 #     inputs: (address var)
+ 184 #     output: var-r
+ 185 # or a block of statements:
+ 186 #     statements: (address statement)
+ 187 
+ 188 # Kinds of local variable declarations:
+ 189 #   var f : (array foo 10)
+ 190 #   var f/ecx : int <- copy 0
+ 191 # Variables live in either the stack or a register.
+ 192 # Variables in the stack are auto-initialized.
+ 193 #   (This is non-trivial for arrays, and arrays inside structs... We'll see.)
+ 194 # Variables in registers need a real instruction to initialize them.
+ 195 
+ 196 # var is a variable declaration. e.g. `foo: (array int 3)`
+ 197 #   name: (address string)
+ 198 #   type: (address type-sexpr)
+ 199 
+ 200 # var-r is a variable declaration in a register. e.g. `foo/eax: (array int 3)`
+ 201 #   name: (address string)
+ 202 #   type: (address type-sexpr)
+ 203 #   reg: int [0..7]
+ 204 
+ 205 # type-sexpr is a tree of type identifiers. e.g. (array (address int) 3)
+ 206 # either
+ 207 #   id: type-identifier
+ 208 # or
+ 209 #   car: (address type-sexpr)
+ 210 #   cdr: (address type-sexpr)
+ 211 
+ 212 == code
+ 213 
+ 214 Entry:  # program entry point: run the tests if argv[1] is "test", otherwise translate Stdin to Stdout
+ 215     # . prologue
+ 216     89/<- %ebp 4/r32/esp
+ 217     (new-segment Heap-size Heap)
+ 218     # if (argv[1] == "test") run-tests()
+ 219     {
+ 220       # if (argc <= 1) break
+ 221       81 7/subop/compare *ebp 1/imm32
+ 222       7e/jump-if-lesser-or-equal break/disp8
+ 223       # if (argv[1] != "test") break
+ 224       (kernel-string-equal? *(ebp+8) "test")  # => eax
+ 225       3d/compare-eax-and 0/imm32
+ 226       74/jump-if-equal break/disp8
+ 227       # run every test, then exit with the failure count as the status
+ 228       (run-tests)
+ 229       # syscall(exit, *Num-test-failures)
+ 230       8b/-> *Num-test-failures 3/r32/ebx
+ 231       eb/jump $mu-main:end/disp8
+ 232     }
+ 233     # otherwise convert Stdin
+ 234     (convert-mu Stdin Stdout)
+ 235     (flush Stdout)
+ 236     # syscall(exit, 0)
+ 237     bb/copy-to-ebx 0/imm32
+ 238 $mu-main:end:  # both paths arrive here with the exit status already in ebx
+ 239     b8/copy-to-eax 1/imm32/exit
+ 240     cd/syscall 0x80/imm8
+ 241 
+ 242 convert-mu:  # in : (address buffered-file), out : (address buffered-file)
+ 243     # . prologue
+ 244     55/push-ebp
+ 245     89/<- %ebp 4/r32/esp
+ 246     # three phases: parse 'in' into the Program list, check types, then emit SubX to 'out'
+ 247     (parse-mu *(ebp+8))
+ 248     (check-mu-types)
+ 249     (emit-subx *(ebp+0xc))
+ 250 $convert-mu:end:
+ 251     # . epilogue
+ 252     89/<- %esp 5/r32/ebp
+ 253     5d/pop-to-ebp
+ 254     c3/return
+ 255 
+ 256 test-convert-empty-input:
+ 257     # empty input => empty output
+ 258     # . prologue
+ 259     55/push-ebp
+ 260     89/<- %ebp 4/r32/esp
+ 261     # setup: clear the test input/output streams and the buffered-file buffers
+ 262     (clear-stream _test-input-stream)
+ 263     (clear-stream _test-input-buffered-file->buffer)
+ 264     (clear-stream _test-output-stream)
+ 265     (clear-stream _test-output-buffered-file->buffer)
+ 266     # convert without writing anything to the input first
+ 267     (convert-mu _test-input-buffered-file _test-output-buffered-file)
+ 268     (flush _test-output-buffered-file)
+ 269     (check-stream-equal _test-output-stream "" "F - test-convert-empty-input")
+ 270     # . epilogue
+ 271     89/<- %esp 5/r32/ebp
+ 272     5d/pop-to-ebp
+ 273     c3/return
+ 274 
+ 275 test-convert-function-skeleton:
+ 276     # empty function decl => function prologue and epilogue
+ 277     #   fn foo {
+ 278     #   }
+ 279     # =>
+ 280     #   foo:
+ 281     #     # . prologue
+ 282     #     55/push-ebp
+ 283     #     89/<- %ebp 4/r32/esp
+ 284     #     # . epilogue
+ 285     #     89/<- %esp 5/r32/ebp
+ 286     #     5d/pop-to-ebp
+ 287     #     c3/return
+ 288     # . prologue
+ 289     55/push-ebp
+ 290     89/<- %ebp 4/r32/esp
+ 291     # setup: clear the test input/output streams and the buffered-file buffers
+ 292     (clear-stream _test-input-stream)
+ 293     (clear-stream _test-input-buffered-file->buffer)
+ 294     (clear-stream _test-output-stream)
+ 295     (clear-stream _test-output-buffered-file->buffer)
+ 296     # write the Mu program shown above into the input stream
+ 297     (write _test-input-stream "fn foo {\n")
+ 298     (write _test-input-stream "}\n")
+ 299     # convert
+ 300     (convert-mu _test-input-buffered-file _test-output-buffered-file)
+ 301     (flush _test-output-buffered-file)
+ 302 +--  6 lines: #?     # dump _test-output-stream --------------------------------------------------------------------------------------------------------------
+ 308     # check output one line at a time; the /N suffix identifies which line failed
+ 309     (check-next-stream-line-equal _test-output-stream "foo:"                  "F - test-convert-function-skeleton/0")
+ 310     (check-next-stream-line-equal _test-output-stream "# . prologue"          "F - test-convert-function-skeleton/1")
+ 311     (check-next-stream-line-equal _test-output-stream "55/push-ebp"           "F - test-convert-function-skeleton/2")
+ 312     (check-next-stream-line-equal _test-output-stream "89/<- %ebp 4/r32/esp"  "F - test-convert-function-skeleton/3")
+ 313     (check-next-stream-line-equal _test-output-stream "# . epilogue"          "F - test-convert-function-skeleton/4")
+ 314     (check-next-stream-line-equal _test-output-stream "89/<- %esp 5/r32/ebp"  "F - test-convert-function-skeleton/5")
+ 315     (check-next-stream-line-equal _test-output-stream "5d/pop-to-ebp"         "F - test-convert-function-skeleton/6")
+ 316     (check-next-stream-line-equal _test-output-stream "c3/return"             "F - test-convert-function-skeleton/7")
+ 317     # . epilogue
+ 318     89/<- %esp 5/r32/ebp
+ 319     5d/pop-to-ebp
+ 320     c3/return
+ 321 
+ 322 test-convert-multiple-function-skeletons:
+ 323     # multiple functions correctly organized into a linked list
+ 324     #   fn foo {
+ 325     #   }
+ 326     #   fn bar {
+ 327     #   }
+ 328     # =>
+ 329     #   foo:
+ 330     #     # . prologue
+ 331     #     55/push-ebp
+ 332     #     89/<- %ebp 4/r32/esp
+ 333     #     # . epilogue
+ 334     #     89/<- %esp 5/r32/ebp
+ 335     #     5d/pop-to-ebp
+ 336     #     c3/return
+ 337     #   bar:
+ 338     #     # . prologue
+ 339     #     55/push-ebp
+ 340     #     89/<- %ebp 4/r32/esp
+ 341     #     # . epilogue
+ 342     #     89/<- %esp 5/r32/ebp
+ 343     #     5d/pop-to-ebp
+ 344     #     c3/return
+ 345     # . prologue
+ 346     55/push-ebp
+ 347     89/<- %ebp 4/r32/esp
+ 348     # setup: clear the test input/output streams and the buffered-file buffers
+ 349     (clear-stream _test-input-stream)
+ 350     (clear-stream _test-input-buffered-file->buffer)
+ 351     (clear-stream _test-output-stream)
+ 352     (clear-stream _test-output-buffered-file->buffer)
+ 353     # write the two-function Mu program shown above into the input stream
+ 354     (write _test-input-stream "fn foo {\n")
+ 355     (write _test-input-stream "}\n")
+ 356     (write _test-input-stream "fn bar {\n")
+ 357     (write _test-input-stream "}\n")
+ 358     # convert
+ 359     (convert-mu _test-input-buffered-file _test-output-buffered-file)
+ 360     (flush _test-output-buffered-file)
+ 361 +--  6 lines: #?     # dump _test-output-stream --------------------------------------------------------------------------------------------------------------
+ 367     # check first function (messages /0../7)
+ 368     (check-next-stream-line-equal _test-output-stream "foo:"                  "F - test-convert-multiple-function-skeletons/0")
+ 369     (check-next-stream-line-equal _test-output-stream "# . prologue"          "F - test-convert-multiple-function-skeletons/1")
+ 370     (check-next-stream-line-equal _test-output-stream "55/push-ebp"           "F - test-convert-multiple-function-skeletons/2")
+ 371     (check-next-stream-line-equal _test-output-stream "89/<- %ebp 4/r32/esp"  "F - test-convert-multiple-function-skeletons/3")
+ 372     (check-next-stream-line-equal _test-output-stream "# . epilogue"          "F - test-convert-multiple-function-skeletons/4")
+ 373     (check-next-stream-line-equal _test-output-stream "89/<- %esp 5/r32/ebp"  "F - test-convert-multiple-function-skeletons/5")
+ 374     (check-next-stream-line-equal _test-output-stream "5d/pop-to-ebp"         "F - test-convert-multiple-function-skeletons/6")
+ 375     (check-next-stream-line-equal _test-output-stream "c3/return"             "F - test-convert-multiple-function-skeletons/7")
+ 376     # check second function (messages /10../17), emitted after the first in input order
+ 377     (check-next-stream-line-equal _test-output-stream "bar:"                  "F - test-convert-multiple-function-skeletons/10")
+ 378     (check-next-stream-line-equal _test-output-stream "# . prologue"          "F - test-convert-multiple-function-skeletons/11")
+ 379     (check-next-stream-line-equal _test-output-stream "55/push-ebp"           "F - test-convert-multiple-function-skeletons/12")
+ 380     (check-next-stream-line-equal _test-output-stream "89/<- %ebp 4/r32/esp"  "F - test-convert-multiple-function-skeletons/13")
+ 381     (check-next-stream-line-equal _test-output-stream "# . epilogue"          "F - test-convert-multiple-function-skeletons/14")
+ 382     (check-next-stream-line-equal _test-output-stream "89/<- %esp 5/r32/ebp"  "F - test-convert-multiple-function-skeletons/15")
+ 383     (check-next-stream-line-equal _test-output-stream "5d/pop-to-ebp"         "F - test-convert-multiple-function-skeletons/16")
+ 384     (check-next-stream-line-equal _test-output-stream "c3/return"             "F - test-convert-multiple-function-skeletons/17")
+ 385     # . epilogue
+ 386     89/<- %esp 5/r32/ebp
+ 387     5d/pop-to-ebp
+ 388     c3/return
+ 389 
+ 390 test-convert-function-with-arg:
+ 391     # function with one arg and a copy instruction
+ 392     #   fn foo n : int -> result/eax : int {
+ 393     #     result <- copy n
+ 394     #   }
+ 395     # =>
+ 396     #   foo:
+ 397     #     # . prologue
+ 398     #     55/push-ebp
+ 399     #     89/<- %ebp 4/r32/esp
+ 400     #     {
+ 401     #     # result <- copy n
+ 402     #     8b/-> *(ebp+8) 0/r32/eax
+ 403     #     }
+ 404     #     # . epilogue
+ 405     #     89/<- %esp 5/r32/ebp
+ 406     #     5d/pop-to-ebp
+ 407     #     c3/return
+ 408     # . prologue
+ 409     55/push-ebp
+ 410     89/<- %ebp 4/r32/esp
+ 411     # setup: clear the test input/output streams and the buffered-file buffers
+ 412     (clear-stream _test-input-stream)
+ 413     (clear-stream _test-input-buffered-file->buffer)
+ 414     (clear-stream _test-output-stream)
+ 415     (clear-stream _test-output-buffered-file->buffer)
+ 416     # TODO: the input is still the bare skeleton, not the function with an arg described above
+ 417     (write _test-input-stream "fn foo {\n")
+ 418     (write _test-input-stream "}\n")
+ 419     # convert
+ 420     (convert-mu _test-input-buffered-file _test-output-buffered-file)
+ 421     (flush _test-output-buffered-file)
+ 422 +--  6 lines: #?     # dump _test-output-stream --------------------------------------------------------------------------------------------------------------
+ 428     # check output (failure messages name this test so a failure is attributed correctly)
+ 429     (check-next-stream-line-equal _test-output-stream "foo:"                  "F - test-convert-function-with-arg/0")
+ 430     (check-next-stream-line-equal _test-output-stream "# . prologue"          "F - test-convert-function-with-arg/1")
+ 431     (check-next-stream-line-equal _test-output-stream "55/push-ebp"           "F - test-convert-function-with-arg/2")
+ 432     (check-next-stream-line-equal _test-output-stream "89/<- %ebp 4/r32/esp"  "F - test-convert-function-with-arg/3")
+ 433     (check-next-stream-line-equal _test-output-stream "# . epilogue"          "F - test-convert-function-with-arg/4")
+ 434     (check-next-stream-line-equal _test-output-stream "89/<- %esp 5/r32/ebp"  "F - test-convert-function-with-arg/5")
+ 435     (check-next-stream-line-equal _test-output-stream "5d/pop-to-ebp"         "F - test-convert-function-with-arg/6")
+ 436     (check-next-stream-line-equal _test-output-stream "c3/return"             "F - test-convert-function-with-arg/7")
+ 437     # . epilogue
+ 438     89/<- %esp 5/r32/ebp
+ 439     5d/pop-to-ebp
+ 440     c3/return
+ 441 
+ 442 parse-mu:  # in : (address buffered-file)
+ 443     # pseudocode
+ 444     #   var curr-function = Program
+ 445     #   var line : (stream byte 512)
+ 446     #   var word-slice : slice
+ 447     #   while true                                  # line loop
+ 448     #     clear-stream(line)
+ 449     #     read-line-buffered(in, line)
+ 450     #     if (line->write == 0) break               # end of file
+ 451     #     while true                                # word loop
+ 452     #       word-slice = next-word-or-string(line)
+ 453     #       if slice-empty?(word-slice)             # end of line
+ 454     #         break
+ 455     #       else if slice-starts-with?(word-slice, "#")  # comment
+ 456     #         break                                 # end of line
+ 457     #       else if slice-equal(word-slice, "fn")
+ 458     #         var new-function : (address function) = new function
+ 459     #         populate-mu-function-header(line, new-function); populate-mu-function-body(in, new-function)
+ 460     #         *curr-function = new-function
+ 461     #         curr-function = &new-function->next
+ 462     #       else
+ 463     #         abort()
+ 464     #
+ 465     # . prologue
+ 466     55/push-ebp
+ 467     89/<- %ebp 4/r32/esp
+ 468     # . save registers
+ 469     50/push-eax
+ 470     51/push-ecx
+ 471     52/push-edx
+ 472     57/push-edi
+ 473     # var line/ecx : (stream byte 512)
+ 474     81 5/subop/subtract %esp 0x200/imm32
+ 475     68/push 0x200/imm32/length
+ 476     68/push 0/imm32/read
+ 477     68/push 0/imm32/write
+ 478     89/<- %ecx 4/r32/esp
+ 479     # var word-slice/edx : slice
+ 480     68/push 0/imm32/end
+ 481     68/push 0/imm32/start
+ 482     89/<- %edx 4/r32/esp
+ 483     # var curr-function/edi : (address function) = Program
+ 484     bf/copy-to-edi Program/imm32
+ 485     {
+ 486 $parse-mu:line-loop:
+ 487       (clear-stream %ecx)
+ 488       (read-line-buffered *(ebp+8) %ecx)
+ 489       # if (line->write == 0) break
+ 490       81 7/subop/compare *ecx 0/imm32
+ 491       0f 84/jump-if-equal break/disp32
+ 492 +--  6 lines: #?       # dump line ---------------------------------------------------------------------------------------------------------------------------
+ 498       { # word loop
+ 499 $parse-mu:word-loop:
+ 500         (next-word-or-string %ecx %edx)
+ 501         # if slice-empty?(word-slice) break
+ 502         (slice-empty? %edx)
+ 503         3d/compare-eax-and 0/imm32
+ 504         0f 85/jump-if-not-equal break/disp32
+ 505         # if (*word-slice->start == "#") break
+ 506         # . eax = *word-slice->start
+ 507         8b/-> *edx 0/r32/eax
+ 508         8a/copy-byte *eax 0/r32/AL
+ 509         81 4/subop/and %eax 0xff/imm32
+ 510         # . if (eax == '#') break
+ 511         3d/compare-eax-and 0x23/imm32/hash
+ 512         0f 84/jump-if-equal break/disp32
+ 513         # if (slice-equal?(word-slice, "fn")) parse a function
+ 514         {
+ 515           (slice-equal? %edx "fn")
+ 516           3d/compare-eax-and 0/imm32
+ 517           0f 84/jump-if-equal break/disp32
+ 518           # var new-function/eax : (address function) = allocate(Heap, Function-size), then fill it in
+ 519           (allocate Heap *Function-size)  # => eax
+ 520           (populate-mu-function-header %ecx %eax)
+ 521           (populate-mu-function-body *(ebp+8) %eax)
+ 522           # *curr-function = new-function
+ 523           89/<- *edi 0/r32/eax
+ 524           # curr-function = &new-function->next
+ 525           8d/address-> *(eax+0x10) 7/r32/edi
+ 526           e9/jump $parse-mu:word-loop/disp32
+ 527         }
+ 528         # otherwise abort
+ 529         e9/jump $parse-mu:abort/disp32
+ 530       } # end word loop
+ 531       e9/jump loop/disp32
+ 532     } # end line loop
+ 533 $parse-mu:end:
+ 534     # . reclaim locals (0x200 line data + 0xc stream header + 8 slice)
+ 535     81 0/subop/add %esp 0x214/imm32
+ 536     # . restore registers
+ 537     5f/pop-to-edi
+ 538     5a/pop-to-edx
+ 539     59/pop-to-ecx
+ 540     58/pop-to-eax
+ 541     # . epilogue
+ 542     89/<- %esp 5/r32/ebp
+ 543     5d/pop-to-ebp
+ 544     c3/return
+ 545 
+ 546 $parse-mu:abort:
+ 547     # error("unexpected top-level command: " word-slice "\n")
+ 548     (write-buffered Stderr "unexpected top-level command: ")
+ 549     (write-slice-buffered Stderr %edx)  # edx is a slice (start, end), not a string, so it needs the slice writer
+ 550     (write-buffered Stderr "\n")
+ 551     (flush Stderr)
+ 552     # . syscall(exit, 1)
+ 553     bb/copy-to-ebx  1/imm32
+ 554     b8/copy-to-eax  1/imm32/exit
+ 555     cd/syscall  0x80/imm8
+ 556     # never gets here
+ 556     # never gets here
+ 557 
+ 558 # errors considered:
+ 559 #   fn foo { {
+ 560 #   fn foo { }
+ 561 #   fn foo { } {
+ 562 #   fn foo  # no block
+ 563 populate-mu-function-header:  # first-line : (address stream byte), out : (address function)
+ 564     # . prologue
+ 565     55/push-ebp
+ 566     89/<- %ebp 4/r32/esp
+ 567     # . save registers
+ 568     50/push-eax
+ 569     51/push-ecx
+ 570     57/push-edi
+ 571     # edi = out
+ 572     8b/-> *(ebp+0xc) 7/r32/edi
+ 573     # var word-slice/ecx : slice
+ 574     68/push 0/imm32/end
+ 575     68/push 0/imm32/start
+ 576     89/<- %ecx 4/r32/esp
+ 577     # save function name: out->name = copy of the next word on the line
+ 578     (next-word *(ebp+8) %ecx)
+ 579     (slice-to-string Heap %ecx)  # => eax
+ 580     89/<- *edi 0/r32/eax
+ 581     # assert that next token is '{'
+ 582     (next-word *(ebp+8) %ecx)
+ 583     (slice-equal? %ecx "{")
+ 584     3d/compare-eax-and 0/imm32
+ 585     74/jump-if-equal $populate-mu-function-header:abort/disp8
+ 586     # assert that there's no further token
+ 587     {
+ 588       # word-slice = next-word(line)
+ 589       (next-word *(ebp+8) %ecx)
+ 590       # if (word-slice == '') break
+ 591       (slice-empty? %ecx)
+ 592       3d/compare-eax-and 0/imm32
+ 593       75/jump-if-not-equal break/disp8
+ 594       # if (slice-starts-with?(word-slice, "#")) break
+ 595       # . eax = *word-slice->start  (word-slice lives in ecx in this function)
+ 596       8b/-> *ecx 0/r32/eax
+ 597       8a/copy-byte *eax 0/r32/AL
+ 598       81 4/subop/and %eax 0xff/imm32
+ 599       # . if (eax == '#') break
+ 600       3d/compare-eax-and 0x23/imm32/hash
+ 601       74/jump-if-equal break/disp8
+ 602       # otherwise abort
+ 603       eb/jump $populate-mu-function-header:abort/disp8
+ 604     }
+ 605 $populate-mu-function-header:end:
+ 606     # . reclaim locals (the 8-byte slice)
+ 607     81 0/subop/add %esp 8/imm32
+ 608     # . restore registers
+ 609     5f/pop-to-edi
+ 610     59/pop-to-ecx
+ 611     58/pop-to-eax
+ 612     # . epilogue
+ 613     89/<- %esp 5/r32/ebp
+ 614     5d/pop-to-ebp
+ 615     c3/return
+ 616 
+ 617 $populate-mu-function-header:abort:
+ 618     # error("function header not in form 'fn <name> {'")
+ 619     (write-buffered Stderr "function header not in form 'fn <name> {' -- '")
+ 620     (rewind-stream *(ebp+8))
+ 621     (write-stream 2 *(ebp+8))  # NOTE(review): presumably writes straight to fd 2 while the text around it is still buffered in Stderr -- confirm ordering
+ 622     (write-buffered Stderr "'\n")
+ 623     (flush Stderr)
+ 624     # . syscall(exit, 1)
+ 625     bb/copy-to-ebx  1/imm32
+ 626     b8/copy-to-eax  1/imm32/exit
+ 627     cd/syscall  0x80/imm8
+ 628     # never gets here
+ 629 
+ 630 # errors considered:
+ 631 #   { abc
+ 632 populate-mu-function-body:  # in : (address buffered-file), out : (address function)
+ 633     # . prologue
+ 634     55/push-ebp
+ 635     89/<- %ebp 4/r32/esp
+ 636     # . save registers
+ 637     50/push-eax
+ 638     51/push-ecx
+ 639     52/push-edx
+ 640     53/push-ebx
+ 641     # var line/ecx : (stream byte 512)
+ 642     81 5/subop/subtract %esp 0x200/imm32
+ 643     68/push 0x200/imm32/length
+ 644     68/push 0/imm32/read
+ 645     68/push 0/imm32/write
+ 646     89/<- %ecx 4/r32/esp
+ 647     # var word-slice/edx : slice
+ 648     68/push 0/imm32/end
+ 649     68/push 0/imm32/start
+ 650     89/<- %edx 4/r32/esp
+ 651     # var open-curly-count/ebx : int = 1  (the '{' on the header line is already open)
+ 652     bb/copy-to-ebx 1/imm32
+ 653     { # line loop
+ 654 $populate-mu-function-body:line-loop:
+ 655       # if (open-curly-count == 0) break
+ 656       81 7/subop/compare %ebx 0/imm32
+ 657       0f 84/jump-if-equal break/disp32
+ 658       # line = read-line-buffered(in)
+ 659       (clear-stream %ecx)
+ 660       (read-line-buffered *(ebp+8) %ecx)
+ 661       # if (line->write == 0) break
+ 662       81 7/subop/compare *ecx 0/imm32
+ 663       0f 84/jump-if-equal break/disp32
+ 664       # word-slice = next-word(line)
+ 665       (next-word %ecx %edx)
+ 666       # if slice-empty?(word-slice) continue  (test the slice in edx, not the line stream in ecx)
+ 667       (slice-empty? %edx)
+ 668       3d/compare-eax-and 0/imm32
+ 669       75/jump-if-not-equal loop/disp8
+ 670       # if (slice-starts-with?(word-slice, '#')) continue
+ 671       # . eax = *word-slice->start
+ 672       8b/-> *edx 0/r32/eax
+ 673       8a/copy-byte *eax 0/r32/AL
+ 674       81 4/subop/and %eax 0xff/imm32
+ 675       # . if (eax == '#') continue
+ 676       3d/compare-eax-and 0x23/imm32/hash
+ 677       74/jump-if-equal loop/disp8
+ 678       {
+ 679         # if slice-equal?(word-slice, "{") ++open-curly-count
+ 680         {
+ 681           (slice-equal? %edx "{")
+ 682           3d/compare-eax-and 0/imm32
+ 683           74/jump-if-equal break/disp8
+ 684           43/increment-ebx
+ 685           eb/jump $curly-found:end/disp8
+ 686         }
+ 687         # else if slice-equal?(word-slice, "}") --open-curly-count
+ 688         {
+ 689           (slice-equal? %edx "}")
+ 690           3d/compare-eax-and 0/imm32
+ 691           74/jump-if-equal break/disp8
+ 692           4b/decrement-ebx
+ 693           eb/jump $curly-found:end/disp8
+ 694         }
+ 695         # else break  (any other first word ends body parsing for now)
+ 696         eb/jump $populate-mu-function-body:end/disp8
+ 697       }
+ 698       # - check for invalid tokens after curly
+ 699 $curly-found:end:
+ 700       # second-word-slice = next-word(line)
+ 701       (next-word %ecx %edx)
+ 702       # if slice-empty?(second-word-slice) continue
+ 703       (slice-empty? %edx)
+ 704       3d/compare-eax-and 0/imm32
+ 705       0f 85/jump-if-not-equal loop/disp32
+ 706       # if (slice-starts-with?(second-word-slice, '#')) continue
+ 707       # . eax = *second-word-slice->start
+ 708       8b/-> *edx 0/r32/eax
+ 709       8a/copy-byte *eax 0/r32/AL
+ 710       81 4/subop/and %eax 0xff/imm32
+ 711       # . if (eax == '#') continue
+ 712       3d/compare-eax-and 0x23/imm32/hash
+ 713       0f 84/jump-if-equal loop/disp32
+ 714       # abort
+ 715       eb/jump $populate-mu-function-body:abort/disp8
+ 716     } # end line loop
+ 717 $populate-mu-function-body:end:
+ 718     # . reclaim locals (0x200 line data + 0xc stream header + 8 slice)
+ 719     81 0/subop/add %esp 0x214/imm32
+ 720     # . restore registers
+ 721     5b/pop-to-ebx
+ 722     5a/pop-to-edx
+ 723     59/pop-to-ecx
+ 724     58/pop-to-eax
+ 725     # . epilogue
+ 726     89/<- %esp 5/r32/ebp
+ 727     5d/pop-to-ebp
+ 728     c3/return
+ 729 
+ 730 $populate-mu-function-body:abort:
+ 731     # error("'{' or '}' should be on its own line, but got '")
+ 732     (write-buffered Stderr "'{' or '}' should be on its own line, but got '")
+ 733     (rewind-stream %ecx)
+ 734     (write-stream 2 %ecx)  # NOTE(review): presumably writes straight to fd 2 while the text around it is still buffered in Stderr -- confirm ordering
+ 735     (write-buffered Stderr "'\n")
+ 736     (flush Stderr)
+ 737     # . syscall(exit, 1)
+ 738     bb/copy-to-ebx  1/imm32
+ 739     b8/copy-to-eax  1/imm32/exit
+ 740     cd/syscall  0x80/imm8
+ 741     # never gets here
+ 742 
+ 743 check-mu-types:
+ 744     # . prologue
+ 745     55/push-ebp
+ 746     89/<- %ebp 4/r32/esp
+ 747     # placeholder: no type checks are performed yet
+ 748 $check-mu-types:end:
+ 749     # . epilogue
+ 750     89/<- %esp 5/r32/ebp
+ 751     5d/pop-to-ebp
+ 752     c3/return
+ 753 
+ 754 emit-subx:  # out : (address buffered-file)
+ 755     # . prologue
+ 756     55/push-ebp
+ 757     89/<- %ebp 4/r32/esp
+ 758     # . save registers
+ 759     50/push-eax
+ 760     51/push-ecx
+ 761     57/push-edi
+ 762     # edi = out
+ 763     8b/-> *(ebp+8) 7/r32/edi
+ 764     # var curr/ecx : (address function) = *Program  (first function in the list, or 0)
+ 765     8b/-> *Program 1/r32/ecx
+ 766     {
+ 767       # if (curr == NULL) break
+ 768       81 7/subop/compare %ecx 0/imm32
+ 769       0f 84/jump-if-equal break/disp32
+ 770       (emit-subx-function %edi %ecx)
+ 771       # curr = curr->next  (0x10 is the same offset stored in Function-next)
+ 772       8b/-> *(ecx+0x10) 1/r32/ecx
+ 773       e9/jump loop/disp32
+ 774     }
+ 775 $emit-subx:end:
+ 776     # . restore registers
+ 777     5f/pop-to-edi
+ 778     59/pop-to-ecx
+ 779     58/pop-to-eax
+ 780     # . epilogue
+ 781     89/<- %esp 5/r32/ebp
+ 782     5d/pop-to-ebp
+ 783     c3/return
+ 784 
+ 785 # == Emitting a function
+ 786 # Emit function header
+ 787 # Emit function prologue
+ 788 # Translate function body
+ 789 # Emit function epilogue
+ 790 
+ 791 emit-subx-function:  # out : (address buffered-file), f : (address function)
+ 792     # . prologue
+ 793     55/push-ebp
+ 794     89/<- %ebp 4/r32/esp
+ 795     # . save registers
+ 796     50/push-eax
+ 797     51/push-ecx
+ 798     57/push-edi
+ 799     # edi = out
+ 800     8b/-> *(ebp+8) 7/r32/edi
+ 801     # ecx = f
+ 802     8b/-> *(ebp+0xc) 1/r32/ecx
+ 803     # emit "<f->name>:", the prologue, the body block, then the epilogue
+ 804     (write-buffered %edi *ecx)
+ 805     (write-buffered %edi ":\n")
+ 806     (emit-subx-prologue %edi)
+ 807     (emit-subx-block %edi *(ecx+0xc))  # f->body: 4th field (name, inputs, outputs, body, next)
+ 808     (emit-subx-epilogue %edi)
+ 809 $emit-subx-function:end:
+ 810     # . restore registers
+ 811     5f/pop-to-edi
+ 812     59/pop-to-ecx
+ 813     58/pop-to-eax
+ 814     # . epilogue
+ 815     89/<- %esp 5/r32/ebp
+ 816     5d/pop-to-ebp
+ 817     c3/return
+ 818 
+ 819 emit-subx-block:  # out : (address buffered-file), block : (address block)
+ 820     # . prologue
+ 821     55/push-ebp
+ 822     89/<- %ebp 4/r32/esp
+ 823     # placeholder: nothing is emitted for the block yet; both arguments are ignored
+ 824 $emit-subx-block:end:
+ 825     # . epilogue
+ 826     89/<- %esp 5/r32/ebp
+ 827     5d/pop-to-ebp
+ 828     c3/return
+ 829 
+ 830 emit-subx-statement:  # out : (address buffered-file), stmt : (address statement), vars : (address variable), regs : (address array (address variable)), primitives : (address opcode-info), functions : (address function)
+ 831     # . prologue
+ 832     55/push-ebp
+ 833     89/<- %ebp 4/r32/esp
+ 834     # . save registers
+ 835     50/push-eax
+ 836     51/push-ecx
+ 837     # var curr/ecx : (address primitive) = primitives
+ 838     8b/-> *(ebp+0x18) 1/r32/ecx
+ 839     {
+ 840       # if (curr == null) abort  (ran off the end of the list without a match)
+ 841       81 7/subop/compare %ecx 0/imm32
+ 842       0f 84/jump-if-equal $emit-subx-statement:abort/disp32
+ 843       # if (match(curr, stmt)) break
+ 844       (mu-stmt-matches-primitive? *(ebp+0xc) %ecx)  # => eax
+ 845       3d/compare-eax-and 0/imm32
+ 846       75/jump-if-not-equal break/disp8
+ 847       # emit code for stmt according to curr and vars  (not implemented yet)
+ 848       # curr = curr->next
+ 849       8b/-> *(ecx+0x10) 1/r32/ecx
+ 850       e9/jump loop/disp32
+ 851     }
+ 852 $emit-subx-statement:end:
+ 853     # . restore registers
+ 854     59/pop-to-ecx
+ 855     58/pop-to-eax
+ 856     # . epilogue
+ 857     89/<- %esp 5/r32/ebp
+ 858     5d/pop-to-ebp
+ 859     c3/return
+ 860 
+ 861 $emit-subx-statement:abort:
+ 862     # error("couldn't translate '" stmt "'\n")
+ 863     (write-buffered Stderr "couldn't translate '")
+ 864 #?     (emit-string Stderr *(ebp+0xc))  # TODO
+ 865     (write-buffered Stderr "'\n")
+ 866     (flush Stderr)
+ 867     # . syscall(exit, 1)
+ 868     bb/copy-to-ebx  1/imm32
+ 869     b8/copy-to-eax  1/imm32/exit
+ 870     cd/syscall  0x80/imm8
+ 871     # never gets here
+ 872 
+ 873 mu-stmt-matches-primitive?:  # stmt : (address statement), primitive : (address opcode-info) => result/eax : boolean
+ 874     # . prologue
+ 875     55/push-ebp
+ 876     89/<- %ebp 4/r32/esp
+ 877     # . save registers
+ 878     51/push-ecx
+ 879     # return primitive->name == stmt->operation  (each is the first word of its record; ecx = stmt, eax = primitive)
+ 880     8b/-> *(ebp+8) 1/r32/ecx
+ 881     8b/-> *(ebp+0xc) 0/r32/eax
+ 882     (string-equal? *ecx *eax)  # => eax
+ 883 $mu-stmt-matches-primitive?:end:
+ 884     # . restore registers
+ 885     59/pop-to-ecx
+ 886     # . epilogue
+ 887     89/<- %esp 5/r32/ebp
+ 888     5d/pop-to-ebp
+ 889     c3/return
+ 890 
+ 891 test-emit-subx-statement-primitive:
+ 892     # Primitive operation on a variable on the stack.
+ 893     #   increment foo
+ 894     # =>
+ 895     #   ff 0/subop/increment *(ebp-8)
+ 896     #
+ 897     # There's a variable on the var stack as follows:
+ 898     #   name: 'foo'
+ 899     #   type: int
+ 900     #   location: -8  (negative numbers are on the stack;
+ 901     #                   0-7 are in registers;
+ 902     #                   higher positive numbers are invalid)
+ 903     #
+ 904     # There's nothing in registers.
+ 905     #
+ 906     # There's a primitive with this info:
+ 907     #   name: 'increment'
+ 908     #   inout: int/mem
+ 909     #   value: 'ff 0/subop/increment'
+ 910     #
+ 911     # There's nothing in functions.
+ 912     #
+ 913     # . prologue
+ 914     55/push-ebp
+ 915     89/<- %ebp 4/r32/esp
+ 916     # setup: clear the test output stream and the output buffered-file's buffer
+ 917     (clear-stream _test-output-stream)
+ 918     (clear-stream _test-output-buffered-file->buffer)
+ 919     # . ecx = vars  (records are built on the stack, last field pushed first, so ecx points at the name)
+ 920     68/push 0/imm32/next
+ 921     68/push -8/imm32/stack-offset
+ 922     68/push 0/imm32/int  # TODO
+ 923     68/push "foo"/imm32
+ 924     89/<- %ecx 4/r32/esp
+ 925     # . edx = operand  (a list node pointing at var foo)
+ 926     68/push 0/imm32/next
+ 927     51/push-ecx/var-foo
+ 928     89/<- %edx 4/r32/esp
+ 929     # . edx = stmt  (operation, inouts, outputs, next; edx is reused once the operand has been pushed)
+ 930     68/push 0/imm32/next
+ 931     68/push 0/imm32/outputs
+ 932     52/push-edx/operand
+ 933     68/push "increment"/imm32/operation
+ 934     89/<- %edx 4/r32/esp
+ 935     # . ebx = primitives  (a single primitive; its 'next' field ends up at offset 0x10)
+ 936     68/push 0/imm32/next
+ 937     68/push "ff 0/subop/increment"/imm32
+ 938     68/push 0/imm32/type-int
+ 939     68/push 0/imm32/storage-memory
+ 940     68/push "increment"/imm32/name
+ 941     89/<- %ebx 4/r32/esp
+ 942     # convert  (regs = 0, functions = 0)
+ 943     (emit-subx-statement _test-output-buffered-file %edx %ecx 0 %ebx 0)
+ 944     (flush _test-output-buffered-file)
+ 945 +--  6 lines: #?     # dump _test-output-stream --------------------------------------------------------------------------------------------------------------
+ 951     # check output
+ 952     (check-next-stream-line-equal _test-output-stream "ff 0/subop/increment *(ebp-8)" "F - test-emit-subx-statement-primitive/0")
+ 953     # . reclaim locals (15 words pushed above = 0x3c bytes)
+ 954     81 0/subop/add %esp 0x3c/imm32
+ 955     # . epilogue
+ 956     89/<- %esp 5/r32/ebp
+ 957     5d/pop-to-ebp
+ 958     c3/return
+ 959 
+ 960 test-emit-subx-statement-function-call:
+ 961     # Call a function on a variable on the stack.
+ 962     #   f var
+ 963     # =>
+ 964     #   (f2 *(ebp-8))
+ 965     # (Changing the function name just to help disambiguate things.)
+ 966     #
+ 967     # There's a variable on the var stack as follows:
+ 968     #   name: 'var'
+ 969     #   type: int
+ 970     #   location: -8  (negative numbers are on the stack;
+ 971     #                   0-7 are in registers;
+ 972     #                   higher positive numbers are invalid)
+ 973     #
+ 974     # There's nothing in registers.
+ 975     #
+ 976     # There's nothing in primitives.
+ 977     #
+ 978     # There's a function with this info:
+ 979     #   name: 'f'
+ 980     #   inout: int/mem
+ 981     #   value: 'f2'
+ 982     #
+ 983     # . prologue
+ 984     55/push-ebp
+ 985     89/<- %ebp 4/r32/esp
+ 986     # setup: clear the test output stream and the output buffered-file's buffer
+ 987     (clear-stream _test-output-stream)
+ 988     (clear-stream _test-output-buffered-file->buffer)
+ 989     # . ecx = vars  (records are built on the stack, last field pushed first, so ecx points at the name)
+ 990     68/push 0/imm32/next
+ 991     68/push -8/imm32/stack-offset
+ 992     68/push 0/imm32/int  # TODO
+ 993     68/push "var"/imm32
+ 994     89/<- %ecx 4/r32/esp
+ 995     # . edx = operand  (a list node pointing at var)
+ 996     68/push 0/imm32/next
+ 997     51/push-ecx/var
+ 998     89/<- %edx 4/r32/esp
+ 999     # . edx = stmt  (operation, inouts, outputs, next; edx is reused once the operand has been pushed)
+1000     68/push 0/imm32/next
+1001     68/push 0/imm32/outputs
+1002     52/push-edx/operand
+1003     68/push "f"/imm32/operation
+1004     89/<- %edx 4/r32/esp
+1005     # . ebx = functions  (a single entry mapping 'f' to 'f2')
+1006     68/push 0/imm32/next
+1007     68/push "f2"/imm32
+1008     68/push 0/imm32/type-int
+1009     68/push 0/imm32/storage-memory
+1010     68/push "f"/imm32/name
+1011     89/<- %ebx 4/r32/esp
+1012     # convert  (regs = 0, primitives = 0)
+1013     (emit-subx-statement _test-output-buffered-file %edx %ecx 0 0 %ebx)
+1014     (flush _test-output-buffered-file)
+1015 +--  6 lines: #?     # dump _test-output-stream --------------------------------------------------------------------------------------------------------------
+1021     # check output  -- NOTE(review): the header above shows "(f2 *(ebp-8))" with parens; confirm which form is intended
+1022     (check-next-stream-line-equal _test-output-stream "f2 *(ebp-8)" "F - test-emit-subx-statement-function-call/0")
+1023     # . reclaim locals (15 words pushed above = 0x3c bytes)
+1024     81 0/subop/add %esp 0x3c/imm32
+1025     # . epilogue
+1026     89/<- %esp 5/r32/ebp
+1027     5d/pop-to-ebp
+1028     c3/return
+1029 
+1030 emit-subx-prologue:  # out : (address buffered-file)
+1031     # . prologue
+1032     55/push-ebp
+1033     89/<- %ebp 4/r32/esp
+1034     # write the three fixed lines that open every emitted function
+1035     (write-buffered *(ebp+8) "# . prologue\n")
+1036     (write-buffered *(ebp+8) "55/push-ebp\n")
+1037     (write-buffered *(ebp+8) "89/<- %ebp 4/r32/esp\n")
+1038 $emit-subx-prologue:end:
+1039     # . epilogue
+1040     89/<- %esp 5/r32/ebp
+1041     5d/pop-to-ebp
+1042     c3/return
+1043 
+1044 emit-subx-epilogue:  # out : (address buffered-file)
+1045     # . prologue
+1046     55/push-ebp
+1047     89/<- %ebp 4/r32/esp
+1048     # write the four fixed lines that close every emitted function
+1049     (write-buffered *(ebp+8) "# . epilogue\n")
+1050     (write-buffered *(ebp+8) "89/<- %esp 5/r32/ebp\n")
+1051     (write-buffered *(ebp+8) "5d/pop-to-ebp\n")
+1052     (write-buffered *(ebp+8) "c3/return\n")
+1053 $emit-subx-epilogue:end:
+1054     # . epilogue
+1055     89/<- %esp 5/r32/ebp
+1056     5d/pop-to-ebp
+1057     c3/return
 
-- cgit 1.4.1-2-gfad0