5727

author: Kartik Agaram <vc@akkartik.com> 2019-11-08 11:32:14 -0800
committer: Kartik Agaram <vc@akkartik.com> 2019-11-08 11:32:14 -0800
commit: 0c31de3852febc1f8fc5aaf1daf3d5693155be2c (patch)
tree: 27161fbf901bb02118be1319d61075c7c940806e /apps/mu.subx
parent: 6dd309a2e134a73df8b05e26da193e9f121758c6 (diff)
download: mu-0c31de3852febc1f8fc5aaf1daf3d5693155be2c.tar.gz
1 files changed, 157 insertions, 6 deletions
diff --git a/apps/mu.subx b/apps/mu.subx
index adf5234a..0f5a147a 100644
--- a/apps/mu.subx
+++ b/apps/mu.subx
@@ -4,6 +4,111 @@
 # To run:
 #   $ ./ntranslate init.linux 0*.subx apps/mu.subx
 
+# == Goals
+# 1. Be memory safe. It should be impossible to corrupt the heap, or to create
+# a bad pointer. (Requires strong type safety.)
+# 2. Do as little as possible to achieve goal 1.
+#   - runtime checks to avoid complex static analysis
+#   - minimize impedance mismatch between source language and SubX target
+
+# == Language description
+#
+# A program is a sequence of function definitions.
+#
+# Function example:
+#   fn foo n: int -> result/eax: int {
+#     ...
+#   }
+#
+# Functions consist of a name, optional inputs, optional outputs and a block.
+#
+# Inputs are variables with types. Outputs are variables in registers with
+# (word-size) types.
+#
+# All variables have a type and storage specifier. They can be placed either
+# in memory (on the stack) or in one of 6 named registers.
+#   eax ecx edx ebx esi edi
+# Variables in registers must be word-sized (int or address).
+# Variables not explicitly placed in a register are on the stack.
+# Variables in registers need not have a name; in that case you refer to them
+# directly by the register name.
+#
+# Blocks mostly consist of statements.
+#
+# Statements mostly consist of a name, optional inputs and optional outputs.
+#
+# Inputs are variables or literals. Variables need to specify type (and
+# storage) the first time they're mentioned but not later.
+#
+# Outputs can only be variables.
+#
+# Statement names must be either primitives or user-defined functions.
+#
+# Primitives can write to any register.
+# User-defined functions only write to hard-coded registers. Outputs of each
+# call must have the same registers as in the function definition.
+#
+# There are some other statement types:
+#   - blocks. Multiple statements surrounded by '{...}' and optionally
+#     prefixed with a label name and ':'
+#       - {
+#           ...
+#         }
+#       - foo: {
+#           ...
+#         }
+#
+#   - variable definitions on the stack. E.g.:
+#       - var foo: int
+#       - var bar: (array int 3)
+#     There's no initializer; variables are automatically initialized.
+#
+#   - variable definitions in a register. E.g.:
+#       - var foo/eax : int <- add bar 1
+#     The initializer is mandatory and must be a valid instruction that writes
+#     a single output to the right register. In practice registers will
+#     usually be either initialized by primitives or copied from eax.
+#       - var eax : int <- foo bar quux
+#         var floo/ecx : int <- copy eax
+#
+# Still todo:
+#   global variables
+#   heap allocations (planned name: 'handle')
+#   user-defined types: 'type' for structs, 'choice' for unions
+#   short-lived 'address' type for efficiently writing inside nested structs
+
+# Now that we know what the language looks like in the large, let's think
+# about how translation happens from the bottom up.
+
+# == Book-keeping while emitting code for a single statement
+# Immutable data:
+#   function info
+#
+# Mutable data:
+#   stack: variables currently in scope
+#     block id
+#     type
+#     ebp offset for function | register id
+#   dict: register -> var
+
+# == Compiling a single instruction
+# Determine the function or primitive being called.
+#   If no matches, show all functions/primitives with the same name, along
+#   with reasons they don't match. (type and storage checking)
+#   It must be a function if:
+#     #outputs > 1, or
+#     #inouts > 2, or
+#     #inouts + #outputs > 2
+# If it's a function, emit:
+#   (low-level-name <rm32 or imm32>...)
+# Otherwise (it's a primitive):
+#   assert(#inouts <= 2 && #outs <= 1 && (#inouts + #outs) <= 2)
+#   emit opcode
+#   emit-rm32(inout[0])
+#   if out[0] exists: emit-rm32(out[0])
+#   else if inout[1] is a literal: emit-imm32(inout[1])
+#   else: emit-rm32(inout[1])
+
 # A sketch of planned data structures. Still highly speculative.
 == data
 
@@ -63,12 +168,6 @@ Function-size:
 #   car: (address type-sexpr)
 #   cdr: (address type-sexpr)
 
-# Still todo:
-#   global variables
-#   heap allocations (planned name: 'handle')
-#   user-defined types: 'type' for structs, 'choice' for unions
-#   short-lived 'address' type for efficiently writing inside nested structs
-
 == code
 
 Entry:
@@ -247,6 +346,58 @@ test-convert-multiple-function-skeletons:
     5d/pop-to-ebp
     c3/return
 
+test-convert-function-with-arg:
+    # function with one arg and a copy instruction
+    #   fn foo n : int -> result/eax : int {
+    #     result <- copy n
+    #   }
+    # =>
+    #   foo:
+    #     # . prologue
+    #     55/push-ebp
+    #     89/<- %ebp 4/r32/esp
+    #     {
+    #     # result <- copy n
+    #     8b/-> *(ebp+8) 0/r32/eax
+    #     }
+    #     # . epilogue
+    #     89/<- %esp 5/r32/ebp
+    #     5d/pop-to-ebp
+    #     c3/return
+    # . prologue
+    55/push-ebp
+    89/<- %ebp 4/r32/esp
+    # setup
+    (clear-stream _test-input-stream)
+    (clear-stream _test-input-buffered-file->buffer)
+    (clear-stream _test-output-stream)
+    (clear-stream _test-output-buffered-file->buffer)
+    # input: for now just the bare skeleton 'fn foo {}'; the arg and copy instruction sketched above are not yet written or checked
+    (write _test-input-stream "fn foo {\n")
+    (write _test-input-stream "}\n")
+    # convert
+    (convert-mu _test-input-buffered-file _test-output-buffered-file)
+    (flush _test-output-buffered-file)
+#?     # dump _test-output-stream {{{
+#?     (write 2 "^")
+#?     (write-stream 2 _test-output-stream)
+#?     (write 2 "$\n")
+#?     (rewind-stream _test-output-stream)
+#?     # }}}
+    # check output (failure messages are tagged with this test's own name so failures are attributed correctly)
+    (check-next-stream-line-equal _test-output-stream "foo:"                  "F - test-convert-function-with-arg/0")
+    (check-next-stream-line-equal _test-output-stream "# . prologue"          "F - test-convert-function-with-arg/1")
+    (check-next-stream-line-equal _test-output-stream "55/push-ebp"           "F - test-convert-function-with-arg/2")
+    (check-next-stream-line-equal _test-output-stream "89/<- %ebp 4/r32/esp"  "F - test-convert-function-with-arg/3")
+    (check-next-stream-line-equal _test-output-stream "# . epilogue"          "F - test-convert-function-with-arg/4")
+    (check-next-stream-line-equal _test-output-stream "89/<- %esp 5/r32/ebp"  "F - test-convert-function-with-arg/5")
+    (check-next-stream-line-equal _test-output-stream "5d/pop-to-ebp"         "F - test-convert-function-with-arg/6")
+    (check-next-stream-line-equal _test-output-stream "c3/return"             "F - test-convert-function-with-arg/7")
+    # . epilogue
+    89/<- %esp 5/r32/ebp
+    5d/pop-to-ebp
+    c3/return
+
 parse-mu:  # in : (address buffered-file)
     # pseudocode
     #   var curr-function = Program
author	Kartik Agaram <vc@akkartik.com>	2019-11-08 11:32:14 -0800
committer	Kartik Agaram <vc@akkartik.com>	2019-11-08 11:32:14 -0800
commit	0c31de3852febc1f8fc5aaf1daf3d5693155be2c (patch)
tree	27161fbf901bb02118be1319d61075c7c940806e /apps/mu.subx
parent	6dd309a2e134a73df8b05e26da193e9f121758c6 (diff)
download	mu-0c31de3852febc1f8fc5aaf1daf3d5693155be2c.tar.gz