about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Kartik Agaram <vc@akkartik.com> 2020-06-13 20:23:51 -0700
committer Kartik Agaram <vc@akkartik.com> 2020-06-13 20:23:51 -0700
commit ef845524e90c736b6cbab9320bc41114d421ff63 (patch)
tree cd9d0e11b1cba8fa035340e697ee0fa9ca0ae0e2
parent 7e55a20ff4c9bdb64b55daddd551f19cd7a51bcb (diff)
download mu-ef845524e90c736b6cbab9320bc41114d421ff63.tar.gz
6516 - operations on bytes
Byte-oriented addressing is only supported in a couple of instructions
in SubX. As a result, variables of type 'byte' can't live on the stack,
or in registers 'esi' and 'edi'.
-rwxr-xr-x apps/mu bin 274733 -> 278119 bytes
-rw-r--r-- apps/mu.subx 216
-rw-r--r-- mu_instructions 4
-rw-r--r-- mu_summary 19
4 files changed, 228 insertions, 11 deletions
diff --git a/apps/mu b/apps/mu
index b261cc99..867801d9 100755
--- a/apps/mu
+++ b/apps/mu
Binary files differ
diff --git a/apps/mu.subx b/apps/mu.subx
index 858ef59a..ff80a5b8 100644
--- a/apps/mu.subx
+++ b/apps/mu.subx
@@ -382,9 +382,9 @@ Tree-size:  # (addr int)
 
 # Types
 
-# TODO: heap allocations here can't be reclaimed
+# TODO: Turn this data structure into valid Mu, with (fake) handles rather than addrs.
 Type-id:  # (stream (addr array byte))
-  0x1c/imm32/write
+  0x20/imm32/write
   0/imm32/read
   0x100/imm32/size
   # data
@@ -396,9 +396,9 @@ Type-id:  # (stream (addr array byte))
   "boolean"/imm32  # 5
   "constant"/imm32  # 6: like a literal, but value is an int in Var-offset
   "offset"/imm32  # 7: (offset T) is guaranteed to be a 32-bit multiple of size-of(T)
-  0/imm32
   # 0x20
-  0/imm32 0/imm32 0/imm32 0/imm32 0/imm32 0/imm32 0/imm32 0/imm32
+  "byte"/imm32  # 8
+          0/imm32 0/imm32 0/imm32 0/imm32 0/imm32 0/imm32 0/imm32
   0/imm32 0/imm32 0/imm32 0/imm32 0/imm32 0/imm32 0/imm32 0/imm32
   0/imm32 0/imm32 0/imm32 0/imm32 0/imm32 0/imm32 0/imm32 0/imm32
   0/imm32 0/imm32 0/imm32 0/imm32 0/imm32 0/imm32 0/imm32 0/imm32
@@ -481,7 +481,7 @@ convert-mu:  # in: (addr buffered-file), out: (addr buffered-file), err: (addr b
     89/<- %ebp 4/r32/esp
     # initialize global data structures
     c7 0/subop/copy *Next-block-index 1/imm32
-    c7 0/subop/copy *Type-id 0x20/imm32  # stream-write
+    c7 0/subop/copy *Type-id 0x24/imm32  # stream-write
     c7 0/subop/copy *_Program-functions 0/imm32
     c7 0/subop/copy *_Program-functions->payload 0/imm32
     c7 0/subop/copy *_Program-types 0/imm32
@@ -1173,6 +1173,64 @@ test-convert-function-with-local-var-dereferenced:
     5d/pop-to-ebp
     c3/return
 
+# variables of type 'byte' are not allowed on the stack
+# This test therefore keeps every byte variable in a register (eax, ecx) and
+# reaches memory only through an (addr byte) held in edx. It checks the SubX
+# emitted for 'copy-byte' (register and dereferenced source) and 'copy-byte-to'.
+test-convert-function-with-byte-operations:
+    # . prologue
+    55/push-ebp
+    89/<- %ebp 4/r32/esp
+    # setup
+    (clear-stream _test-input-stream)
+    (clear-stream $_test-input-buffered-file->buffer)
+    (clear-stream _test-output-stream)
+    (clear-stream $_test-output-buffered-file->buffer)
+    #
+    (write _test-input-stream "fn foo {\n")
+    (write _test-input-stream "  var x/eax: byte <- copy 0\n")
+    (write _test-input-stream "  var y/ecx: byte <- copy 0\n")
+    (write _test-input-stream "  y <- copy-byte x\n")
+    (write _test-input-stream "  var z/edx: (addr byte) <- copy 0\n")
+    (write _test-input-stream "  y <- copy-byte *z\n")
+    (write _test-input-stream "  copy-byte-to *z, x\n")
+    (write _test-input-stream "}\n")
+    # convert
+    (convert-mu _test-input-buffered-file _test-output-buffered-file Stderr 0)
+    (flush _test-output-buffered-file)
+#?     # dump _test-output-stream {{{
+#?     (write 2 "^")
+#?     (write-stream 2 _test-output-stream)
+#?     (write 2 "$\n")
+#?     (rewind-stream _test-output-stream)
+#?     # }}}
+    # check output
+    (check-next-stream-line-equal _test-output-stream "foo:"                                        "F - test-convert-function-with-byte-operations/0")
+    (check-next-stream-line-equal _test-output-stream "  # . prologue"                              "F - test-convert-function-with-byte-operations/1")
+    (check-next-stream-line-equal _test-output-stream "  55/push-ebp"                               "F - test-convert-function-with-byte-operations/2")
+    (check-next-stream-line-equal _test-output-stream "  89/<- %ebp 4/r32/esp"                      "F - test-convert-function-with-byte-operations/3")
+    (check-next-stream-line-equal _test-output-stream "  {"                                         "F - test-convert-function-with-byte-operations/4")
+    (check-next-stream-line-equal _test-output-stream "$foo:0x00000001:loop:"                       "F - test-convert-function-with-byte-operations/5")
+    # each register variable first saves the register's old value, then initializes it
+    (check-next-stream-line-equal _test-output-stream "    ff 6/subop/push %eax"                    "F - test-convert-function-with-byte-operations/6")
+    (check-next-stream-line-equal _test-output-stream "    b8/copy-to-eax 0/imm32"                  "F - test-convert-function-with-byte-operations/7")
+    (check-next-stream-line-equal _test-output-stream "    ff 6/subop/push %ecx"                    "F - test-convert-function-with-byte-operations/8")
+    (check-next-stream-line-equal _test-output-stream "    b9/copy-to-ecx 0/imm32"                  "F - test-convert-function-with-byte-operations/9")
+    # y <- copy-byte x: source x/eax is the rm32 operand, destination y/ecx is r32 (printed as 1)
+    (check-next-stream-line-equal _test-output-stream "    8a/byte-> %eax 0x00000001/r32"           "F - test-convert-function-with-byte-operations/10")
+    (check-next-stream-line-equal _test-output-stream "    ff 6/subop/push %edx"                    "F - test-convert-function-with-byte-operations/11")
+    (check-next-stream-line-equal _test-output-stream "    ba/copy-to-edx 0/imm32"                  "F - test-convert-function-with-byte-operations/12")
+    # y <- copy-byte *z: same opcode, but the source is the memory at z/edx
+    (check-next-stream-line-equal _test-output-stream "    8a/byte-> *edx 0x00000001/r32"           "F - test-convert-function-with-byte-operations/13")
+    # copy-byte-to *z, x: destination is the memory at z/edx, source x/eax is r32 (printed as 0)
+    (check-next-stream-line-equal _test-output-stream "    88/byte<- *edx 0x00000000/r32"           "F - test-convert-function-with-byte-operations/14")
+    # saved registers are restored in reverse order of definition
+    (check-next-stream-line-equal _test-output-stream "    8f 0/subop/pop %edx"                     "F - test-convert-function-with-byte-operations/15")
+    (check-next-stream-line-equal _test-output-stream "    8f 0/subop/pop %ecx"                     "F - test-convert-function-with-byte-operations/16")
+    (check-next-stream-line-equal _test-output-stream "    8f 0/subop/pop %eax"                     "F - test-convert-function-with-byte-operations/17")
+    (check-next-stream-line-equal _test-output-stream "  }"                                         "F - test-convert-function-with-byte-operations/18")
+    (check-next-stream-line-equal _test-output-stream "$foo:0x00000001:break:"                      "F - test-convert-function-with-byte-operations/19")
+    (check-next-stream-line-equal _test-output-stream "  # . epilogue"                              "F - test-convert-function-with-byte-operations/20")
+    (check-next-stream-line-equal _test-output-stream "  89/<- %esp 5/r32/ebp"                      "F - test-convert-function-with-byte-operations/21")
+    (check-next-stream-line-equal _test-output-stream "  5d/pop-to-ebp"                             "F - test-convert-function-with-byte-operations/22")
+    (check-next-stream-line-equal _test-output-stream "  c3/return"                                 "F - test-convert-function-with-byte-operations/23")
+    # . epilogue
+    89/<- %esp 5/r32/ebp
+    5d/pop-to-ebp
+    c3/return
+
 test-convert-compare-register-with-literal:
     # . prologue
     55/push-ebp
@@ -5976,6 +6034,7 @@ parse-mu-var-def:  # line: (addr stream byte), vars: (addr stack live-var), out:
     3d/compare-eax-and 0/imm32
     {
       75/jump-if-!= break/disp8
+      # TODO: disallow vars of type 'byte' on the stack
       # ensure that there's nothing else on this line
       (next-mu-token *(ebp+8) %ecx)
       (slice-empty? %ecx)  # => eax
@@ -5988,6 +6047,7 @@ parse-mu-var-def:  # line: (addr stream byte), vars: (addr stack live-var), out:
     # or v has a register and there's more to this line
     {
       74/jump-if-= break/disp8
+      # TODO: disallow vars of type 'byte' in registers 'esi' or 'edi'
       # ensure that the next word is '<-'
       (next-mu-token *(ebp+8) %ecx)
       (slice-equal? %ecx "<-")  # => eax
@@ -7816,8 +7876,15 @@ compute-size-of-type-id:  # t: type-id -> result/eax: int
     # eax = t
     8b/-> *(ebp+8) 0/r32/eax
     # if v is a literal, return 0
-    3d/compare-eax-and 0/imm32
+    3d/compare-eax-and 0/imm32/literal
     74/jump-if-= $compute-size-of-type-id:end/disp8  # eax changes type from type-id to int
+    # if v is a byte, return 1
+    {
+      3d/compare-eax-and 8/imm32/byte
+      75/jump-if-!= break/disp8
+      b8/copy-to-eax 1/imm32
+      eb/jump $compute-size-of-type-id:end/disp8
+    }
     # if v has a user-defined type, compute its size
     # TODO: support non-atom type
     (find-typeinfo %eax %ecx)
@@ -11499,6 +11566,61 @@ _Primitive-copy-lit-to-mem:  # (payload primitive)
     0/imm32/no-disp32
     1/imm32/output-is-write-only
     0x11/imm32/alloc-id:fake
+    _Primitive-copy-byte-from-reg/imm32/next
+# - copy byte
+_Primitive-copy-byte-from-reg:  # (payload primitive)
+    0x11/imm32/alloc-id:fake:payload
+    # var/reg <- copy-byte var2/reg2 => 8a/byte-> %var2 var/r32
+    # Template: name "copy-byte", one byte inout in a register, one byte output
+    # in a register. Each handle field below is a fake alloc-id followed by an address.
+    0x11/imm32/alloc-id:fake
+    _string-copy-byte/imm32/name
+    0x11/imm32/alloc-id:fake
+    Single-byte-var-in-some-register/imm32/inouts
+    0x11/imm32/alloc-id:fake
+    Single-byte-var-in-some-register/imm32/outputs
+    0x11/imm32/alloc-id:fake
+    _string_8a_copy_byte/imm32/subx-name
+    # operand selectors: the source inout goes in rm32, the output register in r32
+    1/imm32/rm32-is-first-inout
+    3/imm32/r32-is-first-output
+    0/imm32/no-imm32
+    0/imm32/no-disp32
+    1/imm32/output-is-write-only
+    # link to the next primitive in the list
+    0x11/imm32/alloc-id:fake
+    _Primitive-copy-byte-from-mem/imm32/next
+_Primitive-copy-byte-from-mem:  # (payload primitive)
+    0x11/imm32/alloc-id:fake:payload
+    # var/reg <- copy-byte *var2/reg2 => 8a/byte-> *var2 var/r32
+    # Same name and opcode as the register-source variant; only the inout
+    # template differs (a byte in memory rather than in a register).
+    0x11/imm32/alloc-id:fake
+    _string-copy-byte/imm32/name
+    0x11/imm32/alloc-id:fake
+    Single-byte-var-in-mem/imm32/inouts
+    0x11/imm32/alloc-id:fake
+    Single-byte-var-in-some-register/imm32/outputs
+    0x11/imm32/alloc-id:fake
+    _string_8a_copy_byte/imm32/subx-name
+    # operand selectors: the dereferenced inout goes in rm32, the output register in r32
+    1/imm32/rm32-is-first-inout
+    3/imm32/r32-is-first-output
+    0/imm32/no-imm32
+    0/imm32/no-disp32
+    1/imm32/output-is-write-only
+    # link to the next primitive in the list
+    0x11/imm32/alloc-id:fake
+    _Primitive-copy-byte-to-mem/imm32/next
+_Primitive-copy-byte-to-mem:  # (payload primitive)
+    0x11/imm32/alloc-id:fake:payload
+    # copy-byte-to *var1/reg1, var2/reg2 => 88/byte<- *reg1 reg2/r32
+    # Template: name "copy-byte-to", two inouts (byte in memory, then byte in a
+    # register) and no outputs.
+    0x11/imm32/alloc-id:fake
+    _string-copy-byte-to/imm32/name
+    0x11/imm32/alloc-id:fake
+    Two-args-byte-stack-byte-reg/imm32/inouts
+    # null outputs handle: both of its words are zero
+    0/imm32/no-outputs
+    0/imm32/no-outputs
+    0x11/imm32/alloc-id:fake
+    _string_88_copy_byte/imm32/subx-name
+    # operand selectors: the memory destination goes in rm32, the source register in r32
+    1/imm32/rm32-is-first-inout
+    2/imm32/r32-is-second-inout
+    0/imm32/no-imm32
+    0/imm32/no-disp32
+    # 0 here: there is no output operand to mark write-only
+    0/imm32/output-is-write-only
+    # link to the next primitive in the list
+    0x11/imm32/alloc-id:fake
 # - address
 _Primitive-address:  # (payload primitive)
@@ -12476,6 +12598,16 @@ _string-copy-to:  # (payload array byte)
     # "copy-to"
     0x7/imm32/size
     0x63/c 0x6f/o 0x70/p 0x79/y 0x2d/dash 0x74/t 0x6f/o
+_string-copy-byte:  # (payload array byte)
+    0x11/imm32/alloc-id:fake:payload
+    # "copy-byte"
+    # size counts the 9 bytes spelled out on the data line below
+    0x9/imm32/size
+    0x63/c 0x6f/o 0x70/p 0x79/y 0x2d/- 0x62/b 0x79/y 0x74/t 0x65/e
+_string-copy-byte-to:  # (payload array byte)
+    0x11/imm32/alloc-id:fake:payload
+    # "copy-byte-to"
+    # size counts the 12 (0xc) bytes spelled out on the data line below
+    0xc/imm32/size
+    0x63/c 0x6f/o 0x70/p 0x79/y 0x2d/- 0x62/b 0x79/y 0x74/t 0x65/e 0x2d/- 0x74/t 0x6f/o
 _string-decrement:  # (payload array byte)
     0x11/imm32/alloc-id:fake:payload
     # "decrement"
@@ -12923,6 +13055,16 @@ _string_8b_->:  # (payload array byte)
     # "8b/->"
     0x5/imm32/size
     0x38/8 0x62/b 0x2f/slash 0x2d/dash 0x3e/>
+_string_8a_copy_byte:  # (payload array byte)
+    0x11/imm32/alloc-id:fake:payload
+    # "8a/byte->"
+    # SubX opcode text emitted for 'copy-byte'; size counts the 9 bytes below
+    0x9/imm32/size
+    0x38/8 0x61/a 0x2f// 0x62/b 0x79/y 0x74/t 0x65/e 0x2d/- 0x3e/>
+_string_88_copy_byte:  # (payload array byte)
+    0x11/imm32/alloc-id:fake:payload
+    # "88/byte<-"
+    # SubX opcode text emitted for 'copy-byte-to'; size counts the 9 bytes below
+    0x9/imm32/size
+    0x38/8 0x38/8 0x2f// 0x62/b 0x79/y 0x74/t 0x65/e 0x3c/< 0x2d/-
 _string_8d_copy_address:  # (payload array byte)
     0x11/imm32/alloc-id:fake:payload
     # "8d/copy-address"
@@ -13007,6 +13149,26 @@ Int-var-in-mem:  # (payload var)
     0/imm32/no-register
     0/imm32/no-register
 
+# Not really legal, but closest we can currently represent a dereference of an (addr byte)
+# One-element list: a value handle pointing at Byte-var-in-mem, then a null 'next' handle.
+Single-byte-var-in-mem:  # (payload list var)
+    0x11/imm32/alloc-id:fake:payload
+    0x11/imm32/alloc-id:fake
+    Byte-var-in-mem/imm32
+    # null handle (both words zero) ends the list
+    0/imm32/next
+    0/imm32/next
+
+# Not really legal, but closest we can currently represent a dereference of an (addr byte)
+# Template var of type byte with a stack offset and no register.
+Byte-var-in-mem:  # (payload var)
+    0x11/imm32/alloc-id:fake:payload
+    # null name handle (both words zero)
+    0/imm32/name
+    0/imm32/name
+    0x11/imm32/alloc-id:fake
+    Type-byte/imm32
+    1/imm32/some-block-depth
+    # NOTE(review): the 1 looks like a placeholder for "has some stack offset" - confirm against the matcher
+    1/imm32/some-stack-offset
+    # null register handle (both words zero): the var is not in a register
+    0/imm32/no-register
+    0/imm32/no-register
+
 Two-args-int-stack-int-reg:  # (payload list var)
     0x11/imm32/alloc-id:fake:payload
     0x11/imm32/alloc-id:fake
@@ -13014,6 +13176,14 @@ Two-args-int-stack-int-reg:  # (payload list var)
     0x11/imm32/alloc-id:fake
     Single-int-var-in-some-register/imm32/next
 
+# Not really legal, but closest we can currently represent a dereference of an (addr byte)
+# Two-element list: first Byte-var-in-mem, then the one-element list
+# Single-byte-var-in-some-register as the tail.
+Two-args-byte-stack-byte-reg:  # (payload list var)
+    0x11/imm32/alloc-id:fake:payload
+    0x11/imm32/alloc-id:fake
+    Byte-var-in-mem/imm32
+    0x11/imm32/alloc-id:fake
+    Single-byte-var-in-some-register/imm32/next
+
 Two-args-int-reg-int-stack:  # (payload list var)
     0x11/imm32/alloc-id:fake:payload
     0x11/imm32/alloc-id:fake
@@ -13056,6 +13226,13 @@ Single-addr-var-in-some-register:  # (payload list var)
     0/imm32/next
     0/imm32/next
 
+# One-element list: a value handle pointing at Byte-var-in-some-register, then a null 'next' handle.
+Single-byte-var-in-some-register:  # (payload list var)
+    0x11/imm32/alloc-id:fake:payload
+    0x11/imm32/alloc-id:fake
+    Byte-var-in-some-register/imm32
+    # null handle (both words zero) ends the list
+    0/imm32/next
+    0/imm32/next
+
 Int-var-in-some-register:  # (payload var)
     0x11/imm32/alloc-id:fake:payload
     0/imm32/name
@@ -13068,10 +13245,10 @@ Int-var-in-some-register:  # (payload var)
     Any-register/imm32
 
 Any-register:  # (payload array byte)
-  0x11/imm32/alloc-id:fake:payload
-  1/imm32/size
-  # data
-  2a/asterisk
+    0x11/imm32/alloc-id:fake:payload
+    1/imm32/size
+    # data
+    2a/asterisk
 
 Addr-var-in-some-register:  # (payload var)
     0x11/imm32/alloc-id:fake:payload
@@ -13084,6 +13261,17 @@ Addr-var-in-some-register:  # (payload var)
     0x11/imm32/alloc-id:fake
     Any-register/imm32
 
+# Template var of type byte with no stack offset, whose register field points
+# at Any-register (the one-byte string "*").
+# NOTE(review): presumably "*" acts as a wildcard when matching registers - confirm against the matcher
+Byte-var-in-some-register:  # (payload var)
+    0x11/imm32/alloc-id:fake:payload
+    # null name handle (both words zero)
+    0/imm32/name
+    0/imm32/name
+    0x11/imm32/alloc-id:fake
+    Type-byte/imm32
+    1/imm32/some-block-depth
+    0/imm32/no-stack-offset
+    0x11/imm32/alloc-id:fake
+    Any-register/imm32
+
 Single-int-var-in-eax:  # (payload list var)
     0x11/imm32/alloc-id:fake:payload
     0x11/imm32/alloc-id:fake
@@ -13234,6 +13422,14 @@ Type-addr:  # (payload tree type-id)
     0/imm32/right:null
     0/imm32/right:null
 
+# Atomic type tree for 'byte'. The value 8 is the index of "byte" in the
+# Type-id table and the constant compute-size-of-type-id compares against.
+Type-byte:  # (payload tree type-id)
+    0x11/imm32/alloc-id:fake:payload
+    1/imm32/is-atom
+    8/imm32/value:byte
+    0/imm32/left:unused
+    # null right handle (both words zero)
+    0/imm32/right:null
+    0/imm32/right:null
+
 == code
 emit-subx-primitive:  # out: (addr buffered-file), stmt: (addr stmt), primitive: (addr primitive), err: (addr buffered-file), ed: (addr exit-descriptor)
     # . prologue
diff --git a/mu_instructions b/mu_instructions
index 5ce9e07c..7b38a1f8 100644
--- a/mu_instructions
+++ b/mu_instructions
@@ -90,6 +90,10 @@ var/reg <- copy n                 => "c7 0/subop/copy %" reg " " n "/imm32"
 copy-to var, n                    => "c7 0/subop/copy *(ebp+" var.stack-offset ") " n "/imm32"
 copy-to *var/reg, n               => "c7 0/subop/copy *" reg " " n "/imm32"
 
+var/reg <- copy-byte var2/reg2    => "8a/byte-> %" reg2 " " reg "/r32"
+var/reg <- copy-byte *var2/reg2   => "8a/byte-> *" reg2 " " reg "/r32"
+copy-byte-to *var1/reg1, var2/reg2  => "88/byte<- *" reg1 " " reg2 "/r32"
+
 compare var1, var2/reg2           => "39/compare *(ebp+" var1.stack-offset ") " reg2 "/r32"
 compare *var1/reg1, var2/reg2     => "39/compare *" reg1 " " reg2 "/r32"
 compare var1/reg1, var2           => "3b/compare<- *(ebp+" var2.stack-offset ") " reg1 "/r32"
diff --git a/mu_summary b/mu_summary
index f97e1bd6..286f8286 100644
--- a/mu_summary
+++ b/mu_summary
@@ -98,7 +98,7 @@ register):
   var/reg <- xor n
   xor-with var, n
 
-  var1/reg1 <- copy var2/reg2
+  var/reg <- copy var2/reg2
   copy-to var1, var2/reg
   var/reg <- copy var2
   var/reg <- copy n
@@ -118,6 +118,23 @@ Any instruction above that takes a variable in memory can be replaced with a
 dereference (`*`) of an address variable in a register. But you can't dereference
 variables in memory.
 
+## Byte operations
+
+A special case is variables of type 'byte'. Mu is a 32-bit platform, so for the
+most part only supports types that are multiples of 32 bits. However, we do
+want to support strings in ASCII and UTF-8, which will be arrays of bytes.
+
+Since most x86 instructions implicitly load 32 bits at a time from memory,
+variables of type 'byte' are only allowed in registers, not on the stack. Here
+are the possible instructions for copying bytes between registers and to/from memory:
+
+  var/reg <- copy-byte var2/reg2      # var: byte, var2: byte
+  var/reg <- copy-byte *var2/reg2     # var: byte, var2: (addr byte)
+  copy-byte-to *var1/reg1, var2/reg2  # var1: (addr byte), var2: byte
+
+In addition, variables of type 'byte' are restricted to (the lowest bytes of)
+just 4 registers: eax, ecx, edx and ebx.
+
 ## Primitive jump instructions
 
 There are two kinds of jumps, both with many variations: `break` and `loop`.