about summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r--baremetal/107trace.subx87
-rw-r--r--baremetal/108write.subx53
-rw-r--r--baremetal/112read-byte.subx33
-rw-r--r--baremetal/115write-byte.subx37
-rw-r--r--baremetal/120allocate.subx52
-rw-r--r--baremetal/302stack_allocate.subx61
-rw-r--r--baremetal/309stream.subx200
-rw-r--r--baremetal/400.mu5
-rw-r--r--baremetal/401draw-text-rightward.mu19
-rw-r--r--baremetal/403unicode.mu193
10 files changed, 729 insertions, 11 deletions
diff --git a/baremetal/107trace.subx b/baremetal/107trace.subx
new file mode 100644
index 00000000..cdbdffc5
--- /dev/null
+++ b/baremetal/107trace.subx
@@ -0,0 +1,87 @@
+#   instruction                     effective address                                                   register    displacement    immediate
+# . op          subop               mod             rm32          base        index         scale       r32
+# . 1-3 bytes   3 bits              2 bits          3 bits        3 bits      3 bits        2 bits      3 bits      0/1/2/4 bytes   0/1/2/4 bytes
+
+# 3-argument variant of _append
+#
+# Appends the contents of the array 's' to the byte range [out, outend) by
+# delegating to _append-4, and returns _append-4's result in eax.
+# 's' is read as a 4-byte size followed by that many bytes of data.
+# ecx is saved and restored; eax carries the result.
+_append-3:  # out: (addr byte), outend: (addr byte), s: (addr array byte) -> num_bytes_appended/eax
+    # . prologue
+    55/push-ebp
+    89/copy                         3/mod/direct    5/rm32/ebp    .           .             .           4/r32/esp   .               .                 # copy esp to ebp
+    # . save registers
+    51/push-ecx
+    # eax = _append-4(out, outend, &s->data[0], &s->data[s->size])
+    # (args are pushed last-to-first)
+    # . . push &s->data[s->size]
+    # . . . eax = s, ecx = s->size, then ecx = s + s->size + 4 (the 4 skips the size field)
+    8b/copy                         1/mod/*+disp8   5/rm32/ebp    .           .                         0/r32/eax   0x10/disp8      .                 # copy *(ebp+16) to eax
+    8b/copy                         0/mod/indirect  0/rm32/eax    .           .             .           1/r32/ecx   .               .                 # copy *eax to ecx
+    8d/copy-address                 1/mod/*+disp8   4/rm32/sib    0/base/eax  1/index/ecx   .           1/r32/ecx   4/disp8         .                 # copy eax+ecx+4 to ecx
+    51/push-ecx
+    # . . push &s->data[0]
+    8d/copy-address                 1/mod/*+disp8   0/rm32/eax    .           .             .           1/r32/ecx   4/disp8         .                 # copy eax+4 to ecx
+    51/push-ecx
+    # . . push outend
+    ff          6/subop/push        1/mod/*+disp8   5/rm32/ebp    .           .             .           .           0xc/disp8       .                 # push *(ebp+12)
+    # . . push out
+    ff          6/subop/push        1/mod/*+disp8   5/rm32/ebp    .           .             .           .           8/disp8         .                 # push *(ebp+8)
+    # . . call
+    e8/call  _append-4/disp32
+    # . . discard args (4 args * 4 bytes)
+    81          0/subop/add         3/mod/direct    4/rm32/esp    .           .             .           .           .               0x10/imm32        # add to esp
+$_append-3:end:
+    # . restore registers
+    59/pop-to-ecx
+    # . epilogue
+    89/copy                         3/mod/direct    4/rm32/esp    .           .             .           5/r32/ebp   .               .                 # copy ebp to esp
+    5d/pop-to-ebp
+    c3/return
+
+# 4-argument variant of _append
+#
+# Copies bytes one at a time from [in, inend) to [out, outend) and returns the
+# number of bytes copied in eax. Copying stops as soon as either the input is
+# exhausted or the output range is full; the latter currently stops silently
+# (see the TODO below) rather than aborting.
+# Address comparisons are unsigned. ecx, edx, ebx, esi and edi are preserved.
+_append-4:  # out: (addr byte), outend: (addr byte), in: (addr byte), inend: (addr byte) -> num_bytes_appended/eax: int
+    # . prologue
+    55/push-ebp
+    89/copy                         3/mod/direct    5/rm32/ebp    .           .             .           4/r32/esp   .               .                 # copy esp to ebp
+    # . save registers
+    51/push-ecx
+    52/push-edx
+    53/push-ebx
+    56/push-esi
+    57/push-edi
+    # num_bytes_appended = 0
+    b8/copy-to-eax  0/imm32
+    # edi = out
+    8b/copy                         1/mod/*+disp8   5/rm32/ebp    .           .             .           7/r32/edi   8/disp8         .                 # copy *(ebp+8) to edi
+    # edx = outend
+    8b/copy                         1/mod/*+disp8   5/rm32/ebp    .           .             .           2/r32/edx   0xc/disp8       .                 # copy *(ebp+12) to edx
+    # esi = in
+    8b/copy                         1/mod/*+disp8   5/rm32/ebp    .           .             .           6/r32/esi   0x10/disp8      .                 # copy *(ebp+16) to esi
+    # ecx = inend
+    8b/copy                         1/mod/*+disp8   5/rm32/ebp    .           .             .           1/r32/ecx   0x14/disp8      .                 # copy *(ebp+20) to ecx
+$_append-4:loop:
+    # if (in >= inend) break
+    39/compare                      3/mod/direct    6/rm32/esi    .           .             .           1/r32/ecx   .               .                 # compare esi with ecx
+    73/jump-if-addr>=  $_append-4:end/disp8
+    # if (out >= outend) abort  # just to catch test failures fast
+    # (for now this just stops copying; the caller sees a short count in eax)
+    39/compare                      3/mod/direct    7/rm32/edi    .           .             .           2/r32/edx   .               .                 # compare edi with edx
+    73/jump-if-addr>=  $_append-4:end/disp8  # TODO: abort
+    # *out = *in
+    # (BL is the scratch byte, which is why ebx is saved above)
+    8a/copy-byte                    0/mod/indirect  6/rm32/esi    .           .             .           3/r32/BL    .               .                 # copy byte at *esi to BL
+    88/copy-byte                    0/mod/indirect  7/rm32/edi    .           .             .           3/r32/BL    .               .                 # copy byte at BL to *edi
+    # ++num_bytes_appended
+    40/increment-eax
+    # ++in
+    46/increment-esi
+    # ++out
+    47/increment-edi
+    eb/jump  $_append-4:loop/disp8
+$_append-4:end:
+    # . restore registers (reverse order of the pushes above)
+    5f/pop-to-edi
+    5e/pop-to-esi
+    5b/pop-to-ebx
+    5a/pop-to-edx
+    59/pop-to-ecx
+    # . epilogue
+    89/copy                         3/mod/direct    4/rm32/esp    .           .             .           5/r32/ebp   .               .                 # copy ebp to esp
+    5d/pop-to-ebp
+    c3/return
+
+# . . vim:nowrap:textwidth=0
diff --git a/baremetal/108write.subx b/baremetal/108write.subx
new file mode 100644
index 00000000..daf1f535
--- /dev/null
+++ b/baremetal/108write.subx
@@ -0,0 +1,53 @@
+#   instruction                     effective address                                                   register    displacement    immediate
+# . op          subop               mod             rm32          base        index         scale       r32
+# . 1-3 bytes   3 bits              2 bits          3 bits        3 bits      3 bits        2 bits      3 bits      0/1/2/4 bytes   0/1/2/4 bytes
+
+# Append the contents of array 's' to stream 'f'.
+# Does nothing if 's' is null. All registers are preserved.
+# Stream fields as used here: write index at offset 0, size at offset 8,
+# data starting at offset 12.
+# If 'f' does not have room for all of 's', only the bytes that fit are
+# appended (f->write advances by the count _append-3 returns).
+write:  # f: (addr stream byte), s: (addr array byte)
+    # . prologue
+    55/push-ebp
+    89/copy                         3/mod/direct    5/rm32/ebp    .           .             .           4/r32/esp   .               .                 # copy esp to ebp
+    # if (s == 0) return
+    # (jumps past the register restores below, which is fine because nothing has been saved yet)
+    81          7/subop/compare     1/mod/*+disp8   5/rm32/ebp    .           .             .           .           0xc/disp8       0/imm32           # compare *(ebp+12)
+    74/jump-if-=  $write:end/disp8
+    # TODO: write to file
+    # otherwise, treat 'f' as a stream to append to
+    # . save registers
+    50/push-eax
+    51/push-ecx
+    52/push-edx
+    53/push-ebx
+    # ecx = f
+    8b/copy                         1/mod/*+disp8   5/rm32/ebp    .           .                         1/r32/ecx   8/disp8         .                 # copy *(ebp+8) to ecx
+    # edx = f->write
+    8b/copy                         0/mod/indirect  1/rm32/ecx    .           .             .           2/r32/edx   .               .                 # copy *ecx to edx
+    # ebx = f->size
+    8b/copy                         1/mod/*+disp8   1/rm32/ecx    .           .             .           3/r32/ebx   8/disp8         .                 # copy *(ecx+8) to ebx
+    # eax = _append-3(&f->data[f->write], &f->data[f->size], s)
+    # (args are pushed last-to-first)
+    # . . push s
+    ff          6/subop/push        1/mod/*+disp8   5/rm32/ebp    .           .             .           .           0xc/disp8       .                 # push *(ebp+12)
+    # . . push &f->data[f->size]
+    8d/copy-address                 1/mod/*+disp8   4/rm32/sib    1/base/ecx  3/index/ebx   .           3/r32/ebx   0xc/disp8       .                 # copy ecx+ebx+12 to ebx
+    53/push-ebx
+    # . . push &f->data[f->write]
+    8d/copy-address                 1/mod/*+disp8   4/rm32/sib    1/base/ecx  2/index/edx   .           3/r32/ebx   0xc/disp8       .                 # copy ecx+edx+12 to ebx
+    53/push-ebx
+    # . . call
+    e8/call  _append-3/disp32
+    # . . discard args (3 args * 4 bytes)
+    81          0/subop/add         3/mod/direct    4/rm32/esp    .           .             .           .           .               0xc/imm32         # add to esp
+    # f->write += eax
+    01/add                          0/mod/indirect  1/rm32/ecx    .           .             .           0/r32/eax   .               .                 # add eax to *ecx
+    # . restore registers
+    5b/pop-to-ebx
+    5a/pop-to-edx
+    59/pop-to-ecx
+    58/pop-to-eax
+$write:end:
+    # . epilogue
+    89/copy                         3/mod/direct    4/rm32/esp    .           .             .           5/r32/ebp   .               .                 # copy ebp to esp
+    5d/pop-to-ebp
+    c3/return
+
+# TODO: bring in tests once we have check-ints-equal
+
+# . . vim:nowrap:textwidth=0
diff --git a/baremetal/112read-byte.subx b/baremetal/112read-byte.subx
new file mode 100644
index 00000000..e8731044
--- /dev/null
+++ b/baremetal/112read-byte.subx
@@ -0,0 +1,33 @@
+# TODO: read-byte-buffered
+
+# Return next byte value in eax, with top 3 bytes cleared.
+# On reaching end of stream (s->read >= s->write), return 0 in eax and leave
+# s->read untouched. (TODO: abort instead.)
+# Stream fields as used here: write index at offset 0, read index at offset 4,
+# data starting at offset 12. ecx and esi are preserved.
+read-byte:  # s: (addr stream byte) -> result/eax: byte
+    # . prologue
+    55/push-ebp
+    89/copy                         3/mod/direct    5/rm32/ebp    .           .             .           4/r32/esp   .               .                 # copy esp to ebp
+    # . save registers
+    51/push-ecx
+    56/push-esi
+    # esi = s
+    8b/copy                         1/mod/*+disp8   5/rm32/ebp    .           .             .           6/r32/esi   8/disp8         .                 # copy *(ebp+8) to esi
+    # ecx = s->read
+    8b/copy                         1/mod/*+disp8   6/rm32/esi    .           .             .           1/r32/ecx   4/disp8         .                 # copy *(esi+4) to ecx
+    # result = 0
+    # Cleared before the bounds check so that the end-of-stream path also
+    # returns a well-defined value instead of whatever eax held on entry.
+    # xor modifies flags, so it has to come before the compare.
+    31/xor                          3/mod/direct    0/rm32/eax    .           .             .           0/r32/eax   .               .                 # clear eax
+    # if (s->read >= s->write) return 0
+    3b/compare                      0/mod/indirect  6/rm32/esi    .           .             .           1/r32/ecx   .               .                 # compare ecx with *esi
+    0f 8d/jump-if->=  $read-byte:end/disp32  # TODO: abort
+    # result = s->data[s->read]
+    8a/copy-byte                    1/mod/*+disp8   4/rm32/sib    6/base/esi  1/index/ecx   .           0/r32/AL    0xc/disp8       .                 # copy byte at *(esi+ecx+12) to AL
+    # ++s->read
+    ff          0/subop/increment   1/mod/*+disp8   6/rm32/esi    .           .             .           .           4/disp8         .                 # increment *(esi+4)
+$read-byte:end:
+    # . restore registers
+    5e/pop-to-esi
+    59/pop-to-ecx
+    # . epilogue
+    89/copy                         3/mod/direct    4/rm32/esp    .           .             .           5/r32/ebp   .               .                 # copy ebp to esp
+    5d/pop-to-ebp
+    c3/return
+
+# . . vim:nowrap:textwidth=0
diff --git a/baremetal/115write-byte.subx b/baremetal/115write-byte.subx
new file mode 100644
index 00000000..32245116
--- /dev/null
+++ b/baremetal/115write-byte.subx
@@ -0,0 +1,37 @@
+#   instruction                     effective address                                                   register    displacement    immediate
+# . op          subop               mod             rm32          base        index         scale       r32
+# . 1-3 bytes   3 bits              2 bits          3 bits        3 bits      3 bits        2 bits      3 bits      0/1/2/4 bytes   0/1/2/4 bytes
+
+# Write lower byte of 'n' to 'f'.
+# If the stream is already full (f->write >= f->size) nothing is written.
+# (TODO: abort instead.)
+# Stream fields as used here: write index at offset 0, size at offset 8,
+# data starting at offset 12.
+# All registers are preserved, including eax: this function has no output, so
+# callers are entitled to keep live values in eax across the call.
+append-byte:  # f: (addr stream byte), n: int
+    # . prologue
+    55/push-ebp
+    89/copy                         3/mod/direct    5/rm32/ebp    .           .             .           4/r32/esp   .               .                 # copy esp to ebp
+    # . save registers
+    50/push-eax
+    51/push-ecx
+    57/push-edi
+    # edi = f
+    8b/copy                         1/mod/*+disp8   5/rm32/ebp    .           .             .           7/r32/edi   8/disp8         .                 # copy *(ebp+8) to edi
+    # ecx = f->write
+    8b/copy                         0/mod/indirect  7/rm32/edi    .           .             .           1/r32/ecx   .               .                 # copy *edi to ecx
+    # if (f->write >= f->size) abort
+    3b/compare                      1/mod/*+disp8   7/rm32/edi    .           .             .           1/r32/ecx   8/disp8         .                 # compare ecx with *(edi+8)
+    7d/jump-if->=  $append-byte:end/disp8  # TODO: abort
+$append-byte:to-stream:
+    # write to stream
+    # f->data[f->write] = LSB(n)
+    # (eax is only scratch here; it is restored below)
+    31/xor                          3/mod/direct    0/rm32/eax    .           .             .           0/r32/eax   .               .                 # clear eax
+    8a/copy-byte                    1/mod/*+disp8   5/rm32/ebp    .           .             .           0/r32/AL    0xc/disp8       .                 # copy byte at *(ebp+12) to AL
+    88/copy-byte                    1/mod/*+disp8   4/rm32/sib    7/base/edi  1/index/ecx   .           0/r32/AL    0xc/disp8       .                 # copy AL to *(edi+ecx+12)
+    # ++f->write
+    ff          0/subop/increment   0/mod/indirect  7/rm32/edi    .           .             .           .           .               .                 # increment *edi
+$append-byte:end:
+    # . restore registers
+    5f/pop-to-edi
+    59/pop-to-ecx
+    58/pop-to-eax
+    # . epilogue
+    89/copy                         3/mod/direct    4/rm32/esp    .           .             .           5/r32/ebp   .               .                 # copy ebp to esp
+    5d/pop-to-ebp
+    c3/return
+
+# . . vim:nowrap:textwidth=0
diff --git a/baremetal/120allocate.subx b/baremetal/120allocate.subx
new file mode 100644
index 00000000..fbe998bb
--- /dev/null
+++ b/baremetal/120allocate.subx
@@ -0,0 +1,52 @@
+#   instruction                     effective address                                                   register    displacement    immediate
+# . op          subop               mod             rm32          base        index         scale       r32
+# . 1-3 bytes   3 bits              2 bits          3 bits        3 bits      3 bits        2 bits      3 bits      0/1/2/4 bytes   0/1/2/4 bytes
+
+# Fill a region of memory with zeroes.
+# Writes 'size' zero bytes starting at 'start', one byte at a time.
+# The loop test is a signed comparison, so a zero or negative 'size' writes
+# nothing. All registers are preserved.
+zero-out:  # start: (addr byte), size: int
+    # pseudocode:
+    #   curr/esi = start
+    #   i/ecx = 0
+    #   while true
+    #     if (i >= size) break
+    #     *curr = 0
+    #     ++curr
+    #     ++i
+    #
+    # . prologue
+    55/push-ebp
+    89/copy                         3/mod/direct    5/rm32/ebp    .           .             .           4/r32/esp   .               .                 # copy esp to ebp
+    # . save registers
+    50/push-eax
+    51/push-ecx
+    52/push-edx
+    56/push-esi
+    # curr/esi = start
+    8b/copy                         1/mod/*+disp8   5/rm32/ebp    .           .             .           6/r32/esi   8/disp8         .                 # copy *(ebp+8) to esi
+    # var i/ecx: int = 0
+    31/xor                          3/mod/direct    1/rm32/ecx    .           .             .           1/r32/ecx   .               .                 # clear ecx
+    # edx = size
+    8b/copy                         1/mod/*+disp8   5/rm32/ebp    .           .             .           2/r32/edx   0xc/disp8       .                 # copy *(ebp+12) to edx
+$zero-out:loop:
+    # if (i >= size) break
+    39/compare                      3/mod/direct    1/rm32/ecx    .           .             .           2/r32/edx   .               .                 # compare ecx with edx
+    7d/jump-if->=  $zero-out:end/disp8
+    # *curr = 0
+    # (mod 0 is register-indirect: the store goes to the byte esi points at)
+    c6          0/subop/copy-byte   0/mod/indirect  6/rm32/esi    .           .             .           .           .               0/imm8            # copy byte to *esi
+    # ++curr
+    46/increment-esi
+    # ++i
+    41/increment-ecx
+    eb/jump  $zero-out:loop/disp8
+$zero-out:end:
+    # . restore registers
+    5e/pop-to-esi
+    5a/pop-to-edx
+    59/pop-to-ecx
+    58/pop-to-eax
+    # . epilogue
+    89/copy                         3/mod/direct    4/rm32/esp    .           .             .           5/r32/ebp   .               .                 # copy ebp to esp
+    5d/pop-to-ebp
+    c3/return
+
+# . . vim:nowrap:textwidth=0
diff --git a/baremetal/302stack_allocate.subx b/baremetal/302stack_allocate.subx
new file mode 100644
index 00000000..cd51d5ff
--- /dev/null
+++ b/baremetal/302stack_allocate.subx
@@ -0,0 +1,61 @@
+# A function which pushes n zeros on the stack.
+# Really only intended to be called from code generated by mu.subx (for array
+# vars on the stack).
+
+== code
+
+#? Entry:
+#?     # . prologue
+#?     89/<- %ebp 4/r32/esp
+#?     #
+#?     68/push 0xfcfdfeff/imm32
+#?     b8/copy-to-eax 0x34353637/imm32
+#? $dump-stack0:
+#?     (push-n-zero-bytes 4)
+#?     68/push 0x20/imm32
+#? $dump-stack9:
+#?     b8/copy-to-eax 1/imm32/exit
+#?     cd/syscall 0x80/imm8
+
+# This is not a regular function, so it won't be idiomatic.
+# Registers must be properly restored.
+# Registers can be spilled, but that modifies the stack and needs to be
+# cleaned up.
+
+# Overhead:
+#   62 + n*6 instructions to push n bytes.
+# If we just emitted code to push n zeroes, it would be:
+#   5 bytes for 4 zero bytes, so 1.25 bytes per zero. And that's not even
+#   instructions.
+# But on the other hand it would destroy the instruction cache, where this
+# approach requires 15 instructions, fixed.
+
+# n must be positive
+#
+# What the code below does, with E = esp on entry (return address at E, the
+# argument n at E+4):
+#   - moves the return address down to E-n
+#   - zeroes the n+4 bytes [E-n+4, E+8), i.e. everything from just above the
+#     relocated return address through the slot that held n
+#   - returns with esp = E-n+4
+# ebp and eax are spilled to globals rather than pushed, because pushing would
+# disturb the very stack region being rearranged. Both are restored on exit.
+push-n-zero-bytes:  # n: int
+$push-n-zero-bytes:prologue:
+    89/<- *Push-n-zero-bytes-ebp 5/r32/ebp  # spill ebp without affecting stack
+    89/<- %ebp 4/r32/esp
+$push-n-zero-bytes:copy-ra:
+    # -- esp = ebp
+    # spill eax, then use it to carry the return address
+    89/<- *Push-n-zero-bytes-eax 0/r32/eax
+    8b/-> *esp 0/r32/eax
+    # esp -= n
+    2b/subtract *(ebp+4) 4/r32/esp
+    # -- esp+n = ebp
+    # store the return address at the new top of stack
+    89/<- *esp 0/r32/eax
+    8b/-> *Push-n-zero-bytes-eax 0/r32/eax
+$push-n-zero-bytes:bulk-cleaning:
+    # start of region to clear = esp+4 (just above the relocated return address)
+    89/<- *Push-n-zero-bytes-esp 4/r32/esp
+    81 0/subop/add *Push-n-zero-bytes-esp 4/imm32
+    # length of region to clear = n+4; the argument slot is updated in place
+    # and is itself inside the region that zero-out then clears
+    81 0/subop/add *(ebp+4) 4/imm32
+    (zero-out *Push-n-zero-bytes-esp *(ebp+4))  # n+4
+$push-n-zero-bytes:epilogue:
+    8b/-> *Push-n-zero-bytes-ebp 5/r32/ebp  # restore spill
+    c3/return
+
+== data
+# Scratch globals for push-n-zero-bytes, which spills here instead of to the
+# stack. Each is one 32-bit word, initially 0.
+Push-n-zero-bytes-ebp:  # (addr int)
+  # caller's ebp while push-n-zero-bytes runs
+  0/imm32
+Push-n-zero-bytes-esp:  # (addr int)
+  # start address of the region handed to zero-out
+  0/imm32
+Push-n-zero-bytes-eax:
+  # caller's eax while it is borrowed to move the return address
+  0/imm32
diff --git a/baremetal/309stream.subx b/baremetal/309stream.subx
new file mode 100644
index 00000000..720ee0eb
--- /dev/null
+++ b/baremetal/309stream.subx
@@ -0,0 +1,200 @@
+# Some unsafe methods not intended to be used directly in SubX, only through
+# Mu after proper type-checking.
+
+== code
+
+# Is there nothing left to read from 's'?
+# Result is 1 when s->read >= s->write (signed comparison), else 0.
+# Reads the write index at offset 0 and the read index at offset 4.
+stream-empty?:  # s: (addr stream _) -> result/eax: boolean
+    # . prologue
+    55/push-ebp
+    89/<- %ebp 4/r32/esp
+    # . save registers
+    51/push-ecx
+    56/push-esi
+    # esi = s
+    8b/-> *(ebp+8) 6/r32/esi
+    # var rd/ecx: int = s->read
+    8b/-> *(esi+4) 1/r32/ecx
+    # result = false
+    b8/copy-to-eax 0/imm32/false
+    # result = (rd >= s->write)
+    3b/compare 1/r32/ecx *esi
+    0f 9d/set-if->= %al
+$stream-empty?:end:
+    # . restore registers
+    5e/pop-to-esi
+    59/pop-to-ecx
+    # . epilogue
+    89/<- %esp 5/r32/ebp
+    5d/pop-to-ebp
+    c3/return
+
+# Is there no room left to write to 's'?
+# Result is 1 when s->write >= s->size (signed comparison), else 0.
+# Reads the write index at offset 0 and the size at offset 8.
+stream-full?:  # s: (addr stream _) -> result/eax: boolean
+    # . prologue
+    55/push-ebp
+    89/<- %ebp 4/r32/esp
+    # . save registers
+    51/push-ecx
+    56/push-esi
+    # esi = s
+    8b/-> *(ebp+8) 6/r32/esi
+    # var wr/ecx: int = s->write
+    8b/-> *esi 1/r32/ecx
+    # result = false
+    b8/copy-to-eax 0/imm32/false
+    # result = (wr >= s->size)
+    3b/compare 1/r32/ecx *(esi+8)
+    0f 9d/set-if->= %al
+$stream-full?:end:
+    # . restore registers
+    5e/pop-to-esi
+    59/pop-to-ecx
+    # . epilogue
+    89/<- %esp 5/r32/ebp
+    5d/pop-to-ebp
+    c3/return
+
+# Append 'n' bytes starting at 'in' to stream 's'.
+# If s->write + n would exceed s->size, nothing is written. (TODO: abort.)
+# Otherwise the bytes are copied with _append-4 and s->write advances by n.
+# All registers are preserved.
+write-to-stream:  # s: (addr stream _), in: (addr byte), n: int
+    # . prologue
+    55/push-ebp
+    89/<- %ebp 4/r32/esp
+    # . save registers
+    50/push-eax
+    51/push-ecx
+    52/push-edx
+    53/push-ebx
+    57/push-edi
+    # edi = s
+    8b/-> *(ebp+8) 7/r32/edi
+    # var len/ebx: int = n
+    8b/-> *(ebp+0x10) 3/r32/ebx
+    # var start/edx: int = s->write
+    8b/-> *edi 2/r32/edx
+    # if (start + len > s->size) return
+    8d/copy-address *(edx+ebx) 1/r32/ecx
+    3b/compare 1/r32/ecx *(edi+8)
+    0f 8f/jump-if-> $write-to-stream:end/disp32  # TODO: abort
+    # var out/edx: (addr byte) = &s->data[start]
+    8d/copy-address *(edi+edx+0xc) 2/r32/edx
+    # var outend/ecx: (addr byte) = out + len
+    8d/copy-address *(edx+ebx) 1/r32/ecx
+    # var src/eax: (addr byte) = in
+    8b/-> *(ebp+0xc) 0/r32/eax
+    # var srcend/ebx: (addr byte) = src + len
+    8d/copy-address *(eax+ebx) 3/r32/ebx
+    # copy the bytes; the count returned in eax is not used
+    (_append-4  %edx %ecx  %eax %ebx)  # => eax
+    # s->write += n
+    8b/-> *(ebp+0x10) 1/r32/ecx
+    01/add-to *edi 1/r32/ecx
+$write-to-stream:end:
+    # . restore registers
+    5f/pop-to-edi
+    5b/pop-to-ebx
+    5a/pop-to-edx
+    59/pop-to-ecx
+    58/pop-to-eax
+    # . epilogue
+    89/<- %esp 5/r32/ebp
+    5d/pop-to-ebp
+    c3/return
+
+# Copy the next 'n' unread bytes of stream 's' to 'out'.
+# If s->read + n would exceed s->write, nothing is copied. (TODO: abort.)
+# Otherwise the bytes are copied with _append-4 and s->read advances by n.
+# All registers are preserved.
+read-from-stream:  # s: (addr stream _), out: (addr byte), n: int
+    # . prologue
+    55/push-ebp
+    89/<- %ebp 4/r32/esp
+    # . save registers
+    50/push-eax
+    51/push-ecx
+    52/push-edx
+    53/push-ebx
+    56/push-esi
+    # esi = s
+    8b/-> *(ebp+8) 6/r32/esi
+    # var len/ebx: int = n
+    8b/-> *(ebp+0x10) 3/r32/ebx
+    # var start/edx: int = s->read
+    8b/-> *(esi+4) 2/r32/edx
+    # if (start + len > s->write) return
+    8d/copy-address *(edx+ebx) 1/r32/ecx
+    3b/compare 1/r32/ecx *esi
+    0f 8f/jump-if-> $read-from-stream:end/disp32  # TODO: abort
+    # var src/edx: (addr byte) = &s->data[start]
+    8d/copy-address *(esi+edx+0xc) 2/r32/edx
+    # var srcend/ecx: (addr byte) = src + len
+    8d/copy-address *(edx+ebx) 1/r32/ecx
+    # var dest/eax: (addr byte) = out
+    8b/-> *(ebp+0xc) 0/r32/eax
+    # var destend/ebx: (addr byte) = dest + len
+    8d/copy-address *(eax+ebx) 3/r32/ebx
+    # copy the bytes; the count returned in eax is not used
+    (_append-4  %eax %ebx  %edx %ecx)  # => eax
+    # s->read += n
+    8b/-> *(ebp+0x10) 1/r32/ecx
+    01/add-to *(esi+4) 1/r32/ecx
+$read-from-stream:end:
+    # . restore registers
+    5e/pop-to-esi
+    5b/pop-to-ebx
+    5a/pop-to-edx
+    59/pop-to-ecx
+    58/pop-to-eax
+    # . epilogue
+    89/<- %esp 5/r32/ebp
+    5d/pop-to-ebp
+    c3/return
+
+# Peek at the next unread byte of 's' without consuming it.
+# Returns 0 if s->read >= s->write (signed comparison).
+# The top 3 bytes of the result are always 0.
+stream-first:  # s: (addr stream byte) -> result/eax: byte
+    # . prologue
+    55/push-ebp
+    89/<- %ebp 4/r32/esp
+    # . save registers
+    51/push-ecx
+    56/push-esi
+    # esi = s
+    8b/-> *(ebp+8) 6/r32/esi
+    # result = 0
+    b8/copy-to-eax 0/imm32
+    # var limit/ecx: int = s->write
+    8b/-> *esi 1/r32/ecx
+    # if (s->read >= limit) return 0
+    39/compare-with *(esi+4) 1/r32/ecx
+    7d/jump-if->= $stream-first:end/disp8
+    # var idx/ecx: int = s->read
+    8b/-> *(esi+4) 1/r32/ecx
+    # result = s->data[idx]
+    8a/byte-> *(esi+ecx+0xc) 0/r32/AL
+$stream-first:end:
+    # . restore registers
+    5e/pop-to-esi
+    59/pop-to-ecx
+    # . epilogue
+    89/<- %esp 5/r32/ebp
+    5d/pop-to-ebp
+    c3/return
+
+# Peek at the most recently written byte of 's' (the one at s->write - 1).
+# Returns 0 if s->read >= s->write (signed comparison).
+# The top 3 bytes of the result are always 0.
+stream-final:  # s: (addr stream byte) -> result/eax: byte
+    # . prologue
+    55/push-ebp
+    89/<- %ebp 4/r32/esp
+    # . save registers
+    51/push-ecx
+    56/push-esi
+    # esi = s
+    8b/-> *(ebp+8) 6/r32/esi
+    # result = 0
+    b8/copy-to-eax 0/imm32
+    # var rd/ecx: int = s->read
+    8b/-> *(esi+4) 1/r32/ecx
+    # if (rd >= s->write) return 0
+    3b/compare 1/r32/ecx *esi
+    7d/jump-if->= $stream-final:end/disp8
+    # var idx/ecx: int = s->write - 1
+    8b/-> *esi 1/r32/ecx
+    49/decrement-ecx
+    # result = s->data[idx]
+    8a/byte-> *(esi+ecx+0xc) 0/r32/AL
+$stream-final:end:
+    # . restore registers
+    5e/pop-to-esi
+    59/pop-to-ecx
+    # . epilogue
+    89/<- %esp 5/r32/ebp
+    5d/pop-to-ebp
+    c3/return
diff --git a/baremetal/400.mu b/baremetal/400.mu
index 016204c5..f25e13a3 100644
--- a/baremetal/400.mu
+++ b/baremetal/400.mu
@@ -1,3 +1,8 @@
 sig pixel screen: (addr screen), x: int, y: int, color: int
 sig read-key kbd: (addr keyboard) -> _/eax: byte
 sig draw-grapheme screen: (addr screen), g: grapheme, x: int, y: int, color: int
+
+# signatures for stream primitives implemented in SubX
+# append the contents of array 's' to stream 'f'
+sig write f: (addr stream byte), s: (addr array byte)
+# append the lowest byte of 'n' to stream 'f'
+sig append-byte f: (addr stream byte), n: int
+# consume and return the next unread byte of 's'
+sig read-byte s: (addr stream byte) -> _/eax: byte
+# true when 's' has no unread data
+sig stream-empty? s: (addr stream _) -> _/eax: boolean
diff --git a/baremetal/401draw-text-rightward.mu b/baremetal/401draw-text-rightward.mu
index 089c5d5c..61b2d9ca 100644
--- a/baremetal/401draw-text-rightward.mu
+++ b/baremetal/401draw-text-rightward.mu
@@ -1,16 +1,13 @@
-fn draw-text-rightward screen: (addr screen), _text: (addr array byte), x: int, y: int, color: int {
-  var text/esi: (addr array byte) <- copy _text
-  var len/ecx: int <- length text
-  var i/edx: int <- copy 0
+# Draw 'text' on 'screen' left to right starting at (x, y), one grapheme per
+# 8-pixel-wide cell, all in 'color'.
+# The text is first copied into a temporary byte stream so that it can be
+# decoded grapheme by grapheme (utf-8) rather than byte by byte.
+# NOTE(review): the temporary stream holds 0x100 bytes, and the copy loop
+# behind 'write' stops at the end of the destination, so longer text appears
+# to be silently truncated. Confirm whether that is acceptable.
+fn draw-text-rightward screen: (addr screen), text: (addr array byte), x: int, y: int, color: int {
+  var stream-storage: (stream byte 0x100)
+  var stream/esi: (addr stream byte) <- address stream-storage
+  write stream, text
   {
-    compare i, len
-    break-if->=
-    var g/eax: (addr byte) <- index text, i
-    var g2/eax: byte <- copy-byte *g
-    var g3/eax: grapheme <- copy g2
-    draw-grapheme screen, g3, x, y, color
+    # read-grapheme returns 0xffffffff once the stream is exhausted
+    var g/eax: grapheme <- read-grapheme stream
+    compare g, 0xffffffff  # end-of-file
+    break-if-=
+    draw-grapheme screen, g, x, y, color
     add-to x, 8  # font-width
-    i <- increment
     loop
   }
 }
diff --git a/baremetal/403unicode.mu b/baremetal/403unicode.mu
new file mode 100644
index 00000000..ea45f707
--- /dev/null
+++ b/baremetal/403unicode.mu
@@ -0,0 +1,193 @@
+# Helpers for Unicode.
+#
+# Mu has no characters, only code points and graphemes.
+# Code points are the indivisible atoms of text streams.
+#   https://en.wikipedia.org/wiki/Code_point
+# Graphemes are the smallest self-contained unit of text.
+# Graphemes may consist of multiple code points.
+#
+# Mu graphemes are always represented in utf-8, and they are required to fit
+# in 4 bytes.
+#
+# Mu doesn't currently support combining code points, or graphemes made of
+# multiple code points. One day we will.
+# We also don't currently support code points that translate into multiple
+# or wide graphemes. (In particular, Tab will never be supported.)
+
+# transliterated from tb_utf8_unicode_to_char in https://github.com/nsf/termbox
+# https://wiki.tcl-lang.org/page/UTF%2D8+bit+by+bit explains the algorithm
+#
+# The day we want to support combining characters, this function will need to
+# take multiple code points. Or something.
+#
+# Encode code point 'in' as utf-8, packed into a single 32-bit grapheme with
+# the leading byte in the lowest byte and each trailer byte in the next-higher
+# byte.
+# Code points above 0x1fffff would need more than 4 bytes; they are
+# unsupported and yield 0.
+# Comparisons are signed, so a code point with the top bit set takes the
+# single-byte path and is returned unchanged.
+fn to-grapheme in: code-point -> _/eax: grapheme {
+  var c/eax: int <- copy in
+  var num-trailers/ecx: int <- copy 0
+  var first/edx: int <- copy 0
+  $to-grapheme:compute-length: {
+    # single byte: just return it
+    compare c, 0x7f
+    {
+      break-if->
+      var g/eax: grapheme <- copy c
+      return g
+    }
+    # 2 bytes
+    compare c, 0x7ff
+    {
+      break-if->
+      num-trailers <- copy 1
+      first <- copy 0xc0
+      break $to-grapheme:compute-length
+    }
+    # 3 bytes
+    compare c, 0xffff
+    {
+      break-if->
+      num-trailers <- copy 2
+      first <- copy 0xe0
+      break $to-grapheme:compute-length
+    }
+    # 4 bytes
+    compare c, 0x1fffff
+    {
+      break-if->
+      num-trailers <- copy 3
+      first <- copy 0xf0
+      break $to-grapheme:compute-length
+    }
+    # more than 4 bytes: unsupported
+    # Every smaller range has already been handled above, so reaching this
+    # point means c > 0x1fffff; no further comparison is needed.
+    # TODO: print error message to stderr
+    return 0
+  }
+  # emit trailer bytes, 6 bits from 'in', first two bits '10'
+  # The lowest 6 bits of c are the last utf-8 byte, so trailers are produced
+  # last-to-first; shifting 'result' left each time leaves the last trailer in
+  # the highest occupied byte.
+  var result/edi: grapheme <- copy 0
+  {
+    compare num-trailers, 0
+    break-if-<=
+    var tmp/esi: int <- copy c
+    tmp <- and 0x3f
+    tmp <- or 0x80
+    result <- shift-left 8
+    result <- or tmp
+    # update loop state
+    c <- shift-right 6
+    num-trailers <- decrement
+    loop
+  }
+  # emit engine (the leading byte): length marker 'first' plus the remaining
+  # high bits of c, placed in the lowest byte
+  result <- shift-left 8
+  result <- or c
+  result <- or first
+  #
+  return result
+}
+
+# TODO: bring in tests once we have check-ints-equal
+
+# read the next grapheme from a stream of bytes
+#
+# Returns 0xffffffff if the stream has nothing left to read.
+# A first byte below 0xc0, or one of 0xfe/0xff, is returned as is.
+# A first byte in 0xf8..0xfd is not a supported length marker and yields 0.
+# Otherwise the first byte stays in the lowest byte of the result, and each
+# following trailer byte is placed in the next-higher byte.
+fn read-grapheme in: (addr stream byte) -> _/eax: grapheme {
+  # nothing left? report end-of-file
+  {
+    var done?/eax: boolean <- stream-empty? in
+    compare done?, 0  # false
+    break-if-=
+    return 0xffffffff
+  }
+  var lead/eax: byte <- read-byte in
+  var trailers-left/ecx: int <- copy 0
+  $read-grapheme:classify: {
+    # 0xfe and 0xff: hand back unchanged
+    compare lead, 0xfe
+    {
+      break-if-<
+      var single/eax: grapheme <- copy lead
+      return single
+    }
+    # below 0xc0: a one-byte grapheme
+    compare lead, 0xc0
+    {
+      break-if->=
+      var single/eax: grapheme <- copy lead
+      return single
+    }
+    # 0xc0..0xdf: one trailer follows
+    compare lead, 0xe0
+    {
+      break-if->=
+      trailers-left <- copy 1
+      break $read-grapheme:classify
+    }
+    # 0xe0..0xef: two trailers follow
+    compare lead, 0xf0
+    {
+      break-if->=
+      trailers-left <- copy 2
+      break $read-grapheme:classify
+    }
+    # 0xf0..0xf7: three trailers follow
+    compare lead, 0xf8
+    {
+      break-if->=
+      trailers-left <- copy 3
+      break $read-grapheme:classify
+    }
+    # 0xf8..0xfd: unsupported
+    # TODO: print error message
+    return 0
+  }
+  # pack each trailer into the next-higher byte of the result
+  var packed/edi: grapheme <- copy lead
+  var byte-pos/edx: int <- copy 1
+  {
+    compare trailers-left, 0
+    break-if-<=
+    var next/eax: byte <- read-byte in
+    var shifted/eax: int <- copy next
+    shifted <- shift-left-bytes shifted, byte-pos
+    packed <- or shifted
+    # advance to the next trailer
+    byte-pos <- increment
+    trailers-left <- decrement
+    loop
+  }
+  return packed
+}
+
+# needed because available primitives only shift by a literal/constant number of bits
+#
+# Returns n shifted left by k whole bytes. At most 4 one-byte shifts are
+# performed (a 32-bit value has nothing left after that), and a k of 0 or
+# less returns n unchanged.
+fn shift-left-bytes n: int, k: int -> _/eax: int {
+  var acc/eax: int <- copy n
+  var done/ecx: int <- copy 0
+  {
+    # stop after 4 shifts: only 4 bytes in 32 bits
+    compare done, 4
+    break-if->=
+    # stop once k shifts have been performed
+    compare done, k
+    break-if->=
+    acc <- shift-left 8
+    done <- increment
+    loop
+  }
+  return acc
+}
+
+# write a grapheme to a stream of bytes
+# this is like write-to-stream, except we skip leading 0 bytes
+#
+# Bytes are emitted lowest byte first. The lowest byte is always written;
+# after that, writing stops as soon as the remaining higher bytes are all 0,
+# so between 1 and 4 bytes are appended.
+# NOTE: 'c' lives in eax across the calls to append-byte, so this function
+# relies on append-byte leaving eax intact.
+fn write-grapheme out: (addr stream byte), g: grapheme {
+$write-grapheme:body: {
+  var c/eax: int <- copy g
+  append-byte out, c  # first byte is always written
+  # second byte, if any bytes remain
+  c <- shift-right 8
+  compare c, 0
+  break-if-= $write-grapheme:body
+  append-byte out, c
+  # third byte, if any bytes remain
+  c <- shift-right 8
+  compare c, 0
+  break-if-= $write-grapheme:body
+  append-byte out, c
+  # fourth byte, if present
+  c <- shift-right 8
+  compare c, 0
+  break-if-= $write-grapheme:body
+  append-byte out, c
+}
+}