about summary refs log tree commit diff stats
path: root/112read-byte.subx
diff options
context:
space:
mode:
author: Kartik Agaram <vc@akkartik.com> 2020-08-28 23:24:04 -0700
committer: Kartik Agaram <vc@akkartik.com> 2020-08-28 23:24:04 -0700
commit: cd94852dbc713ff38f38a30d6e5fb4675606823c (patch)
tree: dc2a52048a609937cf0de0d0906336d52419ef65 /112read-byte.subx
parent: 392ebcce803423631de77fdc85c837be636078bb (diff)
download: mu-cd94852dbc713ff38f38a30d6e5fb4675606823c.tar.gz
6733 - read utf-8 'grapheme' from byte stream
No support for combining characters. Graphemes are currently just utf-8
encodings of a single Unicode code-point. No support for code-points that
require more than 32 bits in utf-8.
Diffstat (limited to '112read-byte.subx')
-rw-r--r--112read-byte.subx46
1 files changed, 45 insertions, 1 deletions
diff --git a/112read-byte.subx b/112read-byte.subx
index 387cbb66..32f89647 100644
--- a/112read-byte.subx
+++ b/112read-byte.subx
@@ -33,7 +33,7 @@ $Stdin->buffer:
 # . op          subop               mod             rm32          base        index         scale       r32
 # . 1-3 bytes   3 bits              2 bits          3 bits        3 bits      3 bits        2 bits      2 bits      0/1/2/4 bytes   0/1/2/4 bytes
 
-# return next byte value in eax, with top 3 bytes cleared.
+# Return next byte value in eax, with top 3 bytes cleared.
 # On reaching end of file, return 0xffffffff (Eof).
 read-byte-buffered:  # f: (addr buffered-file) -> byte-or-Eof/eax: byte
     # . prologue
@@ -268,6 +268,50 @@ test-read-byte-buffered-refills-buffer:
     # . end
     c3/return
 
+# Return the next unread byte of stream s in eax, with top 3 bytes cleared.
+# Abort (write an error to stderr, then exit with status 1) if s->read >= s->write.
+read-byte:  # s: (addr stream byte) -> result/eax: byte
+    # . prologue
+    55/push-ebp
+    89/copy                         3/mod/direct    5/rm32/ebp    .           .             .           4/r32/esp   .               .                 # copy esp to ebp
+    # . save registers
+    51/push-ecx
+    56/push-esi
+    # esi = s
+    8b/copy                         1/mod/*+disp8   5/rm32/ebp    .           .             .           6/r32/esi   8/disp8         .                 # copy *(ebp+8) to esi
+    # ecx = s->read
+    8b/copy                         1/mod/*+disp8   6/rm32/esi    .           .             .           1/r32/ecx   4/disp8         .                 # copy *(esi+4) to ecx
+    # if (s->read >= s->write) abort    (s->write is the word at *esi)
+    3b/compare                      0/mod/indirect  6/rm32/esi    .           .             .           1/r32/ecx   .               .                 # compare ecx with *esi
+    0f 8d/jump-if->=  $read-byte:abort/disp32
+    # result = s->data[s->read]    (clear eax first so only AL is non-zero)
+    31/xor                          3/mod/direct    0/rm32/eax    .           .             .           0/r32/eax   .               .                 # clear eax
+    8a/copy-byte                    1/mod/*+disp8   4/rm32/sib    6/base/esi  1/index/ecx   .           0/r32/AL    0xc/disp8       .                 # copy byte at *(esi+ecx+12) to AL
+    # ++s->read
+    ff          0/subop/increment   1/mod/*+disp8   6/rm32/esi    .           .             .           .           4/disp8         .                 # increment *(esi+4)
+$read-byte:end:
+    # . restore registers (reverse of the order they were saved in)
+    5e/pop-to-esi
+    59/pop-to-ecx
+    # . epilogue
+    89/copy                         3/mod/direct    4/rm32/esp    .           .             .           5/r32/ebp   .               .                 # copy ebp to esp
+    5d/pop-to-ebp
+    c3/return
+
+$read-byte:abort:
+    # . _write(2/stderr, error)
+    # . . push args
+    68/push  "read-byte: empty stream\n"/imm32
+    68/push  2/imm32/stderr
+    # . . call
+    e8/call  _write/disp32
+    # . . discard args
+    81          0/subop/add         3/mod/direct    4/rm32/esp    .           .             .           .           .               8/imm32           # add to esp
+    # . syscall(exit, 1)
+    bb/copy-to-ebx  1/imm32
+    e8/call  syscall_exit/disp32
+    # never gets here (no register restore or return after the exit call)
+
 == data
 
 # a test buffered file for _test-stream