From cd94852dbc713ff38f38a30d6e5fb4675606823c Mon Sep 17 00:00:00 2001 From: Kartik Agaram Date: Fri, 28 Aug 2020 23:24:04 -0700 Subject: 6733 - read utf-8 'grapheme' from byte stream No support for combining characters. Graphemes are currently just utf-8 encodings of a single Unicode code-point. No support for code-points that require more than 32 bits in utf-8. --- 112read-byte.subx | 46 +++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 45 insertions(+), 1 deletion(-) (limited to '112read-byte.subx') diff --git a/112read-byte.subx b/112read-byte.subx index 387cbb66..32f89647 100644 --- a/112read-byte.subx +++ b/112read-byte.subx @@ -33,7 +33,7 @@ $Stdin->buffer: # . op subop mod rm32 base index scale r32 # . 1-3 bytes 3 bits 2 bits 3 bits 3 bits 3 bits 2 bits 2 bits 0/1/2/4 bytes 0/1/2/4 bytes -# return next byte value in eax, with top 3 bytes cleared. +# Return next byte value in eax, with top 3 bytes cleared. # On reaching end of file, return 0xffffffff (Eof). read-byte-buffered: # f: (addr buffered-file) -> byte-or-Eof/eax: byte # . prologue @@ -268,6 +268,50 @@ test-read-byte-buffered-refills-buffer: # . end c3/return +# Return next byte value in eax, with top 3 bytes cleared. +# Abort with an error message if the stream has no unread bytes (s->read >= s->write). +read-byte: # s: (addr stream byte) -> result/eax: byte + # . prologue + 55/push-ebp + 89/copy 3/mod/direct 5/rm32/ebp . . . 4/r32/esp . . # copy esp to ebp + # . save registers + 51/push-ecx + 56/push-esi + # esi = s + 8b/copy 1/mod/*+disp8 5/rm32/ebp . . . 6/r32/esi 8/disp8 . # copy *(ebp+8) to esi + # ecx = s->read + 8b/copy 1/mod/*+disp8 6/rm32/esi . . . 1/r32/ecx 4/disp8 . # copy *(esi+4) to ecx + # if (s->read >= s->write) abort + 3b/compare 0/mod/indirect 6/rm32/esi . . . 1/r32/ecx . . # compare ecx with *esi + 0f 8d/jump-if->= $read-byte:abort/disp32 + # result = s->data[s->read] + 31/xor 3/mod/direct 0/rm32/eax . . . 0/r32/eax . . # clear eax + 8a/copy-byte 1/mod/*+disp8 4/rm32/sib 6/base/esi 1/index/ecx . 0/r32/AL 0xc/disp8 . 
# copy byte at *(esi+ecx+12) to AL + # ++s->read + ff 0/subop/increment 1/mod/*+disp8 6/rm32/esi . . . . 4/disp8 . # increment *(esi+4) +$read-byte:end: + # . restore registers + 5e/pop-to-esi + 59/pop-to-ecx + # . epilogue + 89/copy 3/mod/direct 4/rm32/esp . . . 5/r32/ebp . . # copy ebp to esp + 5d/pop-to-ebp + c3/return + +$read-byte:abort: + # . _write(2/stderr, error) + # . . push args + 68/push "read-byte: empty stream\n"/imm32 + 68/push 2/imm32/stderr + # . . call + e8/call _write/disp32 + # . . discard args + 81 0/subop/add 3/mod/direct 4/rm32/esp . . . . . 8/imm32 # add to esp + # . syscall(exit, 1) + bb/copy-to-ebx 1/imm32 + e8/call syscall_exit/disp32 + # never gets here + == data # a test buffered file for _test-stream -- cgit 1.4.1-2-gfad0