From d253a3182859c7c989449122a60d5f362f19ded0 Mon Sep 17 00:00:00 2001 From: "Kartik K. Agaram" Date: Tue, 9 Nov 2021 08:12:11 -0800 Subject: rename grapheme to code-point-utf8 Longer name, but it doesn't lie. We have no data structure right now for combining multiple code points. And it makes no sense for the notion of a grapheme to conflate its Unicode encoding. --- linux/126write-int-decimal.subx | 4 +- linux/304screen.subx | 14 +- linux/305keyboard.subx | 6 +- linux/400.mu | 8 +- linux/403unicode.mu | 146 +++++++-------- linux/405screen.mu | 390 +++++++++++++++++++-------------------- linux/407right-justify.mu | 2 +- linux/411string.mu | 20 +- linux/apps/arith.mu | 46 ++--- linux/apps/parse-int.mu | 2 +- linux/apps/print-file.mu | 4 +- linux/apps/tui.mu | 2 +- linux/browse/main.mu | 40 ++-- linux/browse/paginated-screen.mu | 176 +++++++++--------- linux/mu | Bin 614607 -> 614614 bytes linux/mu.subx | 10 +- linux/tile/box.mu | 4 +- linux/tile/environment.mu | 114 ++++++------ linux/tile/gap-buffer.mu | 188 +++++++++---------- linux/tile/grapheme-stack.mu | 124 ++++++------- linux/tile/main.mu | 6 +- linux/tile/surface.mu | 18 +- linux/tile/value.mu | 12 +- linux/tile/word.mu | 48 ++--- linux/vocabulary.md | 12 +- 25 files changed, 698 insertions(+), 698 deletions(-) (limited to 'linux') diff --git a/linux/126write-int-decimal.subx b/linux/126write-int-decimal.subx index 04f8b021..c48e17c8 100644 --- a/linux/126write-int-decimal.subx +++ b/linux/126write-int-decimal.subx @@ -303,7 +303,7 @@ test-write-int32-decimal-negative-multiple-digits: # . end c3/return -decimal-digit?: # c: grapheme -> result/eax: boolean +decimal-digit?: # c: code-point-utf8 -> result/eax: boolean # . prologue 55/push-ebp 89/copy 3/mod/direct 5/rm32/ebp . . . 4/r32/esp . . # copy esp to ebp @@ -402,7 +402,7 @@ test-decimal-digit-above-9: 81 0/subop/add 3/mod/direct 4/rm32/esp . . . . . 0xc/imm32 # add to esp c3/return -to-decimal-digit: # in: grapheme -> out/eax: int +to-decimal-digit: # in: code-point-utf8 -> out/eax: int # . prologue 55/push-ebp 89/copy 3/mod/direct 5/rm32/ebp . . . 4/r32/esp . . # copy esp to ebp diff --git a/linux/304screen.subx b/linux/304screen.subx index 0b24fdbf..fb8ac0ea 100644 --- a/linux/304screen.subx +++ b/linux/304screen.subx @@ -157,8 +157,8 @@ $print-stream-to-real-screen:end: 5d/pop-to-ebp c3/return -# print a grapheme in utf-8 (only up to 4 bytes so far) -print-grapheme-to-real-screen: # c: grapheme +# print a code-point-utf8 in utf-8 (only up to 4 bytes so far) +print-code-point-utf8-to-real-screen: # c: code-point-utf8 # . prologue 55/push-ebp 89/<- %ebp 4/r32/esp @@ -170,31 +170,31 @@ print-grapheme-to-real-screen: # c: grapheme 8a/byte-> *(ebp+8) 0/r32/al # if (curr == 0) return 3d/compare-eax-and 0/imm32 - 74/jump-if-= $print-grapheme-to-real-screen:end/disp8 + 74/jump-if-= $print-code-point-utf8-to-real-screen:end/disp8 # (print-byte-to-real-screen %eax) # curr = *(ebp+9) 8a/byte-> *(ebp+9) 0/r32/al # if (curr == 0) return 3d/compare-eax-and 0/imm32 - 74/jump-if-= $print-grapheme-to-real-screen:end/disp8 + 74/jump-if-= $print-code-point-utf8-to-real-screen:end/disp8 # (print-byte-to-real-screen %eax) # curr = *(ebp+10) 8a/byte-> *(ebp+0xa) 0/r32/al # if (curr == 0) return 3d/compare-eax-and 0/imm32 - 74/jump-if-= $print-grapheme-to-real-screen:end/disp8 + 74/jump-if-= $print-code-point-utf8-to-real-screen:end/disp8 # (print-byte-to-real-screen %eax) # curr = *(ebp+11) 8a/byte-> *(ebp+0xb) 0/r32/al # if (curr == 0) return 3d/compare-eax-and 0/imm32 - 74/jump-if-= $print-grapheme-to-real-screen:end/disp8 + 74/jump-if-= $print-code-point-utf8-to-real-screen:end/disp8 # (print-byte-to-real-screen %eax) -$print-grapheme-to-real-screen:end: +$print-code-point-utf8-to-real-screen:end: # . restore registers 58/pop-to-eax # . epilogue diff --git a/linux/305keyboard.subx b/linux/305keyboard.subx index 32159e49..21be1081 100644 --- a/linux/305keyboard.subx +++ b/linux/305keyboard.subx @@ -121,15 +121,15 @@ $enable-keyboard-type-mode:end: # read keys or escapes up to 4 bytes # -# fun fact: terminal escapes and graphemes in utf-8 don't conflict! -# - in graphemes all but the first/lowest byte will have a 1 in the MSB (be +# fun fact: terminal escapes and code-point-utf8s in utf-8 don't conflict! +# - in code-point-utf8s all but the first/lowest byte will have a 1 in the MSB (be # greater than 0x7f) # - in escapes every byte will have a 0 in the MSB # the two categories overlap only when the first/lowest byte is 0x1b or 'esc' # # Only use this in immediate mode; in type (typewriter) mode 4 bytes may get # parts of multiple keys. -read-key-from-real-keyboard: # -> result/eax: grapheme +read-key-from-real-keyboard: # -> result/eax: code-point-utf8 # . prologue 55/push-ebp 89/<- %ebp 4/r32/esp diff --git a/linux/400.mu b/linux/400.mu index c757c970..a391a76e 100644 --- a/linux/400.mu +++ b/linux/400.mu @@ -113,8 +113,8 @@ sig skip-until-close-paren line: (addr stream byte) #sig skip-until-close-paren-in-slice curr: (addr byte), end: (addr byte) -> _/eax: (addr byte) sig write-stream-data f: (addr buffered-file), s: (addr stream byte) sig write-int32-decimal out: (addr stream byte), n: int -sig decimal-digit? c: grapheme -> _/eax: boolean -sig to-decimal-digit in: grapheme -> _/eax: int +sig decimal-digit? c: code-point-utf8 -> _/eax: boolean +sig to-decimal-digit in: code-point-utf8 -> _/eax: int # bad name alert # next-word really tokenizes # next-raw-word really reads whitespace-separated words @@ -159,7 +159,7 @@ sig move-cursor-on-real-screen row: int, column: int sig print-string-to-real-screen s: (addr array byte) sig print-slice-to-real-screen s: (addr slice) sig print-stream-to-real-screen s: (addr stream byte) -sig print-grapheme-to-real-screen c: grapheme +sig print-code-point-utf8-to-real-screen c: code-point-utf8 sig print-int32-hex-to-real-screen n: int sig print-int32-hex-bits-to-real-screen n: int, bits: int sig print-int32-decimal-to-real-screen n: int @@ -174,7 +174,7 @@ sig hide-cursor-on-real-screen sig show-cursor-on-real-screen sig enable-keyboard-immediate-mode sig enable-keyboard-type-mode -sig read-key-from-real-keyboard -> _/eax: grapheme +sig read-key-from-real-keyboard -> _/eax: code-point-utf8 sig read-line-from-real-keyboard out: (addr stream byte) sig open filename: (addr array byte), write?: boolean, out: (addr handle buffered-file) sig populate-buffered-file-containing contents: (addr array byte), out: (addr handle buffered-file) diff --git a/linux/403unicode.mu b/linux/403unicode.mu index 8594615a..655cae2b 100644 --- a/linux/403unicode.mu +++ b/linux/403unicode.mu @@ -1,31 +1,31 @@ # Helpers for Unicode. # -# Mu has no characters, only code points and graphemes. +# Mu has no characters, only code points and code-point-utf8s. # Code points are the indivisible atoms of text streams. # https://en.wikipedia.org/wiki/Code_point # Graphemes are the smallest self-contained unit of text. # Graphemes may consist of multiple code points. # -# Mu graphemes are always represented in utf-8, and they are required to fit +# Mu code-point-utf8s are always represented in utf-8, and they are required to fit # in 4 bytes. # -# Mu doesn't currently support combining code points, or graphemes made of +# Mu doesn't currently support combining code points, or code-point-utf8s made of # multiple code points. One day we will. # On Linux, we also don't currently support code points that translate into -# multiple or wide graphemes. (In particular, Tab will never be supported.) +# multiple or wide code-point-utf8s. (In particular, Tab will never be supported.) # transliterated from tb_utf8_unicode_to_char in https://github.com/nsf/termbox # https://wiki.tcl-lang.org/page/UTF%2D8+bit+by+bit explains the algorithm -fn to-grapheme in: code-point -> _/eax: grapheme { +fn to-utf8 in: code-point -> _/eax: code-point-utf8 { var c/eax: int <- copy in var num-trailers/ecx: int <- copy 0 var first/edx: int <- copy 0 - $to-grapheme:compute-length: { + $to-utf8:compute-length: { # single byte: just return it compare c, 0x7f { break-if-> - var g/eax: grapheme <- copy c + var g/eax: code-point-utf8 <- copy c return g } # 2 bytes @@ -34,7 +34,7 @@ fn to-grapheme in: code-point -> _/eax: grapheme { break-if-> num-trailers <- copy 1 first <- copy 0xc0 - break $to-grapheme:compute-length + break $to-utf8:compute-length } # 3 bytes compare c, 0xffff @@ -42,7 +42,7 @@ fn to-grapheme in: code-point -> _/eax: grapheme { break-if-> num-trailers <- copy 2 first <- copy 0xe0 - break $to-grapheme:compute-length + break $to-utf8:compute-length } # 4 bytes compare c, 0x1fffff @@ -50,7 +50,7 @@ fn to-grapheme in: code-point -> _/eax: grapheme { break-if-> num-trailers <- copy 3 first <- copy 0xf0 - break $to-grapheme:compute-length + break $to-utf8:compute-length } # more than 4 bytes: unsupported # TODO: print to stderr @@ -65,7 +65,7 @@ fn to-grapheme in: code-point -> _/eax: grapheme { } } # emit trailer bytes, 6 bits from 'in', first two bits '10' - var result/edi: grapheme <- copy 0 + var result/edi: code-point-utf8 <- copy 0 { compare num-trailers, 0 break-if-<= @@ -87,16 +87,16 @@ fn to-grapheme in: code-point -> _/eax: grapheme { return result } -# single-byte code point have identical graphemes -fn test-to-grapheme-single-byte { +# single-byte code point have identical code-point-utf8s +fn test-to-utf8-single-byte { var in-int/ecx: int <- copy 0 { compare in-int, 0x7f break-if-> var in/eax: code-point <- copy in-int - var out/eax: grapheme <- to-grapheme in + var out/eax: code-point-utf8 <- to-utf8 in var out-int/eax: int <- copy out - check-ints-equal out-int, in-int, "F - test-to-grapheme-single-byte" + check-ints-equal out-int, in-int, "F - test-to-utf8-single-byte" in-int <- increment loop } @@ -104,55 +104,55 @@ fn test-to-grapheme-single-byte { # byte | byte | byte | byte # smallest 2-byte utf-8 -fn test-to-grapheme-two-bytes-min { +fn test-to-utf8-two-bytes-min { var in/eax: code-point <- copy 0x80 # 10 00-0000 - var out/eax: grapheme <- to-grapheme in + var out/eax: code-point-utf8 <- to-utf8 in var out-int/eax: int <- copy out - check-ints-equal out-int, 0x80c2, "F - to-grapheme/2a" # 110 0-0010 10 00-0000 + check-ints-equal out-int, 0x80c2, "F - to-utf8/2a" # 110 0-0010 10 00-0000 } # largest 2-byte utf-8 -fn test-to-grapheme-two-bytes-max { +fn test-to-utf8-two-bytes-max { var in/eax: code-point <- copy 0x7ff # 1-1111 11-1111 - var out/eax: grapheme <- to-grapheme in + var out/eax: code-point-utf8 <- to-utf8 in var out-int/eax: int <- copy out - check-ints-equal out-int, 0xbfdf, "F - to-grapheme/2b" # 110 1-1111 10 11-1111 + check-ints-equal out-int, 0xbfdf, "F - to-utf8/2b" # 110 1-1111 10 11-1111 } # smallest 3-byte utf-8 -fn test-to-grapheme-three-bytes-min { +fn test-to-utf8-three-bytes-min { var in/eax: code-point <- copy 0x800 # 10-0000 00-0000 - var out/eax: grapheme <- to-grapheme in + var out/eax: code-point-utf8 <- to-utf8 in var out-int/eax: int <- copy out - check-ints-equal out-int, 0x80a0e0, "F - to-grapheme/3a" # 1110 0000 10 10-0000 10 00-0000 + check-ints-equal out-int, 0x80a0e0, "F - to-utf8/3a" # 1110 0000 10 10-0000 10 00-0000 } # largest 3-byte utf-8 -fn test-to-grapheme-three-bytes-max { +fn test-to-utf8-three-bytes-max { var in/eax: code-point <- copy 0xffff # 1111 11-1111 11-1111 - var out/eax: grapheme <- to-grapheme in + var out/eax: code-point-utf8 <- to-utf8 in var out-int/eax: int <- copy out - check-ints-equal out-int, 0xbfbfef, "F - to-grapheme/3b" # 1110 1111 10 11-1111 10 11-1111 + check-ints-equal out-int, 0xbfbfef, "F - to-utf8/3b" # 1110 1111 10 11-1111 10 11-1111 } # smallest 4-byte utf-8 -fn test-to-grapheme-four-bytes-min { +fn test-to-utf8-four-bytes-min { var in/eax: code-point <- copy 0x10000 # 1-0000 00-0000 00-0000 - var out/eax: grapheme <- to-grapheme in + var out/eax: code-point-utf8 <- to-utf8 in var out-int/eax: int <- copy out - check-ints-equal out-int, 0x808090f0, "F - to-grapheme/4a" # 1111-0 000 10 01-0000 10 00-0000 10 00-0000 + check-ints-equal out-int, 0x808090f0, "F - to-utf8/4a" # 1111-0 000 10 01-0000 10 00-0000 10 00-0000 } # largest 4-byte utf-8 -fn test-to-grapheme-four-bytes-max { +fn test-to-utf8-four-bytes-max { var in/eax: code-point <- copy 0x1fffff # 111 11-1111 11-1111 11-1111 - var out/eax: grapheme <- to-grapheme in + var out/eax: code-point-utf8 <- to-utf8 in var out-int/eax: int <- copy out - check-ints-equal out-int, 0xbfbfbff7, "F - to-grapheme/4b" # 1111-0 111 10 11-1111 10 11-1111 10 11-1111 + check-ints-equal out-int, 0xbfbfbff7, "F - to-utf8/4b" # 1111-0 111 10 11-1111 10 11-1111 10 11-1111 } -# read the next grapheme from a stream of bytes -fn read-grapheme in: (addr stream byte) -> _/eax: grapheme { +# read the next code-point-utf8 from a stream of bytes +fn read-code-point-utf8 in: (addr stream byte) -> _/eax: code-point-utf8 { # if at eof, return EOF { var eof?/eax: boolean <- stream-empty? in @@ -162,18 +162,18 @@ fn read-grapheme in: (addr stream byte) -> _/eax: grapheme { } var c/eax: byte <- read-byte in var num-trailers/ecx: int <- copy 0 - $read-grapheme:compute-length: { + $read-code-point-utf8:compute-length: { # single byte: just return it compare c, 0xc0 { break-if->= - var g/eax: grapheme <- copy c + var g/eax: code-point-utf8 <- copy c return g } compare c, 0xfe { break-if-< - var g/eax: grapheme <- copy c + var g/eax: code-point-utf8 <- copy c return g } # 2 bytes @@ -181,23 +181,23 @@ fn read-grapheme in: (addr stream byte) -> _/eax: grapheme { { break-if->= num-trailers <- copy 1 - break $read-grapheme:compute-length + break $read-code-point-utf8:compute-length } # 3 bytes compare c, 0xf0 { break-if->= num-trailers <- copy 2 - break $read-grapheme:compute-length + break $read-code-point-utf8:compute-length } # 4 bytes compare c, 0xf8 { break-if->= num-trailers <- copy 3 - break $read-grapheme:compute-length + break $read-code-point-utf8:compute-length } -$read-grapheme:abort: { +$read-code-point-utf8:abort: { # TODO: print to stderr print-string-to-real-screen "utf-8 encodings larger than 4 bytes are not yet supported. First byte seen: " var n/eax: int <- copy c @@ -208,7 +208,7 @@ $read-grapheme:abort: { } } # prepend trailer bytes - var result/edi: grapheme <- copy c + var result/edi: code-point-utf8 <- copy c var num-byte-shifts/edx: int <- copy 1 { compare num-trailers, 0 @@ -225,48 +225,48 @@ $read-grapheme:abort: { return result } -fn test-read-grapheme { +fn test-read-code-point-utf8 { var s: (stream byte 0x30) var s2/ecx: (addr stream byte) <- address s write s2, "aΒc世d界e" - var c/eax: grapheme <- read-grapheme s2 + var c/eax: code-point-utf8 <- read-code-point-utf8 s2 var n/eax: int <- copy c - check-ints-equal n, 0x61, "F - test grapheme/0" - var c/eax: grapheme <- read-grapheme s2 + check-ints-equal n, 0x61, "F - test code-point-utf8/0" + var c/eax: code-point-utf8 <- read-code-point-utf8 s2 var n/eax: int <- copy c - check-ints-equal n, 0x92ce/greek-capital-letter-beta, "F - test grapheme/1" - var c/eax: grapheme <- read-grapheme s2 + check-ints-equal n, 0x92ce/greek-capital-letter-beta, "F - test code-point-utf8/1" + var c/eax: code-point-utf8 <- read-code-point-utf8 s2 var n/eax: int <- copy c - check-ints-equal n, 0x63, "F - test grapheme/2" - var c/eax: grapheme <- read-grapheme s2 + check-ints-equal n, 0x63, "F - test code-point-utf8/2" + var c/eax: code-point-utf8 <- read-code-point-utf8 s2 var n/eax: int <- copy c - check-ints-equal n, 0x96b8e4, "F - test grapheme/3" - var c/eax: grapheme <- read-grapheme s2 + check-ints-equal n, 0x96b8e4, "F - test code-point-utf8/3" + var c/eax: code-point-utf8 <- read-code-point-utf8 s2 var n/eax: int <- copy c - check-ints-equal n, 0x64, "F - test grapheme/4" - var c/eax: grapheme <- read-grapheme s2 + check-ints-equal n, 0x64, "F - test code-point-utf8/4" + var c/eax: code-point-utf8 <- read-code-point-utf8 s2 var n/eax: int <- copy c - check-ints-equal n, 0x8c95e7, "F - test grapheme/5" - var c/eax: grapheme <- read-grapheme s2 + check-ints-equal n, 0x8c95e7, "F - test code-point-utf8/5" + var c/eax: code-point-utf8 <- read-code-point-utf8 s2 var n/eax: int <- copy c - check-ints-equal n, 0x65, "F - test grapheme/6" + check-ints-equal n, 0x65, "F - test code-point-utf8/6" } -fn read-grapheme-buffered in: (addr buffered-file) -> _/eax: grapheme { +fn read-code-point-utf8-buffered in: (addr buffered-file) -> _/eax: code-point-utf8 { var c/eax: byte <- read-byte-buffered in var num-trailers/ecx: int <- copy 0 - $read-grapheme-buffered:compute-length: { + $read-code-point-utf8-buffered:compute-length: { # single byte: just return it compare c, 0xc0 { break-if->= - var g/eax: grapheme <- copy c + var g/eax: code-point-utf8 <- copy c return g } compare c, 0xfe { break-if-< - var g/eax: grapheme <- copy c + var g/eax: code-point-utf8 <- copy c return g } # 2 bytes @@ -274,23 +274,23 @@ fn read-grapheme-buffered in: (addr buffered-file) -> _/eax: grapheme { { break-if->= num-trailers <- copy 1 - break $read-grapheme-buffered:compute-length + break $read-code-point-utf8-buffered:compute-length } # 3 bytes compare c, 0xf0 { break-if->= num-trailers <- copy 2 - break $read-grapheme-buffered:compute-length + break $read-code-point-utf8-buffered:compute-length } # 4 bytes compare c, 0xf8 { break-if->= num-trailers <- copy 3 - break $read-grapheme-buffered:compute-length + break $read-code-point-utf8-buffered:compute-length } -$read-grapheme-buffered:abort: { +$read-code-point-utf8-buffered:abort: { # TODO: print to stderr print-string-to-real-screen "utf-8 encodings larger than 4 bytes are not supported. First byte seen: " var n/eax: int <- copy c @@ -301,7 +301,7 @@ $read-grapheme-buffered:abort: { } } # prepend trailer bytes - var result/edi: grapheme <- copy c + var result/edi: code-point-utf8 <- copy c var num-byte-shifts/edx: int <- copy 1 { compare num-trailers, 0 @@ -364,23 +364,23 @@ fn test-shift-left-bytes-5 { check-ints-equal result, 0, "F - shift-left-bytes >4" } -# write a grapheme to a stream of bytes +# write a code-point-utf8 to a stream of bytes # this is like write-to-stream, except we skip leading 0 bytes -fn write-grapheme out: (addr stream byte), g: grapheme { -$write-grapheme:body: { +fn write-code-point-utf8 out: (addr stream byte), g: code-point-utf8 { +$write-code-point-utf8:body: { var c/eax: int <- copy g append-byte out, c # first byte is always written c <- shift-right 8 compare c, 0 - break-if-= $write-grapheme:body + break-if-= $write-code-point-utf8:body append-byte out, c c <- shift-right 8 compare c, 0 - break-if-= $write-grapheme:body + break-if-= $write-code-point-utf8:body append-byte out, c c <- shift-right 8 compare c, 0 - break-if-= $write-grapheme:body + break-if-= $write-code-point-utf8:body append-byte out, c } } diff --git a/linux/405screen.mu b/linux/405screen.mu index c850df2a..ffb0deb8 100644 --- a/linux/405screen.mu +++ b/linux/405screen.mu @@ -18,7 +18,7 @@ type screen { } type screen-cell { - data: grapheme + data: code-point-utf8 color: int background-color: int bold?: boolean @@ -83,7 +83,7 @@ fn clear-screen screen: (addr screen) { return } # fake screen - var space/edi: grapheme <- copy 0x20 + var space/edi: code-point-utf8 <- copy 0x20 move-cursor screen, 1, 1 var screen-addr/esi: (addr screen) <- copy screen var i/eax: int <- copy 1 @@ -96,7 +96,7 @@ fn clear-screen screen: (addr screen) { { compare j, *ncols break-if-> - print-grapheme screen, space + print-code-point-utf8 screen, space j <- increment loop } @@ -186,8 +186,8 @@ fn print-stream _screen: (addr screen), s: (addr stream byte) { var done?/eax: boolean <- stream-empty? s compare done?, 0 break-if-!= - var g/eax: grapheme <- read-grapheme s - print-grapheme screen, g + var g/eax: code-point-utf8 <- read-code-point-utf8 s + print-code-point-utf8 screen, g loop } } @@ -211,11 +211,11 @@ fn print-array-of-ints-in-decimal screen: (addr screen), _a: (addr array int) { } } -fn print-grapheme screen: (addr screen), c: grapheme { +fn print-code-point-utf8 screen: (addr screen), c: code-point-utf8 { compare screen, 0 { break-if-!= - print-grapheme-to-real-screen c + print-code-point-utf8-to-real-screen c return } # fake screen @@ -239,7 +239,7 @@ fn print-grapheme screen: (addr screen), c: grapheme { break-if-<= copy-to *cursor-row-addr, num-rows # if (top-index > data size) top-index = 0, otherwise top-index += num-cols - $print-grapheme:perform-scroll: { + $print-code-point-utf8:perform-scroll: { var top-index-addr/ebx: (addr int) <- get screen-addr, top-index var data-ah/eax: (addr handle array screen-cell) <- get screen-addr, data var data/eax: (addr array screen-cell) <- lookup *data-ah @@ -248,7 +248,7 @@ fn print-grapheme screen: (addr screen), c: grapheme { { break-if->= add-to *top-index-addr, num-cols - break $print-grapheme:perform-scroll + break $print-code-point-utf8:perform-scroll } { break-if-< @@ -257,7 +257,7 @@ fn print-grapheme screen: (addr screen), c: grapheme { } } var idx/ecx: int <- current-screen-cell-index screen-addr -#? print-string-to-real-screen "printing grapheme at screen index " +#? print-string-to-real-screen "printing code-point-utf8 at screen index " #? print-int32-hex-to-real-screen idx #? print-string-to-real-screen ": " var data-ah/eax: (addr handle array screen-cell) <- get screen-addr, data @@ -266,9 +266,9 @@ fn print-grapheme screen: (addr screen), c: grapheme { var dest-cell/ecx: (addr screen-cell) <- index data, offset var src-cell/eax: (addr screen-cell) <- get screen-addr, curr-attributes copy-object src-cell, dest-cell - var dest/eax: (addr grapheme) <- get dest-cell, data - var c2/ecx: grapheme <- copy c -#? print-grapheme-to-real-screen c2 + var dest/eax: (addr code-point-utf8) <- get dest-cell, data + var c2/ecx: code-point-utf8 <- copy c +#? print-code-point-utf8-to-real-screen c2 #? print-string-to-real-screen "\n" copy-to *dest, c2 increment *cursor-col-addr @@ -305,21 +305,21 @@ fn screen-cell-index screen-on-stack: (addr screen), row: int, col: int -> _/ecx return result } -fn screen-grapheme-at screen-on-stack: (addr screen), row: int, col: int -> _/eax: grapheme { +fn screen-code-point-utf8-at screen-on-stack: (addr screen), row: int, col: int -> _/eax: code-point-utf8 { var screen-addr/esi: (addr screen) <- copy screen-on-stack var idx/ecx: int <- screen-cell-index screen-addr, row, col - var result/eax: grapheme <- screen-grapheme-at-idx screen-addr, idx + var result/eax: code-point-utf8 <- screen-code-point-utf8-at-idx screen-addr, idx return result } -fn screen-grapheme-at-idx screen-on-stack: (addr screen), idx-on-stack: int -> _/eax: grapheme { +fn screen-code-point-utf8-at-idx screen-on-stack: (addr screen), idx-on-stack: int -> _/eax: code-point-utf8 { var screen-addr/esi: (addr screen) <- copy screen-on-stack var data-ah/eax: (addr handle array screen-cell) <- get screen-addr, data var data/eax: (addr array screen-cell) <- lookup *data-ah var idx/ecx: int <- copy idx-on-stack var offset/ecx: (offset screen-cell) <- compute-offset data, idx var cell/eax: (addr screen-cell) <- index data, offset - var src/eax: (addr grapheme) <- get cell, data + var src/eax: (addr code-point-utf8) <- get cell, data return *src } @@ -433,8 +433,8 @@ fn screen-blink-at-idx? screen-on-stack: (addr screen), idx-on-stack: int -> _/e } fn print-code-point screen: (addr screen), c: code-point { - var g/eax: grapheme <- to-grapheme c - print-grapheme screen, g + var g/eax: code-point-utf8 <- to-utf8 c + print-code-point-utf8 screen, g } fn print-int32-hex screen: (addr screen), n: int { @@ -453,8 +453,8 @@ fn print-int32-hex screen: (addr screen), n: int { var done?/eax: boolean <- stream-empty? s2-addr compare done?, 0 break-if-!= - var g/eax: grapheme <- read-grapheme s2-addr - print-grapheme screen, g + var g/eax: code-point-utf8 <- read-code-point-utf8 s2-addr + print-code-point-utf8 screen, g loop } } @@ -475,8 +475,8 @@ fn print-int32-hex-bits screen: (addr screen), n: int, bits: int { var done?/eax: boolean <- stream-empty? s2-addr compare done?, 0 break-if-!= - var g/eax: grapheme <- read-grapheme s2-addr - print-grapheme screen, g + var g/eax: code-point-utf8 <- read-code-point-utf8 s2-addr + print-code-point-utf8 screen, g loop } } @@ -497,8 +497,8 @@ fn print-int32-decimal screen: (addr screen), n: int { var done?/eax: boolean <- stream-empty? s2-addr compare done?, 0 break-if-!= - var g/eax: grapheme <- read-grapheme s2-addr - print-grapheme screen, g + var g/eax: code-point-utf8 <- read-code-point-utf8 s2-addr + print-code-point-utf8 screen, g loop } } @@ -631,7 +631,7 @@ fn check-screen-row screen: (addr screen), row-idx: int, expected: (addr array b fn check-screen-row-from screen-on-stack: (addr screen), row-idx: int, col-idx: int, expected: (addr array byte), msg: (addr array byte) { var screen/esi: (addr screen) <- copy screen-on-stack var idx/ecx: int <- screen-cell-index screen, row-idx, col-idx - # compare 'expected' with the screen contents starting at 'idx', grapheme by grapheme + # compare 'expected' with the screen contents starting at 'idx', code-point-utf8 by code-point-utf8 var e: (stream byte 0x100) var e-addr/edx: (addr stream byte) <- address e write e-addr, expected @@ -639,35 +639,35 @@ fn check-screen-row-from screen-on-stack: (addr screen), row-idx: int, col-idx: var done?/eax: boolean <- stream-empty? e-addr compare done?, 0 break-if-!= - var _g/eax: grapheme <- screen-grapheme-at-idx screen, idx - var g/ebx: grapheme <- copy _g - var expected-grapheme/eax: grapheme <- read-grapheme e-addr - # compare graphemes - $check-screen-row-from:compare-graphemes: { - # if expected-grapheme is space, null grapheme is also ok + var _g/eax: code-point-utf8 <- screen-code-point-utf8-at-idx screen, idx + var g/ebx: code-point-utf8 <- copy _g + var expected-code-point-utf8/eax: code-point-utf8 <- read-code-point-utf8 e-addr + # compare code-point-utf8s + $check-screen-row-from:compare-code-point-utf8s: { + # if expected-code-point-utf8 is space, null code-point-utf8 is also ok { - compare expected-grapheme, 0x20 + compare expected-code-point-utf8, 0x20 break-if-!= compare g, 0 - break-if-= $check-screen-row-from:compare-graphemes + break-if-= $check-screen-row-from:compare-code-point-utf8s } - # if (g == expected-grapheme) print "." - compare g, expected-grapheme + # if (g == expected-code-point-utf8) print "." + compare g, expected-code-point-utf8 { break-if-!= print-string-to-real-screen "." - break $check-screen-row-from:compare-graphemes + break $check-screen-row-from:compare-code-point-utf8s } # otherwise print an error print-string-to-real-screen msg print-string-to-real-screen ": expected '" - print-grapheme-to-real-screen expected-grapheme + print-code-point-utf8-to-real-screen expected-code-point-utf8 print-string-to-real-screen "' at (" print-int32-hex-to-real-screen row-idx print-string-to-real-screen ", " print-int32-hex-to-real-screen col-idx print-string-to-real-screen ") but observed '" - print-grapheme-to-real-screen g + print-code-point-utf8-to-real-screen g print-string-to-real-screen "'\n" } idx <- increment @@ -685,7 +685,7 @@ fn check-screen-row-in-color screen: (addr screen), fg: int, row-idx: int, expec fn check-screen-row-in-color-from screen-on-stack: (addr screen), fg: int, row-idx: int, col-idx: int, expected: (addr array byte), msg: (addr array byte) { var screen/esi: (addr screen) <- copy screen-on-stack var idx/ecx: int <- screen-cell-index screen, row-idx, col-idx - # compare 'expected' with the screen contents starting at 'idx', grapheme by grapheme + # compare 'expected' with the screen contents starting at 'idx', code-point-utf8 by code-point-utf8 var e: (stream byte 0x100) var e-addr/edx: (addr stream byte) <- address e write e-addr, expected @@ -693,45 +693,45 @@ fn check-screen-row-in-color-from screen-on-stack: (addr screen), fg: int, row-i var done?/eax: boolean <- stream-empty? e-addr compare done?, 0 break-if-!= - var _g/eax: grapheme <- screen-grapheme-at-idx screen, idx - var g/ebx: grapheme <- copy _g - var _expected-grapheme/eax: grapheme <- read-grapheme e-addr - var expected-grapheme/edi: grapheme <- copy _expected-grapheme + var _g/eax: code-point-utf8 <- screen-code-point-utf8-at-idx screen, idx + var g/ebx: code-point-utf8 <- copy _g + var _expected-code-point-utf8/eax: code-point-utf8 <- read-code-point-utf8 e-addr + var expected-code-point-utf8/edi: code-point-utf8 <- copy _expected-code-point-utf8 $check-screen-row-in-color-from:compare-cells: { - # if expected-grapheme is space, null grapheme is also ok + # if expected-code-point-utf8 is space, null code-point-utf8 is also ok { - compare expected-grapheme, 0x20 + compare expected-code-point-utf8, 0x20 break-if-!= compare g, 0 break-if-= $check-screen-row-in-color-from:compare-cells } - # if expected-grapheme is space, a different color is ok + # if expected-code-point-utf8 is space, a different color is ok { - compare expected-grapheme, 0x20 + compare expected-code-point-utf8, 0x20 break-if-!= var color/eax: int <- screen-color-at-idx screen, idx compare color, fg break-if-!= $check-screen-row-in-color-from:compare-cells } - # compare graphemes - $check-screen-row-in-color-from:compare-graphemes: { - # if (g == expected-grapheme) print "." - compare g, expected-grapheme + # compare code-point-utf8s + $check-screen-row-in-color-from:compare-code-point-utf8s: { + # if (g == expected-code-point-utf8) print "." + compare g, expected-code-point-utf8 { break-if-!= print-string-to-real-screen "." - break $check-screen-row-in-color-from:compare-graphemes + break $check-screen-row-in-color-from:compare-code-point-utf8s } # otherwise print an error print-string-to-real-screen msg print-string-to-real-screen ": expected '" - print-grapheme-to-real-screen expected-grapheme + print-code-point-utf8-to-real-screen expected-code-point-utf8 print-string-to-real-screen "' at (" print-int32-hex-to-real-screen row-idx print-string-to-real-screen ", " print-int32-hex-to-real-screen col-idx print-string-to-real-screen ") but observed '" - print-grapheme-to-real-screen g + print-code-point-utf8-to-real-screen g print-string-to-real-screen "'\n" } $check-screen-row-in-color-from:compare-colors: { @@ -745,7 +745,7 @@ fn check-screen-row-in-color-from screen-on-stack: (addr screen), fg: int, row-i # otherwise print an error print-string-to-real-screen msg print-string-to-real-screen ": expected '" - print-grapheme-to-real-screen expected-grapheme + print-code-point-utf8-to-real-screen expected-code-point-utf8 print-string-to-real-screen "' at (" print-int32-hex-to-real-screen row-idx print-string-to-real-screen ", " @@ -772,7 +772,7 @@ fn check-screen-row-in-background-color screen: (addr screen), bg: int, row-idx: fn check-screen-row-in-background-color-from screen-on-stack: (addr screen), bg: int, row-idx: int, col-idx: int, expected: (addr array byte), msg: (addr array byte) { var screen/esi: (addr screen) <- copy screen-on-stack var idx/ecx: int <- screen-cell-index screen, row-idx, col-idx - # compare 'expected' with the screen contents starting at 'idx', grapheme by grapheme + # compare 'expected' with the screen contents starting at 'idx', code-point-utf8 by code-point-utf8 var e: (stream byte 0x100) var e-addr/edx: (addr stream byte) <- address e write e-addr, expected @@ -780,45 +780,45 @@ fn check-screen-row-in-background-color-from screen-on-stack: (addr screen), bg: var done?/eax: boolean <- stream-empty? e-addr compare done?, 0 break-if-!= - var _g/eax: grapheme <- screen-grapheme-at-idx screen, idx - var g/ebx: grapheme <- copy _g - var _expected-grapheme/eax: grapheme <- read-grapheme e-addr - var expected-grapheme/edx: grapheme <- copy _expected-grapheme + var _g/eax: code-point-utf8 <- screen-code-point-utf8-at-idx screen, idx + var g/ebx: code-point-utf8 <- copy _g + var _expected-code-point-utf8/eax: code-point-utf8 <- read-code-point-utf8 e-addr + var expected-code-point-utf8/edx: code-point-utf8 <- copy _expected-code-point-utf8 $check-screen-row-in-background-color-from:compare-cells: { - # if expected-grapheme is space, null grapheme is also ok + # if expected-code-point-utf8 is space, null code-point-utf8 is also ok { - compare expected-grapheme, 0x20 + compare expected-code-point-utf8, 0x20 break-if-!= compare g, 0 break-if-= $check-screen-row-in-background-color-from:compare-cells } - # if expected-grapheme is space, a different color is ok + # if expected-code-point-utf8 is space, a different color is ok { - compare expected-grapheme, 0x20 + compare expected-code-point-utf8, 0x20 break-if-!= var color/eax: int <- screen-background-color-at-idx screen, idx compare color, bg break-if-!= $check-screen-row-in-background-color-from:compare-cells } - # compare graphemes - $check-screen-row-in-background-color-from:compare-graphemes: { - # if (g == expected-grapheme) print "." - compare g, expected-grapheme + # compare code-point-utf8s + $check-screen-row-in-background-color-from:compare-code-point-utf8s: { + # if (g == expected-code-point-utf8) print "." + compare g, expected-code-point-utf8 { break-if-!= print-string-to-real-screen "." - break $check-screen-row-in-background-color-from:compare-graphemes + break $check-screen-row-in-background-color-from:compare-code-point-utf8s } # otherwise print an error print-string-to-real-screen msg print-string-to-real-screen ": expected '" - print-grapheme-to-real-screen expected-grapheme + print-code-point-utf8-to-real-screen expected-code-point-utf8 print-string-to-real-screen "' at (" print-int32-hex-to-real-screen row-idx print-string-to-real-screen ", " print-int32-hex-to-real-screen col-idx print-string-to-real-screen ") but observed '" - print-grapheme-to-real-screen g + print-code-point-utf8-to-real-screen g print-string-to-real-screen "'\n" } $check-screen-row-in-background-color-from:compare-colors: { @@ -832,7 +832,7 @@ fn check-screen-row-in-background-color-from screen-on-stack: (addr screen), bg: # otherwise print an error print-string-to-real-screen msg print-string-to-real-screen ": expected '" - print-grapheme-to-real-screen expected-grapheme + print-code-point-utf8-to-real-screen expected-code-point-utf8 print-string-to-real-screen "' at (" print-int32-hex-to-real-screen row-idx print-string-to-real-screen ", " @@ -857,7 +857,7 @@ fn check-screen-row-in-bold screen: (addr screen), row-idx: int, expected: (addr fn check-screen-row-in-bold-from screen-on-stack: (addr screen), row-idx: int, col-idx: int, expected: (addr array byte), msg: (addr array byte) { var screen/esi: (addr screen) <- copy screen-on-stack var idx/ecx: int <- screen-cell-index screen, row-idx, col-idx - # compare 'expected' with the screen contents starting at 'idx', grapheme by grapheme + # compare 'expected' with the screen contents starting at 'idx', code-point-utf8 by code-point-utf8 var e: (stream byte 0x100) var e-addr/edx: (addr stream byte) <- address e write e-addr, expected @@ -865,45 +865,45 @@ fn check-screen-row-in-bold-from screen-on-stack: (addr screen), row-idx: int, c var done?/eax: boolean <- stream-empty? e-addr compare done?, 0 break-if-!= - var _g/eax: grapheme <- screen-grapheme-at-idx screen, idx - var g/ebx: grapheme <- copy _g - var _expected-grapheme/eax: grapheme <- read-grapheme e-addr - var expected-grapheme/edx: grapheme <- copy _expected-grapheme + var _g/eax: code-point-utf8 <- screen-code-point-utf8-at-idx screen, idx + var g/ebx: code-point-utf8 <- copy _g + var _expected-code-point-utf8/eax: code-point-utf8 <- read-code-point-utf8 e-addr + var expected-code-point-utf8/edx: code-point-utf8 <- copy _expected-code-point-utf8 $check-screen-row-in-bold-from:compare-cells: { - # if expected-grapheme is space, null grapheme is also ok + # if expected-code-point-utf8 is space, null code-point-utf8 is also ok { - compare expected-grapheme, 0x20 + compare expected-code-point-utf8, 0x20 break-if-!= compare g, 0 break-if-= $check-screen-row-in-bold-from:compare-cells } - # if expected-grapheme is space, non-bold is ok + # if expected-code-point-utf8 is space, non-bold is ok { - compare expected-grapheme, 0x20 + compare expected-code-point-utf8, 0x20 break-if-!= var bold?/eax: boolean <- screen-bold-at-idx? screen, idx compare bold?, 1 break-if-!= $check-screen-row-in-bold-from:compare-cells } - # compare graphemes - $check-screen-row-in-bold-from:compare-graphemes: { - # if (g == expected-grapheme) print "." - compare g, expected-grapheme + # compare code-point-utf8s + $check-screen-row-in-bold-from:compare-code-point-utf8s: { + # if (g == expected-code-point-utf8) print "." + compare g, expected-code-point-utf8 { break-if-!= print-string-to-real-screen "." - break $check-screen-row-in-bold-from:compare-graphemes + break $check-screen-row-in-bold-from:compare-code-point-utf8s } # otherwise print an error print-string-to-real-screen msg print-string-to-real-screen ": expected '" - print-grapheme-to-real-screen expected-grapheme + print-code-point-utf8-to-real-screen expected-code-point-utf8 print-string-to-real-screen "' at (" print-int32-hex-to-real-screen row-idx print-string-to-real-screen ", " print-int32-hex-to-real-screen col-idx print-string-to-real-screen ") but observed '" - print-grapheme-to-real-screen g + print-code-point-utf8-to-real-screen g print-string-to-real-screen "'\n" } $check-screen-row-in-bold-from:compare-bold: { @@ -917,7 +917,7 @@ fn check-screen-row-in-bold-from screen-on-stack: (addr screen), row-idx: int, c # otherwise print an error print-string-to-real-screen msg print-string-to-real-screen ": expected '" - print-grapheme-to-real-screen expected-grapheme + print-code-point-utf8-to-real-screen expected-code-point-utf8 print-string-to-real-screen "' at (" print-int32-hex-to-real-screen row-idx print-string-to-real-screen ", " @@ -938,7 +938,7 @@ fn check-screen-row-in-underline screen: (addr screen), row-idx: int, expected: fn check-screen-row-in-underline-from screen-on-stack: (addr screen), row-idx: int, col-idx: int, expected: (addr array byte), msg: (addr array byte) { var screen/esi: (addr screen) <- copy screen-on-stack var idx/ecx: int <- screen-cell-index screen, row-idx, col-idx - # compare 'expected' with the screen contents starting at 'idx', grapheme by grapheme + # compare 'expected' with the screen contents starting at 'idx', code-point-utf8 by code-point-utf8 var e: (stream byte 0x100) var e-addr/edx: (addr stream byte) <- address e write e-addr, expected @@ -946,45 +946,45 @@ fn check-screen-row-in-underline-from screen-on-stack: (addr screen), row-idx: i var done?/eax: boolean <- stream-empty? e-addr compare done?, 0 break-if-!= - var _g/eax: grapheme <- screen-grapheme-at-idx screen, idx - var g/ebx: grapheme <- copy _g - var _expected-grapheme/eax: grapheme <- read-grapheme e-addr - var expected-grapheme/edx: grapheme <- copy _expected-grapheme + var _g/eax: code-point-utf8 <- screen-code-point-utf8-at-idx screen, idx + var g/ebx: code-point-utf8 <- copy _g + var _expected-code-point-utf8/eax: code-point-utf8 <- read-code-point-utf8 e-addr + var expected-code-point-utf8/edx: code-point-utf8 <- copy _expected-code-point-utf8 $check-screen-row-in-underline-from:compare-cells: { - # if expected-grapheme is space, null grapheme is also ok + # if expected-code-point-utf8 is space, null code-point-utf8 is also ok { - compare expected-grapheme, 0x20 + compare expected-code-point-utf8, 0x20 break-if-!= compare g, 0 break-if-= $check-screen-row-in-underline-from:compare-cells } - # if expected-grapheme is space, non-underline is ok + # if expected-code-point-utf8 is space, non-underline is ok { - compare expected-grapheme, 0x20 + compare expected-code-point-utf8, 0x20 break-if-!= var underline?/eax: boolean <- screen-underline-at-idx? screen, idx compare underline?, 1 break-if-!= $check-screen-row-in-underline-from:compare-cells } - # compare graphemes - $check-screen-row-in-underline-from:compare-graphemes: { - # if (g == expected-grapheme) print "." - compare g, expected-grapheme + # compare code-point-utf8s + $check-screen-row-in-underline-from:compare-code-point-utf8s: { + # if (g == expected-code-point-utf8) print "." + compare g, expected-code-point-utf8 { break-if-!= print-string-to-real-screen "." - break $check-screen-row-in-underline-from:compare-graphemes + break $check-screen-row-in-underline-from:compare-code-point-utf8s } # otherwise print an error print-string-to-real-screen msg print-string-to-real-screen ": expected '" - print-grapheme-to-real-screen expected-grapheme + print-code-point-utf8-to-real-screen expected-code-point-utf8 print-string-to-real-screen "' at (" print-int32-hex-to-real-screen row-idx print-string-to-real-screen ", " print-int32-hex-to-real-screen col-idx print-string-to-real-screen ") but observed '" - print-grapheme-to-real-screen g + print-code-point-utf8-to-real-screen g print-string-to-real-screen "'\n" } $check-screen-row-in-underline-from:compare-underline: { @@ -998,7 +998,7 @@ fn check-screen-row-in-underline-from screen-on-stack: (addr screen), row-idx: i # otherwise print an error print-string-to-real-screen msg print-string-to-real-screen ": expected '" - print-grapheme-to-real-screen expected-grapheme + print-code-point-utf8-to-real-screen expected-code-point-utf8 print-string-to-real-screen "' at (" print-int32-hex-to-real-screen row-idx print-string-to-real-screen ", " @@ -1019,7 +1019,7 @@ fn check-screen-row-in-reverse screen: (addr screen), row-idx: int, expected: (a fn check-screen-row-in-reverse-from screen-on-stack: (addr screen), row-idx: int, col-idx: int, expected: (addr array byte), msg: (addr array byte) { var screen/esi: (addr screen) <- copy screen-on-stack var idx/ecx: int <- screen-cell-index screen, row-idx, col-idx - # compare 'expected' with the screen contents starting at 'idx', grapheme by grapheme + # compare 'expected' with the screen contents starting at 'idx', code-point-utf8 by code-point-utf8 var e: (stream byte 0x100) var e-addr/edx: (addr stream byte) <- address e write e-addr, expected @@ -1027,45 +1027,45 @@ fn check-screen-row-in-reverse-from screen-on-stack: (addr screen), row-idx: int var done?/eax: boolean <- stream-empty? e-addr compare done?, 0 break-if-!= - var _g/eax: grapheme <- screen-grapheme-at-idx screen, idx - var g/ebx: grapheme <- copy _g - var _expected-grapheme/eax: grapheme <- read-grapheme e-addr - var expected-grapheme/edx: grapheme <- copy _expected-grapheme + var _g/eax: code-point-utf8 <- screen-code-point-utf8-at-idx screen, idx + var g/ebx: code-point-utf8 <- copy _g + var _expected-code-point-utf8/eax: code-point-utf8 <- read-code-point-utf8 e-addr + var expected-code-point-utf8/edx: code-point-utf8 <- copy _expected-code-point-utf8 $check-screen-row-in-reverse-from:compare-cells: { - # if expected-grapheme is space, null grapheme is also ok + # if expected-code-point-utf8 is space, null code-point-utf8 is also ok { - compare expected-grapheme, 0x20 + compare expected-code-point-utf8, 0x20 break-if-!= compare g, 0 break-if-= $check-screen-row-in-reverse-from:compare-cells } - # if expected-grapheme is space, non-reverse is ok + # if expected-code-point-utf8 is space, non-reverse is ok { - compare expected-grapheme, 0x20 + compare expected-code-point-utf8, 0x20 break-if-!= var reverse?/eax: boolean <- screen-reverse-at-idx? screen, idx compare reverse?, 1 break-if-!= $check-screen-row-in-reverse-from:compare-cells } - # compare graphemes - $check-screen-row-in-reverse-from:compare-graphemes: { - # if (g == expected-grapheme) print "." - compare g, expected-grapheme + # compare code-point-utf8s + $check-screen-row-in-reverse-from:compare-code-point-utf8s: { + # if (g == expected-code-point-utf8) print "." + compare g, expected-code-point-utf8 { break-if-!= print-string-to-real-screen "." - break $check-screen-row-in-reverse-from:compare-graphemes + break $check-screen-row-in-reverse-from:compare-code-point-utf8s } # otherwise print an error print-string-to-real-screen msg print-string-to-real-screen ": expected '" - print-grapheme-to-real-screen expected-grapheme + print-code-point-utf8-to-real-screen expected-code-point-utf8 print-string-to-real-screen "' at (" print-int32-hex-to-real-screen row-idx print-string-to-real-screen ", " print-int32-hex-to-real-screen col-idx print-string-to-real-screen ") but observed '" - print-grapheme-to-real-screen g + print-code-point-utf8-to-real-screen g print-string-to-real-screen "'\n" } $check-screen-row-in-reverse-from:compare-reverse: { @@ -1079,7 +1079,7 @@ fn check-screen-row-in-reverse-from screen-on-stack: (addr screen), row-idx: int # otherwise print an error print-string-to-real-screen msg print-string-to-real-screen ": expected '" - print-grapheme-to-real-screen expected-grapheme + print-code-point-utf8-to-real-screen expected-code-point-utf8 print-string-to-real-screen "' at (" print-int32-hex-to-real-screen row-idx print-string-to-real-screen ", " @@ -1100,7 +1100,7 @@ fn check-screen-row-in-blinking screen: (addr screen), row-idx: int, expected: ( fn check-screen-row-in-blinking-from screen-on-stack: (addr screen), row-idx: int, col-idx: int, expected: (addr array byte), msg: (addr array byte) { var screen/esi: (addr screen) <- copy screen-on-stack var idx/ecx: int <- screen-cell-index screen, row-idx, col-idx - # compare 'expected' with the screen contents starting at 'idx', grapheme by grapheme + # compare 'expected' with the screen contents starting at 'idx', code-point-utf8 by code-point-utf8 var e: (stream byte 0x100) var e-addr/edx: (addr stream byte) <- address e write e-addr, expected @@ -1108,45 +1108,45 @@ fn check-screen-row-in-blinking-from screen-on-stack: (addr screen), row-idx: in var done?/eax: boolean <- stream-empty? e-addr compare done?, 0 break-if-!= - var _g/eax: grapheme <- screen-grapheme-at-idx screen, idx - var g/ebx: grapheme <- copy _g - var _expected-grapheme/eax: grapheme <- read-grapheme e-addr - var expected-grapheme/edx: grapheme <- copy _expected-grapheme + var _g/eax: code-point-utf8 <- screen-code-point-utf8-at-idx screen, idx + var g/ebx: code-point-utf8 <- copy _g + var _expected-code-point-utf8/eax: code-point-utf8 <- read-code-point-utf8 e-addr + var expected-code-point-utf8/edx: code-point-utf8 <- copy _expected-code-point-utf8 $check-screen-row-in-blinking-from:compare-cells: { - # if expected-grapheme is space, null grapheme is also ok + # if expected-code-point-utf8 is space, null code-point-utf8 is also ok { - compare expected-grapheme, 0x20 + compare expected-code-point-utf8, 0x20 break-if-!= compare g, 0 break-if-= $check-screen-row-in-blinking-from:compare-cells } - # if expected-grapheme is space, non-blinking is ok + # if expected-code-point-utf8 is space, non-blinking is ok { - compare expected-grapheme, 0x20 + compare expected-code-point-utf8, 0x20 break-if-!= var blinking?/eax: boolean <- screen-blink-at-idx? screen, idx compare blinking?, 1 break-if-!= $check-screen-row-in-blinking-from:compare-cells } - # compare graphemes - $check-screen-row-in-blinking-from:compare-graphemes: { - # if (g == expected-grapheme) print "." - compare g, expected-grapheme + # compare code-point-utf8s + $check-screen-row-in-blinking-from:compare-code-point-utf8s: { + # if (g == expected-code-point-utf8) print "." + compare g, expected-code-point-utf8 { break-if-!= print-string-to-real-screen "." - break $check-screen-row-in-blinking-from:compare-graphemes + break $check-screen-row-in-blinking-from:compare-code-point-utf8s } # otherwise print an error print-string-to-real-screen msg print-string-to-real-screen ": expected '" - print-grapheme-to-real-screen expected-grapheme + print-code-point-utf8-to-real-screen expected-code-point-utf8 print-string-to-real-screen "' at (" print-int32-hex-to-real-screen row-idx print-string-to-real-screen ", " print-int32-hex-to-real-screen col-idx print-string-to-real-screen ") but observed '" - print-grapheme-to-real-screen g + print-code-point-utf8-to-real-screen g print-string-to-real-screen "'\n" } $check-screen-row-in-blinking-from:compare-blinking: { @@ -1160,7 +1160,7 @@ fn check-screen-row-in-blinking-from screen-on-stack: (addr screen), row-idx: in # otherwise print an error print-string-to-real-screen msg print-string-to-real-screen ": expected '" - print-grapheme-to-real-screen expected-grapheme + print-code-point-utf8-to-real-screen expected-code-point-utf8 print-string-to-real-screen "' at (" print-int32-hex-to-real-screen row-idx print-string-to-real-screen ", " @@ -1175,21 +1175,21 @@ fn check-screen-row-in-blinking-from screen-on-stack: (addr screen), row-idx: in } } -fn test-print-single-grapheme { +fn test-print-single-code-point-utf8 { var screen-on-stack: screen var screen/esi: (addr screen) <- address screen-on-stack initialize-screen screen, 5/rows, 4/cols - var c/eax: grapheme <- copy 0x61/a - print-grapheme screen, c - check-screen-row screen, 1/row, "a", "F - test-print-single-grapheme" # top-left corner of the screen + var c/eax: code-point-utf8 <- copy 0x61/a + print-code-point-utf8 screen, c + check-screen-row screen, 1/row, "a", "F - test-print-single-code-point-utf8" # top-left corner of the screen } -fn test-print-multiple-graphemes { +fn test-print-multiple-code-point-utf8s { var screen-on-stack: screen var screen/esi: (addr screen) <- address screen-on-stack initialize-screen screen, 5/rows, 4/cols print-string screen, "Hello, 世界" - check-screen-row screen, 1/row, "Hello, 世界", "F - test-print-multiple-graphemes" + check-screen-row screen, 1/row, "Hello, 世界", "F - test-print-multiple-code-point-utf8s" } fn test-move-cursor { @@ -1197,8 +1197,8 @@ fn test-move-cursor { var screen/esi: (addr screen) <- address screen-on-stack initialize-screen screen, 5/rows, 4/cols move-cursor screen, 1, 4 - var c/eax: grapheme <- copy 0x61/a - print-grapheme screen, c + var c/eax: code-point-utf8 <- copy 0x61/a + print-code-point-utf8 screen, c check-screen-row screen, 1/row, " a", "F - test-move-cursor" # top row } @@ -1207,8 +1207,8 @@ fn test-move-cursor-zeroes { var screen/esi: (addr screen) <- address screen-on-stack initialize-screen screen, 5/rows, 4/cols move-cursor screen, 0, 0 - var c/eax: grapheme <- copy 0x61/a - print-grapheme screen, c + var c/eax: code-point-utf8 <- copy 0x61/a + print-code-point-utf8 screen, c check-screen-row screen, 1/row, "a", "F - test-move-cursor-zeroes" # top-left corner of the screen } @@ -1217,8 +1217,8 @@ fn test-move-cursor-zero-row { var screen/esi: (addr screen) <- address screen-on-stack initialize-screen screen, 5/rows, 4/cols move-cursor screen, 0, 2 - var c/eax: grapheme <- copy 0x61/a - print-grapheme screen, c + var c/eax: code-point-utf8 <- copy 0x61/a + print-code-point-utf8 screen, c check-screen-row screen, 1/row, " a", "F - test-move-cursor-zero-row" # top row } @@ -1227,8 +1227,8 @@ fn test-move-cursor-zero-column { var screen/esi: (addr screen) <- address screen-on-stack initialize-screen screen, 5/rows, 4/cols move-cursor screen, 4, 0 - var c/eax: grapheme <- copy 0x61/a - print-grapheme screen, c + var c/eax: code-point-utf8 <- copy 0x61/a + print-code-point-utf8 screen, c check-screen-row screen, 4/row, "a", "F - test-move-cursor-zero-column" } @@ -1237,8 +1237,8 @@ fn test-move-cursor-negative-row { var screen/esi: (addr screen) <- address screen-on-stack initialize-screen screen, 5, 3 move-cursor screen, -1/row, 2/col - var c/eax: grapheme <- copy 0x61/a - print-grapheme screen, c + var c/eax: code-point-utf8 <- copy 0x61/a + print-code-point-utf8 screen, c # no move check-screen-row screen, 1/row, "a", "F - test-move-cursor-negative-row" } @@ -1248,8 +1248,8 @@ fn test-move-cursor-negative-column { var screen/esi: (addr screen) <- address screen-on-stack initialize-screen screen, 5, 3 move-cursor screen, 2/row, -1/col - var c/eax: grapheme <- copy 0x61/a - print-grapheme screen, c + var c/eax: code-point-utf8 <- copy 0x61/a + print-code-point-utf8 screen, c # no move check-screen-row screen, 1/row, "a", "F - test-move-cursor-negative-column" } @@ -1259,8 +1259,8 @@ fn test-move-cursor-column-too-large { var screen/esi: (addr screen) <- address screen-on-stack initialize-screen screen, 5/rows, 3/cols move-cursor screen, 1/row, 4/col - var c/eax: grapheme <- copy 0x61/a - print-grapheme screen, c + var c/eax: code-point-utf8 <- copy 0x61/a + print-code-point-utf8 screen, c # top row is empty check-screen-row screen, 1/row, " ", "F - test-move-cursor-column-too-large" # character shows up on next row @@ -1272,8 +1272,8 @@ fn test-move-cursor-column-too-large-saturates { var screen/esi: (addr screen) <- address screen-on-stack initialize-screen screen, 5/rows, 3/cols move-cursor screen, 1/row, 6/col - var c/eax: grapheme <- copy 0x61/a - print-grapheme screen, c + var c/eax: code-point-utf8 <- copy 0x61/a + print-code-point-utf8 screen, c # top row is empty check-screen-row screen, 1/row, " ", "F - test-move-cursor-column-too-large-saturates" # top-left corner of the screen # character shows up at the start of next row @@ -1285,8 +1285,8 @@ fn test-move-cursor-row-too-large { var screen/esi: (addr screen) <- address screen-on-stack initialize-screen screen, 5/rows, 3/cols move-cursor screen, 6/row, 2/col - var c/eax: grapheme <- copy 0x61/a - print-grapheme screen, c + var c/eax: code-point-utf8 <- copy 0x61/a + print-code-point-utf8 screen, c # bottom row shows the character check-screen-row screen, 5/row, " a", "F - test-move-cursor-row-too-large" } @@ -1296,8 +1296,8 @@ fn test-move-cursor-row-too-large-saturates { var screen/esi: (addr screen) <- address screen-on-stack initialize-screen screen, 5/rows, 3/cols move-cursor screen, 9/row, 2/col - var c/eax: grapheme <- copy 0x61/a - print-grapheme screen, c + var c/eax: code-point-utf8 <- copy 0x61/a + print-code-point-utf8 screen, c # bottom row shows the character check-screen-row screen, 5/row, " a", "F - test-move-cursor-row-too-large-saturates" } @@ -1307,8 +1307,8 @@ fn test-check-screen-row-from { var screen/esi: (addr screen) <- address screen-on-stack initialize-screen screen, 5/rows, 4/cols move-cursor screen, 1, 4 - var c/eax: grapheme <- copy 0x61/a - print-grapheme screen, c + var c/eax: code-point-utf8 <- copy 0x61/a + print-code-point-utf8 screen, c check-screen-row screen, 1/row, " a", "F - test-check-screen-row-from/baseline" check-screen-row-from screen, 1/row, 4/col, "a", "F - test-check-screen-row-from" } @@ -1328,8 +1328,8 @@ fn test-check-screen-scrolls-on-overflow { initialize-screen screen, 5/rows, 4/cols # single character starting at bottom right move-cursor screen, 5/rows, 4/cols - var c/eax: grapheme <- copy 0x61/a - print-grapheme screen, c + var c/eax: code-point-utf8 <- copy 0x61/a + print-code-point-utf8 screen, c check-screen-row-from screen, 5/row, 4/col, "a", "F - test-check-screen-scrolls-on-overflow/baseline" # bottom-right corner of the screen # multiple characters starting at bottom right move-cursor screen, 5, 4 @@ -1348,14 +1348,14 @@ fn test-check-screen-color { var screen-on-stack: screen var screen/esi: (addr screen) <- address screen-on-stack initialize-screen screen, 5/rows, 4/cols - var c/eax: grapheme <- copy 0x61/a - print-grapheme screen, c + var c/eax: code-point-utf8 <- copy 0x61/a + print-code-point-utf8 screen, c start-color screen, 1/fg, 0/bg c <- copy 0x62/b - print-grapheme screen, c + print-code-point-utf8 screen, c start-color screen, 0/fg, 7/bg c <- copy 0x63/c - print-grapheme screen, c + print-code-point-utf8 screen, c check-screen-row-in-color screen, 0/fg, 1/row, "a c", "F - test-check-screen-color" } @@ -1363,14 +1363,14 @@ fn test-check-screen-background-color { var screen-on-stack: screen var screen/esi: (addr screen) <- address screen-on-stack initialize-screen screen, 5/rows, 4/cols - var c/eax: grapheme <- copy 0x61/a - print-grapheme screen, c + var c/eax: code-point-utf8 <- copy 0x61/a + print-code-point-utf8 screen, c start-color screen, 0/fg, 1/bg c <- copy 0x62/b - print-grapheme screen, c + print-code-point-utf8 screen, c start-color screen, 0/fg, 7/bg c <- copy 0x63/c - print-grapheme screen, c + print-code-point-utf8 screen, c check-screen-row-in-background-color screen, 7/bg, 1/row, "a c", "F - test-check-screen-background-color" } @@ -1379,14 +1379,14 @@ fn test-check-screen-bold { var screen/esi: (addr screen) <- address screen-on-stack initialize-screen screen, 5/rows, 4/cols start-bold screen - var c/eax: grapheme <- copy 0x61/a - print-grapheme screen, c + var c/eax: code-point-utf8 <- copy 0x61/a + print-code-point-utf8 screen, c reset-formatting screen c <- copy 0x62/b - print-grapheme screen, c + print-code-point-utf8 screen, c start-bold screen c <- copy 0x63/c - print-grapheme screen, c + print-code-point-utf8 screen, c check-screen-row-in-bold screen, 1/row, "a c", "F - test-check-screen-bold" } @@ -1395,14 +1395,14 @@ fn test-check-screen-underline { var screen/esi: (addr screen) <- address screen-on-stack initialize-screen screen, 5/rows, 4/cols start-underline screen - var c/eax: grapheme <- copy 0x61/a - print-grapheme screen, c + var c/eax: code-point-utf8 <- copy 0x61/a + print-code-point-utf8 screen, c reset-formatting screen c <- copy 0x62/b - print-grapheme screen, c + print-code-point-utf8 screen, c start-underline screen c <- copy 0x63/c - print-grapheme screen, c + print-code-point-utf8 screen, c check-screen-row-in-underline screen, 1/row, "a c", "F - test-check-screen-underline" } @@ -1411,14 +1411,14 @@ fn test-check-screen-reverse { var screen/esi: (addr screen) <- address screen-on-stack initialize-screen screen, 5/rows, 4/cols start-reverse-video screen - var c/eax: grapheme <- copy 0x61/a - print-grapheme screen, c + var c/eax: code-point-utf8 <- copy 0x61/a + print-code-point-utf8 screen, c reset-formatting screen c <- copy 0x62/b - print-grapheme screen, c + print-code-point-utf8 screen, c start-reverse-video screen c <- copy 0x63/c - print-grapheme screen, c + print-code-point-utf8 screen, c check-screen-row-in-reverse screen, 1/row, "a c", "F - test-check-screen-reverse" } @@ -1427,14 +1427,14 @@ fn test-check-screen-blinking { var screen/esi: (addr screen) <- address screen-on-stack initialize-screen screen, 5/rows, 4/cols start-blinking screen - var c/eax: grapheme <- copy 0x61/a - print-grapheme screen, c + var c/eax: code-point-utf8 <- copy 0x61/a + print-code-point-utf8 screen, c reset-formatting screen c <- copy 0x62/b - print-grapheme screen, c + print-code-point-utf8 screen, c start-blinking screen c <- copy 0x63/c - print-grapheme screen, c + print-code-point-utf8 screen, c check-screen-row-in-blinking screen, 1/row, "a c", "F - test-check-screen-blinking" } diff --git a/linux/407right-justify.mu b/linux/407right-justify.mu index b7322ae5..aa767782 100644 --- a/linux/407right-justify.mu +++ b/linux/407right-justify.mu @@ -6,7 +6,7 @@ fn print-int32-decimal-right-justified screen: (addr screen), n: int, _width: in { compare n-width, width break-if->= - print-grapheme screen, 0x20/space + print-code-point-utf8 screen, 0x20/space width <- decrement loop } diff --git a/linux/411string.mu b/linux/411string.mu index cf0471ac..493c9b56 100644 --- a/linux/411string.mu +++ b/linux/411string.mu @@ -1,4 +1,4 @@ -# read up to 'len' graphemes after skipping the first 'start' ones +# read up to 'len' code-point-utf8s after skipping the first 'start' ones fn substring in: (addr array byte), start: int, len: int, out-ah: (addr handle array byte) { var in-stream: (stream byte 0x100) var in-stream-addr/esi: (addr stream byte) <- address in-stream @@ -6,29 +6,29 @@ fn substring in: (addr array byte), start: int, len: int, out-ah: (addr handle a var out-stream: (stream byte 0x100) var out-stream-addr/edi: (addr stream byte) <- address out-stream $substring:core: { - # skip 'start' graphemes + # skip 'start' code-point-utf8s var i/eax: int <- copy 0 { compare i, start break-if->= { - var dummy/eax: grapheme <- read-grapheme in-stream-addr + var dummy/eax: code-point-utf8 <- read-code-point-utf8 in-stream-addr compare dummy, 0xffffffff/end-of-file break-if-= $substring:core } i <- increment loop } - # copy 'len' graphemes + # copy 'len' code-point-utf8s i <- copy 0 { compare i, len break-if->= { - var g/eax: grapheme <- read-grapheme in-stream-addr + var g/eax: code-point-utf8 <- read-code-point-utf8 in-stream-addr compare g, 0xffffffff/end-of-file break-if-= $substring:core - write-grapheme out-stream-addr, g + write-code-point-utf8 out-stream-addr, g } i <- increment loop @@ -85,7 +85,7 @@ fn test-substring { check-strings-equal out, "bcde", "F - test-substring/middle-too-small" } -fn split-string in: (addr array byte), delim: grapheme, out: (addr handle array (handle array byte)) { +fn split-string in: (addr array byte), delim: code-point-utf8, out: (addr handle array (handle array byte)) { var in-stream: (stream byte 0x100) var in-stream-addr/esi: (addr stream byte) <- address in-stream write in-stream-addr, in @@ -94,10 +94,10 @@ fn split-string in: (addr array byte), delim: grapheme, out: (addr handle array var curr-stream: (stream byte 0x100) var curr-stream-addr/ecx: (addr stream byte) <- address curr-stream $split-string:core: { - var g/eax: grapheme <- read-grapheme in-stream-addr + var g/eax: code-point-utf8 <- read-code-point-utf8 in-stream-addr compare g, 0xffffffff break-if-= -#? print-grapheme-to-real-screen g +#? print-code-point-utf8-to-real-screen g #? print-string-to-real-screen "\n" compare g, delim { @@ -110,7 +110,7 @@ fn split-string in: (addr array byte), delim: grapheme, out: (addr handle array clear-stream curr-stream-addr loop $split-string:core } - write-grapheme curr-stream-addr, g + write-code-point-utf8 curr-stream-addr, g loop } stream-to-array tokens-stream-addr, out diff --git a/linux/apps/arith.mu b/linux/apps/arith.mu index 4393a34c..08b2008d 100644 --- a/linux/apps/arith.mu +++ b/linux/apps/arith.mu @@ -33,7 +33,7 @@ fn main -> _/ebx: int { enable-keyboard-immediate-mode - var look/esi: grapheme <- copy 0 # lookahead + var look/esi: code-point-utf8 <- copy 0 # lookahead var n/eax: int <- copy 0 # result of each expression print-string 0/screen, "press ctrl-c or ctrl-d to exit\n" # read-eval-print loop @@ -55,17 +55,17 @@ fn main -> _/ebx: int { return 0 } -fn simplify -> _/eax: int, _/esi: grapheme { +fn simplify -> _/eax: int, _/esi: code-point-utf8 { # prime the pump - var look/esi: grapheme <- get-char + var look/esi: code-point-utf8 <- get-char # do it var result/eax: int <- copy 0 result, look <- expression look return result, look } -fn expression _look: grapheme -> _/eax: int, _/esi: grapheme { - var look/esi: grapheme <- copy _look +fn expression _look: code-point-utf8 -> _/eax: int, _/esi: code-point-utf8 { + var look/esi: code-point-utf8 <- copy _look # read arg var result/eax: int <- copy 0 result, look <- term look @@ -78,7 +78,7 @@ fn expression _look: grapheme -> _/eax: int, _/esi: grapheme { break-if-= $expression:loop } # read operator - var op/ecx: grapheme <- copy 0 + var op/ecx: code-point-utf8 <- copy 0 op, look <- operator look # read next arg var second/edx: int <- copy 0 @@ -109,8 +109,8 @@ fn expression _look: grapheme -> _/eax: int, _/esi: grapheme { return result, look } -fn term _look: grapheme -> _/eax: int, _/esi: grapheme { - var look/esi: grapheme <- copy _look +fn term _look: code-point-utf8 -> _/eax: int, _/esi: code-point-utf8 { + var look/esi: code-point-utf8 <- copy _look # read arg look <- skip-spaces look var result/eax: int <- copy 0 @@ -124,7 +124,7 @@ fn term _look: grapheme -> _/eax: int, _/esi: grapheme { break-if-= $term:loop } # read operator - var op/ecx: grapheme <- copy 0 + var op/ecx: code-point-utf8 <- copy 0 op, look <- operator look # read next arg var second/edx: int <- copy 0 @@ -154,8 +154,8 @@ fn term _look: grapheme -> _/eax: int, _/esi: grapheme { return result, look } -fn factor _look: grapheme -> _/eax: int, _/esi: grapheme { - var look/esi: grapheme <- copy _look # should be a no-op +fn factor _look: code-point-utf8 -> _/eax: int, _/esi: code-point-utf8 { + var look/esi: code-point-utf8 <- copy _look # should be a no-op look <- skip-spaces look # if next char is not '(', parse a number compare look, 0x28/open-paren @@ -174,7 +174,7 @@ fn factor _look: grapheme -> _/eax: int, _/esi: grapheme { return result, look } -fn mul-or-div? c: grapheme -> _/eax: boolean { +fn mul-or-div? c: code-point-utf8 -> _/eax: boolean { compare c, 0x2a/* { break-if-!= @@ -188,7 +188,7 @@ fn mul-or-div? c: grapheme -> _/eax: boolean { return 0/false } -fn add-or-sub? c: grapheme -> _/eax: boolean { +fn add-or-sub? c: code-point-utf8 -> _/eax: boolean { compare c, 0x2b/+ { break-if-!= @@ -202,14 +202,14 @@ fn add-or-sub? c: grapheme -> _/eax: boolean { return 0/false } -fn operator _look: grapheme -> _/ecx: grapheme, _/esi: grapheme { - var op/ecx: grapheme <- copy _look - var look/esi: grapheme <- get-char +fn operator _look: code-point-utf8 -> _/ecx: code-point-utf8, _/esi: code-point-utf8 { + var op/ecx: code-point-utf8 <- copy _look + var look/esi: code-point-utf8 <- get-char return op, look } -fn num _look: grapheme -> _/eax: int, _/esi: grapheme { - var look/esi: grapheme <- copy _look +fn num _look: code-point-utf8 -> _/eax: int, _/esi: code-point-utf8 { + var look/esi: code-point-utf8 <- copy _look var result/edi: int <- copy 0 { var first-digit/eax: int <- to-decimal-digit look @@ -234,8 +234,8 @@ fn num _look: grapheme -> _/eax: int, _/esi: grapheme { return result, look } -fn skip-spaces _look: grapheme -> _/esi: grapheme { - var look/esi: grapheme <- copy _look # should be a no-op +fn skip-spaces _look: code-point-utf8 -> _/esi: code-point-utf8 { + var look/esi: code-point-utf8 <- copy _look # should be a no-op { compare look, 0x20 break-if-!= @@ -245,9 +245,9 @@ fn skip-spaces _look: grapheme -> _/esi: grapheme { return look } -fn get-char -> _/esi: grapheme { - var look/eax: grapheme <- read-key-from-real-keyboard - print-grapheme-to-real-screen look +fn get-char -> _/esi: code-point-utf8 { + var look/eax: code-point-utf8 <- read-key-from-real-keyboard + print-code-point-utf8-to-real-screen look compare look, 4 { break-if-!= diff --git a/linux/apps/parse-int.mu b/linux/apps/parse-int.mu index 0f8c71d1..ccff8d44 100644 --- a/linux/apps/parse-int.mu +++ b/linux/apps/parse-int.mu @@ -37,7 +37,7 @@ fn parse-int _in: (addr array byte) -> _/eax: int { var tmp/ebx: (addr byte) <- index in, i var c/eax: byte <- copy-byte *tmp # - var g/eax: grapheme <- copy c + var g/eax: code-point-utf8 <- copy c var digit/eax: int <- to-decimal-digit g result <- add digit i <- increment diff --git a/linux/apps/print-file.mu b/linux/apps/print-file.mu index 75ce2e39..284b805e 100644 --- a/linux/apps/print-file.mu +++ b/linux/apps/print-file.mu @@ -30,8 +30,8 @@ fn main _args: (addr array addr array byte) -> _/ebx: int { var c/eax: byte <- read-byte-buffered in-addr compare c, 0xffffffff/end-of-file break-if-= - var g/eax: grapheme <- copy c - print-grapheme 0/screen, g + var g/eax: code-point-utf8 <- copy c + print-code-point-utf8 0/screen, g loop } } diff --git a/linux/apps/tui.mu b/linux/apps/tui.mu index 4e58b986..f4fc914c 100644 --- a/linux/apps/tui.mu +++ b/linux/apps/tui.mu @@ -23,7 +23,7 @@ fn main -> _/ebx: int { print-string 0/screen, "press a key to see its code: " enable-keyboard-immediate-mode - var x/eax: grapheme <- read-key-from-real-keyboard + var x/eax: code-point-utf8 <- read-key-from-real-keyboard enable-keyboard-type-mode enable-screen-type-mode print-string 0/screen, "You pressed " diff --git a/linux/browse/main.mu b/linux/browse/main.mu index 5b4f2e06..27504afe 100644 --- a/linux/browse/main.mu +++ b/linux/browse/main.mu @@ -49,7 +49,7 @@ fn interactive fs: (addr buffered-file) { # { render paginated-screen, fs - var key/eax: grapheme <- read-key-from-real-keyboard + var key/eax: code-point-utf8 <- read-key-from-real-keyboard compare key, 0x71/'q' loop-if-!= } @@ -160,13 +160,13 @@ fn test-render-asterisk-in-text { fn render-normal screen: (addr paginated-screen), fs: (addr buffered-file) { var newline-seen?/esi: boolean <- copy 0/false var start-of-paragraph?/edi: boolean <- copy 1/true - var previous-grapheme/ebx: grapheme <- copy 0 + var previous-code-point-utf8/ebx: code-point-utf8 <- copy 0 $render-normal:loop: { # if done-drawing?(screen) break var done?/eax: boolean <- done-drawing? screen compare done?, 0/false break-if-!= - var c/eax: grapheme <- read-grapheme-buffered fs + var c/eax: code-point-utf8 <- read-code-point-utf8-buffered fs $render-normal:loop-body: { # if (c == EOF) break compare c, 0xffffffff/end-of-file @@ -186,8 +186,8 @@ $render-normal:loop-body: { # otherwise render two newlines { break-if-= - add-grapheme screen, 0xa/newline - add-grapheme screen, 0xa/newline + add-code-point-utf8 screen, 0xa/newline + add-code-point-utf8 screen, 0xa/newline newline-seen? <- copy 0/false start-of-paragraph? <- copy 1/true break $render-normal:loop-body @@ -221,20 +221,20 @@ $render-normal:flush-buffered-newline: { { compare c, 0x20 break-if-!= - add-grapheme screen, 0xa/newline + add-code-point-utf8 screen, 0xa/newline break $render-normal:flush-buffered-newline } - add-grapheme screen, 0x20/space + add-code-point-utf8 screen, 0x20/space # fall through to print c } ## end soft newline support $render-normal:whitespace-separated-regions: { - # if previous-grapheme wasn't whitespace, skip this block + # if previous-code-point-utf8 wasn't whitespace, skip this block { - compare previous-grapheme, 0x20/space + compare previous-code-point-utf8, 0x20/space break-if-= - compare previous-grapheme, 0xa/newline + compare previous-code-point-utf8, 0xa/newline break-if-= break $render-normal:whitespace-separated-regions } @@ -260,9 +260,9 @@ $render-normal:whitespace-separated-regions: { } } # - add-grapheme screen, c + add-code-point-utf8 screen, c } # $render-normal:loop-body - previous-grapheme <- copy c + previous-code-point-utf8 <- copy c loop } # $render-normal:loop } @@ -271,7 +271,7 @@ fn render-header-line screen: (addr paginated-screen), fs: (addr buffered-file) $render-header-line:body: { # compute color based on number of '#'s var header-level/esi: int <- copy 1 # caller already grabbed one - var c/eax: grapheme <- copy 0 + var c/eax: code-point-utf8 <- copy 0 { # if done-drawing?(screen) return { @@ -280,7 +280,7 @@ $render-header-line:body: { break-if-!= $render-header-line:body } # - c <- read-grapheme-buffered fs + c <- read-code-point-utf8-buffered fs # if (c != '#') break compare c, 0x23/'#' break-if-!= @@ -298,7 +298,7 @@ $render-header-line:body: { break-if-!= } # - c <- read-grapheme-buffered fs + c <- read-code-point-utf8-buffered fs # if (c == EOF) break compare c, 0xffffffff/end-of-file break-if-= @@ -306,7 +306,7 @@ $render-header-line:body: { compare c, 0xa/newline break-if-= # - add-grapheme screen, c + add-code-point-utf8 screen, c # loop } @@ -353,7 +353,7 @@ fn render-until-asterisk screen: (addr paginated-screen), fs: (addr buffered-fil compare done?, 0/false break-if-!= # - var c/eax: grapheme <- read-grapheme-buffered fs + var c/eax: code-point-utf8 <- read-code-point-utf8-buffered fs # if (c == EOF) break compare c, 0xffffffff/end-of-file break-if-= @@ -361,7 +361,7 @@ fn render-until-asterisk screen: (addr paginated-screen), fs: (addr buffered-fil compare c, 0x2a/'*' break-if-= # - add-grapheme screen, c + add-code-point-utf8 screen, c # loop } @@ -374,7 +374,7 @@ fn render-until-underscore screen: (addr paginated-screen), fs: (addr buffered-f compare done?, 0/false break-if-!= # - var c/eax: grapheme <- read-grapheme-buffered fs + var c/eax: code-point-utf8 <- read-code-point-utf8-buffered fs # if (c == EOF) break compare c, 0xffffffff/end-of-file break-if-= @@ -382,7 +382,7 @@ fn render-until-underscore screen: (addr paginated-screen), fs: (addr buffered-f compare c, 0x5f/'_' break-if-= # - add-grapheme screen, c + add-code-point-utf8 screen, c # loop } diff --git a/linux/browse/paginated-screen.mu b/linux/browse/paginated-screen.mu index f4579d95..05f954a7 100644 --- a/linux/browse/paginated-screen.mu +++ b/linux/browse/paginated-screen.mu @@ -7,7 +7,7 @@ # on each frame # start-drawing # while !done-drawing -# add-grapheme ... +# add-code-point-utf8 ... type paginated-screen { screen: (handle screen) @@ -152,23 +152,23 @@ fn done-drawing? _self: (addr paginated-screen) -> _/eax: boolean { return 1/true } -fn add-grapheme _self: (addr paginated-screen), c: grapheme { -#? print-string-to-real-screen "add-grapheme: " -#? print-grapheme-to-real-screen c +fn add-code-point-utf8 _self: (addr paginated-screen), c: code-point-utf8 { +#? print-string-to-real-screen "add-code-point-utf8: " +#? print-code-point-utf8-to-real-screen c #? print-string-to-real-screen "\n" -$add-grapheme:body: { +$add-code-point-utf8:body: { var self/esi: (addr paginated-screen) <- copy _self { compare c, 0xa/newline break-if-!= next-line self reposition-cursor self - break $add-grapheme:body + break $add-code-point-utf8:body } # print c var screen-ah/eax: (addr handle screen) <- get self, screen var screen-addr/eax: (addr screen) <- lookup *screen-ah - print-grapheme screen-addr, c + print-code-point-utf8 screen-addr, c # self->col++ var tmp/eax: (addr int) <- get self, col increment *tmp @@ -186,21 +186,21 @@ $add-grapheme:body: { ## tests -fn test-print-grapheme-on-paginated-screen { +fn test-print-code-point-utf8-on-paginated-screen { var pg-on-stack: paginated-screen var pg/eax: (addr paginated-screen) <- address pg-on-stack initialize-fake-paginated-screen pg, 3/rows, 0xa/cols, 0xa/page-width, 0, 0 start-drawing pg { - var c/ecx: grapheme <- copy 0x61/a - add-grapheme pg, c + var c/ecx: code-point-utf8 <- copy 0x61/a + add-code-point-utf8 pg, c var done?/eax: boolean <- done-drawing? pg var done/eax: int <- copy done? - check-ints-equal done, 0, "F - test-print-grapheme-on-paginated-screen/done" + check-ints-equal done, 0, "F - test-print-code-point-utf8-on-paginated-screen/done" } var screen-ah/eax: (addr handle screen) <- get pg, screen var screen-addr/eax: (addr screen) <- lookup *screen-ah - check-screen-row screen-addr, 1, "a", "F - test-print-grapheme-on-paginated-screen" + check-screen-row screen-addr, 1, "a", "F - test-print-code-point-utf8-on-paginated-screen" } fn test-print-single-page { @@ -210,29 +210,29 @@ fn test-print-single-page { start-drawing pg # pages at columns [1, 3), [3, 5) { - var c/ecx: grapheme <- copy 0x61/a - add-grapheme pg, c + var c/ecx: code-point-utf8 <- copy 0x61/a + add-code-point-utf8 pg, c var done?/eax: boolean <- done-drawing? pg var done/eax: int <- copy done? check-ints-equal done, 0, "F - test-print-single-page/done-1" } { - var c/ecx: grapheme <- copy 0x62/b - add-grapheme pg, c + var c/ecx: code-point-utf8 <- copy 0x62/b + add-code-point-utf8 pg, c var done?/eax: boolean <- done-drawing? pg var done/eax: int <- copy done? check-ints-equal done, 0, "F - test-print-single-page/done-2" } { - var c/ecx: grapheme <- copy 0x63/c - add-grapheme pg, c + var c/ecx: code-point-utf8 <- copy 0x63/c + add-code-point-utf8 pg, c var done?/eax: boolean <- done-drawing? pg var done/eax: int <- copy done? check-ints-equal done, 0, "F - test-print-single-page/done-3" } { - var c/ecx: grapheme <- copy 0x64/d - add-grapheme pg, c + var c/ecx: code-point-utf8 <- copy 0x64/d + add-code-point-utf8 pg, c var done?/eax: boolean <- done-drawing? pg var done/eax: int <- copy done? check-ints-equal done, 0, "F - test-print-single-page/done-4" @@ -250,36 +250,36 @@ fn test-print-single-page-narrower-than-page-width { initialize-fake-paginated-screen pg, 2/rows, 4/cols, 5/page-width, 0, 0 start-drawing pg { - var c/ecx: grapheme <- copy 0x61/a - add-grapheme pg, c + var c/ecx: code-point-utf8 <- copy 0x61/a + add-code-point-utf8 pg, c var done?/eax: boolean <- done-drawing? pg var done/eax: int <- copy done? check-ints-equal done, 0, "F - test-print-single-page-narrower-than-page-width/done-1" } { - var c/ecx: grapheme <- copy 0x62/b - add-grapheme pg, c + var c/ecx: code-point-utf8 <- copy 0x62/b + add-code-point-utf8 pg, c var done?/eax: boolean <- done-drawing? pg var done/eax: int <- copy done? check-ints-equal done, 0, "F - test-print-single-page-narrower-than-page-width/done-2" } { - var c/ecx: grapheme <- copy 0x63/c - add-grapheme pg, c + var c/ecx: code-point-utf8 <- copy 0x63/c + add-code-point-utf8 pg, c var done?/eax: boolean <- done-drawing? pg var done/eax: int <- copy done? check-ints-equal done, 0, "F - test-print-single-page-narrower-than-page-width/done-3" } { - var c/ecx: grapheme <- copy 0x64/d - add-grapheme pg, c + var c/ecx: code-point-utf8 <- copy 0x64/d + add-code-point-utf8 pg, c var done?/eax: boolean <- done-drawing? pg var done/eax: int <- copy done? check-ints-equal done, 0, "F - test-print-single-page-narrower-than-page-width/done-4" } { - var c/ecx: grapheme <- copy 0x65/e - add-grapheme pg, c + var c/ecx: code-point-utf8 <- copy 0x65/e + add-code-point-utf8 pg, c var done?/eax: boolean <- done-drawing? pg var done/eax: int <- copy done? check-ints-equal done, 0, "F - test-print-single-page-narrower-than-page-width/done-5" @@ -297,36 +297,36 @@ fn test-print-single-page-narrower-than-page-width-with-margin { initialize-fake-paginated-screen pg, 2/rows, 4/cols, 5/page-width, 0/top-margin, 1/left-margin start-drawing pg { - var c/ecx: grapheme <- copy 0x61/a - add-grapheme pg, c + var c/ecx: code-point-utf8 <- copy 0x61/a + add-code-point-utf8 pg, c var done?/eax: boolean <- done-drawing? pg var done/eax: int <- copy done? check-ints-equal done, 0, "F - test-print-single-page-narrower-than-page-width-with-margin/done-1" } { - var c/ecx: grapheme <- copy 0x62/b - add-grapheme pg, c + var c/ecx: code-point-utf8 <- copy 0x62/b + add-code-point-utf8 pg, c var done?/eax: boolean <- done-drawing? pg var done/eax: int <- copy done? check-ints-equal done, 0, "F - test-print-single-page-narrower-than-page-width-with-margin/done-2" } { - var c/ecx: grapheme <- copy 0x63/c - add-grapheme pg, c + var c/ecx: code-point-utf8 <- copy 0x63/c + add-code-point-utf8 pg, c var done?/eax: boolean <- done-drawing? pg var done/eax: int <- copy done? check-ints-equal done, 0, "F - test-print-single-page-narrower-than-page-width-with-margin/done-3" } { - var c/ecx: grapheme <- copy 0x64/d - add-grapheme pg, c + var c/ecx: code-point-utf8 <- copy 0x64/d + add-code-point-utf8 pg, c var done?/eax: boolean <- done-drawing? pg var done/eax: int <- copy done? check-ints-equal done, 0, "F - test-print-single-page-narrower-than-page-width-with-margin/done-4" } { - var c/ecx: grapheme <- copy 0x65/e - add-grapheme pg, c + var c/ecx: code-point-utf8 <- copy 0x65/e + add-code-point-utf8 pg, c var done?/eax: boolean <- done-drawing? pg var done/eax: int <- copy done? check-ints-equal done, 0, "F - test-print-single-page-narrower-than-page-width-with-margin/done-5" @@ -344,29 +344,29 @@ fn test-print-multiple-pages { initialize-fake-paginated-screen pg, 2/rows, 2/cols, 1/page-width, 0, 0 start-drawing pg { - var c/ecx: grapheme <- copy 0x61/a - add-grapheme pg, c + var c/ecx: code-point-utf8 <- copy 0x61/a + add-code-point-utf8 pg, c var done?/eax: boolean <- done-drawing? pg var done/eax: int <- copy done? check-ints-equal done, 0, "F - test-print-multiple-pages/done-1" } { - var c/ecx: grapheme <- copy 0x62/b - add-grapheme pg, c + var c/ecx: code-point-utf8 <- copy 0x62/b + add-code-point-utf8 pg, c var done?/eax: boolean <- done-drawing? pg var done/eax: int <- copy done? check-ints-equal done, 0, "F - test-print-multiple-pages/done-2" } { - var c/ecx: grapheme <- copy 0x63/c - add-grapheme pg, c + var c/ecx: code-point-utf8 <- copy 0x63/c + add-code-point-utf8 pg, c var done?/eax: boolean <- done-drawing? pg var done/eax: int <- copy done? check-ints-equal done, 0, "F - test-print-multiple-pages/done-3" } { - var c/ecx: grapheme <- copy 0x64/d - add-grapheme pg, c + var c/ecx: code-point-utf8 <- copy 0x64/d + add-code-point-utf8 pg, c var done?/eax: boolean <- done-drawing? pg var done/eax: int <- copy done? check-ints-equal done, 1, "F - test-print-multiple-pages/done-4" @@ -384,57 +384,57 @@ fn test-print-multiple-pages-2 { initialize-fake-paginated-screen pg, 2/rows, 4/cols, 2/page-width, 0, 0 start-drawing pg { - var c/ecx: grapheme <- copy 0x61/a - add-grapheme pg, c + var c/ecx: code-point-utf8 <- copy 0x61/a + add-code-point-utf8 pg, c var done?/eax: boolean <- done-drawing? pg var done/eax: int <- copy done? check-ints-equal done, 0, "F - test-print-multiple-pages-2/done-1" } { - var c/ecx: grapheme <- copy 0x62/b - add-grapheme pg, c + var c/ecx: code-point-utf8 <- copy 0x62/b + add-code-point-utf8 pg, c var done?/eax: boolean <- done-drawing? pg var done/eax: int <- copy done? check-ints-equal done, 0, "F - test-print-multiple-pages-2/done-2" } { - var c/ecx: grapheme <- copy 0x63/c - add-grapheme pg, c + var c/ecx: code-point-utf8 <- copy 0x63/c + add-code-point-utf8 pg, c var done?/eax: boolean <- done-drawing? pg var done/eax: int <- copy done? check-ints-equal done, 0, "F - test-print-multiple-pages-2/done-3" } { - var c/ecx: grapheme <- copy 0x64/d - add-grapheme pg, c + var c/ecx: code-point-utf8 <- copy 0x64/d + add-code-point-utf8 pg, c var done?/eax: boolean <- done-drawing? pg var done/eax: int <- copy done? check-ints-equal done, 0, "F - test-print-multiple-pages-2/done-4" } { - var c/ecx: grapheme <- copy 0x65/e - add-grapheme pg, c + var c/ecx: code-point-utf8 <- copy 0x65/e + add-code-point-utf8 pg, c var done?/eax: boolean <- done-drawing? pg var done/eax: int <- copy done? check-ints-equal done, 0, "F - test-print-multiple-pages-2/done-5" } { - var c/ecx: grapheme <- copy 0x66/f - add-grapheme pg, c + var c/ecx: code-point-utf8 <- copy 0x66/f + add-code-point-utf8 pg, c var done?/eax: boolean <- done-drawing? pg var done/eax: int <- copy done? check-ints-equal done, 0, "F - test-print-multiple-pages-2/done-6" } { - var c/ecx: grapheme <- copy 0x67/g - add-grapheme pg, c + var c/ecx: code-point-utf8 <- copy 0x67/g + add-code-point-utf8 pg, c var done?/eax: boolean <- done-drawing? pg var done/eax: int <- copy done? check-ints-equal done, 0, "F - test-print-multiple-pages-2/done-7" } { - var c/ecx: grapheme <- copy 0x68/h - add-grapheme pg, c + var c/ecx: code-point-utf8 <- copy 0x68/h + add-code-point-utf8 pg, c var done?/eax: boolean <- done-drawing? pg var done/eax: int <- copy done? check-ints-equal done, 1, "F - test-print-multiple-pages-2/done-8" @@ -452,60 +452,60 @@ fn test-print-multiple-pages-with-margins { initialize-fake-paginated-screen pg, 3/rows, 6/cols, 2/page-width, 1/top-margin, 1/left-margin start-drawing pg { - var c/ecx: grapheme <- copy 0x61/a - add-grapheme pg, c + var c/ecx: code-point-utf8 <- copy 0x61/a + add-code-point-utf8 pg, c var done?/eax: boolean <- done-drawing? pg var done/eax: int <- copy done? - check-ints-equal done, 0, "F - test-print-multiple-pages-with-margins/grapheme-1" + check-ints-equal done, 0, "F - test-print-multiple-pages-with-margins/code-point-utf8-1" } { - var c/ecx: grapheme <- copy 0x62/b - add-grapheme pg, c + var c/ecx: code-point-utf8 <- copy 0x62/b + add-code-point-utf8 pg, c var done?/eax: boolean <- done-drawing? pg var done/eax: int <- copy done? - check-ints-equal done, 0, "F - test-print-multiple-pages-with-margins/grapheme-2" + check-ints-equal done, 0, "F - test-print-multiple-pages-with-margins/code-point-utf8-2" } { - var c/ecx: grapheme <- copy 0x63/c - add-grapheme pg, c + var c/ecx: code-point-utf8 <- copy 0x63/c + add-code-point-utf8 pg, c var done?/eax: boolean <- done-drawing? pg var done/eax: int <- copy done? - check-ints-equal done, 0, "F - test-print-multiple-pages-with-margins/grapheme-3" + check-ints-equal done, 0, "F - test-print-multiple-pages-with-margins/code-point-utf8-3" } { - var c/ecx: grapheme <- copy 0x64/d - add-grapheme pg, c + var c/ecx: code-point-utf8 <- copy 0x64/d + add-code-point-utf8 pg, c var done?/eax: boolean <- done-drawing? pg var done/eax: int <- copy done? - check-ints-equal done, 0, "F - test-print-multiple-pages-with-margins/grapheme-4" + check-ints-equal done, 0, "F - test-print-multiple-pages-with-margins/code-point-utf8-4" } { - var c/ecx: grapheme <- copy 0x65/e - add-grapheme pg, c + var c/ecx: code-point-utf8 <- copy 0x65/e + add-code-point-utf8 pg, c var done?/eax: boolean <- done-drawing? pg var done/eax: int <- copy done? - check-ints-equal done, 0, "F - test-print-multiple-pages-with-margins/grapheme-5" + check-ints-equal done, 0, "F - test-print-multiple-pages-with-margins/code-point-utf8-5" } { - var c/ecx: grapheme <- copy 0x66/f - add-grapheme pg, c + var c/ecx: code-point-utf8 <- copy 0x66/f + add-code-point-utf8 pg, c var done?/eax: boolean <- done-drawing? pg var done/eax: int <- copy done? - check-ints-equal done, 0, "F - test-print-multiple-pages-with-margins/grapheme-6" + check-ints-equal done, 0, "F - test-print-multiple-pages-with-margins/code-point-utf8-6" } { - var c/ecx: grapheme <- copy 0x67/g - add-grapheme pg, c + var c/ecx: code-point-utf8 <- copy 0x67/g + add-code-point-utf8 pg, c var done?/eax: boolean <- done-drawing? pg var done/eax: int <- copy done? - check-ints-equal done, 0, "F - test-print-multiple-pages-with-margins/grapheme-7" + check-ints-equal done, 0, "F - test-print-multiple-pages-with-margins/code-point-utf8-7" } { - var c/ecx: grapheme <- copy 0x68/h - add-grapheme pg, c + var c/ecx: code-point-utf8 <- copy 0x68/h + add-code-point-utf8 pg, c var done?/eax: boolean <- done-drawing? pg var done/eax: int <- copy done? - check-ints-equal done, 1, "F - test-print-multiple-pages-with-margins/grapheme-8" + check-ints-equal done, 1, "F - test-print-multiple-pages-with-margins/code-point-utf8-8" } var screen-ah/eax: (addr handle screen) <- get pg, screen var screen-addr/eax: (addr screen) <- lookup *screen-ah diff --git a/linux/mu b/linux/mu index fdeacea0..94cb0a6e 100755 Binary files a/linux/mu and b/linux/mu differ diff --git a/linux/mu.subx b/linux/mu.subx index 879b751e..b4c55006 100644 --- a/linux/mu.subx +++ b/linux/mu.subx @@ -416,8 +416,8 @@ Type-id: # (stream (addr array byte)) "stream"/imm32 # 11 "slice"/imm32 # 12 "code-point"/imm32 # 13; smallest scannable unit from a text stream - "grapheme"/imm32 # 14; smallest printable unit; will eventually be composed of multiple code-points, but currently corresponds 1:1 - # only 4-byte graphemes in utf-8 are currently supported; + "code-point-utf8"/imm32 # 14; smallest printable unit; will eventually be composed of multiple code-points, but currently corresponds 1:1 + # only 4-byte code-point-utf8s in utf-8 are currently supported; # unclear how we should deal with larger clusters. "float"/imm32 # 15 # 0x40 @@ -22183,9 +22183,9 @@ $mu-numberlike-output?:check-code-point: (simple-mu-type? %esi 0xd) # code-point => eax 3d/compare-eax-and 0/imm32/false 75/jump-if-!= $mu-numberlike-output?:return-true/disp8 -$mu-numberlike-output?:check-grapheme: - # if t is a grapheme, return - (simple-mu-type? %esi 0xe) # grapheme => eax +$mu-numberlike-output?:check-code-point-utf8: + # if t is a code-point-utf8, return + (simple-mu-type? %esi 0xe) # code-point-utf8 => eax 3d/compare-eax-and 0/imm32/false 75/jump-if-!= $mu-numberlike-output?:return-true/disp8 $mu-numberlike-output?:return-false: diff --git a/linux/tile/box.mu b/linux/tile/box.mu index 859d0b8e..f5155ea8 100644 --- a/linux/tile/box.mu +++ b/linux/tile/box.mu @@ -78,7 +78,7 @@ fn clear-rect screen: (addr screen), row1: int, col1: int, row2: int, col2: int { compare j, col2 break-if-> - print-grapheme screen 0x20/space + print-code-point-utf8 screen 0x20/space j <- increment loop } @@ -98,7 +98,7 @@ fn clear-rect2 screen: (addr screen), row1: int, col1: int, w: int, h: int { { compare j, h break-if->= - print-grapheme screen 0x20/space + print-code-point-utf8 screen 0x20/space j <- increment loop } diff --git a/linux/tile/environment.mu b/linux/tile/environment.mu index 3c869d3c..1512db6d 100644 --- a/linux/tile/environment.mu +++ b/linux/tile/environment.mu @@ -70,7 +70,7 @@ fn initialize-environment-with-fake-screen _self: (addr environment), nrows: int # Iterate ############# -fn process _self: (addr environment), key: grapheme { +fn process _self: (addr environment), key: code-point-utf8 { var self/esi: (addr environment) <- copy _self var fn-name-ah/eax: (addr handle word) <- get self, partial-function-name var fn-name/eax: (addr word) <- lookup *fn-name-ah @@ -102,7 +102,7 @@ fn process _self: (addr environment), key: grapheme { } # collect new name in partial-function-name, and move the cursor to function with that name -fn process-goto-dialog _self: (addr environment), key: grapheme { +fn process-goto-dialog _self: (addr environment), key: code-point-utf8 { var self/esi: (addr environment) <- copy _self var fn-name-ah/edi: (addr handle word) <- get self, partial-function-name # if 'esc' pressed, cancel goto @@ -130,7 +130,7 @@ fn process-goto-dialog _self: (addr environment), key: grapheme { compare key, 0x7f/del # backspace on Macs $process-goto-dialog:backspace: { break-if-!= - # if not at start, delete grapheme before cursor + # if not at start, delete code-point-utf8 before cursor var fn-name/eax: (addr word) <- lookup *fn-name-ah var at-start?/eax: boolean <- cursor-at-start? fn-name compare at-start?, 0/false @@ -142,24 +142,24 @@ fn process-goto-dialog _self: (addr environment), key: grapheme { return } # otherwise insert key within current word - var print?/eax: boolean <- real-grapheme? key - $process-goto-dialog:real-grapheme: { + var print?/eax: boolean <- real-code-point-utf8? key + $process-goto-dialog:real-code-point-utf8: { compare print?, 0/false break-if-= var fn-name/eax: (addr word) <- lookup *fn-name-ah - add-grapheme-to-word fn-name, key + add-code-point-utf8-to-word fn-name, key return } # silently ignore other hotkeys } -fn process-function _self: (addr environment), _function: (addr function), key: grapheme { +fn process-function _self: (addr environment), _function: (addr function), key: code-point-utf8 { var self/esi: (addr environment) <- copy _self var function/edi: (addr function) <- copy _function process-function-edit self, function, key } -fn process-function-edit _self: (addr environment), _function: (addr function), key: grapheme { +fn process-function-edit _self: (addr environment), _function: (addr function), key: code-point-utf8 { var self/esi: (addr environment) <- copy _self var function/edi: (addr function) <- copy _function var cursor-word-ah/ebx: (addr handle word) <- get function, cursor-word @@ -290,7 +290,7 @@ fn process-function-edit _self: (addr environment), _function: (addr function), compare key, 0x7f/del # backspace on Macs $process-function-edit:backspace: { break-if-!= - # if not at start of some word, delete grapheme before cursor within current word + # if not at start of some word, delete code-point-utf8 before cursor within current word var at-start?/eax: boolean <- cursor-at-start? cursor-word compare at-start?, 0/false { @@ -325,25 +325,25 @@ fn process-function-edit _self: (addr environment), _function: (addr function), copy-object new-prev-word-ah, cursor-word-ah return } - # if start of word is quote and grapheme before cursor is not, just insert it as usual + # if start of word is quote and code-point-utf8 before cursor is not, just insert it as usual # TODO: support string escaping { - var first-grapheme/eax: grapheme <- first-grapheme cursor-word - compare first-grapheme, 0x22/double-quote + var first-code-point-utf8/eax: code-point-utf8 <- first-code-point-utf8 cursor-word + compare first-code-point-utf8, 0x22/double-quote break-if-!= - var final-grapheme/eax: grapheme <- grapheme-before-cursor cursor-word - compare final-grapheme, 0x22/double-quote + var final-code-point-utf8/eax: code-point-utf8 <- code-point-utf8-before-cursor cursor-word + compare final-code-point-utf8, 0x22/double-quote break-if-= break $process-function-edit:space } - # if start of word is '[' and grapheme before cursor is not ']', just insert it as usual + # if start of word is '[' and code-point-utf8 before cursor is not ']', just insert it as usual # TODO: support nested arrays { - var first-grapheme/eax: grapheme <- first-grapheme cursor-word - compare first-grapheme, 0x5b/[ + var first-code-point-utf8/eax: code-point-utf8 <- first-code-point-utf8 cursor-word + compare first-code-point-utf8, 0x5b/[ break-if-!= - var final-grapheme/eax: grapheme <- grapheme-before-cursor cursor-word - compare final-grapheme, 0x5d/] + var final-code-point-utf8/eax: code-point-utf8 <- code-point-utf8-before-cursor cursor-word + compare final-code-point-utf8, 0x5d/] break-if-= break $process-function-edit:space } @@ -368,26 +368,26 @@ fn process-function-edit _self: (addr environment), _function: (addr function), var at-end?/eax: boolean <- cursor-at-end? cursor-word compare at-end?, 0/false break-if-!= - var g/eax: grapheme <- pop-after-cursor cursor-word - add-grapheme-to-word next-word, g + var g/eax: code-point-utf8 <- pop-after-cursor cursor-word + add-code-point-utf8-to-word next-word, g loop } cursor-to-start next-word return } # otherwise insert key within current word - var g/edx: grapheme <- copy key - var print?/eax: boolean <- real-grapheme? key - $process-function-edit:real-grapheme: { + var g/edx: code-point-utf8 <- copy key + var print?/eax: boolean <- real-code-point-utf8? key + $process-function-edit:real-code-point-utf8: { compare print?, 0/false break-if-= - add-grapheme-to-word cursor-word, g + add-code-point-utf8-to-word cursor-word, g return } # silently ignore other hotkeys } -fn process-sandbox _self: (addr environment), _sandbox: (addr sandbox), key: grapheme { +fn process-sandbox _self: (addr environment), _sandbox: (addr sandbox), key: code-point-utf8 { var self/esi: (addr environment) <- copy _self var sandbox/edi: (addr sandbox) <- copy _sandbox var rename-word-mode-ah?/ecx: (addr handle word) <- get sandbox, partial-name-for-cursor-word @@ -413,7 +413,7 @@ fn process-sandbox _self: (addr environment), _sandbox: (addr sandbox), key: gra process-sandbox-edit self, sandbox, key } -fn process-sandbox-edit _self: (addr environment), _sandbox: (addr sandbox), key: grapheme { +fn process-sandbox-edit _self: (addr environment), _sandbox: (addr sandbox), key: code-point-utf8 { var self/esi: (addr environment) <- copy _self var sandbox/edi: (addr sandbox) <- copy _sandbox var cursor-call-path-ah/eax: (addr handle call-path-element) <- get sandbox, cursor-call-path @@ -730,7 +730,7 @@ fn process-sandbox-edit _self: (addr environment), _sandbox: (addr sandbox), key compare key, 0x7f/del # backspace on Macs $process-sandbox-edit:backspace: { break-if-!= - # if not at start of some word, delete grapheme before cursor within current word + # if not at start of some word, delete code-point-utf8 before cursor within current word var at-start?/eax: boolean <- cursor-at-start? cursor-word compare at-start?, 0/false { @@ -766,25 +766,25 @@ fn process-sandbox-edit _self: (addr environment), _sandbox: (addr sandbox), key decrement-final-element cursor-call-path return } - # if start of word is quote and grapheme before cursor is not, just insert it as usual + # if start of word is quote and code-point-utf8 before cursor is not, just insert it as usual # TODO: support string escaping { - var first-grapheme/eax: grapheme <- first-grapheme cursor-word - compare first-grapheme, 0x22/double-quote + var first-code-point-utf8/eax: code-point-utf8 <- first-code-point-utf8 cursor-word + compare first-code-point-utf8, 0x22/double-quote break-if-!= - var final-grapheme/eax: grapheme <- grapheme-before-cursor cursor-word - compare final-grapheme, 0x22/double-quote + var final-code-point-utf8/eax: code-point-utf8 <- code-point-utf8-before-cursor cursor-word + compare final-code-point-utf8, 0x22/double-quote break-if-= break $process-sandbox-edit:space } - # if start of word is '[' and grapheme before cursor is not ']', just insert it as usual + # if start of word is '[' and code-point-utf8 before cursor is not ']', just insert it as usual # TODO: support nested arrays { - var first-grapheme/eax: grapheme <- first-grapheme cursor-word - compare first-grapheme, 0x5b/[ + var first-code-point-utf8/eax: code-point-utf8 <- first-code-point-utf8 cursor-word + compare first-code-point-utf8, 0x5b/[ break-if-!= - var final-grapheme/eax: grapheme <- grapheme-before-cursor cursor-word - compare final-grapheme, 0x5d/] + var final-code-point-utf8/eax: code-point-utf8 <- code-point-utf8-before-cursor cursor-word + compare final-code-point-utf8, 0x5d/] break-if-= break $process-sandbox-edit:space } @@ -809,8 +809,8 @@ fn process-sandbox-edit _self: (addr environment), _sandbox: (addr sandbox), key var at-end?/eax: boolean <- cursor-at-end? cursor-word compare at-end?, 0/false break-if-!= - var g/eax: grapheme <- pop-after-cursor cursor-word - add-grapheme-to-word next-word, g + var g/eax: code-point-utf8 <- pop-after-cursor cursor-word + add-code-point-utf8-to-word next-word, g loop } cursor-to-start next-word @@ -838,12 +838,12 @@ fn process-sandbox-edit _self: (addr environment), _sandbox: (addr sandbox), key return } # otherwise insert key within current word - var g/edx: grapheme <- copy key - var print?/eax: boolean <- real-grapheme? key - $process-sandbox-edit:real-grapheme: { + var g/edx: code-point-utf8 <- copy key + var print?/eax: boolean <- real-code-point-utf8? key + $process-sandbox-edit:real-code-point-utf8: { compare print?, 0/false break-if-= - add-grapheme-to-word cursor-word, g + add-code-point-utf8-to-word cursor-word, g return } # silently ignore other hotkeys @@ -852,7 +852,7 @@ fn process-sandbox-edit _self: (addr environment), _sandbox: (addr sandbox), key # collect new name in partial-name-for-cursor-word, and then rename the word # at cursor to it # Precondition: cursor-call-path is a singleton (not within a call) -fn process-sandbox-rename _sandbox: (addr sandbox), key: grapheme { +fn process-sandbox-rename _sandbox: (addr sandbox), key: code-point-utf8 { var sandbox/esi: (addr sandbox) <- copy _sandbox var new-name-ah/edi: (addr handle word) <- get sandbox, partial-name-for-cursor-word # if 'esc' pressed, cancel rename @@ -911,7 +911,7 @@ fn process-sandbox-rename _sandbox: (addr sandbox), key: grapheme { { var new-name/eax: (addr word) <- lookup *new-name-ah cursor-to-start new-name - add-grapheme-to-word new-name, 0x3d/= + add-code-point-utf8-to-word new-name, 0x3d/= } # append name to new line chain-words new-line-word-ah, new-name-ah @@ -941,7 +941,7 @@ fn process-sandbox-rename _sandbox: (addr sandbox), key: grapheme { compare key, 0x7f/del # backspace on Macs $process-sandbox-rename:backspace: { break-if-!= - # if not at start, delete grapheme before cursor + # if not at start, delete code-point-utf8 before cursor var new-name/eax: (addr word) <- lookup *new-name-ah var at-start?/eax: boolean <- cursor-at-start? new-name compare at-start?, 0/false @@ -953,12 +953,12 @@ fn process-sandbox-rename _sandbox: (addr sandbox), key: grapheme { return } # otherwise insert key within current word - var print?/eax: boolean <- real-grapheme? key - $process-sandbox-rename:real-grapheme: { + var print?/eax: boolean <- real-code-point-utf8? key + $process-sandbox-rename:real-code-point-utf8: { compare print?, 0/false break-if-= var new-name/eax: (addr word) <- lookup *new-name-ah - add-grapheme-to-word new-name, key + add-code-point-utf8-to-word new-name, key return } # silently ignore other hotkeys @@ -968,7 +968,7 @@ fn process-sandbox-rename _sandbox: (addr sandbox), key: grapheme { # of the sandbox to be a new function with that name. Replace the last line # with a call to the appropriate function. # Precondition: cursor-call-path is a singleton (not within a call) -fn process-sandbox-define _sandbox: (addr sandbox), functions: (addr handle function), key: grapheme { +fn process-sandbox-define _sandbox: (addr sandbox), functions: (addr handle function), key: code-point-utf8 { var sandbox/esi: (addr sandbox) <- copy _sandbox var new-name-ah/edi: (addr handle word) <- get sandbox, partial-name-for-function # if 'esc' pressed, cancel define @@ -1033,7 +1033,7 @@ fn process-sandbox-define _sandbox: (addr sandbox), functions: (addr handle func compare key, 0x7f/del # backspace on Macs $process-sandbox-define:backspace: { break-if-!= - # if not at start, delete grapheme before cursor + # if not at start, delete code-point-utf8 before cursor var new-name/eax: (addr word) <- lookup *new-name-ah var at-start?/eax: boolean <- cursor-at-start? new-name compare at-start?, 0/false @@ -1045,12 +1045,12 @@ fn process-sandbox-define _sandbox: (addr sandbox), functions: (addr handle func return } # otherwise insert key within current word - var print?/eax: boolean <- real-grapheme? key - $process-sandbox-define:real-grapheme: { + var print?/eax: boolean <- real-code-point-utf8? key + $process-sandbox-define:real-code-point-utf8: { compare print?, 0/false break-if-= var new-name/eax: (addr word) <- lookup *new-name-ah - add-grapheme-to-word new-name, key + add-code-point-utf8-to-word new-name, key return } # silently ignore other hotkeys @@ -2107,7 +2107,7 @@ fn render-function-right-aligned screen: (addr screen), row: int, right-col: int start-color screen, 0, 0xf7 clear-rect screen, row, col, new-row, col2 col <- add 1 -#? var dummy/eax: grapheme <- read-key-from-real-keyboard +#? var dummy/eax: code-point-utf8 <- read-key-from-real-keyboard render-function screen, row, col, f new-row <- add 1/function-bottom-margin col <- subtract 1/function-left-padding @@ -2144,7 +2144,7 @@ fn render-function screen: (addr screen), row: int, col: int, _f: (addr function render-line-without-stack screen, body, row, col, cursor-word, cursor-row, cursor-col } -fn real-grapheme? g: grapheme -> _/eax: boolean { +fn real-code-point-utf8? g: code-point-utf8 -> _/eax: boolean { # if g == newline return true compare g, 0xa { diff --git a/linux/tile/gap-buffer.mu b/linux/tile/gap-buffer.mu index 0132daf0..1441684b 100644 --- a/linux/tile/gap-buffer.mu +++ b/linux/tile/gap-buffer.mu @@ -1,14 +1,14 @@ type gap-buffer { - left: grapheme-stack - right: grapheme-stack + left: code-point-utf8-stack + right: code-point-utf8-stack } fn initialize-gap-buffer _self: (addr gap-buffer) { var self/esi: (addr gap-buffer) <- copy _self - var left/eax: (addr grapheme-stack) <- get self, left - initialize-grapheme-stack left, 0x10/max-word-size - var right/eax: (addr grapheme-stack) <- get self, right - initialize-grapheme-stack right, 0x10/max-word-size + var left/eax: (addr code-point-utf8-stack) <- get self, left + initialize-code-point-utf8-stack left, 0x10/max-word-size + var right/eax: (addr code-point-utf8-stack) <- get self, right + initialize-code-point-utf8-stack right, 0x10/max-word-size } # just for tests @@ -21,8 +21,8 @@ fn initialize-gap-buffer-with self: (addr gap-buffer), s: (addr array byte) { var done?/eax: boolean <- stream-empty? stream compare done?, 0/false break-if-!= - var g/eax: grapheme <- read-grapheme stream - add-grapheme-at-gap self, g + var g/eax: code-point-utf8 <- read-code-point-utf8 stream + add-code-point-utf8-at-gap self, g loop } } @@ -37,44 +37,44 @@ fn gap-buffer-to-string self: (addr gap-buffer), out: (addr handle array byte) { fn emit-gap-buffer _self: (addr gap-buffer), out: (addr stream byte) { var self/esi: (addr gap-buffer) <- copy _self clear-stream out - var left/eax: (addr grapheme-stack) <- get self, left + var left/eax: (addr code-point-utf8-stack) <- get self, left emit-stack-from-bottom left, out - var right/eax: (addr grapheme-stack) <- get self, right + var right/eax: (addr code-point-utf8-stack) <- get self, right emit-stack-from-top right, out } # dump stack from bottom to top -fn emit-stack-from-bottom _self: (addr grapheme-stack), out: (addr stream byte) { - var self/esi: (addr grapheme-stack) <- copy _self - var data-ah/edi: (addr handle array grapheme) <- get self, data - var _data/eax: (addr array grapheme) <- lookup *data-ah - var data/edi: (addr array grapheme) <- copy _data +fn emit-stack-from-bottom _self: (addr code-point-utf8-stack), out: (addr stream byte) { + var self/esi: (addr code-point-utf8-stack) <- copy _self + var data-ah/edi: (addr handle array code-point-utf8) <- get self, data + var _data/eax: (addr array code-point-utf8) <- lookup *data-ah + var data/edi: (addr array code-point-utf8) <- copy _data var top-addr/ecx: (addr int) <- get self, top var i/eax: int <- copy 0 { compare i, *top-addr break-if->= - var g/edx: (addr grapheme) <- index data, i - write-grapheme out, *g + var g/edx: (addr code-point-utf8) <- index data, i + write-code-point-utf8 out, *g i <- increment loop } } # dump stack from top to bottom -fn emit-stack-from-top _self: (addr grapheme-stack), out: (addr stream byte) { - var self/esi: (addr grapheme-stack) <- copy _self - var data-ah/edi: (addr handle array grapheme) <- get self, data - var _data/eax: (addr array grapheme) <- lookup *data-ah - var data/edi: (addr array grapheme) <- copy _data +fn emit-stack-from-top _self: (addr code-point-utf8-stack), out: (addr stream byte) { + var self/esi: (addr code-point-utf8-stack) <- copy _self + var data-ah/edi: (addr handle array code-point-utf8) <- get self, data + var _data/eax: (addr array code-point-utf8) <- lookup *data-ah + var data/edi: (addr array code-point-utf8) <- copy _data var top-addr/ecx: (addr int) <- get self, top var i/eax: int <- copy *top-addr i <- decrement { compare i, 0 break-if-< - var g/edx: (addr grapheme) <- index data, i - write-grapheme out, *g + var g/edx: (addr code-point-utf8) <- index data, i + write-code-point-utf8 out, *g i <- decrement loop } @@ -82,33 +82,33 @@ fn emit-stack-from-top _self: (addr grapheme-stack), out: (addr stream byte) { fn render-gap-buffer screen: (addr screen), _gap: (addr gap-buffer) { var gap/esi: (addr gap-buffer) <- copy _gap - var left/eax: (addr grapheme-stack) <- get gap, left + var left/eax: (addr code-point-utf8-stack) <- get gap, left render-stack-from-bottom left, screen - var right/eax: (addr grapheme-stack) <- get gap, right + var right/eax: (addr code-point-utf8-stack) <- get gap, right render-stack-from-top right, screen } fn gap-buffer-length _gap: (addr gap-buffer) -> _/eax: int { var gap/esi: (addr gap-buffer) <- copy _gap - var left/eax: (addr grapheme-stack) <- get gap, left + var left/eax: (addr code-point-utf8-stack) <- get gap, left var tmp/eax: (addr int) <- get left, top var left-length/ecx: int <- copy *tmp - var right/esi: (addr grapheme-stack) <- get gap, right + var right/esi: (addr code-point-utf8-stack) <- get gap, right tmp <- get right, top var result/eax: int <- copy *tmp result <- add left-length return result } -fn add-grapheme-at-gap _self: (addr gap-buffer), g: grapheme { +fn add-code-point-utf8-at-gap _self: (addr gap-buffer), g: code-point-utf8 { var self/esi: (addr gap-buffer) <- copy _self - var left/eax: (addr grapheme-stack) <- get self, left - push-grapheme-stack left, g + var left/eax: (addr code-point-utf8-stack) <- get self, left + push-code-point-utf8-stack left, g } fn gap-to-start self: (addr gap-buffer) { { - var curr/eax: grapheme <- gap-left self + var curr/eax: code-point-utf8 <- gap-left self compare curr, -1 loop-if-!= } @@ -116,7 +116,7 @@ fn gap-to-start self: (addr gap-buffer) { fn gap-to-end self: (addr gap-buffer) { { - var curr/eax: grapheme <- gap-right self + var curr/eax: code-point-utf8 <- gap-right self compare curr, -1 loop-if-!= } @@ -124,96 +124,96 @@ fn gap-to-end self: (addr gap-buffer) { fn gap-at-start? _self: (addr gap-buffer) -> _/eax: boolean { var self/esi: (addr gap-buffer) <- copy _self - var left/eax: (addr grapheme-stack) <- get self, left - var result/eax: boolean <- grapheme-stack-empty? left + var left/eax: (addr code-point-utf8-stack) <- get self, left + var result/eax: boolean <- code-point-utf8-stack-empty? left return result } fn gap-at-end? _self: (addr gap-buffer) -> _/eax: boolean { var self/esi: (addr gap-buffer) <- copy _self - var right/eax: (addr grapheme-stack) <- get self, right - var result/eax: boolean <- grapheme-stack-empty? right + var right/eax: (addr code-point-utf8-stack) <- get self, right + var result/eax: boolean <- code-point-utf8-stack-empty? right return result } -fn gap-right _self: (addr gap-buffer) -> _/eax: grapheme { +fn gap-right _self: (addr gap-buffer) -> _/eax: code-point-utf8 { var self/esi: (addr gap-buffer) <- copy _self - var g/eax: grapheme <- copy 0 - var right/ecx: (addr grapheme-stack) <- get self, right - g <- pop-grapheme-stack right + var g/eax: code-point-utf8 <- copy 0 + var right/ecx: (addr code-point-utf8-stack) <- get self, right + g <- pop-code-point-utf8-stack right compare g, -1 { break-if-= - var left/ecx: (addr grapheme-stack) <- get self, left - push-grapheme-stack left, g + var left/ecx: (addr code-point-utf8-stack) <- get self, left + push-code-point-utf8-stack left, g } return g } -fn gap-left _self: (addr gap-buffer) -> _/eax: grapheme { +fn gap-left _self: (addr gap-buffer) -> _/eax: code-point-utf8 { var self/esi: (addr gap-buffer) <- copy _self - var g/eax: grapheme <- copy 0 + var g/eax: code-point-utf8 <- copy 0 { - var left/ecx: (addr grapheme-stack) <- get self, left - g <- pop-grapheme-stack left + var left/ecx: (addr code-point-utf8-stack) <- get self, left + g <- pop-code-point-utf8-stack left } compare g, -1 { break-if-= - var right/ecx: (addr grapheme-stack) <- get self, right - push-grapheme-stack right, g + var right/ecx: (addr code-point-utf8-stack) <- get self, right + push-code-point-utf8-stack right, g } return g } fn gap-index _self: (addr gap-buffer) -> _/eax: int { var self/eax: (addr gap-buffer) <- copy _self - var left/eax: (addr grapheme-stack) <- get self, left + var left/eax: (addr code-point-utf8-stack) <- get self, left var top-addr/eax: (addr int) <- get left, top var result/eax: int <- copy *top-addr return result } -fn first-grapheme-in-gap-buffer _self: (addr gap-buffer) -> _/eax: grapheme { +fn first-code-point-utf8-in-gap-buffer _self: (addr gap-buffer) -> _/eax: code-point-utf8 { var self/esi: (addr gap-buffer) <- copy _self # try to read from left - var left/eax: (addr grapheme-stack) <- get self, left + var left/eax: (addr code-point-utf8-stack) <- get self, left var top-addr/ecx: (addr int) <- get left, top compare *top-addr, 0 { break-if-<= - var data-ah/eax: (addr handle array grapheme) <- get left, data - var data/eax: (addr array grapheme) <- lookup *data-ah - var result-addr/eax: (addr grapheme) <- index data, 0 + var data-ah/eax: (addr handle array code-point-utf8) <- get left, data + var data/eax: (addr array code-point-utf8) <- lookup *data-ah + var result-addr/eax: (addr code-point-utf8) <- index data, 0 return *result-addr } # try to read from right - var right/eax: (addr grapheme-stack) <- get self, right + var right/eax: (addr code-point-utf8-stack) <- get self, right top-addr <- get right, top compare *top-addr, 0 { break-if-<= - var data-ah/eax: (addr handle array grapheme) <- get right, data - var data/eax: (addr array grapheme) <- lookup *data-ah + var data-ah/eax: (addr handle array code-point-utf8) <- get right, data + var data/eax: (addr array code-point-utf8) <- lookup *data-ah var top/ecx: int <- copy *top-addr top <- decrement - var result-addr/eax: (addr grapheme) <- index data, top + var result-addr/eax: (addr code-point-utf8) <- index data, top return *result-addr } # give up return -1 } -fn grapheme-before-cursor-in-gap-buffer _self: (addr gap-buffer) -> _/eax: grapheme { +fn code-point-utf8-before-cursor-in-gap-buffer _self: (addr gap-buffer) -> _/eax: code-point-utf8 { var self/esi: (addr gap-buffer) <- copy _self # try to read from left - var left/ecx: (addr grapheme-stack) <- get self, left + var left/ecx: (addr code-point-utf8-stack) <- get self, left var top-addr/edx: (addr int) <- get left, top compare *top-addr, 0 { break-if-<= - var result/eax: grapheme <- pop-grapheme-stack left - push-grapheme-stack left, result + var result/eax: code-point-utf8 <- pop-code-point-utf8-stack left + push-code-point-utf8-stack left, result return result } # give up @@ -222,27 +222,27 @@ fn grapheme-before-cursor-in-gap-buffer _self: (addr gap-buffer) -> _/eax: graph fn delete-before-gap _self: (addr gap-buffer) { var self/eax: (addr gap-buffer) <- copy _self - var left/eax: (addr grapheme-stack) <- get self, left - var dummy/eax: grapheme <- pop-grapheme-stack left + var left/eax: (addr code-point-utf8-stack) <- get self, left + var dummy/eax: code-point-utf8 <- pop-code-point-utf8-stack left } -fn pop-after-gap _self: (addr gap-buffer) -> _/eax: grapheme { +fn pop-after-gap _self: (addr gap-buffer) -> _/eax: code-point-utf8 { var self/eax: (addr gap-buffer) <- copy _self - var right/eax: (addr grapheme-stack) <- get self, right - var result/eax: grapheme <- pop-grapheme-stack right + var right/eax: (addr code-point-utf8-stack) <- get self, right + var result/eax: code-point-utf8 <- pop-code-point-utf8-stack right return result } fn gap-buffer-equal? _self: (addr gap-buffer), s: (addr array byte) -> _/eax: boolean { var self/esi: (addr gap-buffer) <- copy _self - # complication: graphemes may be multiple bytes + # complication: code-point-utf8s may be multiple bytes # so don't rely on length # instead turn the expected result into a stream and arrange to read from it in order var stream-storage: (stream byte 0x10/max-word-size) var expected-stream/ecx: (addr stream byte) <- address stream-storage write expected-stream, s # compare left - var left/edx: (addr grapheme-stack) <- get self, left + var left/edx: (addr code-point-utf8-stack) <- get self, left var result/eax: boolean <- prefix-match? left, expected-stream compare result, 0/false { @@ -250,7 +250,7 @@ fn gap-buffer-equal? _self: (addr gap-buffer), s: (addr array byte) -> _/eax: bo return result } # compare right - var right/edx: (addr grapheme-stack) <- get self, right + var right/edx: (addr code-point-utf8-stack) <- get self, right result <- suffix-match? right, expected-stream compare result, 0/false { @@ -267,10 +267,10 @@ fn test-gap-buffer-equal-from-end? { var g/esi: (addr gap-buffer) <- address _g initialize-gap-buffer g # - var c/eax: grapheme <- copy 0x61/a - add-grapheme-at-gap g, c - add-grapheme-at-gap g, c - add-grapheme-at-gap g, c + var c/eax: code-point-utf8 <- copy 0x61/a + add-code-point-utf8-at-gap g, c + add-code-point-utf8-at-gap g, c + add-code-point-utf8-at-gap g, c # gap is at end (right is empty) var _result/eax: boolean <- gap-buffer-equal? g, "aaa" var result/eax: int <- copy _result @@ -282,11 +282,11 @@ fn test-gap-buffer-equal-from-middle? { var g/esi: (addr gap-buffer) <- address _g initialize-gap-buffer g # - var c/eax: grapheme <- copy 0x61/a - add-grapheme-at-gap g, c - add-grapheme-at-gap g, c - add-grapheme-at-gap g, c - var dummy/eax: grapheme <- gap-left g + var c/eax: code-point-utf8 <- copy 0x61/a + add-code-point-utf8-at-gap g, c + add-code-point-utf8-at-gap g, c + add-code-point-utf8-at-gap g, c + var dummy/eax: code-point-utf8 <- gap-left g # gap is in the middle var _result/eax: boolean <- gap-buffer-equal? g, "aaa" var result/eax: int <- copy _result @@ -298,11 +298,11 @@ fn test-gap-buffer-equal-from-start? { var g/esi: (addr gap-buffer) <- address _g initialize-gap-buffer g # - var c/eax: grapheme <- copy 0x61/a - add-grapheme-at-gap g, c - add-grapheme-at-gap g, c - add-grapheme-at-gap g, c - var dummy/eax: grapheme <- gap-left g + var c/eax: code-point-utf8 <- copy 0x61/a + add-code-point-utf8-at-gap g, c + add-code-point-utf8-at-gap g, c + add-code-point-utf8-at-gap g, c + var dummy/eax: code-point-utf8 <- gap-left g dummy <- gap-left g dummy <- gap-left g # gap is at the start @@ -319,25 +319,25 @@ fn copy-gap-buffer _src-ah: (addr handle gap-buffer), _dest-ah: (addr handle gap var dest-ah/eax: (addr handle gap-buffer) <- copy _dest-ah var _dest-a/eax: (addr gap-buffer) <- lookup *dest-ah var dest-a/edi: (addr gap-buffer) <- copy _dest-a - # copy left grapheme-stack - var src/ecx: (addr grapheme-stack) <- get src-a, left - var dest/edx: (addr grapheme-stack) <- get dest-a, left - copy-grapheme-stack src, dest - # copy right grapheme-stack + # copy left code-point-utf8-stack + var src/ecx: (addr code-point-utf8-stack) <- get src-a, left + var dest/edx: (addr code-point-utf8-stack) <- get dest-a, left + copy-code-point-utf8-stack src, dest + # copy right code-point-utf8-stack src <- get src-a, right dest <- get dest-a, right - copy-grapheme-stack src, dest + copy-code-point-utf8-stack src, dest } fn gap-buffer-is-decimal-integer? _self: (addr gap-buffer) -> _/eax: boolean { var self/esi: (addr gap-buffer) <- copy _self - var curr/ecx: (addr grapheme-stack) <- get self, left - var result/eax: boolean <- grapheme-stack-is-decimal-integer? curr + var curr/ecx: (addr code-point-utf8-stack) <- get self, left + var result/eax: boolean <- code-point-utf8-stack-is-decimal-integer? curr { compare result, 0/false break-if-= curr <- get self, right - result <- grapheme-stack-is-decimal-integer? curr + result <- code-point-utf8-stack-is-decimal-integer? curr } return result } diff --git a/linux/tile/grapheme-stack.mu b/linux/tile/grapheme-stack.mu index 0ea59ae5..c7565a1c 100644 --- a/linux/tile/grapheme-stack.mu +++ b/linux/tile/grapheme-stack.mu @@ -1,24 +1,24 @@ -type grapheme-stack { - data: (handle array grapheme) +type code-point-utf8-stack { + data: (handle array code-point-utf8) top: int } -fn initialize-grapheme-stack _self: (addr grapheme-stack), n: int { - var self/esi: (addr grapheme-stack) <- copy _self - var d/edi: (addr handle array grapheme) <- get self, data +fn initialize-code-point-utf8-stack _self: (addr code-point-utf8-stack), n: int { + var self/esi: (addr code-point-utf8-stack) <- copy _self + var d/edi: (addr handle array code-point-utf8) <- get self, data populate d, n var top/eax: (addr int) <- get self, top copy-to *top, 0 } -fn clear-grapheme-stack _self: (addr grapheme-stack) { - var self/esi: (addr grapheme-stack) <- copy _self +fn clear-code-point-utf8-stack _self: (addr code-point-utf8-stack) { + var self/esi: (addr code-point-utf8-stack) <- copy _self var top/eax: (addr int) <- get self, top copy-to *top, 0 } -fn grapheme-stack-empty? _self: (addr grapheme-stack) -> _/eax: boolean { - var self/esi: (addr grapheme-stack) <- copy _self +fn code-point-utf8-stack-empty? _self: (addr code-point-utf8-stack) -> _/eax: boolean { + var self/esi: (addr code-point-utf8-stack) <- copy _self var top/eax: (addr int) <- get self, top compare *top, 0 { @@ -28,20 +28,20 @@ fn grapheme-stack-empty? _self: (addr grapheme-stack) -> _/eax: boolean { return 0/false } -fn push-grapheme-stack _self: (addr grapheme-stack), _val: grapheme { - var self/esi: (addr grapheme-stack) <- copy _self +fn push-code-point-utf8-stack _self: (addr code-point-utf8-stack), _val: code-point-utf8 { + var self/esi: (addr code-point-utf8-stack) <- copy _self var top-addr/ecx: (addr int) <- get self, top - var data-ah/edx: (addr handle array grapheme) <- get self, data - var data/eax: (addr array grapheme) <- lookup *data-ah + var data-ah/edx: (addr handle array code-point-utf8) <- get self, data + var data/eax: (addr array code-point-utf8) <- lookup *data-ah var top/edx: int <- copy *top-addr - var dest-addr/edx: (addr grapheme) <- index data, top - var val/eax: grapheme <- copy _val + var dest-addr/edx: (addr code-point-utf8) <- index data, top + var val/eax: code-point-utf8 <- copy _val copy-to *dest-addr, val add-to *top-addr, 1 } -fn pop-grapheme-stack _self: (addr grapheme-stack) -> _/eax: grapheme { - var self/esi: (addr grapheme-stack) <- copy _self +fn pop-code-point-utf8-stack _self: (addr code-point-utf8-stack) -> _/eax: code-point-utf8 { + var self/esi: (addr code-point-utf8-stack) <- copy _self var top-addr/ecx: (addr int) <- get self, top { compare *top-addr, 0 @@ -49,25 +49,25 @@ fn pop-grapheme-stack _self: (addr grapheme-stack) -> _/eax: grapheme { return -1 } subtract-from *top-addr, 1 - var data-ah/edx: (addr handle array grapheme) <- get self, data - var data/eax: (addr array grapheme) <- lookup *data-ah + var data-ah/edx: (addr handle array code-point-utf8) <- get self, data + var data/eax: (addr array code-point-utf8) <- lookup *data-ah var top/edx: int <- copy *top-addr - var result-addr/eax: (addr grapheme) <- index data, top + var result-addr/eax: (addr code-point-utf8) <- index data, top return *result-addr } -fn copy-grapheme-stack _src: (addr grapheme-stack), dest: (addr grapheme-stack) { - var src/esi: (addr grapheme-stack) <- copy _src - var data-ah/edi: (addr handle array grapheme) <- get src, data - var _data/eax: (addr array grapheme) <- lookup *data-ah - var data/edi: (addr array grapheme) <- copy _data +fn copy-code-point-utf8-stack _src: (addr code-point-utf8-stack), dest: (addr code-point-utf8-stack) { + var src/esi: (addr code-point-utf8-stack) <- copy _src + var data-ah/edi: (addr handle array code-point-utf8) <- get src, data + var _data/eax: (addr array code-point-utf8) <- lookup *data-ah + var data/edi: (addr array code-point-utf8) <- copy _data var top-addr/ecx: (addr int) <- get src, top var i/eax: int <- copy 0 { compare i, *top-addr break-if->= - var g/edx: (addr grapheme) <- index data, i - push-grapheme-stack dest, *g + var g/edx: (addr code-point-utf8) <- index data, i + push-code-point-utf8-stack dest, *g i <- increment loop } @@ -75,18 +75,18 @@ fn copy-grapheme-stack _src: (addr grapheme-stack), dest: (addr grapheme-stack) # dump stack to screen from bottom to top # don't move the cursor or anything -fn render-stack-from-bottom _self: (addr grapheme-stack), screen: (addr screen) { - var self/esi: (addr grapheme-stack) <- copy _self - var data-ah/edi: (addr handle array grapheme) <- get self, data - var _data/eax: (addr array grapheme) <- lookup *data-ah - var data/edi: (addr array grapheme) <- copy _data +fn render-stack-from-bottom _self: (addr code-point-utf8-stack), screen: (addr screen) { + var self/esi: (addr code-point-utf8-stack) <- copy _self + var data-ah/edi: (addr handle array code-point-utf8) <- get self, data + var _data/eax: (addr array code-point-utf8) <- lookup *data-ah + var data/edi: (addr array code-point-utf8) <- copy _data var top-addr/ecx: (addr int) <- get self, top var i/eax: int <- copy 0 { compare i, *top-addr break-if->= - var g/edx: (addr grapheme) <- index data, i - print-grapheme screen, *g + var g/edx: (addr code-point-utf8) <- index data, i + print-code-point-utf8 screen, *g i <- increment loop } @@ -94,19 +94,19 @@ fn render-stack-from-bottom _self: (addr grapheme-stack), screen: (addr screen) # dump stack to screen from top to bottom # don't move the cursor or anything -fn render-stack-from-top _self: (addr grapheme-stack), screen: (addr screen) { - var self/esi: (addr grapheme-stack) <- copy _self - var data-ah/edi: (addr handle array grapheme) <- get self, data - var _data/eax: (addr array grapheme) <- lookup *data-ah - var data/edi: (addr array grapheme) <- copy _data +fn render-stack-from-top _self: (addr code-point-utf8-stack), screen: (addr screen) { + var self/esi: (addr code-point-utf8-stack) <- copy _self + var data-ah/edi: (addr handle array code-point-utf8) <- get self, data + var _data/eax: (addr array code-point-utf8) <- lookup *data-ah + var data/edi: (addr array code-point-utf8) <- copy _data var top-addr/ecx: (addr int) <- get self, top var i/eax: int <- copy *top-addr i <- decrement { compare i, 0 break-if-< - var g/edx: (addr grapheme) <- index data, i - print-grapheme screen, *g + var g/edx: (addr code-point-utf8) <- index data, i + print-code-point-utf8 screen, *g i <- decrement loop } @@ -114,11 +114,11 @@ fn render-stack-from-top _self: (addr grapheme-stack), screen: (addr screen) { # compare from bottom # beware: modifies 'stream', which must be disposed of after a false result -fn prefix-match? _self: (addr grapheme-stack), s: (addr stream byte) -> _/eax: boolean { - var self/esi: (addr grapheme-stack) <- copy _self - var data-ah/edi: (addr handle array grapheme) <- get self, data - var _data/eax: (addr array grapheme) <- lookup *data-ah - var data/edi: (addr array grapheme) <- copy _data +fn prefix-match? _self: (addr code-point-utf8-stack), s: (addr stream byte) -> _/eax: boolean { + var self/esi: (addr code-point-utf8-stack) <- copy _self + var data-ah/edi: (addr handle array code-point-utf8) <- get self, data + var _data/eax: (addr array code-point-utf8) <- lookup *data-ah + var data/edi: (addr array code-point-utf8) <- copy _data var top-addr/ecx: (addr int) <- get self, top var i/ebx: int <- copy 0 { @@ -126,8 +126,8 @@ fn prefix-match? _self: (addr grapheme-stack), s: (addr stream byte) -> _/eax: b break-if->= # if curr != expected, return false { - var curr-a/edx: (addr grapheme) <- index data, i - var expected/eax: grapheme <- read-grapheme s + var curr-a/edx: (addr code-point-utf8) <- index data, i + var expected/eax: code-point-utf8 <- read-code-point-utf8 s { compare expected, *curr-a break-if-= @@ -142,11 +142,11 @@ fn prefix-match? _self: (addr grapheme-stack), s: (addr stream byte) -> _/eax: b # compare from bottom # beware: modifies 'stream', which must be disposed of after a false result -fn suffix-match? _self: (addr grapheme-stack), s: (addr stream byte) -> _/eax: boolean { - var self/esi: (addr grapheme-stack) <- copy _self - var data-ah/edi: (addr handle array grapheme) <- get self, data - var _data/eax: (addr array grapheme) <- lookup *data-ah - var data/edi: (addr array grapheme) <- copy _data +fn suffix-match? _self: (addr code-point-utf8-stack), s: (addr stream byte) -> _/eax: boolean { + var self/esi: (addr code-point-utf8-stack) <- copy _self + var data-ah/edi: (addr handle array code-point-utf8) <- get self, data + var _data/eax: (addr array code-point-utf8) <- lookup *data-ah + var data/edi: (addr array code-point-utf8) <- copy _data var top-addr/eax: (addr int) <- get self, top var i/ebx: int <- copy *top-addr i <- decrement @@ -154,8 +154,8 @@ fn suffix-match? _self: (addr grapheme-stack), s: (addr stream byte) -> _/eax: b compare i, 0 break-if-< { - var curr-a/edx: (addr grapheme) <- index data, i - var expected/eax: grapheme <- read-grapheme s + var curr-a/edx: (addr code-point-utf8) <- index data, i + var expected/eax: code-point-utf8 <- read-code-point-utf8 s # if curr != expected, return false { compare expected, *curr-a @@ -169,18 +169,18 @@ fn suffix-match? _self: (addr grapheme-stack), s: (addr stream byte) -> _/eax: b return 1 # true } -fn grapheme-stack-is-decimal-integer? _self: (addr grapheme-stack) -> _/eax: boolean { - var self/esi: (addr grapheme-stack) <- copy _self - var data-ah/eax: (addr handle array grapheme) <- get self, data - var _data/eax: (addr array grapheme) <- lookup *data-ah - var data/edx: (addr array grapheme) <- copy _data +fn code-point-utf8-stack-is-decimal-integer? _self: (addr code-point-utf8-stack) -> _/eax: boolean { + var self/esi: (addr code-point-utf8-stack) <- copy _self + var data-ah/eax: (addr handle array code-point-utf8) <- get self, data + var _data/eax: (addr array code-point-utf8) <- lookup *data-ah + var data/edx: (addr array code-point-utf8) <- copy _data var top-addr/ecx: (addr int) <- get self, top var i/ebx: int <- copy 0 var result/eax: boolean <- copy 1/true - $grapheme-stack-is-integer?:loop: { + $code-point-utf8-stack-is-integer?:loop: { compare i, *top-addr break-if->= - var g/edx: (addr grapheme) <- index data, i + var g/edx: (addr code-point-utf8) <- index data, i result <- decimal-digit? *g compare result, 0/false break-if-= diff --git a/linux/tile/main.mu b/linux/tile/main.mu index e0daaf1b..f2561a28 100644 --- a/linux/tile/main.mu +++ b/linux/tile/main.mu @@ -54,7 +54,7 @@ fn interactive { initialize-environment env { render env - var key/eax: grapheme <- read-key-from-real-keyboard + var key/eax: code-point-utf8 <- read-key-from-real-keyboard compare key, 0x11/ctrl-q break-if-= process env, key @@ -79,7 +79,7 @@ fn process-all env: (addr environment), cmds: (addr array byte) { var done?/eax: boolean <- stream-empty? cmds-stream-a compare done?, 0/false break-if-!= - var g/eax: grapheme <- read-grapheme cmds-stream-a + var g/eax: code-point-utf8 <- read-code-point-utf8 cmds-stream-a process env, g loop } @@ -105,7 +105,7 @@ fn repl { var done?/eax: boolean <- stream-empty? line compare done?, 0/false break-if-!= - var g/eax: grapheme <- read-grapheme line + var g/eax: code-point-utf8 <- read-code-point-utf8 line process env, g loop } diff --git a/linux/tile/surface.mu b/linux/tile/surface.mu index 2e353022..e86e22b9 100644 --- a/linux/tile/surface.mu +++ b/linux/tile/surface.mu @@ -111,10 +111,10 @@ fn print-surface-cell-at _self: (addr surface), screen-row: int, screen-col: int compare idx, 0 { break-if->= - var space/ecx: grapheme <- copy 0x20 + var space/ecx: code-point-utf8 <- copy 0x20 var screen-ah/edi: (addr handle screen) <- get self, screen var screen/eax: (addr screen) <- lookup *screen-ah - print-grapheme screen, space + print-code-point-utf8 screen, space return } # otherwise print the appropriate screen-cell @@ -156,9 +156,9 @@ fn print-screen-cell screen: (addr screen), _cell: (addr screen-cell) { break-if-= start-blinking screen } - var g/eax: (addr grapheme) <- get cell, data - print-grapheme screen, *g -#? var g2/eax: grapheme <- copy *g + var g/eax: (addr code-point-utf8) <- get cell, data + print-code-point-utf8 screen, *g +#? var g2/eax: code-point-utf8 <- copy *g #? var g3/eax: int <- copy g2 #? print-int32-hex-to-real-screen g3 #? print-string-to-real-screen "\n" @@ -264,7 +264,7 @@ fn num-lines in: (addr array byte) -> _/ecx: int { var done?/eax: boolean <- stream-empty? s-addr compare done?, 0/false break-if-!= - var g/eax: grapheme <- read-grapheme s-addr + var g/eax: code-point-utf8 <- read-code-point-utf8 s-addr compare g, 0xa/newline loop-if-!= result <- increment @@ -282,7 +282,7 @@ fn first-line-length in: (addr array byte) -> _/edx: int { var done?/eax: boolean <- stream-empty? s-addr compare done?, 0/false break-if-!= - var g/eax: grapheme <- read-grapheme s-addr + var g/eax: code-point-utf8 <- read-code-point-utf8 s-addr compare g, 0xa/newline break-if-= result <- increment @@ -301,12 +301,12 @@ fn fill-in _out: (addr array screen-cell), in: (addr array byte) { var done?/eax: boolean <- stream-empty? s-addr compare done?, 0/false break-if-!= - var g/eax: grapheme <- read-grapheme s-addr + var g/eax: code-point-utf8 <- read-code-point-utf8 s-addr compare g, 0xa/newline loop-if-= var offset/edx: (offset screen-cell) <- compute-offset out, idx var dest/edx: (addr screen-cell) <- index out, offset - var dest2/edx: (addr grapheme) <- get dest, data + var dest2/edx: (addr code-point-utf8) <- get dest, data copy-to *dest2, g idx <- increment loop diff --git a/linux/tile/value.mu b/linux/tile/value.mu index 8bd01676..0eacd8be 100644 --- a/linux/tile/value.mu +++ b/linux/tile/value.mu @@ -90,15 +90,15 @@ fn render-number screen: (addr screen), val: float, top-level?: boolean { fg <- copy 0 } start-color screen, fg, bg - print-grapheme screen, 0x20/space + print-code-point-utf8 screen, 0x20/space print-float-decimal-approximate screen, val, 3 - print-grapheme screen, 0x20/space + print-code-point-utf8 screen, 0x20/space } fn render-array-at screen: (addr screen), row: int, col: int, _a: (addr array value) { start-color screen, 0xf2, 7 # don't surround in spaces - print-grapheme screen, 0x5b/[ + print-code-point-utf8 screen, 0x5b/[ increment col var a/esi: (addr array value) <- copy _a var max/ecx: int <- length a @@ -122,7 +122,7 @@ fn render-array-at screen: (addr screen), row: int, col: int, _a: (addr array va i <- increment loop } - print-grapheme screen, 0x5d/] + print-code-point-utf8 screen, 0x5d/] } fn render-screen screen: (addr screen), row: int, col: int, _target-screen: (addr screen) { @@ -179,13 +179,13 @@ fn print-screen-cell-of-fake-screen screen: (addr screen), _target: (addr screen start-blinking screen start-color screen, 0, 1 } - var g/eax: grapheme <- screen-grapheme-at target, row, col + var g/eax: code-point-utf8 <- screen-code-point-utf8-at target, row, col { compare g, 0 break-if-!= g <- copy 0x20/space } - print-grapheme screen, g + print-code-point-utf8 screen, g reset-formatting screen } diff --git a/linux/tile/word.mu b/linux/tile/word.mu index b4f5000b..3b4851f0 100644 --- a/linux/tile/word.mu +++ b/linux/tile/word.mu @@ -58,15 +58,15 @@ fn move-word-contents _src-ah: (addr handle word), _dest-ah: (addr handle word) cursor-to-start src var src-data-ah/eax: (addr handle gap-buffer) <- get src, scalar-data var src-data/eax: (addr gap-buffer) <- lookup *src-data-ah - var src-stack/ecx: (addr grapheme-stack) <- get src-data, right + var src-stack/ecx: (addr code-point-utf8-stack) <- get src-data, right { - var done?/eax: boolean <- grapheme-stack-empty? src-stack + var done?/eax: boolean <- code-point-utf8-stack-empty? src-stack compare done?, 0/false break-if-!= - var g/eax: grapheme <- pop-grapheme-stack src-stack -#? print-grapheme 0, g + var g/eax: code-point-utf8 <- pop-code-point-utf8-stack src-stack +#? print-code-point-utf8 0, g #? print-string 0, "\n" - add-grapheme-to-word dest, g + add-code-point-utf8-to-word dest, g loop } } @@ -79,17 +79,17 @@ fn copy-word-contents-before-cursor _src-ah: (addr handle word), _dest-ah: (addr var src/eax: (addr word) <- lookup *src-ah var src-data-ah/eax: (addr handle gap-buffer) <- get src, scalar-data var src-data/eax: (addr gap-buffer) <- lookup *src-data-ah - var src-stack/ecx: (addr grapheme-stack) <- get src-data, left - var src-stack-data-ah/eax: (addr handle array grapheme) <- get src-stack, data - var _src-stack-data/eax: (addr array grapheme) <- lookup *src-stack-data-ah - var src-stack-data/edx: (addr array grapheme) <- copy _src-stack-data + var src-stack/ecx: (addr code-point-utf8-stack) <- get src-data, left + var src-stack-data-ah/eax: (addr handle array code-point-utf8) <- get src-stack, data + var _src-stack-data/eax: (addr array code-point-utf8) <- lookup *src-stack-data-ah + var src-stack-data/edx: (addr array code-point-utf8) <- copy _src-stack-data var top-addr/ecx: (addr int) <- get src-stack, top var i/eax: int <- copy 0 { compare i, *top-addr break-if->= - var g/edx: (addr grapheme) <- index src-stack-data, i - add-grapheme-to-word dest, *g + var g/edx: (addr code-point-utf8) <- index src-stack-data, i + add-code-point-utf8-to-word dest, *g i <- increment loop } @@ -129,27 +129,27 @@ fn final-word _in: (addr handle word), out: (addr handle word) { copy-object curr-ah, out # modify 'out' right at the end, just in case it's same as 'in' } -fn first-grapheme _self: (addr word) -> _/eax: grapheme { +fn first-code-point-utf8 _self: (addr word) -> _/eax: code-point-utf8 { var self/esi: (addr word) <- copy _self var data-ah/eax: (addr handle gap-buffer) <- get self, scalar-data var data/eax: (addr gap-buffer) <- lookup *data-ah - var result/eax: grapheme <- first-grapheme-in-gap-buffer data + var result/eax: code-point-utf8 <- first-code-point-utf8-in-gap-buffer data return result } -fn grapheme-before-cursor _self: (addr word) -> _/eax: grapheme { +fn code-point-utf8-before-cursor _self: (addr word) -> _/eax: code-point-utf8 { var self/esi: (addr word) <- copy _self var data-ah/eax: (addr handle gap-buffer) <- get self, scalar-data var data/eax: (addr gap-buffer) <- lookup *data-ah - var result/eax: grapheme <- grapheme-before-cursor-in-gap-buffer data + var result/eax: code-point-utf8 <- code-point-utf8-before-cursor-in-gap-buffer data return result } -fn add-grapheme-to-word _self: (addr word), c: grapheme { +fn add-code-point-utf8-to-word _self: (addr word), c: code-point-utf8 { var self/esi: (addr word) <- copy _self var data-ah/eax: (addr handle gap-buffer) <- get self, scalar-data var data/eax: (addr gap-buffer) <- lookup *data-ah - add-grapheme-at-gap data, c + add-code-point-utf8-at-gap data, c } fn cursor-at-start? _self: (addr word) -> _/eax: boolean { @@ -172,14 +172,14 @@ fn cursor-left _self: (addr word) { var self/esi: (addr word) <- copy _self var data-ah/eax: (addr handle gap-buffer) <- get self, scalar-data var data/eax: (addr gap-buffer) <- lookup *data-ah - var dummy/eax: grapheme <- gap-left data + var dummy/eax: code-point-utf8 <- gap-left data } fn cursor-right _self: (addr word) { var self/esi: (addr word) <- copy _self var data-ah/eax: (addr handle gap-buffer) <- get self, scalar-data var data/eax: (addr gap-buffer) <- lookup *data-ah - var dummy/eax: grapheme <- gap-right data + var dummy/eax: code-point-utf8 <- gap-right data } fn cursor-to-start _self: (addr word) { @@ -211,11 +211,11 @@ fn delete-before-cursor _self: (addr word) { delete-before-gap data } -fn pop-after-cursor _self: (addr word) -> _/eax: grapheme { +fn pop-after-cursor _self: (addr word) -> _/eax: code-point-utf8 { var self/esi: (addr word) <- copy _self var data-ah/eax: (addr handle gap-buffer) <- get self, scalar-data var data/eax: (addr gap-buffer) <- lookup *data-ah - var result/eax: grapheme <- pop-after-gap data + var result/eax: code-point-utf8 <- pop-after-gap data return result } @@ -553,14 +553,14 @@ fn parse-words in: (addr array byte), out-ah: (addr handle word) { var done?/eax: boolean <- stream-empty? in-stream-a compare done?, 0/false break-if-!= - var _g/eax: grapheme <- read-grapheme in-stream-a - var g/ecx: grapheme <- copy _g + var _g/eax: code-point-utf8 <- read-code-point-utf8 in-stream-a + var g/ecx: code-point-utf8 <- copy _g # if not space, insert compare g, 0x20/space { break-if-= var cursor-word/eax: (addr word) <- lookup *cursor-word-ah - add-grapheme-to-word cursor-word, g + add-code-point-utf8-to-word cursor-word, g loop $parse-words:loop } # otherwise insert word after and move cursor to it diff --git a/linux/vocabulary.md b/linux/vocabulary.md index f1c6e3a5..2eefae33 100644 --- a/linux/vocabulary.md +++ b/linux/vocabulary.md @@ -206,8 +206,8 @@ doesn't yet parse floating-point literals: - `print-int32-buffered`: int -> buffered-file - textual representation in hex, including '0x' prefix -- `write-grapheme`: grapheme -> stream -- `to-grapheme`: code-point -> grapheme +- `write-code-point-utf8`: code-point-utf8 -> stream +- `to-utf8`: code-point -> code-point-utf8 - `write-float-decimal-approximate`: float, precision: int -> stream @@ -226,8 +226,8 @@ there isn't enough room in the destination stream. - `read-line-buffered`: buffered-file -> stream - Will abort the entire program if there isn't enough room. -- `read-grapheme`: stream -> grapheme -- `read-grapheme-buffered`: buffered-file -> grapheme +- `read-code-point-utf8`: stream -> code-point-utf8 +- `read-code-point-utf8-buffered`: buffered-file -> code-point-utf8 - `read-lines`: buffered-file -> array of strings @@ -268,7 +268,7 @@ Unix terminal properties supported by almost all modern terminal emulators. - `print-string`: string -> screen - `print-stream` -- `print-grapheme` +- `print-code-point-utf8` - `print-code-point` - `print-int32-hex` - `print-int32-decimal` @@ -290,7 +290,7 @@ manipulated. Assertions for tests: -- `screen-grapheme-at` +- `screen-code-point-utf8-at` - `screen-color-at` - `screen-background-color-at` - `screen-bold-at?` -- cgit 1.4.1-2-gfad0