From 6e05a8fa27139ddf75a029ad94d44b48a92785b2 Mon Sep 17 00:00:00 2001 From: "Kartik K. Agaram" Date: Sun, 29 Aug 2021 22:16:34 -0700 Subject: fix bad terminology: grapheme -> code point Unix text-mode terminals transparently support utf-8 these days, and so I treat utf-8 sequences (which I call graphemes in Mu) as fundamental. I then blindly carried over this state of affairs to bare-metal Mu, where it makes no sense. If you don't have a terminal handling font-rendering for you, fonts are most often indexed by code points and not utf-8 sequences. --- linux/403unicode.mu | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) (limited to 'linux') diff --git a/linux/403unicode.mu b/linux/403unicode.mu index 9f857c7f..2baefd9f 100644 --- a/linux/403unicode.mu +++ b/linux/403unicode.mu @@ -11,14 +11,11 @@ # # Mu doesn't currently support combining code points, or graphemes made of # multiple code points. One day we will. -# We also don't currently support code points that translate into multiple -# or wide graphemes. (In particular, Tab will never be supported.) +# On Linux, we also don't currently support code points that translate into +# multiple or wide graphemes. (In particular, Tab will never be supported.) # transliterated from tb_utf8_unicode_to_char in https://github.com/nsf/termbox # https://wiki.tcl-lang.org/page/UTF%2D8+bit+by+bit explains the algorithm -# -# The day we want to support combining characters, this function will need to -# take multiple code points. Or something. fn to-grapheme in: code-point -> _/eax: grapheme { var c/eax: int <- copy in var num-trailers/ecx: int <- copy 0 -- cgit 1.4.1-2-gfad0