about summary refs log tree commit diff stats
path: root/shell
diff options
context:
space:
mode:
author Kartik K. Agaram <vc@akkartik.com> 2021-11-09 08:12:11 -0800
committer Kartik K. Agaram <vc@akkartik.com> 2021-11-09 08:12:11 -0800
commit d253a3182859c7c989449122a60d5f362f19ded0 (patch)
tree 7459cddc57f93107fa4cee89d4f0a94dd0f0f131 /shell
parent d1808995b2c6b99749237a29e6ac6477d00ff8f9 (diff)
download mu-d253a3182859c7c989449122a60d5f362f19ded0.tar.gz
rename grapheme to code-point-utf8
Longer name, but it doesn't lie. We have no data structure right now for
combining multiple code points. And it makes no sense for the notion of
a grapheme to conflate its Unicode encoding.
Diffstat (limited to 'shell')
-rw-r--r-- shell/README.md 2
-rw-r--r-- shell/environment.mu 6
-rw-r--r-- shell/global.mu 2
-rw-r--r-- shell/infix.mu 30
-rw-r--r-- shell/main.mu 2
-rw-r--r-- shell/primitives.mu 18
-rw-r--r-- shell/sandbox.mu 2
-rw-r--r-- shell/tokenize.mu 100
-rw-r--r-- shell/trace.mu 2
9 files changed, 82 insertions, 82 deletions
diff --git a/shell/README.md b/shell/README.md
index 3a9cdf3c..180782eb 100644
--- a/shell/README.md
+++ b/shell/README.md
@@ -150,7 +150,7 @@ def (a <> b)
 ```
 
 To permit arbitrary infix operators, the Mu shell partitions the space of
-graphemes between operators and regular symbols. As a result, you can't define
+code-point-utf8s between operators and regular symbols. As a result, you can't define
 symbols mixing the two.
 ```
 '*global*
diff --git a/shell/environment.mu b/shell/environment.mu
index c3d78d86..439d5423 100644
--- a/shell/environment.mu
+++ b/shell/environment.mu
@@ -2,7 +2,7 @@
 #
 # vim:textwidth&
 # It would be nice for tests to use a narrower screen than the standard 0x80 of
-# 1024 pixels with 8px-wide graphemes. But it complicates rendering logic to
+# 1024 pixels with 8px-wide code-point-utf8s. But it complicates rendering logic to
 # make width configurable, so we just use longer lines than usual.
 
 type environment {
@@ -93,7 +93,7 @@ fn type-in self: (addr environment), screen: (addr screen), keys: (addr array by
     var done?/eax: boolean <- stream-empty? input-stream
     compare done?, 0/false
     break-if-!=
-    var key/eax: grapheme <- read-grapheme input-stream
+    var key/eax: code-point-utf8 <- read-code-point-utf8 input-stream
     edit-environment self, key, 0/no-disk
     render-environment screen, self
     loop
@@ -145,7 +145,7 @@ fn render-environment screen: (addr screen), _self: (addr environment) {
   render-sandbox-menu screen, sandbox
 }
 
-fn edit-environment _self: (addr environment), key: grapheme, data-disk: (addr disk) {
+fn edit-environment _self: (addr environment), key: code-point-utf8, data-disk: (addr disk) {
   var self/esi: (addr environment) <- copy _self
   var globals/edi: (addr global-table) <- get self, globals
   var sandbox/ecx: (addr sandbox) <- get self, sandbox
diff --git a/shell/global.mu b/shell/global.mu
index f6a779f9..329556c1 100644
--- a/shell/global.mu
+++ b/shell/global.mu
@@ -230,7 +230,7 @@ fn render-globals-menu screen: (addr screen), _self: (addr global-table) {
   draw-text-rightward-from-cursor screen, " >>  ", width, 7/fg, 0xc5/bg=blue-bg
 }
 
-fn edit-globals _self: (addr global-table), key: grapheme {
+fn edit-globals _self: (addr global-table), key: code-point-utf8 {
   var self/esi: (addr global-table) <- copy _self
   # ctrl-s
   {
diff --git a/shell/infix.mu b/shell/infix.mu
index 41e8fa5d..2ca1b875 100644
--- a/shell/infix.mu
+++ b/shell/infix.mu
@@ -312,8 +312,8 @@ fn tokenize-infix _sym-ah: (addr handle cell), trace: (addr trace) {
   var buffer/edi: (addr gap-buffer) <- address buffer-storage
   initialize-gap-buffer buffer, 0x40/max-symbol-size
   # scan for first non-$
-  var g/eax: grapheme <- read-grapheme sym-data
-  add-grapheme-at-gap buffer, g
+  var g/eax: code-point-utf8 <- read-code-point-utf8 sym-data
+  add-code-point-utf8-at-gap buffer, g
   {
     compare g, 0x24/dollar
     break-if-!=
@@ -323,28 +323,28 @@ fn tokenize-infix _sym-ah: (addr handle cell), trace: (addr trace) {
       break-if-=
       return  # symbol is all '$'s; do nothing
     }
-    g <- read-grapheme sym-data
-    add-grapheme-at-gap buffer, g
+    g <- read-code-point-utf8 sym-data
+    add-code-point-utf8-at-gap buffer, g
     loop
   }
   var tokenization-needed?: boolean
-  var _operator-so-far?/eax: boolean <- operator-grapheme? g
+  var _operator-so-far?/eax: boolean <- operator-code-point-utf8? g
   var operator-so-far?/ecx: boolean <- copy _operator-so-far?
   {
     var done?/eax: boolean <- stream-empty? sym-data
     compare done?, 0/false
     break-if-!=
-    var g/eax: grapheme <- read-grapheme sym-data
+    var g/eax: code-point-utf8 <- read-code-point-utf8 sym-data
     {
-      var curr-operator?/eax: boolean <- operator-grapheme? g
+      var curr-operator?/eax: boolean <- operator-code-point-utf8? g
       compare curr-operator?, operator-so-far?
       break-if-=
       # state change; insert a space
-      add-grapheme-at-gap buffer, 0x20/space
+      add-code-point-utf8-at-gap buffer, 0x20/space
       operator-so-far? <- copy curr-operator?
       copy-to tokenization-needed?, 1/true
     }
-    add-grapheme-at-gap buffer, g
+    add-code-point-utf8-at-gap buffer, g
     loop
   }
   compare tokenization-needed?, 0/false
@@ -406,7 +406,7 @@ fn test-infix {
 
 # helpers
 
-# return true if x is composed entirely of operator graphemes, optionally prefixed with some '$'s
+# return true if x is composed entirely of operator code-point-utf8s, optionally prefixed with some '$'s
 # some operator, some non-operator => pre-tokenized symbol; return false
 # all '$'s => return false
 fn operator-symbol? _x: (addr cell) -> _/eax: boolean {
@@ -421,7 +421,7 @@ fn operator-symbol? _x: (addr cell) -> _/eax: boolean {
   var _x-data/eax: (addr stream byte) <- lookup *x-data-ah
   var x-data/esi: (addr stream byte) <- copy _x-data
   rewind-stream x-data
-  var g/eax: grapheme <- read-grapheme x-data
+  var g/eax: code-point-utf8 <- read-code-point-utf8 x-data
   # special case: '$' is reserved for gensyms, and can work with either
   # operator or non-operator symbols.
   {
@@ -434,12 +434,12 @@ fn operator-symbol? _x: (addr cell) -> _/eax: boolean {
       # '$', '$$', '$$$', etc. are regular symbols
       return 0/false
     }
-    g <- read-grapheme x-data
+    g <- read-code-point-utf8 x-data
     loop
   }
   {
     {
-      var result/eax: boolean <- operator-grapheme? g
+      var result/eax: boolean <- operator-code-point-utf8? g
       compare result, 0/false
       break-if-!=
       return 0/false
@@ -449,13 +449,13 @@ fn operator-symbol? _x: (addr cell) -> _/eax: boolean {
       compare done?, 0/false
     }
     break-if-!=
-    g <- read-grapheme x-data
+    g <- read-code-point-utf8 x-data
     loop
   }
   return 1/true
 }
 
-fn operator-grapheme? g: grapheme -> _/eax: boolean {
+fn operator-code-point-utf8? g: code-point-utf8 -> _/eax: boolean {
   # '$' is special and can be in either a symbol or operator; here we treat it as a symbol
   compare g, 0x25/percent
   {
diff --git a/shell/main.mu b/shell/main.mu
index 1f0e2de9..a588324a 100644
--- a/shell/main.mu
+++ b/shell/main.mu
@@ -13,7 +13,7 @@ fn main screen: (addr screen), keyboard: (addr keyboard), data-disk: (addr disk)
       var key/eax: byte <- read-key keyboard
       compare key, 0
       loop-if-=
-      var key/eax: grapheme <- copy key
+      var key/eax: code-point-utf8 <- copy key
       edit-environment env, key, data-disk
     }
     loop
diff --git a/shell/primitives.mu b/shell/primitives.mu
index e955b531..a87009d3 100644
--- a/shell/primitives.mu
+++ b/shell/primitives.mu
@@ -172,7 +172,7 @@ fn render-primitives screen: (addr screen), xmin: int, xmax: int, ymax: int {
   y <- increment
   var tmpx/eax: int <- copy xmin
   tmpx <- draw-text-rightward screen, "  key", tmpx, left-max, y, 0x2a/fg=orange, 0xdc/bg=green-bg
-  tmpx <- draw-text-rightward screen, ": keyboard -> grapheme?", tmpx, left-max, y, 7/fg=grey, 0xdc/bg=green-bg
+  tmpx <- draw-text-rightward screen, ": keyboard -> code-point-utf8?", tmpx, left-max, y, 7/fg=grey, 0xdc/bg=green-bg
   y <- increment
   var tmpx/eax: int <- copy xmin
   tmpx <- draw-text-rightward screen, "streams", tmpx, left-max, y, 7/fg=grey, 0xdc/bg=green-bg
@@ -183,7 +183,7 @@ fn render-primitives screen: (addr screen), xmin: int, xmax: int, ymax: int {
   y <- increment
   var tmpx/eax: int <- copy xmin
   tmpx <- draw-text-rightward screen, "  write", tmpx, left-max, y, 0x2a/fg=orange, 0xdc/bg=green-bg
-  tmpx <- draw-text-rightward screen, ": stream grapheme -> stream", tmpx, left-max, y, 7/fg=grey, 0xdc/bg=green-bg
+  tmpx <- draw-text-rightward screen, ": stream code-point-utf8 -> stream", tmpx, left-max, y, 7/fg=grey, 0xdc/bg=green-bg
   y <- increment
   var tmpx/eax: int <- copy xmin
   tmpx <- draw-text-rightward screen, "  rewind clear", tmpx, left-max, y, 0x2a/fg=orange, 0xdc/bg=green-bg
@@ -191,7 +191,7 @@ fn render-primitives screen: (addr screen), xmin: int, xmax: int, ymax: int {
   y <- increment
   var tmpx/eax: int <- copy xmin
   tmpx <- draw-text-rightward screen, "  read", tmpx, left-max, y, 0x2a/fg=orange, 0xdc/bg=green-bg
-  tmpx <- draw-text-rightward screen, ": stream -> grapheme", tmpx, left-max, y, 7/fg=grey, 0xdc/bg=green-bg
+  tmpx <- draw-text-rightward screen, ": stream -> code-point-utf8", tmpx, left-max, y, 7/fg=grey, 0xdc/bg=green-bg
 }
 
 fn primitive-global? _x: (addr global) -> _/eax: boolean {
@@ -3056,7 +3056,7 @@ fn wait-for-key keyboard: (addr gap-buffer) -> _/eax: int {
     return result
   }
   # otherwise read from fake keyboard
-  var g/eax: grapheme <- read-from-gap-buffer keyboard
+  var g/eax: code-point-utf8 <- read-from-gap-buffer keyboard
   var result/eax: int <- copy g
   return result
 }
@@ -3121,14 +3121,14 @@ fn apply-write _args-ah: (addr handle cell), out: (addr handle cell), trace: (ad
     var second-type/eax: (addr int) <- get second, type
     compare *second-type, 1/number
     break-if-=
-    error trace, "second arg for 'write' is not a number/grapheme"
+    error trace, "second arg for 'write' is not a number/code-point-utf8"
     return
   }
   var second-value/eax: (addr float) <- get second, number-data
   var x-float/xmm0: float <- copy *second-value
   var x/eax: int <- convert x-float
-  var x-grapheme/eax: grapheme <- copy x
-  write-grapheme stream-data, x-grapheme
+  var x-code-point-utf8/eax: code-point-utf8 <- copy x
+  write-code-point-utf8 stream-data, x-code-point-utf8
   # return the stream
   copy-object first-ah, out
 }
@@ -3202,8 +3202,8 @@ fn apply-read _args-ah: (addr handle cell), out: (addr handle cell), trace: (add
   var _stream-data/eax: (addr stream byte) <- lookup *stream-data-ah
   var stream-data/ebx: (addr stream byte) <- copy _stream-data
 #?   rewind-stream stream-data
-  var result-grapheme/eax: grapheme <- read-grapheme stream-data
-  var result/eax: int <- copy result-grapheme
+  var result-code-point-utf8/eax: code-point-utf8 <- read-code-point-utf8 stream-data
+  var result/eax: int <- copy result-code-point-utf8
   new-integer out, result
 }
 
diff --git a/shell/sandbox.mu b/shell/sandbox.mu
index d50f47f0..20471115 100644
--- a/shell/sandbox.mu
+++ b/shell/sandbox.mu
@@ -449,7 +449,7 @@ fn render-keyboard-menu screen: (addr screen) {
   draw-text-rightward-from-cursor screen, " to sandbox  ", width, 7/fg, 0xc5/bg=blue-bg
 }
 
-fn edit-sandbox _self: (addr sandbox), key: grapheme, globals: (addr global-table), data-disk: (addr disk) {
+fn edit-sandbox _self: (addr sandbox), key: code-point-utf8, globals: (addr global-table), data-disk: (addr disk) {
   var self/esi: (addr sandbox) <- copy _self
   # ctrl-s
   {
diff --git a/shell/tokenize.mu b/shell/tokenize.mu
index fba26b74..dc392a34 100644
--- a/shell/tokenize.mu
+++ b/shell/tokenize.mu
@@ -429,13 +429,13 @@ fn next-token in: (addr gap-buffer), out: (addr token), start-of-line?: boolean,
     next-indent-token in, out, trace  # might not be returned
   }
   skip-spaces-from-gap-buffer in
-  var g/eax: grapheme <- peek-from-gap-buffer in
+  var g/eax: code-point-utf8 <- peek-from-gap-buffer in
   {
     compare g, 0x23/comment
     break-if-!=
     skip-rest-of-line in
   }
-  var g/eax: grapheme <- peek-from-gap-buffer in
+  var g/eax: code-point-utf8 <- peek-from-gap-buffer in
   {
     compare g, 0xa/newline
     break-if-!=
@@ -461,8 +461,8 @@ fn next-token in: (addr gap-buffer), out: (addr token), start-of-line?: boolean,
     trace-higher trace
     return 1/at-start-of-line
   }
-  var _g/eax: grapheme <- peek-from-gap-buffer in
-  var g/ecx: grapheme <- copy _g
+  var _g/eax: code-point-utf8 <- peek-from-gap-buffer in
+  var g/ecx: code-point-utf8 <- copy _g
   {
     var should-trace?/eax: boolean <- should-trace? trace
     compare should-trace?, 0/false
@@ -479,7 +479,7 @@ fn next-token in: (addr gap-buffer), out: (addr token), start-of-line?: boolean,
     {
       compare g, 0x22/double-quote
       break-if-!=
-      var dummy/eax: grapheme <- read-from-gap-buffer in  # skip
+      var dummy/eax: code-point-utf8 <- read-from-gap-buffer in  # skip
       next-stream-token in, out, trace
       break $next-token:case
     }
@@ -487,13 +487,13 @@ fn next-token in: (addr gap-buffer), out: (addr token), start-of-line?: boolean,
     {
       compare g, 0x5b/open-square-bracket
       break-if-!=
-      var dummy/eax: grapheme <- read-from-gap-buffer in  # skip open bracket
+      var dummy/eax: code-point-utf8 <- read-from-gap-buffer in  # skip open bracket
       next-balanced-stream-token in, out, trace
       break $next-token:case
     }
     # other symbol char
     {
-      var symbol?/eax: boolean <- symbol-grapheme? g
+      var symbol?/eax: boolean <- symbol-code-point-utf8? g
       compare symbol?, 0/false
       break-if-=
       next-symbol-token in, out, trace
@@ -508,10 +508,10 @@ fn next-token in: (addr gap-buffer), out: (addr token), start-of-line?: boolean,
     }
     # other brackets are always single-char tokens
     {
-      var bracket?/eax: boolean <- bracket-grapheme? g
+      var bracket?/eax: boolean <- bracket-code-point-utf8? g
       compare bracket?, 0/false
       break-if-=
-      var g/eax: grapheme <- read-from-gap-buffer in
+      var g/eax: code-point-utf8 <- read-from-gap-buffer in
       next-bracket-token g, out, trace
       break $next-token:case
     }
@@ -519,7 +519,7 @@ fn next-token in: (addr gap-buffer), out: (addr token), start-of-line?: boolean,
     {
       compare g, 0x27/single-quote
       break-if-!=
-      var g/eax: grapheme <- read-from-gap-buffer in  # consume
+      var g/eax: code-point-utf8 <- read-from-gap-buffer in  # consume
       initialize-token out, "'"
       break $next-token:case
     }
@@ -527,7 +527,7 @@ fn next-token in: (addr gap-buffer), out: (addr token), start-of-line?: boolean,
     {
       compare g, 0x60/backquote
       break-if-!=
-      var g/eax: grapheme <- read-from-gap-buffer in  # consume
+      var g/eax: code-point-utf8 <- read-from-gap-buffer in  # consume
       initialize-token out, "`"
       break $next-token:case
     }
@@ -535,7 +535,7 @@ fn next-token in: (addr gap-buffer), out: (addr token), start-of-line?: boolean,
     {
       compare g, 0x2c/comma
       break-if-!=
-      var g/eax: grapheme <- read-from-gap-buffer in  # consume
+      var g/eax: code-point-utf8 <- read-from-gap-buffer in  # consume
       # check for unquote-splice
       {
         g <- peek-from-gap-buffer in
@@ -581,7 +581,7 @@ fn next-symbol-token in: (addr gap-buffer), _out: (addr token), trace: (addr tra
     var done?/eax: boolean <- gap-buffer-scan-done? in
     compare done?, 0/false
     break-if-!=
-    var g/eax: grapheme <- peek-from-gap-buffer in
+    var g/eax: code-point-utf8 <- peek-from-gap-buffer in
     {
       {
         var should-trace?/eax: boolean <- should-trace? trace
@@ -597,14 +597,14 @@ fn next-symbol-token in: (addr gap-buffer), _out: (addr token), trace: (addr tra
     }
     # if non-symbol, return
     {
-      var symbol-grapheme?/eax: boolean <- symbol-grapheme? g
-      compare symbol-grapheme?, 0/false
+      var symbol-code-point-utf8?/eax: boolean <- symbol-code-point-utf8? g
+      compare symbol-code-point-utf8?, 0/false
       break-if-!=
       trace-text trace, "tokenize", "stop"
       break $next-symbol-token:loop
     }
-    var g/eax: grapheme <- read-from-gap-buffer in
-    write-grapheme out-data, g
+    var g/eax: code-point-utf8 <- read-from-gap-buffer in
+    write-code-point-utf8 out-data, g
     loop
   }
   trace-higher trace
@@ -630,16 +630,16 @@ fn next-number-token in: (addr gap-buffer), _out: (addr token), trace: (addr tra
   var _out-data/eax: (addr stream byte) <- lookup *out-data-ah
   var out-data/edi: (addr stream byte) <- copy _out-data
   $next-number-token:check-minus: {
-    var g/eax: grapheme <- peek-from-gap-buffer in
+    var g/eax: code-point-utf8 <- peek-from-gap-buffer in
     compare g, 0x2d/minus
     g <- read-from-gap-buffer in  # consume
-    write-grapheme out-data, g
+    write-code-point-utf8 out-data, g
   }
   $next-number-token:loop: {
     var done?/eax: boolean <- gap-buffer-scan-done? in
     compare done?, 0/false
     break-if-!=
-    var g/eax: grapheme <- peek-from-gap-buffer in
+    var g/eax: code-point-utf8 <- peek-from-gap-buffer in
     {
       {
         var should-trace?/eax: boolean <- should-trace? trace
@@ -653,15 +653,15 @@ fn next-number-token in: (addr gap-buffer), _out: (addr token), trace: (addr tra
       write-int32-hex stream, gval
       trace trace, "tokenize", stream
     }
-    # if not symbol grapheme, return
+    # if not symbol code-point-utf8, return
     {
-      var symbol-grapheme?/eax: boolean <- symbol-grapheme? g
-      compare symbol-grapheme?, 0/false
+      var symbol-code-point-utf8?/eax: boolean <- symbol-code-point-utf8? g
+      compare symbol-code-point-utf8?, 0/false
       break-if-!=
       trace-text trace, "tokenize", "stop"
       break $next-number-token:loop
     }
-    # if not digit grapheme, abort
+    # if not digit code-point-utf8, abort
     {
       var digit?/eax: boolean <- decimal-digit? g
       compare digit?, 0/false
@@ -670,8 +670,8 @@ fn next-number-token in: (addr gap-buffer), _out: (addr token), trace: (addr tra
       return
     }
     trace-text trace, "tokenize", "append"
-    var g/eax: grapheme <- read-from-gap-buffer in
-    write-grapheme out-data, g
+    var g/eax: code-point-utf8 <- read-from-gap-buffer in
+    write-code-point-utf8 out-data, g
     loop
   }
   trace-higher trace
@@ -696,10 +696,10 @@ fn next-stream-token in: (addr gap-buffer), _out: (addr token), trace: (addr tra
       error trace, "unbalanced '\"'"
       return
     }
-    var g/eax: grapheme <- read-from-gap-buffer in
+    var g/eax: code-point-utf8 <- read-from-gap-buffer in
     compare g, 0x22/double-quote
     break-if-=
-    write-grapheme out-data, g
+    write-code-point-utf8 out-data, g
     loop
   }
   {
@@ -735,7 +735,7 @@ fn next-balanced-stream-token in: (addr gap-buffer), _out: (addr token), trace:
       error trace, "unbalanced '['"
       return
     }
-    var g/eax: grapheme <- read-from-gap-buffer in
+    var g/eax: code-point-utf8 <- read-from-gap-buffer in
     {
       compare g, 0x5b/open-square-bracket
       break-if-!=
@@ -748,7 +748,7 @@ fn next-balanced-stream-token in: (addr gap-buffer), _out: (addr token), trace:
       break-if-= $next-balanced-stream-token:loop
       decrement bracket-count
     }
-    write-grapheme out-data, g
+    write-code-point-utf8 out-data, g
     loop
   }
   {
@@ -764,14 +764,14 @@ fn next-balanced-stream-token in: (addr gap-buffer), _out: (addr token), trace:
   }
 }
 
-fn next-bracket-token g: grapheme, _out: (addr token), trace: (addr trace) {
+fn next-bracket-token g: code-point-utf8, _out: (addr token), trace: (addr trace) {
   trace-text trace, "tokenize", "bracket"
   var out/eax: (addr token) <- copy _out
   var out-data-ah/eax: (addr handle stream byte) <- get out, text-data
   populate-stream out-data-ah, 0x40
   var _out-data/eax: (addr stream byte) <- lookup *out-data-ah
   var out-data/edi: (addr stream byte) <- copy _out-data
-  write-grapheme out-data, g
+  write-code-point-utf8 out-data, g
   {
     var should-trace?/eax: boolean <- should-trace? trace
     compare should-trace?, 0/false
@@ -790,7 +790,7 @@ fn skip-rest-of-line in: (addr gap-buffer) {
     var done?/eax: boolean <- gap-buffer-scan-done? in
     compare done?, 0/false
     break-if-!=
-    var g/eax: grapheme <- peek-from-gap-buffer in
+    var g/eax: code-point-utf8 <- peek-from-gap-buffer in
     compare g, 0xa/newline
     break-if-=
     g <- read-from-gap-buffer in  # consume
@@ -810,7 +810,7 @@ fn next-indent-token in: (addr gap-buffer), _out: (addr token), trace: (addr tra
     var done?/eax: boolean <- gap-buffer-scan-done? in
     compare done?, 0/false
     break-if-!=
-    var g/eax: grapheme <- peek-from-gap-buffer in
+    var g/eax: code-point-utf8 <- peek-from-gap-buffer in
     {
       {
         var should-trace?/eax: boolean <- should-trace? trace
@@ -844,7 +844,7 @@ fn next-indent-token in: (addr gap-buffer), _out: (addr token), trace: (addr tra
   }
 }
 
-# Mu carves up the space of graphemes into 4 categories:
+# Mu carves up the space of code-point-utf8s into 4 categories:
 #   whitespace
 #   quotes and unquotes (from a Lisp perspective; doesn't include double
 #                        quotes or other Unicode quotes)
@@ -856,20 +856,20 @@ fn next-indent-token in: (addr gap-buffer), _out: (addr token), trace: (addr tra
 # During tokenization operators and symbols are treated identically.
 # A later phase digs into that nuance.
 
-fn symbol-grapheme? g: grapheme -> _/eax: boolean {
-  var whitespace?/eax: boolean <- whitespace-grapheme? g
+fn symbol-code-point-utf8? g: code-point-utf8 -> _/eax: boolean {
+  var whitespace?/eax: boolean <- whitespace-code-point-utf8? g
   compare whitespace?, 0/false
   {
     break-if-=
     return 0/false
   }
-  var quote-or-unquote?/eax: boolean <- quote-or-unquote-grapheme? g
+  var quote-or-unquote?/eax: boolean <- quote-or-unquote-code-point-utf8? g
   compare quote-or-unquote?, 0/false
   {
     break-if-=
     return 0/false
   }
-  var bracket?/eax: boolean <- bracket-grapheme? g
+  var bracket?/eax: boolean <- bracket-code-point-utf8? g
   compare bracket?, 0/false
   {
     break-if-=
@@ -888,7 +888,7 @@ fn symbol-grapheme? g: grapheme -> _/eax: boolean {
   return 1/true
 }
 
-fn whitespace-grapheme? g: grapheme -> _/eax: boolean {
+fn whitespace-code-point-utf8? g: code-point-utf8 -> _/eax: boolean {
   compare g, 9/tab
   {
     break-if-!=
@@ -907,7 +907,7 @@ fn whitespace-grapheme? g: grapheme -> _/eax: boolean {
   return 0/false
 }
 
-fn quote-or-unquote-grapheme? g: grapheme -> _/eax: boolean {
+fn quote-or-unquote-code-point-utf8? g: code-point-utf8 -> _/eax: boolean {
   compare g, 0x27/single-quote
   {
     break-if-!=
@@ -931,7 +931,7 @@ fn quote-or-unquote-grapheme? g: grapheme -> _/eax: boolean {
   return 0/false
 }
 
-fn bracket-grapheme? g: grapheme -> _/eax: boolean {
+fn bracket-code-point-utf8? g: code-point-utf8 -> _/eax: boolean {
   compare g, 0x28/open-paren
   {
     break-if-!=
@@ -971,12 +971,12 @@ fn number-token? _self: (addr token) -> _/eax: boolean {
   var _in-data/eax: (addr stream byte) <- lookup *in-data-ah
   var in-data/ecx: (addr stream byte) <- copy _in-data
   rewind-stream in-data
-  var g/eax: grapheme <- read-grapheme in-data
+  var g/eax: code-point-utf8 <- read-code-point-utf8 in-data
   # if '-', read another
   {
     compare g, 0x2d/minus
     break-if-!=
-    g <- read-grapheme in-data
+    g <- read-code-point-utf8 in-data
   }
   {
     {
@@ -990,7 +990,7 @@ fn number-token? _self: (addr token) -> _/eax: boolean {
       compare done?, 0/false
     }
     break-if-!=
-    g <- read-grapheme in-data
+    g <- read-code-point-utf8 in-data
     loop
   }
   return 1/true
@@ -1008,8 +1008,8 @@ fn bracket-token? _self: (addr token) -> _/eax: boolean {
   var in-data-ah/eax: (addr handle stream byte) <- get self, text-data
   var in-data/eax: (addr stream byte) <- lookup *in-data-ah
   rewind-stream in-data
-  var g/eax: grapheme <- read-grapheme in-data
-  var result/eax: boolean <- bracket-grapheme? g
+  var g/eax: code-point-utf8 <- read-code-point-utf8 in-data
+  var result/eax: boolean <- bracket-code-point-utf8? g
   return result
 }
 
@@ -1055,7 +1055,7 @@ fn open-paren-token? _self: (addr token) -> _/eax: boolean {
   var _in-data/eax: (addr stream byte) <- lookup *in-data-ah
   var in-data/ecx: (addr stream byte) <- copy _in-data
   rewind-stream in-data
-  var g/eax: grapheme <- read-grapheme in-data
+  var g/eax: code-point-utf8 <- read-code-point-utf8 in-data
   compare g, 0x28/open-paren
   {
     break-if-!=
@@ -1071,7 +1071,7 @@ fn close-paren-token? _self: (addr token) -> _/eax: boolean {
   var _in-data/eax: (addr stream byte) <- lookup *in-data-ah
   var in-data/ecx: (addr stream byte) <- copy _in-data
   rewind-stream in-data
-  var g/eax: grapheme <- read-grapheme in-data
+  var g/eax: code-point-utf8 <- read-code-point-utf8 in-data
   compare g, 0x29/close-paren
   {
     break-if-!=
@@ -1087,7 +1087,7 @@ fn dot-token? _self: (addr token) -> _/eax: boolean {
   var _in-data/eax: (addr stream byte) <- lookup *in-data-ah
   var in-data/ecx: (addr stream byte) <- copy _in-data
   rewind-stream in-data
-  var g/eax: grapheme <- read-grapheme in-data
+  var g/eax: code-point-utf8 <- read-code-point-utf8 in-data
   compare g, 0x2e/dot
   {
     break-if-!=
diff --git a/shell/trace.mu b/shell/trace.mu
index 298b7e23..e4c9ec5e 100644
--- a/shell/trace.mu
+++ b/shell/trace.mu
@@ -904,7 +904,7 @@ fn render-trace-menu screen: (addr screen) {
   draw-text-rightward-from-cursor screen, " show whole line  ", width, 7/fg, 0xc5/bg=blue-bg
 }
 
-fn edit-trace _self: (addr trace), key: grapheme {
+fn edit-trace _self: (addr trace), key: code-point-utf8 {
   var self/esi: (addr trace) <- copy _self
   # cursor down
   {