about summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r--112read-byte.subx2
-rw-r--r--403unicode.mu69
-rw-r--r--apps/browse/main.mu38
-rw-r--r--apps/browse/screen-position-state.mu9
4 files changed, 93 insertions, 25 deletions
diff --git a/112read-byte.subx b/112read-byte.subx
index 32f89647..06da3a64 100644
--- a/112read-byte.subx
+++ b/112read-byte.subx
@@ -269,7 +269,7 @@ test-read-byte-buffered-refills-buffer:
     c3/return
 
 # Return next byte value in eax, with top 3 bytes cleared.
-# Abort on reaching end of file.
+# Abort on reaching end of stream.
 read-byte:  # s: (addr stream byte) -> result/eax: byte
     # . prologue
     55/push-ebp
diff --git a/403unicode.mu b/403unicode.mu
index c1a4d748..23f14d8f 100644
--- a/403unicode.mu
+++ b/403unicode.mu
@@ -253,6 +253,75 @@ fn test-read-grapheme {
   check-ints-equal n, 0x65, "F - test grapheme/6"
 }
 
+fn read-grapheme-buffered in: (addr buffered-file) -> out/eax: grapheme {
+$read-grapheme-buffered:body: {
+  var c/eax: byte <- read-byte-buffered in  # first byte; its value decides how many trailer bytes to read
+  var num-trailers/ecx: int <- copy 0
+  $read-grapheme-buffered:compute-length: {
+    # c < 0xc0: single byte, just return it without reading any trailers
+    compare c, 0xc0
+    {
+      break-if->=
+      out <- copy c
+      num-trailers <- copy 0
+      break $read-grapheme-buffered:body
+    }
+    compare c, 0xfe  # c >= 0xfe: also returned as is, without reading any trailers
+    {
+      break-if-<
+      out <- copy c
+      break $read-grapheme-buffered:body
+    }
+    # 2 bytes: c < 0xe0, so 1 trailer follows
+    compare c, 0xe0
+    {
+      break-if->=
+      num-trailers <- copy 1
+      break $read-grapheme-buffered:compute-length
+    }
+    # 3 bytes: c < 0xf0, so 2 trailers follow
+    compare c, 0xf0
+    {
+      break-if->=
+      num-trailers <- copy 2
+      break $read-grapheme-buffered:compute-length
+    }
+    # 4 bytes: c < 0xf8, so 3 trailers follow
+    compare c, 0xf8
+    {
+      break-if->=
+      num-trailers <- copy 3
+      break $read-grapheme-buffered:compute-length
+    }
+$read-grapheme-buffered:abort: {
+      # reached only when none of the cases above matched: report the first byte and exit. TODO: print to stderr
+      print-string-to-real-screen "utf-8 encodings larger than 4 bytes are not supported. First byte seen: "
+      var n/eax: int <- copy c
+      print-int32-hex-to-real-screen n
+      print-string-to-real-screen "\n"
+      var exit-status/ebx: int <- copy 1  # presumably picked up by syscall_exit as the exit status -- confirm
+      syscall_exit
+    }
+  }
+  # or each trailer byte into result above the first byte: shift-left-bytes is called with k = 1, 2, 3 in turn
+  var result/edi: int <- copy c
+  var num-byte-shifts/edx: int <- copy 1
+  {
+    compare num-trailers, 0
+    break-if-<=
+    var tmp/eax: byte <- read-byte-buffered in
+    var tmp2/eax: int <- copy tmp
+    tmp2 <- shift-left-bytes tmp2, num-byte-shifts
+    result <- or tmp2
+    # update loop state: the next trailer is shifted one byte further, and one fewer remains
+    num-byte-shifts <- increment
+    num-trailers <- decrement
+    loop
+  }
+  out <- copy result
+}
+}
+
 # needed because available primitives only shift by a literal/constant number of bits
 fn shift-left-bytes n: int, k: int -> result/eax: int {
   var i/ecx: int <- copy 0
diff --git a/apps/browse/main.mu b/apps/browse/main.mu
index d710c1a6..8467f01f 100644
--- a/apps/browse/main.mu
+++ b/apps/browse/main.mu
@@ -33,13 +33,13 @@ fn render screen: (addr screen), fs: (addr buffered-file), state: (addr screen-p
 fn render-normal screen: (addr screen), fs: (addr buffered-file), state: (addr screen-position-state) {
   var newline-seen?/esi: boolean <- copy 0  # false
   var start-of-paragraph?/edi: boolean <- copy 1  # true
-  var previous-char/ebx: byte <- copy 0
+  var previous-grapheme/ebx: grapheme <- copy 0
 $render-normal:loop: {
     # if done-drawing?(state) break
     var done?/eax: boolean <- done-drawing? state
     compare done?, 0  # false
     break-if-!=
-    var c/eax: byte <- read-byte-buffered fs
+    var c/eax: grapheme <- read-grapheme-buffered fs
 $render-normal:loop-body: {
       # if (c == EOF) break
       compare c, 0xffffffff  # EOF marker
@@ -59,8 +59,8 @@ $render-normal:loop-body: {
         # otherwise render two newlines
         {
           break-if-=
-          add-char state, 0xa  # newline
-          add-char state, 0xa  # newline
+          add-grapheme state, 0xa  # newline
+          add-grapheme state, 0xa  # newline
           newline-seen? <- copy 0  # false
           start-of-paragraph? <- copy 1  # true
           break $render-normal:loop-body
@@ -94,20 +94,20 @@ $render-normal:flush-buffered-newline: {
         {
           compare c, 0x20
           break-if-!=
-          add-char state, 0xa  # newline
+          add-grapheme state, 0xa  # newline
           break $render-normal:flush-buffered-newline
         }
-        add-char state, 0x20  # space
+        add-grapheme state, 0x20  # space
         # fall through to print c
       }
       ## end soft newline support
 
 $render-normal:whitespace-separated-regions: {
-        # if previous-char wasn't whitespace, skip this block
+        # if previous-grapheme wasn't whitespace, skip this block
         {
-          compare previous-char, 0x20  # space
+          compare previous-grapheme, 0x20  # space
           break-if-=
-          compare previous-char, 0xa  # newline
+          compare previous-grapheme, 0xa  # newline
           break-if-=
           break $render-normal:whitespace-separated-regions
         }
@@ -133,9 +133,9 @@ $render-normal:whitespace-separated-regions: {
         }
       }
       #
-      add-char state, c
+      add-grapheme state, c
     }  # $render-normal:loop-body
-    previous-char <- copy c
+    previous-grapheme <- copy c
     loop
   }  # $render-normal:loop
 }
@@ -144,7 +144,7 @@ fn render-header-line screen: (addr screen), fs: (addr buffered-file), state: (a
 $render-header-line:body: {
   # compute color based on number of '#'s
   var header-level/esi: int <- copy 1  # caller already grabbed one
-  var c/eax: byte <- copy 0
+  var c/eax: grapheme <- copy 0
   {
     # if done-drawing?(state) return
     {
@@ -153,7 +153,7 @@ $render-header-line:body: {
       break-if-!= $render-header-line:body
     }
     #
-    c <- read-byte-buffered fs
+    c <- read-grapheme-buffered fs
     # if (c != '#') break
     compare c, 0x23  # '#'
     break-if-!=
@@ -171,7 +171,7 @@ $render-header-line:body: {
       break-if-!=
     }
     #
-    c <- read-byte-buffered fs
+    c <- read-grapheme-buffered fs
     # if (c == EOF) break
     compare c, 0xffffffff  # EOF marker
     break-if-=
@@ -179,7 +179,7 @@ $render-header-line:body: {
     compare c, 0xa  # newline
     break-if-=
     #
-    add-char state, c
+    add-grapheme state, c
     #
     loop
   }
@@ -226,7 +226,7 @@ fn render-until-asterisk fs: (addr buffered-file), state: (addr screen-position-
     compare done?, 0  # false
     break-if-!=
     #
-    var c/eax: byte <- read-byte-buffered fs
+    var c/eax: grapheme <- read-grapheme-buffered fs
     # if (c == EOF) break
     compare c, 0xffffffff  # EOF marker
     break-if-=
@@ -234,7 +234,7 @@ fn render-until-asterisk fs: (addr buffered-file), state: (addr screen-position-
     compare c, 0x2a  # '*'
     break-if-=
     #
-    add-char state, c
+    add-grapheme state, c
     #
     loop
   }
@@ -247,7 +247,7 @@ fn render-until-underscore fs: (addr buffered-file), state: (addr screen-positio
     compare done?, 0  # false
     break-if-!=
     #
-    var c/eax: byte <- read-byte-buffered fs
+    var c/eax: grapheme <- read-grapheme-buffered fs
     # if (c == EOF) break
     compare c, 0xffffffff  # EOF marker
     break-if-=
@@ -255,7 +255,7 @@ fn render-until-underscore fs: (addr buffered-file), state: (addr screen-positio
     compare c, 0x5f  # '_'
     break-if-=
     #
-    add-char state, c
+    add-grapheme state, c
     #
     loop
   }
diff --git a/apps/browse/screen-position-state.mu b/apps/browse/screen-position-state.mu
index 7b53ae07..f342faab 100644
--- a/apps/browse/screen-position-state.mu
+++ b/apps/browse/screen-position-state.mu
@@ -61,19 +61,18 @@ fn start-drawing _self: (addr screen-position-state) {
   reposition-cursor self
 }
 
-fn add-char _self: (addr screen-position-state), c: byte {
-$add-char:body: {
+fn add-grapheme _self: (addr screen-position-state), c: grapheme {
+$add-grapheme:body: {
   var self/esi: (addr screen-position-state) <- copy _self
   {
     compare c, 0xa  # newline
     break-if-!=
     next-line self
     reposition-cursor self
-    break $add-char:body
+    break $add-grapheme:body
   }
   # print c
-  var g/eax: grapheme <- copy c
-  print-grapheme 0, g
+  print-grapheme 0, c
   # self->col++
   var tmp/eax: (addr int) <- get self, col
   increment *tmp