about summary refs log tree commit diff stats
path: root/shell/tokenize.mu
diff options
context:
space:
mode:
author: Kartik K. Agaram <vc@akkartik.com> 2021-04-27 22:48:07 -0700
committer: Kartik K. Agaram <vc@akkartik.com> 2021-04-27 22:48:16 -0700
commit: 6ecd9920ca7e36057a51ed339c2448b738335d10 (patch)
tree: 9df9ea3d7b25adccd57df57329f521b0bb1ea5c3 /shell/tokenize.mu
parent: 16f51dd76d460eaa459cbd7e376cdf8f61a3c068 (diff)
download: mu-6ecd9920ca7e36057a51ed339c2448b738335d10.tar.gz
shell: tokenizing stream (string) literals
We're calling them streams since they support appending.
Diffstat (limited to 'shell/tokenize.mu')
-rw-r--r-- shell/tokenize.mu 79
1 files changed, 78 insertions, 1 deletions
diff --git a/shell/tokenize.mu b/shell/tokenize.mu
index 387fe617..1293e91f 100644
--- a/shell/tokenize.mu
+++ b/shell/tokenize.mu
@@ -56,6 +56,30 @@ fn test-tokenize-dotted-list {
   check close-paren?, "F - test-tokenize-dotted-list: close paren"
 }
 
+# Tokenizes the input "[abc def]" and checks that it yields exactly one token:
+# a stream token whose text data is "abc def" (square brackets not included).
+fn test-tokenize-stream-literal {
+  # in: "[abc def]"
+  var in-storage: gap-buffer
+  var in/esi: (addr gap-buffer) <- address in-storage
+  initialize-gap-buffer-with in, "[abc def]"
+  # destination for the tokens produced by 'tokenize'
+  var stream-storage: (stream cell 0x10)
+  var stream/edi: (addr stream cell) <- address stream-storage
+  # run the tokenizer without a trace
+  tokenize in, stream, 0/no-trace
+  # pull the first token out and check its type and contents
+  var curr-token-storage: cell
+  var curr-token/ebx: (addr cell) <- address curr-token-storage
+  read-from-stream stream, curr-token
+  var stream?/eax: boolean <- stream-token? curr-token
+  check stream?, "F - test-tokenize-stream-literal: type"
+  var curr-token-data-ah/eax: (addr handle stream byte) <- get curr-token, text-data
+  var curr-token-data/eax: (addr stream byte) <- lookup *curr-token-data-ah
+  var data-equal?/eax: boolean <- stream-data-equal? curr-token-data, "abc def"
+  check data-equal?, "F - test-tokenize-stream-literal"
+  # nothing else should remain in the token stream
+  var empty?/eax: boolean <- stream-empty? stream
+  check empty?, "F - test-tokenize-stream-literal: empty?"
+}
+
 fn next-token in: (addr gap-buffer), _out-cell: (addr cell), trace: (addr trace) {
   trace-text trace, "read", "next-token"
   trace-lower trace
@@ -91,7 +115,25 @@ fn next-token in: (addr gap-buffer), _out-cell: (addr cell), trace: (addr trace)
       next-symbol-token in, out, trace
       break $next-token:body
     }
-    # brackets are always single-char tokens
+    # open square brackets begin streams
+    {
+      compare g, 0x5b/open-square-bracket
+      break-if-!=
+      g <- read-from-gap-buffer in  # skip open bracket
+      next-stream-token in, out, trace
+      var out-cell/eax: (addr cell) <- copy _out-cell
+      var out-cell-type/eax: (addr int) <- get out-cell, type
+      copy-to *out-cell-type, 3/stream
+      break $next-token:body
+    }
+    # unbalanced close square brackets are errors
+    {
+      compare g, 0x5d/close-square-bracket
+      break-if-!=
+      error trace, "unbalanced ']'"
+      return
+    }
+    # other brackets are always single-char tokens
     {
       var bracket?/eax: boolean <- bracket-grapheme? g
       compare bracket?, 0/false
@@ -232,6 +274,30 @@ fn next-number-token in: (addr gap-buffer), out: (addr stream byte), trace: (add
   trace-higher trace
 }
 
+# Reads the body of a stream literal from 'in' into 'out'.
+# The caller (next-token) has already consumed the opening '['.
+# Graphemes are copied to 'out' until a ']' is read; the ']' itself is
+# consumed but not written. If 'in' runs out before a ']' is seen, reports
+# "unbalanced '['" on 'trace' and returns immediately.
+# On success, emits a "read" trace line of the form "=> <contents>".
+fn next-stream-token in: (addr gap-buffer), out: (addr stream byte), trace: (addr trace) {
+  trace-text trace, "read", "stream"
+  {
+    var empty?/eax: boolean <- gap-buffer-scan-done? in
+    compare empty?, 0/false
+    {
+      # skip the error path while there is still input left to scan
+      break-if-=
+      error trace, "unbalanced '['"
+      return
+    }
+    var g/eax: grapheme <- read-from-gap-buffer in
+    compare g, 0x5d/close-square-bracket
+    # closing bracket ends the literal without being copied to 'out'
+    break-if-=
+    write-grapheme out, g
+    loop
+  }
+  # build the trace message in a local buffer
+  # NOTE(review): buffer is 0x40 bytes including the "=> " prefix; behavior for
+  # longer literals depends on write-stream -- confirm
+  var stream-storage: (stream byte 0x40)
+  var stream/esi: (addr stream byte) <- address stream-storage
+  write stream, "=> "
+  # presumably resets the read position of 'out' so its contents can be copied -- confirm
+  rewind-stream out
+  write-stream stream, out
+  trace trace, "read", stream
+}
+
 fn next-bracket-token g: grapheme, out: (addr stream byte), trace: (addr trace) {
   trace-text trace, "read", "bracket"
   write-grapheme out, g
@@ -636,3 +702,14 @@ fn test-dot-token {
   var result/eax: boolean <- dot-token? tmp
   check result, "F - test-dot-token"
 }
+
+# Returns true when the 'type' field of the given cell equals 3/stream,
+# false otherwise.
+fn stream-token? _in: (addr cell) -> _/eax: boolean {
+  var self/eax: (addr cell) <- copy _in
+  var tag/eax: (addr int) <- get self, type
+  compare *tag, 3/stream
+  {
+    # any other type falls through to the 'false' result below
+    break-if-!=
+    return 1/true
+  }
+  return 0/false
+}