about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author: Kartik K. Agaram <vc@akkartik.com> 2021-06-22 22:30:21 -0700
committer: Kartik K. Agaram <vc@akkartik.com> 2021-06-22 22:32:20 -0700
commit: aebcfd1bfbf1d071ffbff7603c89c9746cb81017 (patch)
tree: 7b91afdbb3f59a7a0dc9dcd69a66d32aef061196
parent: f5e55cbbdbe177f5174112f811af20bd76150b6f (diff)
download: mu-aebcfd1bfbf1d071ffbff7603c89c9746cb81017.tar.gz
beginnings of tokenization within symbols
We're now down to 4 failing tests. But these will require surgery.
-rw-r--r-- shell/infix.mu 83
-rw-r--r-- shell/tokenize.mu 2
2 files changed, 68 insertions, 17 deletions
diff --git a/shell/infix.mu b/shell/infix.mu
index e7982d0e..b0b13fb6 100644
--- a/shell/infix.mu
+++ b/shell/infix.mu
@@ -55,7 +55,7 @@ fn transform-infix-2 _x-ah: (addr handle cell), trace: (addr trace) {
   # symbol? maybe break it down into a pair
   {
     compare *x-type, 2/symbol
-    break-if-=
+    break-if-!=
     tokenize-infix x-ah, trace
   }
   # not a pair? return
@@ -233,14 +233,65 @@ fn swap-cells a-ah: (addr handle cell), b-ah: (addr handle cell) {
 }
 
 fn tokenize-infix _sym-ah: (addr handle cell), trace: (addr trace) {
-#?     # non-symbol operators
+  var sym-ah/eax: (addr handle cell) <- copy _sym-ah
+  var sym/eax: (addr cell) <- lookup *sym-ah
+  var sym-data-ah/eax: (addr handle stream byte) <- get sym, text-data
+  var _sym-data/eax: (addr stream byte) <- lookup *sym-data-ah
+  var sym-data/esi: (addr stream byte) <- copy _sym-data
+  rewind-stream sym-data
+  # read sym into a gap buffer and insert spaces in a few places
+  var buffer-storage: gap-buffer
+  var buffer/edi: (addr gap-buffer) <- address buffer-storage
+  initialize-gap-buffer buffer, 0x40/max-symbol-size
+  # scan for first non-$
+  var g/eax: grapheme <- read-grapheme sym-data
+  add-grapheme-at-gap buffer, g
+  {
+    compare g, 0x24/dollar
+    break-if-!=
+    {
+      var done?/eax: boolean <- stream-empty? sym-data
+      compare done?, 0/false
+      break-if-=
+      return  # symbol is all '$'s; do nothing
+    }
+    g <- read-grapheme sym-data
+    add-grapheme-at-gap buffer, g
+    loop
+  }
+  var tokenization-needed?: boolean
+  var _operator-so-far?/eax: boolean <- operator-grapheme? g
+  var operator-so-far?/ecx: boolean <- copy _operator-so-far?
+  {
+    var done?/eax: boolean <- stream-empty? sym-data
+    compare done?, 0/false
+    break-if-!=
+    var g/eax: grapheme <- read-grapheme sym-data
+    {
+      var curr-operator?/eax: boolean <- operator-grapheme? g
+      compare curr-operator?, operator-so-far?
+      break-if-=
+      # if grapheme switches state, insert a space
+      add-grapheme-at-gap buffer, 0x20/space
+      operator-so-far? <- copy curr-operator?
+      copy-to tokenization-needed?, 1/true
+    }
+    add-grapheme-at-gap buffer, g
+    loop
+  }
+  compare tokenization-needed?, 0/false
+  break-if-=
+#?   {
+#?     var dummy1/eax: int <- copy 0
+#?     var dummy2/ecx: int <- copy 0
+#?     dummy1, dummy2 <- render-gap-buffer-wrapping-right-then-down 0/screen, buffer, 0x20/xmin 5/ymin, 0x80/xmax 0x30/ymax, 0/no-cursor, 3/fg 0/bg
 #?     {
-#?       var operator?/eax: boolean <- operator-grapheme? g
-#?       compare operator?, 0/false
-#?       break-if-=
-#?       next-operator-token in, out, trace
-#?       break $next-token:case
+#?       loop
 #?     }
+#?   }
+  # recursively process buffer
+  # this time we're guaranteed we won't enter tokenize-infix
+  read-cell buffer, _sym-ah, trace
 }
 
 fn test-infix {
@@ -269,17 +320,17 @@ fn test-infix {
   check-infix "(a + b + c)", "(+ (+ a b) c)", "F - test-infix/left-associative"
 #?   check-infix "(f a + b)", "(f (+ a b))", "F - test-infix/higher-precedence-than-call"
 #?   check-infix "(f a + b c + d)", "(f (+ a b) (+ c d))", "F - test-infix/multiple"
-#?   check-infix "+a", "(+ a)", "F - test-infix/unary-operator-2"
-#?   check-infix "-a", "(- a)", "F - test-infix/unary-operator-3"
-#?   check-infix "a+b", "(+ a b)", "F - test-infix/no-spaces"
-#?   check-infix "',a+b", "',(+ a b)", "F - test-infix/no-spaces-with-nested-quotes"
-#?   check-infix "$a+b", "(+ $a b)", "F - test-infix/no-spaces-2"
+  check-infix "+a", "(+ a)", "F - test-infix/unary-operator-2"
+  check-infix "-a", "(- a)", "F - test-infix/unary-operator-3"
+  check-infix "a+b", "(+ a b)", "F - test-infix/no-spaces"
+  check-infix "',a+b", "',(+ a b)", "F - test-infix/no-spaces-with-nested-quotes"
+  check-infix "$a+b", "(+ $a b)", "F - test-infix/no-spaces-2"
 #?   check-infix "-a+b", "(+ (- a) b)", "F - test-infix/unary-over-binary"
 #?   check-infix "~a+b", "(+ (~ a) b)", "F - test-infix/unary-complement"
-#?   check-infix "(n * n-1)", "(* n (- n 1))", "F - test-infix/no-spaces-over-spaces"
-#?   check-infix "`(a + b)", "`(+ a b)", "F - test-infix/backquote"
-#?   check-infix ",@a+b", ",@(+ a b)", "F - test-infix/unquote-splice"
-#?   check-infix ",@(a + b)", ",@(+ a b)", "F - test-infix/unquote-splice-2"
+  check-infix "(n * n-1)", "(* n (- n 1))", "F - test-infix/no-spaces-over-spaces"
+  check-infix "`(a + b)", "`(+ a b)", "F - test-infix/backquote"
+  check-infix ",@a+b", ",@(+ a b)", "F - test-infix/unquote-splice"
+  check-infix ",@(a + b)", ",@(+ a b)", "F - test-infix/unquote-splice-2"
 }
 
 # helpers
diff --git a/shell/tokenize.mu b/shell/tokenize.mu
index 3a080135..2d7ea041 100644
--- a/shell/tokenize.mu
+++ b/shell/tokenize.mu
@@ -523,7 +523,7 @@ fn next-symbol-token in: (addr gap-buffer), _out: (addr token), trace: (addr tra
   trace-lower trace
   var out/eax: (addr token) <- copy _out
   var out-data-ah/eax: (addr handle stream byte) <- get out, text-data
-  populate-stream out-data-ah, 0x40
+  populate-stream out-data-ah, 0x40/max-symbol-size
   var _out-data/eax: (addr stream byte) <- lookup *out-data-ah
   var out-data/edi: (addr stream byte) <- copy _out-data
   $next-symbol-token:loop: {