about summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- mu-init.subx 10
-rw-r--r-- shell/infix.mu 104
-rw-r--r-- shell/tokenize.mu 86
3 files changed, 58 insertions, 142 deletions
diff --git a/mu-init.subx b/mu-init.subx
index 26accb8a..ddae2bae 100644
--- a/mu-init.subx
+++ b/mu-init.subx
@@ -15,6 +15,16 @@ Entry:
   #
 #?   (main 0 0 Primary-bus-secondary-drive)
   (set-cursor-position 0 0x30 2)
+  (test-tokenize-number)
+  (test-tokenize-negative-number)
+  (test-tokenize-quote)
+  (test-tokenize-backquote)
+  (test-tokenize-unquote)
+  (test-tokenize-unquote-splice)
+  (test-tokenize-dotted-list)
+  (test-tokenize-stream-literal)
+  (test-tokenize-stream-literal-in-tree)
+  (test-tokenize-indent)
   (test-infix)
 #?   # always first run tests
 #?   (run-tests)
diff --git a/shell/infix.mu b/shell/infix.mu
index f1d9d5d6..a0e81ff2 100644
--- a/shell/infix.mu
+++ b/shell/infix.mu
@@ -1,6 +1,7 @@
 fn transform-infix x-ah: (addr handle cell), trace: (addr trace) {
   trace-text trace, "infix", "transform infix"
   trace-lower trace
+#?   trace-text trace, "infix", "todo"
 #?   draw-text-wrapping-right-then-down-from-cursor-over-full-screen 0/screen, "a:", 2/fg 0/bg
 #?   dump-cell-from-cursor-over-full-screen x-ah, 7/fg 0/bg
   transform-infix-2 x-ah, trace
@@ -316,91 +317,18 @@ fn operator-symbol? _x: (addr cell) -> _/eax: boolean {
 }
 
 fn non-operator-grapheme? g: grapheme -> _/eax: boolean {
-  ## whitespace
-  compare g, 9/tab
+  var operator?/eax: boolean <- operator-grapheme? g
+  compare operator?, 0/false
   {
-    break-if-!=
-    return 0/false
-  }
-  compare g, 0xa/newline
-  {
-    break-if-!=
-    return 0/false
-  }
-  compare g, 0x20/space
-  {
-    break-if-!=
-    return 0/false
-  }
-  ## we don't really use double quotes
-  compare g, 0x22/double-quote
-  {
-    break-if-!=
-    return 1/true
-  }
-  ## brackets
-  compare g, 0x28/open-paren
-  {
-    break-if-!=
-    return 0/false
-  }
-  compare g, 0x29/close-paren
-  {
-    break-if-!=
-    return 0/false
-  }
-  compare g, 0x5b/open-square-bracket
-  {
-    break-if-!=
-    return 0/false
-  }
-  compare g, 0x5d/close-square-bracket
-  {
-    break-if-!=
-    return 0/false
-  }
-  compare g, 0x7b/open-curly-bracket
-  {
-    break-if-!=
-    return 0/false
-  }
-  compare g, 0x7d/close-curly-bracket
-  {
-    break-if-!=
-    return 0/false
-  }
-  # quotes and unquotes are like symbols for this purpose
-  compare g, 0x27/single-quote
-  {
-    break-if-!=
-    return 1/true
-  }
-  compare g, 0x60/backquote
-  {
-    break-if-!=
-    return 1/true
-  }
-  compare g, 0x2c/comma
-  {
-    break-if-!=
-    return 1/true
-  }
-  compare g, 0x40/at-sign
-  {
-    break-if-!=
-    return 1/true
-  }
-  # - other punctuation
-  compare g, 0x23/hash
-  {
-    break-if-!=
+    break-if-=
     return 0/false
   }
   return 1/true
 }
 
+# just a short list of operator graphemes for now
 fn operator-grapheme? g: grapheme -> _/eax: boolean {
-  # '$' is a symbol char
+  # '$' is special and can be in either a symbol or operator
   compare g, 0x25/percent
   {
     break-if-!=
@@ -411,26 +339,6 @@ fn operator-grapheme? g: grapheme -> _/eax: boolean {
     break-if-!=
     return 1/true
   }
-  compare g, 0x27/single-quote
-  {
-    break-if-!=
-    return 0/false
-  }
-  compare g, 0x60/backquote
-  {
-    break-if-!=
-    return 0/false
-  }
-  compare g, 0x2c/comma
-  {
-    break-if-!=
-    return 0/false
-  }
-  compare g, 0x40/at-sign
-  {
-    break-if-!=
-    return 0/false
-  }
   compare g, 0x2a/asterisk
   {
     break-if-!=
diff --git a/shell/tokenize.mu b/shell/tokenize.mu
index b75e57e1..3a080135 100644
--- a/shell/tokenize.mu
+++ b/shell/tokenize.mu
@@ -744,93 +744,91 @@ fn next-indent-token in: (addr gap-buffer), _out: (addr token), trace: (addr tra
   }
 }
 
+# Mu carves up the space of graphemes into 4 categories:
+#   whitespace
+#   quotes and unquotes (from a Lisp perspective; doesn't include double
+#                        quotes or other Unicode quotes)
+#   operators
+#   symbols
+# (Numbers have their own parsing rules that don't fit cleanly in this
+# partition.)
+#
+# During tokenization operators and symbols are treated identically.
+# A later phase digs into that nuance.
+
 fn symbol-grapheme? g: grapheme -> _/eax: boolean {
-  ## whitespace
-  compare g, 9/tab
-  {
-    break-if-!=
-    return 0/false
-  }
-  compare g, 0xa/newline
+  var whitespace?/eax: boolean <- whitespace-grapheme? g
+  compare whitespace?, 0/false
   {
-    break-if-!=
-    return 0/false
-  }
-  compare g, 0x20/space
-  {
-    break-if-!=
-    return 0/false
-  }
-  ## quotes
-  compare g, 0x22/double-quote
-  {
-    break-if-!=
+    break-if-=
     return 0/false
   }
-  compare g, 0x60/backquote
+  var quote-or-unquote?/eax: boolean <- quote-or-unquote-grapheme? g
+  compare quote-or-unquote?, 0/false
   {
-    break-if-!=
+    break-if-=
     return 0/false
   }
-  ## brackets
-  compare g, 0x28/open-paren
+  var bracket?/eax: boolean <- bracket-grapheme? g
+  compare bracket?, 0/false
   {
-    break-if-!=
+    break-if-=
     return 0/false
   }
-  compare g, 0x29/close-paren
+  compare g, 0x23/hash  # comments get filtered out
   {
     break-if-!=
     return 0/false
   }
-  compare g, 0x5b/open-square-bracket
+  compare g, 0x22/double-quote  # double quotes reserved for now
   {
     break-if-!=
     return 0/false
   }
-  compare g, 0x5d/close-square-bracket
+  return 1/true
+}
+
+fn whitespace-grapheme? g: grapheme -> _/eax: boolean {
+  compare g, 9/tab
   {
     break-if-!=
-    return 0/false
+    return 1/true
   }
-  compare g, 0x7b/open-curly-bracket
+  compare g, 0xa/newline
   {
     break-if-!=
-    return 0/false
+    return 1/true
   }
-  compare g, 0x7d/close-curly-bracket
+  compare g, 0x20/space
   {
     break-if-!=
-    return 0/false
+    return 1/true
   }
-  # quotes and unquotes
+  return 0/false
+}
+
+fn quote-or-unquote-grapheme? g: grapheme -> _/eax: boolean {
   compare g, 0x27/single-quote
   {
     break-if-!=
-    return 0/false
+    return 1/true
   }
   compare g, 0x60/backquote
   {
     break-if-!=
-    return 0/false
+    return 1/true
   }
   compare g, 0x2c/comma
   {
     break-if-!=
-    return 0/false
+    return 1/true
   }
   compare g, 0x40/at-sign
   {
     break-if-!=
-    return 0/false
-  }
-  # - other punctuation
-  compare g, 0x23/hash
-  {
-    break-if-!=
-    return 0/false
+    return 1/true
   }
-  return 1/true
+  return 0/false
 }
 
 fn bracket-grapheme? g: grapheme -> _/eax: boolean {