about summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r--mu-init.subx2
-rw-r--r--shell/tokenize.mu166
2 files changed, 95 insertions, 73 deletions
diff --git a/mu-init.subx b/mu-init.subx
index 8fcf84e3..4ba24788 100644
--- a/mu-init.subx
+++ b/mu-init.subx
@@ -14,6 +14,8 @@ Entry:
   bd/copy-to-ebp 0/imm32
   #
 #?   (main 0 0 Primary-bus-secondary-drive)
+#?   (test-tokenize-backquote)
+#?   (test-tokenize-stream-literal)
   # always first run tests
   (run-tests)
   (num-test-failures)  # => eax
diff --git a/shell/tokenize.mu b/shell/tokenize.mu
index d6a5f8da..97696cd3 100644
--- a/shell/tokenize.mu
+++ b/shell/tokenize.mu
@@ -296,7 +296,7 @@ fn test-tokenize-stream-literal-in-tree {
 
 # caller is responsible for threading start-of-line? between calls to next-token
 # 'in' may contain whitespace if start-of-line?
-fn next-token in: (addr gap-buffer), _out: (addr token), start-of-line?: boolean, trace: (addr trace) -> _/edi: boolean {
+fn next-token in: (addr gap-buffer), out: (addr token), start-of-line?: boolean, trace: (addr trace) -> _/edi: boolean {
   trace-text trace, "tokenize", "next-token"
   trace-lower trace
   skip-spaces-from-gap-buffer in
@@ -306,9 +306,7 @@ fn next-token in: (addr gap-buffer), _out: (addr token), start-of-line?: boolean
     break-if-!=
     trace-text trace, "tokenize", "newline"
     g <- read-from-gap-buffer in
-    var out/eax: (addr token) <- copy _out
-    var out-type/eax: (addr int) <- get out, type
-    copy-to *out-type, 2/skip
+    initialize-skip-token out
     return 1/at-start-of-line
   }
   {
@@ -316,9 +314,7 @@ fn next-token in: (addr gap-buffer), _out: (addr token), start-of-line?: boolean
     compare done?, 0/false
     break-if-=
     trace-text trace, "tokenize", "end"
-    var out/eax: (addr token) <- copy _out
-    var out-type/eax: (addr int) <- get out, type
-    copy-to *out-type, 2/skip
+    initialize-skip-token out
     return 1/at-start-of-line
   }
   var _g/eax: grapheme <- peek-from-gap-buffer in
@@ -334,40 +330,20 @@ fn next-token in: (addr gap-buffer), _out: (addr token), start-of-line?: boolean
     write-int32-hex stream, gval
     trace trace, "tokenize", stream
   }
-  var out/eax: (addr token) <- copy _out
-  var out-data-ah/edi: (addr handle stream byte) <- get out, text-data
-  $next-token:allocate: {
-    # Allocate a large buffer if it's a stream.
-    # Sometimes a whole function definition will need to fit in it.
-    compare g, 0x5b/open-square-bracket
-    {
-      break-if-!=
-      populate-stream out-data-ah, 0x400/max-definition-size=1KB
-      break $next-token:allocate
-    }
-    populate-stream out-data-ah, 0x40
-  }
-  var _out-data/eax: (addr stream byte) <- lookup *out-data-ah
-  var out-data/edi: (addr stream byte) <- copy _out-data
-  clear-stream out-data
   $next-token:case: {
     # open square brackets begin streams
     {
       compare g, 0x5b/open-square-bracket
       break-if-!=
       var dummy/eax: grapheme <- read-from-gap-buffer in  # skip open bracket
-      next-stream-token in, out-data, trace
-      var out/eax: (addr token) <- copy _out
-      # streams set the type
-      var out-type/eax: (addr int) <- get out, type
-      copy-to *out-type, 1/stream
+      next-stream-token in, out, trace
       break $next-token:case
     }
     # comment
     {
       compare g, 0x23/comment
       break-if-!=
-      rest-of-line in, out-data, trace
+      rest-of-line in, out, trace
       copy-to start-of-line?, 1/true
       break $next-token:case
     }
@@ -381,7 +357,7 @@ fn next-token in: (addr gap-buffer), _out: (addr token), start-of-line?: boolean
       var digit?/eax: boolean <- decimal-digit? g2
       compare digit?, 0/false
       break-if-=
-      next-number-token in, out-data, trace
+      next-number-token in, out, trace
       break $next-token:case
     }
     # digit
@@ -389,7 +365,7 @@ fn next-token in: (addr gap-buffer), _out: (addr token), start-of-line?: boolean
       var digit?/eax: boolean <- decimal-digit? g
       compare digit?, 0/false
       break-if-=
-      next-number-token in, out-data, trace
+      next-number-token in, out, trace
       break $next-token:case
     }
     # other symbol char
@@ -397,7 +373,7 @@ fn next-token in: (addr gap-buffer), _out: (addr token), start-of-line?: boolean
       var symbol?/eax: boolean <- symbol-grapheme? g
       compare symbol?, 0/false
       break-if-=
-      next-symbol-token in, out-data, trace
+      next-symbol-token in, out, trace
       break $next-token:case
     }
     # unbalanced close square brackets are errors
@@ -413,7 +389,7 @@ fn next-token in: (addr gap-buffer), _out: (addr token), start-of-line?: boolean
       compare bracket?, 0/false
       break-if-=
       var g/eax: grapheme <- read-from-gap-buffer in
-      next-bracket-token g, out-data, trace
+      next-bracket-token g, out, trace
       break $next-token:case
     }
     # non-symbol operators
@@ -421,7 +397,7 @@ fn next-token in: (addr gap-buffer), _out: (addr token), start-of-line?: boolean
       var operator?/eax: boolean <- operator-grapheme? g
       compare operator?, 0/false
       break-if-=
-      next-operator-token in, out-data, trace
+      next-operator-token in, out, trace
       break $next-token:case
     }
     # quote
@@ -429,7 +405,7 @@ fn next-token in: (addr gap-buffer), _out: (addr token), start-of-line?: boolean
       compare g, 0x27/single-quote
       break-if-!=
       var g/eax: grapheme <- read-from-gap-buffer in  # consume
-      write-grapheme out-data, g
+      initialize-token out, "'"
       break $next-token:case
     }
     # backquote
@@ -437,7 +413,7 @@ fn next-token in: (addr gap-buffer), _out: (addr token), start-of-line?: boolean
       compare g, 0x60/backquote
       break-if-!=
       var g/eax: grapheme <- read-from-gap-buffer in  # consume
-      write-grapheme out-data, g
+      initialize-token out, "`"
       break $next-token:case
     }
     # unquote
@@ -445,15 +421,16 @@ fn next-token in: (addr gap-buffer), _out: (addr token), start-of-line?: boolean
       compare g, 0x2c/comma
       break-if-!=
       var g/eax: grapheme <- read-from-gap-buffer in  # consume
-      write-grapheme out-data, g
       # check for unquote-splice
       {
-        var g2/eax: grapheme <- peek-from-gap-buffer in
-        compare g2, 0x40/at-sign
+        g <- peek-from-gap-buffer in
+        compare g, 0x40/at-sign
         break-if-!=
-        g2 <- read-from-gap-buffer in
-        write-grapheme out-data, g2
+        g <- read-from-gap-buffer in
+        initialize-token out, ",@"
+        break $next-token:case
       }
+      initialize-token out, ","
       break $next-token:case
     }
     abort "unknown token type"
@@ -466,16 +443,20 @@ fn next-token in: (addr gap-buffer), _out: (addr token), start-of-line?: boolean
     var stream-storage: (stream byte 0x400)  # maximum possible token size (next-stream-token)
     var stream/eax: (addr stream byte) <- address stream-storage
     write stream, "=> "
-    rewind-stream out-data
-    write-stream stream, out-data
+    write-token-text-data stream, out
     trace trace, "tokenize", stream
   }
   return start-of-line?
 }
 
-fn next-symbol-token in: (addr gap-buffer), out: (addr stream byte), trace: (addr trace) {
+fn next-symbol-token in: (addr gap-buffer), _out: (addr token), trace: (addr trace) {
   trace-text trace, "tokenize", "looking for a symbol"
   trace-lower trace
+  var out/eax: (addr token) <- copy _out
+  var out-data-ah/eax: (addr handle stream byte) <- get out, text-data
+  populate-stream out-data-ah, 0x40
+  var _out-data/eax: (addr stream byte) <- lookup *out-data-ah
+  var out-data/edi: (addr stream byte) <- copy _out-data
   $next-symbol-token:loop: {
     var done?/eax: boolean <- gap-buffer-scan-done? in
     compare done?, 0/false
@@ -503,7 +484,7 @@ fn next-symbol-token in: (addr gap-buffer), out: (addr stream byte), trace: (add
       break $next-symbol-token:loop
     }
     var g/eax: grapheme <- read-from-gap-buffer in
-    write-grapheme out, g
+    write-grapheme out-data, g
     loop
   }
   trace-higher trace
@@ -514,15 +495,20 @@ fn next-symbol-token in: (addr gap-buffer), out: (addr stream byte), trace: (add
     var stream-storage: (stream byte 0x40)
     var stream/esi: (addr stream byte) <- address stream-storage
     write stream, "=> "
-    rewind-stream out
-    write-stream stream, out
+    rewind-stream out-data
+    write-stream stream, out-data
     trace trace, "tokenize", stream
   }
 }
 
-fn next-operator-token in: (addr gap-buffer), out: (addr stream byte), trace: (addr trace) {
+fn next-operator-token in: (addr gap-buffer), _out: (addr token), trace: (addr trace) {
   trace-text trace, "tokenize", "looking for a operator"
   trace-lower trace
+  var out/eax: (addr token) <- copy _out
+  var out-data-ah/eax: (addr handle stream byte) <- get out, text-data
+  populate-stream out-data-ah, 0x40
+  var _out-data/eax: (addr stream byte) <- lookup *out-data-ah
+  var out-data/edi: (addr stream byte) <- copy _out-data
   $next-operator-token:loop: {
     var done?/eax: boolean <- gap-buffer-scan-done? in
     compare done?, 0/false
@@ -550,7 +536,7 @@ fn next-operator-token in: (addr gap-buffer), out: (addr stream byte), trace: (a
       break $next-operator-token:loop
     }
     var g/eax: grapheme <- read-from-gap-buffer in
-    write-grapheme out, g
+    write-grapheme out-data, g
     loop
   }
   trace-higher trace
@@ -561,20 +547,25 @@ fn next-operator-token in: (addr gap-buffer), out: (addr stream byte), trace: (a
     var stream-storage: (stream byte 0x40)
     var stream/esi: (addr stream byte) <- address stream-storage
     write stream, "=> "
-    rewind-stream out
-    write-stream stream, out
+    rewind-stream out-data
+    write-stream stream, out-data
     trace trace, "tokenize", stream
   }
 }
 
-fn next-number-token in: (addr gap-buffer), out: (addr stream byte), trace: (addr trace) {
+fn next-number-token in: (addr gap-buffer), _out: (addr token), trace: (addr trace) {
   trace-text trace, "tokenize", "looking for a number"
   trace-lower trace
+  var out/eax: (addr token) <- copy _out
+  var out-data-ah/eax: (addr handle stream byte) <- get out, text-data
+  populate-stream out-data-ah, 0x40
+  var _out-data/eax: (addr stream byte) <- lookup *out-data-ah
+  var out-data/edi: (addr stream byte) <- copy _out-data
   $next-number-token:check-minus: {
     var g/eax: grapheme <- peek-from-gap-buffer in
     compare g, 0x2d/minus
     g <- read-from-gap-buffer in  # consume
-    write-grapheme out, g
+    write-grapheme out-data, g
   }
   $next-number-token:loop: {
     var done?/eax: boolean <- gap-buffer-scan-done? in
@@ -612,14 +603,23 @@ fn next-number-token in: (addr gap-buffer), out: (addr stream byte), trace: (add
     }
     trace-text trace, "tokenize", "append"
     var g/eax: grapheme <- read-from-gap-buffer in
-    write-grapheme out, g
+    write-grapheme out-data, g
     loop
   }
   trace-higher trace
 }
 
-fn next-stream-token in: (addr gap-buffer), out: (addr stream byte), trace: (addr trace) {
+fn next-stream-token in: (addr gap-buffer), _out: (addr token), trace: (addr trace) {
   trace-text trace, "tokenize", "stream"
+  var out/edi: (addr token) <- copy _out
+  var out-type/eax: (addr int) <- get out, type
+  copy-to *out-type, 1/stream
+  var out-data-ah/eax: (addr handle stream byte) <- get out, text-data
+  # stream tokens contain whole function definitions on boot, so we always
+  # give them plenty of space
+  populate-stream out-data-ah, 0x400/max-definition-size=1KB
+  var _out-data/eax: (addr stream byte) <- lookup *out-data-ah
+  var out-data/edi: (addr stream byte) <- copy _out-data
   {
     var empty?/eax: boolean <- gap-buffer-scan-done? in
     compare empty?, 0/false
@@ -631,7 +631,7 @@ fn next-stream-token in: (addr gap-buffer), out: (addr stream byte), trace: (add
     var g/eax: grapheme <- read-from-gap-buffer in
     compare g, 0x5d/close-square-bracket
     break-if-=
-    write-grapheme out, g
+    write-grapheme out-data, g
     loop
   }
   {
@@ -641,15 +641,20 @@ fn next-stream-token in: (addr gap-buffer), out: (addr stream byte), trace: (add
     var stream-storage: (stream byte 0x400)  # max-definition-size
     var stream/esi: (addr stream byte) <- address stream-storage
     write stream, "=> "
-    rewind-stream out
-    write-stream stream, out
+    rewind-stream out-data
+    write-stream stream, out-data
     trace trace, "tokenize", stream
   }
 }
 
-fn next-bracket-token g: grapheme, out: (addr stream byte), trace: (addr trace) {
+fn next-bracket-token g: grapheme, _out: (addr token), trace: (addr trace) {
   trace-text trace, "tokenize", "bracket"
-  write-grapheme out, g
+  var out/eax: (addr token) <- copy _out
+  var out-data-ah/eax: (addr handle stream byte) <- get out, text-data
+  populate-stream out-data-ah, 0x40
+  var _out-data/eax: (addr stream byte) <- lookup *out-data-ah
+  var out-data/edi: (addr stream byte) <- copy _out-data
+  write-grapheme out-data, g
   {
     var should-trace?/eax: boolean <- should-trace? trace
     compare should-trace?, 0/false
@@ -657,14 +662,19 @@ fn next-bracket-token g: grapheme, out: (addr stream byte), trace: (addr trace)
     var stream-storage: (stream byte 0x40)
     var stream/esi: (addr stream byte) <- address stream-storage
     write stream, "=> "
-    rewind-stream out
-    write-stream stream, out
+    rewind-stream out-data
+    write-stream stream, out-data
     trace trace, "tokenize", stream
   }
 }
 
-fn rest-of-line in: (addr gap-buffer), out: (addr stream byte), trace: (addr trace) {
+fn rest-of-line in: (addr gap-buffer), _out: (addr token), trace: (addr trace) {
   trace-text trace, "tokenize", "comment"
+  var out/eax: (addr token) <- copy _out
+  var out-data-ah/eax: (addr handle stream byte) <- get out, text-data
+  populate-stream out-data-ah, 0x40
+  var _out-data/eax: (addr stream byte) <- lookup *out-data-ah
+  var out-data/edi: (addr stream byte) <- copy _out-data
   {
     var empty?/eax: boolean <- gap-buffer-scan-done? in
     compare empty?, 0/false
@@ -675,7 +685,7 @@ fn rest-of-line in: (addr gap-buffer), out: (addr stream byte), trace: (addr tra
     var g/eax: grapheme <- read-from-gap-buffer in
     compare g, 0xa/newline
     break-if-=
-    write-grapheme out, g
+    write-grapheme out-data, g
     loop
   }
   {
@@ -685,8 +695,8 @@ fn rest-of-line in: (addr gap-buffer), out: (addr stream byte), trace: (addr tra
     var stream-storage: (stream byte 0x80)
     var stream/esi: (addr stream byte) <- address stream-storage
     write stream, "=> "
-    rewind-stream out
-    write-stream stream, out
+    rewind-stream out-data
+    write-stream stream, out-data
     trace trace, "tokenize", stream
   }
 }
@@ -1125,8 +1135,9 @@ fn dot-token? _self: (addr token) -> _/eax: boolean {
 fn test-dot-token {
   var tmp-storage: (handle token)
   var tmp-ah/eax: (addr handle token) <- address tmp-storage
-  new-token tmp-ah, "."
+  allocate-token tmp-ah
   var tmp/eax: (addr token) <- lookup *tmp-ah
+  initialize-token tmp, "."
   var result/eax: boolean <- dot-token? tmp
   check result, "F - test-dot-token"
 }
@@ -1175,15 +1186,24 @@ fn allocate-token _self-ah: (addr handle token) {
   populate-stream dest-ah, 0x40/max-symbol-size
 }
 
-fn initialize-token _self-ah: (addr handle token), val: (addr array byte) {
-  var self-ah/eax: (addr handle token) <- copy _self-ah
-  var self/eax: (addr token) <- lookup *self-ah
+fn initialize-token _self: (addr token), val: (addr array byte) {
+  var self/eax: (addr token) <- copy _self
   var dest-ah/eax: (addr handle stream byte) <- get self, text-data
+  populate-stream dest-ah, 0x40
   var dest/eax: (addr stream byte) <- lookup *dest-ah
   write dest, val
 }
 
-fn new-token self-ah: (addr handle token), val: (addr array byte) {
-  allocate-token self-ah
-  initialize-token self-ah, val
+fn initialize-skip-token _self: (addr token) {
+  var self/eax: (addr token) <- copy _self
+  var self-type/eax: (addr int) <- get self, type
+  copy-to *self-type, 2/skip
+}
+
+fn write-token-text-data out: (addr stream byte), _self: (addr token) {
+  var self/eax: (addr token) <- copy _self
+  var data-ah/eax: (addr handle stream byte) <- get self, text-data
+  var data/eax: (addr stream byte) <- lookup *data-ah
+  rewind-stream data
+  write-stream out, data
 }