author     Kartik K. Agaram <vc@akkartik.com>    2021-06-20 21:18:38 -0700
committer  Kartik K. Agaram <vc@akkartik.com>    2021-06-20 21:18:38 -0700
commit     6e1aa99a0033d7ff2ead3a56400dd6e47a80e4f5 (patch)
tree       37df885d6c5b3d65d55d946e484bd82a4223b069
parent     9d7d99fe6cc5a05960ef52cdfa8acefabf8e40bf (diff)
download   mu-6e1aa99a0033d7ff2ead3a56400dd6e47a80e4f5.tar.gz
start guessing parentheses based on indentation
-rw-r--r--  mu-init.subx           32
-rw-r--r--  shell/parenthesize.mu  27
-rw-r--r--  shell/tokenize.mu      84
3 files changed, 66 insertions, 77 deletions
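
The commit's idea, per the test cases in shell/parenthesize.mu below: wrap each multi-word line in parentheses and fold more-indented lines into the expression above them, while single-word lines, lines that already open with a paren, comments, and blank lines pass through untouched. The following rough Python sketch only illustrates that behaviour under those assumptions, with made-up names; it is not the Mu implementation, which works on token streams rather than strings.

def parenthesize(text):
    # Hypothetical sketch of indentation-based parenthesization; the real
    # code lives in shell/parenthesize.mu and operates on tokens.
    out = []     # finished top-level expressions, as strings
    stack = []   # open inferred expressions: (indent, list of tokens)

    def close_down_to(indent):
        # close every open expression at this indent level or deeper
        while stack and indent <= stack[-1][0]:
            _, tokens = stack.pop()
            closed = '(' + ' '.join(tokens) + ')'
            (stack[-1][1] if stack else out).append(closed)

    for line in text.splitlines():
        code = line.split('#', 1)[0]            # drop comments
        words = code.split()
        if not words:
            continue                            # skip blank/comment-only lines
        indent = len(line) - len(line.lstrip(' '))
        close_down_to(indent)
        stripped = code.strip()
        if len(words) == 1 or stripped.lstrip("',@").startswith('('):
            # single words and explicitly parenthesized lines pass through
            (stack[-1][1] if stack else out).append(stripped)
        else:
            stack.append((indent, words))       # open a new inferred expression
    close_down_to(-1)                           # close everything still open
    return ' '.join(out)

# e.g. parenthesize("a b c\n  d e\n    f\ny") == "(a b c (d e f)) y",
# matching test-parenthesize/9-indented below.
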
diff --git a/mu-init.subx b/mu-init.subx
index 25c1149e..438e29e7 100644
--- a/mu-init.subx
+++ b/mu-init.subx
@@ -15,22 +15,22 @@ Entry:
   #
 #?   (main 0 0 Primary-bus-secondary-drive)
 #?   (set-cursor-position 0 0x40 0x20)
-  (test-parenthesize)
-  (test-parenthesize-skips-lines-with-initial-parens)
-  (test-parenthesize-skips-single-word-lines)
-#?   # always first run tests
-#?   (run-tests)
-#?   (num-test-failures)  # => eax
-#?   # call main if tests all passed
-#?   {
-#?     3d/compare-eax-and 0/imm32
-#?     75/jump-if-!= break/disp8
-#?     c7 0/subop/copy *Running-tests? 0/imm32/false
-#?     (clear-real-screen)
-#?     c7 0/subop/copy *Real-screen-cursor-x 0/imm32
-#?     c7 0/subop/copy *Real-screen-cursor-y 0/imm32
-#?     (main 0 0 Primary-bus-secondary-drive)
-#?   }
+#?   (test-parenthesize)
+#?   (test-parenthesize-skips-lines-with-initial-parens)
+#?   (test-parenthesize-skips-single-word-lines)
+  # always first run tests
+  (run-tests)
+  (num-test-failures)  # => eax
+  # call main if tests all passed
+  {
+    3d/compare-eax-and 0/imm32
+    75/jump-if-!= break/disp8
+    c7 0/subop/copy *Running-tests? 0/imm32/false
+    (clear-real-screen)
+    c7 0/subop/copy *Real-screen-cursor-x 0/imm32
+    c7 0/subop/copy *Real-screen-cursor-y 0/imm32
+    (main 0 0 Primary-bus-secondary-drive)
+  }
 
   # hang indefinitely
   {
diff --git a/shell/parenthesize.mu b/shell/parenthesize.mu
index f99f8c7f..91bc4334 100644
--- a/shell/parenthesize.mu
+++ b/shell/parenthesize.mu
@@ -219,7 +219,8 @@ fn emit t: (addr token), out: (addr stream token), explicit-open-parens: (addr i
   }
 }
 
-fn emit-non-indent-tokens in: (addr stream token), out: (addr stream token) {
+# helper for checking parenthesize
+fn emit-salient-tokens in: (addr stream token), out: (addr stream token) {
   rewind-stream in
   {
     var done?/eax: boolean <- stream-empty? in
@@ -228,12 +229,15 @@ fn emit-non-indent-tokens in: (addr stream token), out: (addr stream token) {
     var token-storage: token
     var token/edx: (addr token) <- address token-storage
     read-from-stream in, token
+    # skip tokens should be skipped
     var is-skip?/eax: boolean <- skip-token? token
     compare is-skip?, 0/false
     loop-if-!=
+    # indent tokens should be skipped
     var is-indent?/eax: boolean <- indent-token? token
     compare is-indent?, 0/false
     loop-if-!=
+    #
     write-to-stream out, token  # shallow copy
     loop
   }
@@ -250,7 +254,18 @@ fn test-parenthesize {
   check-parenthesize "a b c\n  (d ef)\n  g", "(a b c (d ef) g)", "F - test-parenthesize/8-indented"
   check-parenthesize "a b c\n  d e\n    f\ny", "(a b c (d e f)) y", "F - test-parenthesize/9-indented"
   check-parenthesize "#a\na b", "(a b)", "F - test-parenthesize/10-initial-comment"
+#? a b c
+#?     d ef
+#? 
+#?   g
+#?   check-parenthesize "a b c\n    d ef\n\n  g", "(a b c (d ef) g)", "F - test-parenthesize/11-comments"
+#?   check-parenthesize "a b c\n    d ef\n\n  g #abc", "(a b c (d ef)) g", "F - test-parenthesize/11-comments"
   check-parenthesize "a b c\n    d ef\n\n  g #abc", "(a b c (d ef) g)", "F - test-parenthesize/11-comments"
+#? a b c
+#?   '(d ef)
+#? 
+#?   g #abc
+#?   check-parenthesize "a b c\n  '(d ef)\n  g #abc", "(a b c '(d ef) g)", "F - test-parenthesize/12-quotes-and-comments"
   check-parenthesize "a b c\n  '(d ef)\n\n  g #abc", "(a b c '(d ef) g)", "F - test-parenthesize/12-quotes-and-comments"
   check-parenthesize "  a b c", "(a b c)", "F - test-parenthesize/13-initial-indent"
   check-parenthesize "    a b c\n  34", "(a b c) 34", "F - test-parenthesize/14-initial-indent"
@@ -261,7 +276,7 @@ fn test-parenthesize {
   check-parenthesize ",a b c", "(,a b c)", "F - test-parenthesize/18-unquote"
   check-parenthesize ",@a b c", "(,@a b c)", "F - test-parenthesize/19-unquote-splice"
   check-parenthesize "a b\n  'c\n  ,d\n  e", "(a b 'c ,d e)", "F - test-parenthesize/20-quotes-are-not-words"
-  check-parenthesize "def foo\n#a b c\n  de\nnew", "(def foo (d e)) new", "F - test-parenthesize/21-group-across-comments"
+  check-parenthesize "def foo\n#a b c\n  d e\nnew", "(def foo (d e)) new", "F - test-parenthesize/21-group-across-comments"
 }
 
 fn test-parenthesize-skips-lines-with-initial-parens {
@@ -306,7 +321,7 @@ fn check-parenthesize actual: (addr array byte), expected: (addr array byte), me
   initialize-gap-buffer-with expected-buffer, expected
   var expected-tokens-storage: (stream token 0x40)
   var expected-tokens/edi: (addr stream token) <- address expected-tokens-storage
-  tokenize-and-strip-indent expected-buffer, expected-tokens, trace
+  tokenize-salient expected-buffer, expected-tokens, trace
   #
   rewind-stream actual-tokens
   check-token-streams-data-equal actual-tokens, expected-tokens, message
@@ -348,9 +363,11 @@ fn check-token-streams-data-equal actual: (addr stream token), expected: (addr s
     var curr-token-storage: token
     var curr-token/ecx: (addr token) <- address curr-token-storage
     read-from-stream actual, curr-token
+#?     dump-token-from-cursor curr-token
     var expected-token-storage: token
     var expected-token/edx: (addr token) <- address expected-token-storage
     read-from-stream expected, expected-token
+#?     dump-token-from-cursor expected-token
     var match?/eax: boolean <- tokens-equal? curr-token, expected-token
     compare match?, 0/false
     {
@@ -376,7 +393,7 @@ fn tokenize-and-parenthesize in: (addr gap-buffer), out: (addr stream token), tr
   parenthesize tokens, out, trace
 }
 
-fn tokenize-and-strip-indent in: (addr gap-buffer), out: (addr stream token), trace: (addr trace) {
+fn tokenize-salient in: (addr gap-buffer), out: (addr stream token), trace: (addr trace) {
   var tokens-storage: (stream token 0x400)
   var tokens/edx: (addr stream token) <- address tokens-storage
   tokenize in, tokens, trace
@@ -386,5 +403,5 @@ fn tokenize-and-strip-indent in: (addr gap-buffer), out: (addr stream token), tr
     break-if-=
     return
   }
-  emit-non-indent-tokens tokens, out
+  emit-salient-tokens tokens, out
 }
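
The tokenize.mu half of the change (next file below) moves comment handling into next-token itself: instead of building comment tokens only to filter them out in the tokenize loop, next-token now discards the rest of any comment line up front, so comment-token? and rest-of-line can be deleted. The new skip-rest-of-line helper amounts to something like this hypothetical Python sketch (not the Mu API); it stops just before the newline so the caller can still turn that newline into a skip token:

def skip_rest_of_line(buf, pos):
    # advance past everything up to, but not including, the next newline
    while pos < len(buf) and buf[pos] != '\n':
        pos += 1
    return pos
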
diff --git a/shell/tokenize.mu b/shell/tokenize.mu
index 1675b728..ab25615f 100644
--- a/shell/tokenize.mu
+++ b/shell/tokenize.mu
@@ -29,9 +29,6 @@ fn tokenize in: (addr gap-buffer), out: (addr stream token), trace: (addr trace)
       break-if-=
       return
     }
-    var comment?/eax: boolean <- comment-token? token
-    compare comment?, 0/false
-    loop-if-!=
     var skip?/eax: boolean <- skip-token? token
     compare skip?, 0/false
     loop-if-!=
@@ -389,24 +386,36 @@ fn test-tokenize-indent {
 fn next-token in: (addr gap-buffer), out: (addr token), start-of-line?: boolean, trace: (addr trace) -> _/edi: boolean {
   trace-text trace, "tokenize", "next-token"
   trace-lower trace
+  # first save an indent token
   {
     compare start-of-line?, 0/false
     break-if-=
-    next-indent-token in, out, trace
-    trace-higher trace
-    return 0/not-at-start-of-line
+    next-indent-token in, out, trace  # might not be returned
   }
   skip-spaces-from-gap-buffer in
+  var g/eax: grapheme <- peek-from-gap-buffer in
+  {
+    compare g, 0x23/comment
+    break-if-!=
+    skip-rest-of-line in
+  }
+  var g/eax: grapheme <- peek-from-gap-buffer in
   {
-    var g/eax: grapheme <- peek-from-gap-buffer in
     compare g, 0xa/newline
     break-if-!=
     trace-text trace, "tokenize", "newline"
     g <- read-from-gap-buffer in
-    initialize-skip-token out
+    initialize-skip-token out  # might drop indent if that's all there was in this line
     return 1/at-start-of-line
   }
   {
+    compare start-of-line?, 0/false
+    break-if-=
+    # still here? no comment or newline?
+    trace-higher trace
+    return 0/not-at-start-of-line
+  }
+  {
     var done?/eax: boolean <- gap-buffer-scan-done? in
     compare done?, 0/false
     break-if-=
@@ -436,14 +445,6 @@ fn next-token in: (addr gap-buffer), out: (addr token), start-of-line?: boolean,
       next-stream-token in, out, trace
       break $next-token:case
     }
-    # comment
-    {
-      compare g, 0x23/comment
-      break-if-!=
-      rest-of-line in, out, trace
-      copy-to start-of-line?, 1/true
-      break $next-token:case
-    }
     # special-case: '-'
     {
       compare g, 0x2d/minus
@@ -530,6 +531,11 @@ fn next-token in: (addr gap-buffer), out: (addr token), start-of-line?: boolean,
       initialize-token out, ","
       break $next-token:case
     }
+    set-cursor-position 0/screen, 0x40 0x20
+    {
+      var foo/eax: int <- copy g
+      draw-int32-decimal-wrapping-right-then-down-from-cursor-over-full-screen 0/screen, foo, 7/fg 0/bg
+    }
     abort "unknown token type"
   }
   trace-higher trace
@@ -765,37 +771,17 @@ fn next-bracket-token g: grapheme, _out: (addr token), trace: (addr trace) {
   }
 }
 
-fn rest-of-line in: (addr gap-buffer), _out: (addr token), trace: (addr trace) {
-  trace-text trace, "tokenize", "comment"
-  var out/eax: (addr token) <- copy _out
-  var out-data-ah/eax: (addr handle stream byte) <- get out, text-data
-  populate-stream out-data-ah, 0x40
-  var _out-data/eax: (addr stream byte) <- lookup *out-data-ah
-  var out-data/edi: (addr stream byte) <- copy _out-data
+fn skip-rest-of-line in: (addr gap-buffer) {
   {
-    var empty?/eax: boolean <- gap-buffer-scan-done? in
-    compare empty?, 0/false
-    {
-      break-if-=
-      return
-    }
-    var g/eax: grapheme <- read-from-gap-buffer in
+    var done?/eax: boolean <- gap-buffer-scan-done? in
+    compare done?, 0/false
+    break-if-!=
+    var g/eax: grapheme <- peek-from-gap-buffer in
     compare g, 0xa/newline
     break-if-=
-    write-grapheme out-data, g
+    g <- read-from-gap-buffer in  # consume
     loop
   }
-  {
-    var should-trace?/eax: boolean <- should-trace? trace
-    compare should-trace?, 0/false
-    break-if-=
-    var stream-storage: (stream byte 0x80)
-    var stream/esi: (addr stream byte) <- address stream-storage
-    write stream, "=> "
-    rewind-stream out-data
-    write-stream stream, out-data
-    trace trace, "tokenize", stream
-  }
 }
 
 fn next-indent-token in: (addr gap-buffer), _out: (addr token), trace: (addr trace) {
@@ -1296,20 +1282,6 @@ fn stream-token? _self: (addr token) -> _/eax: boolean {
   return 1/true
 }
 
-fn comment-token? _self: (addr token) -> _/eax: boolean {
-  var self/eax: (addr token) <- copy _self
-  var in-data-ah/eax: (addr handle stream byte) <- get self, text-data
-  var in-data/eax: (addr stream byte) <- lookup *in-data-ah
-  rewind-stream in-data
-  var g/eax: grapheme <- read-grapheme in-data
-  compare g, 0x23/hash
-  {
-    break-if-=
-    return 0/false
-  }
-  return 1/true
-}
-
 fn skip-token? _self: (addr token) -> _/eax: boolean {
   var self/eax: (addr token) <- copy _self
   var in-type/eax: (addr int) <- get self, type