From e0b8eddb5b2463a6a2890fd2a089cc3cc1a0f711 Mon Sep 17 00:00:00 2001 From: Kartik Agaram Date: Wed, 23 Jun 2021 13:19:20 -0700 Subject: . --- html/shell/parenthesize.mu.html | 478 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 478 insertions(+) create mode 100644 html/shell/parenthesize.mu.html (limited to 'html/shell/parenthesize.mu.html') diff --git a/html/shell/parenthesize.mu.html b/html/shell/parenthesize.mu.html new file mode 100644 index 00000000..724e0343 --- /dev/null +++ b/html/shell/parenthesize.mu.html @@ -0,0 +1,478 @@ + + + + +Mu - shell/parenthesize.mu + + + + + + + + + + +https://github.com/akkartik/mu/blob/main/shell/parenthesize.mu +
+  1 ## insert explicit parens based on indentation
+  2 
+  3 # Design goals:
+  4 #  keywords in other languages should look different from functions: def, if, while, etc.
+  5 #  fully-parenthesized expressions should not be messed with
+  6 #    ignore indent when lines start with parens
+  7 #    ignore indent inside parens
+  8 #    no modes to disable this pass
+  9 #  introduce no new operators
+ 10 #    the language doesn't use nested lists like Scheme's `cond`
+ 11 #    lines with one word are never wrapped in parens
+ 12 #  encourage macros to explicitly insert all parens
+ 13 #    ignore indent inside backquote
+ 14 
+ 15 fn parenthesize in: (addr stream token), out: (addr stream token), trace: (addr trace) {
+ 16   trace-text trace, "parenthesize", "insert parens"
+ 17   trace-lower trace
+ 18   var buffer-storage: (stream token 0x40)
+ 19   var buffer/edi: (addr stream token) <- address buffer-storage  # tokens held back until we know whether their line gets wrapped
+ 20   var curr-line-indent: int
+ 21   var num-words-in-line: int
+ 22   var paren-at-start-of-line?: boolean
+ 23   var explicit-open-parens-storage: int
+ 24   var explicit-open-parens/ebx: (addr int) <- address explicit-open-parens-storage  # count of unclosed parens copied from the input; updated inside emit
+ 25   var implicit-open-parens-storage: int-stack
+ 26   var implicit-open-parens/esi: (addr int-stack) <- address implicit-open-parens-storage  # one saved indent level per '(' inserted by this pass and not yet closed
+ 27   initialize-int-stack implicit-open-parens, 0x10  # potentially a major memory leak
+ 28   rewind-stream in
+ 29   {
+ 30     var done?/eax: boolean <- stream-empty? in
+ 31     compare done?, 0/false
+ 32     break-if-!=
+ 33     # read the next token from `in`
+ 34     var curr-token-storage: token
+ 35     var curr-token/ecx: (addr token) <- address curr-token-storage
+ 36     read-from-stream in, curr-token
+ 37 #?     dump-token-from-cursor curr-token
+ 38     # update per-line state; an indent token marks the start of a new line
+ 39     {
+ 40       var is-indent?/eax: boolean <- indent-token? curr-token
+ 41       compare is-indent?, 0/false
+ 42       break-if-=
+ 43       copy-to num-words-in-line, 0
+ 44       copy-to paren-at-start-of-line?, 0/false
+ 45       var tmp/eax: int <- indent-level curr-token  # curr-line-indent now describes the line that is starting
+ 46       copy-to curr-line-indent, tmp
+ 47     }
+ 48     {
+ 49       var is-word?/eax: boolean <- word-token? curr-token
+ 50       compare is-word?, 0/false
+ 51       break-if-=
+ 52       increment num-words-in-line
+ 53     }
+ 54     {
+ 55       compare num-words-in-line, 0
+ 56       break-if-!=
+ 57       var is-open?/eax: boolean <- open-paren-token? curr-token  # only checked while no word has been seen on this line
+ 58       compare is-open?, 0/false
+ 59       break-if-=
+ 60       copy-to paren-at-start-of-line?, 1/true
+ 61     }
+ 62     # emit: the first matching case below decides what happens to curr-token
+ 63     $parenthesize:emit: {
+ 64       {
+ 65         compare paren-at-start-of-line?, 0/false
+ 66         break-if-=
+ 67 #?         draw-text-wrapping-right-then-down-from-cursor-over-full-screen 0/screen "A", 7/fg 0/bg
+ 68         emit-all buffer, curr-token, out, explicit-open-parens  # line began with '(': pass tokens through without inserting parens
+ 69         break $parenthesize:emit
+ 70       }
+ 71       {
+ 72         var is-indent?/eax: boolean <- indent-token? curr-token
+ 73         compare is-indent?, 0/false
+ 74         break-if-=
+ 75 #?         draw-text-wrapping-right-then-down-from-cursor-over-full-screen 0/screen "B", 7/fg 0/bg
+ 76         emit-all buffer, curr-token, out, explicit-open-parens  # new line: flush tokens still held back (emit drops the indent token itself)
+ 77         break $parenthesize:emit
+ 78       }
+ 79       {
+ 80         compare num-words-in-line, 2
+ 81         break-if->=
+ 82 #?         draw-text-wrapping-right-then-down-from-cursor-over-full-screen 0/screen "C", 7/fg 0/bg
+ 83         write-to-stream buffer, curr-token  # fewer than 2 words so far: hold the token back
+ 84         break $parenthesize:emit
+ 85       }
+ 86       {
+ 87         compare num-words-in-line, 2
+ 88         break-if-!=
+ 89         var is-word?/eax: boolean <- word-token? curr-token
+ 90         compare is-word?, 0/false
+ 91         break-if-=
+ 92         compare *explicit-open-parens, 0
+ 93         break-if-!=
+ 94 #?         draw-text-wrapping-right-then-down-from-cursor-over-full-screen 0/screen "(\n", 7/fg 0/bg
+ 95         var paren-storage: token
+ 96         var paren-token/eax: (addr token) <- address paren-storage
+ 97         initialize-token paren-token, "("
+ 98         write-to-stream out, paren-token  # second word of the line, outside explicit parens: insert an implicit '('
+ 99         push-int-stack implicit-open-parens, curr-line-indent  # remember this line's indent so a later line can close the paren
+100       }
+101       emit-all buffer, curr-token, out, explicit-open-parens  # flush held-back tokens, then curr-token
+102     }
+103     {
+104       var is-indent?/eax: boolean <- indent-token? curr-token
+105       compare is-indent?, 0/false
+106       break-if-=
+107       {
+108         # . loop check: stop when the stack is empty or its top indent is less than the new line's indent
+109         var done?/eax: boolean <- int-stack-empty? implicit-open-parens
+110         compare done?, 0/false
+111         break-if-!=
+112         var top-indent/eax: int <- int-stack-top implicit-open-parens
+113         compare top-indent, curr-line-indent
+114         break-if-<
+115         # . loop body: close one implicit paren
+116         var paren-storage: token
+117         var paren-token/eax: (addr token) <- address paren-storage
+118         initialize-token paren-token, ")"
+119         write-to-stream out, paren-token
+120         # . update
+121         var dummy/eax: int <- pop-int-stack implicit-open-parens
+122         loop
+123       }
+124     }
+125     loop
+126   }
+127   emit-all buffer, 0/no-curr-token, out, explicit-open-parens  # end of input: flush whatever is still held back
+128   {
+129     # . loop check
+130     var done?/eax: boolean <- int-stack-empty? implicit-open-parens
+131     compare done?, 0/false
+132     break-if-!=
+133     # . loop body: close every implicit paren that is still open
+134     var paren-storage: token
+135     var paren-token/eax: (addr token) <- address paren-storage
+136     initialize-token paren-token, ")"
+137     write-to-stream out, paren-token
+138     # . update
+139     var dummy/eax: int <- pop-int-stack implicit-open-parens
+140     loop
+141   }
+142   trace-higher trace
+143 }
+144 
+145 fn indent-level _in: (addr token) -> _/eax: int {
+146   var tok/eax: (addr token) <- copy _in  # load the token's address into a register
+147   var level-addr/eax: (addr int) <- get tok, number-data  # address of the token's number-data field
+148   return *level-addr
+149 }
+150 
+151 fn word-token? in: (addr token) -> _/eax: boolean {
+152   {
+153     var indent?/eax: boolean <- indent-token? in  # any check below that succeeds leaves the block and returns false
+154     compare indent?, 0/false
+155     break-if-!=
+156     var bracket?/eax: boolean <- bracket-token? in  # overzealously checks for [], but shouldn't ever encounter it
+157     compare bracket?, 0/false
+158     break-if-!=
+159     var quote?/eax: boolean <- quote-token? in
+160     compare quote?, 0/false
+161     break-if-!=
+162     var backquote?/eax: boolean <- backquote-token? in
+163     compare backquote?, 0/false
+164     break-if-!=
+165     var unquote?/eax: boolean <- unquote-token? in
+166     compare unquote?, 0/false
+167     break-if-!=
+168     var splice?/eax: boolean <- unquote-splice-token? in
+169     compare splice?, 0/false
+170     break-if-!=
+171     return 1/true
+172   }
+173   return 0/false
+174 }
+175 
+176 fn emit-all first: (addr stream token), second: (addr token), out: (addr stream token), explicit-open-parens: (addr int) {
+177   rewind-stream first
+178   {
+179     var drained?/eax: boolean <- stream-empty? first
+180     compare drained?, 0/false
+181     break-if-!=
+182     var held-storage: token
+183     var held/eax: (addr token) <- address held-storage
+184     read-from-stream first, held
+185     emit held, out, explicit-open-parens  # every token in `first` goes through emit, in order
+186     loop
+187   }
+188   clear-stream first
+189   {
+190     compare second, 0
+191     break-if-=
+192     emit second, out, explicit-open-parens  # `second` is optional; callers pass 0 to skip it
+193   }
+194 }
+195 
+196 fn emit t: (addr token), out: (addr stream token), explicit-open-parens: (addr int) {
+197   {
+198     var indent?/eax: boolean <- indent-token? t  # indent tokens are never written to `out`
+199     compare indent?, 0/false
+200     break-if-=
+201     return
+202   }
+203   write-to-stream out, t
+204   var depth/edi: (addr int) <- copy explicit-open-parens  # register copy of the counter's address
+205   {
+206     var opening?/eax: boolean <- open-paren-token? t
+207     compare opening?, 0/false
+208     break-if-=
+209     increment *depth
+210   }
+211   {
+212     var closing?/eax: boolean <- close-paren-token? t
+213     compare closing?, 0/false
+214     break-if-=
+215     decrement *depth  # a negative count means more ')' than '(' have been emitted
+216     compare *depth, 0
+217     break-if->=
+218     abort "emit: extra ')'"
+219   }
+220 }
+221 
+222 # helper for checking parenthesize
+223 fn emit-salient-tokens in: (addr stream token), out: (addr stream token) {
+224   rewind-stream in
+225   {
+226     var exhausted?/eax: boolean <- stream-empty? in
+227     compare exhausted?, 0/false
+228     break-if-!=
+229     var curr-storage: token
+230     var curr/edx: (addr token) <- address curr-storage
+231     read-from-stream in, curr
+232     # drop skip tokens
+233     var skip?/eax: boolean <- skip-token? curr
+234     compare skip?, 0/false
+235     loop-if-!=
+236     # drop indent tokens
+237     var indent?/eax: boolean <- indent-token? curr
+238     compare indent?, 0/false
+239     loop-if-!=
+240     # everything else is copied to `out`
+241     write-to-stream out, curr  # shallow copy
+242     loop
+243   }
+244 }
+245 
+246 fn test-parenthesize {
+247   check-parenthesize "a b c  ", "(a b c)", "F - test-parenthesize/1"  # arguments: input text, expected result of parenthesize, message shown on failure
+248   check-parenthesize "a (b)", "(a (b))", "F - test-parenthesize/2"
+249   check-parenthesize "a (b c)", "(a (b c))", "F - test-parenthesize/3"
+250   check-parenthesize "a (b c) d", "(a (b c) d)", "F - test-parenthesize/4"
+251   check-parenthesize "a b c\nd ef", "(a b c) (d ef)", "F - test-parenthesize/5-multiple-lines"
+252   check-parenthesize "a b c\n  d ef", "(a b c (d ef))", "F - test-parenthesize/6-indented"
+253   check-parenthesize "a b c\n  (d ef)", "(a b c (d ef))", "F - test-parenthesize/7-indented"
+254   check-parenthesize "a b c\n  (d ef)\n  g", "(a b c (d ef) g)", "F - test-parenthesize/8-indented"
+255   check-parenthesize "a b c\n  d e\n    f\ny", "(a b c (d e f)) y", "F - test-parenthesize/9-indented"
+256   check-parenthesize "#a\na b", "(a b)", "F - test-parenthesize/10-initial-comment"
+257 #? a b c
+258 #?     d ef
+259 #? 
+260 #?   g
+261 #?   check-parenthesize "a b c\n    d ef\n\n  g", "(a b c (d ef) g)", "F - test-parenthesize/11-comments"
+262 #?   check-parenthesize "a b c\n    d ef\n\n  g #abc", "(a b c (d ef)) g", "F - test-parenthesize/11-comments"
+263   check-parenthesize "a b c\n    d ef\n\n  g #abc", "(a b c (d ef) g)", "F - test-parenthesize/11-comments"
+264 #? a b c
+265 #?   '(d ef)
+266 #? 
+267 #?   g #abc
+268 #?   check-parenthesize "a b c\n  '(d ef)\n  g #abc", "(a b c '(d ef) g)", "F - test-parenthesize/12-quotes-and-comments"
+269   check-parenthesize "a b c\n  '(d ef)\n\n  g #abc", "(a b c '(d ef) g)", "F - test-parenthesize/12-quotes-and-comments"
+270   check-parenthesize "  a b c", "(a b c)", "F - test-parenthesize/13-initial-indent"
+271   check-parenthesize "    a b c\n  34", "(a b c) 34", "F - test-parenthesize/14-initial-indent"
+272   check-parenthesize "def foo\n    a b c\n  d e\nnewdef", "(def foo (a b c) (d e)) newdef", "F - test-parenthesize/14b-mixed-indent"
+273   check-parenthesize "  a a\n    a\ny", "(a a a) y", "F - test-parenthesize/15-group-before-too-much-outdent"
+274   check-parenthesize "a `(b c)", "(a `(b c))", "F - test-parenthesize/16-backquote"
+275   check-parenthesize "'a b c", "('a b c)", "F - test-parenthesize/17-quote"
+276   check-parenthesize ",a b c", "(,a b c)", "F - test-parenthesize/18-unquote"
+277   check-parenthesize ",@a b c", "(,@a b c)", "F - test-parenthesize/19-unquote-splice"
+278   check-parenthesize "a b\n  'c\n  ,d\n  e", "(a b 'c ,d e)", "F - test-parenthesize/20-quotes-are-not-words"
+279   check-parenthesize "def foo\n#a b c\n  d e\nnew", "(def foo (d e)) new", "F - test-parenthesize/21-group-across-comments"
+280 }
+281 
+282 fn test-parenthesize-skips-lines-with-initial-parens {
+283   check-parenthesize "(a b c)", "(a b c)", "F - test-parenthesize-skips-lines-with-initial-parens/1"  # arguments: input text, expected result of parenthesize, message shown on failure
+284   check-parenthesize "(a (b c))", "(a (b c))", "F - test-parenthesize-skips-lines-with-initial-parens/2"
+285   check-parenthesize "(a () b)", "(a () b)", "F - test-parenthesize-skips-lines-with-initial-parens/3"
+286   check-parenthesize "  (a b c)", "(a b c)", "F - test-parenthesize-skips-lines-with-initial-parens/initial-indent"
+287   check-parenthesize "(a b c\n  bc\n    def\n  gh)", "(a b c bc def gh)", "F - test-parenthesize-skips-lines-with-initial-parens/outdent"  # indentation inside explicit parens adds no parens
+288   check-parenthesize "(a b c\n  (def gh)\n    (i j k)\n  lm\n\n\n    (no p))", "(a b c (def gh) (i j k) lm (no p))", "F - test-parenthesize-skips-lines-with-initial-parens/fully-parenthesized"
+289   check-parenthesize ",(a b c)", ",(a b c)", "F - test-parenthesize-skips-lines-with-initial-parens/after-unquote"  # a leading unquote before '(' still leaves the line untouched
+290   check-parenthesize ",@(a b c)", ",@(a b c)", "F - test-parenthesize-skips-lines-with-initial-parens/after-unquote-splice"
+291   check-parenthesize ",,(a b c)", ",,(a b c)", "F - test-parenthesize-skips-lines-with-initial-parens/after-nested-unquote"
+292   check-parenthesize "(def foo\n    #a b c\n  d e)\nnew", "(def foo d e) new", "F - test-parenthesize-skips-lines-with-initial-parens/across-comment"
+293   check-parenthesize "`(def foo\n    #a b c\n  d e)\nnew", "`(def foo d e) new", "F - test-parenthesize-skips-lines-with-initial-parens/across-comment-after-backquote"
+294   check-parenthesize "  (a b c\n    d e)", "(a b c d e)", "F - test-parenthesize-skips-lines-with-initial-parens/with-indent"
+295   check-parenthesize "def foo(a (b)\n    c d)\n  d e\nnew", "(def foo (a (b) c d) (d e)) new", "F - test-parenthesize-skips-lines-with-initial-parens/inside-arg-lists"  # explicit parens opened mid-line also suspend insertion until they close
+296 }
+297 
+298 fn test-parenthesize-skips-single-word-lines {
+299   # a line with two or more words gets grouped with the indented lines that follow it
+300   check-parenthesize "a b\n  c", "(a b c)", "F - test-parenthesize-skips-single-word-lines/0"
+301   # but a line with a single word is never wrapped, so nothing gets grouped under it
+302   check-parenthesize "a\n  c", "a c", "F - test-parenthesize-skips-single-word-lines/1"
+303   check-parenthesize "a", "a", "F - test-parenthesize-skips-single-word-lines/2"
+304   check-parenthesize "a  \nb\nc", "a b c", "F - test-parenthesize-skips-single-word-lines/3"  # trailing spaces after the word are expected to add no parens
+305 }
+306 
+307 fn check-parenthesize actual: (addr array byte), expected: (addr array byte), message: (addr array byte) {
+308   var trace-storage: trace
+309   var trace/edx: (addr trace) <- address trace-storage
+310   initialize-trace trace, 1/only-errors, 0x10/capacity, 0/visible
+311   # `actual` is the input text: tokenize it and run the tokens through parenthesize
+312   var actual-buffer-storage: gap-buffer
+313   var actual-buffer/eax: (addr gap-buffer) <- address actual-buffer-storage
+314   initialize-gap-buffer-with actual-buffer, actual
+315   var actual-tokens-storage: (stream token 0x40)
+316   var actual-tokens/esi: (addr stream token) <- address actual-tokens-storage
+317   tokenize-and-parenthesize actual-buffer, actual-tokens, trace
+318   # `expected` is the desired output text: tokenize it, dropping skip and indent tokens
+319   var expected-buffer-storage: gap-buffer
+320   var expected-buffer/eax: (addr gap-buffer) <- address expected-buffer-storage
+321   initialize-gap-buffer-with expected-buffer, expected
+322   var expected-tokens-storage: (stream token 0x40)
+323   var expected-tokens/edi: (addr stream token) <- address expected-tokens-storage
+324   tokenize-salient expected-buffer, expected-tokens, trace
+325   # compare the two token streams; `message` is shown on failure
+326   rewind-stream actual-tokens
+327   check-token-streams-data-equal actual-tokens, expected-tokens, message
+328 }
+329 
+330 fn check-token-streams-data-equal actual: (addr stream token), expected: (addr stream token), message: (addr array byte) {
+331   rewind-stream actual
+332   rewind-stream expected
+333   {
+334     # loop termination checks: handle either stream running out
+335     var actual-done?/eax: boolean <- stream-empty? actual
+336     {
+337       compare actual-done?, 0/false
+338       break-if-=
+339       var expected-done?/eax: boolean <- stream-empty? expected
+340       compare expected-done?, 0/false
+341       {
+342         break-if-!=
+343         # actual empty, but expected not empty
+344         draw-text-wrapping-right-then-down-from-cursor-over-full-screen 0/screen, message, 3/fg=cyan 0/bg
+345         draw-text-wrapping-right-then-down-from-cursor-over-full-screen 0/screen, ": too short\n", 3/fg=cyan 0/bg
+346         count-test-failure
+347         return
+348       }
+349       draw-text-wrapping-right-then-down-from-cursor-over-full-screen 0/screen, ".", 3/fg/cyan, 0/bg  # both streams ran out together with no mismatch: report a pass
+350       return
+351     }
+352     var expected-done?/eax: boolean <- stream-empty? expected
+353     compare expected-done?, 0/false
+354     {
+355       break-if-=
+356       # actual not empty, but expected empty
+357       draw-text-wrapping-right-then-down-from-cursor-over-full-screen 0/screen, message, 3/fg=cyan 0/bg
+358       draw-text-wrapping-right-then-down-from-cursor-over-full-screen 0/screen, ": too long\n", 3/fg=cyan 0/bg
+359       count-test-failure
+360       return
+361     }
+362     # loop body: read one token from each stream and compare them
+363     var curr-token-storage: token
+364     var curr-token/ecx: (addr token) <- address curr-token-storage
+365     read-from-stream actual, curr-token
+366 #?     dump-token-from-cursor curr-token
+367     var expected-token-storage: token
+368     var expected-token/edx: (addr token) <- address expected-token-storage
+369     read-from-stream expected, expected-token
+370 #?     dump-token-from-cursor expected-token
+371     var match?/eax: boolean <- tokens-equal? curr-token, expected-token
+372     compare match?, 0/false
+373     {
+374       break-if-!=
+375       draw-text-wrapping-right-then-down-from-cursor-over-full-screen 0/screen, message, 3/fg=cyan 0/bg  # NOTE(review): unlike the two length-failure branches, no ": ...\n" suffix follows the message here
+376       count-test-failure
+377       return
+378     }
+379     loop
+380   }
+381 }
+382 
+383 fn tokenize-and-parenthesize in: (addr gap-buffer), out: (addr stream token), trace: (addr trace) {
+384   var raw-tokens-storage: (stream token 0x400)
+385   var raw-tokens/edx: (addr stream token) <- address raw-tokens-storage
+386   tokenize in, raw-tokens, trace
+387   {
+388     var failed?/eax: boolean <- has-errors? trace
+389     compare failed?, 0/false
+390     break-if-!=
+391     # has-errors? reported false: run the parenthesize pass; otherwise `out` is left untouched
+392     parenthesize raw-tokens, out, trace
+393   }
+394 }
+395 
+396 fn tokenize-salient in: (addr gap-buffer), out: (addr stream token), trace: (addr trace) {
+397   var raw-tokens-storage: (stream token 0x400)
+398   var raw-tokens/edx: (addr stream token) <- address raw-tokens-storage
+399   tokenize in, raw-tokens, trace
+400   {
+401     var failed?/eax: boolean <- has-errors? trace
+402     compare failed?, 0/false
+403     break-if-!=
+404     # has-errors? reported false: copy the salient tokens; otherwise `out` is left untouched
+405     emit-salient-tokens raw-tokens, out
+406   }
+407 }
+
+ + + -- cgit 1.4.1-2-gfad0