https://github.com/akkartik/mu/blob/main/shell/parenthesize.mu
1
2
3
4
5
6
7
8
9
10
11
12
13
14
# Copy tokens from 'in' to 'out', inserting implicit parens around lines based
# on their words and indentation.
#
# Rules, as implemented below:
#   - indent tokens are never written to 'out'; they only update per-line state
#   - a line whose first non-word token is '(' is copied through untouched
#   - the tokens of a line are held back until its second word shows up; if no
#     explicit paren is open at that point, a '(' is written first and the
#     line's indent is remembered on a stack
#   - when a new line starts, every remembered group whose indent is >= the new
#     line's indent gets its ')'
#   - at end of input, held-back tokens are flushed and all remaining groups
#     are closed
fn parenthesize in: (addr stream token), out: (addr stream token), trace: (addr trace) {
  trace-text trace, "parenthesize", "insert parens"
  trace-lower trace
  # tokens of the current line that haven't been written to 'out' yet
  var pending-storage: (stream token 0x40)
  var pending/edi: (addr stream token) <- address pending-storage
  # per-line state, reset by each indent token
  var line-indent: int
  var words-in-line: int
  var line-opens-with-paren?: boolean
  # count of explicit '(' currently open in 'out' (maintained by emit)
  var explicit-depth-storage: int
  var explicit-depth/ebx: (addr int) <- address explicit-depth-storage
  # indent of each implicit '(' we've inserted and not yet closed
  var implicit-indents-storage: int-stack
  var implicit-indents/esi: (addr int-stack) <- address implicit-indents-storage
  initialize-int-stack implicit-indents, 0x10
  rewind-stream in
  {
    var exhausted?/eax: boolean <- stream-empty? in
    compare exhausted?, 0/false
    break-if-!=
    #
    var tok-storage: token
    var tok/ecx: (addr token) <- address tok-storage
    read-from-stream in, tok
    # an indent token starts a new line: reset per-line state
    {
      var new-line?/eax: boolean <- indent-token? tok
      compare new-line?, 0/false
      break-if-=
      var level/eax: int <- indent-level tok
      copy-to line-indent, level
      copy-to words-in-line, 0
      copy-to line-opens-with-paren?, 0/false
    }
    # count words
    {
      var word?/eax: boolean <- word-token? tok
      compare word?, 0/false
      break-if-=
      increment words-in-line
    }
    # a '(' before any word marks the line as explicitly parenthesized
    {
      compare words-in-line, 0
      break-if-!=
      var open?/eax: boolean <- open-paren-token? tok
      compare open?, 0/false
      break-if-=
      copy-to line-opens-with-paren?, 1/true
    }
    # decide what to do with the token
    $parenthesize:emit: {
      # a new line flushes whatever the previous line held back
      {
        var new-line?/eax: boolean <- indent-token? tok
        compare new-line?, 0/false
        break-if-=
        emit-all pending, tok, out, explicit-depth
        break $parenthesize:emit
      }
      # explicitly parenthesized lines pass straight through
      {
        compare line-opens-with-paren?, 0/false
        break-if-=
        emit-all pending, tok, out, explicit-depth
        break $parenthesize:emit
      }
      # fewer than 2 words so far: hold the token back
      {
        compare words-in-line, 2
        break-if->=
        write-to-stream pending, tok
        break $parenthesize:emit
      }
      # this token is the line's second word and no explicit paren is open:
      # insert an implicit '(' ahead of the held-back tokens
      {
        compare words-in-line, 2
        break-if-!=
        var word?/eax: boolean <- word-token? tok
        compare word?, 0/false
        break-if-=
        compare *explicit-depth, 0
        break-if-!=
        #
        var open-storage: token
        var open/eax: (addr token) <- address open-storage
        initialize-token open, "("
        write-to-stream out, open
        push-int-stack implicit-indents, line-indent
      }
      emit-all pending, tok, out, explicit-depth
    }
    # on a new line, close implicit groups at the same or deeper indent
    {
      var new-line?/eax: boolean <- indent-token? tok
      compare new-line?, 0/false
      break-if-=
      {
        var none-open?/eax: boolean <- int-stack-empty? implicit-indents
        compare none-open?, 0/false
        break-if-!=
        var group-indent/eax: int <- int-stack-top implicit-indents
        compare group-indent, line-indent
        break-if-<
        #
        var close-storage: token
        var close/eax: (addr token) <- address close-storage
        initialize-token close, ")"
        write-to-stream out, close
        #
        var discarded/eax: int <- pop-int-stack implicit-indents
        loop
      }
    }
    loop
  }
  # end of input: flush held-back tokens, then close every remaining group
  emit-all pending, 0/no-curr-token, out, explicit-depth
  {
    var none-open?/eax: boolean <- int-stack-empty? implicit-indents
    compare none-open?, 0/false
    break-if-!=
    #
    var close-storage: token
    var close/eax: (addr token) <- address close-storage
    initialize-token close, ")"
    write-to-stream out, close
    #
    var discarded/eax: int <- pop-int-stack implicit-indents
    loop
  }
  trace-higher trace
}
144
# Return the value stored in the 'number-data' field of a token.
# parenthesize calls this only on tokens for which indent-token? is true.
fn indent-level _in: (addr token) -> _/eax: int {
  var t/eax: (addr token) <- copy _in
  var level-addr/eax: (addr int) <- get t, number-data
  var level/eax: int <- copy *level-addr
  return level
}
150
# A token counts as a word unless it is an indent, a bracket, or one of the
# quoting prefixes (quote, backquote, unquote, unquote-splice).
fn word-token? in: (addr token) -> _/eax: boolean {
  var indent?/eax: boolean <- indent-token? in
  compare indent?, 0/false
  {
    break-if-=
    return 0/false
  }
  var bracket?/eax: boolean <- bracket-token? in
  compare bracket?, 0/false
  {
    break-if-=
    return 0/false
  }
  var quote?/eax: boolean <- quote-token? in
  compare quote?, 0/false
  {
    break-if-=
    return 0/false
  }
  var backquote?/eax: boolean <- backquote-token? in
  compare backquote?, 0/false
  {
    break-if-=
    return 0/false
  }
  var unquote?/eax: boolean <- unquote-token? in
  compare unquote?, 0/false
  {
    break-if-=
    return 0/false
  }
  var unquote-splice?/eax: boolean <- unquote-splice-token? in
  compare unquote-splice?, 0/false
  {
    break-if-=
    return 0/false
  }
  # none of the exclusions matched
  return 1/true
}
175
# Emit every token buffered in 'first' (in order), empty the buffer, then emit
# 'second' unless it is null (0).
# All tokens go through 'emit', so indent tokens are dropped and
# 'explicit-open-parens' is kept up to date.
fn emit-all first: (addr stream token), second: (addr token), out: (addr stream token), explicit-open-parens: (addr int) {
  # drain the buffer from its start
  rewind-stream first
  {
    var drained?/eax: boolean <- stream-empty? first
    compare drained?, 0/false
    break-if-!=
    var buffered-storage: token
    var buffered/eax: (addr token) <- address buffered-storage
    read-from-stream first, buffered
    emit buffered, out, explicit-open-parens
    loop
  }
  clear-stream first
  # optional trailing token
  compare second, 0
  {
    break-if-=
    emit second, out, explicit-open-parens
  }
}
195
# Write token 't' to 'out' and keep the explicit-paren counter in sync:
#   - indent tokens are dropped (nothing is written, counter untouched)
#   - '(' increments the counter
#   - ')' decrements it; aborts if the counter goes negative
fn emit t: (addr token), out: (addr stream token), explicit-open-parens: (addr int) {
  var skip?/eax: boolean <- indent-token? t
  compare skip?, 0/false
  {
    break-if-=
    return
  }
  write-to-stream out, t
  # the counter lives behind an address; load it into a register to dereference
  var depth/edi: (addr int) <- copy explicit-open-parens
  {
    var opening?/eax: boolean <- open-paren-token? t
    compare opening?, 0/false
    break-if-=
    increment *depth
  }
  {
    var closing?/eax: boolean <- close-paren-token? t
    compare closing?, 0/false
    break-if-=
    decrement *depth
    # more ')' than '(' so far
    compare *depth, 0
    break-if->=
    abort "emit: extra ')'"
  }
}
221
222
# Copy tokens from 'in' to 'out', leaving out any token flagged by skip-token?
# and any indent token. No parens are inserted.
fn emit-salient-tokens in: (addr stream token), out: (addr stream token) {
  rewind-stream in
  {
    var exhausted?/eax: boolean <- stream-empty? in
    compare exhausted?, 0/false
    break-if-!=
    var curr-storage: token
    var curr/edx: (addr token) <- address curr-storage
    read-from-stream in, curr
    # filter: fall out of this block without writing if the token isn't salient
    {
      var skip?/eax: boolean <- skip-token? curr
      compare skip?, 0/false
      break-if-!=
      var indent?/eax: boolean <- indent-token? curr
      compare indent?, 0/false
      break-if-!=
      write-to-stream out, curr
    }
    loop
  }
}
245
# Tests for implicit paren insertion.
# Each check tokenizes+parenthesizes the first string and compares the result,
# token by token, with the (already parenthesized) second string.
#
# Reminder of the rule that makes indentation matter: when a new line starts,
# parenthesize closes every implicit group whose indent is >= the new line's
# indent. So a line only stays inside the previous line's group if it is
# indented strictly deeper than the line that opened the group.
# (Assumes lines with identical leading whitespace get the same indent level;
# the tokenizer isn't shown in this file.)
fn test-parenthesize {
  # single line
  check-parenthesize "a b c ", "(a b c)", "F - test-parenthesize/1"
  check-parenthesize "a (b)", "(a (b))", "F - test-parenthesize/2"
  check-parenthesize "a (b c)", "(a (b c))", "F - test-parenthesize/3"
  check-parenthesize "a (b c) d", "(a (b c) d)", "F - test-parenthesize/4"
  # multiple lines and indentation
  check-parenthesize "a b c\nd ef", "(a b c) (d ef)", "F - test-parenthesize/5-multiple-lines"
  check-parenthesize "a b c\n d ef", "(a b c (d ef))", "F - test-parenthesize/6-indented"
  check-parenthesize "a b c\n (d ef)", "(a b c (d ef))", "F - test-parenthesize/7-indented"
  check-parenthesize "a b c\n (d ef)\n g", "(a b c (d ef) g)", "F - test-parenthesize/8-indented"
  # 'f' must be indented deeper than 'd e' to remain inside the (d e ...) group;
  # at the same indent the group would be closed before 'f'
  check-parenthesize "a b c\n d e\n  f\ny", "(a b c (d e f)) y", "F - test-parenthesize/9-indented"
  # comments and empty lines
  check-parenthesize "#a\na b", "(a b)", "F - test-parenthesize/10-initial-comment"
  check-parenthesize "a b c\n d ef\n\n g #abc", "(a b c (d ef) g)", "F - test-parenthesize/11-comments"
  check-parenthesize "a b c\n '(d ef)\n\n g #abc", "(a b c '(d ef) g)", "F - test-parenthesize/12-quotes-and-comments"
  # indented first line
  check-parenthesize " a b c", "(a b c)", "F - test-parenthesize/13-initial-indent"
  # second line at the same indent as the first closes the group
  check-parenthesize " a b c\n 34", "(a b c) 34", "F - test-parenthesize/14-initial-indent"
  # sibling lines at the same indent form separate groups
  check-parenthesize "def foo\n a b c\n d e\nnewdef", "(def foo (a b c) (d e)) newdef", "F - test-parenthesize/14"
  # the lone 'a' is indented deeper than the first line, so it joins the group;
  # 'y' is outdented past the first line's indent and closes it
  check-parenthesize " a a\n  a\ny", "(a a a) y", "F - test-parenthesize/15-group-before-too-much-outdent"
  # quoting prefixes
  check-parenthesize "a `(b c)", "(a `(b c))", "F - test-parenthesize/16-backquote"
  check-parenthesize "'a b c", "('a b c)", "F - test-parenthesize/17-quote"
  check-parenthesize ",a b c", "(,a b c)", "F - test-parenthesize/18-unquote"
  check-parenthesize ",@a b c", "(,@a b c)", "F - test-parenthesize/19-unquote-splice"
  check-parenthesize "a b\n 'c\n ,d\n e", "(a b 'c ,d e)", "F - test-parenthesize/20-quotes-are-not-words"
  check-parenthesize "def foo\n#a b c\n d e\nnew", "(def foo (d e)) new", "F - test-parenthesize/21-group-across-comments"
}
281
# Lines that begin with '(' (possibly after quoting prefixes or indentation)
# must come out without any extra parens, and no implicit parens may be
# inserted while an explicit paren is still open.
fn test-parenthesize-skips-lines-with-initial-parens {
  # already parenthesized, single line
  check-parenthesize "(a b c)", "(a b c)", "F - test-parenthesize-skips-lines-with-initial-parens/1"
  check-parenthesize "(a (b c))", "(a (b c))", "F - test-parenthesize-skips-lines-with-initial-parens/2"
  check-parenthesize "(a () b)", "(a () b)", "F - test-parenthesize-skips-lines-with-initial-parens/3"
  check-parenthesize " (a b c)", "(a b c)", "F - test-parenthesize-skips-lines-with-initial-parens/initial-indent"
  # explicit parens spanning several lines
  check-parenthesize "(a b c\n bc\n def\n gh)", "(a b c bc def gh)", "F - test-parenthesize-skips-lines-with-initial-parens/outdent"
  check-parenthesize "(a b c\n (def gh)\n (i j k)\n lm\n\n\n (no p))", "(a b c (def gh) (i j k) lm (no p))", "F - test-parenthesize-skips-lines-with-initial-parens/fully-parenthesized"
  # quoting prefixes before the initial paren
  check-parenthesize ",(a b c)", ",(a b c)", "F - test-parenthesize-skips-lines-with-initial-parens/after-unquote"
  check-parenthesize ",@(a b c)", ",@(a b c)", "F - test-parenthesize-skips-lines-with-initial-parens/after-unquote-splice"
  check-parenthesize ",,(a b c)", ",,(a b c)", "F - test-parenthesize-skips-lines-with-initial-parens/after-nested-unquote"
  # comments inside explicit parens
  check-parenthesize "(def foo\n #a b c\n d e)\nnew", "(def foo d e) new", "F - test-parenthesize-skips-lines-with-initial-parens/across-comment"
  check-parenthesize "`(def foo\n #a b c\n d e)\nnew", "`(def foo d e) new", "F - test-parenthesize-skips-lines-with-initial-parens/across-comment-after-backquote"
  check-parenthesize " (a b c\n d e)", "(a b c d e)", "F - test-parenthesize-skips-lines-with-initial-parens/with-indent"
  # explicit parens opened mid-line suppress implicit parens until they close
  check-parenthesize "def foo(a (b)\n c d)\n d e\nnew", "(def foo (a (b) c d) (d e)) new", "F - test-parenthesize-skips-lines-with-initial-parens/inside-arg-lists"
}
297
# A line only gets an implicit '(' once its second word is seen, so lines with
# a single word are emitted bare.
fn test-parenthesize-skips-single-word-lines {
  # baseline: a single-word continuation line joins the enclosing group
  check-parenthesize "a b\n c", "(a b c)", "F - test-parenthesize-skips-single-word-lines/0"
  # no line has two words, so no parens at all
  check-parenthesize "a\n c", "a c", "F - test-parenthesize-skips-single-word-lines/1"
  check-parenthesize "a", "a", "F - test-parenthesize-skips-single-word-lines/2"
  check-parenthesize "a \nb\nc", "a b c", "F - test-parenthesize-skips-single-word-lines/3"
}
306
# Test helper: tokenize and parenthesize the text 'actual', tokenize the text
# 'expected' keeping only its salient tokens, and check that the two token
# streams match. 'message' is what gets printed on failure.
# Both tokenizations share one trace, so the order of the two steps is kept.
fn check-parenthesize actual: (addr array byte), expected: (addr array byte), message: (addr array byte) {
  var trace-storage: trace
  var trace/edx: (addr trace) <- address trace-storage
  initialize-trace trace, 1/only-errors, 0x10/capacity, 0/visible
  # text under test -> parenthesized tokens
  var input-storage: gap-buffer
  var input/eax: (addr gap-buffer) <- address input-storage
  initialize-gap-buffer-with input, actual
  var got-storage: (stream token 0x40)
  var got/esi: (addr stream token) <- address got-storage
  tokenize-and-parenthesize input, got, trace
  # reference text -> tokens as written
  var reference-storage: gap-buffer
  var reference/eax: (addr gap-buffer) <- address reference-storage
  initialize-gap-buffer-with reference, expected
  var want-storage: (stream token 0x40)
  var want/edi: (addr stream token) <- address want-storage
  tokenize-salient reference, want, trace
  # compare
  rewind-stream got
  check-token-streams-data-equal got, want, message
}
329
# Test helper: compare two token streams element by element using
# tokens-equal?.
#   - both streams end together with every token matching: print "."
#   - 'actual' ends first:   print message + ": too short", count a failure
#   - 'expected' ends first: print message + ": too long", count a failure
#   - a pair of tokens differs: print message + ": mismatch", count a failure
# Every failure message ends in a newline so later test output starts on a
# fresh line.
fn check-token-streams-data-equal actual: (addr stream token), expected: (addr stream token), message: (addr array byte) {
  rewind-stream actual
  rewind-stream expected
  {
    # loop termination checks
    var actual-done?/eax: boolean <- stream-empty? actual
    {
      compare actual-done?, 0/false
      break-if-=
      var expected-done?/eax: boolean <- stream-empty? expected
      compare expected-done?, 0/false
      {
        break-if-!=
        # actual empty, but expected not empty
        draw-text-wrapping-right-then-down-from-cursor-over-full-screen 0/screen, message, 3/fg=cyan 0/bg
        draw-text-wrapping-right-then-down-from-cursor-over-full-screen 0/screen, ": too short\n", 3/fg=cyan 0/bg
        count-test-failure
        return
      }
      # both streams ended together: success
      draw-text-wrapping-right-then-down-from-cursor-over-full-screen 0/screen, ".", 3/fg/cyan, 0/bg
      return
    }
    var expected-done?/eax: boolean <- stream-empty? expected
    compare expected-done?, 0/false
    {
      break-if-=
      # actual not empty, but expected empty
      draw-text-wrapping-right-then-down-from-cursor-over-full-screen 0/screen, message, 3/fg=cyan 0/bg
      draw-text-wrapping-right-then-down-from-cursor-over-full-screen 0/screen, ": too long\n", 3/fg=cyan 0/bg
      count-test-failure
      return
    }
    # read one token from each stream
    var curr-token-storage: token
    var curr-token/ecx: (addr token) <- address curr-token-storage
    read-from-stream actual, curr-token
    #
    var expected-token-storage: token
    var expected-token/edx: (addr token) <- address expected-token-storage
    read-from-stream expected, expected-token
    # and compare them
    var match?/eax: boolean <- tokens-equal? curr-token, expected-token
    compare match?, 0/false
    {
      break-if-!=
      # say why we failed and end the line, like the length failures above
      draw-text-wrapping-right-then-down-from-cursor-over-full-screen 0/screen, message, 3/fg=cyan 0/bg
      draw-text-wrapping-right-then-down-from-cursor-over-full-screen 0/screen, ": mismatch\n", 3/fg=cyan 0/bg
      count-test-failure
      return
    }
    loop
  }
}
382
# Tokenize the text in 'in', then run the tokens through parenthesize into
# 'out'. If the trace reports errors after tokenizing, 'out' is left untouched.
fn tokenize-and-parenthesize in: (addr gap-buffer), out: (addr stream token), trace: (addr trace) {
  var raw-storage: (stream token 0x400)
  var raw/edx: (addr stream token) <- address raw-storage
  tokenize in, raw, trace
  {
    var failed?/eax: boolean <- has-errors? trace
    compare failed?, 0/false
    break-if-!=
    parenthesize raw, out, trace
  }
}
395
# Tokenize the text in 'in', then copy only the salient tokens (see
# emit-salient-tokens) into 'out', without inserting any parens. If the trace
# reports errors after tokenizing, 'out' is left untouched.
fn tokenize-salient in: (addr gap-buffer), out: (addr stream token), trace: (addr trace) {
  var raw-storage: (stream token 0x400)
  var raw/edx: (addr stream token) <- address raw-storage
  tokenize in, raw, trace
  {
    var failed?/eax: boolean <- has-errors? trace
    compare failed?, 0/false
    break-if-!=
    emit-salient-tokens raw, out
  }
}
407 }