https://github.com/akkartik/mu/blob/main/shell/infix.mu
  1 fn transform-infix x-ah: (addr handle cell), trace: (addr trace) {
  2   trace-text trace, "infix", "transform infix"
  3   trace-lower trace
  4 #?   trace-text trace, "infix", "todo"
  5 #?   draw-text-wrapping-right-then-down-from-cursor-over-full-screen 0/screen, "a:", 2/fg 0/bg
  6 #?   dump-cell-from-cursor-over-full-screen x-ah, 7/fg 0/bg
  7   transform-infix-2 x-ah, trace, 1/at-head-of-list
  8   trace-higher trace
  9 }
 10 
 11 # Break any symbols containing operators down in place into s-expressions
 12 # Transform (... sym op sym ...) greedily in place into (... (op sym sym) ...)
 13 # Lisp code typed in at the keyboard will never have cycles
 14 fn transform-infix-2 _x-ah: (addr handle cell), trace: (addr trace), at-head-of-list?: boolean {
 15   var x-ah/edi: (addr handle cell) <- copy _x-ah
 16   var x/eax: (addr cell) <- lookup *x-ah
 17 +-- 14 lines: # trace x-ah --------------------------------------------------------------------------------------------------------------------------------------------------------------
 31   trace-lower trace
 32 #?   {
 33 #?     var foo/eax: int <- copy x
 34 #?     draw-int32-hex-wrapping-right-then-down-from-cursor-over-full-screen 0/screen, foo, 7/fg 0/bg
 35 #?   }
 36 #?   dump-cell-from-cursor-over-full-screen x-ah, 5/fg 0/bg
 37   # null? return
 38   compare x, 0
 39   {
 40     break-if-!=
 41     trace-higher trace
 42     trace-text trace, "infix", "=> NULL"
 43     return
 44   }
 45   # nil? return
 46   {
 47     var nil?/eax: boolean <- nil? x
 48     compare nil?, 0/false
 49     break-if-=
 50     trace-higher trace
 51     trace-text trace, "infix", "=> nil"
 52     return
 53   }
 54   var x-type/ecx: (addr int) <- get x, type
 55   # symbol? maybe break it down into a pair
 56   {
 57     compare *x-type, 2/symbol
 58     break-if-!=
 59     tokenize-infix x-ah, trace
 60   }
 61   # not a pair? return
 62 #?   draw-text-wrapping-right-then-down-from-cursor-over-full-screen 0/screen, "a", 4/fg 0/bg
 63 #?   draw-int32-decimal-wrapping-right-then-down-from-cursor-over-full-screen 0/screen, *x-type, 5/fg 0/bg
 64   {
 65     compare *x-type, 0/pair
 66     break-if-=
 67     trace-higher trace
 68 +-- 15 lines: # trace "=> " x-ah --------------------------------------------------------------------------------------------------------------------------------------------------------
 83 #?     draw-text-wrapping-right-then-down-from-cursor-over-full-screen 0/screen, "^", 4/fg 0/bg
 84     return
 85   }
 86 #?   draw-text-wrapping-right-then-down-from-cursor-over-full-screen 0/screen, "b", 4/fg 0/bg
 87   # singleton operator? unwrap
 88   {
 89     var first-ah/ecx: (addr handle cell) <- get x, left
 90     {
 91       var first/eax: (addr cell) <- lookup *first-ah
 92       var infix?/eax: boolean <- operator-symbol? first
 93       compare infix?, 0/false
 94     }
 95     break-if-=
 96     var rest-ah/eax: (addr handle cell) <- get x, right
 97     var rest/eax: (addr cell) <- lookup *rest-ah
 98     var rest-nil?/eax: boolean <- nil? rest
 99     compare rest-nil?, 0/false
100     break-if-=
101     copy-object first-ah, x-ah
102     trace-higher trace
103 +-- 15 lines: # trace "=> " x-ah --------------------------------------------------------------------------------------------------------------------------------------------------------
118     return
119   }
120 #?   draw-text-wrapping-right-then-down-from-cursor-over-full-screen 0/screen, "c", 4/fg 0/bg
121   ## non-singleton pair
122   # try to "pinch out" (op expr op ...) into ((op expr) op ...)
123   # (op expr expr ...) => operator in prefix position; do nothing
124   {
125     compare at-head-of-list?, 0/false
126     break-if-=
127     var first-ah/ecx: (addr handle cell) <- get x, left
128     var rest-ah/esi: (addr handle cell) <- get x, right
129     var first/eax: (addr cell) <- lookup *first-ah
130     var first-infix?/eax: boolean <- operator-symbol? first
131     compare first-infix?, 0/false
132     break-if-=
133     var rest/eax: (addr cell) <- lookup *rest-ah
134     {
135       var continue?/eax: boolean <- not-null-not-nil-pair? rest
136       compare continue?, 0/false
137     }
138     break-if-=
139     var second-ah/edx: (addr handle cell) <- get rest, left
140     rest-ah <- get rest, right
141     var rest/eax: (addr cell) <- lookup *rest-ah
142     {
143       var continue?/eax: boolean <- not-null-not-nil-pair? rest
144       compare continue?, 0/false
145     }
146     break-if-=
147     var third-ah/ebx: (addr handle cell) <- get rest, left
148     {
149       var third/eax: (addr cell) <- lookup *third-ah
150       var third-is-operator?/eax: boolean <- operator-symbol? third
151       compare third-is-operator?, 0/false
152     }
153     break-if-=
154     # if first and third are operators, bud out first two
155     var saved-rest-h: (handle cell)
156     var saved-rest-ah/eax: (addr handle cell) <- address saved-rest-h
157     copy-object rest-ah, saved-rest-ah
158     nil rest-ah
159     var result-h: (handle cell)
160     var result-ah/eax: (addr handle cell) <- address result-h
161     new-pair result-ah, *x-ah, saved-rest-h
162     # save
163     copy-object result-ah, x-ah
164     # there was a mutation; rerun
165     transform-infix-2 x-ah, trace, 1/at-head-of-list
166   }
167   # try to "pinch out" (... expr op expr ...) pattern
168   $transform-infix-2:pinch: {
169     # scan past first three elements
170     var first-ah/ecx: (addr handle cell) <- get x, left
171     var rest-ah/esi: (addr handle cell) <- get x, right
172     {
173       var quote-or-unquote?/eax: boolean <- quote-or-unquote? first-ah
174       compare quote-or-unquote?, 0/false
175     }
176     break-if-!=
177     var rest/eax: (addr cell) <- lookup *rest-ah
178     {
179       var continue?/eax: boolean <- not-null-not-nil-pair? rest
180       compare continue?, 0/false
181     }
182     break-if-=
183 #?     draw-text-wrapping-right-then-down-from-cursor-over-full-screen 0/screen, "d", 4/fg 0/bg
184 #?     dump-cell-from-cursor-over-full-screen rest-ah, 7/fg 0/bg
185     var second-ah/edx: (addr handle cell) <- get rest, left
186     rest-ah <- get rest, right
187     var rest/eax: (addr cell) <- lookup *rest-ah
188     {
189       var continue?/eax: boolean <- not-null-not-nil-pair? rest
190       compare continue?, 0/false
191     }
192     break-if-=
193 #?     draw-text-wrapping-right-then-down-from-cursor-over-full-screen 0/screen, "e", 4/fg 0/bg
194     var third-ah/ebx: (addr handle cell) <- get rest, left
195     rest-ah <- get rest, right
196     # if second is not an operator, break
197     {
198       var second/eax: (addr cell) <- lookup *second-ah
199       var infix?/eax: boolean <- operator-symbol? second
200       compare infix?, 0/false
201     }
202     break-if-=
203 #?     draw-text-wrapping-right-then-down-from-cursor-over-full-screen 0/screen, "f", 4/fg 0/bg
204     # swap the top 2
205     swap-cells first-ah, second-ah
206     ## if we're at the head of the list and there's just three elements, stop there
207     {
208       compare at-head-of-list?, 0/false
209       break-if-=
210       rest <- lookup *rest-ah
211       var rest-nil?/eax: boolean <- nil? rest
212       compare rest-nil?, 0/false
213       break-if-!= $transform-infix-2:pinch
214     }
215     ## otherwise perform a more complex 'rotation'
216 #?     draw-text-wrapping-right-then-down-from-cursor-over-full-screen 0/screen, "g", 4/fg 0/bg
217     # save and clear third->right
218     var saved-rest-h: (handle cell)
219     var saved-rest-ah/eax: (addr handle cell) <- address saved-rest-h
220     copy-object rest-ah, saved-rest-ah
221     nil rest-ah
222     # create new-node out of first..third and rest
223     var result-h: (handle cell)
224     var result-ah/eax: (addr handle cell) <- address result-h
225     new-pair result-ah, *x-ah, saved-rest-h
226     # save
227     copy-object result-ah, x-ah
228     # there was a mutation; rerun
229     transform-infix-2 x-ah, trace, 1/at-head-of-list
230     return
231   }
232   # recurse
233 #?   dump-cell-from-cursor-over-full-screen x-ah, 1/fg 0/bg
234   var left-ah/ecx: (addr handle cell) <- get x, left
235 #?   draw-text-wrapping-right-then-down-from-cursor-over-full-screen 0/screen, "x", 1/fg 0/bg
236 #?   dump-cell-from-cursor-over-full-screen left-ah, 2/fg 0/bg
237   transform-infix-2 left-ah, trace, 1/at-head-of-list
238   var right-ah/edx: (addr handle cell) <- get x, right
239 #?   draw-text-wrapping-right-then-down-from-cursor-over-full-screen 0/screen, "y", 1/fg 0/bg
240 #?   dump-cell-from-cursor-over-full-screen right-ah, 3/fg 0/bg
241   var right-at-head-of-list?/eax: boolean <- copy at-head-of-list?
242   {
243     compare right-at-head-of-list?, 0/false
244     break-if-=
245     # if left is a quote or unquote, cdr is still head of list
246     {
247       var left-is-quote-or-unquote?/eax: boolean <- quote-or-unquote? left-ah
248       compare left-is-quote-or-unquote?, 0/false
249     }
250     break-if-!=
251     right-at-head-of-list? <- copy 0/false
252   }
253   transform-infix-2 right-ah, trace, right-at-head-of-list?
254 #?   draw-text-wrapping-right-then-down-from-cursor-over-full-screen 0/screen, "z", 1/fg 0/bg
255   trace-higher trace
256 +-- 15 lines: # trace "=> " x-ah --------------------------------------------------------------------------------------------------------------------------------------------------------
271 }
272 
273 fn not-null-not-nil-pair? _x: (addr cell) -> _/eax: boolean {
274   var x/esi: (addr cell) <- copy _x
275   compare x, 0
276   {
277     break-if-!=
278     return 0/false
279   }
280   var x-type/eax: (addr int) <- get x, type
281   compare *x-type, 0/pair
282   {
283     break-if-=
284     return 0/false
285   }
286   var nil?/eax: boolean <- nil? x
287   compare nil?, 0/false
288   {
289     break-if-=
290     return 0/false
291   }
292   return 1/true
293 }
294 
295 fn swap-cells a-ah: (addr handle cell), b-ah: (addr handle cell) {
296   var tmp-h: (handle cell)
297   var tmp-ah/eax: (addr handle cell) <- address tmp-h
298   copy-object a-ah, tmp-ah
299   copy-object b-ah, a-ah
300   copy-object tmp-ah, b-ah
301 }
302 
303 fn tokenize-infix _sym-ah: (addr handle cell), trace: (addr trace) {
304   var sym-ah/eax: (addr handle cell) <- copy _sym-ah
305   var sym/eax: (addr cell) <- lookup *sym-ah
306   var sym-data-ah/eax: (addr handle stream byte) <- get sym, text-data
307   var _sym-data/eax: (addr stream byte) <- lookup *sym-data-ah
308   var sym-data/esi: (addr stream byte) <- copy _sym-data
309   rewind-stream sym-data
310   # read sym into a gap buffer and insert spaces in a few places
311   var buffer-storage: gap-buffer
312   var buffer/edi: (addr gap-buffer) <- address buffer-storage
313   initialize-gap-buffer buffer, 0x40/max-symbol-size
314   # scan for first non-$
315   var g/eax: grapheme <- read-grapheme sym-data
316   add-grapheme-at-gap buffer, g
317   {
318     compare g, 0x24/dollar
319     break-if-!=
320     {
321       var done?/eax: boolean <- stream-empty? sym-data
322       compare done?, 0/false
323       break-if-=
324       return  # symbol is all '$'s; do nothing
325     }
326     g <- read-grapheme sym-data
327     add-grapheme-at-gap buffer, g
328     loop
329   }
330   var tokenization-needed?: boolean
331   var _operator-so-far?/eax: boolean <- operator-grapheme? g
332   var operator-so-far?/ecx: boolean <- copy _operator-so-far?
333   {
334     var done?/eax: boolean <- stream-empty? sym-data
335     compare done?, 0/false
336     break-if-!=
337     var g/eax: grapheme <- read-grapheme sym-data
338     {
339       var curr-operator?/eax: boolean <- operator-grapheme? g
340       compare curr-operator?, operator-so-far?
341       break-if-=
342       # state change; insert a space
343       add-grapheme-at-gap buffer, 0x20/space
344       operator-so-far? <- copy curr-operator?
345       copy-to tokenization-needed?, 1/true
346     }
347     add-grapheme-at-gap buffer, g
348     loop
349   }
350   compare tokenization-needed?, 0/false
351   break-if-=
352 #?   {
353 #?     var dummy1/eax: int <- copy 0
354 #?     var dummy2/ecx: int <- copy 0
355 #?     dummy1, dummy2 <- render-gap-buffer-wrapping-right-then-down 0/screen, buffer, 0x20/xmin 5/ymin, 0x80/xmax 0x30/ymax, 0/no-cursor, 3/fg 0/bg
356 #?     {
357 #?       loop
358 #?     }
359 #?   }
360   # recursively process buffer
361   # this time we're guaranteed we won't enter tokenize-infix
362   read-cell buffer, _sym-ah, trace
363 }
364 
365 fn test-infix {
366   check-infix "abc", "abc", "F - test-infix/regular-symbol"
367   check-infix "-3", "-3", "F - test-infix/negative-integer-literal"
368   check-infix "[a b+c]", "[a b+c]", "F - test-infix/string-literal"
369   check-infix "$", "$", "F - test-infix/dollar-sym"
370   check-infix "$$", "$$", "F - test-infix/dollar-sym-2"
371   check-infix "$a", "$a", "F - test-infix/dollar-var"
372   check-infix "$+", "$+", "F - test-infix/dollar-operator"
373   check-infix "(+)", "+", "F - test-infix/operator-without-args"
374   check-infix "(= (+) 3)", "(= + 3)", "F - test-infix/operator-without-args-2"
375   check-infix "($+)", "$+", "F - test-infix/dollar-operator-without-args"
376   check-infix "',(a + b)", "',(+ a b)", "F - test-infix/nested-quotes"
377   check-infix "',(+)", "',+", "F - test-infix/nested-quotes-2"
378   check-infix "(a + b)", "(+ a b)", "F - test-infix/simple-list"
379   check-infix "(a (+) b)", "(a + b)", "F - test-infix/wrapped-operator"
380   check-infix "(+ a b)", "(+ a b)", "F - test-infix/prefix-operator"
381   check-infix "(a . b)", "(a . b)", "F - test-infix/dot-operator"
382   check-infix "(a b . c)", "(a b . c)", "F - test-infix/dotted-list"
383   check-infix "(+ . b)", "(+ . b)", "F - test-infix/dotted-list-with-operator"
384   check-infix "(+ a)", "(+ a)", "F - test-infix/unary-operator"
385   check-infix "((a + b))", "((+ a b))", "F - test-infix/nested-list"
386   check-infix "(do (a + b))", "(do (+ a b))", "F - test-infix/nested-list-2"
387   check-infix "(a = (a + 1))", "(= a (+ a 1))", "F - test-infix/nested-list-3"
388   check-infix "(a + b + c)", "(+ (+ a b) c)", "F - test-infix/left-associative"
389   check-infix "(f a + b)", "(f (+ a b))", "F - test-infix/higher-precedence-than-call"
390   check-infix "(f a + b c + d)", "(f (+ a b) (+ c d))", "F - test-infix/multiple"
391   check-infix "+a", "(+ a)", "F - test-infix/unary-operator-2"
392   check-infix "(+a)", "((+ a))", "F - test-infix/unary-operator-3"
393   check-infix "-a", "(- a)", "F - test-infix/unary-operator-4"
394   check-infix "a+b", "(+ a b)", "F - test-infix/no-spaces"
395   check-infix "3+1", "(+ 3 1)", "F - test-infix/no-spaces-starting-with-digit"
396   check-infix "',a+b", "',(+ a b)", "F - test-infix/no-spaces-with-nested-quotes"
397   check-infix "$a+b", "(+ $a b)", "F - test-infix/no-spaces-2"
398   check-infix "-a+b", "(+ (- a) b)", "F - test-infix/unary-over-binary"
399   check-infix "~a+b", "(+ (~ a) b)", "F - test-infix/unary-complement"
400   check-infix "(n * n-1)", "(* n (- n 1))", "F - test-infix/no-spaces-over-spaces"
401   check-infix "`(a + b)", "`(+ a b)", "F - test-infix/backquote"
402   check-infix "`(+ a b)", "`(+ a b)", "F - test-infix/backquote-2"
403   check-infix ",@a+b", ",@(+ a b)", "F - test-infix/unquote-splice"
404   check-infix ",@(a + b)", ",@(+ a b)", "F - test-infix/unquote-splice-2"
405 }
406 
407 # helpers
408 
409 fn operator-symbol? _x: (addr cell) -> _/eax: boolean {
410   var x/esi: (addr cell) <- copy _x
411   {
412     var x-type/eax: (addr int) <- get x, type
413     compare *x-type, 2/symbol
414     break-if-=
415     return 0/false
416   }
417   var x-data-ah/eax: (addr handle stream byte) <- get x, text-data
418   var _x-data/eax: (addr stream byte) <- lookup *x-data-ah
419   var x-data/esi: (addr stream byte) <- copy _x-data
420   rewind-stream x-data
421   var g/eax: grapheme <- read-grapheme x-data
422   # special case: '$' is reserved for gensyms, and can work with either
423   # operator or non-operator symbols.
424   {
425     compare g, 0x24/dollar
426     break-if-!=
427     {
428       var all-dollars?/eax: boolean <- stream-empty? x-data
429       compare all-dollars?, 0/false
430       break-if-=
431       # '$', '$$', '$$$', etc. are regular symbols
432       return 0/false
433     }
434     g <- read-grapheme x-data
435     loop
436   }
437   {
438     {
439       var result/eax: boolean <- operator-grapheme? g
440       compare result, 0/false
441       break-if-!=
442       return 0/false
443     }
444     {
445       var done?/eax: boolean <- stream-empty? x-data
446       compare done?, 0/false
447     }
448     break-if-!=
449     g <- read-grapheme x-data
450     loop
451   }
452   return 1/true
453 }
454 
455 # just a short list of operator graphemes for now
456 fn operator-grapheme? g: grapheme -> _/eax: boolean {
457   # '$' is special and can be in either a symbol or operator; here we treat it as a symbol
458   compare g, 0x25/percent
459   {
460     break-if-!=
461     return 1/true
462   }
463   compare g, 0x26/ampersand
464   {
465     break-if-!=
466     return 1/true
467   }
468   compare g, 0x2a/asterisk
469   {
470     break-if-!=
471     return 1/true
472   }
473   compare g, 0x2b/plus
474   {
475     break-if-!=
476     return 1/true
477   }
478   compare g, 0x2d/dash  # '-' not allowed in symbols
479   {
480     break-if-!=
481     return 1/true
482   }
483   compare g, 0x2e/period
484   {
485     break-if-!=
486     return 1/true
487   }
488   compare g, 0x2f/slash
489   {
490     break-if-!=
491     return 1/true
492   }
493   compare g, 0x3a/colon
494   {
495     break-if-!=
496     return 1/true
497   }
498   compare g, 0x3b/semi-colon
499   {
500     break-if-!=
501     return 1/true
502   }
503   compare g, 0x3c/less-than
504   {
505     break-if-!=
506     return 1/true
507   }
508   compare g, 0x3d/equal
509   {
510     break-if-!=
511     return 1/true
512   }
513   compare g, 0x3e/greater-than
514   {
515     break-if-!=
516     return 1/true
517   }
518   # '?' is a symbol char
519   compare g, 0x5c/backslash
520   {
521     break-if-!=
522     return 1/true
523   }
524   compare g, 0x5e/caret
525   {
526     break-if-!=
527     return 1/true
528   }
529   # '_' is a symbol char
530   compare g, 0x7c/vertical-line
531   {
532     break-if-!=
533     return 1/true
534   }
535   compare g, 0x7e/tilde
536   {
537     break-if-!=
538     return 1/true
539   }
540   return 0/false
541 }
542 
543 fn quote-or-unquote? _x-ah: (addr handle cell) -> _/eax: boolean {
544   var x-ah/eax: (addr handle cell) <- copy _x-ah
545   var x/eax: (addr cell) <- lookup *x-ah
546   {
547     var quote?/eax: boolean <- symbol-equal? x, "'"
548     compare quote?, 0/false
549     break-if-=
550     return 1/true
551   }
552   {
553     var backquote?/eax: boolean <- symbol-equal? x, "`"
554     compare backquote?, 0/false
555     break-if-=
556     return 1/true
557   }
558   {
559     var unquote?/eax: boolean <- symbol-equal? x, ","
560     compare unquote?, 0/false
561     break-if-=
562     return 1/true
563   }
564   {
565     var unquote-splice?/eax: boolean <- symbol-equal? x, ",@"
566     compare unquote-splice?, 0/false
567     break-if-=
568     return 1/true
569   }
570   return 0/false
571 }
572 
573 # helpers for tests
574 
575 fn check-infix actual: (addr array byte), expected: (addr array byte), message: (addr array byte) {
576   var trace-storage: trace
577   var trace/edx: (addr trace) <- address trace-storage
578 #?   initialize-trace trace, 1/only-errors, 0x10/capacity, 0/visible
579   initialize-trace trace, 0x10/levels, 0x1000/capacity, 0/visible
580   #
581   var actual-buffer-storage: gap-buffer
582   var actual-buffer/eax: (addr gap-buffer) <- address actual-buffer-storage
583   initialize-gap-buffer-with actual-buffer, actual
584   var actual-tree-h: (handle cell)
585   var actual-tree-ah/esi: (addr handle cell) <- address actual-tree-h
586   read-cell actual-buffer, actual-tree-ah, trace
587 #?   dump-trace-with-label trace, "infix"
588 #?   dump-cell-from-cursor-over-full-screen actual-tree-ah, 7/fg 0/bg
589   var _actual-tree/eax: (addr cell) <- lookup *actual-tree-ah
590   var actual-tree/esi: (addr cell) <- copy _actual-tree
591   #
592   var expected-buffer-storage: gap-buffer
593   var expected-buffer/eax: (addr gap-buffer) <- address expected-buffer-storage
594   initialize-gap-buffer-with expected-buffer, expected
595   var expected-tree-h: (handle cell)
596   var expected-tree-ah/edi: (addr handle cell) <- address expected-tree-h
597   read-without-infix expected-buffer, expected-tree-ah, trace
598   var expected-tree/eax: (addr cell) <- lookup *expected-tree-ah
599   #
600   var match?/eax: boolean <- cell-isomorphic? actual-tree, expected-tree, trace
601   check match?, message
602 }
603 
604 fn read-without-infix in: (addr gap-buffer), out: (addr handle cell), trace: (addr trace) {
605   # eagerly tokenize everything so that the phases are easier to see in the trace
606   var tokens-storage: (stream token 0x400)
607   var tokens/edx: (addr stream token) <- address tokens-storage
608   tokenize in, tokens, trace
609   var error?/eax: boolean <- has-errors? trace
610   compare error?, 0/false
611   {
612     break-if-=
613     dump-trace trace
614     return
615   }
616   # insert more parens based on indentation
617   var parenthesized-tokens-storage: (stream token 0x400)
618   var parenthesized-tokens/ecx: (addr stream token) <- address parenthesized-tokens-storage
619   parenthesize tokens, parenthesized-tokens, trace
620   var error?/eax: boolean <- has-errors? trace
621   compare error?, 0/false
622   {
623     break-if-=
624     dump-trace trace
625     return
626   }
627   parse-input parenthesized-tokens, out, trace
628 }