https://github.com/akkartik/mu/blob/main/shell/parse.mu
  # Parse exactly one s-expression from 'tokens' into 'out'.
  #
  # 'tokens' is rewound first, so parsing always starts at the first token.
  # Problems are reported through 'error' on 'trace':
  #   - there are no tokens at all
  #   - the top-level parse reports an unmatched ')'
  #   - the top-level parse reports a '.'
  #   - tokens are left over after one complete expression
  fn parse-input tokens: (addr stream cell), out: (addr handle cell), trace: (addr trace) {
    rewind-stream tokens
    var empty?/eax: boolean <- stream-empty? tokens
    compare empty?, 0/false
    {
      break-if-=
      error trace, "nothing to parse"
      return
    }
    var close-paren?/eax: boolean <- copy 0/false
    var dot?/ecx: boolean <- copy 0/false
    close-paren?, dot? <- parse-sexpression tokens, out, trace
    {
      compare close-paren?, 0/false
      break-if-=
      error trace, "')' is not a valid expression"
      return
    }
    # a '.' is only meaningful inside a list, where the recursive calls in
    # parse-sexpression consume the flag; if it reaches us it's an error
    {
      compare dot?, 0/false
      break-if-=
      error trace, "'.' is not a valid expression"
      return
    }
    # one expression per input: anything still unread is an error
    {
      var empty?/eax: boolean <- stream-empty? tokens
      compare empty?, 0/false
      break-if-!=
      error trace, "unexpected tokens at end; only type in a single expression at a time"
    }
  }
 26 
 # Parse one s-expression starting at the next token of 'tokens' and write
 # the result to '_out'.
 #
 # return values:
 #   unmatched close-paren encountered? (also set when 'tokens' runs out)
 #   dot encountered? (only used internally by recursive calls)
 #
 # Every exit path undoes the 'trace-lower' at the top, so the caller sees the
 # same trace depth before and after the call.
 fn parse-sexpression tokens: (addr stream cell), _out: (addr handle cell), trace: (addr trace) -> _/eax: boolean, _/ecx: boolean {
   trace-text trace, "read", "parse"
   trace-lower trace
   var curr-token-storage: cell
   var curr-token/ecx: (addr cell) <- address curr-token-storage
   var empty?/eax: boolean <- stream-empty? tokens
   compare empty?, 0/false
   {
     break-if-=
     error trace, "end of stream; never found a balancing ')'"
     trace-higher trace
     # reported as a close-paren so that enclosing list loops stop
     return 1/true, 0/false
   }
   read-from-stream tokens, curr-token
   $parse-sexpression:type-check: {
     # single quote -> parse as list with a special car
     var quote-token?/eax: boolean <- quote-token? curr-token
     compare quote-token?, 0/false
     {
       break-if-=
       # result is a pair: left is the symbol "'", right is the expression that follows
       var out/edi: (addr handle cell) <- copy _out
       allocate-pair out
       var out-addr/eax: (addr cell) <- lookup *out
       var left-ah/edx: (addr handle cell) <- get out-addr, left
       new-symbol left-ah, "'"
       var right-ah/edx: (addr handle cell) <- get out-addr, right
       var close-paren?/eax: boolean <- copy 0/false
       var dot?/ecx: boolean <- copy 0/false
       close-paren?, dot? <- parse-sexpression tokens, right-ah, trace
       trace-higher trace
       # pass the flags from the quoted expression straight up
       return close-paren?, dot?
     }
     # dot -> return
     var dot?/eax: boolean <- dot-token? curr-token
     compare dot?, 0/false
     {
       break-if-=
       trace-higher trace
       return 0/false, 1/true
     }
     # not bracket -> parse atom
     var bracket-token?/eax: boolean <- bracket-token? curr-token
     compare bracket-token?, 0/false
     {
       break-if-!=
       parse-atom curr-token, _out, trace
       break $parse-sexpression:type-check
     }
     # open paren -> parse list
     var open-paren?/eax: boolean <- open-paren-token? curr-token
     compare open-paren?, 0/false
     {
       break-if-=
       # 'curr' always points at the handle to fill in next
       var curr/esi: (addr handle cell) <- copy _out
       allocate-pair curr
       var curr-addr/eax: (addr cell) <- lookup *curr
       var left/edx: (addr handle cell) <- get curr-addr, left
       {
         # first element goes directly into the left slot of the first pair
         var close-paren?/eax: boolean <- copy 0/false
         var dot?/ecx: boolean <- copy 0/false
         close-paren?, dot? <- parse-sexpression tokens, left, trace
         {
           compare dot?, 0/false
           break-if-=
           error trace, "'.' cannot be at the start of a list"
           trace-higher trace
           return 1/true, dot?
         }
         # ')' right after '(' -> the freshly allocated pair is the whole result
         compare close-paren?, 0/false
         break-if-!=
         var curr-addr/eax: (addr cell) <- lookup *curr
         curr <- get curr-addr, right
         # later elements are parsed into 'tmp' first, because we don't know
         # whether the next token is an element, a '.', or the closing ')'
         var tmp-storage: (handle cell)
         var tmp/edx: (addr handle cell) <- address tmp-storage
         $parse-sexpression:list-loop: {
           var close-paren?/eax: boolean <- copy 0/false
           var dot?/ecx: boolean <- copy 0/false
           close-paren?, dot? <- parse-sexpression tokens, tmp, trace
           # '.' -> clean up right here and return
           compare dot?, 0/false
           {
             break-if-=
             # parse-dot-tail parses the tail into 'curr' and checks for the closing ')'
             parse-dot-tail tokens, curr, trace
             trace-higher trace
             return 0/false, 0/false
           }
           # allocated even when we're about to stop at ')'
           # NOTE(review): presumably a fresh pair doubles as the list terminator -- confirm against allocate-pair
           allocate-pair curr
           # ')' -> return
           compare close-paren?, 0/false
           break-if-!=
           var curr-addr/eax: (addr cell) <- lookup *curr
           var left/ecx: (addr handle cell) <- get curr-addr, left
           copy-object tmp, left
           # advance to the right slot of the pair we just filled
           curr <- get curr-addr, right
           loop
         }
       }
       break $parse-sexpression:type-check
     }
     # close paren -> return
     var close-paren?/eax: boolean <- close-paren-token? curr-token
     compare close-paren?, 0/false
     {
       break-if-=
       trace-higher trace
       return 1/true, 0/false
     }
     # otherwise abort
     # (a bracket token that is neither '(' nor ')': log it under "error" and fall through)
     var stream-storage: (stream byte 0x40)
     var stream/edx: (addr stream byte) <- address stream-storage
     write stream, "unexpected token "
     var curr-token-data-ah/eax: (addr handle stream byte) <- get curr-token, text-data
     var curr-token-data/eax: (addr stream byte) <- lookup *curr-token-data-ah
     rewind-stream curr-token-data
     write-stream stream, curr-token-data
     trace trace, "error", stream
   }
   trace-higher trace
   return 0/false, 0/false
 }
147 
# Turn a single non-bracket token into a cell stored in '_out'.
#
# Number tokens: the token text is parsed with parse-decimal-int-from-stream,
# converted to a float and stored in a freshly allocated number cell.
# All other tokens: a stream cell (for stream tokens) or a symbol cell
# (otherwise) is allocated and the token's text-data handle is copied into it.
# Each step is logged to 'trace' under the "read" label.
fn parse-atom _curr-token: (addr cell), _out: (addr handle cell), trace: (addr trace) {
  trace-text trace, "read", "parse atom"
  var tok/ecx: (addr cell) <- copy _curr-token
  var tok-text-ah/eax: (addr handle stream byte) <- get tok, text-data
  var _tok-text/eax: (addr stream byte) <- lookup *tok-text-ah
  var tok-text/esi: (addr stream byte) <- copy _tok-text
  trace trace, "read", tok-text
  # case 1: number token
  {
    var is-number?/eax: boolean <- number-token? tok
    compare is-number?, 0/false
    break-if-=
    # read the digits from the start of the token text
    rewind-stream tok-text
    var _n/eax: int <- parse-decimal-int-from-stream tok-text
    var n/ecx: int <- copy _n
    var n-float/xmm0: float <- convert n
    # store the value in a new number cell
    allocate-number _out
    var result-ah/eax: (addr handle cell) <- copy _out
    var result/eax: (addr cell) <- lookup *result-ah
    var number-slot/edi: (addr float) <- get result, number-data
    copy-to *number-slot, n-float
    # log what we produced
    {
      var msg-storage: (stream byte 0x40)
      var msg/ecx: (addr stream byte) <- address msg-storage
      write msg, "=> number "
      print-number result, msg, 0/no-trace
      trace trace, "read", msg
    }
    return
  }
  # case 2: anything else keeps its text; pick the cell type first
  $parse-atom:allocate: {
    var is-stream?/eax: boolean <- stream-token? tok
    compare is-stream?, 0/false
    {
      break-if-=
      allocate-stream _out
      break $parse-atom:allocate
    }
    allocate-symbol _out
  }
  # share the token's text with the new cell
  var result-ah/eax: (addr handle cell) <- copy _out
  var result/eax: (addr cell) <- lookup *result-ah
  var text-dest-ah/edx: (addr handle stream byte) <- get result, text-data
  var text-src-ah/ecx: (addr handle stream byte) <- get tok, text-data
  copy-object text-src-ah, text-dest-ah
  # log what we produced (same label for symbols and streams)
  {
    var msg-storage: (stream byte 0x40)
    var msg/ecx: (addr stream byte) <- address msg-storage
    write msg, "=> symbol "
    print-symbol result, msg, 0/no-trace
    trace trace, "read", msg
  }
}
205 
# Finish a dotted list; parse-sexpression calls this right after reading a '.'
# in the middle of a list.
#
# Exactly one expression followed by ')' is expected:
#   - the expression is parsed straight into '_out'
#   - the token after it must close the list
# Anything else is reported through 'error' on 'trace'.
fn parse-dot-tail tokens: (addr stream cell), _out: (addr handle cell), trace: (addr trace) {
  var saw-close?/eax: boolean <- copy 0/false
  var saw-dot?/ecx: boolean <- copy 0/false
  # the single expression after the '.'
  saw-close?, saw-dot? <- parse-sexpression tokens, _out, trace
  {
    compare saw-close?, 0/false
    break-if-=
    error trace, "'. )' makes no sense"
    return
  }
  {
    compare saw-dot?, 0/false
    break-if-=
    error trace, "'. .' makes no sense"
    return
  }
  # whatever comes next is parsed into a throwaway handle; only the flags matter
  var scratch: (handle cell)
  var scratch-ah/edi: (addr handle cell) <- address scratch
  saw-close?, saw-dot? <- parse-sexpression tokens, scratch-ah, trace
  {
    compare saw-close?, 0/false
    break-if-!=
    error trace, "cannot have multiple expressions between '.' and ')'"
    return
  }
  {
    compare saw-dot?, 0/false
    break-if-=
    error trace, "cannot have two dots in a single list"
    return
  }
}
239 }