https://github.com/akkartik/mu/blob/main/shell/parse.mu
  1 fn parse-input tokens: (addr stream token), out: (addr handle cell), trace: (addr trace) {
  2   rewind-stream tokens
  3   var empty?/eax: boolean <- stream-empty? tokens
  4   compare empty?, 0/false
  5   {
  6     break-if-=
  7     error trace, "nothing to parse"
  8     return
  9   }
 10   var close-paren?/eax: boolean <- copy 0/false
 11   var dot?/ecx: boolean <- copy 0/false
 12   close-paren?, dot? <- parse-sexpression tokens, out, trace
 13   {
 14     compare close-paren?, 0/false
 15     break-if-=
 16     error trace, "')' is not a valid expression"
 17     return
 18   }
 19   {
 20     compare dot?, 0/false
 21     break-if-=
 22     error trace, "'.' is not a valid expression"
 23     return
 24   }
 25   {
 26     var empty?/eax: boolean <- stream-empty? tokens
 27     compare empty?, 0/false
 28     break-if-!=
 29     error trace, "unexpected tokens at end; only type in a single expression at a time"
 30   }
 31 }
 32 
 33 # return values:
 34 #   unmatched close-paren encountered?
 35 #   dot encountered? (only used internally by recursive calls)
 36 fn parse-sexpression tokens: (addr stream token), _out: (addr handle cell), trace: (addr trace) -> _/eax: boolean, _/ecx: boolean {
 37   trace-text trace, "parse", "parse"
 38   trace-lower trace
 39   var curr-token-storage: token
 40   var curr-token/ecx: (addr token) <- address curr-token-storage
 41   var empty?/eax: boolean <- stream-empty? tokens
 42   compare empty?, 0/false
 43   {
 44     break-if-=
 45     error trace, "end of stream; never found a balancing ')'"
 46     trace-higher trace
 47     return 1/true, 0/false
 48   }
 49   read-from-stream tokens, curr-token
 50   $parse-sexpression:type-check: {
 51     # single quote -> parse as list with a special car
 52     var quote-token?/eax: boolean <- quote-token? curr-token
 53     compare quote-token?, 0/false
 54     {
 55       break-if-=
 56       var out/edi: (addr handle cell) <- copy _out
 57       allocate-pair out
 58       var out-addr/eax: (addr cell) <- lookup *out
 59       var left-ah/edx: (addr handle cell) <- get out-addr, left
 60       new-symbol left-ah, "'"
 61       var right-ah/edx: (addr handle cell) <- get out-addr, right
 62       var close-paren?/eax: boolean <- copy 0/false
 63       var dot?/ecx: boolean <- copy 0/false
 64       close-paren?, dot? <- parse-sexpression tokens, right-ah, trace
 65       trace-higher trace
 66       return close-paren?, dot?
 67     }
 68     # backquote quote -> parse as list with a special car
 69     var backquote-token?/eax: boolean <- backquote-token? curr-token
 70     compare backquote-token?, 0/false
 71     {
 72       break-if-=
 73       var out/edi: (addr handle cell) <- copy _out
 74       allocate-pair out
 75       var out-addr/eax: (addr cell) <- lookup *out
 76       var left-ah/edx: (addr handle cell) <- get out-addr, left
 77       new-symbol left-ah, "`"
 78       var right-ah/edx: (addr handle cell) <- get out-addr, right
 79       var close-paren?/eax: boolean <- copy 0/false
 80       var dot?/ecx: boolean <- copy 0/false
 81       close-paren?, dot? <- parse-sexpression tokens, right-ah, trace
 82       trace-higher trace
 83       return close-paren?, dot?
 84     }
 85     # unquote -> parse as list with a special car
 86     var unquote-token?/eax: boolean <- unquote-token? curr-token
 87     compare unquote-token?, 0/false
 88     {
 89       break-if-=
 90       var out/edi: (addr handle cell) <- copy _out
 91       allocate-pair out
 92       var out-addr/eax: (addr cell) <- lookup *out
 93       var left-ah/edx: (addr handle cell) <- get out-addr, left
 94       new-symbol left-ah, ","
 95       var right-ah/edx: (addr handle cell) <- get out-addr, right
 96       var close-paren?/eax: boolean <- copy 0/false
 97       var dot?/ecx: boolean <- copy 0/false
 98       close-paren?, dot? <- parse-sexpression tokens, right-ah, trace
 99       trace-higher trace
100       return close-paren?, dot?
101     }
102     # unquote-splice -> parse as list with a special car
103     var unquote-splice-token?/eax: boolean <- unquote-splice-token? curr-token
104     compare unquote-splice-token?, 0/false
105     {
106       break-if-=
107       var out/edi: (addr handle cell) <- copy _out
108       allocate-pair out
109       var out-addr/eax: (addr cell) <- lookup *out
110       var left-ah/edx: (addr handle cell) <- get out-addr, left
111       new-symbol left-ah, ",@"
112       var right-ah/edx: (addr handle cell) <- get out-addr, right
113       var close-paren?/eax: boolean <- copy 0/false
114       var dot?/ecx: boolean <- copy 0/false
115       close-paren?, dot? <- parse-sexpression tokens, right-ah, trace
116       trace-higher trace
117       return close-paren?, dot?
118     }
119     # dot -> return
120     var dot?/eax: boolean <- dot-token? curr-token
121     compare dot?, 0/false
122     {
123       break-if-=
124       trace-higher trace
125       return 0/false, 1/true
126     }
127     # not bracket -> parse atom
128     var bracket-token?/eax: boolean <- bracket-token? curr-token
129     compare bracket-token?, 0/false
130     {
131       break-if-!=
132       parse-atom curr-token, _out, trace
133       break $parse-sexpression:type-check
134     }
135     # open paren -> parse list
136     var open-paren?/eax: boolean <- open-paren-token? curr-token
137     compare open-paren?, 0/false
138     {
139       break-if-=
140       var curr/esi: (addr handle cell) <- copy _out
141       allocate-pair curr
142       var curr-addr/eax: (addr cell) <- lookup *curr
143       var left/edx: (addr handle cell) <- get curr-addr, left
144       {
145         var close-paren?/eax: boolean <- copy 0/false
146         var dot?/ecx: boolean <- copy 0/false
147         close-paren?, dot? <- parse-sexpression tokens, left, trace
148         {
149           compare dot?, 0/false
150           break-if-=
151           error trace, "'.' cannot be at the start of a list"
152           return 1/true, dot?
153         }
154         compare close-paren?, 0/false
155         break-if-!=
156         var curr-addr/eax: (addr cell) <- lookup *curr
157         curr <- get curr-addr, right
158         var tmp-storage: (handle cell)
159         var tmp/edx: (addr handle cell) <- address tmp-storage
160         $parse-sexpression:list-loop: {
161           var close-paren?/eax: boolean <- copy 0/false
162           var dot?/ecx: boolean <- copy 0/false
163           close-paren?, dot? <- parse-sexpression tokens, tmp, trace
164           # '.' -> clean up right here and return
165           compare dot?, 0/false
166           {
167             break-if-=
168             parse-dot-tail tokens, curr, trace
169             return 0/false, 0/false
170           }
171           allocate-pair curr
172           # ')' -> return
173           compare close-paren?, 0/false
174           break-if-!=
175           var curr-addr/eax: (addr cell) <- lookup *curr
176           var left/ecx: (addr handle cell) <- get curr-addr, left
177           copy-object tmp, left
178           #
179           curr <- get curr-addr, right
180           loop
181         }
182       }
183       break $parse-sexpression:type-check
184     }
185     # close paren -> return
186     var close-paren?/eax: boolean <- close-paren-token? curr-token
187     compare close-paren?, 0/false
188     {
189       break-if-=
190       trace-higher trace
191       return 1/true, 0/false
192     }
193     # otherwise abort
194     var stream-storage: (stream byte 0x400)
195     var stream/edx: (addr stream byte) <- address stream-storage
196     write stream, "unexpected token "
197     var curr-token-data-ah/eax: (addr handle stream byte) <- get curr-token, text-data
198     var curr-token-data/eax: (addr stream byte) <- lookup *curr-token-data-ah
199     rewind-stream curr-token-data
200     write-stream stream, curr-token-data
201     error-stream trace, stream
202   }
203   trace-higher trace
204   return 0/false, 0/false
205 }
206 
# Turn one non-bracket token into a cell stored at '_out':
#   number token  -> number cell holding the token's decimal value as a float
#   stream token  -> stream cell
#   anything else -> symbol cell
# For stream and symbol cells the token's text-data handle is copied into the cell.
# When tracing is enabled the result is also logged under the "parse" label.
fn parse-atom _curr-token: (addr token), _out: (addr handle cell), trace: (addr trace) {
  trace-text trace, "parse", "parse atom"
  var tok/ecx: (addr token) <- copy _curr-token
  var text-ah/eax: (addr handle stream byte) <- get tok, text-data
  var _text/eax: (addr stream byte) <- lookup *text-ah
  var text/esi: (addr stream byte) <- copy _text
  trace trace, "parse", text
  # numbers are handled completely inside this block
  {
    var is-number?/eax: boolean <- number-token? tok
    compare is-number?, 0/false
    break-if-=
    rewind-stream text
    var _n/eax: int <- parse-decimal-int-from-stream text
    var n/ecx: int <- copy _n
    var n-float/xmm0: float <- convert n
    allocate-number _out
    var result-ah/eax: (addr handle cell) <- copy _out
    var result/eax: (addr cell) <- lookup *result-ah
    var slot/edi: (addr float) <- get result, number-data
    copy-to *slot, n-float
    {
      # the check lives in its own block, presumably so that eax ('result')
      # is usable again below while the comparison still drives break-if-=
      {
        var tracing?/eax: boolean <- should-trace? trace
        compare tracing?, 0/false
      }
      break-if-=
      var buf-storage: (stream byte 0x400)
      var buf/ecx: (addr stream byte) <- address buf-storage
      write buf, "=> number "
      var scratch-trace-storage: trace
      var scratch-trace/edi: (addr trace) <- address scratch-trace-storage
      initialize-trace scratch-trace, 1/only-errors, 0x10/capacity, 0/visible
      print-number result, buf, scratch-trace
      trace trace, "parse", buf
    }
    return
  }
  # not a number: allocate either a stream cell or a symbol cell
  $parse-atom:allocate: {
    var is-stream?/eax: boolean <- stream-token? tok
    compare is-stream?, 0/false
    {
      break-if-=
      allocate-stream _out
      break $parse-atom:allocate
    }
    allocate-symbol _out
  }
  # copy the token's text-data handle into the new cell
  var src-ah/ecx: (addr handle stream byte) <- get tok, text-data
  var result-ah/eax: (addr handle cell) <- copy _out
  var result/eax: (addr cell) <- lookup *result-ah
  var dest-ah/edx: (addr handle stream byte) <- get result, text-data
  copy-object src-ah, dest-ah
  {
    # same eax-preserving check as in the number case above
    {
      var tracing?/eax: boolean <- should-trace? trace
      compare tracing?, 0/false
    }
    break-if-=
    var buf-storage: (stream byte 0x40000)
    var buf/ecx: (addr stream byte) <- address buf-storage
    write buf, "=> symbol "
    var scratch-trace-storage: trace
    var scratch-trace/edi: (addr trace) <- address scratch-trace-storage
    initialize-trace scratch-trace, 1/only-errors, 0x10/capacity, 0/visible
    print-symbol result, buf, scratch-trace
    trace trace, "parse", buf
  }
}
280 
# Parse what follows a '.' inside a list: exactly one expression, then ')'.
# The expression is parsed into '_out' (the caller in parse-sexpression passes
# the 'right' slot of the list's last pair). The closing ')' is consumed here.
# Any violation is reported by appending an error to 'trace'.
fn parse-dot-tail tokens: (addr stream token), _out: (addr handle cell), trace: (addr trace) {
  var out/edi: (addr handle cell) <- copy _out
  var close-paren?/eax: boolean <- copy 0/false
  var dot?/ecx: boolean <- copy 0/false
  # the expression after the '.'
  close-paren?, dot? <- parse-sexpression tokens, out, trace
  compare close-paren?, 0/false
  {
    break-if-=
    error trace, "'. )' makes no sense"
    return
  }
  compare dot?, 0/false
  {
    break-if-=
    error trace, "'. .' makes no sense"
    return
  }
  # the next thing must be the closing ')'; whatever gets parsed is discarded
  var dummy: (handle cell)
  var dummy-ah/edi: (addr handle cell) <- address dummy
  close-paren?, dot? <- parse-sexpression tokens, dummy-ah, trace
  # check for a second '.' first: a '.' comes back with close-paren? = false,
  # so testing close-paren? first would report it as "multiple expressions"
  compare dot?, 0/false
  {
    break-if-=
    error trace, "cannot have two dots in a single list"
    return
  }
  compare close-paren?, 0/false
  {
    break-if-!=
    error trace, "cannot have multiple expressions between '.' and ')'"
    return
  }
}