https://github.com/akkartik/mu/blob/main/shell/parse.mu
  1 fn parse-input tokens: (addr stream cell), out: (addr handle cell), trace: (addr trace) {
      # Top-level entry point: parse exactly one expression from 'tokens' into 'out'.
      #   tokens: stream of token cells; rewound here, so parsing always starts at the beginning
      #   out: handle that receives the parsed cell (filled in by parse-sexpression)
      #   trace: receives an error for each problem detected below
      # No value is returned; every failure is reported only via 'error trace, ...'.
  2   rewind-stream tokens
      # an empty token stream is an error
  3   var empty?/eax: boolean <- stream-empty? tokens
      # the flags set by this compare are consumed by the break-if-= inside the block
  4   compare empty?, 0/false
  5   {
  6     break-if-=
  7     error trace, "nothing to parse"
  8     return
  9   }
      # parse-sexpression returns two flags: (unmatched ')' seen?, '.' seen?)
      # the second one lands in dummy? and is never read in this function
 10   var close-paren?/eax: boolean <- copy 0/false
 11   var dummy?/ecx: boolean <- copy 0/false
 12   close-paren?, dummy? <- parse-sexpression tokens, out, trace
      # a close paren reported at this level has no open paren to match
 13   {
 14     compare close-paren?, 0/false
 15     break-if-=
 16     error trace, "')' is not a valid expression"
 17     return
 18   }
      # after one expression the stream must be exhausted; anything left over is an error
 19   {
 20     var empty?/eax: boolean <- stream-empty? tokens
 21     compare empty?, 0/false
 22     break-if-!=
 23     error trace, "unexpected tokens at end; only type in a single expression at a time"
 24   }
 25 }
 26 
 27 # return values:
 28 #   unmatched close-paren encountered?
 29 #   dot encountered? (only used internally by recursive calls)
    #
    # Reads one token from 'tokens' and builds the corresponding cell in '_out':
    #   quote-like tokens (' ` , ,@) -> a pair whose left is a symbol naming the
    #     quote and whose right is the next expression, parsed recursively
    #   '.'  -> nothing is built; reported via the second return value
    #   non-bracket token -> delegated to parse-atom
    #   '('  -> a chain of pairs, one per element, until the matching ')'
    #   ')'  -> nothing is built; reported via the first return value
    # Problems are reported through 'trace'.
    #
    # Every exit path calls trace-higher exactly once, to pair with the
    # trace-lower at the top of the function.
 30 fn parse-sexpression tokens: (addr stream cell), _out: (addr handle cell), trace: (addr trace) -> _/eax: boolean, _/ecx: boolean {
 31   trace-text trace, "parse", "parse"
 32   trace-lower trace
 33   var curr-token-storage: cell
 34   var curr-token/ecx: (addr cell) <- address curr-token-storage
 35   var empty?/eax: boolean <- stream-empty? tokens
 36   compare empty?, 0/false
 37   {
 38     break-if-=
 39     error trace, "end of stream; never found a balancing ')'"
        # undo the trace-lower above before bailing out
        trace-higher trace
        # reporting 'close paren' makes enclosing list loops stop
 40     return 1/true, 0/false
 41   }
 42   read-from-stream tokens, curr-token
 43   $parse-sexpression:type-check: {
 44     # single quote -> parse as list with a special car
 45     var quote-token?/eax: boolean <- quote-token? curr-token
 46     compare quote-token?, 0/false
 47     {
 48       break-if-=
 49       var out/edi: (addr handle cell) <- copy _out
 50       allocate-pair out
 51       var out-addr/eax: (addr cell) <- lookup *out
 52       var left-ah/edx: (addr handle cell) <- get out-addr, left
 53       new-symbol left-ah, "'"
 54       var right-ah/edx: (addr handle cell) <- get out-addr, right
 55       var close-paren?/eax: boolean <- copy 0/false
 56       var dot?/ecx: boolean <- copy 0/false
 57       close-paren?, dot? <- parse-sexpression tokens, right-ah, trace
          # pair with the trace-lower at the top; the flags of the quoted
          # expression are passed through unchanged
          trace-higher trace
 58       return close-paren?, dot?
 59     }
 60     # backquote quote -> parse as list with a special car
 61     var backquote-token?/eax: boolean <- backquote-token? curr-token
 62     compare backquote-token?, 0/false
 63     {
 64       break-if-=
 65       var out/edi: (addr handle cell) <- copy _out
 66       allocate-pair out
 67       var out-addr/eax: (addr cell) <- lookup *out
 68       var left-ah/edx: (addr handle cell) <- get out-addr, left
 69       new-symbol left-ah, "`"
 70       var right-ah/edx: (addr handle cell) <- get out-addr, right
 71       var close-paren?/eax: boolean <- copy 0/false
 72       var dot?/ecx: boolean <- copy 0/false
 73       close-paren?, dot? <- parse-sexpression tokens, right-ah, trace
          # pair with the trace-lower at the top
          trace-higher trace
 74       return close-paren?, dot?
 75     }
 76     # unquote -> parse as list with a special car
 77     var unquote-token?/eax: boolean <- unquote-token? curr-token
 78     compare unquote-token?, 0/false
 79     {
 80       break-if-=
 81       var out/edi: (addr handle cell) <- copy _out
 82       allocate-pair out
 83       var out-addr/eax: (addr cell) <- lookup *out
 84       var left-ah/edx: (addr handle cell) <- get out-addr, left
 85       new-symbol left-ah, ","
 86       var right-ah/edx: (addr handle cell) <- get out-addr, right
 87       var close-paren?/eax: boolean <- copy 0/false
 88       var dot?/ecx: boolean <- copy 0/false
 89       close-paren?, dot? <- parse-sexpression tokens, right-ah, trace
          # pair with the trace-lower at the top
          trace-higher trace
 90       return close-paren?, dot?
 91     }
 92     # unquote-splice -> parse as list with a special car
 93     var unquote-splice-token?/eax: boolean <- unquote-splice-token? curr-token
 94     compare unquote-splice-token?, 0/false
 95     {
 96       break-if-=
 97       var out/edi: (addr handle cell) <- copy _out
 98       allocate-pair out
 99       var out-addr/eax: (addr cell) <- lookup *out
100       var left-ah/edx: (addr handle cell) <- get out-addr, left
101       new-symbol left-ah, ",@"
102       var right-ah/edx: (addr handle cell) <- get out-addr, right
103       var close-paren?/eax: boolean <- copy 0/false
104       var dot?/ecx: boolean <- copy 0/false
105       close-paren?, dot? <- parse-sexpression tokens, right-ah, trace
          # pair with the trace-lower at the top
          trace-higher trace
106       return close-paren?, dot?
107     }
108     # dot -> return
109     var dot?/eax: boolean <- dot-token? curr-token
110     compare dot?, 0/false
111     {
112       break-if-=
113       trace-higher trace
114       return 0/false, 1/true
115     }
116     # not bracket -> parse atom
117     var bracket-token?/eax: boolean <- bracket-token? curr-token
118     compare bracket-token?, 0/false
119     {
120       break-if-!=
121       parse-atom curr-token, _out, trace
122       break $parse-sexpression:type-check
123     }
124     # open paren -> parse list
125     var open-paren?/eax: boolean <- open-paren-token? curr-token
126     compare open-paren?, 0/false
127     {
128       break-if-=
          # 'curr' always points at the handle where the next pair of the list goes
129       var curr/esi: (addr handle cell) <- copy _out
130       allocate-pair curr
131       var curr-addr/eax: (addr cell) <- lookup *curr
132       var left/edx: (addr handle cell) <- get curr-addr, left
133       {
            # first element is parsed directly into the left slot of the first pair
134         var close-paren?/eax: boolean <- copy 0/false
135         var dot?/ecx: boolean <- copy 0/false
136         close-paren?, dot? <- parse-sexpression tokens, left, trace
137         {
138           compare dot?, 0/false
139           break-if-=
140           error trace, "'.' cannot be at the start of a list"
              # pair with the trace-lower at the top
              trace-higher trace
141           return 1/true, dot?
142         }
            # immediate ')' -> the list is empty; leave the freshly allocated pair as is
143         compare close-paren?, 0/false
144         break-if-!=
145         var curr-addr/eax: (addr cell) <- lookup *curr
146         curr <- get curr-addr, right
            # remaining elements are parsed into a temporary first, because we only
            # know whether to store them after seeing what the token was
147         var tmp-storage: (handle cell)
148         var tmp/edx: (addr handle cell) <- address tmp-storage
149         $parse-sexpression:list-loop: {
150           var close-paren?/eax: boolean <- copy 0/false
151           var dot?/ecx: boolean <- copy 0/false
152           close-paren?, dot? <- parse-sexpression tokens, tmp, trace
153           # '.' -> clean up right here and return
154           compare dot?, 0/false
155           {
156             break-if-=
157             parse-dot-tail tokens, curr, trace
                # pair with the trace-lower at the top
                trace-higher trace
158             return 0/false, 0/false
159           }
              # the pair is allocated before the ')' check, so a finished list
              # ends in a freshly allocated pair in the last right slot
160           allocate-pair curr
161           # ')' -> return
162           compare close-paren?, 0/false
163           break-if-!=
164           var curr-addr/eax: (addr cell) <- lookup *curr
165           var left/ecx: (addr handle cell) <- get curr-addr, left
166           copy-object tmp, left
167           #
168           curr <- get curr-addr, right
169           loop
170         }
171       }
172       break $parse-sexpression:type-check
173     }
174     # close paren -> return
175     var close-paren?/eax: boolean <- close-paren-token? curr-token
176     compare close-paren?, 0/false
177     {
178       break-if-=
179       trace-higher trace
180       return 1/true, 0/false
181     }
182     # otherwise abort
        # (builds "unexpected token <text>" and records it under the "error" label;
        # control then falls through to the common exit below)
183     var stream-storage: (stream byte 0x400)
184     var stream/edx: (addr stream byte) <- address stream-storage
185     write stream, "unexpected token "
186     var curr-token-data-ah/eax: (addr handle stream byte) <- get curr-token, text-data
187     var curr-token-data/eax: (addr stream byte) <- lookup *curr-token-data-ah
188     rewind-stream curr-token-data
189     write-stream stream, curr-token-data
190     trace trace, "error", stream
191   }
      # common exit for atoms, completed lists and the 'unexpected token' path
192   trace-higher trace
193   return 0/false, 0/false
194 }
195 
196 fn parse-atom _curr-token: (addr cell), _out: (addr handle cell), trace: (addr trace) {
      # Turn a single token into a cell in '_out':
      #   number token -> number cell (allocate-number)
      #   stream token -> stream cell (allocate-stream)
      #   anything else -> symbol cell (allocate-symbol)
      # The token text and the result are recorded in 'trace' under the "parse" label.
197   trace-text trace, "parse", "parse atom"
198   var curr-token/ecx: (addr cell) <- copy _curr-token
199   var curr-token-data-ah/eax: (addr handle stream byte) <- get curr-token, text-data
200   var _curr-token-data/eax: (addr stream byte) <- lookup *curr-token-data-ah
      # move the lookup result out of eax, which is reused by the calls below
201   var curr-token-data/esi: (addr stream byte) <- copy _curr-token-data
202   trace trace, "parse", curr-token-data
203   # number
204   var number-token?/eax: boolean <- number-token? curr-token
205   compare number-token?, 0/false
206   {
207     break-if-=
208     rewind-stream curr-token-data
        # the text is read as a decimal integer and then converted to float for storage
209     var _val/eax: int <- parse-decimal-int-from-stream curr-token-data
        # ecx held curr-token until now; reusing it is fine because this branch returns
210     var val/ecx: int <- copy _val
211     var val-float/xmm0: float <- convert val
212     allocate-number _out
213     var out/eax: (addr handle cell) <- copy _out
214     var out-addr/eax: (addr cell) <- lookup *out
215     var dest/edi: (addr float) <- get out-addr, number-data
216     copy-to *dest, val-float
217     {
218       var stream-storage: (stream byte 0x400)
219       var stream/ecx: (addr stream byte) <- address stream-storage
220       write stream, "=> number "
221       print-number out-addr, stream, 0/no-trace
222       trace trace, "parse", stream
223     }
224     return
225   }
226   # default: copy either to a symbol or a stream
227   # stream token -> literal
      # the next two compare/block pairs test the same flag and act as an if/else
      # NOTE(review): the second compare relies on eax surviving allocate-stream -- confirm
228   var stream-token?/eax: boolean <- stream-token? curr-token
229   compare stream-token?, 0/false
230   {
231     break-if-=
232     allocate-stream _out
233   }
234   compare stream-token?, 0/false
235   {
236     break-if-!=
237     allocate-symbol _out
238   }
239   # copy token data
      # the text-data handle of the token is copied into the text-data slot of the new cell
240   var out/eax: (addr handle cell) <- copy _out
241   var out-addr/eax: (addr cell) <- lookup *out
242   var curr-token-data-ah/ecx: (addr handle stream byte) <- get curr-token, text-data
243   var dest-ah/edx: (addr handle stream byte) <- get out-addr, text-data
244   copy-object curr-token-data-ah, dest-ah
      # NOTE(review): this trace line says "=> symbol " and uses print-symbol for
      # stream cells as well, since both kinds reach this point
245   {
246     var stream-storage: (stream byte 0x400)
247     var stream/ecx: (addr stream byte) <- address stream-storage
248     write stream, "=> symbol "
249     print-symbol out-addr, stream, 0/no-trace
250     trace trace, "parse", stream
251   }
252 }
253 
254 fn parse-dot-tail tokens: (addr stream cell), _out: (addr handle cell), trace: (addr trace) {
      # Handle the remainder of a list after a '.' token (called from the list loop in
      # parse-sexpression, which passes the handle where the rest of the list would go).
      # Expects exactly one expression followed by ')':
      #   - the expression is parsed directly into '_out'
      #   - the next parse must report a close paren
      # Anything else is reported through 'trace'; nothing is returned.
255   var out/edi: (addr handle cell) <- copy _out
256   var close-paren?/eax: boolean <- copy 0/false
257   var dot?/ecx: boolean <- copy 0/false
      # step 1: the expression after the dot
258   close-paren?, dot? <- parse-sexpression tokens, out, trace
259   compare close-paren?, 0/false
260   {
261     break-if-=
262     error trace, "'. )' makes no sense"
263     return
264   }
265   compare dot?, 0/false
266   {
267     break-if-=
268     error trace, "'. .' makes no sense"
269     return
270   }
271   #
      # step 2: the next thing must be ')'; whatever gets parsed goes into a throwaway handle
      # edi is reused here; 'out' is not needed past this point
272   var dummy: (handle cell)
273   var dummy-ah/edi: (addr handle cell) <- address dummy
274   close-paren?, dot? <- parse-sexpression tokens, dummy-ah, trace
275   compare close-paren?, 0/false
276   {
        # note the inverted test: the error fires when a close paren was NOT seen
277     break-if-!=
278     error trace, "cannot have multiple expressions between '.' and ')'"
279     return
280   }
281   compare dot?, 0/false
282   {
283     break-if-=
284     error trace, "cannot have two dots in a single list"
285     return
286   }
287 }