https://github.com/akkartik/mu/blob/main/shell/parse.mu
  # Top-level entry point: parse exactly one s-expression from `tokens` into `out`.
  # Nothing is returned; every problem is reported by calling `error` on `trace`:
  #   - the token stream has nothing in it
  #   - parse-sexpression reports an unmatched ')'
  #   - tokens are left over after one complete expression
  fn parse-input tokens: (addr stream cell), out: (addr handle cell), trace: (addr trace) {
    # always start reading from the beginning of the stream
    rewind-stream tokens
    var no-tokens?/eax: boolean <- stream-empty? tokens
    {
      compare no-tokens?, 0/false
      break-if-=
      error trace, "nothing to parse"
      return
    }
    # parse-sexpression has two outputs; the 'dot' one is irrelevant at top level
    var stray-paren?/eax: boolean <- copy 0/false
    var ignored-dot?/ecx: boolean <- copy 0/false
    stray-paren?, ignored-dot? <- parse-sexpression tokens, out, trace
    compare stray-paren?, 0/false
    {
      break-if-=
      error trace, "')' is not a valid expression"
      return
    }
    # one expression per input: the stream should now be exhausted
    {
      var all-consumed?/eax: boolean <- stream-empty? tokens
      compare all-consumed?, 0/false
      break-if-!=
      error trace, "unexpected tokens at end; only type in a single expression at a time"
    }
  }
 26 
 27 # return values:
 28 #   unmatched close-paren encountered?
 29 #   dot encountered? (only used internally by recursive calls)
 30 fn parse-sexpression tokens: (addr stream cell), _out: (addr handle cell), trace: (addr trace) -> _/eax: boolean, _/ecx: boolean {
 31   trace-text trace, "read", "parse"
 32   trace-lower trace
 33   var curr-token-storage: cell
 34   var curr-token/ecx: (addr cell) <- address curr-token-storage
 35   var empty?/eax: boolean <- stream-empty? tokens
 36   compare empty?, 0/false
 37   {
 38     break-if-=
 39     error trace, "end of stream; never found a balancing ')'"
 40     return 1/true, 0/false
 41   }
 42   read-from-stream tokens, curr-token
 43   $parse-sexpression:type-check: {
 44     # single quote -> parse as list with a special car
 45     var quote-token?/eax: boolean <- quote-token? curr-token
 46     compare quote-token?, 0/false
 47     {
 48       break-if-=
 49       var out/edi: (addr handle cell) <- copy _out
 50       allocate-pair out
 51       var out-addr/eax: (addr cell) <- lookup *out
 52       var left-ah/edx: (addr handle cell) <- get out-addr, left
 53       new-symbol left-ah, "'"
 54       var right-ah/edx: (addr handle cell) <- get out-addr, right
 55       var close-paren?/eax: boolean <- copy 0/false
 56       var dot?/ecx: boolean <- copy 0/false
 57       close-paren?, dot? <- parse-sexpression tokens, right-ah, trace
 58       return close-paren?, dot?
 59     }
 60     # dot -> return
 61     var dot?/eax: boolean <- dot-token? curr-token
 62     compare dot?, 0/false
 63     {
 64       break-if-=
 65       trace-higher trace
 66       return 0/false, 1/true
 67     }
 68     # not bracket -> parse atom
 69     var bracket-token?/eax: boolean <- bracket-token? curr-token
 70     compare bracket-token?, 0/false
 71     {
 72       break-if-!=
 73       parse-atom curr-token, _out, trace
 74       break $parse-sexpression:type-check
 75     }
 76     # open paren -> parse list
 77     var open-paren?/eax: boolean <- open-paren-token? curr-token
 78     compare open-paren?, 0/false
 79     {
 80       break-if-=
 81       var curr/esi: (addr handle cell) <- copy _out
 82       allocate-pair curr
 83       var curr-addr/eax: (addr cell) <- lookup *curr
 84       var left/edx: (addr handle cell) <- get curr-addr, left
 85       {
 86         var close-paren?/eax: boolean <- copy 0/false
 87         var dot?/ecx: boolean <- copy 0/false
 88         close-paren?, dot? <- parse-sexpression tokens, left, trace
 89         {
 90           compare dot?, 0/false
 91           break-if-=
 92           error trace, "'.' cannot be at the start of a list"
 93           return 1/true, dot?
 94         }
 95         compare close-paren?, 0/false
 96         break-if-!=
 97         var curr-addr/eax: (addr cell) <- lookup *curr
 98         curr <- get curr-addr, right
 99         var tmp-storage: (handle cell)
100         var tmp/edx: (addr handle cell) <- address tmp-storage
101         $parse-sexpression:list-loop: {
102           var close-paren?/eax: boolean <- copy 0/false
103           var dot?/ecx: boolean <- copy 0/false
104           close-paren?, dot? <- parse-sexpression tokens, tmp, trace
105           # '.' -> clean up right here and return
106           compare dot?, 0/false
107           {
108             break-if-=
109             parse-dot-tail tokens, curr, trace
110             return 0/false, 0/false
111           }
112           allocate-pair curr
113           # ')' -> return
114           compare close-paren?, 0/false
115           break-if-!=
116           var curr-addr/eax: (addr cell) <- lookup *curr
117           var left/ecx: (addr handle cell) <- get curr-addr, left
118           copy-object tmp, left
119           #
120           curr <- get curr-addr, right
121           loop
122         }
123       }
124       break $parse-sexpression:type-check
125     }
126     # close paren -> return
127     var close-paren?/eax: boolean <- close-paren-token? curr-token
128     compare close-paren?, 0/false
129     {
130       break-if-=
131       trace-higher trace
132       return 1/true, 0/false
133     }
134     # otherwise abort
135     var stream-storage: (stream byte 0x40)
136     var stream/edx: (addr stream byte) <- address stream-storage
137     write stream, "unexpected token "
138     var curr-token-data-ah/eax: (addr handle stream byte) <- get curr-token, text-data
139     var curr-token-data/eax: (addr stream byte) <- lookup *curr-token-data-ah
140     rewind-stream curr-token-data
141     write-stream stream, curr-token-data
142     trace trace, "error", stream
143   }
144   trace-higher trace
145   return 0/false, 0/false
146 }
147 
# Turn a single non-bracket token into a cell in `_out`.
# Tokens that number-token? accepts become number cells (parsed as a decimal
# integer, then stored as a float); everything else becomes a symbol cell that
# shares the token's text.
# Each step is logged to `trace` under the "read" label.
fn parse-atom _curr-token: (addr cell), _out: (addr handle cell), trace: (addr trace) {
  trace-text trace, "read", "parse atom"
  var token/ecx: (addr cell) <- copy _curr-token
  var text-ah/eax: (addr handle stream byte) <- get token, text-data
  var _text/eax: (addr stream byte) <- lookup *text-ah
  var text/esi: (addr stream byte) <- copy _text
  trace trace, "read", text
  # case 1: number
  var numeric?/eax: boolean <- number-token? token
  {
    compare numeric?, 0/false
    break-if-=
    # read the digits from the start of the token's text
    rewind-stream text
    var _n/eax: int <- parse-decimal-int-from-stream text
    var n/ecx: int <- copy _n
    var n-float/xmm0: float <- convert n
    allocate-number _out
    var out-ah/eax: (addr handle cell) <- copy _out
    var result/eax: (addr cell) <- lookup *out-ah
    var slot/edi: (addr float) <- get result, number-data
    copy-to *slot, n-float
    # log what we produced
    {
      var msg-storage: (stream byte 0x40)
      var msg/ecx: (addr stream byte) <- address msg-storage
      write msg, "=> number "
      print-number result, msg, 0/no-trace
      trace trace, "read", msg
    }
    return
  }
  # case 2 (default): symbol
  # the new cell's text-data handle is copied from the token's
  allocate-symbol _out
  var out-ah/eax: (addr handle cell) <- copy _out
  var result/eax: (addr cell) <- lookup *out-ah
  var src-ah/ecx: (addr handle stream byte) <- get token, text-data
  var dest-ah/edx: (addr handle stream byte) <- get result, text-data
  copy-object src-ah, dest-ah
  # log what we produced
  {
    var msg-storage: (stream byte 0x40)
    var msg/ecx: (addr stream byte) <- address msg-storage
    write msg, "=> symbol "
    print-symbol result, msg, 0/no-trace
    trace trace, "read", msg
  }
}
194 
# Parse what follows a '.' inside a list: exactly one expression, written to
# `_out`, followed by the ')' that closes the list.
# Nothing is returned; malformed input is reported by calling `error` on `trace`.
fn parse-dot-tail tokens: (addr stream cell), _out: (addr handle cell), trace: (addr trace) {
  var out/edi: (addr handle cell) <- copy _out
  var close-paren?/eax: boolean <- copy 0/false
  var dot?/ecx: boolean <- copy 0/false
  # the expression right after the dot
  close-paren?, dot? <- parse-sexpression tokens, out, trace
  compare close-paren?, 0/false
  {
    break-if-=
    error trace, "'. )' makes no sense"
    return
  }
  compare dot?, 0/false
  {
    break-if-=
    error trace, "'. .' makes no sense"
    return
  }
  # whatever comes next must be the closing ')'; parse it into a scratch handle
  var dummy: (handle cell)
  var dummy-ah/edi: (addr handle cell) <- address dummy
  close-paren?, dot? <- parse-sexpression tokens, dummy-ah, trace
  # Check for a second dot before checking for ')'. parse-sexpression reports
  # a dot token as (close-paren? = false, dot? = true), so testing close-paren?
  # first would report the generic "multiple expressions" error for
  # '(a . b . c)' and the more specific message below would never be used.
  compare dot?, 0/false
  {
    break-if-=
    error trace, "cannot have two dots in a single list"
    return
  }
  compare close-paren?, 0/false
  {
    break-if-!=
    error trace, "cannot have multiple expressions between '.' and ')'"
    return
  }
}
228 }