https://github.com/akkartik/mu/blob/main/shell/parse.mu
# Top-level entry point: parse everything in `tokens` as exactly one
# expression and store the result in `out`.
#
# Errors are reported on `trace` when:
#   - the token stream is empty
#   - the expression parsed turns out to be a bare ')' or a bare '.'
#   - tokens remain in the stream after the first expression
fn parse-input tokens: (addr stream token), out: (addr handle cell), trace: (addr trace) {
  rewind-stream tokens
  # nothing typed in at all
  {
    var no-tokens?/eax: boolean <- stream-empty? tokens
    compare no-tokens?, 0/false
    break-if-=
    error trace, "nothing to parse"
    return
  }
  # parse a single expression; the two flags say whether what we hit was
  # really a ')' or a '.' rather than an expression
  var stray-close-paren?/eax: boolean <- copy 0/false
  var stray-dot?/ecx: boolean <- copy 0/false
  stray-close-paren?, stray-dot? <- parse-sexpression tokens, out, trace
  compare stray-close-paren?, 0/false
  {
    break-if-=
    error trace, "')' is not a valid expression"
    return
  }
  compare stray-dot?, 0/false
  {
    break-if-=
    error trace, "'.' is not a valid expression"
    return
  }
  # anything left over means more than one expression was typed in
  var all-consumed?/eax: boolean <- stream-empty? tokens
  compare all-consumed?, 0/false
  {
    break-if-!=
    error trace, "unexpected tokens at end; only type in a single expression at a time"
  }
}
32
33
34
35
# Parse one s-expression starting at the read pointer of `tokens` and store
# it in `_out`.
#
# return values:
#   eax: did we hit a ')' (or the end of the stream) instead of an expression?
#   ecx: did we hit a '.' instead of an expression?
#
# Each call lowers the trace by one level on entry; every exit path must
# raise it again so the trace depth stays balanced for the caller.
fn parse-sexpression tokens: (addr stream token), _out: (addr handle cell), trace: (addr trace) -> _/eax: boolean, _/ecx: boolean {
  trace-text trace, "parse", "parse"
  trace-lower trace
  var curr-token-storage: token
  var curr-token/ecx: (addr token) <- address curr-token-storage
  var empty?/eax: boolean <- stream-empty? tokens
  compare empty?, 0/false
  {
    break-if-=
    error trace, "end of stream; never found a balancing ')'"
    trace-higher trace
    return 1/true, 0/false
  }
  read-from-stream tokens, curr-token
  $parse-sexpression:type-check: {
    # single quote -> pair of the symbol "'" and the following expression
    var quote-token?/eax: boolean <- quote-token? curr-token
    compare quote-token?, 0/false
    {
      break-if-=
      var close-paren?/eax: boolean <- copy 0/false
      var dot?/ecx: boolean <- copy 0/false
      close-paren?, dot? <- parse-prefixed-sexpression tokens, _out, "'", trace
      trace-higher trace
      return close-paren?, dot?
    }
    # backquote -> pair of the symbol "`" and the following expression
    var backquote-token?/eax: boolean <- backquote-token? curr-token
    compare backquote-token?, 0/false
    {
      break-if-=
      var close-paren?/eax: boolean <- copy 0/false
      var dot?/ecx: boolean <- copy 0/false
      close-paren?, dot? <- parse-prefixed-sexpression tokens, _out, "`", trace
      trace-higher trace
      return close-paren?, dot?
    }
    # unquote -> pair of the symbol "," and the following expression
    var unquote-token?/eax: boolean <- unquote-token? curr-token
    compare unquote-token?, 0/false
    {
      break-if-=
      var close-paren?/eax: boolean <- copy 0/false
      var dot?/ecx: boolean <- copy 0/false
      close-paren?, dot? <- parse-prefixed-sexpression tokens, _out, ",", trace
      trace-higher trace
      return close-paren?, dot?
    }
    # unquote-splice -> pair of the symbol ",@" and the following expression
    var unquote-splice-token?/eax: boolean <- unquote-splice-token? curr-token
    compare unquote-splice-token?, 0/false
    {
      break-if-=
      var close-paren?/eax: boolean <- copy 0/false
      var dot?/ecx: boolean <- copy 0/false
      close-paren?, dot? <- parse-prefixed-sexpression tokens, _out, ",@", trace
      trace-higher trace
      return close-paren?, dot?
    }
    # dot: nothing to store; just tell the caller
    var dot?/eax: boolean <- dot-token? curr-token
    compare dot?, 0/false
    {
      break-if-=
      trace-higher trace
      return 0/false, 1/true
    }
    # anything that bracket-token? rejects is parsed as an atom
    var bracket-token?/eax: boolean <- bracket-token? curr-token
    compare bracket-token?, 0/false
    {
      break-if-!=
      parse-atom curr-token, _out, trace
      break $parse-sexpression:type-check
    }
    # open paren: build a chain of pairs, one per element
    var open-paren?/eax: boolean <- open-paren-token? curr-token
    compare open-paren?, 0/false
    {
      break-if-=
      var curr/esi: (addr handle cell) <- copy _out
      allocate-pair curr
      var curr-addr/eax: (addr cell) <- lookup *curr
      var left/edx: (addr handle cell) <- get curr-addr, left
      {
        # first element goes directly into the left of the pair just allocated
        var close-paren?/eax: boolean <- copy 0/false
        var dot?/ecx: boolean <- copy 0/false
        close-paren?, dot? <- parse-sexpression tokens, left, trace
        {
          compare dot?, 0/false
          break-if-=
          error trace, "'.' cannot be at the start of a list"
          # rebalance the trace-lower at the top before bailing out
          trace-higher trace
          return 1/true, dot?
        }
        # immediate ')' -> the freshly allocated, still-empty pair is the result
        compare close-paren?, 0/false
        break-if-!=
        var curr-addr/eax: (addr cell) <- lookup *curr
        curr <- get curr-addr, right
        # remaining elements are parsed into a temporary first, since we don't
        # know whether we got an element, a '.' or the closing ')' until after
        # the recursive call returns
        var tmp-storage: (handle cell)
        var tmp/edx: (addr handle cell) <- address tmp-storage
        $parse-sexpression:list-loop: {
          var close-paren?/eax: boolean <- copy 0/false
          var dot?/ecx: boolean <- copy 0/false
          close-paren?, dot? <- parse-sexpression tokens, tmp, trace
          # dotted tail: the rest of the list is handled by parse-dot-tail
          compare dot?, 0/false
          {
            break-if-=
            parse-dot-tail tokens, curr, trace
            # rebalance the trace-lower at the top; this is a regular
            # (non-error) exit for inputs like (a . b)
            trace-higher trace
            return 0/false, 0/false
          }
          allocate-pair curr
          # ')' ends the list, leaving the pair just allocated as its tail
          compare close-paren?, 0/false
          break-if-!=
          var curr-addr/eax: (addr cell) <- lookup *curr
          var left/ecx: (addr handle cell) <- get curr-addr, left
          copy-object tmp, left
          # advance to the slot for the next element
          curr <- get curr-addr, right
          loop
        }
      }
      break $parse-sexpression:type-check
    }
    # close paren: nothing to store; just tell the caller
    var close-paren?/eax: boolean <- close-paren-token? curr-token
    compare close-paren?, 0/false
    {
      break-if-=
      trace-higher trace
      return 1/true, 0/false
    }
    # none of the above: report the offending token text
    var stream-storage: (stream byte 0x400)
    var stream/edx: (addr stream byte) <- address stream-storage
    write stream, "unexpected token "
    var curr-token-data-ah/eax: (addr handle stream byte) <- get curr-token, text-data
    var curr-token-data/eax: (addr stream byte) <- lookup *curr-token-data-ah
    rewind-stream curr-token-data
    write-stream stream, curr-token-data
    error-stream trace, stream
  }
  trace-higher trace
  return 0/false, 0/false
}

# Helper for parse-sexpression: store in `_out` a pair whose left is a symbol
# named `prefix` and whose right is the next s-expression in `tokens`.
# Returns the two flags of the nested parse-sexpression call unchanged.
# Does not touch the trace depth itself; the caller owns trace-higher.
fn parse-prefixed-sexpression tokens: (addr stream token), _out: (addr handle cell), prefix: (addr array byte), trace: (addr trace) -> _/eax: boolean, _/ecx: boolean {
  var out/edi: (addr handle cell) <- copy _out
  allocate-pair out
  var out-addr/eax: (addr cell) <- lookup *out
  var left-ah/edx: (addr handle cell) <- get out-addr, left
  new-symbol left-ah, prefix
  var right-ah/edx: (addr handle cell) <- get out-addr, right
  var close-paren?/eax: boolean <- copy 0/false
  var dot?/ecx: boolean <- copy 0/false
  close-paren?, dot? <- parse-sexpression tokens, right-ah, trace
  return close-paren?, dot?
}
206
# Turn a single non-bracket token into a cell stored in `_out`.
#   - number tokens: the text is read as a decimal integer, converted to
#     float and stored in a freshly allocated number cell
#   - stream tokens: a stream cell is allocated and given the token's text
#   - everything else: a symbol cell is allocated and given the token's text
# The token text and (when should-trace? says so) the result are logged to
# `trace` under the "parse" label.
fn parse-atom _curr-token: (addr token), _out: (addr handle cell), trace: (addr trace) {
  trace-text trace, "parse", "parse atom"
  var token/ecx: (addr token) <- copy _curr-token
  var text-ah/eax: (addr handle stream byte) <- get token, text-data
  var _text/eax: (addr stream byte) <- lookup *text-ah
  var text/esi: (addr stream byte) <- copy _text
  trace trace, "parse", text

  # number
  {
    var is-number?/eax: boolean <- number-token? token
    compare is-number?, 0/false
    break-if-=
    rewind-stream text
    var _n/eax: int <- parse-decimal-int-from-stream text
    var n/ecx: int <- copy _n
    var n-float/xmm0: float <- convert n
    allocate-number _out
    var result-ah/eax: (addr handle cell) <- copy _out
    var result/eax: (addr cell) <- lookup *result-ah
    var number-slot/edi: (addr float) <- get result, number-data
    copy-to *number-slot, n-float
    {
      # presumably scoped to its own block so that eax (holding `result`) is
      # restored before print-number below, while the flags set by `compare`
      # still reach the break -- confirm against Mu's register-variable rules
      {
        var tracing?/eax: boolean <- should-trace? trace
        compare tracing?, 0/false
      }
      break-if-=
      var msg-storage: (stream byte 0x400)
      var msg/ecx: (addr stream byte) <- address msg-storage
      write msg, "=> number "
      var scratch-trace-storage: trace
      var scratch-trace/edi: (addr trace) <- address scratch-trace-storage
      initialize-trace scratch-trace, 1/only-errors, 0x10/capacity, 0/visible
      print-number result, msg, scratch-trace
      trace trace, "parse", msg
    }
    return
  }

  # default: allocate either a stream cell or a symbol cell
  var is-stream?/eax: boolean <- stream-token? token
  {
    compare is-stream?, 0/false
    break-if-=
    allocate-stream _out
  }
  {
    compare is-stream?, 0/false
    break-if-!=
    allocate-symbol _out
  }

  # either way the cell's text-data becomes a copy of the token's handle
  var result-ah/eax: (addr handle cell) <- copy _out
  var result/eax: (addr cell) <- lookup *result-ah
  var src-ah/ecx: (addr handle stream byte) <- get token, text-data
  var dest-ah/edx: (addr handle stream byte) <- get result, text-data
  copy-object src-ah, dest-ah
  {
    # same eax-scoping arrangement as in the number case above
    {
      var tracing?/eax: boolean <- should-trace? trace
      compare tracing?, 0/false
    }
    break-if-=
    # note: stream cells are also logged with the "=> symbol " label
    var msg-storage: (stream byte 0x40000)
    var msg/ecx: (addr stream byte) <- address msg-storage
    write msg, "=> symbol "
    var scratch-trace-storage: trace
    var scratch-trace/edi: (addr trace) <- address scratch-trace-storage
    initialize-trace scratch-trace, 1/only-errors, 0x10/capacity, 0/visible
    print-symbol result, msg, scratch-trace
    trace trace, "parse", msg
  }
}
280
# Parse what follows a '.' inside a list: exactly one expression, stored in
# `_out`, followed by the closing ')'.
#
# Errors reported on `trace`:
#   - '.' immediately followed by ')' or by another '.'
#   - a second '.' after the tail expression
#   - anything other than ')' after the tail expression
fn parse-dot-tail tokens: (addr stream token), _out: (addr handle cell), trace: (addr trace) {
  var out/edi: (addr handle cell) <- copy _out
  var close-paren?/eax: boolean <- copy 0/false
  var dot?/ecx: boolean <- copy 0/false
  # the expression after the dot
  close-paren?, dot? <- parse-sexpression tokens, out, trace
  compare close-paren?, 0/false
  {
    break-if-=
    error trace, "'. )' makes no sense"
    return
  }
  compare dot?, 0/false
  {
    break-if-=
    error trace, "'. .' makes no sense"
    return
  }
  # the next thing must be the closing ')'; whatever gets parsed here is
  # discarded
  var dummy: (handle cell)
  var dummy-ah/edi: (addr handle cell) <- address dummy
  close-paren?, dot? <- parse-sexpression tokens, dummy-ah, trace
  # check for a second dot first: parse-sexpression reports a dot with
  # close-paren? unset, so testing close-paren? first would always mask this
  # more specific message for inputs like (a . b . c)
  compare dot?, 0/false
  {
    break-if-=
    error trace, "cannot have two dots in a single list"
    return
  }
  compare close-paren?, 0/false
  {
    break-if-!=
    error trace, "cannot have multiple expressions between '.' and ')'"
    return
  }
}