https://github.com/akkartik/mu/blob/main/shell/parse.mu
# Parse exactly one s-expression from 'tokens' into 'out'.
# Problems are reported through 'trace':
#   - there are no tokens at all
#   - the expression turns out to be an unbalanced ')'
#   - tokens are left over after one complete expression
fn parse-input tokens: (addr stream cell), out: (addr handle cell), trace: (addr trace) {
  rewind-stream tokens
  # nothing to read?
  {
    var no-tokens?/eax: boolean <- stream-empty? tokens
    compare no-tokens?, 0/false
    break-if-=
    error trace, "nothing to parse"
    return
  }
  # parse one expression; the second result is not used here
  var stray-paren?/eax: boolean <- copy 0/false
  var unused-dot?/ecx: boolean <- copy 0/false
  stray-paren?, unused-dot? <- parse-sexpression tokens, out, trace
  compare stray-paren?, 0/false
  {
    break-if-=
    error trace, "')' is not a valid expression"
    return
  }
  # only one expression is allowed per input
  var drained?/eax: boolean <- stream-empty? tokens
  compare drained?, 0/false
  {
    break-if-!=
    error trace, "unexpected tokens at end; only type in a single expression at a time"
  }
}
26
27
28
29
# Read one s-expression from 'tokens' and build it in '_out'.
#
# Return values:
#   eax: a ')' was read where an expression was expected. Also set when the
#        tokens run out, and after a '.' at the start of a list, so that
#        callers stop reading the current list.
#   ecx: a '.' was read. Consumed by the recursive calls that parse lists.
#
# Errors are reported through 'trace'.
#
# The trace-lower at the top is paired with a trace-higher on every exit path,
# including the two list-parsing returns below ('.' at start of list, and
# dotted tail).
fn parse-sexpression tokens: (addr stream cell), _out: (addr handle cell), trace: (addr trace) -> _/eax: boolean, _/ecx: boolean {
  trace-text trace, "parse", "parse"
  trace-lower trace
  var curr-token-storage: cell
  var curr-token/ecx: (addr cell) <- address curr-token-storage
  var empty?/eax: boolean <- stream-empty? tokens
  compare empty?, 0/false
  {
    break-if-=
    error trace, "end of stream; never found a balancing ')'"
    trace-higher trace
    return 1/true, 0/false
  }
  read-from-stream tokens, curr-token
  $parse-sexpression:type-check: {
    # single quote -> pair whose left is the symbol ' and whose right is the next expression
    var quote-token?/eax: boolean <- quote-token? curr-token
    compare quote-token?, 0/false
    {
      break-if-=
      var out/edi: (addr handle cell) <- copy _out
      allocate-pair out
      var out-addr/eax: (addr cell) <- lookup *out
      var left-ah/edx: (addr handle cell) <- get out-addr, left
      new-symbol left-ah, "'"
      var right-ah/edx: (addr handle cell) <- get out-addr, right
      var close-paren?/eax: boolean <- copy 0/false
      var dot?/ecx: boolean <- copy 0/false
      close-paren?, dot? <- parse-sexpression tokens, right-ah, trace
      trace-higher trace
      return close-paren?, dot?
    }
    # backquote -> same shape, with the symbol `
    var backquote-token?/eax: boolean <- backquote-token? curr-token
    compare backquote-token?, 0/false
    {
      break-if-=
      var out/edi: (addr handle cell) <- copy _out
      allocate-pair out
      var out-addr/eax: (addr cell) <- lookup *out
      var left-ah/edx: (addr handle cell) <- get out-addr, left
      new-symbol left-ah, "`"
      var right-ah/edx: (addr handle cell) <- get out-addr, right
      var close-paren?/eax: boolean <- copy 0/false
      var dot?/ecx: boolean <- copy 0/false
      close-paren?, dot? <- parse-sexpression tokens, right-ah, trace
      trace-higher trace
      return close-paren?, dot?
    }
    # unquote -> same shape, with the symbol ,
    var unquote-token?/eax: boolean <- unquote-token? curr-token
    compare unquote-token?, 0/false
    {
      break-if-=
      var out/edi: (addr handle cell) <- copy _out
      allocate-pair out
      var out-addr/eax: (addr cell) <- lookup *out
      var left-ah/edx: (addr handle cell) <- get out-addr, left
      new-symbol left-ah, ","
      var right-ah/edx: (addr handle cell) <- get out-addr, right
      var close-paren?/eax: boolean <- copy 0/false
      var dot?/ecx: boolean <- copy 0/false
      close-paren?, dot? <- parse-sexpression tokens, right-ah, trace
      trace-higher trace
      return close-paren?, dot?
    }
    # unquote-splice -> same shape, with the symbol ,@
    var unquote-splice-token?/eax: boolean <- unquote-splice-token? curr-token
    compare unquote-splice-token?, 0/false
    {
      break-if-=
      var out/edi: (addr handle cell) <- copy _out
      allocate-pair out
      var out-addr/eax: (addr cell) <- lookup *out
      var left-ah/edx: (addr handle cell) <- get out-addr, left
      new-symbol left-ah, ",@"
      var right-ah/edx: (addr handle cell) <- get out-addr, right
      var close-paren?/eax: boolean <- copy 0/false
      var dot?/ecx: boolean <- copy 0/false
      close-paren?, dot? <- parse-sexpression tokens, right-ah, trace
      trace-higher trace
      return close-paren?, dot?
    }
    # dot -> nothing to build; just tell the caller
    var dot?/eax: boolean <- dot-token? curr-token
    compare dot?, 0/false
    {
      break-if-=
      trace-higher trace
      return 0/false, 1/true
    }
    # not a bracket -> parse atom
    var bracket-token?/eax: boolean <- bracket-token? curr-token
    compare bracket-token?, 0/false
    {
      break-if-!=
      parse-atom curr-token, _out, trace
      break $parse-sexpression:type-check
    }
    # open paren -> parse list
    var open-paren?/eax: boolean <- open-paren-token? curr-token
    compare open-paren?, 0/false
    {
      break-if-=
      var curr/esi: (addr handle cell) <- copy _out
      allocate-pair curr
      var curr-addr/eax: (addr cell) <- lookup *curr
      var left/edx: (addr handle cell) <- get curr-addr, left
      {
        # first element goes straight into the pair just allocated
        var close-paren?/eax: boolean <- copy 0/false
        var dot?/ecx: boolean <- copy 0/false
        close-paren?, dot? <- parse-sexpression tokens, left, trace
        {
          compare dot?, 0/false
          break-if-=
          error trace, "'.' cannot be at the start of a list"
          # undo the trace-lower at the top, like every other exit path
          trace-higher trace
          return 1/true, dot?
        }
        # ')' right after '(' -> done with the list
        compare close-paren?, 0/false
        break-if-!=
        var curr-addr/eax: (addr cell) <- lookup *curr
        curr <- get curr-addr, right
        # later elements are parsed into a temporary first, because we don't
        # know yet whether the next token is an element, a '.' or a ')'
        var tmp-storage: (handle cell)
        var tmp/edx: (addr handle cell) <- address tmp-storage
        $parse-sexpression:list-loop: {
          var close-paren?/eax: boolean <- copy 0/false
          var dot?/ecx: boolean <- copy 0/false
          close-paren?, dot? <- parse-sexpression tokens, tmp, trace
          # '.' -> parse the tail directly into the current 'right' slot and return
          compare dot?, 0/false
          {
            break-if-=
            parse-dot-tail tokens, curr, trace
            # undo the trace-lower at the top, like every other exit path
            trace-higher trace
            return 0/false, 0/false
          }
          allocate-pair curr
          # ')' -> the pair just allocated terminates the list
          compare close-paren?, 0/false
          break-if-!=
          var curr-addr/eax: (addr cell) <- lookup *curr
          var left/ecx: (addr handle cell) <- get curr-addr, left
          copy-object tmp, left
          # move on to the rest of the list
          curr <- get curr-addr, right
          loop
        }
      }
      break $parse-sexpression:type-check
    }
    # close paren -> tell the caller
    var close-paren?/eax: boolean <- close-paren-token? curr-token
    compare close-paren?, 0/false
    {
      break-if-=
      trace-higher trace
      return 1/true, 0/false
    }
    # otherwise report the token we couldn't handle
    var stream-storage: (stream byte 0x400)
    var stream/edx: (addr stream byte) <- address stream-storage
    write stream, "unexpected token "
    var curr-token-data-ah/eax: (addr handle stream byte) <- get curr-token, text-data
    var curr-token-data/eax: (addr stream byte) <- lookup *curr-token-data-ah
    rewind-stream curr-token-data
    write-stream stream, curr-token-data
    error-stream trace, stream
  }
  trace-higher trace
  return 0/false, 0/false
}
200
# Build a cell in '_out' from the single token '_curr-token':
#   - number tokens become number cells (the text is read as a decimal int,
#     then converted to float)
#   - stream tokens become stream cells sharing the token's text
#   - anything else becomes a symbol cell sharing the token's text
# The token text and the result are both logged to 'trace' under "parse".
fn parse-atom _curr-token: (addr cell), _out: (addr handle cell), trace: (addr trace) {
  trace-text trace, "parse", "parse atom"
  var token/ecx: (addr cell) <- copy _curr-token
  var text-ah/eax: (addr handle stream byte) <- get token, text-data
  var _text/eax: (addr stream byte) <- lookup *text-ah
  var text/esi: (addr stream byte) <- copy _text
  trace trace, "parse", text
  # number?
  var is-number?/eax: boolean <- number-token? token
  compare is-number?, 0/false
  {
    break-if-=
    rewind-stream text
    var _n/eax: int <- parse-decimal-int-from-stream text
    var n/ecx: int <- copy _n
    var n-float/xmm0: float <- convert n
    allocate-number _out
    var result-ah/eax: (addr handle cell) <- copy _out
    var result/eax: (addr cell) <- lookup *result-ah
    var slot/edi: (addr float) <- get result, number-data
    copy-to *slot, n-float
    # log what we built
    {
      var msg-storage: (stream byte 0x400)
      var msg/ecx: (addr stream byte) <- address msg-storage
      write msg, "=> number "
      var scratch-trace-storage: trace
      var scratch-trace/edi: (addr trace) <- address scratch-trace-storage
      initialize-trace scratch-trace, 1/only-errors, 0x10/capacity, 0/visible
      print-number result, msg, scratch-trace
      trace trace, "parse", msg
    }
    return
  }
  # stream or symbol: pick the cell type, then share the token's text with it
  $parse-atom:allocate: {
    var is-stream?/eax: boolean <- stream-token? token
    compare is-stream?, 0/false
    {
      break-if-=
      allocate-stream _out
      break $parse-atom:allocate
    }
    allocate-symbol _out
  }
  var result-ah/eax: (addr handle cell) <- copy _out
  var result/eax: (addr cell) <- lookup *result-ah
  var src-ah/ecx: (addr handle stream byte) <- get token, text-data
  var dest-ah/edx: (addr handle stream byte) <- get result, text-data
  copy-object src-ah, dest-ah
  # log what we built
  {
    var msg-storage: (stream byte 0x400)
    var msg/ecx: (addr stream byte) <- address msg-storage
    write msg, "=> symbol "
    var scratch-trace-storage: trace
    var scratch-trace/edi: (addr trace) <- address scratch-trace-storage
    initialize-trace scratch-trace, 1/only-errors, 0x10/capacity, 0/visible
    print-symbol result, msg, scratch-trace
    trace trace, "parse", msg
  }
}
264
# Parse what follows a '.' inside a list: exactly one expression, stored in
# '_out', followed by the closing ')'. Anything else is reported through 'trace'.
fn parse-dot-tail tokens: (addr stream cell), _out: (addr handle cell), trace: (addr trace) {
  # the expression after the '.'
  var stray-paren?/eax: boolean <- copy 0/false
  var stray-dot?/ecx: boolean <- copy 0/false
  stray-paren?, stray-dot? <- parse-sexpression tokens, _out, trace
  {
    compare stray-paren?, 0/false
    break-if-=
    error trace, "'. )' makes no sense"
    return
  }
  {
    compare stray-dot?, 0/false
    break-if-=
    error trace, "'. .' makes no sense"
    return
  }
  # the next thing must be the ')' closing the list; whatever gets parsed
  # here is thrown away
  var discard-storage: (handle cell)
  var discard/edi: (addr handle cell) <- address discard-storage
  stray-paren?, stray-dot? <- parse-sexpression tokens, discard, trace
  {
    compare stray-paren?, 0/false
    break-if-!=
    error trace, "cannot have multiple expressions between '.' and ')'"
    return
  }
  {
    compare stray-dot?, 0/false
    break-if-=
    error trace, "cannot have two dots in a single list"
    return
  }
}
298 }