https://github.com/akkartik/mu/blob/main/shell/parse.mu
# Parse all of `tokens` as exactly one s-expression and save it to `out`.
#
# Problems are reported through `trace` (via `error`); there is no return value.
# Errors raised here:
#   - the token stream is empty
#   - the expression is a bare ')'
#   - the expression is a bare '.' (possibly behind quote-like prefixes)
#   - tokens remain after one complete expression
fn parse-input tokens: (addr stream cell), out: (addr handle cell), trace: (addr trace) {
  rewind-stream tokens
  var empty?/eax: boolean <- stream-empty? tokens
  compare empty?, 0/false
  {
    break-if-=
    error trace, "nothing to parse"
    return
  }
  var close-paren?/eax: boolean <- copy 0/false
  var dot?/ecx: boolean <- copy 0/false
  close-paren?, dot? <- parse-sexpression tokens, out, trace
  {
    compare close-paren?, 0/false
    break-if-=
    error trace, "')' is not a valid expression"
    return
  }
  # a '.' only makes sense inside a list; at the top level it leaves `out`
  # without a complete expression, so report it rather than ignoring it
  {
    compare dot?, 0/false
    break-if-=
    error trace, "'.' is not a valid expression"
    return
  }
  # anything left over means more than one expression was typed in
  {
    var empty?/eax: boolean <- stream-empty? tokens
    compare empty?, 0/false
    break-if-!=
    error trace, "unexpected tokens at end; only type in a single expression at a time"
  }
}
26
27
28
29
# Read one s-expression from `tokens` and save it to `_out`.
#
# Returns two flags (in eax and ecx respectively):
#   - close-paren?: the token consumed was a ')' (also set on some error paths
#     so that callers stop consuming tokens)
#   - dot?: the token consumed was a '.'
# When either flag is set, `_out` does not hold a complete expression.
#
# Each call opens one nesting level in `trace` (trace-lower) and every exit
# path closes it again (trace-higher), so the trace depth on return always
# equals the depth on entry.
fn parse-sexpression tokens: (addr stream cell), _out: (addr handle cell), trace: (addr trace) -> _/eax: boolean, _/ecx: boolean {
  trace-text trace, "parse", "parse"
  trace-lower trace
  var curr-token-storage: cell
  var curr-token/ecx: (addr cell) <- address curr-token-storage
  var empty?/eax: boolean <- stream-empty? tokens
  compare empty?, 0/false
  {
    break-if-=
    error trace, "end of stream; never found a balancing ')'"
    trace-higher trace
    return 1/true, 0/false
  }
  read-from-stream tokens, curr-token
  $parse-sexpression:type-check: {
    # quote: build a pair whose left is the symbol ' and whose right is
    # whatever expression follows
    var quote-token?/eax: boolean <- quote-token? curr-token
    compare quote-token?, 0/false
    {
      break-if-=
      var out/edi: (addr handle cell) <- copy _out
      allocate-pair out
      var out-addr/eax: (addr cell) <- lookup *out
      var left-ah/edx: (addr handle cell) <- get out-addr, left
      new-symbol left-ah, "'"
      var right-ah/edx: (addr handle cell) <- get out-addr, right
      var close-paren?/eax: boolean <- copy 0/false
      var dot?/ecx: boolean <- copy 0/false
      close-paren?, dot? <- parse-sexpression tokens, right-ah, trace
      trace-higher trace
      return close-paren?, dot?
    }
    # backquote: same shape, with the symbol `
    var backquote-token?/eax: boolean <- backquote-token? curr-token
    compare backquote-token?, 0/false
    {
      break-if-=
      var out/edi: (addr handle cell) <- copy _out
      allocate-pair out
      var out-addr/eax: (addr cell) <- lookup *out
      var left-ah/edx: (addr handle cell) <- get out-addr, left
      new-symbol left-ah, "`"
      var right-ah/edx: (addr handle cell) <- get out-addr, right
      var close-paren?/eax: boolean <- copy 0/false
      var dot?/ecx: boolean <- copy 0/false
      close-paren?, dot? <- parse-sexpression tokens, right-ah, trace
      trace-higher trace
      return close-paren?, dot?
    }
    # unquote: same shape, with the symbol ,
    var unquote-token?/eax: boolean <- unquote-token? curr-token
    compare unquote-token?, 0/false
    {
      break-if-=
      var out/edi: (addr handle cell) <- copy _out
      allocate-pair out
      var out-addr/eax: (addr cell) <- lookup *out
      var left-ah/edx: (addr handle cell) <- get out-addr, left
      new-symbol left-ah, ","
      var right-ah/edx: (addr handle cell) <- get out-addr, right
      var close-paren?/eax: boolean <- copy 0/false
      var dot?/ecx: boolean <- copy 0/false
      close-paren?, dot? <- parse-sexpression tokens, right-ah, trace
      trace-higher trace
      return close-paren?, dot?
    }
    # unquote-splice: same shape, with the symbol ,@
    var unquote-splice-token?/eax: boolean <- unquote-splice-token? curr-token
    compare unquote-splice-token?, 0/false
    {
      break-if-=
      var out/edi: (addr handle cell) <- copy _out
      allocate-pair out
      var out-addr/eax: (addr cell) <- lookup *out
      var left-ah/edx: (addr handle cell) <- get out-addr, left
      new-symbol left-ah, ",@"
      var right-ah/edx: (addr handle cell) <- get out-addr, right
      var close-paren?/eax: boolean <- copy 0/false
      var dot?/ecx: boolean <- copy 0/false
      close-paren?, dot? <- parse-sexpression tokens, right-ah, trace
      trace-higher trace
      return close-paren?, dot?
    }
    # dot: nothing to save; just tell the caller
    var dot?/eax: boolean <- dot-token? curr-token
    compare dot?, 0/false
    {
      break-if-=
      trace-higher trace
      return 0/false, 1/true
    }
    # anything that isn't a bracket is an atom
    var bracket-token?/eax: boolean <- bracket-token? curr-token
    compare bracket-token?, 0/false
    {
      break-if-!=
      parse-atom curr-token, _out, trace
      break $parse-sexpression:type-check
    }
    # open paren: parse a list
    var open-paren?/eax: boolean <- open-paren-token? curr-token
    compare open-paren?, 0/false
    {
      break-if-=
      var curr/esi: (addr handle cell) <- copy _out
      allocate-pair curr
      var curr-addr/eax: (addr cell) <- lookup *curr
      var left/edx: (addr handle cell) <- get curr-addr, left
      {
        # first element goes directly into the left of the freshly allocated pair
        var close-paren?/eax: boolean <- copy 0/false
        var dot?/ecx: boolean <- copy 0/false
        close-paren?, dot? <- parse-sexpression tokens, left, trace
        {
          compare dot?, 0/false
          break-if-=
          error trace, "'.' cannot be at the start of a list"
          trace-higher trace
          return 1/true, dot?
        }
        # immediate ')': leave the pair as allocated
        compare close-paren?, 0/false
        break-if-!=
        var curr-addr/eax: (addr cell) <- lookup *curr
        curr <- get curr-addr, right
        # remaining elements are parsed into a temporary first, because we
        # don't know yet whether the next token is an element, a '.' or a ')'
        var tmp-storage: (handle cell)
        var tmp/edx: (addr handle cell) <- address tmp-storage
        $parse-sexpression:list-loop: {
          var close-paren?/eax: boolean <- copy 0/false
          var dot?/ecx: boolean <- copy 0/false
          close-paren?, dot? <- parse-sexpression tokens, tmp, trace
          # '.': the rest of the list goes straight into the current right slot
          compare dot?, 0/false
          {
            break-if-=
            parse-dot-tail tokens, curr, trace
            trace-higher trace
            return 0/false, 0/false
          }
          # the pair is allocated before the ')' check, so the list also ends
          # in a freshly allocated pair
          allocate-pair curr
          compare close-paren?, 0/false
          break-if-!=
          var curr-addr/eax: (addr cell) <- lookup *curr
          var left/ecx: (addr handle cell) <- get curr-addr, left
          copy-object tmp, left
          curr <- get curr-addr, right
          loop
        }
      }
      break $parse-sexpression:type-check
    }
    # close paren: nothing to save; just tell the caller
    var close-paren?/eax: boolean <- close-paren-token? curr-token
    compare close-paren?, 0/false
    {
      break-if-=
      trace-higher trace
      return 1/true, 0/false
    }
    # some other bracket token we don't know how to handle
    var stream-storage: (stream byte 0x400)
    var stream/edx: (addr stream byte) <- address stream-storage
    write stream, "unexpected token "
    var curr-token-data-ah/eax: (addr handle stream byte) <- get curr-token, text-data
    var curr-token-data/eax: (addr stream byte) <- lookup *curr-token-data-ah
    rewind-stream curr-token-data
    write-stream stream, curr-token-data
    trace trace, "error", stream
  }
  trace-higher trace
  return 0/false, 0/false
}
195
# Turn a single non-bracket token into a cell saved at `_out`.
#   - number tokens become number cells holding the token's decimal value
#     converted to float
#   - stream tokens become stream cells sharing the token's text-data
#   - all other tokens become symbol cells sharing the token's text-data
# Each step is logged to `trace` under the "parse" label.
fn parse-atom _curr-token: (addr cell), _out: (addr handle cell), trace: (addr trace) {
  trace-text trace, "parse", "parse atom"
  var token/ecx: (addr cell) <- copy _curr-token
  var text-ah/eax: (addr handle stream byte) <- get token, text-data
  var _text/eax: (addr stream byte) <- lookup *text-ah
  var text/esi: (addr stream byte) <- copy _text
  trace trace, "parse", text
  # case 1: number
  var is-number?/eax: boolean <- number-token? token
  {
    compare is-number?, 0/false
    break-if-=
    # read the integer from the start of the token's text
    rewind-stream text
    var _n/eax: int <- parse-decimal-int-from-stream text
    var n/ecx: int <- copy _n
    var n-float/xmm0: float <- convert n
    allocate-number _out
    var result-ah/eax: (addr handle cell) <- copy _out
    var result/eax: (addr cell) <- lookup *result-ah
    var slot/edi: (addr float) <- get result, number-data
    copy-to *slot, n-float
    # log what we produced
    {
      var msg-storage: (stream byte 0x400)
      var msg/ecx: (addr stream byte) <- address msg-storage
      write msg, "=> number "
      print-number result, msg, 0/no-trace
      trace trace, "parse", msg
    }
    return
  }
  # case 2: stream or symbol; they differ only in how the cell is allocated
  var is-stream?/eax: boolean <- stream-token? token
  {
    compare is-stream?, 0/false
    break-if-=
    allocate-stream _out
  }
  {
    compare is-stream?, 0/false
    break-if-!=
    allocate-symbol _out
  }
  # either way, the new cell takes over the token's text
  var result-ah/eax: (addr handle cell) <- copy _out
  var result/eax: (addr cell) <- lookup *result-ah
  var src-ah/ecx: (addr handle stream byte) <- get token, text-data
  var dest-ah/edx: (addr handle stream byte) <- get result, text-data
  copy-object src-ah, dest-ah
  # log what we produced
  {
    var msg-storage: (stream byte 0x400)
    var msg/ecx: (addr stream byte) <- address msg-storage
    write msg, "=> symbol "
    print-symbol result, msg, 0/no-trace
    trace trace, "parse", msg
  }
}
253
# Parse what follows a '.' inside a list: exactly one expression, saved to
# `_out`, followed by a ')'.
# Anything else is reported to `trace` as an error; there is no return value.
fn parse-dot-tail tokens: (addr stream cell), _out: (addr handle cell), trace: (addr trace) {
  # step 1: the expression after the '.'
  var tail-ah/edi: (addr handle cell) <- copy _out
  var saw-close?/eax: boolean <- copy 0/false
  var saw-dot?/ecx: boolean <- copy 0/false
  saw-close?, saw-dot? <- parse-sexpression tokens, tail-ah, trace
  {
    compare saw-close?, 0/false
    break-if-=
    error trace, "'. )' makes no sense"
    return
  }
  {
    compare saw-dot?, 0/false
    break-if-=
    error trace, "'. .' makes no sense"
    return
  }
  # step 2: the next thing must be a ')'; whatever gets parsed here is
  # thrown away
  var scratch: (handle cell)
  var scratch-ah/edi: (addr handle cell) <- address scratch
  saw-close?, saw-dot? <- parse-sexpression tokens, scratch-ah, trace
  {
    compare saw-close?, 0/false
    break-if-!=
    error trace, "cannot have multiple expressions between '.' and ')'"
    return
  }
  {
    compare saw-dot?, 0/false
    break-if-=
    error trace, "cannot have two dots in a single list"
    return
  }
}
287 }