https://github.com/akkartik/mu/blob/main/shell/parse.mu
# Parse exactly one expression from 'tokens' into 'out'.
#
# The function has no outputs; every problem is reported with 'error' on
# 'trace':
#   - the token stream is empty
#   - the first thing parsed is a stray ')' or a stray '.'
#   - tokens are left over after one complete expression
fn parse-input tokens: (addr stream cell), out: (addr handle cell), trace: (addr trace) {
  rewind-stream tokens
  var empty?/eax: boolean <- stream-empty? tokens
  compare empty?, 0/false
  {
    break-if-=
    error trace, "nothing to parse"
    return
  }
  var close-paren?/eax: boolean <- copy 0/false
  var dot?/ecx: boolean <- copy 0/false
  close-paren?, dot? <- parse-sexpression tokens, out, trace
  {
    compare close-paren?, 0/false
    break-if-=
    error trace, "')' is not a valid expression"
    return
  }
  # parse-sexpression reports a '.' token through its second result without
  # writing anything to 'out', so a top-level '.' must be rejected here
  {
    compare dot?, 0/false
    break-if-=
    error trace, "'.' is not a valid expression"
    return
  }
  # one expression per input: anything still unread is an error
  {
    var empty?/eax: boolean <- stream-empty? tokens
    compare empty?, 0/false
    break-if-!=
    error trace, "unexpected tokens at end; only type in a single expression at a time"
  }
}
26
27
28
29
# Read tokens from 'tokens' and build one s-expression in '_out', tracing
# under the "read" label.
#
# return values:
#   eax: true when a ')' token was read where an expression was expected;
#        also true after the "end of stream" and "'.' at start of list" errors
#   ecx: true when a '.' token was read where an expression was expected
# In both of those token cases nothing is written to '_out'.
#
# The 'trace-lower' at the top is undone by exactly one 'trace-higher' on
# every path out of the function, so the caller's trace depth is unchanged.
fn parse-sexpression tokens: (addr stream cell), _out: (addr handle cell), trace: (addr trace) -> _/eax: boolean, _/ecx: boolean {
  trace-text trace, "read", "parse"
  trace-lower trace
  var curr-token-storage: cell
  var curr-token/ecx: (addr cell) <- address curr-token-storage
  var empty?/eax: boolean <- stream-empty? tokens
  compare empty?, 0/false
  {
    break-if-=
    error trace, "end of stream; never found a balancing ')'"
    trace-higher trace
    return 1/true, 0/false
  }
  read-from-stream tokens, curr-token
  $parse-sexpression:type-check: {
    # quote token -> a pair whose left is the symbol ' and whose right is the
    # expression that follows
    var quote-token?/eax: boolean <- quote-token? curr-token
    compare quote-token?, 0/false
    {
      break-if-=
      var out/edi: (addr handle cell) <- copy _out
      allocate-pair out
      var out-addr/eax: (addr cell) <- lookup *out
      var left-ah/edx: (addr handle cell) <- get out-addr, left
      new-symbol left-ah, "'"
      var right-ah/edx: (addr handle cell) <- get out-addr, right
      var close-paren?/eax: boolean <- copy 0/false
      var dot?/ecx: boolean <- copy 0/false
      close-paren?, dot? <- parse-sexpression tokens, right-ah, trace
      # trace-higher has no outputs; like the other calls in this file that sit
      # between a result and its use, it is relied on to leave eax/ecx alone
      trace-higher trace
      return close-paren?, dot?
    }
    # '.' -> just report it to the caller
    var dot?/eax: boolean <- dot-token? curr-token
    compare dot?, 0/false
    {
      break-if-=
      trace-higher trace
      return 0/false, 1/true
    }
    # anything that is not a bracket -> atom
    var bracket-token?/eax: boolean <- bracket-token? curr-token
    compare bracket-token?, 0/false
    {
      break-if-!=
      parse-atom curr-token, _out, trace
      break $parse-sexpression:type-check
    }
    # '(' -> list
    var open-paren?/eax: boolean <- open-paren-token? curr-token
    compare open-paren?, 0/false
    {
      break-if-=
      var curr/esi: (addr handle cell) <- copy _out
      allocate-pair curr
      var curr-addr/eax: (addr cell) <- lookup *curr
      var left/edx: (addr handle cell) <- get curr-addr, left
      {
        # the first element is parsed straight into the left slot of the pair
        # allocated above
        var close-paren?/eax: boolean <- copy 0/false
        var dot?/ecx: boolean <- copy 0/false
        close-paren?, dot? <- parse-sexpression tokens, left, trace
        {
          compare dot?, 0/false
          break-if-=
          error trace, "'.' cannot be at the start of a list"
          trace-higher trace
          return 1/true, dot?
        }
        # ')' immediately -> done; the pair stays exactly as allocated
        compare close-paren?, 0/false
        break-if-!=
        var curr-addr/eax: (addr cell) <- lookup *curr
        curr <- get curr-addr, right
        # later elements are parsed into a scratch handle first, because we
        # only learn afterwards whether we got an element, a '.' or the ')'
        var tmp-storage: (handle cell)
        var tmp/edx: (addr handle cell) <- address tmp-storage
        $parse-sexpression:list-loop: {
          var close-paren?/eax: boolean <- copy 0/false
          var dot?/ecx: boolean <- copy 0/false
          close-paren?, dot? <- parse-sexpression tokens, tmp, trace
          # '.' -> the rest of the list goes directly into the current right
          # slot; parse-dot-tail also consumes the closing ')'
          compare dot?, 0/false
          {
            break-if-=
            parse-dot-tail tokens, curr, trace
            trace-higher trace
            return 0/false, 0/false
          }
          allocate-pair curr
          # ')' -> done, leaving the freshly allocated pair unfilled
          # (presumably how the end of a list is represented -- confirm
          # against the cell definitions)
          compare close-paren?, 0/false
          break-if-!=
          var curr-addr/eax: (addr cell) <- lookup *curr
          var left/ecx: (addr handle cell) <- get curr-addr, left
          copy-object tmp, left
          # advance to the right slot for the next element
          curr <- get curr-addr, right
          loop
        }
      }
      break $parse-sexpression:type-check
    }
    # ')' -> report it to the caller
    var close-paren?/eax: boolean <- close-paren-token? curr-token
    compare close-paren?, 0/false
    {
      break-if-=
      trace-higher trace
      return 1/true, 0/false
    }
    # any other bracket-like token: record an error that includes its text
    var stream-storage: (stream byte 0x40)
    var stream/edx: (addr stream byte) <- address stream-storage
    write stream, "unexpected token "
    var curr-token-data-ah/eax: (addr handle stream byte) <- get curr-token, text-data
    var curr-token-data/eax: (addr stream byte) <- lookup *curr-token-data-ah
    rewind-stream curr-token-data
    write-stream stream, curr-token-data
    trace trace, "error", stream
  }
  trace-higher trace
  return 0/false, 0/false
}
147
# Convert a single token into a cell stored in '_out':
#   number token  -> number cell; the token text is parsed as a decimal
#                    integer and stored as a float
#   stream token  -> stream cell
#   anything else -> symbol cell
# Stream and symbol cells receive a copy of the token's text-data handle.
# Each step is recorded in the trace under the "read" label.
fn parse-atom _curr-token: (addr cell), _out: (addr handle cell), trace: (addr trace) {
  trace-text trace, "read", "parse atom"
  var token/ecx: (addr cell) <- copy _curr-token
  var token-text-ah/eax: (addr handle stream byte) <- get token, text-data
  var _token-text/eax: (addr stream byte) <- lookup *token-text-ah
  # eax is about to be reused, so keep the token's text in esi
  var token-text/esi: (addr stream byte) <- copy _token-text
  trace trace, "read", token-text
  # case 1: number
  var is-number?/eax: boolean <- number-token? token
  compare is-number?, 0/false
  {
    break-if-=
    rewind-stream token-text
    var _n/eax: int <- parse-decimal-int-from-stream token-text
    var n/ecx: int <- copy _n
    var n-float/xmm0: float <- convert n
    allocate-number _out
    var result-ah/eax: (addr handle cell) <- copy _out
    var result/eax: (addr cell) <- lookup *result-ah
    var number-slot/edi: (addr float) <- get result, number-data
    copy-to *number-slot, n-float
    # trace what we produced
    {
      var msg-storage: (stream byte 0x40)
      var msg/ecx: (addr stream byte) <- address msg-storage
      write msg, "=> number "
      print-number result, msg, 0/no-trace
      trace trace, "read", msg
    }
    return
  }
  # case 2: stream or symbol -- only the kind of cell allocated differs
  var is-stream?/eax: boolean <- stream-token? token
  $parse-atom:allocate: {
    compare is-stream?, 0/false
    {
      break-if-=
      allocate-stream _out
      break $parse-atom:allocate
    }
    allocate-symbol _out
  }
  # either way, the new cell gets the token's text-data handle
  var result-ah/eax: (addr handle cell) <- copy _out
  var result/eax: (addr cell) <- lookup *result-ah
  var src-ah/ecx: (addr handle stream byte) <- get token, text-data
  var dest-ah/edx: (addr handle stream byte) <- get result, text-data
  copy-object src-ah, dest-ah
  # trace what we produced (the same message is used for streams and symbols)
  {
    var msg-storage: (stream byte 0x40)
    var msg/ecx: (addr stream byte) <- address msg-storage
    write msg, "=> symbol "
    print-symbol result, msg, 0/no-trace
    trace trace, "read", msg
  }
}
205
# Parse what follows a '.' inside a list: exactly one expression, stored in
# '_out', and then the ')' that closes the list.
# Anything else is reported with 'error' on 'trace'; there are no outputs.
fn parse-dot-tail tokens: (addr stream cell), _out: (addr handle cell), trace: (addr trace) {
  var out/edi: (addr handle cell) <- copy _out
  var close-paren?/eax: boolean <- copy 0/false
  var dot?/ecx: boolean <- copy 0/false
  # step 1: the expression after the '.'
  close-paren?, dot? <- parse-sexpression tokens, out, trace
  compare close-paren?, 0/false
  {
    break-if-=
    error trace, "'. )' makes no sense"
    return
  }
  compare dot?, 0/false
  {
    break-if-=
    error trace, "'. .' makes no sense"
    return
  }
  # step 2: the next thing must be the closing ')'. Parse into a scratch
  # handle; only the two flags matter.
  var dummy: (handle cell)
  var dummy-ah/edi: (addr handle cell) <- address dummy
  close-paren?, dot? <- parse-sexpression tokens, dummy-ah, trace
  # Look for a second '.' before looking for ')'. A '.' token comes back with
  # close-paren? false, so testing close-paren? first would report
  # "multiple expressions" for '(a . b . c)' and never reach this message.
  compare dot?, 0/false
  {
    break-if-=
    error trace, "cannot have two dots in a single list"
    return
  }
  compare close-paren?, 0/false
  {
    break-if-!=
    error trace, "cannot have multiple expressions between '.' and ')'"
    return
  }
}
239 }