https://github.com/akkartik/mu/blob/main/shell/tokenize.mu
1
2
3
4
# Split the contents of 'in' into tokens, appending one cell per token to 'out'.
# Stops when the scan of 'in' is done, or as soon as 'trace' reports an error.
fn tokenize in: (addr gap-buffer), out: (addr stream cell), trace: (addr trace) {
  trace-text trace, "read", "tokenize"
  trace-lower trace
  rewind-gap-buffer in
  # a single scratch cell is refilled for each token and then written to 'out'
  var scratch-storage: cell
  var scratch/edx: (addr cell) <- address scratch-storage
  {
    skip-whitespace-from-gap-buffer in
    var exhausted?/eax: boolean <- gap-buffer-scan-done? in
    compare exhausted?, 0/false
    break-if-!=
    # set up the text stream the next token will be written into
    var text-ah/eax: (addr handle stream byte) <- get scratch, text-data
    populate-stream text-ah, 0x40/max-token-size
    # read one token
    next-token in, scratch, trace
    # bail out on the first error
    # NOTE(review): this return skips the 'trace-higher' below, leaving the
    # trace one level deeper -- confirm that is intended on error paths
    {
      var failed?/eax: boolean <- has-errors? trace
      compare failed?, 0/false
      break-if-=
      return
    }
    write-to-stream out, scratch
    loop
  }
  trace-higher trace
}
32
# Read one token from 'in' into the text stream of the cell '_out-cell'.
#
# After skipping whitespace, dispatches on the next grapheme:
#   decimal digit     -> next-number-token
#   symbol grapheme   -> next-symbol-token
#   bracket grapheme  -> next-bracket-token (the bracket is read here first)
#   operator grapheme -> next-operator-token
# Anything else is reported as an error on 'trace'.
fn next-token in: (addr gap-buffer), _out-cell: (addr cell), trace: (addr trace) {
  trace-text trace, "read", "next-token"
  trace-lower trace
  var out-cell/eax: (addr cell) <- copy _out-cell
  var out-ah/eax: (addr handle stream byte) <- get out-cell, text-data
  var _out/eax: (addr stream byte) <- lookup *out-ah
  var out/edi: (addr stream byte) <- copy _out
  $next-token:body: {
    clear-stream out
    skip-whitespace-from-gap-buffer in
    var g/eax: grapheme <- peek-from-gap-buffer in
    # log the code of the grapheme we're dispatching on
    {
      var stream-storage: (stream byte 0x40)
      var stream/esi: (addr stream byte) <- address stream-storage
      write stream, "next: "
      var gval/eax: int <- copy g
      write-int32-hex stream, gval
      trace trace, "read", stream
    }
    # digit
    {
      var digit?/eax: boolean <- decimal-digit? g
      compare digit?, 0/false
      break-if-=
      next-number-token in, out, trace
      break $next-token:body
    }
    # other symbol grapheme
    {
      var symbol?/eax: boolean <- symbol-grapheme? g
      compare symbol?, 0/false
      break-if-=
      next-symbol-token in, out, trace
      break $next-token:body
    }
    # bracket: always a single-grapheme token
    {
      var bracket?/eax: boolean <- bracket-grapheme? g
      compare bracket?, 0/false
      break-if-=
      var g/eax: grapheme <- read-from-gap-buffer in
      next-bracket-token g, out, trace
      break $next-token:body
    }
    # operator
    {
      var operator?/eax: boolean <- operator-grapheme? g
      compare operator?, 0/false
      break-if-=
      next-operator-token in, out, trace
      break $next-token:body
    }
    # None of the classes above matched (e.g. 0x22 double-quote, 0x23 hash,
    # 0x60 backquote, which symbol-grapheme? rejects and the bracket and
    # operator checks don't list). Previously nothing was read and nothing was
    # reported here, so a caller looping on next-token would see the same
    # grapheme forever. Read it so the scan always advances, and flag an error.
    # NOTE(review): assumes 'error' records a message that has-errors? will
    # subsequently report, as its use in next-number-token suggests -- confirm
    var unrecognized/eax: grapheme <- read-from-gap-buffer in
    error trace, "unrecognized character"
  }
  trace-higher trace
  # log the token text
  var stream-storage: (stream byte 0x40)
  var stream/eax: (addr stream byte) <- address stream-storage
  write stream, "=> "
  rewind-stream out
  write-stream stream, out
  trace trace, "read", stream
}
94
# Append a run of symbol graphemes from 'in' to 'out'.
# The run ends when the scan of 'in' is done, or at the first peeked grapheme
# for which symbol-grapheme? is false.
fn next-symbol-token in: (addr gap-buffer), out: (addr stream byte), trace: (addr trace) {
  trace-text trace, "read", "looking for a symbol"
  trace-lower trace
  $next-symbol-token:scan: {
    var exhausted?/eax: boolean <- gap-buffer-scan-done? in
    compare exhausted?, 0/false
    break-if-!=
    var peeked/eax: grapheme <- peek-from-gap-buffer in
    # log the code of the grapheme under consideration
    {
      var msg-storage: (stream byte 0x40)
      var msg/esi: (addr stream byte) <- address msg-storage
      write msg, "next: "
      var code/eax: int <- copy peeked
      write-int32-hex msg, code
      trace trace, "read", msg
    }
    # a non-symbol grapheme ends the token
    {
      var in-symbol?/eax: boolean <- symbol-grapheme? peeked
      compare in-symbol?, 0/false
      break-if-!=
      trace-text trace, "read", "stop"
      break $next-symbol-token:scan
    }
    # otherwise read it and append it to the token
    var consumed/eax: grapheme <- read-from-gap-buffer in
    write-grapheme out, consumed
    loop
  }
  trace-higher trace
  # log the token text
  var result-storage: (stream byte 0x40)
  var result/esi: (addr stream byte) <- address result-storage
  write result, "=> "
  rewind-stream out
  write-stream result, out
  trace trace, "read", result
}
131
# Append a run of operator graphemes from 'in' to 'out'.
# The run ends when the scan of 'in' is done, or at the first peeked grapheme
# for which operator-grapheme? is false.
fn next-operator-token in: (addr gap-buffer), out: (addr stream byte), trace: (addr trace) {
  trace-text trace, "read", "looking for a operator"
  trace-lower trace
  $next-operator-token:scan: {
    var exhausted?/eax: boolean <- gap-buffer-scan-done? in
    compare exhausted?, 0/false
    break-if-!=
    var peeked/eax: grapheme <- peek-from-gap-buffer in
    # log the code of the grapheme under consideration
    {
      var msg-storage: (stream byte 0x40)
      var msg/esi: (addr stream byte) <- address msg-storage
      write msg, "next: "
      var code/eax: int <- copy peeked
      write-int32-hex msg, code
      trace trace, "read", msg
    }
    # a non-operator grapheme ends the token
    {
      var in-operator?/eax: boolean <- operator-grapheme? peeked
      compare in-operator?, 0/false
      break-if-!=
      trace-text trace, "read", "stop"
      break $next-operator-token:scan
    }
    # otherwise read it and append it to the token
    var consumed/eax: grapheme <- read-from-gap-buffer in
    write-grapheme out, consumed
    loop
  }
  trace-higher trace
  # log the token text
  var result-storage: (stream byte 0x40)
  var result/esi: (addr stream byte) <- address result-storage
  write result, "=> "
  rewind-stream out
  write-stream result, out
  trace trace, "read", result
}
168
# Append a run of decimal digits from 'in' to 'out'.
# The run ends when the scan of 'in' is done, or at the first peeked grapheme
# for which symbol-grapheme? is false. A symbol grapheme that is not a decimal
# digit is reported on 'trace' as "invalid number".
fn next-number-token in: (addr gap-buffer), out: (addr stream byte), trace: (addr trace) {
  trace-text trace, "read", "looking for a number"
  trace-lower trace
  $next-number-token:scan: {
    var exhausted?/eax: boolean <- gap-buffer-scan-done? in
    compare exhausted?, 0/false
    break-if-!=
    var peeked/eax: grapheme <- peek-from-gap-buffer in
    # log the code of the grapheme under consideration
    {
      var msg-storage: (stream byte 0x40)
      var msg/esi: (addr stream byte) <- address msg-storage
      write msg, "next: "
      var code/eax: int <- copy peeked
      write-int32-hex msg, code
      trace trace, "read", msg
    }
    # a non-symbol grapheme ends the number
    {
      var in-symbol?/eax: boolean <- symbol-grapheme? peeked
      compare in-symbol?, 0/false
      break-if-!=
      trace-text trace, "read", "stop"
      break $next-number-token:scan
    }
    # a symbol grapheme that isn't a digit makes the number malformed
    # NOTE(review): this return skips the 'trace-higher' below, leaving the
    # trace one level deeper -- confirm that is intended on error paths
    {
      var numeric?/eax: boolean <- decimal-digit? peeked
      compare numeric?, 0/false
      break-if-!=
      error trace, "invalid number"
      return
    }
    # a digit: read it and append it to the token
    trace-text trace, "read", "append"
    var consumed/eax: grapheme <- read-from-gap-buffer in
    write-grapheme out, consumed
    loop
  }
  trace-higher trace
}
208
# Emit the single grapheme 'g' to 'out' as a bracket token, and log the
# resulting contents of 'out' to 'trace'.
fn next-bracket-token g: grapheme, out: (addr stream byte), trace: (addr trace) {
  trace-text trace, "read", "bracket"
  write-grapheme out, g
  # log the token text
  var result-storage: (stream byte 0x40)
  var result/esi: (addr stream byte) <- address result-storage
  write result, "=> "
  rewind-stream out
  write-stream result, out
  trace trace, "read", result
}
219
# Return true unless 'g' is one of the code points listed below:
# whitespace, quotes, brackets, hash, or an operator grapheme.
fn symbol-grapheme? g: grapheme -> _/eax: boolean {
  ## whitespace
  {
    compare g, 9/tab
    break-if-!=
    return 0/false
  }
  {
    compare g, 0xa/newline
    break-if-!=
    return 0/false
  }
  {
    compare g, 0x20/space
    break-if-!=
    return 0/false
  }
  ## quotes
  {
    compare g, 0x22/double-quote
    break-if-!=
    return 0/false
  }
  {
    compare g, 0x60/backquote
    break-if-!=
    return 0/false
  }
  ## brackets
  {
    compare g, 0x28/open-paren
    break-if-!=
    return 0/false
  }
  {
    compare g, 0x29/close-paren
    break-if-!=
    return 0/false
  }
  {
    compare g, 0x5b/open-square-bracket
    break-if-!=
    return 0/false
  }
  {
    compare g, 0x5d/close-square-bracket
    break-if-!=
    return 0/false
  }
  {
    compare g, 0x7b/open-curly-bracket
    break-if-!=
    return 0/false
  }
  {
    compare g, 0x7d/close-curly-bracket
    break-if-!=
    return 0/false
  }
  ## hash
  {
    compare g, 0x23/hash
    break-if-!=
    return 0/false
  }
  ## the same code points that operator-grapheme? lists
  {
    compare g, 0x25/percent
    break-if-!=
    return 0/false
  }
  {
    compare g, 0x26/ampersand
    break-if-!=
    return 0/false
  }
  {
    compare g, 0x27/single-quote
    break-if-!=
    return 0/false
  }
  {
    compare g, 0x2a/asterisk
    break-if-!=
    return 0/false
  }
  {
    compare g, 0x2b/plus
    break-if-!=
    return 0/false
  }
  {
    compare g, 0x2c/comma
    break-if-!=
    return 0/false
  }
  {
    compare g, 0x2d/dash
    break-if-!=
    return 0/false
  }
  {
    compare g, 0x2e/period
    break-if-!=
    return 0/false
  }
  {
    compare g, 0x2f/slash
    break-if-!=
    return 0/false
  }
  {
    compare g, 0x3a/colon
    break-if-!=
    return 0/false
  }
  {
    compare g, 0x3b/semi-colon
    break-if-!=
    return 0/false
  }
  {
    compare g, 0x3c/less-than
    break-if-!=
    return 0/false
  }
  {
    compare g, 0x3d/equal
    break-if-!=
    return 0/false
  }
  {
    compare g, 0x3e/greater-than
    break-if-!=
    return 0/false
  }
  {
    compare g, 0x40/at-sign
    break-if-!=
    return 0/false
  }
  {
    compare g, 0x5c/backslash
    break-if-!=
    return 0/false
  }
  {
    compare g, 0x5e/caret
    break-if-!=
    return 0/false
  }
  {
    compare g, 0x7c/vertical-line
    break-if-!=
    return 0/false
  }
  {
    compare g, 0x7e/tilde
    break-if-!=
    return 0/false
  }
  # everything else can be part of a symbol
  return 1/true
}
386
# Return true if 'g' is one of the six brackets: ( ) [ ] { }
fn bracket-grapheme? g: grapheme -> _/eax: boolean {
  # parens
  {
    compare g, 0x28/open-paren
    break-if-!=
    return 1/true
  }
  {
    compare g, 0x29/close-paren
    break-if-!=
    return 1/true
  }
  # square brackets
  {
    compare g, 0x5b/open-square-bracket
    break-if-!=
    return 1/true
  }
  {
    compare g, 0x5d/close-square-bracket
    break-if-!=
    return 1/true
  }
  # curly brackets
  {
    compare g, 0x7b/open-curly-bracket
    break-if-!=
    return 1/true
  }
  {
    compare g, 0x7d/close-curly-bracket
    break-if-!=
    return 1/true
  }
  return 0/false
}
420
# Return true if 'g' is one of the operator code points listed below.
# Code points not listed (including 0x23 hash and 0x24 dollar) yield false.
fn operator-grapheme? g: grapheme -> _/eax: boolean {
  compare g, 0x25/percent
  {
    break-if-!=
    # the label used to read 1/false, contradicting the value actually returned
    return 1/true
  }
  compare g, 0x26/ampersand
  {
    break-if-!=
    return 1/true
  }
  compare g, 0x27/single-quote
  {
    break-if-!=
    return 1/true
  }
  compare g, 0x2a/asterisk
  {
    break-if-!=
    return 1/true
  }
  compare g, 0x2b/plus
  {
    break-if-!=
    return 1/true
  }
  compare g, 0x2c/comma
  {
    break-if-!=
    return 1/true
  }
  compare g, 0x2d/dash
  {
    break-if-!=
    return 1/true
  }
  compare g, 0x2e/period
  {
    break-if-!=
    return 1/true
  }
  compare g, 0x2f/slash
  {
    break-if-!=
    return 1/true
  }
  compare g, 0x3a/colon
  {
    break-if-!=
    return 1/true
  }
  compare g, 0x3b/semi-colon
  {
    break-if-!=
    return 1/true
  }
  compare g, 0x3c/less-than
  {
    break-if-!=
    return 1/true
  }
  compare g, 0x3d/equal
  {
    break-if-!=
    return 1/true
  }
  compare g, 0x3e/greater-than
  {
    break-if-!=
    return 1/true
  }
  compare g, 0x40/at-sign
  {
    break-if-!=
    return 1/true
  }
  compare g, 0x5c/backslash
  {
    break-if-!=
    return 1/true
  }
  compare g, 0x5e/caret
  {
    break-if-!=
    return 1/true
  }
  compare g, 0x7c/vertical-line
  {
    break-if-!=
    return 1/true
  }
  compare g, 0x7e/tilde
  {
    break-if-!=
    return 1/true
  }
  return 0/false
}
522
# Return whether the first grapheme of the token's text is a decimal digit.
# Rewinds the token's text stream before reading from it.
fn number-token? _in: (addr cell) -> _/eax: boolean {
  var self/eax: (addr cell) <- copy _in
  var text-ah/eax: (addr handle stream byte) <- get self, text-data
  var text/eax: (addr stream byte) <- lookup *text-ah
  rewind-stream text
  var first/eax: grapheme <- read-grapheme text
  var numeric?/eax: boolean <- decimal-digit? first
  return numeric?
}
532
# Return whether the first grapheme of the token's text is a bracket.
# Rewinds the token's text stream before reading from it.
fn bracket-token? _in: (addr cell) -> _/eax: boolean {
  var self/eax: (addr cell) <- copy _in
  var text-ah/eax: (addr handle stream byte) <- get self, text-data
  var text/eax: (addr stream byte) <- lookup *text-ah
  rewind-stream text
  var first/eax: grapheme <- read-grapheme text
  var is-bracket?/eax: boolean <- bracket-grapheme? first
  return is-bracket?
}
542
# Return whether the first grapheme of the token's text is a single quote.
# Rewinds the token's text stream before reading from it.
fn quote-token? _in: (addr cell) -> _/eax: boolean {
  var self/eax: (addr cell) <- copy _in
  var text-ah/eax: (addr handle stream byte) <- get self, text-data
  var text/eax: (addr stream byte) <- lookup *text-ah
  rewind-stream text
  var first/eax: grapheme <- read-grapheme text
  {
    compare first, 0x27/single-quote
    break-if-!=
    return 1/true
  }
  return 0/false
}
556
# Return whether the first grapheme of the token's text is '('.
# Rewinds the token's text stream before reading from it.
fn open-paren-token? _in: (addr cell) -> _/eax: boolean {
  var self/eax: (addr cell) <- copy _in
  var text-ah/eax: (addr handle stream byte) <- get self, text-data
  var text/eax: (addr stream byte) <- lookup *text-ah
  rewind-stream text
  var first/eax: grapheme <- read-grapheme text
  {
    compare first, 0x28/open-paren
    break-if-!=
    return 1/true
  }
  return 0/false
}
570
# Return whether the first grapheme of the token's text is ')'.
# Rewinds the token's text stream before reading from it.
fn close-paren-token? _in: (addr cell) -> _/eax: boolean {
  var in/eax: (addr cell) <- copy _in
  var in-data-ah/eax: (addr handle stream byte) <- get in, text-data
  var in-data/eax: (addr stream byte) <- lookup *in-data-ah
  rewind-stream in-data
  var g/eax: grapheme <- read-grapheme in-data
  # 0x29 is ')'; the label here used to read 'open-paren'
  compare g, 0x29/close-paren
  {
    break-if-!=
    return 1/true
  }
  return 0/false
}