https://github.com/akkartik/mu/blob/main/shell/tokenize.mu
# We reuse the cell data structure for tokenization.
# Token cells are special, though. They're always atoms and they always have
# text-data. Their type is left at 0/uninitialized, except for stream literals
# ([...]), which next-token tags with type 3/stream.
  4 
  5 fn tokenize in: (addr gap-buffer), out: (addr stream cell), trace: (addr trace) {
  6   trace-text trace, "tokenize", "tokenize"
  7   trace-lower trace
  8   rewind-gap-buffer in
  9   var token-storage: cell
 10   var token/edx: (addr cell) <- address token-storage
 11   {
 12     skip-whitespace-from-gap-buffer in
 13     var done?/eax: boolean <- gap-buffer-scan-done? in
 14     compare done?, 0/false
 15     break-if-!=
 16     #
 17     next-token in, token, trace
 18     var error?/eax: boolean <- has-errors? trace
 19     compare error?, 0/false
 20     {
 21       break-if-=
 22       return
 23     }
 24     var skip?/eax: boolean <- comment-token? token
 25     compare skip?, 0/false
 26     loop-if-!=
 27     write-to-stream out, token  # shallow-copy text-data
 28     loop
 29   }
 30   trace-higher trace
 31 }
 32 
 33 fn test-tokenize-quote {
 34   var in-storage: gap-buffer
 35   var in/esi: (addr gap-buffer) <- address in-storage
 36   initialize-gap-buffer-with in, "'(a)"
 37   #
 38   var stream-storage: (stream cell 0x10)
 39   var stream/edi: (addr stream cell) <- address stream-storage
 40   #
 41   var trace-storage: trace
 42   var trace/edx: (addr trace) <- address trace-storage
 43   initialize-trace trace, 1/only-errors, 0x10/capacity, 0/visible
 44   tokenize in, stream, trace
 45   #
 46   var curr-token-storage: cell
 47   var curr-token/ebx: (addr cell) <- address curr-token-storage
 48   read-from-stream stream, curr-token
 49   var quote?/eax: boolean <- quote-token? curr-token
 50   check quote?, "F - test-tokenize-quote: quote"
 51   read-from-stream stream, curr-token
 52   var open-paren?/eax: boolean <- open-paren-token? curr-token
 53   check open-paren?, "F - test-tokenize-quote: open paren"
 54   read-from-stream stream, curr-token  # skip a
 55   read-from-stream stream, curr-token
 56   var close-paren?/eax: boolean <- close-paren-token? curr-token
 57   check close-paren?, "F - test-tokenize-quote: close paren"
 58 }
 59 
 60 fn test-tokenize-backquote {
 61   var in-storage: gap-buffer
 62   var in/esi: (addr gap-buffer) <- address in-storage
 63   initialize-gap-buffer-with in, "`(a)"
 64   #
 65   var stream-storage: (stream cell 0x10)
 66   var stream/edi: (addr stream cell) <- address stream-storage
 67   #
 68   var trace-storage: trace
 69   var trace/edx: (addr trace) <- address trace-storage
 70   initialize-trace trace, 1/only-errors, 0x10/capacity, 0/visible
 71   tokenize in, stream, trace
 72   #
 73   var curr-token-storage: cell
 74   var curr-token/ebx: (addr cell) <- address curr-token-storage
 75   read-from-stream stream, curr-token
 76   var backquote?/eax: boolean <- backquote-token? curr-token
 77   check backquote?, "F - test-tokenize-backquote: backquote"
 78   read-from-stream stream, curr-token
 79   var open-paren?/eax: boolean <- open-paren-token? curr-token
 80   check open-paren?, "F - test-tokenize-backquote: open paren"
 81   read-from-stream stream, curr-token  # skip a
 82   read-from-stream stream, curr-token
 83   var close-paren?/eax: boolean <- close-paren-token? curr-token
 84   check close-paren?, "F - test-tokenize-backquote: close paren"
 85 }
 86 
 87 fn test-tokenize-unquote {
 88   var in-storage: gap-buffer
 89   var in/esi: (addr gap-buffer) <- address in-storage
 90   initialize-gap-buffer-with in, ",(a)"
 91   #
 92   var stream-storage: (stream cell 0x10)
 93   var stream/edi: (addr stream cell) <- address stream-storage
 94   #
 95   var trace-storage: trace
 96   var trace/edx: (addr trace) <- address trace-storage
 97   initialize-trace trace, 1/only-errors, 0x10/capacity, 0/visible
 98   tokenize in, stream, trace
 99   #
100   var curr-token-storage: cell
101   var curr-token/ebx: (addr cell) <- address curr-token-storage
102   read-from-stream stream, curr-token
103   var unquote?/eax: boolean <- unquote-token? curr-token
104   check unquote?, "F - test-tokenize-unquote: unquote"
105   read-from-stream stream, curr-token
106   var open-paren?/eax: boolean <- open-paren-token? curr-token
107   check open-paren?, "F - test-tokenize-unquote: open paren"
108   read-from-stream stream, curr-token  # skip a
109   read-from-stream stream, curr-token
110   var close-paren?/eax: boolean <- close-paren-token? curr-token
111   check close-paren?, "F - test-tokenize-unquote: close paren"
112 }
113 
# ",@a" should start with a single ",@" token
fn test-tokenize-unquote-splice {
  # input text
  var input-storage: gap-buffer
  var input/esi: (addr gap-buffer) <- address input-storage
  initialize-gap-buffer-with input, ",@a"
  # token output
  var tokens-storage: (stream cell 0x10)
  var tokens/edi: (addr stream cell) <- address tokens-storage
  # trace
  var t-storage: trace
  var t/edx: (addr trace) <- address t-storage
  initialize-trace t, 1/only-errors, 0x10/capacity, 0/visible
  tokenize input, tokens, t
  # only the first token is checked
  var tok-storage: cell
  var tok/ebx: (addr cell) <- address tok-storage
  read-from-stream tokens, tok
  var is-splice?/eax: boolean <- unquote-splice-token? tok
  check is-splice?, "F - test-tokenize-unquote-splice: unquote-splice"
}
133 
# "(a . b)" => open paren, a, dot, b, close paren
fn test-tokenize-dotted-list {
  # input text
  var input-storage: gap-buffer
  var input/esi: (addr gap-buffer) <- address input-storage
  initialize-gap-buffer-with input, "(a . b)"
  # token output
  var tokens-storage: (stream cell 0x10)
  var tokens/edi: (addr stream cell) <- address tokens-storage
  # trace
  var t-storage: trace
  var t/edx: (addr trace) <- address t-storage
  initialize-trace t, 1/only-errors, 0x10/capacity, 0/visible
  tokenize input, tokens, t
  # check the tokens one at a time
  var tok-storage: cell
  var tok/ebx: (addr cell) <- address tok-storage
  read-from-stream tokens, tok
  var is-open?/eax: boolean <- open-paren-token? tok
  check is-open?, "F - test-tokenize-dotted-list: open paren"
  read-from-stream tokens, tok  # 'a' is read but not checked
  read-from-stream tokens, tok
  var is-dot?/eax: boolean <- dot-token? tok
  check is-dot?, "F - test-tokenize-dotted-list: dot"
  read-from-stream tokens, tok  # 'b' is read but not checked
  read-from-stream tokens, tok
  var is-close?/eax: boolean <- close-paren-token? tok
  check is-close?, "F - test-tokenize-dotted-list: close paren"
}
161 
# "[abc def]" => exactly one stream token whose data is "abc def"
fn test-tokenize-stream-literal {
  # input text
  var input-storage: gap-buffer
  var input/esi: (addr gap-buffer) <- address input-storage
  initialize-gap-buffer-with input, "[abc def]"
  # token output
  var tokens-storage: (stream cell 0x10)
  var tokens/edi: (addr stream cell) <- address tokens-storage
  # trace
  var t-storage: trace
  var t/edx: (addr trace) <- address t-storage
  initialize-trace t, 1/only-errors, 0x10/capacity, 0/visible
  tokenize input, tokens, t
  # the token must be tagged as a stream..
  var tok-storage: cell
  var tok/ebx: (addr cell) <- address tok-storage
  read-from-stream tokens, tok
  var is-stream?/eax: boolean <- stream-token? tok
  check is-stream?, "F - test-tokenize-stream-literal: type"
  # ..hold the text between the brackets..
  var text-ah/eax: (addr handle stream byte) <- get tok, text-data
  var text/eax: (addr stream byte) <- lookup *text-ah
  var same-text?/eax: boolean <- stream-data-equal? text, "abc def"
  check same-text?, "F - test-tokenize-stream-literal"
  # ..and be the only token emitted
  var drained?/eax: boolean <- stream-empty? tokens
  check drained?, "F - test-tokenize-stream-literal: empty?"
}
187 
# "([abc def])" => bracket, stream token "abc def", bracket, and nothing else
fn test-tokenize-stream-literal-in-tree {
  # input text
  var input-storage: gap-buffer
  var input/esi: (addr gap-buffer) <- address input-storage
  initialize-gap-buffer-with input, "([abc def])"
  # token output
  var tokens-storage: (stream cell 0x10)
  var tokens/edi: (addr stream cell) <- address tokens-storage
  # trace
  var t-storage: trace
  var t/edx: (addr trace) <- address t-storage
  initialize-trace t, 1/only-errors, 0x10/capacity, 0/visible
  tokenize input, tokens, t
  # first token: a bracket
  var tok-storage: cell
  var tok/ebx: (addr cell) <- address tok-storage
  read-from-stream tokens, tok
  var is-bracket?/eax: boolean <- bracket-token? tok
  check is-bracket?, "F - test-tokenize-stream-literal-in-tree: open paren"
  # second token: the stream literal
  read-from-stream tokens, tok
  var is-stream?/eax: boolean <- stream-token? tok
  check is-stream?, "F - test-tokenize-stream-literal-in-tree: type"
  var text-ah/eax: (addr handle stream byte) <- get tok, text-data
  var text/eax: (addr stream byte) <- lookup *text-ah
  var same-text?/eax: boolean <- stream-data-equal? text, "abc def"
  check same-text?, "F - test-tokenize-stream-literal-in-tree"
  # third token: a bracket again
  read-from-stream tokens, tok
  var is-bracket?/eax: boolean <- bracket-token? tok
  check is-bracket?, "F - test-tokenize-stream-literal-in-tree: close paren"
  # nothing should remain
  var drained?/eax: boolean <- stream-empty? tokens
  check drained?, "F - test-tokenize-stream-literal-in-tree: empty?"
}
219 
# Read a single token from 'in' into the text-data of '_out-cell'.
# The cell's type is reset to 0/uninitialized, and then set to 3/stream only
# for [...] literals.
# Reports an error on 'trace' (and returns early) for an unbalanced ']'.
# Aborts if the next grapheme doesn't match any case below.
fn next-token in: (addr gap-buffer), _out-cell: (addr cell), trace: (addr trace) {
  trace-text trace, "tokenize", "next-token"
  trace-lower trace
  # peek at the grapheme that decides what kind of token this is
  var _first/eax: grapheme <- peek-from-gap-buffer in
  var first/ecx: grapheme <- copy _first
  {
    var msg-storage: (stream byte 0x40)
    var msg/esi: (addr stream byte) <- address msg-storage
    write msg, "next: "
    var first-int/eax: int <- copy first
    write-int32-hex msg, first-int
    trace trace, "tokenize", msg
  }
  var dest/eax: (addr cell) <- copy _out-cell
  {
    var dest-type/eax: (addr int) <- get dest, type
    copy-to *dest-type, 0/uninitialized
  }
  var text-ah/edi: (addr handle stream byte) <- get dest, text-data
  $next-token:allocate: {
    # Most tokens get a small buffer.
    {
      compare first, 0x5b/open-square-bracket
      break-if-=
      populate-stream text-ah, 0x40
      break $next-token:allocate
    }
    # Streams get a large one.
    # Sometimes a whole function definition will need to fit in it.
    populate-stream text-ah, 0x400/max-definition-size=1KB
  }
  var _text/eax: (addr stream byte) <- lookup *text-ah
  var text/edi: (addr stream byte) <- copy _text
  clear-stream text
  # the order of the cases below matters; the first match wins
  $next-token:case: {
    # '[' starts a stream literal
    {
      compare first, 0x5b/open-square-bracket
      break-if-!=
      var opener/eax: grapheme <- read-from-gap-buffer in  # skip open bracket
      next-stream-token in, text, trace
      # only streams get a type
      var dest/eax: (addr cell) <- copy _out-cell
      var dest-type/eax: (addr int) <- get dest, type
      copy-to *dest-type, 3/stream
      break $next-token:case
    }
    # '#' starts a comment
    {
      compare first, 0x23/comment
      break-if-!=
      rest-of-line in, text, trace
      break $next-token:case
    }
    # a digit starts a number
    {
      var number?/eax: boolean <- decimal-digit? first
      compare number?, 0/false
      break-if-=
      next-number-token in, text, trace
      break $next-token:case
    }
    # any other symbol grapheme starts a symbol
    {
      var sym?/eax: boolean <- symbol-grapheme? first
      compare sym?, 0/false
      break-if-=
      next-symbol-token in, text, trace
      break $next-token:case
    }
    # a ']' here has no matching '['
    {
      compare first, 0x5d/close-square-bracket
      break-if-!=
      error trace, "unbalanced ']'"
      return
    }
    # remaining brackets are tokens by themselves
    {
      var bracket?/eax: boolean <- bracket-grapheme? first
      compare bracket?, 0/false
      break-if-=
      var bracket/eax: grapheme <- read-from-gap-buffer in
      next-bracket-token bracket, text, trace
      break $next-token:case
    }
    # runs of operator graphemes
    {
      var op?/eax: boolean <- operator-grapheme? first
      compare op?, 0/false
      break-if-=
      next-operator-token in, text, trace
      break $next-token:case
    }
    # ' (quote)
    {
      compare first, 0x27/single-quote
      break-if-!=
      var quote/eax: grapheme <- read-from-gap-buffer in  # consume
      write-grapheme text, quote
      break $next-token:case
    }
    # ` (backquote)
    {
      compare first, 0x60/backquote
      break-if-!=
      var backquote/eax: grapheme <- read-from-gap-buffer in  # consume
      write-grapheme text, backquote
      break $next-token:case
    }
    # , (unquote), optionally followed by @ (unquote-splice)
    {
      compare first, 0x2c/comma
      break-if-!=
      var comma/eax: grapheme <- read-from-gap-buffer in  # consume
      write-grapheme text, comma
      {
        var maybe-at/eax: grapheme <- peek-from-gap-buffer in
        compare maybe-at, 0x40/at-sign
        break-if-!=
        maybe-at <- read-from-gap-buffer in
        write-grapheme text, maybe-at
      }
      break $next-token:case
    }
    abort "unknown token type"
  }
  trace-higher trace
  # record the finished token in the trace
  var summary-storage: (stream byte 0x400)  # maximum possible token size (next-stream-token)
  var summary/eax: (addr stream byte) <- address summary-storage
  write summary, "=> "
  rewind-stream text
  write-stream summary, text
  trace trace, "tokenize", summary
}
355 
# Copy graphemes from 'in' to 'out' for as long as symbol-grapheme? accepts
# them. The first grapheme that isn't accepted is left unread in 'in'.
fn next-symbol-token in: (addr gap-buffer), out: (addr stream byte), trace: (addr trace) {
  trace-text trace, "tokenize", "looking for a symbol"
  trace-lower trace
  $next-symbol-token:loop: {
    # stop at end of input
    var exhausted?/eax: boolean <- gap-buffer-scan-done? in
    compare exhausted?, 0/false
    break-if-!=
    # look before consuming
    var upcoming/eax: grapheme <- peek-from-gap-buffer in
    {
      var msg-storage: (stream byte 0x40)
      var msg/esi: (addr stream byte) <- address msg-storage
      write msg, "next: "
      var upcoming-int/eax: int <- copy upcoming
      write-int32-hex msg, upcoming-int
      trace trace, "tokenize", msg
    }
    # the symbol ends at the first non-symbol grapheme
    {
      var more?/eax: boolean <- symbol-grapheme? upcoming
      compare more?, 0/false
      break-if-!=
      trace-text trace, "tokenize", "stop"
      break $next-symbol-token:loop
    }
    var consumed/eax: grapheme <- read-from-gap-buffer in
    write-grapheme out, consumed
    loop
  }
  trace-higher trace
  # record the result in the trace
  var summary-storage: (stream byte 0x40)
  var summary/esi: (addr stream byte) <- address summary-storage
  write summary, "=> "
  rewind-stream out
  write-stream summary, out
  trace trace, "tokenize", summary
}
392 
# Copy graphemes from 'in' to 'out' for as long as operator-grapheme? accepts
# them. The first grapheme that isn't accepted is left unread in 'in'.
fn next-operator-token in: (addr gap-buffer), out: (addr stream byte), trace: (addr trace) {
  trace-text trace, "tokenize", "looking for a operator"
  trace-lower trace
  $next-operator-token:loop: {
    # stop at end of input
    var exhausted?/eax: boolean <- gap-buffer-scan-done? in
    compare exhausted?, 0/false
    break-if-!=
    # look before consuming
    var upcoming/eax: grapheme <- peek-from-gap-buffer in
    {
      var msg-storage: (stream byte 0x40)
      var msg/esi: (addr stream byte) <- address msg-storage
      write msg, "next: "
      var upcoming-int/eax: int <- copy upcoming
      write-int32-hex msg, upcoming-int
      trace trace, "tokenize", msg
    }
    # the operator ends at the first non-operator grapheme
    {
      var more?/eax: boolean <- operator-grapheme? upcoming
      compare more?, 0/false
      break-if-!=
      trace-text trace, "tokenize", "stop"
      break $next-operator-token:loop
    }
    var consumed/eax: grapheme <- read-from-gap-buffer in
    write-grapheme out, consumed
    loop
  }
  trace-higher trace
  # record the result in the trace
  var summary-storage: (stream byte 0x40)
  var summary/esi: (addr stream byte) <- address summary-storage
  write summary, "=> "
  rewind-stream out
  write-stream summary, out
  trace trace, "tokenize", summary
}
429 
# Copy a run of decimal digits from 'in' to 'out'.
# The number ends at the first grapheme symbol-grapheme? rejects (left unread).
# A symbol grapheme that isn't a digit is an error ("invalid number").
fn next-number-token in: (addr gap-buffer), out: (addr stream byte), trace: (addr trace) {
  trace-text trace, "tokenize", "looking for a number"
  trace-lower trace
  $next-number-token:loop: {
    # stop at end of input
    var exhausted?/eax: boolean <- gap-buffer-scan-done? in
    compare exhausted?, 0/false
    break-if-!=
    # look before consuming
    var upcoming/eax: grapheme <- peek-from-gap-buffer in
    {
      var msg-storage: (stream byte 0x40)
      var msg/esi: (addr stream byte) <- address msg-storage
      write msg, "next: "
      var upcoming-int/eax: int <- copy upcoming
      write-int32-hex msg, upcoming-int
      trace trace, "tokenize", msg
    }
    # a non-symbol grapheme ends the number
    {
      var sym?/eax: boolean <- symbol-grapheme? upcoming
      compare sym?, 0/false
      break-if-!=
      trace-text trace, "tokenize", "stop"
      break $next-number-token:loop
    }
    # a symbol grapheme that isn't a digit can't be part of a number
    {
      var numeric?/eax: boolean <- decimal-digit? upcoming
      compare numeric?, 0/false
      break-if-!=
      error trace, "invalid number"
      return
    }
    trace-text trace, "tokenize", "append"
    var consumed/eax: grapheme <- read-from-gap-buffer in
    write-grapheme out, consumed
    loop
  }
  trace-higher trace
}
469 
# Copy everything up to the next ']' from 'in' to 'out'.
# The ']' itself is consumed but not copied. The caller has already consumed
# the opening '['.
# Running out of input before a ']' is an error ("unbalanced '['").
fn next-stream-token in: (addr gap-buffer), out: (addr stream byte), trace: (addr trace) {
  trace-text trace, "tokenize", "stream"
  {
    {
      var exhausted?/eax: boolean <- gap-buffer-scan-done? in
      compare exhausted?, 0/false
      break-if-=
      error trace, "unbalanced '['"
      return
    }
    var curr/eax: grapheme <- read-from-gap-buffer in
    compare curr, 0x5d/close-square-bracket
    break-if-=
    write-grapheme out, curr
    loop
  }
  # record the result in the trace
  var summary-storage: (stream byte 0x400)  # max-definition-size
  var summary/esi: (addr stream byte) <- address summary-storage
  write summary, "=> "
  rewind-stream out
  write-stream summary, out
  trace trace, "tokenize", summary
}
493 
# Emit the single grapheme 'g' as a token in 'out', and log it to 'trace'.
fn next-bracket-token g: grapheme, out: (addr stream byte), trace: (addr trace) {
  trace-text trace, "tokenize", "bracket"
  write-grapheme out, g
  # record the result in the trace
  var summary-storage: (stream byte 0x40)
  var summary/esi: (addr stream byte) <- address summary-storage
  write summary, "=> "
  rewind-stream out
  write-stream summary, out
  trace trace, "tokenize", summary
}
504 
# Copy everything up to the next newline from 'in' to 'out'.
# The newline is consumed but not copied.
# If the input ends first, return immediately without logging the result.
fn rest-of-line in: (addr gap-buffer), out: (addr stream byte), trace: (addr trace) {
  trace-text trace, "tokenize", "comment"
  {
    {
      var exhausted?/eax: boolean <- gap-buffer-scan-done? in
      compare exhausted?, 0/false
      break-if-=
      return
    }
    var curr/eax: grapheme <- read-from-gap-buffer in
    compare curr, 0xa/newline
    break-if-=
    write-grapheme out, curr
    loop
  }
  # record the result in the trace
  var summary-storage: (stream byte 0x80)
  var summary/esi: (addr stream byte) <- address summary-storage
  write summary, "=> "
  rewind-stream out
  write-stream summary, out
  trace trace, "tokenize", summary
}
527 
# Can 'g' be part of a symbol?
# Returns 0/false for whitespace, quotes, brackets and the punctuation listed
# below; every other grapheme returns 1/true.
# (Digits count as symbol graphemes here; callers check for them separately.)
fn symbol-grapheme? g: grapheme -> _/eax: boolean {
  ## whitespace
  {
    compare g, 9/tab
    break-if-!=
    return 0/false
  }
  {
    compare g, 0xa/newline
    break-if-!=
    return 0/false
  }
  {
    compare g, 0x20/space
    break-if-!=
    return 0/false
  }
  ## quotes
  {
    compare g, 0x22/double-quote
    break-if-!=
    return 0/false
  }
  {
    compare g, 0x60/backquote
    break-if-!=
    return 0/false
  }
  ## brackets
  {
    compare g, 0x28/open-paren
    break-if-!=
    return 0/false
  }
  {
    compare g, 0x29/close-paren
    break-if-!=
    return 0/false
  }
  {
    compare g, 0x5b/open-square-bracket
    break-if-!=
    return 0/false
  }
  {
    compare g, 0x5d/close-square-bracket
    break-if-!=
    return 0/false
  }
  {
    compare g, 0x7b/open-curly-bracket
    break-if-!=
    return 0/false
  }
  {
    compare g, 0x7d/close-curly-bracket
    break-if-!=
    return 0/false
  }
  ## other punctuation
  # '!' is a symbol char
  {
    compare g, 0x23/hash
    break-if-!=
    return 0/false
  }
  # '$' is a symbol char
  {
    compare g, 0x25/percent
    break-if-!=
    return 0/false
  }
  {
    compare g, 0x26/ampersand
    break-if-!=
    return 0/false
  }
  {
    compare g, 0x27/single-quote
    break-if-!=
    return 0/false
  }
  # (backquote is already rejected under 'quotes' above)
  {
    compare g, 0x2c/comma
    break-if-!=
    return 0/false
  }
  {
    compare g, 0x40/at-sign
    break-if-!=
    return 0/false
  }
  {
    compare g, 0x2a/asterisk
    break-if-!=
    return 0/false
  }
  {
    compare g, 0x2b/plus
    break-if-!=
    return 0/false
  }
  {
    compare g, 0x2d/dash  # '-' not allowed in symbols
    break-if-!=
    return 0/false
  }
  {
    compare g, 0x2e/period
    break-if-!=
    return 0/false
  }
  {
    compare g, 0x2f/slash
    break-if-!=
    return 0/false
  }
  {
    compare g, 0x3a/colon
    break-if-!=
    return 0/false
  }
  {
    compare g, 0x3b/semi-colon
    break-if-!=
    return 0/false
  }
  {
    compare g, 0x3c/less-than
    break-if-!=
    return 0/false
  }
  {
    compare g, 0x3d/equal
    break-if-!=
    return 0/false
  }
  {
    compare g, 0x3e/greater-than
    break-if-!=
    return 0/false
  }
  # '?' is a symbol char
  {
    compare g, 0x5c/backslash
    break-if-!=
    return 0/false
  }
  {
    compare g, 0x5e/caret
    break-if-!=
    return 0/false
  }
  # '_' is a symbol char
  {
    compare g, 0x7c/vertical-line
    break-if-!=
    return 0/false
  }
  {
    compare g, 0x7e/tilde
    break-if-!=
    return 0/false
  }
  return 1/true
}
699 
# Is 'g' one of the six bracket graphemes: ( ) [ ] { } ?
fn bracket-grapheme? g: grapheme -> _/eax: boolean {
  # parens
  {
    compare g, 0x28/open-paren
    break-if-!=
    return 1/true
  }
  {
    compare g, 0x29/close-paren
    break-if-!=
    return 1/true
  }
  # square brackets
  {
    compare g, 0x5b/open-square-bracket
    break-if-!=
    return 1/true
  }
  {
    compare g, 0x5d/close-square-bracket
    break-if-!=
    return 1/true
  }
  # curly brackets
  {
    compare g, 0x7b/open-curly-bracket
    break-if-!=
    return 1/true
  }
  {
    compare g, 0x7d/close-curly-bracket
    break-if-!=
    return 1/true
  }
  # anything else
  return 0/false
}
733 
# Can 'g' be part of an operator token?
# Returns 1/true for the punctuation listed below, 0/false for everything else.
# The quote-like graphemes (' ` , @) are spelled out as explicit 0/false cases
# even though they'd fall through to the final 'return 0/false' anyway.
fn operator-grapheme? g: grapheme -> _/eax: boolean {
  # '$' is a symbol char
  {
    compare g, 0x25/percent
    break-if-!=
    return 1/true
  }
  {
    compare g, 0x26/ampersand
    break-if-!=
    return 1/true
  }
  # quote-like graphemes are never operators
  {
    compare g, 0x27/single-quote
    break-if-!=
    return 0/false
  }
  {
    compare g, 0x60/backquote
    break-if-!=
    return 0/false
  }
  {
    compare g, 0x2c/comma
    break-if-!=
    return 0/false
  }
  {
    compare g, 0x40/at-sign
    break-if-!=
    return 0/false
  }
  # the rest are all operators
  {
    compare g, 0x2a/asterisk
    break-if-!=
    return 1/true
  }
  {
    compare g, 0x2b/plus
    break-if-!=
    return 1/true
  }
  {
    compare g, 0x2d/dash  # '-' not allowed in symbols
    break-if-!=
    return 1/true
  }
  {
    compare g, 0x2e/period
    break-if-!=
    return 1/true
  }
  {
    compare g, 0x2f/slash
    break-if-!=
    return 1/true
  }
  {
    compare g, 0x3a/colon
    break-if-!=
    return 1/true
  }
  {
    compare g, 0x3b/semi-colon
    break-if-!=
    return 1/true
  }
  {
    compare g, 0x3c/less-than
    break-if-!=
    return 1/true
  }
  {
    compare g, 0x3d/equal
    break-if-!=
    return 1/true
  }
  {
    compare g, 0x3e/greater-than
    break-if-!=
    return 1/true
  }
  # '?' is a symbol char
  {
    compare g, 0x5c/backslash
    break-if-!=
    return 1/true
  }
  {
    compare g, 0x5e/caret
    break-if-!=
    return 1/true
  }
  # '_' is a symbol char
  {
    compare g, 0x7c/vertical-line
    break-if-!=
    return 1/true
  }
  {
    compare g, 0x7e/tilde
    break-if-!=
    return 1/true
  }
  return 0/false
}
840 
# Does the token's text start with a decimal digit?
# Only the first grapheme is examined.
fn number-token? _in: (addr cell) -> _/eax: boolean {
  var self/eax: (addr cell) <- copy _in
  var text-ah/eax: (addr handle stream byte) <- get self, text-data
  var text/eax: (addr stream byte) <- lookup *text-ah
  rewind-stream text
  var first/eax: grapheme <- read-grapheme text
  var digit?/eax: boolean <- decimal-digit? first
  return digit?
}
850 
# Does the token's text start with a bracket grapheme?
# Tokens of type 3/stream never count, whatever their text.
fn bracket-token? _in: (addr cell) -> _/eax: boolean {
  var self/eax: (addr cell) <- copy _in
  # rule out stream literals first
  {
    var type-addr/eax: (addr int) <- get self, type
    compare *type-addr, 3/stream
    break-if-!=
    return 0/false
  }
  # otherwise decide based on the first grapheme
  var text-ah/eax: (addr handle stream byte) <- get self, text-data
  var text/eax: (addr stream byte) <- lookup *text-ah
  rewind-stream text
  var first/eax: grapheme <- read-grapheme text
  var bracket?/eax: boolean <- bracket-grapheme? first
  return bracket?
}
867 
# Compare the token's text against "'".
fn quote-token? _in: (addr cell) -> _/eax: boolean {
  var self/eax: (addr cell) <- copy _in
  var text-ah/eax: (addr handle stream byte) <- get self, text-data
  var text/eax: (addr stream byte) <- lookup *text-ah
  rewind-stream text
  var match?/eax: boolean <- stream-data-equal? text, "'"
  return match?
}
876 
# Compare the token's text against "`".
fn backquote-token? _in: (addr cell) -> _/eax: boolean {
  var self/eax: (addr cell) <- copy _in
  var text-ah/eax: (addr handle stream byte) <- get self, text-data
  var text/eax: (addr stream byte) <- lookup *text-ah
  rewind-stream text
  var match?/eax: boolean <- stream-data-equal? text, "`"
  return match?
}
885 
# Compare the token's text against ",".
fn unquote-token? _in: (addr cell) -> _/eax: boolean {
  var self/eax: (addr cell) <- copy _in
  var text-ah/eax: (addr handle stream byte) <- get self, text-data
  var text/eax: (addr stream byte) <- lookup *text-ah
  rewind-stream text
  var match?/eax: boolean <- stream-data-equal? text, ","
  return match?
}
894 
# Compare the token's text against ",@".
fn unquote-splice-token? _in: (addr cell) -> _/eax: boolean {
  var self/eax: (addr cell) <- copy _in
  var text-ah/eax: (addr handle stream byte) <- get self, text-data
  var text/eax: (addr stream byte) <- lookup *text-ah
  rewind-stream text
  var match?/eax: boolean <- stream-data-equal? text, ",@"
  return match?
}
903 
# Is the token's first grapheme '(' with nothing left in the stream after it?
fn open-paren-token? _in: (addr cell) -> _/eax: boolean {
  var self/eax: (addr cell) <- copy _in
  var text-ah/eax: (addr handle stream byte) <- get self, text-data
  var _text/eax: (addr stream byte) <- lookup *text-ah
  var text/ecx: (addr stream byte) <- copy _text
  rewind-stream text
  var first/eax: grapheme <- read-grapheme text
  # wrong first grapheme? no need to look further
  {
    compare first, 0x28/open-paren
    break-if-=
    return 0/false
  }
  # right first grapheme; now make sure it was the only one
  var nothing-left?/eax: boolean <- stream-empty? text
  return nothing-left?
}
919 
# Is the token's first grapheme ')' with nothing left in the stream after it?
fn close-paren-token? _in: (addr cell) -> _/eax: boolean {
  var self/eax: (addr cell) <- copy _in
  var text-ah/eax: (addr handle stream byte) <- get self, text-data
  var _text/eax: (addr stream byte) <- lookup *text-ah
  var text/ecx: (addr stream byte) <- copy _text
  rewind-stream text
  var first/eax: grapheme <- read-grapheme text
  # wrong first grapheme? no need to look further
  {
    compare first, 0x29/close-paren
    break-if-=
    return 0/false
  }
  # right first grapheme; now make sure it was the only one
  var nothing-left?/eax: boolean <- stream-empty? text
  return nothing-left?
}
935 
# Is the token's first grapheme '.' with nothing left in the stream after it?
fn dot-token? _in: (addr cell) -> _/eax: boolean {
  var self/eax: (addr cell) <- copy _in
  var text-ah/eax: (addr handle stream byte) <- get self, text-data
  var _text/eax: (addr stream byte) <- lookup *text-ah
  var text/ecx: (addr stream byte) <- copy _text
  rewind-stream text
  var first/eax: grapheme <- read-grapheme text
  # wrong first grapheme? no need to look further
  {
    compare first, 0x2e/dot
    break-if-=
    return 0/false
  }
  # right first grapheme; now make sure it was the only one
  var nothing-left?/eax: boolean <- stream-empty? text
  return nothing-left?
}
951 
# a cell built by new-symbol from "." should satisfy dot-token?
fn test-dot-token {
  var dot-storage: (handle cell)
  var dot-ah/eax: (addr handle cell) <- address dot-storage
  new-symbol dot-ah, "."
  var dot/eax: (addr cell) <- lookup *dot-ah
  var is-dot?/eax: boolean <- dot-token? dot
  check is-dot?, "F - test-dot-token"
}
960 
# Is the token's type 3/stream?
fn stream-token? _in: (addr cell) -> _/eax: boolean {
  var self/eax: (addr cell) <- copy _in
  var type-addr/eax: (addr int) <- get self, type
  {
    compare *type-addr, 3/stream
    break-if-!=
    return 1/true
  }
  return 0/false
}
971 
# Does the token's text start with '#'?
# Only the first grapheme is examined.
fn comment-token? _in: (addr cell) -> _/eax: boolean {
  var self/eax: (addr cell) <- copy _in
  var text-ah/eax: (addr handle stream byte) <- get self, text-data
  var text/eax: (addr stream byte) <- lookup *text-ah
  rewind-stream text
  var first/eax: grapheme <- read-grapheme text
  {
    compare first, 0x23/hash
    break-if-!=
    return 1/true
  }
  return 0/false
}
984 }