https://github.com/akkartik/mu/blob/main/shell/tokenize.mu
  1 # We reuse the cell data structure for tokenization
# Token cells are special, though. They're always atoms and always have
# text-data. Their type is left uninitialized, except that stream literals
# are tagged 3/stream.
  4 
  5 fn tokenize in: (addr gap-buffer), out: (addr stream cell), trace: (addr trace) {
  6   trace-text trace, "tokenize", "tokenize"
  7   trace-lower trace
  8   rewind-gap-buffer in
  9   var token-storage: cell
 10   var token/edx: (addr cell) <- address token-storage
 11   {
 12     skip-whitespace-from-gap-buffer in
 13     var done?/eax: boolean <- gap-buffer-scan-done? in
 14     compare done?, 0/false
 15     break-if-!=
 16     # initialize token data each iteration to avoid aliasing
 17     var dest-ah/eax: (addr handle stream byte) <- get token, text-data
 18     # I'm allocating 1KB for every. single. token. Just because a whole definition needs to fit in a string sometimes. Absolutely bonkers.
 19     populate-stream dest-ah, 0x400/max-definition-size
 20     #
 21     next-token in, token, trace
 22     var skip?/eax: boolean <- comment-token? token
 23     compare skip?, 0/false
 24     loop-if-!=
 25     var error?/eax: boolean <- has-errors? trace
 26     compare error?, 0/false
 27     {
 28       break-if-=
 29       return
 30     }
 31     write-to-stream out, token  # shallow-copy text-data
 32     loop
 33   }
 34   trace-higher trace
 35 }
 36 
 37 fn test-tokenize-quote {
 38   var in-storage: gap-buffer
 39   var in/esi: (addr gap-buffer) <- address in-storage
 40   initialize-gap-buffer-with in, "'(a)"
 41   #
 42   var stream-storage: (stream cell 0x10)
 43   var stream/edi: (addr stream cell) <- address stream-storage
 44   #
 45   tokenize in, stream, 0/no-trace
 46   #
 47   var curr-token-storage: cell
 48   var curr-token/ebx: (addr cell) <- address curr-token-storage
 49   read-from-stream stream, curr-token
 50   var quote?/eax: boolean <- quote-token? curr-token
 51   check quote?, "F - test-tokenize-quote: quote"
 52   read-from-stream stream, curr-token
 53   var open-paren?/eax: boolean <- open-paren-token? curr-token
 54   check open-paren?, "F - test-tokenize-quote: open paren"
 55   read-from-stream stream, curr-token  # skip a
 56   read-from-stream stream, curr-token
 57   var close-paren?/eax: boolean <- close-paren-token? curr-token
 58   check close-paren?, "F - test-tokenize-quote: close paren"
 59 }
 60 
 61 fn test-tokenize-backquote {
 62   var in-storage: gap-buffer
 63   var in/esi: (addr gap-buffer) <- address in-storage
 64   initialize-gap-buffer-with in, "`(a)"
 65   #
 66   var stream-storage: (stream cell 0x10)
 67   var stream/edi: (addr stream cell) <- address stream-storage
 68   #
 69   tokenize in, stream, 0/no-trace
 70   #
 71   var curr-token-storage: cell
 72   var curr-token/ebx: (addr cell) <- address curr-token-storage
 73   read-from-stream stream, curr-token
 74   var backquote?/eax: boolean <- backquote-token? curr-token
 75   check backquote?, "F - test-tokenize-backquote: backquote"
 76   read-from-stream stream, curr-token
 77   var open-paren?/eax: boolean <- open-paren-token? curr-token
 78   check open-paren?, "F - test-tokenize-backquote: open paren"
 79   read-from-stream stream, curr-token  # skip a
 80   read-from-stream stream, curr-token
 81   var close-paren?/eax: boolean <- close-paren-token? curr-token
 82   check close-paren?, "F - test-tokenize-backquote: close paren"
 83 }
 84 
 85 fn test-tokenize-unquote {
 86   var in-storage: gap-buffer
 87   var in/esi: (addr gap-buffer) <- address in-storage
 88   initialize-gap-buffer-with in, ",(a)"
 89   #
 90   var stream-storage: (stream cell 0x10)
 91   var stream/edi: (addr stream cell) <- address stream-storage
 92   #
 93   tokenize in, stream, 0/no-trace
 94   #
 95   var curr-token-storage: cell
 96   var curr-token/ebx: (addr cell) <- address curr-token-storage
 97   read-from-stream stream, curr-token
 98   var unquote?/eax: boolean <- unquote-token? curr-token
 99   check unquote?, "F - test-tokenize-unquote: unquote"
100   read-from-stream stream, curr-token
101   var open-paren?/eax: boolean <- open-paren-token? curr-token
102   check open-paren?, "F - test-tokenize-unquote: open paren"
103   read-from-stream stream, curr-token  # skip a
104   read-from-stream stream, curr-token
105   var close-paren?/eax: boolean <- close-paren-token? curr-token
106   check close-paren?, "F - test-tokenize-unquote: close paren"
107 }
108 
# The first token of ",@a" should be a single unquote-splice token.
fn test-tokenize-unquote-splice {
  # where the tokens go
  var tokens-storage: (stream cell 0x10)
  var tokens/edi: (addr stream cell) <- address tokens-storage
  # text to tokenize
  var text-storage: gap-buffer
  var text/esi: (addr gap-buffer) <- address text-storage
  initialize-gap-buffer-with text, ",@a"
  #
  tokenize text, tokens, 0/no-trace
  # only the first token is inspected
  var tok-storage: cell
  var tok/ebx: (addr cell) <- address tok-storage
  read-from-stream tokens, tok
  var is-splice?/eax: boolean <- unquote-splice-token? tok
  check is-splice?, "F - test-tokenize-unquote-splice: unquote-splice"
}
125 
# "(a . b)" should yield: open paren, a, dot, b, close paren.
fn test-tokenize-dotted-list {
  # where the tokens go
  var tokens-storage: (stream cell 0x10)
  var tokens/edi: (addr stream cell) <- address tokens-storage
  # text to tokenize
  var text-storage: gap-buffer
  var text/esi: (addr gap-buffer) <- address text-storage
  initialize-gap-buffer-with text, "(a . b)"
  #
  tokenize text, tokens, 0/no-trace
  # read the tokens back one at a time
  var tok-storage: cell
  var tok/ebx: (addr cell) <- address tok-storage
  read-from-stream tokens, tok
  var is-open?/eax: boolean <- open-paren-token? tok
  check is-open?, "F - test-tokenize-dotted-list: open paren"
  read-from-stream tokens, tok  # 'a'; not checked
  read-from-stream tokens, tok
  var is-dot?/eax: boolean <- dot-token? tok
  check is-dot?, "F - test-tokenize-dotted-list: dot"
  read-from-stream tokens, tok  # 'b'; not checked
  read-from-stream tokens, tok
  var is-close?/eax: boolean <- close-paren-token? tok
  check is-close?, "F - test-tokenize-dotted-list: close paren"
}
150 
# "[abc def]" should yield exactly one stream token containing "abc def".
fn test-tokenize-stream-literal {
  # where the tokens go
  var tokens-storage: (stream cell 0x10)
  var tokens/edi: (addr stream cell) <- address tokens-storage
  # text to tokenize
  var text-storage: gap-buffer
  var text/esi: (addr gap-buffer) <- address text-storage
  initialize-gap-buffer-with text, "[abc def]"
  #
  tokenize text, tokens, 0/no-trace
  #
  var tok-storage: cell
  var tok/ebx: (addr cell) <- address tok-storage
  read-from-stream tokens, tok
  # the token is tagged as a stream..
  var is-stream?/eax: boolean <- stream-token? tok
  check is-stream?, "F - test-tokenize-stream-literal: type"
  # ..its text excludes the square brackets..
  var tok-text-ah/eax: (addr handle stream byte) <- get tok, text-data
  var tok-text/eax: (addr stream byte) <- lookup *tok-text-ah
  var same-text?/eax: boolean <- stream-data-equal? tok-text, "abc def"
  check same-text?, "F - test-tokenize-stream-literal"
  # ..and nothing follows it
  var drained?/eax: boolean <- stream-empty? tokens
  check drained?, "F - test-tokenize-stream-literal: empty?"
}
173 
# "([abc def])" should yield: bracket, stream token "abc def", bracket.
fn test-tokenize-stream-literal-in-tree {
  # where the tokens go
  var tokens-storage: (stream cell 0x10)
  var tokens/edi: (addr stream cell) <- address tokens-storage
  # text to tokenize
  var text-storage: gap-buffer
  var text/esi: (addr gap-buffer) <- address text-storage
  initialize-gap-buffer-with text, "([abc def])"
  #
  tokenize text, tokens, 0/no-trace
  #
  var tok-storage: cell
  var tok/ebx: (addr cell) <- address tok-storage
  # leading bracket
  read-from-stream tokens, tok
  var opens?/eax: boolean <- bracket-token? tok
  check opens?, "F - test-tokenize-stream-literal-in-tree: open paren"
  # the stream literal itself
  read-from-stream tokens, tok
  var is-stream?/eax: boolean <- stream-token? tok
  check is-stream?, "F - test-tokenize-stream-literal-in-tree: type"
  var tok-text-ah/eax: (addr handle stream byte) <- get tok, text-data
  var tok-text/eax: (addr stream byte) <- lookup *tok-text-ah
  var same-text?/eax: boolean <- stream-data-equal? tok-text, "abc def"
  check same-text?, "F - test-tokenize-stream-literal-in-tree"
  # trailing bracket
  read-from-stream tokens, tok
  var closes?/eax: boolean <- bracket-token? tok
  check closes?, "F - test-tokenize-stream-literal-in-tree: close paren"
  # nothing else
  var drained?/eax: boolean <- stream-empty? tokens
  check drained?, "F - test-tokenize-stream-literal-in-tree: empty?"
}
202 
# Read a single token from 'in' into the text-data stream of '_out-cell'.
#
# The cell's type is first reset to 0/uninitialized; only stream literals
# ([...]) are then given a type (3/stream).
# The text-data handle of '_out-cell' is looked up and cleared here, so it is
# expected to already refer to a stream.
#
# The kind of token is chosen by peeking at the next grapheme. Checks run in
# the order below, and the first match wins:
#   '#'                     comment, up to end of line
#   decimal digit           number
#   symbol grapheme         symbol
#   '['                     stream literal, up to the matching ']'
#   ']'                     error: "unbalanced ']'"
#   other bracket           single-grapheme token
#   operator grapheme       operator
#   "'" / "`" / ","         quote / backquote / unquote
#   ",@"                    unquote-splice
# Anything else aborts with "unknown token type".
#
# NOTE(review): the "unbalanced ']'" path returns immediately, skipping both
# trace-higher and the final "=> ..." trace line.
fn next-token in: (addr gap-buffer), _out-cell: (addr cell), trace: (addr trace) {
  trace-text trace, "tokenize", "next-token"
  trace-lower trace
  var out-cell/eax: (addr cell) <- copy _out-cell
  {
    var out-cell-type/eax: (addr int) <- get out-cell, type
    copy-to *out-cell-type, 0/uninitialized
  }
  var out-ah/eax: (addr handle stream byte) <- get out-cell, text-data
  var _out/eax: (addr stream byte) <- lookup *out-ah
  var out/edi: (addr stream byte) <- copy _out
  $next-token:body: {
    clear-stream out
    var g/eax: grapheme <- peek-from-gap-buffer in
#?     draw-grapheme-at-cursor 0/screen, g, 7/fg, 0/bg
#?     move-cursor-rightward-and-downward 0/screen, 0, 0x80
    # trace the grapheme we're dispatching on, in hex
    {
      var stream-storage: (stream byte 0x40)
      var stream/esi: (addr stream byte) <- address stream-storage
      write stream, "next: "
      var gval/eax: int <- copy g
      write-int32-hex stream, gval
      trace trace, "tokenize", stream
    }
    # comment
    {
      compare g, 0x23/comment
      break-if-!=
      rest-of-line in, out, trace
      break $next-token:body
    }
    # digit
    {
      var digit?/eax: boolean <- decimal-digit? g
      compare digit?, 0/false
      break-if-=
      next-number-token in, out, trace
      break $next-token:body
    }
    # other symbol char
    {
      var symbol?/eax: boolean <- symbol-grapheme? g
      compare symbol?, 0/false
      break-if-=
      next-symbol-token in, out, trace
      break $next-token:body
    }
    # open square brackets begin streams
    {
      compare g, 0x5b/open-square-bracket
      break-if-!=
      g <- read-from-gap-buffer in  # skip open bracket
      next-stream-token in, out, trace
      # tag the cell so the token can later be told apart from a symbol
      var out-cell/eax: (addr cell) <- copy _out-cell
      var out-cell-type/eax: (addr int) <- get out-cell, type
      copy-to *out-cell-type, 3/stream
      break $next-token:body
    }
    # unbalanced close square brackets are errors
    {
      compare g, 0x5d/close-square-bracket
      break-if-!=
      error trace, "unbalanced ']'"
      return
    }
    # other brackets are always single-char tokens
    {
      var bracket?/eax: boolean <- bracket-grapheme? g
      compare bracket?, 0/false
      break-if-=
      var g/eax: grapheme <- read-from-gap-buffer in
      next-bracket-token g, out, trace
      break $next-token:body
    }
    # non-symbol operators
    {
      var operator?/eax: boolean <- operator-grapheme? g
      compare operator?, 0/false
      break-if-=
      next-operator-token in, out, trace
      break $next-token:body
    }
    # quote
    {
      compare g, 0x27/single-quote
      break-if-!=
      g <- read-from-gap-buffer in  # consume
      write-grapheme out, g
      break $next-token:body
    }
    # backquote
    {
      compare g, 0x60/backquote
      break-if-!=
      g <- read-from-gap-buffer in  # consume
      write-grapheme out, g
      break $next-token:body
    }
    # unquote
    {
      compare g, 0x2c/comma
      break-if-!=
      g <- read-from-gap-buffer in  # consume
      write-grapheme out, g
      # check for unquote-splice
      {
        var g2/eax: grapheme <- peek-from-gap-buffer in
        compare g2, 0x40/at-sign
        break-if-!=
        g2 <- read-from-gap-buffer in
        write-grapheme out, g2
      }
      break $next-token:body
    }
    abort "unknown token type"
  }
  trace-higher trace
  # trace the finished token
  var stream-storage: (stream byte 0x400)  # maximum possible token size (next-stream-token)
  var stream/eax: (addr stream byte) <- address stream-storage
  write stream, "=> "
  rewind-stream out
  write-stream stream, out
  trace trace, "tokenize", stream
}
327 
# Append graphemes from 'in' to 'out' for as long as they are symbol
# graphemes. The first non-symbol grapheme is left unread in 'in'.
# Each peeked grapheme and the final token are recorded in 'trace'.
fn next-symbol-token in: (addr gap-buffer), out: (addr stream byte), trace: (addr trace) {
  trace-text trace, "tokenize", "looking for a symbol"
  trace-lower trace
  $next-symbol-token:loop: {
    var exhausted?/eax: boolean <- gap-buffer-scan-done? in
    compare exhausted?, 0/false
    break-if-!= $next-symbol-token:loop
    var peeked/eax: grapheme <- peek-from-gap-buffer in
    # trace the candidate grapheme in hex
    {
      var msg-storage: (stream byte 0x40)
      var msg/esi: (addr stream byte) <- address msg-storage
      write msg, "next: "
      var peeked-val/eax: int <- copy peeked
      write-int32-hex msg, peeked-val
      trace trace, "tokenize", msg
    }
    # the symbol ends at the first grapheme that can't be part of one
    {
      var continues?/eax: boolean <- symbol-grapheme? peeked
      compare continues?, 0/false
      break-if-!=
      trace-text trace, "tokenize", "stop"
      break $next-symbol-token:loop
    }
    var consumed/eax: grapheme <- read-from-gap-buffer in
    write-grapheme out, consumed
    loop $next-symbol-token:loop
  }
  trace-higher trace
  # trace the finished token
  var result-storage: (stream byte 0x40)
  var result/esi: (addr stream byte) <- address result-storage
  write result, "=> "
  rewind-stream out
  write-stream result, out
  trace trace, "tokenize", result
}
364 
# Append graphemes from 'in' to 'out' for as long as they are operator
# graphemes. The first non-operator grapheme is left unread in 'in'.
# Each peeked grapheme and the final token are recorded in 'trace'.
fn next-operator-token in: (addr gap-buffer), out: (addr stream byte), trace: (addr trace) {
  trace-text trace, "tokenize", "looking for a operator"
  trace-lower trace
  $next-operator-token:loop: {
    var exhausted?/eax: boolean <- gap-buffer-scan-done? in
    compare exhausted?, 0/false
    break-if-!= $next-operator-token:loop
    var peeked/eax: grapheme <- peek-from-gap-buffer in
    # trace the candidate grapheme in hex
    {
      var msg-storage: (stream byte 0x40)
      var msg/esi: (addr stream byte) <- address msg-storage
      write msg, "next: "
      var peeked-val/eax: int <- copy peeked
      write-int32-hex msg, peeked-val
      trace trace, "tokenize", msg
    }
    # the operator ends at the first grapheme that can't be part of one
    {
      var continues?/eax: boolean <- operator-grapheme? peeked
      compare continues?, 0/false
      break-if-!=
      trace-text trace, "tokenize", "stop"
      break $next-operator-token:loop
    }
    var consumed/eax: grapheme <- read-from-gap-buffer in
    write-grapheme out, consumed
    loop $next-operator-token:loop
  }
  trace-higher trace
  # trace the finished token
  var result-storage: (stream byte 0x40)
  var result/esi: (addr stream byte) <- address result-storage
  write result, "=> "
  rewind-stream out
  write-stream result, out
  trace trace, "tokenize", result
}
401 
# Append decimal digits from 'in' to 'out'.
# Scanning stops at end of input or at the first grapheme that isn't a symbol
# grapheme (left unread in 'in'). A symbol grapheme that isn't a digit is
# reported as the error "invalid number", and the function returns right away
# without the closing trace-higher.
fn next-number-token in: (addr gap-buffer), out: (addr stream byte), trace: (addr trace) {
  trace-text trace, "tokenize", "looking for a number"
  trace-lower trace
  $next-number-token:loop: {
    var exhausted?/eax: boolean <- gap-buffer-scan-done? in
    compare exhausted?, 0/false
    break-if-!= $next-number-token:loop
    var peeked/eax: grapheme <- peek-from-gap-buffer in
    # trace the candidate grapheme in hex
    {
      var msg-storage: (stream byte 0x40)
      var msg/esi: (addr stream byte) <- address msg-storage
      write msg, "next: "
      var peeked-val/eax: int <- copy peeked
      write-int32-hex msg, peeked-val
      trace trace, "tokenize", msg
    }
    # whitespace, brackets, operators and the like end the number
    {
      var continues?/eax: boolean <- symbol-grapheme? peeked
      compare continues?, 0/false
      break-if-!=
      trace-text trace, "tokenize", "stop"
      break $next-number-token:loop
    }
    # anything else that isn't a digit makes the number invalid
    {
      var is-digit?/eax: boolean <- decimal-digit? peeked
      compare is-digit?, 0/false
      break-if-!=
      error trace, "invalid number"
      return
    }
    trace-text trace, "tokenize", "append"
    var consumed/eax: grapheme <- read-from-gap-buffer in
    write-grapheme out, consumed
    loop $next-number-token:loop
  }
  trace-higher trace
}
441 
# Copy graphemes from 'in' to 'out' up to the next ']'.
# The ']' is consumed but not written. The caller is expected to have already
# consumed the opening '['.
# Running out of input first is reported as the error "unbalanced '['".
fn next-stream-token in: (addr gap-buffer), out: (addr stream byte), trace: (addr trace) {
  trace-text trace, "tokenize", "stream"
  $next-stream-token:loop: {
    # no closing bracket before the end of the input?
    {
      var exhausted?/eax: boolean <- gap-buffer-scan-done? in
      compare exhausted?, 0/false
      break-if-=
      error trace, "unbalanced '['"
      return
    }
    var curr/eax: grapheme <- read-from-gap-buffer in
    compare curr, 0x5d/close-square-bracket
    break-if-= $next-stream-token:loop
    write-grapheme out, curr
    loop $next-stream-token:loop
  }
  # trace the finished token
  var result-storage: (stream byte 0x400)  # max-definition-size
  var result/esi: (addr stream byte) <- address result-storage
  write result, "=> "
  rewind-stream out
  write-stream result, out
  trace trace, "tokenize", result
}
465 
# Emit the already-read bracket grapheme 'g' as a one-grapheme token in 'out',
# and record the result in 'trace'.
fn next-bracket-token g: grapheme, out: (addr stream byte), trace: (addr trace) {
  trace-text trace, "tokenize", "bracket"
  write-grapheme out, g
  # trace the finished token
  var result-storage: (stream byte 0x40)
  var result/esi: (addr stream byte) <- address result-storage
  write result, "=> "
  rewind-stream out
  write-stream result, out
  trace trace, "tokenize", result
}
476 
# Copy graphemes from 'in' to 'out' up to the next newline.
# The newline is consumed but not written.
# If the input runs out first, return immediately without the final trace line.
fn rest-of-line in: (addr gap-buffer), out: (addr stream byte), trace: (addr trace) {
  trace-text trace, "tokenize", "comment"
  $rest-of-line:loop: {
    # end of input also ends the line
    {
      var exhausted?/eax: boolean <- gap-buffer-scan-done? in
      compare exhausted?, 0/false
      break-if-=
      return
    }
    var curr/eax: grapheme <- read-from-gap-buffer in
    compare curr, 0xa/newline
    break-if-= $rest-of-line:loop
    write-grapheme out, curr
    loop $rest-of-line:loop
  }
  # trace the text that was collected
  var result-storage: (stream byte 0x80)
  var result/esi: (addr stream byte) <- address result-storage
  write result, "=> "
  rewind-stream out
  write-stream result, out
  trace trace, "tokenize", result
}
499 
# Can 'g' be part of a symbol?
# Returns 0/false for whitespace, quotes, brackets and the punctuation listed
# below; 1/true for every other grapheme.
# Among ASCII punctuation, only '!', '$', '?' and '_' are symbol chars.
fn symbol-grapheme? g: grapheme -> _/eax: boolean {
  {
    # Each match below breaks out of this block to the 'return 0/false' at
    # the bottom.
    ## whitespace
    compare g, 9/tab
    break-if-=
    compare g, 0xa/newline
    break-if-=
    compare g, 0x20/space
    break-if-=
    ## quotes
    compare g, 0x22/double-quote
    break-if-=
    compare g, 0x60/backquote
    break-if-=
    ## brackets
    compare g, 0x28/open-paren
    break-if-=
    compare g, 0x29/close-paren
    break-if-=
    compare g, 0x5b/open-square-bracket
    break-if-=
    compare g, 0x5d/close-square-bracket
    break-if-=
    compare g, 0x7b/open-curly-bracket
    break-if-=
    compare g, 0x7d/close-curly-bracket
    break-if-=
    ## other punctuation
    # '!' is a symbol char
    compare g, 0x23/hash
    break-if-=
    # '$' is a symbol char
    compare g, 0x25/percent
    break-if-=
    compare g, 0x26/ampersand
    break-if-=
    compare g, 0x27/single-quote
    break-if-=
    compare g, 0x2c/comma
    break-if-=
    compare g, 0x40/at-sign
    break-if-=
    compare g, 0x2a/asterisk
    break-if-=
    compare g, 0x2b/plus
    break-if-=
    compare g, 0x2d/dash  # '-' not allowed in symbols
    break-if-=
    compare g, 0x2e/period
    break-if-=
    compare g, 0x2f/slash
    break-if-=
    compare g, 0x3a/colon
    break-if-=
    compare g, 0x3b/semi-colon
    break-if-=
    compare g, 0x3c/less-than
    break-if-=
    compare g, 0x3d/equal
    break-if-=
    compare g, 0x3e/greater-than
    break-if-=
    # '?' is a symbol char
    compare g, 0x5c/backslash
    break-if-=
    compare g, 0x5e/caret
    break-if-=
    # '_' is a symbol char
    compare g, 0x7c/vertical-line
    break-if-=
    compare g, 0x7e/tilde
    break-if-=
    # nothing matched
    return 1/true
  }
  return 0/false
}
671 
# Is 'g' one of the six bracket characters: ( ) [ ] { } ?
fn bracket-grapheme? g: grapheme -> _/eax: boolean {
  {
    # each match breaks out of this block to the 'return 1/true' below
    compare g, 0x28/open-paren
    break-if-=
    compare g, 0x29/close-paren
    break-if-=
    compare g, 0x5b/open-square-bracket
    break-if-=
    compare g, 0x5d/close-square-bracket
    break-if-=
    compare g, 0x7b/open-curly-bracket
    break-if-=
    compare g, 0x7d/close-curly-bracket
    break-if-=
    # nothing matched
    return 0/false
  }
  return 1/true
}
705 
# Can 'g' be part of an operator token?
# Returns 1/true for the punctuation listed below, 0/false for everything
# else. In particular, single-quote, backquote, comma and at-sign are not
# operator chars.
fn operator-grapheme? g: grapheme -> _/eax: boolean {
  {
    # each match breaks out of this block to the 'return 1/true' below
    # '$' is a symbol char
    compare g, 0x25/percent
    break-if-=
    compare g, 0x26/ampersand
    break-if-=
    # 0x27/single-quote, 0x60/backquote, 0x2c/comma and 0x40/at-sign are
    # deliberately absent; they fall through to the 'return 0/false' below
    compare g, 0x2a/asterisk
    break-if-=
    compare g, 0x2b/plus
    break-if-=
    compare g, 0x2d/dash  # '-' not allowed in symbols
    break-if-=
    compare g, 0x2e/period
    break-if-=
    compare g, 0x2f/slash
    break-if-=
    compare g, 0x3a/colon
    break-if-=
    compare g, 0x3b/semi-colon
    break-if-=
    compare g, 0x3c/less-than
    break-if-=
    compare g, 0x3d/equal
    break-if-=
    compare g, 0x3e/greater-than
    break-if-=
    # '?' is a symbol char
    compare g, 0x5c/backslash
    break-if-=
    compare g, 0x5e/caret
    break-if-=
    # '_' is a symbol char
    compare g, 0x7c/vertical-line
    break-if-=
    compare g, 0x7e/tilde
    break-if-=
    # nothing matched
    return 0/false
  }
  return 1/true
}
812 
# Does the token's text start with a decimal digit?
# Rewinds the token's text-data and reads its first grapheme.
fn number-token? _in: (addr cell) -> _/eax: boolean {
  var tok/eax: (addr cell) <- copy _in
  var text-ah/eax: (addr handle stream byte) <- get tok, text-data
  var text/eax: (addr stream byte) <- lookup *text-ah
  rewind-stream text
  var first/eax: grapheme <- read-grapheme text
  var starts-with-digit?/eax: boolean <- decimal-digit? first
  return starts-with-digit?
}
822 
# Does the token's text start with a bracket grapheme?
# Stream tokens (type 3/stream) never count, whatever their text.
# Rewinds the token's text-data and reads its first grapheme.
fn bracket-token? _in: (addr cell) -> _/eax: boolean {
  var tok/eax: (addr cell) <- copy _in
  # rule out stream literals first
  {
    var tok-type/eax: (addr int) <- get tok, type
    compare *tok-type, 3/stream
    break-if-!=
    return 0/false
  }
  var text-ah/eax: (addr handle stream byte) <- get tok, text-data
  var text/eax: (addr stream byte) <- lookup *text-ah
  rewind-stream text
  var first/eax: grapheme <- read-grapheme text
  var starts-with-bracket?/eax: boolean <- bracket-grapheme? first
  return starts-with-bracket?
}
839 
# Is the token's text "'"?
# Rewinds the token's text-data before comparing with stream-data-equal?.
fn quote-token? _in: (addr cell) -> _/eax: boolean {
  var tok/eax: (addr cell) <- copy _in
  var text-ah/eax: (addr handle stream byte) <- get tok, text-data
  var text/eax: (addr stream byte) <- lookup *text-ah
  rewind-stream text
  var matches?/eax: boolean <- stream-data-equal? text, "'"
  return matches?
}
848 
# Is the token's text "`"?
# Rewinds the token's text-data before comparing with stream-data-equal?.
fn backquote-token? _in: (addr cell) -> _/eax: boolean {
  var tok/eax: (addr cell) <- copy _in
  var text-ah/eax: (addr handle stream byte) <- get tok, text-data
  var text/eax: (addr stream byte) <- lookup *text-ah
  rewind-stream text
  var matches?/eax: boolean <- stream-data-equal? text, "`"
  return matches?
}
857 
# Is the token's text ","?
# Rewinds the token's text-data before comparing with stream-data-equal?.
fn unquote-token? _in: (addr cell) -> _/eax: boolean {
  var tok/eax: (addr cell) <- copy _in
  var text-ah/eax: (addr handle stream byte) <- get tok, text-data
  var text/eax: (addr stream byte) <- lookup *text-ah
  rewind-stream text
  var matches?/eax: boolean <- stream-data-equal? text, ","
  return matches?
}
866 
# Is the token's text ",@"?
# Rewinds the token's text-data before comparing with stream-data-equal?.
fn unquote-splice-token? _in: (addr cell) -> _/eax: boolean {
  var tok/eax: (addr cell) <- copy _in
  var text-ah/eax: (addr handle stream byte) <- get tok, text-data
  var text/eax: (addr stream byte) <- lookup *text-ah
  rewind-stream text
  var matches?/eax: boolean <- stream-data-equal? text, ",@"
  return matches?
}
875 
# Is the token exactly "("?
# True when the first grapheme of the token's text is '(' and the text
# stream is empty after reading it.
fn open-paren-token? _in: (addr cell) -> _/eax: boolean {
  var tok/eax: (addr cell) <- copy _in
  var text-ah/eax: (addr handle stream byte) <- get tok, text-data
  var _text/eax: (addr stream byte) <- lookup *text-ah
  var text/ecx: (addr stream byte) <- copy _text  # keep it out of eax, which the calls below overwrite
  rewind-stream text
  var first/eax: grapheme <- read-grapheme text
  compare first, 0x28/open-paren
  {
    break-if-=
    return 0/false
  }
  # there must be nothing after the paren
  var nothing-left?/eax: boolean <- stream-empty? text
  return nothing-left?
}
891 
# Is the token exactly ")"?
# True when the first grapheme of the token's text is ')' and the text
# stream is empty after reading it.
fn close-paren-token? _in: (addr cell) -> _/eax: boolean {
  var tok/eax: (addr cell) <- copy _in
  var text-ah/eax: (addr handle stream byte) <- get tok, text-data
  var _text/eax: (addr stream byte) <- lookup *text-ah
  var text/ecx: (addr stream byte) <- copy _text  # keep it out of eax, which the calls below overwrite
  rewind-stream text
  var first/eax: grapheme <- read-grapheme text
  compare first, 0x29/close-paren
  {
    break-if-=
    return 0/false
  }
  # there must be nothing after the paren
  var nothing-left?/eax: boolean <- stream-empty? text
  return nothing-left?
}
907 
# Is the token exactly "."?
# True when the first grapheme of the token's text is '.' and the text
# stream is empty after reading it.
fn dot-token? _in: (addr cell) -> _/eax: boolean {
  var tok/eax: (addr cell) <- copy _in
  var text-ah/eax: (addr handle stream byte) <- get tok, text-data
  var _text/eax: (addr stream byte) <- lookup *text-ah
  var text/ecx: (addr stream byte) <- copy _text  # keep it out of eax, which the calls below overwrite
  rewind-stream text
  var first/eax: grapheme <- read-grapheme text
  compare first, 0x2e/dot
  {
    break-if-=
    return 0/false
  }
  # there must be nothing after the dot
  var nothing-left?/eax: boolean <- stream-empty? text
  return nothing-left?
}
923 
# A cell made by new-symbol from "." should be recognized by dot-token?.
fn test-dot-token {
  var dot-storage: (handle cell)
  var dot-ah/eax: (addr handle cell) <- address dot-storage
  new-symbol dot-ah, "."
  var dot/eax: (addr cell) <- lookup *dot-ah
  var is-dot?/eax: boolean <- dot-token? dot
  check is-dot?, "F - test-dot-token"
}
932 
# Is the token a stream literal, i.e. is its type 3/stream?
fn stream-token? _in: (addr cell) -> _/eax: boolean {
  var tok/eax: (addr cell) <- copy _in
  var tok-type/eax: (addr int) <- get tok, type
  compare *tok-type, 3/stream
  {
    break-if-!=
    return 1/true
  }
  return 0/false
}
943 
# Does the token's text start with '#'?
# Rewinds the token's text-data and reads its first grapheme.
fn comment-token? _in: (addr cell) -> _/eax: boolean {
  var tok/eax: (addr cell) <- copy _in
  var text-ah/eax: (addr handle stream byte) <- get tok, text-data
  var text/eax: (addr stream byte) <- lookup *text-ah
  rewind-stream text
  var first/eax: grapheme <- read-grapheme text
  compare first, 0x23/hash
  {
    break-if-!=
    return 1/true
  }
  return 0/false
}
956 }