https://github.com/akkartik/mu/blob/main/shell/tokenize.mu
   1 # The language is indent-sensitive.
   2 # Each line consists of an initial indent token followed by other tokens.
   3 type token {
   4   type: int
   5   # type 0: default (symbols, brackets, quotes/unquotes; uses text-data)
   6   # type 1: stream literal (uses text-data)
   7   text-data: (handle stream byte)
   8   # type 2: skip (end of line or end of file; no payload)
   9   # type 3: indent (uses number-data)
  10   number-data: int
  11 }
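
A worked example of the model described above (my own illustration, pieced together from the tests below): tokenizing the line "  (f 12)" produces the sequence

  indent(2)  "("  "f"  "12"  ")"

The parens, "f" and "12" come out as default tokens carrying text-data; the two leading spaces become an indent token carrying number-data; a trailing newline, if present, becomes a skip token that tokenize drops. Note that "12" is read as plain text at this stage and only recognized as a number later, by number-token?.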
  12 
  13 fn tokenize in: (addr gap-buffer), out: (addr stream token), trace: (addr trace) {
  14   trace-text trace, "tokenize", "tokenize"
  15   trace-lower trace
  16   rewind-gap-buffer in
  17   var at-start-of-line?/edi: boolean <- copy 1/true
  18   {
  19     var done?/eax: boolean <- gap-buffer-scan-done? in
  20     compare done?, 0/false
  21     break-if-!=
  22     #
  23     var token-storage: token
  24     var token/edx: (addr token) <- address token-storage
  25     at-start-of-line? <- next-token in, token, at-start-of-line?, trace
  26     var error?/eax: boolean <- has-errors? trace
  27     compare error?, 0/false
  28     {
  29       break-if-=
  30       return
  31     }
  32     var skip?/eax: boolean <- skip-token? token
  33     compare skip?, 0/false
  34     loop-if-!=
  35     write-to-stream out, token  # shallow-copy text-data
  36     loop
  37   }
  38   trace-higher trace
  39 }
  40 
  41 fn test-tokenize-number {
  42   var in-storage: gap-buffer
  43   var in/esi: (addr gap-buffer) <- address in-storage
  44   initialize-gap-buffer-with in, "123 a"
  45   #
  46   var stream-storage: (stream token 0x10)
  47   var stream/edi: (addr stream token) <- address stream-storage
  48   #
  49   var trace-storage: trace
  50   var trace/edx: (addr trace) <- address trace-storage
  51   initialize-trace trace, 1/only-errors, 0x10/capacity, 0/visible
  52   tokenize in, stream, trace
  53   #
  54   var curr-token-storage: token
  55   var curr-token/ebx: (addr token) <- address curr-token-storage
  56   read-from-stream stream, curr-token
  57   var curr-token-type/eax: (addr int) <- get curr-token, type
  58   check-ints-equal *curr-token-type, 3/indent, "F - test-tokenize-number/before-indent-type"
  59   var curr-token-data/eax: (addr int) <- get curr-token, number-data
  60   check-ints-equal *curr-token-data, 0/spaces, "F - test-tokenize-number/before-indent"
  61   read-from-stream stream, curr-token
  62   var number?/eax: boolean <- number-token? curr-token
  63   check number?, "F - test-tokenize-number"
  64   var curr-token-data-ah/eax: (addr handle stream byte) <- get curr-token, text-data
  65   var curr-token-data/eax: (addr stream byte) <- lookup *curr-token-data-ah
  66   check-stream-equal curr-token-data, "123", "F - test-tokenize-number: value"
  67 }
  68 
  69 fn test-tokenize-negative-number {
  70   var in-storage: gap-buffer
  71   var in/esi: (addr gap-buffer) <- address in-storage
  72   initialize-gap-buffer-with in, "-123 a"
  73   #
  74   var stream-storage: (stream token 0x10)
  75   var stream/edi: (addr stream token) <- address stream-storage
  76   #
  77   var trace-storage: trace
  78   var trace/edx: (addr trace) <- address trace-storage
  79   initialize-trace trace, 1/only-errors, 0x10/capacity, 0/visible
  80   tokenize in, stream, trace
  81   #
  82   var curr-token-storage: token
  83   var curr-token/ebx: (addr token) <- address curr-token-storage
  84   read-from-stream stream, curr-token
  85   var curr-token-type/eax: (addr int) <- get curr-token, type
  86   check-ints-equal *curr-token-type, 3/indent, "F - test-tokenize-negative-number/before-indent-type"
  87   var curr-token-data/eax: (addr int) <- get curr-token, number-data
  88   check-ints-equal *curr-token-data, 0/spaces, "F - test-tokenize-negative-number/before-indent"
  89   read-from-stream stream, curr-token
  90   var number?/eax: boolean <- number-token? curr-token
  91   check number?, "F - test-tokenize-negative-number"
  92   var curr-token-data-ah/eax: (addr handle stream byte) <- get curr-token, text-data
  93   var curr-token-data/eax: (addr stream byte) <- lookup *curr-token-data-ah
  94   check-stream-equal curr-token-data, "-123", "F - test-tokenize-negative-number: value"
  95 }
  96 
  97 fn test-tokenize-quote {
  98   var in-storage: gap-buffer
  99   var in/esi: (addr gap-buffer) <- address in-storage
 100   initialize-gap-buffer-with in, "'(a)"
 101   #
 102   var stream-storage: (stream token 0x10)
 103   var stream/edi: (addr stream token) <- address stream-storage
 104   #
 105   var trace-storage: trace
 106   var trace/edx: (addr trace) <- address trace-storage
 107   initialize-trace trace, 1/only-errors, 0x10/capacity, 0/visible
 108   tokenize in, stream, trace
 109   #
 110   var curr-token-storage: token
 111   var curr-token/ebx: (addr token) <- address curr-token-storage
 112   read-from-stream stream, curr-token
 113   var curr-token-type/eax: (addr int) <- get curr-token, type
 114   check-ints-equal *curr-token-type, 3/indent, "F - test-tokenize-quote/before-indent-type"
 115   var curr-token-data/eax: (addr int) <- get curr-token, number-data
 116   check-ints-equal *curr-token-data, 0/spaces, "F - test-tokenize-quote/before-indent"
 117   read-from-stream stream, curr-token
 118   var quote?/eax: boolean <- quote-token? curr-token
 119   check quote?, "F - test-tokenize-quote: quote"
 120   read-from-stream stream, curr-token
 121   var open-paren?/eax: boolean <- open-paren-token? curr-token
 122   check open-paren?, "F - test-tokenize-quote: open paren"
 123   read-from-stream stream, curr-token  # skip a
 124   read-from-stream stream, curr-token
 125   var close-paren?/eax: boolean <- close-paren-token? curr-token
 126   check close-paren?, "F - test-tokenize-quote: close paren"
 127 }
 128 
 129 fn test-tokenize-backquote {
 130   var in-storage: gap-buffer
 131   var in/esi: (addr gap-buffer) <- address in-storage
 132   initialize-gap-buffer-with in, "`(a)"
 133   #
 134   var stream-storage: (stream token 0x10)
 135   var stream/edi: (addr stream token) <- address stream-storage
 136   #
 137   var trace-storage: trace
 138   var trace/edx: (addr trace) <- address trace-storage
 139   initialize-trace trace, 1/only-errors, 0x10/capacity, 0/visible
 140   tokenize in, stream, trace
 141   #
 142   var curr-token-storage: token
 143   var curr-token/ebx: (addr token) <- address curr-token-storage
 144   read-from-stream stream, curr-token
 145   var curr-token-type/eax: (addr int) <- get curr-token, type
 146   check-ints-equal *curr-token-type, 3/indent, "F - test-tokenize-backquote/before-indent-type"
 147   var curr-token-data/eax: (addr int) <- get curr-token, number-data
 148   check-ints-equal *curr-token-data, 0/spaces, "F - test-tokenize-backquote/before-indent"
 149   read-from-stream stream, curr-token
 150   var backquote?/eax: boolean <- backquote-token? curr-token
 151   check backquote?, "F - test-tokenize-backquote: backquote"
 152   read-from-stream stream, curr-token
 153   var open-paren?/eax: boolean <- open-paren-token? curr-token
 154   check open-paren?, "F - test-tokenize-backquote: open paren"
 155   read-from-stream stream, curr-token  # skip a
 156   read-from-stream stream, curr-token
 157   var close-paren?/eax: boolean <- close-paren-token? curr-token
 158   check close-paren?, "F - test-tokenize-backquote: close paren"
 159 }
 160 
 161 fn test-tokenize-unquote {
 162   var in-storage: gap-buffer
 163   var in/esi: (addr gap-buffer) <- address in-storage
 164   initialize-gap-buffer-with in, ",(a)"
 165   #
 166   var stream-storage: (stream token 0x10)
 167   var stream/edi: (addr stream token) <- address stream-storage
 168   #
 169   var trace-storage: trace
 170   var trace/edx: (addr trace) <- address trace-storage
 171   initialize-trace trace, 1/only-errors, 0x10/capacity, 0/visible
 172   tokenize in, stream, trace
 173   #
 174   var curr-token-storage: token
 175   var curr-token/ebx: (addr token) <- address curr-token-storage
 176   read-from-stream stream, curr-token
 177   var curr-token-type/eax: (addr int) <- get curr-token, type
 178   check-ints-equal *curr-token-type, 3/indent, "F - test-tokenize-unquote/before-indent-type"
 179   var curr-token-data/eax: (addr int) <- get curr-token, number-data
 180   check-ints-equal *curr-token-data, 0/spaces, "F - test-tokenize-unquote/before-indent"
 181   read-from-stream stream, curr-token
 182   var unquote?/eax: boolean <- unquote-token? curr-token
 183   check unquote?, "F - test-tokenize-unquote: unquote"
 184   read-from-stream stream, curr-token
 185   var open-paren?/eax: boolean <- open-paren-token? curr-token
 186   check open-paren?, "F - test-tokenize-unquote: open paren"
 187   read-from-stream stream, curr-token  # skip a
 188   read-from-stream stream, curr-token
 189   var close-paren?/eax: boolean <- close-paren-token? curr-token
 190   check close-paren?, "F - test-tokenize-unquote: close paren"
 191 }
 192 
 193 fn test-tokenize-unquote-splice {
 194   var in-storage: gap-buffer
 195   var in/esi: (addr gap-buffer) <- address in-storage
 196   initialize-gap-buffer-with in, ",@a"
 197   #
 198   var stream-storage: (stream token 0x10)
 199   var stream/edi: (addr stream token) <- address stream-storage
 200   #
 201   var trace-storage: trace
 202   var trace/edx: (addr trace) <- address trace-storage
 203   initialize-trace trace, 1/only-errors, 0x10/capacity, 0/visible
 204   tokenize in, stream, trace
 205   #
 206   var curr-token-storage: token
 207   var curr-token/ebx: (addr token) <- address curr-token-storage
 208   read-from-stream stream, curr-token
 209   var curr-token-type/eax: (addr int) <- get curr-token, type
 210   check-ints-equal *curr-token-type, 3/indent, "F - test-tokenize-unquote-splice/before-indent-type"
 211   var curr-token-data/eax: (addr int) <- get curr-token, number-data
 212   check-ints-equal *curr-token-data, 0/spaces, "F - test-tokenize-unquote-splice/before-indent"
 213   read-from-stream stream, curr-token
 214   var unquote-splice?/eax: boolean <- unquote-splice-token? curr-token
 215   check unquote-splice?, "F - test-tokenize-unquote-splice: unquote-splice"
 216 }
 217 
 218 fn test-tokenize-dotted-list {
 219   var in-storage: gap-buffer
 220   var in/esi: (addr gap-buffer) <- address in-storage
 221   initialize-gap-buffer-with in, "(a . b)"
 222   #
 223   var stream-storage: (stream token 0x10)
 224   var stream/edi: (addr stream token) <- address stream-storage
 225   #
 226   var trace-storage: trace
 227   var trace/edx: (addr trace) <- address trace-storage
 228   initialize-trace trace, 1/only-errors, 0x10/capacity, 0/visible
 229   tokenize in, stream, trace
 230   #
 231   var curr-token-storage: token
 232   var curr-token/ebx: (addr token) <- address curr-token-storage
 233   read-from-stream stream, curr-token
 234   var curr-token-type/eax: (addr int) <- get curr-token, type
 235   check-ints-equal *curr-token-type, 3/indent, "F - test-tokenize-dotted-list/before-indent-type"
 236   var curr-token-data/eax: (addr int) <- get curr-token, number-data
 237   check-ints-equal *curr-token-data, 0/spaces, "F - test-tokenize-dotted-list/before-indent"
 238   read-from-stream stream, curr-token
 239   var open-paren?/eax: boolean <- open-paren-token? curr-token
 240   check open-paren?, "F - test-tokenize-dotted-list: open paren"
 241   read-from-stream stream, curr-token  # skip a
 242   read-from-stream stream, curr-token
 243   var dot?/eax: boolean <- dot-token? curr-token
 244   check dot?, "F - test-tokenize-dotted-list: dot"
 245   read-from-stream stream, curr-token  # skip b
 246   read-from-stream stream, curr-token
 247   var close-paren?/eax: boolean <- close-paren-token? curr-token
 248   check close-paren?, "F - test-tokenize-dotted-list: close paren"
 249 }
 250 
 251 # double quotes with zero escaping support
 252 fn test-tokenize-stream-literal {
 253   var in-storage: gap-buffer
 254   var in/esi: (addr gap-buffer) <- address in-storage
 255   initialize-gap-buffer-with in, "\"abc def\""
 256   #
 257   var stream-storage: (stream token 0x10)
 258   var stream/edi: (addr stream token) <- address stream-storage
 259   #
 260   var trace-storage: trace
 261   var trace/edx: (addr trace) <- address trace-storage
 262   initialize-trace trace, 1/only-errors, 0x10/capacity, 0/visible
 263   tokenize in, stream, trace
 264   #
 265   var curr-token-storage: token
 266   var curr-token/ebx: (addr token) <- address curr-token-storage
 267   read-from-stream stream, curr-token
 268   var curr-token-type/eax: (addr int) <- get curr-token, type
 269   check-ints-equal *curr-token-type, 3/indent, "F - test-tokenize-stream-literal/before-indent-type"
 270   var curr-token-data/eax: (addr int) <- get curr-token, number-data
 271   check-ints-equal *curr-token-data, 0/spaces, "F - test-tokenize-stream-literal/before-indent"
 272   read-from-stream stream, curr-token
 273   var stream?/eax: boolean <- stream-token? curr-token
 274   check stream?, "F - test-tokenize-stream-literal: type"
 275   var curr-token-data-ah/eax: (addr handle stream byte) <- get curr-token, text-data
 276   var curr-token-data/eax: (addr stream byte) <- lookup *curr-token-data-ah
 277   var data-equal?/eax: boolean <- stream-data-equal? curr-token-data, "abc def"
 278   check data-equal?, "F - test-tokenize-stream-literal"
 279   var empty?/eax: boolean <- stream-empty? stream
 280   check empty?, "F - test-tokenize-stream-literal: empty?"
 281 }
 282 
 283 # alternative syntax for strings with balancing brackets
 284 fn test-tokenize-balanced-stream-literal {
 285   var in-storage: gap-buffer
 286   var in/esi: (addr gap-buffer) <- address in-storage
 287   initialize-gap-buffer-with in, "[abc def]"
 288   #
 289   var stream-storage: (stream token 0x10)
 290   var stream/edi: (addr stream token) <- address stream-storage
 291   #
 292   var trace-storage: trace
 293   var trace/edx: (addr trace) <- address trace-storage
 294   initialize-trace trace, 1/only-errors, 0x10/capacity, 0/visible
 295   tokenize in, stream, trace
 296   #
 297   var curr-token-storage: token
 298   var curr-token/ebx: (addr token) <- address curr-token-storage
 299   read-from-stream stream, curr-token
 300   var curr-token-type/eax: (addr int) <- get curr-token, type
 301   check-ints-equal *curr-token-type, 3/indent, "F - test-tokenize-balanced-stream-literal/before-indent-type"
 302   var curr-token-data/eax: (addr int) <- get curr-token, number-data
 303   check-ints-equal *curr-token-data, 0/spaces, "F - test-tokenize-balanced-stream-literal/before-indent"
 304   read-from-stream stream, curr-token
 305   var stream?/eax: boolean <- stream-token? curr-token
 306   check stream?, "F - test-tokenize-balanced-stream-literal: type"
 307   var curr-token-data-ah/eax: (addr handle stream byte) <- get curr-token, text-data
 308   var curr-token-data/eax: (addr stream byte) <- lookup *curr-token-data-ah
 309   var data-equal?/eax: boolean <- stream-data-equal? curr-token-data, "abc def"
 310   check data-equal?, "F - test-tokenize-balanced-stream-literal"
 311   var empty?/eax: boolean <- stream-empty? stream
 312   check empty?, "F - test-tokenize-balanced-stream-literal: empty?"
 313 }
 314 
 315 fn test-tokenize-nested-stream-literal {
 316   var in-storage: gap-buffer
 317   var in/esi: (addr gap-buffer) <- address in-storage
 318   initialize-gap-buffer-with in, "[abc [def]]"
 319   #
 320   var stream-storage: (stream token 0x10)
 321   var stream/edi: (addr stream token) <- address stream-storage
 322   #
 323   var trace-storage: trace
 324   var trace/edx: (addr trace) <- address trace-storage
 325   initialize-trace trace, 1/only-errors, 0x10/capacity, 0/visible
 326   tokenize in, stream, trace
 327   #
 328   var curr-token-storage: token
 329   var curr-token/ebx: (addr token) <- address curr-token-storage
 330   read-from-stream stream, curr-token
 331   var curr-token-type/eax: (addr int) <- get curr-token, type
 332   check-ints-equal *curr-token-type, 3/indent, "F - test-tokenize-nested-stream-literal/before-indent-type"
 333   var curr-token-data/eax: (addr int) <- get curr-token, number-data
 334   check-ints-equal *curr-token-data, 0/spaces, "F - test-tokenize-nested-stream-literal/before-indent"
 335   read-from-stream stream, curr-token
 336   var stream?/eax: boolean <- stream-token? curr-token
 337   check stream?, "F - test-tokenize-nested-stream-literal: type"
 338   var curr-token-data-ah/eax: (addr handle stream byte) <- get curr-token, text-data
 339   var curr-token-data/eax: (addr stream byte) <- lookup *curr-token-data-ah
 340   var data-equal?/eax: boolean <- stream-data-equal? curr-token-data, "abc [def]"
 341   check data-equal?, "F - test-tokenize-nested-stream-literal"
 342   var empty?/eax: boolean <- stream-empty? stream
 343   check empty?, "F - test-tokenize-nested-stream-literal: empty?"
 344 }
 345 
 346 fn test-tokenize-stream-literal-in-tree {
 347   var in-storage: gap-buffer
 348   var in/esi: (addr gap-buffer) <- address in-storage
 349   initialize-gap-buffer-with in, "([abc def])"
 350   #
 351   var stream-storage: (stream token 0x10)
 352   var stream/edi: (addr stream token) <- address stream-storage
 353   #
 354   var trace-storage: trace
 355   var trace/edx: (addr trace) <- address trace-storage
 356   initialize-trace trace, 1/only-errors, 0x10/capacity, 0/visible
 357   tokenize in, stream, trace
 358   #
 359   var curr-token-storage: token
 360   var curr-token/ebx: (addr token) <- address curr-token-storage
 361   read-from-stream stream, curr-token
 362   var curr-token-type/eax: (addr int) <- get curr-token, type
 363   check-ints-equal *curr-token-type, 3/indent, "F - test-tokenize-stream-literal-in-tree/before-indent-type"
 364   var curr-token-data/eax: (addr int) <- get curr-token, number-data
 365   check-ints-equal *curr-token-data, 0/spaces, "F - test-tokenize-stream-literal-in-tree/before-indent"
 366   read-from-stream stream, curr-token
 367   var bracket?/eax: boolean <- bracket-token? curr-token
 368   check bracket?, "F - test-tokenize-stream-literal-in-tree: open paren"
 369   read-from-stream stream, curr-token
 370   var stream?/eax: boolean <- stream-token? curr-token
 371   check stream?, "F - test-tokenize-stream-literal-in-tree: type"
 372   var curr-token-data-ah/eax: (addr handle stream byte) <- get curr-token, text-data
 373   var curr-token-data/eax: (addr stream byte) <- lookup *curr-token-data-ah
 374   var data-equal?/eax: boolean <- stream-data-equal? curr-token-data, "abc def"
 375   check data-equal?, "F - test-tokenize-stream-literal-in-tree"
 376   read-from-stream stream, curr-token
 377   var bracket?/eax: boolean <- bracket-token? curr-token
 378   check bracket?, "F - test-tokenize-stream-literal-in-tree: close paren"
 379   var empty?/eax: boolean <- stream-empty? stream
 380   check empty?, "F - test-tokenize-stream-literal-in-tree: empty?"
 381 }
 382 
 383 fn test-tokenize-indent {
 384   var in-storage: gap-buffer
 385   var in/esi: (addr gap-buffer) <- address in-storage
 386   initialize-gap-buffer-with in, "abc\n  def"
 387   #
 388   var stream-storage: (stream token 0x10)
 389   var stream/edi: (addr stream token) <- address stream-storage
 390   #
 391   var trace-storage: trace
 392   var trace/edx: (addr trace) <- address trace-storage
 393   initialize-trace trace, 1/only-errors, 0x10/capacity, 0/visible
 394   tokenize in, stream, trace
 395   #
 396   var curr-token-storage: token
 397   var curr-token/ebx: (addr token) <- address curr-token-storage
 398   read-from-stream stream, curr-token
 399   var curr-token-type/eax: (addr int) <- get curr-token, type
 400   check-ints-equal *curr-token-type, 3/indent, "F - test-tokenize-indent/before-indent-type"
 401   var curr-token-data/eax: (addr int) <- get curr-token, number-data
 402   check-ints-equal *curr-token-data, 0/spaces, "F - test-tokenize-indent/before-indent"
 403   read-from-stream stream, curr-token
 404   var curr-token-data-ah/eax: (addr handle stream byte) <- get curr-token, text-data
 405   var curr-token-data/eax: (addr stream byte) <- lookup *curr-token-data-ah
 406   check-stream-equal curr-token-data, "abc", "F - test-tokenize-indent/before"
 407   #
 408   read-from-stream stream, curr-token
 409   var curr-token-type/eax: (addr int) <- get curr-token, type
 410   check-ints-equal *curr-token-type, 3/indent, "F - test-tokenize-indent/type"
 411   var curr-token-data/eax: (addr int) <- get curr-token, number-data
 412   check-ints-equal *curr-token-data, 2/spaces, "F - test-tokenize-indent"
 413   #
 414   read-from-stream stream, curr-token
 415   var curr-token-data-ah/eax: (addr handle stream byte) <- get curr-token, text-data
 416   var curr-token-data/eax: (addr stream byte) <- lookup *curr-token-data-ah
 417   check-stream-equal curr-token-data, "def", "F - test-tokenize-indent/after"
 418 }
 419 
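The tests above never exercise curly brackets, even though next-token below treats '{' and '}' like parens: single-code-point bracket tokens. Here is a sketch of such a test, modeled on the tests above; the name test-tokenize-curly-brackets and its exact assertions are mine and unverified:

fn test-tokenize-curly-brackets {
  var in-storage: gap-buffer
  var in/esi: (addr gap-buffer) <- address in-storage
  initialize-gap-buffer-with in, "{a}"
  #
  var stream-storage: (stream token 0x10)
  var stream/edi: (addr stream token) <- address stream-storage
  #
  var trace-storage: trace
  var trace/edx: (addr trace) <- address trace-storage
  initialize-trace trace, 1/only-errors, 0x10/capacity, 0/visible
  tokenize in, stream, trace
  #
  var curr-token-storage: token
  var curr-token/ebx: (addr token) <- address curr-token-storage
  read-from-stream stream, curr-token  # skip initial indent
  read-from-stream stream, curr-token
  var open?/eax: boolean <- bracket-token? curr-token
  check open?, "F - test-tokenize-curly-brackets: open curly"
  read-from-stream stream, curr-token  # skip a
  read-from-stream stream, curr-token
  var close?/eax: boolean <- bracket-token? curr-token
  check close?, "F - test-tokenize-curly-brackets: close curly"
}
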
 420 # caller is responsible for threading start-of-line? between calls to next-token
 421 # if start-of-line?, leading whitespace in 'in' is significant: it becomes an indent token
 422 fn next-token in: (addr gap-buffer), out: (addr token), start-of-line?: boolean, trace: (addr trace) -> _/edi: boolean {
 423   trace-text trace, "tokenize", "next-token"
 424   trace-lower trace
 425   # save an indent token if necessary
 426   {
 427     compare start-of-line?, 0/false
 428     break-if-=
 429     next-indent-token in, out, trace  # might not be returned
 430   }
 431   skip-spaces-from-gap-buffer in
 432   var g/eax: code-point-utf8 <- peek-from-gap-buffer in
 433   {
 434     compare g, 0x23/comment
 435     break-if-!=
 436     skip-rest-of-line in
 437   }
 438   var g/eax: code-point-utf8 <- peek-from-gap-buffer in
 439   {
 440     compare g, 0xa/newline
 441     break-if-!=
 442     trace-text trace, "tokenize", "newline"
 443     g <- read-from-gap-buffer in
 444     initialize-skip-token out  # might drop indent if that's all there was in this line
 445     trace-higher trace
 446     return 1/at-start-of-line
 447   }
 448   {
 449     compare start-of-line?, 0/false
 450     break-if-=
 451     # still here? no comment or newline? return saved indent
 452     trace-higher trace
 453     return 0/not-at-start-of-line
 454   }
 455   {
 456     var done?/eax: boolean <- gap-buffer-scan-done? in
 457     compare done?, 0/false
 458     break-if-=
 459     trace-text trace, "tokenize", "end"
 460     initialize-skip-token out
 461     trace-higher trace
 462     return 1/at-start-of-line
 463   }
 464   var _g/eax: code-point-utf8 <- peek-from-gap-buffer in
 465   var g/ecx: code-point-utf8 <- copy _g
 466   {
 467     var should-trace?/eax: boolean <- should-trace? trace
 468     compare should-trace?, 0/false
 469     break-if-=
 470     var stream-storage: (stream byte 0x40)
 471     var stream/esi: (addr stream byte) <- address stream-storage
 472     write stream, "next: "
 473     var gval/eax: int <- copy g
 474     write-int32-hex stream, gval
 475     trace trace, "tokenize", stream
 476   }
 477   $next-token:case: {
 478     # double quotes begin streams
 479     {
 480       compare g, 0x22/double-quote
 481       break-if-!=
 482       var dummy/eax: code-point-utf8 <- read-from-gap-buffer in  # skip
 483       next-stream-token in, out, trace
 484       break $next-token:case
 485     }
 486     # open square brackets begin balanced streams
 487     {
 488       compare g, 0x5b/open-square-bracket
 489       break-if-!=
 490       var dummy/eax: code-point-utf8 <- read-from-gap-buffer in  # skip open bracket
 491       next-balanced-stream-token in, out, trace
 492       break $next-token:case
 493     }
 494     # other symbol char
 495     {
 496       var symbol?/eax: boolean <- symbol-code-point-utf8? g
 497       compare symbol?, 0/false
 498       break-if-=
 499       next-symbol-token in, out, trace
 500       break $next-token:case
 501     }
 502     # unbalanced close square brackets are errors
 503     {
 504       compare g, 0x5d/close-square-bracket
 505       break-if-!=
 506       error trace, "unbalanced ']'"
 507       return start-of-line?
 508     }
 509     # other brackets are always single-char tokens
 510     {
 511       var bracket?/eax: boolean <- bracket-code-point-utf8? g
 512       compare bracket?, 0/false
 513       break-if-=
 514       var g/eax: code-point-utf8 <- read-from-gap-buffer in
 515       next-bracket-token g, out, trace
 516       break $next-token:case
 517     }
 518     # quote
 519     {
 520       compare g, 0x27/single-quote
 521       break-if-!=
 522       var g/eax: code-point-utf8 <- read-from-gap-buffer in  # consume
 523       initialize-token out, "'"
 524       break $next-token:case
 525     }
 526     # backquote
 527     {
 528       compare g, 0x60/backquote
 529       break-if-!=
 530       var g/eax: code-point-utf8 <- read-from-gap-buffer in  # consume
 531       initialize-token out, "`"
 532       break $next-token:case
 533     }
 534     # unquote
 535     {
 536       compare g, 0x2c/comma
 537       break-if-!=
 538       var g/eax: code-point-utf8 <- read-from-gap-buffer in  # consume
 539       # check for unquote-splice
 540       {
 541         g <- peek-from-gap-buffer in
 542         compare g, 0x40/at-sign
 543         break-if-!=
 544         g <- read-from-gap-buffer in
 545         initialize-token out, ",@"
 546         break $next-token:case
 547       }
 548       initialize-token out, ","
 549       break $next-token:case
 550     }
 551     set-cursor-position 0/screen, 0x40 0x20
 552     {
 553       var foo/eax: int <- copy g
 554       draw-int32-decimal-wrapping-right-then-down-from-cursor-over-full-screen 0/screen, foo, 7/fg 0/bg
 555     }
 556     abort "unknown token type"
 557   }
 558   trace-higher trace
 559   {
 560     var should-trace?/eax: boolean <- should-trace? trace
 561     compare should-trace?, 0/false
 562     break-if-=
 563     var stream-storage: (stream byte 0x400)  # maximum possible token size (next-stream-token)
 564     var stream/eax: (addr stream byte) <- address stream-storage
 565     write stream, "=> "
 566     write-token-text-data stream, out
 567     trace trace, "tokenize", stream
 568   }
 569   return start-of-line?
 570 }
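
To make the start-of-line? protocol concrete, here is the call sequence I traced for the input "ab\n  cd" (my own illustration; tokenize above is the canonical caller):

  call 1 (start-of-line? true):   returns indent(0); flag becomes false
  call 2 (flag false):            returns the symbol "ab"
  call 3 (flag false):            consumes the newline, returns a skip token; flag becomes true
  call 4 (flag true):             returns indent(2); flag becomes false
  call 5 (flag false):            returns the symbol "cd"

At that point the gap buffer is fully scanned, so tokenize stops looping. Skip tokens are dropped, leaving indent(0), "ab", indent(2), "cd" in the output stream.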
 571 
 572 fn next-symbol-token in: (addr gap-buffer), _out: (addr token), trace: (addr trace) {
 573   trace-text trace, "tokenize", "looking for a symbol"
 574   trace-lower trace
 575   var out/eax: (addr token) <- copy _out
 576   var out-data-ah/eax: (addr handle stream byte) <- get out, text-data
 577   populate-stream out-data-ah, 0x40/max-symbol-size
 578   var _out-data/eax: (addr stream byte) <- lookup *out-data-ah
 579   var out-data/edi: (addr stream byte) <- copy _out-data
 580   $next-symbol-token:loop: {
 581     var done?/eax: boolean <- gap-buffer-scan-done? in
 582     compare done?, 0/false
 583     break-if-!=
 584     var g/eax: code-point-utf8 <- peek-from-gap-buffer in
 585     {
 586       {
 587         var should-trace?/eax: boolean <- should-trace? trace
 588         compare should-trace?, 0/false
 589       }
 590       break-if-=
 591       var stream-storage: (stream byte 0x40)
 592       var stream/esi: (addr stream byte) <- address stream-storage
 593       write stream, "next: "
 594       var gval/eax: int <- copy g
 595       write-int32-hex stream, gval
 596       trace trace, "tokenize", stream
 597     }
 598     # if non-symbol, return
 599     {
 600       var symbol-code-point-utf8?/eax: boolean <- symbol-code-point-utf8? g
 601       compare symbol-code-point-utf8?, 0/false
 602       break-if-!=
 603       trace-text trace, "tokenize", "stop"
 604       break $next-symbol-token:loop
 605     }
 606     var g/eax: code-point-utf8 <- read-from-gap-buffer in
 607     write-code-point-utf8 out-data, g
 608     loop
 609   }
 610   trace-higher trace
 611   {
 612     var should-trace?/eax: boolean <- should-trace? trace
 613     compare should-trace?, 0/false
 614     break-if-=
 615     var stream-storage: (stream byte 0x40)
 616     var stream/esi: (addr stream byte) <- address stream-storage
 617     write stream, "=> "
 618     rewind-stream out-data
 619     write-stream stream, out-data
 620     trace trace, "tokenize", stream
 621   }
 622 }
 623 
 624 fn next-number-token in: (addr gap-buffer), _out: (addr token), trace: (addr trace) {
 625   trace-text trace, "tokenize", "looking for a number"
 626   trace-lower trace
 627   var out/eax: (addr token) <- copy _out
 628   var out-data-ah/eax: (addr handle stream byte) <- get out, text-data
 629   populate-stream out-data-ah, 0x40
 630   var _out-data/eax: (addr stream byte) <- lookup *out-data-ah
 631   var out-data/edi: (addr stream byte) <- copy _out-data
 632   $next-number-token:check-minus: {
 633     var g/eax: code-point-utf8 <- peek-from-gap-buffer in
 634     compare g, 0x2d/minus
         break-if-!=
 635     g <- read-from-gap-buffer in  # consume
 636     write-code-point-utf8 out-data, g
 637   }
 638   $next-number-token:loop: {
 639     var done?/eax: boolean <- gap-buffer-scan-done? in
 640     compare done?, 0/false
 641     break-if-!=
 642     var g/eax: code-point-utf8 <- peek-from-gap-buffer in
 643     {
 644       {
 645         var should-trace?/eax: boolean <- should-trace? trace
 646         compare should-trace?, 0/false
 647       }
 648       break-if-=
 649       var stream-storage: (stream byte 0x40)
 650       var stream/esi: (addr stream byte) <- address stream-storage
 651       write stream, "next: "
 652       var gval/eax: int <- copy g
 653       write-int32-hex stream, gval
 654       trace trace, "tokenize", stream
 655     }
 656     # if not symbol code-point-utf8, return
 657     {
 658       var symbol-code-point-utf8?/eax: boolean <- symbol-code-point-utf8? g
 659       compare symbol-code-point-utf8?, 0/false
 660       break-if-!=
 661       trace-text trace, "tokenize", "stop"
 662       break $next-number-token:loop
 663     }
 664     # if not digit code-point-utf8, abort
 665     {
 666       var digit?/eax: boolean <- decimal-digit? g
 667       compare digit?, 0/false
 668       break-if-!=
 669       error trace, "invalid number"
 670       return
 671     }
 672     trace-text trace, "tokenize", "append"
 673     var g/eax: code-point-utf8 <- read-from-gap-buffer in
 674     write-code-point-utf8 out-data, g
 675     loop
 676   }
 677   trace-higher trace
 678 }
 679 
 680 fn next-stream-token in: (addr gap-buffer), _out: (addr token), trace: (addr trace) {
 681   trace-text trace, "tokenize", "stream"
 682   var out/edi: (addr token) <- copy _out
 683   var out-type/eax: (addr int) <- get out, type
 684   copy-to *out-type, 1/stream
 685   var out-data-ah/eax: (addr handle stream byte) <- get out, text-data
 686   # stream tokens contain whole function definitions on boot, so we always
 687   # give them plenty of space
 688   populate-stream out-data-ah, 0x400/max-definition-size=1KB
 689   var _out-data/eax: (addr stream byte) <- lookup *out-data-ah
 690   var out-data/edi: (addr stream byte) <- copy _out-data
 691   {
 692     var empty?/eax: boolean <- gap-buffer-scan-done? in
 693     compare empty?, 0/false
 694     {
 695       break-if-=
 696       error trace, "unbalanced '\"'"
 697       return
 698     }
 699     var g/eax: code-point-utf8 <- read-from-gap-buffer in
 700     compare g, 0x22/double-quote
 701     break-if-=
 702     write-code-point-utf8 out-data, g
 703     loop
 704   }
 705   {
 706     var should-trace?/eax: boolean <- should-trace? trace
 707     compare should-trace?, 0/false
 708     break-if-=
 709     var stream-storage: (stream byte 0x400)  # max-definition-size
 710     var stream/esi: (addr stream byte) <- address stream-storage
 711     write stream, "=> "
 712     rewind-stream out-data
 713     write-stream-immutable stream, out-data
 714     trace trace, "tokenize", stream
 715   }
 716 }
 717 
 718 fn next-balanced-stream-token in: (addr gap-buffer), _out: (addr token), trace: (addr trace) {
 719   trace-text trace, "tokenize", "balanced stream"
 720   var out/edi: (addr token) <- copy _out
 721   var out-type/eax: (addr int) <- get out, type
 722   copy-to *out-type, 1/stream
 723   var out-data-ah/eax: (addr handle stream byte) <- get out, text-data
 724   var bracket-count: int
 725   # stream tokens contain whole function definitions on boot, so we always
 726   # give them plenty of space
 727   populate-stream out-data-ah, 0x40000/max-definition-size=256KB
 728   var _out-data/eax: (addr stream byte) <- lookup *out-data-ah
 729   var out-data/edi: (addr stream byte) <- copy _out-data
 730   $next-balanced-stream-token:loop: {
 731     var empty?/eax: boolean <- gap-buffer-scan-done? in
 732     compare empty?, 0/false
 733     {
 734       break-if-=
 735       error trace, "unbalanced '['"
 736       return
 737     }
 738     var g/eax: code-point-utf8 <- read-from-gap-buffer in
 739     {
 740       compare g, 0x5b/open-square-bracket
 741       break-if-!=
 742       increment bracket-count
 743     }
 744     {
 745       compare g, 0x5d/close-square-bracket
 746       break-if-!=
 747       compare bracket-count, 0
 748       break-if-= $next-balanced-stream-token:loop
 749       decrement bracket-count
 750     }
 751     write-code-point-utf8 out-data, g
 752     loop
 753   }
 754   {
 755     var should-trace?/eax: boolean <- should-trace? trace
 756     compare should-trace?, 0/false
 757     break-if-=
 758     var stream-storage: (stream byte 0x400)  # max-definition-size
 759     var stream/esi: (addr stream byte) <- address stream-storage
 760     write stream, "=> "
 761     rewind-stream out-data
 762     write-stream-immutable stream, out-data
 763     trace trace, "tokenize", stream
 764   }
 765 }
 766 
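# 'g' is already known to be a bracket code-point-utf8; emit it as a single-code-point token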
 767 fn next-bracket-token g: code-point-utf8, _out: (addr token), trace: (addr trace) {
 768   trace-text trace, "tokenize", "bracket"
 769   var out/eax: (addr token) <- copy _out
 770   var out-data-ah/eax: (addr handle stream byte) <- get out, text-data
 771   populate-stream out-data-ah, 0x40
 772   var _out-data/eax: (addr stream byte) <- lookup *out-data-ah
 773   var out-data/edi: (addr stream byte) <- copy _out-data
 774   write-code-point-utf8 out-data, g
 775   {
 776     var should-trace?/eax: boolean <- should-trace? trace
 777     compare should-trace?, 0/false
 778     break-if-=
 779     var stream-storage: (stream byte 0x40)
 780     var stream/esi: (addr stream byte) <- address stream-storage
 781     write stream, "=> "
 782     rewind-stream out-data
 783     write-stream stream, out-data
 784     trace trace, "tokenize", stream
 785   }
 786 }
 787 
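# consume code-point-utf8s up to, but not including, the next newline (or end of buffer)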
 788 fn skip-rest-of-line in: (addr gap-buffer) {
 789   {
 790     var done?/eax: boolean <- gap-buffer-scan-done? in
 791     compare done?, 0/false
 792     break-if-!=
 793     var g/eax: code-point-utf8 <- peek-from-gap-buffer in
 794     compare g, 0xa/newline
 795     break-if-=
 796     g <- read-from-gap-buffer in  # consume
 797     loop
 798   }
 799 }
 800 
 801 fn next-indent-token in: (addr gap-buffer), _out: (addr token), trace: (addr trace) {
 802   trace-text trace, "tokenize", "indent"
 803   trace-lower trace
 804   var out/edi: (addr token) <- copy _out
 805   var out-type/eax: (addr int) <- get out, type
 806   copy-to *out-type, 3/indent
 807   var dest/edi: (addr int) <- get out, number-data
 808   copy-to *dest, 0
 809   {
 810     var done?/eax: boolean <- gap-buffer-scan-done? in
 811     compare done?, 0/false
 812     break-if-!=
 813     var g/eax: code-point-utf8 <- peek-from-gap-buffer in
 814     {
 815       {
 816         var should-trace?/eax: boolean <- should-trace? trace
 817         compare should-trace?, 0/false
 818       }
 819       break-if-=
 820       var stream-storage: (stream byte 0x40)
 821       var stream/esi: (addr stream byte) <- address stream-storage
 822       write stream, "next: "
 823       var gval/eax: int <- copy g
 824       write-int32-hex stream, gval
 825       trace trace, "tokenize", stream
 826     }
 827     # if non-space, break
 828     compare g, 0x20/space
 829     break-if-!=
 830     g <- read-from-gap-buffer in
 831     increment *dest
 832     loop
 833   }
 834   trace-higher trace
 835   {
 836     var should-trace?/eax: boolean <- should-trace? trace
 837     compare should-trace?, 0/false
 838     break-if-=
 839     var stream-storage: (stream byte 0x40)
 840     var stream/esi: (addr stream byte) <- address stream-storage
 841     write stream, "=> indent "
 842     write-int32-hex stream, *dest
 843     trace trace, "tokenize", stream
 844   }
 845 }
 846 
 847 # Mu carves up the space of code-point-utf8s into 4 categories:
 848 #   whitespace
 849 #   quotes and unquotes (from a Lisp perspective; doesn't include double
 850 #                        quotes or other Unicode quotes)
 851 #   operators
 852 #   symbols
 853 # (Numbers have their own parsing rules that don't fit cleanly in this
 854 # partition.)
 855 #
 856 # During tokenization operators and symbols are treated identically.
 857 # A later phase digs into that nuance.
 858 
 859 fn symbol-code-point-utf8? g: code-point-utf8 -> _/eax: boolean {
 860   var whitespace?/eax: boolean <- whitespace-code-point-utf8? g
 861   compare whitespace?, 0/false
 862   {
 863     break-if-=
 864     return 0/false
 865   }
 866   var quote-or-unquote?/eax: boolean <- quote-or-unquote-code-point-utf8? g
 867   compare quote-or-unquote?, 0/false
 868   {
 869     break-if-=
 870     return 0/false
 871   }
 872   var bracket?/eax: boolean <- bracket-code-point-utf8? g
 873   compare bracket?, 0/false
 874   {
 875     break-if-=
 876     return 0/false
 877   }
 878   compare g, 0x23/hash  # comments get filtered out
 879   {
 880     break-if-!=
 881     return 0/false
 882   }
 883   compare g, 0x22/double-quote  # double quotes reserved for now
 884   {
 885     break-if-!=
 886     return 0/false
 887   }
 888   return 1/true
 889 }
 890 
 891 fn whitespace-code-point-utf8? g: code-point-utf8 -> _/eax: boolean {
 892   compare g, 9/tab
 893   {
 894     break-if-!=
 895     return 1/true
 896   }
 897   compare g, 0xa/newline
 898   {
 899     break-if-!=
 900     return 1/true
 901   }
 902   compare g, 0x20/space
 903   {
 904     break-if-!=
 905     return 1/true
 906   }
 907   return 0/false
 908 }
 909 
 910 fn quote-or-unquote-code-point-utf8? g: code-point-utf8 -> _/eax: boolean {
 911   compare g, 0x27/single-quote
 912   {
 913     break-if-!=
 914     return 1/true
 915   }
 916   compare g, 0x60/backquote
 917   {
 918     break-if-!=
 919     return 1/true
 920   }
 921   compare g, 0x2c/comma
 922   {
 923     break-if-!=
 924     return 1/true
 925   }
 926   compare g, 0x40/at-sign
 927   {
 928     break-if-!=
 929     return 1/true
 930   }
 931   return 0/false
 932 }
 933 
 934 fn bracket-code-point-utf8? g: code-point-utf8 -> _/eax: boolean {
 935   compare g, 0x28/open-paren
 936   {
 937     break-if-!=
 938     return 1/true
 939   }
 940   compare g, 0x29/close-paren
 941   {
 942     break-if-!=
 943     return 1/true
 944   }
 945   compare g, 0x5b/open-square-bracket
 946   {
 947     break-if-!=
 948     return 1/true
 949   }
 950   compare g, 0x5d/close-square-bracket
 951   {
 952     break-if-!=
 953     return 1/true
 954   }
 955   compare g, 0x7b/open-curly-bracket
 956   {
 957     break-if-!=
 958     return 1/true
 959   }
 960   compare g, 0x7d/close-curly-bracket
 961   {
 962     break-if-!=
 963     return 1/true
 964   }
 965   return 0/false
 966 }
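
A sketch of a test that pins down the partition described in the comment above these predicates. The name test-code-point-utf8-classification is mine; it sticks to positive checks so it only needs helpers already used in this file:

fn test-code-point-utf8-classification {
  var whitespace?/eax: boolean <- whitespace-code-point-utf8? 0x20/space
  check whitespace?, "F - test-code-point-utf8-classification: space"
  var quote?/eax: boolean <- quote-or-unquote-code-point-utf8? 0x27/single-quote
  check quote?, "F - test-code-point-utf8-classification: quote"
  var bracket?/eax: boolean <- bracket-code-point-utf8? 0x7b/open-curly-bracket
  check bracket?, "F - test-code-point-utf8-classification: bracket"
  var operator?/eax: boolean <- symbol-code-point-utf8? 0x2b/plus
  check operator?, "F - test-code-point-utf8-classification: operator"
  var letter?/eax: boolean <- symbol-code-point-utf8? 0x61/a
  check letter?, "F - test-code-point-utf8-classification: letter"
}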
 967 
 968 fn number-token? _self: (addr token) -> _/eax: boolean {
 969   var self/eax: (addr token) <- copy _self
 970   var in-data-ah/eax: (addr handle stream byte) <- get self, text-data
 971   var _in-data/eax: (addr stream byte) <- lookup *in-data-ah
 972   var in-data/ecx: (addr stream byte) <- copy _in-data
 973   rewind-stream in-data
 974   var g/eax: code-point-utf8 <- read-code-point-utf8 in-data
 975   # if '-', read another
 976   {
 977     compare g, 0x2d/minus
 978     break-if-!=
 979     g <- read-code-point-utf8 in-data
 980   }
 981   {
 982     {
 983       var result/eax: boolean <- decimal-digit? g
 984       compare result, 0/false
 985       break-if-!=
 986       return 0/false
 987     }
 988     {
 989       var done?/eax: boolean <- stream-empty? in-data
 990       compare done?, 0/false
 991     }
 992     break-if-!=
 993     g <- read-code-point-utf8 in-data
 994     loop
 995   }
 996   return 1/true
 997 }
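
number-token? is easy to exercise directly on a hand-built token, following the pattern of test-dot-token further down. A sketch; the name test-number-token is mine:

fn test-number-token {
  var tmp-storage: (handle token)
  var tmp-ah/eax: (addr handle token) <- address tmp-storage
  allocate-token tmp-ah
  var tmp/eax: (addr token) <- lookup *tmp-ah
  initialize-token tmp, "-123"
  var result/eax: boolean <- number-token? tmp
  check result, "F - test-number-token"
}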
 998 
 999 fn bracket-token? _self: (addr token) -> _/eax: boolean {
1000   var self/eax: (addr token) <- copy _self
1001   {
1002     var in-type/eax: (addr int) <- get self, type
1003     compare *in-type, 1/stream
1004     break-if-!=
1005     # stream tokens are never bracket tokens
1006     return 0/false
1007   }
1008   var in-data-ah/eax: (addr handle stream byte) <- get self, text-data
1009   var in-data/eax: (addr stream byte) <- lookup *in-data-ah
1010   rewind-stream in-data
1011   var g/eax: code-point-utf8 <- read-code-point-utf8 in-data
1012   var result/eax: boolean <- bracket-code-point-utf8? g
1013   return result
1014 }
1015 
1016 fn quote-token? _self: (addr token) -> _/eax: boolean {
1017   var self/eax: (addr token) <- copy _self
1018   var in-data-ah/eax: (addr handle stream byte) <- get self, text-data
1019   var in-data/eax: (addr stream byte) <- lookup *in-data-ah
1020   rewind-stream in-data
1021   var result/eax: boolean <- stream-data-equal? in-data, "'"
1022   return result
1023 }
1024 
1025 fn backquote-token? _self: (addr token) -> _/eax: boolean {
1026   var self/eax: (addr token) <- copy _self
1027   var in-data-ah/eax: (addr handle stream byte) <- get self, text-data
1028   var in-data/eax: (addr stream byte) <- lookup *in-data-ah
1029   rewind-stream in-data
1030   var result/eax: boolean <- stream-data-equal? in-data, "`"
1031   return result
1032 }
1033 
1034 fn unquote-token? _self: (addr token) -> _/eax: boolean {
1035   var self/eax: (addr token) <- copy _self
1036   var in-data-ah/eax: (addr handle stream byte) <- get self, text-data
1037   var in-data/eax: (addr stream byte) <- lookup *in-data-ah
1038   rewind-stream in-data
1039   var result/eax: boolean <- stream-data-equal? in-data, ","
1040   return result
1041 }
1042 
1043 fn unquote-splice-token? _self: (addr token) -> _/eax: boolean {
1044   var self/eax: (addr token) <- copy _self
1045   var in-data-ah/eax: (addr handle stream byte) <- get self, text-data
1046   var in-data/eax: (addr stream byte) <- lookup *in-data-ah
1047   rewind-stream in-data
1048   var result/eax: boolean <- stream-data-equal? in-data, ",@"
1049   return result
1050 }
1051 
1052 fn open-paren-token? _self: (addr token) -> _/eax: boolean {
1053   var self/eax: (addr token) <- copy _self
1054   var in-data-ah/eax: (addr handle stream byte) <- get self, text-data
1055   var _in-data/eax: (addr stream byte) <- lookup *in-data-ah
1056   var in-data/ecx: (addr stream byte) <- copy _in-data
1057   rewind-stream in-data
1058   var g/eax: code-point-utf8 <- read-code-point-utf8 in-data
1059   compare g, 0x28/open-paren
1060   {
1061     break-if-!=
1062     var result/eax: boolean <- stream-empty? in-data
1063     return result
1064   }
1065   return 0/false
1066 }
1067 
1068 fn close-paren-token? _self: (addr token) -> _/eax: boolean {
1069   var self/eax: (addr token) <- copy _self
1070   var in-data-ah/eax: (addr handle stream byte) <- get self, text-data
1071   var _in-data/eax: (addr stream byte) <- lookup *in-data-ah
1072   var in-data/ecx: (addr stream byte) <- copy _in-data
1073   rewind-stream in-data
1074   var g/eax: code-point-utf8 <- read-code-point-utf8 in-data
1075   compare g, 0x29/close-paren
1076   {
1077     break-if-!=
1078     var result/eax: boolean <- stream-empty? in-data
1079     return result
1080   }
1081   return 0/false
1082 }
1083 
1084 fn dot-token? _self: (addr token) -> _/eax: boolean {
1085   var self/eax: (addr token) <- copy _self
1086   var in-data-ah/eax: (addr handle stream byte) <- get self, text-data
1087   var _in-data/eax: (addr stream byte) <- lookup *in-data-ah
1088   var in-data/ecx: (addr stream byte) <- copy _in-data
1089   rewind-stream in-data
1090   var g/eax: code-point-utf8 <- read-code-point-utf8 in-data
1091   compare g, 0x2e/dot
1092   {
1093     break-if-!=
1094     var result/eax: boolean <- stream-empty? in-data
1095     return result
1096   }
1097   return 0/false
1098 }
1099 
1100 fn test-dot-token {
1101   var tmp-storage: (handle token)
1102   var tmp-ah/eax: (addr handle token) <- address tmp-storage
1103   allocate-token tmp-ah
1104   var tmp/eax: (addr token) <- lookup *tmp-ah
1105   initialize-token tmp, "."
1106   var result/eax: boolean <- dot-token? tmp
1107   check result, "F - test-dot-token"
1108 }
1109 
1110 fn stream-token? _self: (addr token) -> _/eax: boolean {
1111   var self/eax: (addr token) <- copy _self
1112   var in-type/eax: (addr int) <- get self, type
1113   compare *in-type, 1/stream
1114   {
1115     break-if-=
1116     return 0/false
1117   }
1118   return 1/true
1119 }
1120 
1121 fn skip-token? _self: (addr token) -> _/eax: boolean {
1122   var self/eax: (addr token) <- copy _self
1123   var in-type/eax: (addr int) <- get self, type
1124   compare *in-type, 2/skip
1125   {
1126     break-if-=
1127     return 0/false
1128   }
1129   return 1/true
1130 }
1131 
1132 fn indent-token? _self: (addr token) -> _/eax: boolean {
1133   var self/eax: (addr token) <- copy _self
1134   var in-type/eax: (addr int) <- get self, type
1135   compare *in-type, 3/indent
1136   {
1137     break-if-=
1138     return 0/false
1139   }
1140   return 1/true
1141 }
1142 
1143 fn allocate-token _self-ah: (addr handle token) {
1144   var self-ah/eax: (addr handle token) <- copy _self-ah
1145   allocate self-ah
1146   var self/eax: (addr token) <- lookup *self-ah
1147   var dest-ah/eax: (addr handle stream byte) <- get self, text-data
1148   populate-stream dest-ah, 0x40/max-symbol-size
1149 }
1150 
1151 fn initialize-token _self: (addr token), val: (addr array byte) {
1152   var self/eax: (addr token) <- copy _self
1153   var dest-ah/eax: (addr handle stream byte) <- get self, text-data
1154   populate-stream dest-ah, 0x40
1155   var dest/eax: (addr stream byte) <- lookup *dest-ah
1156   write dest, val
1157 }
1158 
1159 fn initialize-skip-token _self: (addr token) {
1160   var self/eax: (addr token) <- copy _self
1161   var self-type/eax: (addr int) <- get self, type
1162   copy-to *self-type, 2/skip
1163 }
1164 
1165 fn write-token-text-data out: (addr stream byte), _self: (addr token) {
1166   var self/eax: (addr token) <- copy _self
1167   var data-ah/eax: (addr handle stream byte) <- get self, text-data
1168   var data/eax: (addr stream byte) <- lookup *data-ah
1169   rewind-stream data
1170   write-stream out, data
1171 }
1172 
1173 fn tokens-equal? _a: (addr token), _b: (addr token) -> _/eax: boolean {
1174   var a/edx: (addr token) <- copy _a
1175   var b/ebx: (addr token) <- copy _b
1176   var a-type-addr/eax: (addr int) <- get a, type
1177   var a-type/eax: int <- copy *a-type-addr
1178   var b-type-addr/ecx: (addr int) <- get b, type
1179   compare a-type, *b-type-addr
1180   {
1181     break-if-=
1182     return 0/false
1183   }
1184   compare a-type, 2/skip
1185   {
1186     break-if-!=
1187     # skip tokens have no other data
1188     return 1/true
1189   }
1190   compare a-type, 3/indent
1191   {
1192     break-if-!=
1193     # indent tokens have no text-data; compare number-data
1194     var a-number-data-addr/eax: (addr int) <- get a, number-data
1195     var a-number-data/eax: int <- copy *a-number-data-addr
1196     var b-number-data-addr/ecx: (addr int) <- get b, number-data
1197     compare a-number-data, *b-number-data-addr
1198     {
1199       break-if-=
1200       return 0/false
1201     }
1202     return 1/true
1203   }
1204   var b-data-ah/eax: (addr handle stream byte) <- get b, text-data
1205   var _b-data/eax: (addr stream byte) <- lookup *b-data-ah
1206   var b-data/ebx: (addr stream byte) <- copy _b-data
1207   var a-data-ah/eax: (addr handle stream byte) <- get a, text-data
1208   var a-data/eax: (addr stream byte) <- lookup *a-data-ah
1209   var data-match?/eax: boolean <- streams-data-equal? a-data, b-data
1210   return data-match?
1211 }
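
tokens-equal? compares types first; skip tokens are then equal outright, indent tokens compare number-data, and everything else compares text-data. A minimal positive test in the style of test-dot-token; the name and details are mine:

fn test-tokens-equal {
  var a-storage: (handle token)
  var a-ah/eax: (addr handle token) <- address a-storage
  allocate-token a-ah
  var _a/eax: (addr token) <- lookup *a-ah
  var a/ecx: (addr token) <- copy _a
  initialize-token a, "abc"
  var b-storage: (handle token)
  var b-ah/eax: (addr handle token) <- address b-storage
  allocate-token b-ah
  var b/eax: (addr token) <- lookup *b-ah
  initialize-token b, "abc"
  var result/eax: boolean <- tokens-equal? a, b
  check result, "F - test-tokens-equal"
}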
1212 
1213 fn dump-token-from-cursor _t: (addr token) {
1214   var t/esi: (addr token) <- copy _t
1215   var type/eax: (addr int) <- get t, type
1216   draw-int32-decimal-wrapping-right-then-down-from-cursor-over-full-screen 0/screen, *type, 7/fg 0/bg
1217   draw-text-wrapping-right-then-down-from-cursor-over-full-screen 0/screen, " ", 7/fg 0/bg
1218   var text-ah/eax: (addr handle stream byte) <- get t, text-data
1219   var text/eax: (addr stream byte) <- lookup *text-ah
1220   rewind-stream text
1221   draw-stream-wrapping-right-then-down-from-cursor-over-full-screen 0/screen, text, 7/fg 0/bg
1222   draw-text-wrapping-right-then-down-from-cursor-over-full-screen 0/screen, " ", 7/fg 0/bg
1223   var num/eax: (addr int) <- get t, number-data
1224   draw-int32-decimal-wrapping-right-then-down-from-cursor-over-full-screen 0/screen, *num, 7/fg 0/bg
1225   draw-text-wrapping-right-then-down-from-cursor-over-full-screen 0/screen, "\n", 7/fg 0/bg
1226 }