# out is not allocated fn read-cell in: (addr gap-buffer), out: (addr handle cell), trace: (addr trace) { trace-text trace, "read", "tokenize" trace-lower trace rewind-gap-buffer in var token-storage: (stream byte 0x1000) # strings can be large var token/ecx: (addr stream byte) <- address token-storage { var done?/eax: boolean <- gap-buffer-scan-done? in compare done?, 0/false break-if-!= next-token in, token, trace var error?/eax: boolean <- has-errors? trace compare error?, 0/false break-if-!= read-symbol token, out loop } # TODO: # insert parens # transform infix # token tree # syntax tree trace-higher trace } fn next-token in: (addr gap-buffer), out: (addr stream byte), trace: (addr trace) { trace-text trace, "read", "next-token" trace-lower trace $next-token:body: { clear-stream out skip-whitespace-from-gap-buffer in var g/eax: grapheme <- peek-from-gap-buffer in { var stream-storage: (stream byte 0x40) var stream/esi: (addr stream byte) <- address stream-storage write stream, "next: " var gval/eax: int <- copy g write-int32-hex stream, gval trace trace, "read", stream } # digit { var digit?/eax: boolean <- is-decimal-digit? g compare digit?, 0/false break-if-= next-number-token in, out, trace break $next-token:body } # other symbol char { var symbol?/eax: boolean <- is-symbol-grapheme? g compare symbol?, 0/false break-if-= next-symbol-token in, out, trace break $next-token:body } } trace-higher trace var stream-storage: (stream byte 0x40) var stream/eax: (addr stream byte) <- address stream-storage write stream, "=> " write-stream stream, out trace trace, "read", stream } fn next-symbol-token in: (addr gap-buffer), out: (addr stream byte), trace: (addr trace) { trace-text trace, "read", "looking for a symbol" trace-lower trace $next-symbol-token:loop: { var done?/eax: boolean <- gap-buffer-scan-done? in compare done?, 0/false break-if-!= var g/eax: grapheme <- peek-from-gap-buffer in { var stream-storage: (stream byte 0x40) var stream/esi: (addr stream byte) <- address stream-storage write stream, "next: " var gval/eax: int <- copy g write-int32-hex stream, gval trace trace, "read", stream } # if non-symbol, return { var symbol-grapheme?/eax: boolean <- is-symbol-grapheme? g compare symbol-grapheme?, 0/false break-if-!= trace-text trace, "read", "stop" break $next-symbol-token:loop } var g/eax: grapheme <- read-from-gap-buffer in write-grapheme out, g loop } trace-higher trace var stream-storage: (stream byte 0x40) var stream/esi: (addr stream byte) <- address stream-storage write stream, "=> " write-stream stream, out trace trace, "read", stream } fn next-number-token in: (addr gap-buffer), out: (addr stream byte), trace: (addr trace) { trace-text trace, "read", "looking for a number" trace-lower trace $next-number-token:loop: { var done?/eax: boolean <- gap-buffer-scan-done? in compare done?, 0/false break-if-!= var g/eax: grapheme <- peek-from-gap-buffer in { var stream-storage: (stream byte 0x40) var stream/esi: (addr stream byte) <- address stream-storage write stream, "next: " var gval/eax: int <- copy g write-int32-hex stream, gval trace trace, "read", stream } # if not symbol grapheme, return { var symbol-grapheme?/eax: boolean <- is-symbol-grapheme? g compare symbol-grapheme?, 0/false break-if-!= trace-text trace, "read", "stop" break $next-number-token:loop } # if not digit grapheme, abort { var digit?/eax: boolean <- is-decimal-digit? g compare digit?, 0/false break-if-!= error trace, "invalid number" return } trace-text trace, "read", "append" var g/eax: grapheme <- read-from-gap-buffer in write-grapheme out, g loop } trace-higher trace } fn read-symbol in: (addr stream byte), _out: (addr handle cell) { rewind-stream in var out/eax: (addr handle cell) <- copy _out new-symbol out var out-a/eax: (addr cell) <- lookup *out var out-data-ah/eax: (addr handle stream byte) <- get out-a, text-data var _out-data/eax: (addr stream byte) <- lookup *out-data-ah var out-data/edi: (addr stream byte) <- copy _out-data { var done?/eax: boolean <- stream-empty? in compare done?, 0/false break-if-!= var g/eax: grapheme <- read-grapheme in write-grapheme out-data, g loop } } fn is-symbol-grapheme? g: grapheme -> _/eax: boolean { ## whitespace compare g, 9/tab { break-if-!= return 0/false } compare g, 0xa/newline { break-if-!= return 0/false } compare g, 0x20/space { break-if-!= return 0/false } ## quotes compare g, 0x22/double-quote { break-if-!= return 0/false } compare g, 0x27/single-quote { break-if-!= return 0/false } compare g, 0x60/backquote { break-if-!= return 0/false } ## brackets compare g, 0x28/open-paren { break-if-!= return 0/false } compare g, 0x29/close-paren { break-if-!= return 0/false } compare g, 0x5b/open-square-bracket { break-if-!= return 0/false } compare g, 0x5d/close-square-bracket { break-if-!= return 0/false } compare g, 0x7b/open-curly-bracket { break-if-!= return 0/false } compare g, 0x7d/close-curly-bracket { break-if-!= return 0/false } # - other punctuation # '!' is a symbol char compare g, 0x23/hash { break-if-!= return 0/false } # '$' is a symbol char compare g, 0x25/percent { break-if-!= return 0/false } compare g, 0x26/ampersand { break-if-!= return 0/false } compare g, 0x2a/asterisk { break-if-!= return 0/false } compare g, 0x2b/plus { break-if-!= return 0/false } compare g, 0x2c/comma { break-if-!= return 0/false } # '-' is a symbol char compare g, 0x2e/period { break-if-!= return 0/false } compare g, 0x2f/slash { break-if-!= return 0/false } compare g, 0x2f/slash { break-if-!= return 0/false } compare g, 0x3a/colon { break-if-!= return 0/false } compare g, 0x3b/semi-colon { break-if-!= return 0/false } compare g, 0x3c/less-than { break-if-!= return 0/false } compare g, 0x3d/equal { break-if-!= return 0/false } compare g, 0x3e/greater-than { break-if-!= return 0/false } # '?' is a symbol char compare g, 0x40/at-sign { break-if-!= return 0/false } compare g, 0x5c/backslash { break-if-!= return 0/false } compare g, 0x5e/caret { break-if-!= return 0/false } # '_' is a symbol char compare g, 0x7c/vertical-line { break-if-!= return 0/false } compare g, 0x7e/tilde { break-if-!= return 0/false } return 1/true }