From c4f43035b0794ff2d430fa02437fcbba19c5004f Mon Sep 17 00:00:00 2001 From: "Kartik K. Agaram" Date: Fri, 26 Feb 2021 22:11:57 -0800 Subject: 7815 --- baremetal/shell/parse.mu | 14 ++ baremetal/shell/read.mu | 386 -------------------------------------------- baremetal/shell/tokenize.mu | 370 ++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 384 insertions(+), 386 deletions(-) create mode 100644 baremetal/shell/parse.mu create mode 100644 baremetal/shell/tokenize.mu diff --git a/baremetal/shell/parse.mu b/baremetal/shell/parse.mu new file mode 100644 index 00000000..b9eeccab --- /dev/null +++ b/baremetal/shell/parse.mu @@ -0,0 +1,14 @@ +fn parse-sexpression tokens: (addr stream cell), _out: (addr handle cell), trace: (addr trace) { + # For now we just convert first token into a symbol and return it. TODO + var empty?/eax: boolean <- stream-empty? tokens + compare empty?, 0/false + { + break-if-!= + var out/eax: (addr handle cell) <- copy _out + allocate out + var out-addr/eax: (addr cell) <- lookup *out + read-from-stream tokens, out-addr + var type/ecx: (addr int) <- get out-addr, type + copy-to *type, 2/symbol + } +} diff --git a/baremetal/shell/read.mu b/baremetal/shell/read.mu index ec560e76..d5a1d776 100644 --- a/baremetal/shell/read.mu +++ b/baremetal/shell/read.mu @@ -7,389 +7,3 @@ fn read-cell in: (addr gap-buffer), out: (addr handle cell), trace: (addr trace) # TODO: transform infix parse-sexpression tokens, out, trace } - -fn parse-sexpression tokens: (addr stream cell), _out: (addr handle cell), trace: (addr trace) { - # For now we just convert first token into a symbol and return it. TODO - var empty?/eax: boolean <- stream-empty? tokens - compare empty?, 0/false - { - break-if-!= - var out/eax: (addr handle cell) <- copy _out - allocate out - var out-addr/eax: (addr cell) <- lookup *out - read-from-stream tokens, out-addr - var type/ecx: (addr int) <- get out-addr, type - copy-to *type, 2/symbol - } -} - -fn tokenize in: (addr gap-buffer), out: (addr stream cell), trace: (addr trace) { - trace-text trace, "read", "tokenize" - trace-lower trace - rewind-gap-buffer in - var token-storage: cell - var token/edx: (addr cell) <- address token-storage - # initialize token - var dest-ah/eax: (addr handle stream byte) <- get token, text-data - populate-stream dest-ah, 0x40/max-token-size - # - { - var done?/eax: boolean <- gap-buffer-scan-done? in - compare done?, 0/false - break-if-!= - next-token in, token, trace - var error?/eax: boolean <- has-errors? trace - compare error?, 0/false - { - break-if-= - return - } - write-to-stream out, token - loop - } - trace-higher trace -} - -fn next-token in: (addr gap-buffer), _out-cell: (addr cell), trace: (addr trace) { - trace-text trace, "read", "next-token" - trace-lower trace - var out-cell/eax: (addr cell) <- copy _out-cell - var out-ah/eax: (addr handle stream byte) <- get out-cell, text-data - var _out/eax: (addr stream byte) <- lookup *out-ah - var out/edi: (addr stream byte) <- copy _out - $next-token:body: { - clear-stream out - skip-whitespace-from-gap-buffer in - var g/eax: grapheme <- peek-from-gap-buffer in - { - var stream-storage: (stream byte 0x40) - var stream/esi: (addr stream byte) <- address stream-storage - write stream, "next: " - var gval/eax: int <- copy g - write-int32-hex stream, gval - trace trace, "read", stream - } - # digit - { - var digit?/eax: boolean <- is-decimal-digit? g - compare digit?, 0/false - break-if-= - next-number-token in, out, trace - break $next-token:body - } - # other symbol char - { - var symbol?/eax: boolean <- is-symbol-grapheme? g - compare symbol?, 0/false - break-if-= - next-symbol-token in, out, trace - break $next-token:body - } - # brackets are always single-char tokens - { - var bracket?/eax: boolean <- is-bracket-grapheme? g - compare bracket?, 0/false - break-if-= - var g/eax: grapheme <- read-from-gap-buffer in - next-bracket-token g, out, trace - break $next-token:body - } - } - trace-higher trace - var stream-storage: (stream byte 0x40) - var stream/eax: (addr stream byte) <- address stream-storage - write stream, "=> " - rewind-stream out - write-stream stream, out - trace trace, "read", stream -} - -fn next-symbol-token in: (addr gap-buffer), out: (addr stream byte), trace: (addr trace) { - trace-text trace, "read", "looking for a symbol" - trace-lower trace - $next-symbol-token:loop: { - var done?/eax: boolean <- gap-buffer-scan-done? in - compare done?, 0/false - break-if-!= - var g/eax: grapheme <- peek-from-gap-buffer in - { - var stream-storage: (stream byte 0x40) - var stream/esi: (addr stream byte) <- address stream-storage - write stream, "next: " - var gval/eax: int <- copy g - write-int32-hex stream, gval - trace trace, "read", stream - } - # if non-symbol, return - { - var symbol-grapheme?/eax: boolean <- is-symbol-grapheme? g - compare symbol-grapheme?, 0/false - break-if-!= - trace-text trace, "read", "stop" - break $next-symbol-token:loop - } - var g/eax: grapheme <- read-from-gap-buffer in - write-grapheme out, g - loop - } - trace-higher trace - var stream-storage: (stream byte 0x40) - var stream/esi: (addr stream byte) <- address stream-storage - write stream, "=> " - rewind-stream out - write-stream stream, out - trace trace, "read", stream -} - -fn next-number-token in: (addr gap-buffer), out: (addr stream byte), trace: (addr trace) { - trace-text trace, "read", "looking for a number" - trace-lower trace - $next-number-token:loop: { - var done?/eax: boolean <- gap-buffer-scan-done? in - compare done?, 0/false - break-if-!= - var g/eax: grapheme <- peek-from-gap-buffer in - { - var stream-storage: (stream byte 0x40) - var stream/esi: (addr stream byte) <- address stream-storage - write stream, "next: " - var gval/eax: int <- copy g - write-int32-hex stream, gval - trace trace, "read", stream - } - # if not symbol grapheme, return - { - var symbol-grapheme?/eax: boolean <- is-symbol-grapheme? g - compare symbol-grapheme?, 0/false - break-if-!= - trace-text trace, "read", "stop" - break $next-number-token:loop - } - # if not digit grapheme, abort - { - var digit?/eax: boolean <- is-decimal-digit? g - compare digit?, 0/false - break-if-!= - error trace, "invalid number" - return - } - trace-text trace, "read", "append" - var g/eax: grapheme <- read-from-gap-buffer in - write-grapheme out, g - loop - } - trace-higher trace -} - -fn next-bracket-token g: grapheme, out: (addr stream byte), trace: (addr trace) { - trace-text trace, "read", "bracket" - write-grapheme out, g - var stream-storage: (stream byte 0x40) - var stream/esi: (addr stream byte) <- address stream-storage - write stream, "=> " - rewind-stream out - write-stream stream, out - trace trace, "read", stream -} - -fn is-symbol-grapheme? g: grapheme -> _/eax: boolean { - ## whitespace - compare g, 9/tab - { - break-if-!= - return 0/false - } - compare g, 0xa/newline - { - break-if-!= - return 0/false - } - compare g, 0x20/space - { - break-if-!= - return 0/false - } - ## quotes - compare g, 0x22/double-quote - { - break-if-!= - return 0/false - } - compare g, 0x27/single-quote - { - break-if-!= - return 0/false - } - compare g, 0x60/backquote - { - break-if-!= - return 0/false - } - ## brackets - compare g, 0x28/open-paren - { - break-if-!= - return 0/false - } - compare g, 0x29/close-paren - { - break-if-!= - return 0/false - } - compare g, 0x5b/open-square-bracket - { - break-if-!= - return 0/false - } - compare g, 0x5d/close-square-bracket - { - break-if-!= - return 0/false - } - compare g, 0x7b/open-curly-bracket - { - break-if-!= - return 0/false - } - compare g, 0x7d/close-curly-bracket - { - break-if-!= - return 0/false - } - # - other punctuation - # '!' is a symbol char - compare g, 0x23/hash - { - break-if-!= - return 0/false - } - # '$' is a symbol char - compare g, 0x25/percent - { - break-if-!= - return 0/false - } - compare g, 0x26/ampersand - { - break-if-!= - return 0/false - } - compare g, 0x2a/asterisk - { - break-if-!= - return 0/false - } - compare g, 0x2b/plus - { - break-if-!= - return 0/false - } - compare g, 0x2c/comma - { - break-if-!= - return 0/false - } - # '-' is a symbol char - compare g, 0x2e/period - { - break-if-!= - return 0/false - } - compare g, 0x2f/slash - { - break-if-!= - return 0/false - } - compare g, 0x2f/slash - { - break-if-!= - return 0/false - } - compare g, 0x3a/colon - { - break-if-!= - return 0/false - } - compare g, 0x3b/semi-colon - { - break-if-!= - return 0/false - } - compare g, 0x3c/less-than - { - break-if-!= - return 0/false - } - compare g, 0x3d/equal - { - break-if-!= - return 0/false - } - compare g, 0x3e/greater-than - { - break-if-!= - return 0/false - } - # '?' is a symbol char - compare g, 0x40/at-sign - { - break-if-!= - return 0/false - } - compare g, 0x5c/backslash - { - break-if-!= - return 0/false - } - compare g, 0x5e/caret - { - break-if-!= - return 0/false - } - # '_' is a symbol char - compare g, 0x7c/vertical-line - { - break-if-!= - return 0/false - } - compare g, 0x7e/tilde - { - break-if-!= - return 0/false - } - return 1/true -} - -fn is-bracket-grapheme? g: grapheme -> _/eax: boolean { - compare g, 0x28/open-paren - { - break-if-!= - return 1/true - } - compare g, 0x29/close-paren - { - break-if-!= - return 1/true - } - compare g, 0x5b/open-square-bracket - { - break-if-!= - return 1/true - } - compare g, 0x5d/close-square-bracket - { - break-if-!= - return 1/true - } - compare g, 0x7b/open-curly-bracket - { - break-if-!= - return 1/true - } - compare g, 0x7d/close-curly-bracket - { - break-if-!= - return 1/true - } - return 0/false -} diff --git a/baremetal/shell/tokenize.mu b/baremetal/shell/tokenize.mu new file mode 100644 index 00000000..2f1c4e62 --- /dev/null +++ b/baremetal/shell/tokenize.mu @@ -0,0 +1,370 @@ +fn tokenize in: (addr gap-buffer), out: (addr stream cell), trace: (addr trace) { + trace-text trace, "read", "tokenize" + trace-lower trace + rewind-gap-buffer in + var token-storage: cell + var token/edx: (addr cell) <- address token-storage + # initialize token + var dest-ah/eax: (addr handle stream byte) <- get token, text-data + populate-stream dest-ah, 0x40/max-token-size + # + { + var done?/eax: boolean <- gap-buffer-scan-done? in + compare done?, 0/false + break-if-!= + next-token in, token, trace + var error?/eax: boolean <- has-errors? trace + compare error?, 0/false + { + break-if-= + return + } + write-to-stream out, token + loop + } + trace-higher trace +} + +fn next-token in: (addr gap-buffer), _out-cell: (addr cell), trace: (addr trace) { + trace-text trace, "read", "next-token" + trace-lower trace + var out-cell/eax: (addr cell) <- copy _out-cell + var out-ah/eax: (addr handle stream byte) <- get out-cell, text-data + var _out/eax: (addr stream byte) <- lookup *out-ah + var out/edi: (addr stream byte) <- copy _out + $next-token:body: { + clear-stream out + skip-whitespace-from-gap-buffer in + var g/eax: grapheme <- peek-from-gap-buffer in + { + var stream-storage: (stream byte 0x40) + var stream/esi: (addr stream byte) <- address stream-storage + write stream, "next: " + var gval/eax: int <- copy g + write-int32-hex stream, gval + trace trace, "read", stream + } + # digit + { + var digit?/eax: boolean <- is-decimal-digit? g + compare digit?, 0/false + break-if-= + next-number-token in, out, trace + break $next-token:body + } + # other symbol char + { + var symbol?/eax: boolean <- is-symbol-grapheme? g + compare symbol?, 0/false + break-if-= + next-symbol-token in, out, trace + break $next-token:body + } + # brackets are always single-char tokens + { + var bracket?/eax: boolean <- is-bracket-grapheme? g + compare bracket?, 0/false + break-if-= + var g/eax: grapheme <- read-from-gap-buffer in + next-bracket-token g, out, trace + break $next-token:body + } + } + trace-higher trace + var stream-storage: (stream byte 0x40) + var stream/eax: (addr stream byte) <- address stream-storage + write stream, "=> " + rewind-stream out + write-stream stream, out + trace trace, "read", stream +} + +fn next-symbol-token in: (addr gap-buffer), out: (addr stream byte), trace: (addr trace) { + trace-text trace, "read", "looking for a symbol" + trace-lower trace + $next-symbol-token:loop: { + var done?/eax: boolean <- gap-buffer-scan-done? in + compare done?, 0/false + break-if-!= + var g/eax: grapheme <- peek-from-gap-buffer in + { + var stream-storage: (stream byte 0x40) + var stream/esi: (addr stream byte) <- address stream-storage + write stream, "next: " + var gval/eax: int <- copy g + write-int32-hex stream, gval + trace trace, "read", stream + } + # if non-symbol, return + { + var symbol-grapheme?/eax: boolean <- is-symbol-grapheme? g + compare symbol-grapheme?, 0/false + break-if-!= + trace-text trace, "read", "stop" + break $next-symbol-token:loop + } + var g/eax: grapheme <- read-from-gap-buffer in + write-grapheme out, g + loop + } + trace-higher trace + var stream-storage: (stream byte 0x40) + var stream/esi: (addr stream byte) <- address stream-storage + write stream, "=> " + rewind-stream out + write-stream stream, out + trace trace, "read", stream +} + +fn next-number-token in: (addr gap-buffer), out: (addr stream byte), trace: (addr trace) { + trace-text trace, "read", "looking for a number" + trace-lower trace + $next-number-token:loop: { + var done?/eax: boolean <- gap-buffer-scan-done? in + compare done?, 0/false + break-if-!= + var g/eax: grapheme <- peek-from-gap-buffer in + { + var stream-storage: (stream byte 0x40) + var stream/esi: (addr stream byte) <- address stream-storage + write stream, "next: " + var gval/eax: int <- copy g + write-int32-hex stream, gval + trace trace, "read", stream + } + # if not symbol grapheme, return + { + var symbol-grapheme?/eax: boolean <- is-symbol-grapheme? g + compare symbol-grapheme?, 0/false + break-if-!= + trace-text trace, "read", "stop" + break $next-number-token:loop + } + # if not digit grapheme, abort + { + var digit?/eax: boolean <- is-decimal-digit? g + compare digit?, 0/false + break-if-!= + error trace, "invalid number" + return + } + trace-text trace, "read", "append" + var g/eax: grapheme <- read-from-gap-buffer in + write-grapheme out, g + loop + } + trace-higher trace +} + +fn next-bracket-token g: grapheme, out: (addr stream byte), trace: (addr trace) { + trace-text trace, "read", "bracket" + write-grapheme out, g + var stream-storage: (stream byte 0x40) + var stream/esi: (addr stream byte) <- address stream-storage + write stream, "=> " + rewind-stream out + write-stream stream, out + trace trace, "read", stream +} + +fn is-symbol-grapheme? g: grapheme -> _/eax: boolean { + ## whitespace + compare g, 9/tab + { + break-if-!= + return 0/false + } + compare g, 0xa/newline + { + break-if-!= + return 0/false + } + compare g, 0x20/space + { + break-if-!= + return 0/false + } + ## quotes + compare g, 0x22/double-quote + { + break-if-!= + return 0/false + } + compare g, 0x27/single-quote + { + break-if-!= + return 0/false + } + compare g, 0x60/backquote + { + break-if-!= + return 0/false + } + ## brackets + compare g, 0x28/open-paren + { + break-if-!= + return 0/false + } + compare g, 0x29/close-paren + { + break-if-!= + return 0/false + } + compare g, 0x5b/open-square-bracket + { + break-if-!= + return 0/false + } + compare g, 0x5d/close-square-bracket + { + break-if-!= + return 0/false + } + compare g, 0x7b/open-curly-bracket + { + break-if-!= + return 0/false + } + compare g, 0x7d/close-curly-bracket + { + break-if-!= + return 0/false + } + # - other punctuation + # '!' is a symbol char + compare g, 0x23/hash + { + break-if-!= + return 0/false + } + # '$' is a symbol char + compare g, 0x25/percent + { + break-if-!= + return 0/false + } + compare g, 0x26/ampersand + { + break-if-!= + return 0/false + } + compare g, 0x2a/asterisk + { + break-if-!= + return 0/false + } + compare g, 0x2b/plus + { + break-if-!= + return 0/false + } + compare g, 0x2c/comma + { + break-if-!= + return 0/false + } + # '-' is a symbol char + compare g, 0x2e/period + { + break-if-!= + return 0/false + } + compare g, 0x2f/slash + { + break-if-!= + return 0/false + } + compare g, 0x2f/slash + { + break-if-!= + return 0/false + } + compare g, 0x3a/colon + { + break-if-!= + return 0/false + } + compare g, 0x3b/semi-colon + { + break-if-!= + return 0/false + } + compare g, 0x3c/less-than + { + break-if-!= + return 0/false + } + compare g, 0x3d/equal + { + break-if-!= + return 0/false + } + compare g, 0x3e/greater-than + { + break-if-!= + return 0/false + } + # '?' is a symbol char + compare g, 0x40/at-sign + { + break-if-!= + return 0/false + } + compare g, 0x5c/backslash + { + break-if-!= + return 0/false + } + compare g, 0x5e/caret + { + break-if-!= + return 0/false + } + # '_' is a symbol char + compare g, 0x7c/vertical-line + { + break-if-!= + return 0/false + } + compare g, 0x7e/tilde + { + break-if-!= + return 0/false + } + return 1/true +} + +fn is-bracket-grapheme? g: grapheme -> _/eax: boolean { + compare g, 0x28/open-paren + { + break-if-!= + return 1/true + } + compare g, 0x29/close-paren + { + break-if-!= + return 1/true + } + compare g, 0x5b/open-square-bracket + { + break-if-!= + return 1/true + } + compare g, 0x5d/close-square-bracket + { + break-if-!= + return 1/true + } + compare g, 0x7b/open-curly-bracket + { + break-if-!= + return 1/true + } + compare g, 0x7d/close-curly-bracket + { + break-if-!= + return 1/true + } + return 0/false +} -- cgit 1.4.1-2-gfad0