import streams import tables import times import strutils import strformat import unicode import utils/twtstr type ValueType* = enum VALUE_STRING, VALUE_INTEGER, VALUE_FLOAT, VALUE_BOOLEAN, VALUE_DATE_TIME, VALUE_TABLE, VALUE_ARRAY VALUE_TABLE_ARRAY SyntaxError = object of ValueError TomlParser = object at: int line: int stream: Stream buf: string root: TomlTable node: TomlNode currkey: seq[string] TomlValue* = ref object case vt*: ValueType of VALUE_STRING: s*: string of VALUE_INTEGER: i*: int64 of VALUE_FLOAT: f*: float64 of VALUE_BOOLEAN: b*: bool of VALUE_TABLE: t*: TomlTable of VALUE_DATE_TIME: dt*: DateTime of VALUE_ARRAY: a*: seq[TomlValue] of VALUE_TABLE_ARRAY: ta*: seq[TomlTable] TomlNode = ref object of RootObj comment: string TomlKVPair = ref object of TomlNode key*: seq[string] value*: TomlValue TomlTable = ref object of TomlNode key: seq[string] nodes: seq[TomlNode] map: Table[string, TomlValue] func `[]`*(val: TomlValue, key: string): TomlValue = return val.t.map[key] iterator pairs*(val: TomlValue): (string, TomlValue) = for k, v in val.t.map.pairs: yield (k, v) func contains*(val: TomlValue, key: string): bool = return key in val.t.map func isBare(c: char): bool = return c == '-' or c == '_' or c.isAlphaNumeric() func peek(state: TomlParser, i: int): char = return state.buf[state.at + i] func peek(state: TomlParser, i: int, len: int): string = return state.buf.substr(state.at + i, state.at + i + len) proc syntaxError(state: TomlParser, msg: string) = raise newException(SyntaxError, fmt"on line {state.line}: {msg}") proc valueError(state: TomlParser, msg: string) = raise newException(ValueError, fmt"on line {state.line}: {msg}") proc consume(state: var TomlParser): char = result = state.buf[state.at] inc state.at proc reconsume(state: var TomlParser) = dec state.at proc has(state: var TomlParser, i: int = 0): bool = if state.at + i >= state.buf.len and not state.stream.atEnd(): state.buf &= state.stream.readLine() & '\n' return state.at + i < state.buf.len proc consumeEscape(state: var TomlParser, c: char): Rune = var len = 4 if c == 'U': len = 8 let c = state.consume() var num = hexValue(c) if num != -1: var i = 0 while state.has() and i < len: let c = state.peek(0) if hexValue(c) == -1: break discard state.consume() num *= 0x10 num += hexValue(c) inc i if i != len - 1: state.syntaxError(fmt"invalid escaped length ({i}, needs {len})") if num > 0x10FFFF or num in {0xD800..0xDFFF}: state.syntaxError(fmt"invalid escaped codepoint {num}") else: return Rune(num) else: state.syntaxError(fmt"invalid escaped codepoint {c}") proc consumeString(state: var TomlParser, first: char): string = var multiline = false if first == '"': if state.has(1): let s = state.peek(0, 1) if s == "\"\"": multiline = true elif first == '\'': if state.has(1): let s = state.peek(0, 1) if s == "''": multiline = true if multiline: let c = state.peek(0) if c == '\n': discard state.consume() var escape = false var ml_trim = false while state.has(): let c = state.consume() if c == '\n' and not multiline: state.syntaxError(fmt"newline in string") elif c == first: if multiline and state.has(1): let c2 = state.peek(0) let c3 = state.peek(1) if c2 == first and c3 == first: discard state.consume() discard state.consume() break else: break elif first == '"' and c == '\\': escape = true elif escape: case c of 'b': result &= '\b' of 't': result &= '\t' of 'n': result &= '\n' of 'f': result &= '\f' of 'r': result &= '\r' of '"': result &= '"' of '\\': result &= '\\' of 'u', 'U': result &= state.consumeEscape(c) of '\n': ml_trim = true else: state.syntaxError(fmt"invalid escape sequence \{c}") escape = false elif ml_trim: if not (c in {'\n', ' ', '\t'}): result &= c ml_trim = false else: result &= c proc consumeBare(state: var TomlParser, c: char): string = result &= c while state.has(): let c = state.consume() case c of ' ', '\t': break of '.', '=', ']', '\n': state.reconsume() break elif c.isBare(): result &= c else: state.syntaxError(fmt"invalid value in token: {c}") proc flushLine(state: var TomlParser) = if state.node != nil: if state.node of TomlKVPair: var i = 0 let keys = state.currkey & TomlKVPair(state.node).key var table = state.root while i < keys.len - 1: if keys[i] in table.map: let node = table.map[keys[i]] if node.vt != VALUE_TABLE: let s = keys.join(".") state.valueError(fmt"re-definition of node {s}") else: table = node.t else: let node = TomlTable() table.map[keys[i]] = TomlValue(vt: VALUE_TABLE, t: node) table = node inc i if keys[i] in table.map: let s = keys.join(".") state.valueError(fmt"re-definition of node {s}") table.map[keys[i]] = TomlKVPair(state.node).value table.nodes.add(state.node) state.node = nil inc state.line proc consumeComment(state: var TomlParser) = state.node = TomlNode() while state.has(): let c = state.consume() if c == '\n': state.reconsume() break else: state.node.comment &= c proc consumeKey(state: var TomlParser): seq[string] = var str = "" while state.has(): let c = state.consume() case c of '"', '\'': if str.len > 0: state.syntaxError("multiple strings without dot") str = state.consumeString(c) of '=', ']': if str.len != 0: result.add(str) str = "" return result of '.': if str.len == 0: #TODO empty strings are allowed, only empty keys aren't state.syntaxError("redundant dot") else: result.add(str) str = "" of ' ', '\t': discard of '\n': if state.node != nil: state.syntaxError("newline without value") else: state.flushLine() elif c.isBare(): if str.len > 0: state.syntaxError(fmt"multiple strings without dot: {str}") str = state.consumeBare(c) else: state.syntaxError(fmt"invalid character in key: {c}") state.syntaxError("key without value") proc consumeTable(state: var TomlParser): TomlTable = new(result) while state.has(): let c = state.peek(0) case c of ' ', '\t': discard of '\n': return result of '[': #TODO table array state.syntaxError("arrays of tables are not supported yet") of '"', '\'': result.key = state.consumeKey() elif c.isBare(): result.key = state.consumeKey() else: state.syntaxError(fmt"invalid character before key: {c}") state.syntaxError("unexpected end of file") proc consumeNoState(state: var TomlParser): bool = while state.has(): let c = state.peek(0) case c of '#', '\n': return false of ' ', '\t': discard of '[': discard state.consume() let table = state.consumeTable() state.currkey = table.key state.node = table return false elif c == '"' or c == '\'' or c.isBare(): let kvpair = TomlKVPair() kvpair.key = state.consumeKey() state.node = kvpair return true else: state.syntaxError(fmt"invalid character before key: {c}") state.syntaxError("unexpected end of file") proc consumeNumber(state: var TomlParser): TomlValue = var repr: string var isfloat = false if state.has(): if state.peek(0) == '+' or state.peek(0) == '-': repr &= state.consume() while state.has() and isDigit(state.peek(0)): repr &= state.consume() if state.has(1): if state.peek(0) == '.' and isDigit(state.peek(1)): repr &= state.consume() repr &= state.consume() isfloat = true while state.has() and isDigit(state.peek(0)): repr &= state.consume() if state.has(1): if state.peek(0) == 'E' or state.peek(0) == 'e': var j = 2 if state.peek(1) == '-' or state.peek(1) == '+': inc j if state.has(j) and isDigit(state.peek(j)): while j > 0: repr &= state.consume() dec j while state.has() and isDigit(state.peek(0)): repr &= state.consume() if isfloat: let val = parseFloat64(repr) return TomlValue(vt: VALUE_FLOAT, f: val) let val = parseInt64(repr) return TomlValue(vt: VALUE_INTEGER, i: val) proc consumeValue(state: var TomlParser): TomlValue proc consumeArray(state: var TomlParser): TomlValue = result = TomlValue(vt: VALUE_ARRAY) var val: TomlValue while state.has(): let c = state.consume() case c of ' ', '\t', '\n': discard of ']': if val != nil: result.a.add(val) break of ',': if val == nil: state.syntaxError("comma without element") result.a.add(val) else: state.reconsume() val = state.consumeValue() proc consumeValue(state: var TomlParser): TomlValue = while state.has(): let c = state.consume() case c of '"', '\'': return TomlValue(vt: VALUE_STRING, s: state.consumeString(c)) of ' ', '\t': discard of '\n': state.syntaxError("newline without value") of '#': state.syntaxError("comment without value") of '+', '-', '0'..'9': return state.consumeNumber() #TODO date-time of '[': return state.consumeArray() elif c.isBare(): let s = state.consumeBare(c) case s of "true": return TomlValue(vt: VALUE_BOOLEAN, b: true) of "false": return TomlValue(vt: VALUE_BOOLEAN, b: false) else: state.syntaxError(fmt"invalid token {s}") else: state.syntaxError(fmt"invalid character in value: {c}") proc parseToml*(inputStream: Stream): TomlValue = var state: TomlParser state.stream = inputStream state.line = 1 state.root = TomlTable() while state.has(): if state.consumeNoState(): let kvpair = TomlKVPair(state.node) kvpair.value = state.consumeValue() while state.has(): let c = state.consume() case c of '\n': state.flushLine() break of '#': state.consumeComment() of '\t', ' ': discard else: state.syntaxError(fmt"invalid character after value: {c}") return TomlValue(vt: VALUE_TABLE, t: state.root)