diff options
-rw-r--r-- | lib/pure/json.nim | 517 | ||||
-rw-r--r-- | lib/pure/parsejson.nim | 535 |
2 files changed, 547 insertions, 505 deletions
diff --git a/lib/pure/json.nim b/lib/pure/json.nim index 9f9339961..ca7e164b8 100644 --- a/lib/pure/json.nim +++ b/lib/pure/json.nim @@ -89,507 +89,22 @@ ## echo j2 import - hashes, tables, strutils, lexbase, streams, unicode, macros + hashes, tables, strutils, lexbase, streams, unicode, macros, parsejson export tables.`$` +export + parsejson.JsonEventKind, parsejson.JsonError, JsonParser, JsonKindError, + open, close, str, getInt, getFloat, kind, getColumn, getLine, getFilename, + errorMsg, errorMsgExpected, next, JsonParsingError, raiseParseErr + when defined(nimJsonGet): {.pragma: deprecatedGet, deprecated.} else: {.pragma: deprecatedGet.} type - JsonEventKind* = enum ## enumeration of all events that may occur when parsing - jsonError, ## an error occurred during parsing - jsonEof, ## end of file reached - jsonString, ## a string literal - jsonInt, ## an integer literal - jsonFloat, ## a float literal - jsonTrue, ## the value ``true`` - jsonFalse, ## the value ``false`` - jsonNull, ## the value ``null`` - jsonObjectStart, ## start of an object: the ``{`` token - jsonObjectEnd, ## end of an object: the ``}`` token - jsonArrayStart, ## start of an array: the ``[`` token - jsonArrayEnd ## start of an array: the ``]`` token - - TokKind = enum # must be synchronized with TJsonEventKind! 
- tkError, - tkEof, - tkString, - tkInt, - tkFloat, - tkTrue, - tkFalse, - tkNull, - tkCurlyLe, - tkCurlyRi, - tkBracketLe, - tkBracketRi, - tkColon, - tkComma - - JsonError* = enum ## enumeration that lists all errors that can occur - errNone, ## no error - errInvalidToken, ## invalid token - errStringExpected, ## string expected - errColonExpected, ## ``:`` expected - errCommaExpected, ## ``,`` expected - errBracketRiExpected, ## ``]`` expected - errCurlyRiExpected, ## ``}`` expected - errQuoteExpected, ## ``"`` or ``'`` expected - errEOC_Expected, ## ``*/`` expected - errEofExpected, ## EOF expected - errExprExpected ## expr expected - - ParserState = enum - stateEof, stateStart, stateObject, stateArray, stateExpectArrayComma, - stateExpectObjectComma, stateExpectColon, stateExpectValue - - JsonParser* = object of BaseLexer ## the parser object. - a: string - tok: TokKind - kind: JsonEventKind - err: JsonError - state: seq[ParserState] - filename: string - - JsonKindError* = object of ValueError ## raised by the ``to`` macro if the - ## JSON kind is incorrect. - -const - errorMessages: array[JsonError, string] = [ - "no error", - "invalid token", - "string expected", - "':' expected", - "',' expected", - "']' expected", - "'}' expected", - "'\"' or \"'\" expected", - "'*/' expected", - "EOF expected", - "expression expected" - ] - tokToStr: array[TokKind, string] = [ - "invalid token", - "EOF", - "string literal", - "int literal", - "float literal", - "true", - "false", - "null", - "{", "}", "[", "]", ":", "," - ] - -proc open*(my: var JsonParser, input: Stream, filename: string) = - ## initializes the parser with an input stream. `Filename` is only used - ## for nice error messages. - lexbase.open(my, input) - my.filename = filename - my.state = @[stateStart] - my.kind = jsonError - my.a = "" - -proc close*(my: var JsonParser) {.inline.} = - ## closes the parser `my` and its associated input stream. 
- lexbase.close(my) - -proc str*(my: JsonParser): string {.inline.} = - ## returns the character data for the events: ``jsonInt``, ``jsonFloat``, - ## ``jsonString`` - assert(my.kind in {jsonInt, jsonFloat, jsonString}) - return my.a - -proc getInt*(my: JsonParser): BiggestInt {.inline.} = - ## returns the number for the event: ``jsonInt`` - assert(my.kind == jsonInt) - return parseBiggestInt(my.a) - -proc getFloat*(my: JsonParser): float {.inline.} = - ## returns the number for the event: ``jsonFloat`` - assert(my.kind == jsonFloat) - return parseFloat(my.a) - -proc kind*(my: JsonParser): JsonEventKind {.inline.} = - ## returns the current event type for the JSON parser - return my.kind - -proc getColumn*(my: JsonParser): int {.inline.} = - ## get the current column the parser has arrived at. - result = getColNumber(my, my.bufpos) - -proc getLine*(my: JsonParser): int {.inline.} = - ## get the current line the parser has arrived at. - result = my.lineNumber - -proc getFilename*(my: JsonParser): string {.inline.} = - ## get the filename of the file that the parser processes. 
- result = my.filename - -proc errorMsg*(my: JsonParser): string = - ## returns a helpful error message for the event ``jsonError`` - assert(my.kind == jsonError) - result = "$1($2, $3) Error: $4" % [ - my.filename, $getLine(my), $getColumn(my), errorMessages[my.err]] - -proc errorMsgExpected*(my: JsonParser, e: string): string = - ## returns an error message "`e` expected" in the same format as the - ## other error messages - result = "$1($2, $3) Error: $4" % [ - my.filename, $getLine(my), $getColumn(my), e & " expected"] - -proc handleHexChar(c: char, x: var int): bool = - result = true # Success - case c - of '0'..'9': x = (x shl 4) or (ord(c) - ord('0')) - of 'a'..'f': x = (x shl 4) or (ord(c) - ord('a') + 10) - of 'A'..'F': x = (x shl 4) or (ord(c) - ord('A') + 10) - else: result = false # error - -proc parseEscapedUTF16(buf: cstring, pos: var int): int = - result = 0 - #UTF-16 escape is always 4 bytes. - for _ in 0..3: - if handleHexChar(buf[pos], result): - inc(pos) - else: - return -1 - -proc parseString(my: var JsonParser): TokKind = - result = tkString - var pos = my.bufpos + 1 - var buf = my.buf - while true: - case buf[pos] - of '\0': - my.err = errQuoteExpected - result = tkError - break - of '"': - inc(pos) - break - of '\\': - case buf[pos+1] - of '\\', '"', '\'', '/': - add(my.a, buf[pos+1]) - inc(pos, 2) - of 'b': - add(my.a, '\b') - inc(pos, 2) - of 'f': - add(my.a, '\f') - inc(pos, 2) - of 'n': - add(my.a, '\L') - inc(pos, 2) - of 'r': - add(my.a, '\C') - inc(pos, 2) - of 't': - add(my.a, '\t') - inc(pos, 2) - of 'u': - inc(pos, 2) - var r = parseEscapedUTF16(buf, pos) - if r < 0: - my.err = errInvalidToken - break - # Deal with surrogates - if (r and 0xfc00) == 0xd800: - if buf[pos] & buf[pos+1] != "\\u": - my.err = errInvalidToken - break - inc(pos, 2) - var s = parseEscapedUTF16(buf, pos) - if (s and 0xfc00) == 0xdc00 and s > 0: - r = 0x10000 + (((r - 0xd800) shl 10) or (s - 0xdc00)) - else: - my.err = errInvalidToken - break - add(my.a, 
toUTF8(Rune(r))) - else: - # don't bother with the error - add(my.a, buf[pos]) - inc(pos) - of '\c': - pos = lexbase.handleCR(my, pos) - buf = my.buf - add(my.a, '\c') - of '\L': - pos = lexbase.handleLF(my, pos) - buf = my.buf - add(my.a, '\L') - else: - add(my.a, buf[pos]) - inc(pos) - my.bufpos = pos # store back - -proc skip(my: var JsonParser) = - var pos = my.bufpos - var buf = my.buf - while true: - case buf[pos] - of '/': - if buf[pos+1] == '/': - # skip line comment: - inc(pos, 2) - while true: - case buf[pos] - of '\0': - break - of '\c': - pos = lexbase.handleCR(my, pos) - buf = my.buf - break - of '\L': - pos = lexbase.handleLF(my, pos) - buf = my.buf - break - else: - inc(pos) - elif buf[pos+1] == '*': - # skip long comment: - inc(pos, 2) - while true: - case buf[pos] - of '\0': - my.err = errEOC_Expected - break - of '\c': - pos = lexbase.handleCR(my, pos) - buf = my.buf - of '\L': - pos = lexbase.handleLF(my, pos) - buf = my.buf - of '*': - inc(pos) - if buf[pos] == '/': - inc(pos) - break - else: - inc(pos) - else: - break - of ' ', '\t': - inc(pos) - of '\c': - pos = lexbase.handleCR(my, pos) - buf = my.buf - of '\L': - pos = lexbase.handleLF(my, pos) - buf = my.buf - else: - break - my.bufpos = pos - -proc parseNumber(my: var JsonParser) = - var pos = my.bufpos - var buf = my.buf - if buf[pos] == '-': - add(my.a, '-') - inc(pos) - if buf[pos] == '.': - add(my.a, "0.") - inc(pos) - else: - while buf[pos] in Digits: - add(my.a, buf[pos]) - inc(pos) - if buf[pos] == '.': - add(my.a, '.') - inc(pos) - # digits after the dot: - while buf[pos] in Digits: - add(my.a, buf[pos]) - inc(pos) - if buf[pos] in {'E', 'e'}: - add(my.a, buf[pos]) - inc(pos) - if buf[pos] in {'+', '-'}: - add(my.a, buf[pos]) - inc(pos) - while buf[pos] in Digits: - add(my.a, buf[pos]) - inc(pos) - my.bufpos = pos - -proc parseName(my: var JsonParser) = - var pos = my.bufpos - var buf = my.buf - if buf[pos] in IdentStartChars: - while buf[pos] in IdentChars: - add(my.a, buf[pos]) - 
inc(pos) - my.bufpos = pos - -proc getTok(my: var JsonParser): TokKind = - setLen(my.a, 0) - skip(my) # skip whitespace, comments - case my.buf[my.bufpos] - of '-', '.', '0'..'9': - parseNumber(my) - if {'.', 'e', 'E'} in my.a: - result = tkFloat - else: - result = tkInt - of '"': - result = parseString(my) - of '[': - inc(my.bufpos) - result = tkBracketLe - of '{': - inc(my.bufpos) - result = tkCurlyLe - of ']': - inc(my.bufpos) - result = tkBracketRi - of '}': - inc(my.bufpos) - result = tkCurlyRi - of ',': - inc(my.bufpos) - result = tkComma - of ':': - inc(my.bufpos) - result = tkColon - of '\0': - result = tkEof - of 'a'..'z', 'A'..'Z', '_': - parseName(my) - case my.a - of "null": result = tkNull - of "true": result = tkTrue - of "false": result = tkFalse - else: result = tkError - else: - inc(my.bufpos) - result = tkError - my.tok = result - -proc next*(my: var JsonParser) = - ## retrieves the first/next event. This controls the parser. - var tk = getTok(my) - var i = my.state.len-1 - # the following code is a state machine. If we had proper coroutines, - # the code could be much simpler. - case my.state[i] - of stateEof: - if tk == tkEof: - my.kind = jsonEof - else: - my.kind = jsonError - my.err = errEofExpected - of stateStart: - # tokens allowed? - case tk - of tkString, tkInt, tkFloat, tkTrue, tkFalse, tkNull: - my.state[i] = stateEof # expect EOF next! 
- my.kind = JsonEventKind(ord(tk)) - of tkBracketLe: - my.state.add(stateArray) # we expect any - my.kind = jsonArrayStart - of tkCurlyLe: - my.state.add(stateObject) - my.kind = jsonObjectStart - of tkEof: - my.kind = jsonEof - else: - my.kind = jsonError - my.err = errEofExpected - of stateObject: - case tk - of tkString, tkInt, tkFloat, tkTrue, tkFalse, tkNull: - my.state.add(stateExpectColon) - my.kind = JsonEventKind(ord(tk)) - of tkBracketLe: - my.state.add(stateExpectColon) - my.state.add(stateArray) - my.kind = jsonArrayStart - of tkCurlyLe: - my.state.add(stateExpectColon) - my.state.add(stateObject) - my.kind = jsonObjectStart - of tkCurlyRi: - my.kind = jsonObjectEnd - discard my.state.pop() - else: - my.kind = jsonError - my.err = errCurlyRiExpected - of stateArray: - case tk - of tkString, tkInt, tkFloat, tkTrue, tkFalse, tkNull: - my.state.add(stateExpectArrayComma) # expect value next! - my.kind = JsonEventKind(ord(tk)) - of tkBracketLe: - my.state.add(stateExpectArrayComma) - my.state.add(stateArray) - my.kind = jsonArrayStart - of tkCurlyLe: - my.state.add(stateExpectArrayComma) - my.state.add(stateObject) - my.kind = jsonObjectStart - of tkBracketRi: - my.kind = jsonArrayEnd - discard my.state.pop() - else: - my.kind = jsonError - my.err = errBracketRiExpected - of stateExpectArrayComma: - case tk - of tkComma: - discard my.state.pop() - next(my) - of tkBracketRi: - my.kind = jsonArrayEnd - discard my.state.pop() # pop stateExpectArrayComma - discard my.state.pop() # pop stateArray - else: - my.kind = jsonError - my.err = errBracketRiExpected - of stateExpectObjectComma: - case tk - of tkComma: - discard my.state.pop() - next(my) - of tkCurlyRi: - my.kind = jsonObjectEnd - discard my.state.pop() # pop stateExpectObjectComma - discard my.state.pop() # pop stateObject - else: - my.kind = jsonError - my.err = errCurlyRiExpected - of stateExpectColon: - case tk - of tkColon: - my.state[i] = stateExpectValue - next(my) - else: - my.kind = jsonError - 
my.err = errColonExpected - of stateExpectValue: - case tk - of tkString, tkInt, tkFloat, tkTrue, tkFalse, tkNull: - my.state[i] = stateExpectObjectComma - my.kind = JsonEventKind(ord(tk)) - of tkBracketLe: - my.state[i] = stateExpectObjectComma - my.state.add(stateArray) - my.kind = jsonArrayStart - of tkCurlyLe: - my.state[i] = stateExpectObjectComma - my.state.add(stateObject) - my.kind = jsonObjectStart - else: - my.kind = jsonError - my.err = errExprExpected - - -# ------------- higher level interface --------------------------------------- - -type JsonNodeKind* = enum ## possible JSON node types JNull, JBool, @@ -617,12 +132,6 @@ type of JArray: elems*: seq[JsonNode] - JsonParsingError* = object of ValueError ## is raised for a JSON error - -proc raiseParseErr*(p: JsonParser, msg: string) {.noinline, noreturn.} = - ## raises an `EJsonParsingError` exception. - raise newException(JsonParsingError, errorMsgExpected(p, msg)) - proc newJString*(s: string): JsonNode = ## Creates a new `JString JsonNode`. new(result) @@ -1194,10 +703,6 @@ iterator mpairs*(node: var JsonNode): tuple[key: string, val: var JsonNode] = for key, val in mpairs(node.fields): yield (key, val) -proc eat(p: var JsonParser, tok: TokKind) = - if p.tok == tok: discard getTok(p) - else: raiseParseErr(p, tokToStr[tok]) - proc parseJson(p: var JsonParser): JsonNode = ## Parses JSON from a JSON Parser `p`. case p.tok @@ -1253,10 +758,12 @@ when not defined(js): ## If `s` contains extra data, it will raise `JsonParsingError`. var p: JsonParser p.open(s, filename) - defer: p.close() - discard getTok(p) # read first token - result = p.parseJson() - eat(p, tkEof) # check if there is no extra data + try: + discard getTok(p) # read first token + result = p.parseJson() + eat(p, tkEof) # check if there is no extra data + finally: + p.close() proc parseJson*(buffer: string): JsonNode = ## Parses JSON from `buffer`. 
diff --git a/lib/pure/parsejson.nim b/lib/pure/parsejson.nim new file mode 100644 index 000000000..9c53af6a6 --- /dev/null +++ b/lib/pure/parsejson.nim @@ -0,0 +1,535 @@ +# +# +# Nim's Runtime Library +# (c) Copyright 2018 Nim contributors +# +# See the file "copying.txt", included in this +# distribution, for details about the copyright. +# + +## This module implements a json parser. It is used +## and exported by the ``json`` standard library +## module, but can also be used in its own right. + +import + strutils, lexbase, streams, unicode + +type + JsonEventKind* = enum ## enumeration of all events that may occur when parsing + jsonError, ## an error occurred during parsing + jsonEof, ## end of file reached + jsonString, ## a string literal + jsonInt, ## an integer literal + jsonFloat, ## a float literal + jsonTrue, ## the value ``true`` + jsonFalse, ## the value ``false`` + jsonNull, ## the value ``null`` + jsonObjectStart, ## start of an object: the ``{`` token + jsonObjectEnd, ## end of an object: the ``}`` token + jsonArrayStart, ## start of an array: the ``[`` token + jsonArrayEnd ## end of an array: the ``]`` token + + TokKind* = enum # must be synchronized with JsonEventKind! 
+ tkError, + tkEof, + tkString, + tkInt, + tkFloat, + tkTrue, + tkFalse, + tkNull, + tkCurlyLe, + tkCurlyRi, + tkBracketLe, + tkBracketRi, + tkColon, + tkComma + + JsonError* = enum ## enumeration that lists all errors that can occur + errNone, ## no error + errInvalidToken, ## invalid token + errStringExpected, ## string expected + errColonExpected, ## ``:`` expected + errCommaExpected, ## ``,`` expected + errBracketRiExpected, ## ``]`` expected + errCurlyRiExpected, ## ``}`` expected + errQuoteExpected, ## ``"`` or ``'`` expected + errEOC_Expected, ## ``*/`` expected + errEofExpected, ## EOF expected + errExprExpected ## expr expected + + ParserState = enum + stateEof, stateStart, stateObject, stateArray, stateExpectArrayComma, + stateExpectObjectComma, stateExpectColon, stateExpectValue + + JsonParser* = object of BaseLexer ## the parser object. + a*: string + tok*: TokKind + kind: JsonEventKind + err: JsonError + state: seq[ParserState] + filename: string + rawStringLiterals: bool + + JsonKindError* = object of ValueError ## raised by the ``to`` macro if the + ## JSON kind is incorrect. + JsonParsingError* = object of ValueError ## is raised for a JSON error + +const + errorMessages*: array[JsonError, string] = [ + "no error", + "invalid token", + "string expected", + "':' expected", + "',' expected", + "']' expected", + "'}' expected", + "'\"' or \"'\" expected", + "'*/' expected", + "EOF expected", + "expression expected" + ] + tokToStr: array[TokKind, string] = [ + "invalid token", + "EOF", + "string literal", + "int literal", + "float literal", + "true", + "false", + "null", + "{", "}", "[", "]", ":", "," + ] + +proc open*(my: var JsonParser, input: Stream, filename: string; + rawStringLiterals = false) = + ## initializes the parser with an input stream. `Filename` is only used + ## for nice error messages. If `rawStringLiterals` is true, string literals + ## are kept with their surrounding quotes and escape sequences in them are + ## left untouched too. 
+ lexbase.open(my, input) + my.filename = filename + my.state = @[stateStart] + my.kind = jsonError + my.a = "" + my.rawStringLiterals = rawStringLiterals + +proc close*(my: var JsonParser) {.inline.} = + ## closes the parser `my` and its associated input stream. + lexbase.close(my) + +proc str*(my: JsonParser): string {.inline.} = + ## returns the character data for the events: ``jsonInt``, ``jsonFloat``, + ## ``jsonString`` + assert(my.kind in {jsonInt, jsonFloat, jsonString}) + return my.a + +proc getInt*(my: JsonParser): BiggestInt {.inline.} = + ## returns the number for the event: ``jsonInt`` + assert(my.kind == jsonInt) + return parseBiggestInt(my.a) + +proc getFloat*(my: JsonParser): float {.inline.} = + ## returns the number for the event: ``jsonFloat`` + assert(my.kind == jsonFloat) + return parseFloat(my.a) + +proc kind*(my: JsonParser): JsonEventKind {.inline.} = + ## returns the current event type for the JSON parser + return my.kind + +proc getColumn*(my: JsonParser): int {.inline.} = + ## get the current column the parser has arrived at. + result = getColNumber(my, my.bufpos) + +proc getLine*(my: JsonParser): int {.inline.} = + ## get the current line the parser has arrived at. + result = my.lineNumber + +proc getFilename*(my: JsonParser): string {.inline.} = + ## get the filename of the file that the parser processes. 
+ result = my.filename + +proc errorMsg*(my: JsonParser): string = + ## returns a helpful error message for the event ``jsonError`` + assert(my.kind == jsonError) + result = "$1($2, $3) Error: $4" % [ + my.filename, $getLine(my), $getColumn(my), errorMessages[my.err]] + +proc errorMsgExpected*(my: JsonParser, e: string): string = + ## returns an error message "`e` expected" in the same format as the + ## other error messages + result = "$1($2, $3) Error: $4" % [ + my.filename, $getLine(my), $getColumn(my), e & " expected"] + +proc handleHexChar(c: char, x: var int): bool = + result = true # Success + case c + of '0'..'9': x = (x shl 4) or (ord(c) - ord('0')) + of 'a'..'f': x = (x shl 4) or (ord(c) - ord('a') + 10) + of 'A'..'F': x = (x shl 4) or (ord(c) - ord('A') + 10) + else: result = false # error + +proc parseEscapedUTF16*(buf: cstring, pos: var int): int = + result = 0 + #UTF-16 escape is always 4 bytes. + for _ in 0..3: + if handleHexChar(buf[pos], result): + inc(pos) + else: + return -1 + +proc parseString(my: var JsonParser): TokKind = + result = tkString + var pos = my.bufpos + 1 + var buf = my.buf + if my.rawStringLiterals: + add(my.a, '"') + while true: + case buf[pos] + of '\0': + my.err = errQuoteExpected + result = tkError + break + of '"': + if my.rawStringLiterals: + add(my.a, '"') + inc(pos) + break + of '\\': + if my.rawStringLiterals: + add(my.a, '\\') + case buf[pos+1] + of '\\', '"', '\'', '/': + add(my.a, buf[pos+1]) + inc(pos, 2) + of 'b': + add(my.a, '\b') + inc(pos, 2) + of 'f': + add(my.a, '\f') + inc(pos, 2) + of 'n': + add(my.a, '\L') + inc(pos, 2) + of 'r': + add(my.a, '\C') + inc(pos, 2) + of 't': + add(my.a, '\t') + inc(pos, 2) + of 'u': + if my.rawStringLiterals: + add(my.a, 'u') + inc(pos, 2) + var pos2 = pos + var r = parseEscapedUTF16(buf, pos) + if r < 0: + my.err = errInvalidToken + break + # Deal with surrogates + if (r and 0xfc00) == 0xd800: + if buf[pos] != '\\' or buf[pos+1] != 'u': + my.err = errInvalidToken + break + 
inc(pos, 2) + var s = parseEscapedUTF16(buf, pos) + if (s and 0xfc00) == 0xdc00 and s > 0: + r = 0x10000 + (((r - 0xd800) shl 10) or (s - 0xdc00)) + else: + my.err = errInvalidToken + break + if my.rawStringLiterals: + let length = pos - pos2 + for i in 1 .. length: + if buf[pos2] in {'0'..'9', 'A'..'F', 'a'..'f'}: + add(my.a, buf[pos2]) + inc pos2 + else: + break + else: + add(my.a, toUTF8(Rune(r))) + else: + # don't bother with the error + add(my.a, buf[pos]) + inc(pos) + of '\c': + pos = lexbase.handleCR(my, pos) + buf = my.buf + add(my.a, '\c') + of '\L': + pos = lexbase.handleLF(my, pos) + buf = my.buf + add(my.a, '\L') + else: + add(my.a, buf[pos]) + inc(pos) + my.bufpos = pos # store back + +proc skip(my: var JsonParser) = + var pos = my.bufpos + var buf = my.buf + while true: + case buf[pos] + of '/': + if buf[pos+1] == '/': + # skip line comment: + inc(pos, 2) + while true: + case buf[pos] + of '\0': + break + of '\c': + pos = lexbase.handleCR(my, pos) + buf = my.buf + break + of '\L': + pos = lexbase.handleLF(my, pos) + buf = my.buf + break + else: + inc(pos) + elif buf[pos+1] == '*': + # skip long comment: + inc(pos, 2) + while true: + case buf[pos] + of '\0': + my.err = errEOC_Expected + break + of '\c': + pos = lexbase.handleCR(my, pos) + buf = my.buf + of '\L': + pos = lexbase.handleLF(my, pos) + buf = my.buf + of '*': + inc(pos) + if buf[pos] == '/': + inc(pos) + break + else: + inc(pos) + else: + break + of ' ', '\t': + inc(pos) + of '\c': + pos = lexbase.handleCR(my, pos) + buf = my.buf + of '\L': + pos = lexbase.handleLF(my, pos) + buf = my.buf + else: + break + my.bufpos = pos + +proc parseNumber(my: var JsonParser) = + var pos = my.bufpos + var buf = my.buf + if buf[pos] == '-': + add(my.a, '-') + inc(pos) + if buf[pos] == '.': + add(my.a, "0.") + inc(pos) + else: + while buf[pos] in Digits: + add(my.a, buf[pos]) + inc(pos) + if buf[pos] == '.': + add(my.a, '.') + inc(pos) + # digits after the dot: + while buf[pos] in Digits: + add(my.a, 
buf[pos]) + inc(pos) + if buf[pos] in {'E', 'e'}: + add(my.a, buf[pos]) + inc(pos) + if buf[pos] in {'+', '-'}: + add(my.a, buf[pos]) + inc(pos) + while buf[pos] in Digits: + add(my.a, buf[pos]) + inc(pos) + my.bufpos = pos + +proc parseName(my: var JsonParser) = + var pos = my.bufpos + var buf = my.buf + if buf[pos] in IdentStartChars: + while buf[pos] in IdentChars: + add(my.a, buf[pos]) + inc(pos) + my.bufpos = pos + +proc getTok*(my: var JsonParser): TokKind = + setLen(my.a, 0) + skip(my) # skip whitespace, comments + case my.buf[my.bufpos] + of '-', '.', '0'..'9': + parseNumber(my) + if {'.', 'e', 'E'} in my.a: + result = tkFloat + else: + result = tkInt + of '"': + result = parseString(my) + of '[': + inc(my.bufpos) + result = tkBracketLe + of '{': + inc(my.bufpos) + result = tkCurlyLe + of ']': + inc(my.bufpos) + result = tkBracketRi + of '}': + inc(my.bufpos) + result = tkCurlyRi + of ',': + inc(my.bufpos) + result = tkComma + of ':': + inc(my.bufpos) + result = tkColon + of '\0': + result = tkEof + of 'a'..'z', 'A'..'Z', '_': + parseName(my) + case my.a + of "null": result = tkNull + of "true": result = tkTrue + of "false": result = tkFalse + else: result = tkError + else: + inc(my.bufpos) + result = tkError + my.tok = result + + +proc next*(my: var JsonParser) = + ## retrieves the first/next event. This controls the parser. + var tk = getTok(my) + var i = my.state.len-1 + # the following code is a state machine. If we had proper coroutines, + # the code could be much simpler. + case my.state[i] + of stateEof: + if tk == tkEof: + my.kind = jsonEof + else: + my.kind = jsonError + my.err = errEofExpected + of stateStart: + # tokens allowed? + case tk + of tkString, tkInt, tkFloat, tkTrue, tkFalse, tkNull: + my.state[i] = stateEof # expect EOF next! 
+ my.kind = JsonEventKind(ord(tk)) + of tkBracketLe: + my.state.add(stateArray) # we expect any + my.kind = jsonArrayStart + of tkCurlyLe: + my.state.add(stateObject) + my.kind = jsonObjectStart + of tkEof: + my.kind = jsonEof + else: + my.kind = jsonError + my.err = errEofExpected + of stateObject: + case tk + of tkString, tkInt, tkFloat, tkTrue, tkFalse, tkNull: + my.state.add(stateExpectColon) + my.kind = JsonEventKind(ord(tk)) + of tkBracketLe: + my.state.add(stateExpectColon) + my.state.add(stateArray) + my.kind = jsonArrayStart + of tkCurlyLe: + my.state.add(stateExpectColon) + my.state.add(stateObject) + my.kind = jsonObjectStart + of tkCurlyRi: + my.kind = jsonObjectEnd + discard my.state.pop() + else: + my.kind = jsonError + my.err = errCurlyRiExpected + of stateArray: + case tk + of tkString, tkInt, tkFloat, tkTrue, tkFalse, tkNull: + my.state.add(stateExpectArrayComma) # expect value next! + my.kind = JsonEventKind(ord(tk)) + of tkBracketLe: + my.state.add(stateExpectArrayComma) + my.state.add(stateArray) + my.kind = jsonArrayStart + of tkCurlyLe: + my.state.add(stateExpectArrayComma) + my.state.add(stateObject) + my.kind = jsonObjectStart + of tkBracketRi: + my.kind = jsonArrayEnd + discard my.state.pop() + else: + my.kind = jsonError + my.err = errBracketRiExpected + of stateExpectArrayComma: + case tk + of tkComma: + discard my.state.pop() + next(my) + of tkBracketRi: + my.kind = jsonArrayEnd + discard my.state.pop() # pop stateExpectArrayComma + discard my.state.pop() # pop stateArray + else: + my.kind = jsonError + my.err = errBracketRiExpected + of stateExpectObjectComma: + case tk + of tkComma: + discard my.state.pop() + next(my) + of tkCurlyRi: + my.kind = jsonObjectEnd + discard my.state.pop() # pop stateExpectObjectComma + discard my.state.pop() # pop stateObject + else: + my.kind = jsonError + my.err = errCurlyRiExpected + of stateExpectColon: + case tk + of tkColon: + my.state[i] = stateExpectValue + next(my) + else: + my.kind = jsonError + 
+ my.err = errColonExpected + of stateExpectValue: + case tk + of tkString, tkInt, tkFloat, tkTrue, tkFalse, tkNull: + my.state[i] = stateExpectObjectComma + my.kind = JsonEventKind(ord(tk)) + of tkBracketLe: + my.state[i] = stateExpectObjectComma + my.state.add(stateArray) + my.kind = jsonArrayStart + of tkCurlyLe: + my.state[i] = stateExpectObjectComma + my.state.add(stateObject) + my.kind = jsonObjectStart + else: + my.kind = jsonError + my.err = errExprExpected + +proc raiseParseErr*(p: JsonParser, msg: string) {.noinline, noreturn.} = + ## raises a `JsonParsingError` exception. + raise newException(JsonParsingError, errorMsgExpected(p, msg)) + +proc eat*(p: var JsonParser, tok: TokKind) = + if p.tok == tok: discard getTok(p) + else: raiseParseErr(p, tokToStr[tok]) |