#
#
# Nim's Runtime Library
# (c) Copyright 2015 Andreas Rumpf, Dominik Picheta
#
# See the file "copying.txt", included in this
# distribution, for details about the copyright.
#
## **Note:** Import ``nimsuggest/sexp`` to use this module
import
hashes, strutils, lexbase, streams, unicode, macros
import std/private/decode_helpers
when defined(nimPreviewSlimSystem):
import std/[assertions, formatfloat]
type
SexpEventKind* = enum ## enumeration of all events that may occur when parsing
sexpError, ## an error occurred during parsing
sexpEof, ## end of file reached
sexpString, ## a string literal
sexpSymbol, ## a symbol
sexpInt, ## an integer literal
sexpFloat, ## a float literal
sexpNil, ## the value ``nil``
sexpDot, ## the dot to separate car/cdr
sexpListStart, ## start of a list: the ``(`` token
sexpListEnd, ## end of a list: the ``)`` token
TTokKind = enum # must be synchronized with SexpEventKind!
tkError,
tkEof,
tkString,
tkSymbol,
tkInt,
tkFloat,
tkNil,
tkDot,
tkParensLe,
tkParensRi
tkSpace
SexpError* = enum ## enumeration that lists all errors that can occur
errNone, ## no error
errInvalidToken, ## invalid token
errParensRiExpected, ## ``)`` expected
errQuoteExpected, ## ``"`` expected
errEofExpected, ## EOF expected
SexpParser* = object of BaseLexer ## the parser object.
a: string
tok: TTokKind
kind: SexpEventKind
err: SexpError
const
errorMessages: array[SexpError, string] = [
"no error",
"invalid token",
"')' expected",
"'\"' or \"'\" expected",
"EOF expected",
]
tokToStr: array[TTokKind, string] = [
"invalid token",
"EOF",
"string literal",
"symbol",
"int literal",
"float literal",
"nil",
".",
"(", ")", "space"
]
proc close*(my: var SexpParser) {.inline.} =
## closes the parser `my` and its associated input stream.
lexbase.close(my)
proc str*(my: SexpParser): string {.inline.} =
## returns the character data for the events: ``sexpInt``, ``sexpFloat``,
## ``sexpString``
assert(my.kind in {sexpInt, sexpFloat, sexpString})
result = my.a
proc getInt*(my: SexpParser): BiggestInt {.inline.} =
## returns the number for the event: ``sexpInt``
assert(my.kind == sexpInt)
result = parseBiggestInt(my.a)
proc getFloat*(my: SexpParser): float {.inline.} =
## returns the number for the event: ``sexpFloat``
assert(my.kind == sexpFloat)
result = parseFloat(my.a)
proc kind*(my: SexpParser): SexpEventKind {.inline.} =
## returns the current event type for the SEXP parser
result = my.kind
proc getColumn*(my: SexpParser): int {.inline.} =
## get the current column the parser has arrived at.
result = getColNumber(my, my.bufpos)
proc getLine*(my: SexpParser): int {.inline.} =
## get the current line the parser has arrived at.
result = my.lineNumber
proc errorMsg*(my: SexpParser): string =
## returns a helpful error message for the event ``sexpError``
assert(my.kind == sexpError)
result = "($1, $2) Error: $3" % [$getLine(my), $getColumn(my), errorMessages[my.err]]
proc errorMsgExpected*(my: SexpParser, e: string): string =
## returns an error message "`e` expected" in the same format as the
## other error messages
result = "($1, $2) Error: $3" % [$getLine(my), $getColumn(my), e & " expected"]
proc parseString(my: var SexpParser): TTokKind =
result = tkString
var pos = my.bufpos + 1
while true:
case my.buf[pos]
of '\0':
my.err = errQuoteExpected
result = tkError
break
of '"':
inc(pos)
break
of '\\':
case my.buf[pos+1]
of '\\', '"', '\'', '/':
add(my.a, my.buf[pos+1])
inc(pos, 2)
of 'b':
add(my.a, '\b')
inc(pos, 2)
of 'f':
add(my.a, '\f')
inc(pos, 2)
of 'n':
add(my.a, '\L')
inc(pos, 2)
of 'r':
add(my.a, '\C')
inc(pos, 2)
of 't':
add(my.a, '\t')
inc(pos, 2)
of 'u':
inc(pos, 2)
var r: int
if handleHexChar(my.buf[pos], r): inc(pos)
if handleHexChar(my.buf[pos], r): inc(pos)
if handleHexChar(my.buf[pos], r): inc(pos)
if handleHexChar(my.buf[pos], r): inc(pos)
add(my.a, toUTF8(Rune(r)))
else:
# don't bother with the error
add(my.a, my.buf[pos])
inc(pos)
of '\c':
pos = lexbase.handleCR(my, pos)
add(my.a, '\c')
of '\L':
pos = lexbase.handleLF(my, pos)
add(my.a, '\L')
else:
add(my.a, my.buf[pos])
inc(pos)
my.bufpos = pos # store back
proc parseNumber(my: var SexpParser) =
var pos = my.bufpos
if my.buf[pos] == '-':
add(my.a, '-')
inc(pos)
if my.buf[pos] == '.':
add(my.a, "0.")
inc(pos)
else:
while my.buf[pos] in Digits:
add(my.a, my.buf[pos])
inc(pos)
if my.buf[pos] == '.':
add(my.a, '.')
inc(pos)
# digits after the dot:
while my.buf[pos] in Digits:
add(my.a, my.buf[pos])
inc(pos)
if my.buf[pos] in {'E', 'e'}:
add(my.a, my.buf[pos])
inc(pos)
if my.buf[pos] in {'+', '-'}:
add(my.a, my.buf[pos])
inc(pos)
while my.buf[pos] in Digits:
add(my.a, my.buf[pos])
inc(pos)
my.bufpos = pos
proc parseSymbol(my: var SexpParser) =
var pos = my.bufpos
if my.buf[pos] in IdentStartChars:
while my.buf[pos] in IdentChars:
add(my.a, my.buf[pos])
inc(pos)
my.bufpos = pos
proc getTok(my: var SexpParser): TTokKind =
setLen(my.a, 0)
case my.buf[my.bufpos]
of '-', '0'..'9': # numbers that start with a . are not parsed
# correctly.
parseNumber(my)
if {'.', 'e', 'E'} in my.a:
result = tkFloat
else:
result = tkInt
of '"': #" # gotta fix nim-mode
result = parseString(my)
of '(':
inc(my.bufpos)
result = tkParensLe
of ')':
inc(my.bufpos)
result = tkParensRi
of '\0':
result = tkEof
of 'a'..'z', 'A'..'Z', '_':
parseSymbol(my)
if my.a == "nil":
result = tkNil
else:
result = tkSymbol
of ' ':
result = tkSpace
inc(my.bufpos)
of '.':
result = tkDot
inc(my.bufpos)
else:
inc(my.bufpos)
result = tkError
my.tok = result
# ------------- higher level interface ---------------------------------------
type
SexpNodeKind* = enum ## possible SEXP node types
SNil,
SInt,
SFloat,
SString,
SSymbol,
SList,
SCons
SexpNode* = ref SexpNodeObj ## SEXP node
SexpNodeObj* {.acyclic.} = object
case kind*: SexpNodeKind
of SString:
str*: string
of SSymbol:
symbol*: string
of SInt:
num*: BiggestInt
of SFloat:
fnum*: float
of SList:
elems*: seq[SexpNode]
of SCons:
car: SexpNode
cdr: SexpNode
of SNil:
discard
Cons = tuple[car: SexpNode, cdr: SexpNode]
SexpParsingError* = object of ValueError ## is raised for a SEXP error
proc raiseParseErr*(p: SexpParser, msg: string) {.noinline, noreturn.} =
## raises an `ESexpParsingError` exception.
raise newException(SexpParsingError, errorMsgExpected(p, msg))
proc newSString*(s: string): SexpNode =
## Creates a new `SString SexpNode`.
result = SexpNode(kind: SString, str: s)
proc newSInt*(n: BiggestInt): SexpNode =
## Creates a new `SInt SexpNode`.
result = SexpNode(kind: SInt, num: n)
proc newSFloat*(n: float): SexpNode =
## Creates a new `SFloat SexpNode`.
result = SexpNode(kind: SFloat, fnum: n)
proc newSNil*(): SexpNode =
## Creates a new `SNil SexpNode`.
result = SexpNode(kind: SNil)
proc newSCons*(car, cdr: SexpNode): SexpNode =
## Creates a new `SCons SexpNode`
result = SexpNode(kind: SCons, car: car, cdr: cdr)
proc newSList*(): SexpNode =
## Creates a new `SList SexpNode`
result = SexpNode(kind: SList, elems: @[])
proc newSSymbol*(s: string): SexpNode =
result = SexpNode(kind: SSymbol, symbol: s)
proc getStr*(n: SexpNode, default: string = ""): string =
## Retrieves the string value of a `SString SexpNode`.
##
## Returns ``default`` if ``n`` is not a ``SString``.
if n.kind != SString: return default
else: return n.str
proc getNum*(n: SexpNode, default: BiggestInt = 0): BiggestInt =
## Retrieves the int value of a `SInt SexpNode`.
##
## Returns ``default`` if ``n`` is not a ``SInt``.
if n.kind != SInt: return default
else: return n.num
proc getFNum*(n: SexpNode, default: float = 0.0): float =
## Retrieves the float value of a `SFloat SexpNode`.
##
## Returns ``default`` if ``n`` is not a ``SFloat``.
if n.kind != SFloat: return default
else: return n.fnum
proc getSymbol*(n: SexpNode, default: string = ""): string =
## Retrieves the int value of a `SList SexpNode`.
##
## Returns ``default`` if ``n`` is not a ``SList``.
if n.kind != SSymbol: return default
else: return n.symbol
proc getElems*(n: SexpNode, default: seq[SexpNode] = @[]): seq[SexpNode] =
## Retrieves the int value of a `SList SexpNode`.
##
## Returns ``default`` if ``n`` is not a ``SList``.
if n.kind == SNil: return @[]
elif n.kind != SList: return default
else: return n.elems
proc getCons*(n: SexpNode, defaults: Cons = (newSNil(), newSNil())): Cons =
## Retrieves the cons value of a `SList SexpNode`.
##
## Returns ``default`` if ``n`` is not a ``SList``.
if n.kind == SCons: return (n.car, n.cdr)
elif n.kind == SList: return (n.elems[0], n.elems[1])
else: return defaults
proc sexp*(s: string): SexpNode =
## Generic constructor for SEXP data. Creates a new `SString SexpNode`.
result = SexpNode(kind: SString, str: s)
proc sexp*(n: BiggestInt): SexpNode =
## Generic constructor for SEXP data. Creates a new `SInt SexpNode`.
result = SexpNode(kind: SInt, num: n)
proc sexp*(n: float): SexpNode =
## Generic constructor for SEXP data. Creates a new `SFloat SexpNode`.
result = SexpNode(kind: SFloat, fnum: n)
proc sexp*(b: bool): SexpNode =
## Generic constructor for SEXP data. Creates a new `SSymbol
## SexpNode` with value t or `SNil SexpNode`.
if b:
result = SexpNode(kind: SSymbol, symbol: "t")
else:
result = SexpNode(kind: SNil)
proc sexp*(elements: openArray[SexpNode]): SexpNode =
## Generic constructor for SEXP data. Creates a new `SList SexpNode`
result = SexpNode(kind: SList)
newSeq(result.elems, elements.len)
for i, p in pairs(elements): result.elems[i] = p
proc sexp*(s: SexpNode): SexpNode =
result = s
proc toSexp(x: NimNode): NimNode {.compileTime.} =
case x.kind
of nnkBracket:
result = newNimNode(nnkBracket)
for i in 0 ..< x.len:
result.add(toSexp(x[i]))
else:
result = x
result = prefix(result, "sexp")
macro convertSexp*(x: untyped): untyped =
## Convert an expression to a SexpNode directly, without having to specify
## `%` for every element.
result = toSexp(x)
func `==`* (a, b: SexpNode): bool =
## Check two nodes for equality
if a.isNil:
if b.isNil: return true
return false
elif b.isNil or a.kind != b.kind:
return false
else:
return case a.kind
of SString:
a.str == b.str
of SInt:
a.num == b.num
of SFloat:
a.fnum == b.fnum
of SNil:
true
of SList:
a.elems == b.elems
of SSymbol:
a.symbol == b.symbol
of SCons:
a.car == b.car and a.cdr == b.cdr
proc hash* (n:SexpNode): Hash =
## Compute the hash for a SEXP node
case n.kind
of SList:
result = hash(n.elems)
of SInt:
result = hash(n.num)
of SFloat:
result = hash(n.fnum)
of SString:
result = hash(n.str)
of SNil:
result = hash(0)
of SSymbol:
result = hash(n.symbol)
of SCons:
result = hash(n.car) !& hash(n.cdr)
proc len*(n: SexpNode): int =
## If `n` is a `SList`, it returns the number of elements.
## If `n` is a `JObject`, it returns the number of pairs.
## Else it returns 0.
case n.kind
of SList: result = n.elems.len
else: discard
proc `[]`*(node: SexpNode, index: int): SexpNode =
## Gets the node at `index` in a List. Result is undefined if `index`
## is out of bounds
assert(not isNil(node))
assert(node.kind == SList)
return node.elems[index]
proc add*(father, child: SexpNode) =
## Adds `child` to a SList node `father`.
assert father.kind == SList
father.elems.add(child)
# ------------- pretty printing ----------------------------------------------
proc indent(s: var string, i: int) =
s.add(spaces(i))
proc newIndent(curr, indent: int, ml: bool): int =
if ml: return curr + indent
else: return indent
proc nl(s: var string, ml: bool) =
if ml: s.add("\n")
proc escapeJson*(s: string): string =
## Converts a string `s` to its JSON representation.
result = newStringOfCap(s.len + s.len shr 3)
result.add("\"")
for x in runes(s):
var r = int(x)
if r >= 32 and r <= 127:
var c = chr(r)
case c
of '"': result.add("\\\"") #" # gotta fix nim-mode
of '\\': result.add("\\\\")
else: result.add(c)
else:
result.add("\\u")
result.add(toHex(r, 4))
result.add("\"")
proc copy*(p: SexpNode): SexpNode =
## Performs a deep copy of `a`.
case p.kind
of SString:
result = newSString(p.str)
of SInt:
result = newSInt(p.num)
of SFloat:
result = newSFloat(p.fnum)
of SNil:
result = newSNil()
of SSymbol:
result = newSSymbol(p.symbol)
of SList:
result = newSList()
for i in items(p.elems):
result.elems.add(copy(i))
of SCons:
result = newSCons(copy(p.car), copy(p.cdr))
proc toPretty(result: var string, node: SexpNode, indent = 2, ml = true,
lstArr = false, currIndent = 0) =
case node.kind
of SString:
if lstArr: result.indent(currIndent)
result.add(escapeJson(node.str))
of SInt:
if lstArr: result.indent(currIndent)
result.addInt(node.num)
of SFloat:
if lstArr: result.indent(currIndent)
result.addFloat(node.fnum)
of SNil:
if lstArr: result.indent(currIndent)
result.add("nil")
of SSymbol:
if lstArr: result.indent(currIndent)
result.add(node.symbol)
of SList:
if lstArr: result.indent(currIndent)
if len(node.elems) != 0:
result.add("(")
result.nl(ml)
for i in 0..len(node.elems)-1:
if i > 0:
result.add(" ")
result.nl(ml) # New Line
toPretty(result, node.elems[i], indent, ml,
true, newIndent(currIndent, indent, ml))
result.nl(ml)
result.indent(currIndent)
result.add(")")
else: result.add("nil")
of SCons:
if lstArr: result.indent(currIndent)
result.add("(")
toPretty(result, node.car, indent, ml,
true, newIndent(currIndent, indent, ml))
result.add(" . ")
toPretty(result, node.cdr, indent, ml,
true, newIndent(currIndent, indent, ml))
result.add(")")
proc pretty*(node: SexpNode, indent = 2): string =
## Converts `node` to its Sexp Representation, with indentation and
## on multiple lines.
result = ""
toPretty(result, node, indent)
proc `$`*(node: SexpNode): string =
## Converts `node` to its SEXP Representation on one line.
result = ""
toPretty(result, node, 0, false)
iterator items*(node: SexpNode): SexpNode =
## Iterator for the items of `node`. `node` has to be a SList.
assert node.kind == SList
for i in items(node.elems):
yield i
iterator mitems*(node: var SexpNode): var SexpNode =
## Iterator for the items of `node`. `node` has to be a SList. Items can be
## modified.
assert node.kind == SList
for i in mitems(node.elems):
yield i
proc eat(p: var SexpParser, tok: TTokKind) =
if p.tok == tok: discard getTok(p)
else: raiseParseErr(p, tokToStr[tok])
proc parseSexp(p: var SexpParser): SexpNode =
## Parses SEXP from a SEXP Parser `p`.
case p.tok
of tkString:
# we capture 'p.a' here, so we need to give it a fresh buffer afterwards:
result = SexpNode(kind: SString, str: move p.a)
discard getTok(p)
of tkInt:
result = newSInt(parseBiggestInt(p.a))
discard getTok(p)
of tkFloat:
result = newSFloat(parseFloat(p.a))
discard getTok(p)
of tkNil:
result = newSNil()
discard getTok(p)
of tkSymbol:
result = SexpNode(kind: SSymbol, symbol: move p.a)
discard getTok(p)
of tkParensLe:
result = newSList()
discard getTok(p)
while p.tok notin {tkParensRi, tkDot}:
result.add(parseSexp(p))
if p.tok != tkSpace: break
discard getTok(p)
if p.tok == tkDot:
eat(p, tkDot)
eat(p, tkSpace)
result.add(parseSexp(p))
result = newSCons(result[0], result[1])
eat(p, tkParensRi)
of tkSpace, tkDot, tkError, tkParensRi, tkEof:
raiseParseErr(p, "(")
proc open*(my: var SexpParser, input: Stream) =
## initializes the parser with an input stream.
lexbase.open(my, input)
my.kind = sexpError
my.a = ""
proc parseSexp*(s: Stream): SexpNode =
## Parses from a buffer `s` into a `SexpNode`.
var p: SexpParser
p.open(s)
discard getTok(p) # read first token
result = p.parseSexp()
p.close()
proc parseSexp*(buffer: string): SexpNode =
## Parses Sexp from `buffer`.
result = parseSexp(newStringStream(buffer))
when isMainModule:
let testSexp = parseSexp("""(1 (98 2) nil (2) foobar "foo" 9.234)""")
assert(testSexp[0].getNum == 1)
assert(testSexp[1][0].getNum == 98)
assert(testSexp[2].getElems == @[])
assert(testSexp[4].getSymbol == "foobar")
assert(testSexp[5].getStr == "foo")
let alist = parseSexp("""((1 . 2) (2 . "foo"))""")
assert(alist[0].getCons.car.getNum == 1)
assert(alist[0].getCons.cdr.getNum == 2)
assert(alist[1].getCons.cdr.getStr == "foo")
# Generator:
var j = convertSexp([true, false, "foobar", [1, 2, "baz"]])
assert($j == """(t nil "foobar" (1 2 "baz"))""")