diff options
Diffstat (limited to 'lib/pure/sexp.nim')
-rw-r--r-- | lib/pure/sexp.nim | 697 |
1 files changed, 697 insertions, 0 deletions
diff --git a/lib/pure/sexp.nim b/lib/pure/sexp.nim new file mode 100644 index 000000000..3c9fbc150 --- /dev/null +++ b/lib/pure/sexp.nim @@ -0,0 +1,697 @@ +# +# +# Nim's Runtime Library +# (c) Copyright 2015 Andreas Rumpf, Dominik Picheta +# +# See the file "copying.txt", included in this +# distribution, for details about the copyright. +# + +import + hashes, strutils, lexbase, streams, unicode, macros + +type + SexpEventKind* = enum ## enumeration of all events that may occur when parsing + sexpError, ## an error occurred during parsing + sexpEof, ## end of file reached + sexpString, ## a string literal + sexpSymbol, ## a symbol + sexpInt, ## an integer literal + sexpFloat, ## a float literal + sexpNil, ## the value ``nil`` + sexpDot, ## the dot to separate car/cdr + sexpListStart, ## start of a list: the ``(`` token + sexpListEnd, ## end of a list: the ``)`` token + + TTokKind = enum # must be synchronized with SexpEventKind! + tkError, + tkEof, + tkString, + tkSymbol, + tkInt, + tkFloat, + tkNil, + tkDot, + tkParensLe, + tkParensRi + tkSpace + + SexpError* = enum ## enumeration that lists all errors that can occur + errNone, ## no error + errInvalidToken, ## invalid token + errParensRiExpected, ## ``)`` expected + errQuoteExpected, ## ``"`` expected + errEofExpected, ## EOF expected + + SexpParser* = object of BaseLexer ## the parser object. + a: string + tok: TTokKind + kind: SexpEventKind + err: SexpError + +const + errorMessages: array [SexpError, string] = [ + "no error", + "invalid token", + "')' expected", + "'\"' or \"'\" expected", + "EOF expected", + ] + tokToStr: array [TTokKind, string] = [ + "invalid token", + "EOF", + "string literal", + "symbol", + "int literal", + "float literal", + "nil", + ".", + "(", ")", "space" + ] + +proc close*(my: var SexpParser) {.inline.} = + ## closes the parser `my` and its associated input stream. + lexbase.close(my) + +proc str*(my: SexpParser): string {.inline.} = + ## returns the character data for the events: ``sexpInt``, ``sexpFloat``, + ## ``sexpString`` + assert(my.kind in {sexpInt, sexpFloat, sexpString}) + result = my.a + +proc getInt*(my: SexpParser): BiggestInt {.inline.} = + ## returns the number for the event: ``sexpInt`` + assert(my.kind == sexpInt) + result = parseBiggestInt(my.a) + +proc getFloat*(my: SexpParser): float {.inline.} = + ## returns the number for the event: ``sexpFloat`` + assert(my.kind == sexpFloat) + result = parseFloat(my.a) + +proc kind*(my: SexpParser): SexpEventKind {.inline.} = + ## returns the current event type for the SEXP parser + result = my.kind + +proc getColumn*(my: SexpParser): int {.inline.} = + ## get the current column the parser has arrived at. + result = getColNumber(my, my.bufpos) + +proc getLine*(my: SexpParser): int {.inline.} = + ## get the current line the parser has arrived at. + result = my.lineNumber + +proc errorMsg*(my: SexpParser): string = + ## returns a helpful error message for the event ``sexpError`` + assert(my.kind == sexpError) + result = "($1, $2) Error: $3" % [$getLine(my), $getColumn(my), errorMessages[my.err]] + +proc errorMsgExpected*(my: SexpParser, e: string): string = + ## returns an error message "`e` expected" in the same format as the + ## other error messages + result = "($1, $2) Error: $3" % [$getLine(my), $getColumn(my), e & " expected"] + +proc handleHexChar(c: char, x: var int): bool = + result = true # Success + case c + of '0'..'9': x = (x shl 4) or (ord(c) - ord('0')) + of 'a'..'f': x = (x shl 4) or (ord(c) - ord('a') + 10) + of 'A'..'F': x = (x shl 4) or (ord(c) - ord('A') + 10) + else: result = false # error + +proc parseString(my: var SexpParser): TTokKind = + result = tkString + var pos = my.bufpos + 1 + var buf = my.buf + while true: + case buf[pos] + of '\0': + my.err = errQuoteExpected + result = tkError + break + of '"': + inc(pos) + break + of '\\': + case buf[pos+1] + of '\\', '"', '\'', '/': + add(my.a, buf[pos+1]) + inc(pos, 2) + of 'b': + add(my.a, '\b') + inc(pos, 2) + of 'f': + add(my.a, '\f') + inc(pos, 2) + of 'n': + add(my.a, '\L') + inc(pos, 2) + of 'r': + add(my.a, '\C') + inc(pos, 2) + of 't': + add(my.a, '\t') + inc(pos, 2) + of 'u': + inc(pos, 2) + var r: int + if handleHexChar(buf[pos], r): inc(pos) + if handleHexChar(buf[pos], r): inc(pos) + if handleHexChar(buf[pos], r): inc(pos) + if handleHexChar(buf[pos], r): inc(pos) + add(my.a, toUTF8(Rune(r))) + else: + # don't bother with the error + add(my.a, buf[pos]) + inc(pos) + of '\c': + pos = lexbase.handleCR(my, pos) + buf = my.buf + add(my.a, '\c') + of '\L': + pos = lexbase.handleLF(my, pos) + buf = my.buf + add(my.a, '\L') + else: + add(my.a, buf[pos]) + inc(pos) + my.bufpos = pos # store back + +proc parseNumber(my: var SexpParser) = + var pos = my.bufpos + var buf = my.buf + if buf[pos] == '-': + add(my.a, '-') + inc(pos) + if buf[pos] == '.': + add(my.a, "0.") + inc(pos) + else: + while buf[pos] in Digits: + add(my.a, buf[pos]) + inc(pos) + if buf[pos] == '.': + add(my.a, '.') + inc(pos) + # digits after the dot: + while buf[pos] in Digits: + add(my.a, buf[pos]) + inc(pos) + if buf[pos] in {'E', 'e'}: + add(my.a, buf[pos]) + inc(pos) + if buf[pos] in {'+', '-'}: + add(my.a, buf[pos]) + inc(pos) + while buf[pos] in Digits: + add(my.a, buf[pos]) + inc(pos) + my.bufpos = pos + +proc parseSymbol(my: var SexpParser) = + var pos = my.bufpos + var buf = my.buf + if buf[pos] in IdentStartChars: + while buf[pos] in IdentChars: + add(my.a, buf[pos]) + inc(pos) + my.bufpos = pos + +proc getTok(my: var SexpParser): TTokKind = + setLen(my.a, 0) + case my.buf[my.bufpos] + of '-', '0'..'9': # numbers that start with a . are not parsed + # correctly. + parseNumber(my) + if {'.', 'e', 'E'} in my.a: + result = tkFloat + else: + result = tkInt + of '"': #" # gotta fix nim-mode + result = parseString(my) + of '(': + inc(my.bufpos) + result = tkParensLe + of ')': + inc(my.bufpos) + result = tkParensRi + of '\0': + result = tkEof + of 'a'..'z', 'A'..'Z', '_': + parseSymbol(my) + if my.a == "nil": + result = tkNil + else: + result = tkSymbol + of ' ': + result = tkSpace + inc(my.bufpos) + of '.': + result = tkDot + inc(my.bufpos) + else: + inc(my.bufpos) + result = tkError + my.tok = result + +# ------------- higher level interface --------------------------------------- + +type + SexpNodeKind* = enum ## possible SEXP node types + SNil, + SInt, + SFloat, + SString, + SSymbol, + SList, + SCons + + SexpNode* = ref SexpNodeObj ## SEXP node + SexpNodeObj* {.acyclic.} = object + case kind*: SexpNodeKind + of SString: + str*: string + of SSymbol: + symbol*: string + of SInt: + num*: BiggestInt + of SFloat: + fnum*: float + of SList: + elems*: seq[SexpNode] + of SCons: + car: SexpNode + cdr: SexpNode + of SNil: + discard + + Cons = tuple[car: SexpNode, cdr: SexpNode] + + SexpParsingError* = object of ValueError ## is raised for a SEXP error + +proc raiseParseErr*(p: SexpParser, msg: string) {.noinline, noreturn.} = + ## raises an `ESexpParsingError` exception. + raise newException(SexpParsingError, errorMsgExpected(p, msg)) + +proc newSString*(s: string): SexpNode {.procvar.}= + ## Creates a new `SString SexpNode`. + new(result) + result.kind = SString + result.str = s + +proc newSStringMove(s: string): SexpNode = + new(result) + result.kind = SString + shallowCopy(result.str, s) + +proc newSInt*(n: BiggestInt): SexpNode {.procvar.} = + ## Creates a new `SInt SexpNode`. + new(result) + result.kind = SInt + result.num = n + +proc newSFloat*(n: float): SexpNode {.procvar.} = + ## Creates a new `SFloat SexpNode`. + new(result) + result.kind = SFloat + result.fnum = n + +proc newSNil*(): SexpNode {.procvar.} = + ## Creates a new `SNil SexpNode`. + new(result) + +proc newSCons*(car, cdr: SexpNode): SexpNode {.procvar.} = + ## Creates a new `SCons SexpNode` + new(result) + result.kind = SCons + result.car = car + result.cdr = cdr + +proc newSList*(): SexpNode {.procvar.} = + ## Creates a new `SList SexpNode` + new(result) + result.kind = SList + result.elems = @[] + +proc newSSymbol*(s: string): SexpNode {.procvar.} = + new(result) + result.kind = SSymbol + result.symbol = s + +proc newSSymbolMove(s: string): SexpNode = + new(result) + result.kind = SSymbol + shallowCopy(result.symbol, s) + +proc getStr*(n: SexpNode, default: string = ""): string = + ## Retrieves the string value of a `SString SexpNode`. + ## + ## Returns ``default`` if ``n`` is not a ``SString``. + if n.kind != SString: return default + else: return n.str + +proc getNum*(n: SexpNode, default: BiggestInt = 0): BiggestInt = + ## Retrieves the int value of a `SInt SexpNode`. + ## + ## Returns ``default`` if ``n`` is not a ``SInt``. + if n.kind != SInt: return default + else: return n.num + +proc getFNum*(n: SexpNode, default: float = 0.0): float = + ## Retrieves the float value of a `SFloat SexpNode`. + ## + ## Returns ``default`` if ``n`` is not a ``SFloat``. + if n.kind != SFloat: return default + else: return n.fnum + +proc getSymbol*(n: SexpNode, default: string = ""): string = + ## Retrieves the int value of a `SList SexpNode`. + ## + ## Returns ``default`` if ``n`` is not a ``SList``. + if n.kind != SSymbol: return default + else: return n.symbol + +proc getElems*(n: SexpNode, default: seq[SexpNode] = @[]): seq[SexpNode] = + ## Retrieves the int value of a `SList SexpNode`. + ## + ## Returns ``default`` if ``n`` is not a ``SList``. + if n.kind == SNil: return @[] + elif n.kind != SList: return default + else: return n.elems + +proc getCons*(n: SexpNode, defaults: Cons = (newSNil(), newSNil())): Cons = + ## Retrieves the cons value of a `SList SexpNode`. + ## + ## Returns ``default`` if ``n`` is not a ``SList``. + if n.kind == SCons: return (n.car, n.cdr) + elif n.kind == SList: return (n.elems[0], n.elems[1]) + else: return defaults + +proc sexp*(s: string): SexpNode = + ## Generic constructor for SEXP data. Creates a new `SString SexpNode`. + new(result) + result.kind = SString + result.str = s + +proc sexp*(n: BiggestInt): SexpNode = + ## Generic constructor for SEXP data. Creates a new `SInt SexpNode`. + new(result) + result.kind = SInt + result.num = n + +proc sexp*(n: float): SexpNode = + ## Generic constructor for SEXP data. Creates a new `SFloat SexpNode`. + new(result) + result.kind = SFloat + result.fnum = n + +proc sexp*(b: bool): SexpNode = + ## Generic constructor for SEXP data. Creates a new `SSymbol + ## SexpNode` with value t or `SNil SexpNode`. + new(result) + if b: + result.kind = SSymbol + result.symbol = "t" + else: + result.kind = SNil + +proc sexp*(elements: openArray[SexpNode]): SexpNode = + ## Generic constructor for SEXP data. Creates a new `SList SexpNode` + new(result) + result.kind = SList + newSeq(result.elems, elements.len) + for i, p in pairs(elements): result.elems[i] = p + +proc sexp*(s: SexpNode): SexpNode = + result = s + +proc toSexp(x: NimNode): NimNode {.compiletime.} = + case x.kind + of nnkBracket: + result = newNimNode(nnkBracket) + for i in 0 .. <x.len: + result.add(toSexp(x[i])) + + else: + result = x + + result = prefix(result, "sexp") + +macro convertSexp*(x: expr): expr = + ## Convert an expression to a SexpNode directly, without having to specify + ## `%` for every element. + result = toSexp(x) + +proc `==`* (a,b: SexpNode): bool = + ## Check two nodes for equality + if a.isNil: + if b.isNil: return true + return false + elif b.isNil or a.kind != b.kind: + return false + else: + return case a.kind + of SString: + a.str == b.str + of SInt: + a.num == b.num + of SFloat: + a.fnum == b.fnum + of SNil: + true + of SList: + a.elems == b.elems + of SSymbol: + a.symbol == b.symbol + of SCons: + a.car == b.car and a.cdr == b.cdr + +proc hash* (n:SexpNode): THash = + ## Compute the hash for a SEXP node + case n.kind + of SList: + result = hash(n.elems) + of SInt: + result = hash(n.num) + of SFloat: + result = hash(n.fnum) + of SString: + result = hash(n.str) + of SNil: + result = hash(0) + of SSymbol: + result = hash(n.symbol) + of SCons: + result = hash(n.car) !& hash(n.cdr) + +proc len*(n: SexpNode): int = + ## If `n` is a `SList`, it returns the number of elements. + ## If `n` is a `JObject`, it returns the number of pairs. + ## Else it returns 0. + case n.kind + of SList: result = n.elems.len + else: discard + +proc `[]`*(node: SexpNode, index: int): SexpNode = + ## Gets the node at `index` in a List. Result is undefined if `index` + ## is out of bounds + assert(not isNil(node)) + assert(node.kind == SList) + return node.elems[index] + +proc add*(father, child: SexpNode) = + ## Adds `child` to a SList node `father`. + assert father.kind == SList + father.elems.add(child) + +# ------------- pretty printing ---------------------------------------------- + +proc indent(s: var string, i: int) = + s.add(spaces(i)) + +proc newIndent(curr, indent: int, ml: bool): int = + if ml: return curr + indent + else: return indent + +proc nl(s: var string, ml: bool) = + if ml: s.add("\n") + +proc escapeJson*(s: string): string = + ## Converts a string `s` to its JSON representation. + result = newStringOfCap(s.len + s.len shr 3) + result.add("\"") + for x in runes(s): + var r = int(x) + if r >= 32 and r <= 127: + var c = chr(r) + case c + of '"': result.add("\\\"") #" # gotta fix nim-mode + of '\\': result.add("\\\\") + else: result.add(c) + else: + result.add("\\u") + result.add(toHex(r, 4)) + result.add("\"") + +proc copy*(p: SexpNode): SexpNode = + ## Performs a deep copy of `a`. + case p.kind + of SString: + result = newSString(p.str) + of SInt: + result = newSInt(p.num) + of SFloat: + result = newSFloat(p.fnum) + of SNil: + result = newSNil() + of SSymbol: + result = newSSymbol(p.symbol) + of SList: + result = newSList() + for i in items(p.elems): + result.elems.add(copy(i)) + of SCons: + result = newSCons(copy(p.car), copy(p.cdr)) + +proc toPretty(result: var string, node: SexpNode, indent = 2, ml = true, + lstArr = false, currIndent = 0) = + case node.kind + of SString: + if lstArr: result.indent(currIndent) + result.add(escapeJson(node.str)) + of SInt: + if lstArr: result.indent(currIndent) + result.add($node.num) + of SFloat: + if lstArr: result.indent(currIndent) + result.add($node.fnum) + of SNil: + if lstArr: result.indent(currIndent) + result.add("nil") + of SSymbol: + if lstArr: result.indent(currIndent) + result.add($node.symbol) + of SList: + if lstArr: result.indent(currIndent) + if len(node.elems) != 0: + result.add("(") + result.nl(ml) + for i in 0..len(node.elems)-1: + if i > 0: + result.add(" ") + result.nl(ml) # New Line + toPretty(result, node.elems[i], indent, ml, + true, newIndent(currIndent, indent, ml)) + result.nl(ml) + result.indent(currIndent) + result.add(")") + else: result.add("nil") + of SCons: + if lstArr: result.indent(currIndent) + result.add("(") + toPretty(result, node.car, indent, ml, + true, newIndent(currIndent, indent, ml)) + result.add(" . ") + toPretty(result, node.cdr, indent, ml, + true, newIndent(currIndent, indent, ml)) + result.add(")") + +proc pretty*(node: SexpNode, indent = 2): string = + ## Converts `node` to its Sexp Representation, with indentation and + ## on multiple lines. + result = "" + toPretty(result, node, indent) + +proc `$`*(node: SexpNode): string = + ## Converts `node` to its SEXP Representation on one line. + result = "" + toPretty(result, node, 0, false) + +iterator items*(node: SexpNode): SexpNode = + ## Iterator for the items of `node`. `node` has to be a SList. + assert node.kind == SList + for i in items(node.elems): + yield i + +iterator mitems*(node: var SexpNode): var SexpNode = + ## Iterator for the items of `node`. `node` has to be a SList. Items can be + ## modified. + assert node.kind == SList + for i in mitems(node.elems): + yield i + +proc eat(p: var SexpParser, tok: TTokKind) = + if p.tok == tok: discard getTok(p) + else: raiseParseErr(p, tokToStr[tok]) + +proc parseSexp(p: var SexpParser): SexpNode = + ## Parses SEXP from a SEXP Parser `p`. + case p.tok + of tkString: + # we capture 'p.a' here, so we need to give it a fresh buffer afterwards: + result = newSStringMove(p.a) + p.a = "" + discard getTok(p) + of tkInt: + result = newSInt(parseBiggestInt(p.a)) + discard getTok(p) + of tkFloat: + result = newSFloat(parseFloat(p.a)) + discard getTok(p) + of tkNil: + result = newSNil() + discard getTok(p) + of tkSymbol: + result = newSSymbolMove(p.a) + p.a = "" + discard getTok(p) + of tkParensLe: + result = newSList() + discard getTok(p) + while p.tok notin {tkParensRi, tkDot}: + result.add(parseSexp(p)) + if p.tok != tkSpace: break + discard getTok(p) + if p.tok == tkDot: + eat(p, tkDot) + eat(p, tkSpace) + result.add(parseSexp(p)) + result = newSCons(result[0], result[1]) + eat(p, tkParensRi) + of tkSpace, tkDot, tkError, tkParensRi, tkEof: + raiseParseErr(p, "(") + +proc open*(my: var SexpParser, input: Stream) = + ## initializes the parser with an input stream. + lexbase.open(my, input) + my.kind = sexpError + my.a = "" + +proc parseSexp*(s: Stream): SexpNode = + ## Parses from a buffer `s` into a `SexpNode`. + var p: SexpParser + p.open(s) + discard getTok(p) # read first token + result = p.parseSexp() + p.close() + +proc parseSexp*(buffer: string): SexpNode = + ## Parses Sexp from `buffer`. + result = parseSexp(newStringStream(buffer)) + +when isMainModule: + let testSexp = parseSexp("""(1 (98 2) nil (2) foobar "foo" 9.234)""") + assert(testSexp[0].getNum == 1) + assert(testSexp[1][0].getNum == 98) + assert(testSexp[2].getElems == @[]) + assert(testSexp[4].getSymbol == "foobar") + assert(testSexp[5].getStr == "foo") + + let alist = parseSexp("""((1 . 2) (2 . "foo"))""") + assert(alist[0].getCons.car.getNum == 1) + assert(alist[0].getCons.cdr.getNum == 2) + assert(alist[1].getCons.cdr.getStr == "foo") + + # Generator: + var j = convertSexp([true, false, "foobar", [1, 2, "baz"]]) + assert($j == """(t nil "foobar" (1 2 "baz"))""") |