diff options
author | Simon Hafner <hafnersimon@gmail.com> | 2015-06-05 11:04:15 -0500 |
---|---|---|
committer | Oscar Campbell <oscar@campbell.nu> | 2015-06-07 13:14:53 +0200 |
commit | cb443ce73d8d451fc8a4168f035ec85350b349c7 (patch) | |
tree | 743da962de36f5b019501ee98c015b053f2b93c5 | |
parent | 25cc9befd538ec92938c9030d2e68658514f358c (diff) | |
download | Nim-cb443ce73d8d451fc8a4168f035ec85350b349c7.tar.gz |
moved sexp.nim to nimsuggest repo
-rw-r--r-- | lib/pure/sexp.nim | 698 |
1 files changed, 0 insertions, 698 deletions
diff --git a/lib/pure/sexp.nim b/lib/pure/sexp.nim deleted file mode 100644 index be6188c9e..000000000 --- a/lib/pure/sexp.nim +++ /dev/null @@ -1,698 +0,0 @@ -# -# -# Nim's Runtime Library -# (c) Copyright 2015 Andreas Rumpf, Dominik Picheta -# -# See the file "copying.txt", included in this -# distribution, for details about the copyright. -# - -import - hashes, strutils, lexbase, streams, unicode, macros - -type - SexpEventKind* = enum ## enumeration of all events that may occur when parsing - sexpError, ## an error occurred during parsing - sexpEof, ## end of file reached - sexpString, ## a string literal - sexpSymbol, ## a symbol - sexpInt, ## an integer literal - sexpFloat, ## a float literal - sexpNil, ## the value ``nil`` - sexpDot, ## the dot to separate car/cdr - sexpListStart, ## start of a list: the ``(`` token - sexpListEnd, ## end of a list: the ``)`` token - - TokKind = enum # must be synchronized with SexpEventKind! - tkError, - tkEof, - tkString, - tkSymbol, - tkInt, - tkFloat, - tkNil, - tkDot, - tkParensLe, - tkParensRi - tkSpace - - SexpError* = enum ## enumeration that lists all errors that can occur - errNone, ## no error - errInvalidToken, ## invalid token - errParensRiExpected, ## ``)`` expected - errQuoteExpected, ## ``"`` expected - errEofExpected, ## EOF expected - - SexpParser* = object of BaseLexer ## the parser object. - a: string - tok: TokKind - kind: SexpEventKind - err: SexpError -{.deprecated: [TTokKind: TokKind].} - -const - errorMessages: array [SexpError, string] = [ - "no error", - "invalid token", - "')' expected", - "'\"' or \"'\" expected", - "EOF expected", - ] - tokToStr: array [TokKind, string] = [ - "invalid token", - "EOF", - "string literal", - "symbol", - "int literal", - "float literal", - "nil", - ".", - "(", ")", "space" - ] - -proc close*(my: var SexpParser) {.inline.} = - ## closes the parser `my` and its associated input stream. - lexbase.close(my) - -proc str*(my: SexpParser): string {.inline.} = - ## returns the character data for the events: ``sexpInt``, ``sexpFloat``, - ## ``sexpString`` - assert(my.kind in {sexpInt, sexpFloat, sexpString}) - result = my.a - -proc getInt*(my: SexpParser): BiggestInt {.inline.} = - ## returns the number for the event: ``sexpInt`` - assert(my.kind == sexpInt) - result = parseBiggestInt(my.a) - -proc getFloat*(my: SexpParser): float {.inline.} = - ## returns the number for the event: ``sexpFloat`` - assert(my.kind == sexpFloat) - result = parseFloat(my.a) - -proc kind*(my: SexpParser): SexpEventKind {.inline.} = - ## returns the current event type for the SEXP parser - result = my.kind - -proc getColumn*(my: SexpParser): int {.inline.} = - ## get the current column the parser has arrived at. - result = getColNumber(my, my.bufpos) - -proc getLine*(my: SexpParser): int {.inline.} = - ## get the current line the parser has arrived at. - result = my.lineNumber - -proc errorMsg*(my: SexpParser): string = - ## returns a helpful error message for the event ``sexpError`` - assert(my.kind == sexpError) - result = "($1, $2) Error: $3" % [$getLine(my), $getColumn(my), errorMessages[my.err]] - -proc errorMsgExpected*(my: SexpParser, e: string): string = - ## returns an error message "`e` expected" in the same format as the - ## other error messages - result = "($1, $2) Error: $3" % [$getLine(my), $getColumn(my), e & " expected"] - -proc handleHexChar(c: char, x: var int): bool = - result = true # Success - case c - of '0'..'9': x = (x shl 4) or (ord(c) - ord('0')) - of 'a'..'f': x = (x shl 4) or (ord(c) - ord('a') + 10) - of 'A'..'F': x = (x shl 4) or (ord(c) - ord('A') + 10) - else: result = false # error - -proc parseString(my: var SexpParser): TokKind = - result = tkString - var pos = my.bufpos + 1 - var buf = my.buf - while true: - case buf[pos] - of '\0': - my.err = errQuoteExpected - result = tkError - break - of '"': - inc(pos) - break - of '\\': - case buf[pos+1] - of '\\', '"', '\'', '/': - add(my.a, buf[pos+1]) - inc(pos, 2) - of 'b': - add(my.a, '\b') - inc(pos, 2) - of 'f': - add(my.a, '\f') - inc(pos, 2) - of 'n': - add(my.a, '\L') - inc(pos, 2) - of 'r': - add(my.a, '\C') - inc(pos, 2) - of 't': - add(my.a, '\t') - inc(pos, 2) - of 'u': - inc(pos, 2) - var r: int - if handleHexChar(buf[pos], r): inc(pos) - if handleHexChar(buf[pos], r): inc(pos) - if handleHexChar(buf[pos], r): inc(pos) - if handleHexChar(buf[pos], r): inc(pos) - add(my.a, toUTF8(Rune(r))) - else: - # don't bother with the error - add(my.a, buf[pos]) - inc(pos) - of '\c': - pos = lexbase.handleCR(my, pos) - buf = my.buf - add(my.a, '\c') - of '\L': - pos = lexbase.handleLF(my, pos) - buf = my.buf - add(my.a, '\L') - else: - add(my.a, buf[pos]) - inc(pos) - my.bufpos = pos # store back - -proc parseNumber(my: var SexpParser) = - var pos = my.bufpos - var buf = my.buf - if buf[pos] == '-': - add(my.a, '-') - inc(pos) - if buf[pos] == '.': - add(my.a, "0.") - inc(pos) - else: - while buf[pos] in Digits: - add(my.a, buf[pos]) - inc(pos) - if buf[pos] == '.': - add(my.a, '.') - inc(pos) - # digits after the dot: - while buf[pos] in Digits: - add(my.a, buf[pos]) - inc(pos) - if buf[pos] in {'E', 'e'}: - add(my.a, buf[pos]) - inc(pos) - if buf[pos] in {'+', '-'}: - add(my.a, buf[pos]) - inc(pos) - while buf[pos] in Digits: - add(my.a, buf[pos]) - inc(pos) - my.bufpos = pos - -proc parseSymbol(my: var SexpParser) = - var pos = my.bufpos - var buf = my.buf - if buf[pos] in IdentStartChars: - while buf[pos] in IdentChars: - add(my.a, buf[pos]) - inc(pos) - my.bufpos = pos - -proc getTok(my: var SexpParser): TokKind = - setLen(my.a, 0) - case my.buf[my.bufpos] - of '-', '0'..'9': # numbers that start with a . are not parsed - # correctly. - parseNumber(my) - if {'.', 'e', 'E'} in my.a: - result = tkFloat - else: - result = tkInt - of '"': #" # gotta fix nim-mode - result = parseString(my) - of '(': - inc(my.bufpos) - result = tkParensLe - of ')': - inc(my.bufpos) - result = tkParensRi - of '\0': - result = tkEof - of 'a'..'z', 'A'..'Z', '_': - parseSymbol(my) - if my.a == "nil": - result = tkNil - else: - result = tkSymbol - of ' ': - result = tkSpace - inc(my.bufpos) - of '.': - result = tkDot - inc(my.bufpos) - else: - inc(my.bufpos) - result = tkError - my.tok = result - -# ------------- higher level interface --------------------------------------- - -type - SexpNodeKind* = enum ## possible SEXP node types - SNil, - SInt, - SFloat, - SString, - SSymbol, - SList, - SCons - - SexpNode* = ref SexpNodeObj ## SEXP node - SexpNodeObj* {.acyclic.} = object - case kind*: SexpNodeKind - of SString: - str*: string - of SSymbol: - symbol*: string - of SInt: - num*: BiggestInt - of SFloat: - fnum*: float - of SList: - elems*: seq[SexpNode] - of SCons: - car: SexpNode - cdr: SexpNode - of SNil: - discard - - Cons = tuple[car: SexpNode, cdr: SexpNode] - - SexpParsingError* = object of ValueError ## is raised for a SEXP error - -proc raiseParseErr*(p: SexpParser, msg: string) {.noinline, noreturn.} = - ## raises an `ESexpParsingError` exception. - raise newException(SexpParsingError, errorMsgExpected(p, msg)) - -proc newSString*(s: string): SexpNode {.procvar.}= - ## Creates a new `SString SexpNode`. - new(result) - result.kind = SString - result.str = s - -proc newSStringMove(s: string): SexpNode = - new(result) - result.kind = SString - shallowCopy(result.str, s) - -proc newSInt*(n: BiggestInt): SexpNode {.procvar.} = - ## Creates a new `SInt SexpNode`. - new(result) - result.kind = SInt - result.num = n - -proc newSFloat*(n: float): SexpNode {.procvar.} = - ## Creates a new `SFloat SexpNode`. - new(result) - result.kind = SFloat - result.fnum = n - -proc newSNil*(): SexpNode {.procvar.} = - ## Creates a new `SNil SexpNode`. - new(result) - -proc newSCons*(car, cdr: SexpNode): SexpNode {.procvar.} = - ## Creates a new `SCons SexpNode` - new(result) - result.kind = SCons - result.car = car - result.cdr = cdr - -proc newSList*(): SexpNode {.procvar.} = - ## Creates a new `SList SexpNode` - new(result) - result.kind = SList - result.elems = @[] - -proc newSSymbol*(s: string): SexpNode {.procvar.} = - new(result) - result.kind = SSymbol - result.symbol = s - -proc newSSymbolMove(s: string): SexpNode = - new(result) - result.kind = SSymbol - shallowCopy(result.symbol, s) - -proc getStr*(n: SexpNode, default: string = ""): string = - ## Retrieves the string value of a `SString SexpNode`. - ## - ## Returns ``default`` if ``n`` is not a ``SString``. - if n.kind != SString: return default - else: return n.str - -proc getNum*(n: SexpNode, default: BiggestInt = 0): BiggestInt = - ## Retrieves the int value of a `SInt SexpNode`. - ## - ## Returns ``default`` if ``n`` is not a ``SInt``. - if n.kind != SInt: return default - else: return n.num - -proc getFNum*(n: SexpNode, default: float = 0.0): float = - ## Retrieves the float value of a `SFloat SexpNode`. - ## - ## Returns ``default`` if ``n`` is not a ``SFloat``. - if n.kind != SFloat: return default - else: return n.fnum - -proc getSymbol*(n: SexpNode, default: string = ""): string = - ## Retrieves the int value of a `SList SexpNode`. - ## - ## Returns ``default`` if ``n`` is not a ``SList``. - if n.kind != SSymbol: return default - else: return n.symbol - -proc getElems*(n: SexpNode, default: seq[SexpNode] = @[]): seq[SexpNode] = - ## Retrieves the int value of a `SList SexpNode`. - ## - ## Returns ``default`` if ``n`` is not a ``SList``. - if n.kind == SNil: return @[] - elif n.kind != SList: return default - else: return n.elems - -proc getCons*(n: SexpNode, defaults: Cons = (newSNil(), newSNil())): Cons = - ## Retrieves the cons value of a `SList SexpNode`. - ## - ## Returns ``default`` if ``n`` is not a ``SList``. - if n.kind == SCons: return (n.car, n.cdr) - elif n.kind == SList: return (n.elems[0], n.elems[1]) - else: return defaults - -proc sexp*(s: string): SexpNode = - ## Generic constructor for SEXP data. Creates a new `SString SexpNode`. - new(result) - result.kind = SString - result.str = s - -proc sexp*(n: BiggestInt): SexpNode = - ## Generic constructor for SEXP data. Creates a new `SInt SexpNode`. - new(result) - result.kind = SInt - result.num = n - -proc sexp*(n: float): SexpNode = - ## Generic constructor for SEXP data. Creates a new `SFloat SexpNode`. - new(result) - result.kind = SFloat - result.fnum = n - -proc sexp*(b: bool): SexpNode = - ## Generic constructor for SEXP data. Creates a new `SSymbol - ## SexpNode` with value t or `SNil SexpNode`. - new(result) - if b: - result.kind = SSymbol - result.symbol = "t" - else: - result.kind = SNil - -proc sexp*(elements: openArray[SexpNode]): SexpNode = - ## Generic constructor for SEXP data. Creates a new `SList SexpNode` - new(result) - result.kind = SList - newSeq(result.elems, elements.len) - for i, p in pairs(elements): result.elems[i] = p - -proc sexp*(s: SexpNode): SexpNode = - result = s - -proc toSexp(x: NimNode): NimNode {.compiletime.} = - case x.kind - of nnkBracket: - result = newNimNode(nnkBracket) - for i in 0 .. <x.len: - result.add(toSexp(x[i])) - - else: - result = x - - result = prefix(result, "sexp") - -macro convertSexp*(x: expr): expr = - ## Convert an expression to a SexpNode directly, without having to specify - ## `%` for every element. - result = toSexp(x) - -proc `==`* (a,b: SexpNode): bool = - ## Check two nodes for equality - if a.isNil: - if b.isNil: return true - return false - elif b.isNil or a.kind != b.kind: - return false - else: - return case a.kind - of SString: - a.str == b.str - of SInt: - a.num == b.num - of SFloat: - a.fnum == b.fnum - of SNil: - true - of SList: - a.elems == b.elems - of SSymbol: - a.symbol == b.symbol - of SCons: - a.car == b.car and a.cdr == b.cdr - -proc hash* (n:SexpNode): Hash = - ## Compute the hash for a SEXP node - case n.kind - of SList: - result = hash(n.elems) - of SInt: - result = hash(n.num) - of SFloat: - result = hash(n.fnum) - of SString: - result = hash(n.str) - of SNil: - result = hash(0) - of SSymbol: - result = hash(n.symbol) - of SCons: - result = hash(n.car) !& hash(n.cdr) - -proc len*(n: SexpNode): int = - ## If `n` is a `SList`, it returns the number of elements. - ## If `n` is a `JObject`, it returns the number of pairs. - ## Else it returns 0. - case n.kind - of SList: result = n.elems.len - else: discard - -proc `[]`*(node: SexpNode, index: int): SexpNode = - ## Gets the node at `index` in a List. Result is undefined if `index` - ## is out of bounds - assert(not isNil(node)) - assert(node.kind == SList) - return node.elems[index] - -proc add*(father, child: SexpNode) = - ## Adds `child` to a SList node `father`. - assert father.kind == SList - father.elems.add(child) - -# ------------- pretty printing ---------------------------------------------- - -proc indent(s: var string, i: int) = - s.add(spaces(i)) - -proc newIndent(curr, indent: int, ml: bool): int = - if ml: return curr + indent - else: return indent - -proc nl(s: var string, ml: bool) = - if ml: s.add("\n") - -proc escapeJson*(s: string): string = - ## Converts a string `s` to its JSON representation. - result = newStringOfCap(s.len + s.len shr 3) - result.add("\"") - for x in runes(s): - var r = int(x) - if r >= 32 and r <= 127: - var c = chr(r) - case c - of '"': result.add("\\\"") #" # gotta fix nim-mode - of '\\': result.add("\\\\") - else: result.add(c) - else: - result.add("\\u") - result.add(toHex(r, 4)) - result.add("\"") - -proc copy*(p: SexpNode): SexpNode = - ## Performs a deep copy of `a`. - case p.kind - of SString: - result = newSString(p.str) - of SInt: - result = newSInt(p.num) - of SFloat: - result = newSFloat(p.fnum) - of SNil: - result = newSNil() - of SSymbol: - result = newSSymbol(p.symbol) - of SList: - result = newSList() - for i in items(p.elems): - result.elems.add(copy(i)) - of SCons: - result = newSCons(copy(p.car), copy(p.cdr)) - -proc toPretty(result: var string, node: SexpNode, indent = 2, ml = true, - lstArr = false, currIndent = 0) = - case node.kind - of SString: - if lstArr: result.indent(currIndent) - result.add(escapeJson(node.str)) - of SInt: - if lstArr: result.indent(currIndent) - result.add($node.num) - of SFloat: - if lstArr: result.indent(currIndent) - result.add($node.fnum) - of SNil: - if lstArr: result.indent(currIndent) - result.add("nil") - of SSymbol: - if lstArr: result.indent(currIndent) - result.add($node.symbol) - of SList: - if lstArr: result.indent(currIndent) - if len(node.elems) != 0: - result.add("(") - result.nl(ml) - for i in 0..len(node.elems)-1: - if i > 0: - result.add(" ") - result.nl(ml) # New Line - toPretty(result, node.elems[i], indent, ml, - true, newIndent(currIndent, indent, ml)) - result.nl(ml) - result.indent(currIndent) - result.add(")") - else: result.add("nil") - of SCons: - if lstArr: result.indent(currIndent) - result.add("(") - toPretty(result, node.car, indent, ml, - true, newIndent(currIndent, indent, ml)) - result.add(" . ") - toPretty(result, node.cdr, indent, ml, - true, newIndent(currIndent, indent, ml)) - result.add(")") - -proc pretty*(node: SexpNode, indent = 2): string = - ## Converts `node` to its Sexp Representation, with indentation and - ## on multiple lines. - result = "" - toPretty(result, node, indent) - -proc `$`*(node: SexpNode): string = - ## Converts `node` to its SEXP Representation on one line. - result = "" - toPretty(result, node, 0, false) - -iterator items*(node: SexpNode): SexpNode = - ## Iterator for the items of `node`. `node` has to be a SList. - assert node.kind == SList - for i in items(node.elems): - yield i - -iterator mitems*(node: var SexpNode): var SexpNode = - ## Iterator for the items of `node`. `node` has to be a SList. Items can be - ## modified. - assert node.kind == SList - for i in mitems(node.elems): - yield i - -proc eat(p: var SexpParser, tok: TokKind) = - if p.tok == tok: discard getTok(p) - else: raiseParseErr(p, tokToStr[tok]) - -proc parseSexp(p: var SexpParser): SexpNode = - ## Parses SEXP from a SEXP Parser `p`. - case p.tok - of tkString: - # we capture 'p.a' here, so we need to give it a fresh buffer afterwards: - result = newSStringMove(p.a) - p.a = "" - discard getTok(p) - of tkInt: - result = newSInt(parseBiggestInt(p.a)) - discard getTok(p) - of tkFloat: - result = newSFloat(parseFloat(p.a)) - discard getTok(p) - of tkNil: - result = newSNil() - discard getTok(p) - of tkSymbol: - result = newSSymbolMove(p.a) - p.a = "" - discard getTok(p) - of tkParensLe: - result = newSList() - discard getTok(p) - while p.tok notin {tkParensRi, tkDot}: - result.add(parseSexp(p)) - if p.tok != tkSpace: break - discard getTok(p) - if p.tok == tkDot: - eat(p, tkDot) - eat(p, tkSpace) - result.add(parseSexp(p)) - result = newSCons(result[0], result[1]) - eat(p, tkParensRi) - of tkSpace, tkDot, tkError, tkParensRi, tkEof: - raiseParseErr(p, "(") - -proc open*(my: var SexpParser, input: Stream) = - ## initializes the parser with an input stream. - lexbase.open(my, input) - my.kind = sexpError - my.a = "" - -proc parseSexp*(s: Stream): SexpNode = - ## Parses from a buffer `s` into a `SexpNode`. - var p: SexpParser - p.open(s) - discard getTok(p) # read first token - result = p.parseSexp() - p.close() - -proc parseSexp*(buffer: string): SexpNode = - ## Parses Sexp from `buffer`. - result = parseSexp(newStringStream(buffer)) - -when isMainModule: - let testSexp = parseSexp("""(1 (98 2) nil (2) foobar "foo" 9.234)""") - assert(testSexp[0].getNum == 1) - assert(testSexp[1][0].getNum == 98) - assert(testSexp[2].getElems == @[]) - assert(testSexp[4].getSymbol == "foobar") - assert(testSexp[5].getStr == "foo") - - let alist = parseSexp("""((1 . 2) (2 . "foo"))""") - assert(alist[0].getCons.car.getNum == 1) - assert(alist[0].getCons.cdr.getNum == 2) - assert(alist[1].getCons.cdr.getStr == "foo") - - # Generator: - var j = convertSexp([true, false, "foobar", [1, 2, "baz"]]) - assert($j == """(t nil "foobar" (1 2 "baz"))""") |