summary refs log tree commit diff stats
path: root/lib/pure/sexp.nim
diff options
context:
space:
mode:
Diffstat (limited to 'lib/pure/sexp.nim')
-rw-r--r--lib/pure/sexp.nim697
1 files changed, 697 insertions, 0 deletions
diff --git a/lib/pure/sexp.nim b/lib/pure/sexp.nim
new file mode 100644
index 000000000..3c9fbc150
--- /dev/null
+++ b/lib/pure/sexp.nim
@@ -0,0 +1,697 @@
+#
+#
+#            Nim's Runtime Library
+#        (c) Copyright 2015 Andreas Rumpf, Dominik Picheta
+#
+#    See the file "copying.txt", included in this
+#    distribution, for details about the copyright.
+#
+
+import
+  hashes, strutils, lexbase, streams, unicode, macros
+
+type
+  SexpEventKind* = enum  ## enumeration of all events that may occur when parsing
+    sexpError,           ## an error occurred during parsing
+    sexpEof,             ## end of file reached
+    sexpString,          ## a string literal
+    sexpSymbol,          ## a symbol
+    sexpInt,             ## an integer literal
+    sexpFloat,           ## a float literal
+    sexpNil,             ## the value ``nil``
+    sexpDot,             ## the dot to separate car/cdr
+    sexpListStart,       ## start of a list: the ``(`` token
+    sexpListEnd,         ## end of a list: the ``)`` token
+
+  TTokKind = enum        # must be synchronized with SexpEventKind!
+    tkError,
+    tkEof,
+    tkString,
+    tkSymbol,
+    tkInt,
+    tkFloat,
+    tkNil,
+    tkDot,
+    tkParensLe,
+    tkParensRi
+    tkSpace
+
+  SexpError* = enum        ## enumeration that lists all errors that can occur
+    errNone,               ## no error
+    errInvalidToken,       ## invalid token
+    errParensRiExpected,    ## ``)`` expected
+    errQuoteExpected,      ## ``"`` expected
+    errEofExpected,        ## EOF expected
+
+  SexpParser* = object of BaseLexer ## the parser object.
+    a: string
+    tok: TTokKind
+    kind: SexpEventKind
+    err: SexpError
+
+const
+  errorMessages: array [SexpError, string] = [
+    "no error",
+    "invalid token",
+    "')' expected",
+    "'\"' or \"'\" expected",
+    "EOF expected",
+  ]
+  tokToStr: array [TTokKind, string] = [
+    "invalid token",
+    "EOF",
+    "string literal",
+    "symbol",
+    "int literal",
+    "float literal",
+    "nil",
+    ".",
+    "(", ")", "space"
+  ]
+
+proc close*(my: var SexpParser) {.inline.} =
+  ## closes the parser `my` and its associated input stream.
+  lexbase.close(my)
+
+proc str*(my: SexpParser): string {.inline.} =
+  ## returns the character data for the events: ``sexpInt``, ``sexpFloat``,
+  ## ``sexpString``
+  assert(my.kind in {sexpInt, sexpFloat, sexpString})
+  result = my.a
+
+proc getInt*(my: SexpParser): BiggestInt {.inline.} =
+  ## returns the number for the event: ``sexpInt``
+  assert(my.kind == sexpInt)
+  result = parseBiggestInt(my.a)
+
+proc getFloat*(my: SexpParser): float {.inline.} =
+  ## returns the number for the event: ``sexpFloat``
+  assert(my.kind == sexpFloat)
+  result = parseFloat(my.a)
+
+proc kind*(my: SexpParser): SexpEventKind {.inline.} =
+  ## returns the current event type for the SEXP parser
+  result = my.kind
+
+proc getColumn*(my: SexpParser): int {.inline.} =
+  ## get the current column the parser has arrived at.
+  result = getColNumber(my, my.bufpos)
+
+proc getLine*(my: SexpParser): int {.inline.} =
+  ## get the current line the parser has arrived at.
+  result = my.lineNumber
+
+proc errorMsg*(my: SexpParser): string =
+  ## returns a helpful error message for the event ``sexpError``
+  assert(my.kind == sexpError)
+  result = "($1, $2) Error: $3" % [$getLine(my), $getColumn(my), errorMessages[my.err]]
+
+proc errorMsgExpected*(my: SexpParser, e: string): string =
+  ## returns an error message "`e` expected" in the same format as the
+  ## other error messages
+  result = "($1, $2) Error: $3" % [$getLine(my), $getColumn(my), e & " expected"]
+
+proc handleHexChar(c: char, x: var int): bool =
+  result = true # Success
+  case c
+  of '0'..'9': x = (x shl 4) or (ord(c) - ord('0'))
+  of 'a'..'f': x = (x shl 4) or (ord(c) - ord('a') + 10)
+  of 'A'..'F': x = (x shl 4) or (ord(c) - ord('A') + 10)
+  else: result = false # error
+
+proc parseString(my: var SexpParser): TTokKind =
+  result = tkString
+  var pos = my.bufpos + 1
+  var buf = my.buf
+  while true:
+    case buf[pos]
+    of '\0':
+      my.err = errQuoteExpected
+      result = tkError
+      break
+    of '"':
+      inc(pos)
+      break
+    of '\\':
+      case buf[pos+1]
+      of '\\', '"', '\'', '/':
+        add(my.a, buf[pos+1])
+        inc(pos, 2)
+      of 'b':
+        add(my.a, '\b')
+        inc(pos, 2)
+      of 'f':
+        add(my.a, '\f')
+        inc(pos, 2)
+      of 'n':
+        add(my.a, '\L')
+        inc(pos, 2)
+      of 'r':
+        add(my.a, '\C')
+        inc(pos, 2)
+      of 't':
+        add(my.a, '\t')
+        inc(pos, 2)
+      of 'u':
+        inc(pos, 2)
+        var r: int
+        if handleHexChar(buf[pos], r): inc(pos)
+        if handleHexChar(buf[pos], r): inc(pos)
+        if handleHexChar(buf[pos], r): inc(pos)
+        if handleHexChar(buf[pos], r): inc(pos)
+        add(my.a, toUTF8(Rune(r)))
+      else:
+        # don't bother with the error
+        add(my.a, buf[pos])
+        inc(pos)
+    of '\c':
+      pos = lexbase.handleCR(my, pos)
+      buf = my.buf
+      add(my.a, '\c')
+    of '\L':
+      pos = lexbase.handleLF(my, pos)
+      buf = my.buf
+      add(my.a, '\L')
+    else:
+      add(my.a, buf[pos])
+      inc(pos)
+  my.bufpos = pos # store back
+
+proc parseNumber(my: var SexpParser) =
+  var pos = my.bufpos
+  var buf = my.buf
+  if buf[pos] == '-':
+    add(my.a, '-')
+    inc(pos)
+  if buf[pos] == '.':
+    add(my.a, "0.")
+    inc(pos)
+  else:
+    while buf[pos] in Digits:
+      add(my.a, buf[pos])
+      inc(pos)
+    if buf[pos] == '.':
+      add(my.a, '.')
+      inc(pos)
+  # digits after the dot:
+  while buf[pos] in Digits:
+    add(my.a, buf[pos])
+    inc(pos)
+  if buf[pos] in {'E', 'e'}:
+    add(my.a, buf[pos])
+    inc(pos)
+    if buf[pos] in {'+', '-'}:
+      add(my.a, buf[pos])
+      inc(pos)
+    while buf[pos] in Digits:
+      add(my.a, buf[pos])
+      inc(pos)
+  my.bufpos = pos
+
+proc parseSymbol(my: var SexpParser) =
+  var pos = my.bufpos
+  var buf = my.buf
+  if buf[pos] in IdentStartChars:
+    while buf[pos] in IdentChars:
+      add(my.a, buf[pos])
+      inc(pos)
+  my.bufpos = pos
+
+proc getTok(my: var SexpParser): TTokKind =
+  setLen(my.a, 0)
+  case my.buf[my.bufpos]
+  of '-', '0'..'9': # numbers that start with a . are not parsed
+                    # correctly.
+    parseNumber(my)
+    if {'.', 'e', 'E'} in my.a:
+      result = tkFloat
+    else:
+      result = tkInt
+  of '"': #" # gotta fix nim-mode
+    result = parseString(my)
+  of '(':
+    inc(my.bufpos)
+    result = tkParensLe
+  of ')':
+    inc(my.bufpos)
+    result = tkParensRi
+  of '\0':
+    result = tkEof
+  of 'a'..'z', 'A'..'Z', '_':
+    parseSymbol(my)
+    if my.a == "nil":
+      result = tkNil
+    else:
+      result = tkSymbol
+  of ' ':
+    result = tkSpace
+    inc(my.bufpos)
+  of '.':
+    result = tkDot
+    inc(my.bufpos)
+  else:
+    inc(my.bufpos)
+    result = tkError
+  my.tok = result
+
+# ------------- higher level interface ---------------------------------------
+
+type
+  SexpNodeKind* = enum ## possible SEXP node types
+    SNil,
+    SInt,
+    SFloat,
+    SString,
+    SSymbol,
+    SList,
+    SCons
+
+  SexpNode* = ref SexpNodeObj ## SEXP node
+  SexpNodeObj* {.acyclic.} = object
+    case kind*: SexpNodeKind
+    of SString:
+      str*: string
+    of SSymbol:
+      symbol*: string
+    of SInt:
+      num*: BiggestInt
+    of SFloat:
+      fnum*: float
+    of SList:
+      elems*: seq[SexpNode]
+    of SCons:
+      car: SexpNode
+      cdr: SexpNode
+    of SNil:
+      discard
+
+  Cons = tuple[car: SexpNode, cdr: SexpNode]
+
+  SexpParsingError* = object of ValueError ## is raised for a SEXP error
+
+proc raiseParseErr*(p: SexpParser, msg: string) {.noinline, noreturn.} =
+  ## raises an `ESexpParsingError` exception.
+  raise newException(SexpParsingError, errorMsgExpected(p, msg))
+
+proc newSString*(s: string): SexpNode {.procvar.}=
+  ## Creates a new `SString SexpNode`.
+  new(result)
+  result.kind = SString
+  result.str = s
+
+proc newSStringMove(s: string): SexpNode =
+  new(result)
+  result.kind = SString
+  shallowCopy(result.str, s)
+
+proc newSInt*(n: BiggestInt): SexpNode {.procvar.} =
+  ## Creates a new `SInt SexpNode`.
+  new(result)
+  result.kind = SInt
+  result.num  = n
+
+proc newSFloat*(n: float): SexpNode {.procvar.} =
+  ## Creates a new `SFloat SexpNode`.
+  new(result)
+  result.kind = SFloat
+  result.fnum  = n
+
+proc newSNil*(): SexpNode {.procvar.} =
+  ## Creates a new `SNil SexpNode`.
+  new(result)
+
+proc newSCons*(car, cdr: SexpNode): SexpNode {.procvar.} =
+  ## Creates a new `SCons SexpNode`
+  new(result)
+  result.kind = SCons
+  result.car = car
+  result.cdr = cdr
+
+proc newSList*(): SexpNode {.procvar.} =
+  ## Creates a new `SList SexpNode`
+  new(result)
+  result.kind = SList
+  result.elems = @[]
+
+proc newSSymbol*(s: string): SexpNode {.procvar.} =
+  new(result)
+  result.kind = SSymbol
+  result.symbol = s
+
+proc newSSymbolMove(s: string): SexpNode =
+  new(result)
+  result.kind = SSymbol
+  shallowCopy(result.symbol, s)
+
+proc getStr*(n: SexpNode, default: string = ""): string =
+  ## Retrieves the string value of a `SString SexpNode`.
+  ##
+  ## Returns ``default`` if ``n`` is not a ``SString``.
+  if n.kind != SString: return default
+  else: return n.str
+
+proc getNum*(n: SexpNode, default: BiggestInt = 0): BiggestInt =
+  ## Retrieves the int value of a `SInt SexpNode`.
+  ##
+  ## Returns ``default`` if ``n`` is not a ``SInt``.
+  if n.kind != SInt: return default
+  else: return n.num
+
+proc getFNum*(n: SexpNode, default: float = 0.0): float =
+  ## Retrieves the float value of a `SFloat SexpNode`.
+  ##
+  ## Returns ``default`` if ``n`` is not a ``SFloat``.
+  if n.kind != SFloat: return default
+  else: return n.fnum
+
+proc getSymbol*(n: SexpNode, default: string = ""): string =
+  ## Retrieves the int value of a `SList SexpNode`.
+  ##
+  ## Returns ``default`` if ``n`` is not a ``SList``.
+  if n.kind != SSymbol: return default
+  else: return n.symbol
+
+proc getElems*(n: SexpNode, default: seq[SexpNode] = @[]): seq[SexpNode] =
+  ## Retrieves the int value of a `SList SexpNode`.
+  ##
+  ## Returns ``default`` if ``n`` is not a ``SList``.
+  if n.kind == SNil: return @[]
+  elif n.kind != SList: return default
+  else: return n.elems
+
+proc getCons*(n: SexpNode, defaults: Cons = (newSNil(), newSNil())): Cons =
+  ## Retrieves the cons value of a `SList SexpNode`.
+  ##
+  ## Returns ``default`` if ``n`` is not a ``SList``.
+  if n.kind == SCons: return (n.car, n.cdr)
+  elif n.kind == SList: return (n.elems[0], n.elems[1])
+  else: return defaults
+
+proc sexp*(s: string): SexpNode =
+  ## Generic constructor for SEXP data. Creates a new `SString SexpNode`.
+  new(result)
+  result.kind = SString
+  result.str = s
+
+proc sexp*(n: BiggestInt): SexpNode =
+  ## Generic constructor for SEXP data. Creates a new `SInt SexpNode`.
+  new(result)
+  result.kind = SInt
+  result.num  = n
+
+proc sexp*(n: float): SexpNode =
+  ## Generic constructor for SEXP data. Creates a new `SFloat SexpNode`.
+  new(result)
+  result.kind = SFloat
+  result.fnum  = n
+
+proc sexp*(b: bool): SexpNode =
+  ## Generic constructor for SEXP data. Creates a new `SSymbol
+  ## SexpNode` with value t or `SNil SexpNode`.
+  new(result)
+  if b:
+    result.kind = SSymbol
+    result.symbol = "t"
+  else:
+    result.kind = SNil
+
+proc sexp*(elements: openArray[SexpNode]): SexpNode =
+  ## Generic constructor for SEXP data. Creates a new `SList SexpNode`
+  new(result)
+  result.kind = SList
+  newSeq(result.elems, elements.len)
+  for i, p in pairs(elements): result.elems[i] = p
+
+proc sexp*(s: SexpNode): SexpNode =
+  result = s
+
+proc toSexp(x: NimNode): NimNode {.compiletime.} =
+  case x.kind
+  of nnkBracket:
+    result = newNimNode(nnkBracket)
+    for i in 0 .. <x.len:
+      result.add(toSexp(x[i]))
+
+  else:
+    result = x
+
+  result = prefix(result, "sexp")
+
+macro convertSexp*(x: expr): expr =
+  ## Convert an expression to a SexpNode directly, without having to specify
+  ## `%` for every element.
+  result = toSexp(x)
+
+proc `==`* (a,b: SexpNode): bool =
+  ## Check two nodes for equality
+  if a.isNil:
+    if b.isNil: return true
+    return false
+  elif b.isNil or a.kind != b.kind:
+    return false
+  else:
+    return case a.kind
+    of SString:
+      a.str == b.str
+    of SInt:
+      a.num == b.num
+    of SFloat:
+      a.fnum == b.fnum
+    of SNil:
+      true
+    of SList:
+      a.elems == b.elems
+    of SSymbol:
+      a.symbol == b.symbol
+    of SCons:
+      a.car == b.car and a.cdr == b.cdr
+
+proc hash* (n:SexpNode): THash =
+  ## Compute the hash for a SEXP node
+  case n.kind
+  of SList:
+    result = hash(n.elems)
+  of SInt:
+    result = hash(n.num)
+  of SFloat:
+    result = hash(n.fnum)
+  of SString:
+    result = hash(n.str)
+  of SNil:
+    result = hash(0)
+  of SSymbol:
+    result = hash(n.symbol)
+  of SCons:
+    result = hash(n.car) !& hash(n.cdr)
+
+proc len*(n: SexpNode): int =
+  ## If `n` is a `SList`, it returns the number of elements.
+  ## If `n` is a `JObject`, it returns the number of pairs.
+  ## Else it returns 0.
+  case n.kind
+  of SList: result = n.elems.len
+  else: discard
+
+proc `[]`*(node: SexpNode, index: int): SexpNode =
+  ## Gets the node at `index` in a List. Result is undefined if `index`
+  ## is out of bounds
+  assert(not isNil(node))
+  assert(node.kind == SList)
+  return node.elems[index]
+
+proc add*(father, child: SexpNode) =
+  ## Adds `child` to a SList node `father`.
+  assert father.kind == SList
+  father.elems.add(child)
+
+# ------------- pretty printing ----------------------------------------------
+
+proc indent(s: var string, i: int) =
+  s.add(spaces(i))
+
+proc newIndent(curr, indent: int, ml: bool): int =
+  if ml: return curr + indent
+  else: return indent
+
+proc nl(s: var string, ml: bool) =
+  if ml: s.add("\n")
+
+proc escapeJson*(s: string): string =
+  ## Converts a string `s` to its JSON representation.
+  result = newStringOfCap(s.len + s.len shr 3)
+  result.add("\"")
+  for x in runes(s):
+    var r = int(x)
+    if r >= 32 and r <= 127:
+      var c = chr(r)
+      case c
+      of '"': result.add("\\\"") #" # gotta fix nim-mode
+      of '\\': result.add("\\\\")
+      else: result.add(c)
+    else:
+      result.add("\\u")
+      result.add(toHex(r, 4))
+  result.add("\"")
+
+proc copy*(p: SexpNode): SexpNode =
+  ## Performs a deep copy of `a`.
+  case p.kind
+  of SString:
+    result = newSString(p.str)
+  of SInt:
+    result = newSInt(p.num)
+  of SFloat:
+    result = newSFloat(p.fnum)
+  of SNil:
+    result = newSNil()
+  of SSymbol:
+    result = newSSymbol(p.symbol)
+  of SList:
+    result = newSList()
+    for i in items(p.elems):
+      result.elems.add(copy(i))
+  of SCons:
+    result = newSCons(copy(p.car), copy(p.cdr))
+
+proc toPretty(result: var string, node: SexpNode, indent = 2, ml = true,
+              lstArr = false, currIndent = 0) =
+  case node.kind
+  of SString:
+    if lstArr: result.indent(currIndent)
+    result.add(escapeJson(node.str))
+  of SInt:
+    if lstArr: result.indent(currIndent)
+    result.add($node.num)
+  of SFloat:
+    if lstArr: result.indent(currIndent)
+    result.add($node.fnum)
+  of SNil:
+    if lstArr: result.indent(currIndent)
+    result.add("nil")
+  of SSymbol:
+    if lstArr: result.indent(currIndent)
+    result.add($node.symbol)
+  of SList:
+    if lstArr: result.indent(currIndent)
+    if len(node.elems) != 0:
+      result.add("(")
+      result.nl(ml)
+      for i in 0..len(node.elems)-1:
+        if i > 0:
+          result.add(" ")
+          result.nl(ml) # New Line
+        toPretty(result, node.elems[i], indent, ml,
+            true, newIndent(currIndent, indent, ml))
+      result.nl(ml)
+      result.indent(currIndent)
+      result.add(")")
+    else: result.add("nil")
+  of SCons:
+    if lstArr: result.indent(currIndent)
+    result.add("(")
+    toPretty(result, node.car, indent, ml,
+        true, newIndent(currIndent, indent, ml))
+    result.add(" . ")
+    toPretty(result, node.cdr, indent, ml,
+        true, newIndent(currIndent, indent, ml))
+    result.add(")")
+
+proc pretty*(node: SexpNode, indent = 2): string =
+  ## Converts `node` to its Sexp Representation, with indentation and
+  ## on multiple lines.
+  result = ""
+  toPretty(result, node, indent)
+
+proc `$`*(node: SexpNode): string =
+  ## Converts `node` to its SEXP Representation on one line.
+  result = ""
+  toPretty(result, node, 0, false)
+
+iterator items*(node: SexpNode): SexpNode =
+  ## Iterator for the items of `node`. `node` has to be a SList.
+  assert node.kind == SList
+  for i in items(node.elems):
+    yield i
+
+iterator mitems*(node: var SexpNode): var SexpNode =
+  ## Iterator for the items of `node`. `node` has to be a SList. Items can be
+  ## modified.
+  assert node.kind == SList
+  for i in mitems(node.elems):
+    yield i
+
+proc eat(p: var SexpParser, tok: TTokKind) =
+  if p.tok == tok: discard getTok(p)
+  else: raiseParseErr(p, tokToStr[tok])
+
+proc parseSexp(p: var SexpParser): SexpNode =
+  ## Parses SEXP from a SEXP Parser `p`.
+  case p.tok
+  of tkString:
+    # we capture 'p.a' here, so we need to give it a fresh buffer afterwards:
+    result = newSStringMove(p.a)
+    p.a = ""
+    discard getTok(p)
+  of tkInt:
+    result = newSInt(parseBiggestInt(p.a))
+    discard getTok(p)
+  of tkFloat:
+    result = newSFloat(parseFloat(p.a))
+    discard getTok(p)
+  of tkNil:
+    result = newSNil()
+    discard getTok(p)
+  of tkSymbol:
+    result = newSSymbolMove(p.a)
+    p.a = ""
+    discard getTok(p)
+  of tkParensLe:
+    result = newSList()
+    discard getTok(p)
+    while p.tok notin {tkParensRi, tkDot}:
+      result.add(parseSexp(p))
+      if p.tok != tkSpace: break
+      discard getTok(p)
+    if p.tok == tkDot:
+      eat(p, tkDot)
+      eat(p, tkSpace)
+      result.add(parseSexp(p))
+      result = newSCons(result[0], result[1])
+    eat(p, tkParensRi)
+  of tkSpace, tkDot, tkError, tkParensRi, tkEof:
+    raiseParseErr(p, "(")
+
+proc open*(my: var SexpParser, input: Stream) =
+  ## initializes the parser with an input stream.
+  lexbase.open(my, input)
+  my.kind = sexpError
+  my.a = ""
+
+proc parseSexp*(s: Stream): SexpNode =
+  ## Parses from a buffer `s` into a `SexpNode`.
+  var p: SexpParser
+  p.open(s)
+  discard getTok(p) # read first token
+  result = p.parseSexp()
+  p.close()
+
+proc parseSexp*(buffer: string): SexpNode =
+  ## Parses Sexp from `buffer`.
+  result = parseSexp(newStringStream(buffer))
+
+when isMainModule:
+  let testSexp = parseSexp("""(1 (98 2) nil (2) foobar "foo" 9.234)""")
+  assert(testSexp[0].getNum == 1)
+  assert(testSexp[1][0].getNum == 98)
+  assert(testSexp[2].getElems == @[])
+  assert(testSexp[4].getSymbol == "foobar")
+  assert(testSexp[5].getStr == "foo")
+
+  let alist = parseSexp("""((1 . 2) (2 . "foo"))""")
+  assert(alist[0].getCons.car.getNum == 1)
+  assert(alist[0].getCons.cdr.getNum == 2)
+  assert(alist[1].getCons.cdr.getStr == "foo")
+
+  # Generator:
+  var j = convertSexp([true, false, "foobar", [1, 2, "baz"]])
+  assert($j == """(t nil "foobar" (1 2 "baz"))""")