#
#
# Nim's Runtime Library
# (c) Copyright 2012 Andreas Rumpf
#
# See the file "copying.txt", included in this
# distribution, for details about the copyright.
#
## A simple XML tree. More efficient and simpler than the DOM.
import macros, strtabs
type
XmlNode* = ref XmlNodeObj ## an XML tree consists of ``PXmlNode``'s.
XmlNodeKind* = enum ## different kinds of ``PXmlNode``'s
xnText, ## a text element
xnElement, ## an element with 0 or more children
xnCData, ## a CDATA node
xnEntity, ## an entity (like ``&thing;``)
xnComment ## an XML comment
XmlAttributes* = StringTableRef ## an alias for a string to string mapping
XmlNodeObj {.acyclic.} = object
case k: XmlNodeKind # private, use the kind() proc to read this field.
of xnText, xnComment, xnCData, xnEntity:
fText: string
of xnElement:
fTag: string
s: seq[XmlNode]
fAttr: XmlAttributes
fClientData: int ## for other clients
{.deprecated: [PXmlNode: XmlNode, TXmlNodeKind: XmlNodeKind, PXmlAttributes:
XmlAttributes, TXmlNode: XmlNodeObj].}
proc newXmlNode(kind: XmlNodeKind): XmlNode =
## creates a new ``XmlNode``.
new(result)
result.k = kind
proc newElement*(tag: string): XmlNode =
## creates a new ``PXmlNode`` of kind ``xnText`` with the given `tag`.
result = newXmlNode(xnElement)
result.fTag = tag
result.s = @[]
# init attributes lazily to safe memory
proc newText*(text: string): XmlNode =
## creates a new ``PXmlNode`` of kind ``xnText`` with the text `text`.
result = newXmlNode(xnText)
result.fText = text
proc newComment*(comment: string): XmlNode =
## creates a new ``PXmlNode`` of kind ``xnComment`` with the text `comment`.
result = newXmlNode(xnComment)
result.fText = comment
proc newCData*(cdata: string): XmlNode =
## creates a new ``PXmlNode`` of kind ``xnComment`` with the text `cdata`.
result = newXmlNode(xnCData)
result.fText = cdata
proc newEntity*(entity: string): XmlNode =
## creates a new ``PXmlNode`` of kind ``xnEntity`` with the text `entity`.
result = newXmlNode(xnEntity)
result.fText = entity
proc text*(n: XmlNode): string {.inline.} =
## gets the associated text with the node `n`. `n` can be a CDATA, Text,
## comment, or entity node.
assert n.k in {xnText, xnComment, xnCData, xnEntity}
result = n.fText
proc `text=`*(n: XmlNode, text: string){.inline.} =
## sets the associated text with the node `n`. `n` can be a CDATA, Text,
## comment, or entity node.
assert n.k in {xnText, xnComment, xnCData, xnEntity}
n.fText = text
proc rawText*(n: XmlNode): string {.inline.} =
## returns the underlying 'text' string by reference.
## This is only used for speed hacks.
shallowCopy(result, n.fText)
proc rawTag*(n: XmlNode): string {.inline.} =
## returns the underlying 'tag' string by reference.
## This is only used for speed hacks.
shallowCopy(result, n.fTag)
proc innerText*(n: XmlNode): string =
## gets the inner text of `n`:
##
## - If `n` is `xnText` or `xnEntity`, returns its content.
## - If `n` is `xnElement`, runs recursively on each child node and
## concatenates the results.
## - Otherwise returns an empty string.
proc worker(res: var string, n: XmlNode) =
case n.k
of xnText, xnEntity:
res.add(n.fText)
of xnElement:
for sub in n.s:
worker(res, sub)
else:
discard
result = ""
worker(result, n)
proc tag*(n: XmlNode): string {.inline.} =
## gets the tag name of `n`. `n` has to be an ``xnElement`` node.
assert n.k == xnElement
result = n.fTag
proc `tag=`*(n: XmlNode, tag: string) {.inline.} =
## sets the tag name of `n`. `n` has to be an ``xnElement`` node.
assert n.k == xnElement
n.fTag = tag
proc add*(father, son: XmlNode) {.inline.} =
## adds the child `son` to `father`.
add(father.s, son)
proc insert*(father, son: XmlNode, index: int) {.inline.} =
## insert the child `son` to a given position in `father`.
assert father.k == xnElement and son.k == xnElement
if len(father.s) > index:
insert(father.s, son, index)
else:
insert(father.s, son, len(father.s))
proc len*(n: XmlNode): int {.inline.} =
## returns the number `n`'s children.
if n.k == xnElement: result = len(n.s)
proc kind*(n: XmlNode): XmlNodeKind {.inline.} =
## returns `n`'s kind.
result = n.k
proc `[]`* (n: XmlNode, i: int): XmlNode {.inline.} =
## returns the `i`'th child of `n`.
assert n.k == xnElement
result = n.s[i]
proc delete*(n: XmlNode, i: Natural) {.noSideEffect.} =
## delete the `i`'th child of `n`.
assert n.k == xnElement
n.s.delete(i)
proc `[]`* (n: var XmlNode, i: int): var XmlNode {.inline.} =
## returns the `i`'th child of `n` so that it can be modified
assert n.k == xnElement
result = n.s[i]
proc mget*(n: var XmlNode, i: int): var XmlNode {.inline, deprecated.} =
## returns the `i`'th child of `n` so that it can be modified. Use ```[]```
## instead.
n[i]
iterator items*(n: XmlNode): XmlNode {.inline.} =
## iterates over any child of `n`.
assert n.k == xnElement
for i in 0 .. n.len-1: yield n[i]
iterator mitems*(n: var XmlNode): var XmlNode {.inline.} =
## iterates over any child of `n`.
assert n.k == xnElement
for i in 0 .. n.len-1: yield n[i]
proc attrs*(n: XmlNode): XmlAttributes {.inline.} =
## gets the attributes belonging to `n`.
## Returns `nil` if attributes have not been initialised for this node.
assert n.k == xnElement
result = n.fAttr
proc `attrs=`*(n: XmlNode, attr: XmlAttributes) {.inline.} =
## sets the attributes belonging to `n`.
assert n.k == xnElement
n.fAttr = attr
proc attrsLen*(n: XmlNode): int {.inline.} =
## returns the number of `n`'s attributes.
assert n.k == xnElement
if not isNil(n.fAttr): result = len(n.fAttr)
proc clientData*(n: XmlNode): int {.inline.} =
## gets the client data of `n`. The client data field is used by the HTML
## parser and generator.
result = n.fClientData
proc `clientData=`*(n: XmlNode, data: int) {.inline.} =
## sets the client data of `n`. The client data field is used by the HTML
## parser and generator.
n.fClientData = data
proc addEscaped*(result: var string, s: string) =
## same as ``result.add(escape(s))``, but more efficient.
for c in items(s):
case c
of '<': result.add("<")
of '>': result.add(">")
of '&': result.add("&")
of '"': result.add(""")
of '\'': result.add("'")
of '/': result.add("/")
else: result.add(c)
proc escape*(s: string): string =
## escapes `s` for inclusion into an XML document.
## Escapes these characters:
##
## ------------ -------------------
## char is converted to
## ------------ -------------------
## ``<`` ``<``
## ``>`` ``>``
## ``&`` ``&``
## ``"`` ``"``
## ``'`` ``'``
## ``/`` ``/``
## ------------ -------------------
result = newStringOfCap(s.len)
addEscaped(result, s)
proc addIndent(result: var string, indent: int) =
result.add("\n")
for i in 1..indent: result.add(' ')
proc noWhitespace(n: XmlNode): bool =
#for i in 1..n.len-1:
# if n[i].kind != n[0].kind: return true
for i in 0..n.len-1:
if n[i].kind in {xnText, xnEntity}: return true
proc add*(result: var string, n: XmlNode, indent = 0, indWidth = 2) =
## adds the textual representation of `n` to `result`.
proc addEscapedAttr(result: var string, s: string) =
# `addEscaped` alternative with less escaped characters.
# Only to be used for escaping attribute values enclosed in double quotes!
for c in items(s):
case c
of '<': result.add("<")
of '>': result.add(">")
of '&': result.add("&")
of '"': result.add(""")
else: result.add(c)
if n == nil: return
case n.k
of xnElement:
result.add('<')
result.add(n.fTag)
if not isNil(n.fAttr):
for key, val in pairs(n.fAttr):
result.add(' ')
result.add(key)
result.add("=\"")
result.addEscapedAttr(val)
result.add('"')
if n.len > 0:
result.add('>')
if n.len > 1:
if noWhitespace(n):
# for mixed leaves, we cannot output whitespace for readability,
# because this would be wrong. For example: ``ab`` is
# different from ``a b``.
for i in 0..n.len-1: result.add(n[i], indent+indWidth, indWidth)
else:
for i in 0..n.len-1:
result.addIndent(indent+indWidth)
result.add(n[i], indent+indWidth, indWidth)
result.addIndent(indent)
else:
result.add(n[0], indent+indWidth, indWidth)
result.add("")
result.add(n.fTag)
result.add(">")
else:
result.add(" />")
of xnText:
result.addEscaped(n.fText)
of xnComment:
result.add("")
of xnCData:
result.add("")
of xnEntity:
result.add('&')
result.add(n.fText)
result.add(';')
const
xmlHeader* = "\n"
## header to use for complete XML output
proc `$`*(n: XmlNode): string =
## converts `n` into its string representation. No ``<$xml ...$>`` declaration
## is produced, so that the produced XML fragments are composable.
result = ""
result.add(n)
proc newXmlTree*(tag: string, children: openArray[XmlNode],
attributes: XmlAttributes = nil): XmlNode =
## creates a new XML tree with `tag`, `children` and `attributes`
result = newXmlNode(xnElement)
result.fTag = tag
newSeq(result.s, children.len)
for i in 0..children.len-1: result.s[i] = children[i]
result.fAttr = attributes
proc xmlConstructor(e: NimNode): NimNode {.compileTime.} =
expectLen(e, 2)
var a = e[1]
if a.kind == nnkCall:
result = newCall("newXmlTree", toStrLit(a[0]))
var attrs = newNimNode(nnkBracket, a)
var newStringTabCall = newCall("newStringTable", attrs,
newIdentNode("modeCaseSensitive"))
var elements = newNimNode(nnkBracket, a)
for i in 1..a.len-1:
if a[i].kind == nnkExprEqExpr:
attrs.add(toStrLit(a[i][0]))
attrs.add(a[i][1])
#echo repr(attrs)
else:
elements.add(a[i])
result.add(elements)
if attrs.len > 1:
#echo repr(newStringTabCall)
result.add(newStringTabCall)
else:
result = newCall("newXmlTree", toStrLit(a))
macro `<>`*(x: untyped): untyped =
## Constructor macro for XML. Example usage:
##
## .. code-block:: nim
## <>a(href="http://nim-lang.org", newText("Nim rules."))
##
## Produces an XML tree for::
##
## Nim rules.
##
let x = callsite()
result = xmlConstructor(x)
proc child*(n: XmlNode, name: string): XmlNode =
## Finds the first child element of `n` with a name of `name`.
## Returns `nil` on failure.
assert n.kind == xnElement
for i in items(n):
if i.kind == xnElement:
if i.tag == name:
return i
proc attr*(n: XmlNode, name: string): string =
## Finds the first attribute of `n` with a name of `name`.
## Returns "" on failure.
assert n.kind == xnElement
if n.attrs == nil: return ""
return n.attrs.getOrDefault(name)
proc findAll*(n: XmlNode, tag: string, result: var seq[XmlNode]) =
## Iterates over all the children of `n` returning those matching `tag`.
##
## Found nodes satisfying the condition will be appended to the `result`
## sequence, which can't be nil or the proc will crash. Usage example:
##
## .. code-block::
## var
## html: XmlNode
## tags: seq[XmlNode] = @[]
##
## html = buildHtml()
## findAll(html, "img", tags)
## for imgTag in tags:
## process(imgTag)
assert isNil(result) == false
assert n.k == xnElement
for child in n.items():
if child.k != xnElement:
continue
if child.tag == tag:
result.add(child)
child.findAll(tag, result)
proc findAll*(n: XmlNode, tag: string): seq[XmlNode] =
## Shortcut version to assign in let blocks. Example:
##
## .. code-block::
## var html: XmlNode
##
## html = buildHtml(html)
## for imgTag in html.findAll("img"):
## process(imgTag)
newSeq(result, 0)
findAll(n, tag, result)
when isMainModule:
assert """Nim rules.""" ==
$(<>a(href="http://nim-lang.org", newText("Nim rules.")))