summary refs log tree commit diff stats
path: root/lib/pure/xmldomparser.nim
blob: 7c7f7b99ce292af953dfd3cf357ca7d11e9a54b6 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
#
#
#            Nim's Runtime Library
#        (c) Copyright 2010 Dominik Picheta
#
#    See the file "copying.txt", included in this
#    distribution, for details about the copyright.
#

import xmldom, os, streams, parsexml, strutils

## This module parses an XML document into an XML DOM document representation.

#XMLDom's Parser - Turns XML into a Document

type
  # Parsing errors
  # Both derive from ValueError, so callers can catch them individually or
  # together through the common base type.
  EMismatchedTag* = object of ValueError ## Raised when a tag is not properly closed
  EParserError* = object of ValueError ## Raised when an unexpected XML Parser event occurs

  # For namespaces
  # One recorded namespace declaration:
  #   name         - the attribute key, either "xmlns" or "xmlns:<prefix>"
  #   value        - the attribute value (the namespace URI)
  #   ownerElement - the element that was being built when the attribute
  #                  was encountered
  XmlnsAttr = tuple[name, value: string, ownerElement: PElement]

# Namespace declarations collected while parsing. Entries are appended when
# an "xmlns"/"xmlns:<prefix>" attribute is parsed and removed again when the
# element that declared them is closed. This is module-level state shared by
# every parse performed through this module.
var nsList: seq[XmlnsAttr] = @[] # Used for storing namespaces

proc getNS(prefix: string): string =
  ## Resolves ``prefix`` to a namespace URI using the declarations currently
  ## stored in ``nsList``.
  ##
  ## ``nsList`` is filled in document order, so the most recently added
  ## entries belong to the innermost elements. The list is therefore scanned
  ## from the end so that a nested redeclaration shadows an outer one.
  ##
  ## Returns the URI bound to ``prefix`` if there is one; otherwise the
  ## innermost default namespace (``xmlns="..."``) if any; otherwise ``""``.
  var defaultNS = ""
  var haveDefault = false

  for i in countdown(high(nsList), 0):
    let key = nsList[i].name
    if ":" in key:
      # "xmlns:<prefix>" declaration: compare the part after the colon
      if key.split(':')[1] == prefix:
        return nsList[i].value

    if key == "xmlns" and not haveDefault:
      # Remember only the innermost default namespace
      defaultNS = nsList[i].value
      haveDefault = true

  # Don't return the default namespace inside the loop, because then it
  # would take precedence over an explicit prefix binding declared further
  # out.
  if haveDefault:
    return defaultNS

  return ""

proc parseText(x: var XmlParser, doc: var PDocument): PText =
  ## Creates a text node through ``doc`` holding the character data of the
  ## parser's current event.
  let content = x.charData()
  return createTextNode(doc, content)

proc parseElement(x: var XmlParser, doc: var PDocument): PElement =
  ## Builds a DOM element from the parser events, starting at the current
  ## element start/open event, and returns it when the matching end tag is
  ## reached. Child elements are parsed recursively.
  ##
  ## On return the parser is still positioned on the element's end-tag
  ## event; the caller is expected to advance it.
  ##
  ## Raises ``EMismatchedTag`` if an end tag with a different name is found
  ## or the input ends before the element is closed, and ``EParserError``
  ## for parser events that are not handled here.

  # Placeholder with an empty tag name; it is replaced as soon as the start
  # tag of this element is seen.
  var n = doc.createElement("")

  while true:
    case x.kind()
    of xmlEof:
      break
    of xmlElementStart, xmlElementOpen:
      if n.tagName() != "":
        # We already have our own start tag, so this is a child element
        n.appendChild(parseElement(x, doc))
      else:
        n = doc.createElementNS("", x.elementName)

    of xmlElementEnd:
      if x.elementName == n.nodeName:
        # n.normalize() # Remove any whitespace etc.

        # The namespace is resolved at the end tag, when all xmlns
        # attributes of this element have been recorded.
        var ns: string
        if x.elementName.contains(':'):
          ns = getNS(x.elementName.split(':')[0])
        else:
          ns = getNS("")

        n.namespaceURI = ns

        # Remove any namespaces this element declared. Walking backwards
        # keeps the remaining indices valid while deleting.
        for i in countdown(high(nsList), 0):
          if nsList[i].ownerElement == n:
            nsList.delete(i)

        return n
      else: #The wrong element is ended
        raise newException(EMismatchedTag, "Mismatched tag at line " &
          $x.getLine() & " column " & $x.getColumn)

    of xmlCharData:
      n.appendChild(parseText(x, doc))
    of xmlAttribute:
      # Record namespace declarations so that getNS can resolve them
      if x.attrKey == "xmlns" or x.attrKey.startsWith("xmlns:"):
        nsList.add((x.attrKey, x.attrValue, n))

      if x.attrKey.contains(':'):
        # Look up the namespace by the prefix only (the text before the
        # colon), the same way element names are resolved above.
        var ns = getNS(x.attrKey.split(':')[0])
        n.setAttributeNS(ns, x.attrKey, x.attrValue)
      else:
        n.setAttribute(x.attrKey, x.attrValue)

    of xmlCData:
      n.appendChild(doc.createCDATASection(x.charData()))
    of xmlComment:
      n.appendChild(doc.createComment(x.charData()))
    of xmlPI:
      n.appendChild(doc.createProcessingInstruction(x.piName(), x.piRest()))

    of xmlWhitespace, xmlElementClose, xmlEntity, xmlSpecial:
      discard " Unused \'events\'"

    else:
      raise newException(EParserError, "Unexpected XML Parser event")
    x.next()

  # Only reached when the input ended before this element was closed
  raise newException(EMismatchedTag,
    "Mismatched tag at line " & $x.getLine() & " column " & $x.getColumn)

proc loadXMLStream*(stream: Stream): PDocument =
  ## Loads and parses XML from a stream specified by ``stream``, and returns
  ## a ``PDocument``
  ##
  ## Processing instructions (including the ``<?xml ...?>`` declaration) and
  ## comments outside the root element are skipped. If the input contains no
  ## element at all the result is ``nil``; if it contains several top-level
  ## elements the document is built from the last one.
  ##
  ## The parser is closed before returning, also when parsing fails; closing
  ## the parser closes ``stream`` as well.
  ##
  ## Raises ``EParserError`` on an unexpected parser event and
  ## ``EMismatchedTag`` on improperly nested or unclosed tags.

  var x: XmlParser
  open(x, stream, nil, {reportComments})

  # Drop namespace declarations that an earlier parse may have left behind
  # when it was aborted by an exception.
  nsList = @[]

  var xmlDoc: PDocument
  var dom: PDOMImplementation = getDOM()

  try:
    while true:
      x.next()
      case x.kind()
      of xmlEof:
        break
      of xmlElementStart, xmlElementOpen:
        var el: PElement = parseElement(x, xmlDoc)
        xmlDoc = dom.createDocument(el)
      of xmlWhitespace, xmlElementClose, xmlEntity, xmlSpecial,
         xmlPI, xmlComment:
        # xmlPI and xmlComment occur in the prolog or after the root element;
        # they are not attached to the document.
        discard " Unused \'events\'"
      else:
        raise newException(EParserError, "Unexpected XML Parser event")
  finally:
    # Release the parser's resources on success and on error
    x.close()

  return xmlDoc

proc loadXML*(xml: string): PDocument =
  ## Parses the XML text given in ``xml`` and returns the resulting
  ## ``PDocument``. The string is wrapped in a string stream and handed to
  ## ``loadXMLStream``.
  result = loadXMLStream(newStringStream(xml))


proc loadXMLFile*(path: string): PDocument =
  ## Loads and parses XML from a file specified by ``path``, and returns
  ## a ``PDocument``
  ##
  ## Raises ``IOError`` if the file cannot be opened for reading. The file
  ## is closed before returning, also when parsing raises an exception.

  var s = newFileStream(path, fmRead)
  if s == nil: raise newException(IOError, "Unable to read file " & path)
  try:
    result = loadXMLStream(s)
  finally:
    # Release the file handle whether or not parsing succeeded
    s.close()


when not defined(testing) and isMainModule:
  # Manual check, compiled only when this file is the main module and
  # `testing` is not defined: load a sample file, print the namespace of
  # every element that has one, then print the whole document.
  var sampleDoc = loadXMLFile("nim/xmldom/test.xml")
  #echo(sampleDoc.getElementsByTagName("m:test2")[0].namespaceURI)
  #echo(sampleDoc.getElementsByTagName("bla:test")[0].namespaceURI)
  #echo(sampleDoc.getElementsByTagName("test")[0].namespaceURI)
  let allElements = sampleDoc.getElementsByTagName("*")
  for element in items(allElements):
    if element.namespaceURI != nil:
      echo(element.nodeName, "=", element.namespaceURI)


  echo($sampleDoc)