summary refs log tree commit diff stats
path: root/lib/pure/xmlparser.nim
blob: 2a2d19dca34a44e8d9aed405a96be7135c8906d8 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
#
#
#            Nim's Runtime Library
#        (c) Copyright 2010 Andreas Rumpf
#
#    See the file "copying.txt", included in this
#    distribution, for details about the copyright.
#

## This module parses an XML document and creates its XML tree representation.

import streams, parsexml, strtabs, xmltree

type
  XmlError* = object of ValueError ## Exception that is raised
                                   ## for invalid XML. When raised by this
                                   ## module its ``msg`` is the first entry
                                   ## of ``errors``.
    errors*: seq[string]           ## All detected parsing errors, in the
                                   ## order in which they were recorded.

proc raiseInvalidXml(errors: seq[string]) =
  ## Raises an ``XmlError`` whose message is the first entry of ``errors``
  ## and whose ``errors`` field carries the complete list. ``errors`` must
  ## not be empty, because its first entry is read unconditionally.
  let failure = newException(XmlError, errors[0])
  failure.errors = errors
  raise failure

proc addNode(father, son: XmlNode) =
  ## Appends ``son`` to the children of ``father``. A nil ``son`` is silently
  ## ignored, so callers may pass on the result of a parsing step that did
  ## not produce a node.
  if son.isNil: return
  father.add(son)

# Forward declaration: ``parse`` and ``untilElementEnd`` call each other.
proc parse(x: var XmlParser, errors: var seq[string]): XmlNode {.gcsafe.}

proc untilElementEnd(x: var XmlParser, result: XmlNode,
                     errors: var seq[string]) =
  ## Parses the content of the element ``result``: every node found before
  ## the next end tag (or the end of the input) is added as a child of
  ## ``result``. A matching end tag is consumed. A mismatching end tag or a
  ## premature end of input is recorded in ``errors``.
  while x.kind notin {xmlElementEnd, xmlEof}:
    result.addNode(parse(x, errors))
  if x.kind == xmlElementEnd and x.elementName == result.tag:
    # the expected closing tag: step over it
    next(x)
  else:
    # Wrong closing tag or end of input. The offending token is not skipped
    # here, so the parser stays positioned on it.
    errors.add(errorMsg(x, "</" & result.tag & "> expected"))

proc parse(x: var XmlParser, errors: var seq[string]): XmlNode =
  ## Builds the node for the event ``x`` is currently positioned on.
  ## Elements are parsed recursively, including their attributes and
  ## children. Problems are appended to ``errors``. Events that produce no
  ## node (processing instructions, special tags, errors, misplaced tokens
  ## and the end of input) leave the result nil.
  case x.kind
  of xmlEof:
    discard
  of xmlPI, xmlSpecial:
    # processing instructions and special tags are skipped for now
    next(x)
  of xmlError:
    errors.add(errorMsg(x))
    next(x)
  of xmlAttribute, xmlElementClose:
    errors.add(errorMsg(x, "<some_tag> expected"))
    next(x)
  of xmlElementEnd:
    # reported only; the parser is not advanced in this branch
    errors.add(errorMsg(x, "unexpected ending tag: " & x.elementName))
  of xmlCharData, xmlWhitespace:
    result = newText(x.charData)
    next(x)
  of xmlComment:
    result = newComment(x.charData)
    next(x)
  of xmlCData:
    result = newCData(x.charData)
    next(x)
  of xmlEntity:
    # ``&entity;``
    result = newEntity(x.entityName)
    next(x)
  of xmlElementStart:
    # ``<elem>``
    result = newElement(x.elementName)
    next(x)
    untilElementEnd(x, result, errors)
  of xmlElementOpen:
    # an element whose start tag is followed by attribute events
    result = newElement(x.elementName)
    next(x)
    result.attrs = newStringTable()
    while x.kind == xmlAttribute:
      result.attrs[x.attrKey] = x.attrValue
      next(x)
    # The attribute list has to end with ``xmlElementClose``. Whatever event
    # ended it is consumed; anything but the expected one is reported first.
    if x.kind == xmlError:
      errors.add(errorMsg(x))
    elif x.kind != xmlElementClose:
      errors.add(errorMsg(x, "'>' expected"))
    next(x)
    untilElementEnd(x, result, errors)

proc parseXml*(s: Stream, filename: string,
               errors: var seq[string],
               options: set[XmlParseOption] = {reportComments}): XmlNode =
  ## Parses the XML from stream ``s`` and returns a ``XmlNode``. Every
  ## occurred parsing error is added to the ``errors`` sequence.
  ##
  ## ``filename`` is handed to the underlying parser together with the
  ## stream. ``options`` is passed on to the underlying parser as well; the
  ## default reports comments, which end up as comment nodes in the tree.
  ##
  ## Comments, whitespace, special tags and processing instructions that
  ## precede the root element are skipped. Parsing stops after the first
  ## top-level element. If no element is found, an error is recorded and the
  ## result is nil.
  var x: XmlParser
  open(x, s, filename, options)
  try:
    while true:
      x.next()
      case x.kind
      of xmlElementOpen, xmlElementStart:
        result = parse(x, errors)
        break
      of xmlComment, xmlWhitespace, xmlSpecial, xmlPI: discard # just skip it
      of xmlError:
        # keep scanning: a later event may still be the root element
        errors.add(errorMsg(x))
      else:
        errors.add(errorMsg(x, "<some_tag> expected"))
        break
  finally:
    # release the parser even if parsing is aborted by an exception
    close(x)

proc parseXml*(s: Stream): XmlNode =
  ## Reads an XML document from the stream ``s`` and returns its tree as a
  ## ``XmlNode``. If any parsing error was detected, an ``XmlError``
  ## exception carrying all of them is raised instead.
  var problems = newSeq[string]()
  result = parseXml(s, "unknown_xml_doc", problems)
  if problems.len != 0:
    raiseInvalidXml(problems)

proc parseXml*(str: string): XmlNode =
  ## Reads an XML document from the string ``str`` and returns its tree as
  ## a ``XmlNode``. If any parsing error was detected, an ``XmlError``
  ## exception is raised instead.
  let input = newStringStream(str)
  result = parseXml(input)

proc loadXml*(path: string, errors: var seq[string]): XmlNode =
  ## Opens the file at ``path``, parses its content as XML and returns the
  ## resulting ``XmlNode``. Each parsing error that occurs is appended to
  ## ``errors``. An ``IOError`` is raised if the file cannot be opened for
  ## reading.
  let source = newFileStream(path, fmRead)
  if source.isNil:
    raise newException(IOError, "Unable to read file: " & path)
  result = parseXml(source, path, errors)

proc loadXml*(path: string): XmlNode =
  ## Opens the file at ``path``, parses its content as XML and returns the
  ## resulting ``XmlNode``. If any parsing error was detected, an
  ## ``XmlError`` exception carrying all of them is raised instead.
  var problems = newSeq[string]()
  result = loadXml(path, problems)
  if problems.len != 0:
    raiseInvalidXml(problems)

when isMainModule:
  when not defined(testing):
    # Manual check: parse the file named by the first command line argument,
    # print every parsing error and write the resulting tree to xmltest.txt.
    import os

    var errors: seq[string] = @[]
    let x = loadXml(paramStr(1), errors)
    for e in errors: echo e

    var f: File
    if open(f, "xmltest.txt", fmWrite):
      f.write($x)
      f.close()
    else:
      quit("cannot write xmltest.txt")
  else:
    # Self-tests. ``doAssert`` is used so that the checks are still executed
    # when assertions are disabled.
    block: # correctly parse ../../tests/testdata/doc1.xml
      let filePath = "tests/testdata/doc1.xml"
      var errors: seq[string] = @[]
      discard loadXml(filePath, errors)
      doAssert(errors.len == 0, "The file tests/testdata/doc1.xml should be parsed without errors.")

    block bug1518:
      # an entity inside character data must survive a parse/render round trip
      var err: seq[string] = @[]
      doAssert $parseXml(newStringStream"<tag>One &amp; two</tag>", "temp.xml",
          err) == "<tag>One &amp; two</tag>"