summary refs log tree commit diff stats
path: root/lib/pure/xmlparser.nim
blob: 840cae7341a3e4e04a9e9dd79b6a48110fee3f45 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
#
#
#            Nim's Runtime Library
#        (c) Copyright 2010 Andreas Rumpf
#
#    See the file "copying.txt", included in this
#    distribution, for details about the copyright.
#

## This module parses an XML document and creates its XML tree representation.

import streams, parsexml, strtabs, xmltree

type
  XmlError* = object of ValueError ## exception that is raised
                                   ## for invalid XML
    errors*: seq[string]           ## all detected parsing errors

{.deprecated: [EInvalidXml: XmlError].}

proc raiseInvalidXml(errors: seq[string]) = 
  var e: ref XmlError
  new(e)
  e.msg = errors[0]
  e.errors = errors
  raise e

proc addNode(father, son: XmlNode) = 
  if son != nil: add(father, son)

proc parse(x: var XmlParser, errors: var seq[string]): XmlNode

proc untilElementEnd(x: var XmlParser, result: XmlNode, 
                     errors: var seq[string]) =
  while true:
    case x.kind
    of xmlElementEnd: 
      if x.elementName == result.tag: 
        next(x)
      else:
        errors.add(errorMsg(x, "</" & result.tag & "> expected"))
        # do not skip it here!
      break
    of xmlEof:
      errors.add(errorMsg(x, "</" & result.tag & "> expected"))
      break
    else:
      result.addNode(parse(x, errors))

proc parse(x: var XmlParser, errors: var seq[string]): XmlNode =
  case x.kind
  of xmlComment: 
    result = newComment(x.charData)
    next(x)
  of xmlCharData, xmlWhitespace:
    result = newText(x.charData)
    next(x)
  of xmlPI, xmlSpecial:
    # we just ignore processing instructions for now
    next(x)
  of xmlError:
    errors.add(errorMsg(x))
    next(x)
  of xmlElementStart:    ## ``<elem>``
    result = newElement(x.elementName)
    next(x)
    untilElementEnd(x, result, errors)
  of xmlElementEnd:
    errors.add(errorMsg(x, "unexpected ending tag: " & x.elementName))
  of xmlElementOpen: 
    result = newElement(x.elementName)
    next(x)
    result.attrs = newStringTable()
    while true: 
      case x.kind
      of xmlAttribute:
        result.attrs[x.attrKey] = x.attrValue
        next(x)
      of xmlElementClose:
        next(x)
        break
      of xmlError:
        errors.add(errorMsg(x))
        next(x)
        break
      else:
        errors.add(errorMsg(x, "'>' expected"))
        next(x)
        break
    untilElementEnd(x, result, errors)
  of xmlAttribute, xmlElementClose:
    errors.add(errorMsg(x, "<some_tag> expected"))
    next(x)
  of xmlCData: 
    result = newCData(x.charData)
    next(x)
  of xmlEntity:
    ## &entity;
    errors.add(errorMsg(x, "unknown entity: " & x.entityName))
    next(x)
  of xmlEof: discard

proc parseXml*(s: Stream, filename: string, 
               errors: var seq[string]): XmlNode = 
  ## parses the XML from stream `s` and returns a ``PXmlNode``. Every
  ## occurred parsing error is added to the `errors` sequence.
  var x: XmlParser
  open(x, s, filename, {reportComments})
  while true:
    x.next()
    case x.kind
    of xmlElementOpen, xmlElementStart: 
      result = parse(x, errors)
      break
    of xmlComment, xmlWhitespace, xmlSpecial, xmlPI: discard # just skip it
    of xmlError:
      errors.add(errorMsg(x))
    else:
      errors.add(errorMsg(x, "<some_tag> expected"))
      break
  close(x)

proc parseXml*(s: Stream): XmlNode = 
  ## parses the XTML from stream `s` and returns a ``PXmlNode``. All parsing
  ## errors are turned into an ``EInvalidXML`` exception.
  var errors: seq[string] = @[]
  result = parseXml(s, "unknown_html_doc", errors)
  if errors.len > 0: raiseInvalidXml(errors)

proc loadXml*(path: string, errors: var seq[string]): XmlNode =
  ## Loads and parses XML from file specified by ``path``, and returns 
  ## a ``PXmlNode``. Every occurred parsing error is added to the `errors`
  ## sequence.
  var s = newFileStream(path, fmRead)
  if s == nil: raise newException(IOError, "Unable to read file: " & path)
  result = parseXml(s, path, errors)

proc loadXml*(path: string): XmlNode =
  ## Loads and parses XML from file specified by ``path``, and returns 
  ## a ``PXmlNode``.  All parsing errors are turned into an ``EInvalidXML``
  ## exception.  
  var errors: seq[string] = @[]
  result = loadXml(path, errors)
  if errors.len > 0: raiseInvalidXml(errors)

when not defined(testing) and isMainModule:
  import os

  var errors: seq[string] = @[]  
  var x = loadXml(paramStr(1), errors)
  for e in items(errors): echo e
  
  var f: File
  if open(f, "xmltest.txt", fmWrite):
    f.write($x)
    f.close()
  else:
    quit("cannot write test.txt")