summary refs log blame commit diff stats
path: root/lib/pure/xmldomparser.nim
blob: fda46bac0ffcab17d34fa52807b091045710a283 (plain) (tree)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16















                                                                            
                  

                                                                                              
























                                                                  










                                                                    
                                       


                                           
                                                  


                                     
                                                    

















                                                      







                                                                       


                                                                
                                 

                                                    

                                              
 





                                                                            



                                                             
         
                                                                     



                                                                         


                                                                             
                    

                   
                                        











                                                

                                                             
         
                                                                     


               














                                                                          

                  








                                                                                
            
#
#
#            Nimrod's Runtime Library
#        (c) Copyright 2010 Dominik Picheta
#
#    See the file "copying.txt", included in this
#    distribution, for details about the copyright.
#

import xmldom, os, streams, parsexml, strutils

## This module parses an XML document into an XML DOM Document representation.

#XMLDom's Parser - Turns XML into a Document

type
  # Parsing errors. Both derive from EInvalidValue, so a handler for
  # EInvalidValue catches them as well.
  EMismatchedTag* = object of EInvalidValue ## Raised when a tag is not properly closed
  EParserError* = object of EInvalidValue ## Raised when an unexpected XML Parser event occurs

  # For namespaces: one namespace declaration collected while parsing.
  #   name         - the attribute key, i.e. "xmlns" or "xmlns:<prefix>"
  #   value        - the attribute value (the namespace URI)
  #   ownerElement - the element node the declaration was found on
  xmlnsAttr = tuple[name, value: string, ownerElement: PElement]

# Namespace declarations currently in effect, in the order they were
# encountered. parseElement appends an entry for every "xmlns" / "xmlns:*"
# attribute and removes an element's entries again once that element has been
# closed; getNS reads the list to resolve prefixes.
# NOTE(review): this is module-level mutable state, so parsing is presumably
# not re-entrant or thread-safe -- confirm before parsing concurrently.
var nsList: seq[xmlnsAttr] = @[] # Used for storing namespaces

proc getNS(prefix: string): string =
  ## Resolves ``prefix`` to a namespace URI using the declarations currently
  ## stored in ``nsList``.
  ##
  ## * If a ``xmlns:<prefix>`` declaration exists, its value is returned.
  ## * Otherwise the value of the default (``xmlns``) declaration is returned.
  ## * If neither exists, ``""`` is returned.
  ##
  ## ``nsList`` is appended to as elements are opened, so declarations made on
  ## inner elements come later in the list. The list is therefore searched
  ## from the newest entry to the oldest, so that a declaration on an inner
  ## element overrides one with the same name on an outer element.
  var defaultNS = ""
  var haveDefault = false

  var i = len(nsList) - 1
  while i >= 0:
    var key = nsList[i][0]
    if ":" in key:
      if key.split(':')[1] == prefix:
        return nsList[i][1]
    elif key == "xmlns" and not haveDefault:
      # Remember only the innermost default namespace. Don't return it
      # here, because a matching prefixed declaration further out must
      # still take precedence over the default namespace.
      defaultNS = nsList[i][1]
      haveDefault = true
    dec(i)

  # No prefixed declaration matched: fall back to the default namespace
  # (or "" when none was declared).
  return defaultNS
    
proc parseText(x: var TXmlParser, doc: var PDocument): PText =
  ## Creates a text node through ``doc`` holding the character data of the
  ## parser's current event.
  return createTextNode(doc, charData(x))

proc parseElement(x: var TXmlParser, doc: var PDocument): PElement =
  ## Parses one element, including its attributes and all of its children,
  ## and returns the resulting element node.
  ##
  ## The callers in this module invoke it while ``x`` is positioned on the
  ## element's start event (``xmlElementStart`` or ``xmlElementOpen``). The
  ## proc consumes events until the matching end tag is reached and returns
  ## *without* advancing past that end tag; the caller's loop does that.
  ##
  ## Namespace declarations (``xmlns`` / ``xmlns:*`` attributes) are recorded
  ## in ``nsList`` while the element is open and removed again when it closes.
  ## A prefixed attribute is resolved against the declarations seen so far,
  ## so a declaration that appears *after* the attribute using it on the same
  ## tag is not taken into account.
  ##
  ## Raises ``EMismatchedTag`` if a different end tag is found or the input
  ## ends before the element is closed, and ``EParserError`` for any parser
  ## event that is not handled below.
  var n = doc.createElement("")

  while True:
    case x.kind()
    of xmlEof:
      break
    of xmlElementStart, xmlElementOpen:
      if n.tagName() != "":
        # This element already has its name, so the start tag belongs to a
        # child element: parse it recursively and attach it.
        n.appendChild(parseElement(x, doc))
      else:
        # First start event: this is the element itself. The namespace is
        # filled in when the end tag is reached.
        n = doc.createElementNS("", x.elementName)
        
    of xmlElementEnd:
      if x.elementName == n.nodeName:
        # n.normalize() # Remove any whitespace etc.
        
        var ns: string
        if x.elementName.contains(':'):
          ns = getNS(x.elementName.split(':')[0])
        else:
          ns = getNS("")
        
        n.namespaceURI = ns
        
        # Remove any namespaces this element declared. Walking backwards
        # keeps the remaining indices valid while entries are deleted.
        var i = len(nsList) - 1
        while i >= 0:
          if nsList[i][2] == n:
            nsList.delete(i)
          dec(i)

        return n
      else: #The wrong element is ended
        raise newException(EMismatchedTag, "Mismatched tag at line " & 
          $x.getLine() & " column " & $x.getColumn)
      
    of xmlCharData:
      n.appendChild(parseText(x, doc))
    of xmlAttribute:
      if x.attrKey == "xmlns" or x.attrKey.startsWith("xmlns:"):
        nsList.add((x.attrKey, x.attrValue, n))
        
      if x.attrKey.contains(':'):
        # Look the namespace up by the attribute's prefix only; getNS
        # compares its argument against the part after "xmlns:", so passing
        # the whole qualified name would never match a declaration.
        var ns = getNS(x.attrKey.split(':')[0])
        n.setAttributeNS(ns, x.attrKey, x.attrValue)
      else:
        n.setAttribute(x.attrKey, x.attrValue)

    of xmlCData:
      n.appendChild(doc.createCDATASection(x.charData()))
    of xmlComment:
      n.appendChild(doc.createComment(x.charData()))
    of xmlPI:
      n.appendChild(doc.createProcessingInstruction(x.PIName(), x.PIRest()))
      
    of xmlWhitespace, xmlElementClose, xmlEntity, xmlSpecial:
      # Unused 'events'

    else:
      raise newException(EParserError, "Unexpected XML Parser event")
    x.next()

  # Only reached through the xmlEof branch: the input ended while this
  # element was still open.
  raise newException(EMismatchedTag, 
    "Mismatched tag at line " & $x.getLine() & " column " & $x.getColumn)

proc loadXMLStream*(stream: PStream): PDocument =
  ## Loads and parses XML from a stream specified by ``stream``, and returns 
  ## a ``PDocument``
  ##
  ## The document is built around the top-level element found in the stream;
  ## if more than one top-level element is encountered, the last one is used.
  ## If no element is found at all, the returned document is left at its
  ## default (uninitialised) value.
  ##
  ## Processing instructions (such as the ``<?xml ...?>`` declaration) and
  ## comments that appear outside the root element are skipped, because there
  ## is no node to attach them to at that point.
  ##
  ## Raises ``EParserError`` for any other unexpected event at document
  ## level; errors raised by ``parseElement`` propagate unchanged.
  ##
  ## ``stream`` is not closed by this proc.

  var x: TXmlParser
  open(x, stream, nil, {reportComments})
  
  var doc: PDocument
  var domImpl: PDOMImplementation = getDOM()
  
  while True:
    x.next()
    case x.kind()
    of xmlEof:
      break
    of xmlElementStart, xmlElementOpen:
      # parseElement consumes everything up to the matching end tag.
      var el: PElement = parseElement(x, doc)
      doc = domImpl.createDocument(el)
    of xmlWhitespace, xmlElementClose, xmlEntity, xmlSpecial, xmlPI, xmlComment:
      # Unused 'events'
    else:
      raise newException(EParserError, "Unexpected XML Parser event")

  return doc

proc loadXML*(xml: string): PDocument =
  ## Parses the XML text held in ``xml`` and returns the resulting
  ## ``PDocument``. The text is wrapped in a string stream and handed to
  ## ``loadXMLStream``.
  result = loadXMLStream(newStringStream(xml))
  
    
proc loadXMLFile*(path: string): PDocument =
  ## Loads and parses XML from a file specified by ``path``, and returns 
  ## a ``PDocument``
  ##
  ## Raises ``EIO`` if the file cannot be opened for reading. Errors raised
  ## while parsing propagate unchanged. The file is closed again in either
  ## case.
  
  var s = newFileStream(path, fmRead)
  if s == nil: raise newException(EIO, "Unable to read file " & path)
  try:
    result = loadXMLStream(s)
  finally:
    # This proc opened the stream, so it is responsible for releasing the
    # file handle, whether parsing succeeded or raised.
    s.close()


when isMainModule:
  # Manual smoke test: parse a file, list every element that has a namespace
  # and then print the whole document.
  # The file to parse can be given as the first command line argument; when
  # no argument is given the original developer's test file is used.
  var path = r"C:\Users\Dominik\Desktop\Code\Nimrod\xmldom\test.xml"
  if paramCount() > 0:
    path = paramStr(1)

  var xml = loadXMLFile(path)
  #echo(xml.getElementsByTagName("m:test2")[0].namespaceURI)
  #echo(xml.getElementsByTagName("bla:test")[0].namespaceURI)
  #echo(xml.getElementsByTagName("test")[0].namespaceURI)
  for i in items(xml.getElementsByTagName("*")):
    if i.namespaceURI != nil:
      echo(i.nodeName, "=", i.namespaceURI)

    
  echo($xml)