#
#
#            Nimrod's Runtime Library
#        (c) Copyright 2010 Dominik Picheta
#
#    See the file "copying.txt", included in this
#    distribution, for details about the copyright.
#

import xmldom, os, streams, parsexml, strutils

## This module parses an XML document into an XML DOM ``PDocument``
## representation.

# XMLDom's Parser - Turns XML into a Document

type
  # Parsing errors
  EMismatchedTag* = object of EInvalidValue ## Raised when a tag is not properly closed
  EParserError* = object of EInvalidValue ## Raised when an unexpected XML Parser event occurs

  # For namespaces: the ``xmlns``/``xmlns:prefix`` attribute name, the
  # namespace URI it declares, and the element that carries the declaration.
  xmlnsAttr = tuple[name, value: string, ownerElement: PElement]

# Namespace declarations of the elements that are currently open, in document
# order (outermost first). This is module-level state shared by every parse,
# so parsing is not reentrant.
var nsList: seq[xmlnsAttr] = @[]

proc getNS(prefix: string): string =
  ## Returns the namespace URI currently bound to ``prefix``.
  ##
  ## The declarations in ``nsList`` are searched from the most recently added
  ## one backwards, so a declaration on an inner element overrides one made on
  ## an ancestor. If no ``xmlns:<prefix>`` declaration exists, the innermost
  ## default namespace (a plain ``xmlns`` attribute) is returned; if there is
  ## none either, the result is ``""``.
  var defaultNS = ""
  var haveDefault = false

  for i in countdown(len(nsList) - 1, 0):
    var key = nsList[i][0]
    if ":" in key:
      if key.split(':')[1] == prefix:
        return nsList[i][1]

    # Don't return the default namespace inside the loop, because then it
    # would take precedence over an explicit prefix declaration made on an
    # outer element. Only remember the innermost one.
    if key == "xmlns" and not haveDefault:
      defaultNS = nsList[i][1]
      haveDefault = true

  return defaultNS

proc parseText(x: var TXmlParser, doc: var PDocument): PText =
  ## Creates a text node in ``doc`` from the parser's current character data.
  result = doc.createTextNode(x.charData())

proc parseElement(x: var TXmlParser, doc: var PDocument): PElement =
  ## Parses the element the parser is currently positioned on, including its
  ## attributes and children, and returns it.
  ##
  ## Must be called while ``x.kind`` is ``xmlElementStart`` or
  ## ``xmlElementOpen``. Raises ``EMismatchedTag`` if a different element is
  ## closed or the input ends before the element is closed, and
  ## ``EParserError`` on a parser event that is not handled here.
  var n = doc.createElement("")

  while True:
    case x.kind()
    of xmlEof:
      break
    of xmlElementStart, xmlElementOpen:
      if n.tagName() != "":
        # ``n`` already has a name, so this start tag opens a child element.
        n.appendChild(parseElement(x, doc))
      else:
        # First start tag seen: this is the element being parsed.
        n = doc.createElementNS("", x.elementName)

    of xmlElementEnd:
      if x.elementName == n.nodeName:
        # n.normalize() # Remove any whitespace etc.

        # The namespace is resolved at the end tag, once every declaration
        # made in the element's own start tag has been registered.
        var ns: string
        if x.elementName.contains(':'):
          ns = getNS(x.elementName.split(':')[0])
        else:
          ns = getNS("")

        n.namespaceURI = ns

        # Remove any namespaces this element declared; they go out of scope
        # here. Walking backwards keeps the remaining indices valid while
        # entries are deleted.
        for i in countdown(len(nsList) - 1, 0):
          if nsList[i][2] == n:
            nsList.delete(i)

        return n

      else: # The wrong element is ended
        raise newException(EMismatchedTag, "Mismatched tag at line " &
          $x.getLine() & " column " & $x.getColumn)

    of xmlCharData:
      n.appendChild(parseText(x, doc))
    of xmlAttribute:
      # Register namespace declarations before resolving, so that an
      # attribute can refer to a prefix declared earlier in the same tag.
      if x.attrKey == "xmlns" or x.attrKey.startsWith("xmlns:"):
        nsList.add((x.attrKey, x.attrValue, n))

      if x.attrKey.contains(':'):
        # Look the namespace up by the prefix only, not by the whole
        # qualified name (which could never match a declared prefix).
        var ns = getNS(x.attrKey.split(':')[0])
        n.setAttributeNS(ns, x.attrKey, x.attrValue)
      else:
        n.setAttribute(x.attrKey, x.attrValue)

    of xmlCData:
      n.appendChild(doc.createCDATASection(x.charData()))
    of xmlComment:
      n.appendChild(doc.createComment(x.charData()))
    of xmlPI:
      n.appendChild(doc.createProcessingInstruction(x.PIName(), x.PIRest()))

    of xmlWhitespace, xmlElementClose, xmlEntity, xmlSpecial:
      # Unused 'events'

    else:
      raise newException(EParserError, "Unexpected XML Parser event")
    x.next()

  # End of input was reached before the element's end tag.
  raise newException(EMismatchedTag,
    "Mismatched tag at line " & $x.getLine() & " column " & $x.getColumn)

proc loadXMLStream*(stream: PStream): PDocument =
  ## Loads and parses XML from a stream specified by ``stream``, and returns
  ## a ``PDocument``. The result is ``nil`` if the input contains no element.
  ##
  ## Raises ``EMismatchedTag`` or ``EParserError`` if the input cannot be
  ## parsed (see ``parseElement``).
  var x: TXmlParser
  open(x, stream, nil, {reportComments})

  # Start from a clean slate: a previous parse that ended with an exception
  # leaves its namespace declarations behind in the shared list.
  nsList = @[]

  var xmlDoc: PDocument
  var dom: PDOMImplementation = getDOM()

  while True:
    x.next()
    case x.kind()
    of xmlEof:
      break
    of xmlElementStart, xmlElementOpen:
      var el: PElement = parseElement(x, xmlDoc)
      xmlDoc = dom.createDocument(el)
    of xmlWhitespace, xmlElementClose, xmlEntity, xmlSpecial:
      # Unused 'events'

    else:
      raise newException(EParserError, "Unexpected XML Parser event")

  return xmlDoc

proc loadXML*(xml: string): PDocument =
  ## Loads and parses XML from a string specified by ``xml``, and returns
  ## a ``PDocument``
  var s = newStringStream(xml)
  return loadXMLStream(s)

proc loadXMLFile*(path: string): PDocument =
  ## Loads and parses XML from a file specified by ``path``, and returns
  ## a ``PDocument``. Raises ``EIO`` if the file cannot be opened.
  var s = newFileStream(path, fmRead)
  if s == nil: raise newException(EIO, "Unable to read file " & path)
  # The stream was opened here, so it is closed here as well, whether or not
  # parsing succeeds.
  try:
    result = loadXMLStream(s)
  finally:
    s.close()

when isMainModule:
  var xml = loadXMLFile(r"C:\Users\Dominik\Desktop\Code\Nimrod\xmldom\test.xml")
  #echo(xml.getElementsByTagName("m:test2")[0].namespaceURI)
  #echo(xml.getElementsByTagName("bla:test")[0].namespaceURI)
  #echo(xml.getElementsByTagName("test")[0].namespaceURI)
  for i in items(xml.getElementsByTagName("*")):
    if i.namespaceURI != nil:
      echo(i.nodeName, "=", i.namespaceURI)

  echo($xml)