summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rwxr-xr-xdoc/lib.txt11
-rw-r--r--lib/pure/htmlparser.nim247
-rwxr-xr-xlib/pure/parsexml.nim2
-rwxr-xr-xlib/pure/re.nim6
-rw-r--r--lib/pure/xmldom.nim244
-rw-r--r--lib/pure/xmldomparser.nim13
-rwxr-xr-xlib/pure/xmlgen.nim8
-rw-r--r--lib/pure/xmltree.nim231
-rw-r--r--lib/pure/xmltreeparser.nim52
-rwxr-xr-xlib/system.nim4
-rwxr-xr-xweb/news.txt6
11 files changed, 731 insertions, 93 deletions
diff --git a/doc/lib.txt b/doc/lib.txt
index 06edf997c..609889607 100755
--- a/doc/lib.txt
+++ b/doc/lib.txt
@@ -172,7 +172,16 @@ XML Processing
   This module implements the XML DOM Level 2.
 
 * `xmldomparser <xmldomparser.html>`_
-  This module parses a XML Document into a XML DOM Document representation.
+  This module parses an XML Document into an XML DOM Document representation.
+
+* `xmltree <xmltree.html>`_
+  A simple XML tree. More efficient and simpler than the DOM.
+
+* `xmltreeparser <xmltreeparser.html>`_ 
+  This module parses an XML document and creates its XML tree representation.
+  
+* `htmlparser <htmlparser.html>`_ 
+  This module parses an HTML document and creates its XML tree representation.
 
 
 Code generation
diff --git a/lib/pure/htmlparser.nim b/lib/pure/htmlparser.nim
new file mode 100644
index 000000000..df840e15c
--- /dev/null
+++ b/lib/pure/htmlparser.nim
@@ -0,0 +1,247 @@
+#
+#
+#            Nimrod's Runtime Library
+#        (c) Copyright 2010 Andreas Rumpf
+#
+#    See the file "copying.txt", included in this
+#    distribution, for details about the copyright.
+#
+
+## This module parses an HTML document and creates its XML tree representation.
+## It is supposed to handle the *wild* HTML the real world uses.
+## 
+## It can be used to parse a wild HTML document and output it as a valid XHTML
+## document (if you are lucky):
+##
+## .. code-block:: nimrod
+##
+##   echo loadHtml("mydirty.html")
+##
+##
+## Every tag in the resulting tree is in lower case.
+##
+## **Note:** The resulting ``PXmlNode``s already use the ``clientData`` field, 
+## so it cannot be used by clients of this library.
+
+import streams, parsexml, xmltree
+
+type
+  THtmlTag* = enum ## list of all supported HTML tags; order will always be
+                   ## alphabetical
+    tagUnknown,    ## unknown HTML element
+    tagA,          ## the HTML ``a`` element
+    tagAcronym,    ## the HTML ``acronym`` element
+    tagAddress,    ## the HTML ``address`` element
+    tagArea,       ## the HTML ``area`` element
+    tagB,          ## the HTML ``b`` element
+    tagBase,       ## the HTML ``base`` element
+    tagBig,        ## the HTML ``big`` element
+    tagBlockquote, ## the HTML ``blockquote`` element
+    tagBody,       ## the HTML ``body`` element
+    tagBr,         ## the HTML ``br`` element
+    tagButton,     ## the HTML ``button`` element
+    tagCaption,    ## the HTML ``caption`` element
+    tagCite,       ## the HTML ``cite`` element
+    tagCode,       ## the HTML ``code`` element
+    tagCol,        ## the HTML ``col`` element
+    tagColgroup,   ## the HTML ``colgroup`` element
+    tagDd,         ## the HTML ``dd`` element
+    tagDel,        ## the HTML ``del`` element
+    tagDfn,        ## the HTML ``dfn`` element
+    tagDiv,        ## the HTML ``div`` element
+    tagDl,         ## the HTML ``dl`` element
+    tagDt,         ## the HTML ``dt`` element
+    tagEm,         ## the HTML ``em`` element
+    tagFieldset,   ## the HTML ``fieldset`` element
+    tagForm,       ## the HTML ``form`` element
+    tagH1,         ## the HTML ``h1`` element
+    tagH2,         ## the HTML ``h2`` element
+    tagH3,         ## the HTML ``h3`` element
+    tagH4,         ## the HTML ``h4`` element
+    tagH5,         ## the HTML ``h5`` element
+    tagH6,         ## the HTML ``h6`` element
+    tagHead,       ## the HTML ``head`` element
+    tagHr,         ## the HTML ``hr`` element
+    tagHtml,       ## the HTML ``html`` element
+    tagI,          ## the HTML ``i`` element
+    tagImg,        ## the HTML ``img`` element
+    tagInput,      ## the HTML ``input`` element
+    tagIns,        ## the HTML ``ins`` element
+    tagKbd,        ## the HTML ``kbd`` element
+    tagLabel,      ## the HTML ``label`` element
+    tagLegend,     ## the HTML ``legend`` element
+    tagLi,         ## the HTML ``li`` element
+    tagLink,       ## the HTML ``link`` element
+    tagMap,        ## the HTML ``map`` element
+    tagMeta,       ## the HTML ``meta`` element
+    tagNoscript,   ## the HTML ``noscript`` element
+    tagObject,     ## the HTML ``object`` element
+    tagOl,         ## the HTML ``ol`` element
+    tagOptgroup,   ## the HTML ``optgroup`` element
+    tagOption,     ## the HTML ``option`` element
+    tagP,          ## the HTML ``p`` element
+    tagParam,      ## the HTML ``param`` element
+    tagPre,        ## the HTML ``pre`` element
+    tagQ,          ## the HTML ``q`` element
+    tagSamp,       ## the HTML ``samp`` element
+    tagScript,     ## the HTML ``script`` element
+    tagSelect,     ## the HTML ``select`` element
+    tagSmall,      ## the HTML ``small`` element
+    tagSpan,       ## the HTML ``span`` element
+    tagStrong,     ## the HTML ``strong`` element
+    tagStyle,      ## the HTML ``style`` element
+    tagSub,        ## the HTML ``sub`` element
+    tagSup,        ## the HTML ``sup`` element
+    tagTable,      ## the HTML ``table`` element
+    tagTbody,      ## the HTML ``tbody`` element
+    tagTd,         ## the HTML ``td`` element
+    tagTextarea,   ## the HTML ``textarea`` element
+    tagTfoot,      ## the HTML ``tfoot`` element
+    tagTh,         ## the HTML ``th`` element
+    tagThead,      ## the HTML ``thead`` element
+    tagTitle,      ## the HTML ``title`` element
+    tagTr,         ## the HTML ``tr`` element
+    tagTt,         ## the HTML ``tt`` element
+    tagUl,         ## the HTML ``ul`` element
+    tagVar         ## the HTML ``var`` element
+
+const
+  # Looked up with a binary search, so this MUST stay sorted by ``cmp``; entry
+  # `i` names the tag with ordinal `i+1` in ``THtmlTag`` ("hr" sorts before "html").
+  tagStrs = [
+    "a", "acronym", "address", "area", "b", "base", "big", "blockquote",
+    "body", "br", "button", "caption", "cite", "code", "col", "colgroup",
+    "dd", "del", "dfn", "div", "dl", "dt", "em", "fieldset",
+    "form", "h1", "h2", "h3", "h4", "h5", "h6", "head", "hr", "html",
+    "i", "img", "input", "ins", "kbd", "label", "legend", "li", "link",
+    "map", "meta", "noscript", "object", "ol", "optgroup", "option",
+    "p", "param", "pre", "q", "samp", "script", "select", "small",
+    "span", "strong", "style", "sub", "sup", "table", "tbody", "td",
+    "textarea", "tfoot", "th", "thead", "title", "tr", "tt", "ul", "var"
+  ]
+
+proc binaryStrSearch(x: openarray[string], y: string): int = 
+  ## Binary search for `y` in `x`, which has to be sorted according to ``cmp``.
+  ## Returns the index of the matching entry, or -1 if there is none.
+  ## XXX put this into the library somewhere!
+  var lo = 0
+  var hi = high(x)
+  result = -1
+  while lo <= hi:
+    var probe = (lo + hi) div 2
+    var order = cmp(x[probe], y)
+    if order == 0:
+      return probe
+    if order < 0: lo = probe + 1
+    else: hi = probe - 1
+
+proc htmlTag*(n: PXmlNode): THtmlTag = 
+  ## gets `n`'s tag as a ``THtmlTag``. The result is cached in `n`'s
+  ## ``clientData`` field; even so this can be more expensive than comparing
+  ## ``tag`` directly to a string.
+  var cached = n.clientData
+  if cached == 0:
+    # 0 means "not looked up yet" (it is also what an unknown tag yields)
+    cached = binaryStrSearch(tagStrs, n.tag)+1
+    n.clientData = cached
+  result = THtmlTag(cached)
+
+proc parseElement(x: var TXmlParser, doc: var PDocument): PElement =
+  ## Builds an element from the events of parser `x`, recursing for nested
+  ## elements, and returns it when the matching end tag is reached. Raises
+  ## ``EMismatchedTag`` on a non-matching end tag or on premature end of input.
+  # NOTE(review): `PDocument`, `PElement`, `createElement`, `parseText` etc. are
+  # used here, but this module only imports streams, parsexml and xmltree; this
+  # looks carried over from the DOM parser -- confirm before relying on it.
+  var n = doc.createElement("")
+
+  while True:
+    case x.kind()
+    of xmlEof:
+      break
+    of xmlElementStart:
+      if n.tagName() != "":
+        n.appendChild(parseElement(x, doc))
+      else:
+        n = doc.createElement(x.elementName)
+    of xmlElementOpen:
+      if n.tagName() != "":
+        n.appendChild(parseElement(x, doc))
+      else:
+        if x.elementName.contains(':'):
+          #TODO: NamespaceURI
+          n = doc.createElementNS("nil", x.elementName)
+        else:  
+          n = doc.createElement(x.elementName)
+
+    of xmlElementEnd:
+      if x.elementName == n.nodeName:
+        # n.normalize() # Remove any whitespace etc.
+        return n
+      else: #The wrong element is ended
+        raise newException(EMismatchedTag, "Mismatched tag at line " & 
+          $x.getLine() & " column " & $x.getColumn)
+
+    of xmlCharData:
+      n.appendChild(parseText(x, doc))
+    of xmlAttribute:
+      if x.attrKey.contains(':'):
+        #TODO: NamespaceURI
+        n.setAttributeNS("nil", x.attrKey, x.attrValue)
+      else:
+        n.setAttribute(x.attrKey, x.attrValue)
+    of xmlCData:
+      n.appendChild(doc.createCDATASection(x.charData()))
+    of xmlComment:
+      n.appendChild(doc.createComment(x.charData()))
+    of xmlPI:
+      n.appendChild(doc.createProcessingInstruction(x.PIName(), x.PIRest()))
+
+    of xmlWhitespace, xmlElementClose, xmlEntity, xmlSpecial:
+      # Unused 'events'; a comment alone is not a statement, hence the `nil`
+      nil
+
+    else:
+      raise newException(EParserError, "Unexpected XML Parser event")
+    x.next()
+
+  # only reached via xmlEof: the input ended before the element was closed
+  raise newException(EMismatchedTag, 
+    "Mismatched tag at line " & $x.getLine() & " column " & $x.getColumn)
+
+
+proc parse*(x: var TXmlParser, father: PXmlNode) =
+  nil # TODO: not implemented yet; the empty statement keeps the body valid
+
+proc parseHtml*(s: PStream, filename: string, 
+                errors: var seq[string]): PXmlNode = 
+  ## parses the HTML from stream `s` and returns a ``PXmlNode``. Parsing
+  ## errors are meant to be added to the `errors` sequence.
+  # NOTE(review): nothing in here appends to `errors` yet, and `XmlDoc`, `dom`
+  # and `PElement` do not appear to be declared for this module -- confirm.
+  var x: TXmlParser
+  open(x, s, filename, {reportComments})
+
+  result = newElement("html")
+  # leading part: skip whitespace and keep comments until real content starts
+  while true:
+    x.next()
+    case x.kind
+    of xmlWhitespace: nil # just skip it
+    of xmlComment: 
+      result.add(newComment(x.text))
+    else: break # without this the loop never ends
+
+  while True:
+    # the event that ended the loop above is still unhandled, so look at the
+    # current event first and only advance at the end of each iteration
+    case x.kind
+    of xmlEof: break
+    of xmlElementStart, xmlElementOpen:
+      var el: PElement = parseElement(x, XmlDoc)
+      XmlDoc = dom.createDocument(el)
+    of xmlWhitespace, xmlElementClose, xmlEntity, xmlSpecial:
+      nil # Unused 'events'
+    else:
+      raise newException(EParserError, "Unexpected XML Parser event")
+    x.next()
+  close(x)
+
+proc parseHtml*(s: PStream): PXmlNode = 
+  ## parses the HTML from stream `s` and returns a ``PXmlNode``. Parsing
+  ## errors are collected into a local sequence that is then discarded.
+  var ignoredErrors: seq[string]
+  ignoredErrors = @[]
+  return parseHtml(s, "unknown_html_doc", ignoredErrors)
+
+proc loadHtml*(path: string, reportErrors = false): PXmlNode = 
+  ## Opens the file at ``path``, parses it as HTML and returns the resulting
+  ## ``PXmlNode``. Raises ``EIO`` if the file cannot be opened. If
+  ## `reportErrors` is true, every parsing error is written out with ``echo``.
+  var input = newFileStream(path, fmRead)
+  if input == nil: 
+    raise newException(EIO, "Unable to read file: " & path)
+  
+  var problems: seq[string] = @[]
+  result = parseHtml(input, path, problems)
+  if not reportErrors: return
+  for msg in items(problems): 
+    echo(msg)
+
diff --git a/lib/pure/parsexml.nim b/lib/pure/parsexml.nim
index 6809c0f7c..a209e8be0 100755
--- a/lib/pure/parsexml.nim
+++ b/lib/pure/parsexml.nim
@@ -364,7 +364,7 @@ proc parsePI(my: var TXmlParser) =
         break
       add(my.b, '?')
       inc(pos)
-    of '\c':  
+    of '\c':
       # the specification says that CR-LF, CR are to be transformed to LF
       pos = lexbase.HandleCR(my, pos)
       buf = my.buf      
diff --git a/lib/pure/re.nim b/lib/pure/re.nim
index 09a38440d..953f9c744 100755
--- a/lib/pure/re.nim
+++ b/lib/pure/re.nim
@@ -127,13 +127,13 @@ template `=~` *(s: string, pattern: TRegEx): expr =
   ##
   ##   if line =~ re"\s*(\w+)\s*\=\s*(\w+)": 
   ##     # matches a key=value pair:
-  ##     echo("Key: ", matches[1])
-  ##     echo("Value: ", matches[2])
+  ##     echo("Key: ", matches[0])
+  ##     echo("Value: ", matches[1])
   ##   elif line =~ re"\s*(\#.*)":
   ##     # matches a comment
   ##     # note that the implicit ``matches`` array is different from the
   ##     # ``matches`` array of the first branch
-  ##     echo("comment: ", matches[1])
+  ##     echo("comment: ", matches[0])
   ##   else:
   ##     echo("syntax error")
   ##
diff --git a/lib/pure/xmldom.nim b/lib/pure/xmldom.nim
index 12578a793..4e9d721d7 100644
--- a/lib/pure/xmldom.nim
+++ b/lib/pure/xmldom.nim
@@ -9,33 +9,30 @@
 
 
 import strutils
-## This module implements the XML DOM Level 2
+## This module implements the XML DOM Level 2 Core specification (http://www.w3.org/TR/2000/REC-DOM-Level-2-Core-20001113/core.html)
 
-#http://www.w3.org/TR/2000/REC-DOM-Level-2-Core-20001113/core.html
-#DOMString = String
-#DOMTimeStamp = int16 ??
 
-#DECLARATIONS
+#http://www.w3.org/TR/2000/REC-DOM-Level-2-Core-20001113/core.html
 
 #Exceptions
 type
-  EDOMException* = object of E_Base #Base exception object for all DOM Exceptions
-  EDOMStringSizeErr* = object of EDOMException #If the specified range of text does not fit into a DOMString
-                                               #Currently not used(Since DOMString is just string)
-  EHierarchyRequestErr* = object of EDOMException #If any node is inserted somewhere it doesn't belong
-  EIndexSizeErr* = object of EDOMException #If index or size is negative, or greater than the allowed value
-  EInuseAttributeErr* = object of EDOMException #If an attempt is made to add an attribute that is already in use elsewhere
-  EInvalidAccessErr* = object of EDOMException #If a parameter or an operation is not supported by the underlying object.
-  EInvalidCharacterErr* = object of EDOMException #This exception is raised when a string parameter contains an illegal character
-  EInvalidModificationErr* = object of EDOMException #If an attempt is made to modify the type of the underlying object.
-  EInvalidStateErr* = object of EDOMException #If an attempt is made to use an object that is not, or is no longer, usable.
-  ENamespaceErr* = object of EDOMException #If an attempt is made to create or change an object in a way which is incorrect with regard to namespaces.
-  ENotFoundErr* = object of EDOMException #If an attempt is made to reference a node in a context where it does not exist
-  ENotSupportedErr* = object of EDOMException #If the implementation does not support the requested type of object or operation.
-  ENoDataAllowedErr* = object of EDOMException #If data is specified for a node which does not support data
-  ENoModificationAllowedErr* = object of EDOMException #If an attempt is made to modify an object where modifications are not allowed
-  ESyntaxErr* = object of EDOMException #If an invalid or illegal string is specified.
-  EWrongDocumentErr* = object of EDOMException #If a node is used in a different document than the one that created it (that doesn't support it)
+  EDOMException* = object of E_Base ## Base exception object for all DOM Exceptions
+  EDOMStringSizeErr* = object of EDOMException ## If the specified range of text does not fit into a DOMString
+                                               ## Currently not used(Since DOMString is just string)
+  EHierarchyRequestErr* = object of EDOMException ## If any node is inserted somewhere it doesn't belong
+  EIndexSizeErr* = object of EDOMException ## If index or size is negative, or greater than the allowed value
+  EInuseAttributeErr* = object of EDOMException ## If an attempt is made to add an attribute that is already in use elsewhere
+  EInvalidAccessErr* = object of EDOMException ## If a parameter or an operation is not supported by the underlying object.
+  EInvalidCharacterErr* = object of EDOMException ## This exception is raised when a string parameter contains an illegal character
+  EInvalidModificationErr* = object of EDOMException ## If an attempt is made to modify the type of the underlying object.
+  EInvalidStateErr* = object of EDOMException ## If an attempt is made to use an object that is not, or is no longer, usable.
+  ENamespaceErr* = object of EDOMException ## If an attempt is made to create or change an object in a way which is incorrect with regard to namespaces.
+  ENotFoundErr* = object of EDOMException ## If an attempt is made to reference a node in a context where it does not exist
+  ENotSupportedErr* = object of EDOMException ## If the implementation does not support the requested type of object or operation.
+  ENoDataAllowedErr* = object of EDOMException ## If data is specified for a node which does not support data
+  ENoModificationAllowedErr* = object of EDOMException ## If an attempt is made to modify an object where modifications are not allowed
+  ESyntaxErr* = object of EDOMException ## If an invalid or illegal string is specified.
+  EWrongDocumentErr* = object of EDOMException ## If a node is used in a different document than the one that created it (that doesn't support it)
 
 template newException(exceptn, message: expr): expr =
   block: # open a new scope
@@ -65,24 +62,24 @@ type
   Feature = tuple[name: string, version: string]
   PDOMImplementation* = ref DOMImplementation
   DOMImplementation = object
-    Features: seq[Feature] #Read-Only
+    Features: seq[Feature] # Read-Only
 
   PNode* = ref Node
   Node = object
-    attributes: seq[PAttr] #Read-only
-    childNodes*: seq[PNode] #Read-only
-    FLocalName: string #Read-only
-    FNamespaceURI: string #Read-only
-    FNodeName: string #Read-only
+    attributes*: seq[PAttr]
+    childNodes*: seq[PNode]
+    FLocalName: string # Read-only
+    FNamespaceURI: string # Read-only
+    FNodeName: string # Read-only
     nodeValue*: string
-    FNodeType: int #Read-only
-    FOwnerDocument: PDocument #Read-Only
-    FParentNode: PNode #Read-Only
+    FNodeType: int # Read-only
+    FOwnerDocument: PDocument # Read-Only
+    FParentNode: PNode # Read-Only
     prefix*: string # Setting this should change some values... TODO!
   
   PElement* = ref Element
   Element = object of Node
-    FTagName: string #Read-only
+    FTagName: string # Read-only
   
   PCharacterData = ref CharacterData
   CharacterData = object of Node
@@ -90,15 +87,15 @@ type
     
   PDocument* = ref Document
   Document = object of Node
-    FImplementation: PDOMImplementation #Read-only
-    FDocumentElement: PElement #Read-only
+    FImplementation: PDOMImplementation # Read-only
+    FDocumentElement: PElement # Read-only
     
   PAttr* = ref Attr  
   Attr = object of Node
-    FName: string #Read-only
-    FSpecified: bool #Read-only
+    FName: string # Read-only
+    FSpecified: bool # Read-only
     value*: string
-    FOwnerElement: PElement #Read-only
+    FOwnerElement: PElement # Read-only
 
   PDocumentFragment* = ref DocumentFragment
   DocumentFragment = object of Node
@@ -115,18 +112,18 @@ type
   PProcessingInstruction* = ref ProcessingInstruction
   ProcessingInstruction = object of Node
     data*: string
-    FTarget: string #Read-only
+    FTarget: string # Read-only
 
-#DOMImplementation
+# DOMImplementation
 proc getDOM*(): PDOMImplementation =
-  ##Returns a DOMImplementation
+  ## Returns a DOMImplementation
   var DOMImpl: PDOMImplementation
   new(DOMImpl)
   DOMImpl.Features = @[(name: "core", version: "2.0"), (name: "core", version: "1.0"), (name: "XML", version: "2.0")]
   return DOMImpl
 
 proc createDocument*(dom: PDOMImplementation, namespaceURI: string, qualifiedName: string): PDocument =
-  ##Creates an XML Document object of the specified type with its document element.
+  ## Creates an XML Document object of the specified type with its document element.
   var doc: PDocument
   new(doc)
   doc.FNamespaceURI = namespaceURI
@@ -142,8 +139,9 @@ proc createDocument*(dom: PDOMImplementation, namespaceURI: string, qualifiedNam
   return doc
   
 proc createDocument*(dom: PDOMImplementation, n: PElement): PDocument =
-  ##Creates an XML Document object of the specified type with its document element.
-  #This procedure is not in the specification, it's provided for the parser.
+  ## Creates an XML Document object of the specified type with its document element.
+  
+  # This procedure is not in the specification, it's provided for the parser.
   var doc: PDocument
   new(doc)
   doc.FDocumentElement = n
@@ -153,7 +151,7 @@ proc createDocument*(dom: PDOMImplementation, n: PElement): PDocument =
   return doc
   
 proc hasFeature*(dom: PDOMImplementation, feature: string, version: string = ""): bool =
-  ##Returns ``true`` if this ``version`` of the DomImplementation implements ``feature``, otherwise ``false``
+  ## Returns ``true`` if this ``version`` of the DomImplementation implements ``feature``, otherwise ``false``
   for iName, iVersion in items(dom.Features):
     if iName == feature:
       if version == "":
@@ -164,8 +162,8 @@ proc hasFeature*(dom: PDOMImplementation, feature: string, version: string = "")
   return False
 
 
-#Document
-#Attributes
+# Document
+# Attributes
   
 proc implementation*(doc: PDocument): PDOMImplementation =
   return doc.FImplementation
@@ -173,9 +171,9 @@ proc implementation*(doc: PDocument): PDOMImplementation =
 proc documentElement*(doc: PDocument): PElement = 
   return doc.FDocumentElement
 
-#Internal procedures
+# Internal procedures
 proc findNodes(nl: PNode, name: string): seq[PNode] =
-  #Made for getElementsByTagName
+  # Made for getElementsByTagName
   var r: seq[PNode] = @[]
   if nl.childNodes == nil: return @[]
   if nl.childNodes.len() == 0: return @[]
@@ -192,7 +190,7 @@ proc findNodes(nl: PNode, name: string): seq[PNode] =
   return r
   
 proc findNodesNS(nl: PNode, namespaceURI: string, localName: string): seq[PNode] =
-  #Made for getElementsByTagNameNS
+  # Made for getElementsByTagNameNS
   var r: seq[PNode] = @[]
   if nl.childNodes == nil: return @[]
   if nl.childNodes.len() == 0: return @[]
@@ -211,10 +209,10 @@ proc findNodesNS(nl: PNode, namespaceURI: string, localName: string): seq[PNode]
 
 #Procedures
 proc createAttribute*(doc: PDocument, name: string): PAttr =
-  ##Creates an Attr of the given name. Note that the Attr instance can then be set on an Element using the setAttributeNode method.
-  ##To create an attribute with a qualified name and namespace URI, use the createAttributeNS method. 
+  ## Creates an Attr of the given name. Note that the Attr instance can then be set on an Element using the setAttributeNode method.
+  ## To create an attribute with a qualified name and namespace URI, use the createAttributeNS method. 
   
-  #Check if name contains illegal characters
+  # Check if name contains illegal characters
   if illegalChars in name:
     raise newException(EInvalidCharacterErr, "Invalid character")
   
@@ -230,12 +228,12 @@ proc createAttribute*(doc: PDocument, name: string): PAttr =
   return AttrNode
 
 proc createAttributeNS*(doc: PDocument, namespaceURI: string, qualifiedName: string): PAttr =
-  ##Creates an attribute of the given qualified name and namespace URI
+  ## Creates an attribute of the given qualified name and namespace URI
   
-  #Check if name contains illegal characters
+  # Check if name contains illegal characters
   if illegalChars in namespaceURI or illegalChars in qualifiedName:
     raise newException(EInvalidCharacterErr, "Invalid character")
-  #Exceptions
+  # Exceptions
   if qualifiedName.contains(':'):
     if namespaceURI == nil or namespaceURI == "":
       raise newException(ENamespaceErr, "When qualifiedName contains a prefix namespaceURI cannot be nil")
@@ -264,17 +262,17 @@ proc createAttributeNS*(doc: PDocument, namespaceURI: string, qualifiedName: str
   return AttrNode
 
 proc createCDATASection*(doc: PDocument, data: string): PCDATASection =
-  ##Creates a CDATASection node whose value is the specified string.
+  ## Creates a CDATASection node whose value is the specified string.
   var CData: PCDATASection
   new(CData)
   CData.data = data
   CData.nodeValue = data
-  CData.FNodeName = "#text" #Not sure about this, but this is technically a TextNode
+  CData.FNodeName = "#text" # Not sure about this, but this is technically a TextNode
   CData.FNodeType = CDataSectionNode
   return CData
 
 proc createComment*(doc: PDocument, data: string): PComment =
-  ##Creates a Comment node given the specified string. 
+  ## Creates a Comment node given the specified string. 
   var Comm: PComment
   new(Comm)
   Comm.data = data
@@ -284,15 +282,15 @@ proc createComment*(doc: PDocument, data: string): PComment =
   return Comm
 
 proc createDocumentFragment*(doc: PDocument): PDocumentFragment =
-  ##Creates an empty DocumentFragment object.
+  ## Creates an empty DocumentFragment object.
   var DF: PDocumentFragment
   new(DF)
   return DF
 
 proc createElement*(doc: PDocument, tagName: string): PElement =
-  ##Creates an element of the type specified.
+  ## Creates an element of the type specified.
   
-  #Check if name contains illegal characters
+  # Check if name contains illegal characters
   if illegalChars in tagName:
     raise newException(EInvalidCharacterErr, "Invalid character")
     
@@ -311,7 +309,7 @@ proc createElement*(doc: PDocument, tagName: string): PElement =
   return elNode
 
 proc createElementNS*(doc: PDocument, namespaceURI: string, qualifiedName: string): PElement =
-  ##Creates an element of the given qualified name and namespace URI.
+  ## Creates an element of the given qualified name and namespace URI.
   if qualifiedName.contains(':'):
     if namespaceURI == nil or namespaceURI == "":
       raise newException(ENamespaceErr, "When qualifiedName contains a prefix namespaceURI cannot be nil")
@@ -319,7 +317,7 @@ proc createElementNS*(doc: PDocument, namespaceURI: string, qualifiedName: strin
       raise newException(ENamespaceErr, 
         "When the namespace prefix is \"xml\" namespaceURI has to be \"http://www.w3.org/XML/1998/namespace\"")
         
-  #Check if name contains illegal characters
+  # Check if name contains illegal characters
   if illegalChars in namespaceURI or illegalChars in qualifiedName:
     raise newException(EInvalidCharacterErr, "Invalid character")
     
@@ -342,7 +340,7 @@ proc createElementNS*(doc: PDocument, namespaceURI: string, qualifiedName: strin
   return elNode
 
 proc createProcessingInstruction*(doc: PDocument, target: string, data: string): PProcessingInstruction = 
-  ##Creates a ProcessingInstruction node given the specified name and data strings. 
+  ## Creates a ProcessingInstruction node given the specified name and data strings. 
   
   #Check if name contains illegal characters
   if illegalChars in target:
@@ -356,7 +354,7 @@ proc createProcessingInstruction*(doc: PDocument, target: string, data: string):
   return PI
 
 proc createTextNode*(doc: PDocument, data: string): PText = #Propably TextNode
-  ##Creates a Text node given the specified string. 
+  ## Creates a Text node given the specified string. 
   var txtNode: PText
   new(txtNode)
   txtNode.data = data
@@ -371,8 +369,8 @@ discard """proc getElementById*(doc: PDocument, elementId: string): PElement =
   #TODO"""
 
 proc getElementsByTagName*(doc: PDocument, tagName: string): seq[PNode] =
-  ##Returns a NodeList of all the Elements with a given tag name in
-  ##the order in which they are encountered in a preorder traversal of the Document tree. 
+  ## Returns a NodeList of all the Elements with a given tag name in
+  ## the order in which they are encountered in a preorder traversal of the Document tree. 
   var result: seq[PNode] = @[]
   if doc.FDocumentElement.FNodeName == tagName or tagName == "*":
     result.add(doc.FDocumentElement)
@@ -381,8 +379,8 @@ proc getElementsByTagName*(doc: PDocument, tagName: string): seq[PNode] =
   return result
   
 proc getElementsByTagNameNS*(doc: PDocument, namespaceURI: string, localName: string): seq[PNode] =
-  ##Returns a NodeList of all the Elements with a given localName and namespaceURI
-  ##in the order in which they are encountered in a preorder traversal of the Document tree. 
+  ## Returns a NodeList of all the Elements with a given localName and namespaceURI
+  ## in the order in which they are encountered in a preorder traversal of the Document tree. 
   var result: seq[PNode] = @[]
   if doc.FDocumentElement.FLocalName == localName or localName == "*":
     if doc.FDocumentElement.FNamespaceURI == namespaceURI or namespaceURI == "*":
@@ -450,57 +448,76 @@ proc importNode*(doc: PDocument, importedNode: PNode, deep: bool): PNode =
 
 # Node
 # Attributes
-proc Attributes*(n: PNode): seq[PAttr] =
-  if n.attributes == nil: n.attributes = @[] # Initialize the sequence if it's nil
-  return n.attributes
   
 proc firstChild*(n: PNode): PNode =
+  ## Returns this node's first child
+
   if n.childNodes.len() > 0:
     return n.childNodes[0]
   else:
     return nil
   
 proc lastChild*(n: PNode): PNode =
+  ## Returns this node's last child
+
   if n.childNodes.len() > 0:
     return n.childNodes[n.childNodes.len() - 1]
   else:
     return nil
   
 proc localName*(n: PNode): string =
+  ## Returns this node's local name
+
   return n.FLocalName
 
 proc namespaceURI*(n: PNode): string =
+  ## Returns this node's namespace URI
+
   return n.FNamespaceURI
 
 proc nextSibling*(n: PNode): PNode =
+  ## Returns the next sibling of this node, or nil if this node has no parent
+  ## or is the last child of its parent
+
+  if n.FParentNode == nil or n.FParentNode.childNodes == nil:
+    return nil
   var nLow: int = low(n.FParentNode.childNodes)
   var nHigh: int = high(n.FParentNode.childNodes)
   for i in nLow..nHigh:
-    if n.FParentNode.childNodes[i] == n: # HAVE TO TEST this line, not sure if ``==`` will work
-      return n.FParentNode.childNodes[i + 1]
+    if n.FParentNode.childNodes[i] == n:
+      # the last child has no next sibling; `i + 1` would be out of bounds
+      if i == nHigh: return nil
+      return n.FParentNode.childNodes[i + 1]
   return nil
 
 proc nodeName*(n: PNode): string =
+  ## Returns the name of this node
+
   return n.FNodeName
 
 proc nodeType*(n: PNode): int =
+  ## Returns the type of this node
+
   return n.FNodeType
 
 proc ownerDocument*(n: PNode): PDocument =
+  ## Returns the owner document of this node
+
   return n.FOwnerDocument
 
 proc parentNode*(n: PNode): PNode =
+  ## Returns the parent node of this node
+
   return n.FParentNode
   
 proc previousSibling*(n: PNode): PNode =
+  ## Returns the previous sibling of this node, or nil if this node has no
+  ## parent or is the first child of its parent
+
+  if n.FParentNode == nil or n.FParentNode.childNodes == nil:
+    return nil
   var nLow: int = low(n.FParentNode.childNodes)
   var nHigh: int = high(n.FParentNode.childNodes)
   for i in nLow..nHigh:
-    if n.FParentNode.childNodes[i] == n: # HAVE TO TEST this line, not sure if ``==`` will work
-      return n.FParentNode.childNodes[i - 1]
+    if n.FParentNode.childNodes[i] == n:
+      # the first child has no previous sibling; `i - 1` would be out of bounds
+      if i == nLow: return nil
+      return n.FParentNode.childNodes[i - 1]
   return nil
   
 proc `prefix=`*(n: var PNode, value: string) =
+  ## Modifies the prefix of this node
+
   # Setter
   # Check if name contains illegal characters
   if illegalChars in value:
@@ -532,8 +549,11 @@ proc appendChild*(n: PNode, newChild: PNode) =
   ## Adds the node newChild to the end of the list of children of this node.
   ## If the newChild is already in the tree, it is first removed.
   
-  # TODO - Check if n contains newChild
-  # TODO - Exceptions
+  # Check if n contains newChild
+  if n.childNodes != nil:
+    for i in low(n.childNodes)..high(n.childNodes):
+      if n.childNodes[i] == newChild:
+        raise newException(EHierarchyRequestErr, "The node to append is already in this nodes children.")
   
   # Check if newChild is from this nodes document
   if n.FOwnerDocument != newChild.FOwnerDocument:
@@ -542,6 +562,9 @@ proc appendChild*(n: PNode, newChild: PNode) =
   if n == newChild:
     raise newException(EHierarchyRequestErr, "You can't add a node into itself")
   
+  if n.nodeType in childlessObjects:
+    raise newException(ENoModificationAllowedErr, "Cannot append children to a childless node")
+  
   if n.childNodes == nil: n.childNodes = @[]
     
   newChild.FParentNode = n
@@ -604,10 +627,43 @@ proc isSupported*(n: PNode, feature: string, version: string): bool =
   ## feature and that feature is supported by this node. 
   return n.FOwnerDocument.FImplementation.hasFeature(feature, version)
 
+proc isEmpty(s: string): bool =
+  ## Returns true if ``s`` is nil, has zero length, or consists only of
+  ## space characters; returns false as soon as any other character is seen.
+  result = True
+  if s == "" or s == nil: return
+  for ch in items(s):
+    if ch != ' ':
+      result = False
+      break
+
 proc normalize*(n: PNode) =
-  ## Puts all Text nodes in the full depth of the sub-tree underneath this Node
+  ## Merges each run of adjacent TextNodes among the direct children of ``n``
+  ## into the first TextNode of that run, and removes TextNodes that
+  ## ``isEmpty`` reports as empty (nil, zero-length or spaces only).
+  ## Children of any other type are kept, in their original order.
+  ## Only the direct children are processed; the sub-trees are not visited.
+  if n.childNodes == nil: return
+  var curTextNode: PNode = nil # TextNode collecting the current run, if any
   
-  # TODO
+  var newChildNodes: seq[PNode] = @[]
+  for i in low(n.childNodes)..high(n.childNodes):
+    var child: PNode = n.childNodes[i]
+    if child.nodeType == TextNode:
+      # Empty TextNodes are dropped and do not interrupt a run
+      if not PText(child).data.isEmpty():
+        if curTextNode == nil:
+          # First TextNode of a run stays in the tree
+          curTextNode = child
+          newChildNodes.add(child)
+        else:
+          # Later TextNodes of the run are folded into the first one
+          PText(curTextNode).data.add(PText(child).data)
+          curTextNode.nodeValue.add(PText(child).data)
+    else:
+      # Any other node ends the current run of TextNodes
+      newChildNodes.add(child)
+      curTextNode = nil
+  n.childNodes = newChildNodes
 
 proc removeChild*(n: PNode, oldChild: PNode): PNode =
   ## Removes the child node indicated by ``oldChild`` from the list of children, and returns it.
@@ -791,26 +847,32 @@ proc setNamedItemNS*(NList: var seq[PAttr], arg: PAttr): PAttr =
     NList[index] = arg
     return item # Return the replaced node
     
-# TODO - Maybe implement a ChildlessNode!^
-    
 # CharacterData - Decided to implement this, 
 # Didn't add the procedures, because you can just edit .data
 
 # Attr
 # Attributes
 proc name*(a: PAttr): string =
+  ## Returns the name of the Attribute
+
   return a.FName
   
 proc specified*(a: PAttr): bool =
+  ## Specifies whether this attribute was specified in the original document
+
   return a.FSpecified
   
 proc ownerElement*(a: PAttr): PElement = 
+  ## Returns the element that owns this attribute
+
   return a.FOwnerElement
 
 # Element
 # Attributes
 
 proc tagName*(el: PElement): string =
+  ## Returns the Element Tag Name
+
   return el.FTagName
 
 # Procedures
@@ -960,11 +1022,29 @@ proc setAttributeNS*(el: PElement, namespaceURI, localName, value: string) =
 proc splitData*(TextNode: PText, offset: int): PText =
   ## Breaks this node into two nodes at the specified offset, 
   ## keeping both in the tree as siblings.
+  ##
+  ## After the call this node holds the first ``offset`` characters and the
+  ## returned node holds the rest. If this node has a parent, the new node is
+  ## inserted directly after it in the parent's ``childNodes``.
+  ## Raises ``EIndexSizeErr`` if ``offset`` is negative or greater than the
+  ## length of the data.
+  
+  if offset < 0 or offset > TextNode.data.len():
+    raise newException(EIndexSizeErr, "Index out of bounds")
+  
+  # Take the tail before truncating; the bounds of ``copy`` are inclusive
+  var right: string = TextNode.data.copy(offset)
+  TextNode.data = TextNode.data.copy(0, offset - 1)
+  # NOTE(review): ``nodeValue`` is not updated here - confirm whether it has
+  # to mirror ``data`` as it does in ``normalize``
+  
+  var newNode: PText = TextNode.FOwnerDocument.createTextNode(right)
+  if TextNode.FParentNode != nil:
+    for i in low(TextNode.FParentNode.childNodes)..high(TextNode.FParentNode.childNodes):
+      if TextNode.FParentNode.childNodes[i] == TextNode:
+        # The new node becomes the next sibling of this node
+        newNode.FParentNode = TextNode.FParentNode
+        TextNode.FParentNode.childNodes.insert(newNode, i + 1)
+        break
+  return newNode
 
-  # TODO - need insert(seq[T])
 
 # ProcessingInstruction
-proc target*(PI: PProcessingInstruction): string = 
+proc target*(PI: PProcessingInstruction): string =
+  ## Returns the target of this processing instruction
+
   return PI.FTarget
 
     
diff --git a/lib/pure/xmldomparser.nim b/lib/pure/xmldomparser.nim
index 90d4d85b1..9df60cab8 100644
--- a/lib/pure/xmldomparser.nim
+++ b/lib/pure/xmldomparser.nim
@@ -16,6 +16,7 @@ import xmldom, os, streams, parsexml, strutils
 type
   #Parsing errors
   EMismatchedTag* = object of E_Base ## Raised when a tag is not properly closed
+  EParserError* = object of E_Base ## Raised when an unexpected XML Parser event occurs
 
 template newException(exceptn, message: expr): expr =
   block: # open a new scope
@@ -52,6 +53,7 @@ proc parseElement(x: var TXmlParser, doc: var PDocument): PElement =
         
     of xmlElementEnd:
       if x.elementName == n.nodeName:
+        # n.normalize() # Remove any whitespace etc.
         return n
       else: #The wrong element is ended
         raise newException(EMismatchedTag, "Mismatched tag at line " & 
@@ -71,8 +73,12 @@ proc parseElement(x: var TXmlParser, doc: var PDocument): PElement =
       n.appendChild(doc.createComment(x.charData()))
     of xmlPI:
       n.appendChild(doc.createProcessingInstruction(x.PIName(), x.PIRest()))
+      
+    of xmlWhitespace, xmlElementClose, xmlEntity, xmlSpecial:
+      # Unused 'events'
+
     else:
-      # echo(x.kind()) # XXX do nothing here!?
+      raise newException(EParserError, "Unexpected XML Parser event")
     x.next()
 
   raise newException(EMismatchedTag, 
@@ -99,9 +105,12 @@ proc loadXML*(path: string): PDocument =
     of xmlElementStart, xmlElementOpen:
       var el: PElement = parseElement(x, XmlDoc)
       XmlDoc = dom.createDocument(el)
+    of xmlWhitespace, xmlElementClose, xmlEntity, xmlSpecial:
+      # Unused 'events'
     else:
-      # echo(x.kind())
+      raise newException(EParserError, "Unexpected XML Parser event")
 
+  close(x)
   return XmlDoc
 
 
diff --git a/lib/pure/xmlgen.nim b/lib/pure/xmlgen.nim
index 79a782252..29f2700f2 100755
--- a/lib/pure/xmlgen.nim
+++ b/lib/pure/xmlgen.nim
@@ -21,6 +21,10 @@
 ##   
 ##   <h1><a href="http://force7.de/nimrod">Nimrod</a></h1>
 ##
+## **Deprecated since version 0.8.8.** Use the macro ``<>`` in xmltree 
+## instead.
+
+{.deprecated.}
 
 import
   macros, strutils
@@ -52,8 +56,8 @@ proc xmlCheckedTag*(e: PNimrodNode, tag: string,
   
   # copy the attributes; when iterating over them these lists
   # will be modified, so that each attribute is only given one value
-  var req = splitSeq(reqAttr)
-  var opt = splitSeq(optAttr)
+  var req = split(reqAttr)
+  var opt = split(optAttr)
   result = newNimNode(nnkBracket, e)
   result.add(newStrLitNode("<"))
   result.add(newStrLitNode(tag))
diff --git a/lib/pure/xmltree.nim b/lib/pure/xmltree.nim
new file mode 100644
index 000000000..aeec842d7
--- /dev/null
+++ b/lib/pure/xmltree.nim
@@ -0,0 +1,231 @@
+#
+#
+#            Nimrod's Runtime Library
+#        (c) Copyright 2010 Andreas Rumpf
+#
+#    See the file "copying.txt", included in this
+#    distribution, for details about the copyright.
+#
+
+## A simple XML tree. More efficient and simpler than the DOM.
+
+import macros, strtabs
+
+type
+  PXmlNode* = ref TXmlNode ## an XML tree consists of ``PXmlNode``s. 
+  
+  TXmlNodeKind* = enum  ## different kinds of ``PXmlNode``s
+    xnText,             ## a text element
+    xnElement,          ## an element with 0 or more children
+    xnCData,            ## a CDATA node
+    xnComment           ## an XML comment
+  
+  PXmlAttributes* = PStringTable ## an alias for a string to string mapping
+  
+  # The node object itself is not exported; clients work with ``PXmlNode``
+  # and the accessor procs of this module.
+  TXmlNode {.pure, final, acyclic.} = object 
+    case k: TXmlNodeKind
+    of xnText, xnComment, xnCData: 
+      fText: string               ## text of a text, comment or CDATA node
+    of xnElement:
+      fTag: string                ## tag name of the element
+      s: seq[PXmlNode]            ## children of the element
+      fAttr: PXmlAttributes       ## attributes; nil until they are set
+    fClientData: int              ## for other clients
+  
+proc newXmlNode(kind: TXmlNodeKind): PXmlNode = 
+  ## creates a new ``PXmlNode`` of the given `kind`. Only the kind is set
+  ## here; no other field is initialized by this proc.
+  new(result)
+  result.k = kind
+
+proc newElement*(tag: string): PXmlNode = 
+  ## creates a new ``PXmlNode`` of kind ``xnElement`` with the given `tag`.
+  ## The element starts with an empty list of children.
+  result = newXmlNode(xnElement)
+  result.fTag = tag
+  result.s = @[]
+  # init attributes lazily to save memory
+
+proc newText*(text: string): PXmlNode = 
+  ## creates a new ``PXmlNode`` of kind ``xnText`` with the text `text`.
+  result = newXmlNode(xnText)
+  result.fText = text
+
+proc newComment*(comment: string): PXmlNode = 
+  ## creates a new ``PXmlNode`` of kind ``xnComment`` with the text `comment`.
+  result = newXmlNode(xnComment)
+  result.fText = comment
+
+proc newCData*(cdata: string): PXmlNode = 
+  ## creates a new ``PXmlNode`` of kind ``xnCData`` with the text `cdata`.
+  result = newXmlNode(xnCData)
+  result.fText = cdata
+
+proc text*(n: PXmlNode): string {.inline.} = 
+  ## gets the associated text with the node `n`. `n` can be a CDATA, Text
+  ## or comment node.
+  assert n.k in {xnText, xnComment, xnCData}
+  result = n.fText
+
+proc tag*(n: PXmlNode): string {.inline.} = 
+  ## gets the tag name of `n`. `n` has to be an ``xnElement`` node.
+  assert n.k == xnElement
+  result = n.fTag
+    
+proc add*(father, son: PXmlNode) {.inline.} = 
+  ## adds the child `son` to `father`. `son` is appended after the existing
+  ## children. `father` has to be an ``xnElement`` node, as only elements
+  ## have a list of children.
+  add(father.s, son)
+  
+proc len*(n: PXmlNode): int {.inline.} = 
+  ## returns the number of `n`'s children. For nodes that are not of kind
+  ## ``xnElement`` the result is left at its default value.
+  if n.k == xnElement: result = len(n.s)
+
+proc kind*(n: PXmlNode): TXmlNodeKind {.inline.} =
+  ## returns `n`'s kind.
+  result = n.k
+
+proc `[]`* (n: PXmlNode, i: int): PXmlNode {.inline.} = 
+  ## returns the `i`'th child of `n`. `n` has to be an ``xnElement`` node.
+  assert n.k == xnElement
+  result = n.s[i]
+
+iterator items*(n: PXmlNode): PXmlNode {.inline.} = 
+  ## yields every child of `n`, from the first to the last.
+  ## `n` has to be an ``xnElement`` node.
+  assert n.k == xnElement
+  # the upper bound is computed once, before the first child is yielded
+  var last = n.len-1
+  var idx = 0
+  while idx <= last:
+    yield n[idx]
+    inc(idx)
+
+proc attr*(n: PXmlNode): PXmlAttributes {.inline.} = 
+  ## gets the attributes belonging to `n`. `n` has to be an ``xnElement``
+  ## node. The result is the stored table itself and can be nil if no
+  ## attributes have been set.
+  assert n.k == xnElement
+  result = n.fAttr
+  
+proc `attr=`*(n: PXmlNode, attr: PXmlAttributes) {.inline.} = 
+  ## sets the attributes belonging to `n`.
+  assert n.k == xnElement
+  n.fAttr = attr
+
+proc attrLen*(n: PXmlNode): int {.inline.} = 
+  ## returns the number of `n`'s attributes. `n` has to be an ``xnElement``
+  ## node. If no attribute table has been set, the result is left at its
+  ## default value.
+  assert n.k == xnElement
+  if not isNil(n.fAttr): result = len(n.fAttr)
+
+proc clientData*(n: PXmlNode): int {.inline.} =
+  ## gets the client data of `n`. The client data field is used by the HTML
+  ## parser and generator.
+  result = n.fClientData
+
+proc `clientData=`*(n: PXmlNode, data: int) {.inline.} = 
+  ## sets the client data of `n`. The client data field is used by the HTML
+  ## parser and generator.
+  n.fClientData = data
+
+proc addEscaped*(result: var string, s: string) = 
+  ## appends `s` to `result` and replaces ``<``, ``>``, ``&`` and ``"`` by
+  ## their XML entities on the way. Same as ``result.add(escape(s))``, but
+  ## more efficient.
+  for ch in items(s):
+    if ch == '<': result.add("&lt;")
+    elif ch == '>': result.add("&gt;")
+    elif ch == '&': result.add("&amp;")
+    elif ch == '"': result.add("&quot;")
+    else: result.add(ch)
+
+proc escape*(s: string): string = 
+  ## escapes `s` for inclusion into an XML document and returns the escaped
+  ## copy; `s` itself is not changed.
+  ## Escapes these characters:
+  ##
+  ## ------------    -------------------
+  ## char            is converted to
+  ## ------------    -------------------
+  ##  ``<``          ``&lt;``
+  ##  ``>``          ``&gt;``
+  ##  ``&``          ``&amp;``
+  ##  ``"``          ``&quot;``
+  ## ------------    -------------------
+  # NOTE(review): presumably the string is created with ``s.len`` and then
+  # cut back to length 0 to reserve space for the appends below - confirm
+  result = newString(s.len)
+  setLen(result, 0)
+  addEscaped(result, s)
+  
+proc addIndent(result: var string, indent: int) = 
+  ## starts a new line in `result` and pads it with `indent` blanks.
+  result.add("\n")
+  var remaining = indent
+  while remaining > 0:
+    result.add(' ')
+    dec(remaining)
+  
+proc add*(result: var string, n: PXmlNode, indent = 0, indWidth = 2) = 
+  ## adds the textual representation of `n` to `result`.
+  ##
+  ## `indent` is the number of blanks the closing tag of an element is
+  ## indented by; every child is put on its own line and indented by
+  ## `indWidth` more blanks than its parent.
+  case n.k
+  of xnElement:
+    result.add('<')
+    result.add(n.fTag)
+    # attributes are optional; the table is nil if none have been set
+    if not isNil(n.fAttr): 
+      for key, val in pairs(n.fAttr): 
+        result.add(' ')
+        result.add(key)
+        result.add("=\"")
+        result.addEscaped(val)
+        result.add('"')
+    if n.len > 0:
+      result.add('>')
+      # each child starts on a new line, one indentation level deeper
+      for i in 0..n.len-1:
+        result.addIndent(indent+indWidth)
+        result.add(n[i], indent+indWidth, indWidth)
+      result.addIndent(indent)
+      result.add("</")
+      result.add(n.fTag)
+      result.add(">")
+    else: 
+      # an element without children is written as an empty-element tag
+      result.add(" />")
+  of xnText:
+    result.addEscaped(n.fText)
+  of xnComment:
+    # NOTE(review): the comment text is entity-escaped like normal text -
+    # confirm that this is intended for comments
+    result.add("<!-- ")
+    result.addEscaped(n.fText)
+    result.add(" -->")
+  of xnCDATA:
+    # the text is written verbatim, without any escaping
+    result.add("<![CDATA[")
+    result.add(n.fText)
+    result.add("]]>")
+
+proc `$`*(n: PXmlNode): string =
+  ## converts `n` into its string representation.
+  result = "<?xml version=\"1.0\" encoding=\"UTF-8\" ?>\n"
+  result.add(n)
+
+proc newXmlTree*(tag: string, children: openArray[PXmlNode],
+                 attributes: PXmlAttributes = nil): PXmlNode = 
+  ## creates a new element node named `tag` whose children are the entries
+  ## of `children` (in the same order) and whose attribute table is
+  ## `attributes`.
+  result = newXmlNode(xnElement)
+  result.fTag = tag
+  result.fAttr = attributes
+  result.s = @[]
+  for child in items(children): result.s.add(child)
+  
+proc xmlConstructor(e: PNimrodNode): PNimrodNode {.compileTime.} =
+  ## use this procedure to define a new XML tag. It turns the macro argument
+  ## into a call of ``newXmlTree``: arguments of the form ``key=value``
+  ## become attributes, all other arguments become children.
+  expectLen(e, 1)
+  var a = e[0]
+  if a.kind == nnkCall:
+    # a[0] is the tag name, the remaining sons are the arguments
+    result = newCall("newXmlTree", toStrLit(a[0]))
+    var attrs = newCall("newStringTable", [])
+    var bracket = newNimNode(nnkBracket, a)
+    for i in 1..a.len-1:
+      if a[i].kind == nnkExprEqExpr: 
+        # ``key=value``: the key is passed as a string literal
+        attrs.add(toStrLit(a[i][0]))
+        attrs.add(a[i][1])
+      else:
+        bracket.add(a[i])
+    result.add(bracket)
+    # NOTE(review): presumably the call node's first son is the callee, so
+    # ``len > 1`` means at least one attribute was given - confirm
+    if attrs.len > 1: result.add(attrs)
+  else:
+    # NOTE(review): only the tag is passed here although ``newXmlTree``
+    # declares ``children`` without a default value - confirm this compiles
+    result = newCall("newXmlTree", toStrLit(a))
+
+macro `<>`*(x: expr): expr = 
+  ## Constructor macro for XML. Example usage:
+  ##
+  ## .. code-block:: nimrod
+  ##   <>a(href="http://force7.de/nimrod", "Nimrod rules.")
+  ##
+  ## Produces an XML tree for::
+  ##
+  ##  <a href="http://force7.de/nimrod">Nimrod rules.</a>
+  ##
+  result = xmlConstructor(x)
+
+
+
diff --git a/lib/pure/xmltreeparser.nim b/lib/pure/xmltreeparser.nim
new file mode 100644
index 000000000..5a48f9e8b
--- /dev/null
+++ b/lib/pure/xmltreeparser.nim
@@ -0,0 +1,52 @@
+#
+#
+#            Nimrod's Runtime Library
+#        (c) Copyright 2010 Andreas Rumpf
+#
+#    See the file "copying.txt", included in this
+#    distribution, for details about the copyright.
+#
+
+## This module parses an XML document and creates its XML tree representation.
+
+import streams, parsexml, xmltree
+
+
+proc parse*(x: var TXmlParser, father: PXmlNode) =
+  ## Not implemented yet. This is a placeholder that does nothing; neither
+  ## `x` nor `father` is touched.
+  nil # explicit empty statement so that the proc has a valid body
+
+proc parseXml*(s: PStream, filename: string, 
+               errors: var seq[string]): PXmlNode = 
+  ## parses the XML from stream `s` and returns a ``PXmlNode``.
+  ##
+  ## **Note:** This is work in progress. Currently only the comments of the
+  ## document are collected as children of the returned root element; all
+  ## other parser events are skipped and nothing is added to `errors` yet.
+  var x: TXmlParser
+  open(x, s, filename, {reportComments})
+  
+  result = newElement("html")
+  while true:
+    x.next()
+    case x.kind
+    of xmlEof: break      # end of input: leave the loop so the parser is closed
+    of xmlWhitespace: nil # just skip it
+    of xmlComment: 
+      result.add(newComment(x.text))
+    else: nil             # events that are not handled yet are skipped
+  
+  close(x)
+
+proc parseXml*(s: PStream): PXmlNode = 
+  ## parses the XML from stream `s` and returns a ``PXmlNode``. All parsing
+  ## errors are ignored: they are collected in a local sequence that is
+  ## discarded when this proc returns.
+  var errors: seq[string] = @[]
+  result = parseXml(s, "unknown_html_doc", errors)
+
+proc loadXml*(path: string, reportErrors = false): PXmlNode = 
+  ## Opens the file ``path`` for reading, parses it as XML and returns the
+  ## resulting ``PXmlNode``. Raises ``EIO`` if the file cannot be opened.
+  ## If `reportErrors` is true, every collected parsing error is ``echo``ed
+  ## after parsing.
+  var input = newFileStream(path, fmRead)
+  if input == nil: 
+    raise newException(EIO, "Unable to read file: " & path)
+  
+  var problems: seq[string] = @[]
+  result = parseXml(input, path, problems)
+  if not reportErrors: return
+  for i in 0..problems.len-1: echo(problems[i])
+
diff --git a/lib/system.nim b/lib/system.nim
index 79c014a7c..ae9d6b432 100755
--- a/lib/system.nim
+++ b/lib/system.nim
@@ -1257,7 +1257,9 @@ proc echo*[Ty](x: openarray[Ty]) {.magic: "Echo".}
   ## equivalent to ``writeln(stdout, x); flush(stdout)``. BUT: This is
   ## available for the ECMAScript target too!
 
-template newException(exceptn, message: expr): expr =
+template newException*(exceptn, message: expr): expr = 
+  ## creates an exception object of type "exceptn" and sets its ``msg`` field
+  ## to `message`. Returns the new exception object. 
   block: # open a new scope
     var
       e: ref exceptn
diff --git a/web/news.txt b/web/news.txt
index 096ee8ba9..b4f3551d1 100755
--- a/web/news.txt
+++ b/web/news.txt
@@ -26,6 +26,7 @@ Additions
 - Added ``system.cstringArrayToSeq``.
 - Added ``system.lines(f: TFile)`` iterator.
 - Added ``system.delete``, ``system.del`` and ``system.insert`` for sequences.
+- Exported ``system.newException`` template.
 - Added ``cgi.decodeData(data: string): tuple[key, value: string]``.
 - Added ``ropes`` module.
 - Added ``sockets`` module.
@@ -36,6 +37,9 @@ Additions
 - Added ``unidecode`` module.
 - Added ``xmldom`` module.
 - Added ``xmldomparser`` module.
+- Added ``xmltree`` module.
+- Added ``xmltreeparser`` module.
+- Added ``htmlparser`` module.
 - Many wrappers now do not contain redundant name prefixes (like ``GTK_``,
   ``lua``). The new wrappers are available in ``lib/newwrap``. Change
   your configuration file to use these.
@@ -100,7 +104,7 @@ Changes affecting backwards compatibility
 - The compiler does not skip the linking step anymore even if no file
   has changed.
 - ``os.splitFile(".xyz")`` now returns ``("", ".xyz", "")`` instead of
-  ``("", "", ".xyz")``. Filenames starting with a dot are handled
+  ``("", "", ".xyz")``. So filenames starting with a dot are handled
   differently. 
 - ``strutils.split(s: string, seps: set[char])`` never yields the empty string
   anymore. This behaviour is probably more appropriate for whitespace splitting.