summary refs log tree commit diff stats
path: root/lib
diff options
context:
space:
mode:
author: Andreas Rumpf <andreas@andreas-desktop>, 2010-02-12 00:03:18 +0100
committer: Andreas Rumpf <andreas@andreas-desktop>, 2010-02-12 00:03:18 +0100
commit: 16c0beb27c372ce431b54bab9750bbaab254acfb (patch)
tree: bde411156a230d29de9cbaefe30967755f90d6e4 /lib
parent: f721ddd75b61600a06995ee728f332fa96f45dd2 (diff)
download: Nim-16c0beb27c372ce431b54bab9750bbaab254acfb.tar.gz
further progress on the new XML processing modules
Diffstat (limited to 'lib')
-rwxr-xr-x[-rw-r--r--]lib/pure/htmlparser.nim0
-rwxr-xr-x[-rw-r--r--]lib/pure/httpserver.nim0
-rwxr-xr-xlib/pure/parsexml.nim6
-rwxr-xr-x[-rw-r--r--]lib/pure/unidecode/gen.py0
-rwxr-xr-x[-rw-r--r--]lib/pure/unidecode/unidecode.dat0
-rwxr-xr-x[-rw-r--r--]lib/pure/unidecode/unidecode.nim0
-rwxr-xr-x[-rw-r--r--]lib/pure/xmldom.nim0
-rwxr-xr-x[-rw-r--r--]lib/pure/xmldomparser.nim0
-rwxr-xr-x[-rw-r--r--]lib/pure/xmltree.nim18
-rwxr-xr-x[-rw-r--r--]lib/pure/xmltreeparser.nim102
10 files changed, 112 insertions, 14 deletions
diff --git a/lib/pure/htmlparser.nim b/lib/pure/htmlparser.nim
index df840e15c..df840e15c 100644..100755
--- a/lib/pure/htmlparser.nim
+++ b/lib/pure/htmlparser.nim
diff --git a/lib/pure/httpserver.nim b/lib/pure/httpserver.nim
index 2c85d8137..2c85d8137 100644..100755
--- a/lib/pure/httpserver.nim
+++ b/lib/pure/httpserver.nim
diff --git a/lib/pure/parsexml.nim b/lib/pure/parsexml.nim
index a209e8be0..598ae6c68 100755
--- a/lib/pure/parsexml.nim
+++ b/lib/pure/parsexml.nim
@@ -196,6 +196,12 @@ proc errorMsgExpected*(my: TXmlParser, tag: string): string =
   ## other error messages 
   result = "$1($2, $3) Error: $4" % [
     my.filename, $getLine(my), $getColumn(my), "<$1> expected" % tag]
+
+proc errorMsg*(my: TXmlParser, msg: string): string = 
+  ## returns `msg` formatted as ``filename(line, column) Error: msg``, the
+  ## same layout that the other error messages use
+  result = "$1($2, $3) Error: $4" % [
+    my.filename, $getLine(my), $getColumn(my), msg]
     
 proc markError(my: var TXmlParser, kind: TXmlError) {.inline.} = 
   my.err = kind
diff --git a/lib/pure/unidecode/gen.py b/lib/pure/unidecode/gen.py
index 8da0136ff..8da0136ff 100644..100755
--- a/lib/pure/unidecode/gen.py
+++ b/lib/pure/unidecode/gen.py
diff --git a/lib/pure/unidecode/unidecode.dat b/lib/pure/unidecode/unidecode.dat
index 9dff0a4a9..9dff0a4a9 100644..100755
--- a/lib/pure/unidecode/unidecode.dat
+++ b/lib/pure/unidecode/unidecode.dat
diff --git a/lib/pure/unidecode/unidecode.nim b/lib/pure/unidecode/unidecode.nim
index a665dd73e..a665dd73e 100644..100755
--- a/lib/pure/unidecode/unidecode.nim
+++ b/lib/pure/unidecode/unidecode.nim
diff --git a/lib/pure/xmldom.nim b/lib/pure/xmldom.nim
index 4e9d721d7..4e9d721d7 100644..100755
--- a/lib/pure/xmldom.nim
+++ b/lib/pure/xmldom.nim
diff --git a/lib/pure/xmldomparser.nim b/lib/pure/xmldomparser.nim
index 9df60cab8..9df60cab8 100644..100755
--- a/lib/pure/xmldomparser.nim
+++ b/lib/pure/xmldomparser.nim
diff --git a/lib/pure/xmltree.nim b/lib/pure/xmltree.nim
index aeec842d7..005969fc4 100644..100755
--- a/lib/pure/xmltree.nim
+++ b/lib/pure/xmltree.nim
@@ -18,13 +18,14 @@ type
     xnText,             ## a text element
     xnElement,          ## an element with 0 or more children
     xnCData,            ## a CDATA node
+    xnEntity,           ## an entity (like ``&thing;``)
     xnComment           ## an XML comment
   
   PXmlAttributes* = PStringTable ## an alias for a string to string mapping
   
   TXmlNode {.pure, final, acyclic.} = object 
     case k: TXmlNodeKind
-    of xnText, xnComment, xnCData: 
+    of xnText, xnComment, xnCData, xnEntity: 
       fText: string
     of xnElement:
       fTag: string
@@ -59,10 +60,15 @@ proc newCData*(cdata: string): PXmlNode =
   result = newXmlNode(xnCData)
   result.fText = cdata
 
+proc newEntity*(entity: string): PXmlNode = 
+  ## creates a new ``PXmlNode`` of kind ``xnEntity`` with the text `entity`.
+  result = newXmlNode(xnEntity)
+  result.fText = entity
+
 proc text*(n: PXmlNode): string {.inline.} = 
-  ## gets the associated text with the node `n`. `n` can be a CDATA, Text
-  ## or comment node.
-  assert n.k in {xnText, xnComment, xnCData}
+  ## gets the text associated with the node `n`. `n` must be a CDATA, text,
+  ## comment, or entity node; this is checked with an ``assert``.
+  assert n.k in {xnText, xnComment, xnCData, xnEntity}
   result = n.fText
 
 proc tag*(n: PXmlNode): string {.inline.} = 
@@ -181,6 +187,10 @@ proc add*(result: var string, n: PXmlNode, indent = 0, indWidth = 2) =
     result.add("<![CDATA[")
     result.add(n.fText)
     result.add("]]>")
+  of xnEntity:
+    result.add('&')
+    result.add(n.fText)
+    result.add(';')
 
 proc `$`*(n: PXmlNode): string =
   ## converts `n` into its string representation.
diff --git a/lib/pure/xmltreeparser.nim b/lib/pure/xmltreeparser.nim
index 5a48f9e8b..b7a9ba54a 100644..100755
--- a/lib/pure/xmltreeparser.nim
+++ b/lib/pure/xmltreeparser.nim
@@ -9,11 +9,87 @@
 
 ## This module parses an XML document and creates its XML tree representation.
 
-import streams, parsexml, xmltree
+import streams, parsexml, strtabs, xmltree, hxmlcommon
 
+type
+  EInvalidXml* = object of E_Base ## raised when the parsed XML is invalid
+    errors*: seq[string]          ## every parsing error that was detected
 
-proc parse*(x: var TXmlParser, father: PXmlNode) =
+proc raiseInvalidXml(errors: seq[string]) = 
+  var e: ref EInvalidXml
+  new(e)
+  e.msg = errors[0]     # first error is the message; needs errors.len > 0
+  e.errors = errors     # the complete list travels with the exception
+  raise e
+
+proc addNode(father, son: PXmlNode) = 
+  if son != nil: add(father, son)  # a nil `son` is silently dropped
+
+proc parse*(x: var TXmlParser, errors: var seq[string]): PXmlNode =
+  ## Parses the construct at the current parser event into a node; `result`
+  ## stays nil if the event yields none. Problems are appended to `errors`.
+  case x.kind
+  of xmlComment: 
+    result = newComment(x.text)
+    next(x)
+  of xmlCharData, xmlWhitespace:
+    result = newText(x.text)
+    next(x)
+  of xmlPI, xmlSpecial:
+    # we just ignore processing instructions for now
+    next(x)
+  of xmlError:
+    errors.add(errorMsg(x))
+    next(x)
+  of xmlElementStart, xmlElementOpen:    ## ``<elem>`` or ``<elem attr ...>``
+    var hasAttrs = x.kind == xmlElementOpen
+    result = newElement(x.elementName)
+    next(x)
+    if hasAttrs:
+      result.attr = newStringTable()
+      while true: 
+        case x.kind
+        of xmlAttribute:
+          result.attr[x.attrKey] = x.attrValue
+          next(x)
+        of xmlElementClose:
+          next(x)
+          break
+        of xmlError:
+          errors.add(errorMsg(x))
+          next(x)
+          break
+        else:
+          errors.add(errorMsg(x, "'>' expected"))
+          next(x)
+          break
+    while true:  # children; shared by both start-tag forms
+      case x.kind
+      of xmlElementEnd: 
+        if x.elementName == result.tag: 
+          next(x)
+        else:
+          errors.add(errorMsg(x, "</" & result.tag & "> expected"))
+          # do not skip it here!
+        break
+      of xmlEof:
+        errors.add(errorMsg(x, "</" & result.tag & "> expected"))
+        break
+      else:
+        result.addNode(parse(x, errors))
+  of xmlElementEnd:       ## ``</elem>``
+    errors.add(errorMsg(x, "unexpected ending tag: " & x.elementName))
+  of xmlAttribute, xmlElementClose:
+    errors.add(errorMsg(x, "<some_tag> expected"))
+    next(x)
+  of xmlCData: 
+    result = newCData(x.charData)
+    next(x)
+  of xmlEntity:
+    ## &entity;
+    ## XXX To implement!
+    next(x)
+  of xmlEof: nil
 
 proc parseXml*(s: PStream, filename: string, 
                errors: var seq[string]): PXmlNode = 
@@ -21,27 +97,31 @@ proc parseXml*(s: PStream, filename: string,
   ## occured parsing error is added to the `errors` sequence.
   var x: TXmlParser
   open(x, s, filename, {reportComments})
-  
-  result = newElement("html")
   while true:
     x.next()
     case x.kind
-    of xmlWhitespace: nil # just skip it
-    of xmlComment: 
-      result.add(newComment(x.text))
-  
+    of xmlElementOpen, xmlElementStart: 
+      result = parse(x, errors)  # only the first element is parsed
+      break
+    of xmlComment, xmlWhitespace: nil # just skip it
+    of xmlError:
+      errors.add(errorMsg(x))
+    else:  # any other event before an element, including end of file
+      errors.add(errorMsg(x, "<some_tag> expected"))
+      break
   close(x)
 
 proc parseXml*(s: PStream): PXmlNode = 
   ## parses the XTML from stream `s` and returns a ``PXmlNode``. All parsing
-  ## errors are ignored.
+  ## errors are turned into an ``EInvalidXml`` exception.
   var errors: seq[string] = @[]
   result = parseXml(s, "unknown_html_doc", errors)
+  if errors.len > 0: raiseInvalidXml(errors)
 
 proc loadXml*(path: string, reportErrors = false): PXmlNode = 
   ## Loads and parses XML from file specified by ``path``, and returns 
   ## a ``PXmlNode``. If `reportErrors` is true, the parsing errors are
-  ## ``echo``ed.
+  ## ``echo``ed; otherwise any parsing error raises ``EInvalidXml``.
   var s = newFileStream(path, fmRead)
   if s == nil: raise newException(EIO, "Unable to read file: " & path)
   
@@ -49,4 +129,6 @@ proc loadXml*(path: string, reportErrors = false): PXmlNode =
   result = parseXml(s, path, errors)
   if reportErrors: 
     for msg in items(errors): echo(msg)
+  elif errors.len > 0: 
+    raiseInvalidXml(errors)