diff options
author | Andreas Rumpf <andreas@andreas-desktop> | 2010-02-12 00:03:18 +0100 |
---|---|---|
committer | Andreas Rumpf <andreas@andreas-desktop> | 2010-02-12 00:03:18 +0100 |
commit | 16c0beb27c372ce431b54bab9750bbaab254acfb (patch) | |
tree | bde411156a230d29de9cbaefe30967755f90d6e4 /lib | |
parent | f721ddd75b61600a06995ee728f332fa96f45dd2 (diff) | |
download | Nim-16c0beb27c372ce431b54bab9750bbaab254acfb.tar.gz |
further progress on the new XML processing modules
Diffstat (limited to 'lib')
-rwxr-xr-x[-rw-r--r--] | lib/pure/htmlparser.nim | 0 | ||||
-rwxr-xr-x[-rw-r--r--] | lib/pure/httpserver.nim | 0 | ||||
-rwxr-xr-x | lib/pure/parsexml.nim | 6 | ||||
-rwxr-xr-x[-rw-r--r--] | lib/pure/unidecode/gen.py | 0 | ||||
-rwxr-xr-x[-rw-r--r--] | lib/pure/unidecode/unidecode.dat | 0 | ||||
-rwxr-xr-x[-rw-r--r--] | lib/pure/unidecode/unidecode.nim | 0 | ||||
-rwxr-xr-x[-rw-r--r--] | lib/pure/xmldom.nim | 0 | ||||
-rwxr-xr-x[-rw-r--r--] | lib/pure/xmldomparser.nim | 0 | ||||
-rwxr-xr-x[-rw-r--r--] | lib/pure/xmltree.nim | 18 | ||||
-rwxr-xr-x[-rw-r--r--] | lib/pure/xmltreeparser.nim | 102 |
10 files changed, 112 insertions, 14 deletions
diff --git a/lib/pure/htmlparser.nim b/lib/pure/htmlparser.nim index df840e15c..df840e15c 100644..100755 --- a/lib/pure/htmlparser.nim +++ b/lib/pure/htmlparser.nim diff --git a/lib/pure/httpserver.nim b/lib/pure/httpserver.nim index 2c85d8137..2c85d8137 100644..100755 --- a/lib/pure/httpserver.nim +++ b/lib/pure/httpserver.nim diff --git a/lib/pure/parsexml.nim b/lib/pure/parsexml.nim index a209e8be0..598ae6c68 100755 --- a/lib/pure/parsexml.nim +++ b/lib/pure/parsexml.nim @@ -196,6 +196,12 @@ proc errorMsgExpected*(my: TXmlParser, tag: string): string = ## other error messages result = "$1($2, $3) Error: $4" % [ my.filename, $getLine(my), $getColumn(my), "<$1> expected" % tag] + +proc errorMsg*(my: TXmlParser, msg: string): string = + ## returns an error message with text `msg` in the same format as the + ## other error messages + result = "$1($2, $3) Error: $4" % [ + my.filename, $getLine(my), $getColumn(my), msg] proc markError(my: var TXmlParser, kind: TXmlError) {.inline.} = my.err = kind diff --git a/lib/pure/unidecode/gen.py b/lib/pure/unidecode/gen.py index 8da0136ff..8da0136ff 100644..100755 --- a/lib/pure/unidecode/gen.py +++ b/lib/pure/unidecode/gen.py diff --git a/lib/pure/unidecode/unidecode.dat b/lib/pure/unidecode/unidecode.dat index 9dff0a4a9..9dff0a4a9 100644..100755 --- a/lib/pure/unidecode/unidecode.dat +++ b/lib/pure/unidecode/unidecode.dat diff --git a/lib/pure/unidecode/unidecode.nim b/lib/pure/unidecode/unidecode.nim index a665dd73e..a665dd73e 100644..100755 --- a/lib/pure/unidecode/unidecode.nim +++ b/lib/pure/unidecode/unidecode.nim diff --git a/lib/pure/xmldom.nim b/lib/pure/xmldom.nim index 4e9d721d7..4e9d721d7 100644..100755 --- a/lib/pure/xmldom.nim +++ b/lib/pure/xmldom.nim diff --git a/lib/pure/xmldomparser.nim b/lib/pure/xmldomparser.nim index 9df60cab8..9df60cab8 100644..100755 --- a/lib/pure/xmldomparser.nim +++ b/lib/pure/xmldomparser.nim diff --git a/lib/pure/xmltree.nim b/lib/pure/xmltree.nim index aeec842d7..005969fc4 100644..100755 --- a/lib/pure/xmltree.nim +++ b/lib/pure/xmltree.nim @@ -18,13 +18,14 @@ type xnText, ## a text element xnElement, ## an element with 0 or more children xnCData, ## a CDATA node + xnEntity, ## an entity (like ``&thing;``) xnComment ## an XML comment PXmlAttributes* = PStringTable ## an alias for a string to string mapping TXmlNode {.pure, final, acyclic.} = object case k: TXmlNodeKind - of xnText, xnComment, xnCData: + of xnText, xnComment, xnCData, xnEntity: fText: string of xnElement: fTag: string @@ -59,10 +60,15 @@ proc newCData*(cdata: string): PXmlNode = result = newXmlNode(xnCData) result.fText = cdata +proc newEntity*(entity: string): PXmlNode = + ## creates a new ``PXmlNode`` of kind ``xnEntity`` with the text `entity`. + result = newXmlNode(xnCData) + result.fText = cdata + proc text*(n: PXmlNode): string {.inline.} = - ## gets the associated text with the node `n`. `n` can be a CDATA, Text - ## or comment node. - assert n.k in {xnText, xnComment, xnCData} + ## gets the associated text with the node `n`. `n` can be a CDATA, Text, + ## comment, or entity node. + assert n.k in {xnText, xnComment, xnCData, xnEntity} result = n.fText proc tag*(n: PXmlNode): string {.inline.} = @@ -181,6 +187,10 @@ proc add*(result: var string, n: PXmlNode, indent = 0, indWidth = 2) = result.add("<![CDATA[") result.add(n.fText) result.add("]]>") + of xnEntity: + result.add('&') + result.add(n.fText) + result.add(';') proc `$`*(n: PXmlNode): string = ## converts `n` into its string representation. diff --git a/lib/pure/xmltreeparser.nim b/lib/pure/xmltreeparser.nim index 5a48f9e8b..b7a9ba54a 100644..100755 --- a/lib/pure/xmltreeparser.nim +++ b/lib/pure/xmltreeparser.nim @@ -9,11 +9,87 @@ ## This module parses an XML document and creates its XML tree representation. -import streams, parsexml, xmltree +import streams, parsexml, strtabs, xmltree, hxmlcommon +type + EInvalidXml* = object of E_Base ## exception that is raised for invalid XML + errors*: seq[string] ## all detected parsing errors -proc parse*(x: var TXmlParser, father: PXmlNode) = +proc raiseInvalidXml(errors: seq[string]) = + var e: ref EInvalidXml + new(e) + e.msg = errors[0] + e.errors = errors + raise e + +proc addNode(father, son: PXmlNode) = + if son != nil: add(father, son) + +proc parse*(x: var TXmlParser, errors: var seq[string]): PXmlNode = + case x.kind + of xmlComment: + result = newComment(x.text) + next(x) + of xmlCharData, xmlWhitespace: + result = newText(x.text) + next(x) + of xmlPI, xmlSpecial: + # we just ignore processing instructions for now + next(x) + of xmlError: + errors.add(errorMsg(x)) + next(x) + of xmlElementStart: ## ``<elem>`` + result = newElement(x.elementName) + next(x) + while true: + case x.kind + of xmlElementEnd: + if x.elementName == result.tag: + next(x) + else: + errors.add(errorMsg(x, "</$1> expected" % result.tag)) + # do not skip it here! + break + of xmlEof: + errors.add(errorMsg(x, "</$1> expected" % result.tag)) + break + else: + result.addNode(parse(x, errors)) + of xmlElementEnd: ## ``</elem>`` + errors.add(errorMsg(x, "unexpected ending tag: " & x.elementName)) + of xmlElementOpen: ## ``<elem + result = newElement(x.elementName) + next(x) + result.attr = newStringTable() + while true: + case x.kind + of xmlAttribute: + result.attr[x.attrKey] = x.attrValue + next(x) + of xmlElementClose: + next(x) + break + of xmlError: + errors.add(errorMsg(x)) + next(x) + break + else: + errors.add(errorMsg(x, "'>' expected" % result.tag)) + next(x) + break + of xmlAttribute, xmlElementClose: + errors.add(errorMsg(x, "<some_tag> expected") + next(x) + of xmlCData: + result = newCData(x.charData) + next(x) + of xmlEntity: + ## &entity; + ## XXX To implement! + next(x) + of xmlEof: nil proc parseXml*(s: PStream, filename: string, errors: var seq[string]): PXmlNode = @@ -21,27 +97,31 @@ proc parseXml*(s: PStream, filename: string, ## occured parsing error is added to the `errors` sequence. var x: TXmlParser open(x, s, filename, {reportComments}) - - result = newElement("html") while true: x.next() case x.kind - of xmlWhitespace: nil # just skip it - of xmlComment: - result.add(newComment(x.text)) - + of xmlElementOpen, xmlElementStart: + result = parse(x, errors) + break + of xmlComment, xmlWhitespace: nil # just skip it + of xmlError: + errors.add(errorMsg(x)) + else: + errors.add(errorMsg(x, "<some_tag> expected") + break close(x) proc parseXml*(s: PStream): PXmlNode = ## parses the XTML from stream `s` and returns a ``PXmlNode``. All parsing - ## errors are ignored. + ## errors are turned into an ``EInvalidXML`` exception. var errors: seq[string] = @[] result = parseXml(s, "unknown_html_doc", errors) + if errors.len > 0: raiseInvalidXMl(errors) proc loadXml*(path: string, reportErrors = false): PXmlNode = ## Loads and parses XML from file specified by ``path``, and returns ## a ``PXmlNode``. If `reportErrors` is true, the parsing errors are - ## ``echo``ed. + ## ``echo``ed, otherwise an exception is thrown. var s = newFileStream(path, fmRead) if s == nil: raise newException(EIO, "Unable to read file: " & path) @@ -49,4 +129,6 @@ proc loadXml*(path: string, reportErrors = false): PXmlNode = result = parseXml(s, path, errors) if reportErrors: for msg in items(errors): echo(msg) + elif errors.len > 0: + raiseInvalidXMl(errors) |