diff options
Diffstat (limited to 'lib/pure/parsexml.nim')
-rw-r--r-- | lib/pure/parsexml.nim | 178 |
1 files changed, 92 insertions, 86 deletions
diff --git a/lib/pure/parsexml.nim b/lib/pure/parsexml.nim index 6ec77ebdb..c760799a2 100644 --- a/lib/pure/parsexml.nim +++ b/lib/pure/parsexml.nim @@ -36,43 +36,43 @@ The file ``examples/htmltitle.nim`` demonstrates how to use the XML parser to accomplish a simple task: To determine the title of an HTML document. -.. code-block:: nim + ```nim + # Example program to show the parsexml module + # This program reads an HTML file and writes its title to stdout. + # Errors and whitespace are ignored. - # Example program to show the parsexml module - # This program reads an HTML file and writes its title to stdout. - # Errors and whitespace are ignored. + import std/[os, streams, parsexml, strutils] - import os, streams, parsexml, strutils + if paramCount() < 1: + quit("Usage: htmltitle filename[.html]") - if paramCount() < 1: - quit("Usage: htmltitle filename[.html]") - - var filename = addFileExt(paramStr(1), "html") - var s = newFileStream(filename, fmRead) - if s == nil: quit("cannot open the file " & filename) - var x: XmlParser - open(x, s, filename) - while true: - x.next() - case x.kind - of xmlElementStart: - if cmpIgnoreCase(x.elementName, "title") == 0: - var title = "" - x.next() # skip "<title>" - while x.kind == xmlCharData: - title.add(x.charData) - x.next() - if x.kind == xmlElementEnd and cmpIgnoreCase(x.elementName, "title") == 0: - echo("Title: " & title) - quit(0) # Success! - else: - echo(x.errorMsgExpected("/title")) + var filename = addFileExt(paramStr(1), "html") + var s = newFileStream(filename, fmRead) + if s == nil: quit("cannot open the file " & filename) + var x: XmlParser + open(x, s, filename) + while true: + x.next() + case x.kind + of xmlElementStart: + if cmpIgnoreCase(x.elementName, "title") == 0: + var title = "" + x.next() # skip "<title>" + while x.kind == xmlCharData: + title.add(x.charData) + x.next() + if x.kind == xmlElementEnd and cmpIgnoreCase(x.elementName, "title") == 0: + echo("Title: " & title) + quit(0) # Success! + else: + echo(x.errorMsgExpected("/title")) - of xmlEof: break # end of file reached - else: discard # ignore other events + of xmlEof: break # end of file reached + else: discard # ignore other events - x.close() - quit("Could not determine title!") + x.close() + quit("Could not determine title!") + ``` ]## @@ -85,69 +85,72 @@ The file ``examples/htmlrefs.nim`` demonstrates how to use the XML parser to accomplish another simple task: To determine all the links an HTML document contains. -.. code-block:: nim - - # Example program to show the new parsexml module - # This program reads an HTML file and writes all its used links to stdout. - # Errors and whitespace are ignored. + ```nim + # Example program to show the new parsexml module + # This program reads an HTML file and writes all its used links to stdout. + # Errors and whitespace are ignored. - import os, streams, parsexml, strutils + import std/[os, streams, parsexml, strutils] - proc `=?=` (a, b: string): bool = - # little trick: define our own comparator that ignores case - return cmpIgnoreCase(a, b) == 0 + proc `=?=` (a, b: string): bool = + # little trick: define our own comparator that ignores case + return cmpIgnoreCase(a, b) == 0 - if paramCount() < 1: - quit("Usage: htmlrefs filename[.html]") + if paramCount() < 1: + quit("Usage: htmlrefs filename[.html]") - var links = 0 # count the number of links - var filename = addFileExt(paramStr(1), "html") - var s = newFileStream(filename, fmRead) - if s == nil: quit("cannot open the file " & filename) - var x: XmlParser - open(x, s, filename) - next(x) # get first event - block mainLoop: - while true: - case x.kind - of xmlElementOpen: - # the <a href = "xyz"> tag we are interested in always has an attribute, - # thus we search for ``xmlElementOpen`` and not for ``xmlElementStart`` - if x.elementName =?= "a": - x.next() - if x.kind == xmlAttribute: - if x.attrKey =?= "href": - var link = x.attrValue - inc(links) - # skip until we have an ``xmlElementClose`` event - while true: - x.next() - case x.kind - of xmlEof: break mainLoop - of xmlElementClose: break - else: discard - x.next() # skip ``xmlElementClose`` - # now we have the description for the ``a`` element - var desc = "" - while x.kind == xmlCharData: - desc.add(x.charData) - x.next() - echo(desc & ": " & link) - else: - x.next() - of xmlEof: break # end of file reached - of xmlError: - echo(errorMsg(x)) + var links = 0 # count the number of links + var filename = addFileExt(paramStr(1), "html") + var s = newFileStream(filename, fmRead) + if s == nil: quit("cannot open the file " & filename) + var x: XmlParser + open(x, s, filename) + next(x) # get first event + block mainLoop: + while true: + case x.kind + of xmlElementOpen: + # the <a href = "xyz"> tag we are interested in always has an attribute, + # thus we search for ``xmlElementOpen`` and not for ``xmlElementStart`` + if x.elementName =?= "a": + x.next() + if x.kind == xmlAttribute: + if x.attrKey =?= "href": + var link = x.attrValue + inc(links) + # skip until we have an ``xmlElementClose`` event + while true: + x.next() + case x.kind + of xmlEof: break mainLoop + of xmlElementClose: break + else: discard + x.next() # skip ``xmlElementClose`` + # now we have the description for the ``a`` element + var desc = "" + while x.kind == xmlCharData: + desc.add(x.charData) + x.next() + echo(desc & ": " & link) + else: x.next() - else: x.next() # skip other events + of xmlEof: break # end of file reached + of xmlError: + echo(errorMsg(x)) + x.next() + else: x.next() # skip other events - echo($links & " link(s) found!") - x.close() + echo($links & " link(s) found!") + x.close() + ``` ]## import - strutils, lexbase, streams, unicode + std/[strutils, lexbase, streams, unicode] + +when defined(nimPreviewSlimSystem): + import std/[assertions, syncio] # the parser treats ``<br />`` as ``<br></br>`` @@ -666,6 +669,9 @@ proc parseAttribute(my: var XmlParser) = parseEntity(my, my.b) my.kind = xmlAttribute # parseEntity overwrites my.kind! pos = my.bufpos + elif c == '/': + pos = lexbase.handleRefillChar(my, pos) + add(my.b, '/') else: add(my.b, c) inc(pos) @@ -786,7 +792,7 @@ proc next*(my: var XmlParser) = my.state = stateNormal when not defined(testing) and isMainModule: - import os + import std/os var s = newFileStream(paramStr(1), fmRead) if s == nil: quit("cannot open the file" & paramStr(1)) var x: XmlParser |