diff options
Diffstat (limited to 'lib')
-rw-r--r-- | lib/pure/htmlparser.nim | 30 |
1 files changed, 28 insertions, 2 deletions
diff --git a/lib/pure/htmlparser.nim b/lib/pure/htmlparser.nim index d60d2e583..060f0e386 100644 --- a/lib/pure/htmlparser.nim +++ b/lib/pure/htmlparser.nim @@ -17,11 +17,37 @@ ## ## echo loadHtml("mydirty.html") ## -## ## Every tag in the resulting tree is in lower case. ## ## **Note:** The resulting ``PXmlNode`` already uses the ``clientData`` field, ## so it cannot be used by clients of this library. +## +## Example: Transforming hyperlinks +## ================================ +## +## This code demonstrates how you can iterate over all the tags in an HTML file +## and write back the modified version. In this case we look for hyperlinks +## ending with the extension ``.rst`` and convert them to ``.html``. +## +## .. code-block:: nimrod +## +## import htmlparser +## import xmltree # To use '$' for PXmlNode +## import strtabs # To access PXmlAttributes +## import os # To use splitFile +## import strutils # To use cmpIgnoreCase +## +## proc transformHyperlinks() = +## let html = loadHTML("input.html") +## +## for a in html.findAll("a"): +## let href = a.attrs["href"] +## if not href.isNil: +## let (dir, filename, ext) = splitFile(href) +## if cmpIgnoreCase(ext, ".rst") == 0: +## a.attrs["href"] = dir / filename & ".html" +## +## writeFile("output.html", $html) import strutils, streams, parsexml, xmltree, unicode, strtabs @@ -528,7 +554,7 @@ proc parseHtml*(s: PStream, filename: string, ## parses the XML from stream `s` and returns a ``PXmlNode``. Every ## occured parsing error is added to the `errors` sequence. var x: TXmlParser - open(x, s, filename, {reportComments}) + open(x, s, filename, {reportComments, reportWhitespace}) next(x) # skip the DOCTYPE: if x.kind == xmlSpecial: next(x) |