summary refs log tree commit diff stats
path: root/examples/htmlrefs.nim
diff options
context:
space:
mode:
Diffstat (limited to 'examples/htmlrefs.nim')
-rw-r--r-- examples/htmlrefs.nim 58
1 files changed, 58 insertions, 0 deletions
diff --git a/examples/htmlrefs.nim b/examples/htmlrefs.nim
new file mode 100644
index 000000000..cf1b3be28
--- /dev/null
+++ b/examples/htmlrefs.nim
@@ -0,0 +1,58 @@
+# Example program to show the new parsexml module
+# This program reads an HTML file and writes all its used links to stdout.
+# Parse errors are reported and skipped; whitespace is ignored.
+# (c) 2009 Andreas Rumpf
+
+import os, streams, parsexml, strutils
+
+proc `=?=` (a, b: string): bool =
+  # Custom comparison operator: true when ``a`` and ``b`` compare equal under
+  # ``cmpIgnoreCase``, i.e. when letter case is disregarded.
+  result = cmpIgnoreCase(a, b) == 0
+
+# Main program: parse the HTML file named on the command line and print every
+# ``<a href="...">`` link together with the character data that follows the tag.
+if paramCount() < 1:
+  quit("Usage: htmlrefs filename[.html]")
+
+var links = 0 # number of ``href`` links found so far
+var filename = appendFileExt(ParamStr(1), "html")
+var s = newFileStream(filename, fmRead)
+# newFileStream yields nil when the file cannot be opened
+if s == nil: quit("cannot open the file " & filename)
+var x: TXmlParser
+open(x, s, filename)
+next(x) # get first event
+block mainLoop:
+  while true:
+    case x.kind
+    of xmlElementOpen:
+      # the <a href = "xyz"> tag we are interested in always has an attribute,
+      # thus we search for ``xmlElementOpen`` and not for ``xmlElementStart``
+      if x.elementName =?= "a":
+        var link = ""
+        var hasHref = false
+        # Inspect every attribute up to the ``xmlElementClose`` event, so that
+        # ``href`` is found even when it is not the first attribute of the tag.
+        while true:
+          x.next()
+          case x.kind
+          of xmlEof: break mainLoop
+          of xmlElementClose: break
+          of xmlAttribute:
+            # only the first ``href`` of a tag is taken into account
+            if not hasHref and x.attrKey =?= "href":
+              link = x.attrValue
+              hasHref = true
+              inc(links)
+          of xmlError:
+            # report errors inside the tag the same way as at top level
+            Echo(errorMsg(x))
+          else: nil
+        x.next() # skip ``xmlElementClose``
+        if hasHref:
+          # the character data right after the tag is the link's description
+          var desc = ""
+          while x.kind == xmlCharData:
+            desc.add(x.charData)
+            x.next()
+          Echo(desc & ": " & link)
+      else:
+        x.next()
+    of xmlEof: break # end of file reached
+    of xmlError:
+      Echo(errorMsg(x))
+      x.next()
+    else: x.next() # skip other events
+
+echo($links & " link(s) found!")
+x.close()
+