summary refs log tree commit diff stats
path: root/examples/htmlrefs.nim
blob: 8b668325f0a87d30c1c8c0bcb976104d39d36bf2 (plain) (blame)
1
2
pre { line-height: 125%; }
td.linenos .normal { color: inherit; background-color: transparent; padding-left: 5px; padding-right: 5px; }
span.linenos { color: inherit; background-color: transparent; padding-left: 5px; padding-right: 5px; }
td.linenos .special { color: #000000; background-color: #ffffc0; padding-left: 5px; padding-right: 5px; }
span.linenos.special { color: #000000; background-color: #ffffc0; padding-left: 5px; padding-right: 5px; }
.highlight .hll { background-color: #ffffcc }
.highlight .c { color: #888888 } /* Comment */
.highlight .err { color: #a61717; background-color: #e3d2d2 } /* Error */
.highlight .k { color: #008800; font-weight: bold } /* Keyword */
.highlight .ch { color: #888888 } /* Comment.Hashbang */
.highlight .cm { color: #888888 } /* Comment.Multiline */
.highlight .cp { color: #cc0000; font-weight: bold } /* Comment.Preproc */
.highlight .cpf { color: #888888 } /* Comment.PreprocFile */
.highlight .c1 { color: #888888 } /* Comment.Single */
.highlight .cs { color: #cc0000; font-weight: bold; background-color: #fff0f0 } /* Comment.Special */
.highlight .gd { color: #000000; background-color: #ffdddd } /* Generic.Deleted */
.highlight .ge { font-style: italic } /* Generic.Emph */
.highlight .ges { font-weight: bold; font-style: italic } /* Generic.EmphStrong */
.highlight .gr { color: #aa0000 } /* Generic.Error */
.highlight .gh { color: #333333 } /* Generic.Heading */
.highlight .gi { color: #000000; background-color: #ddffdd } /* Generic.Inserted */
.highlight .go { color: #888888 } /* Generic.Output */
.highlight .gp { color: #555555 } /* Generic.Prompt */
.highlight .gs { font-weight: bold } /* Generic.Strong */
.highlight .gu { color: #6
# Example program to show the new parsexml module
# This program reads an HTML file and writes all its used links to stdout.
# Errors and whitespace are ignored.

import os, streams, parsexml, strutils

proc `=?=` (a, b: string): bool = 
  # little trick: define our own comparator that ignores case
  return cmpIgnoreCase(a, b) == 0

if paramCount() < 1: 
  quit("Usage: htmlrefs filename[.html]")

var links = 0 # count the number of links
var filename = addFileExt(ParamStr(1), "html")
var s = newFileStream(filename, fmRead)
if s == nil: quit("cannot open the file " & filename)
var x: TXmlParser
open(x, s, filename)
next(x) # get first event
block mainLoop:
  while true:
    case x.kind
    of xmlElementOpen: 
      # the <a href = "xyz"> tag we are interested in always has an attribute,
      # thus we search for ``xmlElementOpen`` and not for ``xmlElementStart``
      if x.elementName =?= "a": 
        x.next()
        if x.kind == xmlAttribute: 
          if x.attrKey =?= "href":
            var link = x.attrValue
            inc(links)
            # skip until we have an ``xmlElementClose`` event
            while true: 
              x.next()
              case x.kind
              of xmlEof: break mainLoop
              of xmlElementClose: break
              else: discard
            x.next() # skip ``xmlElementClose``
            # now we have the description for the ``a`` element
            var desc = ""
            while x.kind == xmlCharData: 
              desc.add(x.charData)
              x.next()
            Echo(desc & ": " & link)
      else:
        x.next()      
    of xmlEof: break # end of file reached
    of xmlError: 
      Echo(errorMsg(x))
      x.next()
    else: x.next() # skip other events

echo($links & " link(s) found!")
x.close()