summary refs log blame commit diff stats
path: root/examples/htmlrefs.nim
blob: 394932773a8fb09350c21aef5ed82ed724a6b240 (plain) (tree)
1
2
3
4
5
6
7
8
9
10


                                                                          


                                      
                                 


                                                             
                    


                                         
                                              
                                       
                                                     
                




                         
                      

                                                                              
                               
                
                                  



                                                             
                       



                                       
                           


                                                               
                                        

                                  
                                    
           
                
                                          
                
                       





                                      
# Example program to show the new parsexml module
# This program reads an HTML file and writes all its used links to stdout.
# Errors and whitespace are ignored.

import os, streams, parsexml, strutils

proc `=?=` (a, b: string): bool =
  # little trick: define our own comparator that ignores case
  return cmpIgnoreCase(a, b) == 0

if paramCount() < 1:
  quit("Usage: htmlrefs filename[.html]")

var links = 0 # count the number of links
var filename = addFileExt(paramStr(1), "html")
var s = newFileStream(filename, fmRead)
if s == nil: quit("cannot open the file " & filename)
var x: XmlParser
open(x, s, filename)
next(x) # get first event
block mainLoop:
  while true:
    case x.kind
    of xmlElementOpen:
      # the <a href = "xyz"> tag we are interested in always has an attribute,
      # thus we search for ``xmlElementOpen`` and not for ``xmlElementStart``
      if x.elementName =?= "a":
        x.next()
        if x.kind == xmlAttribute:
          if x.attrKey =?= "href":
            var link = x.attrValue
            inc(links)
            # skip until we have an ``xmlElementClose`` event
            while true:
              x.next()
              case x.kind
              of xmlEof: break mainLoop
              of xmlElementClose: break
              else: discard
            x.next() # skip ``xmlElementClose``
            # now we have the description for the ``a`` element
            var desc = ""
            while x.kind == xmlCharData:
              desc.add(x.charData)
              x.next()
            echo(desc & ": " & link)
      else:
        x.next()
    of xmlEof: break # end of file reached
    of xmlError:
      echo(errorMsg(x))
      x.next()
    else: x.next() # skip other events

echo($links & " link(s) found!")
x.close()