devel libs added

author: rumpf_a@web.de <> 2010-01-24 23:34:10 +0100
committer: rumpf_a@web.de <> 2010-01-24 23:34:10 +0100
commit: 5eea125ba73a5768a264c5d4fc8cc55fba8d5fe8 (patch)
tree: 9fc4fbef4ed5f672d7bd6842d487de33298ca73d /lib/devel
parent: 6bbed25d14c674837c40e761753e2bf6e26b1db2 (diff)
download: Nim-5eea125ba73a5768a264c5d4fc8cc55fba8d5fe8.tar.gz
2 files changed, 240 insertions, 0 deletions
diff --git a/lib/devel/httpclient.nim b/lib/devel/httpclient.nim
new file mode 100644
index 000000000..abea34ea6
--- /dev/null
+++ b/lib/devel/httpclient.nim
@@ -0,0 +1,176 @@
+import sockets, strutils, parseurl, pegs
+
+type
+  response = tuple[version: string, status: string, headers: seq[header], body: string]
+  header = tuple[htype: string, hvalue: string] 
+  # ``response``: one parsed reply; ``header``: one ``htype: hvalue`` field.
+  EInvalidHttp* = object of EBase ## exception that is raised when server does
+                                  ## not conform to the implemented HTTP
+                                  ## protocol
+
+proc httpError(msg: string) =
+  var err: ref EInvalidHttp # raised below with ``msg`` as its message
+  new(err)
+  err.msg = msg
+  raise err
+
+proc parseResponse(data: string): response =
+  ## Splits the raw reply ``data`` into HTTP version, status text, header
+  ## list and body. Calls ``httpError`` (raising ``EInvalidHttp``) when the
+  ## status line or a header line is malformed.
+  var i = 0
+
+  # Status line, e.g. ``HTTP/1.1 200 OK``: capture 0 is the version and
+  # capture 1 is the remainder of the line.
+  var matches: array[0..1, string]
+  var L = data.matchLen(peg"\i 'HTTP/' {'1.1'/'1.0'} \s+ {(!\n .)*}\n",
+                        matches, i)
+  if L < 0: httpError("invalid HTTP header")
+
+  result.version = matches[0]
+  result.status = matches[1]
+  inc(i, L)
+
+  # Header section: ``htype: hvalue`` lines, ended by an empty line.
+  result.headers = @[]
+  while true:
+    # The empty line is tested *before* a header is read, so a reply that
+    # carries no header fields at all is accepted too.
+    if data[i] == '\C': inc(i)
+    if data[i] == '\L':
+      inc(i)
+      break
+
+    var key = ""
+    while data[i] != ':':
+      if data[i] == '\0': httpError("invalid HTTP header, ':' expected")
+      key.add(data[i])
+      inc(i)
+    inc(i) # skip ':'
+    if data[i] == ' ': inc(i)
+    var val = ""
+    while data[i] notin {'\C', '\L', '\0'}:
+      val.add(data[i])
+      inc(i)
+
+    result.headers.add((key, val))
+
+    # A header line has to end in LF; a preceding CR is optional.
+    if data[i] == '\C': inc(i)
+    if data[i] == '\L': inc(i)
+    else: httpError("invalid HTTP header, CR-LF expected")
+
+  # The body is everything after the empty line that ends the headers.
+  result.body = data.copy(i)
+  
+
+proc readChunked(data: var string, s: TSocket): response =
+  ## Reads the remainder of a ``Transfer-Encoding: chunked`` reply from ``s``
+  ## into ``data`` and returns the parsed response with the chunk framing
+  ## removed from its body. Trailers after the last chunk are ignored.
+  # Receive until the terminating zero-sized chunk (0\c\L\c\L) is present.
+  # ``contains`` is used because trailers may follow it. The test runs before
+  # every ``recv`` so nothing is read when ``data`` is already complete, and
+  # an empty ``recv`` result is treated as end of stream.
+  while not data.contains("0\c\L\c\L"):
+    var more = s.recv()
+    if more == "": break
+    data.add(more)
+
+  result = parseResponse(data) #Re-parse the body
+
+  # Each chunk is ``<hex size>[;extension]\c\L<size bytes of data>\c\L``.
+  # The payload is taken by byte count, so line breaks inside a chunk are
+  # kept exactly as they were sent.
+  var body = result.body
+  var newBody: string = ""
+  var pos = 0
+  while pos < body.len():
+    var sizeStr = ""
+    while body[pos] notin {'\C', '\L', ';', ' ', '\0'}:
+      sizeStr.add(body[pos])
+      inc(pos)
+    while body[pos] notin {'\L', '\0'}: inc(pos) # skip extension and CR
+    if body[pos] == '\L': inc(pos)
+    var chunkLength = ParseHexInt(sizeStr)
+    if chunkLength == 0: break # terminating chunk
+    var stop = min(pos + chunkLength, body.len()) # clamp a truncated chunk
+    newBody.add(body.copy(pos, stop - 1))
+    pos = stop
+    if body[pos] == '\C': inc(pos)
+    if body[pos] == '\L': inc(pos)
+  #Make the parsed body the new body
+  result.body = newBody
+    
+proc getHeaderValue*(headers: seq[header], name: string): string =
+  ## Looks up the header called ``name`` in ``headers``, ignoring case.
+  ## Yields the value of the first match, or "" when there is none.
+  for field in items(headers):
+    if cmpIgnoreCase(field.htype, name) == 0:
+      return field.hvalue
+  return ""
+
+proc request*(url: string): response =
+  ## Sends an HTTP/1.1 ``GET`` for ``url`` and returns the parsed response.
+  ## The body end is found by the first matching rule of
+  ## http://tools.ietf.org/html/rfc2616#section-4.4 : chunked (NR.2),
+  ## Content-Length (NR.3), connection close (NR.5); NR.4 is still TODO.
+  ## NOTE: always connects to port 80; the port parsed from ``url`` is unused.
+  var r = parse(url)
+
+  # The query string is part of the request target, so it is sent along.
+  var headers: string
+  if r.path != "":
+    headers = "GET " & r.path & r.query & " HTTP/1.1\c\L"
+  else:
+    headers = "GET / HTTP/1.1\c\L"
+  headers = headers & "Host: " & r.subdomain & r.domain & "\c\L\c\L"
+
+  var s = socket()
+  # The socket is closed in ``finally``, also when an exception is raised.
+  try:
+    s.connect(r.subdomain & r.domain, TPort(80))
+    s.send(headers)
+    var data = s.recv()
+    result = parseResponse(data)
+
+    var transferEncoding = getHeaderValue(result.headers, "Transfer-Encoding")
+    var contentLength = getHeaderValue(result.headers, "Content-Length")
+    var connection = getHeaderValue(result.headers, "Connection")
+    if transferEncoding == "chunked":
+      # Takes precedence over Content-Length. The branches are exclusive so
+      # the de-chunked body is not replaced by a re-parse of raw ``data``.
+      result = readChunked(data, s)
+    elif contentLength != "":
+      # Content-Length counts body bytes only, so the size of the header
+      # section is added before comparing against ``data.len()``.
+      var headerLen = data.len() - result.body.len()
+      var wanted = headerLen + contentLength.parseint()
+      while data.len() < wanted:
+        var nD = s.recv()
+        if nD == "": break # end of stream; return what has arrived
+        data.add(nD)
+      result = parseResponse(data)
+    elif connection == "close":
+      # No length information: read until ``recv`` yields nothing more.
+      while True:
+        var nD = s.recv()
+        if nD == "": break
+        data.add(nD)
+      result = parseResponse(data)
+  finally:
+    s.close()
+
+proc get*(url: string): response =
+  return request(url) # forwards to ``request`` unchanged
+  
+
+when isMainModule:
+  # Demo; guarded so that importing this module performs no network request.
+  var r = get("http://www.google.co.uk/index.html")
+  echo("===================================")
+  echo(r.version & " " & r.status)
+  for htype, hvalue in items(r.headers):
+    echo(htype, ": ", hvalue)
+  echo("---------------------------------")
+  echo(r.body)
\ No newline at end of file
diff --git a/lib/devel/parseurl.nim b/lib/devel/parseurl.nim
new file mode 100644
index 000000000..769d07561
--- /dev/null
+++ b/lib/devel/parseurl.nim
@@ -0,0 +1,64 @@
+import regexprs, strutils
+
+type
+  TURL* = tuple[protocol, username, password,
+    subdomain, domain, port, path, query, anchor: string] ## result of ``parse``
+
+proc parse*(url: string): TURL =
+  ## Splits ``url`` into its components. ``protocol`` keeps its trailing
+  ## ``://``, ``subdomain`` its trailing ``.``; ``port``, ``query`` and
+  ## ``anchor`` keep their leading ``:``, ``?`` and ``#``. ``username``,
+  ## ``password``, ``path``, ``query`` and ``anchor`` are "" when absent.
+  const pattern = r"([a-zA-Z]+://)?(.+@)?(.+\.)?(\w+)(\.\w+)(:[0-9]+)?(/.+)?"
+  var m: array[0..7, string] #Array with the matches
+  discard regexprs.match(url, pattern, m)
+  # m[2] is ``user@`` or ``user:password@``. Unmatched groups are guarded
+  # against both nil and "" because their value is not visible from here.
+  var username: string = ""
+  var password: string = ""
+  if m[2] != nil and m[2] != "":
+    var userinfo = m[2].copy(0, m[2].len() - 2) # without the trailing '@'
+    var colon = userinfo.find(':')
+    if colon >= 0:
+      username = userinfo.copy(0, colon - 1)
+      password = userinfo.copy(colon + 1)
+    else:
+      username = userinfo
+  # m[7] is the ``/...`` tail. The anchor is cut off first and the query
+  # second, so a '?' or '#' inside either of them no longer loses data.
+  var path: string = ""
+  var query: string = ""
+  var anchor: string = ""
+  if m[7] != nil and m[7] != "":
+    path = m[7]
+    var hashPos = path.find('#')
+    if hashPos >= 0:
+      anchor = path.copy(hashPos)
+      path = path.copy(0, hashPos - 1)
+    var queryPos = path.find('?')
+    if queryPos >= 0:
+      query = path.copy(queryPos)
+      path = path.copy(0, queryPos - 1)
+  result = (protocol: m[1], username: username, password: password,
+    subdomain: m[3], domain: m[4] & m[5], port: m[6], path: path, query: query, anchor: anchor)
+ 
+when isMainModule:
+  proc dump(u: TURL) =
+    ## Prints every component of ``u`` as ``name=value`` plus a separator.
+    echo("protocol=" & u.protocol)
+    echo("username=" & u.username)
+    echo("password=" & u.password)
+    echo("subdomain=" & u.subdomain)
+    echo("domain=" & u.domain)
+    echo("port=" & u.port)
+    echo("path=" & u.path)
+    echo("query=" & u.query)
+    echo("anchor=" & u.anchor)
+    echo("---------------")
+
+  # plain host with a query string
+  dump(parse(r"http://google.co.uk/search?var=bleahdhsad"))
+  # user info, explicit port, query and anchor
+  dump(parse(r"http://dom96:test@google.com:80/search.php?q=562gs6&foo=6gs6&bar=7hs6#test"))
+  # several subdomain labels and ':' characters inside the query
+  dump(parse(r"http://www.google.co.uk/search?q=multiple+subdomains&ie=utf-8&oe=utf-8&aq=t&rls=org.mozilla:pl:official&client=firefox-a"))
\ No newline at end of file
author	rumpf_a@web.de <>	2010-01-24 23:34:10 +0100
committer	rumpf_a@web.de <>	2010-01-24 23:34:10 +0100
commit	5eea125ba73a5768a264c5d4fc8cc55fba8d5fe8 (patch)
tree	9fc4fbef4ed5f672d7bd6842d487de33298ca73d /lib/devel
parent	6bbed25d14c674837c40e761753e2bf6e26b1db2 (diff)
download	Nim-5eea125ba73a5768a264c5d4fc8cc55fba8d5fe8.tar.gz