httpclient uses strtabs

author: Andreas Rumpf <rumpf_a@web.de> 2010-02-07 20:30:05 +0100
committer: Andreas Rumpf <rumpf_a@web.de> 2010-02-07 20:30:05 +0100
commit: 7db3d97cf980d11b5196e1f5936e0308f70d7aff (patch)
tree: 6ae217f3b9106bff26cfe5f173e72fe989443855 /lib
parent: 3644bc7212115aa47a1d5087a4afc31e6d28b6fa (diff)
download: Nim-7db3d97cf980d11b5196e1f5936e0308f70d7aff.tar.gz
3 files changed, 218 insertions, 142 deletions
diff --git a/lib/devel/parseurl.nim b/lib/devel/parseurl.nim
deleted file mode 100755
index 756d5a891..000000000
--- a/lib/devel/parseurl.nim
+++ /dev/null
@@ -1,95 +0,0 @@
-#
-#
-#            Nimrod's Runtime Library
-#        (c) Copyright 2010 Dominik Picheta
-#
-#    See the file "copying.txt", included in this
-#    distribution, for details about the copyright.
-#
-
-## Parses & constructs URLs.
-
-import strutils
-
-type
-  TURL* = tuple[      ## represents a *Uniform Resource Locator* (URL)
-                      ## any optional component is "" if it does not exist
-    scheme, username, password, 
-    hostname, port, path, query, anchor: string]
-    
-proc parse*(url: string): TURL =
-  var i: int = 0
-
-  var scheme, username, password: string = ""
-  var hostname, port, path, query, anchor: string = ""
-
-  var temp: string = ""
-  
-  if url[i] != '/': #url isn't a relative path
-    while True:
-      #Scheme
-      if url[i] == ':':
-        if url[i+1] == '/' and url[i+2] == '/':
-          scheme = temp
-          temp = ""
-          inc(i, 3) #Skip the //
-      #Authority(username, password)
-      if url[i] == '@':
-        username = temp.split(':')[0]
-        if temp.split(':').len() > 1:
-          password = temp.split(':')[1]
-        temp = ""
-        inc(i) #Skip the @ 
-      #hostname(subdomain, domain, port)
-      if url[i] == '/' or url[i] == '\0':
-        #TODO
-        hostname = temp
-        if hostname.split(':').len() > 1:
-          port = hostname.split(':')[1]
-          hostname = hostname.split(':')[0]
-        
-        temp = ""
-        break
-      
-      temp.add(url[i])
-      inc(i)
-
-  #Path
-  while True:
-    if url[i] == '?':
-      path = temp
-      temp = ""
-    if url[i] == '#':
-      if temp[0] == '?':
-        query = temp
-      else:
-        path = temp
-      temp = ""
-      
-    if url[i] == '\0':
-      if temp[0] == '?':
-        query = temp
-      elif temp[0] == '#':
-        anchor = temp
-      else:
-        path = temp
-      break
-      
-    temp.add(url[i])
-    inc(i)
-    
-  return (scheme, username, password, hostname, port, path, query, anchor)
-
-proc `$`*(t: TURL): string =
-  result = ""
-  if t.scheme != "": result.add(t.scheme & "://")
-  if t.username != "":
-    if t.password != "":
-      result.add(t.username & ":" & t.password & "@")
-    else:
-      result.add(t.username & "@")
-  if t.hostname != "": result.add(t.hostname)
-  if t.port != "": result.add(":" & t.port)
-  if t.path != "": result.add(t.path)
-  if t.query != "": result.add(t.query)
-  if t.anchor != "": result.add(t.anchor)
diff --git a/lib/devel/httpclient.nim b/lib/pure/httpclient.nim
index 54f9321a1..9d8c70eeb 100755
--- a/lib/devel/httpclient.nim
+++ b/lib/pure/httpclient.nim
@@ -9,21 +9,55 @@
 
 ## This module implements a simple HTTP client that can be used to retrieve
 ## webpages/other data.
+##
+## Retrieving a website
+## ====================
+## 
+## This example uses HTTP GET to retrieve
+## ``http://google.com``
+## 
+## .. code-block:: nimrod
+##   echo(getContent("http://google.com"))
+## 
+## Using HTTP POST
+## ===============
+## 
+## This example demonstrates the usage of the W3 HTML Validator. It
+## uses ``multipart/form-data`` as the ``Content-Type`` to send the HTML to
+## the server. 
+## 
+## .. code-block:: nimrod
+##   var headers: string = "Content-Type: multipart/form-data; boundary=xyz\c\L"
+##   var body: string = "--xyz\c\L"
+##   # soap 1.2 output
+##   body.add("Content-Disposition: form-data; name=\"output\"\c\L")
+##   body.add("\c\Lsoap12\c\L")
+##    
+##   # html
+##   body.add("--xyz\c\L")
+##   body.add("Content-Disposition: form-data; name=\"uploaded_file\";" &
+##            " filename=\"test.html\"\c\L")
+##   body.add("Content-Type: text/html\c\L")
+##   body.add("\c\L<html><head></head><body><p>test</p></body></html>\c\L")
+##   body.add("--xyz--")
+##    
+##   echo(postContent("http://validator.w3.org/check", headers, body))
 
-# neuer Code:
-import sockets, strutils, parseurl, pegs, parseutils
+import sockets, strutils, parseurl, pegs, parseutils, strtabs
 
 type
   TResponse* = tuple[
-    version: string, status: string, headers: seq[THeader],
+    version: string, 
+    status: string, 
+    headers: PStringTable,
     body: string]
-  THeader* = tuple[htype, hvalue: string]
 
   EInvalidProtocol* = object of EBase ## exception that is raised when server
                                       ## does not conform to the implemented
                                       ## protocol
 
-  EHttpRequestErr* = object of EBase ## Thrown in the ``getContent`` proc,
+  EHttpRequestErr* = object of EBase ## Thrown in the ``getContent`` proc 
+                                     ## and ``postContent`` proc,
                                      ## when the server returns an error
 
 template newException(exceptn, message: expr): expr =
@@ -46,14 +80,6 @@ proc fileError(msg: string) =
   e.msg = msg
   raise e
 
-proc getHeaderValue*(headers: seq[THeader], name: string): string =
-  ## Retrieves a header by ``name``, from ``headers``.
-  ## Returns "" if a header is not found
-  for i in low(headers)..high(headers):
-    if cmpIgnoreCase(headers[i].htype, name) == 0:
-      return headers[i].hvalue
-  return ""
-
 proc charAt(d: var string, i: var int, s: TSocket): char {.inline.} = 
   result = d[i]
   while result == '\0':
@@ -111,14 +137,14 @@ proc parseChunks(d: var string, start: int, s: TSocket): string =
     while charAt(d, i, s) in {'\C', '\L'}: inc(i)
   
 proc parseBody(d: var string, start: int, s: TSocket,
-               headers: seq[THeader]): string =
-  if getHeaderValue(headers, "Transfer-Encoding") == "chunked":
+               headers: PStringTable): string =
+  if headers["Transfer-Encoding"] == "chunked":
     result = parseChunks(d, start, s)
   else:
     result = copy(d, start)
     # -REGION- Content-Length
     # (http://tools.ietf.org/html/rfc2616#section-4.4) NR.3
-    var contentLengthHeader = getHeaderValue(headers, "Content-Length")
+    var contentLengthHeader = headers["Content-Length"]
     if contentLengthHeader != "":
       var length = contentLengthHeader.parseint()
       while result.len() < length: result.add(s.recv())
@@ -127,7 +153,7 @@ proc parseBody(d: var string, start: int, s: TSocket,
       
       # -REGION- Connection: Close
       # (http://tools.ietf.org/html/rfc2616#section-4.4) NR.5
-      if getHeaderValue(headers, "Connection") == "close":
+      if headers["Connection"] == "close":
         while True:
           var moreData = recv(s)
           if moreData.len == 0: break
@@ -154,7 +180,7 @@ proc parseResponse(s: TSocket): TResponse =
   # Everything after the first line leading up to the body
   # htype: hvalue
 
-  result.headers = @[]
+  result.headers = newStringTable(modeCaseInsensitive)
   while true:
     var key = ""
     while d[i] != ':':
@@ -168,7 +194,7 @@ proc parseResponse(s: TSocket): TResponse =
       val.add(d[i])
       inc(i)
     
-    result.headers.add((key, val))
+    result.headers[key] = val
     
     if d[i] == '\C': inc(i)
     if d[i] == '\L': inc(i)
@@ -181,22 +207,46 @@ proc parseResponse(s: TSocket): TResponse =
     
   result.body = parseBody(d, i, s, result.headers) 
 
-proc request*(url: string): TResponse =
-  var r = parse(url)
+type
+  THttpMethod* = enum ## the HTTP method to use for a request
+    httpHEAD,         ## Asks for a response identical to the one that would
+                      ## correspond to a GET request, but without the response
+                      ## body.
+    httpGET,          ## Retrieves the specified resource.
+    httpPOST,         ## Submits data to be processed to the identified
+                      ## resource. The data is included in the body of the
+                      ## request.
+    httpPUT,          ## Uploads a representation of the specified resource.
+    httpDELETE,       ## Deletes the specified resource.
+    httpTRACE,        ## Echoes back the received request, so that a client
+                      ## can see what intermediate servers are adding or
+                      ## changing in the request.
+    httpOPTIONS,      ## Returns the HTTP methods that the server supports
+                      ## for the specified address.
+    httpCONNECT       ## Converts the request connection to a transparent
+                      ## TCP/IP tunnel, usually used for proxies.
+
+proc request*(url: string, httpMethod = httpGET, extraHeaders = "", 
+              body = ""): TResponse =
+  ## | Requests ``url`` with the specified ``httpMethod``.
+  ## | Each header in ``extraHeaders`` must be terminated by ``\c\L``.
+  var r = parseUrl(url)
   
-  var headers: string
+  var headers = copy($httpMethod, len("http")) # "httpGET" -> "GET"
   if r.path != "":
-    headers = "GET " & r.path & " HTTP/1.1\c\L"
-  else:
-    headers = "GET / HTTP/1.1\c\L"
+    headers.add(" " & r.path & r.query & " HTTP/1.1\c\L")
+  else: headers.add(" / HTTP/1.1\c\L") # no path given: request the root
   
-  add(headers, "Host: " & r.hostname & "\c\L\c\L")
-  add(headers, "User-Agent: Mozilla/5.0 (Windows; U; Windows NT 6.1; pl;" &
-               " rv:1.9.2) Gecko/20100115 Firefox/3.6")
+  add(headers, "Host: " & r.hostname & "\c\L")
+  add(headers, extraHeaders)
+  add(headers, "\c\L") # blank line ends the header section
 
   var s = socket()
   s.connect(r.hostname, TPort(80))
   s.send(headers)
+  if body != "":
+    s.send(body)
+  
   result = parseResponse(s)
   s.close()
   
@@ -207,24 +257,49 @@ proc redirection(status: string): bool =
       return True
   
 proc get*(url: string, maxRedirects = 5): TResponse =
-  ## low-level proc similar to ``request`` which handles redirection
+  ## | GET's the ``url`` and returns a ``TResponse`` object
+  ## | This proc also handles redirection
   result = request(url)
   for i in 1..maxRedirects:
     if result.status.redirection():
-      var locationHeader = getHeaderValue(result.headers, "Location")
+      var locationHeader = result.headers["Location"]
       if locationHeader == "": httpError("location header expected")
       result = request(locationHeader)
       
 proc getContent*(url: string): string =
-  ## GET's the body and returns it as a string
-  ## Raises exceptions for the status codes ``4xx`` and ``5xx``
+  ## | GET's the body and returns it as a string.
+  ## | Raises exceptions for the status codes ``4xx`` and ``5xx``
   var r = get(url)
   if r.status[0] in {'4','5'}:
     raise newException(EHTTPRequestErr, r.status)
   else:
     return r.body
   
+proc post*(url: string, extraHeaders = "", body = "", 
+           maxRedirects = 5): TResponse =
+  ## | POST's ``body`` to the ``url`` and returns a ``TResponse`` object.
+  ## | This proc adds the necessary Content-Length header.
+  ## | Follows up to ``maxRedirects`` redirects (re-POSTs only on ``307``).
+  var xh = extraHeaders & "Content-Length: " & $len(body) & "\c\L"
+  result = request(url, httpPOST, xh, body)
+  for i in 1..maxRedirects:
+    if result.status.redirection():
+      var locationHeader = result.headers["Location"]
+      if locationHeader == "": httpError("location header expected")
+      var meth = if result.status != "307": httpGet else: httpPost
+      result = request(locationHeader, meth, xh, body)
+  
+proc postContent*(url: string, extraHeaders = "", body = ""): string =
+  ## | Sends ``body`` to ``url`` via POST and returns the response's body.
+  ## | Raises ``EHttpRequestErr`` for the status codes ``4xx`` and ``5xx``.
+  var response = post(url, extraHeaders, body)
+  # the first character of the status selects the 4xx/5xx error classes
+  if response.status[0] in {'4','5'}:
+    raise newException(EHTTPRequestErr, response.status)
+  result = response.body
+  
 proc downloadFile*(url: string, outputFilename: string) =
+  ## Downloads ``url`` and saves it to ``outputFilename``
   var f: TFile
   if open(f, outputFilename, fmWrite):
     f.write(getContent(url))
@@ -236,5 +311,24 @@ proc downloadFile*(url: string, outputFilename: string) =
 when isMainModule:
   #downloadFile("http://force7.de/nimrod/index.html", "nimrodindex.html")
   #downloadFile("http://www.httpwatch.com/", "ChunkTest.html")
-  downloadFile("http://www.httpwatch.com/httpgallery/chunked/", "ChunkTest.html")
+  #downloadFile("http://validator.w3.org/check?uri=http%3A%2F%2Fgoogle.com",
+  # "validator.html")
+
+  #var r = get("http://validator.w3.org/check?uri=http%3A%2F%2Fgoogle.com&
+  #  charset=%28detect+automatically%29&doctype=Inline&group=0")
   
+  var headers: string = "Content-Type: multipart/form-data; boundary=xyz\c\L"
+  var body: string = "--xyz\c\L"
+  # soap 1.2 output
+  body.add("Content-Disposition: form-data; name=\"output\"\c\L")
+  body.add("\c\Lsoap12\c\L")
+  
+  # html
+  body.add("--xyz\c\L")
+  body.add("Content-Disposition: form-data; name=\"uploaded_file\";" &
+           " filename=\"test.html\"\c\L")
+  body.add("Content-Type: text/html\c\L")
+  body.add("\c\L<html><head></head><body><p>test</p></body></html>\c\L")
+  body.add("--xyz--")
+
+  echo(postContent("http://validator.w3.org/check", headers, body))
diff --git a/lib/pure/parseurl.nim b/lib/pure/parseurl.nim
index cbb5ba9c9..cd3bc621a 100755
--- a/lib/pure/parseurl.nim
+++ b/lib/pure/parseurl.nim
@@ -1,18 +1,95 @@
-import regexprs, strutils
+#
+#
+#            Nimrod's Runtime Library
+#        (c) Copyright 2010 Dominik Picheta
+#
+#    See the file "copying.txt", included in this
+#    distribution, for details about the copyright.
+#
+
+## Parses & constructs URLs.
+
+import strutils
 
 type
-  TUrl* = tuple[protocol, subdomain, domain, port: string, path: seq[string]]
+  TURL* = tuple[      ## represents a *Uniform Resource Locator* (URL)
+                      ## any optional component is "" if it does not exist
+    scheme, username, password, 
+    hostname, port, path, query, anchor: string]
+    
+proc parseUrl*(url: string): TURL =
+  ## Splits ``url`` into its components; a missing component is left as "".
+  var i: int = 0
+  var scheme, username, password: string = ""
+  var hostname, port, path, query, anchor: string = ""
+
+  var temp: string = "" # characters collected since the last delimiter
+  
+  if url[i] != '/': # a leading '/' means a relative path: no authority part
+    while True:
+      # Scheme: everything collected before "://"
+      if url[i] == ':':
+        if url[i+1] == '/' and url[i+2] == '/':
+          scheme = temp
+          temp = ""
+          inc(i, 3) # skip the "://"
+      # Authority: "username[:password]" collected before '@'
+      if url[i] == '@':
+        username = temp.split(':')[0]
+        if temp.split(':').len() > 1:
+          password = temp.split(':')[1]
+        temp = ""
+        inc(i) # skip the '@'
+      # Hostname: "host[:port]", ended by '/' or by the terminating '\0'
+      if url[i] == '/' or url[i] == '\0':
+        #TODO
+        hostname = temp
+        if hostname.split(':').len() > 1:
+          port = hostname.split(':')[1]
+          hostname = hostname.split(':')[0]
+        
+        temp = ""
+        break # `i` is not advanced: the '/' becomes the first path character
+      
+      temp.add(url[i])
+      inc(i)
+
+  # Path, query and anchor; the loop only ends on the terminating '\0'
+  while True:
+    if url[i] == '?':
+      path = temp
+      temp = "" # the '?' itself is added below, so `query` starts with '?'
+    if url[i] == '#':
+      if temp[0] == '?':
+        query = temp
+      else:
+        path = temp
+      temp = "" # the '#' itself is added below, so `anchor` starts with '#'
+      
+    if url[i] == '\0':
+      # the first character of `temp` tells which component was being read
+      if temp[0] == '?':
+        query = temp
+      elif temp[0] == '#':
+        anchor = temp
+      else:
+        path = temp
+      break
+      
+    temp.add(url[i])
+    inc(i)
+    
+  return (scheme, username, password, hostname, port, path, query, anchor)
 
+proc `$`*(t: TURL): string =
+  result = "" # components are appended in URL order; empty ones are skipped
+  if t.scheme != "": result.add(t.scheme & "://")
+  if t.username != "":
+    if t.password != "":
+      result.add(t.username & ":" & t.password & "@")
+    else:
+      result.add(t.username & "@")
+  if t.hostname != "": result.add(t.hostname)
+  if t.port != "": result.add(":" & t.port) # the ':' separator is added here
+  if t.path != "": result.add(t.path)
+  if t.query != "": result.add(t.query) # appended verbatim; no '?' is added
+  if t.anchor != "": result.add(t.anchor) # appended verbatim; no '#' is added
author	Andreas Rumpf <rumpf_a@web.de>	2010-02-07 20:30:05 +0100
committer	Andreas Rumpf <rumpf_a@web.de>	2010-02-07 20:30:05 +0100
commit	7db3d97cf980d11b5196e1f5936e0308f70d7aff (patch)
tree	6ae217f3b9106bff26cfe5f173e72fe989443855 /lib
parent	3644bc7212115aa47a1d5087a4afc31e6d28b6fa (diff)
download	Nim-7db3d97cf980d11b5196e1f5936e0308f70d7aff.tar.gz