diff options
author | Andreas Rumpf <rumpf_a@web.de> | 2010-02-07 20:30:05 +0100 |
---|---|---|
committer | Andreas Rumpf <rumpf_a@web.de> | 2010-02-07 20:30:05 +0100 |
commit | 7db3d97cf980d11b5196e1f5936e0308f70d7aff (patch) | |
tree | 6ae217f3b9106bff26cfe5f173e72fe989443855 /lib | |
parent | 3644bc7212115aa47a1d5087a4afc31e6d28b6fa (diff) | |
download | Nim-7db3d97cf980d11b5196e1f5936e0308f70d7aff.tar.gz |
httpclient uses strtabs
Diffstat (limited to 'lib')
-rwxr-xr-x | lib/devel/parseurl.nim | 95 | ||||
-rwxr-xr-x | lib/pure/httpclient.nim (renamed from lib/devel/httpclient.nim) | 160 | ||||
-rwxr-xr-x | lib/pure/parseurl.nim | 105 |
3 files changed, 218 insertions, 142 deletions
diff --git a/lib/devel/parseurl.nim b/lib/devel/parseurl.nim deleted file mode 100755 index 756d5a891..000000000 --- a/lib/devel/parseurl.nim +++ /dev/null @@ -1,95 +0,0 @@ -# -# -# Nimrod's Runtime Library -# (c) Copyright 2010 Dominik Picheta -# -# See the file "copying.txt", included in this -# distribution, for details about the copyright. -# - -## Parses & constructs URLs. - -import strutils - -type - TURL* = tuple[ ## represents a *Uniform Resource Locator* (URL) - ## any optional component is "" if it does not exist - scheme, username, password, - hostname, port, path, query, anchor: string] - -proc parse*(url: string): TURL = - var i: int = 0 - - var scheme, username, password: string = "" - var hostname, port, path, query, anchor: string = "" - - var temp: string = "" - - if url[i] != '/': #url isn't a relative path - while True: - #Scheme - if url[i] == ':': - if url[i+1] == '/' and url[i+2] == '/': - scheme = temp - temp = "" - inc(i, 3) #Skip the // - #Authority(username, password) - if url[i] == '@': - username = temp.split(':')[0] - if temp.split(':').len() > 1: - password = temp.split(':')[1] - temp = "" - inc(i) #Skip the @ - #hostname(subdomain, domain, port) - if url[i] == '/' or url[i] == '\0': - #TODO - hostname = temp - if hostname.split(':').len() > 1: - port = hostname.split(':')[1] - hostname = hostname.split(':')[0] - - temp = "" - break - - temp.add(url[i]) - inc(i) - - #Path - while True: - if url[i] == '?': - path = temp - temp = "" - if url[i] == '#': - if temp[0] == '?': - query = temp - else: - path = temp - temp = "" - - if url[i] == '\0': - if temp[0] == '?': - query = temp - elif temp[0] == '#': - anchor = temp - else: - path = temp - break - - temp.add(url[i]) - inc(i) - - return (scheme, username, password, hostname, port, path, query, anchor) - -proc `$`*(t: TURL): string = - result = "" - if t.scheme != "": result.add(t.scheme & "://") - if t.username != "": - if t.password != "": - result.add(t.username & ":" & t.password & "@") - else: - result.add(t.username & "@") - if t.hostname != "": result.add(t.hostname) - if t.port != "": result.add(":" & t.port) - if t.path != "": result.add(t.path) - if t.query != "": result.add(t.query) - if t.anchor != "": result.add(t.anchor) diff --git a/lib/devel/httpclient.nim b/lib/pure/httpclient.nim index 54f9321a1..9d8c70eeb 100755 --- a/lib/devel/httpclient.nim +++ b/lib/pure/httpclient.nim @@ -9,21 +9,55 @@ ## This module implements a simple HTTP client that can be used to retrieve ## webpages/other data. +## +## Retrieving a website +## ==================== +## +## This example uses HTTP GET to retrieve +## ``http://google.com`` +## +## .. code-block:: nimrod +## echo(getContent("http://google.com")) +## +## Using HTTP POST +## =============== +## +## This example demonstrates the usage of the W3 HTML Validator, it +## uses ``multipart/form-data`` as the ``Content-Type`` to send the HTML to +## the server. +## +## .. code-block:: nimrod +## var headers: string = "Content-Type: multipart/form-data; boundary=xyz\c\L" +## var body: string = "--xyz\c\L" +## # soap 1.2 output +## body.add("Content-Disposition: form-data; name=\"output\"\c\L") +## body.add("\c\Lsoap12\c\L") +## +## # html +## body.add("--xyz\c\L") +## body.add("Content-Disposition: form-data; name=\"uploaded_file\";" & +## " filename=\"test.html\"\c\L") +## body.add("Content-Type: text/html\c\L") +## body.add("\c\L<html><head></head><body><p>test</p></body></html>\c\L") +## body.add("--xyz--") +## +## echo(postContent("http://validator.w3.org/check", headers, body)) -# neuer Code: -import sockets, strutils, parseurl, pegs, parseutils +import sockets, strutils, parseurl, pegs, parseutils, strtabs type TResponse* = tuple[ - version: string, status: string, headers: seq[THeader], + version: string, + status: string, + headers: PStringTable, body: string] - THeader* = tuple[htype, hvalue: string] EInvalidProtocol* = object of EBase ## exception that is raised when server ## does not conform to the implemented ## protocol - EHttpRequestErr* = object of EBase ## Thrown in the ``getContent`` proc, + EHttpRequestErr* = object of EBase ## Thrown in the ``getContent`` proc + ## and ``postContent`` proc, ## when the server returns an error template newException(exceptn, message: expr): expr = @@ -46,14 +80,6 @@ proc fileError(msg: string) = e.msg = msg raise e -proc getHeaderValue*(headers: seq[THeader], name: string): string = - ## Retrieves a header by ``name``, from ``headers``. - ## Returns "" if a header is not found - for i in low(headers)..high(headers): - if cmpIgnoreCase(headers[i].htype, name) == 0: - return headers[i].hvalue - return "" - proc charAt(d: var string, i: var int, s: TSocket): char {.inline.} = result = d[i] while result == '\0': @@ -111,14 +137,14 @@ proc parseChunks(d: var string, start: int, s: TSocket): string = while charAt(d, i, s) in {'\C', '\L'}: inc(i) proc parseBody(d: var string, start: int, s: TSocket, - headers: seq[THeader]): string = - if getHeaderValue(headers, "Transfer-Encoding") == "chunked": + headers: PStringTable): string = + if headers["Transfer-Encoding"] == "chunked": result = parseChunks(d, start, s) else: result = copy(d, start) # -REGION- Content-Length # (http://tools.ietf.org/html/rfc2616#section-4.4) NR.3 - var contentLengthHeader = getHeaderValue(headers, "Content-Length") + var contentLengthHeader = headers["Content-Length"] if contentLengthHeader != "": var length = contentLengthHeader.parseint() while result.len() < length: result.add(s.recv()) @@ -127,7 +153,7 @@ proc parseBody(d: var string, start: int, s: TSocket, # -REGION- Connection: Close # (http://tools.ietf.org/html/rfc2616#section-4.4) NR.5 - if getHeaderValue(headers, "Connection") == "close": + if headers["Connection"] == "close": while True: var moreData = recv(s) if moreData.len == 0: break @@ -154,7 +180,7 @@ proc parseResponse(s: TSocket): TResponse = # Everything after the first line leading up to the body # htype: hvalue - result.headers = @[] + result.headers = newStringTable(modeCaseInsensitive) while true: var key = "" while d[i] != ':': @@ -168,7 +194,7 @@ proc parseResponse(s: TSocket): TResponse = val.add(d[i]) inc(i) - result.headers.add((key, val)) + result.headers[key] = val if d[i] == '\C': inc(i) if d[i] == '\L': inc(i) @@ -181,22 +207,46 @@ proc parseResponse(s: TSocket): TResponse = result.body = parseBody(d, i, s, result.headers) -proc request*(url: string): TResponse = - var r = parse(url) +type + THttpMethod* = enum ## the requested HttpMethod + httpHEAD, ## Asks for the response identical to the one that would + ## correspond to a GET request, but without the response + ## body. + httpGET, ## Retrieves the specified resource. + httpPOST, ## Submits data to be processed to the identified + ## resource. The data is included in the body of the + ## request. + httpPUT, ## Uploads a representation of the specified resource. + httpDELETE, ## Deletes the specified resource. + httpTRACE, ## Echoes back the received request, so that a client + ## can see what intermediate servers are adding or + ## changing in the request. + httpOPTIONS, ## Returns the HTTP methods that the server supports + ## for specified address. + httpCONNECT ## Converts the request connection to a transparent + ## TCP/IP tunnel, usually used for proxies. + +proc request*(url: string, httpMethod = httpGET, extraHeaders = "", + body = ""): TResponse = + ## | Requests ``url`` with the specified ``httpMethod``. + ## | Extra headers can be specified and must be seperated by ``\c\L`` + var r = parseUrl(url) - var headers: string + var headers = copy($httpMethod, len("http")) if r.path != "": - headers = "GET " & r.path & " HTTP/1.1\c\L" - else: - headers = "GET / HTTP/1.1\c\L" + headers.add(" " & r.path & r.query) + headers.add(" / HTTP/1.1\c\L") - add(headers, "Host: " & r.hostname & "\c\L\c\L") - add(headers, "User-Agent: Mozilla/5.0 (Windows; U; Windows NT 6.1; pl;" & - " rv:1.9.2) Gecko/20100115 Firefox/3.6") + add(headers, "Host: " & r.hostname & "\c\L") + add(headers, extraHeaders) + add(headers, "\c\L") var s = socket() s.connect(r.hostname, TPort(80)) s.send(headers) + if body != "": + s.send(body) + result = parseResponse(s) s.close() @@ -207,24 +257,49 @@ proc redirection(status: string): bool = return True proc get*(url: string, maxRedirects = 5): TResponse = - ## low-level proc similar to ``request`` which handles redirection + ## | GET's the ``url`` and returns a ``TResponse`` object + ## | This proc also handles redirection result = request(url) for i in 1..maxRedirects: if result.status.redirection(): - var locationHeader = getHeaderValue(result.headers, "Location") + var locationHeader = result.headers["Location"] if locationHeader == "": httpError("location header expected") result = request(locationHeader) proc getContent*(url: string): string = - ## GET's the body and returns it as a string - ## Raises exceptions for the status codes ``4xx`` and ``5xx`` + ## | GET's the body and returns it as a string. + ## | Raises exceptions for the status codes ``4xx`` and ``5xx`` var r = get(url) if r.status[0] in {'4','5'}: raise newException(EHTTPRequestErr, r.status) else: return r.body +proc post*(url: string, extraHeaders = "", body = "", + maxRedirects = 5): TResponse = + ## | POST's ``body`` to the ``url`` and returns a ``TResponse`` object. + ## | This proc adds the necessary Content-Length header. + ## | This proc also handles redirection. + extraHeaders.add("Content-Length: " & $len(body) & "\c\L") + result = request(url, httpPOST, extraHeaders, body) + for i in 1..maxRedirects: + if result.status.redirection(): + var locationHeader = result.headers["Location"] + if locationHeader == "": httpError("location header expected") + var meth = if result.status != "307": httpGet else: httpPost + result = request(locationHeader, meth, extraHeaders, body) + +proc postContent*(url: string, extraHeaders = "", body = ""): string = + ## | POST's ``body`` to ``url`` and returns the response's body as a string + ## | Raises exceptions for the status codes ``4xx`` and ``5xx`` + var r = post(url, extraHeaders, body) + if r.status[0] in {'4','5'}: + raise newException(EHTTPRequestErr, r.status) + else: + return r.body + proc downloadFile*(url: string, outputFilename: string) = + ## Downloads ``url`` and saves it to ``outputFilename`` var f: TFile if open(f, outputFilename, fmWrite): f.write(getContent(url)) @@ -236,5 +311,24 @@ proc downloadFile*(url: string, outputFilename: string) = when isMainModule: #downloadFile("http://force7.de/nimrod/index.html", "nimrodindex.html") #downloadFile("http://www.httpwatch.com/", "ChunkTest.html") - downloadFile("http://www.httpwatch.com/httpgallery/chunked/", "ChunkTest.html") + #downloadFile("http://validator.w3.org/check?uri=http%3A%2F%2Fgoogle.com", + # "validator.html") + + #var r = get("http://validator.w3.org/check?uri=http%3A%2F%2Fgoogle.com& + # charset=%28detect+automatically%29&doctype=Inline&group=0") + var headers: string = "Content-Type: multipart/form-data; boundary=xyz\c\L" + var body: string = "--xyz\c\L" + # soap 1.2 output + body.add("Content-Disposition: form-data; name=\"output\"\c\L") + body.add("\c\Lsoap12\c\L") + + # html + body.add("--xyz\c\L") + body.add("Content-Disposition: form-data; name=\"uploaded_file\";" & + " filename=\"test.html\"\c\L") + body.add("Content-Type: text/html\c\L") + body.add("\c\L<html><head></head><body><p>test</p></body></html>\c\L") + body.add("--xyz--") + + echo(postContent("http://validator.w3.org/check", headers, body)) diff --git a/lib/pure/parseurl.nim b/lib/pure/parseurl.nim index cbb5ba9c9..cd3bc621a 100755 --- a/lib/pure/parseurl.nim +++ b/lib/pure/parseurl.nim @@ -1,18 +1,95 @@ -import regexprs, strutils +# +# +# Nimrod's Runtime Library +# (c) Copyright 2010 Dominik Picheta +# +# See the file "copying.txt", included in this +# distribution, for details about the copyright. +# + +## Parses & constructs URLs. + +import strutils type - TUrl* = tuple[protocol, subdomain, domain, port: string, path: seq[string]] + TURL* = tuple[ ## represents a *Uniform Resource Locator* (URL) + ## any optional component is "" if it does not exist + scheme, username, password, + hostname, port, path, query, anchor: string] + +proc parseUrl*(url: string): TURL = + var i: int = 0 + + var scheme, username, password: string = "" + var hostname, port, path, query, anchor: string = "" + + var temp: string = "" + + if url[i] != '/': #url isn't a relative path + while True: + #Scheme + if url[i] == ':': + if url[i+1] == '/' and url[i+2] == '/': + scheme = temp + temp = "" + inc(i, 3) #Skip the // + #Authority(username, password) + if url[i] == '@': + username = temp.split(':')[0] + if temp.split(':').len() > 1: + password = temp.split(':')[1] + temp = "" + inc(i) #Skip the @ + #hostname(subdomain, domain, port) + if url[i] == '/' or url[i] == '\0': + #TODO + hostname = temp + if hostname.split(':').len() > 1: + port = hostname.split(':')[1] + hostname = hostname.split(':')[0] + + temp = "" + break + + temp.add(url[i]) + inc(i) -proc parseUrl*(url: string): TUrl = - #([a-zA-Z]+://)?(\w+?\.)?(\w+)(\.\w+)(:[0-9]+)?(/.+)? - const pattern = r"([a-zA-Z]+://)?(\w+?\.)?(\w+)(\.\w+)(:[0-9]+)?(/.+)?" - var m: array[0..6, string] #Array with the matches - discard regexprs.match(url, pattern, m) - - result = (protocol: m[1], subdomain: m[2], domain: m[3] & m[4], - port: m[5], path: m[6].split('/')) - -when isMainModule: - var r = parseUrl(r"http://google.com/search?var=bleahdhsad") - echo(r.domain) + #Path + while True: + if url[i] == '?': + path = temp + temp = "" + if url[i] == '#': + if temp[0] == '?': + query = temp + else: + path = temp + temp = "" + + if url[i] == '\0': + if temp[0] == '?': + query = temp + elif temp[0] == '#': + anchor = temp + else: + path = temp + break + + temp.add(url[i]) + inc(i) + + return (scheme, username, password, hostname, port, path, query, anchor) +proc `$`*(t: TURL): string = + result = "" + if t.scheme != "": result.add(t.scheme & "://") + if t.username != "": + if t.password != "": + result.add(t.username & ":" & t.password & "@") + else: + result.add(t.username & "@") + if t.hostname != "": result.add(t.hostname) + if t.port != "": result.add(":" & t.port) + if t.path != "": result.add(t.path) + if t.query != "": result.add(t.query) + if t.anchor != "": result.add(t.anchor) |