diff options
author | Andreas Rumpf <rumpf_a@web.de> | 2010-02-07 20:30:05 +0100 |
---|---|---|
committer | Andreas Rumpf <rumpf_a@web.de> | 2010-02-07 20:30:05 +0100 |
commit | 7db3d97cf980d11b5196e1f5936e0308f70d7aff (patch) | |
tree | 6ae217f3b9106bff26cfe5f173e72fe989443855 /lib/devel | |
parent | 3644bc7212115aa47a1d5087a4afc31e6d28b6fa (diff) | |
download | Nim-7db3d97cf980d11b5196e1f5936e0308f70d7aff.tar.gz |
httpclient uses strtabs
Diffstat (limited to 'lib/devel')
-rwxr-xr-x | lib/devel/httpclient.nim | 240 | ||||
-rwxr-xr-x | lib/devel/parseurl.nim | 95 |
2 files changed, 0 insertions, 335 deletions
diff --git a/lib/devel/httpclient.nim b/lib/devel/httpclient.nim deleted file mode 100755 index 54f9321a1..000000000 --- a/lib/devel/httpclient.nim +++ /dev/null @@ -1,240 +0,0 @@ -# -# -# Nimrod's Runtime Library -# (c) Copyright 2010 Dominik Picheta, Andreas Rumpf -# -# See the file "copying.txt", included in this -# distribution, for details about the copyright. -# - -## This module implements a simple HTTP client that can be used to retrieve -## webpages/other data. - -# neuer Code: -import sockets, strutils, parseurl, pegs, parseutils - -type - TResponse* = tuple[ - version: string, status: string, headers: seq[THeader], - body: string] - THeader* = tuple[htype, hvalue: string] - - EInvalidProtocol* = object of EBase ## exception that is raised when server - ## does not conform to the implemented - ## protocol - - EHttpRequestErr* = object of EBase ## Thrown in the ``getContent`` proc, - ## when the server returns an error - -template newException(exceptn, message: expr): expr = - block: # open a new scope - var - e: ref exceptn - new(e) - e.msg = message - e - -proc httpError(msg: string) = - var e: ref EInvalidProtocol - new(e) - e.msg = msg - raise e - -proc fileError(msg: string) = - var e: ref EIO - new(e) - e.msg = msg - raise e - -proc getHeaderValue*(headers: seq[THeader], name: string): string = - ## Retrieves a header by ``name``, from ``headers``. - ## Returns "" if a header is not found - for i in low(headers)..high(headers): - if cmpIgnoreCase(headers[i].htype, name) == 0: - return headers[i].hvalue - return "" - -proc charAt(d: var string, i: var int, s: TSocket): char {.inline.} = - result = d[i] - while result == '\0': - d = s.recv() - i = 0 - result = d[i] - -proc parseChunks(d: var string, start: int, s: TSocket): string = - # get chunks: - var i = start - result = "" - while true: - var chunkSize = 0 - var digitFound = false - while true: - case d[i] - of '0'..'9': - digitFound = true - chunkSize = chunkSize shl 4 or (ord(d[i]) - ord('0')) - of 'a'..'f': - digitFound = true - chunkSize = chunkSize shl 4 or (ord(d[i]) - ord('a') + 10) - of 'A'..'F': - digitFound = true - chunkSize = chunkSize shl 4 or (ord(d[i]) - ord('A') + 10) - of '\0': - d = s.recv() - i = -1 - else: break - inc(i) - if not digitFound: httpError("Chunksize expected") - if chunkSize <= 0: break - while charAt(d, i, s) notin {'\C', '\L', '\0'}: inc(i) - if charAt(d, i, s) == '\C': inc(i) - if charAt(d, i, s) == '\L': inc(i) - else: httpError("CR-LF after chunksize expected") - - var x = copy(d, i, i+chunkSize-1) - var size = x.len - result.add(x) - inc(i, size) - if size < chunkSize: - # read in the rest: - var missing = chunkSize - size - var L = result.len - setLen(result, L + missing) - while missing > 0: - var bytesRead = s.recv(addr(result[L]), missing) - inc(L, bytesRead) - dec(missing, bytesRead) - # next chunk: - d = s.recv() - i = 0 - # skip trailing CR-LF: - while charAt(d, i, s) in {'\C', '\L'}: inc(i) - -proc parseBody(d: var string, start: int, s: TSocket, - headers: seq[THeader]): string = - if getHeaderValue(headers, "Transfer-Encoding") == "chunked": - result = parseChunks(d, start, s) - else: - result = copy(d, start) - # -REGION- Content-Length - # (http://tools.ietf.org/html/rfc2616#section-4.4) NR.3 - var contentLengthHeader = getHeaderValue(headers, "Content-Length") - if contentLengthHeader != "": - var length = contentLengthHeader.parseint() - while result.len() < length: result.add(s.recv()) - else: - # (http://tools.ietf.org/html/rfc2616#section-4.4) NR.4 TODO - - # -REGION- Connection: Close - # (http://tools.ietf.org/html/rfc2616#section-4.4) NR.5 - if getHeaderValue(headers, "Connection") == "close": - while True: - var moreData = recv(s) - if moreData.len == 0: break - result.add(moreData) - -proc parseResponse(s: TSocket): TResponse = - var d = s.recv() - var i = 0 - - # Parse the version - # Parses the first line of the headers - # ``HTTP/1.1`` 200 OK - - var matches: array[0..1, string] - var L = d.matchLen(peg"\i 'HTTP/' {'1.1'/'1.0'} \s+ {(!\n .)*}\n", - matches, i) - if L < 0: httpError("invalid HTTP header") - - result.version = matches[0] - result.status = matches[1] - inc(i, L) - - # Parse the headers - # Everything after the first line leading up to the body - # htype: hvalue - - result.headers = @[] - while true: - var key = "" - while d[i] != ':': - if d[i] == '\0': httpError("invalid HTTP header, ':' expected") - key.add(d[i]) - inc(i) - inc(i) # skip ':' - if d[i] == ' ': inc(i) # skip if the character is a space - var val = "" - while d[i] notin {'\C', '\L', '\0'}: - val.add(d[i]) - inc(i) - - result.headers.add((key, val)) - - if d[i] == '\C': inc(i) - if d[i] == '\L': inc(i) - else: httpError("invalid HTTP header, CR-LF expected") - - if d[i] == '\C': inc(i) - if d[i] == '\L': - inc(i) - break - - result.body = parseBody(d, i, s, result.headers) - -proc request*(url: string): TResponse = - var r = parse(url) - - var headers: string - if r.path != "": - headers = "GET " & r.path & " HTTP/1.1\c\L" - else: - headers = "GET / HTTP/1.1\c\L" - - add(headers, "Host: " & r.hostname & "\c\L\c\L") - add(headers, "User-Agent: Mozilla/5.0 (Windows; U; Windows NT 6.1; pl;" & - " rv:1.9.2) Gecko/20100115 Firefox/3.6") - - var s = socket() - s.connect(r.hostname, TPort(80)) - s.send(headers) - result = parseResponse(s) - s.close() - -proc redirection(status: string): bool = - const redirectionNRs = ["301", "302", "303", "307"] - for i in items(redirectionNRs): - if status.startsWith(i): - return True - -proc get*(url: string, maxRedirects = 5): TResponse = - ## low-level proc similar to ``request`` which handles redirection - result = request(url) - for i in 1..maxRedirects: - if result.status.redirection(): - var locationHeader = getHeaderValue(result.headers, "Location") - if locationHeader == "": httpError("location header expected") - result = request(locationHeader) - -proc getContent*(url: string): string = - ## GET's the body and returns it as a string - ## Raises exceptions for the status codes ``4xx`` and ``5xx`` - var r = get(url) - if r.status[0] in {'4','5'}: - raise newException(EHTTPRequestErr, r.status) - else: - return r.body - -proc downloadFile*(url: string, outputFilename: string) = - var f: TFile - if open(f, outputFilename, fmWrite): - f.write(getContent(url)) - f.close() - else: - fileError("Unable to open file") - - -when isMainModule: - #downloadFile("http://force7.de/nimrod/index.html", "nimrodindex.html") - #downloadFile("http://www.httpwatch.com/", "ChunkTest.html") - downloadFile("http://www.httpwatch.com/httpgallery/chunked/", "ChunkTest.html") - diff --git a/lib/devel/parseurl.nim b/lib/devel/parseurl.nim deleted file mode 100755 index 756d5a891..000000000 --- a/lib/devel/parseurl.nim +++ /dev/null @@ -1,95 +0,0 @@ -# -# -# Nimrod's Runtime Library -# (c) Copyright 2010 Dominik Picheta -# -# See the file "copying.txt", included in this -# distribution, for details about the copyright. -# - -## Parses & constructs URLs. - -import strutils - -type - TURL* = tuple[ ## represents a *Uniform Resource Locator* (URL) - ## any optional component is "" if it does not exist - scheme, username, password, - hostname, port, path, query, anchor: string] - -proc parse*(url: string): TURL = - var i: int = 0 - - var scheme, username, password: string = "" - var hostname, port, path, query, anchor: string = "" - - var temp: string = "" - - if url[i] != '/': #url isn't a relative path - while True: - #Scheme - if url[i] == ':': - if url[i+1] == '/' and url[i+2] == '/': - scheme = temp - temp = "" - inc(i, 3) #Skip the // - #Authority(username, password) - if url[i] == '@': - username = temp.split(':')[0] - if temp.split(':').len() > 1: - password = temp.split(':')[1] - temp = "" - inc(i) #Skip the @ - #hostname(subdomain, domain, port) - if url[i] == '/' or url[i] == '\0': - #TODO - hostname = temp - if hostname.split(':').len() > 1: - port = hostname.split(':')[1] - hostname = hostname.split(':')[0] - - temp = "" - break - - temp.add(url[i]) - inc(i) - - #Path - while True: - if url[i] == '?': - path = temp - temp = "" - if url[i] == '#': - if temp[0] == '?': - query = temp - else: - path = temp - temp = "" - - if url[i] == '\0': - if temp[0] == '?': - query = temp - elif temp[0] == '#': - anchor = temp - else: - path = temp - break - - temp.add(url[i]) - inc(i) - - return (scheme, username, password, hostname, port, path, query, anchor) - -proc `$`*(t: TURL): string = - result = "" - if t.scheme != "": result.add(t.scheme & "://") - if t.username != "": - if t.password != "": - result.add(t.username & ":" & t.password & "@") - else: - result.add(t.username & "@") - if t.hostname != "": result.add(t.hostname) - if t.port != "": result.add(":" & t.port) - if t.path != "": result.add(t.path) - if t.query != "": result.add(t.query) - if t.anchor != "": result.add(t.anchor) |