diff options
author | rumpf_a@web.de <> | 2010-01-24 23:34:10 +0100 |
---|---|---|
committer | rumpf_a@web.de <> | 2010-01-24 23:34:10 +0100 |
commit | 5eea125ba73a5768a264c5d4fc8cc55fba8d5fe8 (patch) | |
tree | 9fc4fbef4ed5f672d7bd6842d487de33298ca73d /lib/devel | |
parent | 6bbed25d14c674837c40e761753e2bf6e26b1db2 (diff) | |
download | Nim-5eea125ba73a5768a264c5d4fc8cc55fba8d5fe8.tar.gz |
devel libs added
Diffstat (limited to 'lib/devel')
-rw-r--r-- | lib/devel/httpclient.nim | 176 | ||||
-rw-r--r-- | lib/devel/parseurl.nim | 64 |
2 files changed, 240 insertions, 0 deletions
# diff --git a/lib/devel/httpclient.nim b/lib/devel/httpclient.nim
# new file mode 100644
# ===== lib/devel/httpclient.nim =====

import sockets, strutils, parseurl, pegs

type
  response = tuple[version: string, status: string, headers: seq[header],
                   body: string]
  header = tuple[htype: string, hvalue: string]

  EInvalidHttp* = object of EBase ## exception that is raised when server does
                                  ## not conform to the implemented HTTP
                                  ## protocol

proc httpError(msg: string) =
  ## Raises ``EInvalidHttp`` carrying `msg`.
  var e: ref EInvalidHttp
  new(e)
  e.msg = msg
  raise e

proc parseResponse(data: string): response =
  ## Splits the raw response text `data` into HTTP version, status text,
  ## header fields and body.
  ##
  ## Raises ``EInvalidHttp`` if the status line or a header line is malformed
  ## (this includes a header section that is still incomplete).
  var i = 0

  # Status line, e.g. ``HTTP/1.1 200 OK``. The two captures are the version
  # number and everything after it up to the end of the line.
  var matches: array[0..1, string]
  var L = data.matchLen(peg"\i 'HTTP/' {'1.1'/'1.0'} \s+ {(!\n .)*}\n",
                        matches, i)
  if L < 0: httpError("invalid HTTP header")

  result.version = matches[0]
  result.status = matches[1]
  inc(i, L)

  # Header fields (``htype: hvalue``), one per line, up to the first blank
  # line. The blank-line test sits at the top of the loop so that a response
  # without any header field is accepted as well.
  result.headers = @[]
  while true:
    var j = i
    if data[j] == '\C': inc(j)
    if data[j] == '\L':
      i = j + 1 # skip the blank line; the body starts here
      break

    var key = ""
    while data[i] != ':':
      if data[i] == '\0': httpError("invalid HTTP header, ':' expected")
      key.add(data[i])
      inc(i)
    inc(i) # skip ':'
    if data[i] == ' ': inc(i)
    var val = ""
    while data[i] notin {'\C', '\L', '\0'}:
      val.add(data[i])
      inc(i)

    result.headers.add((key, val))

    if data[i] == '\C': inc(i)
    if data[i] == '\L': inc(i)
    else: httpError("invalid HTTP header, CR-LF expected")

  # Everything after the blank line is the body.
  result.body = data.copy(i)

proc decodeChunked(body: string, decoded: var string): bool =
  ## Decodes a ``Transfer-Encoding: chunked`` message `body` into `decoded`.
  ##
  ## Returns true once the terminating zero-size chunk has been seen and false
  ## if `body` ends before that point (more data has to be read). Trailers
  ## after the terminating chunk are ignored. Raises ``EInvalidHttp`` if the
  ## chunk framing is malformed.
  decoded = ""
  var i = 0
  while true:
    # Chunk-size line: hex digits, optional ``;extension``, end of line.
    var sizeStart = i
    while i < body.len and body[i] in {'0'..'9', 'a'..'f', 'A'..'F'}: inc(i)
    if i == sizeStart:
      if i >= body.len: return false
      httpError("invalid chunked body, chunk size expected")
    var hexDigits = copy(body, sizeStart, i - 1)
    while i < body.len and body[i] != '\L': inc(i) # skip extension and CR
    if i >= body.len: return false # size line not complete yet
    inc(i) # skip LF

    var size = ParseHexInt(hexDigits)
    if size == 0: return true
    if i + size > body.len: return false # chunk data not complete yet

    # The payload is taken by byte count; it may itself contain line breaks.
    decoded.add(copy(body, i, i + size - 1))
    inc(i, size)

    # Every chunk is followed by a line break.
    if i < body.len and body[i] == '\C': inc(i)
    if i >= body.len: return false
    if body[i] == '\L': inc(i)
    else: httpError("invalid chunked body, CR-LF expected")

proc readChunked(data: var string, s: TSocket): response =
  ## Completes a chunked response. `data` holds what has been received so
  ## far; more is read from `s` and appended to `data` until the terminating
  ## chunk has arrived. Returns the parsed response with the chunk framing
  ## removed from its body.
  ##
  ## Raises ``EInvalidHttp`` if the connection is closed too early or the
  ## chunk framing is malformed.
  result = parseResponse(data)
  var decoded = ""
  # Look at the data we already have before touching the socket: the first
  # read may well have delivered the complete response.
  while not decodeChunked(result.body, decoded):
    var more = s.recv()
    if more == "":
      httpError("connection closed before the terminating chunk")
    data.add(more)
    result = parseResponse(data)
  result.body = decoded

proc getHeaderValue*(headers: seq[header], name: string): string =
  ## Retrieves a header by ``name``, from ``headers``. The comparison ignores
  ## case. Returns "" if a header is not found
  for i in low(headers)..high(headers):
    if cmpIgnoreCase(headers[i].htype, name) == 0:
      return headers[i].hvalue
  return ""

proc request*(url: string): response =
  ## Sends an HTTP/1.1 ``GET`` request for `url` and returns the parsed
  ## response. The port given in `url` is used if present, otherwise port 80.
  ##
  ## The length of the body is determined as described in
  ## http://tools.ietf.org/html/rfc2616#section-4.4 : ``Transfer-Encoding:
  ## chunked`` takes precedence over ``Content-Length``, which takes
  ## precedence over reading until the server closes the connection.
  ##
  ## Raises ``EInvalidHttp`` if the response cannot be parsed.
  var r = parse(url)

  var host = r.subdomain & r.domain
  var port = 80
  var portSuffix = ""
  if r.port != nil and r.port.len() > 1:
    portSuffix = r.port                # still carries the leading ':'
    port = parseInt(copy(r.port, 1))

  var target = "/"
  if r.path != nil and r.path != "": target = r.path
  if r.query != nil: target.add(r.query) # query keeps its leading '?'

  var req = "GET " & target & " HTTP/1.1\c\L"
  req = req & "Host: " & host & portSuffix & "\c\L\c\L"

  var s = socket()
  s.connect(host, TPort(port))
  try:
    s.send(req)

    var data = s.recv()
    result = parseResponse(data)

    var transferEncodingHeader = getHeaderValue(result.headers,
                                                "Transfer-Encoding")
    var contentLengthHeader = getHeaderValue(result.headers, "Content-Length")
    var connectionHeader = getHeaderValue(result.headers, "Connection")

    if cmpIgnoreCase(transferEncodingHeader, "chunked") == 0:
      # (rfc2616#section-4.4) NR.2
      result = readChunked(data, s)
    elif contentLengthHeader != "":
      # (rfc2616#section-4.4) NR.3 -- Content-Length counts body bytes only,
      # so it is compared with the body and not with the whole response.
      var length = contentLengthHeader.parseInt()
      while result.body.len() < length:
        var more = s.recv()
        if more == "": break # server closed early; return what we have
        data.add(more)
        result = parseResponse(data)
    elif cmpIgnoreCase(connectionHeader, "close") == 0:
      # (rfc2616#section-4.4) NR.5 -- the body ends when the server closes.
      while True:
        var nD = s.recv()
        if nD == "": break
        data.add(nD)
      result = parseResponse(data)
    # (rfc2616#section-4.4) NR.4 (multipart/byteranges) TODO
  finally:
    s.close() # also release the socket if parsing raised

proc get*(url: string): response =
  ## Convenience wrapper around `request`.
  result = request(url)

when isMainModule:
  # Manual smoke test; needs network access.
  var r = get("http://www.google.co.uk/index.html")
  #var r = get("http://www.crunchyroll.com")
  echo("===================================")
  echo(r.version & " " & r.status)

  for htype, hvalue in items(r.headers):
    echo(htype, ": ", hvalue)
  echo("---------------------------------")
  echo(r.body)

# diff --git a/lib/devel/parseurl.nim b/lib/devel/parseurl.nim
# new file mode 100644
# ===== lib/devel/parseurl.nim =====

import regexprs, strutils

type
  TURL* = tuple[protocol, username, password,
                subdomain, domain, port, path, query, anchor: string]

proc orEmpty(s: string): string =
  ## Maps a nil string to "" so that callers never see nil fields.
  if s != nil: result = s
  else: result = ""

proc parse*(url: string): TURL =
  ## Splits `url` into its components. Parts that are not present are "".
  ##
  ## The separators stay attached to some parts: `protocol` ends with
  ## ``://``, `subdomain` ends with ``.``, `port` starts with ``:``, `query`
  ## starts with ``?`` and `anchor` starts with ``#``. If `url` does not
  ## match the URL pattern at all, every field is "".
  const pattern = r"([a-zA-Z]+://)?(.+@)?(.+\.)?(\w+)(\.\w+)(:[0-9]+)?(/.+)?"
  var m: array[0..7, string] #Array with the matches
  if not regexprs.match(url, pattern, m):
    return (protocol: "", username: "", password: "",
            subdomain: "", domain: "", port: "", path: "", query: "",
            anchor: "")

  # m[2] is ``user@`` or ``user:password@``.
  var username: string = ""
  var password: string = ""
  var userinfo = orEmpty(m[2])
  if userinfo != "":
    userinfo = copy(userinfo, 0, userinfo.len() - 2) # drop the trailing '@'
    var colon = userinfo.find(":")
    if colon >= 0:
      if colon > 0: username = copy(userinfo, 0, colon - 1)
      password = copy(userinfo, colon + 1)
    else:
      username = userinfo

  # m[7] is path, query and anchor together. The anchor starts at the first
  # '#' and runs to the end; the query starts at the first '?' before it.
  var path: string = orEmpty(m[7])
  var query: string = ""
  var anchor: string = ""

  var hashPos = path.find("#")
  if hashPos >= 0:
    anchor = copy(path, hashPos)
    path = copy(path, 0, hashPos - 1)

  var queryPos = path.find("?")
  if queryPos >= 0:
    query = copy(path, queryPos)
    path = copy(path, 0, queryPos - 1)

  result = (protocol: orEmpty(m[1]), username: username, password: password,
            subdomain: orEmpty(m[3]), domain: orEmpty(m[4]) & orEmpty(m[5]),
            port: orEmpty(m[6]), path: path, query: query, anchor: anchor)

when isMainModule:
  proc test(r: TURL) =
    echo("protocol=" & r.protocol)
    echo("username=" & r.username)
    echo("password=" & r.password)
    echo("subdomain=" & r.subdomain)
    echo("domain=" & r.domain)
    echo("port=" & r.port)
    echo("path=" & r.path)
    echo("query=" & r.query)
    echo("anchor=" & r.anchor)
    echo("---------------")

  var r: TURL
  r = parse(r"http://google.co.uk/search?var=bleahdhsad")
  test(r)
  r = parse(r"http://dom96:test@google.com:80/search.php?q=562gs6&foo=6gs6&bar=7hs6#test")
  test(r)
  r = parse(r"http://www.google.co.uk/search?q=multiple+subdomains&ie=utf-8&oe=utf-8&aq=t&rls=org.mozilla:pl:official&client=firefox-a")
  test(r)