diff options
Diffstat (limited to 'lib/pure/httpclient.nim')
-rw-r--r-- | lib/pure/httpclient.nim | 267 |
1 files changed, 181 insertions, 86 deletions
diff --git a/lib/pure/httpclient.nim b/lib/pure/httpclient.nim index 14bcfd2fb..08ea99627 100644 --- a/lib/pure/httpclient.nim +++ b/lib/pure/httpclient.nim @@ -10,28 +10,38 @@ ## This module implements a simple HTTP client that can be used to retrieve ## webpages and other data. ## +## .. warning:: Validate untrusted inputs: URI parsers and getters are not detecting malicious URIs. +## ## Retrieving a website ## ==================== ## ## This example uses HTTP GET to retrieve ## `http://google.com`: ## -## .. code-block:: Nim +## ```Nim ## import std/httpclient ## var client = newHttpClient() -## echo client.getContent("http://google.com") +## try: +## echo client.getContent("http://google.com") +## finally: +## client.close() +## ``` ## ## The same action can also be performed asynchronously, simply use the ## `AsyncHttpClient`: ## -## .. code-block:: Nim +## ```Nim ## import std/[asyncdispatch, httpclient] ## ## proc asyncProc(): Future[string] {.async.} = ## var client = newAsyncHttpClient() -## return await client.getContent("http://example.com") +## try: +## return await client.getContent("http://google.com") +## finally: +## client.close() ## ## echo waitFor asyncProc() +## ``` ## ## The functionality implemented by `HttpClient` and `AsyncHttpClient` ## is the same, so you can use whichever one suits you best in the examples @@ -40,6 +50,10 @@ ## **Note:** You need to run asynchronous examples in an async proc ## otherwise you will get an `Undeclared identifier: 'await'` error. ## +## **Note:** An asynchronous client instance can only deal with one +## request at a time. To send multiple requests in parallel, use +## multiple client instances. +## ## Using HTTP POST ## =============== ## @@ -47,33 +61,39 @@ ## uses `multipart/form-data` as the `Content-Type` to send the HTML to be ## validated to the server. ## -## .. code-block:: Nim +## ```Nim ## var client = newHttpClient() ## var data = newMultipartData() ## data["output"] = "soap12" ## data["uploaded_file"] = ("test.html", "text/html", ## "<html><head></head><body><p>test</p></body></html>") -## -## echo client.postContent("http://validator.w3.org/check", multipart=data) +## try: +## echo client.postContent("http://validator.w3.org/check", multipart=data) +## finally: +## client.close() +## ``` ## ## To stream files from disk when performing the request, use `addFiles`. ## ## **Note:** This will allocate a new `Mimetypes` database every time you call ## it, you can pass your own via the `mimeDb` parameter to avoid this. ## -## .. code-block:: Nim +## ```Nim ## let mimes = newMimetypes() ## var client = newHttpClient() ## var data = newMultipartData() ## data.addFiles({"uploaded_file": "test.html"}, mimeDb = mimes) -## -## echo client.postContent("http://validator.w3.org/check", multipart=data) +## try: +## echo client.postContent("http://validator.w3.org/check", multipart=data) +## finally: +## client.close() +## ``` ## ## You can also make post requests with custom headers. ## This example sets `Content-Type` to `application/json` ## and uses a json object for the body ## -## .. code-block:: Nim +## ```Nim ## import std/[httpclient, json] ## ## let client = newHttpClient() @@ -81,8 +101,12 @@ ## let body = %*{ ## "data": "some text" ## } -## let response = client.request("http://some.api", httpMethod = HttpPost, body = $body) -## echo response.status +## try: +## let response = client.request("http://some.api", httpMethod = HttpPost, body = $body) +## echo response.status +## finally: +## client.close() +## ``` ## ## Progress reporting ## ================== @@ -91,31 +115,36 @@ ## This callback will be executed every second with information about the ## progress of the HTTP request. ## -## .. code-block:: Nim -## import std/[asyncdispatch, httpclient] +## ```Nim +## import std/[asyncdispatch, httpclient] ## -## proc onProgressChanged(total, progress, speed: BiggestInt) {.async.} = -## echo("Downloaded ", progress, " of ", total) -## echo("Current rate: ", speed div 1000, "kb/s") +## proc onProgressChanged(total, progress, speed: BiggestInt) {.async.} = +## echo("Downloaded ", progress, " of ", total) +## echo("Current rate: ", speed div 1000, "kb/s") ## -## proc asyncProc() {.async.} = -## var client = newAsyncHttpClient() -## client.onProgressChanged = onProgressChanged -## discard await client.getContent("http://speedtest-ams2.digitalocean.com/100mb.test") +## proc asyncProc() {.async.} = +## var client = newAsyncHttpClient() +## client.onProgressChanged = onProgressChanged +## try: +## discard await client.getContent("http://speedtest-ams2.digitalocean.com/100mb.test") +## finally: +## client.close() ## -## waitFor asyncProc() +## waitFor asyncProc() +## ``` ## ## If you would like to remove the callback simply set it to `nil`. ## -## .. code-block:: Nim +## ```Nim ## client.onProgressChanged = nil +## ``` ## ## .. warning:: The `total` reported by httpclient may be 0 in some cases. ## ## ## SSL/TLS support ## =============== -## This requires the OpenSSL library, fortunately it's widely used and installed +## This requires the OpenSSL library. Fortunately it's widely used and installed ## on many operating systems. httpclient will use SSL automatically if you give ## any of the functions a url with the `https` schema, for example: ## `https://github.com/`. @@ -123,12 +152,26 @@ ## You will also have to compile with `ssl` defined like so: ## `nim c -d:ssl ...`. ## -## Certificate validation is NOT performed by default. -## This will change in the future. +## Certificate validation is performed by default. ## ## A set of directories and files from the `ssl_certs <ssl_certs.html>`_ ## module are scanned to locate CA certificates. ## +## Example of setting SSL verification parameters in a new client: +## +## ```Nim +## import httpclient +## var client = newHttpClient(sslContext=newContext(verifyMode=CVerifyPeer)) +## ``` +## +## There are three options for verify mode: +## +## * ``CVerifyNone``: certificates are not verified; +## * ``CVerifyPeer``: certificates are verified; +## * ``CVerifyPeerUseEnvVars``: certificates are verified and the optional +## environment variables SSL_CERT_FILE and SSL_CERT_DIR are also used to +## locate certificates +## ## See `newContext <net.html#newContext.string,string,string,string>`_ to tweak or disable certificate validation. ## ## Timeouts @@ -148,10 +191,11 @@ ## ## Here is how to set a timeout when creating an `HttpClient` instance: ## -## .. code-block:: Nim -## import std/httpclient +## ```Nim +## import std/httpclient ## -## let client = newHttpClient(timeout = 42) +## let client = newHttpClient(timeout = 42) +## ``` ## ## Proxy ## ===== @@ -162,28 +206,39 @@ ## ## Some examples on how to configure a Proxy for `HttpClient`: ## -## .. code-block:: Nim -## import std/httpclient +## ```Nim +## import std/httpclient +## +## let myProxy = newProxy("http://myproxy.network") +## let client = newHttpClient(proxy = myProxy) +## ``` +## +## Use proxies with basic authentication: +## +## ```Nim +## import std/httpclient ## -## let myProxy = newProxy("http://myproxy.network") -## let client = newHttpClient(proxy = myProxy) +## let myProxy = newProxy("http://myproxy.network", auth="user:password") +## let client = newHttpClient(proxy = myProxy) +## ``` ## ## Get Proxy URL from environment variables: ## -## .. code-block:: Nim -## import std/httpclient +## ```Nim +## import std/httpclient ## -## var url = "" -## try: -## if existsEnv("http_proxy"): -## url = getEnv("http_proxy") -## elif existsEnv("https_proxy"): -## url = getEnv("https_proxy") -## except ValueError: -## echo "Unable to parse proxy from environment variables." +## var url = "" +## try: +## if existsEnv("http_proxy"): +## url = getEnv("http_proxy") +## elif existsEnv("https_proxy"): +## url = getEnv("https_proxy") +## except ValueError: +## echo "Unable to parse proxy from environment variables." ## -## let myProxy = newProxy(url = url) -## let client = newHttpClient(proxy = myProxy) +## let myProxy = newProxy(url = url) +## let client = newHttpClient(proxy = myProxy) +## ``` ## ## Redirects ## ========= @@ -194,10 +249,11 @@ ## ## Here you can see an example about how to set the `maxRedirects` of `HttpClient`: ## -## .. code-block:: Nim -## import std/httpclient +## ```Nim +## import std/httpclient ## -## let client = newHttpClient(maxRedirects = 0) +## let client = newHttpClient(maxRedirects = 0) +## ``` ## import std/private/since @@ -208,6 +264,9 @@ import std/[ asyncnet, asyncdispatch, asyncfile, nativesockets, ] +when defined(nimPreviewSlimSystem): + import std/[assertions, syncio] + export httpcore except parseHeader # TODO: The `except` doesn't work type @@ -245,9 +304,9 @@ proc contentLength*(response: Response | AsyncResponse): int = ## This is effectively the value of the "Content-Length" header. ## ## A `ValueError` exception will be raised if the value is not an integer. - var contentLengthHeader = response.headers.getOrDefault("Content-Length") + ## If the Content-Length header is not set in the response, ContentLength is set to the value -1. + var contentLengthHeader = response.headers.getOrDefault("Content-Length", HttpHeaderValues(@["-1"])) result = contentLengthHeader.parseInt() - doAssert(result >= 0 and result <= high(int32)) proc lastModified*(response: Response | AsyncResponse): DateTime = ## Retrieves the specified response's last modified time. @@ -300,7 +359,7 @@ type ## and `postContent` proc, ## when the server returns an error -const defUserAgent* = "Nim httpclient/" & NimVersion +const defUserAgent* = "Nim-httpclient/" & NimVersion proc httpError(msg: string) = var e: ref ProtocolError @@ -383,8 +442,9 @@ proc add*(p: MultipartData, xs: MultipartEntries): MultipartData ## Add a list of multipart entries to the multipart data `p`. All values are ## added without a filename and without a content type. ## - ## .. code-block:: Nim + ## ```Nim ## data.add({"action": "login", "format": "json"}) + ## ``` for name, content in xs.items: p.add(name, content) result = p @@ -393,8 +453,9 @@ proc newMultipartData*(xs: MultipartEntries): MultipartData = ## Create a new multipart data object and fill it with the entries `xs` ## directly. ## - ## .. code-block:: Nim + ## ```Nim ## var data = newMultipartData({"action": "login", "format": "json"}) + ## ``` result = MultipartData() for entry in xs: result.add(entry.name, entry.content) @@ -409,8 +470,9 @@ proc addFiles*(p: MultipartData, xs: openArray[tuple[name, file: string]], ## Raises an `IOError` if the file cannot be opened or reading fails. To ## manually specify file content, filename and MIME type, use `[]=` instead. ## - ## .. code-block:: Nim + ## ```Nim ## data.addFiles({"uploaded_file": "public/test.html"}) + ## ``` for name, file in xs.items: var contentType: string let (_, fName, ext) = splitFile(file) @@ -424,8 +486,9 @@ proc `[]=`*(p: MultipartData, name, content: string) {.inline.} = ## Add a multipart entry to the multipart data `p`. The value is added ## without a filename and without a content type. ## - ## .. code-block:: Nim + ## ```Nim ## data["username"] = "NimUser" + ## ``` p.add(name, content) proc `[]=`*(p: MultipartData, name: string, @@ -433,9 +496,10 @@ proc `[]=`*(p: MultipartData, name: string, ## Add a file to the multipart data `p`, specifying filename, contentType ## and content manually. ## - ## .. code-block:: Nim + ## ```Nim ## data["uploaded_file"] = ("test.html", "text/html", ## "<html><head></head><body><p>test</p></body></html>") + ## ``` p.add(name, file.content, file.name, file.contentType, useStream = false) proc getBoundary(p: MultipartData): string = @@ -510,7 +574,7 @@ proc generateHeaders(requestUrl: Uri, httpMethod: HttpMethod, headers: HttpHeade # Proxy auth header. if not proxy.isNil and proxy.auth != "": let auth = base64.encode(proxy.auth) - add(result, "Proxy-Authorization: basic " & auth & httpNewLine) + add(result, "Proxy-Authorization: Basic " & auth & httpNewLine) for key, val in headers: add(result, key & ": " & val & httpNewLine) @@ -574,15 +638,11 @@ proc newHttpClient*(userAgent = defUserAgent, maxRedirects = 5, ## ## `headers` specifies the HTTP Headers. runnableExamples: - import std/[asyncdispatch, httpclient, strutils] - - proc asyncProc(): Future[string] {.async.} = - var client = newAsyncHttpClient() - return await client.getContent("http://example.com") + import std/strutils - let exampleHtml = waitFor asyncProc() + let exampleHtml = newHttpClient().getContent("http://example.com") assert "Example Domain" in exampleHtml - assert not ("Pizza" in exampleHtml) + assert "Pizza" notin exampleHtml new result result.headers = headers @@ -616,6 +676,17 @@ proc newAsyncHttpClient*(userAgent = defUserAgent, maxRedirects = 5, ## connections. ## ## `headers` specifies the HTTP Headers. + runnableExamples: + import std/[asyncdispatch, strutils] + + proc asyncProc(): Future[string] {.async.} = + let client = newAsyncHttpClient() + result = await client.getContent("http://example.com") + + let exampleHtml = waitFor asyncProc() + assert "Example Domain" in exampleHtml + assert "Pizza" notin exampleHtml + new result result.headers = headers result.userAgent = userAgent @@ -635,15 +706,15 @@ proc close*(client: HttpClient | AsyncHttpClient) = client.connected = false proc getSocket*(client: HttpClient): Socket {.inline.} = - ## Get network socket, useful if you want to find out more details about the connection + ## Get network socket, useful if you want to find out more details about the connection. ## - ## this example shows info about local and remote endpoints + ## This example shows info about local and remote endpoints: ## - ## .. code-block:: Nim + ## ```Nim ## if client.connected: ## echo client.getSocket.getLocalAddr ## echo client.getSocket.getPeerAddr - ## + ## ``` return client.socket proc getSocket*(client: AsyncHttpClient): AsyncSocket {.inline.} = @@ -653,7 +724,7 @@ proc reportProgress(client: HttpClient | AsyncHttpClient, progress: BiggestInt) {.multisync.} = client.contentProgress += progress client.oneSecondProgress += progress - if (getMonoTime() - client.lastProgressReport).inSeconds > 1: + if (getMonoTime() - client.lastProgressReport).inSeconds >= 1: if not client.onProgressChanged.isNil: await client.onProgressChanged(client.contentTotal, client.contentProgress, @@ -751,7 +822,7 @@ proc parseBody(client: HttpClient | AsyncHttpClient, headers: HttpHeaders, httpError("Got disconnected while trying to read body.") if recvLen != length: httpError("Received length doesn't match expected length. Wanted " & - $length & " got " & $recvLen) + $length & " got: " & $recvLen) else: # (http://tools.ietf.org/html/rfc2616#section-4.4) NR.4 TODO @@ -785,6 +856,7 @@ proc parseResponse(client: HttpClient | AsyncHttpClient, var parsedStatus = false var linei = 0 var fullyRead = false + var lastHeaderName = "" var line = "" result.headers = newHttpHeaders() while true: @@ -819,16 +891,29 @@ proc parseResponse(client: HttpClient | AsyncHttpClient, parsedStatus = true else: # Parse headers - var name = "" - var le = parseUntil(line, name, ':', linei) - if le <= 0: httpError("invalid headers") - inc(linei, le) - if line[linei] != ':': httpError("invalid headers") - inc(linei) # Skip : - - result.headers.add(name, line[linei .. ^1].strip()) - if result.headers.len > headerLimit: - httpError("too many headers") + # There's at least one char because empty lines are handled above (with client.close) + if line[0] in {' ', '\t'}: + # Check if it's a multiline header value, if so, append to the header we're currently parsing + # This works because a line with a header must start with the header name without any leading space + # See https://datatracker.ietf.org/doc/html/rfc7230, section 3.2 and 3.2.4 + # Multiline headers are deprecated in the spec, but it's better to parse them than crash + if lastHeaderName == "": + # Some extra unparsable lines in the HTTP output - we ignore them + discard + else: + result.headers.table[result.headers.toCaseInsensitive(lastHeaderName)][^1].add "\n" & line + else: + var name = "" + var le = parseUntil(line, name, ':', linei) + if le <= 0: httpError("Invalid headers - received empty header name") + if line.len == le: httpError("Invalid headers - no colon after header name") + inc(linei, le) # Skip the parsed header name + inc(linei) # Skip : + # If we want to be HTTP spec compliant later, error on linei == line.len (for empty header value) + lastHeaderName = name # Remember the header name for the possible multi-line header + result.headers.add(name, line[linei .. ^1].strip()) + if result.headers.len > headerLimit: + httpError("too many headers") if not fullyRead: httpError("Connection was closed before full request has been made") @@ -849,6 +934,9 @@ proc parseResponse(client: HttpClient | AsyncHttpClient, client.parseBodyFut.addCallback do(): if client.parseBodyFut.failed: client.bodyStream.fail(client.parseBodyFut.error) + else: + when client is AsyncHttpClient: + result.bodyStream.complete() proc newConnection(client: HttpClient | AsyncHttpClient, url: Uri) {.multisync.} = @@ -955,19 +1043,22 @@ proc format(client: HttpClient | AsyncHttpClient, if entry.isFile: length += entry.fileSize + httpNewLine.len - result.add "--" & bound & "--" + result.add "--" & bound & "--" & httpNewLine for s in result: length += s.len client.headers["Content-Length"] = $length proc override(fallback, override: HttpHeaders): HttpHeaders = # Right-biased map union for `HttpHeaders` - if override.isNil: - return fallback result = newHttpHeaders() # Copy by value result.table[] = fallback.table[] + + if override.isNil: + # Return the copy of fallback so it does not get modified + return result + for k, vs in override.table: result[k] = vs @@ -987,12 +1078,16 @@ proc requestAux(client: HttpClient | AsyncHttpClient, url: Uri, await newConnection(client, url) - let newHeaders = client.headers.override(headers) + var newHeaders: HttpHeaders var data: seq[string] if multipart != nil and multipart.content.len > 0: + # `format` modifies `client.headers`, see + # https://github.com/nim-lang/Nim/pull/18208#discussion_r647036979 data = await client.format(multipart) + newHeaders = client.headers.override(headers) else: + newHeaders = client.headers.override(headers) # Only change headers if they have not been specified already if not newHeaders.hasKey("Content-Length"): if body.len != 0: @@ -1137,7 +1232,7 @@ proc responseContent(resp: Response | AsyncResponse): Future[string] {.multisync ## A `HttpRequestError` will be raised if the server responds with a ## client error (status code 4xx) or a server error (status code 5xx). if resp.code.is4xx or resp.code.is5xx: - raise newException(HttpRequestError, resp.status) + raise newException(HttpRequestError, resp.status.move) else: return await resp.bodyStream.readAll() |