summary refs log tree commit diff stats
path: root/lib/pure/httpclient.nim
diff options
context:
space:
mode:
Diffstat (limited to 'lib/pure/httpclient.nim')
-rw-r--r--lib/pure/httpclient.nim267
1 files changed, 181 insertions, 86 deletions
diff --git a/lib/pure/httpclient.nim b/lib/pure/httpclient.nim
index 14bcfd2fb..08ea99627 100644
--- a/lib/pure/httpclient.nim
+++ b/lib/pure/httpclient.nim
@@ -10,28 +10,38 @@
 ## This module implements a simple HTTP client that can be used to retrieve
 ## webpages and other data.
 ##
+## .. warning:: Validate untrusted inputs: URI parsers and getters do not detect malicious URIs.
+##
 ## Retrieving a website
 ## ====================
 ##
 ## This example uses HTTP GET to retrieve
 ## `http://google.com`:
 ##
-## .. code-block:: Nim
+##   ```Nim
 ##   import std/httpclient
 ##   var client = newHttpClient()
-##   echo client.getContent("http://google.com")
+##   try:
+##     echo client.getContent("http://google.com")
+##   finally:
+##     client.close()
+##   ```
 ##
 ## The same action can also be performed asynchronously; simply use the
 ## `AsyncHttpClient`:
 ##
-## .. code-block:: Nim
+##   ```Nim
 ##   import std/[asyncdispatch, httpclient]
 ##
 ##   proc asyncProc(): Future[string] {.async.} =
 ##     var client = newAsyncHttpClient()
-##     return await client.getContent("http://example.com")
+##     try:
+##       return await client.getContent("http://google.com")
+##     finally:
+##       client.close()
 ##
 ##   echo waitFor asyncProc()
+##   ```
 ##
 ## The functionality implemented by `HttpClient` and `AsyncHttpClient`
 ## is the same, so you can use whichever one suits you best in the examples
@@ -40,6 +50,10 @@
 ## **Note:** You need to run asynchronous examples in an async proc
 ## otherwise you will get an `Undeclared identifier: 'await'` error.
 ##
+## **Note:** An asynchronous client instance can only deal with one
+## request at a time. To send multiple requests in parallel, use
+## multiple client instances.
+##
 ## Using HTTP POST
 ## ===============
 ##
@@ -47,33 +61,39 @@
 ## uses `multipart/form-data` as the `Content-Type` to send the HTML to be
 ## validated to the server.
 ##
-## .. code-block:: Nim
+##   ```Nim
 ##   var client = newHttpClient()
 ##   var data = newMultipartData()
 ##   data["output"] = "soap12"
 ##   data["uploaded_file"] = ("test.html", "text/html",
 ##     "<html><head></head><body><p>test</p></body></html>")
-##
-##   echo client.postContent("http://validator.w3.org/check", multipart=data)
+##   try:
+##     echo client.postContent("http://validator.w3.org/check", multipart=data)
+##   finally:
+##     client.close()
+##   ```
 ##
 ## To stream files from disk when performing the request, use `addFiles`.
 ##
 ## **Note:** This will allocate a new `Mimetypes` database every time you call
 ## it; you can pass your own via the `mimeDb` parameter to avoid this.
 ##
-## .. code-block:: Nim
+##   ```Nim
 ##   let mimes = newMimetypes()
 ##   var client = newHttpClient()
 ##   var data = newMultipartData()
 ##   data.addFiles({"uploaded_file": "test.html"}, mimeDb = mimes)
-##
-##   echo client.postContent("http://validator.w3.org/check", multipart=data)
+##   try:
+##     echo client.postContent("http://validator.w3.org/check", multipart=data)
+##   finally:
+##     client.close()
+##   ```
 ##
 ## You can also make POST requests with custom headers.
 ## This example sets `Content-Type` to `application/json`
 ## and uses a JSON object for the body:
 ##
-## .. code-block:: Nim
+##   ```Nim
 ##   import std/[httpclient, json]
 ##
 ##   let client = newHttpClient()
@@ -81,8 +101,12 @@
 ##   let body = %*{
 ##       "data": "some text"
 ##   }
-##   let response = client.request("http://some.api", httpMethod = HttpPost, body = $body)
-##   echo response.status
+##   try:
+##     let response = client.request("http://some.api", httpMethod = HttpPost, body = $body)
+##     echo response.status
+##   finally:
+##     client.close()
+##   ```
 ##
 ## Progress reporting
 ## ==================
@@ -91,31 +115,36 @@
 ## This callback will be executed every second with information about the
 ## progress of the HTTP request.
 ##
-## .. code-block:: Nim
-##    import std/[asyncdispatch, httpclient]
+##   ```Nim
+##   import std/[asyncdispatch, httpclient]
 ##
-##    proc onProgressChanged(total, progress, speed: BiggestInt) {.async.} =
-##      echo("Downloaded ", progress, " of ", total)
-##      echo("Current rate: ", speed div 1000, "kb/s")
+##   proc onProgressChanged(total, progress, speed: BiggestInt) {.async.} =
+##     echo("Downloaded ", progress, " of ", total)
+##     echo("Current rate: ", speed div 1000, "kb/s")
 ##
-##    proc asyncProc() {.async.} =
-##      var client = newAsyncHttpClient()
-##      client.onProgressChanged = onProgressChanged
-##      discard await client.getContent("http://speedtest-ams2.digitalocean.com/100mb.test")
+##   proc asyncProc() {.async.} =
+##     var client = newAsyncHttpClient()
+##     client.onProgressChanged = onProgressChanged
+##     try:
+##       discard await client.getContent("http://speedtest-ams2.digitalocean.com/100mb.test")
+##     finally:
+##       client.close()
 ##
-##    waitFor asyncProc()
+##   waitFor asyncProc()
+##   ```
 ##
 ## If you would like to remove the callback simply set it to `nil`.
 ##
-## .. code-block:: Nim
+##   ```Nim
 ##   client.onProgressChanged = nil
+##   ```
 ##
 ## .. warning:: The `total` reported by httpclient may be 0 in some cases.
 ##
 ##
 ## SSL/TLS support
 ## ===============
-## This requires the OpenSSL library, fortunately it's widely used and installed
+## This requires the OpenSSL library. Fortunately it's widely used and installed
 ## on many operating systems. httpclient will use SSL automatically if you give
 ## any of the functions a URL with the `https` scheme, for example:
 ## `https://github.com/`.
@@ -123,12 +152,26 @@
 ## You will also have to compile with `ssl` defined like so:
 ## `nim c -d:ssl ...`.
 ##
-## Certificate validation is NOT performed by default.
-## This will change in the future.
+## Certificate validation is performed by default.
 ##
 ## A set of directories and files from the `ssl_certs <ssl_certs.html>`_
 ## module are scanned to locate CA certificates.
 ##
+## Example of setting SSL verification parameters in a new client:
+##
+##   ```Nim
+##   import std/[httpclient, net]
+##   var client = newHttpClient(sslContext=newContext(verifyMode=CVerifyPeer))
+##   ```
+##
+## There are three options for verify mode:
+##
+## * ``CVerifyNone``: certificates are not verified;
+## * ``CVerifyPeer``: certificates are verified;
+## * ``CVerifyPeerUseEnvVars``: certificates are verified and the optional
+##   environment variables SSL_CERT_FILE and SSL_CERT_DIR are also used to
+##   locate certificates.
+##
 ## See `newContext <net.html#newContext.string,string,string,string>`_ to tweak or disable certificate validation.
 ##
 ## Timeouts
@@ -148,10 +191,11 @@
 ##
 ## Here is how to set a timeout when creating an `HttpClient` instance:
 ##
-## .. code-block:: Nim
-##    import std/httpclient
+##   ```Nim
+##   import std/httpclient
 ##
-##    let client = newHttpClient(timeout = 42)
+##   let client = newHttpClient(timeout = 42)
+##   ```
 ##
 ## Proxy
 ## =====
@@ -162,28 +206,39 @@
 ##
 ## Some examples on how to configure a Proxy for `HttpClient`:
 ##
-## .. code-block:: Nim
-##    import std/httpclient
+##   ```Nim
+##   import std/httpclient
+##
+##   let myProxy = newProxy("http://myproxy.network")
+##   let client = newHttpClient(proxy = myProxy)
+##   ```
+##
+## Use proxies with basic authentication:
+##
+##   ```Nim
+##   import std/httpclient
 ##
-##    let myProxy = newProxy("http://myproxy.network")
-##    let client = newHttpClient(proxy = myProxy)
+##   let myProxy = newProxy("http://myproxy.network", auth="user:password")
+##   let client = newHttpClient(proxy = myProxy)
+##   ```
 ##
 ## Get Proxy URL from environment variables:
 ##
-## .. code-block:: Nim
-##    import std/httpclient
+##   ```Nim
+##   import std/httpclient
 ##
-##    var url = ""
-##    try:
-##      if existsEnv("http_proxy"):
-##        url = getEnv("http_proxy")
-##      elif existsEnv("https_proxy"):
-##        url = getEnv("https_proxy")
-##    except ValueError:
-##      echo "Unable to parse proxy from environment variables."
+##   var url = ""
+##   try:
+##     if existsEnv("http_proxy"):
+##       url = getEnv("http_proxy")
+##     elif existsEnv("https_proxy"):
+##       url = getEnv("https_proxy")
+##   except ValueError:
+##     echo "Unable to parse proxy from environment variables."
 ##
-##    let myProxy = newProxy(url = url)
-##    let client = newHttpClient(proxy = myProxy)
+##   let myProxy = newProxy(url = url)
+##   let client = newHttpClient(proxy = myProxy)
+##   ```
 ##
 ## Redirects
 ## =========
@@ -194,10 +249,11 @@
 ##
 ## Here you can see an example of how to set the `maxRedirects` of `HttpClient`:
 ##
-## .. code-block:: Nim
-##    import std/httpclient
+##   ```Nim
+##   import std/httpclient
 ##
-##    let client = newHttpClient(maxRedirects = 0)
+##   let client = newHttpClient(maxRedirects = 0)
+##   ```
 ##
 
 import std/private/since
@@ -208,6 +264,9 @@ import std/[
   asyncnet, asyncdispatch, asyncfile, nativesockets,
 ]
 
+when defined(nimPreviewSlimSystem):
+  import std/[assertions, syncio]
+
 export httpcore except parseHeader # TODO: The `except` doesn't work
 
 type
@@ -245,9 +304,9 @@ proc contentLength*(response: Response | AsyncResponse): int =
   ## This is effectively the value of the "Content-Length" header.
   ##
   ## A `ValueError` exception will be raised if the value is not an integer.
-  var contentLengthHeader = response.headers.getOrDefault("Content-Length")
+  ## If the `Content-Length` header is not set in the response, -1 is returned.
+  var contentLengthHeader = response.headers.getOrDefault("Content-Length", HttpHeaderValues(@["-1"]))
   result = contentLengthHeader.parseInt()
-  doAssert(result >= 0 and result <= high(int32))
 
 proc lastModified*(response: Response | AsyncResponse): DateTime =
   ## Retrieves the specified response's last modified time.
@@ -300,7 +359,7 @@ type
                                         ## and `postContent` proc,
                                         ## when the server returns an error
 
-const defUserAgent* = "Nim httpclient/" & NimVersion
+const defUserAgent* = "Nim-httpclient/" & NimVersion
 
 proc httpError(msg: string) =
   var e: ref ProtocolError
@@ -383,8 +442,9 @@ proc add*(p: MultipartData, xs: MultipartEntries): MultipartData
   ## Add a list of multipart entries to the multipart data `p`. All values are
   ## added without a filename and without a content type.
   ##
-  ## .. code-block:: Nim
+  ##   ```Nim
   ##   data.add({"action": "login", "format": "json"})
+  ##   ```
   for name, content in xs.items:
     p.add(name, content)
   result = p
@@ -393,8 +453,9 @@ proc newMultipartData*(xs: MultipartEntries): MultipartData =
   ## Create a new multipart data object and fill it with the entries `xs`
   ## directly.
   ##
-  ## .. code-block:: Nim
+  ##   ```Nim
   ##   var data = newMultipartData({"action": "login", "format": "json"})
+  ##   ```
   result = MultipartData()
   for entry in xs:
     result.add(entry.name, entry.content)
@@ -409,8 +470,9 @@ proc addFiles*(p: MultipartData, xs: openArray[tuple[name, file: string]],
   ## Raises an `IOError` if the file cannot be opened or reading fails. To
   ## manually specify file content, filename and MIME type, use `[]=` instead.
   ##
-  ## .. code-block:: Nim
+  ##   ```Nim
   ##   data.addFiles({"uploaded_file": "public/test.html"})
+  ##   ```
   for name, file in xs.items:
     var contentType: string
     let (_, fName, ext) = splitFile(file)
@@ -424,8 +486,9 @@ proc `[]=`*(p: MultipartData, name, content: string) {.inline.} =
   ## Add a multipart entry to the multipart data `p`. The value is added
   ## without a filename and without a content type.
   ##
-  ## .. code-block:: Nim
+  ##   ```Nim
   ##   data["username"] = "NimUser"
+  ##   ```
   p.add(name, content)
 
 proc `[]=`*(p: MultipartData, name: string,
@@ -433,9 +496,10 @@ proc `[]=`*(p: MultipartData, name: string,
   ## Add a file to the multipart data `p`, specifying filename, contentType
   ## and content manually.
   ##
-  ## .. code-block:: Nim
+  ##   ```Nim
   ##   data["uploaded_file"] = ("test.html", "text/html",
   ##     "<html><head></head><body><p>test</p></body></html>")
+  ##   ```
   p.add(name, file.content, file.name, file.contentType, useStream = false)
 
 proc getBoundary(p: MultipartData): string =
@@ -510,7 +574,7 @@ proc generateHeaders(requestUrl: Uri, httpMethod: HttpMethod, headers: HttpHeade
   # Proxy auth header.
   if not proxy.isNil and proxy.auth != "":
     let auth = base64.encode(proxy.auth)
-    add(result, "Proxy-Authorization: basic " & auth & httpNewLine)
+    add(result, "Proxy-Authorization: Basic " & auth & httpNewLine)
 
   for key, val in headers:
     add(result, key & ": " & val & httpNewLine)
@@ -574,15 +638,11 @@ proc newHttpClient*(userAgent = defUserAgent, maxRedirects = 5,
   ##
   ## `headers` specifies the HTTP Headers.
   runnableExamples:
-    import std/[asyncdispatch, httpclient, strutils]
-
-    proc asyncProc(): Future[string] {.async.} =
-      var client = newAsyncHttpClient()
-      return await client.getContent("http://example.com")
+    import std/strutils
 
-    let exampleHtml = waitFor asyncProc()
+    let exampleHtml = newHttpClient().getContent("http://example.com")
     assert "Example Domain" in exampleHtml
-    assert not ("Pizza" in exampleHtml)
+    assert "Pizza" notin exampleHtml
 
   new result
   result.headers = headers
@@ -616,6 +676,17 @@ proc newAsyncHttpClient*(userAgent = defUserAgent, maxRedirects = 5,
   ## connections.
   ##
   ## `headers` specifies the HTTP Headers.
+  runnableExamples:
+    import std/[asyncdispatch, strutils]
+
+    proc asyncProc(): Future[string] {.async.} =
+      let client = newAsyncHttpClient()
+      result = await client.getContent("http://example.com")
+
+    let exampleHtml = waitFor asyncProc()
+    assert "Example Domain" in exampleHtml
+    assert "Pizza" notin exampleHtml
+  
   new result
   result.headers = headers
   result.userAgent = userAgent
@@ -635,15 +706,15 @@ proc close*(client: HttpClient | AsyncHttpClient) =
     client.connected = false
 
 proc getSocket*(client: HttpClient): Socket {.inline.} =
-  ## Get network socket, useful if you want to find out more details about the connection
+  ## Get network socket, useful if you want to find out more details about the connection.
   ##
-  ## this example shows info about local and remote endpoints
+  ## This example shows info about local and remote endpoints:
   ##
-  ## .. code-block:: Nim
+  ##   ```Nim
   ##   if client.connected:
   ##     echo client.getSocket.getLocalAddr
   ##     echo client.getSocket.getPeerAddr
-  ##
+  ##   ```
   return client.socket
 
 proc getSocket*(client: AsyncHttpClient): AsyncSocket {.inline.} =
@@ -653,7 +724,7 @@ proc reportProgress(client: HttpClient | AsyncHttpClient,
                     progress: BiggestInt) {.multisync.} =
   client.contentProgress += progress
   client.oneSecondProgress += progress
-  if (getMonoTime() - client.lastProgressReport).inSeconds > 1:
+  if (getMonoTime() - client.lastProgressReport).inSeconds >= 1:
     if not client.onProgressChanged.isNil:
       await client.onProgressChanged(client.contentTotal,
                                      client.contentProgress,
@@ -751,7 +822,7 @@ proc parseBody(client: HttpClient | AsyncHttpClient, headers: HttpHeaders,
           httpError("Got disconnected while trying to read body.")
         if recvLen != length:
           httpError("Received length doesn't match expected length. Wanted " &
-                    $length & " got " & $recvLen)
+                    $length & " got: " & $recvLen)
     else:
       # (http://tools.ietf.org/html/rfc2616#section-4.4) NR.4 TODO
 
@@ -785,6 +856,7 @@ proc parseResponse(client: HttpClient | AsyncHttpClient,
   var parsedStatus = false
   var linei = 0
   var fullyRead = false
+  var lastHeaderName = ""
   var line = ""
   result.headers = newHttpHeaders()
   while true:
@@ -819,16 +891,29 @@ proc parseResponse(client: HttpClient | AsyncHttpClient,
       parsedStatus = true
     else:
       # Parse headers
-      var name = ""
-      var le = parseUntil(line, name, ':', linei)
-      if le <= 0: httpError("invalid headers")
-      inc(linei, le)
-      if line[linei] != ':': httpError("invalid headers")
-      inc(linei) # Skip :
-
-      result.headers.add(name, line[linei .. ^1].strip())
-      if result.headers.len > headerLimit:
-        httpError("too many headers")
+      # There's at least one char because empty lines are handled above (with client.close)
+      if line[0] in {' ', '\t'}:
+        # Check if it's a multiline header value, if so, append to the header we're currently parsing
+        # This works because a line with a header must start with the header name without any leading space
+        # See https://datatracker.ietf.org/doc/html/rfc7230, section 3.2 and 3.2.4
+        # Multiline headers are deprecated in the spec, but it's better to parse them than crash
+        if lastHeaderName == "":
+          # Some extra unparsable lines in the HTTP output - we ignore them
+          discard
+        else:
+          result.headers.table[result.headers.toCaseInsensitive(lastHeaderName)][^1].add "\n" & line
+      else:
+        var name = ""
+        var le = parseUntil(line, name, ':', linei)
+        if le <= 0: httpError("Invalid headers - received empty header name")
+        if line.len == le: httpError("Invalid headers - no colon after header name")
+        inc(linei, le) # Skip the parsed header name
+        inc(linei) # Skip :
+        # If we want to be HTTP spec compliant later, error on linei == line.len (for empty header value)
+        lastHeaderName = name # Remember the header name for the possible multi-line header
+        result.headers.add(name, line[linei .. ^1].strip())
+        if result.headers.len > headerLimit:
+          httpError("too many headers")
 
   if not fullyRead:
     httpError("Connection was closed before full request has been made")
@@ -849,6 +934,9 @@ proc parseResponse(client: HttpClient | AsyncHttpClient,
       client.parseBodyFut.addCallback do():
         if client.parseBodyFut.failed:
           client.bodyStream.fail(client.parseBodyFut.error)
+  else:
+    when client is AsyncHttpClient:
+      result.bodyStream.complete()
 
 proc newConnection(client: HttpClient | AsyncHttpClient,
                    url: Uri) {.multisync.} =
@@ -955,19 +1043,22 @@ proc format(client: HttpClient | AsyncHttpClient,
     if entry.isFile:
       length += entry.fileSize + httpNewLine.len
 
-  result.add "--" & bound & "--"
+  result.add "--" & bound & "--" & httpNewLine
 
   for s in result: length += s.len
   client.headers["Content-Length"] = $length
 
 proc override(fallback, override: HttpHeaders): HttpHeaders =
   # Right-biased map union for `HttpHeaders`
-  if override.isNil:
-    return fallback
 
   result = newHttpHeaders()
   # Copy by value
   result.table[] = fallback.table[]
+
+  if override.isNil:
+    # Return the copy of fallback so it does not get modified
+    return result
+
   for k, vs in override.table:
     result[k] = vs
 
@@ -987,12 +1078,16 @@ proc requestAux(client: HttpClient | AsyncHttpClient, url: Uri,
 
   await newConnection(client, url)
 
-  let newHeaders = client.headers.override(headers)
+  var newHeaders: HttpHeaders
 
   var data: seq[string]
   if multipart != nil and multipart.content.len > 0:
+    # `format` modifies `client.headers`, see 
+    # https://github.com/nim-lang/Nim/pull/18208#discussion_r647036979
     data = await client.format(multipart)
+    newHeaders = client.headers.override(headers)
   else:
+    newHeaders = client.headers.override(headers)
     # Only change headers if they have not been specified already
     if not newHeaders.hasKey("Content-Length"):
       if body.len != 0:
@@ -1137,7 +1232,7 @@ proc responseContent(resp: Response | AsyncResponse): Future[string] {.multisync
   ## A `HttpRequestError` will be raised if the server responds with a
   ## client error (status code 4xx) or a server error (status code 5xx).
   if resp.code.is4xx or resp.code.is5xx:
-    raise newException(HttpRequestError, resp.status)
+    raise newException(HttpRequestError, resp.status.move)
   else:
     return await resp.bodyStream.readAll()