From 40e33dec45b98e1c5385f844e241011a8351d364 Mon Sep 17 00:00:00 2001 From: Yardanico Date: Wed, 1 Nov 2023 10:01:31 +0300 Subject: Fix `IndexDefect` errors in httpclient on invalid/weird headers (#22886) Continuation of https://github.com/nim-lang/Nim/pull/19262 Fixes https://github.com/nim-lang/Nim/issues/19261 The parsing code is still too lenient (e.g. it will happily parse header names with spaces in them, which is outright invalid by the spec), but I didn't want to touch it beyond the simple changes to make sure that `std/httpclient` won't throw `IndexDefect`s like it does now on those cases: - Multiline header values - No colon after the header name - No value after the header name + colon One question remains - should I keep `toCaseInsensitive` exported in `httpcore` or just copy-paste the implementation? --------- Co-authored-by: Andreas Rumpf --- lib/pure/httpclient.nim | 34 ++++++++++++++++++++++++---------- lib/pure/httpcore.nim | 3 ++- 2 files changed, 26 insertions(+), 11 deletions(-) diff --git a/lib/pure/httpclient.nim b/lib/pure/httpclient.nim index d2bf925ba..fc66b96f5 100644 --- a/lib/pure/httpclient.nim +++ b/lib/pure/httpclient.nim @@ -856,6 +856,7 @@ proc parseResponse(client: HttpClient | AsyncHttpClient, var parsedStatus = false var linei = 0 var fullyRead = false + var lastHeaderName = "" var line = "" result.headers = newHttpHeaders() while true: @@ -890,16 +891,29 @@ proc parseResponse(client: HttpClient | AsyncHttpClient, parsedStatus = true else: # Parse headers - var name = "" - var le = parseUntil(line, name, ':', linei) - if le <= 0: httpError("invalid headers") - inc(linei, le) - if line[linei] != ':': httpError("invalid headers") - inc(linei) # Skip : - - result.headers.add(name, line[linei .. ^1].strip()) - if result.headers.len > headerLimit: - httpError("too many headers") + # There's at least one char because empty lines are handled above (with client.close) + if line[0] in {' ', '\t'}: + # Check if it's a multiline header value, if so, append to the header we're currently parsing + # This works because a line with a header must start with the header name without any leading space + # See https://datatracker.ietf.org/doc/html/rfc7230, section 3.2 and 3.2.4 + # Multiline headers are deprecated in the spec, but it's better to parse them than crash + if lastHeaderName == "": + # Some extra unparsable lines in the HTTP output - we ignore them + discard + else: + result.headers.table[result.headers.toCaseInsensitive(lastHeaderName)][^1].add "\n" & line + else: + var name = "" + var le = parseUntil(line, name, ':', linei) + if le <= 0: httpError("Invalid headers - received empty header name") + if line.len == le: httpError("Invalid headers - no colon after header name") + inc(linei, le) # Skip the parsed header name + inc(linei) # Skip : + # If we want to be HTTP spec compliant later, error on linei == line.len (for empty header value) + lastHeaderName = name # Remember the header name for the possible multi-line header + result.headers.add(name, line[linei .. ^1].strip()) + if result.headers.len > headerLimit: + httpError("too many headers") if not fullyRead: httpError("Connection was closed before full request has been made") diff --git a/lib/pure/httpcore.nim b/lib/pure/httpcore.nim index 5b3d7b45b..ab0c030a5 100644 --- a/lib/pure/httpcore.nim +++ b/lib/pure/httpcore.nim @@ -126,7 +126,8 @@ func toTitleCase(s: string): string = result[i] = if upper: toUpperAscii(s[i]) else: toLowerAscii(s[i]) upper = s[i] == '-' -func toCaseInsensitive(headers: HttpHeaders, s: string): string {.inline.} = +func toCaseInsensitive*(headers: HttpHeaders, s: string): string {.inline.} = + ## For internal usage only. Do not use. return if headers.isTitleCase: toTitleCase(s) else: toLowerAscii(s) func newHttpHeaders*(titleCase=false): HttpHeaders = -- cgit 1.4.1-2-gfad0