summary refs log tree commit diff stats
path: root/lib
diff options
context:
space:
mode:
authorMildred Ki'Lya <mildred@users.noreply.github.com>2021-01-12 13:42:05 +0100
committerGitHub <noreply@github.com>2021-01-12 13:42:05 +0100
commit71db2be83351aac3347f95dd545466a447ed1c07 (patch)
tree781477297aca643221505c52819d02765b912a4a /lib
parentbb3c6d079772f2724c051c3ff3ccba90960548e2 (diff)
downloadNim-71db2be83351aac3347f95dd545466a447ed1c07.tar.gz
Improve uri.parseQuery to never raise an error (#16647)
In case of a malformed query string where there is an `=` in the value,
handle this character as part of the value instead of throwing an error.

The following query string should no longer crash a program:

    key=value&key2=x=1

It will be interpreted as [("key", "value"), ("key2", "x=1")]

This is correct according to the latest WhatWG HTML5 specification
regarding the urlencoded parser:
https://url.spec.whatwg.org/#concept-urlencoded-parser

Older behavior can be restored using the -d:nimLegacyParseQueryStrict
flag.
Diffstat (limited to 'lib')
-rw-r--r--lib/pure/cgi.nim14
-rw-r--r--lib/pure/uri.nim38
2 files changed, 27 insertions, 25 deletions
diff --git a/lib/pure/cgi.nim b/lib/pure/cgi.nim
index 8d827f555..3b8dad849 100644
--- a/lib/pure/cgi.nim
+++ b/lib/pure/cgi.nim
@@ -84,11 +84,8 @@ proc getEncodedData(allowedMethods: set[RequestMethod]): string =
 iterator decodeData*(data: string): tuple[key, value: TaintedString] =
   ## Reads and decodes CGI data and yields the (name, value) pairs the
   ## data consists of.
-  try:
-    for (key, value) in uri.decodeQuery(data):
-      yield (key, value)
-  except UriParseError as e:
-    cgiError(e.msg)
+  for (key, value) in uri.decodeQuery(data):
+    yield (key, value)
 
 iterator decodeData*(allowedMethods: set[RequestMethod] =
        {methodNone, methodPost, methodGet}): tuple[key, value: TaintedString] =
@@ -96,11 +93,8 @@ iterator decodeData*(allowedMethods: set[RequestMethod] =
   ## data consists of. If the client does not use a method listed in the
   ## `allowedMethods` set, a `CgiError` exception is raised.
   let data = getEncodedData(allowedMethods)
-  try:
-    for (key, value) in uri.decodeQuery(data):
-      yield (key, value)
-  except UriParseError as e:
-    cgiError(e.msg)
+  for (key, value) in uri.decodeQuery(data):
+    yield (key, value)
 
 proc readData*(allowedMethods: set[RequestMethod] =
                {methodNone, methodPost, methodGet}): StringTableRef =
diff --git a/lib/pure/uri.nim b/lib/pure/uri.nim
index 7f553be1a..c8ed28536 100644
--- a/lib/pure/uri.nim
+++ b/lib/pure/uri.nim
@@ -161,22 +161,26 @@ func encodeQuery*(query: openArray[(string, string)], usePlus = true,
       result.add(encodeUrl(val, usePlus))
 
 iterator decodeQuery*(data: string): tuple[key, value: TaintedString] =
-  ## Reads and decodes query string ``data`` and yields the (key, value) pairs the
-  ## data consists of.
+  ## Reads and decodes query string `data` and yields the `(key, value)` pairs
+  ## the data consists of. If compiled with `-d:nimLegacyParseQueryStrict`, an
+  ## error is raised when there is an unencoded `=` character in a decoded
+  ## value, which was the behavior in Nim < 1.5.1
   runnableExamples:
-    import std/sugar
-    let s = collect(newSeq):
-      for k, v in decodeQuery("foo=1&bar=2"): (k, v)
-    doAssert s == @[("foo", "1"), ("bar", "2")]
+    import std/sequtils
+    doAssert toSeq(decodeQuery("foo=1&bar=2=3")) == @[("foo", "1"), ("bar", "2=3")]
+    doAssert toSeq(decodeQuery("&a&=b&=&&")) == @[("", ""), ("a", ""), ("", "b"), ("", ""), ("", "")]
 
-  proc parseData(data: string, i: int, field: var string): int =
+  proc parseData(data: string, i: int, field: var string, sep: char): int =
     result = i
     while result < data.len:
-      case data[result]
+      let c = data[result]
+      case c
       of '%': add(field, decodePercent(data, result))
       of '+': add(field, ' ')
-      of '=', '&': break
-      else: add(field, data[result])
+      of '&': break
+      else:
+        if c == sep: break
+        else: add(field, data[result])
       inc(result)
 
   var i = 0
@@ -185,16 +189,20 @@ iterator decodeQuery*(data: string): tuple[key, value: TaintedString] =
   # decode everything in one pass:
   while i < data.len:
     setLen(name, 0) # reuse memory
-    i = parseData(data, i, name)
+    i = parseData(data, i, name, '=')
     setLen(value, 0) # reuse memory
     if i < data.len and data[i] == '=':
       inc(i) # skip '='
-      i = parseData(data, i, value)
+      when defined(nimLegacyParseQueryStrict):
+        i = parseData(data, i, value, '=')
+      else:
+        i = parseData(data, i, value, '&')
     yield (name.TaintedString, value.TaintedString)
     if i < data.len:
-      if data[i] == '&': inc(i)
-      else:
-        uriParseError("'&' expected at index '$#' for '$#'" % [$i, data])
+      when defined(nimLegacyParseQueryStrict):
+        if data[i] != '&':
+          uriParseError("'&' expected at index '$#' for '$#'" % [$i, data])
+      inc(i)
 
 func parseAuthority(authority: string, result: var Uri) =
   var i = 0