fix #14082, don't crash on incorrectly formatted input (#14977) [backport]

* fix #14082, don't crash on incorrectly formatted input
* address code review
* remove duplication
author: Miran <narimiran@disroot.org> 2020-07-17 10:59:53 +0200
committer: GitHub <noreply@github.com> 2020-07-17 10:59:53 +0200
commit: c62513049cd7dcbd5b339c2078856a31c498b4aa (patch)
tree: 9f44f58f5d4d83f159922e6f03994f0f6cea4310 /lib
parent: 1355b461aa70cd1e17b2f07085aa6c97cb54283f (diff)
download: Nim-c62513049cd7dcbd5b339c2078856a31c498b4aa.tar.gz
3 files changed, 42 insertions, 42 deletions
diff --git a/lib/pure/cgi.nim b/lib/pure/cgi.nim
index dd7c4d477..734ab9171 100644
--- a/lib/pure/cgi.nim
+++ b/lib/pure/cgi.nim
@@ -32,12 +32,7 @@
 import strutils, os, strtabs, cookies, uri
 export uri.encodeUrl, uri.decodeUrl
 
-proc handleHexChar(c: char, x: var int) {.inline.} =
-  case c
-  of '0'..'9': x = (x shl 4) or (ord(c) - ord('0'))
-  of 'a'..'f': x = (x shl 4) or (ord(c) - ord('a') + 10)
-  of 'A'..'F': x = (x shl 4) or (ord(c) - ord('A') + 10)
-  else: assert(false)
+include includes/decode_helpers
 
 proc addXmlChar(dest: var string, c: char) {.inline.} =
   case c
@@ -93,40 +88,27 @@ proc getEncodedData(allowedMethods: set[RequestMethod]): string =
 iterator decodeData*(data: string): tuple[key, value: TaintedString] =
   ## Reads and decodes CGI data and yields the (name, value) pairs the
   ## data consists of.
+  proc parseData(data: string, i: int, field: var string): int =
+    result = i
+    while result < data.len:
+      case data[result]
+      of '%': add(field, decodePercent(data, result))
+      of '+': add(field, ' ')
+      of '=', '&': break
+      else: add(field, data[result])
+      inc(result)
+
   var i = 0
   var name = ""
   var value = ""
   # decode everything in one pass:
   while i < data.len:
     setLen(name, 0) # reuse memory
-    while i < data.len:
-      case data[i]
-      of '%':
-        var x = 0
-        handleHexChar(data[i+1], x)
-        handleHexChar(data[i+2], x)
-        inc(i, 2)
-        add(name, chr(x))
-      of '+': add(name, ' ')
-      of '=', '&': break
-      else: add(name, data[i])
-      inc(i)
+    i = parseData(data, i, name)
     if i >= data.len or data[i] != '=': cgiError("'=' expected")
     inc(i) # skip '='
     setLen(value, 0) # reuse memory
-    while i < data.len:
-      case data[i]
-      of '%':
-        var x = 0
-        if i+2 < data.len:
-          handleHexChar(data[i+1], x)
-          handleHexChar(data[i+2], x)
-        inc(i, 2)
-        add(value, chr(x))
-      of '+': add(value, ' ')
-      of '&', '\0': break
-      else: add(value, data[i])
-      inc(i)
+    i = parseData(data, i, value)
     yield (name.TaintedString, value.TaintedString)
     if i < data.len:
       if data[i] == '&': inc(i)
diff --git a/lib/pure/includes/decode_helpers.nim b/lib/pure/includes/decode_helpers.nim
new file mode 100644
index 000000000..74fe37d07
--- /dev/null
+++ b/lib/pure/includes/decode_helpers.nim
@@ -0,0 +1,24 @@
+# Include file that implements 'decodePercent' and friends. Do not import it!
+
+proc handleHexChar(c: char, x: var int, f: var bool) {.inline.} =
+  case c
+  of '0'..'9': x = (x shl 4) or (ord(c) - ord('0'))
+  of 'a'..'f': x = (x shl 4) or (ord(c) - ord('a') + 10)
+  of 'A'..'F': x = (x shl 4) or (ord(c) - ord('A') + 10)
+  else: f = true
+
+proc decodePercent(s: string, i: var int): char =
+  ## Converts `%xx` hexadecimal to the character with ordinal number `xx`.
+  ##
+  ## If `xx` is not a valid hexadecimal value, it is left intact: only the
+  ## leading `%` is returned as-is, and `xx` characters will be processed in the
+  ## next step (e.g. in `uri.decodeUrl`) as regular characters.
+  result = '%'
+  if i+2 < s.len:
+    var x = 0
+    var failed = false
+    handleHexChar(s[i+1], x, failed)
+    handleHexChar(s[i+2], x, failed)
+    if not failed:
+      result = chr(x)
+      inc(i, 2)
diff --git a/lib/pure/uri.nim b/lib/pure/uri.nim
index b163a2ab4..04a9d97bd 100644
--- a/lib/pure/uri.nim
+++ b/lib/pure/uri.nim
@@ -47,6 +47,8 @@
 import std/private/since
 
 import strutils, parseutils, base64
+include includes/decode_helpers
+
 type
   Url* = distinct string
 
@@ -90,6 +92,7 @@ proc decodeUrl*(s: string, decodePlus = true): string =
   ## This means that any ``%xx`` (where ``xx`` denotes a hexadecimal
   ## value) are converted to the character with ordinal number ``xx``,
   ## and every other character is carried over.
+  ## If ``xx`` is not a valid hexadecimal value, it is left intact.
   ##
   ## As a special rule, when the value of ``decodePlus`` is true, ``+``
   ## characters are converted to a space.
@@ -101,12 +104,7 @@ proc decodeUrl*(s: string, decodePlus = true): string =
     assert decodeUrl("https%3A%2F%2Fnim-lang.org%2Fthis+is+a+test") == "https://nim-lang.org/this is a test"
     assert decodeUrl("https%3A%2F%2Fnim-lang.org%2Fthis%20is%20a%20test",
         false) == "https://nim-lang.org/this is a test"
-  proc handleHexChar(c: char, x: var int) {.inline.} =
-    case c
-    of '0'..'9': x = (x shl 4) or (ord(c) - ord('0'))
-    of 'a'..'f': x = (x shl 4) or (ord(c) - ord('a') + 10)
-    of 'A'..'F': x = (x shl 4) or (ord(c) - ord('A') + 10)
-    else: assert(false)
+    assert decodeUrl("abc%xyz") == "abc%xyz"
 
   result = newString(s.len)
   var i = 0
@@ -114,11 +112,7 @@ proc decodeUrl*(s: string, decodePlus = true): string =
   while i < s.len:
     case s[i]
     of '%':
-      var x = 0
-      handleHexChar(s[i+1], x)
-      handleHexChar(s[i+2], x)
-      inc(i, 2)
-      result[j] = chr(x)
+      result[j] = decodePercent(s, i)
     of '+':
       if decodePlus:
         result[j] = ' '
author	Miran <narimiran@disroot.org>	2020-07-17 10:59:53 +0200
committer	GitHub <noreply@github.com>	2020-07-17 10:59:53 +0200
commit	c62513049cd7dcbd5b339c2078856a31c498b4aa (patch)
tree	9f44f58f5d4d83f159922e6f03994f0f6cea4310 /lib
parent	1355b461aa70cd1e17b2f07085aa6c97cb54283f (diff)
download	Nim-c62513049cd7dcbd5b339c2078856a31c498b4aa.tar.gz