diff options
author | Andreas Rumpf <rumpf_a@web.de> | 2015-10-23 02:02:07 +0200 |
---|---|---|
committer | Andreas Rumpf <rumpf_a@web.de> | 2015-10-23 02:02:07 +0200 |
commit | 1c9f05ef135f16caeef68fea895be26b1471ba9b (patch) | |
tree | eb5cc7ed634b73b748ed36cd02648b3f0a64fad2 /lib | |
parent | ba63ea9e826f6aafd22605ed291961f69cc078c6 (diff) | |
parent | 42975e6338da24ba6e61d5adee4e851cf4b330aa (diff) | |
download | Nim-1c9f05ef135f16caeef68fea895be26b1471ba9b.tar.gz |
Merge pull request #3442 from Dhertz/devel
Make sure the json module decodes UTF16 correctly
Diffstat (limited to 'lib')
-rw-r--r-- | lib/pure/json.nim | 40 |
1 files changed, 32 insertions, 8 deletions
```diff
diff --git a/lib/pure/json.nim b/lib/pure/json.nim
index 06d5a13e2..ab7d18bd8 100644
--- a/lib/pure/json.nim
+++ b/lib/pure/json.nim
@@ -203,6 +203,15 @@ proc handleHexChar(c: char, x: var int): bool =
   of 'A'..'F': x = (x shl 4) or (ord(c) - ord('A') + 10)
   else: result = false # error
 
+proc parseEscapedUTF16(buf: cstring, pos: var int): int =
+  result = 0
+  #UTF-16 escape is always 4 bytes.
+  for _ in 0..3:
+    if handleHexChar(buf[pos], result):
+      inc(pos)
+    else:
+      return -1
+
 proc parseString(my: var JsonParser): TokKind =
   result = tkString
   var pos = my.bufpos + 1
@@ -238,11 +247,22 @@ proc parseString(my: var JsonParser): TokKind =
         inc(pos, 2)
       of 'u':
         inc(pos, 2)
-        var r: int
-        if handleHexChar(buf[pos], r): inc(pos)
-        if handleHexChar(buf[pos], r): inc(pos)
-        if handleHexChar(buf[pos], r): inc(pos)
-        if handleHexChar(buf[pos], r): inc(pos)
+        var r = parseEscapedUTF16(buf, pos)
+        if r < 0:
+          my.err = errInvalidToken
+          break
+        # Deal with surrogates
+        if (r and 0xfc00) == 0xd800:
+          if buf[pos] & buf[pos+1] != "\\u":
+            my.err = errInvalidToken
+            break
+          inc(pos, 2)
+          var s = parseEscapedUTF16(buf, pos)
+          if (s and 0xfc00) == 0xdc00 and s > 0:
+            r = 0x10000 + (((r - 0xd800) shl 10) or (s - 0xdc00))
+          else:
+            my.err = errInvalidToken
+            break
         add(my.a, toUTF8(Rune(r)))
       else:
         # don't bother with the error
@@ -1200,11 +1220,15 @@ when isMainModule:
     assert(false)
   except IndexError:
     assert(true)
-  let testJson = parseJson"""{ "a": [1, 2, 3, 4], "b": "asd" }"""
+  let testJson = parseJson"""{ "a": [1, 2, 3, 4], "b": "asd", "c": "\ud83c\udf83", "d": "\u00E6"}"""
   # nil passthrough
   assert(testJson{"doesnt_exist"}{"anything"}.isNil)
-  testJson{["c", "d"]} = %true
-  assert(testJson["c"]["d"].bval)
+  testJson{["e", "f"]} = %true
+  assert(testJson["e"]["f"].bval)
+
+  # make sure UTF-16 decoding works.
+  assert(testJson["c"].str == "🎃")
+  assert(testJson["d"].str == "æ")
 
   # make sure no memory leek when parsing invalid string
   let startMemory = getOccupiedMem()
```