diff options
author | jangko <jangko128@gmail.com> | 2015-08-21 10:43:31 +0700 |
---|---|---|
committer | jangko <jangko128@gmail.com> | 2015-08-21 10:43:31 +0700 |
commit | 7c757599f1c9157a65e8e2238d4b11eedeeb01bf (patch) | |
tree | 3b5f0bc75b8f080ec77030625a3026fc80c9351d /tests | |
parent | c103eddc737a48d3de04e64c2086549b7ec33d6d (diff) | |
download | Nim-7c757599f1c9157a65e8e2238d4b11eedeeb01bf.tar.gz |
fixed UTF-16 to UTF-8 conversion in widestrs.nim
That conversion was the source of the problem in issue #3228. Also adds tests for the entire range of valid UTF-16 and tests for invalid UTF-16 sequences.
Diffstat (limited to 'tests')
-rw-r--r-- | tests/stdlib/twchartoutf8.nim | 42 |
1 files changed, 35 insertions, 7 deletions
diff --git a/tests/stdlib/twchartoutf8.nim b/tests/stdlib/twchartoutf8.nim index 806a222b6..9838bbfe7 100644 --- a/tests/stdlib/twchartoutf8.nim +++ b/tests/stdlib/twchartoutf8.nim @@ -33,9 +33,9 @@ proc testCP(wc: WideCString, lo, hi: int) = var x = 0 let chunk = 1024 for i in lo..hi: - wc[x] = cast[TUtf16Char](i) + wc[x] = cast[Utf16Char](i) if (x >= chunk) or (i >= hi): - wc[x] = TUtf16Char(0) + wc[x] = Utf16Char(0) var a = convertToUTF8(wc, int32(x)) var b = wc $ chunk assert a == b @@ -43,26 +43,26 @@ proc testCP(wc: WideCString, lo, hi: int) = inc x proc testCP2(wc: WideCString, lo, hi: int) = - assert ((lo >=0x10000) and (hi <= 0x10FFFF)) + assert((lo >= 0x10000) and (hi <= 0x10FFFF)) var x = 0 let chunk = 1024 for i in lo..hi: let ch = i - 0x10000 let W1 = 0xD800 or (ch shr 10) let W2 = 0xDC00 or (0x3FF and ch) - wc[x] = cast[TUtf16Char](W1) - wc[x+1] = cast[TUtf16Char](W2) + wc[x] = cast[Utf16Char](W1) + wc[x+1] = cast[Utf16Char](W2) inc(x, 2) if (x >= chunk) or (i >= hi): - wc[x] = TUtf16Char(0) + wc[x] = Utf16Char(0) var a = convertToUTF8(wc, int32(x)) var b = wc $ chunk assert a == b x = 0 #RFC-2781 "UTF-16, an encoding of ISO 10646" - + var wc: WideCString unsafeNew(wc, 1024 * 4 + 2) @@ -75,4 +75,32 @@ wc.testCP(0xE000, 0xFFFF) #U+10000 to U+10FFFF wc.testCP2(0x10000, 0x10FFFF) + +#invalid UTF-16 +const + b = "\xEF\xBF\xBD" + c = "\xEF\xBF\xBF" + +wc[0] = cast[Utf16Char](0xDC00) +wc[1] = Utf16Char(0) +var a = $wc +assert a == b + +wc[0] = cast[Utf16Char](0xFFFF) +wc[1] = cast[Utf16Char](0xDC00) +wc[2] = Utf16Char(0) +a = $wc +assert a == c & b + +wc[0] = cast[Utf16Char](0xD800) +wc[1] = Utf16Char(0) +a = $wc +assert a == b + +wc[0] = cast[Utf16Char](0xD800) +wc[1] = cast[Utf16Char](0xFFFF) +wc[2] = Utf16Char(0) +a = $wc +assert a == b & c + echo "OK" \ No newline at end of file |