diff options
-rw-r--r-- | lib/system/widestrs.nim | 14 | ||||
-rw-r--r-- | tests/stdlib/twchartoutf8.nim | 78 |
2 files changed, 85 insertions, 7 deletions
diff --git a/lib/system/widestrs.nim b/lib/system/widestrs.nim
index e782e2452..94ae3e26b 100644
--- a/lib/system/widestrs.nim
+++ b/lib/system/widestrs.nim
@@ -119,20 +119,20 @@ proc `$`*(w: WideCString, estimate: int): string =
 
   var i = 0
   while w[i].int16 != 0'i16:
-    var ch = uint32(cast[uint16](w[i]))
+    var ch = int(cast[uint16](w[i]))
     inc i
-    if ch >= uint32(UNI_SUR_HIGH_START) and ch <= uint32(UNI_SUR_HIGH_END):
+    if ch >= UNI_SUR_HIGH_START and ch <= UNI_SUR_HIGH_END:
       # If the 16 bits following the high surrogate are in the source buffer...
-      let ch2 = uint32(cast[uint16](w[i]))
-      ch = (ch shl halfShift) + ch2 + halfBase
+      let ch2 = int(cast[uint16](w[i]))
+      ch = (((ch and halfMask) shl halfShift) + (ch2 and halfMask)) + halfBase
       inc i
 
-    if ch < 0x80'u32:
+    if ch < 0x80:
       result.add chr(ch)
-    elif ch < 0x800'u32:
+    elif ch < 0x800:
       result.add chr((ch shr 6) or 0xc0)
       result.add chr((ch and 0x3f) or 0x80)
-    elif ch < 0x10000'u32:
+    elif ch < 0x10000:
       result.add chr((ch shr 12) or 0xe0)
       result.add chr(((ch shr 6) and 0x3f) or 0x80)
       result.add chr((ch and 0x3f) or 0x80)
diff --git a/tests/stdlib/twchartoutf8.nim b/tests/stdlib/twchartoutf8.nim
new file mode 100644
index 000000000..806a222b6
--- /dev/null
+++ b/tests/stdlib/twchartoutf8.nim
@@ -0,0 +1,78 @@
+#assume WideCharToMultiByte always produces a correct result
+#windows only
+
+when not defined(windows):
+  {.error: "windows only".}
+
+{.push gcsafe.}
+
+const CP_UTF8 = 65001'i32
+
+type
+  LPBOOL = ptr int32
+  LPWCSTR = ptr uint16
+
+proc WideCharToMultiByte*(CodePage: int32, dwFlags: int32,
+                          lpWideCharStr: LPWCSTR, cchWideChar: int32,
+                          lpMultiByteStr: cstring, cchMultiByte: int32,
+                          lpDefaultChar: cstring, lpUsedDefaultChar: LPBOOL): int32{.
+    stdcall, dynlib: "kernel32", importc: "WideCharToMultiByte".}
+
+{.pop.}
+# Reference: convert wclen UTF-16 units of wc to UTF-8 via the Win32 API.
+proc convertToUTF8(wc: WideCString, wclen: int32): string =
+  let size = WideCharToMultiByte(CP_UTF8, 0'i32, cast[LPWCSTR](addr(wc[0])), wclen,
+    cstring(nil), 0'i32, cstring(nil), LPBOOL(nil))
+  result = newString(size)
+  let res = WideCharToMultiByte(CP_UTF8, 0'i32, cast[LPWCSTR](addr(wc[0])), wclen,
+    cstring(result), size, cstring(nil), LPBOOL(nil))
+  result[size] = chr(0)
+  assert size == res
+# Check `$` against the Win32 result for lo..hi, one UTF-16 unit per point.
+proc testCP(wc: WideCString, lo, hi: int) =
+  var x = 0
+  let chunk = 1024
+  for i in lo..hi:
+    wc[x] = cast[TUtf16Char](i)
+    inc x # advance first so the terminator below never overwrites this unit
+    if (x >= chunk) or (i >= hi):
+      wc[x] = TUtf16Char(0)
+      var a = convertToUTF8(wc, int32(x))
+      var b = wc $ chunk
+      assert a == b
+      x = 0
+# Same check for lo..hi encoded as surrogate pairs (two units per point).
+proc testCP2(wc: WideCString, lo, hi: int) =
+  assert ((lo >= 0x10000) and (hi <= 0x10FFFF))
+  var x = 0
+  let chunk = 1024
+  for i in lo..hi:
+    let ch = i - 0x10000
+    let W1 = 0xD800 or (ch shr 10)
+    let W2 = 0xDC00 or (0x3FF and ch)
+    wc[x] = cast[TUtf16Char](W1)
+    wc[x+1] = cast[TUtf16Char](W2)
+    inc(x, 2)
+
+    if (x >= chunk) or (i >= hi):
+      wc[x] = TUtf16Char(0)
+      var a = convertToUTF8(wc, int32(x))
+      var b = wc $ chunk
+      assert a == b
+      x = 0
+
+#RFC-2781 "UTF-16, an encoding of ISO 10646"
+
+var wc: WideCString
+unsafeNew(wc, 1024 * 4 + 2)
+
+#U+0000 to U+D7FF
+#skip the U+0000
+wc.testCP(1, 0xD7FF)
+
+#U+E000 to U+FFFF
+wc.testCP(0xE000, 0xFFFF)
+
+#U+10000 to U+10FFFF
+wc.testCP2(0x10000, 0x10FFFF)
+echo "OK"
\ No newline at end of file