diff options
author | jangko <jangko128@gmail.com> | 2015-08-20 14:17:46 +0700 |
---|---|---|
committer | jangko <jangko128@gmail.com> | 2015-08-20 14:17:46 +0700 |
commit | 493dbc8932326c5f830c0396b528d433a0bdf314 (patch) | |
tree | 4efe05ebd7441c52e02b9fb62590398c0e4558be | |
parent | 69b32637b1f12000b64fa4db452323dc30b3567f (diff) | |
download | Nim-493dbc8932326c5f830c0396b528d433a0bdf314.tar.gz |
Fixed UTF-16 to UTF-8 conversion in widestrs.nim,
the source of the problem in issue #3228.
-rw-r--r-- | lib/system/widestrs.nim | 44 |
1 files changed, 18 insertions, 26 deletions
diff --git a/lib/system/widestrs.nim b/lib/system/widestrs.nim index e7b7f3972..e782e2452 100644 --- a/lib/system/widestrs.nim +++ b/lib/system/widestrs.nim @@ -119,36 +119,28 @@ proc `$`*(w: WideCString, estimate: int): string = var i = 0 while w[i].int16 != 0'i16: - var ch = w[i].int + var ch = uint32(cast[uint16](w[i])) inc i - if ch >=% UNI_SUR_HIGH_START and ch <=% UNI_SUR_HIGH_END: + if ch >= uint32(UNI_SUR_HIGH_START) and ch <= uint32(UNI_SUR_HIGH_END): # If the 16 bits following the high surrogate are in the source buffer... - let ch2 = w[i].int - # If it's a low surrogate, convert to UTF32: - if ch2 >=% UNI_SUR_LOW_START and ch2 <=% UNI_SUR_LOW_END: - ch = ((ch -% UNI_SUR_HIGH_START) shr halfShift) +% - (ch2 -% UNI_SUR_LOW_START) +% halfBase - inc i - - if ch <=% 127: + let ch2 = uint32(cast[uint16](w[i])) + ch = (ch shl halfShift) + ch2 + halfBase + inc i + + if ch < 0x80'u32: result.add chr(ch) - elif ch <=% 0x07FF: - result.add chr((ch shr 6) or 0b110_00000) - result.add chr((ch and ones(6)) or 0b10_000000) - elif ch <=% 0xFFFF: - result.add chr(ch shr 12 or 0b1110_0000) - result.add chr(ch shr 6 and ones(6) or 0b10_0000_00) - result.add chr(ch and ones(6) or 0b10_0000_00) - elif ch <=% 0x0010FFFF: - result.add chr(ch shr 18 or 0b1111_0000) - result.add chr(ch shr 12 and ones(6) or 0b10_0000_00) - result.add chr(ch shr 6 and ones(6) or 0b10_0000_00) - result.add chr(ch and ones(6) or 0b10_0000_00) + elif ch < 0x800'u32: + result.add chr((ch shr 6) or 0xc0) + result.add chr((ch and 0x3f) or 0x80) + elif ch < 0x10000'u32: + result.add chr((ch shr 12) or 0xe0) + result.add chr(((ch shr 6) and 0x3f) or 0x80) + result.add chr((ch and 0x3f) or 0x80) else: - # replacement char: - result.add chr(0xFFFD shr 12 or 0b1110_0000) - result.add chr(0xFFFD shr 6 and ones(6) or 0b10_0000_00) - result.add chr(0xFFFD and ones(6) or 0b10_0000_00) + result.add chr((ch shr 18) or 0xf0) + result.add chr(((ch shr 12) and 0x3f) or 0x80) + result.add chr(((ch shr 6) and 0x3f) or 
0x80) + result.add chr((ch and 0x3f) or 0x80) proc `$`*(s: WideCString): string = result = s $ 80 |