summary refs log tree commit diff stats
path: root/lib
diff options
context:
space:
mode:
author: jangko <jangko128@gmail.com> 2015-08-21 10:43:31 +0700
committer: jangko <jangko128@gmail.com> 2015-08-21 10:43:31 +0700
commit: 7c757599f1c9157a65e8e2238d4b11eedeeb01bf (patch)
tree: 3b5f0bc75b8f080ec77030625a3026fc80c9351d /lib
parent: c103eddc737a48d3de04e64c2086549b7ec33d6d (diff)
download: Nim-7c757599f1c9157a65e8e2238d4b11eedeeb01bf.tar.gz
Fixed the UTF-16 to UTF-8 conversion in widestrs.nim,
which was the source of the problem in issue #3228.
Also added a test for the entire range of valid UTF-16
and a test for invalid UTF-16 sequences.
Diffstat (limited to 'lib')
-rw-r--r--lib/system/widestrs.nim26
1 files changed, 20 insertions, 6 deletions
diff --git a/lib/system/widestrs.nim b/lib/system/widestrs.nim
index 94ae3e26b..77310b289 100644
--- a/lib/system/widestrs.nim
+++ b/lib/system/widestrs.nim
@@ -114,7 +114,7 @@ proc newWideCString*(s: cstring): WideCString =
 proc newWideCString*(s: string): WideCString =
   result = newWideCString(s, s.len)
 
-proc `$`*(w: WideCString, estimate: int): string =
+proc `$`*(w: WideCString, estimate: int, replacement: int = 0xFFFD): string =
   result = newStringOfCap(estimate + estimate shr 2)
 
   var i = 0
@@ -124,9 +124,18 @@ proc `$`*(w: WideCString, estimate: int): string =
     if ch >= UNI_SUR_HIGH_START and ch <= UNI_SUR_HIGH_END:
       # If the 16 bits following the high surrogate are in the source buffer...
       let ch2 = int(cast[uint16](w[i]))
-      ch = (((ch and halfMask) shl halfShift) + (ch2 and halfMask)) + halfBase
-      inc i
-    
+      
+      # If it's a low surrogate, convert to UTF32:
+      if ch2 >= UNI_SUR_LOW_START and ch2 <= UNI_SUR_LOW_END:
+        ch = (((ch and halfMask) shl halfShift) + (ch2 and halfMask)) + halfBase
+        inc i
+      else:
+        #invalid UTF-16
+        ch = replacement
+    elif ch >= UNI_SUR_LOW_START and ch <= UNI_SUR_LOW_END:
+      #invalid UTF-16
+      ch = replacement
+      
     if ch < 0x80:
       result.add chr(ch)
     elif ch < 0x800:
@@ -136,11 +145,16 @@ proc `$`*(w: WideCString, estimate: int): string =
       result.add chr((ch shr 12) or 0xe0)
       result.add chr(((ch shr 6) and 0x3f) or 0x80)
       result.add chr((ch and 0x3f) or 0x80)
-    else:
+    elif ch <= 0x10FFFF:
       result.add chr((ch shr 18) or 0xf0)
       result.add chr(((ch shr 12) and 0x3f) or 0x80)
       result.add chr(((ch shr 6) and 0x3f) or 0x80)
       result.add chr((ch and 0x3f) or 0x80)
-
+    else:
+      # replacement char (in case the user gives a very large number):
+      result.add chr(0xFFFD shr 12 or 0b1110_0000)
+      result.add chr(0xFFFD shr 6 and ones(6) or 0b10_0000_00)
+      result.add chr(0xFFFD and ones(6) or 0b10_0000_00)
+      
 proc `$`*(s: WideCString): string =
   result = s $ 80