summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- lib/pure/unicode.nim | 72
-rw-r--r-- lib/system/widestrs.nim | 49
2 files changed, 78 insertions, 43 deletions
diff --git a/lib/pure/unicode.nim b/lib/pure/unicode.nim
index 6ba966816..0c4f15c91 100644
--- a/lib/pure/unicode.nim
+++ b/lib/pure/unicode.nim
@@ -49,6 +49,8 @@ proc runeLenAt*(s: string, i: Natural): int =
   elif ord(s[i]) shr 1 == 0b1111110: result = 6
   else: result = 1
 
+const replRune = Rune(0xFFFD)
+
 template fastRuneAt*(s: string, i: int, result: untyped, doInc = true) =
   ## Returns the Unicode character ``s[i]`` in ``result``. If ``doInc == true``
   ## ``i`` is incremented by the number of bytes that have been processed.
@@ -58,49 +60,69 @@ template fastRuneAt*(s: string, i: int, result: untyped, doInc = true) =
     when doInc: inc(i)
   elif ord(s[i]) shr 5 == 0b110:
     # assert(ord(s[i+1]) shr 6 == 0b10)
-    result = Rune((ord(s[i]) and (ones(5))) shl 6 or
-                  (ord(s[i+1]) and ones(6)))
-    when doInc: inc(i, 2)
+    if i <= s.len - 2:
+      result = Rune((ord(s[i]) and (ones(5))) shl 6 or
+                    (ord(s[i+1]) and ones(6)))
+      when doInc: inc(i, 2)
+    else:
+      result = replRune
+      when doInc: inc(i)
   elif ord(s[i]) shr 4 == 0b1110:
     # assert(ord(s[i+1]) shr 6 == 0b10)
     # assert(ord(s[i+2]) shr 6 == 0b10)
-    result = Rune((ord(s[i]) and ones(4)) shl 12 or
-             (ord(s[i+1]) and ones(6)) shl 6 or
-             (ord(s[i+2]) and ones(6)))
-    when doInc: inc(i, 3)
+    if i <= s.len - 3:
+      result = Rune((ord(s[i]) and ones(4)) shl 12 or
+               (ord(s[i+1]) and ones(6)) shl 6 or
+               (ord(s[i+2]) and ones(6)))
+      when doInc: inc(i, 3)
+    else:
+      result = replRune
+      when doInc: inc(i)
   elif ord(s[i]) shr 3 == 0b11110:
     # assert(ord(s[i+1]) shr 6 == 0b10)
     # assert(ord(s[i+2]) shr 6 == 0b10)
     # assert(ord(s[i+3]) shr 6 == 0b10)
-    result = Rune((ord(s[i]) and ones(3)) shl 18 or
-             (ord(s[i+1]) and ones(6)) shl 12 or
-             (ord(s[i+2]) and ones(6)) shl 6 or
-             (ord(s[i+3]) and ones(6)))
-    when doInc: inc(i, 4)
+    if i <= s.len - 4:
+      result = Rune((ord(s[i]) and ones(3)) shl 18 or
+               (ord(s[i+1]) and ones(6)) shl 12 or
+               (ord(s[i+2]) and ones(6)) shl 6 or
+               (ord(s[i+3]) and ones(6)))
+      when doInc: inc(i, 4)
+    else:
+      result = replRune
+      when doInc: inc(i)
   elif ord(s[i]) shr 2 == 0b111110:
     # assert(ord(s[i+1]) shr 6 == 0b10)
     # assert(ord(s[i+2]) shr 6 == 0b10)
     # assert(ord(s[i+3]) shr 6 == 0b10)
     # assert(ord(s[i+4]) shr 6 == 0b10)
-    result = Rune((ord(s[i]) and ones(2)) shl 24 or
-             (ord(s[i+1]) and ones(6)) shl 18 or
-             (ord(s[i+2]) and ones(6)) shl 12 or
-             (ord(s[i+3]) and ones(6)) shl 6 or
-             (ord(s[i+4]) and ones(6)))
-    when doInc: inc(i, 5)
+    if i <= s.len - 5:
+      result = Rune((ord(s[i]) and ones(2)) shl 24 or
+               (ord(s[i+1]) and ones(6)) shl 18 or
+               (ord(s[i+2]) and ones(6)) shl 12 or
+               (ord(s[i+3]) and ones(6)) shl 6 or
+               (ord(s[i+4]) and ones(6)))
+      when doInc: inc(i, 5)
+    else:
+      result = replRune
+      when doInc: inc(i)
   elif ord(s[i]) shr 1 == 0b1111110:
     # assert(ord(s[i+1]) shr 6 == 0b10)
     # assert(ord(s[i+2]) shr 6 == 0b10)
     # assert(ord(s[i+3]) shr 6 == 0b10)
     # assert(ord(s[i+4]) shr 6 == 0b10)
     # assert(ord(s[i+5]) shr 6 == 0b10)
-    result = Rune((ord(s[i]) and ones(1)) shl 30 or
-             (ord(s[i+1]) and ones(6)) shl 24 or
-             (ord(s[i+2]) and ones(6)) shl 18 or
-             (ord(s[i+3]) and ones(6)) shl 12 or
-             (ord(s[i+4]) and ones(6)) shl 6 or
-             (ord(s[i+5]) and ones(6)))
-    when doInc: inc(i, 6)
+    if i <= s.len - 6:
+      result = Rune((ord(s[i]) and ones(1)) shl 30 or
+               (ord(s[i+1]) and ones(6)) shl 24 or
+               (ord(s[i+2]) and ones(6)) shl 18 or
+               (ord(s[i+3]) and ones(6)) shl 12 or
+               (ord(s[i+4]) and ones(6)) shl 6 or
+               (ord(s[i+5]) and ones(6)))
+      when doInc: inc(i, 6)
+    else:
+      result = replRune
+      when doInc: inc(i)
   else:
     result = Rune(ord(s[i]))
     when doInc: inc(i)
diff --git a/lib/system/widestrs.nim b/lib/system/widestrs.nim
index 3c957476f..dda547abe 100644
--- a/lib/system/widestrs.nim
+++ b/lib/system/widestrs.nim
@@ -38,10 +38,11 @@ const
   UNI_SUR_HIGH_END = 0xDBFF
   UNI_SUR_LOW_START = 0xDC00
   UNI_SUR_LOW_END = 0xDFFF
+  UNI_REPL = 0xFFFD
 
 template ones(n: untyped): untyped = ((1 shl n)-1)
 
-template fastRuneAt(s: cstring, i: int, result: untyped, doInc = true) =
+template fastRuneAt(s: cstring, i, L: int, result: untyped, doInc = true) =
   ## Returns the unicode character ``s[i]`` in `result`. If ``doInc == true``
   ## `i` is incremented by the number of bytes that have been processed.
   bind ones
@@ -51,24 +52,36 @@ template fastRuneAt(s: cstring, i: int, result: untyped, doInc = true) =
     when doInc: inc(i)
   elif ord(s[i]) shr 5 == 0b110:
     #assert(ord(s[i+1]) shr 6 == 0b10)
-    result = (ord(s[i]) and (ones(5))) shl 6 or (ord(s[i+1]) and ones(6))
-    when doInc: inc(i, 2)
+    if i <= L - 2:
+      result = (ord(s[i]) and (ones(5))) shl 6 or (ord(s[i+1]) and ones(6))
+      when doInc: inc(i, 2)
+    else:
+      result = UNI_REPL
+      when doInc: inc(i)
   elif ord(s[i]) shr 4 == 0b1110:
-    #assert(ord(s[i+1]) shr 6 == 0b10)
-    #assert(ord(s[i+2]) shr 6 == 0b10)
-    result = (ord(s[i]) and ones(4)) shl 12 or
-             (ord(s[i+1]) and ones(6)) shl 6 or
-             (ord(s[i+2]) and ones(6))
-    when doInc: inc(i, 3)
+    if i <= L - 3:
+      #assert(ord(s[i+1]) shr 6 == 0b10)
+      #assert(ord(s[i+2]) shr 6 == 0b10)
+      result = (ord(s[i]) and ones(4)) shl 12 or
+               (ord(s[i+1]) and ones(6)) shl 6 or
+               (ord(s[i+2]) and ones(6))
+      when doInc: inc(i, 3)
+    else:
+      result = UNI_REPL
+      when doInc: inc(i)
   elif ord(s[i]) shr 3 == 0b11110:
-    #assert(ord(s[i+1]) shr 6 == 0b10)
-    #assert(ord(s[i+2]) shr 6 == 0b10)
-    #assert(ord(s[i+3]) shr 6 == 0b10)
-    result = (ord(s[i]) and ones(3)) shl 18 or
-             (ord(s[i+1]) and ones(6)) shl 12 or
-             (ord(s[i+2]) and ones(6)) shl 6 or
-             (ord(s[i+3]) and ones(6))
-    when doInc: inc(i, 4)
+    if i <= L - 4:
+      #assert(ord(s[i+1]) shr 6 == 0b10)
+      #assert(ord(s[i+2]) shr 6 == 0b10)
+      #assert(ord(s[i+3]) shr 6 == 0b10)
+      result = (ord(s[i]) and ones(3)) shl 18 or
+               (ord(s[i+1]) and ones(6)) shl 12 or
+               (ord(s[i+2]) and ones(6)) shl 6 or
+               (ord(s[i+3]) and ones(6))
+      when doInc: inc(i, 4)
+    else:
+      result = UNI_REPL
+      when doInc: inc(i)
   else:
     result = 0xFFFD
     when doInc: inc(i)
@@ -78,7 +91,7 @@ iterator runes(s: cstring, L: int): int =
     i = 0
     result: int
   while i < L:
-    fastRuneAt(s, i, result, true)
+    fastRuneAt(s, i, L, result, true)
     yield result
 
 proc newWideCString*(source: cstring, L: int): WideCString =