summary refs log tree commit diff stats
path: root/lib/std/widestrs.nim
diff options
context:
space:
mode:
authorBung <crc32@qq.com>2022-12-01 20:34:00 +0800
committerGitHub <noreply@github.com>2022-12-01 13:34:00 +0100
commit658b28dc5707601b39d9aad4b6bf79a9afff1e92 (patch)
treeb8e10d4d1f2666c45c3b1dec23e7c03d5ba23be9 /lib/std/widestrs.nim
parenta70d3abd37819e7562f80f0c808788c3f5c62c55 (diff)
downloadNim-658b28dc5707601b39d9aad4b6bf79a9afff1e92.tar.gz
tyInt tyUint fit target int bit width (#20829)
Diffstat (limited to 'lib/std/widestrs.nim')
-rw-r--r--lib/std/widestrs.nim379
1 files changed, 190 insertions, 189 deletions
diff --git a/lib/std/widestrs.nim b/lib/std/widestrs.nim
index 7df4004d1..f0f648c8d 100644
--- a/lib/std/widestrs.nim
+++ b/lib/std/widestrs.nim
@@ -15,214 +15,215 @@
 type
   Utf16Char* = distinct int16
 
-when defined(nimv2):
+when not (defined(cpu16) or defined(cpu8)):
+  when defined(nimv2):
 
-  type
-    WideCString* = ptr UncheckedArray[Utf16Char]
+    type
+      WideCString* = ptr UncheckedArray[Utf16Char]
 
-    WideCStringObj* = object
-      bytes: int
-      data: WideCString
+      WideCStringObj* = object
+        bytes: int
+        data: WideCString
 
-  proc `=destroy`(a: var WideCStringObj) =
-    if a.data != nil:
+    proc `=destroy`(a: var WideCStringObj) =
+      if a.data != nil:
+        when compileOption("threads"):
+          deallocShared(a.data)
+        else:
+          dealloc(a.data)
+
+    proc `=copy`(a: var WideCStringObj; b: WideCStringObj) {.error.}
+
+    proc `=sink`(a: var WideCStringObj; b: WideCStringObj) =
+      a.bytes = b.bytes
+      a.data = b.data
+
+    proc createWide(a: var WideCStringObj; bytes: int) =
+      a.bytes = bytes
       when compileOption("threads"):
-        deallocShared(a.data)
+        a.data = cast[typeof(a.data)](allocShared0(bytes))
       else:
-        dealloc(a.data)
+        a.data = cast[typeof(a.data)](alloc0(bytes))
 
-  proc `=copy`(a: var WideCStringObj; b: WideCStringObj) {.error.}
+    template `[]`*(a: WideCStringObj; idx: int): Utf16Char = a.data[idx]
+    template `[]=`*(a: WideCStringObj; idx: int; val: Utf16Char) = a.data[idx] = val
 
-  proc `=sink`(a: var WideCStringObj; b: WideCStringObj) =
-    a.bytes = b.bytes
-    a.data = b.data
+    template nullWide(): untyped = WideCStringObj(bytes: 0, data: nil)
+
+    converter toWideCString*(x: WideCStringObj): WideCString {.inline.} =
+      result = x.data
 
-  proc createWide(a: var WideCStringObj; bytes: int) =
-    a.bytes = bytes
-    when compileOption("threads"):
-      a.data = cast[typeof(a.data)](allocShared0(bytes))
-    else:
-      a.data = cast[typeof(a.data)](alloc0(bytes))
-
-  template `[]`*(a: WideCStringObj; idx: int): Utf16Char = a.data[idx]
-  template `[]=`*(a: WideCStringObj; idx: int; val: Utf16Char) = a.data[idx] = val
-
-  template nullWide(): untyped = WideCStringObj(bytes: 0, data: nil)
-
-  converter toWideCString*(x: WideCStringObj): WideCString {.inline.} =
-    result = x.data
-
-else:
-  template nullWide(): untyped = nil
-
-  type
-    WideCString* = ref UncheckedArray[Utf16Char]
-    WideCStringObj* = WideCString
-
-  template createWide(a; L) =
-    unsafeNew(a, L)
-
-proc ord(arg: Utf16Char): int = int(cast[uint16](arg))
-
-proc len*(w: WideCString): int =
-  ## returns the length of a widestring. This traverses the whole string to
-  ## find the binary zero end marker!
-  result = 0
-  while int16(w[result]) != 0'i16: inc result
-
-const
-  UNI_REPLACEMENT_CHAR = Utf16Char(0xFFFD'i16)
-  UNI_MAX_BMP = 0x0000FFFF
-  UNI_MAX_UTF16 = 0x0010FFFF
-  # UNI_MAX_UTF32 = 0x7FFFFFFF
-  # UNI_MAX_LEGAL_UTF32 = 0x0010FFFF
-
-  halfShift = 10
-  halfBase = 0x0010000
-  halfMask = 0x3FF
-
-  UNI_SUR_HIGH_START = 0xD800
-  UNI_SUR_HIGH_END = 0xDBFF
-  UNI_SUR_LOW_START = 0xDC00
-  UNI_SUR_LOW_END = 0xDFFF
-  UNI_REPL = 0xFFFD
-
-template ones(n: untyped): untyped = ((1 shl n)-1)
-
-template fastRuneAt(s: cstring, i, L: int, result: untyped, doInc = true) =
-  ## Returns the unicode character `s[i]` in `result`. If `doInc == true`
-  ## `i` is incremented by the number of bytes that have been processed.
-  bind ones
-
-  if ord(s[i]) <= 127:
-    result = ord(s[i])
-    when doInc: inc(i)
-  elif ord(s[i]) shr 5 == 0b110:
-    #assert(ord(s[i+1]) shr 6 == 0b10)
-    if i <= L - 2:
-      result = (ord(s[i]) and (ones(5))) shl 6 or (ord(s[i+1]) and ones(6))
-      when doInc: inc(i, 2)
-    else:
-      result = UNI_REPL
-      when doInc: inc(i)
-  elif ord(s[i]) shr 4 == 0b1110:
-    if i <= L - 3:
-      #assert(ord(s[i+1]) shr 6 == 0b10)
-      #assert(ord(s[i+2]) shr 6 == 0b10)
-      result = (ord(s[i]) and ones(4)) shl 12 or
-               (ord(s[i+1]) and ones(6)) shl 6 or
-               (ord(s[i+2]) and ones(6))
-      when doInc: inc(i, 3)
-    else:
-      result = UNI_REPL
-      when doInc: inc(i)
-  elif ord(s[i]) shr 3 == 0b11110:
-    if i <= L - 4:
-      #assert(ord(s[i+1]) shr 6 == 0b10)
-      #assert(ord(s[i+2]) shr 6 == 0b10)
-      #assert(ord(s[i+3]) shr 6 == 0b10)
-      result = (ord(s[i]) and ones(3)) shl 18 or
-               (ord(s[i+1]) and ones(6)) shl 12 or
-               (ord(s[i+2]) and ones(6)) shl 6 or
-               (ord(s[i+3]) and ones(6))
-      when doInc: inc(i, 4)
-    else:
-      result = UNI_REPL
-      when doInc: inc(i)
   else:
-    result = 0xFFFD
-    when doInc: inc(i)
-
-iterator runes(s: cstring, L: int): int =
-  var
-    i = 0
-    result: int
-  while i < L:
-    fastRuneAt(s, i, L, result, true)
-    yield result
-
-proc newWideCString*(size: int): WideCStringObj =
-  createWide(result, size * 2 + 2)
-
-proc newWideCString*(source: cstring, L: int): WideCStringObj =
-  createWide(result, L * 2 + 2)
-  var d = 0
-  for ch in runes(source, L):
-
-    if ch <= UNI_MAX_BMP:
-      if ch >= UNI_SUR_HIGH_START and ch <= UNI_SUR_LOW_END:
-        result[d] = UNI_REPLACEMENT_CHAR
-      else:
-        result[d] = cast[Utf16Char](uint16(ch))
-    elif ch > UNI_MAX_UTF16:
-      result[d] = UNI_REPLACEMENT_CHAR
-    else:
-      let ch = ch - halfBase
-      result[d] = cast[Utf16Char](uint16((ch shr halfShift) + UNI_SUR_HIGH_START))
-      inc d
-      result[d] = cast[Utf16Char](uint16((ch and halfMask) + UNI_SUR_LOW_START))
-    inc d
-  result[d] = Utf16Char(0)
+    template nullWide(): untyped = nil
 
-proc newWideCString*(s: cstring): WideCStringObj =
-  if s.isNil: return nullWide
+    type
+      WideCString* = ref UncheckedArray[Utf16Char]
+      WideCStringObj* = WideCString
 
-  result = newWideCString(s, s.len)
+    template createWide(a; L) =
+      unsafeNew(a, L)
 
-proc newWideCString*(s: string): WideCStringObj =
-  result = newWideCString(cstring s, s.len)
+  proc ord(arg: Utf16Char): int = int(cast[uint16](arg))
 
-proc `$`*(w: WideCString, estimate: int, replacement: int = 0xFFFD): string =
-  result = newStringOfCap(estimate + estimate shr 2)
+  proc len*(w: WideCString): int =
+    ## returns the length of a widestring. This traverses the whole string to
+    ## find the binary zero end marker!
+    result = 0
+    while int16(w[result]) != 0'i16: inc result
 
-  var i = 0
-  while w[i].int16 != 0'i16:
-    var ch = ord(w[i])
-    inc i
-    if ch >= UNI_SUR_HIGH_START and ch <= UNI_SUR_HIGH_END:
-      # If the 16 bits following the high surrogate are in the source buffer...
-      let ch2 = ord(w[i])
+  const
+    UNI_REPLACEMENT_CHAR = Utf16Char(0xFFFD'i16)
+    UNI_MAX_BMP = 0x0000FFFF
+    UNI_MAX_UTF16 = 0x0010FFFF
+    # UNI_MAX_UTF32 = 0x7FFFFFFF
+    # UNI_MAX_LEGAL_UTF32 = 0x0010FFFF
 
-      # If it's a low surrogate, convert to UTF32:
-      if ch2 >= UNI_SUR_LOW_START and ch2 <= UNI_SUR_LOW_END:
-        ch = (((ch and halfMask) shl halfShift) + (ch2 and halfMask)) + halfBase
-        inc i
+    halfShift = 10
+    halfBase = 0x0010000
+    halfMask = 0x3FF
+
+    UNI_SUR_HIGH_START = 0xD800
+    UNI_SUR_HIGH_END = 0xDBFF
+    UNI_SUR_LOW_START = 0xDC00
+    UNI_SUR_LOW_END = 0xDFFF
+    UNI_REPL = 0xFFFD
+
+  template ones(n: untyped): untyped = ((1 shl n)-1)
+
+  template fastRuneAt(s: cstring, i, L: int, result: untyped, doInc = true) =
+    ## Returns the unicode character `s[i]` in `result`. If `doInc == true`
+    ## `i` is incremented by the number of bytes that have been processed.
+    bind ones
+
+    if ord(s[i]) <= 127:
+      result = ord(s[i])
+      when doInc: inc(i)
+    elif ord(s[i]) shr 5 == 0b110:
+      #assert(ord(s[i+1]) shr 6 == 0b10)
+      if i <= L - 2:
+        result = (ord(s[i]) and (ones(5))) shl 6 or (ord(s[i+1]) and ones(6))
+        when doInc: inc(i, 2)
+      else:
+        result = UNI_REPL
+        when doInc: inc(i)
+    elif ord(s[i]) shr 4 == 0b1110:
+      if i <= L - 3:
+        #assert(ord(s[i+1]) shr 6 == 0b10)
+        #assert(ord(s[i+2]) shr 6 == 0b10)
+        result = (ord(s[i]) and ones(4)) shl 12 or
+                (ord(s[i+1]) and ones(6)) shl 6 or
+                (ord(s[i+2]) and ones(6))
+        when doInc: inc(i, 3)
       else:
+        result = UNI_REPL
+        when doInc: inc(i)
+    elif ord(s[i]) shr 3 == 0b11110:
+      if i <= L - 4:
+        #assert(ord(s[i+1]) shr 6 == 0b10)
+        #assert(ord(s[i+2]) shr 6 == 0b10)
+        #assert(ord(s[i+3]) shr 6 == 0b10)
+        result = (ord(s[i]) and ones(3)) shl 18 or
+                (ord(s[i+1]) and ones(6)) shl 12 or
+                (ord(s[i+2]) and ones(6)) shl 6 or
+                (ord(s[i+3]) and ones(6))
+        when doInc: inc(i, 4)
+      else:
+        result = UNI_REPL
+        when doInc: inc(i)
+    else:
+      result = 0xFFFD
+      when doInc: inc(i)
+
+  iterator runes(s: cstring, L: int): int =
+    var
+      i = 0
+      result: int
+    while i < L:
+      fastRuneAt(s, i, L, result, true)
+      yield result
+
+  proc newWideCString*(size: int): WideCStringObj =
+    createWide(result, size * 2 + 2)
+
+  proc newWideCString*(source: cstring, L: int): WideCStringObj =
+    createWide(result, L * 2 + 2)
+    var d = 0
+    for ch in runes(source, L):
+
+      if ch <= UNI_MAX_BMP:
+        if ch >= UNI_SUR_HIGH_START and ch <= UNI_SUR_LOW_END:
+          result[d] = UNI_REPLACEMENT_CHAR
+        else:
+          result[d] = cast[Utf16Char](uint16(ch))
+      elif ch > UNI_MAX_UTF16:
+        result[d] = UNI_REPLACEMENT_CHAR
+      else:
+        let ch = ch - halfBase
+        result[d] = cast[Utf16Char](uint16((ch shr halfShift) + UNI_SUR_HIGH_START))
+        inc d
+        result[d] = cast[Utf16Char](uint16((ch and halfMask) + UNI_SUR_LOW_START))
+      inc d
+    result[d] = Utf16Char(0)
+
+  proc newWideCString*(s: cstring): WideCStringObj =
+    if s.isNil: return nullWide
+
+    result = newWideCString(s, s.len)
+
+  proc newWideCString*(s: string): WideCStringObj =
+    result = newWideCString(cstring s, s.len)
+
+  proc `$`*(w: WideCString, estimate: int, replacement: int = 0xFFFD): string =
+    result = newStringOfCap(estimate + estimate shr 2)
+
+    var i = 0
+    while w[i].int16 != 0'i16:
+      var ch = ord(w[i])
+      inc i
+      if ch >= UNI_SUR_HIGH_START and ch <= UNI_SUR_HIGH_END:
+        # If the 16 bits following the high surrogate are in the source buffer...
+        let ch2 = ord(w[i])
+
+        # If it's a low surrogate, convert to UTF32:
+        if ch2 >= UNI_SUR_LOW_START and ch2 <= UNI_SUR_LOW_END:
+          ch = (((ch and halfMask) shl halfShift) + (ch2 and halfMask)) + halfBase
+          inc i
+        else:
+          #invalid UTF-16
+          ch = replacement
+      elif ch >= UNI_SUR_LOW_START and ch <= UNI_SUR_LOW_END:
         #invalid UTF-16
         ch = replacement
-    elif ch >= UNI_SUR_LOW_START and ch <= UNI_SUR_LOW_END:
-      #invalid UTF-16
-      ch = replacement
-
-    if ch < 0x80:
-      result.add chr(ch)
-    elif ch < 0x800:
-      result.add chr((ch shr 6) or 0xc0)
-      result.add chr((ch and 0x3f) or 0x80)
-    elif ch < 0x10000:
-      result.add chr((ch shr 12) or 0xe0)
-      result.add chr(((ch shr 6) and 0x3f) or 0x80)
-      result.add chr((ch and 0x3f) or 0x80)
-    elif ch <= 0x10FFFF:
-      result.add chr((ch shr 18) or 0xf0)
-      result.add chr(((ch shr 12) and 0x3f) or 0x80)
-      result.add chr(((ch shr 6) and 0x3f) or 0x80)
-      result.add chr((ch and 0x3f) or 0x80)
-    else:
-      # replacement char(in case user give very large number):
-      result.add chr(0xFFFD shr 12 or 0b1110_0000)
-      result.add chr(0xFFFD shr 6 and ones(6) or 0b10_0000_00)
-      result.add chr(0xFFFD and ones(6) or 0b10_0000_00)
 
-proc `$`*(s: WideCString): string =
-  result = s $ 80
+      if ch < 0x80:
+        result.add chr(ch)
+      elif ch < 0x800:
+        result.add chr((ch shr 6) or 0xc0)
+        result.add chr((ch and 0x3f) or 0x80)
+      elif ch < 0x10000:
+        result.add chr((ch shr 12) or 0xe0)
+        result.add chr(((ch shr 6) and 0x3f) or 0x80)
+        result.add chr((ch and 0x3f) or 0x80)
+      elif ch <= 0x10FFFF:
+        result.add chr((ch shr 18) or 0xf0)
+        result.add chr(((ch shr 12) and 0x3f) or 0x80)
+        result.add chr(((ch shr 6) and 0x3f) or 0x80)
+        result.add chr((ch and 0x3f) or 0x80)
+      else:
+        # replacement char(in case user give very large number):
+        result.add chr(0xFFFD shr 12 or 0b1110_0000)
+        result.add chr(0xFFFD shr 6 and ones(6) or 0b10_0000_00)
+        result.add chr(0xFFFD and ones(6) or 0b10_0000_00)
+
+  proc `$`*(s: WideCString): string =
+    result = s $ 80
 
-when defined(nimv2):
-  proc `$`*(s: WideCStringObj, estimate: int, replacement: int = 0xFFFD): string =
-    `$`(s.data, estimate, replacement)
+  when defined(nimv2):
+    proc `$`*(s: WideCStringObj, estimate: int, replacement: int = 0xFFFD): string =
+      `$`(s.data, estimate, replacement)
 
-  proc `$`*(s: WideCStringObj): string =
-    $(s.data)
+    proc `$`*(s: WideCStringObj): string =
+      $(s.data)
 
-  proc len*(w: WideCStringObj): int {.inline.} =
-    len(w.data)
+    proc len*(w: WideCStringObj): int {.inline.} =
+      len(w.data)