1 files changed, 124 insertions, 235 deletions
diff --git a/lib/system/widestrs.nim b/lib/system/widestrs.nim
index 588093d10..cf1f0910c 100644
--- a/lib/system/widestrs.nim
+++ b/lib/system/widestrs.nim
@@ -12,249 +12,138 @@
 
 type
   TUtf16Char* = distinct int16
-  WideCString* = ptr array[0.. 1_000_000, TUtf16Char]
+  WideCString* = ref array[0.. 1_000_000, TUtf16Char] ## 0-terminated UTF-16
 
 proc len*(w: WideCString): int =
   ## returns the length of a widestring. This traverses the whole string to
   ## find the binary zero end marker!
   while int16(w[result]) != 0'i16: inc result
 
-when true:
-  const
-    UNI_REPLACEMENT_CHAR = TUtf16Char(0xFFFD'i16)
-    UNI_MAX_BMP = 0x0000FFFF
-    UNI_MAX_UTF16 = 0x0010FFFF
-    UNI_MAX_UTF32 = 0x7FFFFFFF
-    UNI_MAX_LEGAL_UTF32 = 0x0010FFFF
-
-    halfShift = 10
-    halfBase = 0x0010000
-    halfMask = 0x3FF
-
-    UNI_SUR_HIGH_START = 0xD800
-    UNI_SUR_HIGH_END = 0xDBFF
-    UNI_SUR_LOW_START = 0xDC00
-    UNI_SUR_LOW_END = 0xDFFF
-
-  template ones(n: expr): expr = ((1 shl n)-1)
-
-  template fastRuneAt(s: cstring, i: int, result: expr, doInc = true) =
-    ## Returns the unicode character ``s[i]`` in `result`. If ``doInc == true``
-    ## `i` is incremented by the number of bytes that have been processed.
-    bind ones
-
-    if ord(s[i]) <=% 127:
-      result = ord(s[i])
-      when doInc: inc(i)
-    elif ord(s[i]) shr 5 == 0b110:
-      #assert(ord(s[i+1]) shr 6 == 0b10)
-      result = (ord(s[i]) and (ones(5))) shl 6 or (ord(s[i+1]) and ones(6))
-      when doInc: inc(i, 2)
-    elif ord(s[i]) shr 4 == 0b1110:
-      #assert(ord(s[i+1]) shr 6 == 0b10)
-      #assert(ord(s[i+2]) shr 6 == 0b10)
-      result = (ord(s[i]) and ones(4)) shl 12 or
-               (ord(s[i+1]) and ones(6)) shl 6 or
-               (ord(s[i+2]) and ones(6))
-      when doInc: inc(i, 3)
-    elif ord(s[i]) shr 3 == 0b11110:
-      #assert(ord(s[i+1]) shr 6 == 0b10)
-      #assert(ord(s[i+2]) shr 6 == 0b10)
-      #assert(ord(s[i+3]) shr 6 == 0b10)
-      result = (ord(s[i]) and ones(3)) shl 18 or
-               (ord(s[i+1]) and ones(6)) shl 12 or
-               (ord(s[i+2]) and ones(6)) shl 6 or
-               (ord(s[i+3]) and ones(6))
-      when doInc: inc(i, 4)
-    else:
-      result = 0xFFFD
-      when doInc: inc(i)
-
-  iterator runes(s: cstring): int =
-    var
-      i = 0
-      result: int
-    while s[i] != '\0':
-      fastRuneAt(s, i, result, true)
-      yield result
-
-  proc allocWideCString*(source: cstring, L: int): WideCString =
-    ## free after usage with `dealloc`.
-    result = cast[wideCString](alloc(L * 4 + 2))
-    var d = 0
-    for ch in runes(source):
-      if ch <=% UNI_MAX_BMP:
-        if ch >=% UNI_SUR_HIGH_START and ch <=% UNI_SUR_LOW_END:
-          result[d] = UNI_REPLACEMENT_CHAR
-        else:
-          result[d] = TUtf16Char(toU16(ch))
-      elif ch >% UNI_MAX_UTF16:
+const
+  UNI_REPLACEMENT_CHAR = TUtf16Char(0xFFFD'i16) # U+FFFD as a UTF-16 code unit
+  UNI_MAX_BMP = 0x0000FFFF          # code points up to here fit one code unit
+  UNI_MAX_UTF16 = 0x0010FFFF        # above this newWideCString emits U+FFFD
+  UNI_MAX_UTF32 = 0x7FFFFFFF        # not referenced in the code shown here
+  UNI_MAX_LEGAL_UTF32 = 0x0010FFFF  # not referenced in the code shown here
+
+  halfShift = 10        # bit offset of the high-surrogate payload
+  halfBase = 0x0010000  # subtracted before splitting into a surrogate pair
+  halfMask = 0x3FF      # selects the low-surrogate payload bits
+
+  UNI_SUR_HIGH_START = 0xD800  # first high (leading) surrogate
+  UNI_SUR_HIGH_END = 0xDBFF    # last high (leading) surrogate
+  UNI_SUR_LOW_START = 0xDC00   # first low (trailing) surrogate
+  UNI_SUR_LOW_END = 0xDFFF     # last low (trailing) surrogate
+
+template ones(n: expr): expr = ((1 shl n)-1) # mask with the n lowest bits set
+
+template fastRuneAt(s: cstring, i: int, result: expr, doInc = true) =
+  ## Decodes the UTF-8 sequence at ``s[i]`` into `result`; if ``doInc == true``
+  ## `i` skips the bytes consumed. Bad/truncated sequences give 0xFFFD (1 byte).
+  bind ones
+
+  if ord(s[i]) <=% 127:
+    result = ord(s[i])
+    when doInc: inc(i)
+  elif ord(s[i]) shr 5 == 0b110 and
+      ord(s[i+1]) shr 6 == 0b10: # trailer checked, so '\0' is never skipped
+    result = (ord(s[i]) and (ones(5))) shl 6 or (ord(s[i+1]) and ones(6))
+    when doInc: inc(i, 2)
+  elif ord(s[i]) shr 4 == 0b1110 and
+      ord(s[i+1]) shr 6 == 0b10 and
+      ord(s[i+2]) shr 6 == 0b10: # `and` short-circuits at the first bad byte
+    result = (ord(s[i]) and ones(4)) shl 12 or
+             (ord(s[i+1]) and ones(6)) shl 6 or
+             (ord(s[i+2]) and ones(6))
+    when doInc: inc(i, 3)
+  elif ord(s[i]) shr 3 == 0b11110 and
+      ord(s[i+1]) shr 6 == 0b10 and
+      ord(s[i+2]) shr 6 == 0b10 and
+      ord(s[i+3]) shr 6 == 0b10:
+    result = (ord(s[i]) and ones(3)) shl 18 or
+             (ord(s[i+1]) and ones(6)) shl 12 or
+             (ord(s[i+2]) and ones(6)) shl 6 or
+             (ord(s[i+3]) and ones(6))
+    when doInc: inc(i, 4)
+  else:
+    result = 0xFFFD
+    when doInc: inc(i)
+
+iterator runes(s: cstring): int =
+  ## Yields every code point decoded from `s` until the terminating '\0'.
+  var pos = 0
+  var rune: int
+  while s[pos] != '\0':
+    fastRuneAt(s, pos, rune, true)
+    yield rune
+
+proc newWideCString*(source: cstring, L: int): WideCString =
+  ## Converts UTF-8 `source` to UTF-16, writing a terminating 0 code unit.
+  unsafeNew(result, L * 4 + 2) # sized from L only; TODO confirm L >= strlen
+  var d = 0 # index of the next code unit to write
+  for ch in runes(source):
+    if ch <=% UNI_MAX_BMP:
+      if ch >=% UNI_SUR_HIGH_START and ch <=% UNI_SUR_LOW_END:
         result[d] = UNI_REPLACEMENT_CHAR
       else:
-        let ch = ch -% halfBase
-        result[d] = TUtf16Char(toU16((ch shr halfShift) +% UNI_SUR_HIGH_START))
-        inc d
-        result[d] = TUtf16Char(toU16((ch and halfMask) +% UNI_SUR_LOW_START))
+        result[d] = TUtf16Char(toU16(ch))
+    elif ch >% UNI_MAX_UTF16:
+      result[d] = UNI_REPLACEMENT_CHAR
+    else:
+      let ch = ch -% halfBase
+      result[d] = TUtf16Char(toU16((ch shr halfShift) +% UNI_SUR_HIGH_START))
       inc d
-    result[d] = TUtf16Char(0'i16)
-
-  proc allocWideCString*(s: cstring): WideCString =
-    ## free after usage with `dealloc`.
-    if s.isNil: return nil
-
-    when not defined(c_strlen):
-      proc c_strlen(a: CString): int {.nodecl, noSideEffect, importc: "strlen".}
-
-    let L = cstrlen(s)
-    result = allocWideCString(s, L)
-
-  proc allocWideCString*(s: string): WideCString =
-    ## free after usage with `dealloc`.
-    result = allocWideCString(s, s.len)
-
-  proc `$`*(w: wideCString, estimate: int): string =
-    result = newStringOfCap(estimate + estimate shr 2)
-
-    var i = 0
-    while w[i].int16 != 0'i16:
-      var ch = w[i].int
-      inc i
-      if ch >=% UNI_SUR_HIGH_START and ch <=% UNI_SUR_HIGH_END:
-        # If the 16 bits following the high surrogate are in the source buffer...
-        let ch2 = w[i].int
-        # If it's a low surrogate, convert to UTF32:
-        if ch2 >=% UNI_SUR_LOW_START and ch2 <=% UNI_SUR_LOW_END:
-          ch = ((ch -% UNI_SUR_HIGH_START) shr halfShift) +%
-                (ch2 -% UNI_SUR_LOW_START) +% halfBase
-          inc i
-          
-      if ch <=% 127:
-        result.add chr(ch)
-      elif ch <=% 0x07FF:
-        result.add chr((ch shr 6) or 0b110_00000)
-        result.add chr((ch and ones(6)) or 0b10_000000)
-      elif ch <=% 0xFFFF:
-        result.add chr(ch shr 12 or 0b1110_0000)
-        result.add chr(ch shr 6 and ones(6) or 0b10_0000_00)
-        result.add chr(ch and ones(6) or 0b10_0000_00)
-      elif ch <=% 0x0010FFFF:
-        result.add chr(ch shr 18 or 0b1111_0000)
-        result.add chr(ch shr 12 and ones(6) or 0b10_0000_00)
-        result.add chr(ch shr 6 and ones(6) or 0b10_0000_00)
-        result.add chr(ch and ones(6) or 0b10_0000_00)
-      else:
-        # replacement char:
-        result.add chr(0xFFFD shr 12 or 0b1110_0000)
-        result.add chr(0xFFFD shr 6 and ones(6) or 0b10_0000_00)
-        result.add chr(0xFFFD and ones(6) or 0b10_0000_00)
-
-  proc `$`*(s: WideCString): string =
-    result = s $ 80
-
-else:
-  const
-    utf8Encoding = 65001
-    
-  proc MultiByteToWideChar*(
-    CodePage: int32,
-    dwFlags: int32,
-    lpMultiByteStr: cstring,
-    cbMultiByte: cint,
-    lpWideCharStr: WideCString,
-    cchWideChar: cint): cint {.
-      stdcall, importc: "MultiByteToWideChar", dynlib: "kernel32".}
-
-  proc WideCharToMultiByte*(
-    CodePage: int32,
-    dwFlags: int32,
-    lpWideCharStr: WideCString,
-    cchWideChar: cint,
-    lpMultiByteStr: cstring,
-    cbMultiByte: cint,
-    lpDefaultChar: cstring=nil,
-    lpUsedDefaultChar: pointer=nil): cint {.
-      stdcall, importc: "WideCharToMultiByte", dynlib: "kernel32".}
-
-  proc raiseEncodingError() {.noinline, noreturn.} =
-    raise newException(EOS, "error in unicode conversion")
-
-  proc `$`*(s: WideCString, len: int): string =
-    # special case: empty string: needed because MultiByteToWideChar
-    # returns 0 in case of error:
-    if len == 0: return ""
-
-    # educated guess of capacity:
-    var cap = len + len shr 2
-    result = newStringOfCap(cap)
-    
-    let m = WideCharToMultiByte(
-      CodePage = utf8Encoding,
-      dwFlags = 0'i32,
-      lpWideCharStr = s,
-      cchWideChar = cint(len),
-      lpMultiByteStr = cstring(result),
-      cbMultiByte = cap)
-    if m == 0:
-      # try again; ask for capacity:
-      cap = WideCharToMultiByte(
-        CodePage = utf8Encoding,
-        dwFlags = 0'i32,
-        lpWideCharStr = s,
-        cchWideChar = cint(len),
-        lpMultiByteStr = nil,
-        cbMultiByte = cint(0))
-      # and do the conversion properly:
-      result = newStringOfCap(cap)
-      let m = WideCharToMultiByte(
-        CodePage = utf8Encoding,
-        dwFlags = 0'i32,
-        lpWideCharStr = s,
-        cchWideChar = cint(len),
-        lpMultiByteStr = cstring(result),
-        cbMultiByte = cap)
-      if m == 0: raiseEncodingError()
-      setLen(result, m)
-    elif m <= cap:
-      setLen(result, m)
+      result[d] = TUtf16Char(toU16((ch and halfMask) +% UNI_SUR_LOW_START))
+    inc d
+  result[d] = TUtf16Char(0'i16)
+
+proc newWideCString*(s: cstring): WideCString =
+  ## Converts the 0-terminated UTF-8 string `s`; a nil `s` gives nil.
+  when not defined(c_strlen):
+    proc c_strlen(a: CString): int {.nodecl, noSideEffect, importc: "strlen".}
+
+  if not s.isNil:
+    # strlen supplies the length used to size the result
+    result = newWideCString(s, cstrlen(s))
+
+proc newWideCString*(s: string): WideCString =
+  return newWideCString(cstring(s), len(s)) # length taken from `s` itself
+
+proc `$`*(w: WideCString, estimate: int): string =
+  ## Converts the 0-terminated UTF-16 string `w` to UTF-8. `estimate` is only
+  ## used to pick the initial capacity of the result.
+  result = newStringOfCap(estimate + estimate shr 2)
+  var i = 0
+  while w[i].int16 != 0'i16:
+    var ch = w[i].int and 0xFFFF # int16 sign-extends; keep unit in 0..0xFFFF
+    inc i
+    if ch >=% UNI_SUR_HIGH_START and ch <=% UNI_SUR_HIGH_END:
+      let ch2 = w[i].int and 0xFFFF # the terminating 0 is never a surrogate
+      # If it's a low surrogate, combine the pair into one code point:
+      if ch2 >=% UNI_SUR_LOW_START and ch2 <=% UNI_SUR_LOW_END:
+        ch = ((ch -% UNI_SUR_HIGH_START) shl halfShift) +%
+              (ch2 -% UNI_SUR_LOW_START) +% halfBase
+        inc i
+
+    if ch <=% 127:
+      result.add chr(ch)
+    elif ch <=% 0x07FF:
+      result.add chr((ch shr 6) or 0b110_00000)
+      result.add chr((ch and ones(6)) or 0b10_000000)
+    elif ch <=% 0xFFFF:
+      result.add chr(ch shr 12 or 0b1110_0000)
+      result.add chr(ch shr 6 and ones(6) or 0b10_0000_00)
+      result.add chr(ch and ones(6) or 0b10_0000_00)
+    elif ch <=% 0x0010FFFF:
+      result.add chr(ch shr 18 or 0b1111_0000)
+      result.add chr(ch shr 12 and ones(6) or 0b10_0000_00)
+      result.add chr(ch shr 6 and ones(6) or 0b10_0000_00)
+      result.add chr(ch and ones(6) or 0b10_0000_00)
     else:
-      sysAssert(false, "") # cannot happen
-    
-  proc `$`*(s: WideCString): string =
-    result = s $ s.len
-    
-  proc allocWideCString*(s: string): WideCString =
-    ## free after usage with `dealloc`.
-    let cap = s.len+1
-    result = cast[wideCString](alloc0(cap * 2))
-    # special case: empty string: needed because MultiByteToWideChar
-    # return 0 in case of error:
-    if s.len == 0: return
-    # convert to utf-16 LE
-    let m = MultiByteToWideChar(CodePage = utf8Encoding, dwFlags = 0'i32, 
-                                lpMultiByteStr = cstring(s),
-                                cbMultiByte = cint(s.len),
-                                lpWideCharStr = result,
-                                cchWideChar = cint(cap))
-    if m == 0: raiseEncodingError()
-
-  proc allocWideCString*(s: cstring): WideCString =
-    ## free after usage with `dealloc`.
-    if s.isNil: return nil
-
-    when not defined(c_strlen):
-      proc c_strlen(a: CString): int {.nodecl, noSideEffect, importc: "strlen".}
-
-    let len = cstrlen(s)
-    let cap = len+1
-    result = cast[wideCString](alloc0(cap * 2))
-    # special case: empty string: needed because MultiByteToWideChar
-    # return 0 in case of error:
-    if s.len == 0: return
-    # convert to utf-16 LE
-    let m = MultiByteToWideChar(CodePage = utf8Encoding, dwFlags = 0'i32, 
-                                lpMultiByteStr = s,
-                                cbMultiByte = cint(len),
-                                lpWideCharStr = result,
-                                cchWideChar = cint(cap))
-    if m == 0: raiseEncodingError()
+      # replacement char:
+      result.add chr(0xFFFD shr 12 or 0b1110_0000)
+      result.add chr(0xFFFD shr 6 and ones(6) or 0b10_0000_00)
+      result.add chr(0xFFFD and ones(6) or 0b10_0000_00)
 
+proc `$`*(s: WideCString): string =
+  return `$`(s, 80) # 80 is only the initial capacity estimate