1 files changed, 214 insertions, 286 deletions
diff --git a/lib/system/sysstr.nim b/lib/system/sysstr.nim
index e2137e8f4..3621c4960 100644
--- a/lib/system/sysstr.nim
+++ b/lib/system/sysstr.nim
@@ -15,77 +15,85 @@
 # we don't use refcounts because that's a behaviour
 # the programmer may not want
 
+
+proc dataPointer(a: PGenericSeq, elemAlign: int): pointer =
+  cast[pointer](cast[int](a) +% align(GenericSeqSize, elemAlign))
+
+proc dataPointer(a: PGenericSeq, elemAlign, elemSize, index: int): pointer =
+  cast[pointer](cast[int](a) +% align(GenericSeqSize, elemAlign) +% (index*%elemSize))
+
 proc resize(old: int): int {.inline.} =
   if old <= 0: result = 4
   elif old < 65536: result = old * 2
   else: result = old * 3 div 2 # for large arrays * 3/2 is better
 
-proc cmpStrings(a, b: NimString): int {.inline, compilerProc.} =
-  if a == b: return 0
-  if a == nil: return -1
-  if b == nil: return 1
-  return c_strcmp(a.data, b.data)
-
-proc eqStrings(a, b: NimString): bool {.inline, compilerProc.} =
-  if a == b: return true
-  if a == nil or b == nil: return false
-  return a.len == b.len and
-    c_memcmp(a.data, b.data, a.len) == 0'i32
-
 when declared(allocAtomic):
-  template allocStr(size: expr): expr =
+  template allocStr(size: untyped): untyped =
     cast[NimString](allocAtomic(size))
 
-  template allocStrNoInit(size: expr): expr =
+  template allocStrNoInit(size: untyped): untyped =
     cast[NimString](boehmAllocAtomic(size))
+elif defined(gcRegions):
+  template allocStr(size: untyped): untyped =
+    cast[NimString](newStr(addr(strDesc), size, true))
+
+  template allocStrNoInit(size: untyped): untyped =
+    cast[NimString](newStr(addr(strDesc), size, false))
+
 else:
-  template allocStr(size: expr): expr =
+  template allocStr(size: untyped): untyped =
     cast[NimString](newObj(addr(strDesc), size))
 
-  template allocStrNoInit(size: expr): expr =
+  template allocStrNoInit(size: untyped): untyped =
     cast[NimString](newObjNoInit(addr(strDesc), size))
 
-proc rawNewStringNoInit(space: int): NimString {.compilerProc.} =
-  var s = space
-  if s < 7: s = 7
+proc rawNewStringNoInit(space: int): NimString =
+  let s = max(space, 7)
   result = allocStrNoInit(sizeof(TGenericSeq) + s + 1)
   result.reserved = s
   when defined(gogc):
     result.elemSize = 1
 
-proc rawNewString(space: int): NimString {.compilerProc.} =
-  var s = space
-  if s < 7: s = 7
-  result = allocStr(sizeof(TGenericSeq) + s + 1)
-  result.reserved = s
-  when defined(gogc):
-    result.elemSize = 1
+proc rawNewString(space: int): NimString {.compilerproc.} =
+  result = rawNewStringNoInit(space)
+  result.len = 0
+  result.data[0] = '\0'
 
-proc mnewString(len: int): NimString {.compilerProc.} =
-  result = rawNewString(len)
+proc mnewString(len: int): NimString {.compilerproc.} =
+  result = rawNewStringNoInit(len)
   result.len = len
+  zeroMem(addr result.data[0], len + 1)
+
+proc copyStrLast(s: NimString, start, last: int): NimString {.compilerproc.} =
+  # This is not used by most recent versions of the compiler anymore, but
+  # required for bootstrapping purposes.
+  let start = max(start, 0)
+  if s == nil: return nil
+  let len = min(last, s.len-1) - start + 1
+  result = rawNewStringNoInit(len)
+  result.len = len
+  copyMem(addr(result.data), addr(s.data[start]), len)
+  result.data[len] = '\0'
 
-proc copyStrLast(s: NimString, start, last: int): NimString {.compilerProc.} =
-  var start = max(start, 0)
-  var len = min(last, s.len-1) - start + 1
-  if len > 0:
-    result = rawNewStringNoInit(len)
-    result.len = len
-    c_memcpy(result.data, addr(s.data[start]), len)
-    result.data[len] = '\0'
-  else:
-    result = rawNewString(len)
-
-proc copyStr(s: NimString, start: int): NimString {.compilerProc.} =
+proc copyStr(s: NimString, start: int): NimString {.compilerproc.} =
+  # This is not used by most recent versions of the compiler anymore, but
+  # required for bootstrapping purposes.
+  if s == nil: return nil
   result = copyStrLast(s, start, s.len-1)
 
-proc toNimStr(str: cstring, len: int): NimString {.compilerProc.} =
+proc nimToCStringConv(s: NimString): cstring {.compilerproc, nonReloadable, inline.} =
+  if s == nil or s.len == 0: result = cstring""
+  else: result = cast[cstring](addr s.data)
+
+proc toNimStr(str: cstring, len: int): NimString {.compilerproc.} =
   result = rawNewStringNoInit(len)
   result.len = len
-  c_memcpy(result.data, str, len + 1)
+  copyMem(addr(result.data), str, len)
+  result.data[len] = '\0'
 
 proc cstrToNimstr(str: cstring): NimString {.compilerRtl.} =
-  result = toNimStr(str, c_strlen(str))
+  if str == nil: NimString(nil)
+  else: toNimStr(str, str.len)
 
 proc copyString(src: NimString): NimString {.compilerRtl.} =
   if src != nil:
@@ -94,42 +102,61 @@ proc copyString(src: NimString): NimString {.compilerRtl.} =
     else:
       result = rawNewStringNoInit(src.len)
       result.len = src.len
-      c_memcpy(result.data, src.data, src.len + 1)
+      copyMem(addr(result.data), addr(src.data), src.len + 1)
+      sysAssert((seqShallowFlag and result.reserved) == 0, "copyString")
+      when defined(nimShallowStrings):
+        if (src.reserved and strlitFlag) != 0:
+          result.reserved = (result.reserved and not strlitFlag) or seqShallowFlag
+
+proc newOwnedString(src: NimString; n: int): NimString =
+  result = rawNewStringNoInit(n)
+  result.len = n
+  copyMem(addr(result.data), addr(src.data), n)
+  result.data[n] = '\0'
 
 proc copyStringRC1(src: NimString): NimString {.compilerRtl.} =
   if src != nil:
-    when declared(newObjRC1):
-      var s = src.len
-      if s < 7: s = 7
-      result = cast[NimString](newObjRC1(addr(strDesc), sizeof(TGenericSeq) +
-                               s+1))
-      result.reserved = s
+    if (src.reserved and seqShallowFlag) != 0:
+      result = src
+      when declared(incRef):
+        incRef(usrToCell(result))
     else:
-      result = rawNewStringNoInit(src.len)
+      when declared(newObjRC1) and not defined(gcRegions):
+        var s = src.len
+        if s < 7: s = 7
+        result = cast[NimString](newObjRC1(addr(strDesc), sizeof(TGenericSeq) +
+                                s+1))
+        result.reserved = s
+        when defined(gogc):
+          result.elemSize = 1
+      else:
+        result = rawNewStringNoInit(src.len)
+      result.len = src.len
+      copyMem(addr(result.data), addr(src.data), src.len + 1)
+      sysAssert((seqShallowFlag and result.reserved) == 0, "copyStringRC1")
+      when defined(nimShallowStrings):
+        if (src.reserved and strlitFlag) != 0:
+          result.reserved = (result.reserved and not strlitFlag) or seqShallowFlag
+
+proc copyDeepString(src: NimString): NimString {.inline.} =
+  if src != nil:
+    result = rawNewStringNoInit(src.len)
     result.len = src.len
-    c_memcpy(result.data, src.data, src.len + 1)
-
-
-proc hashString(s: string): int {.compilerproc.} =
-  # the compiler needs exactly the same hash function!
-  # this used to be used for efficient generation of string case statements
-  var h = 0
-  for i in 0..len(s)-1:
-    h = h +% ord(s[i])
-    h = h +% h shl 10
-    h = h xor (h shr 6)
-  h = h +% h shl 3
-  h = h xor (h shr 11)
-  h = h +% h shl 15
-  result = h
+    copyMem(addr(result.data), addr(src.data), src.len + 1)
 
 proc addChar(s: NimString, c: char): NimString =
   # is compilerproc!
-  result = s
-  if result.len >= result.space:
-    result.reserved = resize(result.space)
-    result = cast[NimString](growObj(result,
-      sizeof(TGenericSeq) + result.reserved + 1))
+  if s == nil:
+    result = rawNewStringNoInit(1)
+    result.len = 0
+  else:
+    result = s
+    if result.len >= result.space:
+      let r = resize(result.space)
+      result = rawNewStringNoInit(r)
+      result.len = s.len
+      copyMem(addr result.data[0], unsafeAddr(s.data[0]), s.len+1)
+      result.reserved = r
   result.data[result.len] = c
   result.data[result.len+1] = '\0'
   inc(result.len)
@@ -166,19 +193,24 @@ proc addChar(s: NimString, c: char): NimString =
 #   s = rawNewString(0);
 
 proc resizeString(dest: NimString, addlen: int): NimString {.compilerRtl.} =
-  if dest.len + addlen <= dest.space:
+  if dest == nil:
+    result = rawNewString(addlen)
+  elif dest.len + addlen <= dest.space:
     result = dest
   else: # slow path:
-    var sp = max(resize(dest.space), dest.len + addlen)
-    result = cast[NimString](growObj(dest, sizeof(TGenericSeq) + sp + 1))
+    let sp = max(resize(dest.space), dest.len + addlen)
+    result = rawNewStringNoInit(sp)
+    result.len = dest.len
+    copyMem(addr result.data[0], unsafeAddr(dest.data[0]), dest.len+1)
     result.reserved = sp
     #result = rawNewString(sp)
     #copyMem(result, dest, dest.len + sizeof(TGenericSeq))
     # DO NOT UPDATE LEN YET: dest.len = newLen
 
 proc appendString(dest, src: NimString) {.compilerproc, inline.} =
-  c_memcpy(addr(dest.data[dest.len]), src.data, src.len + 1)
-  inc(dest.len, src.len)
+  if src != nil:
+    copyMem(addr(dest.data[dest.len]), addr(src.data), src.len + 1)
+    inc(dest.len, src.len)
 
 proc appendChar(dest: NimString, c: char) {.compilerproc, inline.} =
   dest.data[dest.len] = c
@@ -186,17 +218,27 @@ proc appendChar(dest: NimString, c: char) {.compilerproc, inline.} =
   inc(dest.len)
 
 proc setLengthStr(s: NimString, newLen: int): NimString {.compilerRtl.} =
-  var n = max(newLen, 0)
-  if n <= s.space:
+  let n = max(newLen, 0)
+  if s == nil:
+    if n == 0:
+      return s
+    else:
+      result = mnewString(n)
+  elif n <= s.space:
     result = s
   else:
-    result = resizeString(s, n)
+    let sp = max(resize(s.space), n)
+    result = rawNewStringNoInit(sp)
+    result.len = s.len
+    copyMem(addr result.data[0], unsafeAddr(s.data[0]), s.len)
+    zeroMem(addr result.data[s.len], n - s.len)
+    result.reserved = sp
   result.len = n
   result.data[n] = '\0'
 
 # ----------------- sequences ----------------------------------------------
 
-proc incrSeq(seq: PGenericSeq, elemSize: int): PGenericSeq {.compilerProc.} =
+proc incrSeq(seq: PGenericSeq, elemSize, elemAlign: int): PGenericSeq {.compilerproc.} =
   # increments the length by one:
   # this is needed for supporting ``add``;
   #
@@ -205,231 +247,117 @@ proc incrSeq(seq: PGenericSeq, elemSize: int): PGenericSeq {.compilerProc.} =
   #  seq[seq->len-1] = x;
   result = seq
   if result.len >= result.space:
-    result.reserved = resize(result.space)
-    result = cast[PGenericSeq](growObj(result, elemSize * result.reserved +
-                               GenericSeqSize))
+    let r = resize(result.space)
+    result = cast[PGenericSeq](growObj(result, align(GenericSeqSize, elemAlign) + elemSize * r))
+    result.reserved = r
   inc(result.len)
 
-proc incrSeqV2(seq: PGenericSeq, elemSize: int): PGenericSeq {.compilerProc.} =
+proc incrSeqV2(seq: PGenericSeq, elemSize, elemAlign: int): PGenericSeq {.compilerproc.} =
   # incrSeq version 2
   result = seq
   if result.len >= result.space:
-    result.reserved = resize(result.space)
-    result = cast[PGenericSeq](growObj(result, elemSize * result.reserved +
-                               GenericSeqSize))
-
-proc setLengthSeq(seq: PGenericSeq, elemSize, newLen: int): PGenericSeq {.
-    compilerRtl.} =
+    let r = resize(result.space)
+    result = cast[PGenericSeq](growObj(result, align(GenericSeqSize, elemAlign) + elemSize * r))
+    result.reserved = r
+
+proc incrSeqV3(s: PGenericSeq, typ: PNimType): PGenericSeq {.compilerproc.} =
+  if s == nil:
+    result = cast[PGenericSeq](newSeq(typ, 1))
+    result.len = 0
+  else:
+    result = s
+    if result.len >= result.space:
+      let r = resize(result.space)
+      result = cast[PGenericSeq](newSeq(typ, r))
+      result.len = s.len
+      copyMem(dataPointer(result, typ.base.align), dataPointer(s, typ.base.align), s.len * typ.base.size)
+      # since we steal the content from 's', it's crucial to set s's len to 0.
+      s.len = 0
+
+proc setLengthSeq(seq: PGenericSeq, elemSize, elemAlign, newLen: int): PGenericSeq {.
+    compilerRtl, inl.} =
   result = seq
   if result.space < newLen:
-    result.reserved = max(resize(result.space), newLen)
-    result = cast[PGenericSeq](growObj(result, elemSize * result.reserved +
-                               GenericSeqSize))
+    let r = max(resize(result.space), newLen)
+    result = cast[PGenericSeq](growObj(result, align(GenericSeqSize, elemAlign) + elemSize * r))
+    result.reserved = r
   elif newLen < result.len:
     # we need to decref here, otherwise the GC leaks!
     when not defined(boehmGC) and not defined(nogc) and
-         not defined(gcMarkAndSweep) and not defined(gogc):
-      when false: # compileOption("gc", "v2"):
+         not defined(gcMarkAndSweep) and not defined(gogc) and
+         not defined(gcRegions):
+      if ntfNoRefs notin extGetCellType(result).base.flags:
         for i in newLen..result.len-1:
-          let len0 = gch.tempStack.len
-          forAllChildrenAux(cast[pointer](cast[ByteAddress](result) +%
-                            GenericSeqSize +% (i*%elemSize)),
-                            extGetCellType(result).base, waPush)
-          let len1 = gch.tempStack.len
-          for i in len0 .. <len1:
-            doDecRef(gch.tempStack.d[i], LocalHeap, MaybeCyclic)
-          gch.tempStack.len = len0
-      else:
-        for i in newLen..result.len-1:
-          forAllChildrenAux(cast[pointer](cast[ByteAddress](result) +%
-                            GenericSeqSize +% (i*%elemSize)),
+          forAllChildrenAux(dataPointer(result, elemAlign, elemSize, i),
                             extGetCellType(result).base, waZctDecRef)
 
     # XXX: zeroing out the memory can still result in crashes if a wiped-out
     # cell is aliased by another pointer (ie proc parameter or a let variable).
-    # This is a tought problem, because even if we don't zeroMem here, in the
+    # This is a tough problem, because even if we don't zeroMem here, in the
     # presence of user defined destructors, the user will expect the cell to be
-    # "destroyed" thus creating the same problem. We can destoy the cell in the
+    # "destroyed" thus creating the same problem. We can destroy the cell in the
     # finalizer of the sequence, but this makes destruction non-deterministic.
-    zeroMem(cast[pointer](cast[ByteAddress](result) +% GenericSeqSize +%
-           (newLen*%elemSize)), (result.len-%newLen) *% elemSize)
+    zeroMem(dataPointer(result, elemAlign, elemSize, newLen), (result.len-%newLen) *% elemSize)
   result.len = newLen
 
-# --------------- other string routines ----------------------------------
-proc nimIntToStr(x: int): string {.compilerRtl.} =
-  result = newString(sizeof(x)*4)
-  var i = 0
-  var y = x
-  while true:
-    var d = y div 10
-    result[i] = chr(abs(int(y - d*10)) + ord('0'))
-    inc(i)
-    y = d
-    if y == 0: break
-  if x < 0:
-    result[i] = '-'
-    inc(i)
-  setLen(result, i)
-  # mirror the string:
-  for j in 0..i div 2 - 1:
-    swap(result[j], result[i-j-1])
-
-proc nimFloatToStr(f: float): string {.compilerproc.} =
-  var buf: array[0..64, char]
-  var n: int = c_sprintf(buf, "%.16g", f)
-  var hasDot = false
-  for i in 0..n-1:
-    if buf[i] == ',':
-      buf[i] = '.'
-      hasDot = true
-    elif buf[i] in {'a'..'z', 'A'..'Z', '.'}:
-      hasDot = true
-  if not hasDot:
-    buf[n] = '.'
-    buf[n+1] = '0'
-    buf[n+2] = '\0'
-  # On Windows nice numbers like '1.#INF', '-1.#INF' or '1.#NAN' are produced.
-  # We want to get rid of these here:
-  if buf[n-1] == 'N':
-    result = "nan"
-  elif buf[n-1] == 'F':
-    if buf[0] == '-':
-      result = "-inf"
+proc setLengthSeqV2(s: PGenericSeq, typ: PNimType, newLen: int): PGenericSeq {.
+    compilerRtl.} =
+  sysAssert typ.kind == tySequence, "setLengthSeqV2: type is not a seq"
+  if s == nil:
+    if newLen == 0:
+      result = s
     else:
-      result = "inf"
+      result = cast[PGenericSeq](newSeq(typ, newLen))
   else:
-    result = $buf
-
-proc strtod(buf: cstring, endptr: ptr cstring): float64 {.importc,
-  header: "<stdlib.h>", noSideEffect.}
-
-var decimalPoint: char
-
-proc getDecimalPoint(): char =
-  result = decimalPoint
-  if result == '\0':
-    if strtod("0,5", nil) == 0.5: result = ','
-    else: result = '.'
-    # yes this is threadsafe in practice, spare me:
-    decimalPoint = result
-
-const
-  IdentChars = {'a'..'z', 'A'..'Z', '0'..'9', '_'}
-
-proc nimParseBiggestFloat(s: string, number: var BiggestFloat,
-                          start = 0): int {.compilerProc.} =
-  # This routine leverages `strtod()` for the non-trivial task of
-  # parsing floating point numbers correctly. Because `strtod()` is
-  # locale-dependent with respect to the radix character, we create
-  # a copy where the decimal point is replaced with the locale's
-  # radix character.
-  var
-    i = start
-    sign = 1.0
-    t: array[500, char] # flaviu says: 325 is the longest reasonable literal
-    ti = 0
-    hasdigits = false
-
-  template addToBuf(c) =
-    if ti < t.high:
-      t[ti] = c; inc(ti)
-
-  # Sign?
-  if s[i] == '+' or s[i] == '-':
-    if s[i] == '-':
-      sign = -1.0
-    t[ti] = s[i]
-    inc(i); inc(ti)
-
-  # NaN?
-  if s[i] == 'N' or s[i] == 'n':
-    if s[i+1] == 'A' or s[i+1] == 'a':
-      if s[i+2] == 'N' or s[i+2] == 'n':
-        if s[i+3] notin IdentChars:
-          number = NaN
-          return i+3 - start
-    return 0
-
-  # Inf?
-  if s[i] == 'I' or s[i] == 'i':
-    if s[i+1] == 'N' or s[i+1] == 'n':
-      if s[i+2] == 'F' or s[i+2] == 'f':
-        if s[i+3] notin IdentChars:
-          number = Inf*sign
-          return i+3 - start
-    return 0
-
-  # Integer part?
-  while s[i] in {'0'..'9'}:
-    hasdigits = true
-    addToBuf(s[i])
-    inc(i);
-    while s[i] == '_': inc(i)
-
-  # Fractional part?
-  if s[i] == '.':
-    addToBuf(getDecimalPoint())
-    inc(i)
-    while s[i] in {'0'..'9'}:
-      hasdigits = true
-      addToBuf(s[i])
-      inc(i)
-      while s[i] == '_': inc(i)
-  if not hasdigits:
-    return 0
-
-  # Exponent?
-  if s[i] in {'e', 'E'}:
-    addToBuf(s[i])
-    inc(i)
-    if s[i] in {'+', '-'}:
-      addToBuf(s[i])
-      inc(i)
-    if s[i] notin {'0'..'9'}:
-      return 0
-    while s[i] in {'0'..'9'}:
-      addToBuf(s[i])
-      inc(i)
-      while s[i] == '_': inc(i)
-  number = strtod(t, nil)
-  result = i - start
-
-proc nimInt64ToStr(x: int64): string {.compilerRtl.} =
-  result = newString(sizeof(x)*4)
-  var i = 0
-  var y = x
-  while true:
-    var d = y div 10
-    result[i] = chr(abs(int(y - d*10)) + ord('0'))
-    inc(i)
-    y = d
-    if y == 0: break
-  if x < 0:
-    result[i] = '-'
-    inc(i)
-  setLen(result, i)
-  # mirror the string:
-  for j in 0..i div 2 - 1:
-    swap(result[j], result[i-j-1])
-
-proc nimBoolToStr(x: bool): string {.compilerRtl.} =
-  return if x: "true" else: "false"
-
-proc nimCharToStr(x: char): string {.compilerRtl.} =
-  result = newString(1)
-  result[0] = x
-
-proc binaryStrSearch(x: openArray[string], y: string): int {.compilerproc.} =
-  var
-    a = 0
-    b = len(x)
-  while a < b:
-    var mid = (a + b) div 2
-    if x[mid] < y:
-      a = mid + 1
+    let elemSize = typ.base.size
+    let elemAlign = typ.base.align
+    if s.space < newLen:
+      let r = max(resize(s.space), newLen)
+      result = cast[PGenericSeq](newSeq(typ, r))
+      copyMem(dataPointer(result, elemAlign), dataPointer(s, elemAlign), s.len * elemSize)
+      # since we steal the content from 's', it's crucial to set s's len to 0.
+      s.len = 0
+    elif newLen < s.len:
+      result = s
+      # we need to decref here, otherwise the GC leaks!
+      when not defined(boehmGC) and not defined(nogc) and
+          not defined(gcMarkAndSweep) and not defined(gogc) and
+          not defined(gcRegions):
+        if ntfNoRefs notin typ.base.flags:
+          for i in newLen..result.len-1:
+            forAllChildrenAux(dataPointer(result, elemAlign, elemSize, i),
+                              extGetCellType(result).base, waZctDecRef)
+
+      # XXX: zeroing out the memory can still result in crashes if a wiped-out
+      # cell is aliased by another pointer (ie proc parameter or a let variable).
+      # This is a tough problem, because even if we don't zeroMem here, in the
+      # presence of user defined destructors, the user will expect the cell to be
+      # "destroyed" thus creating the same problem. We can destroy the cell in the
+      # finalizer of the sequence, but this makes destruction non-deterministic.
+      zeroMem(dataPointer(result, elemAlign, elemSize, newLen), (result.len-%newLen) *% elemSize)
     else:
-      b = mid
-  if a < len(x) and x[a] == y:
-    result = a
-  else:
-    result = -1
+      result = s
+      zeroMem(dataPointer(result, elemAlign, elemSize, result.len), (newLen-%result.len) *% elemSize)
+    result.len = newLen
+
+func capacity*(self: string): int {.inline.} =
+  ## Returns the current capacity of the string.
+  # See https://github.com/nim-lang/RFCs/issues/460
+  runnableExamples:
+    var str = newStringOfCap(cap = 42)
+    str.add "Nim"
+    assert str.capacity == 42
+
+  let str = cast[NimString](self)
+  result = if str != nil: str.space else: 0
+
+func capacity*[T](self: seq[T]): int {.inline.} =
+  ## Returns the current capacity of the seq.
+  # See https://github.com/nim-lang/RFCs/issues/460
+  runnableExamples:
+    var lst = newSeqOfCap[string](cap = 42)
+    lst.add "Nim"
+    assert lst.capacity == 42
+
+  let sek = cast[PGenericSeq](self)
+  result = if sek != nil: sek.space else: 0