12 files changed, 258 insertions, 178 deletions
diff --git a/lib/pure/collections/hashcommon.nim b/lib/pure/collections/hashcommon.nim
index d9a914afc..bfc09ec3c 100644
--- a/lib/pure/collections/hashcommon.nim
+++ b/lib/pure/collections/hashcommon.nim
@@ -18,34 +18,87 @@ when not defined(nimHasDefault):
     var v: T
     v
 
+const freeMarker = 0
+const deletedMarker = -1
+
+type UHash = uint
+
 # hcode for real keys cannot be zero.  hcode==0 signifies an empty slot.  These
 # two procs retain clarity of that encoding without the space cost of an enum.
-proc isEmpty(hcode: Hash): bool {.inline.} =
-  result = hcode == 0
+proc isFilledAndValid(hcode: Hash): bool {.inline.} =
+  result = hcode != 0 and hcode != deletedMarker
+    # performance: we could use bit magic if needed
 
 proc isFilled(hcode: Hash): bool {.inline.} =
   result = hcode != 0
 
-proc nextTry(h, maxHash: Hash): Hash {.inline.} =
-  result = (h + 1) and maxHash
 
-proc mustRehash(length, counter: int): bool {.inline.} =
-  assert(length > counter)
-  result = (length * 2 < counter * 3) or (length - counter < 4)
+proc translateBits(a: UHash, numBitsMask: int): UHash {.inline.} =
+  result = (a shr numBitsMask) or (a shl (UHash.sizeof * 8 - numBitsMask))
+
+proc nextTry(h, maxHash: Hash, perturb: var UHash): Hash {.inline.} =
+  # FACTOR between hashcommon.nextTry, intsets.nextTry
+  # an optimization would be to use `(h + 1) and maxHash` for a few iterations
+  # and then switch to the formula below, to get "best of both worlds": good
+  # cache locality, except when a collision cluster is detected (ie, large number
+  # of iterations).
+  const PERTURB_SHIFT = 5 # consider tying this to `numBitsMask = fastLog2(t.dataLen)`
+  result = cast[Hash]((5*cast[uint](h) + 1 + perturb) and cast[uint](maxHash))
+  perturb = perturb shr PERTURB_SHIFT
+
+proc mustRehash[T](t: T): bool {.inline.} =
+  # FACTOR between hashcommon.mustRehash, intsets.mustRehash
+  let counter2 = t.counter + t.countDeleted
+  let length = t.dataLen
+  assert(length > counter2)
+  result = (length * 2 < counter2 * 3) or (length - counter2 < 4) # synchronize with `rightSize`
+
+proc rightSize*(count: Natural): int {.inline.} =
+  ## Return the value of `initialSize` to support `count` items.
+  ##
+  ## If more items are expected to be added, simply add that
+  ## expected extra amount to the parameter before calling this.
+  ##
+  ## Internally, we want `mustRehash(t) == false` for t that was just resized.
+  # Make sure to synchronize with `mustRehash`
+  result = nextPowerOfTwo(count * 3 div 2 + 4)
+
+template getPerturb(t: typed, hc: Hash): UHash =
+  # we can't use `fastLog2(dataLen(t))` because importing `bitops` would cause codegen errors
+  # so we use a practical value of half the bit width (eg 64 / 2 = 32 on 64bit machines)
+  let numBitsMask = sizeof(Hash) * 4 # ie, sizeof(Hash) * 8 / 2
+  # this makes a major difference for cases like #13393; it causes the bits
+  # that were masked out in 1st position so they'll be masked in instead, and
+  # influence the recursion in nextTry earlier rather than later.
+  translateBits(cast[uint](hc), numBitsMask)
 
 template rawGetKnownHCImpl() {.dirty.} =
   if t.dataLen == 0:
     return -1
   var h: Hash = hc and maxHash(t) # start with real hash value
-  while isFilled(t.data[h].hcode):
-    # Compare hc THEN key with boolean short circuit. This makes the common case
-    # zero ==key's for missing (e.g.inserts) and exactly one ==key for present.
-    # It does slow down succeeding lookups by one extra Hash cmp&and..usually
-    # just a few clock cycles, generally worth it for any non-integer-like A.
-    if t.data[h].hcode == hc and t.data[h].key == key:
-      return h
-    h = nextTry(h, maxHash(t))
-  result = -1 - h # < 0 => MISSING; insert idx = -1 - result
+  var perturb = t.getPerturb(hc)
+  var deletedIndex = -1
+  while true:
+    if isFilledAndValid(t.data[h].hcode):
+      # Compare hc THEN key with boolean short circuit. This makes the common case
+      # zero ==key's for missing (e.g.inserts) and exactly one ==key for present.
+      # It does slow down succeeding lookups by one extra Hash cmp&and..usually
+      # just a few clock cycles, generally worth it for any non-integer-like A.
+      # performance: we optimize this: depending on type(key), skip hc comparison
+      if t.data[h].hcode == hc and t.data[h].key == key:
+        return h
+      h = nextTry(h, maxHash(t), perturb)
+    elif t.data[h].hcode == deletedMarker:
+      if deletedIndex == -1:
+        deletedIndex = h
+      h = nextTry(h, maxHash(t), perturb)
+    else:
+      break
+  if deletedIndex == -1:
+    result = -1 - h # < 0 => MISSING; insert idx = -1 - result
+  else:
+    # we prefer returning a (in fact the 1st found) deleted index
+    result = -1 - deletedIndex
 
 proc rawGetKnownHC[X, A](t: X, key: A, hc: Hash): int {.inline.} =
   rawGetKnownHCImpl()
@@ -54,6 +107,8 @@ template genHashImpl(key, hc: typed) =
   hc = hash(key)
   if hc == 0: # This almost never taken branch should be very predictable.
     hc = 314159265 # Value doesn't matter; Any non-zero favorite is fine.
+  elif hc == deletedMarker:
+    hc = 214159261
 
 template genHash(key: typed): Hash =
   var res: Hash
diff --git a/lib/pure/collections/intsets.nim b/lib/pure/collections/intsets.nim
index 8338ceaca..7ca379783 100644
--- a/lib/pure/collections/intsets.nim
+++ b/lib/pure/collections/intsets.nim
@@ -46,30 +46,40 @@ type
   IntSet* = object       ## An efficient set of `int` implemented as a sparse bit set.
     elems: int           # only valid for small numbers
     counter, max: int
+    countDeleted: int
     head: PTrunk
     data: TrunkSeq
     a: array[0..33, int] # profiling shows that 34 elements are enough
 
-proc mustRehash(length, counter: int): bool {.inline.} =
-  assert(length > counter)
-  result = (length * 2 < counter * 3) or (length - counter < 4)
+proc mustRehash[T](t: T): bool {.inline.} =
+  # FACTOR between hashcommon.mustRehash, intsets.mustRehash
+  let counter2 = t.counter + t.countDeleted
+  let length = t.max + 1
+  assert length > counter2
+  result = (length * 2 < counter2 * 3) or (length - counter2 < 4)
 
-proc nextTry(h, maxHash: Hash): Hash {.inline.} =
-  result = ((5 * h) + 1) and maxHash
+proc nextTry(h, maxHash: Hash, perturb: var Hash): Hash {.inline.} =
+  # FACTOR between hashcommon.nextTry, intsets.nextTry
+  const PERTURB_SHIFT = 5
+  var perturb2 = cast[uint](perturb) shr PERTURB_SHIFT
+  perturb = cast[Hash](perturb2)
+  result = ((5*h) + 1 + perturb) and maxHash
 
 proc intSetGet(t: IntSet, key: int): PTrunk =
   var h = key and t.max
+  var perturb = key
   while t.data[h] != nil:
     if t.data[h].key == key:
       return t.data[h]
-    h = nextTry(h, t.max)
+    h = nextTry(h, t.max, perturb)
   result = nil
 
 proc intSetRawInsert(t: IntSet, data: var TrunkSeq, desc: PTrunk) =
   var h = desc.key and t.max
+  var perturb = desc.key
   while data[h] != nil:
     assert(data[h] != desc)
-    h = nextTry(h, t.max)
+    h = nextTry(h, t.max, perturb)
   assert(data[h] == nil)
   data[h] = desc
 
@@ -84,14 +94,16 @@ proc intSetEnlarge(t: var IntSet) =
 
 proc intSetPut(t: var IntSet, key: int): PTrunk =
   var h = key and t.max
+  var perturb = key
   while t.data[h] != nil:
     if t.data[h].key == key:
       return t.data[h]
-    h = nextTry(h, t.max)
-  if mustRehash(t.max + 1, t.counter): intSetEnlarge(t)
+    h = nextTry(h, t.max, perturb)
+  if mustRehash(t): intSetEnlarge(t)
   inc(t.counter)
   h = key and t.max
-  while t.data[h] != nil: h = nextTry(h, t.max)
+  perturb = key
+  while t.data[h] != nil: h = nextTry(h, t.max, perturb)
   assert(t.data[h] == nil)
   new(result)
   result.next = t.head
@@ -100,6 +112,7 @@ proc intSetPut(t: var IntSet, key: int): PTrunk =
   t.data[h] = result
 
 proc bitincl(s: var IntSet, key: int) {.inline.} =
+  var ret: PTrunk
   var t = intSetPut(s, `shr`(key, TrunkShift))
   var u = key and TrunkMask
   t.bits[u shr IntShift] = t.bits[u shr IntShift] or
@@ -393,7 +406,8 @@ proc assign*(dest: var IntSet, src: IntSet) =
     var it = src.head
     while it != nil:
       var h = it.key and dest.max
-      while dest.data[h] != nil: h = nextTry(h, dest.max)
+      var perturb = it.key
+      while dest.data[h] != nil: h = nextTry(h, dest.max, perturb)
       assert(dest.data[h] == nil)
       var n: PTrunk
       new(n)
diff --git a/lib/pure/collections/setimpl.nim b/lib/pure/collections/setimpl.nim
index d2d1490ff..f8950e354 100644
--- a/lib/pure/collections/setimpl.nim
+++ b/lib/pure/collections/setimpl.nim
@@ -48,7 +48,7 @@ template inclImpl() {.dirty.} =
   var hc: Hash
   var index = rawGet(s, key, hc)
   if index < 0:
-    if mustRehash(len(s.data), s.counter):
+    if mustRehash(s):
       enlarge(s)
       index = rawGetKnownHC(s, key, hc)
     rawInsert(s, s.data, key, hc, -1 - index)
@@ -62,17 +62,12 @@ template containsOrInclImpl() {.dirty.} =
   if index >= 0:
     result = true
   else:
-    if mustRehash(len(s.data), s.counter):
+    if mustRehash(s):
       enlarge(s)
       index = rawGetKnownHC(s, key, hc)
     rawInsert(s, s.data, key, hc, -1 - index)
     inc(s.counter)
 
-template doWhile(a, b) =
-  while true:
-    b
-    if not a: break
-
 proc exclImpl[A](s: var HashSet[A], key: A): bool {.inline.} =
   var hc: Hash
   var i = rawGet(s, key, hc)
@@ -82,17 +77,9 @@ proc exclImpl[A](s: var HashSet[A], key: A): bool {.inline.} =
   if i >= 0:
     result = false
     dec(s.counter)
-    while true: # KnuthV3 Algo6.4R adapted for i=i+1 instead of i=i-1
-      var j = i # The correctness of this depends on (h+1) in nextTry,
-      var r = j # though may be adaptable to other simple sequences.
-      s.data[i].hcode = 0 # mark current EMPTY
-      s.data[i].key = default(type(s.data[i].key))
-      doWhile((i >= r and r > j) or (r > j and j > i) or (j > i and i >= r)):
-        i = (i + 1) and msk # increment mod table size
-        if isEmpty(s.data[i].hcode): # end of collision cluster; So all done
-          return
-        r = s.data[i].hcode and msk # "home" location of key@i
-      s.data[j] = move(s.data[i]) # data[i] will be marked EMPTY next loop
+    inc(s.countDeleted)
+    s.data[i].hcode = deletedMarker
+    s.data[i].key = default(type(s.data[i].key))
 
 template dollarImpl() {.dirty.} =
   result = "{"
diff --git a/lib/pure/collections/sets.nim b/lib/pure/collections/sets.nim
index 9dd72cb56..9f800e6d1 100644
--- a/lib/pure/collections/sets.nim
+++ b/lib/pure/collections/sets.nim
@@ -68,6 +68,7 @@ type
     ## before calling other procs on it.
     data: KeyValuePairSeq[A]
     counter: int
+    countDeleted: int
 
 type
   OrderedKeyValuePair[A] = tuple[
@@ -80,15 +81,13 @@ type
     ## <#initOrderedSet,int>`_ before calling other procs on it.
     data: OrderedKeyValuePairSeq[A]
     counter, first, last: int
+    countDeleted: int
 
 const
   defaultInitialSize* = 64
 
 include setimpl
 
-proc rightSize*(count: Natural): int {.inline.}
-
-
 # ---------------------------------------------------------------------
 # ------------------------------ HashSet ------------------------------
 # ---------------------------------------------------------------------
@@ -250,7 +249,7 @@ iterator items*[A](s: HashSet[A]): A =
   ##   echo b
   ##   # --> {(a: 1, b: 3), (a: 0, b: 4)}
   for h in 0 .. high(s.data):
-    if isFilled(s.data[h].hcode): yield s.data[h].key
+    if isFilledAndValid(s.data[h].hcode): yield s.data[h].key
 
 proc containsOrIncl*[A](s: var HashSet[A], key: A): bool =
   ## Includes `key` in the set `s` and tells if `key` was already in `s`.
@@ -342,7 +341,7 @@ proc pop*[A](s: var HashSet[A]): A =
     doAssertRaises(KeyError, echo s.pop)
 
   for h in 0 .. high(s.data):
-    if isFilled(s.data[h].hcode):
+    if isFilledAndValid(s.data[h].hcode):
       result = s.data[h].key
       excl(s, result)
       return result
@@ -593,16 +592,6 @@ proc `$`*[A](s: HashSet[A]): string =
   ##   # --> {no, esc'aping, is " provided}
   dollarImpl()
 
-proc rightSize*(count: Natural): int {.inline.} =
-  ## Return the value of `initialSize` to support `count` items.
-  ##
-  ## If more items are expected to be added, simply add that
-  ## expected extra amount to the parameter before calling this.
-  ##
-  ## Internally, we want `mustRehash(rightSize(x), x) == false`.
-  result = nextPowerOfTwo(count * 3 div 2 + 4)
-
-
 proc initSet*[A](initialSize = defaultInitialSize): HashSet[A] {.deprecated:
      "Deprecated since v0.20, use 'initHashSet'".} = initHashSet[A](initialSize)
 
@@ -634,7 +623,7 @@ template forAllOrderedPairs(yieldStmt: untyped) {.dirty.} =
     var idx = 0
     while h >= 0:
       var nxt = s.data[h].next
-      if isFilled(s.data[h].hcode):
+      if isFilledAndValid(s.data[h].hcode):
         yieldStmt
         inc(idx)
       h = nxt
@@ -868,7 +857,7 @@ proc `==`*[A](s, t: OrderedSet[A]): bool =
   while h >= 0 and g >= 0:
     var nxh = s.data[h].next
     var nxg = t.data[g].next
-    if isFilled(s.data[h].hcode) and isFilled(t.data[g].hcode):
+    if isFilledAndValid(s.data[h].hcode) and isFilledAndValid(t.data[g].hcode):
       if s.data[h].key == t.data[g].key:
         inc compared
       else:
@@ -1146,10 +1135,19 @@ when isMainModule and not defined(release):
       b.incl(2)
       assert b.len == 1
 
-    for i in 0 .. 32:
-      var s = rightSize(i)
-      if s <= i or mustRehash(s, i):
-        echo "performance issue: rightSize() will not elide enlarge() at ", i
+    block:
+      type FakeTable = object
+        dataLen: int
+        counter: int
+        countDeleted: int
+
+      var t: FakeTable
+      for i in 0 .. 32:
+        var s = rightSize(i)
+        t.dataLen = s
+        t.counter = i
+        doAssert s > i and not mustRehash(t),
+          "performance issue: rightSize() will not elide enlarge() at: " & $i
 
     block missingOrExcl:
       var s = toOrderedSet([2, 3, 6, 7])
diff --git a/lib/pure/collections/sharedtables.nim b/lib/pure/collections/sharedtables.nim
index 1a6148752..0fbbdb3eb 100644
--- a/lib/pure/collections/sharedtables.nim
+++ b/lib/pure/collections/sharedtables.nim
@@ -25,6 +25,7 @@ type
   SharedTable*[A, B] = object ## generic hash SharedTable
     data: KeyValuePairSeq[A, B]
     counter, dataLen: int
+    countDeleted: int
     lock: Lock
 
 template maxHash(t): untyped = t.dataLen-1
@@ -32,7 +33,7 @@ template maxHash(t): untyped = t.dataLen-1
 include tableimpl
 
 template st_maybeRehashPutImpl(enlarge) {.dirty.} =
-  if mustRehash(t.dataLen, t.counter):
+  if mustRehash(t):
     enlarge(t)
     index = rawGetKnownHC(t, key, hc)
   index = -1 - index # important to transform for mgetOrPutImpl
@@ -49,9 +50,10 @@ proc enlarge[A, B](t: var SharedTable[A, B]) =
   for i in 0..<oldSize:
     let eh = n[i].hcode
     if isFilled(eh):
+      var perturb = t.getPerturb(eh)
       var j: Hash = eh and maxHash(t)
       while isFilled(t.data[j].hcode):
-        j = nextTry(j, maxHash(t))
+        j = nextTry(j, maxHash(t), perturb)
       rawInsert(t, t.data, n[i].key, n[i].val, eh, j)
   deallocShared(n)
 
diff --git a/lib/pure/collections/tableimpl.nim b/lib/pure/collections/tableimpl.nim
index 85bffd60f..aabaeeeb3 100644
--- a/lib/pure/collections/tableimpl.nim
+++ b/lib/pure/collections/tableimpl.nim
@@ -14,13 +14,20 @@ include hashcommon
 template rawGetDeepImpl() {.dirty.} =   # Search algo for unconditional add
   genHashImpl(key, hc)
   var h: Hash = hc and maxHash(t)
-  while isFilled(t.data[h].hcode):
-    h = nextTry(h, maxHash(t))
+  var perturb = t.getPerturb(hc)
+  while true:
+    let hcode = t.data[h].hcode
+    if hcode == deletedMarker or hcode == freeMarker:
+      break
+    else:
+      h = nextTry(h, maxHash(t), perturb)
   result = h
 
-template rawInsertImpl() {.dirty.} =
+template rawInsertImpl(t) {.dirty.} =
   data[h].key = key
   data[h].val = val
+  if data[h].hcode == deletedMarker:
+    t.countDeleted.dec
   data[h].hcode = hc
 
 proc rawGetDeep[X, A](t: X, key: A, hc: var Hash): int {.inline.} =
@@ -28,7 +35,7 @@ proc rawGetDeep[X, A](t: X, key: A, hc: var Hash): int {.inline.} =
 
 proc rawInsert[X, A, B](t: var X, data: var KeyValuePairSeq[A, B],
                      key: A, val: B, hc: Hash, h: Hash) =
-  rawInsertImpl()
+  rawInsertImpl(t)
 
 template checkIfInitialized() =
   when compiles(defaultInitialSize):
@@ -37,15 +44,14 @@ template checkIfInitialized() =
 
 template addImpl(enlarge) {.dirty.} =
   checkIfInitialized()
-  if mustRehash(t.dataLen, t.counter): enlarge(t)
+  if mustRehash(t): enlarge(t)
   var hc: Hash
   var j = rawGetDeep(t, key, hc)
   rawInsert(t, t.data, key, val, hc, j)
   inc(t.counter)
 
 template maybeRehashPutImpl(enlarge) {.dirty.} =
-  checkIfInitialized()
-  if mustRehash(t.dataLen, t.counter):
+  if mustRehash(t):
     enlarge(t)
     index = rawGetKnownHC(t, key, hc)
   index = -1 - index                  # important to transform for mgetOrPutImpl
@@ -82,24 +88,11 @@ template delImplIdx(t, i) =
   let msk = maxHash(t)
   if i >= 0:
     dec(t.counter)
-    block outer:
-      while true:         # KnuthV3 Algo6.4R adapted for i=i+1 instead of i=i-1
-        var j = i         # The correctness of this depends on (h+1) in nextTry,
-        var r = j         # though may be adaptable to other simple sequences.
-        t.data[i].hcode = 0              # mark current EMPTY
-        t.data[i].key = default(type(t.data[i].key))
-        t.data[i].val = default(type(t.data[i].val))
-        while true:
-          i = (i + 1) and msk            # increment mod table size
-          if isEmpty(t.data[i].hcode):   # end of collision cluster; So all done
-            break outer
-          r = t.data[i].hcode and msk    # "home" location of key@i
-          if not ((i >= r and r > j) or (r > j and j > i) or (j > i and i >= r)):
-            break
-        when defined(js):
-          t.data[j] = t.data[i]
-        else:
-          t.data[j] = move(t.data[i]) # data[j] will be marked EMPTY next loop
+    inc(t.countDeleted)
+    t.data[i].hcode = deletedMarker
+    t.data[i].key = default(type(t.data[i].key))
+    t.data[i].val = default(type(t.data[i].val))
+    # mustRehash + enlarge not needed because counter+countDeleted doesn't change
 
 template delImpl() {.dirty.} =
   var hc: Hash
@@ -123,8 +116,8 @@ template initImpl(result: typed, size: int) =
     result.last = -1
 
 template insertImpl() = # for CountTable
-  if t.dataLen == 0: initImpl(t, defaultInitialSize)
-  if mustRehash(len(t.data), t.counter): enlarge(t)
+  checkIfInitialized()
+  if mustRehash(t): enlarge(t)
   ctRawInsert(t, t.data, key, val)
   inc(t.counter)
 
diff --git a/lib/pure/collections/tables.nim b/lib/pure/collections/tables.nim
index a0cc73ad6..bf08eaa92 100644
--- a/lib/pure/collections/tables.nim
+++ b/lib/pure/collections/tables.nim
@@ -233,6 +233,7 @@ type
     ## For creating an empty Table, use `initTable proc<#initTable,int>`_.
     data: KeyValuePairSeq[A, B]
     counter: int
+    countDeleted: int
   TableRef*[A, B] = ref Table[A, B] ## Ref version of `Table<#Table>`_.
     ##
     ## For creating a new empty TableRef, use `newTable proc
@@ -250,8 +251,6 @@ template dataLen(t): untyped = len(t.data)
 
 include tableimpl
 
-proc rightSize*(count: Natural): int {.inline.}
-
 template get(t, key): untyped =
   ## retrieves the value at ``t[key]``. The value can be modified.
   ## If ``key`` is not in ``t``, the ``KeyError`` exception is raised.
@@ -267,14 +266,16 @@ template get(t, key): untyped =
 
 proc enlarge[A, B](t: var Table[A, B]) =
   var n: KeyValuePairSeq[A, B]
-  newSeq(n, len(t.data) * growthFactor)
+  newSeq(n, t.counter.rightSize)
   swap(t.data, n)
+  t.countDeleted = 0
   for i in countup(0, high(n)):
     let eh = n[i].hcode
-    if isFilled(eh):
+    if isFilledAndValid(eh):
       var j: Hash = eh and maxHash(t)
+      var perturb = t.getPerturb(eh)
       while isFilled(t.data[j].hcode):
-        j = nextTry(j, maxHash(t))
+        j = nextTry(j, maxHash(t), perturb)
       when defined(js):
         rawInsert(t, t.data, n[i].key, n[i].val, eh, j)
       else:
@@ -579,15 +580,6 @@ proc `==`*[A, B](s, t: Table[A, B]): bool =
 
   equalsImpl(s, t)
 
-proc rightSize*(count: Natural): int {.inline.} =
-  ## Return the value of ``initialSize`` to support ``count`` items.
-  ##
-  ## If more items are expected to be added, simply add that
-  ## expected extra amount to the parameter before calling this.
-  ##
-  ## Internally, we want mustRehash(rightSize(x), x) == false.
-  result = nextPowerOfTwo(count * 3 div 2 + 4)
-
 proc indexBy*[A, B, C](collection: A, index: proc(x: B): C): Table[C, B] =
   ## Index the collection with the proc provided.
   # TODO: As soon as supported, change collection: A to collection: A[B]
@@ -670,7 +662,7 @@ iterator pairs*[A, B](t: Table[A, B]): (A, B) =
   ##   # value: [1, 5, 7, 9]
   let L = len(t)
   for h in 0 .. high(t.data):
-    if isFilled(t.data[h].hcode):
+    if isFilledAndValid(t.data[h].hcode):
       yield (t.data[h].key, t.data[h].val)
       assert(len(t) == L, "the length of the table changed while iterating over it")
 
@@ -692,7 +684,7 @@ iterator mpairs*[A, B](t: var Table[A, B]): (A, var B) =
 
   let L = len(t)
   for h in 0 .. high(t.data):
-    if isFilled(t.data[h].hcode):
+    if isFilledAndValid(t.data[h].hcode):
       yield (t.data[h].key, t.data[h].val)
       assert(len(t) == L, "the length of the table changed while iterating over it")
 
@@ -713,7 +705,7 @@ iterator keys*[A, B](t: Table[A, B]): A =
 
   let L = len(t)
   for h in 0 .. high(t.data):
-    if isFilled(t.data[h].hcode):
+    if isFilledAndValid(t.data[h].hcode):
       yield t.data[h].key
       assert(len(t) == L, "the length of the table changed while iterating over it")
 
@@ -734,7 +726,7 @@ iterator values*[A, B](t: Table[A, B]): B =
 
   let L = len(t)
   for h in 0 .. high(t.data):
-    if isFilled(t.data[h].hcode):
+    if isFilledAndValid(t.data[h].hcode):
       yield t.data[h].val
       assert(len(t) == L, "the length of the table changed while iterating over it")
 
@@ -756,36 +748,53 @@ iterator mvalues*[A, B](t: var Table[A, B]): var B =
 
   let L = len(t)
   for h in 0 .. high(t.data):
-    if isFilled(t.data[h].hcode):
+    if isFilledAndValid(t.data[h].hcode):
       yield t.data[h].val
       assert(len(t) == L, "the length of the table changed while iterating over it")
 
+template hasKeyOrPutCache(cache, h): bool =
+  # using `IntSet` would be an option worth considering to avoid quadratic
+  # behavior in case user misuses Table with lots of duplicate keys; but it
+  # has overhead in the common case of small number of duplicates.
+  # However: when lots of duplicates are used, all operations would be slow
+  # anyway because the full `hash(key)` is identical for these, which makes
+  # `nextTry` follow the exact same path for each key, resulting in large
+  # collision clusters. Alternatives could involve modifying the hash/retrieval
+  # based on duplicate key count.
+  var ret = false
+  for hi in cache:
+    if hi == h:
+      ret = true
+      break
+  if not ret: cache.add h
+  ret
+
 iterator allValues*[A, B](t: Table[A, B]; key: A): B =
   ## Iterates over any value in the table ``t`` that belongs to the given ``key``.
   ##
   ## Used if you have a table with duplicate keys (as a result of using
   ## `add proc<#add,Table[A,B],A,B>`_).
   ##
-  ## **Examples:**
-  ##
-  ## .. code-block::
-  ##   var a = {'a': 3, 'b': 5}.toTable
-  ##   for i in 1..3:
-  ##     a.add('z', 10*i)
-  ##   echo a # {'a': 3, 'b': 5, 'z': 10, 'z': 20, 'z': 30}
-  ##
-  ##   for v in a.allValues('z'):
-  ##     echo v
-  ##   # 10
-  ##   # 20
-  ##   # 30
-  var h: Hash = genHash(key) and high(t.data)
+  runnableExamples:
+    import testutils
+    var a = {'a': 3, 'b': 5}.toTable
+    for i in 1..3: a.add('z', 10*i)
+    doAssert a.sortedPairs == @[('a', 3), ('b', 5), ('z', 10), ('z', 20), ('z', 30)]
+    doAssert sortedItems(a.allValues('z')) == @[10, 20, 30]
+
+  let hc = genHash(key)
+  var h: Hash = hc and high(t.data)
   let L = len(t)
-  while isFilled(t.data[h].hcode):
-    if t.data[h].key == key:
-      yield t.data[h].val
-      assert(len(t) == L, "the length of the table changed while iterating over it")
-    h = nextTry(h, high(t.data))
+  var perturb = t.getPerturb(hc)
+
+  var num = 0
+  var cache: seq[Hash]
+  while isFilled(t.data[h].hcode): # `isFilledAndValid` would be incorrect, see test for `allValues`
+    if t.data[h].hcode == hc and t.data[h].key == key:
+      if not hasKeyOrPutCache(cache, h):
+        yield t.data[h].val
+        assert(len(t) == L, "the length of the table changed while iterating over it")
+    h = nextTry(h, high(t.data), perturb)
 
 
 
@@ -1219,6 +1228,8 @@ type
     ## <#initOrderedTable,int>`_.
     data: OrderedKeyValuePairSeq[A, B]
     counter, first, last: int
+    countDeleted: int
+
   OrderedTableRef*[A, B] = ref OrderedTable[A, B] ## Ref version of
     ## `OrderedTable<#OrderedTable>`_.
     ##
@@ -1240,7 +1251,7 @@ proc rawGet[A, B](t: OrderedTable[A, B], key: A, hc: var Hash): int =
 proc rawInsert[A, B](t: var OrderedTable[A, B],
                      data: var OrderedKeyValuePairSeq[A, B],
                      key: A, val: B, hc: Hash, h: Hash) =
-  rawInsertImpl()
+  rawInsertImpl(t)
   data[h].next = -1
   if t.first < 0: t.first = h
   if t.last >= 0: data[t.last].next = h
@@ -1248,18 +1259,20 @@ proc rawInsert[A, B](t: var OrderedTable[A, B],
 
 proc enlarge[A, B](t: var OrderedTable[A, B]) =
   var n: OrderedKeyValuePairSeq[A, B]
-  newSeq(n, len(t.data) * growthFactor)
+  newSeq(n, t.counter.rightSize)
   var h = t.first
   t.first = -1
   t.last = -1
   swap(t.data, n)
+  t.countDeleted = 0
   while h >= 0:
     var nxt = n[h].next
     let eh = n[h].hcode
-    if isFilled(eh):
+    if isFilledAndValid(eh):
       var j: Hash = eh and maxHash(t)
+      var perturb = t.getPerturb(eh)
       while isFilled(t.data[j].hcode):
-        j = nextTry(j, maxHash(t))
+        j = nextTry(j, maxHash(t), perturb)
       rawInsert(t, t.data, n[h].key, n[h].val, n[h].hcode, j)
     h = nxt
 
@@ -2211,6 +2224,7 @@ type
     ## <#initCountTable,int>`_.
     data: seq[tuple[key: A, val: int]]
     counter: int
+    countDeleted: int
     isSorted: bool
   CountTableRef*[A] = ref CountTable[A] ## Ref version of
     ## `CountTable<#CountTable>`_.
@@ -2223,8 +2237,10 @@ type
 
 proc ctRawInsert[A](t: CountTable[A], data: var seq[tuple[key: A, val: int]],
                   key: A, val: int) =
-  var h: Hash = hash(key) and high(data)
-  while data[h].val != 0: h = nextTry(h, high(data))
+  let hc = hash(key)
+  var perturb = t.getPerturb(hc)
+  var h: Hash = hc and high(data)
+  while data[h].val != 0: h = nextTry(h, high(data), perturb) # TODO: handle deletedMarker
   data[h].key = key
   data[h].val = val
 
@@ -2251,10 +2267,12 @@ proc remove[A](t: var CountTable[A], key: A) =
 proc rawGet[A](t: CountTable[A], key: A): int =
   if t.data.len == 0:
     return -1
-  var h: Hash = hash(key) and high(t.data) # start with real hash value
-  while t.data[h].val != 0:
+  let hc = hash(key)
+  var perturb = t.getPerturb(hc)
+  var h: Hash = hc and high(t.data) # start with real hash value
+  while t.data[h].val != 0: # TODO: may need to handle t.data[h].hcode == deletedMarker?
     if t.data[h].key == key: return h
-    h = nextTry(h, high(t.data))
+    h = nextTry(h, high(t.data), perturb)
   result = -1 - h # < 0 => MISSING; insert idx = -1 - result
 
 template ctget(t, key, default: untyped): untyped =
diff --git a/lib/pure/hashes.nim b/lib/pure/hashes.nim
index ac8498517..b55a7865d 100644
--- a/lib/pure/hashes.nim
+++ b/lib/pure/hashes.nim
@@ -112,38 +112,14 @@ proc hash*[T: proc](x: T): Hash {.inline.} =
   else:
     result = hash(pointer(x))
 
-const
-  prime = uint(11)
-
-proc hash*(x: int): Hash {.inline.} =
+proc hash*(x: int|int64|uint|uint64|char|Ordinal): Hash {.inline.} =
   ## Efficient hashing of integers.
-  result = cast[Hash](cast[uint](x) * prime)
-
-proc hash*(x: int64): Hash {.inline.} =
-  ## Efficient hashing of `int64` integers.
-  result = cast[Hash](cast[uint](x) * prime)
-
-proc hash*(x: uint): Hash {.inline.} =
-  ## Efficient hashing of unsigned integers.
-  result = cast[Hash](x * prime)
-
-proc hash*(x: uint64): Hash {.inline.} =
-  ## Efficient hashing of `uint64` integers.
-  result = cast[Hash](cast[uint](x) * prime)
-
-proc hash*(x: char): Hash {.inline.} =
-  ## Efficient hashing of characters.
-  result = cast[Hash](cast[uint](ord(x)) * prime)
-
-proc hash*[T: Ordinal](x: T): Hash {.inline.} =
-  ## Efficient hashing of other ordinal types (e.g. enums).
-  result = cast[Hash](cast[uint](ord(x)) * prime)
+  cast[Hash](ord(x))
 
 proc hash*(x: float): Hash {.inline.} =
   ## Efficient hashing of floats.
-  var y = x + 1.0
-  result = cast[ptr Hash](addr(y))[]
-
+  var y = x + 0.0 # for denormalization
+  result = hash(cast[ptr Hash](addr(y))[])
 
 # Forward declarations before methods that hash containers. This allows
 # containers to contain other containers
diff --git a/lib/pure/testutils.nim b/lib/pure/testutils.nim
new file mode 100644
index 000000000..bdf90b616
--- /dev/null
+++ b/lib/pure/testutils.nim
@@ -0,0 +1,15 @@
+## Utilities to help writing tests
+##
+## Unstable, experimental API.
+
+import std/[sequtils, algorithm]
+
+proc sortedPairs*[T](t: T): auto =
+  ## helps when writing tests involving tables in a way that's robust to
+  ## changes in hashing functions / table implementation.
+  toSeq(t.pairs).sorted
+
+template sortedItems*(t: untyped): untyped =
+  ## helps when writing tests involving tables in a way that's robust to
+  ## changes in hashing functions / table implementation.
+  sorted(toSeq(t))
diff --git a/tests/arc/trepr.nim b/tests/arc/trepr.nim
index 7a92112ed..391622ff6 100644
--- a/tests/arc/trepr.nim
+++ b/tests/arc/trepr.nim
@@ -1,7 +1,7 @@
 discard """
   cmd: "nim c --gc:arc $file"
   nimout: '''(a: true, n: doAssert)
-Table[system.string, trepr.MyType](data: @[], counter: 0)
+Table[system.string, trepr.MyType](data: @[], counter: 0, countDeleted: 0)
 nil
 '''
 """
diff --git a/tests/collections/ttables.nim b/tests/collections/ttables.nim
index 9eccf345a..7459b3f13 100644
--- a/tests/collections/ttables.nim
+++ b/tests/collections/ttables.nim
@@ -8,7 +8,12 @@ And we get here
 '''
 joinable: false
 """
-import hashes, sequtils, tables, algorithm
+import hashes, sequtils, tables, algorithm, testutils
+
+block tableDollar:
+  # other tests should use `sortedPairs` to be robust to future table/hash
+  # implementation changes
+  doAssert ${1: 'a', 2: 'b'}.toTable in ["{1: 'a', 2: 'b'}", "{2: 'b', 1: 'a'}"]
 
 # test should not be joined because it takes too long.
 block tableadds:
@@ -188,6 +193,23 @@ block ttables2:
   run1()
   echo "2"
 
+block allValues:
+  var t: Table[int, string]
+  var key = 0
+  let n = 1000
+  for i in 0..<n: t.add(i, $i)
+  const keys = [0, -1, 12]
+  for i in 0..1:
+    for key in keys:
+      t.add(key, $key & ":" & $i)
+  for i in 0..<n:
+    if i notin keys:
+      t.del(i)
+  doAssert t.sortedPairs == @[(-1, "-1:0"), (-1, "-1:1"), (0, "0"), (0, "0:0"), (0, "0:1"), (12, "12"), (12, "12:0"), (12, "12:1")]
+  doAssert sortedItems(t.allValues(0)) == @["0", "0:0", "0:1"]
+  doAssert sortedItems(t.allValues(-1)) == @["-1:0", "-1:1"]
+  doAssert sortedItems(t.allValues(12)) == @["12", "12:0", "12:1"]
+  doAssert sortedItems(t.allValues(1)) == @[]
 
 block tablesref:
   const
@@ -232,8 +254,8 @@ block tablesref:
     for x in 0..1:
       for y in 0..1:
         assert t[(x,y)] == $x & $y
-    assert($t ==
-      "{(x: 1, y: 1): \"11\", (x: 0, y: 0): \"00\", (x: 0, y: 1): \"01\", (x: 1, y: 0): \"10\"}")
+    assert t.sortedPairs ==
+      @[((x: 0, y: 0), "00"), ((x: 0, y: 1), "01"), ((x: 1, y: 0), "10"), ((x: 1, y: 1), "11")]
 
   block tableTest2:
     var t = newTable[string, float]()
@@ -340,7 +362,7 @@ block tablesref:
   block anonZipTest:
     let keys = @['a','b','c']
     let values = @[1, 2, 3]
-    doAssert "{'c': 3, 'a': 1, 'b': 2}" == $ toTable zip(keys, values)
+    doAssert zip(keys, values).toTable.sortedPairs == @[('a', 1), ('b', 2), ('c', 3)]
 
   block clearTableTest:
     var t = newTable[string, float]()
@@ -369,3 +391,4 @@ block tablesref:
 
   orderedTableSortTest()
   echo "3"
+
diff --git a/tests/collections/ttablesthreads.nim b/tests/collections/ttablesthreads.nim
index 5553b31ef..757a8cebe 100644
--- a/tests/collections/ttablesthreads.nim
+++ b/tests/collections/ttablesthreads.nim
@@ -3,7 +3,7 @@ discard """
   output: '''true'''
 """
 
-import hashes, tables, sharedtables
+import hashes, tables, sharedtables, testutils
 
 const
   data = {
@@ -47,8 +47,7 @@ block tableTest1:
   for x in 0..1:
     for y in 0..1:
       assert t[(x,y)] == $x & $y
-  assert($t ==
-    "{(x: 1, y: 1): \"11\", (x: 0, y: 0): \"00\", (x: 0, y: 1): \"01\", (x: 1, y: 0): \"10\"}")
+  assert t.sortedPairs == @[((x: 0, y: 0), "00"), ((x: 0, y: 1), "01"), ((x: 1, y: 0), "10"), ((x: 1, y: 1), "11")]
 
 block tableTest2:
   var t = initTable[string, float]()