summary refs log tree commit diff stats
path: root/lib
diff options
context:
space:
mode:
authorc-blake <c-blake@users.noreply.github.com>2020-03-31 13:18:45 -0400
committerGitHub <noreply@github.com>2020-03-31 19:18:45 +0200
commitb1aa3b1eeada17453cee9af5c9227c85e0b73d54 (patch)
tree09868a6bbc7854ca18d71812c9941f9ed389dd0f /lib
parent7abeba6aeb2de6ab55702524a2c35da1c7da2ee1 (diff)
downloadNim-b1aa3b1eeada17453cee9af5c9227c85e0b73d54.tar.gz
Unwind just the "pseudorandom probing" part of recent sets,tables changes (#13816)
* Unwind just the "pseudorandom probing" (whole hash-code-keyed variable
stride double hashing) part of recent sets & tables changes, which have
still been causing bugs over a month later (e.g., two days ago:
https://github.com/nim-lang/Nim/issues/13794) as well as still having
several "figure this out" implementation question comments in them (see
just the diffs of this PR).

This topic has been discussed in many places:
  https://github.com/nim-lang/Nim/issues/13393
  https://github.com/nim-lang/Nim/pull/13418
  https://github.com/nim-lang/Nim/pull/13440
  https://github.com/nim-lang/Nim/issues/13794

Alternative/non-mandatory stronger integer hashes (or vice-versa opt-in
identity hashes) are a better solution that is more general (no illusion
of one hard-coded sequence solving all problems) while retaining the
virtues of linear probing such as cache obliviousness and age-less tables
under delete-heavy workloads (still untested after a month of this change).

The only real solution for truly adversarial keys is a hash keyed off of
data unobservable to attackers.  That all fits better with a few families
of user-pluggable/define-switchable hashes which can be provided in a
separate PR more about `hashes.nim`.

This PR carefully preserves the better (but still hard-coded!) probing
of `intsets` and other recent fixes such as `move` annotations, hash
order invariant tests, the `intsets.missingOrExcl` fix, and the move of
`rightSize` into `hashcommon.nim`.

* Fix `data.len` -> `dataLen` problem.
Diffstat (limited to 'lib')
-rw-r--r--lib/pure/collections/hashcommon.nim80
-rw-r--r--lib/pure/collections/intsets.nim8
-rw-r--r--lib/pure/collections/setimpl.nim28
-rw-r--r--lib/pure/collections/sets.nim14
-rw-r--r--lib/pure/collections/sharedtables.nim4
-rw-r--r--lib/pure/collections/tableimpl.nim42
-rw-r--r--lib/pure/collections/tables.nim96
7 files changed, 98 insertions, 174 deletions
diff --git a/lib/pure/collections/hashcommon.nim b/lib/pure/collections/hashcommon.nim
index bfc09ec3c..e998145e7 100644
--- a/lib/pure/collections/hashcommon.nim
+++ b/lib/pure/collections/hashcommon.nim
@@ -18,87 +18,43 @@ when not defined(nimHasDefault):
     var v: T
     v
 
-const freeMarker = 0
-const deletedMarker = -1
-
-type UHash = uint
-
 # hcode for real keys cannot be zero.  hcode==0 signifies an empty slot.  These
 # two procs retain clarity of that encoding without the space cost of an enum.
-proc isFilledAndValid(hcode: Hash): bool {.inline.} =
-  result = hcode != 0 and hcode != deletedMarker
-    # performance: we could use bit magic if needed
+proc isEmpty(hcode: Hash): bool {.inline.} =
+  result = hcode == 0
 
 proc isFilled(hcode: Hash): bool {.inline.} =
   result = hcode != 0
 
-
-proc translateBits(a: UHash, numBitsMask: int): UHash {.inline.} =
-  result = (a shr numBitsMask) or (a shl (UHash.sizeof * 8 - numBitsMask))
-
-proc nextTry(h, maxHash: Hash, perturb: var UHash): Hash {.inline.} =
-  # FACTOR between hashcommon.nextTry, intsets.nextTry
-  # an optimization would be to use `(h + 1) and maxHash` for a few iterations
-  # and then switch to the formula below, to get "best of both worlds": good
-  # cache locality, except when a collision cluster is detected (ie, large number
-  # of iterations).
-  const PERTURB_SHIFT = 5 # consider tying this to `numBitsMask = fastLog2(t.dataLen)`
-  result = cast[Hash]((5*cast[uint](h) + 1 + perturb) and cast[uint](maxHash))
-  perturb = perturb shr PERTURB_SHIFT
+proc nextTry(h, maxHash: Hash): Hash {.inline.} =
+  result = (h + 1) and maxHash
 
 proc mustRehash[T](t: T): bool {.inline.} =
-  # FACTOR between hashcommon.mustRehash, intsets.mustRehash
-  let counter2 = t.counter + t.countDeleted
-  let length = t.dataLen
-  assert(length > counter2)
-  result = (length * 2 < counter2 * 3) or (length - counter2 < 4) # synchronize with `rightSize`
+  assert(t.dataLen > t.counter)
+  result = (t.dataLen * 2 < t.counter * 3) or (t.dataLen - t.counter < 4)
 
 proc rightSize*(count: Natural): int {.inline.} =
-  ## Return the value of `initialSize` to support `count` items.
+  ## Return the value of ``initialSize`` to support ``count`` items.
   ##
   ## If more items are expected to be added, simply add that
   ## expected extra amount to the parameter before calling this.
-  ##
-  ## Internally, we want `mustRehash(t) == false` for t that was just resized.
+  #
   # Make sure to synchronize with `mustRehash`
   result = nextPowerOfTwo(count * 3 div 2 + 4)
 
-template getPerturb(t: typed, hc: Hash): UHash =
-  # we can't use `fastLog2(dataLen(t))` because importing `bitops` would cause codegen errors
-  # so we use a practical value of half the bit width (eg 64 / 2 = 32 on 64bit machines)
-  let numBitsMask = sizeof(Hash) * 4 # ie, sizeof(Hash) * 8 / 2
-  # this makes a major difference for cases like #13393; it causes the bits
-  # that were masked out in 1st position so they'll be masked in instead, and
-  # influence the recursion in nextTry earlier rather than later.
-  translateBits(cast[uint](hc), numBitsMask)
-
 template rawGetKnownHCImpl() {.dirty.} =
   if t.dataLen == 0:
     return -1
   var h: Hash = hc and maxHash(t) # start with real hash value
-  var perturb = t.getPerturb(hc)
-  var deletedIndex = -1
-  while true:
-    if isFilledAndValid(t.data[h].hcode):
-      # Compare hc THEN key with boolean short circuit. This makes the common case
-      # zero ==key's for missing (e.g.inserts) and exactly one ==key for present.
-      # It does slow down succeeding lookups by one extra Hash cmp&and..usually
-      # just a few clock cycles, generally worth it for any non-integer-like A.
-      # performance: we optimize this: depending on type(key), skip hc comparison
-      if t.data[h].hcode == hc and t.data[h].key == key:
-        return h
-      h = nextTry(h, maxHash(t), perturb)
-    elif t.data[h].hcode == deletedMarker:
-      if deletedIndex == -1:
-        deletedIndex = h
-      h = nextTry(h, maxHash(t), perturb)
-    else:
-      break
-  if deletedIndex == -1:
-    result = -1 - h # < 0 => MISSING; insert idx = -1 - result
-  else:
-    # we prefer returning a (in fact the 1st found) deleted index
-    result = -1 - deletedIndex
+  while isFilled(t.data[h].hcode):
+    # Compare hc THEN key with boolean short circuit. This makes the common case
+    # zero ==key's for missing (e.g.inserts) and exactly one ==key for present.
+    # It does slow down succeeding lookups by one extra Hash cmp&and..usually
+    # just a few clock cycles, generally worth it for any non-integer-like A.
+    if t.data[h].hcode == hc and t.data[h].key == key:
+      return h
+    h = nextTry(h, maxHash(t))
+  result = -1 - h # < 0 => MISSING; insert idx = -1 - result
 
 proc rawGetKnownHC[X, A](t: X, key: A, hc: Hash): int {.inline.} =
   rawGetKnownHCImpl()
@@ -107,8 +63,6 @@ template genHashImpl(key, hc: typed) =
   hc = hash(key)
   if hc == 0: # This almost never taken branch should be very predictable.
     hc = 314159265 # Value doesn't matter; Any non-zero favorite is fine.
-  elif hc == deletedMarker:
-    hc = 214159261
 
 template genHash(key: typed): Hash =
   var res: Hash
diff --git a/lib/pure/collections/intsets.nim b/lib/pure/collections/intsets.nim
index 1967a0149..eae3fa447 100644
--- a/lib/pure/collections/intsets.nim
+++ b/lib/pure/collections/intsets.nim
@@ -46,20 +46,16 @@ type
   IntSet* = object       ## An efficient set of `int` implemented as a sparse bit set.
     elems: int           # only valid for small numbers
     counter, max: int
-    countDeleted: int
     head: PTrunk
     data: TrunkSeq
     a: array[0..33, int] # profiling shows that 34 elements are enough
 
 proc mustRehash[T](t: T): bool {.inline.} =
-  # FACTOR between hashcommon.mustRehash, intsets.mustRehash
-  let counter2 = t.counter + t.countDeleted
   let length = t.max + 1
-  assert length > counter2
-  result = (length * 2 < counter2 * 3) or (length - counter2 < 4)
+  assert length > t.counter
+  result = (length * 2 < t.counter * 3) or (length - t.counter < 4)
 
 proc nextTry(h, maxHash: Hash, perturb: var Hash): Hash {.inline.} =
-  # FACTOR between hashcommon.nextTry, intsets.nextTry
   const PERTURB_SHIFT = 5
   var perturb2 = cast[uint](perturb) shr PERTURB_SHIFT
   perturb = cast[Hash](perturb2)
diff --git a/lib/pure/collections/setimpl.nim b/lib/pure/collections/setimpl.nim
index 9e68cb072..d798cbcb3 100644
--- a/lib/pure/collections/setimpl.nim
+++ b/lib/pure/collections/setimpl.nim
@@ -35,11 +35,10 @@ proc rawInsert[A](s: var HashSet[A], data: var KeyValuePairSeq[A], key: A,
 
 proc enlarge[A](s: var HashSet[A]) =
   var n: KeyValuePairSeq[A]
-  newSeq(n, s.counter.rightSize)
+  newSeq(n, len(s.data) * growthFactor)
   swap(s.data, n) # n is now old seq
-  s.countDeleted = 0
   for i in countup(0, high(n)):
-    if isFilledAndValid(n[i].hcode):
+    if isFilled(n[i].hcode):
       var j = -1 - rawGetKnownHC(s, n[i].key, n[i].hcode)
       rawInsert(s, s.data, n[i].key, n[i].hcode, j)
 
@@ -69,6 +68,11 @@ template containsOrInclImpl() {.dirty.} =
     rawInsert(s, s.data, key, hc, -1 - index)
     inc(s.counter)
 
+template doWhile(a, b) =
+  while true:
+    b
+    if not a: break
+
 proc exclImpl[A](s: var HashSet[A], key: A): bool {.inline.} =
   var hc: Hash
   var i = rawGet(s, key, hc)
@@ -78,9 +82,17 @@ proc exclImpl[A](s: var HashSet[A], key: A): bool {.inline.} =
   if i >= 0:
     result = false
     dec(s.counter)
-    inc(s.countDeleted)
-    s.data[i].hcode = deletedMarker
-    s.data[i].key = default(type(s.data[i].key))
+    while true: # KnuthV3 Algo6.4R adapted for i=i+1 instead of i=i-1
+      var j = i # The correctness of this depends on (h+1) in nextTry,
+      var r = j # though may be adaptable to other simple sequences.
+      s.data[i].hcode = 0 # mark current EMPTY
+      s.data[i].key = default(type(s.data[i].key))
+      doWhile((i >= r and r > j) or (r > j and j > i) or (j > i and i >= r)):
+        i = (i + 1) and msk # increment mod table size
+        if isEmpty(s.data[i].hcode): # end of collision cluster; So all done
+          return
+        r = s.data[i].hcode and msk # "home" location of key@i
+      s.data[j] = move(s.data[i]) # data[i] will be marked EMPTY next loop
 
 template dollarImpl() {.dirty.} =
   result = "{"
@@ -113,7 +125,7 @@ proc enlarge[A](s: var OrderedSet[A]) =
   swap(s.data, n)
   while h >= 0:
     var nxt = n[h].next
-    if isFilled(n[h].hcode): # should be isFilledAndValid once tombstones are used
+    if isFilled(n[h].hcode):
       var j = -1 - rawGetKnownHC(s, n[h].key, n[h].hcode)
       rawInsert(s, s.data, n[h].key, n[h].hcode, j)
     h = nxt
@@ -131,7 +143,7 @@ proc exclImpl[A](s: var OrderedSet[A], key: A): bool {.inline.} =
   result = true
   while h >= 0:
     var nxt = n[h].next
-    if isFilled(n[h].hcode): # should be isFilledAndValid once tombstones are used
+    if isFilled(n[h].hcode):
       if n[h].hcode == hc and n[h].key == key:
         dec s.counter
         result = false
diff --git a/lib/pure/collections/sets.nim b/lib/pure/collections/sets.nim
index caa25dbb3..8c04a05e3 100644
--- a/lib/pure/collections/sets.nim
+++ b/lib/pure/collections/sets.nim
@@ -68,7 +68,6 @@ type
     ## before calling other procs on it.
     data: KeyValuePairSeq[A]
     counter: int
-    countDeleted: int
 
 type
   OrderedKeyValuePair[A] = tuple[
@@ -81,7 +80,6 @@ type
     ## <#initOrderedSet,int>`_ before calling other procs on it.
     data: OrderedKeyValuePairSeq[A]
     counter, first, last: int
-    countDeleted: int
 
 const
   defaultInitialSize* = 64
@@ -249,7 +247,7 @@ iterator items*[A](s: HashSet[A]): A =
   ##   echo b
   ##   # --> {(a: 1, b: 3), (a: 0, b: 4)}
   for h in 0 .. high(s.data):
-    if isFilledAndValid(s.data[h].hcode): yield s.data[h].key
+    if isFilled(s.data[h].hcode): yield s.data[h].key
 
 proc containsOrIncl*[A](s: var HashSet[A], key: A): bool =
   ## Includes `key` in the set `s` and tells if `key` was already in `s`.
@@ -341,7 +339,7 @@ proc pop*[A](s: var HashSet[A]): A =
     doAssertRaises(KeyError, echo s.pop)
 
   for h in 0 .. high(s.data):
-    if isFilledAndValid(s.data[h].hcode):
+    if isFilled(s.data[h].hcode):
       result = s.data[h].key
       excl(s, result)
       return result
@@ -574,8 +572,7 @@ proc map*[A, B](data: HashSet[A], op: proc (x: A): B {.closure.}): HashSet[B] =
 proc hash*[A](s: HashSet[A]): Hash =
   ## Hashing of HashSet.
   for h in 0 .. high(s.data):
-    if isFilledAndValid(s.data[h].hcode):
-      result = result xor s.data[h].hcode
+    result = result xor s.data[h].hcode
   result = !$result
 
 proc `$`*[A](s: HashSet[A]): string =
@@ -593,6 +590,7 @@ proc `$`*[A](s: HashSet[A]): string =
   ##   # --> {no, esc'aping, is " provided}
   dollarImpl()
 
+
 proc initSet*[A](initialSize = defaultInitialSize): HashSet[A] {.deprecated:
      "Deprecated since v0.20, use 'initHashSet'".} = initHashSet[A](initialSize)
 
@@ -624,7 +622,7 @@ template forAllOrderedPairs(yieldStmt: untyped) {.dirty.} =
     var idx = 0
     while h >= 0:
       var nxt = s.data[h].next
-      if isFilledAndValid(s.data[h].hcode):
+      if isFilled(s.data[h].hcode):
         yieldStmt
         inc(idx)
       h = nxt
@@ -858,7 +856,7 @@ proc `==`*[A](s, t: OrderedSet[A]): bool =
   while h >= 0 and g >= 0:
     var nxh = s.data[h].next
     var nxg = t.data[g].next
-    if isFilledAndValid(s.data[h].hcode) and isFilledAndValid(t.data[g].hcode):
+    if isFilled(s.data[h].hcode) and isFilled(t.data[g].hcode):
       if s.data[h].key == t.data[g].key:
         inc compared
       else:
diff --git a/lib/pure/collections/sharedtables.nim b/lib/pure/collections/sharedtables.nim
index 27ac5e84f..2a1c0543f 100644
--- a/lib/pure/collections/sharedtables.nim
+++ b/lib/pure/collections/sharedtables.nim
@@ -25,7 +25,6 @@ type
   SharedTable*[A, B] = object ## generic hash SharedTable
     data: KeyValuePairSeq[A, B]
     counter, dataLen: int
-    countDeleted: int
     lock: Lock
 
 template maxHash(t): untyped = t.dataLen-1
@@ -50,10 +49,9 @@ proc enlarge[A, B](t: var SharedTable[A, B]) =
   for i in 0..<oldSize:
     let eh = n[i].hcode
     if isFilled(eh):
-      var perturb = t.getPerturb(eh)
       var j: Hash = eh and maxHash(t)
       while isFilled(t.data[j].hcode):
-        j = nextTry(j, maxHash(t), perturb)
+        j = nextTry(j, maxHash(t))
       rawInsert(t, t.data, n[i].key, n[i].val, eh, j)
   deallocShared(n)
 
diff --git a/lib/pure/collections/tableimpl.nim b/lib/pure/collections/tableimpl.nim
index d7facda72..47c14af93 100644
--- a/lib/pure/collections/tableimpl.nim
+++ b/lib/pure/collections/tableimpl.nim
@@ -14,20 +14,13 @@ include hashcommon
 template rawGetDeepImpl() {.dirty.} =   # Search algo for unconditional add
   genHashImpl(key, hc)
   var h: Hash = hc and maxHash(t)
-  var perturb = t.getPerturb(hc)
-  while true:
-    let hcode = t.data[h].hcode
-    if hcode == deletedMarker or hcode == freeMarker:
-      break
-    else:
-      h = nextTry(h, maxHash(t), perturb)
+  while isFilled(t.data[h].hcode):
+    h = nextTry(h, maxHash(t))
   result = h
 
-template rawInsertImpl(t) {.dirty.} =
+template rawInsertImpl() {.dirty.} =
   data[h].key = key
   data[h].val = val
-  if data[h].hcode == deletedMarker:
-    t.countDeleted.dec
   data[h].hcode = hc
 
 proc rawGetDeep[X, A](t: X, key: A, hc: var Hash): int {.inline.} =
@@ -35,7 +28,7 @@ proc rawGetDeep[X, A](t: X, key: A, hc: var Hash): int {.inline.} =
 
 proc rawInsert[X, A, B](t: var X, data: var KeyValuePairSeq[A, B],
                      key: A, val: B, hc: Hash, h: Hash) =
-  rawInsertImpl(t)
+  rawInsertImpl()
 
 template checkIfInitialized() =
   when compiles(defaultInitialSize):
@@ -51,6 +44,7 @@ template addImpl(enlarge) {.dirty.} =
   inc(t.counter)
 
 template maybeRehashPutImpl(enlarge) {.dirty.} =
+  checkIfInitialized()
   if mustRehash(t):
     enlarge(t)
     index = rawGetKnownHC(t, key, hc)
@@ -88,11 +82,24 @@ template delImplIdx(t, i) =
   let msk = maxHash(t)
   if i >= 0:
     dec(t.counter)
-    inc(t.countDeleted)
-    t.data[i].hcode = deletedMarker
-    t.data[i].key = default(type(t.data[i].key))
-    t.data[i].val = default(type(t.data[i].val))
-    # mustRehash + enlarge not needed because counter+countDeleted doesn't change
+    block outer:
+      while true:         # KnuthV3 Algo6.4R adapted for i=i+1 instead of i=i-1
+        var j = i         # The correctness of this depends on (h+1) in nextTry,
+        var r = j         # though may be adaptable to other simple sequences.
+        t.data[i].hcode = 0              # mark current EMPTY
+        t.data[i].key = default(type(t.data[i].key))
+        t.data[i].val = default(type(t.data[i].val))
+        while true:
+          i = (i + 1) and msk            # increment mod table size
+          if isEmpty(t.data[i].hcode):   # end of collision cluster; So all done
+            break outer
+          r = t.data[i].hcode and msk    # "home" location of key@i
+          if not ((i >= r and r > j) or (r > j and j > i) or (j > i and i >= r)):
+            break
+        when defined(js):
+          t.data[j] = t.data[i]
+        else:
+          t.data[j] = move(t.data[i]) # data[j] will be marked EMPTY next loop
 
 template delImpl() {.dirty.} =
   var hc: Hash
@@ -108,7 +115,6 @@ template clearImpl() {.dirty.} =
   t.counter = 0
 
 template ctAnd(a, b): bool =
-  # pending https://github.com/nim-lang/Nim/issues/13502
   when a:
     when b: true
     else: false
@@ -126,7 +132,7 @@ template initImpl(result: typed, size: int) =
       result.last = -1
 
 template insertImpl() = # for CountTable
-  checkIfInitialized()
+  if t.dataLen == 0: initImpl(t, defaultInitialSize)
   if mustRehash(t): enlarge(t)
   ctRawInsert(t, t.data, key, val)
   inc(t.counter)
diff --git a/lib/pure/collections/tables.nim b/lib/pure/collections/tables.nim
index b79e396fd..131e0ad80 100644
--- a/lib/pure/collections/tables.nim
+++ b/lib/pure/collections/tables.nim
@@ -233,7 +233,6 @@ type
     ## For creating an empty Table, use `initTable proc<#initTable,int>`_.
     data: KeyValuePairSeq[A, B]
     counter: int
-    countDeleted: int
   TableRef*[A, B] = ref Table[A, B] ## Ref version of `Table<#Table>`_.
     ##
     ## For creating a new empty TableRef, use `newTable proc
@@ -266,16 +265,14 @@ template get(t, key): untyped =
 
 proc enlarge[A, B](t: var Table[A, B]) =
   var n: KeyValuePairSeq[A, B]
-  newSeq(n, t.counter.rightSize)
+  newSeq(n, len(t.data) * growthFactor)
   swap(t.data, n)
-  t.countDeleted = 0
   for i in countup(0, high(n)):
     let eh = n[i].hcode
-    if isFilledAndValid(eh):
+    if isFilled(eh):
       var j: Hash = eh and maxHash(t)
-      var perturb = t.getPerturb(eh)
       while isFilled(t.data[j].hcode):
-        j = nextTry(j, maxHash(t), perturb)
+        j = nextTry(j, maxHash(t))
       when defined(js):
         rawInsert(t, t.data, n[i].key, n[i].val, eh, j)
       else:
@@ -662,7 +659,7 @@ iterator pairs*[A, B](t: Table[A, B]): (A, B) =
   ##   # value: [1, 5, 7, 9]
   let L = len(t)
   for h in 0 .. high(t.data):
-    if isFilledAndValid(t.data[h].hcode):
+    if isFilled(t.data[h].hcode):
       yield (t.data[h].key, t.data[h].val)
       assert(len(t) == L, "the length of the table changed while iterating over it")
 
@@ -684,7 +681,7 @@ iterator mpairs*[A, B](t: var Table[A, B]): (A, var B) =
 
   let L = len(t)
   for h in 0 .. high(t.data):
-    if isFilledAndValid(t.data[h].hcode):
+    if isFilled(t.data[h].hcode):
       yield (t.data[h].key, t.data[h].val)
       assert(len(t) == L, "the length of the table changed while iterating over it")
 
@@ -705,7 +702,7 @@ iterator keys*[A, B](t: Table[A, B]): A =
 
   let L = len(t)
   for h in 0 .. high(t.data):
-    if isFilledAndValid(t.data[h].hcode):
+    if isFilled(t.data[h].hcode):
       yield t.data[h].key
       assert(len(t) == L, "the length of the table changed while iterating over it")
 
@@ -726,7 +723,7 @@ iterator values*[A, B](t: Table[A, B]): B =
 
   let L = len(t)
   for h in 0 .. high(t.data):
-    if isFilledAndValid(t.data[h].hcode):
+    if isFilled(t.data[h].hcode):
       yield t.data[h].val
       assert(len(t) == L, "the length of the table changed while iterating over it")
 
@@ -748,27 +745,10 @@ iterator mvalues*[A, B](t: var Table[A, B]): var B =
 
   let L = len(t)
   for h in 0 .. high(t.data):
-    if isFilledAndValid(t.data[h].hcode):
+    if isFilled(t.data[h].hcode):
       yield t.data[h].val
       assert(len(t) == L, "the length of the table changed while iterating over it")
 
-template hasKeyOrPutCache(cache, h): bool =
-  # using `IntSet` would be an option worth considering to avoid quadratic
-  # behavior in case user misuses Table with lots of duplicate keys; but it
-  # has overhead in the common case of small number of duplicates.
-  # However: when lots of duplicates are used, all operations would be slow
-  # anyway because the full `hash(key)` is identical for these, which makes
-  # `nextTry` follow the exact same path for each key, resulting in large
-  # collision clusters. Alternatives could involve modifying the hash/retrieval
-  # based on duplicate key count.
-  var ret = false
-  for hi in cache:
-    if hi == h:
-      ret = true
-      break
-  if not ret: cache.add h
-  ret
-
 iterator allValues*[A, B](t: Table[A, B]; key: A): B =
   ## Iterates over any value in the table ``t`` that belongs to the given ``key``.
   ##
@@ -782,20 +762,13 @@ iterator allValues*[A, B](t: Table[A, B]; key: A): B =
     for i in 1..3: a.add('z', 10*i)
     doAssert toSeq(a.pairs).sorted == @[('a', 3), ('b', 5), ('z', 10), ('z', 20), ('z', 30)]
     doAssert sorted(toSeq(a.allValues('z'))) == @[10, 20, 30]
-
-  let hc = genHash(key)
-  var h: Hash = hc and high(t.data)
+  var h: Hash = genHash(key) and high(t.data)
   let L = len(t)
-  var perturb = t.getPerturb(hc)
-
-  var num = 0
-  var cache: seq[Hash]
-  while isFilled(t.data[h].hcode): # `isFilledAndValid` would be incorrect, see test for `allValues`
-    if t.data[h].hcode == hc and t.data[h].key == key:
-      if not hasKeyOrPutCache(cache, h):
-        yield t.data[h].val
-        assert(len(t) == L, "the length of the table changed while iterating over it")
-    h = nextTry(h, high(t.data), perturb)
+  while isFilled(t.data[h].hcode):
+    if t.data[h].key == key:
+      yield t.data[h].val
+      assert(len(t) == L, "the length of the table changed while iterating over it")
+    h = nextTry(h, high(t.data))
 
 
 
@@ -1118,7 +1091,7 @@ iterator pairs*[A, B](t: TableRef[A, B]): (A, B) =
   ##   # value: [1, 5, 7, 9]
   let L = len(t)
   for h in 0 .. high(t.data):
-    if isFilledAndValid(t.data[h].hcode):
+    if isFilled(t.data[h].hcode):
       yield (t.data[h].key, t.data[h].val)
       assert(len(t) == L, "the length of the table changed while iterating over it")
 
@@ -1140,7 +1113,7 @@ iterator mpairs*[A, B](t: TableRef[A, B]): (A, var B) =
 
   let L = len(t)
   for h in 0 .. high(t.data):
-    if isFilledAndValid(t.data[h].hcode):
+    if isFilled(t.data[h].hcode):
       yield (t.data[h].key, t.data[h].val)
       assert(len(t) == L, "the length of the table changed while iterating over it")
 
@@ -1161,7 +1134,7 @@ iterator keys*[A, B](t: TableRef[A, B]): A =
 
   let L = len(t)
   for h in 0 .. high(t.data):
-    if isFilledAndValid(t.data[h].hcode):
+    if isFilled(t.data[h].hcode):
       yield t.data[h].key
       assert(len(t) == L, "the length of the table changed while iterating over it")
 
@@ -1182,7 +1155,7 @@ iterator values*[A, B](t: TableRef[A, B]): B =
 
   let L = len(t)
   for h in 0 .. high(t.data):
-    if isFilledAndValid(t.data[h].hcode):
+    if isFilled(t.data[h].hcode):
       yield t.data[h].val
       assert(len(t) == L, "the length of the table changed while iterating over it")
 
@@ -1203,7 +1176,7 @@ iterator mvalues*[A, B](t: TableRef[A, B]): var B =
 
   let L = len(t)
   for h in 0 .. high(t.data):
-    if isFilledAndValid(t.data[h].hcode):
+    if isFilled(t.data[h].hcode):
       yield t.data[h].val
       assert(len(t) == L, "the length of the table changed while iterating over it")
 
@@ -1229,8 +1202,6 @@ type
     ## <#initOrderedTable,int>`_.
     data: OrderedKeyValuePairSeq[A, B]
     counter, first, last: int
-    countDeleted: int
-
   OrderedTableRef*[A, B] = ref OrderedTable[A, B] ## Ref version of
     ## `OrderedTable<#OrderedTable>`_.
     ##
@@ -1252,7 +1223,7 @@ proc rawGet[A, B](t: OrderedTable[A, B], key: A, hc: var Hash): int =
 proc rawInsert[A, B](t: var OrderedTable[A, B],
                      data: var OrderedKeyValuePairSeq[A, B],
                      key: A, val: B, hc: Hash, h: Hash) =
-  rawInsertImpl(t)
+  rawInsertImpl()
   data[h].next = -1
   if t.first < 0: t.first = h
   if t.last >= 0: data[t.last].next = h
@@ -1260,20 +1231,18 @@ proc rawInsert[A, B](t: var OrderedTable[A, B],
 
 proc enlarge[A, B](t: var OrderedTable[A, B]) =
   var n: OrderedKeyValuePairSeq[A, B]
-  newSeq(n, t.counter.rightSize)
+  newSeq(n, len(t.data) * growthFactor)
   var h = t.first
   t.first = -1
   t.last = -1
   swap(t.data, n)
-  t.countDeleted = 0
   while h >= 0:
     var nxt = n[h].next
     let eh = n[h].hcode
-    if isFilledAndValid(eh):
+    if isFilled(eh):
       var j: Hash = eh and maxHash(t)
-      var perturb = t.getPerturb(eh)
       while isFilled(t.data[j].hcode):
-        j = nextTry(j, maxHash(t), perturb)
+        j = nextTry(j, maxHash(t))
       rawInsert(t, t.data, move n[h].key, move n[h].val, n[h].hcode, j)
     h = nxt
 
@@ -1282,10 +1251,6 @@ template forAllOrderedPairs(yieldStmt: untyped) {.dirty.} =
     var h = t.first
     while h >= 0:
       var nxt = t.data[h].next
-       # For OrderedTable/OrderedTableRef, isFilled is ok because `del` is O(n)
-       # and doesn't create tombsones, but if it does start using tombstones,
-       # carefully replace `isFilled` by `isFilledAndValid` as appropriate for these
-       # table types only, ditto with `OrderedSet`.
       if isFilled(t.data[h].hcode):
         yieldStmt
       h = nxt
@@ -2229,7 +2194,6 @@ type
     ## <#initCountTable,int>`_.
     data: seq[tuple[key: A, val: int]]
     counter: int
-    countDeleted: int
     isSorted: bool
   CountTableRef*[A] = ref CountTable[A] ## Ref version of
     ## `CountTable<#CountTable>`_.
@@ -2242,10 +2206,8 @@ type
 
 proc ctRawInsert[A](t: CountTable[A], data: var seq[tuple[key: A, val: int]],
                   key: A, val: int) =
-  let hc = hash(key)
-  var perturb = t.getPerturb(hc)
-  var h: Hash = hc and high(data)
-  while data[h].val != 0: h = nextTry(h, high(data), perturb) # TODO: handle deletedMarker
+  var h: Hash = hash(key) and high(data)
+  while data[h].val != 0: h = nextTry(h, high(data))
   data[h].key = key
   data[h].val = val
 
@@ -2272,12 +2234,10 @@ proc remove[A](t: var CountTable[A], key: A) =
 proc rawGet[A](t: CountTable[A], key: A): int =
   if t.data.len == 0:
     return -1
-  let hc = hash(key)
-  var perturb = t.getPerturb(hc)
-  var h: Hash = hc and high(t.data) # start with real hash value
-  while t.data[h].val != 0: # TODO: may need to handle t.data[h].hcode == deletedMarker?
+  var h: Hash = hash(key) and high(t.data) # start with real hash value
+  while t.data[h].val != 0:
     if t.data[h].key == key: return h
-    h = nextTry(h, high(t.data), perturb)
+    h = nextTry(h, high(t.data))
   result = -1 - h # < 0 => MISSING; insert idx = -1 - result
 
 template ctget(t, key, default: untyped): untyped =