summary refs log tree commit diff stats
path: root/lib/pure/collections/tables.nim
diff options
context:
space:
mode:
author Araq <rumpf_a@web.de> 2015-05-28 12:49:08 +0200
committer Araq <rumpf_a@web.de> 2015-05-28 12:49:08 +0200
commit 21ea8e6913fbfc16192ad3fd157e8e18e559219d (patch)
tree 030667c5716d41953ab444059c7e98576aa4ae5e /lib/pure/collections/tables.nim
parent c5a479d536eee243846282f3817ebb3916f6bcf4 (diff)
download Nim-21ea8e6913fbfc16192ad3fd157e8e18e559219d.tar.gz
some progress on making async multithreaded
Diffstat (limited to 'lib/pure/collections/tables.nim')
-rw-r--r-- lib/pure/collections/tables.nim 217
1 files changed, 49 insertions, 168 deletions
diff --git a/lib/pure/collections/tables.nim b/lib/pure/collections/tables.nim
index a9357ce67..c802d0221 100644
--- a/lib/pure/collections/tables.nim
+++ b/lib/pure/collections/tables.nim
@@ -68,65 +68,20 @@
 import
   hashes, math
 
-{.pragma: myShallow.}
-
 type
   KeyValuePair[A, B] = tuple[hcode: THash, key: A, val: B]
   KeyValuePairSeq[A, B] = seq[KeyValuePair[A, B]]
-  Table* {.myShallow.}[A, B] = object ## generic hash table
+  Table*[A, B] = object ## generic hash table
     data: KeyValuePairSeq[A, B]
     counter: int
   TableRef*[A,B] = ref Table[A, B]
 
 {.deprecated: [TTable: Table, PTable: TableRef].}
 
-when not defined(nimhygiene):
-  {.pragma: dirty.}
-
-# hcode for real keys cannot be zero.  hcode==0 signifies an empty slot.  These
-# two procs retain clarity of that encoding without the space cost of an enum.
-proc isEmpty(hcode: THash): bool {.inline.} =
-  result = hcode == 0
-
-proc isFilled(hcode: THash): bool {.inline.} =
-  result = hcode != 0
-
-proc len*[A, B](t: Table[A, B]): int =
-  ## returns the number of keys in `t`.
-  result = t.counter
-
-iterator pairs*[A, B](t: Table[A, B]): (A, B) =
-  ## iterates over any (key, value) pair in the table `t`.
-  for h in 0..high(t.data):
-    if isFilled(t.data[h].hcode): yield (t.data[h].key, t.data[h].val)
-
-iterator mpairs*[A, B](t: var Table[A, B]): (A, var B) =
-  ## iterates over any (key, value) pair in the table `t`. The values
-  ## can be modified.
-  for h in 0..high(t.data):
-    if isFilled(t.data[h].hcode): yield (t.data[h].key, t.data[h].val)
-
-iterator keys*[A, B](t: Table[A, B]): A =
-  ## iterates over any key in the table `t`.
-  for h in 0..high(t.data):
-    if isFilled(t.data[h].hcode): yield t.data[h].key
-
-iterator values*[A, B](t: Table[A, B]): B =
-  ## iterates over any value in the table `t`.
-  for h in 0..high(t.data):
-    if isFilled(t.data[h].hcode): yield t.data[h].val
-
-iterator mvalues*[A, B](t: var Table[A, B]): var B =
-  ## iterates over any value in the table `t`. The values can be modified.
-  for h in 0..high(t.data):
-    if isFilled(t.data[h].hcode): yield t.data[h].val
-
-const
-  growthFactor = 2
+template maxHash(t): expr {.immediate.} = high(t.data)
+template dataLen(t): expr = len(t.data)
 
-proc mustRehash(length, counter: int): bool {.inline.} =
-  assert(length > counter)
-  result = (length * 2 < counter * 3) or (length - counter < 4)
+include tableimpl
 
 proc rightSize*(count: Natural): int {.inline.} =
   ## Return the value of `initialSize` to support `count` items.
@@ -137,49 +92,9 @@ proc rightSize*(count: Natural): int {.inline.} =
   ## Internally, we want mustRehash(rightSize(x), x) == false.
   result = nextPowerOfTwo(count * 3 div 2  +  4)
 
-proc nextTry(h, maxHash: THash): THash {.inline.} =
-  result = (h + 1) and maxHash
-
-template rawGetKnownHCImpl() {.dirty.} =
-  var h: THash = hc and high(t.data)   # start with real hash value
-  while isFilled(t.data[h].hcode):
-    # Compare hc THEN key with boolean short circuit. This makes the common case
-    # zero ==key's for missing (e.g.inserts) and exactly one ==key for present.
-    # It does slow down succeeding lookups by one extra THash cmp&and..usually
-    # just a few clock cycles, generally worth it for any non-integer-like A.
-    if t.data[h].hcode == hc and t.data[h].key == key:
-      return h
-    h = nextTry(h, high(t.data))
-  result = -1 - h                   # < 0 => MISSING; insert idx = -1 - result
-
-template rawGetImpl() {.dirty.} =
-  hc = hash(key)
-  if hc == 0:       # This almost never taken branch should be very predictable.
-    hc = 314159265  # Value doesn't matter; Any non-zero favorite is fine.
-  rawGetKnownHCImpl()
-
-template rawGetDeepImpl() {.dirty.} =   # Search algo for unconditional add
-  hc = hash(key)
-  if hc == 0:
-    hc = 314159265
-  var h: THash = hc and high(t.data)
-  while isFilled(t.data[h].hcode):
-    h = nextTry(h, high(t.data))
-  result = h
-
-template rawInsertImpl() {.dirty.} =
-  data[h].key = key
-  data[h].val = val
-  data[h].hcode = hc
-
-proc rawGetKnownHC[A, B](t: Table[A, B], key: A, hc: THash): int {.inline.} =
-  rawGetKnownHCImpl()
-
-proc rawGetDeep[A, B](t: Table[A, B], key: A, hc: var THash): int {.inline.} =
-  rawGetDeepImpl()
-
-proc rawGet[A, B](t: Table[A, B], key: A, hc: var THash): int {.inline.} =
-  rawGetImpl()
+proc len*[A, B](t: Table[A, B]): int =
+  ## returns the number of keys in `t`.
+  result = t.counter
 
 proc `[]`*[A, B](t: Table[A, B], key: A): B =
   ## retrieves the value at ``t[key]``. If `key` is not in `t`,
@@ -215,9 +130,35 @@ proc hasKey*[A, B](t: Table[A, B], key: A): bool =
   var hc: THash
   result = rawGet(t, key, hc) >= 0
 
-proc rawInsert[A, B](t: var Table[A, B], data: var KeyValuePairSeq[A, B],
-                     key: A, val: B, hc: THash, h: THash) =
-  rawInsertImpl()
+iterator pairs*[A, B](t: Table[A, B]): (A, B) =
+  ## iterates over any (key, value) pair in the table `t`.
+  for h in 0..high(t.data):
+    if isFilled(t.data[h].hcode): yield (t.data[h].key, t.data[h].val)
+
+iterator mpairs*[A, B](t: var Table[A, B]): (A, var B) =
+  ## iterates over any (key, value) pair in the table `t`. The values
+  ## can be modified.
+  for h in 0..high(t.data):
+    if isFilled(t.data[h].hcode): yield (t.data[h].key, t.data[h].val)
+
+iterator keys*[A, B](t: Table[A, B]): A =
+  ## iterates over any key in the table `t`.
+  for h in 0..high(t.data):
+    if isFilled(t.data[h].hcode): yield t.data[h].key
+
+iterator values*[A, B](t: Table[A, B]): B =
+  ## iterates over any value in the table `t`.
+  for h in 0..high(t.data):
+    if isFilled(t.data[h].hcode): yield t.data[h].val
+
+iterator mvalues*[A, B](t: var Table[A, B]): var B =
+  ## iterates over any value in the table `t`. The values can be modified.
+  for h in 0..high(t.data):
+    if isFilled(t.data[h].hcode): yield t.data[h].val
+
+proc del*[A, B](t: var Table[A, B], key: A) =
+  ## deletes `key` from hash table `t`.
+  delImpl()
 
 proc enlarge[A, B](t: var Table[A, B]) =
   var n: KeyValuePairSeq[A, B]
@@ -228,81 +169,26 @@ proc enlarge[A, B](t: var Table[A, B]) =
       var j = -1 - rawGetKnownHC(t, n[i].key, n[i].hcode)
       rawInsert(t, t.data, n[i].key, n[i].val, n[i].hcode, j)
 
-template addImpl() {.dirty.} =
-  if mustRehash(len(t.data), t.counter): enlarge(t)
-  var hc: THash
-  var j = rawGetDeep(t, key, hc)
-  rawInsert(t, t.data, key, val, hc, j)
-  inc(t.counter)
-
-template maybeRehashPutImpl() {.dirty.} =
-  if mustRehash(len(t.data), t.counter):
-    enlarge(t)
-    index = rawGetKnownHC(t, key, hc)
-  index = -1 - index                  # important to transform for mgetOrPutImpl
-  rawInsert(t, t.data, key, val, hc, index)
-  inc(t.counter)
-
-template putImpl() {.dirty.} =
-  var hc: THash
-  var index = rawGet(t, key, hc)
-  if index >= 0: t.data[index].val = val
-  else: maybeRehashPutImpl()
-
-template mgetOrPutImpl() {.dirty.} =
-  var hc: THash
-  var index = rawGet(t, key, hc)
-  if index < 0: maybeRehashPutImpl()    # not present: insert (flipping index)
-  result = t.data[index].val            # either way return modifiable val
-
-template hasKeyOrPutImpl() {.dirty.} =
-  var hc: THash
-  var index = rawGet(t, key, hc)
-  if index < 0:
-    result = false
-    maybeRehashPutImpl()
-  else: result = true
-
 proc mgetOrPut*[A, B](t: var Table[A, B], key: A, val: B): var B =
   ## retrieves value at ``t[key]`` or puts ``val`` if not present, either way
   ## returning a value which can be modified.
-  mgetOrPutImpl()
+  mgetOrPutImpl(enlarge)
 
 proc hasKeyOrPut*[A, B](t: var Table[A, B], key: A, val: B): bool =
   ## returns true iff `key` is in the table, otherwise inserts `value`.
-  hasKeyOrPutImpl()
+  hasKeyOrPutImpl(enlarge)
 
 proc `[]=`*[A, B](t: var Table[A, B], key: A, val: B) =
   ## puts a (key, value)-pair into `t`.
-  putImpl()
+  putImpl(enlarge)
 
 proc add*[A, B](t: var Table[A, B], key: A, val: B) =
   ## puts a new (key, value)-pair into `t` even if ``t[key]`` already exists.
-  addImpl()
-
-template doWhile(a: expr, b: stmt): stmt =
-  while true:
-    b
-    if not a: break
+  addImpl(enlarge)
 
-proc del*[A, B](t: var Table[A, B], key: A) =
-  ## deletes `key` from hash table `t`.
-  var hc: THash
-  var i = rawGet(t, key, hc)
-  let msk = high(t.data)
-  if i >= 0:
-    t.data[i].hcode = 0
-    dec(t.counter)
-    while true:         # KnuthV3 Algo6.4R adapted for i=i+1 instead of i=i-1
-      var j = i         # The correctness of this depends on (h+1) in nextTry,
-      var r = j         # though may be adaptable to other simple sequences.
-      t.data[i].hcode = 0              # mark current EMPTY
-      doWhile ((i >= r and r > j) or (r > j and j > i) or (j > i and i >= r)):
-        i = (i + 1) and msk            # increment mod table size
-        if isEmpty(t.data[i].hcode):   # end of collision cluster; So all done
-          return
-        r = t.data[i].hcode and msk    # "home" location of key@i
-      shallowCopy(t.data[j], t.data[i]) # data[j] will be marked EMPTY next loop
+proc len*[A, B](t: TableRef[A, B]): int =
+  ## returns the number of keys in `t`.
+  result = t.counter
 
 proc initTable*[A, B](initialSize=64): Table[A, B] =
   ## creates a new hash table that is empty.
@@ -356,10 +242,6 @@ proc indexBy*[A, B, C](collection: A, index: proc(x: B): C): Table[C, B] =
   for item in collection:
     result[index(item)] = item
 
-proc len*[A, B](t: TableRef[A, B]): int =
-  ## returns the number of keys in `t`.
-  result = t.counter
-
 iterator pairs*[A, B](t: TableRef[A, B]): (A, B) =
   ## iterates over any (key, value) pair in the table `t`.
   for h in 0..high(t.data):
@@ -454,8 +336,7 @@ type
   OrderedKeyValuePair[A, B] = tuple[
     hcode: THash, next: int, key: A, val: B]
   OrderedKeyValuePairSeq[A, B] = seq[OrderedKeyValuePair[A, B]]
-  OrderedTable* {.
-      myShallow.}[A, B] = object ## table that remembers insertion order
+  OrderedTable* [A, B] = object ## table that remembers insertion order
     data: OrderedKeyValuePairSeq[A, B]
     counter, first, last: int
   OrderedTableRef*[A, B] = ref OrderedTable[A, B]
@@ -557,20 +438,20 @@ proc enlarge[A, B](t: var OrderedTable[A, B]) =
 
 proc `[]=`*[A, B](t: var OrderedTable[A, B], key: A, val: B) =
   ## puts a (key, value)-pair into `t`.
-  putImpl()
+  putImpl(enlarge)
 
 proc add*[A, B](t: var OrderedTable[A, B], key: A, val: B) =
   ## puts a new (key, value)-pair into `t` even if ``t[key]`` already exists.
-  addImpl()
+  addImpl(enlarge)
 
 proc mgetOrPut*[A, B](t: var OrderedTable[A, B], key: A, val: B): var B =
   ## retrieves value at ``t[key]`` or puts ``value`` if not present, either way
   ## returning a value which can be modified.
-  mgetOrPutImpl()
+  mgetOrPutImpl(enlarge)
 
 proc hasKeyOrPut*[A, B](t: var OrderedTable[A, B], key: A, val: B): bool =
   ## returns true iff `key` is in the table, otherwise inserts `value`.
-  hasKeyOrPutImpl()
+  hasKeyOrPutImpl(enlarge)
 
 proc initOrderedTable*[A, B](initialSize=64): OrderedTable[A, B] =
   ## creates a new ordered hash table that is empty.
@@ -741,7 +622,7 @@ proc sort*[A, B](t: OrderedTableRef[A, B],
 # ------------------------------ count tables -------------------------------
 
 type
-  CountTable* {.myShallow.}[
+  CountTable* [
       A] = object ## table that counts the number of each key
     data: seq[tuple[key: A, val: int]]
     counter: int