diff options
author | Araq <rumpf_a@web.de> | 2015-05-28 12:49:08 +0200 |
---|---|---|
committer | Araq <rumpf_a@web.de> | 2015-05-28 12:49:08 +0200 |
commit | 21ea8e6913fbfc16192ad3fd157e8e18e559219d (patch) | |
tree | 030667c5716d41953ab444059c7e98576aa4ae5e /lib/pure/collections/tables.nim | |
parent | c5a479d536eee243846282f3817ebb3916f6bcf4 (diff) | |
download | Nim-21ea8e6913fbfc16192ad3fd157e8e18e559219d.tar.gz |
some progress on making async multithreaded
Diffstat (limited to 'lib/pure/collections/tables.nim')
-rw-r--r-- | lib/pure/collections/tables.nim | 217 |
1 files changed, 49 insertions, 168 deletions
diff --git a/lib/pure/collections/tables.nim b/lib/pure/collections/tables.nim index a9357ce67..c802d0221 100644 --- a/lib/pure/collections/tables.nim +++ b/lib/pure/collections/tables.nim @@ -68,65 +68,20 @@ import hashes, math -{.pragma: myShallow.} - type KeyValuePair[A, B] = tuple[hcode: THash, key: A, val: B] KeyValuePairSeq[A, B] = seq[KeyValuePair[A, B]] - Table* {.myShallow.}[A, B] = object ## generic hash table + Table*[A, B] = object ## generic hash table data: KeyValuePairSeq[A, B] counter: int TableRef*[A,B] = ref Table[A, B] {.deprecated: [TTable: Table, PTable: TableRef].} -when not defined(nimhygiene): - {.pragma: dirty.} - -# hcode for real keys cannot be zero. hcode==0 signifies an empty slot. These -# two procs retain clarity of that encoding without the space cost of an enum. -proc isEmpty(hcode: THash): bool {.inline.} = - result = hcode == 0 - -proc isFilled(hcode: THash): bool {.inline.} = - result = hcode != 0 - -proc len*[A, B](t: Table[A, B]): int = - ## returns the number of keys in `t`. - result = t.counter - -iterator pairs*[A, B](t: Table[A, B]): (A, B) = - ## iterates over any (key, value) pair in the table `t`. - for h in 0..high(t.data): - if isFilled(t.data[h].hcode): yield (t.data[h].key, t.data[h].val) - -iterator mpairs*[A, B](t: var Table[A, B]): (A, var B) = - ## iterates over any (key, value) pair in the table `t`. The values - ## can be modified. - for h in 0..high(t.data): - if isFilled(t.data[h].hcode): yield (t.data[h].key, t.data[h].val) - -iterator keys*[A, B](t: Table[A, B]): A = - ## iterates over any key in the table `t`. - for h in 0..high(t.data): - if isFilled(t.data[h].hcode): yield t.data[h].key - -iterator values*[A, B](t: Table[A, B]): B = - ## iterates over any value in the table `t`. - for h in 0..high(t.data): - if isFilled(t.data[h].hcode): yield t.data[h].val - -iterator mvalues*[A, B](t: var Table[A, B]): var B = - ## iterates over any value in the table `t`. The values can be modified. - for h in 0..high(t.data): - if isFilled(t.data[h].hcode): yield t.data[h].val - -const - growthFactor = 2 +template maxHash(t): expr {.immediate.} = high(t.data) +template dataLen(t): expr = len(t.data) -proc mustRehash(length, counter: int): bool {.inline.} = - assert(length > counter) - result = (length * 2 < counter * 3) or (length - counter < 4) +include tableimpl proc rightSize*(count: Natural): int {.inline.} = ## Return the value of `initialSize` to support `count` items. @@ -137,49 +92,9 @@ proc rightSize*(count: Natural): int {.inline.} = ## Internally, we want mustRehash(rightSize(x), x) == false. result = nextPowerOfTwo(count * 3 div 2 + 4) -proc nextTry(h, maxHash: THash): THash {.inline.} = - result = (h + 1) and maxHash - -template rawGetKnownHCImpl() {.dirty.} = - var h: THash = hc and high(t.data) # start with real hash value - while isFilled(t.data[h].hcode): - # Compare hc THEN key with boolean short circuit. This makes the common case - # zero ==key's for missing (e.g.inserts) and exactly one ==key for present. - # It does slow down succeeding lookups by one extra THash cmp&and..usually - # just a few clock cycles, generally worth it for any non-integer-like A. - if t.data[h].hcode == hc and t.data[h].key == key: - return h - h = nextTry(h, high(t.data)) - result = -1 - h # < 0 => MISSING; insert idx = -1 - result - -template rawGetImpl() {.dirty.} = - hc = hash(key) - if hc == 0: # This almost never taken branch should be very predictable. - hc = 314159265 # Value doesn't matter; Any non-zero favorite is fine. - rawGetKnownHCImpl() - -template rawGetDeepImpl() {.dirty.} = # Search algo for unconditional add - hc = hash(key) - if hc == 0: - hc = 314159265 - var h: THash = hc and high(t.data) - while isFilled(t.data[h].hcode): - h = nextTry(h, high(t.data)) - result = h - -template rawInsertImpl() {.dirty.} = - data[h].key = key - data[h].val = val - data[h].hcode = hc - -proc rawGetKnownHC[A, B](t: Table[A, B], key: A, hc: THash): int {.inline.} = - rawGetKnownHCImpl() - -proc rawGetDeep[A, B](t: Table[A, B], key: A, hc: var THash): int {.inline.} = - rawGetDeepImpl() - -proc rawGet[A, B](t: Table[A, B], key: A, hc: var THash): int {.inline.} = - rawGetImpl() +proc len*[A, B](t: Table[A, B]): int = + ## returns the number of keys in `t`. + result = t.counter proc `[]`*[A, B](t: Table[A, B], key: A): B = ## retrieves the value at ``t[key]``. If `key` is not in `t`, @@ -215,9 +130,35 @@ proc hasKey*[A, B](t: Table[A, B], key: A): bool = var hc: THash result = rawGet(t, key, hc) >= 0 -proc rawInsert[A, B](t: var Table[A, B], data: var KeyValuePairSeq[A, B], - key: A, val: B, hc: THash, h: THash) = - rawInsertImpl() +iterator pairs*[A, B](t: Table[A, B]): (A, B) = + ## iterates over any (key, value) pair in the table `t`. + for h in 0..high(t.data): + if isFilled(t.data[h].hcode): yield (t.data[h].key, t.data[h].val) + +iterator mpairs*[A, B](t: var Table[A, B]): (A, var B) = + ## iterates over any (key, value) pair in the table `t`. The values + ## can be modified. + for h in 0..high(t.data): + if isFilled(t.data[h].hcode): yield (t.data[h].key, t.data[h].val) + +iterator keys*[A, B](t: Table[A, B]): A = + ## iterates over any key in the table `t`. + for h in 0..high(t.data): + if isFilled(t.data[h].hcode): yield t.data[h].key + +iterator values*[A, B](t: Table[A, B]): B = + ## iterates over any value in the table `t`. + for h in 0..high(t.data): + if isFilled(t.data[h].hcode): yield t.data[h].val + +iterator mvalues*[A, B](t: var Table[A, B]): var B = + ## iterates over any value in the table `t`. The values can be modified. + for h in 0..high(t.data): + if isFilled(t.data[h].hcode): yield t.data[h].val + +proc del*[A, B](t: var Table[A, B], key: A) = + ## deletes `key` from hash table `t`. + delImpl() proc enlarge[A, B](t: var Table[A, B]) = var n: KeyValuePairSeq[A, B] @@ -228,81 +169,26 @@ proc enlarge[A, B](t: var Table[A, B]) = var j = -1 - rawGetKnownHC(t, n[i].key, n[i].hcode) rawInsert(t, t.data, n[i].key, n[i].val, n[i].hcode, j) -template addImpl() {.dirty.} = - if mustRehash(len(t.data), t.counter): enlarge(t) - var hc: THash - var j = rawGetDeep(t, key, hc) - rawInsert(t, t.data, key, val, hc, j) - inc(t.counter) - -template maybeRehashPutImpl() {.dirty.} = - if mustRehash(len(t.data), t.counter): - enlarge(t) - index = rawGetKnownHC(t, key, hc) - index = -1 - index # important to transform for mgetOrPutImpl - rawInsert(t, t.data, key, val, hc, index) - inc(t.counter) - -template putImpl() {.dirty.} = - var hc: THash - var index = rawGet(t, key, hc) - if index >= 0: t.data[index].val = val - else: maybeRehashPutImpl() - -template mgetOrPutImpl() {.dirty.} = - var hc: THash - var index = rawGet(t, key, hc) - if index < 0: maybeRehashPutImpl() # not present: insert (flipping index) - result = t.data[index].val # either way return modifiable val - -template hasKeyOrPutImpl() {.dirty.} = - var hc: THash - var index = rawGet(t, key, hc) - if index < 0: - result = false - maybeRehashPutImpl() - else: result = true - proc mgetOrPut*[A, B](t: var Table[A, B], key: A, val: B): var B = ## retrieves value at ``t[key]`` or puts ``val`` if not present, either way ## returning a value which can be modified. - mgetOrPutImpl() + mgetOrPutImpl(enlarge) proc hasKeyOrPut*[A, B](t: var Table[A, B], key: A, val: B): bool = ## returns true iff `key` is in the table, otherwise inserts `value`. - hasKeyOrPutImpl() + hasKeyOrPutImpl(enlarge) proc `[]=`*[A, B](t: var Table[A, B], key: A, val: B) = ## puts a (key, value)-pair into `t`. - putImpl() + putImpl(enlarge) proc add*[A, B](t: var Table[A, B], key: A, val: B) = ## puts a new (key, value)-pair into `t` even if ``t[key]`` already exists. - addImpl() - -template doWhile(a: expr, b: stmt): stmt = - while true: - b - if not a: break + addImpl(enlarge) -proc del*[A, B](t: var Table[A, B], key: A) = - ## deletes `key` from hash table `t`. - var hc: THash - var i = rawGet(t, key, hc) - let msk = high(t.data) - if i >= 0: - t.data[i].hcode = 0 - dec(t.counter) - while true: # KnuthV3 Algo6.4R adapted for i=i+1 instead of i=i-1 - var j = i # The correctness of this depends on (h+1) in nextTry, - var r = j # though may be adaptable to other simple sequences. - t.data[i].hcode = 0 # mark current EMPTY - doWhile ((i >= r and r > j) or (r > j and j > i) or (j > i and i >= r)): - i = (i + 1) and msk # increment mod table size - if isEmpty(t.data[i].hcode): # end of collision cluster; So all done - return - r = t.data[i].hcode and msk # "home" location of key@i - shallowCopy(t.data[j], t.data[i]) # data[j] will be marked EMPTY next loop +proc len*[A, B](t: TableRef[A, B]): int = + ## returns the number of keys in `t`. + result = t.counter proc initTable*[A, B](initialSize=64): Table[A, B] = ## creates a new hash table that is empty. @@ -356,10 +242,6 @@ proc indexBy*[A, B, C](collection: A, index: proc(x: B): C): Table[C, B] = for item in collection: result[index(item)] = item -proc len*[A, B](t: TableRef[A, B]): int = - ## returns the number of keys in `t`. - result = t.counter - iterator pairs*[A, B](t: TableRef[A, B]): (A, B) = ## iterates over any (key, value) pair in the table `t`. for h in 0..high(t.data): @@ -454,8 +336,7 @@ type OrderedKeyValuePair[A, B] = tuple[ hcode: THash, next: int, key: A, val: B] OrderedKeyValuePairSeq[A, B] = seq[OrderedKeyValuePair[A, B]] - OrderedTable* {. - myShallow.}[A, B] = object ## table that remembers insertion order + OrderedTable* [A, B] = object ## table that remembers insertion order data: OrderedKeyValuePairSeq[A, B] counter, first, last: int OrderedTableRef*[A, B] = ref OrderedTable[A, B] @@ -557,20 +438,20 @@ proc enlarge[A, B](t: var OrderedTable[A, B]) = proc `[]=`*[A, B](t: var OrderedTable[A, B], key: A, val: B) = ## puts a (key, value)-pair into `t`. - putImpl() + putImpl(enlarge) proc add*[A, B](t: var OrderedTable[A, B], key: A, val: B) = ## puts a new (key, value)-pair into `t` even if ``t[key]`` already exists. - addImpl() + addImpl(enlarge) proc mgetOrPut*[A, B](t: var OrderedTable[A, B], key: A, val: B): var B = ## retrieves value at ``t[key]`` or puts ``value`` if not present, either way ## returning a value which can be modified. - mgetOrPutImpl() + mgetOrPutImpl(enlarge) proc hasKeyOrPut*[A, B](t: var OrderedTable[A, B], key: A, val: B): bool = ## returns true iff `key` is in the table, otherwise inserts `value`. - hasKeyOrPutImpl() + hasKeyOrPutImpl(enlarge) proc initOrderedTable*[A, B](initialSize=64): OrderedTable[A, B] = ## creates a new ordered hash table that is empty. @@ -741,7 +622,7 @@ proc sort*[A, B](t: OrderedTableRef[A, B], # ------------------------------ count tables ------------------------------- type - CountTable* {.myShallow.}[ + CountTable* [ A] = object ## table that counts the number of each key data: seq[tuple[key: A, val: int]] counter: int |