diff options
author | Reimer Behrends <behrends@gmail.com> | 2014-05-04 15:22:50 +0200 |
---|---|---|
committer | Reimer Behrends <behrends@gmail.com> | 2014-05-04 15:22:50 +0200 |
commit | 79891b6b9b0cb324081dbd6b558534f91ec134fe (patch) | |
tree | b07846c661c423fae7e59da67f9793efe9e10780 /lib/pure/collections/tables.nim | |
parent | 05712fe8053420ecf481757e529a7ef2f32fb75c (diff) | |
download | Nim-79891b6b9b0cb324081dbd6b558534f91ec134fe.tar.gz |
Added support for ref type hash tables.
This reuses the hash table implementation for objects (and the associated tests). For efficiency reasons, iterator implementations are currently adapted rather than calling the TTable code.
Diffstat (limited to 'lib/pure/collections/tables.nim')
-rw-r--r-- | lib/pure/collections/tables.nim | 267 |
1 files changed, 266 insertions, 1 deletions
diff --git a/lib/pure/collections/tables.nim b/lib/pure/collections/tables.nim index 33e558aee..848f4b8ba 100644 --- a/lib/pure/collections/tables.nim +++ b/lib/pure/collections/tables.nim @@ -66,6 +66,7 @@ type TTable* {.final, myShallow.}[A, B] = object ## generic hash table data: TKeyValuePairSeq[A, B] counter: int + PTable*[A,B] = ref TTable[A, B] when not defined(nimhygiene): {.pragma: dirty.} @@ -231,7 +232,7 @@ proc `$`*[A, B](t: TTable[A, B]): string = ## The `$` operator for hash tables. dollarImpl() -proc `==`*[A, B](s, t: TTable[A, B]): bool = +template equalsImpl() = if s.counter == t.counter: # different insertion orders mean different 'data' seqs, so we have # to use the slow route here: @@ -240,6 +241,9 @@ proc `==`*[A, B](s, t: TTable[A, B]): bool = if t[key] != val: return false return true +proc `==`*[A, B](s, t: TTable[A, B]): bool = + equalsImpl() + proc indexBy*[A, B, C](collection: A, index: proc(x: B): C): TTable[C, B] = ## Index the collection with the proc provided. # TODO: As soon as supported, change collection: A to collection: A[B] @@ -247,6 +251,88 @@ proc indexBy*[A, B, C](collection: A, index: proc(x: B): C): TTable[C, B] = for item in collection: result[index(item)] = item +proc len*[A, B](t: PTable[A, B]): int = + ## returns the number of keys in `t`. + result = t.counter + +iterator pairs*[A, B](t: PTable[A, B]): tuple[key: A, val: B] = + ## iterates over any (key, value) pair in the table `t`. + for h in 0..high(t.data): + if t.data[h].slot == seFilled: yield (t.data[h].key, t.data[h].val) + +iterator mpairs*[A, B](t: PTable[A, B]): tuple[key: A, val: var B] = + ## iterates over any (key, value) pair in the table `t`. The values + ## can be modified. + for h in 0..high(t.data): + if t.data[h].slot == seFilled: yield (t.data[h].key, t.data[h].val) + +iterator keys*[A, B](t: PTable[A, B]): A = + ## iterates over any key in the table `t`. + for h in 0..high(t.data): + if t.data[h].slot == seFilled: yield t.data[h].key + +iterator values*[A, B](t: PTable[A, B]): B = + ## iterates over any value in the table `t`. + for h in 0..high(t.data): + if t.data[h].slot == seFilled: yield t.data[h].val + +iterator mvalues*[A, B](t: PTable[A, B]): var B = + ## iterates over any value in the table `t`. The values can be modified. + for h in 0..high(t.data): + if t.data[h].slot == seFilled: yield t.data[h].val + +proc `[]`*[A, B](t: PTable[A, B], key: A): B = + ## retrieves the value at ``t[key]``. If `key` is not in `t`, + ## default empty value for the type `B` is returned + ## and no exception is raised. One can check with ``hasKey`` whether the key + ## exists. + result = t[][key] + +proc mget*[A, B](t: PTable[A, B], key: A): var B = + ## retrieves the value at ``t[key]``. The value can be modified. + ## If `key` is not in `t`, the ``EInvalidKey`` exception is raised. + t[].mget(key) + +proc hasKey*[A, B](t: PTable[A, B], key: A): bool = + ## returns true iff `key` is in the table `t`. + result = t[].hasKey(key) + +proc `[]=`*[A, B](t: PTable[A, B], key: A, val: B) = + ## puts a (key, value)-pair into `t`. + t[][key] = val + +proc add*[A, B](t: PTable[A, B], key: A, val: B) = + ## puts a new (key, value)-pair into `t` even if ``t[key]`` already exists. + t[].add(key, val) + +proc del*[A, B](t: PTable[A, B], key: A) = + ## deletes `key` from hash table `t`. + t[].del(key) + +proc newTable*[A, B](initialSize=64): PTable[A, B] = + new(result) + result[] = initTable[A, B](initialSize) + +proc newTable*[A, B](pairs: openArray[tuple[key: A, + val: B]]): PTable[A, B] = + ## creates a new hash table that contains the given `pairs`. + new(result) + result[] = toTable[A, B](pairs) + +proc `$`*[A, B](t: PTable[A, B]): string = + ## The `$` operator for hash tables. + dollarImpl() + +proc `==`*[A, B](s, t: PTable[A, B]): bool = + equalsImpl() + +proc newTableFrom*[A, B, C](collection: A, index: proc(x: B): C): PTable[C, B] = + ## Index the collection with the proc provided. + # TODO: As soon as supported, change collection: A to collection: A[B] + result = newTable[C, B]() + for item in collection: + result[index(item)] = item + # ------------------------------ ordered table ------------------------------ type @@ -257,6 +343,7 @@ type final, myShallow.}[A, B] = object ## table that remembers insertion order data: TOrderedKeyValuePairSeq[A, B] counter, first, last: int + POrderedTable*[A, B] = ref TOrderedTable[A, B] proc len*[A, B](t: TOrderedTable[A, B]): int {.inline.} = ## returns the number of keys in `t`. @@ -417,6 +504,96 @@ proc sort*[A, B](t: var TOrderedTable[A, B], t.first = list t.last = tail +proc len*[A, B](t: POrderedTable[A, B]): int {.inline.} = + ## returns the number of keys in `t`. + result = t.counter + +template forAllOrderedPairs(yieldStmt: stmt) {.dirty, immediate.} = + var h = t.first + while h >= 0: + var nxt = t.data[h].next + if t.data[h].slot == seFilled: yieldStmt + h = nxt + +iterator pairs*[A, B](t: POrderedTable[A, B]): tuple[key: A, val: B] = + ## iterates over any (key, value) pair in the table `t` in insertion + ## order. + forAllOrderedPairs: + yield (t.data[h].key, t.data[h].val) + +iterator mpairs*[A, B](t: POrderedTable[A, B]): tuple[key: A, val: var B] = + ## iterates over any (key, value) pair in the table `t` in insertion + ## order. The values can be modified. + forAllOrderedPairs: + yield (t.data[h].key, t.data[h].val) + +iterator keys*[A, B](t: POrderedTable[A, B]): A = + ## iterates over any key in the table `t` in insertion order. + forAllOrderedPairs: + yield t.data[h].key + +iterator values*[A, B](t: POrderedTable[A, B]): B = + ## iterates over any value in the table `t` in insertion order. + forAllOrderedPairs: + yield t.data[h].val + +iterator mvalues*[A, B](t: POrderedTable[A, B]): var B = + ## iterates over any value in the table `t` in insertion order. The values + ## can be modified. + forAllOrderedPairs: + yield t.data[h].val + +proc `[]`*[A, B](t: POrderedTable[A, B], key: A): B = + ## retrieves the value at ``t[key]``. If `key` is not in `t`, + ## default empty value for the type `B` is returned + ## and no exception is raised. One can check with ``hasKey`` whether the key + ## exists. + result = t[][key] + +proc mget*[A, B](t: POrderedTable[A, B], key: A): var B = + ## retrieves the value at ``t[key]``. The value can be modified. + ## If `key` is not in `t`, the ``EInvalidKey`` exception is raised. + result = t[].mget(key) + +proc hasKey*[A, B](t: POrderedTable[A, B], key: A): bool = + ## returns true iff `key` is in the table `t`. + result = t[].hasKey(key) + +proc `[]=`*[A, B](t: POrderedTable[A, B], key: A, val: B) = + ## puts a (key, value)-pair into `t`. + t[][key] = val + +proc add*[A, B](t: POrderedTable[A, B], key: A, val: B) = + ## puts a new (key, value)-pair into `t` even if ``t[key]`` already exists. + t[].add(key, val) + +proc newOrderedTable*[A, B](initialSize=64): POrderedTable[A, B] = + ## creates a new ordered hash table that is empty. + ## + ## `initialSize` needs to be a power of two. If you need to accept runtime + ## values for this you could use the ``nextPowerOfTwo`` proc from the + ## `math <math.html>`_ module. + new(result) + result[] = initOrderedTable[A, B]() + +proc newOrderedTable*[A, B](pairs: openArray[tuple[key: A, + val: B]]): POrderedTable[A, B] = + ## creates a new ordered hash table that contains the given `pairs`. + result = newOrderedTable[A, B](nextPowerOfTwo(pairs.len+10)) + for key, val in items(pairs): result[key] = val + +proc `$`*[A, B](t: POrderedTable[A, B]): string = + ## The `$` operator for ordered hash tables. + dollarImpl() + +proc sort*[A, B](t: POrderedTable[A, B], + cmp: proc (x,y: tuple[key: A, val: B]): int) = + ## sorts `t` according to `cmp`. This modifies the internal list + ## that kept the insertion order, so insertion order is lost after this + ## call but key lookup and insertions remain possible after `sort` (in + ## contrast to the `sort` for count tables). + t[].sort(cmp) + # ------------------------------ count tables ------------------------------- type @@ -424,6 +601,7 @@ type A] = object ## table that counts the number of each key data: seq[tuple[key: A, val: int]] counter: int + PCountTable*[A] = ref TCountTable[A] proc len*[A](t: TCountTable[A]): int = ## returns the number of keys in `t`. @@ -567,6 +745,93 @@ proc sort*[A](t: var TCountTable[A]) = if j < h: break if h == 1: break +proc len*[A](t: PCountTable[A]): int = + ## returns the number of keys in `t`. + result = t.counter + +iterator pairs*[A](t: PCountTable[A]): tuple[key: A, val: int] = + ## iterates over any (key, value) pair in the table `t`. + for h in 0..high(t.data): + if t.data[h].val != 0: yield (t.data[h].key, t.data[h].val) + +iterator mpairs*[A](t: PCountTable[A]): tuple[key: A, val: var int] = + ## iterates over any (key, value) pair in the table `t`. The values can + ## be modified. + for h in 0..high(t.data): + if t.data[h].val != 0: yield (t.data[h].key, t.data[h].val) + +iterator keys*[A](t: PCountTable[A]): A = + ## iterates over any key in the table `t`. + for h in 0..high(t.data): + if t.data[h].val != 0: yield t.data[h].key + +iterator values*[A](t: PCountTable[A]): int = + ## iterates over any value in the table `t`. + for h in 0..high(t.data): + if t.data[h].val != 0: yield t.data[h].val + +iterator mvalues*[A](t: PCountTable[A]): var int = + ## iterates over any value in the table `t`. The values can be modified. + for h in 0..high(t.data): + if t.data[h].val != 0: yield t.data[h].val + +proc `[]`*[A](t: PCountTable[A], key: A): int = + ## retrieves the value at ``t[key]``. If `key` is not in `t`, + ## 0 is returned. One can check with ``hasKey`` whether the key + ## exists. + result = t[][key] + +proc mget*[A](t: PCountTable[A], key: A): var int = + ## retrieves the value at ``t[key]``. The value can be modified. + ## If `key` is not in `t`, the ``EInvalidKey`` exception is raised. + result = t[].mget(key) + +proc hasKey*[A](t: PCountTable[A], key: A): bool = + ## returns true iff `key` is in the table `t`. + result = t[].hasKey(key) + +proc `[]=`*[A](t: PCountTable[A], key: A, val: int) = + ## puts a (key, value)-pair into `t`. `val` has to be positive. + assert val > 0 + t[][key] = val + +proc newCountTable*[A](initialSize=64): PCountTable[A] = + ## creates a new count table that is empty. + ## + ## `initialSize` needs to be a power of two. If you need to accept runtime + ## values for this you could use the ``nextPowerOfTwo`` proc from the + ## `math <math.html>`_ module. + new(result) + result[] = initCountTable[A](initialSize) + +proc newCountTable*[A](keys: openArray[A]): PCountTable[A] = + ## creates a new count table with every key in `keys` having a count of 1. + result = newCountTable[A](nextPowerOfTwo(keys.len+10)) + for key in items(keys): result[key] = 1 + +proc `$`*[A](t: PCountTable[A]): string = + ## The `$` operator for count tables. + dollarImpl() + +proc inc*[A](t: PCountTable[A], key: A, val = 1) = + ## increments `t[key]` by `val`. + t[].inc(key, val) + +proc smallest*[A](t: PCountTable[A]): tuple[key: A, val: int] = + ## returns the largest (key,val)-pair. Efficiency: O(n) + t[].smallest + +proc largest*[A](t: PCountTable[A]): tuple[key: A, val: int] = + ## returns the (key,val)-pair with the largest `val`. Efficiency: O(n) + t[].largest + +proc sort*[A](t: PCountTable[A]) = + ## sorts the count table so that the entry with the highest counter comes + ## first. This is destructive! You must not modify `t` afterwards! + ## You can use the iterators `pairs`, `keys`, and `values` to iterate over + ## `t` in the sorted order. + t[].sort + when isMainModule: type Person = object |