Added support for ref type hash tables.

This reuses the hash table implementation for objects (and the associated tests). For efficiency reasons, iterator implementations are currently adapted rather than calling the TTable code.
author: Reimer Behrends <behrends@gmail.com> 2014-05-04 15:22:50 +0200
committer: Reimer Behrends <behrends@gmail.com> 2014-05-04 15:22:50 +0200
commit: 79891b6b9b0cb324081dbd6b558534f91ec134fe (patch)
tree: b07846c661c423fae7e59da67f9793efe9e10780 /lib/pure/collections/tables.nim
parent: 05712fe8053420ecf481757e529a7ef2f32fb75c (diff)
download: Nim-79891b6b9b0cb324081dbd6b558534f91ec134fe.tar.gz
1 files changed, 266 insertions, 1 deletions
diff --git a/lib/pure/collections/tables.nim b/lib/pure/collections/tables.nim
index 33e558aee..848f4b8ba 100644
--- a/lib/pure/collections/tables.nim
+++ b/lib/pure/collections/tables.nim
@@ -66,6 +66,7 @@ type
   TTable* {.final, myShallow.}[A, B] = object ## generic hash table
     data: TKeyValuePairSeq[A, B]
     counter: int
+  PTable*[A,B] = ref TTable[A, B]
 
 when not defined(nimhygiene):
   {.pragma: dirty.}
@@ -231,7 +232,7 @@ proc `$`*[A, B](t: TTable[A, B]): string =
   ## The `$` operator for hash tables.
   dollarImpl()
   
-proc `==`*[A, B](s, t: TTable[A, B]): bool =
+template equalsImpl() =
   if s.counter == t.counter:
     # different insertion orders mean different 'data' seqs, so we have
     # to use the slow route here:
@@ -240,6 +241,9 @@ proc `==`*[A, B](s, t: TTable[A, B]): bool =
       if t[key] != val: return false
     return true
   
+proc `==`*[A, B](s, t: TTable[A, B]): bool =
+  equalsImpl()
+  
 proc indexBy*[A, B, C](collection: A, index: proc(x: B): C): TTable[C, B] =
   ## Index the collection with the proc provided.
   # TODO: As soon as supported, change collection: A to collection: A[B]
@@ -247,6 +251,88 @@ proc indexBy*[A, B, C](collection: A, index: proc(x: B): C): TTable[C, B] =
   for item in collection:
     result[index(item)] = item
 
+proc len*[A, B](t: PTable[A, B]): int =
+  ## returns the number of keys in `t`.
+  result = t.counter
+
+iterator pairs*[A, B](t: PTable[A, B]): tuple[key: A, val: B] =
+  ## iterates over any (key, value) pair in the table `t`.
+  for h in 0..high(t.data):
+    if t.data[h].slot == seFilled: yield (t.data[h].key, t.data[h].val)
+
+iterator mpairs*[A, B](t: PTable[A, B]): tuple[key: A, val: var B] =
+  ## iterates over any (key, value) pair in the table `t`. The values
+  ## can be modified.
+  for h in 0..high(t.data):
+    if t.data[h].slot == seFilled: yield (t.data[h].key, t.data[h].val)
+
+iterator keys*[A, B](t: PTable[A, B]): A =
+  ## iterates over any key in the table `t`.
+  for h in 0..high(t.data):
+    if t.data[h].slot == seFilled: yield t.data[h].key
+
+iterator values*[A, B](t: PTable[A, B]): B =
+  ## iterates over any value in the table `t`.
+  for h in 0..high(t.data):
+    if t.data[h].slot == seFilled: yield t.data[h].val
+
+iterator mvalues*[A, B](t: PTable[A, B]): var B =
+  ## iterates over any value in the table `t`. The values can be modified.
+  for h in 0..high(t.data):
+    if t.data[h].slot == seFilled: yield t.data[h].val
+
+proc `[]`*[A, B](t: PTable[A, B], key: A): B =
+  ## retrieves the value at ``t[key]``. If `key` is not in `t`,
+  ## default empty value for the type `B` is returned
+  ## and no exception is raised. One can check with ``hasKey`` whether the key
+  ## exists.
+  result = t[][key]
+
+proc mget*[A, B](t: PTable[A, B], key: A): var B =
+  ## retrieves the value at ``t[key]``. The value can be modified.
+  ## If `key` is not in `t`, the ``EInvalidKey`` exception is raised.
+  t[].mget(key)
+
+proc hasKey*[A, B](t: PTable[A, B], key: A): bool =
+  ## returns true iff `key` is in the table `t`.
+  result = t[].hasKey(key)
+
+proc `[]=`*[A, B](t: PTable[A, B], key: A, val: B) =
+  ## puts a (key, value)-pair into `t`.
+  t[][key] = val
+
+proc add*[A, B](t: PTable[A, B], key: A, val: B) =
+  ## puts a new (key, value)-pair into `t` even if ``t[key]`` already exists.
+  t[].add(key, val)
+  
+proc del*[A, B](t: PTable[A, B], key: A) =
+  ## deletes `key` from hash table `t`.
+  t[].del(key)
+
+proc newTable*[A, B](initialSize=64): PTable[A, B] =
+  new(result)
+  result[] = initTable[A, B](initialSize)
+
+proc newTable*[A, B](pairs: openArray[tuple[key: A, 
+                    val: B]]): PTable[A, B] =
+  ## creates a new hash table that contains the given `pairs`.
+  new(result)
+  result[] = toTable[A, B](pairs)
+
+proc `$`*[A, B](t: PTable[A, B]): string =
+  ## The `$` operator for hash tables.
+  dollarImpl()
+
+proc `==`*[A, B](s, t: PTable[A, B]): bool =
+  equalsImpl()
+
+proc newTableFrom*[A, B, C](collection: A, index: proc(x: B): C): PTable[C, B] =
+  ## Index the collection with the proc provided.
+  # TODO: As soon as supported, change collection: A to collection: A[B]
+  result = newTable[C, B]()
+  for item in collection:
+    result[index(item)] = item
+
 # ------------------------------ ordered table ------------------------------
 
 type
@@ -257,6 +343,7 @@ type
       final, myShallow.}[A, B] = object ## table that remembers insertion order
     data: TOrderedKeyValuePairSeq[A, B]
     counter, first, last: int
+  POrderedTable*[A, B] = ref TOrderedTable[A, B]
 
 proc len*[A, B](t: TOrderedTable[A, B]): int {.inline.} =
   ## returns the number of keys in `t`.
@@ -417,6 +504,96 @@ proc sort*[A, B](t: var TOrderedTable[A, B],
   t.first = list
   t.last = tail
 
+proc len*[A, B](t: POrderedTable[A, B]): int {.inline.} =
+  ## returns the number of keys in `t`.
+  result = t.counter
+
+template forAllOrderedPairs(yieldStmt: stmt) {.dirty, immediate.} =
+  var h = t.first
+  while h >= 0:
+    var nxt = t.data[h].next
+    if t.data[h].slot == seFilled: yieldStmt
+    h = nxt
+
+iterator pairs*[A, B](t: POrderedTable[A, B]): tuple[key: A, val: B] =
+  ## iterates over any (key, value) pair in the table `t` in insertion
+  ## order.
+  forAllOrderedPairs:
+    yield (t.data[h].key, t.data[h].val)
+
+iterator mpairs*[A, B](t: POrderedTable[A, B]): tuple[key: A, val: var B] =
+  ## iterates over any (key, value) pair in the table `t` in insertion
+  ## order. The values can be modified.
+  forAllOrderedPairs:
+    yield (t.data[h].key, t.data[h].val)
+
+iterator keys*[A, B](t: POrderedTable[A, B]): A =
+  ## iterates over any key in the table `t` in insertion order.
+  forAllOrderedPairs:
+    yield t.data[h].key
+
+iterator values*[A, B](t: POrderedTable[A, B]): B =
+  ## iterates over any value in the table `t` in insertion order.
+  forAllOrderedPairs:
+    yield t.data[h].val
+
+iterator mvalues*[A, B](t: POrderedTable[A, B]): var B =
+  ## iterates over any value in the table `t` in insertion order. The values
+  ## can be modified.
+  forAllOrderedPairs:
+    yield t.data[h].val
+
+proc `[]`*[A, B](t: POrderedTable[A, B], key: A): B =
+  ## retrieves the value at ``t[key]``. If `key` is not in `t`,
+  ## default empty value for the type `B` is returned
+  ## and no exception is raised. One can check with ``hasKey`` whether the key
+  ## exists.
+  result = t[][key]
+
+proc mget*[A, B](t: POrderedTable[A, B], key: A): var B =
+  ## retrieves the value at ``t[key]``. The value can be modified.
+  ## If `key` is not in `t`, the ``EInvalidKey`` exception is raised.
+  result = t[].mget(key)
+
+proc hasKey*[A, B](t: POrderedTable[A, B], key: A): bool =
+  ## returns true iff `key` is in the table `t`.
+  result = t[].hasKey(key)
+
+proc `[]=`*[A, B](t: POrderedTable[A, B], key: A, val: B) =
+  ## puts a (key, value)-pair into `t`.
+  t[][key] = val
+
+proc add*[A, B](t: POrderedTable[A, B], key: A, val: B) =
+  ## puts a new (key, value)-pair into `t` even if ``t[key]`` already exists.
+  t[].add(key, val)
+
+proc newOrderedTable*[A, B](initialSize=64): POrderedTable[A, B] =
+  ## creates a new ordered hash table that is empty.
+  ##
+  ## `initialSize` needs to be a power of two. If you need to accept runtime
+  ## values for this you could use the ``nextPowerOfTwo`` proc from the
+  ## `math <math.html>`_ module.
+  new(result)
+  result[] = initOrderedTable[A, B]()
+
+proc newOrderedTable*[A, B](pairs: openArray[tuple[key: A, 
+                           val: B]]): POrderedTable[A, B] =
+  ## creates a new ordered hash table that contains the given `pairs`.
+  result = newOrderedTable[A, B](nextPowerOfTwo(pairs.len+10))
+  for key, val in items(pairs): result[key] = val
+
+proc `$`*[A, B](t: POrderedTable[A, B]): string =
+  ## The `$` operator for ordered hash tables.
+  dollarImpl()
+
+proc sort*[A, B](t: POrderedTable[A, B], 
+                 cmp: proc (x,y: tuple[key: A, val: B]): int) =
+  ## sorts `t` according to `cmp`. This modifies the internal list
+  ## that kept the insertion order, so insertion order is lost after this
+  ## call but key lookup and insertions remain possible after `sort` (in
+  ## contrast to the `sort` for count tables).
+  t[].sort(cmp)
+
 # ------------------------------ count tables -------------------------------
 
 type
@@ -424,6 +601,7 @@ type
       A] = object ## table that counts the number of each key
     data: seq[tuple[key: A, val: int]]
     counter: int
+  PCountTable*[A] = ref TCountTable[A]
 
 proc len*[A](t: TCountTable[A]): int =
   ## returns the number of keys in `t`.
@@ -567,6 +745,93 @@ proc sort*[A](t: var TCountTable[A]) =
         if j < h: break
     if h == 1: break
 
+proc len*[A](t: PCountTable[A]): int =
+  ## returns the number of keys in `t`.
+  result = t.counter
+
+iterator pairs*[A](t: PCountTable[A]): tuple[key: A, val: int] =
+  ## iterates over any (key, value) pair in the table `t`.
+  for h in 0..high(t.data):
+    if t.data[h].val != 0: yield (t.data[h].key, t.data[h].val)
+
+iterator mpairs*[A](t: PCountTable[A]): tuple[key: A, val: var int] =
+  ## iterates over any (key, value) pair in the table `t`. The values can
+  ## be modified.
+  for h in 0..high(t.data):
+    if t.data[h].val != 0: yield (t.data[h].key, t.data[h].val)
+
+iterator keys*[A](t: PCountTable[A]): A =
+  ## iterates over any key in the table `t`.
+  for h in 0..high(t.data):
+    if t.data[h].val != 0: yield t.data[h].key
+
+iterator values*[A](t: PCountTable[A]): int =
+  ## iterates over any value in the table `t`.
+  for h in 0..high(t.data):
+    if t.data[h].val != 0: yield t.data[h].val
+
+iterator mvalues*[A](t: PCountTable[A]): var int =
+  ## iterates over any value in the table `t`. The values can be modified.
+  for h in 0..high(t.data):
+    if t.data[h].val != 0: yield t.data[h].val
+
+proc `[]`*[A](t: PCountTable[A], key: A): int =
+  ## retrieves the value at ``t[key]``. If `key` is not in `t`,
+  ## 0 is returned. One can check with ``hasKey`` whether the key
+  ## exists.
+  result = t[][key]
+
+proc mget*[A](t: PCountTable[A], key: A): var int =
+  ## retrieves the value at ``t[key]``. The value can be modified.
+  ## If `key` is not in `t`, the ``EInvalidKey`` exception is raised.
+  result = t[].mget(key)
+
+proc hasKey*[A](t: PCountTable[A], key: A): bool =
+  ## returns true iff `key` is in the table `t`.
+  result = t[].hasKey(key)
+
+proc `[]=`*[A](t: PCountTable[A], key: A, val: int) =
+  ## puts a (key, value)-pair into `t`. `val` has to be positive.
+  assert val > 0
+  t[][key] = val
+
+proc newCountTable*[A](initialSize=64): PCountTable[A] =
+  ## creates a new count table that is empty.
+  ##
+  ## `initialSize` needs to be a power of two. If you need to accept runtime
+  ## values for this you could use the ``nextPowerOfTwo`` proc from the
+  ## `math <math.html>`_ module.
+  new(result)
+  result[] = initCountTable[A](initialSize)
+
+proc newCountTable*[A](keys: openArray[A]): PCountTable[A] =
+  ## creates a new count table with every key in `keys` having a count of 1.
+  result = newCountTable[A](nextPowerOfTwo(keys.len+10))
+  for key in items(keys): result[key] = 1
+
+proc `$`*[A](t: PCountTable[A]): string =
+  ## The `$` operator for count tables.
+  dollarImpl()
+
+proc inc*[A](t: PCountTable[A], key: A, val = 1) = 
+  ## increments `t[key]` by `val`.
+  t[].inc(key, val)
+
+proc smallest*[A](t: PCountTable[A]): tuple[key: A, val: int] =
+  ## returns the largest (key,val)-pair. Efficiency: O(n)
+  t[].smallest
+
+proc largest*[A](t: PCountTable[A]): tuple[key: A, val: int] =
+  ## returns the (key,val)-pair with the largest `val`. Efficiency: O(n)
+  t[].largest
+
+proc sort*[A](t: PCountTable[A]) =
+  ## sorts the count table so that the entry with the highest counter comes
+  ## first. This is destructive! You must not modify `t` afterwards!
+  ## You can use the iterators `pairs`,  `keys`, and `values` to iterate over
+  ## `t` in the sorted order.
+  t[].sort
+
 when isMainModule:
   type
     Person = object
author	Reimer Behrends <behrends@gmail.com>	2014-05-04 15:22:50 +0200
committer	Reimer Behrends <behrends@gmail.com>	2014-05-04 15:22:50 +0200
commit	79891b6b9b0cb324081dbd6b558534f91ec134fe (patch)
tree	b07846c661c423fae7e59da67f9793efe9e10780 /lib/pure/collections/tables.nim
parent	05712fe8053420ecf481757e529a7ef2f32fb75c (diff)
download	Nim-79891b6b9b0cb324081dbd6b558534f91ec134fe.tar.gz