diff options
-rw-r--r-- | lib/pure/collections/tables.nim | 267 | ||||
-rw-r--r-- | tests/table/ptables.nim | 128 | ||||
-rw-r--r-- | tests/table/ptables2.nim | 20 |
3 files changed, 414 insertions, 1 deletions
diff --git a/lib/pure/collections/tables.nim b/lib/pure/collections/tables.nim index 33e558aee..848f4b8ba 100644 --- a/lib/pure/collections/tables.nim +++ b/lib/pure/collections/tables.nim @@ -66,6 +66,7 @@ type TTable* {.final, myShallow.}[A, B] = object ## generic hash table data: TKeyValuePairSeq[A, B] counter: int + PTable*[A,B] = ref TTable[A, B] when not defined(nimhygiene): {.pragma: dirty.} @@ -231,7 +232,7 @@ proc `$`*[A, B](t: TTable[A, B]): string = ## The `$` operator for hash tables. dollarImpl() -proc `==`*[A, B](s, t: TTable[A, B]): bool = +template equalsImpl() = if s.counter == t.counter: # different insertion orders mean different 'data' seqs, so we have # to use the slow route here: @@ -240,6 +241,9 @@ proc `==`*[A, B](s, t: TTable[A, B]): bool = if t[key] != val: return false return true +proc `==`*[A, B](s, t: TTable[A, B]): bool = + equalsImpl() + proc indexBy*[A, B, C](collection: A, index: proc(x: B): C): TTable[C, B] = ## Index the collection with the proc provided. # TODO: As soon as supported, change collection: A to collection: A[B] @@ -247,6 +251,88 @@ proc indexBy*[A, B, C](collection: A, index: proc(x: B): C): TTable[C, B] = for item in collection: result[index(item)] = item +proc len*[A, B](t: PTable[A, B]): int = + ## returns the number of keys in `t`. + result = t.counter + +iterator pairs*[A, B](t: PTable[A, B]): tuple[key: A, val: B] = + ## iterates over any (key, value) pair in the table `t`. + for h in 0..high(t.data): + if t.data[h].slot == seFilled: yield (t.data[h].key, t.data[h].val) + +iterator mpairs*[A, B](t: PTable[A, B]): tuple[key: A, val: var B] = + ## iterates over any (key, value) pair in the table `t`. The values + ## can be modified. + for h in 0..high(t.data): + if t.data[h].slot == seFilled: yield (t.data[h].key, t.data[h].val) + +iterator keys*[A, B](t: PTable[A, B]): A = + ## iterates over any key in the table `t`. + for h in 0..high(t.data): + if t.data[h].slot == seFilled: yield t.data[h].key + +iterator values*[A, B](t: PTable[A, B]): B = + ## iterates over any value in the table `t`. + for h in 0..high(t.data): + if t.data[h].slot == seFilled: yield t.data[h].val + +iterator mvalues*[A, B](t: PTable[A, B]): var B = + ## iterates over any value in the table `t`. The values can be modified. + for h in 0..high(t.data): + if t.data[h].slot == seFilled: yield t.data[h].val + +proc `[]`*[A, B](t: PTable[A, B], key: A): B = + ## retrieves the value at ``t[key]``. If `key` is not in `t`, + ## default empty value for the type `B` is returned + ## and no exception is raised. One can check with ``hasKey`` whether the key + ## exists. + result = t[][key] + +proc mget*[A, B](t: PTable[A, B], key: A): var B = + ## retrieves the value at ``t[key]``. The value can be modified. + ## If `key` is not in `t`, the ``EInvalidKey`` exception is raised. + t[].mget(key) + +proc hasKey*[A, B](t: PTable[A, B], key: A): bool = + ## returns true iff `key` is in the table `t`. + result = t[].hasKey(key) + +proc `[]=`*[A, B](t: PTable[A, B], key: A, val: B) = + ## puts a (key, value)-pair into `t`. + t[][key] = val + +proc add*[A, B](t: PTable[A, B], key: A, val: B) = + ## puts a new (key, value)-pair into `t` even if ``t[key]`` already exists. + t[].add(key, val) + +proc del*[A, B](t: PTable[A, B], key: A) = + ## deletes `key` from hash table `t`. + t[].del(key) + +proc newTable*[A, B](initialSize=64): PTable[A, B] = + new(result) + result[] = initTable[A, B](initialSize) + +proc newTable*[A, B](pairs: openArray[tuple[key: A, + val: B]]): PTable[A, B] = + ## creates a new hash table that contains the given `pairs`. + new(result) + result[] = toTable[A, B](pairs) + +proc `$`*[A, B](t: PTable[A, B]): string = + ## The `$` operator for hash tables. + dollarImpl() + +proc `==`*[A, B](s, t: PTable[A, B]): bool = + equalsImpl() + +proc newTableFrom*[A, B, C](collection: A, index: proc(x: B): C): PTable[C, B] = + ## Index the collection with the proc provided. + # TODO: As soon as supported, change collection: A to collection: A[B] + result = newTable[C, B]() + for item in collection: + result[index(item)] = item + # ------------------------------ ordered table ------------------------------ type @@ -257,6 +343,7 @@ type final, myShallow.}[A, B] = object ## table that remembers insertion order data: TOrderedKeyValuePairSeq[A, B] counter, first, last: int + POrderedTable*[A, B] = ref TOrderedTable[A, B] proc len*[A, B](t: TOrderedTable[A, B]): int {.inline.} = ## returns the number of keys in `t`. @@ -417,6 +504,96 @@ proc sort*[A, B](t: var TOrderedTable[A, B], t.first = list t.last = tail +proc len*[A, B](t: POrderedTable[A, B]): int {.inline.} = + ## returns the number of keys in `t`. + result = t.counter + +template forAllOrderedPairs(yieldStmt: stmt) {.dirty, immediate.} = + var h = t.first + while h >= 0: + var nxt = t.data[h].next + if t.data[h].slot == seFilled: yieldStmt + h = nxt + +iterator pairs*[A, B](t: POrderedTable[A, B]): tuple[key: A, val: B] = + ## iterates over any (key, value) pair in the table `t` in insertion + ## order. + forAllOrderedPairs: + yield (t.data[h].key, t.data[h].val) + +iterator mpairs*[A, B](t: POrderedTable[A, B]): tuple[key: A, val: var B] = + ## iterates over any (key, value) pair in the table `t` in insertion + ## order. The values can be modified. + forAllOrderedPairs: + yield (t.data[h].key, t.data[h].val) + +iterator keys*[A, B](t: POrderedTable[A, B]): A = + ## iterates over any key in the table `t` in insertion order. + forAllOrderedPairs: + yield t.data[h].key + +iterator values*[A, B](t: POrderedTable[A, B]): B = + ## iterates over any value in the table `t` in insertion order. + forAllOrderedPairs: + yield t.data[h].val + +iterator mvalues*[A, B](t: POrderedTable[A, B]): var B = + ## iterates over any value in the table `t` in insertion order. The values + ## can be modified. + forAllOrderedPairs: + yield t.data[h].val + +proc `[]`*[A, B](t: POrderedTable[A, B], key: A): B = + ## retrieves the value at ``t[key]``. If `key` is not in `t`, + ## default empty value for the type `B` is returned + ## and no exception is raised. One can check with ``hasKey`` whether the key + ## exists. + result = t[][key] + +proc mget*[A, B](t: POrderedTable[A, B], key: A): var B = + ## retrieves the value at ``t[key]``. The value can be modified. + ## If `key` is not in `t`, the ``EInvalidKey`` exception is raised. + result = t[].mget(key) + +proc hasKey*[A, B](t: POrderedTable[A, B], key: A): bool = + ## returns true iff `key` is in the table `t`. + result = t[].hasKey(key) + +proc `[]=`*[A, B](t: POrderedTable[A, B], key: A, val: B) = + ## puts a (key, value)-pair into `t`. + t[][key] = val + +proc add*[A, B](t: POrderedTable[A, B], key: A, val: B) = + ## puts a new (key, value)-pair into `t` even if ``t[key]`` already exists. + t[].add(key, val) + +proc newOrderedTable*[A, B](initialSize=64): POrderedTable[A, B] = + ## creates a new ordered hash table that is empty. + ## + ## `initialSize` needs to be a power of two. If you need to accept runtime + ## values for this you could use the ``nextPowerOfTwo`` proc from the + ## `math <math.html>`_ module. + new(result) + result[] = initOrderedTable[A, B]() + +proc newOrderedTable*[A, B](pairs: openArray[tuple[key: A, + val: B]]): POrderedTable[A, B] = + ## creates a new ordered hash table that contains the given `pairs`. + result = newOrderedTable[A, B](nextPowerOfTwo(pairs.len+10)) + for key, val in items(pairs): result[key] = val + +proc `$`*[A, B](t: POrderedTable[A, B]): string = + ## The `$` operator for ordered hash tables. + dollarImpl() + +proc sort*[A, B](t: POrderedTable[A, B], + cmp: proc (x,y: tuple[key: A, val: B]): int) = + ## sorts `t` according to `cmp`. This modifies the internal list + ## that kept the insertion order, so insertion order is lost after this + ## call but key lookup and insertions remain possible after `sort` (in + ## contrast to the `sort` for count tables). + t[].sort(cmp) + # ------------------------------ count tables ------------------------------- type @@ -424,6 +601,7 @@ type A] = object ## table that counts the number of each key data: seq[tuple[key: A, val: int]] counter: int + PCountTable*[A] = ref TCountTable[A] proc len*[A](t: TCountTable[A]): int = ## returns the number of keys in `t`. @@ -567,6 +745,93 @@ proc sort*[A](t: var TCountTable[A]) = if j < h: break if h == 1: break +proc len*[A](t: PCountTable[A]): int = + ## returns the number of keys in `t`. + result = t.counter + +iterator pairs*[A](t: PCountTable[A]): tuple[key: A, val: int] = + ## iterates over any (key, value) pair in the table `t`. + for h in 0..high(t.data): + if t.data[h].val != 0: yield (t.data[h].key, t.data[h].val) + +iterator mpairs*[A](t: PCountTable[A]): tuple[key: A, val: var int] = + ## iterates over any (key, value) pair in the table `t`. The values can + ## be modified. + for h in 0..high(t.data): + if t.data[h].val != 0: yield (t.data[h].key, t.data[h].val) + +iterator keys*[A](t: PCountTable[A]): A = + ## iterates over any key in the table `t`. + for h in 0..high(t.data): + if t.data[h].val != 0: yield t.data[h].key + +iterator values*[A](t: PCountTable[A]): int = + ## iterates over any value in the table `t`. + for h in 0..high(t.data): + if t.data[h].val != 0: yield t.data[h].val + +iterator mvalues*[A](t: PCountTable[A]): var int = + ## iterates over any value in the table `t`. The values can be modified. + for h in 0..high(t.data): + if t.data[h].val != 0: yield t.data[h].val + +proc `[]`*[A](t: PCountTable[A], key: A): int = + ## retrieves the value at ``t[key]``. If `key` is not in `t`, + ## 0 is returned. One can check with ``hasKey`` whether the key + ## exists. + result = t[][key] + +proc mget*[A](t: PCountTable[A], key: A): var int = + ## retrieves the value at ``t[key]``. The value can be modified. + ## If `key` is not in `t`, the ``EInvalidKey`` exception is raised. + result = t[].mget(key) + +proc hasKey*[A](t: PCountTable[A], key: A): bool = + ## returns true iff `key` is in the table `t`. + result = t[].hasKey(key) + +proc `[]=`*[A](t: PCountTable[A], key: A, val: int) = + ## puts a (key, value)-pair into `t`. `val` has to be positive. + assert val > 0 + t[][key] = val + +proc newCountTable*[A](initialSize=64): PCountTable[A] = + ## creates a new count table that is empty. + ## + ## `initialSize` needs to be a power of two. If you need to accept runtime + ## values for this you could use the ``nextPowerOfTwo`` proc from the + ## `math <math.html>`_ module. + new(result) + result[] = initCountTable[A](initialSize) + +proc newCountTable*[A](keys: openArray[A]): PCountTable[A] = + ## creates a new count table with every key in `keys` having a count of 1. + result = newCountTable[A](nextPowerOfTwo(keys.len+10)) + for key in items(keys): result[key] = 1 + +proc `$`*[A](t: PCountTable[A]): string = + ## The `$` operator for count tables. + dollarImpl() + +proc inc*[A](t: PCountTable[A], key: A, val = 1) = + ## increments `t[key]` by `val`. + t[].inc(key, val) + +proc smallest*[A](t: PCountTable[A]): tuple[key: A, val: int] = + ## returns the largest (key,val)-pair. Efficiency: O(n) + t[].smallest + +proc largest*[A](t: PCountTable[A]): tuple[key: A, val: int] = + ## returns the (key,val)-pair with the largest `val`. Efficiency: O(n) + t[].largest + +proc sort*[A](t: PCountTable[A]) = + ## sorts the count table so that the entry with the highest counter comes + ## first. This is destructive! You must not modify `t` afterwards! + ## You can use the iterators `pairs`, `keys`, and `values` to iterate over + ## `t` in the sorted order. + t[].sort + when isMainModule: type Person = object diff --git a/tests/table/ptables.nim b/tests/table/ptables.nim new file mode 100644 index 000000000..ec52d08c3 --- /dev/null +++ b/tests/table/ptables.nim @@ -0,0 +1,128 @@ +discard """ + output: '''true''' +""" + +import hashes, tables + +const + data = { + "34": 123456, "12": 789, + "90": 343, "0": 34404, + "1": 344004, "2": 344774, + "3": 342244, "4": 3412344, + "5": 341232144, "6": 34214544, + "7": 3434544, "8": 344544, + "9": 34435644, "---00": 346677844, + "10": 34484, "11": 34474, "19": 34464, + "20": 34454, "30": 34141244, "40": 344114, + "50": 344490, "60": 344491, "70": 344492, + "80": 344497} + + sorteddata = { + "---00": 346677844, + "0": 34404, + "1": 344004, + "10": 34484, + "11": 34474, + "12": 789, + "19": 34464, + "2": 344774, "20": 34454, + "3": 342244, "30": 34141244, + "34": 123456, + "4": 3412344, "40": 344114, + "5": 341232144, "50": 344490, + "6": 34214544, "60": 344491, + "7": 3434544, "70": 344492, + "8": 344544, "80": 344497, + "9": 34435644, + "90": 343} + +block tableTest1: + var t = newTable[tuple[x, y: int], string]() + t[(0,0)] = "00" + t[(1,0)] = "10" + t[(0,1)] = "01" + t[(1,1)] = "11" + for x in 0..1: + for y in 0..1: + assert t[(x,y)] == $x & $y + assert($t == + "{(x: 0, y: 0): 00, (x: 0, y: 1): 01, (x: 1, y: 0): 10, (x: 1, y: 1): 11}") + +block tableTest2: + var t = newTable[string, float]() + t["test"] = 1.2345 + t["111"] = 1.000043 + t["123"] = 1.23 + t.del("111") + + t["012"] = 67.9 + t["123"] = 1.5 # test overwriting + + assert t["123"] == 1.5 + assert t["111"] == 0.0 # deleted + assert(not hasKey(t, "111")) + + for key, val in items(data): t[key] = val.toFloat + for key, val in items(data): assert t[key] == val.toFloat + + +block orderedTableTest1: + var t = newOrderedTable[string, int](2) + for key, val in items(data): t[key] = val + for key, val in items(data): assert t[key] == val + var i = 0 + # `pairs` needs to yield in insertion order: + for key, val in pairs(t): + assert key == data[i][0] + assert val == data[i][1] + inc(i) + + for key, val in mpairs(t): val = 99 + for val in mvalues(t): assert val == 99 + +block countTableTest1: + var s = data.toTable + var t = newCountTable[string]() + for k in s.Keys: t.inc(k) + for k in t.keys: assert t[k] == 1 + t.inc("90", 3) + t.inc("12", 2) + t.inc("34", 1) + assert t.largest()[0] == "90" + + t.sort() + var i = 0 + for k, v in t.pairs: + case i + of 0: assert k == "90" and v == 4 + of 1: assert k == "12" and v == 3 + of 2: assert k == "34" and v == 2 + else: break + inc i + +block SyntaxTest: + var x = newTable[int, string]({:}) + +proc orderedTableSortTest() = + var t = newOrderedTable[string, int](2) + for key, val in items(data): t[key] = val + for key, val in items(data): assert t[key] == val + t.sort(proc (x, y: tuple[key: string, val: int]): int = cmp(x.key, y.key)) + var i = 0 + # `pairs` needs to yield in sorted order: + for key, val in pairs(t): + doAssert key == sorteddata[i][0] + doAssert val == sorteddata[i][1] + inc(i) + + # check that lookup still works: + for key, val in pairs(t): + doAssert val == t[key] + # check that insert still works: + t["newKeyHere"] = 80 + + +orderedTableSortTest() +echo "true" + diff --git a/tests/table/ptables2.nim b/tests/table/ptables2.nim new file mode 100644 index 000000000..939de2b84 --- /dev/null +++ b/tests/table/ptables2.nim @@ -0,0 +1,20 @@ +discard """ + output: '''true''' +""" + +import tables + +proc TestHashIntInt() = + var tab = newTable[int,int]() + for i in 1..1_000_000: + tab[i] = i + for i in 1..1_000_000: + var x = tab[i] + if x != i : echo "not found ", i + +proc run1() = # occupied Memory stays constant, but + for i in 1 .. 50: # aborts at run: 44 on win32 with 3.2GB with out of memory + TestHashIntInt() + +run1() +echo "true" |