diff options
author | Araq <rumpf_a@web.de> | 2011-06-06 08:45:11 +0200 |
---|---|---|
committer | Araq <rumpf_a@web.de> | 2011-06-06 08:45:11 +0200 |
commit | 42eb21be7b422b47732a56a1b8420b6f1b63f2c6 (patch) | |
tree | fdd97f56d6c4f88b23f6c38ebea2dde59e762516 /lib/pure/collections | |
parent | 958961bd8d0bc699c6301984a706c5d182079c71 (diff) | |
download | Nim-42eb21be7b422b47732a56a1b8420b6f1b63f2c6.tar.gz |
bugfix: generic instantiation across module boundaries
Diffstat (limited to 'lib/pure/collections')
-rwxr-xr-x | lib/pure/collections/lists.nim | 326 | ||||
-rw-r--r-- | lib/pure/collections/tables.nim (renamed from lib/pure/collections/hashtables.nim) | 229 |
2 files changed, 356 insertions, 199 deletions
diff --git a/lib/pure/collections/lists.nim b/lib/pure/collections/lists.nim index 930fd776e..10caac336 100755 --- a/lib/pure/collections/lists.nim +++ b/lib/pure/collections/lists.nim @@ -23,14 +23,19 @@ type next*: ref TSinglyLinkedNode[T] value*: T PSinglyLinkedNode*[T] = ref TSinglyLinkedNode[T] + + TSinglyLinkedList*[T] {.pure, final.} = object ## a singly linked list + head*, tail*: PSinglyLinkedNode[T] - TRingNode[T] {.pure, - final.} = object ## a node a ring list consists of - next*, prev*: ref TRingNode[T] - value*: T - - PRingNode*[T] = ref TRingNode[T] + TDoublyLinkedList*[T] {.pure, final.} = object ## a doubly linked list + head*, tail*: PDoublyLinkedNode[T] + TSinglyLinkedRing*[T] {.pure, final.} = object ## a singly linked ring + head*: PSinglyLinkedNode[T] + + TDoublyLinkedRing*[T] {.pure, final.} = object ## a doubly linked ring + head*: PDoublyLinkedNode[T] + proc newDoublyLinkedNode*[T](value: T): PDoublyLinkedNode[T] = ## creates a new doubly linked node with the given `value`. new(result) @@ -41,124 +46,249 @@ proc newSinglyLinkedNode*[T](value: T): PSinglyLinkedNode[T] = new(result) result.value = value -iterator items*[T](n: PDoublyLinkedNode[T]): T = - ## yields every value of `x`. - var it = n +template itemsListImpl() = + var it = L.head while it != nil: yield it.value it = it.next -iterator items*[T](n: PSinglyLinkedNode[T]): T = - ## yields every value of `x`. - var it = n - while it != nil: - yield it.value - it = it.next +template itemsRingImpl() = + var it = L.head + if it != nil: + while true: + yield it.value + it = it.next + if it == L.head: break -iterator nodes*[T](n: PSinglyLinkedNode[T]): PSinglyLinkedNode[T] = - ## iterates over every node of `x`. Removing the current node from the - ## list during traversal is supported. - var it = n +template nodesListImpl() = + var it = L.head while it != nil: var nxt = it.next yield it it = nxt -iterator nodes*[T](n: PDoublyLinkedNode[T]): PDoublyLinkedNode[T] = +template nodesRingImpl() = + var it = L.head + if it != nil: + while true: + var nxt = it.next + yield it + it = nxt + if it == L.head: break + +template findImpl() = + for x in nodes(L): + if x.value == value: return x + +iterator items*[T](L: TDoublyLinkedList[T]): T = + ## yields every value of `L`. + itemsListImpl() + +iterator items*[T](L: TSinglyLinkedList[T]): T = + ## yields every value of `L`. + itemsListImpl() + +iterator items*[T](L: TSinglyLinkedRing[T]): T = + ## yields every value of `L`. + itemsRingImpl() + +iterator items*[T](L: TDoublyLinkedRing[T]): T = + ## yields every value of `L`. + itemsRingImpl() + +iterator nodes*[T](L: TSinglyLinkedList[T]): PSinglyLinkedNode[T] = ## iterates over every node of `x`. Removing the current node from the ## list during traversal is supported. - var it = n - while it != nil: - var nxt = it.next - yield it - it = nxt + nodesListImpl() -proc `$`*[list: PSinglyLinkedNode|PDoublyLinkedNode](n: list): string = - ## turns a list into its string representation. +iterator nodes*[T](L: TDoublyLinkedList[T]): PDoublyLinkedNode[T] = + ## iterates over every node of `x`. Removing the current node from the + ## list during traversal is supported. + nodesListImpl() + +iterator nodes*[T](L: TSinglyLinkedRing[T]): PSinglyLinkedNode[T] = + ## iterates over every node of `x`. Removing the current node from the + ## list during traversal is supported. + nodesRingImpl() + +iterator nodes*[T](L: TDoublyLinkedRing[T]): PDoublyLinkedNode[T] = + ## iterates over every node of `x`. Removing the current node from the + ## list during traversal is supported. + nodesRingImpl() + +template dollarImpl() = result = "[" - for x in nodes(n): + for x in nodes(L): if result.len > 1: result.add(", ") result.add($x.value) result.add("]") -proc find*[list: PSinglyLinkedNode|PDoublyLinkedNode, T]( - n: list, value: T): list = +proc `$`*[T](L: TSinglyLinkedList[T]): string = + ## turns a list into its string representation. + dollarImpl() + +proc `$`*[T](L: TDoublyLinkedList[T]): string = + ## turns a list into its string representation. + dollarImpl() + +proc `$`*[T](L: TSinglyLinkedRing[T]): string = + ## turns a list into its string representation. + dollarImpl() + +proc `$`*[T](L: TDoublyLinkedRing[T]): string = + ## turns a list into its string representation. + dollarImpl() + +proc find*[T](L: TSinglyLinkedList[T], value: T): PSinglyLinkedNode[T] = ## searches in the list for a value. Returns nil if the value does not ## exist. - for x in nodes(n): - if x.value == value: return x + findImpl() + +proc find*[T](L: TDoublyLinkedList[T], value: T): PDoublyLinkedNode[T] = + ## searches in the list for a value. Returns nil if the value does not + ## exist. + findImpl() + +proc find*[T](L: TSinglyLinkedRing[T], value: T): PSinglyLinkedNode[T] = + ## searches in the list for a value. Returns nil if the value does not + ## exist. + findImpl() -proc contains*[list: PSinglyLinkedNode|PDoublyLinkedNode, T]( - n: list, value: T): list = +proc find*[T](L: TDoublyLinkedRing[T], value: T): PDoublyLinkedNode[T] = + ## searches in the list for a value. Returns nil if the value does not + ## exist. + findImpl() + +proc contains*[T](L: TSinglyLinkedList[T], value: T): bool {.inline.} = ## searches in the list for a value. Returns false if the value does not ## exist, true otherwise. - for x in nodes(n): - if x.value == value: return true - -proc prepend*[T](head: var PSinglyLinkedNode[T], - toAdd: PSinglyLinkedNode[T]) {.inline.} = - ## prepends a node to `head`. Efficiency: O(1). - toAdd.next = head - head = toAdd - -proc prepend*[T](head: var PSinglyLinkedNode[T], x: T) {.inline.} = - ## creates a new node with the value `x` and prepends that node to `head`. - ## Efficiency: O(1). - preprend(head, newSinglyLinkedNode(x)) - -proc append*[T](head: var PSinglyLinkedNode[T], - toAdd: PSinglyLinkedNode[T]) = - ## appends a node to `head`. Efficiency: O(n). - if head == nil: - head = toAdd - else: - var it = head - while it.next != nil: it = it.next - it.next = toAdd - -proc append*[T](head: var PSinglyLinkedNode[T], x: T) {.inline.} = - ## creates a new node with the value `x` and appends that node to `head`. - ## Efficiency: O(n). - append(head, newSinglyLinkedNode(x)) - - -proc prepend*[T](head: var PDoublyLinkedNode[T], - toAdd: PDoublyLinkedNode[T]) {.inline.} = - ## prepends a node to `head`. Efficiency: O(1). - if head == nil: - head = toAdd - # head.prev stores the last node: - head.prev = toAdd - else: - toAdd.next = head - toAdd.prev = head.prev # copy pointer to last element - head.prev = toAdd - head = toAdd - -proc prepend*[T](head: var PDoublyLinkedNode[T], x: T) {.inline.} = - ## creates a new node with the value `x` and prepends that node to `head`. - ## Efficiency: O(1). - preprend(head, newDoublyLinkedNode(x)) - -proc append*[T](head: var PDoublyLinkedNode[T], - toAdd: PDoublyLinkedNode[T]) {.inline.} = - ## appends a node to `head`. Efficiency: O(1). - if head == nil: - head = toAdd - # head.prev stores the last node: - head.prev = toAdd + result = find(L, value) != nil + +proc contains*[T](L: TDoublyLinkedList[T], value: T): bool {.inline.} = + ## searches in the list for a value. Returns false if the value does not + ## exist, true otherwise. + result = find(L, value) != nil + +proc contains*[T](L: TSinglyLinkedRing[T], value: T): bool {.inline.} = + ## searches in the list for a value. Returns false if the value does not + ## exist, true otherwise. + result = find(L, value) != nil + +proc contains*[T](L: TDoublyLinkedRing[T], value: T): bool {.inline.} = + ## searches in the list for a value. Returns false if the value does not + ## exist, true otherwise. + result = find(L, value) != nil + +proc prepend*[T](L: var TSinglyLinkedList[T], + n: PSinglyLinkedNode[T]) {.inline.} = + ## prepends a node to `L`. Efficiency: O(1). + n.next = L.head + L.head = n + +proc prepend*[T](L: var TSinglyLinkedList[T], value: T) {.inline.} = + ## prepends a node to `L`. Efficiency: O(1). + prepend(L, newSinglyLinkedNode(value)) + +proc append*[T](L: var TDoublyLinkedList[T], n: PDoublyLinkedNode[T]) = + ## appends a node `n` to `L`. Efficiency: O(1). + n.next = nil + n.prev = L.tail + if L.tail != nil: + assert(L.tail.next == nil) + L.tail.next = n + L.tail = n + if L.head == nil: L.head = n + +proc append*[T](L: var TDoublyLinkedList[T], value: T) = + ## appends a value to `L`. Efficiency: O(1). + append(L, newDoublyLinkedNode(value)) + +proc prepend*[T](L: var TDoublyLinkedList[T], n: PDoublyLinkedNode[T]) = + ## prepends a node `n` to `L`. Efficiency: O(1). + n.prev = nil + n.next = L.head + if L.head != nil: + assert(L.head.prev == nil) + L.head.prev = n + L.head = n + if L.tail == nil: L.tail = n + +proc prepend*[T](L: var TDoublyLinkedList[T], value: T) = + ## prepends a value to `L`. Efficiency: O(1). + prepend(L, newDoublyLinkedNode(value)) + +proc remove*[T](L: var TDoublyLinkedList[T], n: PDoublyLinkedNode[T]) = + ## removes `n` from `L`. Efficiency: O(1). + if n == L.tail: L.tail = n.prev + if n == L.head: L.head = n.next + if n.next != nil: n.next.prev = n.prev + if n.prev != nil: n.prev.next = n.next + + +proc prepend*[T](L: var TSinglyLinkedRing[T], n: PSinglyLinkedNode[T]) = + ## prepends a node `n` to `L`. Efficiency: O(1). + if L.head != nil: + n.next = L.head + L.head.next = n + else: + n.next = n + L.head = n + +proc prepend*[T](L: var TSinglyLinkedRing[T], value: T) = + ## prepends a value to `L`. Efficiency: O(1). + prepend(L, newSinglyLinkedNode(value)) + +proc append*[T](L: var TDoublyLinkedRing[T], n: PDoublyLinkedNode[T]) = + ## appends a node `n` to `L`. Efficiency: O(1). + if L.tail != nil: + L.tail.next = n + n.prev = L.tail + n.next = L.head else: - var last = head.prev - assert last.next == nil - last.next = toAdd - toAdd.prev = last - head.prev = toAdd # new last element + # both head and tail are nil: + assert L.head == nil + L.head = n + n.prev = n + n.next = n + L.tail = n -proc append*[T](head: var PDoublyLinkedNode[T], x: T) {.inline.} = - ## creates a new node with the value `x` and appends that node to `head`. - ## Efficiency: O(1). - append(head, newDoublyLinkedNode(x)) +proc append*[T](L: var TDoublyLinkedRing[T], value: T) = + ## appends a value to `L`. Efficiency: O(1). + append(L, newDoublyLinkedNode(value)) +proc prepend*[T](L: var TDoublyLinkedRing[T], n: PDoublyLinkedNode[T]) = + ## prepends a node `n` to `L`. Efficiency: O(1). + if L.head != nil: + L.head.prev = n + n.prev = L.tail + n.next = L.head + else: + # both head and tail are nil: + assert L.tail == nil + L.tail = n + n.prev = n + n.next = n + L.head = n +proc prepend*[T](L: var TDoublyLinkedRing[T], value: T) = + ## prepends a value to `L`. Efficiency: O(1). + prepend(L, newDoublyLinkedNode(value)) + +proc remove*[T](L: var TDoublyLinkedRing[T], n: PDoublyLinkedNode[T]) = + ## removes `n` from `L`. Efficiency: O(1). + if n == L.tail: + if n == L.head: + # only element: + L.tail = nil + L.head = nil + else: + L.tail = n.prev + elif n == L.head: + L.head = n.next + n.next.prev = n.prev + n.prev.next = n.next + # break cycles for the GC; not necessary, but might help: + n.next = nil + n.prev = nil diff --git a/lib/pure/collections/hashtables.nim b/lib/pure/collections/tables.nim index 4f4c4f5a0..f132051da 100644 --- a/lib/pure/collections/hashtables.nim +++ b/lib/pure/collections/tables.nim @@ -7,37 +7,45 @@ # distribution, for details about the copyright. # -## The ``hashtables`` module implements an efficient hash table that is +## The ``tables`` module implements an efficient hash table that is ## a mapping from keys to values. +## +## Note: The data types declared here have *value semantics*: This means that +## ``=`` performs a copy of the hash table. If you are overly concerned with +## efficiency and don't need this behaviour, you can define the symbol +## ``shallowADT`` to compile a version that uses shallow copies instead. import os, hashes, math +when defined(shallowADT): + {.pragma: myShallow, shallow.} +else: + {.pragma: myShallow.} + type TSlotEnum = enum seEmpty, seFilled, seDeleted TKeyValuePair[A, B] = tuple[slot: TSlotEnum, key: A, val: B] TKeyValuePairSeq[A, B] = seq[TKeyValuePair[A, B]] - THashTable[A, B] = object of TObject + TTable* {.final, myShallow.}[A, B] = object data: TKeyValuePairSeq[A, B] counter: int - PHashTable*[A, B] = ref THashTable[A, B] ## use this type to declare tables - -proc len*[A, B](t: THashTable[A, B]): int = +proc len*[A, B](t: TTable[A, B]): int = ## returns the number of keys in `t`. result = t.counter -iterator pairs*[A, B](t: THashTable[A, B]): tuple[key: A, val: B] = +iterator pairs*[A, B](t: TTable[A, B]): tuple[key: A, val: B] = ## iterates over any (key, value) pair in the table `t`. for h in 0..high(t.data): if t.data[h].slot == seFilled: yield (t.data[h].key, t.data[h].val) -iterator keys*[A, B](t: THashTable[A, B]): A = +iterator keys*[A, B](t: TTable[A, B]): A = ## iterates over any key in the table `t`. for h in 0..high(t.data): if t.data[h].slot == seFilled: yield t.data[h].key -iterator values*[A, B](t: THashTable[A, B]): B = +iterator values*[A, B](t: TTable[A, B]): B = ## iterates over any value in the table `t`. for h in 0..high(t.data): if t.data[h].slot == seFilled: yield t.data[h].val @@ -68,10 +76,10 @@ template rawInsertImpl() = data[h].val = val data[h].slot = seFilled -proc RawGet[A, B](t: THashTable[A, B], key: A): int = +proc RawGet[A, B](t: TTable[A, B], key: A): int = rawGetImpl() -proc `[]`*[A, B](t: THashTable[A, B], key: A): B = +proc `[]`*[A, B](t: TTable[A, B], key: A): B = ## retrieves the value at ``t[key]``. If `key` is not in `t`, ## default empty value for the type `B` is returned ## and no exception is raised. One can check with ``hasKey`` whether the key @@ -79,15 +87,15 @@ proc `[]`*[A, B](t: THashTable[A, B], key: A): B = var index = RawGet(t, key) if index >= 0: result = t.data[index].val -proc hasKey*[A, B](t: THashTable[A, B], key: A): bool = +proc hasKey*[A, B](t: TTable[A, B], key: A): bool = ## returns true iff `key` is in the table `t`. result = rawGet(t, key) >= 0 -proc RawInsert[A, B](t: var THashTable[A, B], data: var TKeyValuePairSeq[A, B], +proc RawInsert[A, B](t: var TTable[A, B], data: var TKeyValuePairSeq[A, B], key: A, val: B) = rawInsertImpl() -proc Enlarge[A, B](t: var THashTable[A, B]) = +proc Enlarge[A, B](t: var TTable[A, B]) = var n: TKeyValuePairSeq[A, B] newSeq(n, len(t.data) * growthFactor) for i in countup(0, high(t.data)): @@ -103,24 +111,30 @@ template PutImpl() = RawInsert(t, t.data, key, val) inc(t.counter) -proc `[]=`*[A, B](t: var THashTable[A, B], key: A, val: B) = +proc `[]=`*[A, B](t: var TTable[A, B], key: A, val: B) = ## puts a (key, value)-pair into `t`. putImpl() -proc del*[A, B](t: var THashTable[A, B], key: A) = +proc del*[A, B](t: var TTable[A, B], key: A) = ## deletes `key` from hash table `t`. var index = RawGet(t, key) if index >= 0: t.data[index].slot = seDeleted dec(t.counter) -proc initHashTable*[A, B](initialSize = 64): THashTable[A, B] = - ## creates a new string table that is empty. `initialSize` needs to be +proc initTable*[A, B](initialSize=64): TTable[A, B] = + ## creates a new hash table table that is empty. `initialSize` needs to be ## a power of two. assert isPowerOfTwo(initialSize) result.counter = 0 newSeq(result.data, initialSize) +proc toTable*[A, B](pairs: openarray[tuple[key: A, + val: B]]): TTable[A, B] = + ## creates a new hash table that contains the given `pairs`. + result = initTable[A](nextPowerOfTwo(pairs.len+10)) + for key, val in items(pairs): result[key] = val + template dollarImpl(): stmt = if t.len == 0: result = "{:}" @@ -133,7 +147,7 @@ template dollarImpl(): stmt = result.add($val) result.add("}") -proc `$`*[A, B](t: THashTable[A, B]): string = +proc `$`*[A, B](t: TTable[A, B]): string = ## The `$` operator for string tables. dollarImpl() @@ -143,11 +157,12 @@ type TOrderedKeyValuePair[A, B] = tuple[ slot: TSlotEnum, next: int, key: A, val: B] TOrderedKeyValuePairSeq[A, B] = seq[TOrderedKeyValuePair[A, B]] - TOrderedHashTable*[A, B] {.final.} = object + TOrderedTable* {. + final, myShallow.}[A, B] = object ## table that remembers insertion order data: TOrderedKeyValuePairSeq[A, B] counter, first, last: int -proc len*[A, B](t: TOrderedHashTable[A, B]): int {.inline.} = +proc len*[A, B](t: TOrderedTable[A, B]): int {.inline.} = ## returns the number of keys in `t`. result = t.counter @@ -158,26 +173,26 @@ template forAllOrderedPairs(yieldStmt: stmt) = if t.data[h].slot == seFilled: yieldStmt i = nxt -iterator pairs*[A, B](t: TOrderedHashTable[A, B]): tuple[key: A, val: B] = +iterator pairs*[A, B](t: TOrderedTable[A, B]): tuple[key: A, val: B] = ## iterates over any (key, value) pair in the table `t` in insertion ## order. forAllOrderedPairs: yield (t.data[h].key, t.data[h].val) -iterator keys*[A, B](t: TOrderedHashTable[A, B]): A = +iterator keys*[A, B](t: TOrderedTable[A, B]): A = ## iterates over any key in the table `t` in insertion order. forAllOrderedPairs: yield t.data[h].key -iterator values*[A, B](t: TOrderedHashTable[A, B]): B = +iterator values*[A, B](t: TOrderedTable[A, B]): B = ## iterates over any value in the table `t` in insertion order. forAllOrderedPairs: yield t.data[h].val -proc RawGet[A, B](t: TOrderedHashTable[A, B], key: A): int = +proc RawGet[A, B](t: TOrderedTable[A, B], key: A): int = rawGetImpl() -proc `[]`*[A, B](t: TOrderedHashTable[A, B], key: A): B = +proc `[]`*[A, B](t: TOrderedTable[A, B], key: A): B = ## retrieves the value at ``t[key]``. If `key` is not in `t`, ## default empty value for the type `B` is returned ## and no exception is raised. One can check with ``hasKey`` whether the key @@ -185,11 +200,11 @@ proc `[]`*[A, B](t: TOrderedHashTable[A, B], key: A): B = var index = RawGet(t, key) if index >= 0: result = t.data[index].val -proc hasKey*[A, B](t: TOrderedHashTable[A, B], key: A): bool = +proc hasKey*[A, B](t: TOrderedTable[A, B], key: A): bool = ## returns true iff `key` is in the table `t`. result = rawGet(t, key) >= 0 -proc RawInsert[A, B](t: TOrderedHashTable[A, B], +proc RawInsert[A, B](t: TOrderedTable[A, B], data: var TOrderedKeyValuePairSeq[A, B], key: A, val: B) = rawInsertImpl() @@ -198,39 +213,19 @@ proc RawInsert[A, B](t: TOrderedHashTable[A, B], if last >= 0: data[last].next = h lastEntry = h -proc Enlarge[A, B](t: TOrderedHashTable[A, B]) = +proc Enlarge[A, B](t: TOrderedTable[A, B]) = var n: TOrderedKeyValuePairSeq[A, B] newSeq(n, len(t.data) * growthFactor) forAllOrderedPairs: RawInsert(t, n, t.data[h].key, t.data[h].val) swap(t.data, n) -proc `[]=`*[A, B](t: TOrderedHashTable[A, B], key: A, val: B) = +proc `[]=`*[A, B](t: TOrderedTable[A, B], key: A, val: B) = ## puts a (key, value)-pair into `t`. - var index = RawGet(t, key) - if index >= 0: - t.data[index].val = val - else: - if mustRehash(len(t.data), t.counter): Enlarge(t) - RawInsert(t, t.data, key, val) - inc(t.counter) - -proc del*[A, B](t: TOrderedHashTable[A, B], key: A) = - ## deletes `key` from hash table `t`. Warning: It's inefficient for ordered - ## tables: O(n). - var index = RawGet(t, key) - if index >= 0: - var i = t.first - while i >= 0: - var nxt = t.data[i].next - if nxt == index: XXX - i = nxt - - t.data[index].slot = seDeleted - dec(t.counter) + putImpl() -proc initHashTable*[A, B](initialSize = 64): TOrderedHashTable[A, B] = - ## creates a new string table that is empty. `initialSize` needs to be +proc initOrderedTable*[A, B](initialSize=64): TOrderedTable[A, B] = + ## creates a new ordered hash table that is empty. `initialSize` needs to be ## a power of two. assert isPowerOfTwo(initialSize) result.counter = 0 @@ -238,17 +233,21 @@ proc initHashTable*[A, B](initialSize = 64): TOrderedHashTable[A, B] = result.last = -1 newSeq(result.data, initialSize) -proc `$`*[A, B](t: TOrderedHashTable[A, B]): string = +proc toOrderedTable*[A, B](pairs: openarray[tuple[key: A, + val: B]]): TOrderedTable[A, B] = + ## creates a new ordered hash table that contains the given `pairs`. + result = initOrderedTable[A, B](nextPowerOfTwo(pairs.len+10)) + for key, val in items(pairs): result[key] = val + +proc `$`*[A, B](t: TOrderedTable[A, B]): string = ## The `$` operator for hash tables. dollarImpl() # ------------------------------ count tables ------------------------------- -const - deletedCount = -1 - type - TCountTable*[A] {.final.} = object + TCountTable* {.final, myShallow.}[ + A] = object ## table that counts the number of each key data: seq[tuple[key: A, val: int]] counter: int @@ -259,30 +258,28 @@ proc len*[A](t: TCountTable[A]): int = iterator pairs*[A](t: TCountTable[A]): tuple[key: A, val: int] = ## iterates over any (key, value) pair in the table `t`. for h in 0..high(t.data): - if t.data[h].slot == seFilled: yield (t.data[h].key, t.data[h].val) + if t.data[h].val != 0: yield (t.data[h].key, t.data[h].val) iterator keys*[A](t: TCountTable[A]): A = ## iterates over any key in the table `t`. for h in 0..high(t.data): - if t.data[h].slot == seFilled: yield t.data[h].key + if t.data[h].val != 0: yield t.data[h].key iterator values*[A](t: TCountTable[A]): int = ## iterates over any value in the table `t`. for h in 0..high(t.data): - if t.data[h].slot == seFilled: yield t.data[h].val + if t.data[h].val != 0: yield t.data[h].val proc RawGet[A](t: TCountTable[A], key: A): int = var h: THash = hash(key) and high(t.data) # start with real hash value - while t.data[h].slot != seEmpty: - if t.data[h].key == key and t.data[h].slot == seFilled: - return h + while t.data[h].val != 0: + if t.data[h].key == key: return h h = nextTry(h, high(t.data)) result = -1 -proc `[]`*[A](t: TCountTable[A], key: A): B = +proc `[]`*[A](t: TCountTable[A], key: A): int = ## retrieves the value at ``t[key]``. If `key` is not in `t`, - ## default empty value for the type `B` is returned - ## and no exception is raised. One can check with ``hasKey`` whether the key + ## 0 is returned. One can check with ``hasKey`` whether the key ## exists. var index = RawGet(t, key) if index >= 0: result = t.data[index].val @@ -291,62 +288,92 @@ proc hasKey*[A](t: TCountTable[A], key: A): bool = ## returns true iff `key` is in the table `t`. result = rawGet(t, key) >= 0 -proc RawInsert[A](t: TCountTable[A], data: var TKeyValuePairSeq[A, B], - key: A, val: int) = +proc RawInsert[A](t: TCountTable[A], data: var seq[tuple[key: A, val: int]], + key: A, val: int) = var h: THash = hash(key) and high(data) - while data[h].slot == seFilled: - h = nextTry(h, high(data)) + while data[h].val != 0: h = nextTry(h, high(data)) data[h].key = key data[h].val = val - data[h].slot = seFilled proc Enlarge[A](t: TCountTable[A]) = - var n: TKeyValuePairSeq[A, B] + var n: seq[tuple[key: A, val: int]] newSeq(n, len(t.data) * growthFactor) for i in countup(0, high(t.data)): - if t.data[i].slot == seFilled: RawInsert(t, n, t.data[i].key, t.data[i].val) + if t.data[i].val != 0: RawInsert(t, n, t.data[i].key, t.data[i].val) swap(t.data, n) proc `[]=`*[A](t: TCountTable[A], key: A, val: int) = - ## puts a (key, value)-pair into `t`. - var index = RawGet(t, key) - if index >= 0: - t.data[index].val = val - else: - if mustRehash(len(t.data), t.counter): Enlarge(t) - RawInsert(t, t.data, key, val) - inc(t.counter) + ## puts a (key, value)-pair into `t`. `val` has to be positive. + assert val > 0 + PutImpl() -proc del*[A](t: TCountTable[A], key: A) = - ## deletes `key` from hash table `t`. - var index = RawGet(t, key) - if index >= 0: - t.data[index].slot = seDeleted - -proc newHashTable*[A, B](initialSize = 64): PHashTable[A, B] = - ## creates a new string table that is empty. `initialSize` needs to be +proc initCountTable*[A](initialSize=64): TCountTable[A] = + ## creates a new count table that is empty. `initialSize` needs to be ## a power of two. assert isPowerOfTwo(initialSize) - new(result) result.counter = 0 newSeq(result.data, initialSize) +proc toCountTable*[A](keys: openArray[A]): TCountTable[A] = + ## creates a new count table with every key in `keys` having a count of 1. + result = initCountTable[A](nextPowerOfTwo(keys.len+10)) + for key in items(keys): result[key] = 1 + proc `$`*[A](t: TCountTable[A]): string = - ## The `$` operator for string tables. - if t.len == 0: - result = "{:}" + ## The `$` operator for count tables. + dollarImpl() + +proc inc*[A](t: TCountTable[A], key: A, val = 1) = + ## increments `t[key]` by `val`. + var index = RawGet(t, key) + if index >= 0: + inc(t.data[index].val, val) else: - result = "{" - for key, val in pairs(t): - if result.len > 1: result.add(", ") - result.add($key) - result.add(": ") - result.add($val) - result.add("}") + if mustRehash(len(t.data), t.counter): Enlarge(t) + RawInsert(t, t.data, key, val) + inc(t.counter) +proc Smallest*[A](t: TCountTable[A]): tuple[key: A, val: int] = + ## returns the largest (key,val)-pair. Efficiency: O(n) + assert t.len > 0 + var minIdx = 0 + for h in 1..high(t.data): + if t.data[h].val > 0 and t.data[minIdx].val > t.data[h].val: minIdx = h + result.key = t.data[minIdx].key + result.val = t.data[minIdx].val + +proc Largest*[A](t: TCountTable[A]): tuple[key: A, val: int] = + ## returns the (key,val)-pair with the largest `val`. Efficiency: O(n) + assert t.len > 0 + var maxIdx = 0 + for h in 1..high(t.data): + if t.data[maxIdx].val < t.data[h].val: maxIdx = h + result.key = t.data[maxIdx].key + result.val = t.data[maxIdx].val + +proc sort*[A](t: var TCountTable[A]) = + ## sorts the count table so that the entry with the highest counter comes + ## first. This is destructive! You must not modify `t` afterwards! + ## You can use the iterators `pairs`, `keys`, and `values` to iterate over + ## `t` in the sorted order. + + # we use shellsort here; fast enough and simple + var h = 1 + while true: + h = 3 * h + 1 + if h >= t.data.high: break + while true: + h = h div 3 + for i in countup(h, t.data.high): + var j = i + while t.data[j-h].val < t.data[j].val: + swap(t.data[j], t.data[j-h]) + j = j-h + if j < h: break + if h == 1: break when isMainModule: - var table = newHashTable[string, float]() + var table = initHashTable[string, float]() table["test"] = 1.2345 table["111"] = 1.000043 echo table |