bugfix: generic instantiation across module boundaries

author: Araq <rumpf_a@web.de> 2011-06-06 08:45:11 +0200
committer: Araq <rumpf_a@web.de> 2011-06-06 08:45:11 +0200
commit: 42eb21be7b422b47732a56a1b8420b6f1b63f2c6 (patch)
tree: fdd97f56d6c4f88b23f6c38ebea2dde59e762516 /lib/pure/collections
parent: 958961bd8d0bc699c6301984a706c5d182079c71 (diff)
download: Nim-42eb21be7b422b47732a56a1b8420b6f1b63f2c6.tar.gz
2 files changed, 356 insertions, 199 deletions
diff --git a/lib/pure/collections/lists.nim b/lib/pure/collections/lists.nim
index 930fd776e..10caac336 100755
--- a/lib/pure/collections/lists.nim
+++ b/lib/pure/collections/lists.nim
@@ -23,14 +23,19 @@ type
     next*: ref TSinglyLinkedNode[T]
     value*: T
   PSinglyLinkedNode*[T] = ref TSinglyLinkedNode[T]
+
+  TSinglyLinkedList*[T] {.pure, final.} = object ## a singly linked list
+    head*, tail*: PSinglyLinkedNode[T]
   
-  TRingNode[T] {.pure, 
-      final.} = object ## a node a ring list consists of
-    next*, prev*: ref TRingNode[T]
-    value*: T
-    
-  PRingNode*[T] = ref TRingNode[T]
+  TDoublyLinkedList*[T] {.pure, final.} = object ## a doubly linked list
+    head*, tail*: PDoublyLinkedNode[T]
 
+  TSinglyLinkedRing*[T] {.pure, final.} = object ## a singly linked ring
+    head*: PSinglyLinkedNode[T]
+  
+  TDoublyLinkedRing*[T] {.pure, final.} = object ## a doubly linked ring
+    head*: PDoublyLinkedNode[T]
+    
 proc newDoublyLinkedNode*[T](value: T): PDoublyLinkedNode[T] =
   ## creates a new doubly linked node with the given `value`.
   new(result)
@@ -41,124 +46,249 @@ proc newSinglyLinkedNode*[T](value: T): PSinglyLinkedNode[T] =
   new(result)
   result.value = value
 
-iterator items*[T](n: PDoublyLinkedNode[T]): T = 
-  ## yields every value of `x`.
-  var it = n
+template itemsListImpl() =
+  var it = L.head
   while it != nil:
     yield it.value
     it = it.next
 
-iterator items*[T](n: PSinglyLinkedNode[T]): T = 
-  ## yields every value of `x`.
-  var it = n
-  while it != nil:
-    yield it.value
-    it = it.next
+template itemsRingImpl() =
+  var it = L.head
+  if it != nil:
+    while true:
+      yield it.value
+      it = it.next
+      if it == L.head: break
 
-iterator nodes*[T](n: PSinglyLinkedNode[T]): PSinglyLinkedNode[T] = 
-  ## iterates over every node of `x`. Removing the current node from the
-  ## list during traversal is supported.
-  var it = n
+template nodesListImpl() =
+  var it = L.head
   while it != nil:
     var nxt = it.next
     yield it
     it = nxt
 
-iterator nodes*[T](n: PDoublyLinkedNode[T]): PDoublyLinkedNode[T] = 
+template nodesRingImpl() =
+  var it = L.head
+  if it != nil:
+    while true:
+      var nxt = it.next
+      yield it
+      it = nxt
+      if it == L.head: break
+
+template findImpl() =
+  for x in nodes(L):
+    if x.value == value: return x
+
+iterator items*[T](L: TDoublyLinkedList[T]): T = 
+  ## yields every value of `L`.
+  itemsListImpl()
+
+iterator items*[T](L: TSinglyLinkedList[T]): T = 
+  ## yields every value of `L`.
+  itemsListImpl()
+
+iterator items*[T](L: TSinglyLinkedRing[T]): T = 
+  ## yields every value of `L`.
+  itemsRingImpl()
+
+iterator items*[T](L: TDoublyLinkedRing[T]): T = 
+  ## yields every value of `L`.
+  itemsRingImpl()
+
+iterator nodes*[T](L: TSinglyLinkedList[T]): PSinglyLinkedNode[T] = 
   ## iterates over every node of `x`. Removing the current node from the
   ## list during traversal is supported.
-  var it = n
-  while it != nil:
-    var nxt = it.next
-    yield it
-    it = nxt
+  nodesListImpl()
 
-proc `$`*[list: PSinglyLinkedNode|PDoublyLinkedNode](n: list): string = 
-  ## turns a list into its string representation.
+iterator nodes*[T](L: TDoublyLinkedList[T]): PDoublyLinkedNode[T] = 
+  ## iterates over every node of `x`. Removing the current node from the
+  ## list during traversal is supported.
+  nodesListImpl()
+
+iterator nodes*[T](L: TSinglyLinkedRing[T]): PSinglyLinkedNode[T] = 
+  ## iterates over every node of `x`. Removing the current node from the
+  ## list during traversal is supported.
+  nodesRingImpl()
+
+iterator nodes*[T](L: TDoublyLinkedRing[T]): PDoublyLinkedNode[T] = 
+  ## iterates over every node of `x`. Removing the current node from the
+  ## list during traversal is supported.
+  nodesRingImpl()
+
+template dollarImpl() =
   result = "["
-  for x in nodes(n):
+  for x in nodes(L):
     if result.len > 1: result.add(", ")
     result.add($x.value)
   result.add("]")
 
-proc find*[list: PSinglyLinkedNode|PDoublyLinkedNode, T](
-           n: list, value: T): list = 
+proc `$`*[T](L: TSinglyLinkedList[T]): string = 
+  ## turns a list into its string representation.
+  dollarImpl()
+
+proc `$`*[T](L: TDoublyLinkedList[T]): string = 
+  ## turns a list into its string representation.
+  dollarImpl()
+
+proc `$`*[T](L: TSinglyLinkedRing[T]): string = 
+  ## turns a list into its string representation.
+  dollarImpl()
+
+proc `$`*[T](L: TDoublyLinkedRing[T]): string = 
+  ## turns a list into its string representation.
+  dollarImpl()
+
+proc find*[T](L: TSinglyLinkedList[T], value: T): PSinglyLinkedNode[T] = 
   ## searches in the list for a value. Returns nil if the value does not
   ## exist.
-  for x in nodes(n):
-    if x.value == value: return x
+  findImpl()
+
+proc find*[T](L: TDoublyLinkedList[T], value: T): PDoublyLinkedNode[T] = 
+  ## searches in the list for a value. Returns nil if the value does not
+  ## exist.
+  findImpl()
+
+proc find*[T](L: TSinglyLinkedRing[T], value: T): PSinglyLinkedNode[T] = 
+  ## searches in the list for a value. Returns nil if the value does not
+  ## exist.
+  findImpl()
 
-proc contains*[list: PSinglyLinkedNode|PDoublyLinkedNode, T](
-           n: list, value: T): list = 
+proc find*[T](L: TDoublyLinkedRing[T], value: T): PDoublyLinkedNode[T] = 
+  ## searches in the list for a value. Returns nil if the value does not
+  ## exist.
+  findImpl()
+
+proc contains*[T](L: TSinglyLinkedList[T], value: T): bool {.inline.} = 
   ## searches in the list for a value. Returns false if the value does not
   ## exist, true otherwise.
-  for x in nodes(n):
-    if x.value == value: return true
-
-proc prepend*[T](head: var PSinglyLinkedNode[T], 
-                 toAdd: PSinglyLinkedNode[T]) {.inline.} = 
-  ## prepends a node to `head`. Efficiency: O(1).
-  toAdd.next = head
-  head = toAdd
-
-proc prepend*[T](head: var PSinglyLinkedNode[T], x: T) {.inline.} = 
-  ## creates a new node with the value `x` and prepends that node to `head`.
-  ## Efficiency: O(1).
-  preprend(head, newSinglyLinkedNode(x))
-
-proc append*[T](head: var PSinglyLinkedNode[T], 
-                toAdd: PSinglyLinkedNode[T]) = 
-  ## appends a node to `head`. Efficiency: O(n).
-  if head == nil:
-    head = toAdd
-  else:
-    var it = head
-    while it.next != nil: it = it.next
-    it.next = toAdd
-
-proc append*[T](head: var PSinglyLinkedNode[T], x: T) {.inline.} = 
-  ## creates a new node with the value `x` and appends that node to `head`.
-  ## Efficiency: O(n).
-  append(head, newSinglyLinkedNode(x))
-
-
-proc prepend*[T](head: var PDoublyLinkedNode[T], 
-                 toAdd: PDoublyLinkedNode[T]) {.inline.} = 
-  ## prepends a node to `head`. Efficiency: O(1).
-  if head == nil:
-    head = toAdd
-    # head.prev stores the last node:
-    head.prev = toAdd
-  else:
-    toAdd.next = head
-    toAdd.prev = head.prev # copy pointer to last element
-    head.prev = toAdd
-    head = toAdd
-
-proc prepend*[T](head: var PDoublyLinkedNode[T], x: T) {.inline.} = 
-  ## creates a new node with the value `x` and prepends that node to `head`.
-  ## Efficiency: O(1).
-  preprend(head, newDoublyLinkedNode(x))
-
-proc append*[T](head: var PDoublyLinkedNode[T], 
-                toAdd: PDoublyLinkedNode[T]) {.inline.} = 
-  ## appends a node to `head`. Efficiency: O(1).
-  if head == nil:
-    head = toAdd
-    # head.prev stores the last node:
-    head.prev = toAdd
+  result = find(L, value) != nil
+
+proc contains*[T](L: TDoublyLinkedList[T], value: T): bool {.inline.} = 
+  ## searches in the list for a value. Returns false if the value does not
+  ## exist, true otherwise.
+  result = find(L, value) != nil
+
+proc contains*[T](L: TSinglyLinkedRing[T], value: T): bool {.inline.} = 
+  ## searches in the list for a value. Returns false if the value does not
+  ## exist, true otherwise.
+  result = find(L, value) != nil
+
+proc contains*[T](L: TDoublyLinkedRing[T], value: T): bool {.inline.} = 
+  ## searches in the list for a value. Returns false if the value does not
+  ## exist, true otherwise.
+  result = find(L, value) != nil
+
+proc prepend*[T](L: var TSinglyLinkedList[T], 
+                 n: PSinglyLinkedNode[T]) {.inline.} = 
+  ## prepends a node to `L`. Efficiency: O(1).
+  n.next = L.head
+  L.head = n
+
+proc prepend*[T](L: var TSinglyLinkedList[T], value: T) {.inline.} = 
+  ## prepends a node to `L`. Efficiency: O(1).
+  prepend(L, newSinglyLinkedNode(value))
+  
+proc append*[T](L: var TDoublyLinkedList[T], n: PDoublyLinkedNode[T]) = 
+  ## appends a node `n` to `L`. Efficiency: O(1).
+  n.next = nil
+  n.prev = L.tail
+  if L.tail != nil: 
+    assert(L.tail.next == nil)
+    L.tail.next = n
+  L.tail = n
+  if L.head == nil: L.head = n
+
+proc append*[T](L: var TDoublyLinkedList[T], value: T) = 
+  ## appends a value to `L`. Efficiency: O(1).
+  append(L, newDoublyLinkedNode(value))
+
+proc prepend*[T](L: var TDoublyLinkedList[T], n: PDoublyLinkedNode[T]) = 
+  ## prepends a node `n` to `L`. Efficiency: O(1).
+  n.prev = nil
+  n.next = L.head
+  if L.head != nil:
+    assert(L.head.prev == nil)
+    L.head.prev = n
+  L.head = n
+  if L.tail == nil: L.tail = n
+
+proc prepend*[T](L: var TDoublyLinkedList[T], value: T) = 
+  ## prepends a value to `L`. Efficiency: O(1).
+  prepend(L, newDoublyLinkedNode(value))
+  
+proc remove*[T](L: var TDoublyLinkedList[T], n: PDoublyLinkedNode[T]) = 
+  ## removes `n` from `L`. Efficiency: O(1).
+  if n == L.tail: L.tail = n.prev
+  if n == L.head: L.head = n.next
+  if n.next != nil: n.next.prev = n.prev
+  if n.prev != nil: n.prev.next = n.next
+
+
+proc prepend*[T](L: var TSinglyLinkedRing[T], n: PSinglyLinkedNode[T]) = 
+  ## prepends a node `n` to `L`. Efficiency: O(1).
+  if L.head != nil: 
+    n.next = L.head    
+    L.head.next = n
+  else: 
+    n.next = n
+  L.head = n
+
+proc prepend*[T](L: var TSinglyLinkedRing[T], value: T) = 
+  ## prepends a value to `L`. Efficiency: O(1).
+  prepend(L, newSinglyLinkedNode(value))
+
+proc append*[T](L: var TDoublyLinkedRing[T], n: PDoublyLinkedNode[T]) = 
+  ## appends a node `n` to `L`. Efficiency: O(1).
+  if L.tail != nil: 
+    L.tail.next = n
+    n.prev = L.tail
+    n.next = L.head
   else:
-    var last = head.prev
-    assert last.next == nil
-    last.next = toAdd
-    toAdd.prev = last
-    head.prev = toAdd # new last element
+    # both head and tail are nil:
+    assert L.head == nil
+    L.head = n
+    n.prev = n
+    n.next = n
+  L.tail = n
 
-proc append*[T](head: var PDoublyLinkedNode[T], x: T) {.inline.} = 
-  ## creates a new node with the value `x` and appends that node to `head`.
-  ## Efficiency: O(1).
-  append(head, newDoublyLinkedNode(x))
+proc append*[T](L: var TDoublyLinkedRing[T], value: T) = 
+  ## appends a value to `L`. Efficiency: O(1).
+  append(L, newDoublyLinkedNode(value))
 
+proc prepend*[T](L: var TDoublyLinkedRing[T], n: PDoublyLinkedNode[T]) = 
+  ## prepends a node `n` to `L`. Efficiency: O(1).
+  if L.head != nil: 
+    L.head.prev = n
+    n.prev = L.tail
+    n.next = L.head
+  else:
+    # both head and tail are nil:
+    assert L.tail == nil
+    L.tail = n
+    n.prev = n
+    n.next = n
+  L.head = n
 
+proc prepend*[T](L: var TDoublyLinkedRing[T], value: T) = 
+  ## prepends a value to `L`. Efficiency: O(1).
+  prepend(L, newDoublyLinkedNode(value))
+  
+proc remove*[T](L: var TDoublyLinkedRing[T], n: PDoublyLinkedNode[T]) = 
+  ## removes `n` from `L`. Efficiency: O(1).
+  if n == L.tail: 
+    if n == L.head:
+      # only element:
+      L.tail = nil
+      L.head = nil
+    else:
+      L.tail = n.prev
+  elif n == L.head: 
+    L.head = n.next
+  n.next.prev = n.prev
+  n.prev.next = n.next
+  # break cycles for the GC; not necessary, but might help:
+  n.next = nil
+  n.prev = nil
 
 
diff --git a/lib/pure/collections/hashtables.nim b/lib/pure/collections/tables.nim
index 4f4c4f5a0..f132051da 100644
--- a/lib/pure/collections/hashtables.nim
+++ b/lib/pure/collections/tables.nim
@@ -7,37 +7,45 @@
 #    distribution, for details about the copyright.
 #
 
-## The ``hashtables`` module implements an efficient hash table that is
+## The ``tables`` module implements an efficient hash table that is
 ## a mapping from keys to values.
+##
+## Note: The data types declared here have *value semantics*: This means that
+## ``=`` performs a copy of the hash table. If you are overly concerned with
+## efficiency and don't need this behaviour, you can define the symbol
+## ``shallowADT`` to compile a version that uses shallow copies instead.
 
 import
   os, hashes, math
 
+when defined(shallowADT):
+  {.pragma: myShallow, shallow.}
+else:
+  {.pragma: myShallow.}
+
 type
   TSlotEnum = enum seEmpty, seFilled, seDeleted
   TKeyValuePair[A, B] = tuple[slot: TSlotEnum, key: A, val: B]
   TKeyValuePairSeq[A, B] = seq[TKeyValuePair[A, B]]
-  THashTable[A, B] = object of TObject
+  TTable* {.final, myShallow.}[A, B] = object
     data: TKeyValuePairSeq[A, B]
     counter: int
 
-  PHashTable*[A, B] = ref THashTable[A, B] ## use this type to declare tables
-
-proc len*[A, B](t: THashTable[A, B]): int =
+proc len*[A, B](t: TTable[A, B]): int =
   ## returns the number of keys in `t`.
   result = t.counter
 
-iterator pairs*[A, B](t: THashTable[A, B]): tuple[key: A, val: B] =
+iterator pairs*[A, B](t: TTable[A, B]): tuple[key: A, val: B] =
   ## iterates over any (key, value) pair in the table `t`.
   for h in 0..high(t.data):
     if t.data[h].slot == seFilled: yield (t.data[h].key, t.data[h].val)
 
-iterator keys*[A, B](t: THashTable[A, B]): A =
+iterator keys*[A, B](t: TTable[A, B]): A =
   ## iterates over any key in the table `t`.
   for h in 0..high(t.data):
     if t.data[h].slot == seFilled: yield t.data[h].key
 
-iterator values*[A, B](t: THashTable[A, B]): B =
+iterator values*[A, B](t: TTable[A, B]): B =
   ## iterates over any value in the table `t`.
   for h in 0..high(t.data):
     if t.data[h].slot == seFilled: yield t.data[h].val
@@ -68,10 +76,10 @@ template rawInsertImpl() =
   data[h].val = val
   data[h].slot = seFilled
 
-proc RawGet[A, B](t: THashTable[A, B], key: A): int =
+proc RawGet[A, B](t: TTable[A, B], key: A): int =
   rawGetImpl()
 
-proc `[]`*[A, B](t: THashTable[A, B], key: A): B =
+proc `[]`*[A, B](t: TTable[A, B], key: A): B =
   ## retrieves the value at ``t[key]``. If `key` is not in `t`,
   ## default empty value for the type `B` is returned
   ## and no exception is raised. One can check with ``hasKey`` whether the key
@@ -79,15 +87,15 @@ proc `[]`*[A, B](t: THashTable[A, B], key: A): B =
   var index = RawGet(t, key)
   if index >= 0: result = t.data[index].val
 
-proc hasKey*[A, B](t: THashTable[A, B], key: A): bool =
+proc hasKey*[A, B](t: TTable[A, B], key: A): bool =
   ## returns true iff `key` is in the table `t`.
   result = rawGet(t, key) >= 0
 
-proc RawInsert[A, B](t: var THashTable[A, B], data: var TKeyValuePairSeq[A, B],
+proc RawInsert[A, B](t: var TTable[A, B], data: var TKeyValuePairSeq[A, B],
                      key: A, val: B) =
   rawInsertImpl()
 
-proc Enlarge[A, B](t: var THashTable[A, B]) =
+proc Enlarge[A, B](t: var TTable[A, B]) =
   var n: TKeyValuePairSeq[A, B]
   newSeq(n, len(t.data) * growthFactor)
   for i in countup(0, high(t.data)):
@@ -103,24 +111,30 @@ template PutImpl() =
     RawInsert(t, t.data, key, val)
     inc(t.counter)
 
-proc `[]=`*[A, B](t: var THashTable[A, B], key: A, val: B) =
+proc `[]=`*[A, B](t: var TTable[A, B], key: A, val: B) =
   ## puts a (key, value)-pair into `t`.
   putImpl()
 
-proc del*[A, B](t: var THashTable[A, B], key: A) =
+proc del*[A, B](t: var TTable[A, B], key: A) =
   ## deletes `key` from hash table `t`.
   var index = RawGet(t, key)
   if index >= 0:
     t.data[index].slot = seDeleted
     dec(t.counter)
 
-proc initHashTable*[A, B](initialSize = 64): THashTable[A, B] =
-  ## creates a new string table that is empty. `initialSize` needs to be
+proc initTable*[A, B](initialSize=64): TTable[A, B] =
+  ## creates a new hash table table that is empty. `initialSize` needs to be
   ## a power of two.
   assert isPowerOfTwo(initialSize)
   result.counter = 0
   newSeq(result.data, initialSize)
 
+proc toTable*[A, B](pairs: openarray[tuple[key: A, 
+                    val: B]]): TTable[A, B] =
+  ## creates a new hash table that contains the given `pairs`.
+  result = initTable[A](nextPowerOfTwo(pairs.len+10))
+  for key, val in items(pairs): result[key] = val
+
 template dollarImpl(): stmt =
   if t.len == 0:
     result = "{:}"
@@ -133,7 +147,7 @@ template dollarImpl(): stmt =
       result.add($val)
     result.add("}")
 
-proc `$`*[A, B](t: THashTable[A, B]): string =
+proc `$`*[A, B](t: TTable[A, B]): string =
   ## The `$` operator for string tables.
   dollarImpl()
 
@@ -143,11 +157,12 @@ type
   TOrderedKeyValuePair[A, B] = tuple[
     slot: TSlotEnum, next: int, key: A, val: B]
   TOrderedKeyValuePairSeq[A, B] = seq[TOrderedKeyValuePair[A, B]]
-  TOrderedHashTable*[A, B] {.final.} = object
+  TOrderedTable* {.
+      final, myShallow.}[A, B] = object ## table that remembers insertion order
     data: TOrderedKeyValuePairSeq[A, B]
     counter, first, last: int
 
-proc len*[A, B](t: TOrderedHashTable[A, B]): int {.inline.} =
+proc len*[A, B](t: TOrderedTable[A, B]): int {.inline.} =
   ## returns the number of keys in `t`.
   result = t.counter
 
@@ -158,26 +173,26 @@ template forAllOrderedPairs(yieldStmt: stmt) =
     if t.data[h].slot == seFilled: yieldStmt
     i = nxt
 
-iterator pairs*[A, B](t: TOrderedHashTable[A, B]): tuple[key: A, val: B] =
+iterator pairs*[A, B](t: TOrderedTable[A, B]): tuple[key: A, val: B] =
   ## iterates over any (key, value) pair in the table `t` in insertion
   ## order.
   forAllOrderedPairs:
     yield (t.data[h].key, t.data[h].val)
 
-iterator keys*[A, B](t: TOrderedHashTable[A, B]): A =
+iterator keys*[A, B](t: TOrderedTable[A, B]): A =
   ## iterates over any key in the table `t` in insertion order.
   forAllOrderedPairs:
     yield t.data[h].key
 
-iterator values*[A, B](t: TOrderedHashTable[A, B]): B =
+iterator values*[A, B](t: TOrderedTable[A, B]): B =
   ## iterates over any value in the table `t` in insertion order.
   forAllOrderedPairs:
     yield t.data[h].val
 
-proc RawGet[A, B](t: TOrderedHashTable[A, B], key: A): int =
+proc RawGet[A, B](t: TOrderedTable[A, B], key: A): int =
   rawGetImpl()
 
-proc `[]`*[A, B](t: TOrderedHashTable[A, B], key: A): B =
+proc `[]`*[A, B](t: TOrderedTable[A, B], key: A): B =
   ## retrieves the value at ``t[key]``. If `key` is not in `t`,
   ## default empty value for the type `B` is returned
   ## and no exception is raised. One can check with ``hasKey`` whether the key
@@ -185,11 +200,11 @@ proc `[]`*[A, B](t: TOrderedHashTable[A, B], key: A): B =
   var index = RawGet(t, key)
   if index >= 0: result = t.data[index].val
 
-proc hasKey*[A, B](t: TOrderedHashTable[A, B], key: A): bool =
+proc hasKey*[A, B](t: TOrderedTable[A, B], key: A): bool =
   ## returns true iff `key` is in the table `t`.
   result = rawGet(t, key) >= 0
 
-proc RawInsert[A, B](t: TOrderedHashTable[A, B], 
+proc RawInsert[A, B](t: TOrderedTable[A, B], 
                      data: var TOrderedKeyValuePairSeq[A, B],
                      key: A, val: B) =
   rawInsertImpl()
@@ -198,39 +213,19 @@ proc RawInsert[A, B](t: TOrderedHashTable[A, B],
   if last >= 0: data[last].next = h
   lastEntry = h
 
-proc Enlarge[A, B](t: TOrderedHashTable[A, B]) =
+proc Enlarge[A, B](t: TOrderedTable[A, B]) =
   var n: TOrderedKeyValuePairSeq[A, B]
   newSeq(n, len(t.data) * growthFactor)
   forAllOrderedPairs:
     RawInsert(t, n, t.data[h].key, t.data[h].val)
   swap(t.data, n)
 
-proc `[]=`*[A, B](t: TOrderedHashTable[A, B], key: A, val: B) =
+proc `[]=`*[A, B](t: TOrderedTable[A, B], key: A, val: B) =
   ## puts a (key, value)-pair into `t`.
-  var index = RawGet(t, key)
-  if index >= 0:
-    t.data[index].val = val
-  else:
-    if mustRehash(len(t.data), t.counter): Enlarge(t)
-    RawInsert(t, t.data, key, val)
-    inc(t.counter)
-
-proc del*[A, B](t: TOrderedHashTable[A, B], key: A) =
-  ## deletes `key` from hash table `t`. Warning: It's inefficient for ordered
-  ## tables: O(n).
-  var index = RawGet(t, key)
-  if index >= 0:
-    var i = t.first
-    while i >= 0:
-      var nxt = t.data[i].next
-      if nxt == index: XXX
-      i = nxt
-    
-    t.data[index].slot = seDeleted
-    dec(t.counter)
+  putImpl()
 
-proc initHashTable*[A, B](initialSize = 64): TOrderedHashTable[A, B] =
-  ## creates a new string table that is empty. `initialSize` needs to be
+proc initOrderedTable*[A, B](initialSize=64): TOrderedTable[A, B] =
+  ## creates a new ordered hash table that is empty. `initialSize` needs to be
   ## a power of two.
   assert isPowerOfTwo(initialSize)
   result.counter = 0
@@ -238,17 +233,21 @@ proc initHashTable*[A, B](initialSize = 64): TOrderedHashTable[A, B] =
   result.last = -1
   newSeq(result.data, initialSize)
 
-proc `$`*[A, B](t: TOrderedHashTable[A, B]): string =
+proc toOrderedTable*[A, B](pairs: openarray[tuple[key: A, 
+                           val: B]]): TOrderedTable[A, B] =
+  ## creates a new ordered hash table that contains the given `pairs`.
+  result = initOrderedTable[A, B](nextPowerOfTwo(pairs.len+10))
+  for key, val in items(pairs): result[key] = val
+
+proc `$`*[A, B](t: TOrderedTable[A, B]): string =
   ## The `$` operator for hash tables.
   dollarImpl()
 
 # ------------------------------ count tables -------------------------------
 
-const
-  deletedCount = -1
-
 type
-  TCountTable*[A] {.final.} = object
+  TCountTable* {.final, myShallow.}[
+      A] = object ## table that counts the number of each key
     data: seq[tuple[key: A, val: int]]
     counter: int
 
@@ -259,30 +258,28 @@ proc len*[A](t: TCountTable[A]): int =
 iterator pairs*[A](t: TCountTable[A]): tuple[key: A, val: int] =
   ## iterates over any (key, value) pair in the table `t`.
   for h in 0..high(t.data):
-    if t.data[h].slot == seFilled: yield (t.data[h].key, t.data[h].val)
+    if t.data[h].val != 0: yield (t.data[h].key, t.data[h].val)
 
 iterator keys*[A](t: TCountTable[A]): A =
   ## iterates over any key in the table `t`.
   for h in 0..high(t.data):
-    if t.data[h].slot == seFilled: yield t.data[h].key
+    if t.data[h].val != 0: yield t.data[h].key
 
 iterator values*[A](t: TCountTable[A]): int =
   ## iterates over any value in the table `t`.
   for h in 0..high(t.data):
-    if t.data[h].slot == seFilled: yield t.data[h].val
+    if t.data[h].val != 0: yield t.data[h].val
 
 proc RawGet[A](t: TCountTable[A], key: A): int =
   var h: THash = hash(key) and high(t.data) # start with real hash value
-  while t.data[h].slot != seEmpty:
-    if t.data[h].key == key and t.data[h].slot == seFilled:
-      return h
+  while t.data[h].val != 0:
+    if t.data[h].key == key: return h
     h = nextTry(h, high(t.data))
   result = -1
 
-proc `[]`*[A](t: TCountTable[A], key: A): B =
+proc `[]`*[A](t: TCountTable[A], key: A): int =
   ## retrieves the value at ``t[key]``. If `key` is not in `t`,
-  ## default empty value for the type `B` is returned
-  ## and no exception is raised. One can check with ``hasKey`` whether the key
+  ## 0 is returned. One can check with ``hasKey`` whether the key
   ## exists.
   var index = RawGet(t, key)
   if index >= 0: result = t.data[index].val
@@ -291,62 +288,92 @@ proc hasKey*[A](t: TCountTable[A], key: A): bool =
   ## returns true iff `key` is in the table `t`.
   result = rawGet(t, key) >= 0
 
-proc RawInsert[A](t: TCountTable[A], data: var TKeyValuePairSeq[A, B],
-                     key: A, val: int) =
+proc RawInsert[A](t: TCountTable[A], data: var seq[tuple[key: A, val: int]],
+                  key: A, val: int) =
   var h: THash = hash(key) and high(data)
-  while data[h].slot == seFilled:
-    h = nextTry(h, high(data))
+  while data[h].val != 0: h = nextTry(h, high(data))
   data[h].key = key
   data[h].val = val
-  data[h].slot = seFilled
 
 proc Enlarge[A](t: TCountTable[A]) =
-  var n: TKeyValuePairSeq[A, B]
+  var n: seq[tuple[key: A, val: int]]
   newSeq(n, len(t.data) * growthFactor)
   for i in countup(0, high(t.data)):
-    if t.data[i].slot == seFilled: RawInsert(t, n, t.data[i].key, t.data[i].val)
+    if t.data[i].val != 0: RawInsert(t, n, t.data[i].key, t.data[i].val)
   swap(t.data, n)
 
 proc `[]=`*[A](t: TCountTable[A], key: A, val: int) =
-  ## puts a (key, value)-pair into `t`.
-  var index = RawGet(t, key)
-  if index >= 0:
-    t.data[index].val = val
-  else:
-    if mustRehash(len(t.data), t.counter): Enlarge(t)
-    RawInsert(t, t.data, key, val)
-    inc(t.counter)
+  ## puts a (key, value)-pair into `t`. `val` has to be positive.
+  assert val > 0
+  PutImpl()
 
-proc del*[A](t: TCountTable[A], key: A) =
-  ## deletes `key` from hash table `t`.
-  var index = RawGet(t, key)
-  if index >= 0:
-    t.data[index].slot = seDeleted
-
-proc newHashTable*[A, B](initialSize = 64): PHashTable[A, B] =
-  ## creates a new string table that is empty. `initialSize` needs to be
+proc initCountTable*[A](initialSize=64): TCountTable[A] =
+  ## creates a new count table that is empty. `initialSize` needs to be
   ## a power of two.
   assert isPowerOfTwo(initialSize)
-  new(result)
   result.counter = 0
   newSeq(result.data, initialSize)
 
+proc toCountTable*[A](keys: openArray[A]): TCountTable[A] =
+  ## creates a new count table with every key in `keys` having a count of 1.
+  result = initCountTable[A](nextPowerOfTwo(keys.len+10))
+  for key in items(keys): result[key] = 1
+
 proc `$`*[A](t: TCountTable[A]): string =
-  ## The `$` operator for string tables.
-  if t.len == 0:
-    result = "{:}"
+  ## The `$` operator for count tables.
+  dollarImpl()
+
+proc inc*[A](t: TCountTable[A], key: A, val = 1) = 
+  ## increments `t[key]` by `val`.
+  var index = RawGet(t, key)
+  if index >= 0:
+    inc(t.data[index].val, val)
   else:
-    result = "{"
-    for key, val in pairs(t):
-      if result.len > 1: result.add(", ")
-      result.add($key)
-      result.add(": ")
-      result.add($val)
-    result.add("}")
+    if mustRehash(len(t.data), t.counter): Enlarge(t)
+    RawInsert(t, t.data, key, val)
+    inc(t.counter)
 
+proc Smallest*[A](t: TCountTable[A]): tuple[key: A, val: int] =
+  ## returns the largest (key,val)-pair. Efficiency: O(n)
+  assert t.len > 0
+  var minIdx = 0
+  for h in 1..high(t.data):
+    if t.data[h].val > 0 and t.data[minIdx].val > t.data[h].val: minIdx = h
+  result.key = t.data[minIdx].key
+  result.val = t.data[minIdx].val
+
+proc Largest*[A](t: TCountTable[A]): tuple[key: A, val: int] =
+  ## returns the (key,val)-pair with the largest `val`. Efficiency: O(n)
+  assert t.len > 0
+  var maxIdx = 0
+  for h in 1..high(t.data):
+    if t.data[maxIdx].val < t.data[h].val: maxIdx = h
+  result.key = t.data[maxIdx].key
+  result.val = t.data[maxIdx].val
+
+proc sort*[A](t: var TCountTable[A]) = 
+  ## sorts the count table so that the entry with the highest counter comes 
+  ## first. This is destructive! You must not modify `t` afterwards!
+  ## You can use the iterators `pairs`,  `keys`, and `values` to iterate over
+  ## `t` in the sorted order.
+
+  # we use shellsort here; fast enough and simple
+  var h = 1
+  while true:
+    h = 3 * h + 1
+    if h >= t.data.high: break
+  while true: 
+    h = h div 3
+    for i in countup(h, t.data.high):
+      var j = i
+      while t.data[j-h].val < t.data[j].val:
+        swap(t.data[j], t.data[j-h])
+        j = j-h
+        if j < h: break
+    if h == 1: break
 
 when isMainModule:
-  var table = newHashTable[string, float]()
+  var table = initHashTable[string, float]()
   table["test"] = 1.2345
   table["111"] = 1.000043
   echo table
author	Araq <rumpf_a@web.de>	2011-06-06 08:45:11 +0200
committer	Araq <rumpf_a@web.de>	2011-06-06 08:45:11 +0200
commit	42eb21be7b422b47732a56a1b8420b6f1b63f2c6 (patch)
tree	fdd97f56d6c4f88b23f6c38ebea2dde59e762516 /lib/pure/collections
parent	958961bd8d0bc699c6301984a706c5d182079c71 (diff)
download	Nim-42eb21be7b422b47732a56a1b8420b6f1b63f2c6.tar.gz