diff options
author | Araq <rumpf_a@web.de> | 2011-06-07 03:37:36 +0200 |
---|---|---|
committer | Araq <rumpf_a@web.de> | 2011-06-07 03:37:36 +0200 |
commit | 3bc821aa5c676869d65f1448a005dfd47f197f61 (patch) | |
tree | 16b757f5f8bcab86e46b677b8b8becd63ff04bed /lib/pure/collections | |
parent | 42eb21be7b422b47732a56a1b8420b6f1b63f2c6 (diff) | |
download | Nim-3bc821aa5c676869d65f1448a005dfd47f197f61.tar.gz |
basic generic collections implemented and tested
Diffstat (limited to 'lib/pure/collections')
-rwxr-xr-x | lib/pure/collections/lists.nim | 55 | ||||
-rw-r--r-- | lib/pure/collections/sets.nim | 231 | ||||
-rw-r--r-- | lib/pure/collections/tables.nim | 81 |
3 files changed, 287 insertions, 80 deletions
diff --git a/lib/pure/collections/lists.nim b/lib/pure/collections/lists.nim index 10caac336..55c3d6ad9 100755 --- a/lib/pure/collections/lists.nim +++ b/lib/pure/collections/lists.nim @@ -12,28 +12,28 @@ ## be manipulated directly for efficiency. type - TDoublyLinkedNode*[T] {.pure, - final.} = object ## a node a doubly linked list consists of + TDoublyLinkedNode* {.pure, + final.}[T] = object ## a node a doubly linked list consists of next*, prev*: ref TDoublyLinkedNode[T] value*: T PDoublyLinkedNode*[T] = ref TDoublyLinkedNode[T] - TSinglyLinkedNode*[T] {.pure, - final.} = object ## a node a singly linked list consists of + TSinglyLinkedNode* {.pure, + final.}[T] = object ## a node a singly linked list consists of next*: ref TSinglyLinkedNode[T] value*: T PSinglyLinkedNode*[T] = ref TSinglyLinkedNode[T] - TSinglyLinkedList*[T] {.pure, final.} = object ## a singly linked list + TSinglyLinkedList* {.pure, final.}[T] = object ## a singly linked list head*, tail*: PSinglyLinkedNode[T] - TDoublyLinkedList*[T] {.pure, final.} = object ## a doubly linked list + TDoublyLinkedList* {.pure, final.}[T] = object ## a doubly linked list head*, tail*: PDoublyLinkedNode[T] - TSinglyLinkedRing*[T] {.pure, final.} = object ## a singly linked ring + TSinglyLinkedRing* {.pure, final.}[T] = object ## a singly linked ring head*: PSinglyLinkedNode[T] - TDoublyLinkedRing*[T] {.pure, final.} = object ## a doubly linked ring + TDoublyLinkedRing* {.pure, final.}[T] = object ## a doubly linked ring head*: PDoublyLinkedNode[T] proc newDoublyLinkedNode*[T](value: T): PDoublyLinkedNode[T] = @@ -240,17 +240,15 @@ proc prepend*[T](L: var TSinglyLinkedRing[T], value: T) = proc append*[T](L: var TDoublyLinkedRing[T], n: PDoublyLinkedNode[T]) = ## appends a node `n` to `L`. Efficiency: O(1). - if L.tail != nil: - L.tail.next = n - n.prev = L.tail + if L.head != nil: n.next = L.head + n.prev = L.head.prev + L.head.prev.next = n + L.head.prev = n else: - # both head and tail are nil: - assert L.head == nil - L.head = n n.prev = n n.next = n - L.tail = n + L.head = n proc append*[T](L: var TDoublyLinkedRing[T], value: T) = ## appends a value to `L`. Efficiency: O(1). @@ -259,13 +257,11 @@ proc append*[T](L: var TDoublyLinkedRing[T], value: T) = proc prepend*[T](L: var TDoublyLinkedRing[T], n: PDoublyLinkedNode[T]) = ## prepends a node `n` to `L`. Efficiency: O(1). if L.head != nil: - L.head.prev = n - n.prev = L.tail n.next = L.head + n.prev = L.head.prev + L.head.prev.next = n + L.head.prev = n else: - # both head and tail are nil: - assert L.tail == nil - L.tail = n n.prev = n n.next = n L.head = n @@ -276,19 +272,14 @@ proc prepend*[T](L: var TDoublyLinkedRing[T], value: T) = proc remove*[T](L: var TDoublyLinkedRing[T], n: PDoublyLinkedNode[T]) = ## removes `n` from `L`. Efficiency: O(1). - if n == L.tail: - if n == L.head: - # only element: - L.tail = nil - L.head = nil - else: - L.tail = n.prev - elif n == L.head: - L.head = n.next n.next.prev = n.prev n.prev.next = n.next - # break cycles for the GC; not necessary, but might help: - n.next = nil - n.prev = nil + if n == L.head: + var p = L.head.prev + if p == L.head: + # only one element left: + L.head = nil + else: + L.head = L.head.prev diff --git a/lib/pure/collections/sets.nim b/lib/pure/collections/sets.nim new file mode 100644 index 000000000..331881d88 --- /dev/null +++ b/lib/pure/collections/sets.nim @@ -0,0 +1,231 @@ +# +# +# Nimrod's Runtime Library +# (c) Copyright 2011 Andreas Rumpf +# +# See the file "copying.txt", included in this +# distribution, for details about the copyright. +# + +## The ``sets`` module implements an efficient hash set and ordered hash set. +## +## Note: The data types declared here have *value semantics*: This means that +## ``=`` performs a copy of the hash table. If you are overly concerned with +## efficiency and know what you do (!), you can define the symbol +## ``shallowADT`` to compile a version that uses shallow copies instead. + +import + os, hashes, math + +when defined(shallowADT): + {.pragma: myShallow, shallow.} +else: + {.pragma: myShallow.} + +type + TSlotEnum = enum seEmpty, seFilled, seDeleted + TKeyValuePair[A] = tuple[slot: TSlotEnum, key: A] + TKeyValuePairSeq[A] = seq[TKeyValuePair[A]] + TSet* {.final, myShallow.}[A] = object + data: TKeyValuePairSeq[A] + counter: int + +proc len*[A](s: TSet[A]): int = + ## returns the number of keys in `s`. + result = s.counter + +proc card*[A](s: TSet[A]): int = + ## alias for `len`. + result = s.counter + +iterator items*[A](s: TSet[A]): A = + ## iterates over any key in the table `t`. + for h in 0..high(s.data): + if s.data[h].slot == seFilled: yield s.data[h].key + +const + growthFactor = 2 + +proc mustRehash(length, counter: int): bool {.inline.} = + assert(length > counter) + result = (length * 2 < counter * 3) or (length - counter < 4) + +proc nextTry(h, maxHash: THash): THash {.inline.} = + result = ((5 * h) + 1) and maxHash + +template rawGetImpl() = + var h: THash = hash(key) and high(s.data) # start with real hash value + while s.data[h].slot != seEmpty: + if s.data[h].key == key and s.data[h].slot == seFilled: + return h + h = nextTry(h, high(s.data)) + result = -1 + +template rawInsertImpl() = + var h: THash = hash(key) and high(data) + while data[h].slot == seFilled: + h = nextTry(h, high(data)) + data[h].key = key + data[h].slot = seFilled + +proc RawGet[A](s: TSet[A], key: A): int = + rawGetImpl() + +proc contains*[A](s: TSet[A], key: A): bool = + ## returns true iff `key` is in `s`. + var index = RawGet(t, key) + result = index >= 0 + +proc RawInsert[A](s: var TSet[A], data: var TKeyValuePairSeq[A], key: A) = + rawInsertImpl() + +proc Enlarge[A](s: var TSet[A]) = + var n: TKeyValuePairSeq[A] + newSeq(n, len(s.data) * growthFactor) + for i in countup(0, high(s.data)): + if s.data[i].slot == seFilled: RawInsert(s, n, s.data[i].key) + swap(s.data, n) + +template inclImpl() = + var index = RawGet(s, key) + if index < 0: + if mustRehash(len(s.data), s.counter): Enlarge(s) + RawInsert(s, s.data, key) + inc(s.counter) + +template containsOrInclImpl() = + var index = RawGet(s, key) + if index >= 0: + result = true + else: + if mustRehash(len(s.data), s.counter): Enlarge(s) + RawInsert(s, s.data, key) + inc(s.counter) + +proc incl*[A](s: var TSet[A], key: A) = + ## includes an element `key` in `s`. + inclImpl() + +proc excl*[A](s: var TSet[A], key: A) = + ## excludes `key` from the set `s`. + var index = RawGet(t, key) + if index >= 0: + s.data[index].slot = seDeleted + dec(s.counter) + +proc containsOrIncl*[A](s: var TSet[A], key: A): bool = + ## returns true if `s` contains `key`, otherwise `key` is included in `s` + ## and false is returned. + containsOrInclImpl() + +proc initSet*[A](initialSize=64): TSet[A] = + ## creates a new hash set that is empty. `initialSize` needs to be + ## a power of two. + assert isPowerOfTwo(initialSize) + result.counter = 0 + newSeq(result.data, initialSize) + +proc toSet*[A](keys: openarray[A]): TSet[A] = + ## creates a new hash set that contains the given `keys`. + result = initSet[A](nextPowerOfTwo(keys.len+10)) + for key in items(keys): result.incl(key) + +template dollarImpl(): stmt = + result = "{" + for key in items(s): + if result.len > 1: result.add(", ") + result.add($key) + result.add("}") + +proc `$`*[A](s: TSet[A]): string = + ## The `$` operator for hash sets. + dollarImpl() + +# ------------------------------ ordered table ------------------------------ + +type + TOrderedKeyValuePair[A] = tuple[ + slot: TSlotEnum, next: int, key: A] + TOrderedKeyValuePairSeq[A] = seq[TOrderedKeyValuePair[A]] + TOrderedSet* {. + final, myShallow.}[A] = object ## set that remembers insertion order + data: TOrderedKeyValuePairSeq[A] + counter, first, last: int + +proc len*[A](s: TOrderedSet[A]): int {.inline.} = + ## returns the number of keys in `s`. + result = t.counter + +proc card*[A](s: TOrderedSet[A]): int {.inline.} = + ## alias for `len`. + result = t.counter + +template forAllOrderedPairs(yieldStmt: stmt) = + var h = s.first + while h >= 0: + var nxt = s.data[h].next + if s.data[h].slot == seFilled: yieldStmt + h = nxt + +iterator items*[A](s: TOrderedSet[A]): A = + ## iterates over any key in the set `s` in insertion order. + forAllOrderedPairs: + yield s.data[h].key + +proc RawGet[A](s: TOrderedSet[A], key: A): int = + rawGetImpl() + +proc contains*[A](s: TOrderedSet[A], key: A): bool = + ## returns true iff `key` is in `s`. + var index = RawGet(s, key) + result = index >= 0 + +proc RawInsert[A](s: var TOrderedSet[A], + data: var TOrderedKeyValuePairSeq[A], key: A) = + rawInsertImpl() + data[h].next = -1 + if s.first < 0: s.first = h + if s.last >= 0: data[s.last].next = h + s.last = h + +proc Enlarge[A](s: var TOrderedSet[A]) = + var n: TOrderedKeyValuePairSeq[A] + newSeq(n, len(s.data) * growthFactor) + var h = s.first + s.first = -1 + s.last = -1 + while h >= 0: + var nxt = s.data[h].next + if s.data[h].slot == seFilled: + RawInsert(s, n, s.data[h].key) + h = nxt + swap(s.data, n) + +proc incl*[A](s: var TOrderedSet[A], key: A) = + ## includes an element `key` in `s`. + inclImpl() + +proc containsOrIncl*[A](s: var TOrderedSet[A], key: A): bool = + ## returns true if `s` contains `key`, otherwise `key` is included in `s` + ## and false is returned. + containsOrInclImpl() + +proc initOrderedSet*[A](initialSize=64): TOrderedSet[A] = + ## creates a new ordered hash set that is empty. `initialSize` needs to be + ## a power of two. + assert isPowerOfTwo(initialSize) + result.counter = 0 + result.first = -1 + result.last = -1 + newSeq(result.data, initialSize) + +proc toOrderedSet*[A](keys: openarray[A]): TOrderedSet[A] = + ## creates a new ordered hash set that contains the given `keys`. + result = initOrderedSet[A](nextPowerOfTwo(keys.len+10)) + for key in items(keys): result.incl(key) + +proc `$`*[A](s: TOrderedSet[A]): string = + ## The `$` operator for ordered hash sets. + dollarImpl() + + diff --git a/lib/pure/collections/tables.nim b/lib/pure/collections/tables.nim index f132051da..f817ec800 100644 --- a/lib/pure/collections/tables.nim +++ b/lib/pure/collections/tables.nim @@ -1,7 +1,7 @@ # # # Nimrod's Runtime Library -# (c) Copyright 2011 Andreas Rumpf, Dominik Picheta +# (c) Copyright 2011 Andreas Rumpf # # See the file "copying.txt", included in this # distribution, for details about the copyright. @@ -12,7 +12,7 @@ ## ## Note: The data types declared here have *value semantics*: This means that ## ``=`` performs a copy of the hash table. If you are overly concerned with -## efficiency and don't need this behaviour, you can define the symbol +## efficiency and know what you do (!), you can define the symbol ## ``shallowADT`` to compile a version that uses shallow copies instead. import @@ -123,7 +123,7 @@ proc del*[A, B](t: var TTable[A, B], key: A) = dec(t.counter) proc initTable*[A, B](initialSize=64): TTable[A, B] = - ## creates a new hash table table that is empty. `initialSize` needs to be + ## creates a new hash table that is empty. `initialSize` needs to be ## a power of two. assert isPowerOfTwo(initialSize) result.counter = 0 @@ -132,7 +132,7 @@ proc initTable*[A, B](initialSize=64): TTable[A, B] = proc toTable*[A, B](pairs: openarray[tuple[key: A, val: B]]): TTable[A, B] = ## creates a new hash table that contains the given `pairs`. - result = initTable[A](nextPowerOfTwo(pairs.len+10)) + result = initTable[A, B](nextPowerOfTwo(pairs.len+10)) for key, val in items(pairs): result[key] = val template dollarImpl(): stmt = @@ -148,7 +148,7 @@ template dollarImpl(): stmt = result.add("}") proc `$`*[A, B](t: TTable[A, B]): string = - ## The `$` operator for string tables. + ## The `$` operator for hash tables. dollarImpl() # ------------------------------ ordered table ------------------------------ @@ -167,11 +167,11 @@ proc len*[A, B](t: TOrderedTable[A, B]): int {.inline.} = result = t.counter template forAllOrderedPairs(yieldStmt: stmt) = - var i = t.first - while i >= 0: - var nxt = t.data[i].next + var h = t.first + while h >= 0: + var nxt = t.data[h].next if t.data[h].slot == seFilled: yieldStmt - i = nxt + h = nxt iterator pairs*[A, B](t: TOrderedTable[A, B]): tuple[key: A, val: B] = ## iterates over any (key, value) pair in the table `t` in insertion @@ -204,23 +204,29 @@ proc hasKey*[A, B](t: TOrderedTable[A, B], key: A): bool = ## returns true iff `key` is in the table `t`. result = rawGet(t, key) >= 0 -proc RawInsert[A, B](t: TOrderedTable[A, B], +proc RawInsert[A, B](t: var TOrderedTable[A, B], data: var TOrderedKeyValuePairSeq[A, B], key: A, val: B) = rawInsertImpl() data[h].next = -1 - if first < 0: first = h - if last >= 0: data[last].next = h - lastEntry = h + if t.first < 0: t.first = h + if t.last >= 0: data[t.last].next = h + t.last = h -proc Enlarge[A, B](t: TOrderedTable[A, B]) = +proc Enlarge[A, B](t: var TOrderedTable[A, B]) = var n: TOrderedKeyValuePairSeq[A, B] newSeq(n, len(t.data) * growthFactor) - forAllOrderedPairs: - RawInsert(t, n, t.data[h].key, t.data[h].val) + var h = t.first + t.first = -1 + t.last = -1 + while h >= 0: + var nxt = t.data[h].next + if t.data[h].slot == seFilled: + RawInsert(t, n, t.data[h].key, t.data[h].val) + h = nxt swap(t.data, n) -proc `[]=`*[A, B](t: TOrderedTable[A, B], key: A, val: B) = +proc `[]=`*[A, B](t: var TOrderedTable[A, B], key: A, val: B) = ## puts a (key, value)-pair into `t`. putImpl() @@ -240,7 +246,7 @@ proc toOrderedTable*[A, B](pairs: openarray[tuple[key: A, for key, val in items(pairs): result[key] = val proc `$`*[A, B](t: TOrderedTable[A, B]): string = - ## The `$` operator for hash tables. + ## The `$` operator for ordered hash tables. dollarImpl() # ------------------------------ count tables ------------------------------- @@ -295,14 +301,14 @@ proc RawInsert[A](t: TCountTable[A], data: var seq[tuple[key: A, val: int]], data[h].key = key data[h].val = val -proc Enlarge[A](t: TCountTable[A]) = +proc Enlarge[A](t: var TCountTable[A]) = var n: seq[tuple[key: A, val: int]] newSeq(n, len(t.data) * growthFactor) for i in countup(0, high(t.data)): if t.data[i].val != 0: RawInsert(t, n, t.data[i].key, t.data[i].val) swap(t.data, n) -proc `[]=`*[A](t: TCountTable[A], key: A, val: int) = +proc `[]=`*[A](t: var TCountTable[A], key: A, val: int) = ## puts a (key, value)-pair into `t`. `val` has to be positive. assert val > 0 PutImpl() @@ -323,7 +329,7 @@ proc `$`*[A](t: TCountTable[A]): string = ## The `$` operator for count tables. dollarImpl() -proc inc*[A](t: TCountTable[A], key: A, val = 1) = +proc inc*[A](t: var TCountTable[A], key: A, val = 1) = ## increments `t[key]` by `val`. var index = RawGet(t, key) if index >= 0: @@ -351,8 +357,8 @@ proc Largest*[A](t: TCountTable[A]): tuple[key: A, val: int] = result.key = t.data[maxIdx].key result.val = t.data[maxIdx].val -proc sort*[A](t: var TCountTable[A]) = - ## sorts the count table so that the entry with the highest counter comes +proc sort*[A](t: var TCountTable[A]) = + ## sorts the count table so that the entry with the highest counter comes ## first. This is destructive! You must not modify `t` afterwards! ## You can use the iterators `pairs`, `keys`, and `values` to iterate over ## `t` in the sorted order. @@ -361,35 +367,14 @@ proc sort*[A](t: var TCountTable[A]) = var h = 1 while true: h = 3 * h + 1 - if h >= t.data.high: break - while true: + if h >= high(t.data): break + while true: h = h div 3 - for i in countup(h, t.data.high): + for i in countup(h, high(t.data)): var j = i - while t.data[j-h].val < t.data[j].val: + while t.data[j-h].val <= t.data[j].val: swap(t.data[j], t.data[j-h]) j = j-h if j < h: break if h == 1: break -when isMainModule: - var table = initHashTable[string, float]() - table["test"] = 1.2345 - table["111"] = 1.000043 - echo table - table.del("111") - echo table - #echo repr(table["111"]) - #echo(repr(table["1212"])) - table["111"] = 1.5 - table["011"] = 67.9 - echo table - table.del("test") - table.del("111") - echo table - - - echo hash("test") - echo hash("test") - - |