diff options
Diffstat (limited to 'lib/pure/collections')
-rw-r--r-- | lib/pure/collections/LockFreeHash.nim | 607 | ||||
-rw-r--r-- | lib/pure/collections/chains.nim | 44 | ||||
-rw-r--r-- | lib/pure/collections/critbits.nim | 478 | ||||
-rw-r--r-- | lib/pure/collections/deques.nim | 480 | ||||
-rw-r--r-- | lib/pure/collections/hashcommon.nim | 76 | ||||
-rw-r--r-- | lib/pure/collections/heapqueue.nim | 266 | ||||
-rw-r--r-- | lib/pure/collections/intsets.nim | 204 | ||||
-rw-r--r-- | lib/pure/collections/lists.nim | 1044 | ||||
-rw-r--r-- | lib/pure/collections/queues.nim | 102 | ||||
-rw-r--r-- | lib/pure/collections/rtarrays.nim | 9 | ||||
-rw-r--r-- | lib/pure/collections/sequtils.nim | 1429 | ||||
-rw-r--r-- | lib/pure/collections/setimpl.nim | 156 | ||||
-rw-r--r-- | lib/pure/collections/sets.nim | 1482 | ||||
-rw-r--r-- | lib/pure/collections/sharedlist.nim | 105 | ||||
-rw-r--r-- | lib/pure/collections/sharedtables.nim | 252 | ||||
-rw-r--r-- | lib/pure/collections/tableimpl.nim | 231 | ||||
-rw-r--r-- | lib/pure/collections/tables.nim | 3384 |
17 files changed, 7213 insertions, 3136 deletions
diff --git a/lib/pure/collections/LockFreeHash.nim b/lib/pure/collections/LockFreeHash.nim deleted file mode 100644 index c3954468a..000000000 --- a/lib/pure/collections/LockFreeHash.nim +++ /dev/null @@ -1,607 +0,0 @@ -#nim c -t:-march=i686 --cpu:amd64 --threads:on -d:release lockfreehash.nim - -import unsigned, math, hashes - -#------------------------------------------------------------------------------ -## Memory Utility Functions - -proc newHeap*[T](): ptr T = - result = cast[ptr T](alloc0(sizeof(T))) - -proc copyNew*[T](x: var T): ptr T = - var - size = sizeof(T) - mem = alloc(size) - copyMem(mem, x.addr, size) - return cast[ptr T](mem) - -proc copyTo*[T](val: var T, dest: int) = - copyMem(pointer(dest), val.addr, sizeof(T)) - -proc allocType*[T](): pointer = alloc(sizeof(T)) - -proc newShared*[T](): ptr T = - result = cast[ptr T](allocShared0(sizeof(T))) - -proc copyShared*[T](x: var T): ptr T = - var - size = sizeof(T) - mem = allocShared(size) - copyMem(mem, x.addr, size) - return cast[ptr T](mem) - -#------------------------------------------------------------------------------ -## Pointer arithmetic - -proc `+`*(p: pointer, i: int): pointer {.inline.} = - cast[pointer](cast[int](p) + i) - -const - minTableSize = 8 - reProbeLimit = 12 - minCopyWork = 4096 - intSize = sizeof(int) - - - -when sizeof(int) == 4: # 32bit - type - TRaw = range[0..1073741823] - ## The range of uint values that can be stored directly in a value slot - ## when on a 32 bit platform - -elif sizeof(int) == 8: # 64bit - type - TRaw = range[0..4611686018427387903] - ## The range of uint values that can be stored directly in a value slot - ## when on a 64 bit platform -else: - {.error: "unsupported platform".} - -type - TEntry = tuple - key: int - value: int - - TEntryArr = ptr array[0..10_000_000, TEntry] - - PConcTable[K,V] = ptr object {.pure.} - len: int - used: int - active: int - copyIdx: int - copyDone: int - next: PConcTable[K,V] - data: TEntryArr - - -proc setVal[K,V](table: var PConcTable[K,V], key: int, val: int, - expVal: int, match: bool): int - -#------------------------------------------------------------------------------ - -# Create a new table -proc newLFTable*[K,V](size: int = minTableSize): PConcTable[K,V] = - let - dataLen = max(nextPowerOfTwo(size), minTableSize) - dataSize = dataLen*sizeof(TEntry) - dataMem = allocShared0(dataSize) - tableSize = 7 * intSize - tableMem = allocShared0(tableSize) - table = cast[PConcTable[K,V]](tableMem) - table.len = dataLen - table.used = 0 - table.active = 0 - table.copyIdx = 0 - table.copyDone = 0 - table.next = nil - table.data = cast[TEntryArr](dataMem) - result = table - -#------------------------------------------------------------------------------ - -# Delete a table -proc deleteConcTable[K,V](tbl: PConcTable[K,V]) = - deallocShared(tbl.data) - deallocShared(tbl) - -#------------------------------------------------------------------------------ - -proc `[]`[K,V](table: var PConcTable[K,V], i: int): var TEntry {.inline.} = - table.data[i] - -#------------------------------------------------------------------------------ -# State flags stored in ptr - - -proc pack[T](x: T): int {.inline.} = - result = (cast[int](x) shl 2) - #echo("packKey ",cast[int](x) , " -> ", result) - -# Pop the flags off returning a 4 byte aligned ptr to our Key or Val -proc pop(x: int): int {.inline.} = - result = x and 0xFFFFFFFC'i32 - -# Pop the raw value off of our Key or Val -proc popRaw(x: int): int {.inline.} = - result = x shr 2 - -# Pop the flags off returning a 4 byte aligned ptr to our Key or Val -proc popPtr[V](x: int): ptr V {.inline.} = - result = cast[ptr V](pop(x)) - #echo("popPtr " & $x & " -> " & $cast[int](result)) - -# Ghost (sentinel) -# K or V is no longer valid use new table -const Ghost = 0xFFFFFFFC -proc isGhost(x: int): bool {.inline.} = - result = x == 0xFFFFFFFC - -# Tombstone -# applied to V = K is dead -proc isTomb(x: int): bool {.inline.} = - result = (x and 0x00000002) != 0 - -proc setTomb(x: int): int {.inline.} = - result = x or 0x00000002 - -# Prime -# K or V is in new table copied from old -proc isPrime(x: int): bool {.inline.} = - result = (x and 0x00000001) != 0 - -proc setPrime(x: int): int {.inline.} = - result = x or 0x00000001 - -#------------------------------------------------------------------------------ - -##This is for i32 only need to override for i64 -proc hashInt(x: int):int {.inline.} = - var h = uint32(x) #shr 2'u32 - h = h xor (h shr 16'u32) - h *= 0x85ebca6b'u32 - h = h xor (h shr 13'u32) - h *= 0xc2b2ae35'u32 - h = h xor (h shr 16'u32) - result = int(h) - -#------------------------------------------------------------------------------ - -proc resize[K,V](self: PConcTable[K,V]): PConcTable[K,V] = - var next = atomic_load_n(self.next.addr, ATOMIC_RELAXED) - #echo("next = " & $cast[int](next)) - if next != nil: - #echo("A new table already exists, copy in progress") - return next - var - oldLen = atomic_load_n(self.len.addr, ATOMIC_RELAXED) - newTable = newLFTable[K,V](oldLen*2) - success = atomic_compare_exchange_n(self.next.addr, next.addr, newTable, - false, ATOMIC_RELAXED, ATOMIC_RELAXED) - if not success: - echo("someone beat us to it! delete table we just created and return his " & $cast[int](next)) - deleteConcTable(newTable) - return next - else: - echo("Created New Table! " & $cast[int](newTable) & " Size = " & $newTable.len) - return newTable - - -#------------------------------------------------------------------------------ -#proc keyEQ[K](key1: ptr K, key2: ptr K): bool {.inline.} = -proc keyEQ[K](key1: int, key2: int): bool {.inline.} = - result = false - when K is TRaw: - if key1 == key2: - result = true - else: - var - p1 = popPtr[K](key1) - p2 = popPtr[K](key2) - if p1 != nil and p2 != nil: - if cast[int](p1) == cast[int](p2): - return true - if p1[] == p2[]: - return true - -#------------------------------------------------------------------------------ - -#proc tableFull(self: var PConcTable[K,V]) : bool {.inline.} = - - -#------------------------------------------------------------------------------ - -proc copySlot[K,V](idx: int, oldTbl: var PConcTable[K,V], newTbl: var PConcTable[K,V]): bool = - #echo("Copy idx " & $idx) - var - oldVal = 0 - oldkey = 0 - ok = false - result = false - #Block the key so no other threads waste time here - while not ok: - ok = atomic_compare_exchange_n(oldTbl[idx].key.addr, oldKey.addr, - setTomb(oldKey), false, ATOMIC_RELAXED, ATOMIC_RELAXED) - #echo("oldKey was = " & $oldKey & " set it to tomb " & $setTomb(oldKey)) - #Prevent new values from appearing in the old table by priming - oldVal = atomic_load_n(oldTbl[idx].value.addr, ATOMIC_RELAXED) - while not isPrime(oldVal): - var box = if oldVal == 0 or isTomb(oldVal) : oldVal.setTomb.setPrime - else: oldVal.setPrime - if atomic_compare_exchange_n(oldTbl[idx].value.addr, oldVal.addr, - box, false, ATOMIC_RELAXED, ATOMIC_RELAXED): - if isPrime(box) and isTomb(box): - return true - oldVal = box - break - #echo("oldVal was = ", oldVal, " set it to prime ", box) - if isPrime(oldVal) and isTomb(oldVal): - #when not (K is TRaw): - # deallocShared(popPtr[K](oldKey)) - return false - if isTomb(oldVal): - echo("oldVal is Tomb!!!, should not happen") - if pop(oldVal) != 0: - result = setVal(newTbl, pop(oldKey), pop(oldVal), 0, true) == 0 - if result: - #echo("Copied a Slot! idx= " & $idx & " key= " & $oldKey & " val= " & $oldVal) - else: - #echo("copy slot failed") - # Our copy is done so we disable the old slot - while not ok: - ok = atomic_compare_exchange_n(oldTbl[idx].value.addr, oldVal.addr, - oldVal.setTomb.setPrime , false, ATOMIC_RELAXED, ATOMIC_RELAXED) - #echo("disabled old slot") - #echo"---------------------" - -#------------------------------------------------------------------------------ - -proc promote[K,V](table: var PConcTable[K,V]) = - var - newData = atomic_load_n(table.next.data.addr, ATOMIC_RELAXED) - newLen = atomic_load_n(table.next.len.addr, ATOMIC_RELAXED) - newUsed = atomic_load_n(table.next.used.addr, ATOMIC_RELAXED) - - deallocShared(table.data) - atomic_store_n(table.data.addr, newData, ATOMIC_RELAXED) - atomic_store_n(table.len.addr, newLen, ATOMIC_RELAXED) - atomic_store_n(table.used.addr, newUsed, ATOMIC_RELAXED) - atomic_store_n(table.copyIdx.addr, 0, ATOMIC_RELAXED) - atomic_store_n(table.copyDone.addr, 0, ATOMIC_RELAXED) - deallocShared(table.next) - atomic_store_n(table.next.addr, nil, ATOMIC_RELAXED) - echo("new table swapped!") - -#------------------------------------------------------------------------------ - -proc checkAndPromote[K,V](table: var PConcTable[K,V], workDone: int): bool = - var - oldLen = atomic_load_n(table.len.addr, ATOMIC_RELAXED) - copyDone = atomic_load_n(table.copyDone.addr, ATOMIC_RELAXED) - ok: bool - result = false - if workDone > 0: - #echo("len to copy =" & $oldLen) - #echo("copyDone + workDone = " & $copyDone & " + " & $workDone) - while not ok: - ok = atomic_compare_exchange_n(table.copyDone.addr, copyDone.addr, - copyDone + workDone, false, ATOMIC_RELAXED, ATOMIC_RELAXED) - #if ok: echo("set copyDone") - # If the copy is done we can promote this table - if copyDone + workDone >= oldLen: - # Swap new data - #echo("work is done!") - table.promote - result = true - -#------------------------------------------------------------------------------ - -proc copySlotAndCheck[K,V](table: var PConcTable[K,V], idx: int): - PConcTable[K,V] = - var - newTable = cast[PConcTable[K,V]](atomic_load_n(table.next.addr, ATOMIC_RELAXED)) - result = newTable - if newTable != nil and copySlot(idx, table, newTable): - #echo("copied a single slot, idx = " & $idx) - if checkAndPromote(table, 1): return table - - -#------------------------------------------------------------------------------ - -proc helpCopy[K,V](table: var PConcTable[K,V]): PConcTable[K,V] = - var - newTable = cast[PConcTable[K,V]](atomic_load_n(table.next.addr, ATOMIC_RELAXED)) - result = newTable - if newTable != nil: - var - oldLen = atomic_load_n(table.len.addr, ATOMIC_RELAXED) - copyDone = atomic_load_n(table.copyDone.addr, ATOMIC_RELAXED) - copyIdx = 0 - work = min(oldLen, minCopyWork) - #panicStart = -1 - workDone = 0 - if copyDone < oldLen: - var ok: bool - while not ok: - ok = atomic_compare_exchange_n(table.copyIdx.addr, copyIdx.addr, - copyIdx + work, false, ATOMIC_RELAXED, ATOMIC_RELAXED) - #echo("copy idx = ", copyIdx) - for i in 0..work-1: - var idx = (copyIdx + i) and (oldLen - 1) - if copySlot(idx, table, newTable): - workDone += 1 - if workDone > 0: - #echo("did work ", workDone, " on thread ", cast[int](myThreadID[pointer]())) - if checkAndPromote(table, workDone): return table - # In case a thread finished all the work then got stalled before promotion - if checkAndPromote(table, 0): return table - - - -#------------------------------------------------------------------------------ - -proc setVal[K,V](table: var PConcTable[K,V], key: int, val: int, - expVal: int, match: bool): int = - #echo("-try set- in table ", " key = ", (popPtr[K](key)[]), " val = ", val) - when K is TRaw: - var idx = hashInt(key) - else: - var idx = popPtr[K](key)[].hash - var - nextTable: PConcTable[K,V] - probes = 1 - # spin until we find a key slot or build and jump to next table - while true: - idx = idx and (table.len - 1) - #echo("try set idx = " & $idx & "for" & $key) - var - probedKey = 0 - openKey = atomic_compare_exchange_n(table[idx].key.addr, probedKey.addr, - key, false, ATOMIC_RELAXED, ATOMIC_RELAXED) - if openKey: - if val.isTomb: - #echo("val was tomb, bail, no reason to set an open slot to tomb") - return val - #increment used slots - #echo("found an open slot, total used = " & - #$atomic_add_fetch(table.used.addr, 1, ATOMIC_RELAXED)) - discard atomic_add_fetch(table.used.addr, 1, ATOMIC_RELAXED) - break # We found an open slot - #echo("set idx ", idx, " key = ", key, " probed = ", probedKey) - if keyEQ[K](probedKey, key): - #echo("we found the matching slot") - break # We found a matching slot - if (not(expVal != 0 and match)) and (probes >= reProbeLimit or key.isTomb): - if key.isTomb: echo("Key is Tombstone") - #if probes >= reProbeLimit: echo("Too much probing " & $probes) - #echo("try to resize") - #create next bigger table - nextTable = resize(table) - #help do some copying - #echo("help copy old table to new") - nextTable = helpCopy(table) - #now setVal in the new table instead - #echo("jumping to next table to set val") - return setVal(nextTable, key, val, expVal, match) - else: - idx += 1 - probes += 1 - # Done spinning for a new slot - var oldVal = atomic_load_n(table[idx].value.addr, ATOMIC_RELAXED) - if val == oldVal: - #echo("this val is alredy in the slot") - return oldVal - nextTable = atomic_load_n(table.next.addr, ATOMIC_SEQ_CST) - if nextTable == nil and - ((oldVal == 0 and - (probes >= reProbeLimit or table.used / table.len > 0.8)) or - (isPrime(oldVal))): - if table.used / table.len > 0.8: echo("resize because usage ratio = " & - $(table.used / table.len)) - if isPrime(oldVal): echo("old val isPrime, should be a rare mem ordering event") - nextTable = resize(table) - if nextTable != nil: - #echo("tomb old slot then set in new table") - nextTable = copySlotAndCheck(table,idx) - return setVal(nextTable, key, val, expVal, match) - # Finally ready to add new val to table - while true: - if match and oldVal != expVal: - #echo("set failed, no match oldVal= " & $oldVal & " expVal= " & $expVal) - return oldVal - if atomic_compare_exchange_n(table[idx].value.addr, oldVal.addr, - val, false, ATOMIC_RELEASE, ATOMIC_RELAXED): - #echo("val set at table " & $cast[int](table)) - if expVal != 0: - if (oldVal == 0 or isTomb(oldVal)) and not isTomb(val): - discard atomic_add_fetch(table.active.addr, 1, ATOMIC_RELAXED) - elif not (oldVal == 0 or isTomb(oldVal)) and isTomb(val): - discard atomic_add_fetch(table.active.addr, -1, ATOMIC_RELAXED) - if oldVal == 0 and expVal != 0: - return setTomb(oldVal) - else: return oldVal - if isPrime(oldVal): - nextTable = copySlotAndCheck(table, idx) - return setVal(nextTable, key, val, expVal, match) - -#------------------------------------------------------------------------------ - -proc getVal[K,V](table: var PConcTable[K,V], key: int): int = - #echo("-try get- key = " & $key) - when K is TRaw: - var idx = hashInt(key) - else: - var idx = popPtr[K](key)[].hash - #echo("get idx ", idx) - var - probes = 0 - val: int - while true: - idx = idx and (table.len - 1) - var - newTable: PConcTable[K,V] # = atomic_load_n(table.next.addr, ATOMIC_ACQUIRE) - probedKey = atomic_load_n(table[idx].key.addr, ATOMIC_SEQ_CST) - if keyEQ[K](probedKey, key): - #echo("found key after ", probes+1) - val = atomic_load_n(table[idx].value.addr, ATOMIC_ACQUIRE) - if not isPrime(val): - if isTomb(val): - #echo("val was tomb but not prime") - return 0 - else: - #echo("-GotIt- idx = ", idx, " key = ", key, " val ", val ) - return val - else: - newTable = copySlotAndCheck(table, idx) - return getVal(newTable, key) - else: - #echo("probe ", probes, " idx = ", idx, " key = ", key, " found ", probedKey ) - if probes >= reProbeLimit*4 or key.isTomb: - if newTable == nil: - #echo("too many probes and no new table ", key, " ", idx ) - return 0 - else: - newTable = helpCopy(table) - return getVal(newTable, key) - idx += 1 - probes += 1 - -#------------------------------------------------------------------------------ - -#proc set*(table: var PConcTable[TRaw,TRaw], key: TRaw, val: TRaw) = -# discard setVal(table, pack(key), pack(key), 0, false) - -#proc set*[V](table: var PConcTable[TRaw,V], key: TRaw, val: ptr V) = -# discard setVal(table, pack(key), cast[int](val), 0, false) - -proc set*[K,V](table: var PConcTable[K,V], key: var K, val: var V) = - when not (K is TRaw): - var newKey = cast[int](copyShared(key)) - else: - var newKey = pack(key) - when not (V is TRaw): - var newVal = cast[int](copyShared(val)) - else: - var newVal = pack(val) - var oldPtr = pop(setVal(table, newKey, newVal, 0, false)) - #echo("oldPtr = ", cast[int](oldPtr), " newPtr = ", cast[int](newPtr)) - when not (V is TRaw): - if newVal != oldPtr and oldPtr != 0: - deallocShared(cast[ptr V](oldPtr)) - - - -proc get*[K,V](table: var PConcTable[K,V], key: var K): V = - when not (V is TRaw): - when not (K is TRaw): - return popPtr[V](getVal(table, cast[int](key.addr)))[] - else: - return popPtr[V](getVal(table, pack(key)))[] - else: - when not (K is TRaw): - return popRaw(getVal(table, cast[int](key.addr))) - else: - return popRaw(getVal(table, pack(key))) - - - - - - - - - - - -#proc `[]`[K,V](table: var PConcTable[K,V], key: K): PEntry[K,V] {.inline.} = -# getVal(table, key) - -#proc `[]=`[K,V](table: var PConcTable[K,V], key: K, val: V): PEntry[K,V] {.inline.} = -# setVal(table, key, val) - - - - - - -#Tests ---------------------------- -when isMainModule: - import locks, times, mersenne - - const - numTests = 100000 - numThreads = 10 - - - - type - TTestObj = tuple - thr: int - f0: int - f1: int - - TData = tuple[k: string,v: TTestObj] - PDataArr = array[0..numTests-1, TData] - Dict = PConcTable[string,TTestObj] - - var - thr: array[0..numThreads-1, TThread[Dict]] - - table = newLFTable[string,TTestObj](8) - rand = newMersenneTwister(2525) - - proc createSampleData(len: int): PDataArr = - #result = cast[PDataArr](allocShared0(sizeof(TData)*numTests)) - for i in 0..len-1: - result[i].k = "mark" & $(i+1) - #echo("mark" & $(i+1), " ", hash("mark" & $(i+1))) - result[i].v.thr = 0 - result[i].v.f0 = i+1 - result[i].v.f1 = 0 - #echo("key = " & $(i+1) & " Val ptr = " & $cast[int](result[i].v.addr)) - - - - proc threadProc(tp: Dict) {.thread.} = - var t = cpuTime(); - for i in 1..numTests: - var key = "mark" & $(i) - var got = table.get(key) - got.thr = cast[int](myThreadID[pointer]()) - got.f1 = got.f1 + 1 - table.set(key, got) - t = cpuTime() - t - echo t - - - var testData = createSampleData(numTests) - - for i in 0..numTests-1: - table.set(testData[i].k, testData[i].v) - - var i = 0 - while i < numThreads: - createThread(thr[i], threadProc, table) - i += 1 - - joinThreads(thr) - - - - - - var fails = 0 - - for i in 0..numTests-1: - var got = table.get(testData[i].k) - if got.f0 != i+1 or got.f1 != numThreads: - fails += 1 - echo(got) - - echo("Failed read or write = ", fails) - - - #for i in 1..numTests: - # echo(i, " = ", hashInt(i) and 8191) - - deleteConcTable(table) diff --git a/lib/pure/collections/chains.nim b/lib/pure/collections/chains.nim new file mode 100644 index 000000000..6b2ecd272 --- /dev/null +++ b/lib/pure/collections/chains.nim @@ -0,0 +1,44 @@ +# +# +# Nim's Runtime Library +# (c) Copyright 2016 Andreas Rumpf +# +# See the file "copying.txt", included in this +# distribution, for details about the copyright. +# + +## Template based implementation of singly and doubly linked lists. +## The involved types should have 'prev' or 'next' fields and the +## list header should have 'head' or 'tail' fields. + +template prepend*(header, node) = + when compiles(header.head): + when compiles(node.prev): + if header.head != nil: + header.head.prev = node + node.next = header.head + header.head = node + when compiles(header.tail): + if header.tail == nil: + header.tail = node + +template append*(header, node) = + when compiles(header.head): + if header.head == nil: + header.head = node + when compiles(header.tail): + when compiles(node.prev): + node.prev = header.tail + if header.tail != nil: + header.tail.next = node + header.tail = node + +template unlink*(header, node) = + if node.next != nil: + node.next.prev = node.prev + if node.prev != nil: + node.prev.next = node.next + if header.head == node: + header.head = node.prev + if header.tail == node: + header.tail = node.next diff --git a/lib/pure/collections/critbits.nim b/lib/pure/collections/critbits.nim index 3d10e39aa..24257dacb 100644 --- a/lib/pure/collections/critbits.nim +++ b/lib/pure/collections/critbits.nim @@ -8,32 +8,62 @@ # ## This module implements a `crit bit tree`:idx: which is an efficient -## container for a set or a mapping of strings. Based on the excellent paper -## by Adam Langley. +## container for a sorted set of strings, or for a sorted mapping of strings. Based on the +## [excellent paper by Adam Langley](https://www.imperialviolet.org/binary/critbit.pdf). +## (A crit bit tree is a form of `radix tree`:idx: or `patricia trie`:idx:.) + +runnableExamples: + from std/sequtils import toSeq + + var critbitAsSet: CritBitTree[void] = ["kitten", "puppy"].toCritBitTree + doAssert critbitAsSet.len == 2 + critbitAsSet.incl("") + doAssert "" in critbitAsSet + critbitAsSet.excl("") + doAssert "" notin critbitAsSet + doAssert toSeq(critbitAsSet.items) == @["kitten", "puppy"] + let same = ["puppy", "kitten", "puppy"].toCritBitTree + doAssert toSeq(same.keys) == toSeq(critbitAsSet.keys) + + var critbitAsDict: CritBitTree[int] = {"key1": 42}.toCritBitTree + doAssert critbitAsDict.len == 1 + critbitAsDict["key2"] = 0 + doAssert "key2" in critbitAsDict + doAssert critbitAsDict["key2"] == 0 + critbitAsDict.excl("key1") + doAssert "key1" notin critbitAsDict + doAssert toSeq(critbitAsDict.pairs) == @[("key2", 0)] + +import std/private/since + +when defined(nimPreviewSlimSystem): + import std/assertions type - NodeObj[T] = object {.acyclic.} + NodeObj[T] {.acyclic.} = object byte: int ## byte index of the difference - otherbits: char + otherBits: char case isLeaf: bool of false: child: array[0..1, ref NodeObj[T]] - of true: + of true: key: string when T isnot void: val: T - + Node[T] = ref NodeObj[T] CritBitTree*[T] = object ## The crit bit tree can either be used ## as a mapping from strings to - ## some type ``T`` or as a set of - ## strings if ``T`` is void. + ## some type `T` or as a set of + ## strings if `T` is `void`. root: Node[T] count: int -{.deprecated: [TCritBitTree: CritBitTree].} +func len*[T](c: CritBitTree[T]): int {.inline.} = + ## Returns the number of elements in `c` in O(1). + runnableExamples: + let c = ["key1", "key2"].toCritBitTree + doAssert c.len == 2 -proc len*[T](c: CritBitTree[T]): int = - ## returns the number of elements in `c` in O(1). result = c.count proc rawGet[T](c: CritBitTree[T], key: string): Node[T] = @@ -46,19 +76,22 @@ proc rawGet[T](c: CritBitTree[T], key: string): Node[T] = else: return if it.key == key: it else: nil -proc contains*[T](c: CritBitTree[T], key: string): bool {.inline.} = - ## returns true iff `c` contains the given `key`. +func contains*[T](c: CritBitTree[T], key: string): bool {.inline.} = + ## Returns true if `c` contains the given `key`. + runnableExamples: + var c: CritBitTree[void] + incl(c, "key") + doAssert c.contains("key") + result = rawGet(c, key) != nil -proc hasKey*[T](c: CritBitTree[T], key: string): bool {.inline.} = - ## alias for `contains`. +func hasKey*[T](c: CritBitTree[T], key: string): bool {.inline.} = + ## Alias for `contains <#contains,CritBitTree[T],string>`_. result = rawGet(c, key) != nil proc rawInsert[T](c: var CritBitTree[T], key: string): Node[T] = if c.root == nil: - new c.root - c.root.isleaf = true - c.root.key = key + c.root = Node[T](isleaf: true, key: key) result = c.root else: var it = c.root @@ -66,34 +99,33 @@ proc rawInsert[T](c: var CritBitTree[T], key: string): Node[T] = let ch = if it.byte < key.len: key[it.byte] else: '\0' let dir = (1 + (ch.ord or it.otherBits.ord)) shr 8 it = it.child[dir] - + var newOtherBits = 0 var newByte = 0 block blockX: - while newbyte < key.len: - if it.key[newbyte] != key[newbyte]: - newotherbits = it.key[newbyte].ord xor key[newbyte].ord + while newByte < key.len: + let ch = if newByte < it.key.len: it.key[newByte] else: '\0' + if ch != key[newByte]: + newOtherBits = ch.ord xor key[newByte].ord break blockX - inc newbyte - if it.key[newbyte] != '\0': - newotherbits = it.key[newbyte].ord + inc newByte + if newByte < it.key.len: + newOtherBits = it.key[newByte].ord else: return it while (newOtherBits and (newOtherBits-1)) != 0: newOtherBits = newOtherBits and (newOtherBits-1) newOtherBits = newOtherBits xor 255 - let ch = it.key[newByte] + let ch = if newByte < it.key.len: it.key[newByte] else: '\0' let dir = (1 + (ord(ch) or newOtherBits)) shr 8 - + var inner: Node[T] new inner - new result - result.isLeaf = true - result.key = key + result = Node[T](isLeaf: true, key: key) inner.otherBits = chr(newOtherBits) inner.byte = newByte inner.child[1 - dir] = result - + var wherep = addr(c.root) while true: var p = wherep[] @@ -107,9 +139,87 @@ proc rawInsert[T](c: var CritBitTree[T], key: string): Node[T] = wherep[] = inner inc c.count -proc containsOrIncl*[T](c: var CritBitTree[T], key: string, val: T): bool = - ## returns true iff `c` contains the given `key`. If the key does not exist - ## ``c[key] = val`` is performed. +func exclImpl[T](c: var CritBitTree[T], key: string): int = + var p = c.root + var wherep = addr(c.root) + var whereq: ptr Node[T] = nil + if p == nil: return c.count + var dir = 0 + var q: Node[T] + while not p.isLeaf: + whereq = wherep + q = p + let ch = if p.byte < key.len: key[p.byte] else: '\0' + dir = (1 + (ch.ord or p.otherBits.ord)) shr 8 + wherep = addr(p.child[dir]) + p = wherep[] + if p.key == key: + # else: not in tree at all + if whereq == nil: + c.root = nil + else: + whereq[] = q.child[1 - dir] + dec c.count + + return c.count + +proc excl*[T](c: var CritBitTree[T], key: string) = + ## Removes `key` (and its associated value) from the set `c`. + ## If the `key` does not exist, nothing happens. + ## + ## **See also:** + ## * `incl proc <#incl,CritBitTree[void],string>`_ + ## * `incl proc <#incl,CritBitTree[T],string,T>`_ + runnableExamples: + var c: CritBitTree[void] + incl(c, "key") + excl(c, "key") + doAssert not c.contains("key") + + discard exclImpl(c, key) + +proc missingOrExcl*[T](c: var CritBitTree[T], key: string): bool = + ## Returns true if `c` does not contain the given `key`. If the key + ## does exist, `c.excl(key)` is performed. + ## + ## **See also:** + ## * `excl proc <#excl,CritBitTree[T],string>`_ + ## * `containsOrIncl proc <#containsOrIncl,CritBitTree[T],string,T>`_ + ## * `containsOrIncl proc <#containsOrIncl,CritBitTree[void],string>`_ + runnableExamples: + block: + var c: CritBitTree[void] + doAssert c.missingOrExcl("key") + block: + var c: CritBitTree[void] + incl(c, "key") + doAssert not c.missingOrExcl("key") + doAssert not c.contains("key") + + let oldCount = c.count + discard exclImpl(c, key) + result = c.count == oldCount + +proc containsOrIncl*[T](c: var CritBitTree[T], key: string, val: sink T): bool = + ## Returns true if `c` contains the given `key`. If the key does not exist, + ## `c[key] = val` is performed. + ## + ## **See also:** + ## * `incl proc <#incl,CritBitTree[void],string>`_ + ## * `incl proc <#incl,CritBitTree[T],string,T>`_ + ## * `containsOrIncl proc <#containsOrIncl,CritBitTree[void],string>`_ + ## * `missingOrExcl proc <#missingOrExcl,CritBitTree[T],string>`_ + runnableExamples: + block: + var c: CritBitTree[int] + doAssert not c.containsOrIncl("key", 42) + doAssert c.contains("key") + block: + var c: CritBitTree[int] + incl(c, "key", 21) + doAssert c.containsOrIncl("key", 42) + doAssert c["key"] == 21 + let oldCount = c.count var n = rawInsert(c, key) result = c.count == oldCount @@ -117,66 +227,107 @@ proc containsOrIncl*[T](c: var CritBitTree[T], key: string, val: T): bool = if not result: n.val = val proc containsOrIncl*(c: var CritBitTree[void], key: string): bool = - ## returns true iff `c` contains the given `key`. If the key does not exist + ## Returns true if `c` contains the given `key`. If the key does not exist, ## it is inserted into `c`. + ## + ## **See also:** + ## * `incl proc <#incl,CritBitTree[void],string>`_ + ## * `incl proc <#incl,CritBitTree[T],string,T>`_ + ## * `containsOrIncl proc <#containsOrIncl,CritBitTree[T],string,T>`_ + ## * `missingOrExcl proc <#missingOrExcl,CritBitTree[T],string>`_ + runnableExamples: + block: + var c: CritBitTree[void] + doAssert not c.containsOrIncl("key") + doAssert c.contains("key") + block: + var c: CritBitTree[void] + incl(c, "key") + doAssert c.containsOrIncl("key") + let oldCount = c.count - var n = rawInsert(c, key) + discard rawInsert(c, key) result = c.count == oldCount +proc inc*(c: var CritBitTree[int]; key: string, val: int = 1) = + ## Increments `c[key]` by `val`. + runnableExamples: + var c: CritBitTree[int] + c["key"] = 1 + inc(c, "key") + doAssert c["key"] == 2 + + var n = rawInsert(c, key) + inc n.val, val + proc incl*(c: var CritBitTree[void], key: string) = - ## includes `key` in `c`. + ## Includes `key` in `c`. + ## + ## **See also:** + ## * `excl proc <#excl,CritBitTree[T],string>`_ + ## * `incl proc <#incl,CritBitTree[T],string,T>`_ + runnableExamples: + var c: CritBitTree[void] + incl(c, "key") + doAssert c.hasKey("key") + discard rawInsert(c, key) -proc `[]=`*[T](c: var CritBitTree[T], key: string, val: T) = - ## puts a (key, value)-pair into `t`. +proc incl*[T](c: var CritBitTree[T], key: string, val: sink T) = + ## Inserts `key` with value `val` into `c`. + ## + ## **See also:** + ## * `excl proc <#excl,CritBitTree[T],string>`_ + ## * `incl proc <#incl,CritBitTree[void],string>`_ + runnableExamples: + var c: CritBitTree[int] + incl(c, "key", 42) + doAssert c["key"] == 42 + var n = rawInsert(c, key) n.val = val -proc `[]`*[T](c: CritBitTree[T], key: string): T {.inline.} = - ## retrieves the value at ``c[key]``. If `key` is not in `t`, - ## default empty value for the type `B` is returned - ## and no exception is raised. One can check with ``hasKey`` whether the key - ## exists. - let n = rawGet(c, key) - if n != nil: result = n.val +proc `[]=`*[T](c: var CritBitTree[T], key: string, val: sink T) = + ## Alias for `incl <#incl,CritBitTree[T],string,T>`_. + ## + ## **See also:** + ## * `[] proc <#[],CritBitTree[T],string>`_ + ## * `[] proc <#[],CritBitTree[T],string_2>`_ + var n = rawInsert(c, key) + n.val = val -proc mget*[T](c: var CritBitTree[T], key: string): var T {.inline.} = - ## retrieves the value at ``c[key]``. The value can be modified. - ## If `key` is not in `t`, the ``KeyError`` exception is raised. +template get[T](c: CritBitTree[T], key: string): T = let n = rawGet(c, key) - if n != nil: result = n.val - else: raise newException(KeyError, "key not found: " & $key) + if n == nil: + raise newException(KeyError, "key not found: " & key) -proc excl*[T](c: var CritBitTree[T], key: string) = - ## removes `key` (and its associated value) from the set `c`. - ## If the `key` does not exist, nothing happens. - var p = c.root - var wherep = addr(c.root) - var whereq: ptr Node[T] = nil - if p == nil: return - var dir = 0 - var q: Node[T] - while not p.isLeaf: - whereq = wherep - q = p - let ch = if p.byte < key.len: key[p.byte] else: '\0' - dir = (1 + (ch.ord or p.otherBits.ord)) shr 8 - wherep = addr(p.child[dir]) - p = wherep[] - if p.key == key: - # else: not in tree at all - if whereq == nil: - c.root = nil - else: - whereq[] = q.child[1 - dir] - dec c.count + n.val + +func `[]`*[T](c: CritBitTree[T], key: string): lent T {.inline.} = + ## Retrieves the value at `c[key]`. If `key` is not in `t`, the + ## `KeyError` exception is raised. One can check with `hasKey` whether + ## the key exists. + ## + ## **See also:** + ## * `[] proc <#[],CritBitTree[T],string_2>`_ + ## * `[]= proc <#[]=,CritBitTree[T],string,T>`_ + get(c, key) + +func `[]`*[T](c: var CritBitTree[T], key: string): var T {.inline.} = + ## Retrieves the value at `c[key]`. The value can be modified. + ## If `key` is not in `t`, the `KeyError` exception is raised. + ## + ## **See also:** + ## * `[] proc <#[],CritBitTree[T],string>`_ + ## * `[]= proc <#[]=,CritBitTree[T],string,T>`_ + get(c, key) iterator leaves[T](n: Node[T]): Node[T] = if n != nil: # XXX actually we could compute the necessary stack size in advance: # it's roughly log2(c.count). var stack = @[n] - while stack.len > 0: + while stack.len > 0: var it = stack.pop while not it.isLeaf: stack.add(it.child[1]) @@ -185,29 +336,61 @@ iterator leaves[T](n: Node[T]): Node[T] = yield it iterator keys*[T](c: CritBitTree[T]): string = - ## yields all keys in lexicographical order. + ## Yields all keys in lexicographical order. + runnableExamples: + from std/sequtils import toSeq + + let c = {"key1": 1, "key2": 2}.toCritBitTree + doAssert toSeq(c.keys) == @["key1", "key2"] + for x in leaves(c.root): yield x.key -iterator values*[T](c: CritBitTree[T]): T = - ## yields all values of `c` in the lexicographical order of the +iterator values*[T](c: CritBitTree[T]): lent T = + ## Yields all values of `c` in the lexicographical order of the ## corresponding keys. + ## + ## **See also:** + ## * `mvalues iterator <#mvalues.i,CritBitTree[T]>`_ + runnableExamples: + from std/sequtils import toSeq + + let c = {"key1": 1, "key2": 2}.toCritBitTree + doAssert toSeq(c.values) == @[1, 2] + for x in leaves(c.root): yield x.val iterator mvalues*[T](c: var CritBitTree[T]): var T = - ## yields all values of `c` in the lexicographical order of the + ## Yields all values of `c` in the lexicographical order of the ## corresponding keys. The values can be modified. + ## + ## **See also:** + ## * `values iterator <#values.i,CritBitTree[T]>`_ for x in leaves(c.root): yield x.val iterator items*[T](c: CritBitTree[T]): string = - ## yields all keys in lexicographical order. + ## Alias for `keys <#keys.i,CritBitTree[T]>`_. for x in leaves(c.root): yield x.key iterator pairs*[T](c: CritBitTree[T]): tuple[key: string, val: T] = - ## yields all (key, value)-pairs of `c`. + ## Yields all `(key, value)`-pairs of `c` in the lexicographical order of the + ## corresponding keys. + ## + ## **See also:** + ## * `mpairs iterator <#mpairs.i,CritBitTree[T]>`_ + runnableExamples: + from std/sequtils import toSeq + + let c = {"key1": 1, "key2": 2}.toCritBitTree + doAssert toSeq(c.pairs) == @[(key: "key1", val: 1), (key: "key2", val: 2)] + for x in leaves(c.root): yield (x.key, x.val) - + iterator mpairs*[T](c: var CritBitTree[T]): tuple[key: string, val: var T] = - ## yields all (key, value)-pairs of `c`. The yielded values can be modified. + ## Yields all `(key, value)`-pairs of `c` in the lexicographical order of the + ## corresponding keys. The yielded values can be modified. + ## + ## **See also:** + ## * `pairs iterator <#pairs.i,CritBitTree[T]>`_ for x in leaves(c.root): yield (x.key, x.val) proc allprefixedAux[T](c: CritBitTree[T], key: string): Node[T] = @@ -220,50 +403,83 @@ proc allprefixedAux[T](c: CritBitTree[T], key: string): Node[T] = let dir = (1 + (ch.ord or p.otherBits.ord)) shr 8 p = p.child[dir] if q.byte < key.len: top = p - for i in 0 .. <key.len: - if p.key[i] != key[i]: return + for i in 0 ..< key.len: + if i >= p.key.len or p.key[i] != key[i]: return result = top -iterator itemsWithPrefix*[T](c: CritBitTree[T], prefix: string): string = - ## yields all keys starting with `prefix`. - let top = allprefixedAux(c, prefix) - for x in leaves(top): yield x.key - iterator keysWithPrefix*[T](c: CritBitTree[T], prefix: string): string = - ## yields all keys starting with `prefix`. + ## Yields all keys starting with `prefix`. + runnableExamples: + from std/sequtils import toSeq + + let c = {"key1": 42, "key2": 43}.toCritBitTree + doAssert toSeq(c.keysWithPrefix("key")) == @["key1", "key2"] + let top = allprefixedAux(c, prefix) for x in leaves(top): yield x.key -iterator valuesWithPrefix*[T](c: CritBitTree[T], prefix: string): T = - ## yields all values of `c` starting with `prefix` of the +iterator valuesWithPrefix*[T](c: CritBitTree[T], prefix: string): lent T = + ## Yields all values of `c` starting with `prefix` of the ## corresponding keys. + ## + ## **See also:** + ## * `mvaluesWithPrefix iterator <#mvaluesWithPrefix.i,CritBitTree[T],string>`_ + runnableExamples: + from std/sequtils import toSeq + + let c = {"key1": 42, "key2": 43}.toCritBitTree + doAssert toSeq(c.valuesWithPrefix("key")) == @[42, 43] + let top = allprefixedAux(c, prefix) for x in leaves(top): yield x.val iterator mvaluesWithPrefix*[T](c: var CritBitTree[T], prefix: string): var T = - ## yields all values of `c` starting with `prefix` of the + ## Yields all values of `c` starting with `prefix` of the ## corresponding keys. The values can be modified. + ## + ## **See also:** + ## * `valuesWithPrefix iterator <#valuesWithPrefix.i,CritBitTree[T],string>`_ let top = allprefixedAux(c, prefix) for x in leaves(top): yield x.val +iterator itemsWithPrefix*[T](c: CritBitTree[T], prefix: string): string = + ## Alias for `keysWithPrefix <#keysWithPrefix.i,CritBitTree[T],string>`_. + let top = allprefixedAux(c, prefix) + for x in leaves(top): yield x.key + iterator pairsWithPrefix*[T](c: CritBitTree[T], prefix: string): tuple[key: string, val: T] = - ## yields all (key, value)-pairs of `c` starting with `prefix`. + ## Yields all (key, value)-pairs of `c` starting with `prefix`. + ## + ## **See also:** + ## * `mpairsWithPrefix iterator <#mpairsWithPrefix.i,CritBitTree[T],string>`_ + runnableExamples: + from std/sequtils import toSeq + + let c = {"key1": 42, "key2": 43}.toCritBitTree + doAssert toSeq(c.pairsWithPrefix("key")) == @[(key: "key1", val: 42), (key: "key2", val: 43)] + let top = allprefixedAux(c, prefix) for x in leaves(top): yield (x.key, x.val) - + iterator mpairsWithPrefix*[T](c: var CritBitTree[T], prefix: string): tuple[key: string, val: var T] = - ## yields all (key, value)-pairs of `c` starting with `prefix`. + ## Yields all (key, value)-pairs of `c` starting with `prefix`. ## The yielded values can be modified. + ## + ## **See also:** + ## * `pairsWithPrefix iterator <#pairsWithPrefix.i,CritBitTree[T],string>`_ let top = allprefixedAux(c, prefix) for x in leaves(top): yield (x.key, x.val) -proc `$`*[T](c: CritBitTree[T]): string = - ## turns `c` into a string representation. Example outputs: - ## ``{keyA: value, keyB: value}``, ``{:}`` - ## If `T` is void the outputs look like: - ## ``{keyA, keyB}``, ``{}``. +func `$`*[T](c: CritBitTree[T]): string = + ## Turns `c` into a string representation. + runnableExamples: + doAssert $CritBitTree[int].default == "{:}" + doAssert $toCritBitTree({"key1": 1, "key2": 2}) == """{"key1": 1, "key2": 2}""" + doAssert $CritBitTree[void].default == "{}" + doAssert $toCritBitTree(["key1", "key2"]) == """{"key1", "key2"}""" + if c.len == 0: when T is void: result = "{}" @@ -277,27 +493,45 @@ proc `$`*[T](c: CritBitTree[T]): string = const avgItemLen = 16 result = newStringOfCap(c.count * avgItemLen) result.add("{") - for key, val in pairs(c): - if result.len > 1: result.add(", ") - result.add($key) - when T isnot void: + when T is void: + for key in keys(c): + if result.len > 1: result.add(", ") + result.addQuoted(key) + else: + for key, val in pairs(c): + if result.len > 1: result.add(", ") + result.addQuoted(key) result.add(": ") - result.add($val) + result.addQuoted(val) result.add("}") -when isMainModule: - var r: CritBitTree[void] - r.incl "abc" - r.incl "xyz" - r.incl "def" - r.incl "definition" - r.incl "prefix" - doAssert r.contains"def" - #r.del "def" - - for w in r.items: - echo w - - for w in r.itemsWithPrefix("de"): - echo w +func commonPrefixLen*[T](c: CritBitTree[T]): int {.inline, since((1, 3)).} = + ## Returns the length of the longest common prefix of all keys in `c`. + ## If `c` is empty, returns 0. + runnableExamples: + var c: CritBitTree[void] + doAssert c.commonPrefixLen == 0 + incl(c, "key1") + doAssert c.commonPrefixLen == 4 + incl(c, "key2") + doAssert c.commonPrefixLen == 3 + + if c.root != nil: + if c.root.isLeaf: len(c.root.key) + else: c.root.byte + else: 0 + +proc toCritBitTree*[T](pairs: sink openArray[(string, T)]): CritBitTree[T] {.since: (1, 3).} = + ## Creates a new `CritBitTree` that contains the given `pairs`. + runnableExamples: + doAssert {"a": "0", "b": "1", "c": "2"}.toCritBitTree is CritBitTree[string] + doAssert {"a": 0, "b": 1, "c": 2}.toCritBitTree is CritBitTree[int] + + for item in pairs: result.incl item[0], item[1] + +proc toCritBitTree*(items: sink openArray[string]): CritBitTree[void] {.since: (1, 3).} = + ## Creates a new `CritBitTree` that contains the given `items`. + runnableExamples: + doAssert ["a", "b", "c"].toCritBitTree is CritBitTree[void] + for item in items: result.incl item diff --git a/lib/pure/collections/deques.nim b/lib/pure/collections/deques.nim new file mode 100644 index 000000000..d2b0099f2 --- /dev/null +++ b/lib/pure/collections/deques.nim @@ -0,0 +1,480 @@ +# +# +# Nim's Runtime Library +# (c) Copyright 2012 Andreas Rumpf +# +# See the file "copying.txt", included in this +# distribution, for details about the copyright. +# + +## An implementation of a `deque`:idx: (double-ended queue). +## The underlying implementation uses a `seq`. +## +## .. note:: None of the procs that get an individual value from the deque should be used +## on an empty deque. +## +## If compiled with the `boundChecks` option, those procs will raise an `IndexDefect` +## on such access. This should not be relied upon, as `-d:danger` or `--checks:off` will +## disable those checks and then the procs may return garbage or crash the program. +## +## As such, a check to see if the deque is empty is needed before any +## access, unless your program logic guarantees it indirectly. + +runnableExamples: + var a = [10, 20, 30, 40].toDeque + + doAssertRaises(IndexDefect, echo a[4]) + + a.addLast(50) + assert $a == "[10, 20, 30, 40, 50]" + + assert a.peekFirst == 10 + assert a.peekLast == 50 + assert len(a) == 5 + + assert a.popFirst == 10 + assert a.popLast == 50 + assert len(a) == 3 + + a.addFirst(11) + a.addFirst(22) + a.addFirst(33) + assert $a == "[33, 22, 11, 20, 30, 40]" + + a.shrink(fromFirst = 1, fromLast = 2) + assert $a == "[22, 11, 20]" + +## See also +## ======== +## * `lists module <lists.html>`_ for singly and doubly linked lists and rings + +import std/private/since + +import std/[assertions, hashes, math] + +type + Deque*[T] = object + ## A double-ended queue backed with a ringed `seq` buffer. + ## + ## To initialize an empty deque, + ## use the `initDeque proc <#initDeque,int>`_. + data: seq[T] + + # `head` and `tail` are masked only when accessing an element of `data` + # so that `tail - head == data.len` when the deque is full. + # They are uint so that incrementing/decrementing them doesn't cause + # over/underflow. You can get a number of items with `tail - head` + # even if `tail` or `head` is wraps around and `tail < head`, because + # `tail - head == (uint.high + 1 + tail) - head` when `tail < head`. + head, tail: uint + +const + defaultInitialSize* = 4 + +template initImpl(result: typed, initialSize: int) = + let correctSize = nextPowerOfTwo(initialSize) + newSeq(result.data, correctSize) + +template checkIfInitialized(deq: typed) = + if deq.data.len == 0: + initImpl(deq, defaultInitialSize) + +func mask[T](deq: Deque[T]): uint {.inline.} = + uint(deq.data.len) - 1 + +proc initDeque*[T](initialSize: int = defaultInitialSize): Deque[T] = + ## Creates a new empty deque. + ## + ## Optionally, the initial capacity can be reserved via `initialSize` + ## as a performance optimization + ## (default: `defaultInitialSize <#defaultInitialSize>`_). + ## The length of a newly created deque will still be 0. + ## + ## **See also:** + ## * `toDeque proc <#toDeque,openArray[T]>`_ + result.initImpl(initialSize) + +func len*[T](deq: Deque[T]): int {.inline.} = + ## Returns the number of elements of `deq`. + int(deq.tail - deq.head) + +template emptyCheck(deq) = + # Bounds check for the regular deque access. + when compileOption("boundChecks"): + if unlikely(deq.len < 1): + raise newException(IndexDefect, "Empty deque.") + +template xBoundsCheck(deq, i) = + # Bounds check for the array like accesses. + when compileOption("boundChecks"): # `-d:danger` or `--checks:off` should disable this. + if unlikely(i >= deq.len): # x < deq.low is taken care by the Natural parameter + raise newException(IndexDefect, + "Out of bounds: " & $i & " > " & $(deq.len - 1)) + if unlikely(i < 0): # when used with BackwardsIndex + raise newException(IndexDefect, + "Out of bounds: " & $i & " < 0") + +proc `[]`*[T](deq: Deque[T], i: Natural): lent T {.inline.} = + ## Accesses the `i`-th element of `deq`. + runnableExamples: + let a = [10, 20, 30, 40, 50].toDeque + assert a[0] == 10 + assert a[3] == 40 + doAssertRaises(IndexDefect, echo a[8]) + + xBoundsCheck(deq, i) + return deq.data[(deq.head + i.uint) and deq.mask] + +proc `[]`*[T](deq: var Deque[T], i: Natural): var T {.inline.} = + ## Accesses the `i`-th element of `deq` and returns a mutable + ## reference to it. + runnableExamples: + var a = [10, 20, 30, 40, 50].toDeque + inc(a[0]) + assert a[0] == 11 + + xBoundsCheck(deq, i) + return deq.data[(deq.head + i.uint) and deq.mask] + +proc `[]=`*[T](deq: var Deque[T], i: Natural, val: sink T) {.inline.} = + ## Sets the `i`-th element of `deq` to `val`. + runnableExamples: + var a = [10, 20, 30, 40, 50].toDeque + a[0] = 99 + a[3] = 66 + assert $a == "[99, 20, 30, 66, 50]" + + checkIfInitialized(deq) + xBoundsCheck(deq, i) + deq.data[(deq.head + i.uint) and deq.mask] = val + +proc `[]`*[T](deq: Deque[T], i: BackwardsIndex): lent T {.inline.} = + ## Accesses the backwards indexed `i`-th element. + ## + ## `deq[^1]` is the last element. + runnableExamples: + let a = [10, 20, 30, 40, 50].toDeque + assert a[^1] == 50 + assert a[^4] == 20 + doAssertRaises(IndexDefect, echo a[^9]) + + xBoundsCheck(deq, deq.len - int(i)) + return deq[deq.len - int(i)] + +proc `[]`*[T](deq: var Deque[T], i: BackwardsIndex): var T {.inline.} = + ## Accesses the backwards indexed `i`-th element and returns a mutable + ## reference to it. + ## + ## `deq[^1]` is the last element. + runnableExamples: + var a = [10, 20, 30, 40, 50].toDeque + inc(a[^1]) + assert a[^1] == 51 + + xBoundsCheck(deq, deq.len - int(i)) + return deq[deq.len - int(i)] + +proc `[]=`*[T](deq: var Deque[T], i: BackwardsIndex, x: sink T) {.inline.} = + ## Sets the backwards indexed `i`-th element of `deq` to `x`. + ## + ## `deq[^1]` is the last element. + runnableExamples: + var a = [10, 20, 30, 40, 50].toDeque + a[^1] = 99 + a[^3] = 77 + assert $a == "[10, 20, 77, 40, 99]" + + checkIfInitialized(deq) + xBoundsCheck(deq, deq.len - int(i)) + deq[deq.len - int(i)] = x + +iterator items*[T](deq: Deque[T]): lent T = + ## Yields every element of `deq`. + ## + ## **See also:** + ## * `mitems iterator <#mitems.i,Deque[T]>`_ + runnableExamples: + from std/sequtils import toSeq + + let a = [10, 20, 30, 40, 50].toDeque + assert toSeq(a.items) == @[10, 20, 30, 40, 50] + + for c in 0 ..< deq.len: + yield deq.data[(deq.head + c.uint) and deq.mask] + +iterator mitems*[T](deq: var Deque[T]): var T = + ## Yields every element of `deq`, which can be modified. + ## + ## **See also:** + ## * `items iterator <#items.i,Deque[T]>`_ + runnableExamples: + var a = [10, 20, 30, 40, 50].toDeque + assert $a == "[10, 20, 30, 40, 50]" + for x in mitems(a): + x = 5 * x - 1 + assert $a == "[49, 99, 149, 199, 249]" + + for c in 0 ..< deq.len: + yield deq.data[(deq.head + c.uint) and deq.mask] + +iterator pairs*[T](deq: Deque[T]): tuple[key: int, val: T] = + ## Yields every `(position, value)`-pair of `deq`. + runnableExamples: + from std/sequtils import toSeq + + let a = [10, 20, 30].toDeque + assert toSeq(a.pairs) == @[(0, 10), (1, 20), (2, 30)] + + for c in 0 ..< deq.len: + yield (c, deq.data[(deq.head + c.uint) and deq.mask]) + +proc contains*[T](deq: Deque[T], item: T): bool {.inline.} = + ## Returns true if `item` is in `deq` or false if not found. + ## + ## Usually used via the `in` operator. + ## It is the equivalent of `deq.find(item) >= 0`. + runnableExamples: + let q = [7, 9].toDeque + assert 7 in q + assert q.contains(7) + assert 8 notin q + + for e in deq: + if e == item: return true + return false + +proc expandIfNeeded[T](deq: var Deque[T]) = + checkIfInitialized(deq) + let cap = deq.data.len + assert deq.len <= cap + if unlikely(deq.len == cap): + var n = newSeq[T](cap * 2) + var i = 0 + for x in mitems(deq): + when nimvm: n[i] = x # workaround for VM bug + else: n[i] = move(x) + inc i + deq.data = move(n) + deq.tail = cap.uint + deq.head = 0 + +proc addFirst*[T](deq: var Deque[T], item: sink T) = + ## Adds an `item` to the beginning of `deq`. + ## + ## **See also:** + ## * `addLast proc <#addLast,Deque[T],sinkT>`_ + runnableExamples: + var a = initDeque[int]() + for i in 1 .. 5: + a.addFirst(10 * i) + assert $a == "[50, 40, 30, 20, 10]" + + expandIfNeeded(deq) + dec deq.head + deq.data[deq.head and deq.mask] = item + +proc addLast*[T](deq: var Deque[T], item: sink T) = + ## Adds an `item` to the end of `deq`. + ## + ## **See also:** + ## * `addFirst proc <#addFirst,Deque[T],sinkT>`_ + runnableExamples: + var a = initDeque[int]() + for i in 1 .. 5: + a.addLast(10 * i) + assert $a == "[10, 20, 30, 40, 50]" + + expandIfNeeded(deq) + deq.data[deq.tail and deq.mask] = item + inc deq.tail + +proc toDeque*[T](x: openArray[T]): Deque[T] {.since: (1, 3).} = + ## Creates a new deque that contains the elements of `x` (in the same order). + ## + ## **See also:** + ## * `initDeque proc <#initDeque,int>`_ + runnableExamples: + let a = toDeque([7, 8, 9]) + assert len(a) == 3 + assert $a == "[7, 8, 9]" + + result.initImpl(x.len) + for item in items(x): + result.addLast(item) + +proc peekFirst*[T](deq: Deque[T]): lent T {.inline.} = + ## Returns the first element of `deq`, but does not remove it from the deque. + ## + ## **See also:** + ## * `peekFirst proc <#peekFirst,Deque[T]_2>`_ which returns a mutable reference + ## * `peekLast proc <#peekLast,Deque[T]>`_ + runnableExamples: + let a = [10, 20, 30, 40, 50].toDeque + assert $a == "[10, 20, 30, 40, 50]" + assert a.peekFirst == 10 + assert len(a) == 5 + + emptyCheck(deq) + result = deq.data[deq.head and deq.mask] + +proc peekLast*[T](deq: Deque[T]): lent T {.inline.} = + ## Returns the last element of `deq`, but does not remove it from the deque. + ## + ## **See also:** + ## * `peekLast proc <#peekLast,Deque[T]_2>`_ which returns a mutable reference + ## * `peekFirst proc <#peekFirst,Deque[T]>`_ + runnableExamples: + let a = [10, 20, 30, 40, 50].toDeque + assert $a == "[10, 20, 30, 40, 50]" + assert a.peekLast == 50 + assert len(a) == 5 + + emptyCheck(deq) + result = deq.data[(deq.tail - 1) and deq.mask] + +proc peekFirst*[T](deq: var Deque[T]): var T {.inline, since: (1, 3).} = + ## Returns a mutable reference to the first element of `deq`, + ## but does not remove it from the deque. + ## + ## **See also:** + ## * `peekFirst proc <#peekFirst,Deque[T]>`_ + ## * `peekLast proc <#peekLast,Deque[T]_2>`_ + runnableExamples: + var a = [10, 20, 30, 40, 50].toDeque + a.peekFirst() = 99 + assert $a == "[99, 20, 30, 40, 50]" + + emptyCheck(deq) + result = deq.data[deq.head and deq.mask] + +proc peekLast*[T](deq: var Deque[T]): var T {.inline, since: (1, 3).} = + ## Returns a mutable reference to the last element of `deq`, + ## but does not remove it from the deque. + ## + ## **See also:** + ## * `peekFirst proc <#peekFirst,Deque[T]_2>`_ + ## * `peekLast proc <#peekLast,Deque[T]>`_ + runnableExamples: + var a = [10, 20, 30, 40, 50].toDeque + a.peekLast() = 99 + assert $a == "[10, 20, 30, 40, 99]" + + emptyCheck(deq) + result = deq.data[(deq.tail - 1) and deq.mask] + +template destroy(x: untyped) = + reset(x) + +proc popFirst*[T](deq: var Deque[T]): T {.inline, discardable.} = + ## Removes and returns the first element of the `deq`. + ## + ## See also: + ## * `popLast proc <#popLast,Deque[T]>`_ + ## * `shrink proc <#shrink,Deque[T],int,int>`_ + runnableExamples: + var a = [10, 20, 30, 40, 50].toDeque + assert $a == "[10, 20, 30, 40, 50]" + assert a.popFirst == 10 + assert $a == "[20, 30, 40, 50]" + + emptyCheck(deq) + result = move deq.data[deq.head and deq.mask] + inc deq.head + +proc popLast*[T](deq: var Deque[T]): T {.inline, discardable.} = + ## Removes and returns the last element of the `deq`. + ## + ## **See also:** + ## * `popFirst proc <#popFirst,Deque[T]>`_ + ## * `shrink proc <#shrink,Deque[T],int,int>`_ + runnableExamples: + var a = [10, 20, 30, 40, 50].toDeque + assert $a == "[10, 20, 30, 40, 50]" + assert a.popLast == 50 + assert $a == "[10, 20, 30, 40]" + + emptyCheck(deq) + dec deq.tail + result = move deq.data[deq.tail and deq.mask] + +proc clear*[T](deq: var Deque[T]) {.inline.} = + ## Resets the deque so that it is empty. + ## + ## **See also:** + ## * `shrink proc <#shrink,Deque[T],int,int>`_ + runnableExamples: + var a = [10, 20, 30, 40, 50].toDeque + assert $a == "[10, 20, 30, 40, 50]" + clear(a) + assert len(a) == 0 + + for el in mitems(deq): destroy(el) + deq.tail = deq.head + +proc shrink*[T](deq: var Deque[T], fromFirst = 0, fromLast = 0) = + ## Removes `fromFirst` elements from the front of the deque and + ## `fromLast` elements from the back. + ## + ## If the supplied number of elements exceeds the total number of elements + ## in the deque, the deque will remain empty. + ## + ## **See also:** + ## * `clear proc <#clear,Deque[T]>`_ + ## * `popFirst proc <#popFirst,Deque[T]>`_ + ## * `popLast proc <#popLast,Deque[T]>`_ + runnableExamples: + var a = [10, 20, 30, 40, 50].toDeque + assert $a == "[10, 20, 30, 40, 50]" + a.shrink(fromFirst = 2, fromLast = 1) + assert $a == "[30, 40]" + + if fromFirst + fromLast > deq.len: + clear(deq) + return + + for i in 0 ..< fromFirst: + destroy(deq.data[deq.head and deq.mask]) + inc deq.head + + for i in 0 ..< fromLast: + destroy(deq.data[(deq.tail - 1) and deq.mask]) + dec deq.tail + +proc `$`*[T](deq: Deque[T]): string = + ## Turns a deque into its string representation. + runnableExamples: + let a = [10, 20, 30].toDeque + assert $a == "[10, 20, 30]" + + result = "[" + for x in deq: + if result.len > 1: result.add(", ") + result.addQuoted(x) + result.add("]") + +func `==`*[T](deq1, deq2: Deque[T]): bool = + ## The `==` operator for Deque. + ## Returns `true` if both deques contains the same values in the same order. + runnableExamples: + var a, b = initDeque[int]() + a.addFirst(2) + a.addFirst(1) + b.addLast(1) + b.addLast(2) + doAssert a == b + + if deq1.len != deq2.len: + return false + + for i in 0 ..< deq1.len: + if deq1.data[(deq1.head + i.uint) and deq1.mask] != deq2.data[(deq2.head + i.uint) and deq2.mask]: + return false + + true + +func hash*[T](deq: Deque[T]): Hash = + ## Hashing of Deque. + var h: Hash = 0 + for x in deq: + h = h !& hash(x) + !$h diff --git a/lib/pure/collections/hashcommon.nim b/lib/pure/collections/hashcommon.nim new file mode 100644 index 000000000..17785c8c7 --- /dev/null +++ b/lib/pure/collections/hashcommon.nim @@ -0,0 +1,76 @@ +# +# +# Nim's Runtime Library +# (c) Copyright 2019 Andreas Rumpf +# +# See the file "copying.txt", included in this +# distribution, for details about the copyright. +# + +# An `include` file which contains common code for +# hash sets and tables. + +when defined(nimPreviewSlimSystem): + import std/assertions + +import std / outparams + +const + growthFactor = 2 + +# hcode for real keys cannot be zero. hcode==0 signifies an empty slot. These +# two procs retain clarity of that encoding without the space cost of an enum. +proc isEmpty(hcode: Hash): bool {.inline.} = + result = hcode == 0 + +proc isFilled(hcode: Hash): bool {.inline.} = + result = hcode != 0 + +proc nextTry(h, maxHash: Hash): Hash {.inline.} = + result = (h + 1) and maxHash + +proc mustRehash[T](t: T): bool {.inline.} = + # If this is changed, make sure to synchronize it with `slotsNeeded` below + assert(t.dataLen > t.counter) + result = (t.dataLen * 2 < t.counter * 3) or (t.dataLen - t.counter < 4) + +proc slotsNeeded(count: Natural): int {.inline.} = + # Make sure to synchronize with `mustRehash` above + result = nextPowerOfTwo(count * 3 div 2 + 4) + +template rawGetKnownHCImpl() {.dirty.} = + if t.dataLen == 0: + return -1 + var h: Hash = hc and maxHash(t) # start with real hash value + while isFilled(t.data[h].hcode): + # Compare hc THEN key with boolean short circuit. This makes the common case + # zero ==key's for missing (e.g.inserts) and exactly one ==key for present. + # It does slow down succeeding lookups by one extra Hash cmp&and..usually + # just a few clock cycles, generally worth it for any non-integer-like A. + if t.data[h].hcode == hc and t.data[h].key == key: + return h + h = nextTry(h, maxHash(t)) + result = -1 - h # < 0 => MISSING; insert idx = -1 - result + +proc rawGetKnownHC[X, A](t: X, key: A, hc: Hash): int {.inline.} = + rawGetKnownHCImpl() + +template genHashImpl(key, hc: typed) = + hc = hash(key) + if hc == 0: # This almost never taken branch should be very predictable. + when sizeof(int) < 4: + hc = 31415 # Value doesn't matter; Any non-zero favorite is fine <= 16-bit. + else: + hc = 314159265 # Value doesn't matter; Any non-zero favorite is fine. + +template genHash(key: typed): Hash = + var res: Hash + genHashImpl(key, res) + res + +template rawGetImpl() {.dirty.} = + genHashImpl(key, hc) + rawGetKnownHCImpl() + +proc rawGet[X, A](t: X, key: A, hc: var Hash): int {.inline, outParamsAt: [3].} = + rawGetImpl() diff --git a/lib/pure/collections/heapqueue.nim b/lib/pure/collections/heapqueue.nim new file mode 100644 index 000000000..96f9b4430 --- /dev/null +++ b/lib/pure/collections/heapqueue.nim @@ -0,0 +1,266 @@ +# +# +# Nim's Runtime Library +# (c) Copyright 2016 Yuriy Glukhov +# +# See the file "copying.txt", included in this +# distribution, for details about the copyright. + + +## The `heapqueue` module implements a +## `binary heap data structure<https://en.wikipedia.org/wiki/Binary_heap>`_ +## that can be used as a `priority queue<https://en.wikipedia.org/wiki/Priority_queue>`_. +## They are represented as arrays for which `a[k] <= a[2*k+1]` and `a[k] <= a[2*k+2]` +## for all indices `k` (counting elements from 0). The interesting property of a heap is that +## `a[0]` is always its smallest element. +## +## Basic usage +## ----------- +## +runnableExamples: + var heap = [8, 2].toHeapQueue + heap.push(5) + # the first element is the lowest element + assert heap[0] == 2 + # remove and return the lowest element + assert heap.pop() == 2 + # the lowest element remaining is 5 + assert heap[0] == 5 + +## Usage with custom objects +## ------------------------- +## To use a `HeapQueue` with a custom object, the `<` operator must be +## implemented. + +runnableExamples: + type Job = object + priority: int + + proc `<`(a, b: Job): bool = a.priority < b.priority + + var jobs = initHeapQueue[Job]() + jobs.push(Job(priority: 1)) + jobs.push(Job(priority: 2)) + + assert jobs[0].priority == 1 + + +import std/private/since + +when defined(nimPreviewSlimSystem): + import std/assertions + +type HeapQueue*[T] = object + ## A heap queue, commonly known as a priority queue. + data: seq[T] + +proc initHeapQueue*[T](): HeapQueue[T] = + ## Creates a new empty heap. + ## + ## Heaps are initialized by default, so it is not necessary to call + ## this function explicitly. + ## + ## **See also:** + ## * `toHeapQueue proc <#toHeapQueue,openArray[T]>`_ + result = default(HeapQueue[T]) + +proc len*[T](heap: HeapQueue[T]): int {.inline.} = + ## Returns the number of elements of `heap`. + runnableExamples: + let heap = [9, 5, 8].toHeapQueue + assert heap.len == 3 + + heap.data.len + +proc `[]`*[T](heap: HeapQueue[T], i: Natural): lent T {.inline.} = + ## Accesses the i-th element of `heap`. + heap.data[i] + +iterator items*[T](heap: HeapQueue[T]): lent T {.inline, since: (2, 1, 1).} = + ## Iterates over each item of `heap`. + let L = len(heap) + for i in 0 .. high(heap.data): + yield heap.data[i] + assert(len(heap) == L, "the length of the HeapQueue changed while iterating over it") + +proc heapCmp[T](x, y: T): bool {.inline.} = x < y + +proc siftup[T](heap: var HeapQueue[T], startpos, p: int) = + ## `heap` is a heap at all indices >= `startpos`, except possibly for `p`. `p` + ## is the index of a leaf with a possibly out-of-order value. Restores the + ## heap invariant. + var pos = p + let newitem = heap[pos] + # Follow the path to the root, moving parents down until finding a place + # newitem fits. + while pos > startpos: + let parentpos = (pos - 1) shr 1 + let parent = heap[parentpos] + if heapCmp(newitem, parent): + heap.data[pos] = parent + pos = parentpos + else: + break + heap.data[pos] = newitem + +proc siftdownToBottom[T](heap: var HeapQueue[T], p: int) = + # This is faster when the element should be close to the bottom. + let endpos = len(heap) + var pos = p + let startpos = pos + let newitem = heap[pos] + # Bubble up the smaller child until hitting a leaf. + var childpos = 2 * pos + 1 # leftmost child position + while childpos < endpos: + # Set childpos to index of smaller child. + let rightpos = childpos + 1 + if rightpos < endpos and not heapCmp(heap[childpos], heap[rightpos]): + childpos = rightpos + # Move the smaller child up. + heap.data[pos] = heap[childpos] + pos = childpos + childpos = 2 * pos + 1 + # The leaf at pos is empty now. Put newitem there, and bubble it up + # to its final resting place (by sifting its parents down). + heap.data[pos] = newitem + siftup(heap, startpos, pos) + +proc siftdown[T](heap: var HeapQueue[T], p: int) = + let endpos = len(heap) + var pos = p + let newitem = heap[pos] + var childpos = 2 * pos + 1 + while childpos < endpos: + let rightpos = childpos + 1 + if rightpos < endpos and not heapCmp(heap[childpos], heap[rightpos]): + childpos = rightpos + if not heapCmp(heap[childpos], newitem): + break + heap.data[pos] = heap[childpos] + pos = childpos + childpos = 2 * pos + 1 + heap.data[pos] = newitem + +proc push*[T](heap: var HeapQueue[T], item: sink T) = + ## Pushes `item` onto `heap`, maintaining the heap invariant. + heap.data.add(item) + siftup(heap, 0, len(heap) - 1) + +proc toHeapQueue*[T](x: openArray[T]): HeapQueue[T] {.since: (1, 3).} = + ## Creates a new HeapQueue that contains the elements of `x`. + ## + ## **See also:** + ## * `initHeapQueue proc <#initHeapQueue>`_ + runnableExamples: + var heap = [9, 5, 8].toHeapQueue + assert heap.pop() == 5 + assert heap[0] == 8 + + # see https://en.wikipedia.org/wiki/Binary_heap#Building_a_heap + result.data = @x + for i in countdown(x.len div 2 - 1, 0): + siftdown(result, i) + +proc pop*[T](heap: var HeapQueue[T]): T = + ## Pops and returns the smallest item from `heap`, + ## maintaining the heap invariant. + runnableExamples: + var heap = [9, 5, 8].toHeapQueue + assert heap.pop() == 5 + + let lastelt = heap.data.pop() + if heap.len > 0: + result = heap[0] + heap.data[0] = lastelt + siftdownToBottom(heap, 0) + else: + result = lastelt + +proc find*[T](heap: HeapQueue[T], x: T): int {.since: (1, 3).} = + ## Linear scan to find the index of the item `x` or -1 if not found. + runnableExamples: + let heap = [9, 5, 8].toHeapQueue + assert heap.find(5) == 0 + assert heap.find(9) == 1 + assert heap.find(777) == -1 + + result = -1 + for i in 0 ..< heap.len: + if heap[i] == x: return i + +proc contains*[T](heap: HeapQueue[T], x: T): bool {.since: (2, 1, 1).} = + ## Returns true if `x` is in `heap` or false if not found. This is a shortcut + ## for `find(heap, x) >= 0`. + result = find(heap, x) >= 0 + +proc del*[T](heap: var HeapQueue[T], index: Natural) = + ## Removes the element at `index` from `heap`, maintaining the heap invariant. + runnableExamples: + var heap = [9, 5, 8].toHeapQueue + heap.del(1) + assert heap[0] == 5 + assert heap[1] == 8 + + swap(heap.data[^1], heap.data[index]) + let newLen = heap.len - 1 + heap.data.setLen(newLen) + if index < newLen: + siftdownToBottom(heap, index) + +proc replace*[T](heap: var HeapQueue[T], item: sink T): T = + ## Pops and returns the current smallest value, and add the new item. + ## This is more efficient than `pop()` followed by `push()`, and can be + ## more appropriate when using a fixed-size heap. Note that the value + ## returned may be larger than `item`! That constrains reasonable uses of + ## this routine unless written as part of a conditional replacement. + ## + ## **See also:** + ## * `pushpop proc <#pushpop,HeapQueue[T],sinkT>`_ + runnableExamples: + var heap = [5, 12].toHeapQueue + assert heap.replace(6) == 5 + assert heap.len == 2 + assert heap[0] == 6 + assert heap.replace(4) == 6 + + result = heap[0] + heap.data[0] = item + siftdown(heap, 0) + +proc pushpop*[T](heap: var HeapQueue[T], item: sink T): T = + ## Fast version of a `push()` followed by a `pop()`. + ## + ## **See also:** + ## * `replace proc <#replace,HeapQueue[T],sinkT>`_ + runnableExamples: + var heap = [5, 12].toHeapQueue + assert heap.pushpop(6) == 5 + assert heap.len == 2 + assert heap[0] == 6 + assert heap.pushpop(4) == 4 + + result = item + if heap.len > 0 and heapCmp(heap.data[0], result): + swap(result, heap.data[0]) + siftdown(heap, 0) + +proc clear*[T](heap: var HeapQueue[T]) = + ## Removes all elements from `heap`, making it empty. + runnableExamples: + var heap = [9, 5, 8].toHeapQueue + heap.clear() + assert heap.len == 0 + + heap.data.setLen(0) + +proc `$`*[T](heap: HeapQueue[T]): string = + ## Turns a heap into its string representation. + runnableExamples: + let heap = [1, 2].toHeapQueue + assert $heap == "[1, 2]" + + result = "[" + for x in heap.data: + if result.len > 1: result.add(", ") + result.addQuoted(x) + result.add("]") diff --git a/lib/pure/collections/intsets.nim b/lib/pure/collections/intsets.nim index 7520e6e46..765a23e97 100644 --- a/lib/pure/collections/intsets.nim +++ b/lib/pure/collections/intsets.nim @@ -7,205 +7,17 @@ # distribution, for details about the copyright. # -## The ``intsets`` module implements an efficient int set implemented as a -## `sparse bit set`:idx:. -## **Note**: Since Nim currently does not allow the assignment operator to -## be overloaded, ``=`` for int sets performs some rather meaningless shallow -## copy; use ``assign`` to get a deep copy. +## Specialization of the generic `packedsets module <packedsets.html>`_ +## (see its documentation for more examples) for ordinal sparse sets. -import - os, hashes, math +import std/private/since +import std/packedsets +export packedsets type - BitScalar = int + IntSet* = PackedSet[int] -const - InitIntSetSize = 8 # must be a power of two! - TrunkShift = 9 - BitsPerTrunk = 1 shl TrunkShift # needs to be a power of 2 and - # divisible by 64 - TrunkMask = BitsPerTrunk - 1 - IntsPerTrunk = BitsPerTrunk div (sizeof(BitScalar) * 8) - IntShift = 5 + ord(sizeof(BitScalar) == 8) # 5 or 6, depending on int width - IntMask = 1 shl IntShift - 1 +proc toIntSet*(x: openArray[int]): IntSet {.since: (1, 3), inline.} = toPackedSet[int](x) -type - PTrunk = ref TTrunk - TTrunk {.final.} = object - next: PTrunk # all nodes are connected with this pointer - key: int # start address at bit 0 - bits: array[0..IntsPerTrunk - 1, BitScalar] # a bit vector - - TTrunkSeq = seq[PTrunk] - IntSet* = object ## an efficient set of 'int' implemented as a sparse bit set - counter, max: int - head: PTrunk - data: TTrunkSeq - -{.deprecated: [TIntSet: IntSet].} - -proc mustRehash(length, counter: int): bool {.inline.} = - assert(length > counter) - result = (length * 2 < counter * 3) or (length - counter < 4) - -proc nextTry(h, maxHash: THash): THash {.inline.} = - result = ((5 * h) + 1) and maxHash - -proc intSetGet(t: IntSet, key: int): PTrunk = - var h = key and t.max - while t.data[h] != nil: - if t.data[h].key == key: - return t.data[h] - h = nextTry(h, t.max) - result = nil - -proc intSetRawInsert(t: IntSet, data: var TTrunkSeq, desc: PTrunk) = - var h = desc.key and t.max - while data[h] != nil: - assert(data[h] != desc) - h = nextTry(h, t.max) - assert(data[h] == nil) - data[h] = desc - -proc intSetEnlarge(t: var IntSet) = - var n: TTrunkSeq - var oldMax = t.max - t.max = ((t.max + 1) * 2) - 1 - newSeq(n, t.max + 1) - for i in countup(0, oldMax): - if t.data[i] != nil: intSetRawInsert(t, n, t.data[i]) - swap(t.data, n) - -proc intSetPut(t: var IntSet, key: int): PTrunk = - var h = key and t.max - while t.data[h] != nil: - if t.data[h].key == key: - return t.data[h] - h = nextTry(h, t.max) - if mustRehash(t.max + 1, t.counter): intSetEnlarge(t) - inc(t.counter) - h = key and t.max - while t.data[h] != nil: h = nextTry(h, t.max) - assert(t.data[h] == nil) - new(result) - result.next = t.head - result.key = key - t.head = result - t.data[h] = result - -proc contains*(s: IntSet, key: int): bool = - ## returns true iff `key` is in `s`. - var t = intSetGet(s, `shr`(key, TrunkShift)) - if t != nil: - var u = key and TrunkMask - result = (t.bits[`shr`(u, IntShift)] and `shl`(1, u and IntMask)) != 0 - else: - result = false - -proc incl*(s: var IntSet, key: int) = - ## includes an element `key` in `s`. - var t = intSetPut(s, `shr`(key, TrunkShift)) - var u = key and TrunkMask - t.bits[`shr`(u, IntShift)] = t.bits[`shr`(u, IntShift)] or - `shl`(1, u and IntMask) - -proc excl*(s: var IntSet, key: int) = - ## excludes `key` from the set `s`. - var t = intSetGet(s, `shr`(key, TrunkShift)) - if t != nil: - var u = key and TrunkMask - t.bits[`shr`(u, IntShift)] = t.bits[`shr`(u, IntShift)] and - not `shl`(1, u and IntMask) - -proc containsOrIncl*(s: var IntSet, key: int): bool = - ## returns true if `s` contains `key`, otherwise `key` is included in `s` - ## and false is returned. - var t = intSetGet(s, `shr`(key, TrunkShift)) - if t != nil: - var u = key and TrunkMask - result = (t.bits[`shr`(u, IntShift)] and `shl`(1, u and IntMask)) != 0 - if not result: - t.bits[`shr`(u, IntShift)] = t.bits[`shr`(u, IntShift)] or - `shl`(1, u and IntMask) - else: - incl(s, key) - result = false - -proc initIntSet*: IntSet = - ## creates a new int set that is empty. - newSeq(result.data, InitIntSetSize) - result.max = InitIntSetSize-1 - result.counter = 0 - result.head = nil - -proc assign*(dest: var IntSet, src: IntSet) = - ## copies `src` to `dest`. `dest` does not need to be initialized by - ## `initIntSet`. - dest.counter = src.counter - dest.max = src.max - newSeq(dest.data, src.data.len) - - var it = src.head - while it != nil: - - var h = it.key and dest.max - while dest.data[h] != nil: h = nextTry(h, dest.max) - assert(dest.data[h] == nil) - - var n: PTrunk - new(n) - n.next = dest.head - n.key = it.key - n.bits = it.bits - dest.head = n - dest.data[h] = n - - it = it.next - -iterator items*(s: IntSet): int {.inline.} = - ## iterates over any included element of `s`. - var r = s.head - while r != nil: - var i = 0 - while i <= high(r.bits): - var w = r.bits[i] - # taking a copy of r.bits[i] here is correct, because - # modifying operations are not allowed during traversation - var j = 0 - while w != 0: # test all remaining bits for zero - if (w and 1) != 0: # the bit is set! - yield (r.key shl TrunkShift) or (i shl IntShift +% j) - inc(j) - w = w shr 1 - inc(i) - r = r.next - -template dollarImpl(): stmt = - result = "{" - for key in items(s): - if result.len > 1: result.add(", ") - result.add($key) - result.add("}") - -proc `$`*(s: IntSet): string = - ## The `$` operator for int sets. - dollarImpl() - -proc empty*(s: IntSet): bool {.inline, deprecated.} = - ## returns true if `s` is empty. This is safe to call even before - ## the set has been initialized with `initIntSet`. Note this never - ## worked reliably and so is deprecated. - result = s.counter == 0 - -when isMainModule: - var x = initIntSet() - x.incl(1) - x.incl(2) - x.incl(7) - x.incl(1056) - for e in items(x): echo e - - var y: TIntSet - assign(y, x) - for e in items(y): echo e +proc initIntSet*(): IntSet {.inline.} = initPackedSet[int]() diff --git a/lib/pure/collections/lists.nim b/lib/pure/collections/lists.nim index 535d5e21d..6b88747ef 100644 --- a/lib/pure/collections/lists.nim +++ b/lib/pure/collections/lists.nim @@ -7,246 +7,574 @@ # distribution, for details about the copyright. # -## Implementation of singly and doubly linked lists. Because it makes no sense -## to do so, the 'next' and 'prev' pointers are not hidden from you and can -## be manipulated directly for efficiency. - -when not defined(nimhygiene): - {.pragma: dirty.} +## Implementation of: +## * `singly linked lists <#SinglyLinkedList>`_ +## * `doubly linked lists <#DoublyLinkedList>`_ +## * `singly linked rings <#SinglyLinkedRing>`_ (circular lists) +## * `doubly linked rings <#DoublyLinkedRing>`_ (circular lists) +## +## # Basic Usage +## Because it makes no sense to do otherwise, the `next` and `prev` pointers +## are not hidden from you and can be manipulated directly for efficiency. +## +## ## Lists +runnableExamples: + var list = initDoublyLinkedList[int]() + let + a = newDoublyLinkedNode[int](3) + b = newDoublyLinkedNode[int](7) + c = newDoublyLinkedNode[int](9) + + list.add(a) + list.add(b) + list.prepend(c) + + assert a.next == b + assert a.prev == c + assert c.next == a + assert c.next.next == b + assert c.prev == nil + assert b.next == nil + +## ## Rings +runnableExamples: + var ring = initSinglyLinkedRing[int]() + let + a = newSinglyLinkedNode[int](3) + b = newSinglyLinkedNode[int](7) + c = newSinglyLinkedNode[int](9) + + ring.add(a) + ring.add(b) + ring.prepend(c) + + assert c.next == a + assert a.next == b + assert c.next.next == b + assert b.next == c + assert c.next.next.next == c + +## # See also +## * `deques module <deques.html>`_ for double-ended queues + +import std/private/since + +when defined(nimPreviewSlimSystem): + import std/assertions type - DoublyLinkedNodeObj*[T] = object ## a node a doubly linked list consists of - next*, prev*: ref DoublyLinkedNodeObj[T] + DoublyLinkedNodeObj*[T] = object + ## A node of a doubly linked list. + ## + ## It consists of a `value` field, and pointers to `next` and `prev`. + next*: DoublyLinkedNode[T] + prev* {.cursor.}: DoublyLinkedNode[T] value*: T DoublyLinkedNode*[T] = ref DoublyLinkedNodeObj[T] - SinglyLinkedNodeObj*[T] = object ## a node a singly linked list consists of - next*: ref SinglyLinkedNodeObj[T] + SinglyLinkedNodeObj*[T] = object + ## A node of a singly linked list. + ## + ## It consists of a `value` field, and a pointer to `next`. + next*: SinglyLinkedNode[T] value*: T SinglyLinkedNode*[T] = ref SinglyLinkedNodeObj[T] - SinglyLinkedList*[T] = object ## a singly linked list - head*, tail*: SinglyLinkedNode[T] - - DoublyLinkedList*[T] = object ## a doubly linked list - head*, tail*: DoublyLinkedNode[T] + SinglyLinkedList*[T] = object + ## A singly linked list. + head*: SinglyLinkedNode[T] + tail* {.cursor.}: SinglyLinkedNode[T] + + DoublyLinkedList*[T] = object + ## A doubly linked list. + head*: DoublyLinkedNode[T] + tail* {.cursor.}: DoublyLinkedNode[T] + + SinglyLinkedRing*[T] = object + ## A singly linked ring. + head*: SinglyLinkedNode[T] + tail* {.cursor.}: SinglyLinkedNode[T] - SinglyLinkedRing*[T] = object ## a singly linked ring - head*, tail*: SinglyLinkedNode[T] - - DoublyLinkedRing*[T] = object ## a doubly linked ring + DoublyLinkedRing*[T] = object + ## A doubly linked ring. head*: DoublyLinkedNode[T] -{.deprecated: [TDoublyLinkedNode: DoublyLinkedNodeObj, - PDoublyLinkedNode: DoublyLinkedNode, - TSinglyLinkedNode: SinglyLinkedNodeObj, - PSinglyLinkedNode: SinglyLinkedNode, - TDoublyLinkedList: DoublyLinkedList, - TSinglyLinkedRing: SinglyLinkedRing, - TDoublyLinkedRing: DoublyLinkedRing, - TSinglyLinkedList: SinglyLinkedList].} + SomeLinkedList*[T] = SinglyLinkedList[T] | DoublyLinkedList[T] + + SomeLinkedRing*[T] = SinglyLinkedRing[T] | DoublyLinkedRing[T] + + SomeLinkedCollection*[T] = SomeLinkedList[T] | SomeLinkedRing[T] + + SomeLinkedNode*[T] = SinglyLinkedNode[T] | DoublyLinkedNode[T] proc initSinglyLinkedList*[T](): SinglyLinkedList[T] = - ## creates a new singly linked list that is empty. + ## Creates a new singly linked list that is empty. + ## + ## Singly linked lists are initialized by default, so it is not necessary to + ## call this function explicitly. + runnableExamples: + let a = initSinglyLinkedList[int]() + discard proc initDoublyLinkedList*[T](): DoublyLinkedList[T] = - ## creates a new doubly linked list that is empty. + ## Creates a new doubly linked list that is empty. + ## + ## Doubly linked lists are initialized by default, so it is not necessary to + ## call this function explicitly. + runnableExamples: + let a = initDoublyLinkedList[int]() + discard proc initSinglyLinkedRing*[T](): SinglyLinkedRing[T] = - ## creates a new singly linked ring that is empty. + ## Creates a new singly linked ring that is empty. + ## + ## Singly linked rings are initialized by default, so it is not necessary to + ## call this function explicitly. + runnableExamples: + let a = initSinglyLinkedRing[int]() + discard proc initDoublyLinkedRing*[T](): DoublyLinkedRing[T] = - ## creates a new doubly linked ring that is empty. + ## Creates a new doubly linked ring that is empty. + ## + ## Doubly linked rings are initialized by default, so it is not necessary to + ## call this function explicitly. + runnableExamples: + let a = initDoublyLinkedRing[int]() + discard proc newDoublyLinkedNode*[T](value: T): DoublyLinkedNode[T] = - ## creates a new doubly linked node with the given `value`. + ## Creates a new doubly linked node with the given `value`. + runnableExamples: + let n = newDoublyLinkedNode[int](5) + assert n.value == 5 + new(result) result.value = value proc newSinglyLinkedNode*[T](value: T): SinglyLinkedNode[T] = - ## creates a new singly linked node with the given `value`. + ## Creates a new singly linked node with the given `value`. + runnableExamples: + let n = newSinglyLinkedNode[int](5) + assert n.value == 5 + new(result) result.value = value template itemsListImpl() {.dirty.} = - var it = L.head + var it {.cursor.} = L.head while it != nil: yield it.value it = it.next template itemsRingImpl() {.dirty.} = - var it = L.head + var it {.cursor.} = L.head if it != nil: while true: yield it.value it = it.next if it == L.head: break -template nodesListImpl() {.dirty.} = - var it = L.head - while it != nil: - var nxt = it.next - yield it - it = nxt +iterator items*[T](L: SomeLinkedList[T]): T = + ## Yields every value of `L`. + ## + ## **See also:** + ## * `mitems iterator <#mitems.i,SomeLinkedList[T]>`_ + ## * `nodes iterator <#nodes.i,SomeLinkedList[T]>`_ + runnableExamples: + from std/sugar import collect + from std/sequtils import toSeq + let a = collect(initSinglyLinkedList): + for i in 1..3: 10 * i + assert toSeq(items(a)) == toSeq(a) + assert toSeq(a) == @[10, 20, 30] -template nodesRingImpl() {.dirty.} = - var it = L.head - if it != nil: - while true: - var nxt = it.next - yield it - it = nxt - if it == L.head: break - -template findImpl() {.dirty.} = - for x in nodes(L): - if x.value == value: return x - -iterator items*[T](L: DoublyLinkedList[T]): T = - ## yields every value of `L`. - itemsListImpl() - -iterator items*[T](L: SinglyLinkedList[T]): T = - ## yields every value of `L`. itemsListImpl() -iterator items*[T](L: SinglyLinkedRing[T]): T = - ## yields every value of `L`. - itemsRingImpl() +iterator items*[T](L: SomeLinkedRing[T]): T = + ## Yields every value of `L`. + ## + ## **See also:** + ## * `mitems iterator <#mitems.i,SomeLinkedRing[T]>`_ + ## * `nodes iterator <#nodes.i,SomeLinkedRing[T]>`_ + runnableExamples: + from std/sugar import collect + from std/sequtils import toSeq + let a = collect(initSinglyLinkedRing): + for i in 1..3: 10 * i + assert toSeq(items(a)) == toSeq(a) + assert toSeq(a) == @[10, 20, 30] -iterator items*[T](L: DoublyLinkedRing[T]): T = - ## yields every value of `L`. itemsRingImpl() -iterator mitems*[T](L: var DoublyLinkedList[T]): var T = - ## yields every value of `L` so that you can modify it. - itemsListImpl() +iterator mitems*[T](L: var SomeLinkedList[T]): var T = + ## Yields every value of `L` so that you can modify it. + ## + ## **See also:** + ## * `items iterator <#items.i,SomeLinkedList[T]>`_ + ## * `nodes iterator <#nodes.i,SomeLinkedList[T]>`_ + runnableExamples: + var a = initSinglyLinkedList[int]() + for i in 1..5: + a.add(10 * i) + assert $a == "[10, 20, 30, 40, 50]" + for x in mitems(a): + x = 5 * x - 1 + assert $a == "[49, 99, 149, 199, 249]" -iterator mitems*[T](L: var SinglyLinkedList[T]): var T = - ## yields every value of `L` so that you can modify it. itemsListImpl() -iterator mitems*[T](L: var SinglyLinkedRing[T]): var T = - ## yields every value of `L` so that you can modify it. - itemsRingImpl() +iterator mitems*[T](L: var SomeLinkedRing[T]): var T = + ## Yields every value of `L` so that you can modify it. + ## + ## **See also:** + ## * `items iterator <#items.i,SomeLinkedRing[T]>`_ + ## * `nodes iterator <#nodes.i,SomeLinkedRing[T]>`_ + runnableExamples: + var a = initSinglyLinkedRing[int]() + for i in 1..5: + a.add(10 * i) + assert $a == "[10, 20, 30, 40, 50]" + for x in mitems(a): + x = 5 * x - 1 + assert $a == "[49, 99, 149, 199, 249]" -iterator mitems*[T](L: var DoublyLinkedRing[T]): var T = - ## yields every value of `L` so that you can modify it. itemsRingImpl() -iterator nodes*[T](L: SinglyLinkedList[T]): SinglyLinkedNode[T] = - ## iterates over every node of `x`. Removing the current node from the - ## list during traversal is supported. - nodesListImpl() - -iterator nodes*[T](L: DoublyLinkedList[T]): DoublyLinkedNode[T] = - ## iterates over every node of `x`. Removing the current node from the +iterator nodes*[T](L: SomeLinkedList[T]): SomeLinkedNode[T] = + ## Iterates over every node of `x`. Removing the current node from the ## list during traversal is supported. - nodesListImpl() + ## + ## **See also:** + ## * `items iterator <#items.i,SomeLinkedList[T]>`_ + ## * `mitems iterator <#mitems.i,SomeLinkedList[T]>`_ + runnableExamples: + var a = initDoublyLinkedList[int]() + for i in 1..5: + a.add(10 * i) + assert $a == "[10, 20, 30, 40, 50]" + for x in nodes(a): + if x.value == 30: + a.remove(x) + else: + x.value = 5 * x.value - 1 + assert $a == "[49, 99, 199, 249]" + + var it {.cursor.} = L.head + while it != nil: + let nxt = it.next + yield it + it = nxt -iterator nodes*[T](L: SinglyLinkedRing[T]): SinglyLinkedNode[T] = - ## iterates over every node of `x`. Removing the current node from the +iterator nodes*[T](L: SomeLinkedRing[T]): SomeLinkedNode[T] = + ## Iterates over every node of `x`. Removing the current node from the ## list during traversal is supported. - nodesRingImpl() + ## + ## **See also:** + ## * `items iterator <#items.i,SomeLinkedRing[T]>`_ + ## * `mitems iterator <#mitems.i,SomeLinkedRing[T]>`_ + runnableExamples: + var a = initDoublyLinkedRing[int]() + for i in 1..5: + a.add(10 * i) + assert $a == "[10, 20, 30, 40, 50]" + for x in nodes(a): + if x.value == 30: + a.remove(x) + else: + x.value = 5 * x.value - 1 + assert $a == "[49, 99, 199, 249]" + + var it {.cursor.} = L.head + if it != nil: + while true: + let nxt = it.next + yield it + it = nxt + if it == L.head: break -iterator nodes*[T](L: DoublyLinkedRing[T]): DoublyLinkedNode[T] = - ## iterates over every node of `x`. Removing the current node from the - ## list during traversal is supported. - nodesRingImpl() +proc `$`*[T](L: SomeLinkedCollection[T]): string = + ## Turns a list into its string representation for logging and printing. + runnableExamples: + let a = [1, 2, 3, 4].toSinglyLinkedList + assert $a == "[1, 2, 3, 4]" -template dollarImpl() {.dirty.} = result = "[" for x in nodes(L): if result.len > 1: result.add(", ") - result.add($x.value) + result.addQuoted(x.value) result.add("]") -proc `$`*[T](L: SinglyLinkedList[T]): string = - ## turns a list into its string representation. - dollarImpl() - -proc `$`*[T](L: DoublyLinkedList[T]): string = - ## turns a list into its string representation. - dollarImpl() - -proc `$`*[T](L: SinglyLinkedRing[T]): string = - ## turns a list into its string representation. - dollarImpl() - -proc `$`*[T](L: DoublyLinkedRing[T]): string = - ## turns a list into its string representation. - dollarImpl() - -proc find*[T](L: SinglyLinkedList[T], value: T): SinglyLinkedNode[T] = - ## searches in the list for a value. Returns nil if the value does not - ## exist. - findImpl() - -proc find*[T](L: DoublyLinkedList[T], value: T): DoublyLinkedNode[T] = - ## searches in the list for a value. Returns nil if the value does not +proc find*[T](L: SomeLinkedCollection[T], value: T): SomeLinkedNode[T] = + ## Searches in the list for a value. Returns `nil` if the value does not ## exist. - findImpl() + ## + ## **See also:** + ## * `contains proc <#contains,SomeLinkedCollection[T],T>`_ + runnableExamples: + let a = [9, 8].toSinglyLinkedList + assert a.find(9).value == 9 + assert a.find(1) == nil -proc find*[T](L: SinglyLinkedRing[T], value: T): SinglyLinkedNode[T] = - ## searches in the list for a value. Returns nil if the value does not - ## exist. - findImpl() + for x in nodes(L): + if x.value == value: return x -proc find*[T](L: DoublyLinkedRing[T], value: T): DoublyLinkedNode[T] = - ## searches in the list for a value. Returns nil if the value does not - ## exist. - findImpl() +proc contains*[T](L: SomeLinkedCollection[T], value: T): bool {.inline.} = + ## Searches in the list for a value. Returns `false` if the value does not + ## exist, `true` otherwise. This allows the usage of the `in` and `notin` + ## operators. + ## + ## **See also:** + ## * `find proc <#find,SomeLinkedCollection[T],T>`_ + runnableExamples: + let a = [9, 8].toSinglyLinkedList + assert a.contains(9) + assert 8 in a + assert(not a.contains(1)) + assert 2 notin a -proc contains*[T](L: SinglyLinkedList[T], value: T): bool {.inline.} = - ## searches in the list for a value. Returns false if the value does not - ## exist, true otherwise. result = find(L, value) != nil -proc contains*[T](L: DoublyLinkedList[T], value: T): bool {.inline.} = - ## searches in the list for a value. Returns false if the value does not - ## exist, true otherwise. - result = find(L, value) != nil +proc prepend*[T: SomeLinkedList](a: var T, b: T) {.since: (1, 5, 1).} = + ## Prepends a shallow copy of `b` to the beginning of `a`. + ## + ## **See also:** + ## * `prependMoved proc <#prependMoved,T,T>`_ + ## for moving the second list instead of copying + runnableExamples: + from std/sequtils import toSeq + var a = [4, 5].toSinglyLinkedList + let b = [1, 2, 3].toSinglyLinkedList + a.prepend(b) + assert a.toSeq == [1, 2, 3, 4, 5] + assert b.toSeq == [1, 2, 3] + a.prepend(a) + assert a.toSeq == [1, 2, 3, 4, 5, 1, 2, 3, 4, 5] + + var tmp = b.copy + tmp.addMoved(a) + a = tmp + +proc prependMoved*[T: SomeLinkedList](a, b: var T) {.since: (1, 5, 1).} = + ## Moves `b` before the head of `a`. Efficiency: O(1). + ## Note that `b` becomes empty after the operation unless it has the same address as `a`. + ## Self-prepending results in a cycle. + ## + ## **See also:** + ## * `prepend proc <#prepend,T,T>`_ + ## for prepending a copy of a list + runnableExamples: + import std/[sequtils, enumerate, sugar] + var + a = [4, 5].toSinglyLinkedList + b = [1, 2, 3].toSinglyLinkedList + c = [0, 1].toSinglyLinkedList + a.prependMoved(b) + assert a.toSeq == [1, 2, 3, 4, 5] + assert b.toSeq == [] + c.prependMoved(c) + let s = collect: + for i, ci in enumerate(c): + if i == 6: break + ci + assert s == [0, 1, 0, 1, 0, 1] + + b.addMoved(a) + swap a, b + +proc add*[T](L: var SinglyLinkedList[T], n: SinglyLinkedNode[T]) {.inline.} = + ## Appends (adds to the end) a node `n` to `L`. Efficiency: O(1). + ## + ## **See also:** + ## * `add proc <#add,SinglyLinkedList[T],T>`_ for appending a value + ## * `prepend proc <#prepend,SinglyLinkedList[T],SinglyLinkedNode[T]>`_ + ## for prepending a node + ## * `prepend proc <#prepend,SinglyLinkedList[T],T>`_ for prepending a value + runnableExamples: + var a = initSinglyLinkedList[int]() + let n = newSinglyLinkedNode[int](9) + a.add(n) + assert a.contains(9) -proc contains*[T](L: SinglyLinkedRing[T], value: T): bool {.inline.} = - ## searches in the list for a value. Returns false if the value does not - ## exist, true otherwise. - result = find(L, value) != nil + n.next = nil + if L.tail != nil: + assert(L.tail.next == nil) + L.tail.next = n + L.tail = n + if L.head == nil: L.head = n -proc contains*[T](L: DoublyLinkedRing[T], value: T): bool {.inline.} = - ## searches in the list for a value. Returns false if the value does not - ## exist, true otherwise. - result = find(L, value) != nil +proc add*[T](L: var SinglyLinkedList[T], value: T) {.inline.} = + ## Appends (adds to the end) a value to `L`. Efficiency: O(1). + ## + ## **See also:** + ## * `add proc <#add,SinglyLinkedList[T],T>`_ for appending a value + ## * `prepend proc <#prepend,SinglyLinkedList[T],SinglyLinkedNode[T]>`_ + ## for prepending a node + ## * `prepend proc <#prepend,SinglyLinkedList[T],T>`_ for prepending a value + runnableExamples: + var a = initSinglyLinkedList[int]() + a.add(9) + a.add(8) + assert a.contains(9) + + add(L, newSinglyLinkedNode(value)) + +proc prepend*[T](L: var SinglyLinkedList[T], + n: SinglyLinkedNode[T]) {.inline.} = + ## Prepends (adds to the beginning) a node to `L`. Efficiency: O(1). + ## + ## **See also:** + ## * `add proc <#add,SinglyLinkedList[T],SinglyLinkedNode[T]>`_ + ## for appending a node + ## * `add proc <#add,SinglyLinkedList[T],T>`_ for appending a value + ## * `prepend proc <#prepend,SinglyLinkedList[T],T>`_ for prepending a value + runnableExamples: + var a = initSinglyLinkedList[int]() + let n = newSinglyLinkedNode[int](9) + a.prepend(n) + assert a.contains(9) -proc prepend*[T](L: var SinglyLinkedList[T], - n: SinglyLinkedNode[T]) {.inline.} = - ## prepends a node to `L`. Efficiency: O(1). n.next = L.head L.head = n + if L.tail == nil: L.tail = n + +proc prepend*[T](L: var SinglyLinkedList[T], value: T) {.inline.} = + ## Prepends (adds to the beginning) a node to `L`. Efficiency: O(1). + ## + ## **See also:** + ## * `add proc <#add,SinglyLinkedList[T],SinglyLinkedNode[T]>`_ + ## for appending a node + ## * `add proc <#add,SinglyLinkedList[T],T>`_ for appending a value + ## * `prepend proc <#prepend,SinglyLinkedList[T],SinglyLinkedNode[T]>`_ + ## for prepending a node + runnableExamples: + var a = initSinglyLinkedList[int]() + a.prepend(9) + a.prepend(8) + assert a.contains(9) -proc prepend*[T](L: var SinglyLinkedList[T], value: T) {.inline.} = - ## prepends a node to `L`. Efficiency: O(1). prepend(L, newSinglyLinkedNode(value)) - -proc append*[T](L: var DoublyLinkedList[T], n: DoublyLinkedNode[T]) = - ## appends a node `n` to `L`. Efficiency: O(1). + +func copy*[T](a: SinglyLinkedList[T]): SinglyLinkedList[T] {.since: (1, 5, 1).} = + ## Creates a shallow copy of `a`. + runnableExamples: + from std/sequtils import toSeq + type Foo = ref object + x: int + var + f = Foo(x: 1) + a = [f].toSinglyLinkedList + let b = a.copy + a.add([f].toSinglyLinkedList) + assert a.toSeq == [f, f] + assert b.toSeq == [f] # b isn't modified... + f.x = 42 + assert a.head.value.x == 42 + assert b.head.value.x == 42 # ... but the elements are not deep copied + + let c = [1, 2, 3].toSinglyLinkedList + assert $c == $c.copy + + result = initSinglyLinkedList[T]() + for x in a.items: + result.add(x) + +proc addMoved*[T](a, b: var SinglyLinkedList[T]) {.since: (1, 5, 1).} = + ## Moves `b` to the end of `a`. Efficiency: O(1). + ## Note that `b` becomes empty after the operation unless it has the same address as `a`. + ## Self-adding results in a cycle. + ## + ## **See also:** + ## * `add proc <#add,T,T>`_ for adding a copy of a list + runnableExamples: + import std/[sequtils, enumerate, sugar] + var + a = [1, 2, 3].toSinglyLinkedList + b = [4, 5].toSinglyLinkedList + c = [0, 1].toSinglyLinkedList + a.addMoved(b) + assert a.toSeq == [1, 2, 3, 4, 5] + assert b.toSeq == [] + c.addMoved(c) + let s = collect: + for i, ci in enumerate(c): + if i == 6: break + ci + assert s == [0, 1, 0, 1, 0, 1] + + if b.head != nil: + if a.head == nil: + a.head = b.head + else: + a.tail.next = b.head + a.tail = b.tail + if a.addr != b.addr: + b.head = nil + b.tail = nil + +proc add*[T](L: var DoublyLinkedList[T], n: DoublyLinkedNode[T]) = + ## Appends (adds to the end) a node `n` to `L`. Efficiency: O(1). + ## + ## **See also:** + ## * `add proc <#add,DoublyLinkedList[T],T>`_ for appending a value + ## * `prepend proc <#prepend,DoublyLinkedList[T],DoublyLinkedNode[T]>`_ + ## for prepending a node + ## * `prepend proc <#prepend,DoublyLinkedList[T],T>`_ for prepending a value + ## * `remove proc <#remove,DoublyLinkedList[T],DoublyLinkedNode[T]>`_ + ## for removing a node + runnableExamples: + var a = initDoublyLinkedList[int]() + let n = newDoublyLinkedNode[int](9) + a.add(n) + assert a.contains(9) + n.next = nil n.prev = L.tail - if L.tail != nil: + if L.tail != nil: assert(L.tail.next == nil) L.tail.next = n L.tail = n if L.head == nil: L.head = n -proc append*[T](L: var DoublyLinkedList[T], value: T) = - ## appends a value to `L`. Efficiency: O(1). - append(L, newDoublyLinkedNode(value)) +proc add*[T](L: var DoublyLinkedList[T], value: T) = + ## Appends (adds to the end) a value to `L`. Efficiency: O(1). + ## + ## **See also:** + ## * `add proc <#add,DoublyLinkedList[T],DoublyLinkedNode[T]>`_ + ## for appending a node + ## * `prepend proc <#prepend,DoublyLinkedList[T],DoublyLinkedNode[T]>`_ + ## for prepending a node + ## * `prepend proc <#prepend,DoublyLinkedList[T],T>`_ for prepending a value + ## * `remove proc <#remove,DoublyLinkedList[T],DoublyLinkedNode[T]>`_ + ## for removing a node + runnableExamples: + var a = initDoublyLinkedList[int]() + a.add(9) + a.add(8) + assert a.contains(9) + + add(L, newDoublyLinkedNode(value)) + +proc prepend*[T](L: var DoublyLinkedList[T], n: DoublyLinkedNode[T]) = + ## Prepends (adds to the beginning) a node `n` to `L`. Efficiency: O(1). + ## + ## **See also:** + ## * `add proc <#add,DoublyLinkedList[T],DoublyLinkedNode[T]>`_ + ## for appending a node + ## * `add proc <#add,DoublyLinkedList[T],T>`_ for appending a value + ## * `prepend proc <#prepend,DoublyLinkedList[T],T>`_ for prepending a value + ## * `remove proc <#remove,DoublyLinkedList[T],DoublyLinkedNode[T]>`_ + ## for removing a node + runnableExamples: + var a = initDoublyLinkedList[int]() + let n = newDoublyLinkedNode[int](9) + a.prepend(n) + assert a.contains(9) -proc prepend*[T](L: var DoublyLinkedList[T], n: DoublyLinkedNode[T]) = - ## prepends a node `n` to `L`. Efficiency: O(1). n.prev = nil n.next = L.head if L.head != nil: @@ -255,36 +583,225 @@ proc prepend*[T](L: var DoublyLinkedList[T], n: DoublyLinkedNode[T]) = L.head = n if L.tail == nil: L.tail = n -proc prepend*[T](L: var DoublyLinkedList[T], value: T) = - ## prepends a value to `L`. Efficiency: O(1). +proc prepend*[T](L: var DoublyLinkedList[T], value: T) = + ## Prepends (adds to the beginning) a value to `L`. Efficiency: O(1). + ## + ## **See also:** + ## * `add proc <#add,DoublyLinkedList[T],DoublyLinkedNode[T]>`_ + ## for appending a node + ## * `add proc <#add,DoublyLinkedList[T],T>`_ for appending a value + ## * `prepend proc <#prepend,DoublyLinkedList[T],DoublyLinkedNode[T]>`_ + ## for prepending a node + ## * `remove proc <#remove,DoublyLinkedList[T],DoublyLinkedNode[T]>`_ + ## for removing a node + runnableExamples: + var a = initDoublyLinkedList[int]() + a.prepend(9) + a.prepend(8) + assert a.contains(9) + prepend(L, newDoublyLinkedNode(value)) - -proc remove*[T](L: var DoublyLinkedList[T], n: DoublyLinkedNode[T]) = - ## removes `n` from `L`. Efficiency: O(1). + +func copy*[T](a: DoublyLinkedList[T]): DoublyLinkedList[T] {.since: (1, 5, 1).} = + ## Creates a shallow copy of `a`. + runnableExamples: + from std/sequtils import toSeq + type Foo = ref object + x: int + var + f = Foo(x: 1) + a = [f].toDoublyLinkedList + let b = a.copy + a.add([f].toDoublyLinkedList) + assert a.toSeq == [f, f] + assert b.toSeq == [f] # b isn't modified... + f.x = 42 + assert a.head.value.x == 42 + assert b.head.value.x == 42 # ... but the elements are not deep copied + + let c = [1, 2, 3].toDoublyLinkedList + assert $c == $c.copy + + result = initDoublyLinkedList[T]() + for x in a.items: + result.add(x) + +proc addMoved*[T](a, b: var DoublyLinkedList[T]) {.since: (1, 5, 1).} = + ## Moves `b` to the end of `a`. Efficiency: O(1). + ## Note that `b` becomes empty after the operation unless it has the same address as `a`. + ## Self-adding results in a cycle. + ## + ## **See also:** + ## * `add proc <#add,T,T>`_ + ## for adding a copy of a list + runnableExamples: + import std/[sequtils, enumerate, sugar] + var + a = [1, 2, 3].toDoublyLinkedList + b = [4, 5].toDoublyLinkedList + c = [0, 1].toDoublyLinkedList + a.addMoved(b) + assert a.toSeq == [1, 2, 3, 4, 5] + assert b.toSeq == [] + c.addMoved(c) + let s = collect: + for i, ci in enumerate(c): + if i == 6: break + ci + assert s == [0, 1, 0, 1, 0, 1] + + if b.head != nil: + if a.head == nil: + a.head = b.head + else: + b.head.prev = a.tail + a.tail.next = b.head + a.tail = b.tail + if a.addr != b.addr: + b.head = nil + b.tail = nil + +proc add*[T: SomeLinkedList](a: var T, b: T) {.since: (1, 5, 1).} = + ## Appends a shallow copy of `b` to the end of `a`. + ## + ## **See also:** + ## * `addMoved proc <#addMoved,SinglyLinkedList[T],SinglyLinkedList[T]>`_ + ## * `addMoved proc <#addMoved,DoublyLinkedList[T],DoublyLinkedList[T]>`_ + ## for moving the second list instead of copying + runnableExamples: + from std/sequtils import toSeq + var a = [1, 2, 3].toSinglyLinkedList + let b = [4, 5].toSinglyLinkedList + a.add(b) + assert a.toSeq == [1, 2, 3, 4, 5] + assert b.toSeq == [4, 5] + a.add(a) + assert a.toSeq == [1, 2, 3, 4, 5, 1, 2, 3, 4, 5] + + var tmp = b.copy + a.addMoved(tmp) + +proc remove*[T](L: var SinglyLinkedList[T], n: SinglyLinkedNode[T]): bool {.discardable.} = + ## Removes a node `n` from `L`. + ## Returns `true` if `n` was found in `L`. + ## Efficiency: O(n); the list is traversed until `n` is found. + ## Attempting to remove an element not contained in the list is a no-op. + ## When the list is cyclic, the cycle is preserved after removal. + runnableExamples: + import std/[sequtils, enumerate, sugar] + var a = [0, 1, 2].toSinglyLinkedList + let n = a.head.next + assert n.value == 1 + assert a.remove(n) == true + assert a.toSeq == [0, 2] + assert a.remove(n) == false + assert a.toSeq == [0, 2] + a.addMoved(a) # cycle: [0, 2, 0, 2, ...] + a.remove(a.head) + let s = collect: + for i, ai in enumerate(a): + if i == 4: break + ai + assert s == [2, 2, 2, 2] + + if n == L.head: + L.head = n.next + if L.tail.next == n: + L.tail.next = L.head # restore cycle + else: + var prev {.cursor.} = L.head + while prev.next != n and prev.next != nil: + prev = prev.next + if prev.next == nil: + return false + prev.next = n.next + if L.tail == n: + L.tail = prev # update tail if we removed the last node + true + +proc remove*[T](L: var DoublyLinkedList[T], n: DoublyLinkedNode[T]) = + ## Removes a node `n` from `L`. Efficiency: O(1). + ## This function assumes, for the sake of efficiency, that `n` is contained in `L`, + ## otherwise the effects are undefined. + ## When the list is cyclic, the cycle is preserved after removal. + runnableExamples: + import std/[sequtils, enumerate, sugar] + var a = [0, 1, 2].toSinglyLinkedList + let n = a.head.next + assert n.value == 1 + a.remove(n) + assert a.toSeq == [0, 2] + a.remove(n) + assert a.toSeq == [0, 2] + a.addMoved(a) # cycle: [0, 2, 0, 2, ...] + a.remove(a.head) + let s = collect: + for i, ai in enumerate(a): + if i == 4: break + ai + assert s == [2, 2, 2, 2] + if n == L.tail: L.tail = n.prev if n == L.head: L.head = n.next if n.next != nil: n.next.prev = n.prev if n.prev != nil: n.prev.next = n.next -proc append*[T](L: var SinglyLinkedRing[T], n: SinglyLinkedNode[T]) = - ## appends a node `n` to `L`. Efficiency: O(1). + +proc add*[T](L: var SinglyLinkedRing[T], n: SinglyLinkedNode[T]) = + ## Appends (adds to the end) a node `n` to `L`. Efficiency: O(1). + ## + ## **See also:** + ## * `add proc <#add,SinglyLinkedRing[T],T>`_ for appending a value + ## * `prepend proc <#prepend,SinglyLinkedRing[T],SinglyLinkedNode[T]>`_ + ## for prepending a node + ## * `prepend proc <#prepend,SinglyLinkedRing[T],T>`_ for prepending a value + runnableExamples: + var a = initSinglyLinkedRing[int]() + let n = newSinglyLinkedNode[int](9) + a.add(n) + assert a.contains(9) + if L.head != nil: n.next = L.head assert(L.tail != nil) L.tail.next = n - L.tail = n else: n.next = n L.head = n - L.tail = n + L.tail = n -proc append*[T](L: var SinglyLinkedRing[T], value: T) = - ## appends a value to `L`. Efficiency: O(1). - append(L, newSinglyLinkedNode(value)) +proc add*[T](L: var SinglyLinkedRing[T], value: T) = + ## Appends (adds to the end) a value to `L`. Efficiency: O(1). + ## + ## **See also:** + ## * `add proc <#add,SinglyLinkedRing[T],SinglyLinkedNode[T]>`_ + ## for appending a node + ## * `prepend proc <#prepend,SinglyLinkedRing[T],SinglyLinkedNode[T]>`_ + ## for prepending a node + ## * `prepend proc <#prepend,SinglyLinkedRing[T],T>`_ for prepending a value + runnableExamples: + var a = initSinglyLinkedRing[int]() + a.add(9) + a.add(8) + assert a.contains(9) + + add(L, newSinglyLinkedNode(value)) + +proc prepend*[T](L: var SinglyLinkedRing[T], n: SinglyLinkedNode[T]) = + ## Prepends (adds to the beginning) a node `n` to `L`. Efficiency: O(1). + ## + ## **See also:** + ## * `add proc <#add,SinglyLinkedRing[T],SinglyLinkedNode[T]>`_ + ## for appending a node + ## * `add proc <#add,SinglyLinkedRing[T],T>`_ for appending a value + ## * `prepend proc <#prepend,SinglyLinkedRing[T],T>`_ for prepending a value + runnableExamples: + var a = initSinglyLinkedRing[int]() + let n = newSinglyLinkedNode[int](9) + a.prepend(n) + assert a.contains(9) -proc prepend*[T](L: var SinglyLinkedRing[T], n: SinglyLinkedNode[T]) = - ## prepends a node `n` to `L`. Efficiency: O(1). if L.head != nil: n.next = L.head assert(L.tail != nil) @@ -294,12 +811,41 @@ proc prepend*[T](L: var SinglyLinkedRing[T], n: SinglyLinkedNode[T]) = L.tail = n L.head = n -proc prepend*[T](L: var SinglyLinkedRing[T], value: T) = - ## prepends a value to `L`. Efficiency: O(1). +proc prepend*[T](L: var SinglyLinkedRing[T], value: T) = + ## Prepends (adds to the beginning) a value to `L`. Efficiency: O(1). + ## + ## **See also:** + ## * `add proc <#add,SinglyLinkedRing[T],SinglyLinkedNode[T]>`_ + ## for appending a node + ## * `add proc <#add,SinglyLinkedRing[T],T>`_ for appending a value + ## * `prepend proc <#prepend,SinglyLinkedRing[T],SinglyLinkedNode[T]>`_ + ## for prepending a node + runnableExamples: + var a = initSinglyLinkedRing[int]() + a.prepend(9) + a.prepend(8) + assert a.contains(9) + prepend(L, newSinglyLinkedNode(value)) -proc append*[T](L: var DoublyLinkedRing[T], n: DoublyLinkedNode[T]) = - ## appends a node `n` to `L`. Efficiency: O(1). + + +proc add*[T](L: var DoublyLinkedRing[T], n: DoublyLinkedNode[T]) = + ## Appends (adds to the end) a node `n` to `L`. Efficiency: O(1). + ## + ## **See also:** + ## * `add proc <#add,DoublyLinkedRing[T],T>`_ for appending a value + ## * `prepend proc <#prepend,DoublyLinkedRing[T],DoublyLinkedNode[T]>`_ + ## for prepending a node + ## * `prepend proc <#prepend,DoublyLinkedRing[T],T>`_ for prepending a value + ## * `remove proc <#remove,DoublyLinkedRing[T],DoublyLinkedNode[T]>`_ + ## for removing a node + runnableExamples: + var a = initDoublyLinkedRing[int]() + let n = newDoublyLinkedNode[int](9) + a.add(n) + assert a.contains(9) + if L.head != nil: n.next = L.head n.prev = L.head.prev @@ -310,13 +856,42 @@ proc append*[T](L: var DoublyLinkedRing[T], n: DoublyLinkedNode[T]) = n.next = n L.head = n -proc append*[T](L: var DoublyLinkedRing[T], value: T) = - ## appends a value to `L`. Efficiency: O(1). - append(L, newDoublyLinkedNode(value)) +proc add*[T](L: var DoublyLinkedRing[T], value: T) = + ## Appends (adds to the end) a value to `L`. Efficiency: O(1). + ## + ## **See also:** + ## * `add proc <#add,DoublyLinkedRing[T],DoublyLinkedNode[T]>`_ + ## for appending a node + ## * `prepend proc <#prepend,DoublyLinkedRing[T],DoublyLinkedNode[T]>`_ + ## for prepending a node + ## * `prepend proc <#prepend,DoublyLinkedRing[T],T>`_ for prepending a value + ## * `remove proc <#remove,DoublyLinkedRing[T],DoublyLinkedNode[T]>`_ + ## for removing a node + runnableExamples: + var a = initDoublyLinkedRing[int]() + a.add(9) + a.add(8) + assert a.contains(9) + + add(L, newDoublyLinkedNode(value)) + +proc prepend*[T](L: var DoublyLinkedRing[T], n: DoublyLinkedNode[T]) = + ## Prepends (adds to the beginning) a node `n` to `L`. Efficiency: O(1). + ## + ## **See also:** + ## * `add proc <#add,DoublyLinkedRing[T],DoublyLinkedNode[T]>`_ + ## for appending a node + ## * `add proc <#add,DoublyLinkedRing[T],T>`_ for appending a value + ## * `prepend proc <#prepend,DoublyLinkedRing[T],T>`_ for prepending a value + ## * `remove proc <#remove,DoublyLinkedRing[T],DoublyLinkedNode[T]>`_ + ## for removing a node + runnableExamples: + var a = initDoublyLinkedRing[int]() + let n = newDoublyLinkedNode[int](9) + a.prepend(n) + assert a.contains(9) -proc prepend*[T](L: var DoublyLinkedRing[T], n: DoublyLinkedNode[T]) = - ## prepends a node `n` to `L`. Efficiency: O(1). - if L.head != nil: + if L.head != nil: n.next = L.head n.prev = L.head.prev L.head.prev.next = n @@ -326,18 +901,115 @@ proc prepend*[T](L: var DoublyLinkedRing[T], n: DoublyLinkedNode[T]) = n.next = n L.head = n -proc prepend*[T](L: var DoublyLinkedRing[T], value: T) = - ## prepends a value to `L`. Efficiency: O(1). +proc prepend*[T](L: var DoublyLinkedRing[T], value: T) = + ## Prepends (adds to the beginning) a value to `L`. Efficiency: O(1). + ## + ## **See also:** + ## * `add proc <#add,DoublyLinkedRing[T],DoublyLinkedNode[T]>`_ + ## for appending a node + ## * `add proc <#add,DoublyLinkedRing[T],T>`_ for appending a value + ## * `prepend proc <#prepend,DoublyLinkedRing[T],DoublyLinkedNode[T]>`_ + ## for prepending a node + ## * `remove proc <#remove,DoublyLinkedRing[T],DoublyLinkedNode[T]>`_ + ## for removing a node + runnableExamples: + var a = initDoublyLinkedRing[int]() + a.prepend(9) + a.prepend(8) + assert a.contains(9) + prepend(L, newDoublyLinkedNode(value)) - -proc remove*[T](L: var DoublyLinkedRing[T], n: DoublyLinkedNode[T]) = - ## removes `n` from `L`. Efficiency: O(1). + +proc remove*[T](L: var DoublyLinkedRing[T], n: DoublyLinkedNode[T]) = + ## Removes `n` from `L`. Efficiency: O(1). + ## This function assumes, for the sake of efficiency, that `n` is contained in `L`, + ## otherwise the effects are undefined. + runnableExamples: + var a = initDoublyLinkedRing[int]() + let n = newDoublyLinkedNode[int](5) + a.add(n) + assert 5 in a + a.remove(n) + assert 5 notin a + n.next.prev = n.prev n.prev.next = n.next - if n == L.head: - var p = L.head.prev - if p == L.head: + if n == L.head: + let p = L.head.prev + if p == L.head: # only one element left: L.head = nil else: - L.head = L.head.prev + L.head = p + +proc append*[T](a: var (SinglyLinkedList[T] | SinglyLinkedRing[T]), + b: SinglyLinkedList[T] | SinglyLinkedNode[T] | T) = + ## Alias for `a.add(b)`. + ## + ## **See also:** + ## * `add proc <#add,SinglyLinkedList[T],SinglyLinkedNode[T]>`_ + ## * `add proc <#add,SinglyLinkedList[T],T>`_ + ## * `add proc <#add,T,T>`_ + a.add(b) + +proc append*[T](a: var (DoublyLinkedList[T] | DoublyLinkedRing[T]), + b: DoublyLinkedList[T] | DoublyLinkedNode[T] | T) = + ## Alias for `a.add(b)`. + ## + ## **See also:** + ## * `add proc <#add,DoublyLinkedList[T],DoublyLinkedNode[T]>`_ + ## * `add proc <#add,DoublyLinkedList[T],T>`_ + ## * `add proc <#add,T,T>`_ + a.add(b) + +proc appendMoved*[T: SomeLinkedList](a, b: var T) {.since: (1, 5, 1).} = + ## Alias for `a.addMoved(b)`. + ## + ## **See also:** + ## * `addMoved proc <#addMoved,SinglyLinkedList[T],SinglyLinkedList[T]>`_ + ## * `addMoved proc <#addMoved,DoublyLinkedList[T],DoublyLinkedList[T]>`_ + a.addMoved(b) + +func toSinglyLinkedList*[T](elems: openArray[T]): SinglyLinkedList[T] {.since: (1, 5, 1).} = + ## Creates a new `SinglyLinkedList` from the members of `elems`. + runnableExamples: + from std/sequtils import toSeq + let a = [1, 2, 3, 4, 5].toSinglyLinkedList + assert a.toSeq == [1, 2, 3, 4, 5] + + result = initSinglyLinkedList[T]() + for elem in elems.items: + result.add(elem) + +func toSinglyLinkedRing*[T](elems: openArray[T]): SinglyLinkedRing[T] = + ## Creates a new `SinglyLinkedRing` from the members of `elems`. + runnableExamples: + from std/sequtils import toSeq + let a = [1, 2, 3, 4, 5].toSinglyLinkedRing + assert a.toSeq == [1, 2, 3, 4, 5] + + result = initSinglyLinkedRing[T]() + for elem in elems.items: + result.add(elem) + +func toDoublyLinkedList*[T](elems: openArray[T]): DoublyLinkedList[T] {.since: (1, 5, 1).} = + ## Creates a new `DoublyLinkedList` from the members of `elems`. + runnableExamples: + from std/sequtils import toSeq + let a = [1, 2, 3, 4, 5].toDoublyLinkedList + assert a.toSeq == [1, 2, 3, 4, 5] + + result = initDoublyLinkedList[T]() + for elem in elems.items: + result.add(elem) + +func toDoublyLinkedRing*[T](elems: openArray[T]): DoublyLinkedRing[T] = + ## Creates a new `DoublyLinkedRing` from the members of `elems`. + runnableExamples: + from std/sequtils import toSeq + let a = [1, 2, 3, 4, 5].toDoublyLinkedRing + assert a.toSeq == [1, 2, 3, 4, 5] + + result = initDoublyLinkedRing[T]() + for elem in elems.items: + result.add(elem) diff --git a/lib/pure/collections/queues.nim b/lib/pure/collections/queues.nim deleted file mode 100644 index af5e7b6cd..000000000 --- a/lib/pure/collections/queues.nim +++ /dev/null @@ -1,102 +0,0 @@ -# -# -# Nim's Runtime Library -# (c) Copyright 2012 Andreas Rumpf -# -# See the file "copying.txt", included in this -# distribution, for details about the copyright. -# - -## Implementation of a `queue`:idx:. The underlying implementation uses a ``seq``. -## Note: For inter thread communication use -## a `TChannel <channels.html>`_ instead. - -import math - -type - Queue*[T] = object ## a queue - data: seq[T] - rd, wr, count, mask: int - -{.deprecated: [TQueue: Queue].} - -proc initQueue*[T](initialSize=4): Queue[T] = - ## creates a new queue. `initialSize` needs to be a power of 2. - assert isPowerOfTwo(initialSize) - result.mask = initialSize-1 - newSeq(result.data, initialSize) - -proc len*[T](q: Queue[T]): int = - ## returns the number of elements of `q`. - result = q.count - -iterator items*[T](q: Queue[T]): T = - ## yields every element of `q`. - var i = q.rd - var c = q.count - while c > 0: - dec c - yield q.data[i] - i = (i + 1) and q.mask - -iterator mitems*[T](q: var Queue[T]): var T = - ## yields every element of `q`. - var i = q.rd - var c = q.count - while c > 0: - dec c - yield q.data[i] - i = (i + 1) and q.mask - -proc add*[T](q: var Queue[T], item: T) = - ## adds an `item` to the end of the queue `q`. - var cap = q.mask+1 - if q.count >= cap: - var n: seq[T] - newSeq(n, cap*2) - var i = 0 - for x in items(q): - shallowCopy(n[i], x) - inc i - shallowCopy(q.data, n) - q.mask = cap*2 - 1 - q.wr = q.count - q.rd = 0 - inc q.count - q.data[q.wr] = item - q.wr = (q.wr + 1) and q.mask - -proc enqueue*[T](q: var Queue[T], item: T) = - ## alias for the ``add`` operation. - add(q, item) - -proc dequeue*[T](q: var Queue[T]): T = - ## removes and returns the first element of the queue `q`. - assert q.count > 0 - dec q.count - result = q.data[q.rd] - q.rd = (q.rd + 1) and q.mask - -proc `$`*[T](q: Queue[T]): string = - ## turns a queue into its string representation. - result = "[" - for x in items(q): - if result.len > 1: result.add(", ") - result.add($x) - result.add("]") - -when isMainModule: - var q = initQueue[int]() - q.add(123) - q.add(9) - q.add(4) - var first = q.dequeue - q.add(56) - q.add(6) - var second = q.dequeue - q.add(789) - - assert first == 123 - assert second == 9 - assert($q == "[4, 56, 6, 789]") - diff --git a/lib/pure/collections/rtarrays.nim b/lib/pure/collections/rtarrays.nim index 9d8085643..3c3ffda7c 100644 --- a/lib/pure/collections/rtarrays.nim +++ b/lib/pure/collections/rtarrays.nim @@ -10,18 +10,19 @@ ## Module that implements a fixed length array whose size ## is determined at runtime. Note: This is not ready for other people to use! +## +## Unstable API. const ArrayPartSize = 10 type - RtArray*[T] = object ## + RtArray*[T] = object ## L: Natural spart: seq[T] - apart: array [ArrayPartSize, T] - UncheckedArray* {.unchecked.}[T] = array[0..100_000_000, T] + apart: array[ArrayPartSize, T] -template usesSeqPart(x): expr = x.L > ArrayPartSize +template usesSeqPart(x): untyped = x.L > ArrayPartSize proc initRtArray*[T](len: Natural): RtArray[T] = result.L = len diff --git a/lib/pure/collections/sequtils.nim b/lib/pure/collections/sequtils.nim index 3f37d1ef0..3c0d8dc0e 100644 --- a/lib/pure/collections/sequtils.nim +++ b/lib/pure/collections/sequtils.nim @@ -1,43 +1,137 @@ # # # Nim's Runtime Library -# (c) Copyright 2011 Alex Mitchell +# (c) Copyright 2011 Alexander Mitchell-Robinson # # See the file "copying.txt", included in this # distribution, for details about the copyright. # -## :Author: Alex Mitchell +## Although this module has `seq` in its name, it implements operations +## not only for the `seq`:idx: type, but for three built-in container types +## under the `openArray` umbrella: +## * sequences +## * strings +## * array ## -## This module implements operations for the built-in `seq`:idx: type which -## were inspired by functional programming languages. If you are looking for -## the typical `map` function which applies a function to every element in a -## sequence, it already exists in the `system <system.html>`_ module in both -## mutable and immutable styles. +## The `system` module defines several common functions, such as: +## * `newSeq[T]` for creating new sequences of type `T` +## * `@` for converting arrays and strings to sequences +## * `add` for adding new elements to strings and sequences +## * `&` for string and seq concatenation +## * `in` (alias for `contains`) and `notin` for checking if an item is +## in a container ## -## Also, for functional style programming you may want to pass `anonymous procs -## <manual.html#anonymous-procs>`_ to procs like ``filter`` to reduce typing. -## Anonymous procs can use `the special do notation <manual.html#do-notation>`_ -## which is more convenient in certain situations. +## This module builds upon that, providing additional functionality in form of +## procs, iterators and templates inspired by functional programming +## languages. ## -## **Note**: This interface will change as soon as the compiler supports -## closures and proper coroutines. +## For functional style programming you have different options at your disposal: +## * the `sugar.collect macro<sugar.html#collect.m%2Cuntyped%2Cuntyped>`_ +## * pass an `anonymous proc<manual.html#procedures-anonymous-procs>`_ +## * import the `sugar module<sugar.html>`_ and use +## the `=> macro<sugar.html#%3D>.m,untyped,untyped>`_ +## * use `...It templates<#18>`_ +## (`mapIt<#mapIt.t,typed,untyped>`_, +## `filterIt<#filterIt.t,untyped,untyped>`_, etc.) +## +## Chaining of functions is possible thanks to the +## `method call syntax<manual.html#procedures-method-call-syntax>`_. + +runnableExamples: + import std/sugar + + # Creating a sequence from 1 to 10, multiplying each member by 2, + # keeping only the members which are not divisible by 6. + let + foo = toSeq(1..10).map(x => x * 2).filter(x => x mod 6 != 0) + bar = toSeq(1..10).mapIt(it * 2).filterIt(it mod 6 != 0) + baz = collect: + for i in 1..10: + let j = 2 * i + if j mod 6 != 0: + j + + doAssert foo == bar + doAssert foo == baz + doAssert foo == @[2, 4, 8, 10, 14, 16, 20] + + doAssert foo.any(x => x > 17) + doAssert not bar.allIt(it < 20) + doAssert foo.foldl(a + b) == 74 # sum of all members + + +runnableExamples: + from std/strutils import join + + let + vowels = @"aeiou" + foo = "sequtils is an awesome module" + + doAssert (vowels is seq[char]) and (vowels == @['a', 'e', 'i', 'o', 'u']) + doAssert foo.filterIt(it notin vowels).join == "sqtls s n wsm mdl" + +## See also +## ======== +## * `strutils module<strutils.html>`_ for common string functions +## * `sugar module<sugar.html>`_ for syntactic sugar macros +## * `algorithm module<algorithm.html>`_ for common generic algorithms +## * `json module<json.html>`_ for a structure which allows +## heterogeneous members + + +import std/private/since + +import std/macros +from std/typetraits import supportsCopyMem + +when defined(nimPreviewSlimSystem): + import std/assertions + + +when defined(nimHasEffectsOf): + {.experimental: "strictEffects".} +else: + {.pragma: effectsOf.} + +macro evalOnceAs(expAlias, exp: untyped, + letAssigneable: static[bool]): untyped = + ## Injects `expAlias` in caller scope, to avoid bugs involving multiple + ## substitution in macro arguments such as + ## https://github.com/nim-lang/Nim/issues/7187. + ## `evalOnceAs(myAlias, myExp)` will behave as `let myAlias = myExp` + ## except when `letAssigneable` is false (e.g. to handle openArray) where + ## it just forwards `exp` unchanged. + expectKind(expAlias, nnkIdent) + var val = exp -when not defined(nimhygiene): - {.pragma: dirty.} + result = newStmtList() + # If `exp` is not a symbol we evaluate it once here and then use the temporary + # symbol as alias + if exp.kind != nnkSym and letAssigneable: + val = genSym() + result.add(newLetStmt(val, exp)) -proc concat*[T](seqs: varargs[seq[T]]): seq[T] = + result.add( + newProc(name = genSym(nskTemplate, $expAlias), params = [getType(untyped)], + body = val, procType = nnkTemplateDef)) + +func concat*[T](seqs: varargs[seq[T]]): seq[T] = ## Takes several sequences' items and returns them inside a new sequence. + ## All sequences must be of the same type. ## - ## Example: + ## **See also:** + ## * `distribute func<#distribute,seq[T],Positive>`_ for a reverse + ## operation ## - ## .. code-block:: - ## let - ## s1 = @[1, 2, 3] - ## s2 = @[4, 5] - ## s3 = @[6, 7] - ## total = concat(s1, s2, s3) - ## assert total == @[1, 2, 3, 4, 5, 6, 7] + runnableExamples: + let + s1 = @[1, 2, 3] + s2 = @[4, 5] + s3 = @[6, 7] + total = concat(s1, s2, s3) + assert total == @[1, 2, 3, 4, 5, 6, 7] + var L = 0 for seqitm in items(seqs): inc(L, len(seqitm)) newSeq(result, L) @@ -47,96 +141,226 @@ proc concat*[T](seqs: varargs[seq[T]]): seq[T] = result[i] = itm inc(i) -proc repeat*[T](s: seq[T], n: Natural): seq[T] = - ## Returns a new sequence with the items of `s` repeated `n` times. - ## - ## Example: +func addUnique*[T](s: var seq[T], x: sink T) = + ## Adds `x` to the container `s` if it is not already present. + ## Uses `==` to check if the item is already present. + runnableExamples: + var a = @[1, 2, 3] + a.addUnique(4) + a.addUnique(4) + assert a == @[1, 2, 3, 4] + + for i in 0..high(s): + if s[i] == x: return + when declared(ensureMove): + s.add ensureMove(x) + else: + s.add x + +func count*[T](s: openArray[T], x: T): int = + ## Returns the number of occurrences of the item `x` in the container `s`. ## - ## .. code-block: + runnableExamples: + let + a = @[1, 2, 2, 3, 2, 4, 2] + b = "abracadabra" + assert count(a, 2) == 4 + assert count(a, 99) == 0 + assert count(b, 'r') == 2 + + for itm in items(s): + if itm == x: + inc result + +func cycle*[T](s: openArray[T], n: Natural): seq[T] = + ## Returns a new sequence with the items of the container `s` repeated + ## `n` times. + ## `n` must be a non-negative number (zero or more). ## - ## let - ## s = @[1, 2, 3] - ## total = s.repeat(3) - ## assert total == @[1, 2, 3, 1, 2, 3, 1, 2, 3] + runnableExamples: + let + s = @[1, 2, 3] + total = s.cycle(3) + assert total == @[1, 2, 3, 1, 2, 3, 1, 2, 3] + result = newSeq[T](n * s.len) var o = 0 - for x in 1..n: + for x in 0 ..< n: for e in s: result[o] = e inc o -proc deduplicate*[T](seq1: seq[T]): seq[T] = +proc repeat*[T](x: T, n: Natural): seq[T] = + ## Returns a new sequence with the item `x` repeated `n` times. + ## `n` must be a non-negative number (zero or more). + ## + runnableExamples: + let + total = repeat(5, 3) + assert total == @[5, 5, 5] + + result = newSeq[T](n) + for i in 0 ..< n: + result[i] = x + +func deduplicate*[T](s: openArray[T], isSorted: bool = false): seq[T] = ## Returns a new sequence without duplicates. ## - ## .. code-block:: - ## let - ## dup1 = @[1, 1, 3, 4, 2, 2, 8, 1, 4] - ## dup2 = @["a", "a", "c", "d", "d"] - ## unique1 = deduplicate(dup1) - ## unique2 = deduplicate(dup2) - ## assert unique1 == @[1, 3, 4, 2, 8] - ## assert unique2 == @["a", "c", "d"] + ## Setting the optional argument `isSorted` to true (default: false) + ## uses a faster algorithm for deduplication. + ## + runnableExamples: + let + dup1 = @[1, 1, 3, 4, 2, 2, 8, 1, 4] + dup2 = @["a", "a", "c", "d", "d"] + unique1 = deduplicate(dup1) + unique2 = deduplicate(dup2, isSorted = true) + assert unique1 == @[1, 3, 4, 2, 8] + assert unique2 == @["a", "c", "d"] + result = @[] - for itm in items(seq1): - if not result.contains(itm): result.add(itm) - -{.deprecated: [distnct: deduplicate].} - -proc zip*[S, T](seq1: seq[S], seq2: seq[T]): seq[tuple[a: S, b: T]] = - ## Returns a new sequence with a combination of the two input sequences. - ## - ## For convenience you can access the returned tuples through the named - ## fields `a` and `b`. If one sequence is shorter, the remaining items in the - ## longer sequence are discarded. Example: - ## - ## .. code-block:: - ## let - ## short = @[1, 2, 3] - ## long = @[6, 5, 4, 3, 2, 1] - ## words = @["one", "two", "three"] - ## zip1 = zip(short, long) - ## zip2 = zip(short, words) - ## assert zip1 == @[(1, 6), (2, 5), (3, 4)] - ## assert zip2 == @[(1, "one"), (2, "two"), (3, "three")] - ## assert zip1[2].b == 4 - ## assert zip2[2].b == "three" - var m = min(seq1.len, seq2.len) - newSeq(result, m) - for i in 0 .. m-1: result[i] = (seq1[i], seq2[i]) - -proc distribute*[T](s: seq[T], num: Positive, spread = true): seq[seq[T]] = - ## Splits and distributes a sequence `s` into `num` sub sequences. - ## - ## Returns a sequence of `num` sequences. For some input values this is the - ## inverse of the `concat <#concat>`_ proc. The proc will assert in debug - ## builds if `s` is nil or `num` is less than one, and will likely crash on - ## release builds. The input sequence `s` can be empty, which will produce + if s.len > 0: + if isSorted: + var prev = s[0] + result.add(prev) + for i in 1..s.high: + if s[i] != prev: + prev = s[i] + result.add(prev) + else: + for itm in items(s): + if not result.contains(itm): result.add(itm) + +func minIndex*[T](s: openArray[T]): int {.since: (1, 1).} = + ## Returns the index of the minimum value of `s`. + ## `T` needs to have a `<` operator. + runnableExamples: + let + a = @[1, 2, 3, 4] + b = @[6, 5, 4, 3] + c = [2, -7, 8, -5] + d = "ziggy" + assert minIndex(a) == 0 + assert minIndex(b) == 3 + assert minIndex(c) == 1 + assert minIndex(d) == 2 + + for i in 1..high(s): + if s[i] < s[result]: result = i + +func maxIndex*[T](s: openArray[T]): int {.since: (1, 1).} = + ## Returns the index of the maximum value of `s`. + ## `T` needs to have a `<` operator. + runnableExamples: + let + a = @[1, 2, 3, 4] + b = @[6, 5, 4, 3] + c = [2, -7, 8, -5] + d = "ziggy" + assert maxIndex(a) == 3 + assert maxIndex(b) == 0 + assert maxIndex(c) == 2 + assert maxIndex(d) == 0 + + for i in 1..high(s): + if s[i] > s[result]: result = i + +func minmax*[T](x: openArray[T]): (T, T) = + ## The minimum and maximum values of `x`. `T` needs to have a `<` operator. + var l = x[0] + var h = x[0] + for i in 1..high(x): + if x[i] < l: l = x[i] + if h < x[i]: h = x[i] + result = (l, h) + + +template zipImpl(s1, s2, retType: untyped): untyped = + proc zip*[S, T](s1: openArray[S], s2: openArray[T]): retType = + ## Returns a new sequence with a combination of the two input containers. + ## + ## The input containers can be of different types. + ## If one container is shorter, the remaining items in the longer container + ## are discarded. + ## + ## **Note**: For Nim 1.0.x and older version, `zip` returned a seq of + ## named tuples with fields `a` and `b`. For Nim versions 1.1.x and newer, + ## `zip` returns a seq of unnamed tuples. + runnableExamples: + let + short = @[1, 2, 3] + long = @[6, 5, 4, 3, 2, 1] + words = @["one", "two", "three"] + letters = "abcd" + zip1 = zip(short, long) + zip2 = zip(short, words) + assert zip1 == @[(1, 6), (2, 5), (3, 4)] + assert zip2 == @[(1, "one"), (2, "two"), (3, "three")] + assert zip1[2][0] == 3 + assert zip2[1][1] == "two" + when (NimMajor, NimMinor) <= (1, 0): + let + zip3 = zip(long, letters) + assert zip3 == @[(a: 6, b: 'a'), (5, 'b'), (4, 'c'), (3, 'd')] + assert zip3[0].b == 'a' + else: + let + zip3: seq[tuple[num: int, letter: char]] = zip(long, letters) + assert zip3 == @[(6, 'a'), (5, 'b'), (4, 'c'), (3, 'd')] + assert zip3[0].letter == 'a' + + var m = min(s1.len, s2.len) + newSeq(result, m) + for i in 0 ..< m: + result[i] = (s1[i], s2[i]) + +when (NimMajor, NimMinor) <= (1, 0): + zipImpl(s1, s2, seq[tuple[a: S, b: T]]) +else: + zipImpl(s1, s2, seq[(S, T)]) + +proc unzip*[S, T](s: openArray[(S, T)]): (seq[S], seq[T]) {.since: (1, 1).} = + ## Returns a tuple of two sequences split out from a sequence of 2-field tuples. + runnableExamples: + let + zipped = @[(1, 'a'), (2, 'b'), (3, 'c')] + unzipped1 = @[1, 2, 3] + unzipped2 = @['a', 'b', 'c'] + assert zipped.unzip() == (unzipped1, unzipped2) + assert zip(unzipped1, unzipped2).unzip() == (unzipped1, unzipped2) + result = (newSeq[S](s.len), newSeq[T](s.len)) + for i in 0..<s.len: + result[0][i] = s[i][0] + result[1][i] = s[i][1] + +func distribute*[T](s: seq[T], num: Positive, spread = true): seq[seq[T]] = + ## Splits and distributes a sequence `s` into `num` sub-sequences. + ## + ## Returns a sequence of `num` sequences. For *some* input values this is the + ## inverse of the `concat <#concat,varargs[seq[T]]>`_ func. + ## The input sequence `s` can be empty, which will produce ## `num` empty sequences. ## ## If `spread` is false and the length of `s` is not a multiple of `num`, the - ## proc will max out the first sub sequences with ``1 + len(s) div num`` + ## func will max out the first sub-sequence with `1 + len(s) div num` ## entries, leaving the remainder of elements to the last sequence. ## - ## On the other hand, if `spread` is true, the proc will distribute evenly + ## On the other hand, if `spread` is true, the func will distribute evenly ## the remainder of the division across all sequences, which makes the result ## more suited to multithreading where you are passing equal sized work units ## to a thread pool and want to maximize core usage. ## - ## Example: - ## - ## .. code-block:: - ## let numbers = @[1, 2, 3, 4, 5, 6, 7] - ## assert numbers.distribute(3) == @[@[1, 2, 3], @[4, 5], @[6, 7]] - ## assert numbers.distribute(3, false) == @[@[1, 2, 3], @[4, 5, 6], @[7]] - ## assert numbers.distribute(6)[0] == @[1, 2] - ## assert numbers.distribute(6)[5] == @[7] - assert(not s.isNil, "`s` can't be nil") + runnableExamples: + let numbers = @[1, 2, 3, 4, 5, 6, 7] + assert numbers.distribute(3) == @[@[1, 2, 3], @[4, 5], @[6, 7]] + assert numbers.distribute(3, false) == @[@[1, 2, 3], @[4, 5, 6], @[7]] + assert numbers.distribute(6)[0] == @[1, 2] + assert numbers.distribute(6)[1] == @[3] + if num < 2: result = @[s] return - let num = int(num) # XXX probably only needed because of .. bug - # Create the result and calculate the stride size and the remainder if any. result = newSeq[seq[T]](num) var @@ -148,117 +372,266 @@ proc distribute*[T](s: seq[T], num: Positive, spread = true): seq[seq[T]] = if extra == 0 or spread == false: # Use an algorithm which overcounts the stride and minimizes reading limits. if extra > 0: inc(stride) - - for i in 0 .. <num: + for i in 0 ..< num: result[i] = newSeq[T]() - for g in first .. <min(s.len, first + stride): + for g in first ..< min(s.len, first + stride): result[i].add(s[g]) first += stride - else: # Use an undercounting algorithm which *adds* the remainder each iteration. - for i in 0 .. <num: + for i in 0 ..< num: last = first + stride if extra > 0: extra -= 1 inc(last) - result[i] = newSeq[T]() - for g in first .. <last: + for g in first ..< last: result[i].add(s[g]) first = last +proc map*[T, S](s: openArray[T], op: proc (x: T): S {.closure.}): + seq[S] {.inline, effectsOf: op.} = + ## Returns a new sequence with the results of the `op` proc applied to every + ## item in the container `s`. + ## + ## Since the input is not modified, you can use it to + ## transform the type of the elements in the input container. + ## + ## Instead of using `map` and `filter`, consider using the `collect` macro + ## from the `sugar` module. + ## + ## **See also:** + ## * `sugar.collect macro<sugar.html#collect.m%2Cuntyped%2Cuntyped>`_ + ## * `mapIt template<#mapIt.t,typed,untyped>`_ + ## * `apply proc<#apply,openArray[T],proc(T)_2>`_ for the in-place version + ## + runnableExamples: + let + a = @[1, 2, 3, 4] + b = map(a, proc(x: int): string = $x) + assert b == @["1", "2", "3", "4"] + newSeq(result, s.len) + for i in 0 ..< s.len: + result[i] = op(s[i]) -iterator filter*[T](seq1: seq[T], pred: proc(item: T): bool {.closure.}): T = - ## Iterates through a sequence and yields every item that fulfills the - ## predicate. +proc apply*[T](s: var openArray[T], op: proc (x: var T) {.closure.}) + {.inline, effectsOf: op.} = + ## Applies `op` to every item in `s`, modifying it directly. + ## + ## Note that the container `s` must be declared as a `var`, + ## since `s` is modified in-place. + ## The parameter function takes a `var T` type parameter. + ## + ## **See also:** + ## * `applyIt template<#applyIt.t,untyped,untyped>`_ + ## * `map proc<#map,openArray[T],proc(T)>`_ + ## + runnableExamples: + var a = @["1", "2", "3", "4"] + apply(a, proc(x: var string) = x &= "42") + assert a == @["142", "242", "342", "442"] + + for i in 0 ..< s.len: op(s[i]) + +proc apply*[T](s: var openArray[T], op: proc (x: T): T {.closure.}) + {.inline, effectsOf: op.} = + ## Applies `op` to every item in `s` modifying it directly. + ## + ## Note that the container `s` must be declared as a `var` + ## and it is required for your input and output types to + ## be the same, since `s` is modified in-place. + ## The parameter function takes and returns a `T` type variable. + ## + ## **See also:** + ## * `applyIt template<#applyIt.t,untyped,untyped>`_ + ## * `map proc<#map,openArray[T],proc(T)>`_ + ## + runnableExamples: + var a = @["1", "2", "3", "4"] + apply(a, proc(x: string): string = x & "42") + assert a == @["142", "242", "342", "442"] + + for i in 0 ..< s.len: s[i] = op(s[i]) + +proc apply*[T](s: openArray[T], op: proc (x: T) {.closure.}) {.inline, since: (1, 3), effectsOf: op.} = + ## Same as `apply` but for a proc that does not return anything + ## and does not mutate `s` directly. + runnableExamples: + var message: string + apply([0, 1, 2, 3, 4], proc(item: int) = message.addInt item) + assert message == "01234" + for i in 0 ..< s.len: op(s[i]) + +iterator filter*[T](s: openArray[T], pred: proc(x: T): bool {.closure.}): T {.effectsOf: pred.} = + ## Iterates through a container `s` and yields every item that fulfills the + ## predicate `pred` (a function that returns a `bool`). + ## + ## Instead of using `map` and `filter`, consider using the `collect` macro + ## from the `sugar` module. + ## + ## **See also:** + ## * `sugar.collect macro<sugar.html#collect.m%2Cuntyped%2Cuntyped>`_ + ## * `filter proc<#filter,openArray[T],proc(T)>`_ + ## * `filterIt template<#filterIt.t,untyped,untyped>`_ + ## + runnableExamples: + let numbers = @[1, 4, 5, 8, 9, 7, 4] + var evens = newSeq[int]() + for n in filter(numbers, proc (x: int): bool = x mod 2 == 0): + evens.add(n) + assert evens == @[4, 8, 4] + + for i in 0 ..< s.len: + if pred(s[i]): + yield s[i] + +proc filter*[T](s: openArray[T], pred: proc(x: T): bool {.closure.}): seq[T] + {.inline, effectsOf: pred.} = + ## Returns a new sequence with all the items of `s` that fulfill the + ## predicate `pred` (a function that returns a `bool`). + ## + ## Instead of using `map` and `filter`, consider using the `collect` macro + ## from the `sugar` module. + ## + ## **See also:** + ## * `sugar.collect macro<sugar.html#collect.m%2Cuntyped%2Cuntyped>`_ + ## * `filterIt template<#filterIt.t,untyped,untyped>`_ + ## * `filter iterator<#filter.i,openArray[T],proc(T)>`_ + ## * `keepIf proc<#keepIf,seq[T],proc(T)>`_ for the in-place version + ## + runnableExamples: + let + colors = @["red", "yellow", "black"] + f1 = filter(colors, proc(x: string): bool = x.len < 6) + f2 = filter(colors, proc(x: string): bool = x.contains('y')) + assert f1 == @["red", "black"] + assert f2 == @["yellow"] + + result = newSeq[T]() + for i in 0 ..< s.len: + if pred(s[i]): + result.add(s[i]) + +proc keepIf*[T](s: var seq[T], pred: proc(x: T): bool {.closure.}) + {.inline, effectsOf: pred.} = + ## Keeps the items in the passed sequence `s` if they fulfill the + ## predicate `pred` (a function that returns a `bool`). ## - ## Example: - ## - ## .. code-block:: - ## let numbers = @[1, 4, 5, 8, 9, 7, 4] - ## for n in filter(numbers, proc (x: int): bool = x mod 2 == 0): - ## echo($n) - ## # echoes 4, 8, 4 in separate lines - for i in countup(0, len(seq1)-1): - var item = seq1[i] - if pred(item): yield seq1[i] - -proc filter*[T](seq1: seq[T], pred: proc(item: T): bool {.closure.}): seq[T] = - ## Returns a new sequence with all the items that fulfilled the predicate. - ## - ## Example: - ## - ## .. code-block:: - ## let - ## colors = @["red", "yellow", "black"] - ## f1 = filter(colors, proc(x: string): bool = x.len < 6) - ## f2 = filter(colors) do (x: string) -> bool : x.len > 5 - ## assert f1 == @["red", "black"] - ## assert f2 == @["yellow"] - accumulateResult(filter(seq1, pred)) - -proc keepIf*[T](seq1: var seq[T], pred: proc(item: T): bool {.closure.}) = - ## Keeps the items in the passed sequence if they fulfilled the predicate. - ## Same as the ``filter`` proc, but modifies the sequence directly. - ## - ## Example: - ## - ## .. code-block:: - ## var floats = @[13.0, 12.5, 5.8, 2.0, 6.1, 9.9, 10.1] - ## keepIf(floats, proc(x: float): bool = x > 10) - ## assert floats == @[13.0, 12.5, 10.1] + ## Note that `s` must be declared as a `var`. + ## + ## Similar to the `filter proc<#filter,openArray[T],proc(T)>`_, + ## but modifies the sequence directly. + ## + ## **See also:** + ## * `keepItIf template<#keepItIf.t,seq,untyped>`_ + ## * `filter proc<#filter,openArray[T],proc(T)>`_ + ## + runnableExamples: + var floats = @[13.0, 12.5, 5.8, 2.0, 6.1, 9.9, 10.1] + keepIf(floats, proc(x: float): bool = x > 10) + assert floats == @[13.0, 12.5, 10.1] + var pos = 0 - for i in 0 .. <len(seq1): - if pred(seq1[i]): + for i in 0 ..< len(s): + if pred(s[i]): if pos != i: - seq1[pos] = seq1[i] + when defined(gcDestructors): + s[pos] = move(s[i]) + else: + shallowCopy(s[pos], s[i]) inc(pos) - setLen(seq1, pos) + setLen(s, pos) -proc delete*[T](s: var seq[T], first=0, last=0) = - ## Deletes in `s` the items at position `first` .. `last`. This modifies - ## `s` itself, it does not return a copy. +func delete*[T](s: var seq[T]; slice: Slice[int]) = + ## Deletes the items `s[slice]`, raising `IndexDefect` if the slice contains + ## elements out of range. ## - ## Example: - ## - ##.. code-block:: - ## let outcome = @[1,1,1,1,1,1,1,1] - ## var dest = @[1,1,1,2,2,2,2,2,2,1,1,1,1,1] - ## dest.delete(3, 8) - ## assert outcome == dest + ## This operation moves all elements after `s[slice]` in linear time. + runnableExamples: + var a = @[10, 11, 12, 13, 14] + doAssertRaises(IndexDefect): a.delete(4..5) + assert a == @[10, 11, 12, 13, 14] + a.delete(4..4) + assert a == @[10, 11, 12, 13] + a.delete(1..2) + assert a == @[10, 13] + a.delete(1..<1) # empty slice + assert a == @[10, 13] + when compileOption("boundChecks"): + if not (slice.a < s.len and slice.a >= 0 and slice.b < s.len): + raise newException(IndexDefect, $(slice: slice, len: s.len)) + if slice.b >= slice.a: + template defaultImpl = + var i = slice.a + var j = slice.b + 1 + var newLen = s.len - j + i + while i < newLen: + when defined(gcDestructors): + s[i] = move(s[j]) + else: + s[i].shallowCopy(s[j]) + inc(i) + inc(j) + setLen(s, newLen) + when nimvm: defaultImpl() + else: + when defined(js): + let n = slice.b - slice.a + 1 + let first = slice.a + {.emit: "`s`.splice(`first`, `n`);".} + else: + defaultImpl() +func delete*[T](s: var seq[T]; first, last: Natural) {.deprecated: "use `delete(s, first..last)`".} = + ## Deletes the items of a sequence `s` at positions `first..last` + ## (including both ends of the range). + ## This modifies `s` itself, it does not return a copy. + runnableExamples("--warning:deprecated:off"): + let outcome = @[1, 1, 1, 1, 1, 1, 1, 1] + var dest = @[1, 1, 1, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1] + dest.delete(3, 8) + assert outcome == dest + doAssert first <= last + if first >= s.len: + return var i = first - var j = last+1 - var newLen = len(s)-j+i + var j = min(len(s), last + 1) + var newLen = len(s) - j + i while i < newLen: - s[i].shallowCopy(s[j]) + when defined(gcDestructors): + s[i] = move(s[j]) + else: + s[i].shallowCopy(s[j]) inc(i) inc(j) setLen(s, newLen) -proc insert*[T](dest: var seq[T], src: openArray[T], pos=0) = +func insert*[T](dest: var seq[T], src: openArray[T], pos = 0) = ## Inserts items from `src` into `dest` at position `pos`. This modifies ## `dest` itself, it does not return a copy. ## - ## Example: + ## Note that the elements of `src` and `dest` must be of the same type. ## - ##.. code-block:: - ## var dest = @[1,1,1,1,1,1,1,1] - ## let - ## src = @[2,2,2,2,2,2] - ## outcome = @[1,1,1,2,2,2,2,2,2,1,1,1,1,1] - ## dest.insert(src, 3) - ## assert dest == outcome + runnableExamples: + var dest = @[1, 1, 1, 1, 1, 1, 1, 1] + let + src = @[2, 2, 2, 2, 2, 2] + outcome = @[1, 1, 1, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1] + dest.insert(src, 3) + assert dest == outcome var j = len(dest) - 1 - var i = len(dest) + len(src) - 1 + var i = j + len(src) + if i == j: return dest.setLen(i + 1) # Move items after `pos` to the end of the sequence. while j >= pos: - dest[i].shallowCopy(dest[j]) + when defined(gcDestructors): + dest[i] = move(dest[j]) + else: + dest[i].shallowCopy(dest[j]) dec(i) dec(j) # Insert items from `dest` into `dest` at `pos` @@ -268,261 +641,266 @@ proc insert*[T](dest: var seq[T], src: openArray[T], pos=0) = inc(j) -template filterIt*(seq1, pred: expr): expr {.immediate.} = - ## Returns a new sequence with all the items that fulfilled the predicate. +template filterIt*(s, pred: untyped): untyped = + ## Returns a new sequence with all the items of `s` that fulfill the + ## predicate `pred`. + ## + ## Unlike the `filter proc<#filter,openArray[T],proc(T)>`_ and + ## `filter iterator<#filter.i,openArray[T],proc(T)>`_, + ## the predicate needs to be an expression using the `it` variable + ## for testing, like: `filterIt("abcxyz", it == 'x')`. ## - ## Unlike the `proc` version, the predicate needs to be an expression using - ## the ``it`` variable for testing, like: ``filterIt("abcxyz", it == 'x')``. - ## Example: + ## Instead of using `mapIt` and `filterIt`, consider using the `collect` macro + ## from the `sugar` module. ## - ## .. code-block:: - ## let - ## temperatures = @[-272.15, -2.0, 24.5, 44.31, 99.9, -113.44] - ## acceptable = filterIt(temperatures, it < 50 and it > -10) - ## notAcceptable = filterIt(temperatures, it > 50 or it < -10) - ## assert acceptable == @[-2.0, 24.5, 44.31] - ## assert notAcceptable == @[-272.15, 99.9, -113.44] - var result {.gensym.}: type(seq1) = @[] - for it {.inject.} in items(seq1): + ## **See also:** + ## * `sugar.collect macro<sugar.html#collect.m%2Cuntyped%2Cuntyped>`_ + ## * `filter proc<#filter,openArray[T],proc(T)>`_ + ## * `filter iterator<#filter.i,openArray[T],proc(T)>`_ + ## + runnableExamples: + let + temperatures = @[-272.15, -2.0, 24.5, 44.31, 99.9, -113.44] + acceptable = temperatures.filterIt(it < 50 and it > -10) + notAcceptable = temperatures.filterIt(it > 50 or it < -10) + assert acceptable == @[-2.0, 24.5, 44.31] + assert notAcceptable == @[-272.15, 99.9, -113.44] + + var result = newSeq[typeof(s[0])]() + for it {.inject.} in items(s): if pred: result.add(it) result -template keepItIf*(varSeq, pred: expr) = - ## Convenience template around the ``keepIf`` proc to reduce typing. +template keepItIf*(varSeq: seq, pred: untyped) = + ## Keeps the items in the passed sequence (must be declared as a `var`) + ## if they fulfill the predicate. + ## + ## Unlike the `keepIf proc<#keepIf,seq[T],proc(T)>`_, + ## the predicate needs to be an expression using + ## the `it` variable for testing, like: `keepItIf("abcxyz", it == 'x')`. ## - ## Unlike the `proc` version, the predicate needs to be an expression using - ## the ``it`` variable for testing, like: ``keepItIf("abcxyz", it == 'x')``. - ## Example: + ## **See also:** + ## * `keepIf proc<#keepIf,seq[T],proc(T)>`_ + ## * `filterIt template<#filterIt.t,untyped,untyped>`_ ## - ## .. code-block:: - ## var candidates = @["foo", "bar", "baz", "foobar"] - ## keepItIf(candidates, it.len == 3 and it[0] == 'b') - ## assert candidates == @["bar", "baz"] + runnableExamples: + var candidates = @["foo", "bar", "baz", "foobar"] + candidates.keepItIf(it.len == 3 and it[0] == 'b') + assert candidates == @["bar", "baz"] + var pos = 0 - for i in 0 .. <len(varSeq): + for i in 0 ..< len(varSeq): let it {.inject.} = varSeq[i] if pred: if pos != i: - varSeq[pos] = varSeq[i] + when defined(gcDestructors): + varSeq[pos] = move(varSeq[i]) + else: + shallowCopy(varSeq[pos], varSeq[i]) inc(pos) setLen(varSeq, pos) +since (1, 1): + template countIt*(s, pred: untyped): int = + ## Returns a count of all the items that fulfill the predicate. + ## + ## The predicate needs to be an expression using + ## the `it` variable for testing, like: `countIt(@[1, 2, 3], it > 2)`. + ## + runnableExamples: + let numbers = @[-3, -2, -1, 0, 1, 2, 3, 4, 5, 6] + iterator iota(n: int): int = + for i in 0..<n: yield i + assert numbers.countIt(it < 0) == 3 + assert countIt(iota(10), it < 2) == 2 -template toSeq*(iter: expr): expr {.immediate.} = - ## Transforms any iterator into a sequence. - ## - ## Example: - ## - ## .. code-block:: - ## let - ## numeric = @[1, 2, 3, 4, 5, 6, 7, 8, 9] - ## odd_numbers = toSeq(filter(numeric) do (x: int) -> bool: - ## if x mod 2 == 1: - ## result = true) - ## assert odd_numbers == @[1, 3, 5, 7, 9] - ## - ## **Note**: Since this is an immediate macro, you cannot always invoke this - ## as ``x.toSeq``, depending on the ``x``. - ## See `this <manual.html#limitations-of-the-method-call-syntax>`_ - ## for an explanation. - var result {.gensym.}: seq[type(iter)] = @[] - for x in iter: add(result, x) - result + var result = 0 + for it {.inject.} in s: + if pred: result += 1 + result -template foldl*(sequence, operation: expr): expr = - ## Template to fold a sequence from left to right, returning the accumulation. +proc all*[T](s: openArray[T], pred: proc(x: T): bool {.closure.}): bool {.effectsOf: pred.} = + ## Iterates through a container and checks if every item fulfills the + ## predicate. ## - ## The sequence is required to have at least a single element. Debug versions - ## of your program will assert in this situation but release versions will - ## happily go ahead. If the sequence has a single element it will be returned - ## without applying ``operation``. + ## **See also:** + ## * `allIt template<#allIt.t,untyped,untyped>`_ + ## * `any proc<#any,openArray[T],proc(T)>`_ ## - ## The ``operation`` parameter should be an expression which uses the - ## variables ``a`` and ``b`` for each step of the fold. Since this is a left - ## fold, for non associative binary operations like subtraction think that - ## the sequence of numbers 1, 2 and 3 will be parenthesized as (((1) - 2) - - ## 3). Example: - ## - ## .. code-block:: - ## let - ## numbers = @[5, 9, 11] - ## addition = foldl(numbers, a + b) - ## subtraction = foldl(numbers, a - b) - ## multiplication = foldl(numbers, a * b) - ## words = @["nim", "is", "cool"] - ## concatenation = foldl(words, a & b) - ## assert addition == 25, "Addition is (((5)+9)+11)" - ## assert subtraction == -15, "Subtraction is (((5)-9)-11)" - ## assert multiplication == 495, "Multiplication is (((5)*9)*11)" - ## assert concatenation == "nimiscool" - assert sequence.len > 0, "Can't fold empty sequences" - var result {.gensym.}: type(sequence[0]) - result = sequence[0] - for i in countup(1, sequence.len - 1): - let - a {.inject.} = result - b {.inject.} = sequence[i] - result = operation - result + runnableExamples: + let numbers = @[1, 4, 5, 8, 9, 7, 4] + assert all(numbers, proc (x: int): bool = x < 10) == true + assert all(numbers, proc (x: int): bool = x < 9) == false -template foldr*(sequence, operation: expr): expr = - ## Template to fold a sequence from right to left, returning the accumulation. + for i in s: + if not pred(i): + return false + true + +template allIt*(s, pred: untyped): bool = + ## Iterates through a container and checks if every item fulfills the + ## predicate. ## - ## The sequence is required to have at least a single element. Debug versions - ## of your program will assert in this situation but release versions will - ## happily go ahead. If the sequence has a single element it will be returned - ## without applying ``operation``. + ## Unlike the `all proc<#all,openArray[T],proc(T)>`_, + ## the predicate needs to be an expression using + ## the `it` variable for testing, like: `allIt("abba", it == 'a')`. ## - ## The ``operation`` parameter should be an expression which uses the - ## variables ``a`` and ``b`` for each step of the fold. Since this is a right - ## fold, for non associative binary operations like subtraction think that - ## the sequence of numbers 1, 2 and 3 will be parenthesized as (1 - (2 - - ## (3))). Example: - ## - ## .. code-block:: - ## let - ## numbers = @[5, 9, 11] - ## addition = foldr(numbers, a + b) - ## subtraction = foldr(numbers, a - b) - ## multiplication = foldr(numbers, a * b) - ## words = @["nim", "is", "cool"] - ## concatenation = foldr(words, a & b) - ## assert addition == 25, "Addition is (5+(9+(11)))" - ## assert subtraction == 7, "Subtraction is (5-(9-(11)))" - ## assert multiplication == 495, "Multiplication is (5*(9*(11)))" - ## assert concatenation == "nimiscool" - assert sequence.len > 0, "Can't fold empty sequences" - var result {.gensym.}: type(sequence[0]) - result = sequence[sequence.len - 1] - for i in countdown(sequence.len - 2, 0): - let - a {.inject.} = sequence[i] - b {.inject.} = result - result = operation - result + ## **See also:** + ## * `all proc<#all,openArray[T],proc(T)>`_ + ## * `anyIt template<#anyIt.t,untyped,untyped>`_ + ## + runnableExamples: + let numbers = @[1, 4, 5, 8, 9, 7, 4] + assert numbers.allIt(it < 10) == true + assert numbers.allIt(it < 9) == false -template mapIt*(seq1, typ, op: expr): expr = - ## Convenience template around the ``map`` proc to reduce typing. - ## - ## The template injects the ``it`` variable which you can use directly in an - ## expression. You also need to pass as `typ` the type of the expression, - ## since the new returned sequence can have a different type than the - ## original. Example: - ## - ## .. code-block:: - ## let - ## nums = @[1, 2, 3, 4] - ## strings = nums.mapIt(string, $(4 * it)) - ## assert strings == @["4", "8", "12", "16"] - var result {.gensym.}: seq[typ] = @[] - for it {.inject.} in items(seq1): - result.add(op) + var result = true + for it {.inject.} in items(s): + if not pred: + result = false + break result -template mapIt*(varSeq, op: expr) = - ## Convenience template around the mutable ``map`` proc to reduce typing. +proc any*[T](s: openArray[T], pred: proc(x: T): bool {.closure.}): bool {.effectsOf: pred.} = + ## Iterates through a container and checks if at least one item + ## fulfills the predicate. ## - ## The template injects the ``it`` variable which you can use directly in an - ## expression. The expression has to return the same type as the sequence you - ## are mutating. Example: + ## **See also:** + ## * `anyIt template<#anyIt.t,untyped,untyped>`_ + ## * `all proc<#all,openArray[T],proc(T)>`_ ## - ## .. code-block:: - ## var nums = @[1, 2, 3, 4] - ## nums.mapIt(it * 3) - ## assert nums[0] + nums[3] == 15 - for i in 0 .. <len(varSeq): - let it {.inject.} = varSeq[i] - varSeq[i] = op + runnableExamples: + let numbers = @[1, 4, 5, 8, 9, 7, 4] + assert any(numbers, proc (x: int): bool = x > 8) == true + assert any(numbers, proc (x: int): bool = x > 9) == false -template newSeqWith*(len: int, init: expr): expr = - ## creates a new sequence, calling `init` to initialize each value. Example: + for i in s: + if pred(i): + return true + false + +template anyIt*(s, pred: untyped): bool = + ## Iterates through a container and checks if at least one item + ## fulfills the predicate. ## - ## .. code-block:: - ## var seq2D = newSeqWith(20, newSeq[bool](10)) - ## seq2D[0][0] = true - ## seq2D[1][0] = true - ## seq2D[0][1] = true + ## Unlike the `any proc<#any,openArray[T],proc(T)>`_, + ## the predicate needs to be an expression using + ## the `it` variable for testing, like: `anyIt("abba", it == 'a')`. ## - ## import math - ## var seqRand = newSeqWith(20, random(10)) - ## echo seqRand - var result {.gensym.} = newSeq[type(init)](len) - for i in 0 .. <len: - result[i] = init + ## **See also:** + ## * `any proc<#any,openArray[T],proc(T)>`_ + ## * `allIt template<#allIt.t,untyped,untyped>`_ + ## + runnableExamples: + let numbers = @[1, 4, 5, 8, 9, 7, 4] + assert numbers.anyIt(it > 8) == true + assert numbers.anyIt(it > 9) == false + + var result = false + for it {.inject.} in items(s): + if pred: + result = true + break result -when isMainModule: - import strutils - block: # concat test - let - s1 = @[1, 2, 3] - s2 = @[4, 5] - s3 = @[6, 7] - total = concat(s1, s2, s3) - assert total == @[1, 2, 3, 4, 5, 6, 7] +template toSeq1(s: not iterator): untyped = + # overload for typed but not iterator + type OutType = typeof(items(s)) + when compiles(s.len): + block: + evalOnceAs(s2, s, compiles((let _ = s))) + var i = 0 + var result = newSeq[OutType](s2.len) + for it in s2: + result[i] = it + i += 1 + result + else: + var result: seq[OutType]# = @[] + for it in s: + result.add(it) + result - block: # duplicates test - let - dup1 = @[1, 1, 3, 4, 2, 2, 8, 1, 4] - dup2 = @["a", "a", "c", "d", "d"] - unique1 = deduplicate(dup1) - unique2 = deduplicate(dup2) - assert unique1 == @[1, 3, 4, 2, 8] - assert unique2 == @["a", "c", "d"] +template toSeq2(iter: iterator): untyped = + # overload for iterator + evalOnceAs(iter2, iter(), false) + when compiles(iter2.len): + var i = 0 + var result = newSeq[typeof(iter2)](iter2.len) + for x in iter2: + result[i] = x + inc i + result + else: + type OutType = typeof(iter2()) + var result: seq[OutType]# = @[] + when compiles(iter2()): + evalOnceAs(iter4, iter, false) + let iter3 = iter4() + for x in iter3(): + result.add(x) + else: + for x in iter2(): + result.add(x) + result - block: # zip test - let - short = @[1, 2, 3] - long = @[6, 5, 4, 3, 2, 1] - words = @["one", "two", "three"] - zip1 = zip(short, long) - zip2 = zip(short, words) - assert zip1 == @[(1, 6), (2, 5), (3, 4)] - assert zip2 == @[(1, "one"), (2, "two"), (3, "three")] - assert zip1[2].b == 4 - assert zip2[2].b == "three" - - block: # filter proc test +template toSeq*(iter: untyped): untyped = + ## Transforms any iterable (anything that can be iterated over, e.g. with + ## a for-loop) into a sequence. + ## + runnableExamples: let - colors = @["red", "yellow", "black"] - f1 = filter(colors, proc(x: string): bool = x.len < 6) - f2 = filter(colors) do (x: string) -> bool : x.len > 5 - assert f1 == @["red", "black"] - assert f2 == @["yellow"] - - block: # filter iterator test - let numbers = @[1, 4, 5, 8, 9, 7, 4] - for n in filter(numbers, proc (x: int): bool = x mod 2 == 0): - echo($n) - # echoes 4, 8, 4 in separate lines + myRange = 1..5 + mySet: set[int8] = {5'i8, 3, 1} + assert typeof(myRange) is HSlice[system.int, system.int] + assert typeof(mySet) is set[int8] - block: # keepIf test - var floats = @[13.0, 12.5, 5.8, 2.0, 6.1, 9.9, 10.1] - keepIf(floats, proc(x: float): bool = x > 10) - assert floats == @[13.0, 12.5, 10.1] - - block: # filterIt test let - temperatures = @[-272.15, -2.0, 24.5, 44.31, 99.9, -113.44] - acceptable = filterIt(temperatures, it < 50 and it > -10) - notAcceptable = filterIt(temperatures, it > 50 or it < -10) - assert acceptable == @[-2.0, 24.5, 44.31] - assert notAcceptable == @[-272.15, 99.9, -113.44] + mySeq1 = toSeq(myRange) + mySeq2 = toSeq(mySet) + assert mySeq1 == @[1, 2, 3, 4, 5] + assert mySeq2 == @[1'i8, 3, 5] - block: # keepItIf test - var candidates = @["foo", "bar", "baz", "foobar"] - keepItIf(candidates, it.len == 3 and it[0] == 'b') - assert candidates == @["bar", "baz"] - - block: # toSeq test - let - numeric = @[1, 2, 3, 4, 5, 6, 7, 8, 9] - odd_numbers = toSeq(filter(numeric) do (x: int) -> bool: - if x mod 2 == 1: - result = true) - assert odd_numbers == @[1, 3, 5, 7, 9] + when compiles(toSeq1(iter)): + toSeq1(iter) + elif compiles(toSeq2(iter)): + toSeq2(iter) + else: + # overload for untyped, e.g.: `toSeq(myInlineIterator(3))` + when compiles(iter.len): + block: + evalOnceAs(iter2, iter, true) + var result = newSeq[typeof(iter)](iter2.len) + var i = 0 + for x in iter2: + result[i] = x + inc i + result + else: + var result: seq[typeof(iter)] = @[] + for x in iter: + result.add(x) + result - block: # foldl tests +template foldl*(sequence, operation: untyped): untyped = + ## Template to fold a sequence from left to right, returning the accumulation. + ## + ## The sequence is required to have at least a single element. Debug versions + ## of your program will assert in this situation but release versions will + ## happily go ahead. If the sequence has a single element it will be returned + ## without applying `operation`. + ## + ## The `operation` parameter should be an expression which uses the + ## variables `a` and `b` for each step of the fold. Since this is a left + ## fold, for non associative binary operations like subtraction think that + ## the sequence of numbers 1, 2 and 3 will be parenthesized as (((1) - 2) - + ## 3). + ## + ## **See also:** + ## * `foldl template<#foldl.t,,,>`_ with a starting parameter + ## * `foldr template<#foldr.t,untyped,untyped>`_ + ## + runnableExamples: let numbers = @[5, 9, 11] addition = foldl(numbers, a + b) @@ -530,12 +908,75 @@ when isMainModule: multiplication = foldl(numbers, a * b) words = @["nim", "is", "cool"] concatenation = foldl(words, a & b) + procs = @["proc", "Is", "Also", "Fine"] + + + func foo(acc, cur: string): string = + result = acc & cur + assert addition == 25, "Addition is (((5)+9)+11)" assert subtraction == -15, "Subtraction is (((5)-9)-11)" assert multiplication == 495, "Multiplication is (((5)*9)*11)" assert concatenation == "nimiscool" + assert foldl(procs, foo(a, b)) == "procIsAlsoFine" + + let s = sequence + assert s.len > 0, "Can't fold empty sequences" + var result: typeof(s[0]) + result = s[0] + for i in 1..<s.len: + let + a {.inject.} = result + b {.inject.} = s[i] + result = operation + result + +template foldl*(sequence, operation, first): untyped = + ## Template to fold a sequence from left to right, returning the accumulation. + ## + ## This version of `foldl` gets a **starting parameter**. This makes it possible + ## to accumulate the sequence into a different type than the sequence elements. + ## + ## The `operation` parameter should be an expression which uses the variables + ## `a` and `b` for each step of the fold. The `first` parameter is the + ## start value (the first `a`) and therefore defines the type of the result. + ## + ## **See also:** + ## * `foldr template<#foldr.t,untyped,untyped>`_ + ## + runnableExamples: + let + numbers = @[0, 8, 1, 5] + digits = foldl(numbers, a & (chr(b + ord('0'))), "") + assert digits == "0815" - block: # foldr tests + var result: typeof(first) = first + for x in items(sequence): + let + a {.inject.} = result + b {.inject.} = x + result = operation + result + +template foldr*(sequence, operation: untyped): untyped = + ## Template to fold a sequence from right to left, returning the accumulation. + ## + ## The sequence is required to have at least a single element. Debug versions + ## of your program will assert in this situation but release versions will + ## happily go ahead. If the sequence has a single element it will be returned + ## without applying `operation`. + ## + ## The `operation` parameter should be an expression which uses the + ## variables `a` and `b` for each step of the fold. Since this is a right + ## fold, for non associative binary operations like subtraction think that + ## the sequence of numbers 1, 2 and 3 will be parenthesized as (1 - (2 - + ## (3))). + ## + ## **See also:** + ## * `foldl template<#foldl.t,untyped,untyped>`_ + ## * `foldl template<#foldl.t,,,>`_ with a starting parameter + ## + runnableExamples: let numbers = @[5, 9, 11] addition = foldr(numbers, a + b) @@ -548,72 +989,174 @@ when isMainModule: assert multiplication == 495, "Multiplication is (5*(9*(11)))" assert concatenation == "nimiscool" - block: # delete tests - let outcome = @[1,1,1,1,1,1,1,1] - var dest = @[1,1,1,2,2,2,2,2,2,1,1,1,1,1] - dest.delete(3, 8) - assert outcome == dest, """\ - Deleting range 3-9 from [1,1,1,2,2,2,2,2,2,1,1,1,1,1] - is [1,1,1,1,1,1,1,1]""" - - block: # insert tests - var dest = @[1,1,1,1,1,1,1,1] + let s = sequence # xxx inefficient, use {.evalonce.} pending #13750 + let n = s.len + assert n > 0, "Can't fold empty sequences" + var result = s[n - 1] + for i in countdown(n - 2, 0): let - src = @[2,2,2,2,2,2] - outcome = @[1,1,1,2,2,2,2,2,2,1,1,1,1,1] - dest.insert(src, 3) - assert dest == outcome, """\ - Inserting [2,2,2,2,2,2] into [1,1,1,1,1,1,1,1] - at 3 is [1,1,1,2,2,2,2,2,2,1,1,1,1,1]""" + a {.inject.} = s[i] + b {.inject.} = result + result = operation + result - block: # mapIt tests - var +template mapIt*(s: typed, op: untyped): untyped = + ## Returns a new sequence with the results of the `op` proc applied to every + ## item in the container `s`. + ## + ## Since the input is not modified you can use it to + ## transform the type of the elements in the input container. + ## + ## The template injects the `it` variable which you can use directly in an + ## expression. + ## + ## Instead of using `mapIt` and `filterIt`, consider using the `collect` macro + ## from the `sugar` module. + ## + ## **See also:** + ## * `sugar.collect macro<sugar.html#collect.m%2Cuntyped%2Cuntyped>`_ + ## * `map proc<#map,openArray[T],proc(T)>`_ + ## * `applyIt template<#applyIt.t,untyped,untyped>`_ for the in-place version + ## + runnableExamples: + let nums = @[1, 2, 3, 4] - strings = nums.mapIt(string, $(4 * it)) - nums.mapIt(it * 3) + strings = nums.mapIt($(4 * it)) + assert strings == @["4", "8", "12", "16"] + + type OutType = typeof(( + block: + var it{.inject.}: typeof(items(s), typeOfIter); + op), typeOfProc) + when OutType is not (proc): + # Here, we avoid to create closures in loops. + # This avoids https://github.com/nim-lang/Nim/issues/12625 + when compiles(s.len): + block: # using a block avoids https://github.com/nim-lang/Nim/issues/8580 + + # BUG: `evalOnceAs(s2, s, false)` would lead to C compile errors + # (`error: use of undeclared identifier`) instead of Nim compile errors + evalOnceAs(s2, s, compiles((let _ = s))) + + var i = 0 + var result = newSeq[OutType](s2.len) + for it {.inject.} in s2: + result[i] = op + i += 1 + result + else: + var result: seq[OutType]# = @[] + # use `items` to avoid https://github.com/nim-lang/Nim/issues/12639 + for it {.inject.} in items(s): + result.add(op) + result + else: + # `op` is going to create closures in loops, let's fallback to `map`. + # NOTE: Without this fallback, developers have to define a helper function and + # call `map`: + # [1, 2].map((it) => ((x: int) => it + x)) + # With this fallback, above code can be simplified to: + # [1, 2].mapIt((x: int) => it + x) + # In this case, `mapIt` is just syntax sugar for `map`. + type InType = typeof(items(s), typeOfIter) + # Use a help proc `f` to create closures for each element in `s` + let f = proc (x: InType): OutType = + let it {.inject.} = x + op + map(s, f) + +template applyIt*(varSeq, op: untyped) = + ## Convenience template around the mutable `apply` proc to reduce typing. + ## + ## The template injects the `it` variable which you can use directly in an + ## expression. The expression has to return the same type as the elements + ## of the sequence you are mutating. + ## + ## **See also:** + ## * `apply proc<#apply,openArray[T],proc(T)_2>`_ + ## * `mapIt template<#mapIt.t,typed,untyped>`_ + ## + runnableExamples: + var nums = @[1, 2, 3, 4] + nums.applyIt(it * 3) assert nums[0] + nums[3] == 15 - block: # distribute tests - let numbers = @[1, 2, 3, 4, 5, 6, 7] - doAssert numbers.distribute(3) == @[@[1, 2, 3], @[4, 5], @[6, 7]] - doAssert numbers.distribute(6)[0] == @[1, 2] - doAssert numbers.distribute(6)[5] == @[7] - let a = @[1, 2, 3, 4, 5, 6, 7] - doAssert a.distribute(1, true) == @[@[1, 2, 3, 4, 5, 6, 7]] - doAssert a.distribute(1, false) == @[@[1, 2, 3, 4, 5, 6, 7]] - doAssert a.distribute(2, true) == @[@[1, 2, 3, 4], @[5, 6, 7]] - doAssert a.distribute(2, false) == @[@[1, 2, 3, 4], @[5, 6, 7]] - doAssert a.distribute(3, true) == @[@[1, 2, 3], @[4, 5], @[6, 7]] - doAssert a.distribute(3, false) == @[@[1, 2, 3], @[4, 5, 6], @[7]] - doAssert a.distribute(4, true) == @[@[1, 2], @[3, 4], @[5, 6], @[7]] - doAssert a.distribute(4, false) == @[@[1, 2], @[3, 4], @[5, 6], @[7]] - doAssert a.distribute(5, true) == @[@[1, 2], @[3, 4], @[5], @[6], @[7]] - doAssert a.distribute(5, false) == @[@[1, 2], @[3, 4], @[5, 6], @[7], @[]] - doAssert a.distribute(6, true) == @[@[1, 2], @[3], @[4], @[5], @[6], @[7]] - doAssert a.distribute(6, false) == @[ - @[1, 2], @[3, 4], @[5, 6], @[7], @[], @[]] - doAssert a.distribute(8, false) == a.distribute(8, true) - doAssert a.distribute(90, false) == a.distribute(90, true) - var b = @[0] - for f in 1 .. 25: b.add(f) - doAssert b.distribute(5, true)[4].len == 5 - doAssert b.distribute(5, false)[4].len == 2 - - block: # newSeqWith tests - var seq2D = newSeqWith(4, newSeq[bool](2)) - seq2D[0][0] = true - seq2D[1][0] = true - seq2D[0][1] = true - doAssert seq2D == @[@[true, true], @[true, false], @[false, false], @[false, false]] - - block: # repeat tests - let - a = @[1, 2, 3] - b: seq[int] = @[] + for i in low(varSeq) .. high(varSeq): + let it {.inject.} = varSeq[i] + varSeq[i] = op + + +template newSeqWith*(len: int, init: untyped): untyped = + ## Creates a new `seq` of length `len`, calling `init` to initialize + ## each value of the seq. + ## + ## Useful for creating "2D" seqs - seqs containing other seqs + ## or to populate fields of the created seq. + runnableExamples: + ## Creates a seq containing 5 bool seqs, each of length of 3. + var seq2D = newSeqWith(5, newSeq[bool](3)) + assert seq2D.len == 5 + assert seq2D[0].len == 3 + assert seq2D[4][2] == false + + ## Creates a seq with random numbers + import std/random + var seqRand = newSeqWith(20, rand(1.0)) + assert seqRand[0] != seqRand[1] + type T = typeof(init) + let newLen = len + when supportsCopyMem(T) and declared(newSeqUninit): + var result = newSeqUninit[T](newLen) + else: # TODO: use `newSeqUnsafe` when that's available + var result = newSeq[T](newLen) + for i in 0 ..< newLen: + result[i] = init + move(result) # refs bug #7295 + +func mapLitsImpl(constructor: NimNode; op: NimNode; nested: bool; + filter = nnkLiterals): NimNode = + if constructor.kind in filter: + result = newNimNode(nnkCall, lineInfoFrom = constructor) + result.add op + result.add constructor + else: + result = copyNimNode(constructor) + for v in constructor: + if nested or v.kind in filter: + result.add mapLitsImpl(v, op, nested, filter) + else: + result.add v + +macro mapLiterals*(constructor, op: untyped; + nested = true): untyped = + ## Applies `op` to each of the **atomic** literals like `3` + ## or `"abc"` in the specified `constructor` AST. This can + ## be used to map every array element to some target type: + runnableExamples: + let x = mapLiterals([0.1, 1.2, 2.3, 3.4], int) + doAssert x is array[4, int] + doAssert x == [int(0.1), int(1.2), int(2.3), int(3.4)] + ## If `nested` is true (which is the default), the literals are replaced + ## everywhere in the `constructor` AST, otherwise only the first level + ## is considered: + runnableExamples: + let a = mapLiterals((1.2, (2.3, 3.4), 4.8), int) + let b = mapLiterals((1.2, (2.3, 3.4), 4.8), int, nested=false) + assert a == (1, (2, 3), 4) + assert b == (1, (2.3, 3.4), 4) - doAssert a.repeat(3) == @[1, 2, 3, 1, 2, 3, 1, 2, 3] - doAssert a.repeat(0) == @[] - #doAssert a.repeat(-1) == @[] # will not compile! - doAssert b.repeat(3) == @[] + let c = mapLiterals((1, (2, 3), 4, (5, 6)), `$`) + let d = mapLiterals((1, (2, 3), 4, (5, 6)), `$`, nested=false) + assert c == ("1", ("2", "3"), "4", ("5", "6")) + assert d == ("1", (2, 3), "4", (5, 6)) + ## There are no constraints for the `constructor` AST, it + ## works for nested tuples of arrays of sets etc. + result = mapLitsImpl(constructor, op, nested.boolVal) - echo "Finished doc tests" +iterator items*[T](xs: iterator: T): T = + ## Iterates over each element yielded by a closure iterator. This may + ## not seem particularly useful on its own, but this allows closure + ## iterators to be used by the mapIt, filterIt, allIt, anyIt, etc. + ## templates. + for x in xs(): + yield x diff --git a/lib/pure/collections/setimpl.nim b/lib/pure/collections/setimpl.nim new file mode 100644 index 000000000..360a075d6 --- /dev/null +++ b/lib/pure/collections/setimpl.nim @@ -0,0 +1,156 @@ +# +# +# Nim's Runtime Library +# (c) Copyright 2019 Andreas Rumpf +# +# See the file "copying.txt", included in this +# distribution, for details about the copyright. +# + +# An `include` file for the different hash set implementations. + + +template maxHash(t): untyped = high(t.data) +template dataLen(t): untyped = len(t.data) + +include hashcommon + +template initImpl(s: typed, size: int) = + let correctSize = slotsNeeded(size) + when s is OrderedSet: + s.first = -1 + s.last = -1 + s.counter = 0 + newSeq(s.data, correctSize) + +template rawInsertImpl() {.dirty.} = + if data.len == 0: + initImpl(s, defaultInitialSize) + data[h].key = key + data[h].hcode = hc + +proc rawInsert[A](s: var HashSet[A], data: var KeyValuePairSeq[A], key: A, + hc: Hash, h: Hash) = + rawInsertImpl() + +proc enlarge[A](s: var HashSet[A]) = + var n: KeyValuePairSeq[A] + newSeq(n, len(s.data) * growthFactor) + swap(s.data, n) # n is now old seq + for i in countup(0, high(n)): + if isFilled(n[i].hcode): + var j = -1 - rawGetKnownHC(s, n[i].key, n[i].hcode) + rawInsert(s, s.data, n[i].key, n[i].hcode, j) + +template inclImpl() {.dirty.} = + if s.data.len == 0: + initImpl(s, defaultInitialSize) + var hc: Hash + var index = rawGet(s, key, hc) + if index < 0: + if mustRehash(s): + enlarge(s) + index = rawGetKnownHC(s, key, hc) + rawInsert(s, s.data, key, hc, -1 - index) + inc(s.counter) + +template containsOrInclImpl() {.dirty.} = + if s.data.len == 0: + initImpl(s, defaultInitialSize) + var hc: Hash + var index = rawGet(s, key, hc) + if index >= 0: + result = true + else: + result = false + if mustRehash(s): + enlarge(s) + index = rawGetKnownHC(s, key, hc) + rawInsert(s, s.data, key, hc, -1 - index) + inc(s.counter) + +template doWhile(a, b) = + while true: + b + if not a: break + +proc exclImpl[A](s: var HashSet[A], key: A): bool {.inline.} = + var hc: Hash + var i = rawGet(s, key, hc) + var msk = high(s.data) + result = true + + if i >= 0: + result = false + dec(s.counter) + while true: # KnuthV3 Algo6.4R adapted for i=i+1 instead of i=i-1 + var j = i # The correctness of this depends on (h+1) in nextTry, + var r = j # though may be adaptable to other simple sequences. + s.data[i].hcode = 0 # mark current EMPTY + {.push warning[UnsafeDefault]:off.} + reset(s.data[i].key) + {.pop.} + doWhile((i >= r and r > j) or (r > j and j > i) or (j > i and i >= r)): + i = (i + 1) and msk # increment mod table size + if isEmpty(s.data[i].hcode): # end of collision cluster; So all done + return + r = s.data[i].hcode and msk # "home" location of key@i + s.data[j] = move(s.data[i]) # data[i] will be marked EMPTY next loop + +template dollarImpl() {.dirty.} = + result = "{" + for key in items(s): + if result.len > 1: result.add(", ") + result.addQuoted(key) + result.add("}") + + + +# --------------------------- OrderedSet ------------------------------ + +proc rawGet[A](t: OrderedSet[A], key: A, hc: var Hash): int {.inline.} = + rawGetImpl() + +proc rawInsert[A](s: var OrderedSet[A], data: var OrderedKeyValuePairSeq[A], + key: A, hc: Hash, h: Hash) = + rawInsertImpl() + data[h].next = -1 + if s.first < 0: s.first = h + if s.last >= 0: data[s.last].next = h + s.last = h + +proc enlarge[A](s: var OrderedSet[A]) = + var n: OrderedKeyValuePairSeq[A] + newSeq(n, len(s.data) * growthFactor) + var h = s.first + s.first = -1 + s.last = -1 + swap(s.data, n) + while h >= 0: + var nxt = n[h].next + if isFilled(n[h].hcode): + var j = -1 - rawGetKnownHC(s, n[h].key, n[h].hcode) + rawInsert(s, s.data, n[h].key, n[h].hcode, j) + h = nxt + +proc exclImpl[A](s: var OrderedSet[A], key: A): bool {.inline.} = + if len(s.data) == 0: + return true + var n: OrderedKeyValuePairSeq[A] + newSeq(n, len(s.data)) + var h = s.first + s.first = -1 + s.last = -1 + swap(s.data, n) + let hc = genHash(key) + result = true + while h >= 0: + var nxt = n[h].next + if isFilled(n[h].hcode): + if n[h].hcode == hc and n[h].key == key: + dec s.counter + result = false + else: + var j = -1 - rawGetKnownHC(s, n[h].key, n[h].hcode) + rawInsert(s, s.data, n[h].key, n[h].hcode, j) + h = nxt diff --git a/lib/pure/collections/sets.nim b/lib/pure/collections/sets.nim index 4a20d00a4..af13135aa 100644 --- a/lib/pure/collections/sets.nim +++ b/lib/pure/collections/sets.nim @@ -7,94 +7,255 @@ # distribution, for details about the copyright. # -## The ``sets`` module implements an efficient `hash set`:idx: and +## The `sets` module implements an efficient `hash set`:idx: and ## ordered hash set. ## ## Hash sets are different from the `built in set type -## <manual.html#set-type>`_. Sets allow you to store any value that can be +## <manual.html#types-set-type>`_. Sets allow you to store any value that can be ## `hashed <hashes.html>`_ and they don't contain duplicate entries. ## -## **Note**: The data types declared here have *value semantics*: This means -## that ``=`` performs a copy of the set. +## Common usages of sets: +## * removing duplicates from a container by converting it with `toHashSet proc +## <#toHashSet,openArray[A]>`_ (see also `sequtils.deduplicate func +## <sequtils.html#deduplicate,openArray[T],bool>`_) +## * membership testing +## * mathematical operations on two sets, such as +## `union <#union,HashSet[A],HashSet[A]>`_, +## `intersection <#intersection,HashSet[A],HashSet[A]>`_, +## `difference <#difference,HashSet[A],HashSet[A]>`_, and +## `symmetric difference <#symmetricDifference,HashSet[A],HashSet[A]>`_ +## +## **Examples:** +## +## ```Nim +## echo toHashSet([9, 5, 1]) # {9, 1, 5} +## echo toOrderedSet([9, 5, 1]) # {9, 5, 1} +## +## let +## s1 = toHashSet([9, 5, 1]) +## s2 = toHashSet([3, 5, 7]) +## +## echo s1 + s2 # {9, 1, 3, 5, 7} +## echo s1 - s2 # {1, 9} +## echo s1 * s2 # {5} +## echo s1 -+- s2 # {9, 1, 3, 7} +## ``` +## +## Note: The data types declared here have *value semantics*: This means +## that `=` performs a copy of the set. +## +## **See also:** +## * `intsets module <intsets.html>`_ for efficient int sets +## * `tables module <tables.html>`_ for hash tables + import - os, hashes, math + std/[hashes, math] -{.pragma: myShallow.} -when not defined(nimhygiene): - {.pragma: dirty.} +when not defined(nimHasEffectsOf): + {.pragma: effectsOf.} +{.pragma: myShallow.} # For "integer-like A" that are too big for intsets/bit-vectors to be practical, # it would be best to shrink hcode to the same size as the integer. Larger # codes should never be needed, and this can pack more entries per cache-line. # Losing hcode entirely is also possible - if some element value is forbidden. type - KeyValuePair[A] = tuple[hcode: THash, key: A] + KeyValuePair[A] = tuple[hcode: Hash, key: A] KeyValuePairSeq[A] = seq[KeyValuePair[A]] - HashSet* {.myShallow.}[A] = object ## \ + HashSet*[A] {.myShallow.} = object ## \ ## A generic hash set. ## - ## Use `init() <#init,HashSet[A],int>`_ or `initSet[type]() <#initSet>`_ + ## Use `init proc <#init,HashSet[A]>`_ or `initHashSet proc <#initHashSet>`_ ## before calling other procs on it. data: KeyValuePairSeq[A] counter: int -{.deprecated: [TSet: HashSet].} +type + OrderedKeyValuePair[A] = tuple[ + hcode: Hash, next: int, key: A] + OrderedKeyValuePairSeq[A] = seq[OrderedKeyValuePair[A]] + OrderedSet*[A] {.myShallow.} = object ## \ + ## A generic hash set that remembers insertion order. + ## + ## Use `init proc <#init,OrderedSet[A]>`_ or `initOrderedSet proc + ## <#initOrderedSet>`_ before calling other procs on it. + data: OrderedKeyValuePairSeq[A] + counter, first, last: int + SomeSet*[A] = HashSet[A] | OrderedSet[A] + ## Type union representing `HashSet` or `OrderedSet`. + +const + defaultInitialSize* = 64 + +include setimpl -# hcode for real keys cannot be zero. hcode==0 signifies an empty slot. These -# two procs retain clarity of that encoding without the space cost of an enum. -proc isEmpty(hcode: THash): bool {.inline.} = - result = hcode == 0 +# --------------------------------------------------------------------- +# ------------------------------ HashSet ------------------------------ +# --------------------------------------------------------------------- -proc isFilled(hcode: THash): bool {.inline.} = - result = hcode != 0 -proc isValid*[A](s: HashSet[A]): bool = - ## Returns `true` if the set has been initialized with `initSet <#initSet>`_. +proc init*[A](s: var HashSet[A], initialSize = defaultInitialSize) = + ## Initializes a hash set. ## - ## Most operations over an uninitialized set will crash at runtime and - ## `assert <system.html#assert>`_ in debug builds. You can use this proc in - ## your own procs to verify that sets passed to your procs are correctly - ## initialized. Example: + ## Starting from Nim v0.20, sets are initialized by default and it is + ## not necessary to call this function explicitly. ## - ## .. code-block :: - ## proc savePreferences(options: TSet[string]) = - ## assert options.isValid, "Pass an initialized set!" - ## # Do stuff here, may crash in release builds! - result = not s.data.isNil + ## You can call this proc on a previously initialized hash set, which will + ## discard all its values. This might be more convenient than iterating over + ## existing values and calling `excl() <#excl,HashSet[A],A>`_ on them. + ## + ## See also: + ## * `initHashSet proc <#initHashSet>`_ + ## * `toHashSet proc <#toHashSet,openArray[A]>`_ + runnableExamples: + var a: HashSet[int] + init(a) + + initImpl(s, initialSize) + +proc initHashSet*[A](initialSize = defaultInitialSize): HashSet[A] = + ## Wrapper around `init proc <#init,HashSet[A]>`_ for initialization of + ## hash sets. + ## + ## Returns an empty hash set you can assign directly in `var` blocks in a + ## single line. + ## + ## Starting from Nim v0.20, sets are initialized by default and it is + ## not necessary to call this function explicitly. + ## + ## See also: + ## * `toHashSet proc <#toHashSet,openArray[A]>`_ + runnableExamples: + var a = initHashSet[int]() + a.incl(3) + assert len(a) == 1 + result = default(HashSet[A]) + result.init(initialSize) + +proc `[]`*[A](s: var HashSet[A], key: A): var A = + ## Returns the element that is actually stored in `s` which has the same + ## value as `key` or raises the `KeyError` exception. + ## + ## This is useful when one overloaded `hash` and `==` but still needs + ## reference semantics for sharing. + var hc: Hash + var index = rawGet(s, key, hc) + if index >= 0: result = s.data[index].key + else: + when compiles($key): + raise newException(KeyError, "key not found: " & $key) + else: + raise newException(KeyError, "key not found") + +proc contains*[A](s: HashSet[A], key: A): bool = + ## Returns true if `key` is in `s`. + ## + ## This allows the usage of `in` operator. + ## + ## See also: + ## * `incl proc <#incl,HashSet[A],A>`_ + ## * `containsOrIncl proc <#containsOrIncl,HashSet[A],A>`_ + runnableExamples: + var values = initHashSet[int]() + assert(not values.contains(2)) + assert 2 notin values + + values.incl(2) + assert values.contains(2) + assert 2 in values + + var hc: Hash + var index = rawGet(s, key, hc) + result = index >= 0 proc len*[A](s: HashSet[A]): int = - ## Returns the number of keys in `s`. + ## Returns the number of elements in `s`. ## ## Due to an implementation detail you can call this proc on variables which ## have not been initialized yet. The proc will return zero as the length - ## then. Example: - ## - ## .. code-block:: - ## - ## var values: TSet[int] - ## assert(not values.isValid) - ## assert values.len == 0 + ## then. + runnableExamples: + var a: HashSet[string] + assert len(a) == 0 + let s = toHashSet([3, 5, 7]) + assert len(s) == 3 + result = s.counter proc card*[A](s: HashSet[A]): int = - ## Alias for `len() <#len,TSet[A]>`_. + ## Alias for `len() <#len,HashSet[A]>`_. ## ## Card stands for the `cardinality ## <http://en.wikipedia.org/wiki/Cardinality>`_ of a set. result = s.counter +proc incl*[A](s: var HashSet[A], key: A) = + ## Includes an element `key` in `s`. + ## + ## This doesn't do anything if `key` is already in `s`. + ## + ## See also: + ## * `excl proc <#excl,HashSet[A],A>`_ for excluding an element + ## * `incl proc <#incl,HashSet[A],HashSet[A]>`_ for including other set + ## * `containsOrIncl proc <#containsOrIncl,HashSet[A],A>`_ + runnableExamples: + var values = initHashSet[int]() + values.incl(2) + values.incl(2) + assert values.len == 1 + + inclImpl() + +proc incl*[A](s: var HashSet[A], other: HashSet[A]) = + ## Includes all elements from `other` set into `s` (must be declared as `var`). + ## + ## This is the in-place version of `s + other <#+,HashSet[A],HashSet[A]>`_. + ## + ## See also: + ## * `excl proc <#excl,HashSet[A],HashSet[A]>`_ for excluding other set + ## * `incl proc <#incl,HashSet[A],A>`_ for including an element + ## * `containsOrIncl proc <#containsOrIncl,HashSet[A],A>`_ + runnableExamples: + var + values = toHashSet([1, 2, 3]) + others = toHashSet([3, 4, 5]) + values.incl(others) + assert values.len == 5 + + for item in other: incl(s, item) + +proc toHashSet*[A](keys: openArray[A]): HashSet[A] = + ## Creates a new hash set that contains the members of the given + ## collection (seq, array, or string) `keys`. + ## + ## Duplicates are removed. + ## + ## See also: + ## * `initHashSet proc <#initHashSet>`_ + runnableExamples: + let + a = toHashSet([5, 3, 2]) + b = toHashSet("abracadabra") + assert len(a) == 3 + ## a == {2, 3, 5} + assert len(b) == 5 + ## b == {'a', 'b', 'c', 'd', 'r'} + + result = initHashSet[A](keys.len) + for key in items(keys): result.incl(key) + iterator items*[A](s: HashSet[A]): A = - ## Iterates over keys in the set `s`. + ## Iterates over elements of the set `s`. ## - ## If you need a sequence with the keys you can use `sequtils.toSeq() - ## <sequtils.html#toSeq>`_ on the iterator. Usage example: + ## If you need a sequence with the elements you can use `sequtils.toSeq + ## template <sequtils.html#toSeq.t,untyped>`_. ## - ## .. code-block:: + ## ```Nim ## type ## pair = tuple[a, b: int] ## var - ## a, b = initSet[pair]() + ## a, b = initHashSet[pair]() ## a.incl((2, 3)) ## a.incl((3, 2)) ## a.incl((2, 3)) @@ -103,334 +264,202 @@ iterator items*[A](s: HashSet[A]): A = ## assert a.len == 2 ## echo b ## # --> {(a: 1, b: 3), (a: 0, b: 4)} - assert s.isValid, "The set needs to be initialized." - for h in 0..high(s.data): - if isFilled(s.data[h].hcode): yield s.data[h].key - -const - growthFactor = 2 - -proc mustRehash(length, counter: int): bool {.inline.} = - assert(length > counter) - result = (length * 2 < counter * 3) or (length - counter < 4) - -proc rightSize*(count: int): int {.inline.} = - ## Return the value of `initialSize` to support `count` items. - ## - ## If more items are expected to be added, simply add that - ## expected extra amount to the parameter before calling this. - ## - ## Internally, we want mustRehash(rightSize(x), x) == false. - result = nextPowerOfTwo(count * 3 div 2 + 4) - -proc nextTry(h, maxHash: THash): THash {.inline.} = - result = (h + 1) and maxHash - -template rawGetKnownHCImpl() {.dirty.} = - var h: THash = hc and high(s.data) # start with real hash value - while isFilled(s.data[h].hcode): - # Compare hc THEN key with boolean short circuit. This makes the common case - # zero ==key's for missing (e.g.inserts) and exactly one ==key for present. - # It does slow down succeeding lookups by one extra THash cmp&and..usually - # just a few clock cycles, generally worth it for any non-integer-like A. - if s.data[h].hcode == hc and s.data[h].key == key: # compare hc THEN key - return h - h = nextTry(h, high(s.data)) - result = -1 - h # < 0 => MISSING; insert idx = -1 - result - -template rawGetImpl() {.dirty.} = - hc = hash(key) - if hc == 0: # This almost never taken branch should be very predictable. - hc = 314159265 # Value doesn't matter; Any non-zero favorite is fine. - rawGetKnownHCImpl() - -template rawInsertImpl() {.dirty.} = - data[h].key = key - data[h].hcode = hc - -proc rawGetKnownHC[A](s: HashSet[A], key: A, hc: THash): int {.inline.} = - rawGetKnownHCImpl() - -proc rawGet[A](s: HashSet[A], key: A, hc: var THash): int {.inline.} = - rawGetImpl() - -proc mget*[A](s: var HashSet[A], key: A): var A = - ## returns the element that is actually stored in 's' which has the same - ## value as 'key' or raises the ``EInvalidKey`` exception. This is useful - ## when one overloaded 'hash' and '==' but still needs reference semantics - ## for sharing. - assert s.isValid, "The set needs to be initialized." - var hc: THash - var index = rawGet(s, key, hc) - if index >= 0: result = s.data[index].key - else: raise newException(KeyError, "key not found: " & $key) + ## ``` + let length = s.len + for h in 0 .. high(s.data): + if isFilled(s.data[h].hcode): + yield s.data[h].key + assert(len(s) == length, "the length of the HashSet changed while iterating over it") -proc contains*[A](s: HashSet[A], key: A): bool = - ## Returns true iff `key` is in `s`. - ## - ## Example: - ## - ## .. code-block:: - ## var values = initSet[int]() - ## assert(not values.contains(2)) - ## values.incl(2) - ## assert values.contains(2) - ## values.excl(2) - ## assert(not values.contains(2)) - assert s.isValid, "The set needs to be initialized." - var hc: THash - var index = rawGet(s, key, hc) - result = index >= 0 - -proc rawInsert[A](s: var HashSet[A], data: var KeyValuePairSeq[A], key: A, - hc: THash, h: THash) = - rawInsertImpl() - -proc enlarge[A](s: var HashSet[A]) = - var n: KeyValuePairSeq[A] - newSeq(n, len(s.data) * growthFactor) - swap(s.data, n) # n is now old seq - for i in countup(0, high(n)): - if isFilled(n[i].hcode): - var j = -1 - rawGetKnownHC(s, n[i].key, n[i].hcode) - rawInsert(s, s.data, n[i].key, n[i].hcode, j) - -template inclImpl() {.dirty.} = - var hc: THash - var index = rawGet(s, key, hc) - if index < 0: - if mustRehash(len(s.data), s.counter): - enlarge(s) - index = rawGetKnownHC(s, key, hc) - rawInsert(s, s.data, key, hc, -1 - index) - inc(s.counter) - -template containsOrInclImpl() {.dirty.} = - var hc: THash - var index = rawGet(s, key, hc) - if index >= 0: - result = true - else: - if mustRehash(len(s.data), s.counter): - enlarge(s) - index = rawGetKnownHC(s, key, hc) - rawInsert(s, s.data, key, hc, -1 - index) - inc(s.counter) - -proc incl*[A](s: var HashSet[A], key: A) = - ## Includes an element `key` in `s`. - ## - ## This doesn't do anything if `key` is already in `s`. Example: - ## - ## .. code-block:: - ## var values = initSet[int]() - ## values.incl(2) - ## values.incl(2) - ## assert values.len == 1 - assert s.isValid, "The set needs to be initialized." - inclImpl() - -proc incl*[A](s: var HashSet[A], other: HashSet[A]) = - ## Includes all elements from `other` into `s`. - ## - ## Example: - ## - ## .. code-block:: - ## var values = initSet[int]() - ## values.incl(2) - ## var others = toSet([6, 7]) - ## values.incl(others) - ## assert values.len == 3 - assert s.isValid, "The set `s` needs to be initialized." - assert other.isValid, "The set `other` needs to be initialized." - for item in other: incl(s, item) +proc containsOrIncl*[A](s: var HashSet[A], key: A): bool = + ## Includes `key` in the set `s` and tells if `key` was already in `s`. + ## + ## The difference with regards to the `incl proc <#incl,HashSet[A],A>`_ is + ## that this proc returns `true` if `s` already contained `key`. The + ## proc will return `false` if `key` was added as a new value to `s` during + ## this call. + ## + ## See also: + ## * `incl proc <#incl,HashSet[A],A>`_ for including an element + ## * `incl proc <#incl,HashSet[A],HashSet[A]>`_ for including other set + ## * `missingOrExcl proc <#missingOrExcl,HashSet[A],A>`_ + runnableExamples: + var values = initHashSet[int]() + assert values.containsOrIncl(2) == false + assert values.containsOrIncl(2) == true + assert values.containsOrIncl(3) == false -template doWhile(a: expr, b: stmt): stmt = - while true: - b - if not a: break + containsOrInclImpl() proc excl*[A](s: var HashSet[A], key: A) = ## Excludes `key` from the set `s`. ## - ## This doesn't do anything if `key` is not found in `s`. Example: - ## - ## .. code-block:: - ## var s = toSet([2, 3, 6, 7]) - ## s.excl(2) - ## s.excl(2) - ## assert s.len == 3 - assert s.isValid, "The set needs to be initialized." - var hc: THash - var i = rawGet(s, key, hc) - var msk = high(s.data) - if i >= 0: - s.data[i].hcode = 0 - dec(s.counter) - while true: # KnuthV3 Algo6.4R adapted for i=i+1 instead of i=i-1 - var j = i # The correctness of this depends on (h+1) in nextTry, - var r = j # though may be adaptable to other simple sequences. - s.data[i].hcode = 0 # mark current EMPTY - doWhile ((i >= r and r > j) or (r > j and j > i) or (j > i and i >= r)): - i = (i + 1) and msk # increment mod table size - if isEmpty(s.data[i].hcode): # end of collision cluster; So all done - return - r = s.data[i].hcode and msk # "home" location of key@i - shallowCopy(s.data[j], s.data[i]) # data[j] will be marked EMPTY next loop - -proc excl*[A](s: var HashSet[A], other: HashSet[A]) = - ## Excludes everything in `other` from `s`. - ## - ## Example: - ## - ## .. code-block:: - ## var - ## numbers = toSet([1, 2, 3, 4, 5]) - ## even = toSet([2, 4, 6, 8]) - ## numbers.excl(even) - ## echo numbers - ## # --> {1, 3, 5} - assert s.isValid, "The set `s` needs to be initialized." - assert other.isValid, "The set `other` needs to be initialized." - for item in other: excl(s, item) - -proc containsOrIncl*[A](s: var HashSet[A], key: A): bool = - ## Includes `key` in the set `s` and tells if `key` was added to `s`. - ## - ## The difference with regards to the `incl() <#incl,TSet[A],A>`_ proc is - ## that this proc returns `true` if `key` was already present in `s`. The - ## proc will return false if `key` was added as a new value to `s` during - ## this call. Example: + ## This doesn't do anything if `key` is not found in `s`. ## - ## .. code-block:: - ## var values = initSet[int]() - ## assert values.containsOrIncl(2) == false - ## assert values.containsOrIncl(2) == true - assert s.isValid, "The set needs to be initialized." - containsOrInclImpl() - -proc init*[A](s: var HashSet[A], initialSize=64) = - ## Initializes a hash set. - ## - ## The `initialSize` parameter needs to be a power of two. You can use - ## `math.nextPowerOfTwo() <math.html#nextPowerOfTwo>`_ or `rightSize` to - ## guarantee that at runtime. All set variables must be initialized before - ## use with other procs from this module with the exception of `isValid() - ## <#isValid,TSet[A]>`_ and `len() <#len,TSet[A]>`_. - ## - ## You can call this proc on a previously initialized hash set, which will - ## discard all its values. This might be more convenient than iterating over - ## existing values and calling `excl() <#excl,TSet[A],A>`_ on them. Example: - ## - ## .. code-block :: - ## var a: TSet[int] - ## a.init(4) - ## a.incl(2) - ## a.init - ## assert a.len == 0 and a.isValid - assert isPowerOfTwo(initialSize) - s.counter = 0 - newSeq(s.data, initialSize) + ## See also: + ## * `incl proc <#incl,HashSet[A],A>`_ for including an element + ## * `excl proc <#excl,HashSet[A],HashSet[A]>`_ for excluding other set + ## * `missingOrExcl proc <#missingOrExcl,HashSet[A],A>`_ + runnableExamples: + var s = toHashSet([2, 3, 6, 7]) + s.excl(2) + s.excl(2) + assert s.len == 3 -proc initSet*[A](initialSize=64): HashSet[A] = - ## Wrapper around `init() <#init,TSet[A],int>`_ for initialization of hash - ## sets. - ## - ## Returns an empty hash set you can assign directly in ``var`` blocks in a - ## single line. Example: - ## - ## .. code-block :: - ## var a = initSet[int](4) - ## a.incl(2) - result.init(initialSize) + discard exclImpl(s, key) -proc toSet*[A](keys: openArray[A]): HashSet[A] = - ## Creates a new hash set that contains the given `keys`. +proc excl*[A](s: var HashSet[A], other: HashSet[A]) = + ## Excludes all elements of `other` set from `s`. ## - ## Example: + ## This is the in-place version of `s - other <#-,HashSet[A],HashSet[A]>`_. ## - ## .. code-block:: - ## var numbers = toSet([1, 2, 3, 4, 5]) - ## assert numbers.contains(2) - ## assert numbers.contains(4) - result = initSet[A](rightSize(keys.len)) - for key in items(keys): result.incl(key) + ## See also: + ## * `incl proc <#incl,HashSet[A],HashSet[A]>`_ for including other set + ## * `excl proc <#excl,HashSet[A],A>`_ for excluding an element + ## * `missingOrExcl proc <#missingOrExcl,HashSet[A],A>`_ + runnableExamples: + var + numbers = toHashSet([1, 2, 3, 4, 5]) + even = toHashSet([2, 4, 6, 8]) + numbers.excl(even) + assert len(numbers) == 3 + ## numbers == {1, 3, 5} + + for item in other: discard exclImpl(s, item) + +proc missingOrExcl*[A](s: var HashSet[A], key: A): bool = + ## Excludes `key` in the set `s` and tells if `key` was already missing from `s`. + ## + ## The difference with regards to the `excl proc <#excl,HashSet[A],A>`_ is + ## that this proc returns `true` if `key` was missing from `s`. + ## The proc will return `false` if `key` was in `s` and it was removed + ## during this call. + ## + ## See also: + ## * `excl proc <#excl,HashSet[A],A>`_ for excluding an element + ## * `excl proc <#excl,HashSet[A],HashSet[A]>`_ for excluding other set + ## * `containsOrIncl proc <#containsOrIncl,HashSet[A],A>`_ + runnableExamples: + var s = toHashSet([2, 3, 6, 7]) + assert s.missingOrExcl(4) == true + assert s.missingOrExcl(6) == false + assert s.missingOrExcl(6) == true + + exclImpl(s, key) + +proc pop*[A](s: var HashSet[A]): A = + ## Removes and returns an arbitrary element from the set `s`. + ## + ## Raises `KeyError` if the set `s` is empty. + ## + ## See also: + ## * `clear proc <#clear,HashSet[A]>`_ + runnableExamples: + var s = toHashSet([2, 1]) + assert [s.pop, s.pop] in [[1, 2], [2,1]] # order unspecified + doAssertRaises(KeyError, echo s.pop) + + for h in 0 .. high(s.data): + if isFilled(s.data[h].hcode): + result = s.data[h].key + excl(s, result) + return result + raise newException(KeyError, "set is empty") + +proc clear*[A](s: var HashSet[A]) = + ## Clears the HashSet back to an empty state, without shrinking + ## any of the existing storage. + ## + ## `O(n)` operation, where `n` is the size of the hash bucket. + ## + ## See also: + ## * `pop proc <#pop,HashSet[A]>`_ + runnableExamples: + var s = toHashSet([3, 5, 7]) + clear(s) + assert len(s) == 0 -template dollarImpl(): stmt {.dirty.} = - result = "{" - for key in items(s): - if result.len > 1: result.add(", ") - result.add($key) - result.add("}") + s.counter = 0 + for i in 0 ..< s.data.len: + s.data[i].hcode = 0 + {.push warning[UnsafeDefault]:off.} + reset(s.data[i].key) + {.pop.} -proc `$`*[A](s: HashSet[A]): string = - ## Converts the set `s` to a string, mostly for logging purposes. - ## - ## Don't use this proc for serialization, the representation may change at - ## any moment and values are not escaped. Example: - ## - ## Example: - ## - ## .. code-block:: - ## echo toSet([2, 4, 5]) - ## # --> {2, 4, 5} - ## echo toSet(["no", "esc'aping", "is \" provided"]) - ## # --> {no, esc'aping, is " provided} - assert s.isValid, "The set needs to be initialized." - dollarImpl() proc union*[A](s1, s2: HashSet[A]): HashSet[A] = ## Returns the union of the sets `s1` and `s2`. ## - ## The union of two sets is represented mathematically as *A ∪ B* and is the - ## set of all objects that are members of `s1`, `s2` or both. Example: + ## The same as `s1 + s2 <#+,HashSet[A],HashSet[A]>`_. ## - ## .. code-block:: - ## var - ## a = toSet(["a", "b"]) - ## b = toSet(["b", "c"]) - ## c = union(a, b) - ## assert c == toSet(["a", "b", "c"]) - assert s1.isValid, "The set `s1` needs to be initialized." - assert s2.isValid, "The set `s2` needs to be initialized." + ## The union of two sets is represented mathematically as *A ∪ B* and is the + ## set of all objects that are members of `s1`, `s2` or both. + ## + ## See also: + ## * `intersection proc <#intersection,HashSet[A],HashSet[A]>`_ + ## * `difference proc <#difference,HashSet[A],HashSet[A]>`_ + ## * `symmetricDifference proc <#symmetricDifference,HashSet[A],HashSet[A]>`_ + runnableExamples: + let + a = toHashSet(["a", "b"]) + b = toHashSet(["b", "c"]) + c = union(a, b) + assert c == toHashSet(["a", "b", "c"]) + result = s1 incl(result, s2) proc intersection*[A](s1, s2: HashSet[A]): HashSet[A] = ## Returns the intersection of the sets `s1` and `s2`. ## + ## The same as `s1 * s2 <#*,HashSet[A],HashSet[A]>`_. + ## ## The intersection of two sets is represented mathematically as *A ∩ B* and ## is the set of all objects that are members of `s1` and `s2` at the same - ## time. Example: - ## - ## .. code-block:: - ## var - ## a = toSet(["a", "b"]) - ## b = toSet(["b", "c"]) - ## c = intersection(a, b) - ## assert c == toSet(["b"]) - assert s1.isValid, "The set `s1` needs to be initialized." - assert s2.isValid, "The set `s2` needs to be initialized." - result = initSet[A](min(s1.data.len, s2.data.len)) - for item in s1: - if item in s2: incl(result, item) + ## time. + ## + ## See also: + ## * `union proc <#union,HashSet[A],HashSet[A]>`_ + ## * `difference proc <#difference,HashSet[A],HashSet[A]>`_ + ## * `symmetricDifference proc <#symmetricDifference,HashSet[A],HashSet[A]>`_ + runnableExamples: + let + a = toHashSet(["a", "b"]) + b = toHashSet(["b", "c"]) + c = intersection(a, b) + assert c == toHashSet(["b"]) + + result = initHashSet[A](max(min(s1.data.len, s2.data.len), 2)) + + # iterate over the elements of the smaller set + if s1.data.len < s2.data.len: + for item in s1: + if item in s2: incl(result, item) + else: + for item in s2: + if item in s1: incl(result, item) + proc difference*[A](s1, s2: HashSet[A]): HashSet[A] = ## Returns the difference of the sets `s1` and `s2`. ## - ## The difference of two sets is represented mathematically as *A \ B* and is + ## The same as `s1 - s2 <#-,HashSet[A],HashSet[A]>`_. + ## + ## The difference of two sets is represented mathematically as *A ∖ B* and is ## the set of all objects that are members of `s1` and not members of `s2`. - ## Example: ## - ## .. code-block:: - ## var - ## a = toSet(["a", "b"]) - ## b = toSet(["b", "c"]) - ## c = difference(a, b) - ## assert c == toSet(["a"]) - assert s1.isValid, "The set `s1` needs to be initialized." - assert s2.isValid, "The set `s2` needs to be initialized." - result = initSet[A]() + ## See also: + ## * `union proc <#union,HashSet[A],HashSet[A]>`_ + ## * `intersection proc <#intersection,HashSet[A],HashSet[A]>`_ + ## * `symmetricDifference proc <#symmetricDifference,HashSet[A],HashSet[A]>`_ + runnableExamples: + let + a = toHashSet(["a", "b"]) + b = toHashSet(["b", "c"]) + c = difference(a, b) + assert c == toHashSet(["a"]) + + result = initHashSet[A]() for item in s1: if not contains(s2, item): incl(result, item) @@ -438,51 +467,53 @@ proc difference*[A](s1, s2: HashSet[A]): HashSet[A] = proc symmetricDifference*[A](s1, s2: HashSet[A]): HashSet[A] = ## Returns the symmetric difference of the sets `s1` and `s2`. ## + ## The same as `s1 -+- s2 <#-+-,HashSet[A],HashSet[A]>`_. + ## ## The symmetric difference of two sets is represented mathematically as *A △ ## B* or *A ⊖ B* and is the set of all objects that are members of `s1` or - ## `s2` but not both at the same time. Example: - ## - ## .. code-block:: - ## var - ## a = toSet(["a", "b"]) - ## b = toSet(["b", "c"]) - ## c = symmetricDifference(a, b) - ## assert c == toSet(["a", "c"]) - assert s1.isValid, "The set `s1` needs to be initialized." - assert s2.isValid, "The set `s2` needs to be initialized." + ## `s2` but not both at the same time. + ## + ## See also: + ## * `union proc <#union,HashSet[A],HashSet[A]>`_ + ## * `intersection proc <#intersection,HashSet[A],HashSet[A]>`_ + ## * `difference proc <#difference,HashSet[A],HashSet[A]>`_ + runnableExamples: + let + a = toHashSet(["a", "b"]) + b = toHashSet(["b", "c"]) + c = symmetricDifference(a, b) + assert c == toHashSet(["a", "c"]) + result = s1 for item in s2: if containsOrIncl(result, item): excl(result, item) proc `+`*[A](s1, s2: HashSet[A]): HashSet[A] {.inline.} = - ## Alias for `union(s1, s2) <#union>`_. + ## Alias for `union(s1, s2) <#union,HashSet[A],HashSet[A]>`_. result = union(s1, s2) proc `*`*[A](s1, s2: HashSet[A]): HashSet[A] {.inline.} = - ## Alias for `intersection(s1, s2) <#intersection>`_. + ## Alias for `intersection(s1, s2) <#intersection,HashSet[A],HashSet[A]>`_. result = intersection(s1, s2) proc `-`*[A](s1, s2: HashSet[A]): HashSet[A] {.inline.} = - ## Alias for `difference(s1, s2) <#difference>`_. + ## Alias for `difference(s1, s2) <#difference,HashSet[A],HashSet[A]>`_. result = difference(s1, s2) proc `-+-`*[A](s1, s2: HashSet[A]): HashSet[A] {.inline.} = - ## Alias for `symmetricDifference(s1, s2) <#symmetricDifference>`_. + ## Alias for `symmetricDifference(s1, s2) + ## <#symmetricDifference,HashSet[A],HashSet[A]>`_. result = symmetricDifference(s1, s2) proc disjoint*[A](s1, s2: HashSet[A]): bool = - ## Returns true iff the sets `s1` and `s2` have no items in common. - ## - ## Example: - ## - ## .. code-block:: - ## var - ## a = toSet(["a", "b"]) - ## b = toSet(["b", "c"]) - ## assert disjoint(a, b) == false - ## assert disjoint(a, b - a) == true - assert s1.isValid, "The set `s1` needs to be initialized." - assert s2.isValid, "The set `s2` needs to be initialized." + ## Returns `true` if the sets `s1` and `s2` have no items in common. + runnableExamples: + let + a = toHashSet(["a", "b"]) + b = toHashSet(["b", "c"]) + assert disjoint(a, b) == false + assert disjoint(a, b - a) == true + for item in s1: if item in s2: return false return true @@ -491,306 +522,344 @@ proc `<`*[A](s, t: HashSet[A]): bool = ## Returns true if `s` is a strict or proper subset of `t`. ## ## A strict or proper subset `s` has all of its members in `t` but `t` has - ## more elements than `s`. Example: - ## - ## .. code-block:: - ## var - ## a = toSet(["a", "b"]) - ## b = toSet(["b", "c"]) - ## c = intersection(a, b) - ## assert c < a and c < b - ## assert((a < a) == false) + ## more elements than `s`. + runnableExamples: + let + a = toHashSet(["a", "b"]) + b = toHashSet(["b", "c"]) + c = intersection(a, b) + assert c < a and c < b + assert(not (a < a)) + s.counter != t.counter and s <= t proc `<=`*[A](s, t: HashSet[A]): bool = - ## Returns true if `s` is subset of `t`. + ## Returns true if `s` is a subset of `t`. ## ## A subset `s` has all of its members in `t` and `t` doesn't necessarily - ## have more members than `s`. That is, `s` can be equal to `t`. Example: - ## - ## .. code-block:: - ## var - ## a = toSet(["a", "b"]) - ## b = toSet(["b", "c"]) - ## c = intersection(a, b) - ## assert c <= a and c <= b - ## assert((a <= a)) + ## have more members than `s`. That is, `s` can be equal to `t`. + runnableExamples: + let + a = toHashSet(["a", "b"]) + b = toHashSet(["b", "c"]) + c = intersection(a, b) + assert c <= a and c <= b + assert a <= a + result = false if s.counter > t.counter: return result = true - for item in s: + for item in items(s): if not(t.contains(item)): result = false return proc `==`*[A](s, t: HashSet[A]): bool = ## Returns true if both `s` and `t` have the same members and set size. - ## - ## Example: - ## - ## .. code-block:: - ## var - ## a = toSet([1, 2]) - ## b = toSet([1]) - ## b.incl(2) - ## assert a == b + runnableExamples: + var + a = toHashSet([1, 2]) + b = toHashSet([2, 1]) + assert a == b + s.counter == t.counter and s <= t -proc map*[A, B](data: HashSet[A], op: proc (x: A): B {.closure.}): HashSet[B] = - ## Returns a new set after applying `op` on each of the elements of `data`. - ## - ## You can use this proc to transform the elements from a set. Example: +proc map*[A, B](data: HashSet[A], op: proc (x: A): B {.closure.}): HashSet[B] {.effectsOf: op.} = + ## Returns a new set after applying `op` proc on each of the elements of + ##`data` set. ## - ## .. code-block:: - ## var a = toSet([1, 2, 3]) - ## var b = a.map(proc (x: int): string = $x) - ## assert b == toSet(["1", "2", "3"]) - result = initSet[B]() - for item in data: result.incl(op(item)) + ## You can use this proc to transform the elements from a set. + runnableExamples: + let + a = toHashSet([1, 2, 3]) + b = a.map(proc (x: int): string = $x) + assert b == toHashSet(["1", "2", "3"]) -# ------------------------------ ordered set ------------------------------ - -type - OrderedKeyValuePair[A] = tuple[ - hcode: THash, next: int, key: A] - OrderedKeyValuePairSeq[A] = seq[OrderedKeyValuePair[A]] - OrderedSet* {.myShallow.}[A] = object ## \ - ## A generic hash set that remembers insertion order. - ## - ## Use `init() <#init,OrderedSet[A],int>`_ or `initOrderedSet[type]() - ## <#initOrderedSet>`_ before calling other procs on it. - data: OrderedKeyValuePairSeq[A] - counter, first, last: int + result = initHashSet[B]() + for item in items(data): result.incl(op(item)) -{.deprecated: [TOrderedSet: OrderedSet].} +proc hash*[A](s: HashSet[A]): Hash = + ## Hashing of HashSet. + for h in 0 .. high(s.data): + result = result xor s.data[h].hcode + result = !$result -proc isValid*[A](s: OrderedSet[A]): bool = - ## Returns `true` if the ordered set has been initialized with `initSet - ## <#initOrderedSet>`_. +proc `$`*[A](s: HashSet[A]): string = + ## Converts the set `s` to a string, mostly for logging and printing purposes. ## - ## Most operations over an uninitialized ordered set will crash at runtime - ## and `assert <system.html#assert>`_ in debug builds. You can use this proc - ## in your own procs to verify that ordered sets passed to your procs are - ## correctly initialized. Example: + ## Don't use this proc for serialization, the representation may change at + ## any moment and values are not escaped. ## - ## .. code-block:: - ## proc saveTarotCards(cards: TOrderedSet[int]) = - ## assert cards.isValid, "Pass an initialized set!" - ## # Do stuff here, may crash in release builds! - result = not s.data.isNil + ## **Examples:** + ## ```Nim + ## echo toHashSet([2, 4, 5]) + ## # --> {2, 4, 5} + ## echo toHashSet(["no", "esc'aping", "is \" provided"]) + ## # --> {no, esc'aping, is " provided} + ## ``` + dollarImpl() -proc len*[A](s: OrderedSet[A]): int {.inline.} = - ## Returns the number of keys in `s`. + +proc initSet*[A](initialSize = defaultInitialSize): HashSet[A] {.deprecated: + "Deprecated since v0.20, use 'initHashSet'".} = initHashSet[A](initialSize) + +proc toSet*[A](keys: openArray[A]): HashSet[A] {.deprecated: + "Deprecated since v0.20, use 'toHashSet'".} = toHashSet[A](keys) + +proc isValid*[A](s: HashSet[A]): bool {.deprecated: + "Deprecated since v0.20; sets are initialized by default".} = + ## Returns `true` if the set has been initialized (with `initHashSet proc + ## <#initHashSet>`_ or `init proc <#init,HashSet[A]>`_). ## - ## Due to an implementation detail you can call this proc on variables which - ## have not been initialized yet. The proc will return zero as the length - ## then. Example: + runnableExamples: + proc savePreferences(options: HashSet[string]) = + assert options.isValid, "Pass an initialized set!" + # Do stuff here, may crash in release builds! + result = s.data.len > 0 + + + +# --------------------------------------------------------------------- +# --------------------------- OrderedSet ------------------------------ +# --------------------------------------------------------------------- + +template forAllOrderedPairs(yieldStmt: untyped) {.dirty.} = + if s.data.len > 0: + var h = s.first + var idx = 0 + while h >= 0: + var nxt = s.data[h].next + if isFilled(s.data[h].hcode): + yieldStmt + inc(idx) + h = nxt + + +proc init*[A](s: var OrderedSet[A], initialSize = defaultInitialSize) = + ## Initializes an ordered hash set. ## - ## .. code-block:: + ## Starting from Nim v0.20, sets are initialized by default and it is + ## not necessary to call this function explicitly. ## - ## var values: TOrderedSet[int] - ## assert(not values.isValid) - ## assert values.len == 0 - result = s.counter - -proc card*[A](s: OrderedSet[A]): int {.inline.} = - ## Alias for `len() <#len,TOrderedSet[A]>`_. + ## You can call this proc on a previously initialized hash set, which will + ## discard all its values. This might be more convenient than iterating over + ## existing values and calling `excl() <#excl,HashSet[A],A>`_ on them. ## - ## Card stands for the `cardinality - ## <http://en.wikipedia.org/wiki/Cardinality>`_ of a set. - result = s.counter + ## See also: + ## * `initOrderedSet proc <#initOrderedSet>`_ + ## * `toOrderedSet proc <#toOrderedSet,openArray[A]>`_ + runnableExamples: + var a: OrderedSet[int] + init(a) -template forAllOrderedPairs(yieldStmt: stmt) {.dirty, immediate.} = - var h = s.first - while h >= 0: - var nxt = s.data[h].next - if isFilled(s.data[h].hcode): yieldStmt - h = nxt + initImpl(s, initialSize) -iterator items*[A](s: OrderedSet[A]): A = - ## Iterates over keys in the ordered set `s` in insertion order. +proc initOrderedSet*[A](initialSize = defaultInitialSize): OrderedSet[A] = + ## Wrapper around `init proc <#init,OrderedSet[A]>`_ for initialization of + ## ordered hash sets. ## - ## If you need a sequence with the keys you can use `sequtils.toSeq() - ## <sequtils.html#toSeq>`_ on the iterator. Usage example: + ## Returns an empty ordered hash set you can assign directly in `var` blocks + ## in a single line. ## - ## .. code-block:: - ## var a = initOrderedSet[int]() - ## for value in [9, 2, 1, 5, 1, 8, 4, 2]: - ## a.incl(value) - ## for value in a.items: - ## echo "Got ", value - ## # --> Got 9 - ## # --> Got 2 - ## # --> Got 1 - ## # --> Got 5 - ## # --> Got 8 - ## # --> Got 4 - assert s.isValid, "The set needs to be initialized." - forAllOrderedPairs: - yield s.data[h].key + ## Starting from Nim v0.20, sets are initialized by default and it is + ## not necessary to call this function explicitly. + ## + ## See also: + ## * `toOrderedSet proc <#toOrderedSet,openArray[A]>`_ + runnableExamples: + var a = initOrderedSet[int]() + a.incl(3) + assert len(a) == 1 -proc rawGetKnownHC[A](s: OrderedSet[A], key: A, hc: THash): int {.inline.} = - rawGetKnownHCImpl() + result.init(initialSize) -proc rawGet[A](s: OrderedSet[A], key: A, hc: var THash): int {.inline.} = - rawGetImpl() +proc toOrderedSet*[A](keys: openArray[A]): OrderedSet[A] = + ## Creates a new hash set that contains the members of the given + ## collection (seq, array, or string) `keys`. + ## + ## Duplicates are removed. + ## + ## See also: + ## * `initOrderedSet proc <#initOrderedSet>`_ + runnableExamples: + let + a = toOrderedSet([5, 3, 2]) + b = toOrderedSet("abracadabra") + assert len(a) == 3 + ## a == {5, 3, 2} # different than in HashSet + assert len(b) == 5 + ## b == {'a', 'b', 'r', 'c', 'd'} # different than in HashSet + + result = initOrderedSet[A](keys.len) + for key in items(keys): result.incl(key) proc contains*[A](s: OrderedSet[A], key: A): bool = - ## Returns true iff `key` is in `s`. + ## Returns true if `key` is in `s`. ## - ## Example: + ## This allows the usage of `in` operator. ## - ## .. code-block:: - ## var values = initOrderedSet[int]() - ## assert(not values.contains(2)) - ## values.incl(2) - ## assert values.contains(2) - assert s.isValid, "The set needs to be initialized." - var hc: THash + ## See also: + ## * `incl proc <#incl,OrderedSet[A],A>`_ + ## * `containsOrIncl proc <#containsOrIncl,OrderedSet[A],A>`_ + runnableExamples: + var values = initOrderedSet[int]() + assert(not values.contains(2)) + assert 2 notin values + + values.incl(2) + assert values.contains(2) + assert 2 in values + + var hc: Hash var index = rawGet(s, key, hc) result = index >= 0 -proc rawInsert[A](s: var OrderedSet[A], data: var OrderedKeyValuePairSeq[A], - key: A, hc: THash, h: THash) = - rawInsertImpl() - data[h].next = -1 - if s.first < 0: s.first = h - if s.last >= 0: data[s.last].next = h - s.last = h - -proc enlarge[A](s: var OrderedSet[A]) = - var n: OrderedKeyValuePairSeq[A] - newSeq(n, len(s.data) * growthFactor) - var h = s.first - s.first = -1 - s.last = -1 - swap(s.data, n) - while h >= 0: - var nxt = n[h].next - if isFilled(n[h].hcode): - var j = -1 - rawGetKnownHC(s, n[h].key, n[h].hcode) - rawInsert(s, s.data, n[h].key, n[h].hcode, j) - h = nxt - proc incl*[A](s: var OrderedSet[A], key: A) = ## Includes an element `key` in `s`. ## - ## This doesn't do anything if `key` is already in `s`. Example: + ## This doesn't do anything if `key` is already in `s`. ## - ## .. code-block:: - ## var values = initOrderedSet[int]() - ## values.incl(2) - ## values.incl(2) - ## assert values.len == 1 - assert s.isValid, "The set needs to be initialized." + ## See also: + ## * `excl proc <#excl,OrderedSet[A],A>`_ for excluding an element + ## * `incl proc <#incl,HashSet[A],OrderedSet[A]>`_ for including other set + ## * `containsOrIncl proc <#containsOrIncl,OrderedSet[A],A>`_ + runnableExamples: + var values = initOrderedSet[int]() + values.incl(2) + values.incl(2) + assert values.len == 1 + inclImpl() proc incl*[A](s: var HashSet[A], other: OrderedSet[A]) = - ## Includes all elements from `other` into `s`. - ## - ## Example: - ## - ## .. code-block:: - ## var values = initOrderedSet[int]() - ## values.incl(2) - ## var others = toOrderedSet([6, 7]) - ## values.incl(others) - ## assert values.len == 3 - assert s.isValid, "The set `s` needs to be initialized." - assert other.isValid, "The set `other` needs to be initialized." - for item in other: incl(s, item) + ## Includes all elements from the OrderedSet `other` into + ## HashSet `s` (must be declared as `var`). + ## + ## See also: + ## * `incl proc <#incl,OrderedSet[A],A>`_ for including an element + ## * `containsOrIncl proc <#containsOrIncl,OrderedSet[A],A>`_ + runnableExamples: + var + values = toHashSet([1, 2, 3]) + others = toOrderedSet([3, 4, 5]) + values.incl(others) + assert values.len == 5 + + for item in items(other): incl(s, item) proc containsOrIncl*[A](s: var OrderedSet[A], key: A): bool = - ## Includes `key` in the set `s` and tells if `key` was added to `s`. + ## Includes `key` in the set `s` and tells if `key` was already in `s`. ## - ## The difference with regards to the `incl() <#incl,TOrderedSet[A],A>`_ proc - ## is that this proc returns `true` if `key` was already present in `s`. The + ## The difference with regards to the `incl proc <#incl,OrderedSet[A],A>`_ is + ## that this proc returns `true` if `s` already contained `key`. The ## proc will return false if `key` was added as a new value to `s` during - ## this call. Example: + ## this call. ## - ## .. code-block:: - ## var values = initOrderedSet[int]() - ## assert values.containsOrIncl(2) == false - ## assert values.containsOrIncl(2) == true - assert s.isValid, "The set needs to be initialized." - containsOrInclImpl() + ## See also: + ## * `incl proc <#incl,OrderedSet[A],A>`_ for including an element + ## * `missingOrExcl proc <#missingOrExcl,OrderedSet[A],A>`_ + runnableExamples: + var values = initOrderedSet[int]() + assert values.containsOrIncl(2) == false + assert values.containsOrIncl(2) == true + assert values.containsOrIncl(3) == false -proc init*[A](s: var OrderedSet[A], initialSize=64) = - ## Initializes an ordered hash set. - ## - ## The `initialSize` parameter needs to be a power of two. You can use - ## `math.nextPowerOfTwo() <math.html#nextPowerOfTwo>`_ or `rightSize` to - ## guarantee that at runtime. All set variables must be initialized before - ## use with other procs from this module with the exception of `isValid() - ## <#isValid,TOrderedSet[A]>`_ and `len() <#len,TOrderedSet[A]>`_. - ## - ## You can call this proc on a previously initialized ordered hash set to - ## discard its values. At the moment this is the only proc to remove elements - ## from an ordered hash set. Example: - ## - ## .. code-block :: - ## var a: TOrderedSet[int] - ## a.init(4) - ## a.incl(2) - ## a.init - ## assert a.len == 0 and a.isValid - assert isPowerOfTwo(initialSize) - s.counter = 0 - s.first = -1 - s.last = -1 - newSeq(s.data, initialSize) + containsOrInclImpl() -proc initOrderedSet*[A](initialSize=64): OrderedSet[A] = - ## Wrapper around `init() <#init,TOrderedSet[A],int>`_ for initialization of - ## ordered hash sets. +proc excl*[A](s: var OrderedSet[A], key: A) = + ## Excludes `key` from the set `s`. Efficiency: `O(n)`. ## - ## Returns an empty ordered hash set you can assign directly in ``var`` - ## blocks in a single line. Example: + ## This doesn't do anything if `key` is not found in `s`. ## - ## .. code-block :: - ## var a = initOrderedSet[int](4) - ## a.incl(2) - result.init(initialSize) + ## See also: + ## * `incl proc <#incl,OrderedSet[A],A>`_ for including an element + ## * `missingOrExcl proc <#missingOrExcl,OrderedSet[A],A>`_ + runnableExamples: + var s = toOrderedSet([2, 3, 6, 7]) + s.excl(2) + s.excl(2) + assert s.len == 3 -proc toOrderedSet*[A](keys: openArray[A]): OrderedSet[A] = - ## Creates a new ordered hash set that contains the given `keys`. + discard exclImpl(s, key) + +proc missingOrExcl*[A](s: var OrderedSet[A], key: A): bool = + ## Excludes `key` in the set `s` and tells if `key` was already missing from `s`. + ## Efficiency: O(n). ## - ## Example: + ## The difference with regards to the `excl proc <#excl,OrderedSet[A],A>`_ is + ## that this proc returns `true` if `key` was missing from `s`. + ## The proc will return `false` if `key` was in `s` and it was removed + ## during this call. ## - ## .. code-block:: - ## var numbers = toOrderedSet([1, 2, 3, 4, 5]) - ## assert numbers.contains(2) - ## assert numbers.contains(4) - result = initOrderedSet[A](rightSize(keys.len)) - for key in items(keys): result.incl(key) + ## See also: + ## * `excl proc <#excl,OrderedSet[A],A>`_ + ## * `containsOrIncl proc <#containsOrIncl,OrderedSet[A],A>`_ + runnableExamples: + var s = toOrderedSet([2, 3, 6, 7]) + assert s.missingOrExcl(4) == true + assert s.missingOrExcl(6) == false + assert s.missingOrExcl(6) == true -proc `$`*[A](s: OrderedSet[A]): string = - ## Converts the ordered hash set `s` to a string, mostly for logging purposes. + exclImpl(s, key) + +proc clear*[A](s: var OrderedSet[A]) = + ## Clears the OrderedSet back to an empty state, without shrinking + ## any of the existing storage. ## - ## Don't use this proc for serialization, the representation may change at - ## any moment and values are not escaped. Example: + ## `O(n)` operation where `n` is the size of the hash bucket. + runnableExamples: + var s = toOrderedSet([3, 5, 7]) + clear(s) + assert len(s) == 0 + + s.counter = 0 + s.first = -1 + s.last = -1 + for i in 0 ..< s.data.len: + s.data[i].hcode = 0 + s.data[i].next = 0 + {.push warning[UnsafeDefault]:off.} + reset(s.data[i].key) + {.pop.} + +proc len*[A](s: OrderedSet[A]): int {.inline.} = + ## Returns the number of elements in `s`. ## - ## Example: + ## Due to an implementation detail you can call this proc on variables which + ## have not been initialized yet. The proc will return zero as the length + ## then. + runnableExamples: + var a: OrderedSet[string] + assert len(a) == 0 + let s = toHashSet([3, 5, 7]) + assert len(s) == 3 + + result = s.counter + +proc card*[A](s: OrderedSet[A]): int {.inline.} = + ## Alias for `len() <#len,OrderedSet[A]>`_. ## - ## .. code-block:: - ## echo toOrderedSet([2, 4, 5]) - ## # --> {2, 4, 5} - ## echo toOrderedSet(["no", "esc'aping", "is \" provided"]) - ## # --> {no, esc'aping, is " provided} - assert s.isValid, "The set needs to be initialized." - dollarImpl() + ## Card stands for the `cardinality + ## <http://en.wikipedia.org/wiki/Cardinality>`_ of a set. + result = s.counter proc `==`*[A](s, t: OrderedSet[A]): bool = ## Equality for ordered sets. + runnableExamples: + let + a = toOrderedSet([1, 2]) + b = toOrderedSet([2, 1]) + assert(not (a == b)) + if s.counter != t.counter: return false var h = s.first - var g = s.first + var g = t.first var compared = 0 while h >= 0 and g >= 0: var nxh = s.data[h].next var nxg = t.data[g].next - if isFilled(s.data[h].hcode) and isFilled(s.data[g].hcode): - if s.data[h].key == s.data[g].key: + if isFilled(s.data[h].hcode) and isFilled(t.data[g].hcode): + if s.data[h].key == t.data[g].key: inc compared else: return false @@ -798,177 +867,64 @@ proc `==`*[A](s, t: OrderedSet[A]): bool = g = nxg result = compared == s.counter -proc testModule() = - ## Internal micro test to validate docstrings and such. - block isValidTest: - var options: HashSet[string] - proc savePreferences(options: HashSet[string]) = - assert options.isValid, "Pass an initialized set!" - options = initSet[string]() - options.savePreferences - - block lenTest: - var values: HashSet[int] - assert(not values.isValid) - assert values.len == 0 - assert values.card == 0 - - block setIterator: - type pair = tuple[a, b: int] - var a, b = initSet[pair]() - a.incl((2, 3)) - a.incl((3, 2)) - a.incl((2, 3)) - for x, y in a.items: - b.incl((x - 2, y + 1)) - assert a.len == b.card - assert a.len == 2 - #echo b - - block setContains: - var values = initSet[int]() - assert(not values.contains(2)) - values.incl(2) - assert values.contains(2) - values.excl(2) - assert(not values.contains(2)) - - values.incl(4) - var others = toSet([6, 7]) - values.incl(others) - assert values.len == 3 - - values.init - assert values.containsOrIncl(2) == false - assert values.containsOrIncl(2) == true - var - a = toSet([1, 2]) - b = toSet([1]) - b.incl(2) - assert a == b - - block exclusions: - var s = toSet([2, 3, 6, 7]) - s.excl(2) - s.excl(2) - assert s.len == 3 +proc hash*[A](s: OrderedSet[A]): Hash = + ## Hashing of OrderedSet. + forAllOrderedPairs: + result = result !& s.data[h].hcode + result = !$result - var - numbers = toSet([1, 2, 3, 4, 5]) - even = toSet([2, 4, 6, 8]) - numbers.excl(even) - #echo numbers - # --> {1, 3, 5} - - block toSeqAndString: - var a = toSet([2, 4, 5]) - var b = initSet[int]() - for x in [2, 4, 5]: b.incl(x) - assert($a == $b) - #echo a - #echo toSet(["no", "esc'aping", "is \" provided"]) - - #block orderedToSeqAndString: - # echo toOrderedSet([2, 4, 5]) - # echo toOrderedSet(["no", "esc'aping", "is \" provided"]) - - block setOperations: - var - a = toSet(["a", "b"]) - b = toSet(["b", "c"]) - c = union(a, b) - assert c == toSet(["a", "b", "c"]) - var d = intersection(a, b) - assert d == toSet(["b"]) - var e = difference(a, b) - assert e == toSet(["a"]) - var f = symmetricDifference(a, b) - assert f == toSet(["a", "c"]) - assert d < a and d < b - assert((a < a) == false) - assert d <= a and d <= b - assert((a <= a)) - # Alias test. - assert a + b == toSet(["a", "b", "c"]) - assert a * b == toSet(["b"]) - assert a - b == toSet(["a"]) - assert a -+- b == toSet(["a", "c"]) - assert disjoint(a, b) == false - assert disjoint(a, b - a) == true +proc `$`*[A](s: OrderedSet[A]): string = + ## Converts the ordered hash set `s` to a string, mostly for logging and + ## printing purposes. + ## + ## Don't use this proc for serialization, the representation may change at + ## any moment and values are not escaped. + ## + ## **Examples:** + ## ```Nim + ## echo toOrderedSet([2, 4, 5]) + ## # --> {2, 4, 5} + ## echo toOrderedSet(["no", "esc'aping", "is \" provided"]) + ## # --> {no, esc'aping, is " provided} + ## ``` + dollarImpl() - block mapSet: - var a = toSet([1, 2, 3]) - var b = a.map(proc (x: int): string = $x) - assert b == toSet(["1", "2", "3"]) - - block isValidTest: - var cards: OrderedSet[string] - proc saveTarotCards(cards: OrderedSet[string]) = - assert cards.isValid, "Pass an initialized set!" - cards = initOrderedSet[string]() - cards.saveTarotCards - - block lenTest: - var values: OrderedSet[int] - assert(not values.isValid) - assert values.len == 0 - assert values.card == 0 - - block setIterator: - type pair = tuple[a, b: int] - var a, b = initOrderedSet[pair]() - a.incl((2, 3)) - a.incl((3, 2)) - a.incl((2, 3)) - for x, y in a.items: - b.incl((x - 2, y + 1)) - assert a.len == b.card - assert a.len == 2 - - #block orderedSetIterator: - # var a = initOrderedSet[int]() - # for value in [9, 2, 1, 5, 1, 8, 4, 2]: - # a.incl(value) - # for value in a.items: - # echo "Got ", value - - block setContains: - var values = initOrderedSet[int]() - assert(not values.contains(2)) - values.incl(2) - assert values.contains(2) - block toSeqAndString: - var a = toOrderedSet([2, 4, 5]) - var b = initOrderedSet[int]() - for x in [2, 4, 5]: b.incl(x) - assert($a == $b) - assert(a == b) # https://github.com/Araq/Nimrod/issues/1413 - block initBlocks: - var a: OrderedSet[int] - a.init(4) - a.incl(2) - a.init - assert a.len == 0 and a.isValid - a = initOrderedSet[int](4) - a.incl(2) - assert a.len == 1 - - var b: HashSet[int] - b.init(4) - b.incl(2) - b.init - assert b.len == 0 and b.isValid - b = initSet[int](4) - b.incl(2) - assert b.len == 1 - - for i in 0 .. 32: - var s = rightSize(i) - if s <= i or mustRehash(s, i): - echo "performance issue: rightSize() will not elide enlarge() at ", i - - echo "Micro tests run successfully." - -when isMainModule and not defined(release): testModule() +iterator items*[A](s: OrderedSet[A]): A = + ## Iterates over keys in the ordered set `s` in insertion order. + ## + ## If you need a sequence with the elements you can use `sequtils.toSeq + ## template <sequtils.html#toSeq.t,untyped>`_. + ## + ## ```Nim + ## var a = initOrderedSet[int]() + ## for value in [9, 2, 1, 5, 1, 8, 4, 2]: + ## a.incl(value) + ## for value in a.items: + ## echo "Got ", value + ## # --> Got 9 + ## # --> Got 2 + ## # --> Got 1 + ## # --> Got 5 + ## # --> Got 8 + ## # --> Got 4 + ## ``` + let length = s.len + forAllOrderedPairs: + yield s.data[h].key + assert(len(s) == length, "the length of the OrderedSet changed while iterating over it") + +iterator pairs*[A](s: OrderedSet[A]): tuple[a: int, b: A] = + ## Iterates through (position, value) tuples of OrderedSet `s`. + runnableExamples: + let a = toOrderedSet("abracadabra") + var p = newSeq[(int, char)]() + for x in pairs(a): + p.add(x) + assert p == @[(0, 'a'), (1, 'b'), (2, 'r'), (3, 'c'), (4, 'd')] + + let length = s.len + forAllOrderedPairs: + yield (idx, s.data[h].key) + assert(len(s) == length, "the length of the OrderedSet changed while iterating over it") diff --git a/lib/pure/collections/sharedlist.nim b/lib/pure/collections/sharedlist.nim new file mode 100644 index 000000000..ec8f1cd86 --- /dev/null +++ b/lib/pure/collections/sharedlist.nim @@ -0,0 +1,105 @@ +# +# +# Nim's Runtime Library +# (c) Copyright 2015 Andreas Rumpf +# +# See the file "copying.txt", included in this +# distribution, for details about the copyright. +# + +## Shared list support. +## +## Unstable API. + +{.deprecated.} + +{.push stackTrace: off.} + +import + std/locks + +const + ElemsPerNode = 100 + +type + SharedListNode[A] = ptr object + next: SharedListNode[A] + dataLen: int + d: array[ElemsPerNode, A] + + SharedList*[A] = object ## generic shared list + head, tail: SharedListNode[A] + lock*: Lock + +template withLock(t, x: untyped) = + acquire(t.lock) + x + release(t.lock) + +proc iterAndMutate*[A](x: var SharedList[A]; action: proc(x: A): bool) = + ## Iterates over the list. If `action` returns true, the + ## current item is removed from the list. + ## + ## .. warning:: It may not preserve the element order after some modifications. + withLock(x): + var n = x.head + while n != nil: + var i = 0 + while i < n.dataLen: + # action can add new items at the end, so release the lock: + release(x.lock) + if action(n.d[i]): + acquire(x.lock) + let t = x.tail + dec t.dataLen # TODO considering t.dataLen == 0, + # probably the module should be refactored using doubly linked lists + n.d[i] = t.d[t.dataLen] + else: + acquire(x.lock) + inc i + n = n.next + +iterator items*[A](x: var SharedList[A]): A = + withLock(x): + var it = x.head + while it != nil: + for i in 0..it.dataLen-1: + yield it.d[i] + it = it.next + +proc add*[A](x: var SharedList[A]; y: A) = + withLock(x): + var node: SharedListNode[A] + if x.tail == nil: + node = cast[typeof node](allocShared0(sizeof(node[]))) + x.tail = node + x.head = node + elif x.tail.dataLen == ElemsPerNode: + node = cast[typeof node](allocShared0(sizeof(node[]))) + x.tail.next = node + x.tail = node + else: + node = x.tail + node.d[node.dataLen] = y + inc(node.dataLen) + +proc init*[A](t: var SharedList[A]) = + initLock t.lock + t.head = nil + t.tail = nil + +proc clear*[A](t: var SharedList[A]) = + withLock(t): + var it = t.head + while it != nil: + let nxt = it.next + deallocShared(it) + it = nxt + t.head = nil + t.tail = nil + +proc deinitSharedList*[A](t: var SharedList[A]) = + clear(t) + deinitLock t.lock + +{.pop.} diff --git a/lib/pure/collections/sharedtables.nim b/lib/pure/collections/sharedtables.nim new file mode 100644 index 000000000..b474ecd31 --- /dev/null +++ b/lib/pure/collections/sharedtables.nim @@ -0,0 +1,252 @@ +# +# +# Nim's Runtime Library +# (c) Copyright 2015 Andreas Rumpf +# +# See the file "copying.txt", included in this +# distribution, for details about the copyright. +# + +## Shared table support for Nim. Use plain old non GC'ed keys and values or +## you'll be in trouble. Uses a single lock to protect the table, lockfree +## implementations welcome but if lock contention is so high that you need a +## lockfree hash table, you're doing it wrong. +## +## Unstable API. + +{.deprecated.} + +import + std/[hashes, math, locks] + +type + KeyValuePair[A, B] = tuple[hcode: Hash, key: A, val: B] + KeyValuePairSeq[A, B] = ptr UncheckedArray[KeyValuePair[A, B]] + SharedTable*[A, B] = object ## generic hash SharedTable + data: KeyValuePairSeq[A, B] + counter, dataLen: int + lock: Lock + +template maxHash(t): untyped = t.dataLen-1 + +include tableimpl + +template st_maybeRehashPutImpl(enlarge) {.dirty.} = + if mustRehash(t): + enlarge(t) + index = rawGetKnownHC(t, key, hc) + index = -1 - index # important to transform for mgetOrPutImpl + rawInsert(t, t.data, key, val, hc, index) + inc(t.counter) + +proc enlarge[A, B](t: var SharedTable[A, B]) = + let oldSize = t.dataLen + let size = oldSize * growthFactor + var n = cast[KeyValuePairSeq[A, B]](allocShared0( + sizeof(KeyValuePair[A, B]) * size)) + t.dataLen = size + swap(t.data, n) + for i in 0..<oldSize: + let eh = n[i].hcode + if isFilled(eh): + var j: Hash = eh and maxHash(t) + while isFilled(t.data[j].hcode): + j = nextTry(j, maxHash(t)) + rawInsert(t, t.data, n[i].key, n[i].val, eh, j) + deallocShared(n) + +template withLock(t, x: untyped) = + acquire(t.lock) + x + release(t.lock) + +template withValue*[A, B](t: var SharedTable[A, B], key: A, + value, body: untyped) = + ## Retrieves the value at `t[key]`. + ## `value` can be modified in the scope of the `withValue` call. + runnableExamples: + var table: SharedTable[string, string] + init(table) + + table["a"] = "x" + table["b"] = "y" + table["c"] = "z" + + table.withValue("a", value): + assert value[] == "x" + + table.withValue("b", value): + value[] = "modified" + + table.withValue("b", value): + assert value[] == "modified" + + table.withValue("nonexistent", value): + assert false # not called + acquire(t.lock) + try: + var hc: Hash + var index = rawGet(t, key, hc) + let hasKey = index >= 0 + if hasKey: + var value {.inject.} = addr(t.data[index].val) + body + finally: + release(t.lock) + +template withValue*[A, B](t: var SharedTable[A, B], key: A, + value, body1, body2: untyped) = + ## Retrieves the value at `t[key]`. + ## `value` can be modified in the scope of the `withValue` call. + runnableExamples: + var table: SharedTable[string, string] + init(table) + + table["a"] = "x" + table["b"] = "y" + table["c"] = "z" + + + table.withValue("a", value): + value[] = "m" + + var flag = false + table.withValue("d", value): + discard value + doAssert false + do: # if "d" notin table + flag = true + + if flag: + table["d"] = "n" + + assert table.mget("a") == "m" + assert table.mget("d") == "n" + + acquire(t.lock) + try: + var hc: Hash + var index = rawGet(t, key, hc) + let hasKey = index >= 0 + if hasKey: + var value {.inject.} = addr(t.data[index].val) + body1 + else: + body2 + finally: + release(t.lock) + +proc mget*[A, B](t: var SharedTable[A, B], key: A): var B = + ## Retrieves the value at `t[key]`. The value can be modified. + ## If `key` is not in `t`, the `KeyError` exception is raised. + withLock t: + var hc: Hash + var index = rawGet(t, key, hc) + let hasKey = index >= 0 + if hasKey: result = t.data[index].val + if not hasKey: + when compiles($key): + raise newException(KeyError, "key not found: " & $key) + else: + raise newException(KeyError, "key not found") + +proc mgetOrPut*[A, B](t: var SharedTable[A, B], key: A, val: B): var B = + ## Retrieves value at `t[key]` or puts `val` if not present, either way + ## returning a value which can be modified. **Note**: This is inherently + ## unsafe in the context of multi-threading since it returns a pointer + ## to `B`. + withLock t: + mgetOrPutImpl(enlarge) + +proc hasKeyOrPut*[A, B](t: var SharedTable[A, B], key: A, val: B): bool = + ## Returns true if `key` is in the table, otherwise inserts `value`. + withLock t: + hasKeyOrPutImpl(enlarge) + +template tabMakeEmpty(i) = t.data[i].hcode = 0 +template tabCellEmpty(i) = isEmpty(t.data[i].hcode) +template tabCellHash(i) = t.data[i].hcode + +proc withKey*[A, B](t: var SharedTable[A, B], key: A, + mapper: proc(key: A, val: var B, pairExists: var bool)) = + ## Computes a new mapping for the `key` with the specified `mapper` + ## procedure. + ## + ## The `mapper` takes 3 arguments: + ## + ## 1. `key` - the current key, if it exists, or the key passed to + ## `withKey` otherwise; + ## 2. `val` - the current value, if the key exists, or default value + ## of the type otherwise; + ## 3. `pairExists` - `true` if the key exists, `false` otherwise. + ## + ## The `mapper` can can modify `val` and `pairExists` values to change + ## the mapping of the key or delete it from the table. + ## When adding a value, make sure to set `pairExists` to `true` along + ## with modifying the `val`. + ## + ## The operation is performed atomically and other operations on the table + ## will be blocked while the `mapper` is invoked, so it should be short and + ## simple. + ## + ## Example usage: + ## + ## ```nim + ## # If value exists, decrement it. + ## # If it becomes zero or less, delete the key + ## t.withKey(1'i64) do (k: int64, v: var int, pairExists: var bool): + ## if pairExists: + ## dec v + ## if v <= 0: + ## pairExists = false + ## ``` + withLock t: + var hc: Hash + var index = rawGet(t, key, hc) + + var pairExists = index >= 0 + if pairExists: + mapper(t.data[index].key, t.data[index].val, pairExists) + if not pairExists: + delImplIdx(t, index, tabMakeEmpty, tabCellEmpty, tabCellHash) + else: + var val: B + mapper(key, val, pairExists) + if pairExists: + st_maybeRehashPutImpl(enlarge) + +proc `[]=`*[A, B](t: var SharedTable[A, B], key: A, val: B) = + ## Puts a (key, value)-pair into `t`. + withLock t: + putImpl(enlarge) + +proc add*[A, B](t: var SharedTable[A, B], key: A, val: B) = + ## Puts a new (key, value)-pair into `t` even if `t[key]` already exists. + ## This can introduce duplicate keys into the table! + withLock t: + addImpl(enlarge) + +proc del*[A, B](t: var SharedTable[A, B], key: A) = + ## Deletes `key` from hash table `t`. + withLock t: + delImpl(tabMakeEmpty, tabCellEmpty, tabCellHash) + +proc len*[A, B](t: var SharedTable[A, B]): int = + ## Number of elements in `t`. + withLock t: + result = t.counter + +proc init*[A, B](t: var SharedTable[A, B], initialSize = 32) = + ## Creates a new hash table that is empty. + ## + ## This proc must be called before any other usage of `t`. + let initialSize = slotsNeeded(initialSize) + t.counter = 0 + t.dataLen = initialSize + t.data = cast[KeyValuePairSeq[A, B]](allocShared0( + sizeof(KeyValuePair[A, B]) * initialSize)) + initLock t.lock + +proc deinitSharedTable*[A, B](t: var SharedTable[A, B]) = + deallocShared(t.data) + deinitLock t.lock diff --git a/lib/pure/collections/tableimpl.nim b/lib/pure/collections/tableimpl.nim new file mode 100644 index 000000000..3542741fa --- /dev/null +++ b/lib/pure/collections/tableimpl.nim @@ -0,0 +1,231 @@ +# +# +# Nim's Runtime Library +# (c) Copyright 2015 Andreas Rumpf +# +# See the file "copying.txt", included in this +# distribution, for details about the copyright. +# + +# An `include` file for the different table implementations. + +include hashcommon + +const + defaultInitialSize* = 32 + +template rawGetDeepImpl() {.dirty.} = # Search algo for unconditional add + genHashImpl(key, hc) + var h: Hash = hc and maxHash(t) + while isFilled(t.data[h].hcode): + h = nextTry(h, maxHash(t)) + result = h + +template rawInsertImpl() {.dirty.} = + data[h].key = key + data[h].val = val + data[h].hcode = hc + +proc rawGetDeep[X, A](t: X, key: A, hc: var Hash): int {.inline, outParamsAt: [3].} = + rawGetDeepImpl() + +proc rawInsert[X, A, B](t: var X, data: var KeyValuePairSeq[A, B], + key: A, val: sink B, hc: Hash, h: Hash) = + rawInsertImpl() + +template checkIfInitialized() = + if t.dataLen == 0: + initImpl(t, defaultInitialSize) + +template addImpl(enlarge) {.dirty.} = + checkIfInitialized() + if mustRehash(t): enlarge(t) + var hc: Hash + var j = rawGetDeep(t, key, hc) + rawInsert(t, t.data, key, val, hc, j) + inc(t.counter) + +template maybeRehashPutImpl(enlarge, val) {.dirty.} = + checkIfInitialized() + if mustRehash(t): + enlarge(t) + index = rawGetKnownHC(t, key, hc) + index = -1 - index # important to transform for mgetOrPutImpl + rawInsert(t, t.data, key, val, hc, index) + inc(t.counter) + +template putImpl(enlarge) {.dirty.} = + checkIfInitialized() + var hc: Hash = default(Hash) + var index = rawGet(t, key, hc) + if index >= 0: t.data[index].val = val + else: maybeRehashPutImpl(enlarge, val) + +template mgetOrPutImpl(enlarge) {.dirty.} = + checkIfInitialized() + var hc: Hash = default(Hash) + var index = rawGet(t, key, hc) + if index < 0: + # not present: insert (flipping index) + when declared(val): + maybeRehashPutImpl(enlarge, val) + else: + maybeRehashPutImpl(enlarge, default(B)) + # either way return modifiable val + result = t.data[index].val + +# template mgetOrPutDefaultImpl(enlarge) {.dirty.} = +# checkIfInitialized() +# var hc: Hash = default(Hash) +# var index = rawGet(t, key, hc) +# if index < 0: +# # not present: insert (flipping index) +# maybeRehashPutImpl(enlarge, default(B)) +# # either way return modifiable val +# result = t.data[index].val + +template hasKeyOrPutImpl(enlarge) {.dirty.} = + checkIfInitialized() + var hc: Hash = default(Hash) + var index = rawGet(t, key, hc) + if index < 0: + result = false + maybeRehashPutImpl(enlarge, val) + else: result = true + +# delImplIdx is KnuthV3 Algo6.4R adapted to i=i+1 (from i=i-1) which has come to +# be called "back shift delete". It shifts elements in the collision cluster of +# a victim backward to make things as-if the victim were never inserted in the +# first place. This is desirable to keep things "ageless" after many deletes. +# It is trickier than you might guess since initial probe (aka "home") locations +# of keys in a cluster may collide and since table addresses wrap around. +# +# A before-after diagram might look like ('.' means empty): +# slot: 0 1 2 3 4 5 6 7 +# before(1) +# hash1: 6 7 . 3 . 5 5 6 ; Really hash() and msk +# data1: E F . A . B C D ; About to delete C @index 6 +# after(2) +# hash2: 7 . . 3 . 5 6 6 ; Really hash() and msk +# data2: F . . A . B D E ; After deletion of C +# +# This lowers total search depth over the whole table from 1+1+2+2+2+2=10 to 7. +# Had the victim been B@5, C would need back shifting to slot 5. Total depth is +# always lowered by at least 1, e.g. victim A@3. This is all quite fast when +# empty slots are frequent (also needed to keep insert/miss searches fast) and +# hash() is either fast or avoided (via `.hcode`). It need not compare keys. +# +# delImplIdx realizes the above transformation, but only works for dense Linear +# Probing, nextTry(h)=h+1. This is not an important limitation since that's the +# fastest sequence on any CPU made since the 1980s. { Performance analysis often +# overweights "key cmp" neglecting cache behavior, giving bad ideas how big/slow +# tables behave (when perf matters most!). Comparing hcode first means usually +# only 1 key cmp is needed for *any* seq. Timing only predictable activity, +# small tables, and/or integer keys often perpetuates such bad ideas. } + +template delImplIdx(t, i, makeEmpty, cellEmpty, cellHash) = + let msk = maxHash(t) + if i >= 0: + dec(t.counter) + block outer: + while true: # KnuthV3 Algo6.4R adapted for i=i+1 instead of i=i-1 + var j = i # The correctness of this depends on (h+1) in nextTry + var r = j # though may be adaptable to other simple sequences. + makeEmpty(i) # mark current EMPTY + {.push warning[UnsafeDefault]:off.} + reset(t.data[i].key) + reset(t.data[i].val) + {.pop.} + while true: + i = (i + 1) and msk # increment mod table size + if cellEmpty(i): # end of collision cluster; So all done + break outer + r = cellHash(i) and msk # initial probe index for key@slot i + if not ((i >= r and r > j) or (r > j and j > i) or (j > i and i >= r)): + break + when defined(js): + t.data[j] = t.data[i] + else: + t.data[j] = move(t.data[i]) # data[j] will be marked EMPTY next loop + +template delImpl(makeEmpty, cellEmpty, cellHash) {.dirty.} = + var hc: Hash + var i = rawGet(t, key, hc) + delImplIdx(t, i, makeEmpty, cellEmpty, cellHash) + +template delImplNoHCode(makeEmpty, cellEmpty, cellHash) {.dirty.} = + if t.dataLen > 0: + var i: Hash = hash(key) and maxHash(t) + while not cellEmpty(i): + if t.data[i].key == key: + delImplIdx(t, i, makeEmpty, cellEmpty, cellHash) + break + i = nextTry(i, maxHash(t)) + +template clearImpl() {.dirty.} = + for i in 0 ..< t.dataLen: + when compiles(t.data[i].hcode): # CountTable records don't contain a hcode + t.data[i].hcode = 0 + {.push warning[UnsafeDefault]:off.} + reset(t.data[i].key) + reset(t.data[i].val) + {.pop.} + t.counter = 0 + +template ctAnd(a, b): bool = + when a: + when b: true + else: false + else: false + +template initImpl(result: typed, size: int) = + let correctSize = slotsNeeded(size) + when ctAnd(declared(SharedTable), typeof(result) is SharedTable): + init(result, correctSize) + else: + result.counter = 0 + newSeq(result.data, correctSize) + when compiles(result.first): + result.first = -1 + result.last = -1 + +template insertImpl() = # for CountTable + if t.dataLen == 0: initImpl(t, defaultInitialSize) + if mustRehash(t): enlarge(t) + ctRawInsert(t, t.data, key, val) + inc(t.counter) + +template getOrDefaultImpl(t, key): untyped = + mixin rawGet + var hc: Hash + var index = rawGet(t, key, hc) + if index >= 0: result = t.data[index].val + +template getOrDefaultImpl(t, key, default: untyped): untyped = + mixin rawGet + var hc: Hash + var index = rawGet(t, key, hc) + result = if index >= 0: t.data[index].val else: default + +template dollarImpl(): untyped {.dirty.} = + if t.len == 0: + result = "{:}" + else: + result = "{" + for key, val in pairs(t): + if result.len > 1: result.add(", ") + result.addQuoted(key) + result.add(": ") + result.addQuoted(val) + result.add("}") + +template equalsImpl(s, t: typed) = + if s.counter == t.counter: + # different insertion orders mean different 'data' seqs, so we have + # to use the slow route here: + for key, val in s: + if not t.hasKey(key): return false + if t.getOrDefault(key) != val: return false + return true + else: + return false diff --git a/lib/pure/collections/tables.nim b/lib/pure/collections/tables.nim index f85acef22..d414caeed 100644 --- a/lib/pure/collections/tables.nim +++ b/lib/pure/collections/tables.nim @@ -7,347 +7,591 @@ # distribution, for details about the copyright. # -## The ``tables`` module implements variants of an efficient `hash table`:idx: +## The `tables` module implements variants of an efficient `hash table`:idx: ## (also often named `dictionary`:idx: in other programming languages) that is -## a mapping from keys to values. ``Table`` is the usual hash table, -## ``OrderedTable`` is like ``Table`` but remembers insertion order -## and ``CountTable`` is a mapping from a key to its number of occurrences. -## For consistency with every other data type in Nim these have **value** -## semantics, this means that ``=`` performs a copy of the hash table. -## For **reference** semantics use the ``Ref`` variant: ``TableRef``, -## ``OrderedTableRef``, ``CountTableRef``. -## -## If you are using simple standard types like ``int`` or ``string`` for the -## keys of the table you won't have any problems, but as soon as you try to use -## a more complex object as a key you will be greeted by a strange compiler -## error:: +## a mapping from keys to values. ## -## Error: type mismatch: got (Person) -## but expected one of: -## hashes.hash(x: openarray[A]): THash -## hashes.hash(x: int): THash -## hashes.hash(x: float): THash -## … +## There are several different types of hash tables available: +## * `Table<#Table>`_ is the usual hash table, +## * `OrderedTable<#OrderedTable>`_ is like `Table` but remembers insertion order, +## * `CountTable<#CountTable>`_ is a mapping from a key to its number of occurrences ## -## What is happening here is that the types used for table keys require to have -## a ``hash()`` proc which will convert them to a `THash <hashes.html#THash>`_ -## value, and the compiler is listing all the hash functions it knows. -## Additionally there has to be a ``==`` operator that provides the same -## semantics as its corresponding ``hash`` proc. -## -## After you add ``hash`` and ``==`` for your custom type everything will work. -## Currently however ``hash`` for objects is not defined, whereas -## ``system.==`` for objects does exist and performs a "deep" comparison (every -## field is compared) which is usually what you want. So in the following -## example implementing only ``hash`` suffices: -## -## .. code-block:: -## type -## Person = object -## firstName, lastName: string +## For consistency with every other data type in Nim these have **value** +## semantics, this means that `=` performs a copy of the hash table. ## -## proc hash(x: Person): THash = -## ## Piggyback on the already available string hash proc. -## ## -## ## Without this proc nothing works! -## result = x.firstName.hash !& x.lastName.hash -## result = !$result +## For `ref semantics<manual.html#types-reference-and-pointer-types>`_ +## use their `Ref` variants: `TableRef<#TableRef>`_, +## `OrderedTableRef<#OrderedTableRef>`_, and `CountTableRef<#CountTableRef>`_. ## -## var -## salaries = initTable[Person, int]() -## p1, p2: Person +## To give an example, when `a` is a `Table`, then `var b = a` gives `b` +## as a new independent table. `b` is initialised with the contents of `a`. +## Changing `b` does not affect `a` and vice versa: + +runnableExamples: + var + a = {1: "one", 2: "two"}.toTable # creates a Table + b = a + + assert a == b + + b[3] = "three" + assert 3 notin a + assert 3 in b + assert a != b + +## On the other hand, when `a` is a `TableRef` instead, then changes to `b` +## also affect `a`. Both `a` and `b` **ref** the same data structure: + +runnableExamples: + var + a = {1: "one", 2: "two"}.newTable # creates a TableRef + b = a + + assert a == b + + b[3] = "three" + + assert 3 in a + assert 3 in b + assert a == b + ## -## p1.firstName = "Jon" -## p1.lastName = "Ross" -## salaries[p1] = 30_000 +## ---- ## -## p2.firstName = "소진" -## p2.lastName = "박" -## salaries[p2] = 45_000 -import - hashes, math +## # Basic usage -{.pragma: myShallow.} -type - KeyValuePair[A, B] = tuple[hcode: THash, key: A, val: B] - KeyValuePairSeq[A, B] = seq[KeyValuePair[A, B]] - Table* {.myShallow.}[A, B] = object ## generic hash table - data: KeyValuePairSeq[A, B] - counter: int - TableRef*[A,B] = ref Table[A, B] +## ## Table +runnableExamples: + from std/sequtils import zip -{.deprecated: [TTable: Table, PTable: TableRef].} + let + names = ["John", "Paul", "George", "Ringo"] + years = [1940, 1942, 1943, 1940] -when not defined(nimhygiene): - {.pragma: dirty.} + var beatles = initTable[string, int]() -# hcode for real keys cannot be zero. hcode==0 signifies an empty slot. These -# two procs retain clarity of that encoding without the space cost of an enum. -proc isEmpty(hcode: THash): bool {.inline.} = - result = hcode == 0 + for pairs in zip(names, years): + let (name, birthYear) = pairs + beatles[name] = birthYear -proc isFilled(hcode: THash): bool {.inline.} = - result = hcode != 0 + assert beatles == {"George": 1943, "Ringo": 1940, "Paul": 1942, "John": 1940}.toTable -proc len*[A, B](t: Table[A, B]): int = - ## returns the number of keys in `t`. - result = t.counter -iterator pairs*[A, B](t: Table[A, B]): (A, B) = - ## iterates over any (key, value) pair in the table `t`. - for h in 0..high(t.data): - if isFilled(t.data[h].hcode): yield (t.data[h].key, t.data[h].val) + var beatlesByYear = initTable[int, seq[string]]() -iterator mpairs*[A, B](t: var Table[A, B]): (A, var B) = - ## iterates over any (key, value) pair in the table `t`. The values - ## can be modified. - for h in 0..high(t.data): - if isFilled(t.data[h].hcode): yield (t.data[h].key, t.data[h].val) + for pairs in zip(years, names): + let (birthYear, name) = pairs + if not beatlesByYear.hasKey(birthYear): + # if a key doesn't exist, we create one with an empty sequence + # before we can add elements to it + beatlesByYear[birthYear] = @[] + beatlesByYear[birthYear].add(name) -iterator keys*[A, B](t: Table[A, B]): A = - ## iterates over any key in the table `t`. - for h in 0..high(t.data): - if isFilled(t.data[h].hcode): yield t.data[h].key + assert beatlesByYear == {1940: @["John", "Ringo"], 1942: @["Paul"], 1943: @["George"]}.toTable -iterator values*[A, B](t: Table[A, B]): B = - ## iterates over any value in the table `t`. - for h in 0..high(t.data): - if isFilled(t.data[h].hcode): yield t.data[h].val +## ## OrderedTable +## `OrderedTable<#OrderedTable>`_ is used when it is important to preserve +## the insertion order of keys. -iterator mvalues*[A, B](t: var Table[A, B]): var B = - ## iterates over any value in the table `t`. The values can be modified. - for h in 0..high(t.data): - if isFilled(t.data[h].hcode): yield t.data[h].val +runnableExamples: + let + a = [('z', 1), ('y', 2), ('x', 3)] + ot = a.toOrderedTable # ordered tables -const - growthFactor = 2 + assert $ot == """{'z': 1, 'y': 2, 'x': 3}""" -proc mustRehash(length, counter: int): bool {.inline.} = - assert(length > counter) - result = (length * 2 < counter * 3) or (length - counter < 4) +## ## CountTable +## `CountTable<#CountTable>`_ is useful for counting number of items of some +## container (e.g. string, sequence or array), as it is a mapping where the +## items are the keys, and their number of occurrences are the values. +## For that purpose `toCountTable proc<#toCountTable,openArray[A]>`_ +## comes handy: -proc rightSize*(count: int): int {.inline.} = - ## Return the value of `initialSize` to support `count` items. - ## - ## If more items are expected to be added, simply add that - ## expected extra amount to the parameter before calling this. - ## - ## Internally, we want mustRehash(rightSize(x), x) == false. - result = nextPowerOfTwo(count * 3 div 2 + 4) +runnableExamples: + let myString = "abracadabra" + let letterFrequencies = toCountTable(myString) + assert $letterFrequencies == "{'a': 5, 'd': 1, 'b': 2, 'r': 2, 'c': 1}" -proc nextTry(h, maxHash: THash): THash {.inline.} = - result = (h + 1) and maxHash +## The same could have been achieved by manually iterating over a container +## and increasing each key's value with `inc proc +## <#inc,CountTable[A],A,int>`_: -template rawGetKnownHCImpl() {.dirty.} = - var h: THash = hc and high(t.data) # start with real hash value - while isFilled(t.data[h].hcode): - # Compare hc THEN key with boolean short circuit. This makes the common case - # zero ==key's for missing (e.g.inserts) and exactly one ==key for present. - # It does slow down succeeding lookups by one extra THash cmp&and..usually - # just a few clock cycles, generally worth it for any non-integer-like A. - if t.data[h].hcode == hc and t.data[h].key == key: - return h - h = nextTry(h, high(t.data)) - result = -1 - h # < 0 => MISSING; insert idx = -1 - result +runnableExamples: + let myString = "abracadabra" + var letterFrequencies = initCountTable[char]() + for c in myString: + letterFrequencies.inc(c) + assert $letterFrequencies == "{'d': 1, 'r': 2, 'c': 1, 'a': 5, 'b': 2}" -template rawGetImpl() {.dirty.} = - hc = hash(key) - if hc == 0: # This almost never taken branch should be very predictable. - hc = 314159265 # Value doesn't matter; Any non-zero favorite is fine. - rawGetKnownHCImpl() +## +## ---- +## -template rawGetDeepImpl() {.dirty.} = # Search algo for unconditional add - hc = hash(key) - if hc == 0: - hc = 314159265 - var h: THash = hc and high(t.data) - while isFilled(t.data[h].hcode): - h = nextTry(h, high(t.data)) - result = h +## ## Hashing +## +## If you are using simple standard types like `int` or `string` for the +## keys of the table you won't have any problems, but as soon as you try to use +## a more complex object as a key you will be greeted by a strange compiler +## error: +## +## Error: type mismatch: got (Person) +## but expected one of: +## hashes.hash(x: openArray[A]): Hash +## hashes.hash(x: int): Hash +## hashes.hash(x: float): Hash +## +## What is happening here is that the types used for table keys require to have +## a `hash()` proc which will convert them to a `Hash <hashes.html#Hash>`_ +## value, and the compiler is listing all the hash functions it knows. +## Additionally there has to be a `==` operator that provides the same +## semantics as its corresponding `hash` proc. +## +## After you add `hash` and `==` for your custom type everything will work. +## Currently, however, `hash` for objects is not defined, whereas +## `system.==` for objects does exist and performs a "deep" comparison (every +## field is compared) which is usually what you want. So in the following +## example implementing only `hash` suffices: -template rawInsertImpl() {.dirty.} = - data[h].key = key - data[h].val = val - data[h].hcode = hc +runnableExamples: + import std/hashes -proc rawGetKnownHC[A, B](t: Table[A, B], key: A, hc: THash): int {.inline.} = - rawGetKnownHCImpl() + type + Person = object + firstName, lastName: string -proc rawGetDeep[A, B](t: Table[A, B], key: A, hc: var THash): int {.inline.} = - rawGetDeepImpl() + proc hash(x: Person): Hash = + ## Piggyback on the already available string hash proc. + ## + ## Without this proc nothing works! + result = x.firstName.hash !& x.lastName.hash + result = !$result -proc rawGet[A, B](t: Table[A, B], key: A, hc: var THash): int {.inline.} = - rawGetImpl() + var + salaries = initTable[Person, int]() + p1, p2: Person -proc `[]`*[A, B](t: Table[A, B], key: A): B = - ## retrieves the value at ``t[key]``. If `key` is not in `t`, - ## default empty value for the type `B` is returned - ## and no exception is raised. One can check with ``hasKey`` whether the key - ## exists. - var hc: THash - var index = rawGet(t, key, hc) - if index >= 0: result = t.data[index].val + p1.firstName = "Jon" + p1.lastName = "Ross" + salaries[p1] = 30_000 -proc mget*[A, B](t: var Table[A, B], key: A): var B = - ## retrieves the value at ``t[key]``. The value can be modified. - ## If `key` is not in `t`, the ``KeyError`` exception is raised. - var hc: THash - var index = rawGet(t, key, hc) - if index >= 0: result = t.data[index].val - else: - when compiles($key): - raise newException(KeyError, "key not found: " & $key) - else: - raise newException(KeyError, "key not found") + p2.firstName = "소진" + p2.lastName = "박" + salaries[p2] = 45_000 -iterator allValues*[A, B](t: Table[A, B]; key: A): B = - ## iterates over any value in the table `t` that belongs to the given `key`. - var h: THash = hash(key) and high(t.data) - while isFilled(t.data[h].hcode): - if t.data[h].key == key: - yield t.data[h].val - h = nextTry(h, high(t.data)) +## +## ---- +## -proc hasKey*[A, B](t: Table[A, B], key: A): bool = - ## returns true iff `key` is in the table `t`. - var hc: THash - result = rawGet(t, key, hc) >= 0 +## # See also +## +## * `json module<json.html>`_ for table-like structure which allows +## heterogeneous members +## * `strtabs module<strtabs.html>`_ for efficient hash tables +## mapping from strings to strings +## * `hashes module<hashes.html>`_ for helper functions for hashing -proc rawInsert[A, B](t: var Table[A, B], data: var KeyValuePairSeq[A, B], - key: A, val: B, hc: THash, h: THash) = - rawInsertImpl() + +import std/private/since +import std/[hashes, math, algorithm] + + +when not defined(nimHasEffectsOf): + {.pragma: effectsOf.} + +type + KeyValuePair[A, B] = tuple[hcode: Hash, key: A, val: B] + KeyValuePairSeq[A, B] = seq[KeyValuePair[A, B]] + Table*[A, B] = object + ## Generic hash table, consisting of a key-value pair. + ## + ## `data` and `counter` are internal implementation details which + ## can't be accessed. + ## + ## For creating an empty Table, use `initTable proc<#initTable>`_. + data: KeyValuePairSeq[A, B] + counter: int + TableRef*[A, B] = ref Table[A, B] ## Ref version of `Table<#Table>`_. + ## + ## For creating a new empty TableRef, use `newTable proc + ## <#newTable>`_. + + +# ------------------------------ helpers --------------------------------- + +# Do NOT move these to tableimpl.nim, because sharedtables uses that +# file and has its own implementation. +template maxHash(t): untyped = high(t.data) +template dataLen(t): untyped = len(t.data) + +include tableimpl + +proc raiseKeyError[T](key: T) {.noinline, noreturn.} = + when compiles($key): + raise newException(KeyError, "key not found: " & $key) + else: + raise newException(KeyError, "key not found") + +template get(t, key): untyped = + ## retrieves the value at `t[key]`. The value can be modified. + ## If `key` is not in `t`, the `KeyError` exception is raised. + mixin rawGet + var hc: Hash + var index = rawGet(t, key, hc) + if index >= 0: result = t.data[index].val + else: + raiseKeyError(key) proc enlarge[A, B](t: var Table[A, B]) = var n: KeyValuePairSeq[A, B] newSeq(n, len(t.data) * growthFactor) swap(t.data, n) for i in countup(0, high(n)): - if isFilled(n[i].hcode): - var j = -1 - rawGetKnownHC(t, n[i].key, n[i].hcode) - rawInsert(t, t.data, n[i].key, n[i].val, n[i].hcode, j) - -template addImpl() {.dirty.} = - if mustRehash(len(t.data), t.counter): enlarge(t) - var hc: THash - var j = rawGetDeep(t, key, hc) - rawInsert(t, t.data, key, val, hc, j) - inc(t.counter) - -template maybeRehashPutImpl() {.dirty.} = - if mustRehash(len(t.data), t.counter): - enlarge(t) - index = rawGetKnownHC(t, key, hc) - index = -1 - index # important to transform for mgetOrPutImpl - rawInsert(t, t.data, key, val, hc, index) - inc(t.counter) - -template putImpl() {.dirty.} = - var hc: THash - var index = rawGet(t, key, hc) - if index >= 0: t.data[index].val = val - else: maybeRehashPutImpl() + let eh = n[i].hcode + if isFilled(eh): + var j: Hash = eh and maxHash(t) + while isFilled(t.data[j].hcode): + j = nextTry(j, maxHash(t)) + when defined(js): + rawInsert(t, t.data, n[i].key, n[i].val, eh, j) + else: + rawInsert(t, t.data, move n[i].key, move n[i].val, eh, j) -template mgetOrPutImpl() {.dirty.} = - var hc: THash - var index = rawGet(t, key, hc) - if index < 0: maybeRehashPutImpl() # not present: insert (flipping index) - result = t.data[index].val # either way return modifiable val -template hasKeyOrPutImpl() {.dirty.} = - var hc: THash - var index = rawGet(t, key, hc) - if index < 0: - result = false - maybeRehashPutImpl() - else: result = true + + +# ------------------------------------------------------------------- +# ------------------------------ Table ------------------------------ +# ------------------------------------------------------------------- + +proc initTable*[A, B](initialSize = defaultInitialSize): Table[A, B] = + ## Creates a new hash table that is empty. + ## + ## Starting from Nim v0.20, tables are initialized by default and it is + ## not necessary to call this function explicitly. + ## + ## See also: + ## * `toTable proc<#toTable,openArray[]>`_ + ## * `newTable proc<#newTable>`_ for creating a `TableRef` + runnableExamples: + let + a = initTable[int, string]() + b = initTable[char, seq[int]]() + result = default(Table[A, B]) + initImpl(result, initialSize) + +proc `[]=`*[A, B](t: var Table[A, B], key: A, val: sink B) = + ## Inserts a `(key, value)` pair into `t`. + ## + ## See also: + ## * `[] proc<#[],Table[A,B],A>`_ for retrieving a value of a key + ## * `hasKeyOrPut proc<#hasKeyOrPut,Table[A,B],A,B>`_ + ## * `mgetOrPut proc<#mgetOrPut,Table[A,B],A,B>`_ + ## * `del proc<#del,Table[A,B],A>`_ for removing a key from the table + runnableExamples: + var a = initTable[char, int]() + a['x'] = 7 + a['y'] = 33 + doAssert a == {'x': 7, 'y': 33}.toTable + + putImpl(enlarge) + +proc toTable*[A, B](pairs: openArray[(A, B)]): Table[A, B] = + ## Creates a new hash table that contains the given `pairs`. + ## + ## `pairs` is a container consisting of `(key, value)` tuples. + ## + ## See also: + ## * `initTable proc<#initTable>`_ + ## * `newTable proc<#newTable,openArray[]>`_ for a `TableRef` version + runnableExamples: + let a = [('a', 5), ('b', 9)] + let b = toTable(a) + assert b == {'a': 5, 'b': 9}.toTable + + result = initTable[A, B](pairs.len) + for key, val in items(pairs): result[key] = val + +proc `[]`*[A, B](t: Table[A, B], key: A): lent B = + ## Retrieves the value at `t[key]`. + ## + ## If `key` is not in `t`, the `KeyError` exception is raised. + ## One can check with `hasKey proc<#hasKey,Table[A,B],A>`_ whether + ## the key exists. + ## + ## See also: + ## * `getOrDefault proc<#getOrDefault,Table[A,B],A>`_ to return + ## a default value (e.g. zero for int) if the key doesn't exist + ## * `getOrDefault proc<#getOrDefault,Table[A,B],A,B>`_ to return + ## a custom value if the key doesn't exist + ## * `[]= proc<#[]=,Table[A,B],A,sinkB>`_ for inserting a new + ## (key, value) pair in the table + ## * `hasKey proc<#hasKey,Table[A,B],A>`_ for checking if a key is in + ## the table + runnableExamples: + let a = {'a': 5, 'b': 9}.toTable + doAssert a['a'] == 5 + doAssertRaises(KeyError): + echo a['z'] + get(t, key) + +proc `[]`*[A, B](t: var Table[A, B], key: A): var B = + ## Retrieves the value at `t[key]`. The value can be modified. + ## + ## If `key` is not in `t`, the `KeyError` exception is raised. + ## + ## See also: + ## * `getOrDefault proc<#getOrDefault,Table[A,B],A>`_ to return + ## a default value (e.g. zero for int) if the key doesn't exist + ## * `getOrDefault proc<#getOrDefault,Table[A,B],A,B>`_ to return + ## a custom value if the key doesn't exist + ## * `[]= proc<#[]=,Table[A,B],A,sinkB>`_ for inserting a new + ## (key, value) pair in the table + ## * `hasKey proc<#hasKey,Table[A,B],A>`_ for checking if a key is in + ## the table + get(t, key) + +proc hasKey*[A, B](t: Table[A, B], key: A): bool = + ## Returns true if `key` is in the table `t`. + ## + ## See also: + ## * `contains proc<#contains,Table[A,B],A>`_ for use with the `in` operator + ## * `[] proc<#[],Table[A,B],A>`_ for retrieving a value of a key + ## * `getOrDefault proc<#getOrDefault,Table[A,B],A>`_ to return + ## a default value (e.g. zero for int) if the key doesn't exist + ## * `getOrDefault proc<#getOrDefault,Table[A,B],A,B>`_ to return + ## a custom value if the key doesn't exist + runnableExamples: + let a = {'a': 5, 'b': 9}.toTable + doAssert a.hasKey('a') == true + doAssert a.hasKey('z') == false + + var hc: Hash + result = rawGet(t, key, hc) >= 0 + +proc contains*[A, B](t: Table[A, B], key: A): bool = + ## Alias of `hasKey proc<#hasKey,Table[A,B],A>`_ for use with + ## the `in` operator. + runnableExamples: + let a = {'a': 5, 'b': 9}.toTable + doAssert 'b' in a == true + doAssert a.contains('z') == false + + return hasKey[A, B](t, key) + +proc hasKeyOrPut*[A, B](t: var Table[A, B], key: A, val: B): bool = + ## Returns true if `key` is in the table, otherwise inserts `value`. + ## + ## See also: + ## * `hasKey proc<#hasKey,Table[A,B],A>`_ + ## * `[] proc<#[],Table[A,B],A>`_ for retrieving a value of a key + ## * `getOrDefault proc<#getOrDefault,Table[A,B],A>`_ to return + ## a default value (e.g. zero for int) if the key doesn't exist + ## * `getOrDefault proc<#getOrDefault,Table[A,B],A,B>`_ to return + ## a custom value if the key doesn't exist + runnableExamples: + var a = {'a': 5, 'b': 9}.toTable + if a.hasKeyOrPut('a', 50): + a['a'] = 99 + if a.hasKeyOrPut('z', 50): + a['z'] = 99 + doAssert a == {'a': 99, 'b': 9, 'z': 50}.toTable + + hasKeyOrPutImpl(enlarge) + +proc getOrDefault*[A, B](t: Table[A, B], key: A): B = + ## Retrieves the value at `t[key]` if `key` is in `t`. Otherwise, the + ## default initialization value for type `B` is returned (e.g. 0 for any + ## integer type). + ## + ## See also: + ## * `[] proc<#[],Table[A,B],A>`_ for retrieving a value of a key + ## * `hasKey proc<#hasKey,Table[A,B],A>`_ + ## * `hasKeyOrPut proc<#hasKeyOrPut,Table[A,B],A,B>`_ + ## * `mgetOrPut proc<#mgetOrPut,Table[A,B],A,B>`_ + ## * `getOrDefault proc<#getOrDefault,Table[A,B],A,B>`_ to return + ## a custom value if the key doesn't exist + runnableExamples: + let a = {'a': 5, 'b': 9}.toTable + doAssert a.getOrDefault('a') == 5 + doAssert a.getOrDefault('z') == 0 + result = default(B) + getOrDefaultImpl(t, key) + +proc getOrDefault*[A, B](t: Table[A, B], key: A, default: B): B = + ## Retrieves the value at `t[key]` if `key` is in `t`. + ## Otherwise, `default` is returned. + ## + ## See also: + ## * `[] proc<#[],Table[A,B],A>`_ for retrieving a value of a key + ## * `hasKey proc<#hasKey,Table[A,B],A>`_ + ## * `hasKeyOrPut proc<#hasKeyOrPut,Table[A,B],A,B>`_ + ## * `mgetOrPut proc<#mgetOrPut,Table[A,B],A,B>`_ + ## * `getOrDefault proc<#getOrDefault,Table[A,B],A>`_ to return + ## a default value (e.g. zero for int) if the key doesn't exist + runnableExamples: + let a = {'a': 5, 'b': 9}.toTable + doAssert a.getOrDefault('a', 99) == 5 + doAssert a.getOrDefault('z', 99) == 99 + result = default(B) + getOrDefaultImpl(t, key, default) proc mgetOrPut*[A, B](t: var Table[A, B], key: A, val: B): var B = - ## retrieves value at ``t[key]`` or puts ``val`` if not present, either way + ## Retrieves value at `t[key]` or puts `val` if not present, either way ## returning a value which can be modified. - mgetOrPutImpl() + ## + ## + ## Note that while the value returned is of type `var B`, + ## it is easy to accidentally create a copy of the value at `t[key]`. + ## Remember that seqs and strings are value types, and therefore + ## cannot be copied into a separate variable for modification. + ## See the example below. + ## + ## See also: + ## * `[] proc<#[],Table[A,B],A>`_ for retrieving a value of a key + ## * `hasKey proc<#hasKey,Table[A,B],A>`_ + ## * `hasKeyOrPut proc<#hasKeyOrPut,Table[A,B],A,B>`_ + ## * `getOrDefault proc<#getOrDefault,Table[A,B],A>`_ to return + ## a default value (e.g. zero for int) if the key doesn't exist + ## * `getOrDefault proc<#getOrDefault,Table[A,B],A,B>`_ to return + ## a custom value if the key doesn't exist + runnableExamples: + var a = {'a': 5, 'b': 9}.toTable + doAssert a.mgetOrPut('a', 99) == 5 + doAssert a.mgetOrPut('z', 99) == 99 + doAssert a == {'a': 5, 'b': 9, 'z': 99}.toTable + + # An example of accidentally creating a copy + var t = initTable[int, seq[int]]() + # In this example, we expect t[10] to be modified, + # but it is not. + var copiedSeq = t.mgetOrPut(10, @[10]) + copiedSeq.add(20) + doAssert t[10] == @[10] + # Correct + t.mgetOrPut(25, @[25]).add(35) + doAssert t[25] == @[25, 35] + + mgetOrPutImpl(enlarge) + +proc mgetOrPut*[A, B](t: var Table[A, B], key: A): var B = + ## Retrieves the value at `t[key]` or puts the + ## default initialization value for type `B` (e.g. 0 for any + ## integer type). + runnableExamples: + var a = {'a': 5}.newTable + doAssert a.mgetOrPut('a') == 5 + a.mgetOrPut('z').inc + doAssert a == {'a': 5, 'z': 1}.newTable + + mgetOrPutImpl(enlarge) -proc hasKeyOrPut*[A, B](t: var Table[A, B], key: A, val: B): bool = - ## returns true iff `key` is in the table, otherwise inserts `value`. - hasKeyOrPutImpl() +proc len*[A, B](t: Table[A, B]): int = + ## Returns the number of keys in `t`. + runnableExamples: + let a = {'a': 5, 'b': 9}.toTable + doAssert len(a) == 2 -proc `[]=`*[A, B](t: var Table[A, B], key: A, val: B) = - ## puts a (key, value)-pair into `t`. - putImpl() + result = t.counter -proc add*[A, B](t: var Table[A, B], key: A, val: B) = - ## puts a new (key, value)-pair into `t` even if ``t[key]`` already exists. - addImpl() +proc add*[A, B](t: var Table[A, B], key: A, val: sink B) {.deprecated: + "Deprecated since v1.4; it was more confusing than useful, use `[]=`".} = + ## Puts a new `(key, value)` pair into `t` even if `t[key]` already exists. + ## + ## **This can introduce duplicate keys into the table!** + ## + ## Use `[]= proc<#[]=,Table[A,B],A,sinkB>`_ for inserting a new + ## (key, value) pair in the table without introducing duplicates. + addImpl(enlarge) -template doWhile(a: expr, b: stmt): stmt = - while true: - b - if not a: break +template tabMakeEmpty(i) = t.data[i].hcode = 0 +template tabCellEmpty(i) = isEmpty(t.data[i].hcode) +template tabCellHash(i) = t.data[i].hcode proc del*[A, B](t: var Table[A, B], key: A) = - ## deletes `key` from hash table `t`. - var hc: THash - var i = rawGet(t, key, hc) - let msk = high(t.data) - if i >= 0: - t.data[i].hcode = 0 - dec(t.counter) - while true: # KnuthV3 Algo6.4R adapted for i=i+1 instead of i=i-1 - var j = i # The correctness of this depends on (h+1) in nextTry, - var r = j # though may be adaptable to other simple sequences. - t.data[i].hcode = 0 # mark current EMPTY - doWhile ((i >= r and r > j) or (r > j and j > i) or (j > i and i >= r)): - i = (i + 1) and msk # increment mod table size - if isEmpty(t.data[i].hcode): # end of collision cluster; So all done - return - r = t.data[i].hcode and msk # "home" location of key@i - shallowCopy(t.data[j], t.data[i]) # data[j] will be marked EMPTY next loop - -proc initTable*[A, B](initialSize=64): Table[A, B] = - ## creates a new hash table that is empty. - ## - ## `initialSize` needs to be a power of two. If you need to accept runtime - ## values for this you could use the ``nextPowerOfTwo`` proc from the - ## `math <math.html>`_ module or the ``rightSize`` proc from this module. - assert isPowerOfTwo(initialSize) - result.counter = 0 - newSeq(result.data, initialSize) - -proc toTable*[A, B](pairs: openArray[(A, - B)]): Table[A, B] = - ## creates a new hash table that contains the given `pairs`. - result = initTable[A, B](rightSize(pairs.len)) - for key, val in items(pairs): result[key] = val + ## Deletes `key` from hash table `t`. Does nothing if the key does not exist. + ## + ## .. warning:: If duplicate keys were added (via the now deprecated `add` proc), + ## this may need to be called multiple times. + ## + ## See also: + ## * `pop proc<#pop,Table[A,B],A,B>`_ + ## * `clear proc<#clear,Table[A,B]>`_ to empty the whole table + runnableExamples: + var a = {'a': 5, 'b': 9, 'c': 13}.toTable + a.del('a') + doAssert a == {'b': 9, 'c': 13}.toTable + a.del('z') + doAssert a == {'b': 9, 'c': 13}.toTable + + delImpl(tabMakeEmpty, tabCellEmpty, tabCellHash) + +proc pop*[A, B](t: var Table[A, B], key: A, val: var B): bool = + ## Deletes the `key` from the table. + ## Returns `true`, if the `key` existed, and sets `val` to the + ## mapping of the key. Otherwise, returns `false`, and the `val` is + ## unchanged. + ## + ## .. warning:: If duplicate keys were added (via the now deprecated `add` proc), + ## this may need to be called multiple times. + ## + ## See also: + ## * `del proc<#del,Table[A,B],A>`_ + ## * `clear proc<#clear,Table[A,B]>`_ to empty the whole table + runnableExamples: + var + a = {'a': 5, 'b': 9, 'c': 13}.toTable + i: int + doAssert a.pop('b', i) == true + doAssert a == {'a': 5, 'c': 13}.toTable + doAssert i == 9 + i = 0 + doAssert a.pop('z', i) == false + doAssert a == {'a': 5, 'c': 13}.toTable + doAssert i == 0 + + var hc: Hash + var index = rawGet(t, key, hc) + result = index >= 0 + if result: + val = move(t.data[index].val) + delImplIdx(t, index, tabMakeEmpty, tabCellEmpty, tabCellHash) + +proc take*[A, B](t: var Table[A, B], key: A, val: var B): bool {.inline.} = + ## Alias for: + ## * `pop proc<#pop,Table[A,B],A,B>`_ + pop(t, key, val) + +proc clear*[A, B](t: var Table[A, B]) = + ## Resets the table so that it is empty. + ## + ## See also: + ## * `del proc<#del,Table[A,B],A>`_ + ## * `pop proc<#pop,Table[A,B],A,B>`_ + runnableExamples: + var a = {'a': 5, 'b': 9, 'c': 13}.toTable + doAssert len(a) == 3 + clear(a) + doAssert len(a) == 0 -template dollarImpl(): stmt {.dirty.} = - if t.len == 0: - result = "{:}" - else: - result = "{" - for key, val in pairs(t): - if result.len > 1: result.add(", ") - result.add($key) - result.add(": ") - result.add($val) - result.add("}") + clearImpl() proc `$`*[A, B](t: Table[A, B]): string = - ## The `$` operator for hash tables. + ## The `$` operator for hash tables. Used internally when calling `echo` + ## on a table. dollarImpl() -template equalsImpl() = - if s.counter == t.counter: - # different insertion orders mean different 'data' seqs, so we have - # to use the slow route here: - for key, val in s: - # prefix notation leads to automatic dereference in case of PTable - if not t.hasKey(key): return false - if t[key] != val: return false - return true - proc `==`*[A, B](s, t: Table[A, B]): bool = - equalsImpl() + ## The `==` operator for hash tables. Returns `true` if the content of both + ## tables contains the same key-value pairs. Insert order does not matter. + runnableExamples: + let + a = {'a': 5, 'b': 9, 'c': 13}.toTable + b = {'b': 9, 'c': 13, 'a': 5}.toTable + doAssert a == b + + equalsImpl(s, t) proc indexBy*[A, B, C](collection: A, index: proc(x: B): C): Table[C, B] = ## Index the collection with the proc provided. @@ -356,185 +600,705 @@ proc indexBy*[A, B, C](collection: A, index: proc(x: B): C): Table[C, B] = for item in collection: result[index(item)] = item -proc len*[A, B](t: TableRef[A, B]): int = - ## returns the number of keys in `t`. - result = t.counter -iterator pairs*[A, B](t: TableRef[A, B]): (A, B) = - ## iterates over any (key, value) pair in the table `t`. - for h in 0..high(t.data): - if isFilled(t.data[h].hcode): yield (t.data[h].key, t.data[h].val) -iterator mpairs*[A, B](t: TableRef[A, B]): (A, var B) = - ## iterates over any (key, value) pair in the table `t`. The values - ## can be modified. - for h in 0..high(t.data): - if isFilled(t.data[h].hcode): yield (t.data[h].key, t.data[h].val) +template withValue*[A, B](t: var Table[A, B], key: A, value, body: untyped) = + ## Retrieves the value at `t[key]`. + ## + ## `value` can be modified in the scope of the `withValue` call. + runnableExamples: + type + User = object + name: string + uid: int + + var t = initTable[int, User]() + let u = User(name: "Hello", uid: 99) + t[1] = u + + t.withValue(1, value): + # block is executed only if `key` in `t` + value.name = "Nim" + value.uid = 1314 + + t.withValue(2, value): + value.name = "No" + value.uid = 521 + + assert t[1].name == "Nim" + assert t[1].uid == 1314 + + mixin rawGet + var hc: Hash + var index = rawGet(t, key, hc) + let hasKey = index >= 0 + if hasKey: + var value {.inject.} = addr(t.data[index].val) + body + +template withValue*[A, B](t: var Table[A, B], key: A, + value, body1, body2: untyped) = + ## Retrieves the value at `t[key]`. + ## + ## `value` can be modified in the scope of the `withValue` call. + runnableExamples: + type + User = object + name: string + uid: int + + var t = initTable[int, User]() + let u = User(name: "Hello", uid: 99) + t[1] = u + + t.withValue(1, value): + # block is executed only if `key` in `t` + value.name = "Nim" + value.uid = 1314 + + t.withValue(521, value): + doAssert false + do: + # block is executed when `key` not in `t` + t[1314] = User(name: "exist", uid: 521) + + assert t[1].name == "Nim" + assert t[1].uid == 1314 + assert t[1314].name == "exist" + assert t[1314].uid == 521 + + mixin rawGet + var hc: Hash + var index = rawGet(t, key, hc) + let hasKey = index >= 0 + if hasKey: + var value {.inject.} = addr(t.data[index].val) + body1 + else: + body2 -iterator keys*[A, B](t: TableRef[A, B]): A = - ## iterates over any key in the table `t`. - for h in 0..high(t.data): - if isFilled(t.data[h].hcode): yield t.data[h].key -iterator values*[A, B](t: TableRef[A, B]): B = - ## iterates over any value in the table `t`. - for h in 0..high(t.data): - if isFilled(t.data[h].hcode): yield t.data[h].val +iterator pairs*[A, B](t: Table[A, B]): (A, B) = + ## Iterates over any `(key, value)` pair in the table `t`. + ## + ## See also: + ## * `mpairs iterator<#mpairs.i,Table[A,B]>`_ + ## * `keys iterator<#keys.i,Table[A,B]>`_ + ## * `values iterator<#values.i,Table[A,B]>`_ + ## + ## **Examples:** + ## + ## ```Nim + ## let a = { + ## 'o': [1, 5, 7, 9], + ## 'e': [2, 4, 6, 8] + ## }.toTable + ## + ## for k, v in a.pairs: + ## echo "key: ", k + ## echo "value: ", v + ## + ## # key: e + ## # value: [2, 4, 6, 8] + ## # key: o + ## # value: [1, 5, 7, 9] + ## ``` + let L = len(t) + for h in 0 .. high(t.data): + if isFilled(t.data[h].hcode): + yield (t.data[h].key, t.data[h].val) + assert(len(t) == L, "the length of the table changed while iterating over it") + +iterator mpairs*[A, B](t: var Table[A, B]): (A, var B) = + ## Iterates over any `(key, value)` pair in the table `t` (must be + ## declared as `var`). The values can be modified. + ## + ## See also: + ## * `pairs iterator<#pairs.i,Table[A,B]>`_ + ## * `mvalues iterator<#mvalues.i,Table[A,B]>`_ + runnableExamples: + var a = { + 'o': @[1, 5, 7, 9], + 'e': @[2, 4, 6, 8] + }.toTable + for k, v in a.mpairs: + v.add(v[0] + 10) + doAssert a == {'e': @[2, 4, 6, 8, 12], 'o': @[1, 5, 7, 9, 11]}.toTable + + let L = len(t) + for h in 0 .. high(t.data): + if isFilled(t.data[h].hcode): + yield (t.data[h].key, t.data[h].val) + assert(len(t) == L, "the length of the table changed while iterating over it") + +iterator keys*[A, B](t: Table[A, B]): lent A = + ## Iterates over any key in the table `t`. + ## + ## See also: + ## * `pairs iterator<#pairs.i,Table[A,B]>`_ + ## * `values iterator<#values.i,Table[A,B]>`_ + runnableExamples: + var a = { + 'o': @[1, 5, 7, 9], + 'e': @[2, 4, 6, 8] + }.toTable + for k in a.keys: + a[k].add(99) + doAssert a == {'e': @[2, 4, 6, 8, 99], 'o': @[1, 5, 7, 9, 99]}.toTable + + let L = len(t) + for h in 0 .. high(t.data): + if isFilled(t.data[h].hcode): + yield t.data[h].key + assert(len(t) == L, "the length of the table changed while iterating over it") + +iterator values*[A, B](t: Table[A, B]): lent B = + ## Iterates over any value in the table `t`. + ## + ## See also: + ## * `pairs iterator<#pairs.i,Table[A,B]>`_ + ## * `keys iterator<#keys.i,Table[A,B]>`_ + ## * `mvalues iterator<#mvalues.i,Table[A,B]>`_ + runnableExamples: + let a = { + 'o': @[1, 5, 7, 9], + 'e': @[2, 4, 6, 8] + }.toTable + for v in a.values: + doAssert v.len == 4 + + let L = len(t) + for h in 0 .. high(t.data): + if isFilled(t.data[h].hcode): + yield t.data[h].val + assert(len(t) == L, "the length of the table changed while iterating over it") + +iterator mvalues*[A, B](t: var Table[A, B]): var B = + ## Iterates over any value in the table `t` (must be + ## declared as `var`). The values can be modified. + ## + ## See also: + ## * `mpairs iterator<#mpairs.i,Table[A,B]>`_ + ## * `values iterator<#values.i,Table[A,B]>`_ + runnableExamples: + var a = { + 'o': @[1, 5, 7, 9], + 'e': @[2, 4, 6, 8] + }.toTable + for v in a.mvalues: + v.add(99) + doAssert a == {'e': @[2, 4, 6, 8, 99], 'o': @[1, 5, 7, 9, 99]}.toTable + + let L = len(t) + for h in 0 .. high(t.data): + if isFilled(t.data[h].hcode): + yield t.data[h].val + assert(len(t) == L, "the length of the table changed while iterating over it") + +iterator allValues*[A, B](t: Table[A, B]; key: A): B {.deprecated: + "Deprecated since v1.4; tables with duplicated keys are deprecated".} = + ## Iterates over any value in the table `t` that belongs to the given `key`. + ## + ## Used if you have a table with duplicate keys (as a result of using + ## `add proc<#add,Table[A,B],A,sinkB>`_). + ## + runnableExamples: + import std/[sequtils, algorithm] + + var a = {'a': 3, 'b': 5}.toTable + for i in 1..3: a.add('z', 10*i) + doAssert toSeq(a.pairs).sorted == @[('a', 3), ('b', 5), ('z', 10), ('z', 20), ('z', 30)] + doAssert sorted(toSeq(a.allValues('z'))) == @[10, 20, 30] + var h: Hash = genHash(key) and high(t.data) + let L = len(t) + while isFilled(t.data[h].hcode): + if t.data[h].key == key: + yield t.data[h].val + assert(len(t) == L, "the length of the table changed while iterating over it") + h = nextTry(h, high(t.data)) + + + +# ------------------------------------------------------------------- +# ---------------------------- TableRef ----------------------------- +# ------------------------------------------------------------------- + + +proc newTable*[A, B](initialSize = defaultInitialSize): TableRef[A, B] = + ## Creates a new ref hash table that is empty. + ## + ## See also: + ## * `newTable proc<#newTable,openArray[]>`_ for creating a `TableRef` + ## from a collection of `(key, value)` pairs + ## * `initTable proc<#initTable>`_ for creating a `Table` + runnableExamples: + let + a = newTable[int, string]() + b = newTable[char, seq[int]]() + + new(result) + {.noSideEffect.}: + result[] = initTable[A, B](initialSize) + +proc newTable*[A, B](pairs: openArray[(A, B)]): TableRef[A, B] = + ## Creates a new ref hash table that contains the given `pairs`. + ## + ## `pairs` is a container consisting of `(key, value)` tuples. + ## + ## See also: + ## * `newTable proc<#newTable>`_ + ## * `toTable proc<#toTable,openArray[]>`_ for a `Table` version + runnableExamples: + let a = [('a', 5), ('b', 9)] + let b = newTable(a) + assert b == {'a': 5, 'b': 9}.newTable + + new(result) + {.noSideEffect.}: + result[] = toTable[A, B](pairs) + +proc newTableFrom*[A, B, C](collection: A, index: proc(x: B): C): TableRef[C, B] = + ## Index the collection with the proc provided. + # TODO: As soon as supported, change collection: A to collection: A[B] + result = newTable[C, B]() + {.noSideEffect.}: + for item in collection: + result[index(item)] = item + +proc `[]`*[A, B](t: TableRef[A, B], key: A): var B = + ## Retrieves the value at `t[key]`. + ## + ## If `key` is not in `t`, the `KeyError` exception is raised. + ## One can check with `hasKey proc<#hasKey,TableRef[A,B],A>`_ whether + ## the key exists. + ## + ## See also: + ## * `getOrDefault proc<#getOrDefault,TableRef[A,B],A>`_ to return + ## a default value (e.g. zero for int) if the key doesn't exist + ## * `getOrDefault proc<#getOrDefault,TableRef[A,B],A,B>`_ to return + ## a custom value if the key doesn't exist + ## * `[]= proc<#[]=,TableRef[A,B],A,sinkB>`_ for inserting a new + ## (key, value) pair in the table + ## * `hasKey proc<#hasKey,TableRef[A,B],A>`_ for checking if a key is in + ## the table + runnableExamples: + let a = {'a': 5, 'b': 9}.newTable + doAssert a['a'] == 5 + doAssertRaises(KeyError): + echo a['z'] -iterator mvalues*[A, B](t: TableRef[A, B]): var B = - ## iterates over any value in the table `t`. The values can be modified. - for h in 0..high(t.data): - if isFilled(t.data[h].hcode): yield t.data[h].val - -proc `[]`*[A, B](t: TableRef[A, B], key: A): B = - ## retrieves the value at ``t[key]``. If `key` is not in `t`, - ## default empty value for the type `B` is returned - ## and no exception is raised. One can check with ``hasKey`` whether the key - ## exists. result = t[][key] -proc mget*[A, B](t: TableRef[A, B], key: A): var B = - ## retrieves the value at ``t[key]``. The value can be modified. - ## If `key` is not in `t`, the ``EInvalidKey`` exception is raised. - t[].mget(key) +proc `[]=`*[A, B](t: TableRef[A, B], key: A, val: sink B) = + ## Inserts a `(key, value)` pair into `t`. + ## + ## See also: + ## * `[] proc<#[],TableRef[A,B],A>`_ for retrieving a value of a key + ## * `hasKeyOrPut proc<#hasKeyOrPut,TableRef[A,B],A,B>`_ + ## * `mgetOrPut proc<#mgetOrPut,TableRef[A,B],A,B>`_ + ## * `del proc<#del,TableRef[A,B],A>`_ for removing a key from the table + runnableExamples: + var a = newTable[char, int]() + a['x'] = 7 + a['y'] = 33 + doAssert a == {'x': 7, 'y': 33}.newTable + + t[][key] = val + +proc hasKey*[A, B](t: TableRef[A, B], key: A): bool = + ## Returns true if `key` is in the table `t`. + ## + ## See also: + ## * `contains proc<#contains,TableRef[A,B],A>`_ for use with the `in` + ## operator + ## * `[] proc<#[],TableRef[A,B],A>`_ for retrieving a value of a key + ## * `getOrDefault proc<#getOrDefault,TableRef[A,B],A>`_ to return + ## a default value (e.g. zero for int) if the key doesn't exist + ## * `getOrDefault proc<#getOrDefault,TableRef[A,B],A,B>`_ to return + ## a custom value if the key doesn't exist + runnableExamples: + let a = {'a': 5, 'b': 9}.newTable + doAssert a.hasKey('a') == true + doAssert a.hasKey('z') == false + + result = t[].hasKey(key) + +proc contains*[A, B](t: TableRef[A, B], key: A): bool = + ## Alias of `hasKey proc<#hasKey,TableRef[A,B],A>`_ for use with + ## the `in` operator. + runnableExamples: + let a = {'a': 5, 'b': 9}.newTable + doAssert 'b' in a == true + doAssert a.contains('z') == false + + return hasKey[A, B](t, key) + +proc hasKeyOrPut*[A, B](t: TableRef[A, B], key: A, val: B): bool = + ## Returns true if `key` is in the table, otherwise inserts `value`. + ## + ## See also: + ## * `hasKey proc<#hasKey,TableRef[A,B],A>`_ + ## * `[] proc<#[],TableRef[A,B],A>`_ for retrieving a value of a key + ## * `getOrDefault proc<#getOrDefault,TableRef[A,B],A>`_ to return + ## a default value (e.g. zero for int) if the key doesn't exist + ## * `getOrDefault proc<#getOrDefault,TableRef[A,B],A,B>`_ to return + ## a custom value if the key doesn't exist + runnableExamples: + var a = {'a': 5, 'b': 9}.newTable + if a.hasKeyOrPut('a', 50): + a['a'] = 99 + if a.hasKeyOrPut('z', 50): + a['z'] = 99 + doAssert a == {'a': 99, 'b': 9, 'z': 50}.newTable + + t[].hasKeyOrPut(key, val) + +proc getOrDefault*[A, B](t: TableRef[A, B], key: A): B = + ## Retrieves the value at `t[key]` if `key` is in `t`. Otherwise, the + ## default initialization value for type `B` is returned (e.g. 0 for any + ## integer type). + ## + ## See also: + ## * `[] proc<#[],TableRef[A,B],A>`_ for retrieving a value of a key + ## * `hasKey proc<#hasKey,TableRef[A,B],A>`_ + ## * `hasKeyOrPut proc<#hasKeyOrPut,TableRef[A,B],A,B>`_ + ## * `mgetOrPut proc<#mgetOrPut,TableRef[A,B],A,B>`_ + ## * `getOrDefault proc<#getOrDefault,TableRef[A,B],A,B>`_ to return + ## a custom value if the key doesn't exist + runnableExamples: + let a = {'a': 5, 'b': 9}.newTable + doAssert a.getOrDefault('a') == 5 + doAssert a.getOrDefault('z') == 0 + + getOrDefault(t[], key) + +proc getOrDefault*[A, B](t: TableRef[A, B], key: A, default: B): B = + ## Retrieves the value at `t[key]` if `key` is in `t`. + ## Otherwise, `default` is returned. + ## + ## See also: + ## * `[] proc<#[],TableRef[A,B],A>`_ for retrieving a value of a key + ## * `hasKey proc<#hasKey,TableRef[A,B],A>`_ + ## * `hasKeyOrPut proc<#hasKeyOrPut,TableRef[A,B],A,B>`_ + ## * `mgetOrPut proc<#mgetOrPut,TableRef[A,B],A,B>`_ + ## * `getOrDefault proc<#getOrDefault,TableRef[A,B],A>`_ to return + ## a default value (e.g. zero for int) if the key doesn't exist + runnableExamples: + let a = {'a': 5, 'b': 9}.newTable + doAssert a.getOrDefault('a', 99) == 5 + doAssert a.getOrDefault('z', 99) == 99 + + getOrDefault(t[], key, default) proc mgetOrPut*[A, B](t: TableRef[A, B], key: A, val: B): var B = - ## retrieves value at ``t[key]`` or puts ``val`` if not present, either way + ## Retrieves value at `t[key]` or puts `val` if not present, either way ## returning a value which can be modified. + ## + ## Note that while the value returned is of type `var B`, + ## it is easy to accidentally create an copy of the value at `t[key]`. + ## Remember that seqs and strings are value types, and therefore + ## cannot be copied into a separate variable for modification. + ## See the example below. + ## + ## See also: + ## * `[] proc<#[],TableRef[A,B],A>`_ for retrieving a value of a key + ## * `hasKey proc<#hasKey,TableRef[A,B],A>`_ + ## * `hasKeyOrPut proc<#hasKeyOrPut,TableRef[A,B],A,B>`_ + ## * `getOrDefault proc<#getOrDefault,TableRef[A,B],A>`_ to return + ## a default value (e.g. zero for int) if the key doesn't exist + ## * `getOrDefault proc<#getOrDefault,TableRef[A,B],A,B>`_ to return + ## a custom value if the key doesn't exist + runnableExamples: + var a = {'a': 5, 'b': 9}.newTable + doAssert a.mgetOrPut('a', 99) == 5 + doAssert a.mgetOrPut('z', 99) == 99 + doAssert a == {'a': 5, 'b': 9, 'z': 99}.newTable + + # An example of accidentally creating a copy + var t = newTable[int, seq[int]]() + # In this example, we expect t[10] to be modified, + # but it is not. + var copiedSeq = t.mgetOrPut(10, @[10]) + copiedSeq.add(20) + doAssert t[10] == @[10] + # Correct + t.mgetOrPut(25, @[25]).add(35) + doAssert t[25] == @[25, 35] t[].mgetOrPut(key, val) -proc hasKeyOrPut*[A, B](t: var TableRef[A, B], key: A, val: B): bool = - ## returns true iff `key` is in the table, otherwise inserts `value`. - t[].hasKeyOrPut(key, val) +proc mgetOrPut*[A, B](t: TableRef[A, B], key: A): var B = + ## Retrieves the value at `t[key]` or puts the + ## default initialization value for type `B` (e.g. 0 for any + ## integer type). + runnableExamples: + var a = {'a': 5}.newTable + doAssert a.mgetOrPut('a') == 5 + a.mgetOrPut('z').inc + doAssert a == {'a': 5, 'z': 1}.newTable -proc hasKey*[A, B](t: TableRef[A, B], key: A): bool = - ## returns true iff `key` is in the table `t`. - result = t[].hasKey(key) + t[].mgetOrPut(key) -proc `[]=`*[A, B](t: TableRef[A, B], key: A, val: B) = - ## puts a (key, value)-pair into `t`. - t[][key] = val +proc len*[A, B](t: TableRef[A, B]): int = + ## Returns the number of keys in `t`. + runnableExamples: + let a = {'a': 5, 'b': 9}.newTable + doAssert len(a) == 2 -proc add*[A, B](t: TableRef[A, B], key: A, val: B) = - ## puts a new (key, value)-pair into `t` even if ``t[key]`` already exists. + result = t.counter + +proc add*[A, B](t: TableRef[A, B], key: A, val: sink B) {.deprecated: + "Deprecated since v1.4; it was more confusing than useful, use `[]=`".} = + ## Puts a new `(key, value)` pair into `t` even if `t[key]` already exists. + ## + ## **This can introduce duplicate keys into the table!** + ## + ## Use `[]= proc<#[]=,TableRef[A,B],A,sinkB>`_ for inserting a new + ## (key, value) pair in the table without introducing duplicates. t[].add(key, val) proc del*[A, B](t: TableRef[A, B], key: A) = - ## deletes `key` from hash table `t`. + ## Deletes `key` from hash table `t`. Does nothing if the key does not exist. + ## + ## .. warning:: If duplicate keys were added (via the now deprecated `add` proc), + ## this may need to be called multiple times. + ## + ## See also: + ## * `pop proc<#pop,TableRef[A,B],A,B>`_ + ## * `clear proc<#clear,TableRef[A,B]>`_ to empty the whole table + runnableExamples: + var a = {'a': 5, 'b': 9, 'c': 13}.newTable + a.del('a') + doAssert a == {'b': 9, 'c': 13}.newTable + a.del('z') + doAssert a == {'b': 9, 'c': 13}.newTable + t[].del(key) -proc newTable*[A, B](initialSize=64): TableRef[A, B] = - new(result) - result[] = initTable[A, B](initialSize) +proc pop*[A, B](t: TableRef[A, B], key: A, val: var B): bool = + ## Deletes the `key` from the table. + ## Returns `true`, if the `key` existed, and sets `val` to the + ## mapping of the key. Otherwise, returns `false`, and the `val` is + ## unchanged. + ## + ## .. warning:: If duplicate keys were added (via the now deprecated `add` proc), + ## this may need to be called multiple times. + ## + ## See also: + ## * `del proc<#del,TableRef[A,B],A>`_ + ## * `clear proc<#clear,TableRef[A,B]>`_ to empty the whole table + runnableExamples: + var + a = {'a': 5, 'b': 9, 'c': 13}.newTable + i: int + doAssert a.pop('b', i) == true + doAssert a == {'a': 5, 'c': 13}.newTable + doAssert i == 9 + i = 0 + doAssert a.pop('z', i) == false + doAssert a == {'a': 5, 'c': 13}.newTable + doAssert i == 0 + + result = t[].pop(key, val) + +proc take*[A, B](t: TableRef[A, B], key: A, val: var B): bool {.inline.} = + ## Alias for: + ## * `pop proc<#pop,TableRef[A,B],A,B>`_ + pop(t, key, val) + +proc clear*[A, B](t: TableRef[A, B]) = + ## Resets the table so that it is empty. + ## + ## See also: + ## * `del proc<#del,Table[A,B],A>`_ + ## * `pop proc<#pop,Table[A,B],A,B>`_ + runnableExamples: + var a = {'a': 5, 'b': 9, 'c': 13}.newTable + doAssert len(a) == 3 + clear(a) + doAssert len(a) == 0 -proc newTable*[A, B](pairs: openArray[(A, B)]): TableRef[A, B] = - ## creates a new hash table that contains the given `pairs`. - new(result) - result[] = toTable[A, B](pairs) + clearImpl() proc `$`*[A, B](t: TableRef[A, B]): string = - ## The `$` operator for hash tables. + ## The `$` operator for hash tables. Used internally when calling `echo` + ## on a table. dollarImpl() proc `==`*[A, B](s, t: TableRef[A, B]): bool = + ## The `==` operator for hash tables. Returns `true` if either both tables + ## are `nil`, or neither is `nil` and the content of both tables contains the + ## same key-value pairs. Insert order does not matter. + runnableExamples: + let + a = {'a': 5, 'b': 9, 'c': 13}.newTable + b = {'b': 9, 'c': 13, 'a': 5}.newTable + doAssert a == b + if isNil(s): result = isNil(t) elif isNil(t): result = false - else: equalsImpl() + else: equalsImpl(s[], t[]) -proc newTableFrom*[A, B, C](collection: A, index: proc(x: B): C): TableRef[C, B] = - ## Index the collection with the proc provided. - # TODO: As soon as supported, change collection: A to collection: A[B] - result = newTable[C, B]() - for item in collection: - result[index(item)] = item -# ------------------------------ ordered table ------------------------------ -type - OrderedKeyValuePair[A, B] = tuple[ - hcode: THash, next: int, key: A, val: B] - OrderedKeyValuePairSeq[A, B] = seq[OrderedKeyValuePair[A, B]] - OrderedTable* {. - myShallow.}[A, B] = object ## table that remembers insertion order - data: OrderedKeyValuePairSeq[A, B] - counter, first, last: int - OrderedTableRef*[A, B] = ref OrderedTable[A, B] +iterator pairs*[A, B](t: TableRef[A, B]): (A, B) = + ## Iterates over any `(key, value)` pair in the table `t`. + ## + ## See also: + ## * `mpairs iterator<#mpairs.i,TableRef[A,B]>`_ + ## * `keys iterator<#keys.i,TableRef[A,B]>`_ + ## * `values iterator<#values.i,TableRef[A,B]>`_ + ## + ## **Examples:** + ## + ## ```Nim + ## let a = { + ## 'o': [1, 5, 7, 9], + ## 'e': [2, 4, 6, 8] + ## }.newTable + ## + ## for k, v in a.pairs: + ## echo "key: ", k + ## echo "value: ", v + ## + ## # key: e + ## # value: [2, 4, 6, 8] + ## # key: o + ## # value: [1, 5, 7, 9] + ## ``` + let L = len(t) + for h in 0 .. high(t.data): + if isFilled(t.data[h].hcode): + yield (t.data[h].key, t.data[h].val) + assert(len(t) == L, "the length of the table changed while iterating over it") -{.deprecated: [TOrderedTable: OrderedTable, POrderedTable: OrderedTableRef].} +iterator mpairs*[A, B](t: TableRef[A, B]): (A, var B) = + ## Iterates over any `(key, value)` pair in the table `t`. The values + ## can be modified. + ## + ## See also: + ## * `pairs iterator<#pairs.i,TableRef[A,B]>`_ + ## * `mvalues iterator<#mvalues.i,TableRef[A,B]>`_ + runnableExamples: + let a = { + 'o': @[1, 5, 7, 9], + 'e': @[2, 4, 6, 8] + }.newTable + for k, v in a.mpairs: + v.add(v[0] + 10) + doAssert a == {'e': @[2, 4, 6, 8, 12], 'o': @[1, 5, 7, 9, 11]}.newTable + + let L = len(t) + for h in 0 .. high(t.data): + if isFilled(t.data[h].hcode): + yield (t.data[h].key, t.data[h].val) + assert(len(t) == L, "the length of the table changed while iterating over it") + +iterator keys*[A, B](t: TableRef[A, B]): lent A = + ## Iterates over any key in the table `t`. + ## + ## See also: + ## * `pairs iterator<#pairs.i,TableRef[A,B]>`_ + ## * `values iterator<#values.i,TableRef[A,B]>`_ + runnableExamples: + let a = { + 'o': @[1, 5, 7, 9], + 'e': @[2, 4, 6, 8] + }.newTable + for k in a.keys: + a[k].add(99) + doAssert a == {'e': @[2, 4, 6, 8, 99], 'o': @[1, 5, 7, 9, 99]}.newTable + + let L = len(t) + for h in 0 .. high(t.data): + if isFilled(t.data[h].hcode): + yield t.data[h].key + assert(len(t) == L, "the length of the table changed while iterating over it") + +iterator values*[A, B](t: TableRef[A, B]): lent B = + ## Iterates over any value in the table `t`. + ## + ## See also: + ## * `pairs iterator<#pairs.i,TableRef[A,B]>`_ + ## * `keys iterator<#keys.i,TableRef[A,B]>`_ + ## * `mvalues iterator<#mvalues.i,TableRef[A,B]>`_ + runnableExamples: + let a = { + 'o': @[1, 5, 7, 9], + 'e': @[2, 4, 6, 8] + }.newTable + for v in a.values: + doAssert v.len == 4 + + let L = len(t) + for h in 0 .. high(t.data): + if isFilled(t.data[h].hcode): + yield t.data[h].val + assert(len(t) == L, "the length of the table changed while iterating over it") -proc len*[A, B](t: OrderedTable[A, B]): int {.inline.} = - ## returns the number of keys in `t`. - result = t.counter +iterator mvalues*[A, B](t: TableRef[A, B]): var B = + ## Iterates over any value in the table `t`. The values can be modified. + ## + ## See also: + ## * `mpairs iterator<#mpairs.i,TableRef[A,B]>`_ + ## * `values iterator<#values.i,TableRef[A,B]>`_ + runnableExamples: + let a = { + 'o': @[1, 5, 7, 9], + 'e': @[2, 4, 6, 8] + }.newTable + for v in a.mvalues: + v.add(99) + doAssert a == {'e': @[2, 4, 6, 8, 99], 'o': @[1, 5, 7, 9, 99]}.newTable + + let L = len(t) + for h in 0 .. high(t.data): + if isFilled(t.data[h].hcode): + yield t.data[h].val + assert(len(t) == L, "the length of the table changed while iterating over it") -template forAllOrderedPairs(yieldStmt: stmt) {.dirty, immediate.} = - var h = t.first - while h >= 0: - var nxt = t.data[h].next - if isFilled(t.data[h].hcode): yieldStmt - h = nxt -iterator pairs*[A, B](t: OrderedTable[A, B]): (A, B) = - ## iterates over any (key, value) pair in the table `t` in insertion - ## order. - forAllOrderedPairs: - yield (t.data[h].key, t.data[h].val) -iterator mpairs*[A, B](t: var OrderedTable[A, B]): (A, var B) = - ## iterates over any (key, value) pair in the table `t` in insertion - ## order. The values can be modified. - forAllOrderedPairs: - yield (t.data[h].key, t.data[h].val) -iterator keys*[A, B](t: OrderedTable[A, B]): A = - ## iterates over any key in the table `t` in insertion order. - forAllOrderedPairs: - yield t.data[h].key -iterator values*[A, B](t: OrderedTable[A, B]): B = - ## iterates over any value in the table `t` in insertion order. - forAllOrderedPairs: - yield t.data[h].val -iterator mvalues*[A, B](t: var OrderedTable[A, B]): var B = - ## iterates over any value in the table `t` in insertion order. The values - ## can be modified. - forAllOrderedPairs: - yield t.data[h].val -proc rawGetKnownHC[A, B](t: OrderedTable[A, B], key: A, hc: THash): int = - rawGetKnownHCImpl() -proc rawGetDeep[A, B](t: OrderedTable[A, B], key: A, hc: var THash): int {.inline.} = - rawGetDeepImpl() +# --------------------------------------------------------------------------- +# ------------------------------ OrderedTable ------------------------------- +# --------------------------------------------------------------------------- -proc rawGet[A, B](t: OrderedTable[A, B], key: A, hc: var THash): int = - rawGetImpl() +type + OrderedKeyValuePair[A, B] = tuple[ + hcode: Hash, next: int, key: A, val: B] + OrderedKeyValuePairSeq[A, B] = seq[OrderedKeyValuePair[A, B]] + OrderedTable*[A, B] = object + ## Hash table that remembers insertion order. + ## + ## For creating an empty OrderedTable, use `initOrderedTable proc + ## <#initOrderedTable>`_. + data: OrderedKeyValuePairSeq[A, B] + counter, first, last: int + OrderedTableRef*[A, B] = ref OrderedTable[A, B] ## Ref version of + ## `OrderedTable<#OrderedTable>`_. + ## + ## For creating a new empty OrderedTableRef, use `newOrderedTable proc + ## <#newOrderedTable>`_. -proc `[]`*[A, B](t: OrderedTable[A, B], key: A): B = - ## retrieves the value at ``t[key]``. If `key` is not in `t`, - ## default empty value for the type `B` is returned - ## and no exception is raised. One can check with ``hasKey`` whether the key - ## exists. - var hc: THash - var index = rawGet(t, key, hc) - if index >= 0: result = t.data[index].val -proc mget*[A, B](t: var OrderedTable[A, B], key: A): var B = - ## retrieves the value at ``t[key]``. The value can be modified. - ## If `key` is not in `t`, the ``EInvalidKey`` exception is raised. - var hc: THash - var index = rawGet(t, key, hc) - if index >= 0: result = t.data[index].val - else: raise newException(KeyError, "key not found: " & $key) +# ------------------------------ helpers --------------------------------- -proc hasKey*[A, B](t: OrderedTable[A, B], key: A): bool = - ## returns true iff `key` is in the table `t`. - var hc: THash - result = rawGet(t, key, hc) >= 0 +proc rawGetKnownHC[A, B](t: OrderedTable[A, B], key: A, hc: Hash): int = + rawGetKnownHCImpl() + +proc rawGetDeep[A, B](t: OrderedTable[A, B], key: A, hc: var Hash): int {.inline.} = + rawGetDeepImpl() + +proc rawGet[A, B](t: OrderedTable[A, B], key: A, hc: var Hash): int = + rawGetImpl() proc rawInsert[A, B](t: var OrderedTable[A, B], data: var OrderedKeyValuePairSeq[A, B], - key: A, val: B, hc: THash, h: THash) = + key: A, val: sink B, hc: Hash, h: Hash) = rawInsertImpl() data[h].next = -1 if t.first < 0: t.first = h @@ -550,56 +1314,349 @@ proc enlarge[A, B](t: var OrderedTable[A, B]) = swap(t.data, n) while h >= 0: var nxt = n[h].next - if isFilled(n[h].hcode): - var j = -1 - rawGetKnownHC(t, n[h].key, n[h].hcode) - rawInsert(t, t.data, n[h].key, n[h].val, n[h].hcode, j) + let eh = n[h].hcode + if isFilled(eh): + var j: Hash = eh and maxHash(t) + while isFilled(t.data[j].hcode): + j = nextTry(j, maxHash(t)) + rawInsert(t, t.data, move n[h].key, move n[h].val, n[h].hcode, j) h = nxt -proc `[]=`*[A, B](t: var OrderedTable[A, B], key: A, val: B) = - ## puts a (key, value)-pair into `t`. - putImpl() +template forAllOrderedPairs(yieldStmt: untyped) {.dirty.} = + if t.counter > 0: + var h = t.first + while h >= 0: + var nxt = t.data[h].next + if isFilled(t.data[h].hcode): + yieldStmt + h = nxt + +# ---------------------------------------------------------------------- -proc add*[A, B](t: var OrderedTable[A, B], key: A, val: B) = - ## puts a new (key, value)-pair into `t` even if ``t[key]`` already exists. - addImpl() +proc initOrderedTable*[A, B](initialSize = defaultInitialSize): OrderedTable[A, B] = + ## Creates a new ordered hash table that is empty. + ## + ## Starting from Nim v0.20, tables are initialized by default and it is + ## not necessary to call this function explicitly. + ## + ## See also: + ## * `toOrderedTable proc<#toOrderedTable,openArray[]>`_ + ## * `newOrderedTable proc<#newOrderedTable>`_ for creating an + ## `OrderedTableRef` + runnableExamples: + let + a = initOrderedTable[int, string]() + b = initOrderedTable[char, seq[int]]() + result = default(OrderedTable[A, B]) + initImpl(result, initialSize) + +proc `[]=`*[A, B](t: var OrderedTable[A, B], key: A, val: sink B) = + ## Inserts a `(key, value)` pair into `t`. + ## + ## See also: + ## * `[] proc<#[],OrderedTable[A,B],A>`_ for retrieving a value of a key + ## * `hasKeyOrPut proc<#hasKeyOrPut,OrderedTable[A,B],A,B>`_ + ## * `mgetOrPut proc<#mgetOrPut,OrderedTable[A,B],A,B>`_ + ## * `del proc<#del,OrderedTable[A,B],A>`_ for removing a key from the table + runnableExamples: + var a = initOrderedTable[char, int]() + a['x'] = 7 + a['y'] = 33 + doAssert a == {'x': 7, 'y': 33}.toOrderedTable + + putImpl(enlarge) + +proc toOrderedTable*[A, B](pairs: openArray[(A, B)]): OrderedTable[A, B] = + ## Creates a new ordered hash table that contains the given `pairs`. + ## + ## `pairs` is a container consisting of `(key, value)` tuples. + ## + ## See also: + ## * `initOrderedTable proc<#initOrderedTable>`_ + ## * `newOrderedTable proc<#newOrderedTable,openArray[]>`_ for an + ## `OrderedTableRef` version + runnableExamples: + let a = [('a', 5), ('b', 9)] + let b = toOrderedTable(a) + assert b == {'a': 5, 'b': 9}.toOrderedTable + + result = initOrderedTable[A, B](pairs.len) + for key, val in items(pairs): result[key] = val + +proc `[]`*[A, B](t: OrderedTable[A, B], key: A): lent B = + ## Retrieves the value at `t[key]`. + ## + ## If `key` is not in `t`, the `KeyError` exception is raised. + ## One can check with `hasKey proc<#hasKey,OrderedTable[A,B],A>`_ whether + ## the key exists. + ## + ## See also: + ## * `getOrDefault proc<#getOrDefault,OrderedTable[A,B],A>`_ to return + ## a default value (e.g. zero for int) if the key doesn't exist + ## * `getOrDefault proc<#getOrDefault,OrderedTable[A,B],A,B>`_ to return + ## a custom value if the key doesn't exist + ## * `[]= proc<#[]=,OrderedTable[A,B],A,sinkB>`_ for inserting a new + ## (key, value) pair in the table + ## * `hasKey proc<#hasKey,OrderedTable[A,B],A>`_ for checking if a + ## key is in the table + runnableExamples: + let a = {'a': 5, 'b': 9}.toOrderedTable + doAssert a['a'] == 5 + doAssertRaises(KeyError): + echo a['z'] + + get(t, key) + +proc `[]`*[A, B](t: var OrderedTable[A, B], key: A): var B = + ## Retrieves the value at `t[key]`. The value can be modified. + ## + ## If `key` is not in `t`, the `KeyError` exception is raised. + ## + ## See also: + ## * `getOrDefault proc<#getOrDefault,OrderedTable[A,B],A>`_ to return + ## a default value (e.g. zero for int) if the key doesn't exist + ## * `getOrDefault proc<#getOrDefault,OrderedTable[A,B],A,B>`_ to return + ## a custom value if the key doesn't exist + ## * `[]= proc<#[]=,OrderedTable[A,B],A,sinkB>`_ for inserting a new + ## (key, value) pair in the table + ## * `hasKey proc<#hasKey,OrderedTable[A,B],A>`_ for checking if a + ## key is in the table + get(t, key) + +proc hasKey*[A, B](t: OrderedTable[A, B], key: A): bool = + ## Returns true if `key` is in the table `t`. + ## + ## See also: + ## * `contains proc<#contains,OrderedTable[A,B],A>`_ for use with the `in` + ## operator + ## * `[] proc<#[],OrderedTable[A,B],A>`_ for retrieving a value of a key + ## * `getOrDefault proc<#getOrDefault,OrderedTable[A,B],A>`_ to return + ## a default value (e.g. zero for int) if the key doesn't exist + ## * `getOrDefault proc<#getOrDefault,OrderedTable[A,B],A,B>`_ to return + ## a custom value if the key doesn't exist + runnableExamples: + let a = {'a': 5, 'b': 9}.toOrderedTable + doAssert a.hasKey('a') == true + doAssert a.hasKey('z') == false + + var hc: Hash = default(Hash) + result = rawGet(t, key, hc) >= 0 + +proc contains*[A, B](t: OrderedTable[A, B], key: A): bool = + ## Alias of `hasKey proc<#hasKey,OrderedTable[A,B],A>`_ for use with + ## the `in` operator. + runnableExamples: + let a = {'a': 5, 'b': 9}.toOrderedTable + doAssert 'b' in a == true + doAssert a.contains('z') == false + + return hasKey[A, B](t, key) + +proc hasKeyOrPut*[A, B](t: var OrderedTable[A, B], key: A, val: B): bool = + ## Returns true if `key` is in the table, otherwise inserts `value`. + ## + ## See also: + ## * `hasKey proc<#hasKey,OrderedTable[A,B],A>`_ + ## * `[] proc<#[],OrderedTable[A,B],A>`_ for retrieving a value of a key + ## * `getOrDefault proc<#getOrDefault,OrderedTable[A,B],A>`_ to return + ## a default value (e.g. zero for int) if the key doesn't exist + ## * `getOrDefault proc<#getOrDefault,OrderedTable[A,B],A,B>`_ to return + ## a custom value if the key doesn't exist + runnableExamples: + var a = {'a': 5, 'b': 9}.toOrderedTable + if a.hasKeyOrPut('a', 50): + a['a'] = 99 + if a.hasKeyOrPut('z', 50): + a['z'] = 99 + doAssert a == {'a': 99, 'b': 9, 'z': 50}.toOrderedTable + + hasKeyOrPutImpl(enlarge) + +proc getOrDefault*[A, B](t: OrderedTable[A, B], key: A): B = + ## Retrieves the value at `t[key]` if `key` is in `t`. Otherwise, the + ## default initialization value for type `B` is returned (e.g. 0 for any + ## integer type). + ## + ## See also: + ## * `[] proc<#[],OrderedTable[A,B],A>`_ for retrieving a value of a key + ## * `hasKey proc<#hasKey,OrderedTable[A,B],A>`_ + ## * `hasKeyOrPut proc<#hasKeyOrPut,OrderedTable[A,B],A,B>`_ + ## * `mgetOrPut proc<#mgetOrPut,OrderedTable[A,B],A,B>`_ + ## * `getOrDefault proc<#getOrDefault,OrderedTable[A,B],A,B>`_ to return + ## a custom value if the key doesn't exist + runnableExamples: + let a = {'a': 5, 'b': 9}.toOrderedTable + doAssert a.getOrDefault('a') == 5 + doAssert a.getOrDefault('z') == 0 + result = default(B) + getOrDefaultImpl(t, key) + +proc getOrDefault*[A, B](t: OrderedTable[A, B], key: A, default: B): B = + ## Retrieves the value at `t[key]` if `key` is in `t`. + ## Otherwise, `default` is returned. + ## + ## See also: + ## * `[] proc<#[],OrderedTable[A,B],A>`_ for retrieving a value of a key + ## * `hasKey proc<#hasKey,OrderedTable[A,B],A>`_ + ## * `hasKeyOrPut proc<#hasKeyOrPut,OrderedTable[A,B],A,B>`_ + ## * `mgetOrPut proc<#mgetOrPut,OrderedTable[A,B],A,B>`_ + ## * `getOrDefault proc<#getOrDefault,OrderedTable[A,B],A>`_ to return + ## a default value (e.g. zero for int) if the key doesn't exist + runnableExamples: + let a = {'a': 5, 'b': 9}.toOrderedTable + doAssert a.getOrDefault('a', 99) == 5 + doAssert a.getOrDefault('z', 99) == 99 + result = default(B) + getOrDefaultImpl(t, key, default) proc mgetOrPut*[A, B](t: var OrderedTable[A, B], key: A, val: B): var B = - ## retrieves value at ``t[key]`` or puts ``value`` if not present, either way + ## Retrieves value at `t[key]` or puts `val` if not present, either way ## returning a value which can be modified. - mgetOrPutImpl() + ## + ## See also: + ## * `[] proc<#[],OrderedTable[A,B],A>`_ for retrieving a value of a key + ## * `hasKey proc<#hasKey,OrderedTable[A,B],A>`_ + ## * `hasKeyOrPut proc<#hasKeyOrPut,OrderedTable[A,B],A,B>`_ + ## * `getOrDefault proc<#getOrDefault,OrderedTable[A,B],A>`_ to return + ## a default value (e.g. zero for int) if the key doesn't exist + ## * `getOrDefault proc<#getOrDefault,OrderedTable[A,B],A,B>`_ to return + ## a custom value if the key doesn't exist + runnableExamples: + var a = {'a': 5, 'b': 9}.toOrderedTable + doAssert a.mgetOrPut('a', 99) == 5 + doAssert a.mgetOrPut('z', 99) == 99 + doAssert a == {'a': 5, 'b': 9, 'z': 99}.toOrderedTable + + mgetOrPutImpl(enlarge) + +proc mgetOrPut*[A, B](t: var OrderedTable[A, B], key: A): var B = + ## Retrieves the value at `t[key]` or puts the + ## default initialization value for type `B` (e.g. 0 for any + ## integer type). + runnableExamples: + var a = {'a': 5}.toOrderedTable + doAssert a.mgetOrPut('a') == 5 + a.mgetOrPut('z').inc + doAssert a == {'a': 5, 'z': 1}.toOrderedTable + + mgetOrPutImpl(enlarge) -proc hasKeyOrPut*[A, B](t: var OrderedTable[A, B], key: A, val: B): bool = - ## returns true iff `key` is in the table, otherwise inserts `value`. - hasKeyOrPutImpl() - -proc initOrderedTable*[A, B](initialSize=64): OrderedTable[A, B] = - ## creates a new ordered hash table that is empty. - ## - ## `initialSize` needs to be a power of two. If you need to accept runtime - ## values for this you could use the ``nextPowerOfTwo`` proc from the - ## `math <math.html>`_ module or the ``rightSize`` proc from this module. - assert isPowerOfTwo(initialSize) - result.counter = 0 - result.first = -1 - result.last = -1 - newSeq(result.data, initialSize) - -proc toOrderedTable*[A, B](pairs: openArray[(A, - B)]): OrderedTable[A, B] = - ## creates a new ordered hash table that contains the given `pairs`. - result = initOrderedTable[A, B](rightSize(pairs.len)) - for key, val in items(pairs): result[key] = val +proc len*[A, B](t: OrderedTable[A, B]): int {.inline.} = + ## Returns the number of keys in `t`. + runnableExamples: + let a = {'a': 5, 'b': 9}.toOrderedTable + doAssert len(a) == 2 -proc `$`*[A, B](t: OrderedTable[A, B]): string = - ## The `$` operator for ordered hash tables. - dollarImpl() + result = t.counter + +proc add*[A, B](t: var OrderedTable[A, B], key: A, val: sink B) {.deprecated: + "Deprecated since v1.4; it was more confusing than useful, use `[]=`".} = + ## Puts a new `(key, value)` pair into `t` even if `t[key]` already exists. + ## + ## **This can introduce duplicate keys into the table!** + ## + ## Use `[]= proc<#[]=,OrderedTable[A,B],A,sinkB>`_ for inserting a new + ## (key, value) pair in the table without introducing duplicates. + addImpl(enlarge) + +proc del*[A, B](t: var OrderedTable[A, B], key: A) = + ## Deletes `key` from hash table `t`. Does nothing if the key does not exist. + ## + ## O(n) complexity. + ## + ## See also: + ## * `pop proc<#pop,OrderedTable[A,B],A,B>`_ + ## * `clear proc<#clear,OrderedTable[A,B]>`_ to empty the whole table + runnableExamples: + var a = {'a': 5, 'b': 9, 'c': 13}.toOrderedTable + a.del('a') + doAssert a == {'b': 9, 'c': 13}.toOrderedTable + a.del('z') + doAssert a == {'b': 9, 'c': 13}.toOrderedTable + + if t.counter == 0: return + var n: OrderedKeyValuePairSeq[A, B] + newSeq(n, len(t.data)) + var h = t.first + t.first = -1 + t.last = -1 + swap(t.data, n) + let hc = genHash(key) + while h >= 0: + var nxt = n[h].next + if isFilled(n[h].hcode): + if n[h].hcode == hc and n[h].key == key: + dec t.counter + else: + var j = -1 - rawGetKnownHC(t, n[h].key, n[h].hcode) + rawInsert(t, t.data, move n[h].key, move n[h].val, n[h].hcode, j) + h = nxt + +proc pop*[A, B](t: var OrderedTable[A, B], key: A, val: var B): bool {.since: (1, 1).} = + ## Deletes the `key` from the table. + ## Returns `true`, if the `key` existed, and sets `val` to the + ## mapping of the key. Otherwise, returns `false`, and the `val` is + ## unchanged. + ## + ## O(n) complexity. + ## + ## See also: + ## * `del proc<#del,OrderedTable[A,B],A>`_ + ## * `clear proc<#clear,OrderedTable[A,B]>`_ to empty the whole table + runnableExamples: + var + a = {'c': 5, 'b': 9, 'a': 13}.toOrderedTable + i: int + doAssert a.pop('b', i) == true + doAssert a == {'c': 5, 'a': 13}.toOrderedTable + doAssert i == 9 + i = 0 + doAssert a.pop('z', i) == false + doAssert a == {'c': 5, 'a': 13}.toOrderedTable + doAssert i == 0 + + var hc: Hash + var index = rawGet(t, key, hc) + result = index >= 0 + if result: + val = move(t.data[index].val) + del(t, key) + +proc clear*[A, B](t: var OrderedTable[A, B]) = + ## Resets the table so that it is empty. + ## + ## See also: + ## * `del proc<#del,OrderedTable[A,B],A>`_ + ## * `pop proc<#pop,OrderedTable[A,B],A,B>`_ + runnableExamples: + var a = {'a': 5, 'b': 9, 'c': 13}.toOrderedTable + doAssert len(a) == 3 + clear(a) + doAssert len(a) == 0 + + clearImpl() + t.first = -1 + t.last = -1 -proc sort*[A, B](t: var OrderedTable[A, B], - cmp: proc (x,y: (A, B)): int) = - ## sorts `t` according to `cmp`. This modifies the internal list +proc sort*[A, B](t: var OrderedTable[A, B], cmp: proc (x, y: (A, B)): int, + order = SortOrder.Ascending) {.effectsOf: cmp.} = + ## Sorts `t` according to the function `cmp`. + ## + ## This modifies the internal list ## that kept the insertion order, so insertion order is lost after this ## call but key lookup and insertions remain possible after `sort` (in - ## contrast to the `sort` for count tables). + ## contrast to the `sort proc<#sort,CountTable[A]>`_ for count tables). + runnableExamples: + import std/[algorithm] + var a = initOrderedTable[char, int]() + for i, c in "cab": + a[c] = 10*i + doAssert a == {'c': 0, 'a': 10, 'b': 20}.toOrderedTable + a.sort(system.cmp) + doAssert a == {'a': 10, 'b': 20, 'c': 0}.toOrderedTable + a.sort(system.cmp, order = SortOrder.Descending) + doAssert a == {'c': 0, 'b': 20, 'a': 10}.toOrderedTable + var list = t.first var p, q, e, tail, oldhead: int @@ -626,7 +1683,7 @@ proc sort*[A, B](t: var OrderedTable[A, B], elif qsize == 0 or q < 0: e = p; p = t.data[p].next; dec(psize) elif cmp((t.data[p].key, t.data[p].val), - (t.data[q].key, t.data[q].val)) <= 0: + (t.data[q].key, t.data[q].val)) * order <= 0: e = p; p = t.data[p].next; dec(psize) else: e = q; q = t.data[q].next; dec(qsize) @@ -640,173 +1697,609 @@ proc sort*[A, B](t: var OrderedTable[A, B], t.first = list t.last = tail -proc len*[A, B](t: OrderedTableRef[A, B]): int {.inline.} = - ## returns the number of keys in `t`. - result = t.counter +proc `$`*[A, B](t: OrderedTable[A, B]): string = + ## The `$` operator for ordered hash tables. Used internally when calling + ## `echo` on a table. + dollarImpl() -template forAllOrderedPairs(yieldStmt: stmt) {.dirty, immediate.} = - var h = t.first - while h >= 0: - var nxt = t.data[h].next - if isFilled(t.data[h].hcode): yieldStmt - h = nxt +proc `==`*[A, B](s, t: OrderedTable[A, B]): bool = + ## The `==` operator for ordered hash tables. Returns `true` if both the + ## content and the order are equal. + runnableExamples: + let + a = {'a': 5, 'b': 9, 'c': 13}.toOrderedTable + b = {'b': 9, 'c': 13, 'a': 5}.toOrderedTable + doAssert a != b + + if s.counter != t.counter: + return false + if s.counter == 0 and t.counter == 0: + return true + var ht = t.first + var hs = s.first + while ht >= 0 and hs >= 0: + var nxtt = t.data[ht].next + var nxts = s.data[hs].next + if isFilled(t.data[ht].hcode) and isFilled(s.data[hs].hcode): + if (s.data[hs].key != t.data[ht].key) or (s.data[hs].val != t.data[ht].val): + return false + ht = nxtt + hs = nxts + return true -iterator pairs*[A, B](t: OrderedTableRef[A, B]): (A, B) = - ## iterates over any (key, value) pair in the table `t` in insertion + + +iterator pairs*[A, B](t: OrderedTable[A, B]): (A, B) = + ## Iterates over any `(key, value)` pair in the table `t` in insertion ## order. + ## + ## See also: + ## * `mpairs iterator<#mpairs.i,OrderedTable[A,B]>`_ + ## * `keys iterator<#keys.i,OrderedTable[A,B]>`_ + ## * `values iterator<#values.i,OrderedTable[A,B]>`_ + ## + ## **Examples:** + ## + ## ```Nim + ## let a = { + ## 'o': [1, 5, 7, 9], + ## 'e': [2, 4, 6, 8] + ## }.toOrderedTable + ## + ## for k, v in a.pairs: + ## echo "key: ", k + ## echo "value: ", v + ## + ## # key: o + ## # value: [1, 5, 7, 9] + ## # key: e + ## # value: [2, 4, 6, 8] + ## ``` + + let L = len(t) forAllOrderedPairs: yield (t.data[h].key, t.data[h].val) + assert(len(t) == L, "the length of the table changed while iterating over it") -iterator mpairs*[A, B](t: OrderedTableRef[A, B]): (A, var B) = - ## iterates over any (key, value) pair in the table `t` in insertion - ## order. The values can be modified. +iterator mpairs*[A, B](t: var OrderedTable[A, B]): (A, var B) = + ## Iterates over any `(key, value)` pair in the table `t` (must be + ## declared as `var`) in insertion order. The values can be modified. + ## + ## See also: + ## * `pairs iterator<#pairs.i,OrderedTable[A,B]>`_ + ## * `mvalues iterator<#mvalues.i,OrderedTable[A,B]>`_ + runnableExamples: + var a = { + 'o': @[1, 5, 7, 9], + 'e': @[2, 4, 6, 8] + }.toOrderedTable + for k, v in a.mpairs: + v.add(v[0] + 10) + doAssert a == {'o': @[1, 5, 7, 9, 11], + 'e': @[2, 4, 6, 8, 12]}.toOrderedTable + + let L = len(t) forAllOrderedPairs: yield (t.data[h].key, t.data[h].val) + assert(len(t) == L, "the length of the table changed while iterating over it") -iterator keys*[A, B](t: OrderedTableRef[A, B]): A = - ## iterates over any key in the table `t` in insertion order. +iterator keys*[A, B](t: OrderedTable[A, B]): lent A = + ## Iterates over any key in the table `t` in insertion order. + ## + ## See also: + ## * `pairs iterator<#pairs.i,OrderedTable[A,B]>`_ + ## * `values iterator<#values.i,OrderedTable[A,B]>`_ + runnableExamples: + var a = { + 'o': @[1, 5, 7, 9], + 'e': @[2, 4, 6, 8] + }.toOrderedTable + for k in a.keys: + a[k].add(99) + doAssert a == {'o': @[1, 5, 7, 9, 99], + 'e': @[2, 4, 6, 8, 99]}.toOrderedTable + + let L = len(t) forAllOrderedPairs: yield t.data[h].key + assert(len(t) == L, "the length of the table changed while iterating over it") -iterator values*[A, B](t: OrderedTableRef[A, B]): B = - ## iterates over any value in the table `t` in insertion order. +iterator values*[A, B](t: OrderedTable[A, B]): lent B = + ## Iterates over any value in the table `t` in insertion order. + ## + ## See also: + ## * `pairs iterator<#pairs.i,OrderedTable[A,B]>`_ + ## * `keys iterator<#keys.i,OrderedTable[A,B]>`_ + ## * `mvalues iterator<#mvalues.i,OrderedTable[A,B]>`_ + runnableExamples: + let a = { + 'o': @[1, 5, 7, 9], + 'e': @[2, 4, 6, 8] + }.toOrderedTable + for v in a.values: + doAssert v.len == 4 + + let L = len(t) forAllOrderedPairs: yield t.data[h].val + assert(len(t) == L, "the length of the table changed while iterating over it") -iterator mvalues*[A, B](t: OrderedTableRef[A, B]): var B = - ## iterates over any value in the table `t` in insertion order. The values +iterator mvalues*[A, B](t: var OrderedTable[A, B]): var B = + ## Iterates over any value in the table `t` (must be + ## declared as `var`) in insertion order. The values ## can be modified. + ## + ## See also: + ## * `mpairs iterator<#mpairs.i,OrderedTable[A,B]>`_ + ## * `values iterator<#values.i,OrderedTable[A,B]>`_ + runnableExamples: + var a = { + 'o': @[1, 5, 7, 9], + 'e': @[2, 4, 6, 8] + }.toOrderedTable + for v in a.mvalues: + v.add(99) + doAssert a == {'o': @[1, 5, 7, 9, 99], + 'e': @[2, 4, 6, 8, 99]}.toOrderedTable + + let L = len(t) forAllOrderedPairs: yield t.data[h].val + assert(len(t) == L, "the length of the table changed while iterating over it") + +# --------------------------------------------------------------------------- +# --------------------------- OrderedTableRef ------------------------------- +# --------------------------------------------------------------------------- + +proc newOrderedTable*[A, B](initialSize = defaultInitialSize): OrderedTableRef[A, B] = + ## Creates a new ordered ref hash table that is empty. + ## + ## See also: + ## * `newOrderedTable proc<#newOrderedTable,openArray[]>`_ for creating + ## an `OrderedTableRef` from a collection of `(key, value)` pairs + ## * `initOrderedTable proc<#initOrderedTable>`_ for creating an + ## `OrderedTable` + runnableExamples: + let + a = newOrderedTable[int, string]() + b = newOrderedTable[char, seq[int]]() + new(result) + {.noSideEffect.}: + result[] = initOrderedTable[A, B](initialSize) -proc `[]`*[A, B](t: OrderedTableRef[A, B], key: A): B = - ## retrieves the value at ``t[key]``. If `key` is not in `t`, - ## default empty value for the type `B` is returned - ## and no exception is raised. One can check with ``hasKey`` whether the key - ## exists. +proc newOrderedTable*[A, B](pairs: openArray[(A, B)]): OrderedTableRef[A, B] = + ## Creates a new ordered ref hash table that contains the given `pairs`. + ## + ## `pairs` is a container consisting of `(key, value)` tuples. + ## + ## See also: + ## * `newOrderedTable proc<#newOrderedTable>`_ + ## * `toOrderedTable proc<#toOrderedTable,openArray[]>`_ for an + ## `OrderedTable` version + runnableExamples: + let a = [('a', 5), ('b', 9)] + let b = newOrderedTable(a) + assert b == {'a': 5, 'b': 9}.newOrderedTable + + result = newOrderedTable[A, B](pairs.len) + {.noSideEffect.}: + for key, val in items(pairs): result[key] = val + + +proc `[]`*[A, B](t: OrderedTableRef[A, B], key: A): var B = + ## Retrieves the value at `t[key]`. + ## + ## If `key` is not in `t`, the `KeyError` exception is raised. + ## One can check with `hasKey proc<#hasKey,OrderedTableRef[A,B],A>`_ whether + ## the key exists. + ## + ## See also: + ## * `getOrDefault proc<#getOrDefault,OrderedTableRef[A,B],A>`_ to return + ## a default value (e.g. zero for int) if the key doesn't exist + ## * `getOrDefault proc<#getOrDefault,OrderedTableRef[A,B],A,B>`_ to return + ## a custom value if the key doesn't exist + ## * `[]= proc<#[]=,OrderedTableRef[A,B],A,sinkB>`_ for inserting a new + ## (key, value) pair in the table + ## * `hasKey proc<#hasKey,OrderedTableRef[A,B],A>`_ for checking if + ## a key is in the table + runnableExamples: + let a = {'a': 5, 'b': 9}.newOrderedTable + doAssert a['a'] == 5 + doAssertRaises(KeyError): + echo a['z'] result = t[][key] -proc mget*[A, B](t: OrderedTableRef[A, B], key: A): var B = - ## retrieves the value at ``t[key]``. The value can be modified. - ## If `key` is not in `t`, the ``EInvalidKey`` exception is raised. - result = t[].mget(key) +proc `[]=`*[A, B](t: OrderedTableRef[A, B], key: A, val: sink B) = + ## Inserts a `(key, value)` pair into `t`. + ## + ## See also: + ## * `[] proc<#[],OrderedTableRef[A,B],A>`_ for retrieving a value of a key + ## * `hasKeyOrPut proc<#hasKeyOrPut,OrderedTableRef[A,B],A,B>`_ + ## * `mgetOrPut proc<#mgetOrPut,OrderedTableRef[A,B],A,B>`_ + ## * `del proc<#del,OrderedTableRef[A,B],A>`_ for removing a key from the table + runnableExamples: + var a = newOrderedTable[char, int]() + a['x'] = 7 + a['y'] = 33 + doAssert a == {'x': 7, 'y': 33}.newOrderedTable + + t[][key] = val + +proc hasKey*[A, B](t: OrderedTableRef[A, B], key: A): bool = + ## Returns true if `key` is in the table `t`. + ## + ## See also: + ## * `contains proc<#contains,OrderedTableRef[A,B],A>`_ for use with the `in` + ## operator + ## * `[] proc<#[],OrderedTableRef[A,B],A>`_ for retrieving a value of a key + ## * `getOrDefault proc<#getOrDefault,OrderedTableRef[A,B],A>`_ to return + ## a default value (e.g. zero for int) if the key doesn't exist + ## * `getOrDefault proc<#getOrDefault,OrderedTableRef[A,B],A,B>`_ to return + ## a custom value if the key doesn't exist + runnableExamples: + let a = {'a': 5, 'b': 9}.newOrderedTable + doAssert a.hasKey('a') == true + doAssert a.hasKey('z') == false + + result = t[].hasKey(key) + +proc contains*[A, B](t: OrderedTableRef[A, B], key: A): bool = + ## Alias of `hasKey proc<#hasKey,OrderedTableRef[A,B],A>`_ for use with + ## the `in` operator. + runnableExamples: + let a = {'a': 5, 'b': 9}.newOrderedTable + doAssert 'b' in a == true + doAssert a.contains('z') == false + + return hasKey[A, B](t, key) + +proc hasKeyOrPut*[A, B](t: OrderedTableRef[A, B], key: A, val: B): bool = + ## Returns true if `key` is in the table, otherwise inserts `value`. + ## + ## See also: + ## * `hasKey proc<#hasKey,OrderedTableRef[A,B],A>`_ + ## * `[] proc<#[],OrderedTableRef[A,B],A>`_ for retrieving a value of a key + ## * `getOrDefault proc<#getOrDefault,OrderedTableRef[A,B],A>`_ to return + ## a default value (e.g. zero for int) if the key doesn't exist + ## * `getOrDefault proc<#getOrDefault,OrderedTableRef[A,B],A,B>`_ to return + ## a custom value if the key doesn't exist + runnableExamples: + var a = {'a': 5, 'b': 9}.newOrderedTable + if a.hasKeyOrPut('a', 50): + a['a'] = 99 + if a.hasKeyOrPut('z', 50): + a['z'] = 99 + doAssert a == {'a': 99, 'b': 9, 'z': 50}.newOrderedTable + + result = t[].hasKeyOrPut(key, val) + +proc getOrDefault*[A, B](t: OrderedTableRef[A, B], key: A): B = + ## Retrieves the value at `t[key]` if `key` is in `t`. Otherwise, the + ## default initialization value for type `B` is returned (e.g. 0 for any + ## integer type). + ## + ## See also: + ## * `[] proc<#[],OrderedTableRef[A,B],A>`_ for retrieving a value of a key + ## * `hasKey proc<#hasKey,OrderedTableRef[A,B],A>`_ + ## * `hasKeyOrPut proc<#hasKeyOrPut,OrderedTableRef[A,B],A,B>`_ + ## * `mgetOrPut proc<#mgetOrPut,OrderedTableRef[A,B],A,B>`_ + ## * `getOrDefault proc<#getOrDefault,OrderedTableRef[A,B],A,B>`_ to return + ## a custom value if the key doesn't exist + runnableExamples: + let a = {'a': 5, 'b': 9}.newOrderedTable + doAssert a.getOrDefault('a') == 5 + doAssert a.getOrDefault('z') == 0 + + getOrDefault(t[], key) + +proc getOrDefault*[A, B](t: OrderedTableRef[A, B], key: A, default: B): B = + ## Retrieves the value at `t[key]` if `key` is in `t`. + ## Otherwise, `default` is returned. + ## + ## See also: + ## * `[] proc<#[],OrderedTableRef[A,B],A>`_ for retrieving a value of a key + ## * `hasKey proc<#hasKey,OrderedTableRef[A,B],A>`_ + ## * `hasKeyOrPut proc<#hasKeyOrPut,OrderedTableRef[A,B],A,B>`_ + ## * `mgetOrPut proc<#mgetOrPut,OrderedTableRef[A,B],A,B>`_ + ## * `getOrDefault proc<#getOrDefault,OrderedTableRef[A,B],A>`_ to return + ## a default value (e.g. zero for int) if the key doesn't exist + runnableExamples: + let a = {'a': 5, 'b': 9}.newOrderedTable + doAssert a.getOrDefault('a', 99) == 5 + doAssert a.getOrDefault('z', 99) == 99 + + getOrDefault(t[], key, default) proc mgetOrPut*[A, B](t: OrderedTableRef[A, B], key: A, val: B): var B = - ## retrieves value at ``t[key]`` or puts ``val`` if not present, either way + ## Retrieves value at `t[key]` or puts `val` if not present, either way ## returning a value which can be modified. + ## + ## See also: + ## * `[] proc<#[],OrderedTableRef[A,B],A>`_ for retrieving a value of a key + ## * `hasKey proc<#hasKey,OrderedTableRef[A,B],A>`_ + ## * `hasKeyOrPut proc<#hasKeyOrPut,OrderedTableRef[A,B],A,B>`_ + ## * `getOrDefault proc<#getOrDefault,OrderedTableRef[A,B],A>`_ to return + ## a default value (e.g. zero for int) if the key doesn't exist + ## * `getOrDefault proc<#getOrDefault,OrderedTableRef[A,B],A,B>`_ to return + ## a custom value if the key doesn't exist + runnableExamples: + var a = {'a': 5, 'b': 9}.newOrderedTable + doAssert a.mgetOrPut('a', 99) == 5 + doAssert a.mgetOrPut('z', 99) == 99 + doAssert a == {'a': 5, 'b': 9, 'z': 99}.newOrderedTable + result = t[].mgetOrPut(key, val) -proc hasKeyOrPut*[A, B](t: var OrderedTableRef[A, B], key: A, val: B): bool = - ## returns true iff `key` is in the table, otherwise inserts `val`. - result = t[].hasKeyOrPut(key, val) +proc mgetOrPut*[A, B](t: OrderedTableRef[A, B], key: A): var B = + ## Retrieves the value at `t[key]` or puts the + ## default initialization value for type `B` (e.g. 0 for any + ## integer type). + runnableExamples: + var a = {'a': 5}.toOrderedTable + doAssert a.mgetOrPut('a') == 5 + a.mgetOrPut('z').inc + doAssert a == {'a': 5, 'z': 1}.toOrderedTable -proc hasKey*[A, B](t: OrderedTableRef[A, B], key: A): bool = - ## returns true iff `key` is in the table `t`. - result = t[].hasKey(key) + t[].mgetOrPut(key) -proc `[]=`*[A, B](t: OrderedTableRef[A, B], key: A, val: B) = - ## puts a (key, value)-pair into `t`. - t[][key] = val +proc len*[A, B](t: OrderedTableRef[A, B]): int {.inline.} = + ## Returns the number of keys in `t`. + runnableExamples: + let a = {'a': 5, 'b': 9}.newOrderedTable + doAssert len(a) == 2 -proc add*[A, B](t: OrderedTableRef[A, B], key: A, val: B) = - ## puts a new (key, value)-pair into `t` even if ``t[key]`` already exists. + result = t.counter + +proc add*[A, B](t: OrderedTableRef[A, B], key: A, val: sink B) {.deprecated: + "Deprecated since v1.4; it was more confusing than useful, use `[]=`".} = + ## Puts a new `(key, value)` pair into `t` even if `t[key]` already exists. + ## + ## **This can introduce duplicate keys into the table!** + ## + ## Use `[]= proc<#[]=,OrderedTableRef[A,B],A,sinkB>`_ for inserting a new + ## (key, value) pair in the table without introducing duplicates. t[].add(key, val) -proc newOrderedTable*[A, B](initialSize=64): OrderedTableRef[A, B] = - ## creates a new ordered hash table that is empty. +proc del*[A, B](t: OrderedTableRef[A, B], key: A) = + ## Deletes `key` from hash table `t`. Does nothing if the key does not exist. ## - ## `initialSize` needs to be a power of two. If you need to accept runtime - ## values for this you could use the ``nextPowerOfTwo`` proc from the - ## `math <math.html>`_ module or the ``rightSize`` proc from this module. - new(result) - result[] = initOrderedTable[A, B]() + ## See also: + ## * `clear proc<#clear,OrderedTableRef[A,B]>`_ to empty the whole table + runnableExamples: + var a = {'a': 5, 'b': 9, 'c': 13}.newOrderedTable + a.del('a') + doAssert a == {'b': 9, 'c': 13}.newOrderedTable + a.del('z') + doAssert a == {'b': 9, 'c': 13}.newOrderedTable -proc newOrderedTable*[A, B](pairs: openArray[(A, B)]): OrderedTableRef[A, B] = - ## creates a new ordered hash table that contains the given `pairs`. - result = newOrderedTable[A, B](rightSize(pairs.len)) - for key, val in items(pairs): result[key] = val + t[].del(key) + +proc pop*[A, B](t: OrderedTableRef[A, B], key: A, val: var B): bool {.since: (1, 1).} = + ## Deletes the `key` from the table. + ## Returns `true`, if the `key` existed, and sets `val` to the + ## mapping of the key. Otherwise, returns `false`, and the `val` is + ## unchanged. + ## + ## See also: + ## * `del proc<#del,OrderedTableRef[A,B],A>`_ + ## * `clear proc<#clear,OrderedTableRef[A,B]>`_ to empty the whole table + runnableExamples: + var + a = {'c': 5, 'b': 9, 'a': 13}.newOrderedTable + i: int + doAssert a.pop('b', i) == true + doAssert a == {'c': 5, 'a': 13}.newOrderedTable + doAssert i == 9 + i = 0 + doAssert a.pop('z', i) == false + doAssert a == {'c': 5, 'a': 13}.newOrderedTable + doAssert i == 0 + + pop(t[], key, val) + +proc clear*[A, B](t: OrderedTableRef[A, B]) = + ## Resets the table so that it is empty. + ## + ## See also: + ## * `del proc<#del,OrderedTableRef[A,B],A>`_ + runnableExamples: + var a = {'a': 5, 'b': 9, 'c': 13}.newOrderedTable + doAssert len(a) == 3 + clear(a) + doAssert len(a) == 0 + + clear(t[]) + +proc sort*[A, B](t: OrderedTableRef[A, B], cmp: proc (x, y: (A, B)): int, + order = SortOrder.Ascending) {.effectsOf: cmp.} = + ## Sorts `t` according to the function `cmp`. + ## + ## This modifies the internal list + ## that kept the insertion order, so insertion order is lost after this + ## call but key lookup and insertions remain possible after `sort` (in + ## contrast to the `sort proc<#sort,CountTableRef[A]>`_ for count tables). + runnableExamples: + import std/[algorithm] + var a = newOrderedTable[char, int]() + for i, c in "cab": + a[c] = 10*i + doAssert a == {'c': 0, 'a': 10, 'b': 20}.newOrderedTable + a.sort(system.cmp) + doAssert a == {'a': 10, 'b': 20, 'c': 0}.newOrderedTable + a.sort(system.cmp, order = SortOrder.Descending) + doAssert a == {'c': 0, 'b': 20, 'a': 10}.newOrderedTable + + t[].sort(cmp, order = order) proc `$`*[A, B](t: OrderedTableRef[A, B]): string = - ## The `$` operator for ordered hash tables. + ## The `$` operator for hash tables. Used internally when calling `echo` + ## on a table. dollarImpl() -proc sort*[A, B](t: OrderedTableRef[A, B], - cmp: proc (x,y: (A, B)): int) = - ## sorts `t` according to `cmp`. This modifies the internal list - ## that kept the insertion order, so insertion order is lost after this - ## call but key lookup and insertions remain possible after `sort` (in - ## contrast to the `sort` for count tables). - t[].sort(cmp) +proc `==`*[A, B](s, t: OrderedTableRef[A, B]): bool = + ## The `==` operator for ordered hash tables. Returns true if either both + ## tables are `nil`, or neither is `nil` and the content and the order of + ## both are equal. + runnableExamples: + let + a = {'a': 5, 'b': 9, 'c': 13}.newOrderedTable + b = {'b': 9, 'c': 13, 'a': 5}.newOrderedTable + doAssert a != b -# ------------------------------ count tables ------------------------------- + if isNil(s): result = isNil(t) + elif isNil(t): result = false + else: result = s[] == t[] -type - CountTable* {.myShallow.}[ - A] = object ## table that counts the number of each key - data: seq[tuple[key: A, val: int]] - counter: int - CountTableRef*[A] = ref CountTable[A] -{.deprecated: [TCountTable: CountTable, PCountTable: CountTableRef].} -proc len*[A](t: CountTable[A]): int = - ## returns the number of keys in `t`. - result = t.counter +iterator pairs*[A, B](t: OrderedTableRef[A, B]): (A, B) = + ## Iterates over any `(key, value)` pair in the table `t` in insertion + ## order. + ## + ## See also: + ## * `mpairs iterator<#mpairs.i,OrderedTableRef[A,B]>`_ + ## * `keys iterator<#keys.i,OrderedTableRef[A,B]>`_ + ## * `values iterator<#values.i,OrderedTableRef[A,B]>`_ + ## + ## **Examples:** + ## + ## ```Nim + ## let a = { + ## 'o': [1, 5, 7, 9], + ## 'e': [2, 4, 6, 8] + ## }.newOrderedTable + ## + ## for k, v in a.pairs: + ## echo "key: ", k + ## echo "value: ", v + ## + ## # key: o + ## # value: [1, 5, 7, 9] + ## # key: e + ## # value: [2, 4, 6, 8] + ## ``` -iterator pairs*[A](t: CountTable[A]): (A, int) = - ## iterates over any (key, value) pair in the table `t`. - for h in 0..high(t.data): - if t.data[h].val != 0: yield (t.data[h].key, t.data[h].val) + let L = len(t) + forAllOrderedPairs: + yield (t.data[h].key, t.data[h].val) + assert(len(t) == L, "the length of the table changed while iterating over it") -iterator mpairs*[A](t: var CountTable[A]): (A, var int) = - ## iterates over any (key, value) pair in the table `t`. The values can - ## be modified. - for h in 0..high(t.data): - if t.data[h].val != 0: yield (t.data[h].key, t.data[h].val) +iterator mpairs*[A, B](t: OrderedTableRef[A, B]): (A, var B) = + ## Iterates over any `(key, value)` pair in the table `t` in insertion + ## order. The values can be modified. + ## + ## See also: + ## * `pairs iterator<#pairs.i,OrderedTableRef[A,B]>`_ + ## * `mvalues iterator<#mvalues.i,OrderedTableRef[A,B]>`_ + runnableExamples: + let a = { + 'o': @[1, 5, 7, 9], + 'e': @[2, 4, 6, 8] + }.newOrderedTable + for k, v in a.mpairs: + v.add(v[0] + 10) + doAssert a == {'o': @[1, 5, 7, 9, 11], + 'e': @[2, 4, 6, 8, 12]}.newOrderedTable + + let L = len(t) + forAllOrderedPairs: + yield (t.data[h].key, t.data[h].val) + assert(len(t) == L, "the length of the table changed while iterating over it") -iterator keys*[A](t: CountTable[A]): A = - ## iterates over any key in the table `t`. - for h in 0..high(t.data): - if t.data[h].val != 0: yield t.data[h].key +iterator keys*[A, B](t: OrderedTableRef[A, B]): lent A = + ## Iterates over any key in the table `t` in insertion order. + ## + ## See also: + ## * `pairs iterator<#pairs.i,OrderedTableRef[A,B]>`_ + ## * `values iterator<#values.i,OrderedTableRef[A,B]>`_ + runnableExamples: + let a = { + 'o': @[1, 5, 7, 9], + 'e': @[2, 4, 6, 8] + }.newOrderedTable + for k in a.keys: + a[k].add(99) + doAssert a == {'o': @[1, 5, 7, 9, 99], 'e': @[2, 4, 6, 8, + 99]}.newOrderedTable + + let L = len(t) + forAllOrderedPairs: + yield t.data[h].key + assert(len(t) == L, "the length of the table changed while iterating over it") + +iterator values*[A, B](t: OrderedTableRef[A, B]): lent B = + ## Iterates over any value in the table `t` in insertion order. + ## + ## See also: + ## * `pairs iterator<#pairs.i,OrderedTableRef[A,B]>`_ + ## * `keys iterator<#keys.i,OrderedTableRef[A,B]>`_ + ## * `mvalues iterator<#mvalues.i,OrderedTableRef[A,B]>`_ + runnableExamples: + let a = { + 'o': @[1, 5, 7, 9], + 'e': @[2, 4, 6, 8] + }.newOrderedTable + for v in a.values: + doAssert v.len == 4 + + let L = len(t) + forAllOrderedPairs: + yield t.data[h].val + assert(len(t) == L, "the length of the table changed while iterating over it") + +iterator mvalues*[A, B](t: OrderedTableRef[A, B]): var B = + ## Iterates over any value in the table `t` in insertion order. The values + ## can be modified. + ## + ## See also: + ## * `mpairs iterator<#mpairs.i,OrderedTableRef[A,B]>`_ + ## * `values iterator<#values.i,OrderedTableRef[A,B]>`_ + runnableExamples: + let a = { + 'o': @[1, 5, 7, 9], + 'e': @[2, 4, 6, 8] + }.newOrderedTable + for v in a.mvalues: + v.add(99) + doAssert a == {'o': @[1, 5, 7, 9, 99], + 'e': @[2, 4, 6, 8, 99]}.newOrderedTable + + let L = len(t) + forAllOrderedPairs: + yield t.data[h].val + assert(len(t) == L, "the length of the table changed while iterating over it") -iterator values*[A](t: CountTable[A]): int = - ## iterates over any value in the table `t`. - for h in 0..high(t.data): - if t.data[h].val != 0: yield t.data[h].val -iterator mvalues*[A](t: CountTable[A]): var int = - ## iterates over any value in the table `t`. The values can be modified. - for h in 0..high(t.data): - if t.data[h].val != 0: yield t.data[h].val -proc rawGet[A](t: CountTable[A], key: A): int = - var h: THash = hash(key) and high(t.data) # start with real hash value - while t.data[h].val != 0: - if t.data[h].key == key: return h - h = nextTry(h, high(t.data)) - result = -1 - h # < 0 => MISSING; insert idx = -1 - result -proc `[]`*[A](t: CountTable[A], key: A): int = - ## retrieves the value at ``t[key]``. If `key` is not in `t`, - ## 0 is returned. One can check with ``hasKey`` whether the key - ## exists. - var index = rawGet(t, key) - if index >= 0: result = t.data[index].val -proc mget*[A](t: var CountTable[A], key: A): var int = - ## retrieves the value at ``t[key]``. The value can be modified. - ## If `key` is not in `t`, the ``EInvalidKey`` exception is raised. - var index = rawGet(t, key) - if index >= 0: result = t.data[index].val - else: raise newException(KeyError, "key not found: " & $key) -proc hasKey*[A](t: CountTable[A], key: A): bool = - ## returns true iff `key` is in the table `t`. - result = rawGet(t, key) >= 0 -proc rawInsert[A](t: CountTable[A], data: var seq[tuple[key: A, val: int]], +# ------------------------------------------------------------------------- +# ------------------------------ CountTable ------------------------------- +# ------------------------------------------------------------------------- + +type + CountTable*[A] = object + ## Hash table that counts the number of each key. + ## + ## For creating an empty CountTable, use `initCountTable proc + ## <#initCountTable>`_. + data: seq[tuple[key: A, val: int]] + counter: int + isSorted: bool + CountTableRef*[A] = ref CountTable[A] ## Ref version of + ## `CountTable<#CountTable>`_. + ## + ## For creating a new empty CountTableRef, use `newCountTable proc + ## <#newCountTable>`_. + + +# ------------------------------ helpers --------------------------------- + +proc ctRawInsert[A](t: CountTable[A], data: var seq[tuple[key: A, val: int]], key: A, val: int) = - var h: THash = hash(key) and high(data) + var h: Hash = hash(key) and high(data) while data[h].val != 0: h = nextTry(h, high(data)) data[h].key = key data[h].val = val @@ -815,200 +2308,665 @@ proc enlarge[A](t: var CountTable[A]) = var n: seq[tuple[key: A, val: int]] newSeq(n, len(t.data) * growthFactor) for i in countup(0, high(t.data)): - if t.data[i].val != 0: rawInsert(t, n, t.data[i].key, t.data[i].val) + if t.data[i].val != 0: ctRawInsert(t, n, move t.data[i].key, move t.data[i].val) swap(t.data, n) -proc `[]=`*[A](t: var CountTable[A], key: A, val: int) = - ## puts a (key, value)-pair into `t`. `val` has to be positive. - assert val > 0 - var h = rawGet(t, key) - if h >= 0: - t.data[h].val = val - else: - h = -1 - h - t.data[h].key = key - t.data[h].val = val +proc rawGet[A](t: CountTable[A], key: A): int = + if t.data.len == 0: + return -1 + var h: Hash = hash(key) and high(t.data) # start with real hash value + while t.data[h].val != 0: + if t.data[h].key == key: return h + h = nextTry(h, high(t.data)) + result = -1 - h # < 0 => MISSING; insert idx = -1 - result + +template ctget(t, key, default: untyped): untyped = + var index = rawGet(t, key) + result = if index >= 0: t.data[index].val else: default + +proc inc*[A](t: var CountTable[A], key: A, val = 1) + +# ---------------------------------------------------------------------- -proc initCountTable*[A](initialSize=64): CountTable[A] = - ## creates a new count table that is empty. +proc initCountTable*[A](initialSize = defaultInitialSize): CountTable[A] = + ## Creates a new count table that is empty. ## - ## `initialSize` needs to be a power of two. If you need to accept runtime - ## values for this you could use the ``nextPowerOfTwo`` proc from the - ## `math <math.html>`_ module or the ``rightSize`` proc in this module. - assert isPowerOfTwo(initialSize) - result.counter = 0 - newSeq(result.data, initialSize) + ## Starting from Nim v0.20, tables are initialized by default and it is + ## not necessary to call this function explicitly. + ## + ## See also: + ## * `toCountTable proc<#toCountTable,openArray[A]>`_ + ## * `newCountTable proc<#newCountTable>`_ for creating a + ## `CountTableRef` + result = default(CountTable[A]) + initImpl(result, initialSize) proc toCountTable*[A](keys: openArray[A]): CountTable[A] = - ## creates a new count table with every key in `keys` having a count of 1. - result = initCountTable[A](rightSize(keys.len)) - for key in items(keys): result[key] = 1 + ## Creates a new count table with every member of a container `keys` + ## having a count of how many times it occurs in that container. + result = initCountTable[A](keys.len) + for key in items(keys): result.inc(key) -proc `$`*[A](t: CountTable[A]): string = - ## The `$` operator for count tables. - dollarImpl() +proc `[]`*[A](t: CountTable[A], key: A): int = + ## Retrieves the value at `t[key]` if `key` is in `t`. + ## Otherwise `0` is returned. + ## + ## See also: + ## * `getOrDefault<#getOrDefault,CountTable[A],A,int>`_ to return + ## a custom value if the key doesn't exist + ## * `[]= proc<#[]%3D,CountTable[A],A,int>`_ for inserting a new + ## (key, value) pair in the table + ## * `hasKey proc<#hasKey,CountTable[A],A>`_ for checking if a key + ## is in the table + assert(not t.isSorted, "CountTable must not be used after sorting") + ctget(t, key, 0) + +template cntMakeEmpty(i) = t.data[i].val = 0 +template cntCellEmpty(i) = t.data[i].val == 0 +template cntCellHash(i) = hash(t.data[i].key) + +proc `[]=`*[A](t: var CountTable[A], key: A, val: int) = + ## Inserts a `(key, value)` pair into `t`. + ## + ## See also: + ## * `[] proc<#[],CountTable[A],A>`_ for retrieving a value of a key + ## * `inc proc<#inc,CountTable[A],A,int>`_ for incrementing a + ## value of a key + assert(not t.isSorted, "CountTable must not be used after sorting") + assert val >= 0 + if val == 0: + delImplNoHCode(cntMakeEmpty, cntCellEmpty, cntCellHash) + else: + let h = rawGet(t, key) + if h >= 0: + t.data[h].val = val + else: + insertImpl() proc inc*[A](t: var CountTable[A], key: A, val = 1) = - ## increments `t[key]` by `val`. + ## Increments `t[key]` by `val` (default: 1). + runnableExamples: + var a = toCountTable("aab") + a.inc('a') + a.inc('b', 10) + doAssert a == toCountTable("aaabbbbbbbbbbb") + + assert(not t.isSorted, "CountTable must not be used after sorting") var index = rawGet(t, key) if index >= 0: inc(t.data[index].val, val) + if t.data[index].val == 0: + delImplIdx(t, index, cntMakeEmpty, cntCellEmpty, cntCellHash) else: - if mustRehash(len(t.data), t.counter): enlarge(t) - rawInsert(t, t.data, key, val) - inc(t.counter) + if val != 0: + insertImpl() + +proc len*[A](t: CountTable[A]): int = + ## Returns the number of keys in `t`. + result = t.counter proc smallest*[A](t: CountTable[A]): tuple[key: A, val: int] = - ## returns the largest (key,val)-pair. Efficiency: O(n) - assert t.len > 0 - var minIdx = 0 - for h in 1..high(t.data): - if t.data[h].val > 0 and t.data[minIdx].val > t.data[h].val: minIdx = h + ## Returns the `(key, value)` pair with the smallest `val`. Efficiency: O(n) + ## + ## See also: + ## * `largest proc<#largest,CountTable[A]>`_ + assert t.len > 0, "counttable is empty" + var minIdx = -1 + for h in 0 .. high(t.data): + if t.data[h].val > 0 and (minIdx == -1 or t.data[minIdx].val > t.data[h].val): + minIdx = h result.key = t.data[minIdx].key result.val = t.data[minIdx].val proc largest*[A](t: CountTable[A]): tuple[key: A, val: int] = - ## returns the (key,val)-pair with the largest `val`. Efficiency: O(n) - assert t.len > 0 + ## Returns the `(key, value)` pair with the largest `val`. Efficiency: O(n) + ## + ## See also: + ## * `smallest proc<#smallest,CountTable[A]>`_ + assert t.len > 0, "counttable is empty" var maxIdx = 0 - for h in 1..high(t.data): + for h in 1 .. high(t.data): if t.data[maxIdx].val < t.data[h].val: maxIdx = h result.key = t.data[maxIdx].key result.val = t.data[maxIdx].val -proc sort*[A](t: var CountTable[A]) = - ## sorts the count table so that the entry with the highest counter comes - ## first. This is destructive! You must not modify `t` afterwards! - ## You can use the iterators `pairs`, `keys`, and `values` to iterate over - ## `t` in the sorted order. +proc hasKey*[A](t: CountTable[A], key: A): bool = + ## Returns true if `key` is in the table `t`. + ## + ## See also: + ## * `contains proc<#contains,CountTable[A],A>`_ for use with the `in` + ## operator + ## * `[] proc<#[],CountTable[A],A>`_ for retrieving a value of a key + ## * `getOrDefault proc<#getOrDefault,CountTable[A],A,int>`_ to return + ## a custom value if the key doesn't exist + assert(not t.isSorted, "CountTable must not be used after sorting") + result = rawGet(t, key) >= 0 - # we use shellsort here; fast enough and simple - var h = 1 - while true: - h = 3 * h + 1 - if h >= high(t.data): break - while true: - h = h div 3 - for i in countup(h, high(t.data)): - var j = i - while t.data[j-h].val <= t.data[j].val: - swap(t.data[j], t.data[j-h]) - j = j-h - if j < h: break - if h == 1: break +proc contains*[A](t: CountTable[A], key: A): bool = + ## Alias of `hasKey proc<#hasKey,CountTable[A],A>`_ for use with + ## the `in` operator. + return hasKey[A](t, key) -proc len*[A](t: CountTableRef[A]): int = - ## returns the number of keys in `t`. - result = t.counter +proc getOrDefault*[A](t: CountTable[A], key: A; default: int = 0): int = + ## Retrieves the value at `t[key]` if `key` is in `t`. Otherwise, the + ## integer value of `default` is returned. + ## + ## See also: + ## * `[] proc<#[],CountTable[A],A>`_ for retrieving a value of a key + ## * `hasKey proc<#hasKey,CountTable[A],A>`_ for checking if a key + ## is in the table + ctget(t, key, default) + +proc del*[A](t: var CountTable[A], key: A) {.since: (1, 1).} = + ## Deletes `key` from table `t`. Does nothing if the key does not exist. + ## + ## See also: + ## * `pop proc<#pop,CountTable[A],A,int>`_ + ## * `clear proc<#clear,CountTable[A]>`_ to empty the whole table + runnableExamples: + var a = toCountTable("aabbbccccc") + a.del('b') + assert a == toCountTable("aaccccc") + a.del('b') + assert a == toCountTable("aaccccc") + a.del('c') + assert a == toCountTable("aa") + + delImplNoHCode(cntMakeEmpty, cntCellEmpty, cntCellHash) + +proc pop*[A](t: var CountTable[A], key: A, val: var int): bool {.since: (1, 1).} = + ## Deletes the `key` from the table. + ## Returns `true`, if the `key` existed, and sets `val` to the + ## mapping of the key. Otherwise, returns `false`, and the `val` is + ## unchanged. + ## + ## See also: + ## * `del proc<#del,CountTable[A],A>`_ + ## * `clear proc<#clear,CountTable[A]>`_ to empty the whole table + runnableExamples: + var a = toCountTable("aabbbccccc") + var i = 0 + assert a.pop('b', i) + assert i == 3 + i = 99 + assert not a.pop('b', i) + assert i == 99 -iterator pairs*[A](t: CountTableRef[A]): (A, int) = - ## iterates over any (key, value) pair in the table `t`. - for h in 0..high(t.data): - if t.data[h].val != 0: yield (t.data[h].key, t.data[h].val) + var index = rawGet(t, key) + result = index >= 0 + if result: + val = move(t.data[index].val) + delImplIdx(t, index, cntMakeEmpty, cntCellEmpty, cntCellHash) -iterator mpairs*[A](t: CountTableRef[A]): (A, var int) = - ## iterates over any (key, value) pair in the table `t`. The values can - ## be modified. - for h in 0..high(t.data): - if t.data[h].val != 0: yield (t.data[h].key, t.data[h].val) +proc clear*[A](t: var CountTable[A]) = + ## Resets the table so that it is empty. + ## + ## See also: + ## * `del proc<#del,CountTable[A],A>`_ + ## * `pop proc<#pop,CountTable[A],A,int>`_ + clearImpl() + t.isSorted = false + +func ctCmp[T](a, b: tuple[key: T, val: int]): int = + result = system.cmp(a.val, b.val) + +proc sort*[A](t: var CountTable[A], order = SortOrder.Descending) = + ## Sorts the count table so that, by default, the entry with the + ## highest counter comes first. + ## + ## .. warning:: This is destructive! Once sorted, you must not modify `t` afterwards! + ## + ## You can use the iterators `pairs<#pairs.i,CountTable[A]>`_, + ## `keys<#keys.i,CountTable[A]>`_, and `values<#values.i,CountTable[A]>`_ + ## to iterate over `t` in the sorted order. + runnableExamples: + import std/[algorithm, sequtils] + var a = toCountTable("abracadabra") + doAssert a == "aaaaabbrrcd".toCountTable + a.sort() + doAssert toSeq(a.values) == @[5, 2, 2, 1, 1] + a.sort(SortOrder.Ascending) + doAssert toSeq(a.values) == @[1, 1, 2, 2, 5] + + t.data.sort(cmp = ctCmp, order = order) + t.isSorted = true + +proc merge*[A](s: var CountTable[A], t: CountTable[A]) = + ## Merges the second table into the first one (must be declared as `var`). + runnableExamples: + var a = toCountTable("aaabbc") + let b = toCountTable("bcc") + a.merge(b) + doAssert a == toCountTable("aaabbbccc") + + assert(not s.isSorted, "CountTable must not be used after sorting") + for key, value in t: + s.inc(key, value) + +when (NimMajor, NimMinor) <= (1, 0): + proc merge*[A](s, t: CountTable[A]): CountTable[A] = + ## Merges the two tables into a new one. + runnableExamples: + let + a = toCountTable("aaabbc") + b = toCountTable("bcc") + doAssert merge(a, b) == toCountTable("aaabbbccc") + + result = initCountTable[A](nextPowerOfTwo(max(s.len, t.len))) + for table in @[s, t]: + for key, value in table: + result.inc(key, value) -iterator keys*[A](t: CountTableRef[A]): A = - ## iterates over any key in the table `t`. - for h in 0..high(t.data): - if t.data[h].val != 0: yield t.data[h].key +proc `$`*[A](t: CountTable[A]): string = + ## The `$` operator for count tables. Used internally when calling `echo` + ## on a table. + dollarImpl() -iterator values*[A](t: CountTableRef[A]): int = - ## iterates over any value in the table `t`. - for h in 0..high(t.data): - if t.data[h].val != 0: yield t.data[h].val +proc `==`*[A](s, t: CountTable[A]): bool = + ## The `==` operator for count tables. Returns `true` if both tables + ## contain the same keys with the same count. Insert order does not matter. + equalsImpl(s, t) -iterator mvalues*[A](t: CountTableRef[A]): var int = - ## iterates over any value in the table `t`. The values can be modified. - for h in 0..high(t.data): - if t.data[h].val != 0: yield t.data[h].val + +iterator pairs*[A](t: CountTable[A]): (A, int) = + ## Iterates over any `(key, value)` pair in the table `t`. + ## + ## See also: + ## * `mpairs iterator<#mpairs.i,CountTable[A]>`_ + ## * `keys iterator<#keys.i,CountTable[A]>`_ + ## * `values iterator<#values.i,CountTable[A]>`_ + ## + ## **Examples:** + ## + ## ```Nim + ## let a = toCountTable("abracadabra") + ## + ## for k, v in pairs(a): + ## echo "key: ", k + ## echo "value: ", v + ## + ## # key: a + ## # value: 5 + ## # key: b + ## # value: 2 + ## # key: c + ## # value: 1 + ## # key: d + ## # value: 1 + ## # key: r + ## # value: 2 + ## ``` + let L = len(t) + for h in 0 .. high(t.data): + if t.data[h].val != 0: + yield (t.data[h].key, t.data[h].val) + assert(len(t) == L, "the length of the table changed while iterating over it") + +iterator mpairs*[A](t: var CountTable[A]): (A, var int) = + ## Iterates over any `(key, value)` pair in the table `t` (must be + ## declared as `var`). The values can be modified. + ## + ## See also: + ## * `pairs iterator<#pairs.i,CountTable[A]>`_ + ## * `mvalues iterator<#mvalues.i,CountTable[A]>`_ + runnableExamples: + var a = toCountTable("abracadabra") + for k, v in mpairs(a): + v = 2 + doAssert a == toCountTable("aabbccddrr") + + let L = len(t) + for h in 0 .. high(t.data): + if t.data[h].val != 0: + yield (t.data[h].key, t.data[h].val) + assert(len(t) == L, "the length of the table changed while iterating over it") + +iterator keys*[A](t: CountTable[A]): lent A = + ## Iterates over any key in the table `t`. + ## + ## See also: + ## * `pairs iterator<#pairs.i,CountTable[A]>`_ + ## * `values iterator<#values.i,CountTable[A]>`_ + runnableExamples: + var a = toCountTable("abracadabra") + for k in keys(a): + a[k] = 2 + doAssert a == toCountTable("aabbccddrr") + + let L = len(t) + for h in 0 .. high(t.data): + if t.data[h].val != 0: + yield t.data[h].key + assert(len(t) == L, "the length of the table changed while iterating over it") + +iterator values*[A](t: CountTable[A]): int = + ## Iterates over any value in the table `t`. + ## + ## See also: + ## * `pairs iterator<#pairs.i,CountTable[A]>`_ + ## * `keys iterator<#keys.i,CountTable[A]>`_ + ## * `mvalues iterator<#mvalues.i,CountTable[A]>`_ + runnableExamples: + let a = toCountTable("abracadabra") + for v in values(a): + assert v < 10 + + let L = len(t) + for h in 0 .. high(t.data): + if t.data[h].val != 0: + yield t.data[h].val + assert(len(t) == L, "the length of the table changed while iterating over it") + +iterator mvalues*[A](t: var CountTable[A]): var int = + ## Iterates over any value in the table `t` (must be + ## declared as `var`). The values can be modified. + ## + ## See also: + ## * `mpairs iterator<#mpairs.i,CountTable[A]>`_ + ## * `values iterator<#values.i,CountTable[A]>`_ + runnableExamples: + var a = toCountTable("abracadabra") + for v in mvalues(a): + v = 2 + doAssert a == toCountTable("aabbccddrr") + + let L = len(t) + for h in 0 .. high(t.data): + if t.data[h].val != 0: + yield t.data[h].val + assert(len(t) == L, "the length of the table changed while iterating over it") + + + + + + + +# --------------------------------------------------------------------------- +# ---------------------------- CountTableRef -------------------------------- +# --------------------------------------------------------------------------- + +proc inc*[A](t: CountTableRef[A], key: A, val = 1) + +proc newCountTable*[A](initialSize = defaultInitialSize): CountTableRef[A] = + ## Creates a new ref count table that is empty. + ## + ## See also: + ## * `newCountTable proc<#newCountTable,openArray[A]>`_ for creating + ## a `CountTableRef` from a collection + ## * `initCountTable proc<#initCountTable>`_ for creating a + ## `CountTable` + new(result) + {.noSideEffect.}: + result[] = initCountTable[A](initialSize) + +proc newCountTable*[A](keys: openArray[A]): CountTableRef[A] = + ## Creates a new ref count table with every member of a container `keys` + ## having a count of how many times it occurs in that container. + result = newCountTable[A](keys.len) + {.noSideEffect.}: + for key in items(keys): result.inc(key) proc `[]`*[A](t: CountTableRef[A], key: A): int = - ## retrieves the value at ``t[key]``. If `key` is not in `t`, - ## 0 is returned. One can check with ``hasKey`` whether the key - ## exists. + ## Retrieves the value at `t[key]` if `key` is in `t`. + ## Otherwise `0` is returned. + ## + ## See also: + ## * `getOrDefault<#getOrDefault,CountTableRef[A],A,int>`_ to return + ## a custom value if the key doesn't exist + ## * `inc proc<#inc,CountTableRef[A],A,int>`_ to inc even if missing + ## * `[]= proc<#[]%3D,CountTableRef[A],A,int>`_ for inserting a new + ## (key, value) pair in the table + ## * `hasKey proc<#hasKey,CountTableRef[A],A>`_ for checking if a key + ## is in the table result = t[][key] -proc mget*[A](t: CountTableRef[A], key: A): var int = - ## retrieves the value at ``t[key]``. The value can be modified. - ## If `key` is not in `t`, the ``EInvalidKey`` exception is raised. - result = t[].mget(key) +proc `[]=`*[A](t: CountTableRef[A], key: A, val: int) = + ## Inserts a `(key, value)` pair into `t`. + ## + ## See also: + ## * `[] proc<#[],CountTableRef[A],A>`_ for retrieving a value of a key + ## * `inc proc<#inc,CountTableRef[A],A,int>`_ for incrementing a + ## value of a key + assert val > 0 + {.noSideEffect.}: + t[][key] = val + +proc inc*[A](t: CountTableRef[A], key: A, val = 1) = + ## Increments `t[key]` by `val` (default: 1). + runnableExamples: + var a = newCountTable("aab") + a.inc('a') + a.inc('b', 10) + doAssert a == newCountTable("aaabbbbbbbbbbb") + {.noSideEffect.}: + t[].inc(key, val) + +proc smallest*[A](t: CountTableRef[A]): tuple[key: A, val: int] = + ## Returns the `(key, value)` pair with the smallest `val`. Efficiency: O(n) + ## + ## See also: + ## * `largest proc<#largest,CountTableRef[A]>`_ + t[].smallest + +proc largest*[A](t: CountTableRef[A]): tuple[key: A, val: int] = + ## Returns the `(key, value)` pair with the largest `val`. Efficiency: O(n) + ## + ## See also: + ## * `smallest proc<#smallest,CountTable[A]>`_ + t[].largest proc hasKey*[A](t: CountTableRef[A], key: A): bool = - ## returns true iff `key` is in the table `t`. + ## Returns true if `key` is in the table `t`. + ## + ## See also: + ## * `contains proc<#contains,CountTableRef[A],A>`_ for use with the `in` + ## operator + ## * `[] proc<#[],CountTableRef[A],A>`_ for retrieving a value of a key + ## * `getOrDefault proc<#getOrDefault,CountTableRef[A],A,int>`_ to return + ## a custom value if the key doesn't exist result = t[].hasKey(key) -proc `[]=`*[A](t: CountTableRef[A], key: A, val: int) = - ## puts a (key, value)-pair into `t`. `val` has to be positive. - assert val > 0 - t[][key] = val +proc contains*[A](t: CountTableRef[A], key: A): bool = + ## Alias of `hasKey proc<#hasKey,CountTableRef[A],A>`_ for use with + ## the `in` operator. + return hasKey[A](t, key) -proc newCountTable*[A](initialSize=64): CountTableRef[A] = - ## creates a new count table that is empty. +proc getOrDefault*[A](t: CountTableRef[A], key: A, default: int): int = + ## Retrieves the value at `t[key]` if `key` is in `t`. Otherwise, the + ## integer value of `default` is returned. ## - ## `initialSize` needs to be a power of two. If you need to accept runtime - ## values for this you could use the ``nextPowerOfTwo`` proc from the - ## `math <math.html>`_ module or the ``rightSize`` method in this module. - new(result) - result[] = initCountTable[A](initialSize) + ## See also: + ## * `[] proc<#[],CountTableRef[A],A>`_ for retrieving a value of a key + ## * `hasKey proc<#hasKey,CountTableRef[A],A>`_ for checking if a key + ## is in the table + result = t[].getOrDefault(key, default) -proc newCountTable*[A](keys: openArray[A]): CountTableRef[A] = - ## creates a new count table with every key in `keys` having a count of 1. - result = newCountTable[A](rightSize(keys.len)) - for key in items(keys): result[key] = 1 +proc len*[A](t: CountTableRef[A]): int = + ## Returns the number of keys in `t`. + result = t.counter + +proc del*[A](t: CountTableRef[A], key: A) {.since: (1, 1).} = + ## Deletes `key` from table `t`. Does nothing if the key does not exist. + ## + ## See also: + ## * `pop proc<#pop,CountTableRef[A],A,int>`_ + ## * `clear proc<#clear,CountTableRef[A]>`_ to empty the whole table + del(t[], key) + +proc pop*[A](t: CountTableRef[A], key: A, val: var int): bool {.since: (1, 1).} = + ## Deletes the `key` from the table. + ## Returns `true`, if the `key` existed, and sets `val` to the + ## mapping of the key. Otherwise, returns `false`, and the `val` is + ## unchanged. + ## + ## See also: + ## * `del proc<#del,CountTableRef[A],A>`_ + ## * `clear proc<#clear,CountTableRef[A]>`_ to empty the whole table + pop(t[], key, val) + +proc clear*[A](t: CountTableRef[A]) = + ## Resets the table so that it is empty. + ## + ## See also: + ## * `del proc<#del,CountTableRef[A],A>`_ + ## * `pop proc<#pop,CountTableRef[A],A,int>`_ + clear(t[]) + +proc sort*[A](t: CountTableRef[A], order = SortOrder.Descending) = + ## Sorts the count table so that, by default, the entry with the + ## highest counter comes first. + ## + ## **This is destructive! You must not modify `t` afterwards!** + ## + ## You can use the iterators `pairs<#pairs.i,CountTableRef[A]>`_, + ## `keys<#keys.i,CountTableRef[A]>`_, and `values<#values.i,CountTableRef[A]>`_ + ## to iterate over `t` in the sorted order. + t[].sort(order = order) + +proc merge*[A](s, t: CountTableRef[A]) = + ## Merges the second table into the first one. + runnableExamples: + let + a = newCountTable("aaabbc") + b = newCountTable("bcc") + a.merge(b) + doAssert a == newCountTable("aaabbbccc") + + s[].merge(t[]) proc `$`*[A](t: CountTableRef[A]): string = - ## The `$` operator for count tables. + ## The `$` operator for count tables. Used internally when calling `echo` + ## on a table. dollarImpl() -proc inc*[A](t: CountTableRef[A], key: A, val = 1) = - ## increments `t[key]` by `val`. - t[].inc(key, val) +proc `==`*[A](s, t: CountTableRef[A]): bool = + ## The `==` operator for count tables. Returns `true` if either both tables + ## are `nil`, or neither is `nil` and both contain the same keys with the same + ## count. Insert order does not matter. + if isNil(s): result = isNil(t) + elif isNil(t): result = false + else: result = s[] == t[] -proc smallest*[A](t: CountTableRef[A]): (A, int) = - ## returns the largest (key,val)-pair. Efficiency: O(n) - t[].smallest -proc largest*[A](t: CountTableRef[A]): (A, int) = - ## returns the (key,val)-pair with the largest `val`. Efficiency: O(n) - t[].largest +iterator pairs*[A](t: CountTableRef[A]): (A, int) = + ## Iterates over any `(key, value)` pair in the table `t`. + ## + ## See also: + ## * `mpairs iterator<#mpairs.i,CountTableRef[A]>`_ + ## * `keys iterator<#keys.i,CountTableRef[A]>`_ + ## * `values iterator<#values.i,CountTableRef[A]>`_ + ## + ## **Examples:** + ## + ## ```Nim + ## let a = newCountTable("abracadabra") + ## + ## for k, v in pairs(a): + ## echo "key: ", k + ## echo "value: ", v + ## + ## # key: a + ## # value: 5 + ## # key: b + ## # value: 2 + ## # key: c + ## # value: 1 + ## # key: d + ## # value: 1 + ## # key: r + ## # value: 2 + ## ``` + let L = len(t) + for h in 0 .. high(t.data): + if t.data[h].val != 0: + yield (t.data[h].key, t.data[h].val) + assert(len(t) == L, "the length of the table changed while iterating over it") -proc sort*[A](t: CountTableRef[A]) = - ## sorts the count table so that the entry with the highest counter comes - ## first. This is destructive! You must not modify `t` afterwards! - ## You can use the iterators `pairs`, `keys`, and `values` to iterate over - ## `t` in the sorted order. - t[].sort +iterator mpairs*[A](t: CountTableRef[A]): (A, var int) = + ## Iterates over any `(key, value)` pair in the table `t`. The values can + ## be modified. + ## + ## See also: + ## * `pairs iterator<#pairs.i,CountTableRef[A]>`_ + ## * `mvalues iterator<#mvalues.i,CountTableRef[A]>`_ + runnableExamples: + let a = newCountTable("abracadabra") + for k, v in mpairs(a): + v = 2 + doAssert a == newCountTable("aabbccddrr") + + let L = len(t) + for h in 0 .. high(t.data): + if t.data[h].val != 0: + yield (t.data[h].key, t.data[h].val) + assert(len(t) == L, "table modified while iterating over it") -when isMainModule: - type - Person = object - firstName, lastName: string +iterator keys*[A](t: CountTableRef[A]): A = + ## Iterates over any key in the table `t`. + ## + ## See also: + ## * `pairs iterator<#pairs.i,CountTable[A]>`_ + ## * `values iterator<#values.i,CountTable[A]>`_ + runnableExamples: + let a = newCountTable("abracadabra") + for k in keys(a): + a[k] = 2 + doAssert a == newCountTable("aabbccddrr") + + let L = len(t) + for h in 0 .. high(t.data): + if t.data[h].val != 0: + yield t.data[h].key + assert(len(t) == L, "the length of the table changed while iterating over it") - proc hash(x: Person): THash = - ## Piggyback on the already available string hash proc. - ## - ## Without this proc nothing works! - result = x.firstName.hash !& x.lastName.hash - result = !$result +iterator values*[A](t: CountTableRef[A]): int = + ## Iterates over any value in the table `t`. + ## + ## See also: + ## * `pairs iterator<#pairs.i,CountTableRef[A]>`_ + ## * `keys iterator<#keys.i,CountTableRef[A]>`_ + ## * `mvalues iterator<#mvalues.i,CountTableRef[A]>`_ + runnableExamples: + let a = newCountTable("abracadabra") + for v in values(a): + assert v < 10 + + let L = len(t) + for h in 0 .. high(t.data): + if t.data[h].val != 0: + yield t.data[h].val + assert(len(t) == L, "the length of the table changed while iterating over it") - var - salaries = initTable[Person, int]() - p1, p2: Person - p1.firstName = "Jon" - p1.lastName = "Ross" - salaries[p1] = 30_000 - p2.firstName = "소진" - p2.lastName = "박" - salaries[p2] = 45_000 - var - s2 = initOrderedTable[Person, int]() - s3 = initCountTable[Person]() - s2[p1] = 30_000 - s2[p2] = 45_000 - s3[p1] = 30_000 - s3[p2] = 45_000 +iterator mvalues*[A](t: CountTableRef[A]): var int = + ## Iterates over any value in the table `t`. The values can be modified. + ## + ## See also: + ## * `mpairs iterator<#mpairs.i,CountTableRef[A]>`_ + ## * `values iterator<#values.i,CountTableRef[A]>`_ + runnableExamples: + var a = newCountTable("abracadabra") + for v in mvalues(a): + v = 2 + doAssert a == newCountTable("aabbccddrr") + + let L = len(t) + for h in 0 .. high(t.data): + if t.data[h].val != 0: + yield t.data[h].val + assert(len(t) == L, "the length of the table changed while iterating over it") + +proc hash*[K,V](s: Table[K,V]): Hash = + for p in pairs(s): + result = result xor hash(p) + result = !$result + +proc hash*[K,V](s: OrderedTable[K,V]): Hash = + for p in pairs(s): + result = result !& hash(p) + result = !$result + +proc hash*[V](s: CountTable[V]): Hash = + for p in pairs(s): + result = result xor hash(p) + result = !$result |