diff options
Diffstat (limited to 'lib/pure/collections/critbits.nim')
-rw-r--r-- | lib/pure/collections/critbits.nim | 537 |
1 files changed, 537 insertions, 0 deletions
diff --git a/lib/pure/collections/critbits.nim b/lib/pure/collections/critbits.nim new file mode 100644 index 000000000..24257dacb --- /dev/null +++ b/lib/pure/collections/critbits.nim @@ -0,0 +1,537 @@ +# +# +# Nim's Runtime Library +# (c) Copyright 2012 Andreas Rumpf +# +# See the file "copying.txt", included in this +# distribution, for details about the copyright. +# + +## This module implements a `crit bit tree`:idx: which is an efficient +## container for a sorted set of strings, or for a sorted mapping of strings. Based on the +## [excellent paper by Adam Langley](https://www.imperialviolet.org/binary/critbit.pdf). +## (A crit bit tree is a form of `radix tree`:idx: or `patricia trie`:idx:.) + +runnableExamples: + from std/sequtils import toSeq + + var critbitAsSet: CritBitTree[void] = ["kitten", "puppy"].toCritBitTree + doAssert critbitAsSet.len == 2 + critbitAsSet.incl("") + doAssert "" in critbitAsSet + critbitAsSet.excl("") + doAssert "" notin critbitAsSet + doAssert toSeq(critbitAsSet.items) == @["kitten", "puppy"] + let same = ["puppy", "kitten", "puppy"].toCritBitTree + doAssert toSeq(same.keys) == toSeq(critbitAsSet.keys) + + var critbitAsDict: CritBitTree[int] = {"key1": 42}.toCritBitTree + doAssert critbitAsDict.len == 1 + critbitAsDict["key2"] = 0 + doAssert "key2" in critbitAsDict + doAssert critbitAsDict["key2"] == 0 + critbitAsDict.excl("key1") + doAssert "key1" notin critbitAsDict + doAssert toSeq(critbitAsDict.pairs) == @[("key2", 0)] + +import std/private/since + +when defined(nimPreviewSlimSystem): + import std/assertions + +type + NodeObj[T] {.acyclic.} = object + byte: int ## byte index of the difference + otherBits: char + case isLeaf: bool + of false: child: array[0..1, ref NodeObj[T]] + of true: + key: string + when T isnot void: + val: T + + Node[T] = ref NodeObj[T] + CritBitTree*[T] = object ## The crit bit tree can either be used + ## as a mapping from strings to + ## some type `T` or as a set of + ## strings if `T` is `void`. + root: Node[T] + count: int + +func len*[T](c: CritBitTree[T]): int {.inline.} = + ## Returns the number of elements in `c` in O(1). + runnableExamples: + let c = ["key1", "key2"].toCritBitTree + doAssert c.len == 2 + + result = c.count + +proc rawGet[T](c: CritBitTree[T], key: string): Node[T] = + var it = c.root + while it != nil: + if not it.isLeaf: + let ch = if it.byte < key.len: key[it.byte] else: '\0' + let dir = (1 + (ch.ord or it.otherBits.ord)) shr 8 + it = it.child[dir] + else: + return if it.key == key: it else: nil + +func contains*[T](c: CritBitTree[T], key: string): bool {.inline.} = + ## Returns true if `c` contains the given `key`. + runnableExamples: + var c: CritBitTree[void] + incl(c, "key") + doAssert c.contains("key") + + result = rawGet(c, key) != nil + +func hasKey*[T](c: CritBitTree[T], key: string): bool {.inline.} = + ## Alias for `contains <#contains,CritBitTree[T],string>`_. + result = rawGet(c, key) != nil + +proc rawInsert[T](c: var CritBitTree[T], key: string): Node[T] = + if c.root == nil: + c.root = Node[T](isleaf: true, key: key) + result = c.root + else: + var it = c.root + while not it.isLeaf: + let ch = if it.byte < key.len: key[it.byte] else: '\0' + let dir = (1 + (ch.ord or it.otherBits.ord)) shr 8 + it = it.child[dir] + + var newOtherBits = 0 + var newByte = 0 + block blockX: + while newByte < key.len: + let ch = if newByte < it.key.len: it.key[newByte] else: '\0' + if ch != key[newByte]: + newOtherBits = ch.ord xor key[newByte].ord + break blockX + inc newByte + if newByte < it.key.len: + newOtherBits = it.key[newByte].ord + else: + return it + while (newOtherBits and (newOtherBits-1)) != 0: + newOtherBits = newOtherBits and (newOtherBits-1) + newOtherBits = newOtherBits xor 255 + let ch = if newByte < it.key.len: it.key[newByte] else: '\0' + let dir = (1 + (ord(ch) or newOtherBits)) shr 8 + + var inner: Node[T] + new inner + result = Node[T](isLeaf: true, key: key) + inner.otherBits = chr(newOtherBits) + inner.byte = newByte + inner.child[1 - dir] = result + + var wherep = addr(c.root) + while true: + var p = wherep[] + if p.isLeaf: break + if p.byte > newByte: break + if p.byte == newByte and p.otherBits.ord > newOtherBits: break + let ch = if p.byte < key.len: key[p.byte] else: '\0' + let dir = (1 + (ch.ord or p.otherBits.ord)) shr 8 + wherep = addr(p.child[dir]) + inner.child[dir] = wherep[] + wherep[] = inner + inc c.count + +func exclImpl[T](c: var CritBitTree[T], key: string): int = + var p = c.root + var wherep = addr(c.root) + var whereq: ptr Node[T] = nil + if p == nil: return c.count + var dir = 0 + var q: Node[T] + while not p.isLeaf: + whereq = wherep + q = p + let ch = if p.byte < key.len: key[p.byte] else: '\0' + dir = (1 + (ch.ord or p.otherBits.ord)) shr 8 + wherep = addr(p.child[dir]) + p = wherep[] + if p.key == key: + # else: not in tree at all + if whereq == nil: + c.root = nil + else: + whereq[] = q.child[1 - dir] + dec c.count + + return c.count + +proc excl*[T](c: var CritBitTree[T], key: string) = + ## Removes `key` (and its associated value) from the set `c`. + ## If the `key` does not exist, nothing happens. + ## + ## **See also:** + ## * `incl proc <#incl,CritBitTree[void],string>`_ + ## * `incl proc <#incl,CritBitTree[T],string,T>`_ + runnableExamples: + var c: CritBitTree[void] + incl(c, "key") + excl(c, "key") + doAssert not c.contains("key") + + discard exclImpl(c, key) + +proc missingOrExcl*[T](c: var CritBitTree[T], key: string): bool = + ## Returns true if `c` does not contain the given `key`. If the key + ## does exist, `c.excl(key)` is performed. + ## + ## **See also:** + ## * `excl proc <#excl,CritBitTree[T],string>`_ + ## * `containsOrIncl proc <#containsOrIncl,CritBitTree[T],string,T>`_ + ## * `containsOrIncl proc <#containsOrIncl,CritBitTree[void],string>`_ + runnableExamples: + block: + var c: CritBitTree[void] + doAssert c.missingOrExcl("key") + block: + var c: CritBitTree[void] + incl(c, "key") + doAssert not c.missingOrExcl("key") + doAssert not c.contains("key") + + let oldCount = c.count + discard exclImpl(c, key) + result = c.count == oldCount + +proc containsOrIncl*[T](c: var CritBitTree[T], key: string, val: sink T): bool = + ## Returns true if `c` contains the given `key`. If the key does not exist, + ## `c[key] = val` is performed. + ## + ## **See also:** + ## * `incl proc <#incl,CritBitTree[void],string>`_ + ## * `incl proc <#incl,CritBitTree[T],string,T>`_ + ## * `containsOrIncl proc <#containsOrIncl,CritBitTree[void],string>`_ + ## * `missingOrExcl proc <#missingOrExcl,CritBitTree[T],string>`_ + runnableExamples: + block: + var c: CritBitTree[int] + doAssert not c.containsOrIncl("key", 42) + doAssert c.contains("key") + block: + var c: CritBitTree[int] + incl(c, "key", 21) + doAssert c.containsOrIncl("key", 42) + doAssert c["key"] == 21 + + let oldCount = c.count + var n = rawInsert(c, key) + result = c.count == oldCount + when T isnot void: + if not result: n.val = val + +proc containsOrIncl*(c: var CritBitTree[void], key: string): bool = + ## Returns true if `c` contains the given `key`. If the key does not exist, + ## it is inserted into `c`. + ## + ## **See also:** + ## * `incl proc <#incl,CritBitTree[void],string>`_ + ## * `incl proc <#incl,CritBitTree[T],string,T>`_ + ## * `containsOrIncl proc <#containsOrIncl,CritBitTree[T],string,T>`_ + ## * `missingOrExcl proc <#missingOrExcl,CritBitTree[T],string>`_ + runnableExamples: + block: + var c: CritBitTree[void] + doAssert not c.containsOrIncl("key") + doAssert c.contains("key") + block: + var c: CritBitTree[void] + incl(c, "key") + doAssert c.containsOrIncl("key") + + let oldCount = c.count + discard rawInsert(c, key) + result = c.count == oldCount + +proc inc*(c: var CritBitTree[int]; key: string, val: int = 1) = + ## Increments `c[key]` by `val`. + runnableExamples: + var c: CritBitTree[int] + c["key"] = 1 + inc(c, "key") + doAssert c["key"] == 2 + + var n = rawInsert(c, key) + inc n.val, val + +proc incl*(c: var CritBitTree[void], key: string) = + ## Includes `key` in `c`. + ## + ## **See also:** + ## * `excl proc <#excl,CritBitTree[T],string>`_ + ## * `incl proc <#incl,CritBitTree[T],string,T>`_ + runnableExamples: + var c: CritBitTree[void] + incl(c, "key") + doAssert c.hasKey("key") + + discard rawInsert(c, key) + +proc incl*[T](c: var CritBitTree[T], key: string, val: sink T) = + ## Inserts `key` with value `val` into `c`. + ## + ## **See also:** + ## * `excl proc <#excl,CritBitTree[T],string>`_ + ## * `incl proc <#incl,CritBitTree[void],string>`_ + runnableExamples: + var c: CritBitTree[int] + incl(c, "key", 42) + doAssert c["key"] == 42 + + var n = rawInsert(c, key) + n.val = val + +proc `[]=`*[T](c: var CritBitTree[T], key: string, val: sink T) = + ## Alias for `incl <#incl,CritBitTree[T],string,T>`_. + ## + ## **See also:** + ## * `[] proc <#[],CritBitTree[T],string>`_ + ## * `[] proc <#[],CritBitTree[T],string_2>`_ + var n = rawInsert(c, key) + n.val = val + +template get[T](c: CritBitTree[T], key: string): T = + let n = rawGet(c, key) + if n == nil: + raise newException(KeyError, "key not found: " & key) + + n.val + +func `[]`*[T](c: CritBitTree[T], key: string): lent T {.inline.} = + ## Retrieves the value at `c[key]`. If `key` is not in `t`, the + ## `KeyError` exception is raised. One can check with `hasKey` whether + ## the key exists. + ## + ## **See also:** + ## * `[] proc <#[],CritBitTree[T],string_2>`_ + ## * `[]= proc <#[]=,CritBitTree[T],string,T>`_ + get(c, key) + +func `[]`*[T](c: var CritBitTree[T], key: string): var T {.inline.} = + ## Retrieves the value at `c[key]`. The value can be modified. + ## If `key` is not in `t`, the `KeyError` exception is raised. + ## + ## **See also:** + ## * `[] proc <#[],CritBitTree[T],string>`_ + ## * `[]= proc <#[]=,CritBitTree[T],string,T>`_ + get(c, key) + +iterator leaves[T](n: Node[T]): Node[T] = + if n != nil: + # XXX actually we could compute the necessary stack size in advance: + # it's roughly log2(c.count). + var stack = @[n] + while stack.len > 0: + var it = stack.pop + while not it.isLeaf: + stack.add(it.child[1]) + it = it.child[0] + assert(it != nil) + yield it + +iterator keys*[T](c: CritBitTree[T]): string = + ## Yields all keys in lexicographical order. + runnableExamples: + from std/sequtils import toSeq + + let c = {"key1": 1, "key2": 2}.toCritBitTree + doAssert toSeq(c.keys) == @["key1", "key2"] + + for x in leaves(c.root): yield x.key + +iterator values*[T](c: CritBitTree[T]): lent T = + ## Yields all values of `c` in the lexicographical order of the + ## corresponding keys. + ## + ## **See also:** + ## * `mvalues iterator <#mvalues.i,CritBitTree[T]>`_ + runnableExamples: + from std/sequtils import toSeq + + let c = {"key1": 1, "key2": 2}.toCritBitTree + doAssert toSeq(c.values) == @[1, 2] + + for x in leaves(c.root): yield x.val + +iterator mvalues*[T](c: var CritBitTree[T]): var T = + ## Yields all values of `c` in the lexicographical order of the + ## corresponding keys. The values can be modified. + ## + ## **See also:** + ## * `values iterator <#values.i,CritBitTree[T]>`_ + for x in leaves(c.root): yield x.val + +iterator items*[T](c: CritBitTree[T]): string = + ## Alias for `keys <#keys.i,CritBitTree[T]>`_. + for x in leaves(c.root): yield x.key + +iterator pairs*[T](c: CritBitTree[T]): tuple[key: string, val: T] = + ## Yields all `(key, value)`-pairs of `c` in the lexicographical order of the + ## corresponding keys. + ## + ## **See also:** + ## * `mpairs iterator <#mpairs.i,CritBitTree[T]>`_ + runnableExamples: + from std/sequtils import toSeq + + let c = {"key1": 1, "key2": 2}.toCritBitTree + doAssert toSeq(c.pairs) == @[(key: "key1", val: 1), (key: "key2", val: 2)] + + for x in leaves(c.root): yield (x.key, x.val) + +iterator mpairs*[T](c: var CritBitTree[T]): tuple[key: string, val: var T] = + ## Yields all `(key, value)`-pairs of `c` in the lexicographical order of the + ## corresponding keys. The yielded values can be modified. + ## + ## **See also:** + ## * `pairs iterator <#pairs.i,CritBitTree[T]>`_ + for x in leaves(c.root): yield (x.key, x.val) + +proc allprefixedAux[T](c: CritBitTree[T], key: string): Node[T] = + var p = c.root + var top = p + if p != nil: + while not p.isLeaf: + var q = p + let ch = if p.byte < key.len: key[p.byte] else: '\0' + let dir = (1 + (ch.ord or p.otherBits.ord)) shr 8 + p = p.child[dir] + if q.byte < key.len: top = p + for i in 0 ..< key.len: + if i >= p.key.len or p.key[i] != key[i]: return + result = top + +iterator keysWithPrefix*[T](c: CritBitTree[T], prefix: string): string = + ## Yields all keys starting with `prefix`. + runnableExamples: + from std/sequtils import toSeq + + let c = {"key1": 42, "key2": 43}.toCritBitTree + doAssert toSeq(c.keysWithPrefix("key")) == @["key1", "key2"] + + let top = allprefixedAux(c, prefix) + for x in leaves(top): yield x.key + +iterator valuesWithPrefix*[T](c: CritBitTree[T], prefix: string): lent T = + ## Yields all values of `c` starting with `prefix` of the + ## corresponding keys. + ## + ## **See also:** + ## * `mvaluesWithPrefix iterator <#mvaluesWithPrefix.i,CritBitTree[T],string>`_ + runnableExamples: + from std/sequtils import toSeq + + let c = {"key1": 42, "key2": 43}.toCritBitTree + doAssert toSeq(c.valuesWithPrefix("key")) == @[42, 43] + + let top = allprefixedAux(c, prefix) + for x in leaves(top): yield x.val + +iterator mvaluesWithPrefix*[T](c: var CritBitTree[T], prefix: string): var T = + ## Yields all values of `c` starting with `prefix` of the + ## corresponding keys. The values can be modified. + ## + ## **See also:** + ## * `valuesWithPrefix iterator <#valuesWithPrefix.i,CritBitTree[T],string>`_ + let top = allprefixedAux(c, prefix) + for x in leaves(top): yield x.val + +iterator itemsWithPrefix*[T](c: CritBitTree[T], prefix: string): string = + ## Alias for `keysWithPrefix <#keysWithPrefix.i,CritBitTree[T],string>`_. + let top = allprefixedAux(c, prefix) + for x in leaves(top): yield x.key + +iterator pairsWithPrefix*[T](c: CritBitTree[T], + prefix: string): tuple[key: string, val: T] = + ## Yields all (key, value)-pairs of `c` starting with `prefix`. + ## + ## **See also:** + ## * `mpairsWithPrefix iterator <#mpairsWithPrefix.i,CritBitTree[T],string>`_ + runnableExamples: + from std/sequtils import toSeq + + let c = {"key1": 42, "key2": 43}.toCritBitTree + doAssert toSeq(c.pairsWithPrefix("key")) == @[(key: "key1", val: 42), (key: "key2", val: 43)] + + let top = allprefixedAux(c, prefix) + for x in leaves(top): yield (x.key, x.val) + +iterator mpairsWithPrefix*[T](c: var CritBitTree[T], + prefix: string): tuple[key: string, val: var T] = + ## Yields all (key, value)-pairs of `c` starting with `prefix`. + ## The yielded values can be modified. + ## + ## **See also:** + ## * `pairsWithPrefix iterator <#pairsWithPrefix.i,CritBitTree[T],string>`_ + let top = allprefixedAux(c, prefix) + for x in leaves(top): yield (x.key, x.val) + +func `$`*[T](c: CritBitTree[T]): string = + ## Turns `c` into a string representation. + runnableExamples: + doAssert $CritBitTree[int].default == "{:}" + doAssert $toCritBitTree({"key1": 1, "key2": 2}) == """{"key1": 1, "key2": 2}""" + doAssert $CritBitTree[void].default == "{}" + doAssert $toCritBitTree(["key1", "key2"]) == """{"key1", "key2"}""" + + if c.len == 0: + when T is void: + result = "{}" + else: + result = "{:}" + else: + # an educated guess is better than nothing: + when T is void: + const avgItemLen = 8 + else: + const avgItemLen = 16 + result = newStringOfCap(c.count * avgItemLen) + result.add("{") + when T is void: + for key in keys(c): + if result.len > 1: result.add(", ") + result.addQuoted(key) + else: + for key, val in pairs(c): + if result.len > 1: result.add(", ") + result.addQuoted(key) + result.add(": ") + result.addQuoted(val) + result.add("}") + +func commonPrefixLen*[T](c: CritBitTree[T]): int {.inline, since((1, 3)).} = + ## Returns the length of the longest common prefix of all keys in `c`. + ## If `c` is empty, returns 0. + runnableExamples: + var c: CritBitTree[void] + doAssert c.commonPrefixLen == 0 + incl(c, "key1") + doAssert c.commonPrefixLen == 4 + incl(c, "key2") + doAssert c.commonPrefixLen == 3 + + if c.root != nil: + if c.root.isLeaf: len(c.root.key) + else: c.root.byte + else: 0 + +proc toCritBitTree*[T](pairs: sink openArray[(string, T)]): CritBitTree[T] {.since: (1, 3).} = + ## Creates a new `CritBitTree` that contains the given `pairs`. + runnableExamples: + doAssert {"a": "0", "b": "1", "c": "2"}.toCritBitTree is CritBitTree[string] + doAssert {"a": 0, "b": 1, "c": 2}.toCritBitTree is CritBitTree[int] + + for item in pairs: result.incl item[0], item[1] + +proc toCritBitTree*(items: sink openArray[string]): CritBitTree[void] {.since: (1, 3).} = + ## Creates a new `CritBitTree` that contains the given `items`. + runnableExamples: + doAssert ["a", "b", "c"].toCritBitTree is CritBitTree[void] + + for item in items: result.incl item |