summary refs log tree commit diff stats
path: root/lib/pure/collections/critbits.nim
diff options
context:
space:
mode:
Diffstat (limited to 'lib/pure/collections/critbits.nim')
-rw-r--r-- lib/pure/collections/critbits.nim 537
1 files changed, 537 insertions, 0 deletions
diff --git a/lib/pure/collections/critbits.nim b/lib/pure/collections/critbits.nim
new file mode 100644
index 000000000..24257dacb
--- /dev/null
+++ b/lib/pure/collections/critbits.nim
@@ -0,0 +1,537 @@
+#
+#
+#            Nim's Runtime Library
+#        (c) Copyright 2012 Andreas Rumpf
+#
+#    See the file "copying.txt", included in this
+#    distribution, for details about the copyright.
+#
+
+## This module implements a `crit bit tree`:idx: which is an efficient
+## container for a sorted set of strings, or for a sorted mapping of strings. Based on the
+## [excellent paper by Adam Langley](https://www.imperialviolet.org/binary/critbit.pdf).
+## (A crit bit tree is a form of `radix tree`:idx: or `patricia trie`:idx:.)
+
+runnableExamples:
+  from std/sequtils import toSeq
+
+  # With `T = void` the tree stores keys only, i.e. it behaves like a set.
+  var critbitAsSet: CritBitTree[void] = ["kitten", "puppy"].toCritBitTree
+  doAssert critbitAsSet.len == 2
+  critbitAsSet.incl("") # the empty string is an ordinary key
+  doAssert "" in critbitAsSet
+  critbitAsSet.excl("")
+  doAssert "" notin critbitAsSet
+  doAssert toSeq(critbitAsSet.items) == @["kitten", "puppy"]
+  # Inserting a key twice keeps a single entry, so both trees hold the same keys.
+  let same = ["puppy", "kitten", "puppy"].toCritBitTree
+  doAssert toSeq(same.keys) == toSeq(critbitAsSet.keys)
+
+  # With any other `T` the tree maps string keys to values of type `T`.
+  var critbitAsDict: CritBitTree[int] = {"key1": 42}.toCritBitTree
+  doAssert critbitAsDict.len == 1
+  critbitAsDict["key2"] = 0
+  doAssert "key2" in critbitAsDict
+  doAssert critbitAsDict["key2"] == 0
+  critbitAsDict.excl("key1") # removes the key together with its value
+  doAssert "key1" notin critbitAsDict
+  doAssert toSeq(critbitAsDict.pairs) == @[("key2", 0)]
+
+import std/private/since
+
+when defined(nimPreviewSlimSystem):
+  import std/assertions
+
+type
+  # One tree node. Inner nodes (`isLeaf == false`) only route a lookup to one
+  # of two children; leaves (`isLeaf == true`) carry the key and, unless `T`
+  # is `void`, the value. `byte` and `otherBits` are only filled in for inner
+  # nodes.
+  NodeObj[T] {.acyclic.} = object
+    byte: int ## byte index of the difference
+    otherBits: char ## mask for the critical bit of that byte, stored
+                    ## inverted: every bit is set except the critical one
+    case isLeaf: bool
+    # child[0] is followed when the critical bit of the probed key byte is
+    # clear, child[1] when it is set.
+    of false: child: array[0..1, ref NodeObj[T]]
+    of true:
+      key: string
+      # The value slot only exists for mapping trees (`T` is not `void`).
+      when T isnot void:
+        val: T
+
+  Node[T] = ref NodeObj[T]
+  CritBitTree*[T] = object ## The crit bit tree can either be used
+                           ## as a mapping from strings to
+                           ## some type `T` or as a set of
+                           ## strings if `T` is `void`.
+    root: Node[T] ## top node of the tree; `nil` while the tree is empty
+    count: int ## number of stored keys, maintained on insert and delete
+
+func len*[T](c: CritBitTree[T]): int {.inline.} =
+  ## Number of keys currently stored in `c`. The tree keeps a running
+  ## counter, so this is an O(1) read and no traversal takes place.
+  runnableExamples:
+    let c = ["key1", "key2"].toCritBitTree
+    doAssert len(c) == 2
+
+  c.count
+
+proc rawGet[T](c: CritBitTree[T], key: string): Node[T] =
+  # Looks up the leaf that stores exactly `key`; yields `nil` when the tree
+  # is empty or the leaf reached by the descent holds a different key.
+  var node = c.root
+  while node != nil:
+    if node.isLeaf:
+      # The descent only inspects the critical bits, so the candidate leaf
+      # still needs a full comparison.
+      if node.key == key: result = node
+      return
+    # Bytes past the end of `key` are treated as '\0'.
+    let probe = if node.byte < key.len: key[node.byte] else: '\0'
+    # The sum reaches 256 (direction 1) only if the critical bit is set.
+    node = node.child[(1 + (probe.ord or node.otherBits.ord)) shr 8]
+
+func contains*[T](c: CritBitTree[T], key: string): bool {.inline.} =
+  ## Tells whether `key` is stored in `c`. Being named `contains`, this
+  ## also backs the `in` / `notin` operators.
+  runnableExamples:
+    var c: CritBitTree[void]
+    c.incl("key")
+    doAssert contains(c, "key")
+
+  not rawGet(c, key).isNil
+
+func hasKey*[T](c: CritBitTree[T], key: string): bool {.inline.} =
+  ## Same check as `contains <#contains,CritBitTree[T],string>`_:
+  ## true if `key` is stored in `c`.
+  not isNil(rawGet(c, key))
+
+proc rawInsert[T](c: var CritBitTree[T], key: string): Node[T] =
+  # Returns the leaf that stores `key`, creating it if necessary.
+  #
+  # If `key` is already present, the existing leaf is returned and `c.count`
+  # is left untouched; otherwise a new leaf (with a default-initialized value
+  # slot) is linked into the tree and `c.count` is incremented. Callers use
+  # the change of `c.count` to tell the two cases apart.
+  if c.root == nil:
+    # Empty tree: the new leaf becomes the root.
+    c.root = Node[T](isLeaf: true, key: key)
+    result = c.root
+  else:
+    # Step 1: follow the critical bits down to the leaf that `key` would
+    # collide with. Bytes past the end of `key` count as '\0'.
+    var it = c.root
+    while not it.isLeaf:
+      let ch = if it.byte < key.len: key[it.byte] else: '\0'
+      let dir = (1 + (ch.ord or it.otherBits.ord)) shr 8
+      it = it.child[dir]
+
+    # Step 2: find the first byte (`newByte`) in which `key` and that leaf's
+    # key differ, and the differing bits (`newOtherBits`).
+    var newOtherBits = 0
+    var newByte = 0
+    block blockX:
+      while newByte < key.len:
+        let ch = if newByte < it.key.len: it.key[newByte] else: '\0'
+        if ch != key[newByte]:
+          newOtherBits = ch.ord xor key[newByte].ord
+          break blockX
+        inc newByte
+      # `key` is exhausted without a mismatch.
+      if newByte < it.key.len:
+        # `key` is a proper prefix of the stored key: the stored key's next
+        # byte is compared against the implicit '\0'.
+        newOtherBits = it.key[newByte].ord
+      else:
+        # Same key: nothing to insert, and `c.count` must not change.
+        return it
+    # Step 3: clear the lowest set bit until only the highest differing bit
+    # is left, then invert, giving a mask with every bit set except the
+    # critical one (the representation stored in `otherBits`).
+    while (newOtherBits and (newOtherBits-1)) != 0:
+      newOtherBits = newOtherBits and (newOtherBits-1)
+    newOtherBits = newOtherBits xor 255
+    # `dir` is the side the *existing* leaf's byte falls on: 1 exactly when
+    # its critical bit is set (the sum then reaches 256).
+    let ch = if newByte < it.key.len: it.key[newByte] else: '\0'
+    let dir = (1 + (ord(ch) or newOtherBits)) shr 8
+
+    # Step 4: build the new inner node; the new leaf takes the other side.
+    var inner: Node[T]
+    new inner
+    result = Node[T](isLeaf: true, key: key)
+    inner.otherBits = chr(newOtherBits)
+    inner.byte = newByte
+    inner.child[1 - dir] = result
+
+    # Step 5: walk down from the root again to the slot where the new inner
+    # node belongs. The walk stops at a leaf, at a node that tests a later
+    # byte, or at a node that tests the same byte with a larger mask.
+    var wherep = addr(c.root)
+    while true:
+      var p = wherep[]
+      if p.isLeaf: break
+      if p.byte > newByte: break
+      if p.byte == newByte and p.otherBits.ord > newOtherBits: break
+      let ch = if p.byte < key.len: key[p.byte] else: '\0'
+      let dir = (1 + (ch.ord or p.otherBits.ord)) shr 8
+      wherep = addr(p.child[dir])
+    # Splice: the subtree found in that slot hangs below the new inner node,
+    # which then takes over the slot.
+    inner.child[dir] = wherep[]
+    wherep[] = inner
+  inc c.count
+
+func exclImpl[T](c: var CritBitTree[T], key: string): int =
+  # Removes `key` from `c` if it is present and returns `c.count` as it is
+  # after the call. Callers compare the count before and after to learn
+  # whether anything was removed.
+  var p = c.root
+  var wherep = addr(c.root) # slot that currently points at `p`
+  var whereq: ptr Node[T] = nil # slot that points at `q`, the parent of `p`
+  if p == nil: return c.count # empty tree: nothing to remove
+  var dir = 0
+  var q: Node[T]
+  # Descend to the only leaf that can hold `key`, remembering the parent
+  # inner node `q`, the slot it hangs in and the direction taken from it.
+  while not p.isLeaf:
+    whereq = wherep
+    q = p
+    # Bytes past the end of `key` are treated as '\0'.
+    let ch = if p.byte < key.len: key[p.byte] else: '\0'
+    dir = (1 + (ch.ord or p.otherBits.ord)) shr 8
+    wherep = addr(p.child[dir])
+    p = wherep[]
+  if p.key == key:
+    # else: not in tree at all
+    if whereq == nil:
+      # The root itself is the matching leaf: the tree becomes empty.
+      c.root = nil
+    else:
+      # Replace the parent inner node by the sibling of the removed leaf.
+      whereq[] = q.child[1 - dir]
+    dec c.count
+
+  return c.count
+
+proc excl*[T](c: var CritBitTree[T], key: string) =
+  ## Deletes `key`, together with any value stored for it, from `c`.
+  ## Calling this with a key that is not in `c` is a no-op.
+  ##
+  ## **See also:**
+  ## * `incl proc <#incl,CritBitTree[void],string>`_
+  ## * `incl proc <#incl,CritBitTree[T],string,T>`_
+  runnableExamples:
+    var c: CritBitTree[void]
+    c.incl("key")
+    c.excl("key")
+    doAssert "key" notin c
+
+  # The resulting element count is not needed here.
+  discard c.exclImpl(key)
+
+proc missingOrExcl*[T](c: var CritBitTree[T], key: string): bool =
+  ## Removes `key` from `c` if it is present and reports whether it was
+  ## missing: the result is true when `c` did not contain `key` (and was
+  ## therefore left unchanged), false when the key was found and excluded.
+  ##
+  ## **See also:**
+  ## * `excl proc <#excl,CritBitTree[T],string>`_
+  ## * `containsOrIncl proc <#containsOrIncl,CritBitTree[T],string,T>`_
+  ## * `containsOrIncl proc <#containsOrIncl,CritBitTree[void],string>`_
+  runnableExamples:
+    block:
+      var c: CritBitTree[void]
+      doAssert c.missingOrExcl("key")
+    block:
+      var c: CritBitTree[void]
+      c.incl("key")
+      doAssert not c.missingOrExcl("key")
+      doAssert "key" notin c
+
+  # An unchanged element count means there was nothing to remove.
+  let sizeBefore = c.count
+  discard c.exclImpl(key)
+  sizeBefore == c.count
+
+proc containsOrIncl*[T](c: var CritBitTree[T], key: string, val: sink T): bool =
+  ## Inserts `key` with value `val` unless `key` is already present.
+  ## Returns true if `c` already contained `key` (the stored value is then
+  ## left as it is), false if the pair was newly added.
+  ##
+  ## **See also:**
+  ## * `incl proc <#incl,CritBitTree[void],string>`_
+  ## * `incl proc <#incl,CritBitTree[T],string,T>`_
+  ## * `containsOrIncl proc <#containsOrIncl,CritBitTree[void],string>`_
+  ## * `missingOrExcl proc <#missingOrExcl,CritBitTree[T],string>`_
+  runnableExamples:
+    block:
+      var c: CritBitTree[int]
+      doAssert not c.containsOrIncl("key", 42)
+      doAssert "key" in c
+    block:
+      var c: CritBitTree[int]
+      c.incl("key", 21)
+      doAssert c.containsOrIncl("key", 42)
+      doAssert c["key"] == 21
+
+  # The insertion only bumps the count when it had to create a new leaf.
+  let sizeBefore = c.count
+  let leaf = rawInsert(c, key)
+  result = sizeBefore == c.count
+  when T isnot void:
+    if not result:
+      leaf.val = val
+
+proc containsOrIncl*(c: var CritBitTree[void], key: string): bool =
+  ## Makes sure `key` is a member of the set `c`. Returns true if it was
+  ## a member already, false if this call had to insert it.
+  ##
+  ## **See also:**
+  ## * `incl proc <#incl,CritBitTree[void],string>`_
+  ## * `incl proc <#incl,CritBitTree[T],string,T>`_
+  ## * `containsOrIncl proc <#containsOrIncl,CritBitTree[T],string,T>`_
+  ## * `missingOrExcl proc <#missingOrExcl,CritBitTree[T],string>`_
+  runnableExamples:
+    block:
+      var c: CritBitTree[void]
+      doAssert not c.containsOrIncl("key")
+      doAssert "key" in c
+    block:
+      var c: CritBitTree[void]
+      c.incl("key")
+      doAssert c.containsOrIncl("key")
+
+  # The insertion only bumps the count when it had to create a new leaf.
+  let sizeBefore = c.count
+  discard c.rawInsert(key)
+  sizeBefore == c.count
+
+proc inc*(c: var CritBitTree[int]; key: string, val: int = 1) =
+  ## Adds `val` (1 by default) to the counter stored at `c[key]`.
+  ## The entry is looked up through the insertion path, so a key that is
+  ## not present yet is created first.
+  runnableExamples:
+    var c: CritBitTree[int]
+    c["key"] = 1
+    c.inc("key")
+    doAssert c["key"] == 2
+
+  let leaf = rawInsert(c, key)
+  inc(leaf.val, val)
+
+proc incl*(c: var CritBitTree[void], key: string) =
+  ## Adds `key` to the set `c`.
+  ##
+  ## **See also:**
+  ## * `excl proc <#excl,CritBitTree[T],string>`_
+  ## * `incl proc <#incl,CritBitTree[T],string,T>`_
+  runnableExamples:
+    var c: CritBitTree[void]
+    c.incl("key")
+    doAssert hasKey(c, "key")
+
+  # A set has no value slot to fill, so the returned leaf is not needed.
+  discard c.rawInsert(key)
+
+proc incl*[T](c: var CritBitTree[T], key: string, val: sink T) =
+  ## Stores `val` under `key` in `c`. The value is assigned whether or not
+  ## `key` was present before the call.
+  ##
+  ## **See also:**
+  ## * `excl proc <#excl,CritBitTree[T],string>`_
+  ## * `incl proc <#incl,CritBitTree[void],string>`_
+  runnableExamples:
+    var c: CritBitTree[int]
+    c.incl("key", 42)
+    doAssert c["key"] == 42
+
+  let leaf = rawInsert(c, key)
+  leaf.val = val
+
+proc `[]=`*[T](c: var CritBitTree[T], key: string, val: sink T) =
+  ## Operator form of `incl <#incl,CritBitTree[T],string,T>`_:
+  ## `c[key] = val` stores `val` under `key`.
+  ##
+  ## **See also:**
+  ## * `[] proc <#[],CritBitTree[T],string>`_
+  ## * `[] proc <#[],CritBitTree[T],string_2>`_
+  let leaf = c.rawInsert(key)
+  leaf.val = val
+
+template get[T](c: CritBitTree[T], key: string): T =
+  # Shared body of the two `[]` accessors: expands to the value stored for
+  # `key`, or raises `KeyError` if `key` is not in `c`.
+  let hit = rawGet(c, key)
+  if hit.isNil:
+    raise newException(KeyError, "key not found: " & key)
+
+  hit.val
+
+func `[]`*[T](c: CritBitTree[T], key: string): lent T {.inline.} =
+  ## Read access to the value stored under `key`.
+  ## Raises `KeyError` if `key` is not in `c`; use `hasKey` or `contains`
+  ## beforehand to test for presence.
+  ##
+  ## **See also:**
+  ## * `[] proc <#[],CritBitTree[T],string_2>`_
+  ## * `[]= proc <#[]=,CritBitTree[T],string,T>`_
+  c.get(key)
+
+func `[]`*[T](c: var CritBitTree[T], key: string): var T {.inline.} =
+  ## Mutable access to the value stored under `key`; assigning through the
+  ## result updates the entry in place.
+  ## Raises `KeyError` if `key` is not in `c`.
+  ##
+  ## **See also:**
+  ## * `[] proc <#[],CritBitTree[T],string>`_
+  ## * `[]= proc <#[]=,CritBitTree[T],string,T>`_
+  c.get(key)
+
+iterator leaves[T](n: Node[T]): Node[T] =
+  # Yields every leaf of the subtree rooted at `n`, visiting `child[0]`
+  # before `child[1]` at each inner node. Yields nothing for a `nil` root.
+  if not n.isNil:
+    # XXX the stack could be pre-sized instead of growing on demand
+    # (the original note estimates roughly log2(c.count) entries).
+    var pending = @[n]
+    while pending.len != 0:
+      var node = pending.pop()
+      while not node.isLeaf:
+        # Park the `child[1]` branch and keep descending along `child[0]`.
+        pending.add(node.child[1])
+        node = node.child[0]
+        assert(node != nil)
+      yield node
+
+iterator keys*[T](c: CritBitTree[T]): string =
+  ## Iterates over the keys of `c` in lexicographical order.
+  runnableExamples:
+    from std/sequtils import toSeq
+
+    let c = {"key1": 1, "key2": 2}.toCritBitTree
+    doAssert toSeq(keys(c)) == @["key1", "key2"]
+
+  for leaf in leaves(c.root):
+    yield leaf.key
+
+iterator values*[T](c: CritBitTree[T]): lent T =
+  ## Iterates over the values of `c`, ordered by the lexicographical order
+  ## of the keys they belong to.
+  ##
+  ## **See also:**
+  ## * `mvalues iterator <#mvalues.i,CritBitTree[T]>`_
+  runnableExamples:
+    from std/sequtils import toSeq
+
+    let c = {"key1": 1, "key2": 2}.toCritBitTree
+    doAssert toSeq(values(c)) == @[1, 2]
+
+  for leaf in leaves(c.root):
+    yield leaf.val
+
+iterator mvalues*[T](c: var CritBitTree[T]): var T =
+  ## Iterates over the values of `c`, ordered by the lexicographical order
+  ## of the keys they belong to. Each value is yielded as `var T`, so it
+  ## can be changed in place.
+  ##
+  ## **See also:**
+  ## * `values iterator <#values.i,CritBitTree[T]>`_
+  for leaf in leaves(c.root):
+    yield leaf.val
+
+iterator items*[T](c: CritBitTree[T]): string =
+  ## Same as `keys <#keys.i,CritBitTree[T]>`_; this is what a plain
+  ## `for key in c` loop uses.
+  for leaf in leaves(c.root):
+    yield leaf.key
+
+iterator pairs*[T](c: CritBitTree[T]): tuple[key: string, val: T] =
+  ## Iterates over the `(key, value)` entries of `c`, ordered
+  ## lexicographically by key.
+  ##
+  ## **See also:**
+  ## * `mpairs iterator <#mpairs.i,CritBitTree[T]>`_
+  runnableExamples:
+    from std/sequtils import toSeq
+
+    let c = {"key1": 1, "key2": 2}.toCritBitTree
+    doAssert toSeq(pairs(c)) == @[(key: "key1", val: 1), (key: "key2", val: 2)]
+
+  for leaf in leaves(c.root):
+    yield (key: leaf.key, val: leaf.val)
+
+iterator mpairs*[T](c: var CritBitTree[T]): tuple[key: string, val: var T] =
+  ## Iterates over the `(key, value)` entries of `c`, ordered
+  ## lexicographically by key. The value component is yielded as `var T`
+  ## and can be changed in place.
+  ##
+  ## **See also:**
+  ## * `pairs iterator <#pairs.i,CritBitTree[T]>`_
+  for leaf in leaves(c.root):
+    yield (leaf.key, leaf.val)
+
+proc allprefixedAux[T](c: CritBitTree[T], key: string): Node[T] =
+  # Returns the root of the subtree that holds the keys starting with `key`,
+  # or `nil` if the tree is empty or the leaf reached by the descent does
+  # not start with `key`.
+  if c.root == nil: return nil
+  var
+    cur = c.root
+    subtree = cur
+  while not cur.isLeaf:
+    let
+      inner = cur
+      probe = if inner.byte < key.len: key[inner.byte] else: '\0'
+      side = (1 + (probe.ord or inner.otherBits.ord)) shr 8
+    cur = inner.child[side]
+    # Only decisions taken on bytes inside `key` narrow the result.
+    if inner.byte < key.len: subtree = cur
+  # `cur` is now a leaf; it has to start with `key` byte for byte.
+  if cur.key.len < key.len: return nil
+  for i in 0 ..< key.len:
+    if cur.key[i] != key[i]: return nil
+  result = subtree
+
+iterator keysWithPrefix*[T](c: CritBitTree[T], prefix: string): string =
+  ## Iterates over every key of `c` that begins with `prefix`.
+  runnableExamples:
+    from std/sequtils import toSeq
+
+    let c = {"key1": 42, "key2": 43}.toCritBitTree
+    doAssert toSeq(keysWithPrefix(c, "key")) == @["key1", "key2"]
+
+  let subtree = allprefixedAux(c, prefix)
+  for leaf in leaves(subtree):
+    yield leaf.key
+
+iterator valuesWithPrefix*[T](c: CritBitTree[T], prefix: string): lent T =
+  ## Iterates over the values of `c` whose keys begin with `prefix`.
+  ##
+  ## **See also:**
+  ## * `mvaluesWithPrefix iterator <#mvaluesWithPrefix.i,CritBitTree[T],string>`_
+  runnableExamples:
+    from std/sequtils import toSeq
+
+    let c = {"key1": 42, "key2": 43}.toCritBitTree
+    doAssert toSeq(valuesWithPrefix(c, "key")) == @[42, 43]
+
+  let subtree = allprefixedAux(c, prefix)
+  for leaf in leaves(subtree):
+    yield leaf.val
+
+iterator mvaluesWithPrefix*[T](c: var CritBitTree[T], prefix: string): var T =
+  ## Iterates over the values of `c` whose keys begin with `prefix`.
+  ## Each value is yielded as `var T`, so it can be changed in place.
+  ##
+  ## **See also:**
+  ## * `valuesWithPrefix iterator <#valuesWithPrefix.i,CritBitTree[T],string>`_
+  let subtree = allprefixedAux(c, prefix)
+  for leaf in leaves(subtree):
+    yield leaf.val
+
+iterator itemsWithPrefix*[T](c: CritBitTree[T], prefix: string): string =
+  ## Same as `keysWithPrefix <#keysWithPrefix.i,CritBitTree[T],string>`_:
+  ## iterates over every key of `c` that begins with `prefix`.
+  let subtree = allprefixedAux(c, prefix)
+  for leaf in leaves(subtree):
+    yield leaf.key
+
+iterator pairsWithPrefix*[T](c: CritBitTree[T],
+                             prefix: string): tuple[key: string, val: T] =
+  ## Iterates over the `(key, value)` entries of `c` whose keys begin
+  ## with `prefix`.
+  ##
+  ## **See also:**
+  ## * `mpairsWithPrefix iterator <#mpairsWithPrefix.i,CritBitTree[T],string>`_
+  runnableExamples:
+    from std/sequtils import toSeq
+
+    let c = {"key1": 42, "key2": 43}.toCritBitTree
+    doAssert toSeq(pairsWithPrefix(c, "key")) == @[(key: "key1", val: 42), (key: "key2", val: 43)]
+
+  let subtree = allprefixedAux(c, prefix)
+  for leaf in leaves(subtree):
+    yield (key: leaf.key, val: leaf.val)
+
+iterator mpairsWithPrefix*[T](c: var CritBitTree[T],
+                              prefix: string): tuple[key: string, val: var T] =
+  ## Iterates over the `(key, value)` entries of `c` whose keys begin
+  ## with `prefix`. The value component is yielded as `var T` and can be
+  ## changed in place.
+  ##
+  ## **See also:**
+  ## * `pairsWithPrefix iterator <#pairsWithPrefix.i,CritBitTree[T],string>`_
+  let subtree = allprefixedAux(c, prefix)
+  for leaf in leaves(subtree):
+    yield (leaf.key, leaf.val)
+
+func `$`*[T](c: CritBitTree[T]): string =
+  ## Renders `c` as text. A set (`T` is `void`) is shown as a brace-enclosed
+  ## list of quoted keys, a mapping as a brace-enclosed list of
+  ## `key: value` entries; the empty forms are `{}` and `{:}` respectively.
+  runnableExamples:
+    doAssert $CritBitTree[int].default == "{:}"
+    doAssert $toCritBitTree({"key1": 1, "key2": 2}) == """{"key1": 1, "key2": 2}"""
+    doAssert $CritBitTree[void].default == "{}"
+    doAssert $toCritBitTree(["key1", "key2"]) == """{"key1", "key2"}"""
+
+  if c.count == 0:
+    result = when T is void: "{}" else: "{:}"
+    return
+
+  # Rough per-entry size guess, used only to pre-size the output buffer.
+  const avgItemLen = when T is void: 8 else: 16
+  result = newStringOfCap(c.count * avgItemLen)
+  result.add('{')
+  when T is void:
+    for k in keys(c):
+      # Anything beyond the opening brace means an entry was already written.
+      if result.len > 1: result.add(", ")
+      result.addQuoted(k)
+  else:
+    for k, v in pairs(c):
+      # Anything beyond the opening brace means an entry was already written.
+      if result.len > 1: result.add(", ")
+      result.addQuoted(k)
+      result.add(": ")
+      result.addQuoted(v)
+  result.add('}')
+
+func commonPrefixLen*[T](c: CritBitTree[T]): int {.inline, since((1, 3)).} =
+  ## Length of the longest prefix shared by every key in `c`; 0 for an
+  ## empty tree. With a single key the result is that key's length,
+  ## otherwise it is the byte index tested by the root node.
+  runnableExamples:
+    var c: CritBitTree[void]
+    doAssert c.commonPrefixLen == 0
+    c.incl("key1")
+    doAssert c.commonPrefixLen == 4
+    c.incl("key2")
+    doAssert c.commonPrefixLen == 3
+
+  if c.root == nil: 0
+  elif c.root.isLeaf: c.root.key.len
+  else: c.root.byte
+
+proc toCritBitTree*[T](pairs: sink openArray[(string, T)]): CritBitTree[T] {.since: (1, 3).} =
+  ## Builds a `CritBitTree` mapping from the given `(key, value)` `pairs`.
+  ## The entries are inserted in order, so for a repeated key the last
+  ## value given is the one that remains.
+  runnableExamples:
+    doAssert {"a": "0", "b": "1", "c": "2"}.toCritBitTree is CritBitTree[string]
+    doAssert {"a": 0, "b": 1, "c": 2}.toCritBitTree is CritBitTree[int]
+
+  for entry in pairs:
+    incl(result, entry[0], entry[1])
+
+proc toCritBitTree*(items: sink openArray[string]): CritBitTree[void] {.since: (1, 3).} =
+  ## Builds a `CritBitTree` set from the given `items`; repeated strings
+  ## end up as a single key.
+  runnableExamples:
+    doAssert ["a", "b", "c"].toCritBitTree is CritBitTree[void]
+
+  for key in items:
+    incl(result, key)