summary refs log blame commit diff stats
path: root/lib/pure/collections/critbits.nim
blob: 40a02b65116d599d9da4684964a4989ddcdfd2ba (plain) (tree)
1
2
3
4


                                     
                                         






































































































































































































































































































                                                                               
                                   

          
#
#
#            Nimrod's Runtime Library
#        (c) Copyright 2012 Andreas Rumpf
#
#    See the file "copying.txt", included in this
#    distribution, for details about the copyright.
#

## This module implements a `crit bit tree`:idx: which is an efficient
## container for a set or a mapping of strings. Based on the excellent paper
## by Adam Langley.

type
  TNode[T] = object {.pure, final, acyclic.}
    byte: int ## byte index of the difference
    otherbits: char
    case isLeaf: bool
    of false: child: array[0..1, ref TNode[T]]
    of true: 
      key: string
      when T isnot void:
        val: T
    
  PNode[T] = ref TNode[T]
  TCritBitTree*[T] = object {.
      pure, final.} ## The crit bit tree can either be used
                    ## as a mapping from strings to
                    ## some type ``T`` or as a set of
                    ## strings if ``T`` is void.
    root: PNode[T]
    count: int
    
proc len*[T](c: TCritBitTree[T]): int =
  ## returns the number of elements in `c` in O(1).
  result = c.count

proc rawGet[T](c: TCritBitTree[T], key: string): PNode[T] =
  var it = c.root
  while it != nil:
    if not it.isLeaf:
      let ch = if it.byte < key.len: key[it.byte] else: '\0'
      let dir = (1 + (ch.ord or it.otherBits.ord)) shr 8
      it = it.child[dir]
    else:
      return if it.key == key: it else: nil

proc contains*[T](c: TCritBitTree[T], key: string): bool {.inline.} =
  ## returns true iff `c` contains the given `key`.
  result = rawGet(c, key) != nil

proc hasKey*[T](c: TCritBitTree[T], key: string): bool {.inline.} =
  ## alias for `contains`.
  result = rawGet(c, key) != nil

proc rawInsert[T](c: var TCritBitTree[T], key: string): PNode[T] =
  if c.root == nil:
    new c.root
    c.root.isleaf = true
    c.root.key = key
    result = c.root
  else:
    var it = c.root
    while not it.isLeaf:
      let ch = if it.byte < key.len: key[it.byte] else: '\0'
      let dir = (1 + (ch.ord or it.otherBits.ord)) shr 8
      it = it.child[dir]
    
    var newOtherBits = 0
    var newByte = 0
    block blockX:
      while newbyte < key.len:
        if it.key[newbyte] != key[newbyte]:
          newotherbits = it.key[newbyte].ord xor key[newbyte].ord
          break blockX
        inc newbyte
      if it.key[newbyte] != '\0':
        newotherbits = it.key[newbyte].ord
      else:
        return it
    while (newOtherBits and (newOtherBits-1)) != 0:
      newOtherBits = newOtherBits and (newOtherBits-1)
    newOtherBits = newOtherBits xor 255
    let ch = it.key[newByte]
    let dir = (1 + (ord(ch) or newOtherBits)) shr 8
    
    var inner: PNode[T]
    new inner
    new result
    result.isLeaf = true
    result.key = key
    inner.otherBits = chr(newOtherBits)
    inner.byte = newByte
    inner.child[1 - dir] = result
    
    var wherep = addr(c.root)
    while true:
      var p = wherep[]
      if p.isLeaf: break
      if p.byte > newByte: break
      if p.byte == newByte and p.otherBits.ord > newOtherBits: break
      let ch = if p.byte < key.len: key[p.byte] else: '\0'
      let dir = (1 + (ch.ord or p.otherBits.ord)) shr 8
      wherep = addr(p.child[dir])
    inner.child[dir] = wherep[]
    wherep[] = inner
  inc c.count

proc containsOrIncl*[T](c: var TCritBitTree[T], key: string, val: T): bool =
  ## returns true iff `c` contains the given `key`. If the key does not exist
  ## ``c[key] = val`` is performed.
  let oldCount = c.count
  var n = rawInsert(c, key)
  result = c.count == oldCount
  when T isnot void:
    if not result: n.val = val

proc containsOrIncl*(c: var TCritBitTree[void], key: string): bool =
  ## returns true iff `c` contains the given `key`. If the key does not exist
  ## it is inserted into `c`.
  let oldCount = c.count
  var n = rawInsert(c, key)
  result = c.count == oldCount

proc incl*(c: var TCritBitTree[void], key: string) =
  ## includes `key` in `c`.
  discard rawInsert(c, key)

proc `[]=`*[T](c: var TCritBitTree[T], key: string, val: T) =
  ## puts a (key, value)-pair into `t`.
  var n = rawInsert(c, key)
  n.val = val

proc `[]`*[T](c: var TCritBitTree[T], key: string): T {.inline.} =
  ## retrieves the value at ``c[key]``. If `key` is not in `t`,
  ## default empty value for the type `B` is returned
  ## and no exception is raised. One can check with ``hasKey`` whether the key
  ## exists.
  let n = rawGet(c, key)
  if n != nil: result = n.val

proc mget*[T](c: var TCritBitTree[T], key: string): var T {.inline.} =
  ## retrieves the value at ``c[key]``. The value can be modified.
  ## If `key` is not in `t`, the ``EInvalidKey`` exception is raised.
  let n = rawGet(c, key)
  if n != nil: result = n.val
  else: raise newException(EInvalidKey, "key not found: " & $key)

proc excl*[T](c: var TCritBitTree[T], key: string) =
  ## removes `key` (and its associated value) from the set `c`.
  ## If the `key` does not exist, nothing happens.
  var p = c.root
  var wherep = addr(c.root)
  var whereq: ptr PNode = nil
  if p == nil: return
  var dir = 0
  var q: PNode
  while not p.isLeaf:
    whereq = wherep
    q = p
    let ch = if p.byte < key.len: key[p.byte] else: '\0'
    dir = (1 + (ch.ord or p.otherBits.ord)) shr 8
    wherep = addr(p.child[dir])
    p = wherep[]
  if p.key == key:
    # else: not in tree at all
    if whereq == nil:
      c.root = nil
    else:
      whereq[] = q.child[1 - dir]
    dec c.count

iterator leaves[T](n: PNode[T]): PNode[T] =
  if n != nil:
    # XXX actually we could compute the necessary stack size in advance:
    # it's rougly log2(c.count).
    var stack = @[n]
    while stack.len > 0: 
      var it = stack.pop
      while not it.isLeaf:
        stack.add(it.child[1])
        it = it.child[0]
        assert(it != nil)
      yield it

iterator keys*[T](c: TCritBitTree[T]): string =
  ## yields all keys in lexicographical order.
  for x in leaves(c.root): yield x.key

iterator values*[T](c: TCritBitTree[T]): T =
  ## yields all values of `c` in the lexicographical order of the
  ## corresponding keys.
  for x in leaves(c.root): yield x.val

iterator mvalues*[T](c: var TCritBitTree[T]): var T =
  ## yields all values of `c` in the lexicographical order of the
  ## corresponding keys. The values can be modified.
  for x in leaves(c.root): yield x.val

iterator items*[T](c: TCritBitTree[T]): string =
  ## yields all keys in lexicographical order.
  for x in leaves(c.root): yield x.key

iterator pairs*[T](c: TCritBitTree[T]): tuple[key: string, val: T] =
  ## yields all (key, value)-pairs of `c`.
  for x in leaves(c.root): yield (x.key, x.val)
  
iterator mpairs*[T](c: var TCritBitTree[T]): tuple[key: string, val: var T] =
  ## yields all (key, value)-pairs of `c`. The yielded values can be modified.
  for x in leaves(c.root): yield (x.key, x.val)

proc allprefixedAux[T](c: TCritBitTree[T], key: string): PNode[T] =
  var p = c.root
  var top = p
  if p != nil:
    while not p.isLeaf:
      var q = p
      let ch = if p.byte < key.len: key[p.byte] else: '\0'
      let dir = (1 + (ch.ord or p.otherBits.ord)) shr 8
      p = p.child[dir]
      if q.byte < key.len: top = p
    for i in 0 .. <key.len:
      if p.key[i] != key[i]: return
    result = top

iterator itemsWithPrefix*[T](c: TCritBitTree[T], prefix: string): string =
  ## yields all keys starting with `prefix`.
  let top = allprefixedAux(c, prefix)
  for x in leaves(top): yield x.key

iterator keysWithPrefix*[T](c: TCritBitTree[T], prefix: string): string =
  ## yields all keys starting with `prefix`.
  let top = allprefixedAux(c, prefix)
  for x in leaves(top): yield x.key

iterator valuesWithPrefix*[T](c: TCritBitTree[T], prefix: string): T =
  ## yields all values of `c` starting with `prefix` of the
  ## corresponding keys.
  let top = allprefixedAux(c, prefix)
  for x in leaves(top): yield x.val

iterator mvaluesWithPrefix*[T](c: var TCritBitTree[T], prefix: string): var T =
  ## yields all values of `c` starting with `prefix` of the
  ## corresponding keys. The values can be modified.
  let top = allprefixedAux(c, prefix)
  for x in leaves(top): yield x.val

iterator pairsWithPrefix*[T](c: TCritBitTree[T],
                             prefix: string): tuple[key: string, val: T] =
  ## yields all (key, value)-pairs of `c` starting with `prefix`.
  let top = allprefixedAux(c, prefix)
  for x in leaves(top): yield (x.key, x.val)
  
iterator mpairsWithPrefix*[T](c: var TCritBitTree[T],
                              prefix: string): tuple[key: string, val: var T] =
  ## yields all (key, value)-pairs of `c` starting with `prefix`.
  ## The yielded values can be modified.
  let top = allprefixedAux(c, prefix)
  for x in leaves(top): yield (x.key, x.val)

proc `$`*[T](c: TCritBitTree[T]): string =
  ## turns `c` into a string representation. Example outputs:
  ## ``{keyA: value, keyB: value}``, ``{:}``
  ## If `T` is void the outputs look like:
  ## ``{keyA, keyB}``, ``{}``.
  if c.len == 0:
    when T is void:
      result = "{}"
    else:
      result = "{:}"
  else:
    # an educated guess is better than nothing:
    when T is void:
      const avgItemLen = 8
    else:
      const avgItemLen = 16
    result = newStringOfCap(c.count * avgItemLen)
    result.add("{")
    for key, val in pairs(c):
      if result.len > 1: result.add(", ")
      result.add($key)
      when T isnot void:
        result.add(": ")
        result.add($val)
    result.add("}")

when isMainModule:
  var r: TCritBitTree[void]
  r.incl "abc"
  r.incl "xyz"
  r.incl "def"
  r.incl "definition"
  r.incl "prefix"
  doAssert r.contains"def"
  #r.del "def"

  for w in r.items:
    echo w
    
  for w in r.itemsWithPrefix("de"):
    echo w