summary refs log blame commit diff stats
path: root/nimlib/pure/hashtabs.nim
blob: 68d19d63b9a467b02f69c46e7e4e4839e6a16081 (plain) (tree)


































































































































































                                                                             
#
#
#            Nimrod's Runtime Library
#        (c) Copyright 2009 Andreas Rumpf
#
#    See the file "copying.txt", included in this
#    distribution, for details about the copyright.
#

## The ``hashtabs`` module implements an efficient generic hash
## table/dictionary data type. 

import
  hashes

const
  growthFactor = 2
  startSize = 8
  sham = sizeof(THash)*8-2 # shift amount
  mask = 0b11 shl sham
  usedSlot = 0b10 shl sham
  delSlot =  0b01 shl sham
  emptySlot = 0

type
  TTable*[TKey, TValue] = object
    counter: int
    data: seq[tuple[key: TKey, val: TValue, h: THash]]

proc init*(t: var TTable, size = startSize) =
  t.counter = 0
  newSeq(t.data, size)

proc markUsed(h: THash): THash {.inline.} =
  return h and not mask or usedSlot

proc len*(t: TTable): int {.inline.} =
  ## returns the number of keys in `t`.
  result = t.counter

proc mustRehash(length, counter: int): bool =
  assert(length > counter)
  result = (length * 2 < counter * 3) or (length - counter < 4)

proc nextTry(h, maxHash: THash): THash {.inline.} =
  result = ((5 * h) + 1) and maxHash

template eq(a, b: expr): expr = a == b

proc rawGet(t: TTable, key: TKey, fullhash: THash): int =
  var h = fullhash and high(t.data)
  while (t.data[h].h and mask) != 0:
    # If it is a deleted entry, the comparison with ``markUsed(fullhash)``
    # fails, so there is no need to check for this explicitely.
    if t.data[h].h == markUsed(fullhash) and eq(t.data[h].key, key): return h
    h = nextTry(h, high(t.data))
  result = - 1

proc `[]`*(t: TTable, key: TKey): TValue =
  ## retrieves the value at ``t[key]``. If `key` is not in `t`,
  ## `EInvalidValue` is raised.
  var index = rawGet(t, key, hash(key))
  if index >= 0: result = t.data[index].val
  else:
    var e: ref EInvalidValue
    new(e)
    e.msg = "invalid key: " & $key
    raise e

proc hasKey*(t: TTable, key: TKey): bool =
  ## returns true iff `key` is in the table `t`.
  result = rawGet(t, key) >= 0

proc rawInsert[TKey, TValue](
               data: var seq[tuple[key: TKey, val: TValue, h: THash]],
               tup: tuple[key: TKey, val: TValue, h: THash]) =
  var h = tup.h and high(data)
  while (data[h].h and mask) == usedSlot: h = nextTry(h, high(data))
  data[h] = tup

proc enlarge(t: var TTable) =
  var n: seq[tuple[key: TKey, val: TValue, h: THash]]
  newSeq(n, len(t.data) * growthFactor)
  for i in 0..high(t.data):
    if (t.data[i].h and mask) == usedSlot: rawInsert(n, t.data[i])
  swap(t.data, n)

proc `[]=`*(t: var TTable, key: TKey, val: TValue) =
  ## puts a (key, value)-pair into `t`.
  var fullhash = hash(key)
  var index = rawGet(t, key, fullhash)
  if index >= 0:
    t.data[index].val = val
  else:
    if mustRehash(len(t.data), t.counter): enlarge(t)
    rawInsert(t.data, (key, val, markUsed(fullhash)))
    inc(t.counter)

proc add*(t: var TTable, key: TKey, val: TValue) =
  ## puts a (key, value)-pair into `t`, but does not check if key already
  ## exists.
  if mustRehash(len(t.data), t.counter): enlarge(t)
  rawInsert(t.data, (key, val, markUsed(hash(key))))
  inc(t.counter)

proc del*(t: var TTable, key: TKey) =
  ## deletes a (key, val)-pair in `t`.
  var index = rawGet(t, key)
  if index >= 0:
    t.data[index].h = delSlot

proc delAll*(t: var TTable, key: TKey) =
  ## deletes all (key, val)-pairs in `t`.
  while true:
    var index = rawGet(t, key)
    if index < 0: break
    t.data[index].h = delSlot

iterator pairs*(t: TTable): tuple[key: TKey, value: TValue] =
  ## iterates over any (key, value) pair in the table `t`.
  for h in 0..high(t.data):
    if (t.data[h].h and mask) == usedSlot:
      yield (t.data[h].key, t.data[h].val)

iterator keys*(t: TTable): TKey =
  ## iterate over any key in the table `t`. If key occurs multiple times, it
  ## is yielded multiple times.
  for h in 0..high(t.data):
    if (t.data[h].h and mask) == usedSlot:
      yield t.data[h].key

iterator values*(t: TTable): TValue =
  ## iterate over any value in the table `t`. 
  for h in 0..high(t.data):
    if (t.data[h].h and mask) == usedSlot:
      yield t.data[h].val

iterator values*(t: TTable, key: TKey): TValue =
  ## iterate over any value associated with `key` in `t`.
  var fullhash = hash(key)
  var h = fullhash and high(t.data)
  while (t.data[h].h and mask) != 0:
    # If it is a deleted entry, the comparison with ``markUsed(fullhash)``
    # fails, so there is no need to check for this explicitely.
    if t.data[h].h == markUsed(fullhash) and eq(t.data[h].key, key): 
      yield t.data[h].val
    h = nextTry(h, high(t.data))

proc `$`*[KeyToStr=`$`, ValueToStr=`$`](t: TTable): string =
  ## turns the table into its string representation. `$` must be available
  ## for TKey and TValue for this to work.
  if t.len == 0:
    result = "{:}"
  else:
    result = "{"
    var i = 0
    for k, v in pairs(t):
      if i > 0: add(result, ", ")
      add(result, KeyToStr(k))
      add(result, ": ")
      add(result, ValueToStr(v))
      inc(i)
    add(result, "}")