# # # Nim's Runtime Library # (c) Copyright 2012 Andreas Rumpf # # See the file "copying.txt", included in this # distribution, for details about the copyright. # ## The ``intsets`` module implements an efficient int set implemented as a ## `sparse bit set`:idx:. ## **Note**: Since Nim currently does not allow the assignment operator to ## be overloaded, ``=`` for int sets performs some rather meaningless shallow ## copy; use ``assign`` to get a deep copy. import hashes, math type BitScalar = int const InitIntSetSize = 8 # must be a power of two! TrunkShift = 9 BitsPerTrunk = 1 shl TrunkShift # needs to be a power of 2 and # divisible by 64 TrunkMask = BitsPerTrunk - 1 IntsPerTrunk = BitsPerTrunk div (sizeof(BitScalar) * 8) IntShift = 5 + ord(sizeof(BitScalar) == 8) # 5 or 6, depending on int width IntMask = 1 shl IntShift - 1 type PTrunk = ref Trunk Trunk = object next: PTrunk # all nodes are connected with this pointer key: int # start address at bit 0 bits: array[0..IntsPerTrunk - 1, BitScalar] # a bit vector TrunkSeq = seq[PTrunk] IntSet* = object ## an efficient set of 'int' implemented as a sparse bit set elems: int # only valid for small numbers counter, max: int head: PTrunk data: TrunkSeq a: array[0..33, int] # profiling shows that 34 elements are enough {.deprecated: [TIntSet: IntSet, TTrunk: Trunk, TTrunkSeq: TrunkSeq].} proc mustRehash(length, counter: int): bool {.inline.} = assert(length > counter) result = (length * 2 < counter * 3) or (length - counter < 4) proc nextTry(h, maxHash: Hash): Hash {.inline.} = result = ((5 * h) + 1) and maxHash proc intSetGet(t: IntSet, key: int): PTrunk = var h = key and t.max while t.data[h] != nil: if t.data[h].key == key: return t.data[h] h = nextTry(h, t.max) result = nil proc intSetRawInsert(t: IntSet, data: var TrunkSeq, desc: PTrunk) = var h = desc.key and t.max while data[h] != nil: assert(data[h] != desc) h = nextTry(h, t.max) assert(data[h] == nil) data[h] = desc proc intSetEnlarge(t: var IntSet) = var n: TrunkSeq var oldMax = t.max t.max = ((t.max + 1) * 2) - 1 newSeq(n, t.max + 1) for i in countup(0, oldMax): if t.data[i] != nil: intSetRawInsert(t, n, t.data[i]) swap(t.data, n) proc intSetPut(t: var IntSet, key: int): PTrunk = var h = key and t.max while t.data[h] != nil: if t.data[h].key == key: return t.data[h] h = nextTry(h, t.max) if mustRehash(t.max + 1, t.counter): intSetEnlarge(t) inc(t.counter) h = key and t.max while t.data[h] != nil: h = nextTry(h, t.max) assert(t.data[h] == nil) new(result) result.next = t.head result.key = key t.head = result t.data[h] = result proc contains*(s: IntSet, key: int): bool = ## returns true iff `key` is in `s`. if s.elems <= s.a.len: for i in 0.. 1: result.add(", ") result.add($key) result.add("}") proc `$`*(s: IntSet): string = ## The `$` operator for int sets. dollarImpl() proc empty*(s: IntSet): bool {.inline, deprecated.} = ## returns true if `s` is empty. This is safe to call even before ## the set has been initialized with `initIntSet`. Note this never ## worked reliably and so is deprecated. result = s.counter == 0 when isMainModule: import sequtils, algorithm var x = initIntSet() x.incl(1) x.incl(2) x.incl(7) x.incl(1056) x.incl(1044) x.excl(1044) assert x.containsOrIncl(888) == false assert 888 in x assert x.containsOrIncl(888) == true assert x.missingOrExcl(888) == false assert 888 notin x assert x.missingOrExcl(888) == true var xs = toSeq(items(x)) xs.sort(cmp[int]) assert xs == @[1, 2, 7, 1056] var y: IntSet assign(y, x) var ys = toSeq(items(y)) ys.sort(cmp[int]) assert ys == @[1, 2, 7, 1056] var z: IntSet for i in 0..1000: incl z, i for i in 0..1000: assert i in z