# # # Nim's Runtime Library # (c) Copyright 2012 Andreas Rumpf # # See the file "copying.txt", included in this # distribution, for details about the copyright. # ## The ``intsets`` module implements an efficient int set implemented as a ## `sparse bit set`:idx:. ## **Note**: Since Nim currently does not allow the assignment operator to ## be overloaded, ``=`` for int sets performs some rather meaningless shallow ## copy; use ``assign`` to get a deep copy. import hashes, math type BitScalar = int const InitIntSetSize = 8 # must be a power of two! TrunkShift = 9 BitsPerTrunk = 1 shl TrunkShift # needs to be a power of 2 and # divisible by 64 TrunkMask = BitsPerTrunk - 1 IntsPerTrunk = BitsPerTrunk div (sizeof(BitScalar) * 8) IntShift = 5 + ord(sizeof(BitScalar) == 8) # 5 or 6, depending on int width IntMask = 1 shl IntShift - 1 type PTrunk = ref Trunk Trunk = object next: PTrunk # all nodes are connected with this pointer key: int # start address at bit 0 bits: array[0..IntsPerTrunk - 1, BitScalar] # a bit vector TrunkSeq = seq[PTrunk] IntSet* = object ## an efficient set of 'int' implemented as a sparse bit set elems: int # only valid for small numbers counter, max: int head: PTrunk data: TrunkSeq a: array[0..33, int] # profiling shows that 34 elements are enough {.deprecated: [TIntSet: IntSet, TTrunk: Trunk, TTrunkSeq: TrunkSeq].} proc mustRehash(length, counter: int): bool {.inline.} = assert(length > counter) result = (length * 2 < counter * 3) or (length - counter < 4) proc nextTry(h, maxHash: Hash): Hash {.inline.} = result = ((5 * h) + 1) and maxHash proc intSetGet(t: IntSet, key: int): PTrunk = var h = key and t.max while t.data[h] != nil: if t.data[h].key == key: return t.data[h] h = nextTry(h, t.max) result = nil proc intSetRawInsert(t: IntSet, data: var TrunkSeq, desc: PTrunk) = var h = desc.key and t.max while data[h] != nil: assert(data[h] != desc) h = nextTry(h, t.max) assert(data[h] == nil) data[h] = desc proc intSetEnlarge(t: var IntSet) = var n: TrunkSeq var oldMax = t.max t.max = ((t.max + 1) * 2) - 1 newSeq(n, t.max + 1) for i in countup(0, oldMax): if t.data[i] != nil: intSetRawInsert(t, n, t.data[i]) swap(t.data, n) proc intSetPut(t: var IntSet, key: int): PTrunk = var h = key and t.max while t.data[h] != nil: if t.data[h].key == key: return t.data[h] h = nextTry(h, t.max) if mustRehash(t.max + 1, t.counter): intSetEnlarge(t) inc(t.counter) h = key and t.max while t.data[h] != nil: h = nextTry(h, t.max) assert(t.data[h] == nil) new(result) result.next = t.head result.key = key t.head = result t.data[h] = result proc contains*(s: IntSet, key: int): bool = ## returns true iff `key` is in `s`. if s.elems <= s.a.len: for i in 0..`_. result = union(s1, s2) proc `*`*(s1, s2: IntSet): IntSet {.inline.} = ## Alias for `intersection(s1, s2) <#intersection>`_. result = intersection(s1, s2) proc `-`*(s1, s2: IntSet): IntSet {.inline.} = ## Alias for `difference(s1, s2) <#difference>`_. result = difference(s1, s2) proc disjoint*(s1, s2: IntSet): bool = ## Returns true iff the sets `s1` and `s2` have no items in common. for item in s1: if contains(s2, item): return false return true proc len*(s: IntSet): int {.inline.} = ## Returns the number of keys in `s`. if s.elems < s.a.len: result = s.elems else: result = 0 for _ in s: inc(result) proc card*(s: IntSet): int {.inline.} = ## alias for `len() <#len>` _. result = s.len() proc `<=`*(s1, s2: IntSet): bool = ## Returns true iff `s1` is subset of `s2`. for item in s1: if not s2.contains(item): return false return true proc `<`*(s1, s2: IntSet): bool = ## Returns true iff `s1` is proper subset of `s2`. return s1 <= s2 and not (s2 <= s1) proc `==`*(s1, s2: IntSet): bool = ## Returns true if both `s` and `t` have the same members and set size. return s1 <= s2 and s2 <= s1 template dollarImpl(): untyped = result = "{" for key in items(s): if result.len > 1: result.add(", ") result.add($key) result.add("}") proc `$`*(s: IntSet): string = ## The `$` operator for int sets. dollarImpl() proc empty*(s: IntSet): bool {.inline, deprecated.} = ## returns true if `s` is empty. This is safe to call even before ## the set has been initialized with `initIntSet`. Note this never ## worked reliably and so is deprecated. result = s.counter == 0 when isMainModule: import sequtils, algorithm var x = initIntSet() x.incl(1) x.incl(2) x.incl(7) x.incl(1056) x.incl(1044) x.excl(1044) assert x.containsOrIncl(888) == false assert 888 in x assert x.containsOrIncl(888) == true assert x.missingOrExcl(888) == false assert 888 notin x assert x.missingOrExcl(888) == true var xs = toSeq(items(x)) xs.sort(cmp[int]) assert xs == @[1, 2, 7, 1056] var y: IntSet assign(y, x) var ys = toSeq(items(y)) ys.sort(cmp[int]) assert ys == @[1, 2, 7, 1056] assert x == y var z: IntSet for i in 0..1000: incl z, i assert z.len() == i+1 for i in 0..1000: assert z.contains(i) var w = initIntSet() w.incl(1) w.incl(4) w.incl(50) w.incl(1001) w.incl(1056) var xuw = x.union(w) var xuws = toSeq(items(xuw)) xuws.sort(cmp[int]) assert xuws == @[1, 2, 4, 7, 50, 1001, 1056] var xiw = x.intersection(w) var xiws = toSeq(items(xiw)) xiws.sort(cmp[int]) assert xiws == @[1, 1056] var xdw = x.difference(w) var xdws = toSeq(items(xdw)) xdws.sort(cmp[int]) assert xdws == @[2, 7] var xsw = x.symmetricDifference(w) var xsws = toSeq(items(xsw)) xsws.sort(cmp[int]) assert xsws == @[2, 4, 7, 50, 1001] x.incl(w) xs = toSeq(items(x)) xs.sort(cmp[int]) assert xs == @[1, 2, 4, 7, 50, 1001, 1056] assert w <= x assert w < x assert(not disjoint(w, x)) var u = initIntSet() u.incl(3) u.incl(5) u.incl(500) assert disjoint(u, x) var v = initIntSet() v.incl(2) v.incl(50) x.excl(v) xs = toSeq(items(x)) xs.sort(cmp[int]) assert xs == @[1, 4, 7, 1001, 1056]