diff options
author | landerlo <lander.lopez@gmail.com> | 2020-11-13 13:12:51 +0000 |
---|---|---|
committer | GitHub <noreply@github.com> | 2020-11-13 14:12:51 +0100 |
commit | c39fa0d49535da7f803d9972abdb15d09eed92a6 (patch) | |
tree | 7ff8240bfed5581db815ca0416ac8806b8bd436d /lib/std/packedsets.nim | |
parent | a9bd4c4e80a73fde73ae4a976d1ee4d65a4a3a66 (diff) | |
download | Nim-c39fa0d49535da7f803d9972abdb15d09eed92a6.tar.gz |
Make IntSet a generic ordinal set OrdSet[A] (#15564)
* Make IntSet an ordinal set OrdSet[A: Ordinal] Backward compatibility with IntSet is maintained. IntSet is an alias for OrdSet[int] * move ordsets to new file, intsets exports it * ordset, move to lib/std folder * Fix `$` for ordsets and test cleanup * Fix ordsets compilation in doc example * Rename ordsets to packedsets
Diffstat (limited to 'lib/std/packedsets.nim')
-rw-r--r-- | lib/std/packedsets.nim | 607 |
1 files changed, 607 insertions, 0 deletions
diff --git a/lib/std/packedsets.nim b/lib/std/packedsets.nim new file mode 100644 index 000000000..20002d07f --- /dev/null +++ b/lib/std/packedsets.nim @@ -0,0 +1,607 @@ +# +# +# Nim's Runtime Library +# (c) Copyright 2012 Andreas Rumpf +# +# See the file "copying.txt", included in this +# distribution, for details about the copyright. +# + +## The ``packedsets`` module implements an efficient `Ordinal`set implemented as a +## `sparse bit set`:idx:. +## +## Supports any Ordinal type. +## +## **Note**: Currently the assignment operator ``=`` for ``PackedSet[A]`` +## performs some rather meaningless shallow copy. Since Nim currently does +## not allow the assignment operator to be overloaded, use `assign proc +## <#assign,PackedSet[A],PackedSet[A]>`_ to get a deep copy. +## +## **See also:** +## * `sets module <sets.html>`_ for more general hash sets + +import std/private/since +import hashes + +type + BitScalar = uint + +const + InitIntSetSize = 8 # must be a power of two! + TrunkShift = 9 + BitsPerTrunk = 1 shl TrunkShift # needs to be a power of 2 and + # divisible by 64 + TrunkMask = BitsPerTrunk - 1 + IntsPerTrunk = BitsPerTrunk div (sizeof(BitScalar) * 8) + IntShift = 5 + ord(sizeof(BitScalar) == 8) # 5 or 6, depending on int width + IntMask = 1 shl IntShift - 1 + +type + PTrunk = ref Trunk + Trunk = object + next: PTrunk # all nodes are connected with this pointer + key: int # start address at bit 0 + bits: array[0..IntsPerTrunk - 1, BitScalar] # a bit vector + + TrunkSeq = seq[PTrunk] + + ## An efficient set of `Ordinal` types implemented as a sparse bit set. + PackedSet*[A: Ordinal] = object + elems: int # only valid for small numbers + counter, max: int + head: PTrunk + data: TrunkSeq + a: array[0..33, int] # profiling shows that 34 elements are enough + +proc mustRehash[T](t: T): bool {.inline.} = + let length = t.max + 1 + assert length > t.counter + result = (length * 2 < t.counter * 3) or (length - t.counter < 4) + +proc nextTry(h, maxHash: Hash, perturb: var Hash): Hash {.inline.} = + const PERTURB_SHIFT = 5 + var perturb2 = cast[uint](perturb) shr PERTURB_SHIFT + perturb = cast[Hash](perturb2) + result = ((5*h) + 1 + perturb) and maxHash + +proc packedSetGet[A](t: PackedSet[A], key: int): PTrunk = + var h = key and t.max + var perturb = key + while t.data[h] != nil: + if t.data[h].key == key: + return t.data[h] + h = nextTry(h, t.max, perturb) + result = nil + +proc intSetRawInsert[A](t: PackedSet[A], data: var TrunkSeq, desc: PTrunk) = + var h = desc.key and t.max + var perturb = desc.key + while data[h] != nil: + assert(data[h] != desc) + h = nextTry(h, t.max, perturb) + assert(data[h] == nil) + data[h] = desc + +proc intSetEnlarge[A](t: var PackedSet[A]) = + var n: TrunkSeq + var oldMax = t.max + t.max = ((t.max + 1) * 2) - 1 + newSeq(n, t.max + 1) + for i in countup(0, oldMax): + if t.data[i] != nil: intSetRawInsert(t, n, t.data[i]) + swap(t.data, n) + +proc intSetPut[A](t: var PackedSet[A], key: int): PTrunk = + var h = key and t.max + var perturb = key + while t.data[h] != nil: + if t.data[h].key == key: + return t.data[h] + h = nextTry(h, t.max, perturb) + if mustRehash(t): intSetEnlarge(t) + inc(t.counter) + h = key and t.max + perturb = key + while t.data[h] != nil: h = nextTry(h, t.max, perturb) + assert(t.data[h] == nil) + new(result) + result.next = t.head + result.key = key + t.head = result + t.data[h] = result + +proc bitincl[A](s: var PackedSet[A], key: int) {.inline.} = + var ret: PTrunk + var t = intSetPut(s, `shr`(key, TrunkShift)) + var u = key and TrunkMask + t.bits[u shr IntShift] = t.bits[u shr IntShift] or + (BitScalar(1) shl (u and IntMask)) + +proc exclImpl[A](s: var PackedSet[A], key: int) = + if s.elems <= s.a.len: + for i in 0..<s.elems: + if s.a[i] == key: + s.a[i] = s.a[s.elems-1] + dec s.elems + return + else: + var t = packedSetGet(s, key shr TrunkShift) + if t != nil: + var u = key and TrunkMask + t.bits[u shr IntShift] = t.bits[u shr IntShift] and + not(BitScalar(1) shl (u and IntMask)) + +template dollarImpl(): untyped = + result = "{" + for key in items(s): + if result.len > 1: result.add(", ") + result.add $key + result.add("}") + +iterator items*[A](s: PackedSet[A]): A {.inline.} = + ## Iterates over any included element of `s`. + if s.elems <= s.a.len: + for i in 0..<s.elems: + yield A(s.a[i]) + else: + var r = s.head + while r != nil: + var i = 0 + while i <= high(r.bits): + var w: uint = r.bits[i] + # taking a copy of r.bits[i] here is correct, because + # modifying operations are not allowed during traversation + var j = 0 + while w != 0: # test all remaining bits for zero + if (w and 1) != 0: # the bit is set! + yield A((r.key shl TrunkShift) or (i shl IntShift +% j)) + inc(j) + w = w shr 1 + inc(i) + r = r.next + +proc initPackedSet*[A]: PackedSet[A] = + ## Returns an empty PackedSet[A]. + ## A must be Ordinal + ## + ## See also: + ## * `toPackedSet[A] proc <#toPackedSet[A],openArray[int]>`_ + runnableExamples: + var a = initPackedSet[int]() + assert len(a) == 0 + + type Id = distinct int + var ids = initPackedSet[Id]() + ids.incl(3.Id) + + result = PackedSet[A]( + elems: 0, + counter: 0, + max: 0, + head: nil, + data: when defined(nimNoNilSeqs): @[] else: nil) + # a: array[0..33, int] # profiling shows that 34 elements are enough + +proc contains*[A](s: PackedSet[A], key: A): bool = + ## Returns true if `key` is in `s`. + ## + ## This allows the usage of `in` operator. + runnableExamples: + type ABCD = enum A, B, C, D + + var a = initPackedSet[int]() + for x in [1, 3, 5]: + a.incl(x) + assert a.contains(3) + assert 3 in a + assert(not a.contains(8)) + assert 8 notin a + + var letters = initPackedSet[ABCD]() + for x in [A, C]: + letters.incl(x) + assert A in letters + assert C in letters + assert B notin letters + + if s.elems <= s.a.len: + for i in 0..<s.elems: + if s.a[i] == ord(key): return true + else: + var t = packedSetGet(s, `shr`(ord(key), TrunkShift)) + if t != nil: + var u = ord(key) and TrunkMask + result = (t.bits[u shr IntShift] and + (BitScalar(1) shl (u and IntMask))) != 0 + else: + result = false + +proc incl*[A](s: var PackedSet[A], key: A) = + ## Includes an element `key` in `s`. + ## + ## This doesn't do anything if `key` is already in `s`. + ## + ## See also: + ## * `excl proc <#excl,PackedSet[A],A>`_ for excluding an element + ## * `incl proc <#incl,PackedSet[A],PackedSet[A]>`_ for including other set + ## * `containsOrIncl proc <#containsOrIncl,PackedSet[A],A>`_ + runnableExamples: + var a = initPackedSet[int]() + a.incl(3) + a.incl(3) + assert len(a) == 1 + + if s.elems <= s.a.len: + for i in 0..<s.elems: + if s.a[i] == ord(key): return + if s.elems < s.a.len: + s.a[s.elems] = ord(key) + inc s.elems + return + newSeq(s.data, InitIntSetSize) + s.max = InitIntSetSize-1 + for i in 0..<s.elems: + bitincl(s, s.a[i]) + s.elems = s.a.len + 1 + # fall through: + bitincl(s, ord(key)) + +proc incl*[A](s: var PackedSet[A], other: PackedSet[A]) = + ## Includes all elements from `other` into `s`. + ## + ## This is the in-place version of `s + other <#+,PackedSet[A],PackedSet[A]>`_. + ## + ## See also: + ## * `excl proc <#excl,PackedSet[A],PackedSet[A]>`_ for excluding other set + ## * `incl proc <#incl,PackedSet[A],A>`_ for including an element + ## * `containsOrIncl proc <#containsOrIncl,PackedSet[A],A>`_ + runnableExamples: + var + a = initPackedSet[int]() + b = initPackedSet[int]() + a.incl(1) + b.incl(5) + a.incl(b) + assert len(a) == 2 + assert 5 in a + + for item in other: incl(s, item) + +proc toPackedSet*[A](x: openArray[A]): PackedSet[A] {.since: (1, 3).} = + ## Creates a new PackedSet[A] that contains the elements of `x`. + ## + ## Duplicates are removed. + ## + ## See also: + ## * `initPackedSet[A] proc <#initPackedSet[A]>`_ + runnableExamples: + var + a = toPackedSet([5, 6, 7]) + b = toPackedSet(@[1, 8, 8, 8]) + assert len(a) == 3 + assert len(b) == 2 + + result = initPackedSet[A]() + for item in x: + result.incl(item) + +proc containsOrIncl*[A](s: var PackedSet[A], key: A): bool = + ## Includes `key` in the set `s` and tells if `key` was already in `s`. + ## + ## The difference with regards to the `incl proc <#incl,PackedSet[A],A>`_ is + ## that this proc returns `true` if `s` already contained `key`. The + ## proc will return `false` if `key` was added as a new value to `s` during + ## this call. + ## + ## See also: + ## * `incl proc <#incl,PackedSet[A],A>`_ for including an element + ## * `missingOrExcl proc <#missingOrExcl,PackedSet[A],A>`_ + runnableExamples: + var a = initPackedSet[int]() + assert a.containsOrIncl(3) == false + assert a.containsOrIncl(3) == true + assert a.containsOrIncl(4) == false + + if s.elems <= s.a.len: + for i in 0..<s.elems: + if s.a[i] == ord(key): + return true + incl(s, key) + result = false + else: + var t = packedSetGet(s, `shr`(ord(key), TrunkShift)) + if t != nil: + var u = ord(key) and TrunkMask + result = (t.bits[u shr IntShift] and BitScalar(1) shl (u and IntMask)) != 0 + if not result: + t.bits[u shr IntShift] = t.bits[u shr IntShift] or + (BitScalar(1) shl (u and IntMask)) + else: + incl(s, key) + result = false + +proc excl*[A](s: var PackedSet[A], key: A) = + ## Excludes `key` from the set `s`. + ## + ## This doesn't do anything if `key` is not found in `s`. + ## + ## See also: + ## * `incl proc <#incl,PackedSet[A],A>`_ for including an element + ## * `excl proc <#excl,PackedSet[A],PackedSet[A]>`_ for excluding other set + ## * `missingOrExcl proc <#missingOrExcl,PackedSet[A],A>`_ + runnableExamples: + var a = initPackedSet[int]() + a.incl(3) + a.excl(3) + a.excl(3) + a.excl(99) + assert len(a) == 0 + exclImpl[A](s, cast[int](key)) + +proc excl*[A](s: var PackedSet[A], other: PackedSet[A]) = + ## Excludes all elements from `other` from `s`. + ## + ## This is the in-place version of `s - other <#-,PackedSet[A],PackedSet[A]>`_. + ## + ## See also: + ## * `incl proc <#incl,PackedSet[A],PackedSet[A]>`_ for including other set + ## * `excl proc <#excl,PackedSet[A],A>`_ for excluding an element + ## * `missingOrExcl proc <#missingOrExcl,PackedSet[A],A>`_ + runnableExamples: + var + a = initPackedSet[int]() + b = initPackedSet[int]() + a.incl(1) + a.incl(5) + b.incl(5) + a.excl(b) + assert len(a) == 1 + assert 5 notin a + + for item in other: + excl(s, item) + +proc len*[A](s: PackedSet[A]): int {.inline.} = + ## Returns the number of elements in `s`. + if s.elems < s.a.len: + result = s.elems + else: + result = 0 + for _ in s: + inc(result) + +proc missingOrExcl*[A](s: var PackedSet[A], key: A): bool = + ## Excludes `key` in the set `s` and tells if `key` was already missing from `s`. + ## + ## The difference with regards to the `excl proc <#excl,PackedSet[A],A>`_ is + ## that this proc returns `true` if `key` was missing from `s`. + ## The proc will return `false` if `key` was in `s` and it was removed + ## during this call. + ## + ## See also: + ## * `excl proc <#excl,PackedSet[A],A>`_ for excluding an element + ## * `excl proc <#excl,PackedSet[A],PackedSet[A]>`_ for excluding other set + ## * `containsOrIncl proc <#containsOrIncl,PackedSet[A],A>`_ + runnableExamples: + var a = initPackedSet[int]() + a.incl(5) + assert a.missingOrExcl(5) == false + assert a.missingOrExcl(5) == true + + var count = s.len + exclImpl(s, cast[int](key)) + result = count == s.len + +proc clear*[A](result: var PackedSet[A]) = + ## Clears the PackedSet[A] back to an empty state. + runnableExamples: + var a = initPackedSet[int]() + a.incl(5) + a.incl(7) + clear(a) + assert len(a) == 0 + + # setLen(result.data, InitIntSetSize) + # for i in 0..InitIntSetSize-1: result.data[i] = nil + # result.max = InitIntSetSize-1 + when defined(nimNoNilSeqs): + result.data = @[] + else: + result.data = nil + result.max = 0 + result.counter = 0 + result.head = nil + result.elems = 0 + +proc isNil*[A](x: PackedSet[A]): bool {.inline.} = x.head.isNil and x.elems == 0 + +proc assign*[A](dest: var PackedSet[A], src: PackedSet[A]) = + ## Copies `src` to `dest`. + ## `dest` does not need to be initialized by `initPackedSet[A] proc <#initPackedSet[A]>`_. + runnableExamples: + var + a = initPackedSet[int]() + b = initPackedSet[int]() + b.incl(5) + b.incl(7) + a.assign(b) + assert len(a) == 2 + + if src.elems <= src.a.len: + when defined(nimNoNilSeqs): + dest.data = @[] + else: + dest.data = nil + dest.max = 0 + dest.counter = src.counter + dest.head = nil + dest.elems = src.elems + dest.a = src.a + else: + dest.counter = src.counter + dest.max = src.max + dest.elems = src.elems + newSeq(dest.data, src.data.len) + + var it = src.head + while it != nil: + var h = it.key and dest.max + var perturb = it.key + while dest.data[h] != nil: h = nextTry(h, dest.max, perturb) + assert(dest.data[h] == nil) + var n: PTrunk + new(n) + n.next = dest.head + n.key = it.key + n.bits = it.bits + dest.head = n + dest.data[h] = n + it = it.next + +proc union*[A](s1, s2: PackedSet[A]): PackedSet[A] = + ## Returns the union of the sets `s1` and `s2`. + ## + ## The same as `s1 + s2 <#+,PackedSet[A],PackedSet[A]>`_. + runnableExamples: + var + a = initPackedSet[int]() + b = initPackedSet[int]() + a.incl(1); a.incl(2); a.incl(3) + b.incl(3); b.incl(4); b.incl(5) + assert union(a, b).len == 5 + ## {1, 2, 3, 4, 5} + + result.assign(s1) + incl(result, s2) + +proc intersection*[A](s1, s2: PackedSet[A]): PackedSet[A] = + ## Returns the intersection of the sets `s1` and `s2`. + ## + ## The same as `s1 * s2 <#*,PackedSet[A],PackedSet[A]>`_. + runnableExamples: + var + a = initPackedSet[int]() + b = initPackedSet[int]() + a.incl(1); a.incl(2); a.incl(3) + b.incl(3); b.incl(4); b.incl(5) + assert intersection(a, b).len == 1 + ## {3} + + result = initPackedSet[A]() + for item in s1: + if contains(s2, item): + incl(result, item) + +proc difference*[A](s1, s2: PackedSet[A]): PackedSet[A] = + ## Returns the difference of the sets `s1` and `s2`. + ## + ## The same as `s1 - s2 <#-,PackedSet[A],PackedSet[A]>`_. + runnableExamples: + var + a = initPackedSet[int]() + b = initPackedSet[int]() + a.incl(1); a.incl(2); a.incl(3) + b.incl(3); b.incl(4); b.incl(5) + assert difference(a, b).len == 2 + ## {1, 2} + + result = initPackedSet[A]() + for item in s1: + if not contains(s2, item): + incl(result, item) + +proc symmetricDifference*[A](s1, s2: PackedSet[A]): PackedSet[A] = + ## Returns the symmetric difference of the sets `s1` and `s2`. + runnableExamples: + var + a = initPackedSet[int]() + b = initPackedSet[int]() + a.incl(1); a.incl(2); a.incl(3) + b.incl(3); b.incl(4); b.incl(5) + assert symmetricDifference(a, b).len == 4 + ## {1, 2, 4, 5} + + result.assign(s1) + for item in s2: + if containsOrIncl(result, item): excl(result, item) + +proc `+`*[A](s1, s2: PackedSet[A]): PackedSet[A] {.inline.} = + ## Alias for `union(s1, s2) <#union,PackedSet[A],PackedSet[A]>`_. + result = union(s1, s2) + +proc `*`*[A](s1, s2: PackedSet[A]): PackedSet[A] {.inline.} = + ## Alias for `intersection(s1, s2) <#intersection,PackedSet[A],PackedSet[A]>`_. + result = intersection(s1, s2) + +proc `-`*[A](s1, s2: PackedSet[A]): PackedSet[A] {.inline.} = + ## Alias for `difference(s1, s2) <#difference,PackedSet[A],PackedSet[A]>`_. + result = difference(s1, s2) + +proc disjoint*[A](s1, s2: PackedSet[A]): bool = + ## Returns true if the sets `s1` and `s2` have no items in common. + runnableExamples: + var + a = initPackedSet[int]() + b = initPackedSet[int]() + a.incl(1); a.incl(2) + b.incl(2); b.incl(3) + assert disjoint(a, b) == false + b.excl(2) + assert disjoint(a, b) == true + + for item in s1: + if contains(s2, item): + return false + return true + +proc card*[A](s: PackedSet[A]): int {.inline.} = + ## Alias for `len() <#len,PackedSet[A]>`_. + result = s.len() + +proc `<=`*[A](s1, s2: PackedSet[A]): bool = + ## Returns true if `s1` is subset of `s2`. + ## + ## A subset `s1` has all of its elements in `s2`, and `s2` doesn't necessarily + ## have more elements than `s1`. That is, `s1` can be equal to `s2`. + runnableExamples: + var + a = initPackedSet[int]() + b = initPackedSet[int]() + a.incl(1) + b.incl(1); b.incl(2) + assert a <= b + a.incl(2) + assert a <= b + a.incl(3) + assert(not (a <= b)) + + for item in s1: + if not s2.contains(item): + return false + return true + +proc `<`*[A](s1, s2: PackedSet[A]): bool = + ## Returns true if `s1` is proper subset of `s2`. + ## + ## A strict or proper subset `s1` has all of its elements in `s2`, but `s2` has + ## more elements than `s1`. + runnableExamples: + var + a = initPackedSet[int]() + b = initPackedSet[int]() + a.incl(1) + b.incl(1); b.incl(2) + assert a < b + a.incl(2) + assert(not (a < b)) + return s1 <= s2 and not (s2 <= s1) + +proc `==`*[A](s1, s2: PackedSet[A]): bool = + ## Returns true if both `s1` and `s2` have the same elements and set size. + return s1 <= s2 and s2 <= s1 + +proc `$`*[A](s: PackedSet[A]): string = + ## The `$` operator for int sets. + ## + ## Converts the set `s` to a string, mostly for logging and printing purposes. + dollarImpl() |