diff options
Diffstat (limited to 'lib/pure/collections/sets.nim')
-rw-r--r-- | lib/pure/collections/sets.nim | 1313 |
1 files changed, 779 insertions, 534 deletions
diff --git a/lib/pure/collections/sets.nim b/lib/pure/collections/sets.nim index d1f941e92..5da5d9243 100644 --- a/lib/pure/collections/sets.nim +++ b/lib/pure/collections/sets.nim @@ -14,8 +14,38 @@ ## <manual.html#types-set-type>`_. Sets allow you to store any value that can be ## `hashed <hashes.html>`_ and they don't contain duplicate entries. ## -## **Note**: The data types declared here have *value semantics*: This means +## Common usages of sets: +## * removing duplicates from a container by converting it with `toSet proc +## <#toSet,openArray[A]>`_ (see also `sequtils.deduplicate proc +## <sequtils.html#deduplicate,openArray[T],bool>`_) +## * membership testing +## * mathematical operations on two sets, such as +## `union <#union,HashSet[A],HashSet[A]>`_, +## `intersection <#intersection,HashSet[A],HashSet[A]>`_, +## `difference <#difference,HashSet[A],HashSet[A]>`_, and +## `symmetric difference <#symmetricDifference,HashSet[A],HashSet[A]>`_ +## +## .. code-block:: +## echo toSet([9, 5, 1]) # {9, 1, 5} +## echo toOrderedSet([9, 5, 1]) # {9, 5, 1} +## +## let +## s1 = toSet([9, 5, 1]) +## s2 = toSet([3, 5, 7]) +## +## echo s1 + s2 # {9, 1, 3, 5, 7} +## echo s1 - s2 # {1, 9} +## echo s1 * s2 # {5} +## echo s1 -+- s2 # {9, 1, 3, 7} +## +## +## Note: The data types declared here have *value semantics*: This means ## that ``=`` performs a copy of the set. +## +## **See also:** +## * `intsets module <intsets.html>`_ for efficient int sets +## * `tables module <tables.html>`_ for hash tables + import hashes, math @@ -31,27 +61,24 @@ when not defined(nimhygiene): type KeyValuePair[A] = tuple[hcode: Hash, key: A] KeyValuePairSeq[A] = seq[KeyValuePair[A]] - HashSet* {.myShallow.}[A] = object ## \ + HashSet* {.myShallow.} [A] = object ## \ ## A generic hash set. ## - ## Use `init() <#init,HashSet[A],int>`_ or `initSet[type]() <#initSet>`_ + ## Use `init proc <#init,HashSet[A],int>`_ or `initSet proc <#initSet,int>`_ ## before calling other procs on it. data: KeyValuePairSeq[A] counter: int + +# ---------------------- helpers ----------------------------------- + +const growthFactor = 2 + template default[T](t: typedesc[T]): T = ## Used by clear methods to get a default value. var v: T v -proc clear*[A](s: var HashSet[A]) = - ## Clears the HashSet back to an empty state, without shrinking - ## any of the existing storage. O(n) where n is the size of the hash bucket. - s.counter = 0 - for i in 0..<s.data.len: - s.data[i].hcode = 0 - s.data[i].key = default(type(s.data[i].key)) - # hcode for real keys cannot be zero. hcode==0 signifies an empty slot. These # two procs retain clarity of that encoding without the space cost of an enum. proc isEmpty(hcode: Hash): bool {.inline.} = @@ -60,87 +87,6 @@ proc isEmpty(hcode: Hash): bool {.inline.} = proc isFilled(hcode: Hash): bool {.inline.} = result = hcode != 0 -proc isValid*[A](s: HashSet[A]): bool = - ## Returns `true` if the set has been initialized with `initSet <#initSet>`_. - ## - ## Most operations over an uninitialized set will crash at runtime and - ## `assert <system.html#assert>`_ in debug builds. You can use this proc in - ## your own procs to verify that sets passed to your procs are correctly - ## initialized. Example: - ## - ## .. code-block :: - ## proc savePreferences(options: HashSet[string]) = - ## assert options.isValid, "Pass an initialized set!" - ## # Do stuff here, may crash in release builds! - result = s.data.len > 0 - -proc len*[A](s: HashSet[A]): int = - ## Returns the number of keys in `s`. - ## - ## Due to an implementation detail you can call this proc on variables which - ## have not been initialized yet. The proc will return zero as the length - ## then. Example: - ## - ## .. code-block:: - ## - ## var values: HashSet[int] - ## assert(not values.isValid) - ## assert values.len == 0 - result = s.counter - -proc card*[A](s: HashSet[A]): int = - ## Alias for `len() <#len,TSet[A]>`_. - ## - ## Card stands for the `cardinality - ## <http://en.wikipedia.org/wiki/Cardinality>`_ of a set. - result = s.counter - -iterator items*[A](s: HashSet[A]): A = - ## Iterates over keys in the set `s`. - ## - ## If you need a sequence with the keys you can use `sequtils.toSeq() - ## <sequtils.html#toSeq>`_ on the iterator. Usage example: - ## - ## .. code-block:: - ## type - ## pair = tuple[a, b: int] - ## var - ## a, b = initSet[pair]() - ## a.incl((2, 3)) - ## a.incl((3, 2)) - ## a.incl((2, 3)) - ## for x, y in a.items: - ## b.incl((x - 2, y + 1)) - ## assert a.len == 2 - ## echo b - ## # --> {(a: 1, b: 3), (a: 0, b: 4)} - assert s.isValid, "The set needs to be initialized." - for h in 0..high(s.data): - if isFilled(s.data[h].hcode): yield s.data[h].key - -proc hash*[A](s: HashSet[A]): Hash = - ## hashing of HashSet - assert s.isValid, "The set needs to be initialized." - for h in 0..high(s.data): - result = result xor s.data[h].hcode - result = !$result - -const - growthFactor = 2 - -proc mustRehash(length, counter: int): bool {.inline.} = - assert(length > counter) - result = (length * 2 < counter * 3) or (length - counter < 4) - -proc rightSize*(count: Natural): int {.inline.} = - ## Return the value of `initialSize` to support `count` items. - ## - ## If more items are expected to be added, simply add that - ## expected extra amount to the parameter before calling this. - ## - ## Internally, we want mustRehash(rightSize(x), x) == false. - result = nextPowerOfTwo(count * 3 div 2 + 4) - proc nextTry(h, maxHash: Hash): Hash {.inline.} = result = (h + 1) and maxHash @@ -176,38 +122,6 @@ proc rawGetKnownHC[A](s: HashSet[A], key: A, hc: Hash): int {.inline.} = proc rawGet[A](s: HashSet[A], key: A, hc: var Hash): int {.inline.} = rawGetImpl() -proc `[]`*[A](s: var HashSet[A], key: A): var A = - ## returns the element that is actually stored in 's' which has the same - ## value as 'key' or raises the ``KeyError`` exception. This is useful - ## when one overloaded 'hash' and '==' but still needs reference semantics - ## for sharing. - assert s.isValid, "The set needs to be initialized." - var hc: Hash - var index = rawGet(s, key, hc) - if index >= 0: result = s.data[index].key - else: - when compiles($key): - raise newException(KeyError, "key not found: " & $key) - else: - raise newException(KeyError, "key not found") - -proc contains*[A](s: HashSet[A], key: A): bool = - ## Returns true iff `key` is in `s`. - ## - ## Example: - ## - ## .. code-block:: - ## var values = initSet[int]() - ## assert(not values.contains(2)) - ## values.incl(2) - ## assert values.contains(2) - ## values.excl(2) - ## assert(not values.contains(2)) - assert s.isValid, "The set needs to be initialized." - var hc: Hash - var index = rawGet(s, key, hc) - result = index >= 0 - proc rawInsert[A](s: var HashSet[A], data: var KeyValuePairSeq[A], key: A, hc: Hash, h: Hash) = rawInsertImpl() @@ -243,34 +157,6 @@ template containsOrInclImpl() {.dirty.} = rawInsert(s, s.data, key, hc, -1 - index) inc(s.counter) -proc incl*[A](s: var HashSet[A], key: A) = - ## Includes an element `key` in `s`. - ## - ## This doesn't do anything if `key` is already in `s`. Example: - ## - ## .. code-block:: - ## var values = initSet[int]() - ## values.incl(2) - ## values.incl(2) - ## assert values.len == 1 - assert s.isValid, "The set needs to be initialized." - inclImpl() - -proc incl*[A](s: var HashSet[A], other: HashSet[A]) = - ## Includes all elements from `other` into `s`. - ## - ## Example: - ## - ## .. code-block:: - ## var values = initSet[int]() - ## values.incl(2) - ## var others = toSet([6, 7]) - ## values.incl(others) - ## assert values.len == 3 - assert s.isValid, "The set `s` needs to be initialized." - assert other.isValid, "The set `other` needs to be initialized." - for item in other: incl(s, item) - template doWhile(a, b) = while true: b @@ -302,51 +188,279 @@ proc exclImpl[A](s: var HashSet[A], key: A) : bool {. inline .} = r = s.data[i].hcode and msk # "home" location of key@i shallowCopy(s.data[j], s.data[i]) # data[i] will be marked EMPTY next loop -proc missingOrExcl*[A](s: var HashSet[A], key: A): bool = - ## Excludes `key` in the set `s` and tells if `key` was removed from `s`. +proc mustRehash(length, counter: int): bool {.inline.} = + assert(length > counter) + result = (length * 2 < counter * 3) or (length - counter < 4) + +template dollarImpl() {.dirty.} = + result = "{" + for key in items(s): + if result.len > 1: result.add(", ") + result.addQuoted(key) + result.add("}") + +proc rightSize*(count: Natural): int {.inline.} + + + + + + + + +# --------------------------------------------------------------------- +# ------------------------------ HashSet ------------------------------ +# --------------------------------------------------------------------- + + +proc init*[A](s: var HashSet[A], initialSize=64) = + ## Initializes a hash set. ## - ## The difference with regards to the `excl() <#excl,TSet[A],A>`_ proc is - ## that this proc returns `true` if `key` was not present in `s`. Example: + ## The `initialSize` parameter needs to be a power of two (default: 64). + ## If you need to accept runtime values for this, you can use + ## `math.nextPowerOfTwo proc <math.html#nextPowerOfTwo>`_ or `rightSize proc + ## <#rightSize,Natural>`_ from this module. ## - ## .. code-block:: - ## var s = toSet([2, 3, 6, 7]) - ## assert s.missingOrExcl(4) == true - ## assert s.missingOrExcl(6) == false - exclImpl(s, key) + ## All set variables must be initialized before + ## use with other procs from this module, with the exception of `isValid proc + ## <#isValid,HashSet[A]>`_ and `len() <#len,HashSet[A]>`_. + ## + ## You can call this proc on a previously initialized hash set, which will + ## discard all its values. This might be more convenient than iterating over + ## existing values and calling `excl() <#excl,HashSet[A],A>`_ on them. + ## + ## See also: + ## * `initSet proc <#initSet,int>`_ + ## * `toSet proc <#toSet,openArray[A]>`_ + runnableExamples: + var a: HashSet[int] + assert(not a.isValid) + init(a) + assert a.isValid + + assert isPowerOfTwo(initialSize) + s.counter = 0 + newSeq(s.data, initialSize) + +proc initSet*[A](initialSize=64): HashSet[A] = + ## Wrapper around `init proc <#init,HashSet[A],int>`_ for initialization of + ## hash sets. + ## + ## Returns an empty hash set you can assign directly in ``var`` blocks in a + ## single line. + ## + ## See also: + ## * `toSet proc <#toSet,openArray[A]>`_ + runnableExamples: + var a = initSet[int]() + assert a.isValid + a.incl(3) + assert len(a) == 1 + result.init(initialSize) + +proc toSet*[A](keys: openArray[A]): HashSet[A] = + ## Creates a new hash set that contains the members of the given + ## collection (seq, array, or string) `keys`. + ## + ## Duplicates are removed. + ## + ## See also: + ## * `initSet proc <#initSet,int>`_ + runnableExamples: + let + a = toSet([5, 3, 2]) + b = toSet("abracadabra") + assert len(a) == 3 + ## a == {2, 3, 5} + assert len(b) == 5 + ## b == {'a', 'b', 'c', 'd', 'r'} + + result = initSet[A](rightSize(keys.len)) + for key in items(keys): result.incl(key) + +proc isValid*[A](s: HashSet[A]): bool = + ## Returns `true` if the set has been initialized (with `initSet proc + ## <#initSet,int>`_ or `init proc <#init,HashSet[A],int>`_). + ## + ## Most operations over an uninitialized set will crash at runtime and + ## `assert <system.html#assert>`_ in debug builds. You can use this proc in + ## your own procs to verify that sets passed to your procs are correctly + ## initialized. + ## + ## **Examples:** + ## + ## .. code-block :: + ## proc savePreferences(options: HashSet[string]) = + ## assert options.isValid, "Pass an initialized set!" + ## # Do stuff here, may crash in release builds! + result = s.data.len > 0 + +proc `[]`*[A](s: var HashSet[A], key: A): var A = + ## Returns the element that is actually stored in `s` which has the same + ## value as `key` or raises the ``KeyError`` exception. + ## + ## This is useful when one overloaded `hash` and `==` but still needs + ## reference semantics for sharing. + assert s.isValid, "The set needs to be initialized." + var hc: Hash + var index = rawGet(s, key, hc) + if index >= 0: result = s.data[index].key + else: + when compiles($key): + raise newException(KeyError, "key not found: " & $key) + else: + raise newException(KeyError, "key not found") + +proc contains*[A](s: HashSet[A], key: A): bool = + ## Returns true if `key` is in `s`. + ## + ## This allows the usage of `in` operator. + ## + ## See also: + ## * `incl proc <#incl,HashSet[A],A>`_ + ## * `containsOrIncl proc <#containsOrIncl,HashSet[A],A>`_ + runnableExamples: + var values = initSet[int]() + assert(not values.contains(2)) + assert 2 notin values + + values.incl(2) + assert values.contains(2) + assert 2 in values + + assert s.isValid, "The set needs to be initialized." + var hc: Hash + var index = rawGet(s, key, hc) + result = index >= 0 + +proc incl*[A](s: var HashSet[A], key: A) = + ## Includes an element `key` in `s`. + ## + ## This doesn't do anything if `key` is already in `s`. + ## + ## See also: + ## * `excl proc <#excl,HashSet[A],A>`_ for excluding an element + ## * `incl proc <#incl,HashSet[A],HashSet[A]>`_ for including other set + ## * `containsOrIncl proc <#containsOrIncl,HashSet[A],A>`_ + runnableExamples: + var values = initSet[int]() + values.incl(2) + values.incl(2) + assert values.len == 1 + + assert s.isValid, "The set needs to be initialized." + inclImpl() + +proc incl*[A](s: var HashSet[A], other: HashSet[A]) = + ## Includes all elements from `other` set into `s` (must be declared as `var`). + ## + ## This is the in-place version of `s + other <#+,HashSet[A],HashSet[A]>`_. + ## + ## See also: + ## * `excl proc <#excl,HashSet[A],HashSet[A]>`_ for excluding other set + ## * `incl proc <#incl,HashSet[A],A>`_ for including an element + ## * `containsOrIncl proc <#containsOrIncl,HashSet[A],A>`_ + runnableExamples: + var + values = toSet([1, 2, 3]) + others = toSet([3, 4, 5]) + values.incl(others) + assert values.len == 5 + + assert s.isValid, "The set `s` needs to be initialized." + assert other.isValid, "The set `other` needs to be initialized." + for item in other: incl(s, item) + +proc containsOrIncl*[A](s: var HashSet[A], key: A): bool = + ## Includes `key` in the set `s` and tells if `key` was already in `s`. + ## + ## The difference with regards to the `incl proc <#incl,HashSet[A],A>`_ is + ## that this proc returns `true` if `s` already contained `key`. The + ## proc will return `false` if `key` was added as a new value to `s` during + ## this call. + ## + ## See also: + ## * `incl proc <#incl,HashSet[A],A>`_ for including an element + ## * `incl proc <#incl,HashSet[A],HashSet[A]>`_ for including other set + ## * `missingOrExcl proc <#missingOrExcl,HashSet[A],A>`_ + runnableExamples: + var values = initSet[int]() + assert values.containsOrIncl(2) == false + assert values.containsOrIncl(2) == true + assert values.containsOrIncl(3) == false + + assert s.isValid, "The set needs to be initialized." + containsOrInclImpl() proc excl*[A](s: var HashSet[A], key: A) = ## Excludes `key` from the set `s`. ## - ## This doesn't do anything if `key` is not found in `s`. Example: - ## - ## .. code-block:: - ## var s = toSet([2, 3, 6, 7]) - ## s.excl(2) - ## s.excl(2) - ## assert s.len == 3 + ## This doesn't do anything if `key` is not found in `s`. + ## + ## See also: + ## * `incl proc <#incl,HashSet[A],A>`_ for including an element + ## * `excl proc <#excl,HashSet[A],HashSet[A]>`_ for excluding other set + ## * `missingOrExcl proc <#missingOrExcl,HashSet[A],A>`_ + runnableExamples: + var s = toSet([2, 3, 6, 7]) + s.excl(2) + s.excl(2) + assert s.len == 3 discard exclImpl(s, key) proc excl*[A](s: var HashSet[A], other: HashSet[A]) = - ## Excludes everything in `other` from `s`. - ## - ## Example: - ## - ## .. code-block:: - ## var - ## numbers = toSet([1, 2, 3, 4, 5]) - ## even = toSet([2, 4, 6, 8]) - ## numbers.excl(even) - ## echo numbers - ## # --> {1, 3, 5} + ## Excludes all elements of `other` set from `s`. + ## + ## This is the in-place version of `s - other <#-,HashSet[A],HashSet[A]>`_. + ## + ## See also: + ## * `incl proc <#incl,HashSet[A],HashSet[A]>`_ for including other set + ## * `excl proc <#excl,HashSet[A],A>`_ for excluding an element + ## * `missingOrExcl proc <#missingOrExcl,HashSet[A],A>`_ + runnableExamples: + var + numbers = toSet([1, 2, 3, 4, 5]) + even = toSet([2, 4, 6, 8]) + numbers.excl(even) + assert len(numbers) == 3 + ## numbers == {1, 3, 5} + assert s.isValid, "The set `s` needs to be initialized." assert other.isValid, "The set `other` needs to be initialized." for item in other: discard exclImpl(s, item) +proc missingOrExcl*[A](s: var HashSet[A], key: A): bool = + ## Excludes `key` in the set `s` and tells if `key` was already missing from `s`. + ## + ## The difference with regards to the `excl proc <#excl,HashSet[A],A>`_ is + ## that this proc returns `true` if `key` was missing from `s`. + ## The proc will return `false` if `key` was in `s` and it was removed + ## during this call. + ## + ## See also: + ## * `excl proc <#excl,HashSet[A],A>`_ for excluding an element + ## * `excl proc <#excl,HashSet[A],HashSet[A]>`_ for excluding other set + ## * `containsOrIncl proc <#containsOrIncl,HashSet[A],A>`_ + runnableExamples: + var s = toSet([2, 3, 6, 7]) + assert s.missingOrExcl(4) == true + assert s.missingOrExcl(6) == false + assert s.missingOrExcl(6) == true + exclImpl(s, key) + proc pop*[A](s: var HashSet[A]): A = ## Remove and return an arbitrary element from the set `s`. ## ## Raises KeyError if the set `s` is empty. ## + ## See also: + ## * `clear proc <#clear,HashSet[A]>`_ + runnableExamples: + var s = toSet([2, 1]) + assert s.pop == 1 + assert s.pop == 2 + doAssertRaises(KeyError, echo s.pop) + for h in 0..high(s.data): if isFilled(s.data[h].hcode): result = s.data[h].key @@ -354,103 +468,64 @@ proc pop*[A](s: var HashSet[A]): A = return result raise newException(KeyError, "set is empty") -proc containsOrIncl*[A](s: var HashSet[A], key: A): bool = - ## Includes `key` in the set `s` and tells if `key` was added to `s`. +proc clear*[A](s: var HashSet[A]) = + ## Clears the HashSet back to an empty state, without shrinking + ## any of the existing storage. ## - ## The difference with regards to the `incl() <#incl,TSet[A],A>`_ proc is - ## that this proc returns `true` if `key` was already present in `s`. The - ## proc will return false if `key` was added as a new value to `s` during - ## this call. Example: + ## `O(n)` operation, where `n` is the size of the hash bucket. ## - ## .. code-block:: - ## var values = initSet[int]() - ## assert values.containsOrIncl(2) == false - ## assert values.containsOrIncl(2) == true - assert s.isValid, "The set needs to be initialized." - containsOrInclImpl() + ## See also: + ## * `pop proc <#pop,HashSet[A]>`_ + runnableExamples: + var s = toSet([3, 5, 7]) + clear(s) + assert len(s) == 0 -proc init*[A](s: var HashSet[A], initialSize=64) = - ## Initializes a hash set. - ## - ## The `initialSize` parameter needs to be a power of two. You can use - ## `math.nextPowerOfTwo() <math.html#nextPowerOfTwo>`_ or `rightSize` to - ## guarantee that at runtime. All set variables must be initialized before - ## use with other procs from this module with the exception of `isValid() - ## <#isValid,TSet[A]>`_ and `len() <#len,TSet[A]>`_. - ## - ## You can call this proc on a previously initialized hash set, which will - ## discard all its values. This might be more convenient than iterating over - ## existing values and calling `excl() <#excl,TSet[A],A>`_ on them. Example: - ## - ## .. code-block :: - ## var a: HashSet[int] - ## a.init(4) - ## a.incl(2) - ## a.init - ## assert a.len == 0 and a.isValid - assert isPowerOfTwo(initialSize) s.counter = 0 - newSeq(s.data, initialSize) + for i in 0..<s.data.len: + s.data[i].hcode = 0 + s.data[i].key = default(type(s.data[i].key)) -proc initSet*[A](initialSize=64): HashSet[A] = - ## Wrapper around `init() <#init,TSet[A],int>`_ for initialization of hash - ## sets. - ## - ## Returns an empty hash set you can assign directly in ``var`` blocks in a - ## single line. Example: +proc len*[A](s: HashSet[A]): int = + ## Returns the number of elements in `s`. ## - ## .. code-block :: - ## var a = initSet[int](4) - ## a.incl(2) - result.init(initialSize) + ## Due to an implementation detail you can call this proc on variables which + ## have not been initialized yet. The proc will return zero as the length + ## then. + runnableExamples: + var a: HashSet[string] + assert len(a) == 0 + let s = toSet([3, 5, 7]) + assert len(s) == 3 + result = s.counter -proc toSet*[A](keys: openArray[A]): HashSet[A] = - ## Creates a new hash set that contains the given `keys`. - ## - ## Example: +proc card*[A](s: HashSet[A]): int = + ## Alias for `len() <#len,HashSet[A]>`_. ## - ## .. code-block:: - ## var numbers = toSet([1, 2, 3, 4, 5]) - ## assert numbers.contains(2) - ## assert numbers.contains(4) - result = initSet[A](rightSize(keys.len)) - for key in items(keys): result.incl(key) - -template dollarImpl() {.dirty.} = - result = "{" - for key in items(s): - if result.len > 1: result.add(", ") - result.addQuoted(key) - result.add("}") + ## Card stands for the `cardinality + ## <http://en.wikipedia.org/wiki/Cardinality>`_ of a set. + result = s.counter -proc `$`*[A](s: HashSet[A]): string = - ## Converts the set `s` to a string, mostly for logging purposes. - ## - ## Don't use this proc for serialization, the representation may change at - ## any moment and values are not escaped. Example: - ## - ## Example: - ## - ## .. code-block:: - ## echo toSet([2, 4, 5]) - ## # --> {2, 4, 5} - ## echo toSet(["no", "esc'aping", "is \" provided"]) - ## # --> {no, esc'aping, is " provided} - assert s.isValid, "The set needs to be initialized." - dollarImpl() proc union*[A](s1, s2: HashSet[A]): HashSet[A] = ## Returns the union of the sets `s1` and `s2`. ## - ## The union of two sets is represented mathematically as *A ∪ B* and is the - ## set of all objects that are members of `s1`, `s2` or both. Example: + ## The same as `s1 + s2 <#+,HashSet[A],HashSet[A]>`_. ## - ## .. code-block:: - ## var - ## a = toSet(["a", "b"]) - ## b = toSet(["b", "c"]) - ## c = union(a, b) - ## assert c == toSet(["a", "b", "c"]) + ## The union of two sets is represented mathematically as *A ∪ B* and is the + ## set of all objects that are members of `s1`, `s2` or both. + ## + ## See also: + ## * `intersection proc <#intersection,HashSet[A],HashSet[A]>`_ + ## * `difference proc <#difference,HashSet[A],HashSet[A]>`_ + ## * `symmetricDifference proc <#symmetricDifference,HashSet[A],HashSet[A]>`_ + runnableExamples: + let + a = toSet(["a", "b"]) + b = toSet(["b", "c"]) + c = union(a, b) + assert c == toSet(["a", "b", "c"]) + assert s1.isValid, "The set `s1` needs to be initialized." assert s2.isValid, "The set `s2` needs to be initialized." result = s1 @@ -459,16 +534,23 @@ proc union*[A](s1, s2: HashSet[A]): HashSet[A] = proc intersection*[A](s1, s2: HashSet[A]): HashSet[A] = ## Returns the intersection of the sets `s1` and `s2`. ## + ## The same as `s1 * s2 <#*,HashSet[A],HashSet[A]>`_. + ## ## The intersection of two sets is represented mathematically as *A ∩ B* and ## is the set of all objects that are members of `s1` and `s2` at the same - ## time. Example: - ## - ## .. code-block:: - ## var - ## a = toSet(["a", "b"]) - ## b = toSet(["b", "c"]) - ## c = intersection(a, b) - ## assert c == toSet(["b"]) + ## time. + ## + ## See also: + ## * `union proc <#union,HashSet[A],HashSet[A]>`_ + ## * `difference proc <#difference,HashSet[A],HashSet[A]>`_ + ## * `symmetricDifference proc <#symmetricDifference,HashSet[A],HashSet[A]>`_ + runnableExamples: + let + a = toSet(["a", "b"]) + b = toSet(["b", "c"]) + c = intersection(a, b) + assert c == toSet(["b"]) + assert s1.isValid, "The set `s1` needs to be initialized." assert s2.isValid, "The set `s2` needs to be initialized." result = initSet[A](min(s1.data.len, s2.data.len)) @@ -478,16 +560,22 @@ proc intersection*[A](s1, s2: HashSet[A]): HashSet[A] = proc difference*[A](s1, s2: HashSet[A]): HashSet[A] = ## Returns the difference of the sets `s1` and `s2`. ## + ## The same as `s1 - s2 <#-,HashSet[A],HashSet[A]>`_. + ## ## The difference of two sets is represented mathematically as *A \ B* and is ## the set of all objects that are members of `s1` and not members of `s2`. - ## Example: ## - ## .. code-block:: - ## var - ## a = toSet(["a", "b"]) - ## b = toSet(["b", "c"]) - ## c = difference(a, b) - ## assert c == toSet(["a"]) + ## See also: + ## * `union proc <#union,HashSet[A],HashSet[A]>`_ + ## * `intersection proc <#intersection,HashSet[A],HashSet[A]>`_ + ## * `symmetricDifference proc <#symmetricDifference,HashSet[A],HashSet[A]>`_ + runnableExamples: + let + a = toSet(["a", "b"]) + b = toSet(["b", "c"]) + c = difference(a, b) + assert c == toSet(["a"]) + assert s1.isValid, "The set `s1` needs to be initialized." assert s2.isValid, "The set `s2` needs to be initialized." result = initSet[A]() @@ -498,16 +586,23 @@ proc difference*[A](s1, s2: HashSet[A]): HashSet[A] = proc symmetricDifference*[A](s1, s2: HashSet[A]): HashSet[A] = ## Returns the symmetric difference of the sets `s1` and `s2`. ## + ## The same as `s1 -+- s2 <#-+-,HashSet[A],HashSet[A]>`_. + ## ## The symmetric difference of two sets is represented mathematically as *A △ ## B* or *A ⊖ B* and is the set of all objects that are members of `s1` or - ## `s2` but not both at the same time. Example: - ## - ## .. code-block:: - ## var - ## a = toSet(["a", "b"]) - ## b = toSet(["b", "c"]) - ## c = symmetricDifference(a, b) - ## assert c == toSet(["a", "c"]) + ## `s2` but not both at the same time. + ## + ## See also: + ## * `union proc <#union,HashSet[A],HashSet[A]>`_ + ## * `intersection proc <#intersection,HashSet[A],HashSet[A]>`_ + ## * `difference proc <#difference,HashSet[A],HashSet[A]>`_ + runnableExamples: + let + a = toSet(["a", "b"]) + b = toSet(["b", "c"]) + c = symmetricDifference(a, b) + assert c == toSet(["a", "c"]) + assert s1.isValid, "The set `s1` needs to be initialized." assert s2.isValid, "The set `s2` needs to be initialized." result = s1 @@ -515,32 +610,31 @@ proc symmetricDifference*[A](s1, s2: HashSet[A]): HashSet[A] = if containsOrIncl(result, item): excl(result, item) proc `+`*[A](s1, s2: HashSet[A]): HashSet[A] {.inline.} = - ## Alias for `union(s1, s2) <#union>`_. + ## Alias for `union(s1, s2) <#union,HashSet[A],HashSet[A]>`_. result = union(s1, s2) proc `*`*[A](s1, s2: HashSet[A]): HashSet[A] {.inline.} = - ## Alias for `intersection(s1, s2) <#intersection>`_. + ## Alias for `intersection(s1, s2) <#intersection,HashSet[A],HashSet[A]>`_. result = intersection(s1, s2) proc `-`*[A](s1, s2: HashSet[A]): HashSet[A] {.inline.} = - ## Alias for `difference(s1, s2) <#difference>`_. + ## Alias for `difference(s1, s2) <#difference,HashSet[A],HashSet[A]>`_. result = difference(s1, s2) proc `-+-`*[A](s1, s2: HashSet[A]): HashSet[A] {.inline.} = - ## Alias for `symmetricDifference(s1, s2) <#symmetricDifference>`_. + ## Alias for `symmetricDifference(s1, s2) + ## <#symmetricDifference,HashSet[A],HashSet[A]>`_. result = symmetricDifference(s1, s2) proc disjoint*[A](s1, s2: HashSet[A]): bool = - ## Returns true iff the sets `s1` and `s2` have no items in common. - ## - ## Example: - ## - ## .. code-block:: - ## var - ## a = toSet(["a", "b"]) - ## b = toSet(["b", "c"]) - ## assert disjoint(a, b) == false - ## assert disjoint(a, b - a) == true + ## Returns `true` if the sets `s1` and `s2` have no items in common. + runnableExamples: + let + a = toSet(["a", "b"]) + b = toSet(["b", "c"]) + assert disjoint(a, b) == false + assert disjoint(a, b - a) == true + assert s1.isValid, "The set `s1` needs to be initialized." assert s2.isValid, "The set `s2` needs to be initialized." for item in s1: @@ -551,30 +645,29 @@ proc `<`*[A](s, t: HashSet[A]): bool = ## Returns true if `s` is a strict or proper subset of `t`. ## ## A strict or proper subset `s` has all of its members in `t` but `t` has - ## more elements than `s`. Example: - ## - ## .. code-block:: - ## var - ## a = toSet(["a", "b"]) - ## b = toSet(["b", "c"]) - ## c = intersection(a, b) - ## assert c < a and c < b - ## assert((a < a) == false) + ## more elements than `s`. + runnableExamples: + let + a = toSet(["a", "b"]) + b = toSet(["b", "c"]) + c = intersection(a, b) + assert c < a and c < b + assert(not (a < a)) s.counter != t.counter and s <= t proc `<=`*[A](s, t: HashSet[A]): bool = - ## Returns true if `s` is subset of `t`. + ## Returns true if `s` is a subset of `t`. ## ## A subset `s` has all of its members in `t` and `t` doesn't necessarily - ## have more members than `s`. That is, `s` can be equal to `t`. Example: - ## - ## .. code-block:: - ## var - ## a = toSet(["a", "b"]) - ## b = toSet(["b", "c"]) - ## c = intersection(a, b) - ## assert c <= a and c <= b - ## assert((a <= a)) + ## have more members than `s`. That is, `s` can be equal to `t`. + runnableExamples: + let + a = toSet(["a", "b"]) + b = toSet(["b", "c"]) + c = intersection(a, b) + assert c <= a and c <= b + assert a <= a + result = false if s.counter > t.counter: return result = true @@ -585,90 +678,109 @@ proc `<=`*[A](s, t: HashSet[A]): bool = proc `==`*[A](s, t: HashSet[A]): bool = ## Returns true if both `s` and `t` have the same members and set size. + runnableExamples: + var + a = toSet([1, 2]) + b = toSet([2, 1]) + assert a == b + s.counter == t.counter and s <= t + +proc map*[A, B](data: HashSet[A], op: proc (x: A): B {.closure.}): HashSet[B] = + ## Returns a new set after applying `op` pric on each of the elements of + ##`data` set. ## - ## Example: + ## You can use this proc to transform the elements from a set. + runnableExamples: + let + a = toSet([1, 2, 3]) + b = a.map(proc (x: int): string = $x) + assert b == toSet(["1", "2", "3"]) + + result = initSet[B]() + for item in data: result.incl(op(item)) + +proc hash*[A](s: HashSet[A]): Hash = + ## Hashing of HashSet. + assert s.isValid, "The set needs to be initialized." + for h in 0..high(s.data): + result = result xor s.data[h].hcode + result = !$result + +proc `$`*[A](s: HashSet[A]): string = + ## Converts the set `s` to a string, mostly for logging and printing purposes. + ## + ## Don't use this proc for serialization, the representation may change at + ## any moment and values are not escaped. + ## + ## **Examples:** ## ## .. code-block:: - ## var - ## a = toSet([1, 2]) - ## b = toSet([1]) - ## b.incl(2) - ## assert a == b - s.counter == t.counter and s <= t + ## echo toSet([2, 4, 5]) + ## # --> {2, 4, 5} + ## echo toSet(["no", "esc'aping", "is \" provided"]) + ## # --> {no, esc'aping, is " provided} + assert s.isValid, "The set needs to be initialized." + dollarImpl() -proc map*[A, B](data: HashSet[A], op: proc (x: A): B {.closure.}): HashSet[B] = - ## Returns a new set after applying `op` on each of the elements of `data`. +proc rightSize*(count: Natural): int {.inline.} = + ## Return the value of `initialSize` to support `count` items. + ## + ## If more items are expected to be added, simply add that + ## expected extra amount to the parameter before calling this. + ## + ## Internally, we want `mustRehash(rightSize(x), x) == false`. + result = nextPowerOfTwo(count * 3 div 2 + 4) + + + +iterator items*[A](s: HashSet[A]): A = + ## Iterates over elements of the set `s`. ## - ## You can use this proc to transform the elements from a set. Example: + ## If you need a sequence with the elelments you can use `sequtils.toSeq + ## template <sequtils.html#toSeq.t,untyped>`_. ## ## .. code-block:: - ## var a = toSet([1, 2, 3]) - ## var b = a.map(proc (x: int): string = $x) - ## assert b == toSet(["1", "2", "3"]) - result = initSet[B]() - for item in data: result.incl(op(item)) + ## type + ## pair = tuple[a, b: int] + ## var + ## a, b = initSet[pair]() + ## a.incl((2, 3)) + ## a.incl((3, 2)) + ## a.incl((2, 3)) + ## for x, y in a.items: + ## b.incl((x - 2, y + 1)) + ## assert a.len == 2 + ## echo b + ## # --> {(a: 1, b: 3), (a: 0, b: 4)} + assert s.isValid, "The set needs to be initialized." + for h in 0..high(s.data): + if isFilled(s.data[h].hcode): yield s.data[h].key + + + + -# ------------------------------ ordered set ------------------------------ + + + +# --------------------------------------------------------------------- +# --------------------------- OrderedSet ------------------------------ +# --------------------------------------------------------------------- type OrderedKeyValuePair[A] = tuple[ hcode: Hash, next: int, key: A] OrderedKeyValuePairSeq[A] = seq[OrderedKeyValuePair[A]] - OrderedSet* {.myShallow.}[A] = object ## \ + OrderedSet* {.myShallow.} [A] = object ## \ ## A generic hash set that remembers insertion order. ## - ## Use `init() <#init,OrderedSet[A],int>`_ or `initOrderedSet[type]() - ## <#initOrderedSet>`_ before calling other procs on it. + ## Use `init proc <#init,OrderedSet[A],int>`_ or `initOrderedSet proc + ## <#initOrderedSet,int>`_ before calling other procs on it. data: OrderedKeyValuePairSeq[A] counter, first, last: int -proc clear*[A](s: var OrderedSet[A]) = - ## Clears the OrderedSet back to an empty state, without shrinking - ## any of the existing storage. O(n) where n is the size of the hash bucket. - s.counter = 0 - s.first = -1 - s.last = -1 - for i in 0..<s.data.len: - s.data[i].hcode = 0 - s.data[i].next = 0 - s.data[i].key = default(type(s.data[i].key)) - - -proc isValid*[A](s: OrderedSet[A]): bool = - ## Returns `true` if the ordered set has been initialized with `initSet - ## <#initOrderedSet>`_. - ## - ## Most operations over an uninitialized ordered set will crash at runtime - ## and `assert <system.html#assert>`_ in debug builds. You can use this proc - ## in your own procs to verify that ordered sets passed to your procs are - ## correctly initialized. Example: - ## - ## .. code-block:: - ## proc saveTarotCards(cards: OrderedSet[int]) = - ## assert cards.isValid, "Pass an initialized set!" - ## # Do stuff here, may crash in release builds! - result = s.data.len > 0 - -proc len*[A](s: OrderedSet[A]): int {.inline.} = - ## Returns the number of keys in `s`. - ## - ## Due to an implementation detail you can call this proc on variables which - ## have not been initialized yet. The proc will return zero as the length - ## then. Example: - ## - ## .. code-block:: - ## - ## var values: OrderedSet[int] - ## assert(not values.isValid) - ## assert values.len == 0 - result = s.counter -proc card*[A](s: OrderedSet[A]): int {.inline.} = - ## Alias for `len() <#len,TOrderedSet[A]>`_. - ## - ## Card stands for the `cardinality - ## <http://en.wikipedia.org/wiki/Cardinality>`_ of a set. - result = s.counter +# ---------------------- helpers ----------------------------------- template forAllOrderedPairs(yieldStmt: untyped) {.dirty.} = var h = s.first @@ -680,61 +792,12 @@ template forAllOrderedPairs(yieldStmt: untyped) {.dirty.} = inc(idx) h = nxt -iterator items*[A](s: OrderedSet[A]): A = - ## Iterates over keys in the ordered set `s` in insertion order. - ## - ## If you need a sequence with the keys you can use `sequtils.toSeq() - ## <sequtils.html#toSeq>`_ on the iterator. Usage example: - ## - ## .. code-block:: - ## var a = initOrderedSet[int]() - ## for value in [9, 2, 1, 5, 1, 8, 4, 2]: - ## a.incl(value) - ## for value in a.items: - ## echo "Got ", value - ## # --> Got 9 - ## # --> Got 2 - ## # --> Got 1 - ## # --> Got 5 - ## # --> Got 8 - ## # --> Got 4 - assert s.isValid, "The set needs to be initialized." - forAllOrderedPairs: - yield s.data[h].key - -proc hash*[A](s: OrderedSet[A]): Hash = - ## hashing of OrderedSet - assert s.isValid, "The set needs to be initialized." - forAllOrderedPairs: - result = result !& s.data[h].hcode - result = !$result - -iterator pairs*[A](s: OrderedSet[A]): tuple[a: int, b: A] = - assert s.isValid, "The set needs to be initialized" - forAllOrderedPairs: - yield (idx, s.data[h].key) - proc rawGetKnownHC[A](s: OrderedSet[A], key: A, hc: Hash): int {.inline.} = rawGetKnownHCImpl() proc rawGet[A](s: OrderedSet[A], key: A, hc: var Hash): int {.inline.} = rawGetImpl() -proc contains*[A](s: OrderedSet[A], key: A): bool = - ## Returns true iff `key` is in `s`. - ## - ## Example: - ## - ## .. code-block:: - ## var values = initOrderedSet[int]() - ## assert(not values.contains(2)) - ## values.incl(2) - ## assert values.contains(2) - assert s.isValid, "The set needs to be initialized." - var hc: Hash - var index = rawGet(s, key, hc) - result = index >= 0 - proc rawInsert[A](s: var OrderedSet[A], data: var OrderedKeyValuePairSeq[A], key: A, hc: Hash, h: Hash) = rawInsertImpl() @@ -757,33 +820,7 @@ proc enlarge[A](s: var OrderedSet[A]) = rawInsert(s, s.data, n[h].key, n[h].hcode, j) h = nxt -proc incl*[A](s: var OrderedSet[A], key: A) = - ## Includes an element `key` in `s`. - ## - ## This doesn't do anything if `key` is already in `s`. Example: - ## - ## .. code-block:: - ## var values = initOrderedSet[int]() - ## values.incl(2) - ## values.incl(2) - ## assert values.len == 1 - assert s.isValid, "The set needs to be initialized." - inclImpl() - -proc incl*[A](s: var HashSet[A], other: OrderedSet[A]) = - ## Includes all elements from `other` into `s`. - ## - ## Example: - ## - ## .. code-block:: - ## var values = initOrderedSet[int]() - ## values.incl(2) - ## var others = toOrderedSet([6, 7]) - ## values.incl(others) - ## assert values.len == 3 - assert s.isValid, "The set `s` needs to be initialized." - assert other.isValid, "The set `other` needs to be initialized." - for item in other: incl(s, item) +proc isValid*[A](s: OrderedSet[A]): bool proc exclImpl[A](s: var OrderedSet[A], key: A) : bool {. inline .} = assert s.isValid, "The set needs to be initialized." @@ -806,65 +843,37 @@ proc exclImpl[A](s: var OrderedSet[A], key: A) : bool {. inline .} = rawInsert(s, s.data, n[h].key, n[h].hcode, j) h = nxt -proc missingOrExcl*[A](s: var OrderedSet[A], key: A): bool = - ## Excludes `key` in the set `s` and tells if `key` was removed from `s`. Efficiency: O(n). - ## - ## The difference with regards to the `excl() <#excl,TOrderedSet[A],A>`_ proc is - ## that this proc returns `true` if `key` was not present in `s`. Example: - ## - ## .. code-block:: - ## var s = toOrderedSet([2, 3, 6, 7]) - ## assert s.missingOrExcl(4) == true - ## assert s.missingOrExcl(6) == false - exclImpl(s, key) -proc excl*[A](s: var OrderedSet[A], key: A) = - ## Excludes `key` from the set `s`. Efficiency: O(n). - ## - ## This doesn't do anything if `key` is not found in `s`. Example: - ## - ## .. code-block:: - ## var s = toOrderedSet([2, 3, 6, 7]) - ## s.excl(2) - ## s.excl(2) - ## assert s.len == 3 - discard exclImpl(s, key) +# ----------------------------------------------------------------------- + -proc containsOrIncl*[A](s: var OrderedSet[A], key: A): bool = - ## Includes `key` in the set `s` and tells if `key` was added to `s`. - ## - ## The difference with regards to the `incl() <#incl,TOrderedSet[A],A>`_ proc - ## is that this proc returns `true` if `key` was already present in `s`. The - ## proc will return false if `key` was added as a new value to `s` during - ## this call. Example: - ## - ## .. code-block:: - ## var values = initOrderedSet[int]() - ## assert values.containsOrIncl(2) == false - ## assert values.containsOrIncl(2) == true - assert s.isValid, "The set needs to be initialized." - containsOrInclImpl() proc init*[A](s: var OrderedSet[A], initialSize=64) = ## Initializes an ordered hash set. ## - ## The `initialSize` parameter needs to be a power of two. You can use - ## `math.nextPowerOfTwo() <math.html#nextPowerOfTwo>`_ or `rightSize` to - ## guarantee that at runtime. All set variables must be initialized before - ## use with other procs from this module with the exception of `isValid() - ## <#isValid,TOrderedSet[A]>`_ and `len() <#len,TOrderedSet[A]>`_. + ## The `initialSize` parameter needs to be a power of two (default: 64). + ## If you need to accept runtime values for this, you can use + ## `math.nextPowerOfTwo proc <math.html#nextPowerOfTwo>`_ or `rightSize proc + ## <#rightSize,Natural>`_ from this module. ## - ## You can call this proc on a previously initialized ordered hash set to - ## discard its values. At the moment this is the only proc to remove elements - ## from an ordered hash set. Example: + ## All set variables must be initialized before + ## use with other procs from this module, with the exception of `isValid proc + ## <#isValid,HashSet[A]>`_ and `len() <#len,HashSet[A]>`_. ## - ## .. code-block :: - ## var a: OrderedSet[int] - ## a.init(4) - ## a.incl(2) - ## a.init - ## assert a.len == 0 and a.isValid + ## You can call this proc on a previously initialized hash set, which will + ## discard all its values. This might be more convenient than iterating over + ## existing values and calling `excl() <#excl,HashSet[A],A>`_ on them. + ## + ## See also: + ## * `initOrderedSet proc <#initOrderedSet,int>`_ + ## * `toOrderedSet proc <#toOrderedSet,openArray[A]>`_ + runnableExamples: + var a: OrderedSet[int] + assert(not a.isValid) + init(a) + assert a.isValid + assert isPowerOfTwo(initialSize) s.counter = 0 s.first = -1 @@ -872,47 +881,215 @@ proc init*[A](s: var OrderedSet[A], initialSize=64) = newSeq(s.data, initialSize) proc initOrderedSet*[A](initialSize=64): OrderedSet[A] = - ## Wrapper around `init() <#init,TOrderedSet[A],int>`_ for initialization of + ## Wrapper around `init proc <#init,OrderedSet[A],int>`_ for initialization of ## ordered hash sets. ## - ## Returns an empty ordered hash set you can assign directly in ``var`` - ## blocks in a single line. Example: + ## Returns an empty ordered hash set you can assign directly in ``var`` blocks + ## in a single line. ## - ## .. code-block :: - ## var a = initOrderedSet[int](4) - ## a.incl(2) + ## See also: + ## * `toOrderedSet proc <#toOrderedSet,openArray[A]>`_ + runnableExamples: + var a = initOrderedSet[int]() + assert a.isValid + a.incl(3) + assert len(a) == 1 result.init(initialSize) proc toOrderedSet*[A](keys: openArray[A]): OrderedSet[A] = - ## Creates a new ordered hash set that contains the given `keys`. - ## - ## Example: - ## - ## .. code-block:: - ## var numbers = toOrderedSet([1, 2, 3, 4, 5]) - ## assert numbers.contains(2) - ## assert numbers.contains(4) + ## Creates a new hash set that contains the members of the given + ## collection (seq, array, or string) `keys`. + ## + ## Duplicates are removed. + ## + ## See also: + ## * `initOrderedSet proc <#initOrderedSet,int>`_ + runnableExamples: + let + a = toOrderedSet([5, 3, 2]) + b = toOrderedSet("abracadabra") + assert len(a) == 3 + ## a == {5, 3, 2} # different than in HashSet + assert len(b) == 5 + ## b == {'a', 'b', 'r', 'c', 'd'} # different than in HashSet + result = initOrderedSet[A](rightSize(keys.len)) for key in items(keys): result.incl(key) -proc `$`*[A](s: OrderedSet[A]): string = - ## Converts the ordered hash set `s` to a string, mostly for logging purposes. +proc isValid*[A](s: OrderedSet[A]): bool = + ## Returns `true` if the set has been initialized (with `initSet proc + ## <#initOrderedSet,int>`_ or `init proc <#init,OrderedSet[A],int>`_). ## - ## Don't use this proc for serialization, the representation may change at - ## any moment and values are not escaped. Example: + ## Most operations over an uninitialized set will crash at runtime and + ## `assert <system.html#assert>`_ in debug builds. You can use this proc in + ## your own procs to verify that sets passed to your procs are correctly + ## initialized. ## - ## Example: + ## **Examples:** ## - ## .. code-block:: - ## echo toOrderedSet([2, 4, 5]) - ## # --> {2, 4, 5} - ## echo toOrderedSet(["no", "esc'aping", "is \" provided"]) - ## # --> {no, esc'aping, is " provided} + ## .. code-block :: + ## proc savePreferences(options: OrderedSet[string]) = + ## assert options.isValid, "Pass an initialized set!" + ## # Do stuff here, may crash in release builds! + result = s.data.len > 0 + +proc contains*[A](s: OrderedSet[A], key: A): bool = + ## Returns true if `key` is in `s`. + ## + ## This allows the usage of `in` operator. + ## + ## See also: + ## * `incl proc <#incl,OrderedSet[A],A>`_ + ## * `containsOrIncl proc <#containsOrIncl,OrderedSet[A],A>`_ + runnableExamples: + var values = initOrderedSet[int]() + assert(not values.contains(2)) + assert 2 notin values + + values.incl(2) + assert values.contains(2) + assert 2 in values + assert s.isValid, "The set needs to be initialized." - dollarImpl() + var hc: Hash + var index = rawGet(s, key, hc) + result = index >= 0 + +proc incl*[A](s: var OrderedSet[A], key: A) = + ## Includes an element `key` in `s`. + ## + ## This doesn't do anything if `key` is already in `s`. + ## + ## See also: + ## * `excl proc <#excl,OrderedSet[A],A>`_ for excluding an element + ## * `incl proc <#incl,HashSet[A],OrderedSet[A]>`_ for including other set + ## * `containsOrIncl proc <#containsOrIncl,OrderedSet[A],A>`_ + runnableExamples: + var values = initOrderedSet[int]() + values.incl(2) + values.incl(2) + assert values.len == 1 + + assert s.isValid, "The set needs to be initialized." + inclImpl() + +proc incl*[A](s: var HashSet[A], other: OrderedSet[A]) = + ## Includes all elements from the OrderedSet `other` into + ## HashSet `s` (must be declared as `var`). + ## + ## See also: + ## * `incl proc <#incl,OrderedSet[A],A>`_ for including an element + ## * `containsOrIncl proc <#containsOrIncl,OrderedSet[A],A>`_ + runnableExamples: + var + values = toSet([1, 2, 3]) + others = toOrderedSet([3, 4, 5]) + values.incl(others) + assert values.len == 5 + assert s.isValid, "The set `s` needs to be initialized." + assert other.isValid, "The set `other` needs to be initialized." + for item in other: incl(s, item) + +proc containsOrIncl*[A](s: var OrderedSet[A], key: A): bool = + ## Includes `key` in the set `s` and tells if `key` was already in `s`. + ## + ## The difference with regards to the `incl proc <#incl,OrderedSet[A],A>`_ is + ## that this proc returns `true` if `s` already contained `key`. The + ## proc will return false if `key` was added as a new value to `s` during + ## this call. + ## + ## See also: + ## * `incl proc <#incl,OrderedSet[A],A>`_ for including an element + ## * `missingOrExcl proc <#missingOrExcl,OrderedSet[A],A>`_ + runnableExamples: + var values = initOrderedSet[int]() + assert values.containsOrIncl(2) == false + assert values.containsOrIncl(2) == true + assert values.containsOrIncl(3) == false + + assert s.isValid, "The set needs to be initialized." + containsOrInclImpl() + +proc excl*[A](s: var OrderedSet[A], key: A) = + ## Excludes `key` from the set `s`. Efficiency: `O(n)`. + ## + ## This doesn't do anything if `key` is not found in `s`. + ## + ## See also: + ## * `incl proc <#incl,OrderedSet[A],A>`_ for including an element + ## * `missingOrExcl proc <#missingOrExcl,OrderedSet[A],A>`_ + runnableExamples: + var s = toOrderedSet([2, 3, 6, 7]) + s.excl(2) + s.excl(2) + assert s.len == 3 + discard exclImpl(s, key) + +proc missingOrExcl*[A](s: var OrderedSet[A], key: A): bool = + ## Excludes `key` in the set `s` and tells if `key` was already missing from `s`. + ## Efficiency: O(n). + ## + ## The difference with regards to the `excl proc <#excl,OrderedSet[A],A>`_ is + ## that this proc returns `true` if `key` was missing from `s`. + ## The proc will return `false` if `key` was in `s` and it was removed + ## during this call. + ## + ## See also: + ## * `excl proc <#excl,OrderedSet[A],A>`_ + ## * `containsOrIncl proc <#containsOrIncl,OrderedSet[A],A>`_ + runnableExamples: + var s = toOrderedSet([2, 3, 6, 7]) + assert s.missingOrExcl(4) == true + assert s.missingOrExcl(6) == false + assert s.missingOrExcl(6) == true + exclImpl(s, key) + +proc clear*[A](s: var OrderedSet[A]) = + ## Clears the OrderedSet back to an empty state, without shrinking + ## any of the existing storage. + ## + ## `O(n)` operation where `n` is the size of the hash bucket. + runnableExamples: + var s = toOrderedSet([3, 5, 7]) + clear(s) + assert len(s) == 0 + + s.counter = 0 + s.first = -1 + s.last = -1 + for i in 0..<s.data.len: + s.data[i].hcode = 0 + s.data[i].next = 0 + s.data[i].key = default(type(s.data[i].key)) + +proc len*[A](s: OrderedSet[A]): int {.inline.} = + ## Returns the number of elements in `s`. + ## + ## Due to an implementation detail you can call this proc on variables which + ## have not been initialized yet. The proc will return zero as the length + ## then. + runnableExamples: + var a: OrderedSet[string] + assert len(a) == 0 + let s = toSet([3, 5, 7]) + assert len(s) == 3 + result = s.counter + +proc card*[A](s: OrderedSet[A]): int {.inline.} = + ## Alias for `len() <#len,OrderedSet[A]>`_. + ## + ## Card stands for the `cardinality + ## <http://en.wikipedia.org/wiki/Cardinality>`_ of a set. + result = s.counter proc `==`*[A](s, t: OrderedSet[A]): bool = ## Equality for ordered sets. + runnableExamples: + let + a = toOrderedSet([1, 2]) + b = toOrderedSet([2, 1]) + assert(not (a == b)) + if s.counter != t.counter: return false var h = s.first var g = t.first @@ -929,6 +1106,74 @@ proc `==`*[A](s, t: OrderedSet[A]): bool = g = nxg result = compared == s.counter +proc hash*[A](s: OrderedSet[A]): Hash = + ## Hashing of OrderedSet. + assert s.isValid, "The set needs to be initialized." + forAllOrderedPairs: + result = result !& s.data[h].hcode + result = !$result + +proc `$`*[A](s: OrderedSet[A]): string = + ## Converts the ordered hash set `s` to a string, mostly for logging and + ## printing purposes. + ## + ## Don't use this proc for serialization, the representation may change at + ## any moment and values are not escaped. + ## + ## **Examples:** + ## + ## .. code-block:: + ## echo toOrderedSet([2, 4, 5]) + ## # --> {2, 4, 5} + ## echo toOrderedSet(["no", "esc'aping", "is \" provided"]) + ## # --> {no, esc'aping, is " provided} + assert s.isValid, "The set needs to be initialized." + dollarImpl() + + + +iterator items*[A](s: OrderedSet[A]): A = + ## Iterates over keys in the ordered set `s` in insertion order. + ## + ## If you need a sequence with the elelments you can use `sequtils.toSeq + ## template <sequtils.html#toSeq.t,untyped>`_. + ## + ## .. code-block:: + ## var a = initOrderedSet[int]() + ## for value in [9, 2, 1, 5, 1, 8, 4, 2]: + ## a.incl(value) + ## for value in a.items: + ## echo "Got ", value + ## # --> Got 9 + ## # --> Got 2 + ## # --> Got 1 + ## # --> Got 5 + ## # --> Got 8 + ## # --> Got 4 + assert s.isValid, "The set needs to be initialized." + forAllOrderedPairs: + yield s.data[h].key + + +iterator pairs*[A](s: OrderedSet[A]): tuple[a: int, b: A] = + ## Iterates through (position, value) tuples of OrderedSet `s`. + runnableExamples: + let a = toOrderedSet("abracadabra") + var p = newSeq[(int, char)]() + for x in pairs(a): + p.add(x) + assert p == @[(0, 'a'), (1, 'b'), (2, 'r'), (3, 'c'), (4, 'd')] + + assert s.isValid, "The set needs to be initialized" + forAllOrderedPairs: + yield (idx, s.data[h].key) + + + +# ----------------------------------------------------------------------- + + + when isMainModule and not defined(release): proc testModule() = ## Internal micro test to validate docstrings and such. |