diff options
Diffstat (limited to 'lib/pure')
-rw-r--r-- | lib/pure/algorithm.nim | 439 | ||||
-rw-r--r-- | lib/pure/asyncdispatch.nim | 43 | ||||
-rw-r--r-- | lib/pure/asyncftpclient.nim | 52 | ||||
-rw-r--r-- | lib/pure/asyncmacro.nim | 6 | ||||
-rw-r--r-- | lib/pure/bitops.nim | 60 | ||||
-rw-r--r-- | lib/pure/collections/heapqueue.nim | 157 | ||||
-rw-r--r-- | lib/pure/collections/intsets.nim | 362 | ||||
-rw-r--r-- | lib/pure/collections/lists.nim | 8 | ||||
-rw-r--r-- | lib/pure/collections/sequtils.nim | 9 | ||||
-rw-r--r-- | lib/pure/collections/sets.nim | 1313 | ||||
-rw-r--r-- | lib/pure/httpclient.nim | 2 | ||||
-rw-r--r-- | lib/pure/json.nim | 30 | ||||
-rw-r--r-- | lib/pure/memfiles.nim | 5 | ||||
-rw-r--r-- | lib/pure/parsecsv.nim | 170 | ||||
-rw-r--r-- | lib/pure/parseopt.nim | 315 | ||||
-rw-r--r-- | lib/pure/parseopt2.nim | 155 | ||||
-rw-r--r-- | lib/pure/uri.nim | 9 | ||||
-rw-r--r-- | lib/pure/xmltree.nim | 543 |
18 files changed, 2523 insertions, 1155 deletions
diff --git a/lib/pure/algorithm.nim b/lib/pure/algorithm.nim index a5b18ae58..8c8838ed2 100644 --- a/lib/pure/algorithm.nim +++ b/lib/pure/algorithm.nim @@ -8,19 +8,59 @@ # ## This module implements some common generic algorithms. +## +## Basic usage +## =========== +## +## .. code-block:: +## import algorithm +## +## type People = tuple +## year: int +## name: string +## +## var a: seq[People] +## +## a.add((2000, "John")) +## a.add((2005, "Marie")) +## a.add((2010, "Jane")) +## +## # Sorting with default system.cmp +## a.sort() +## assert a == @[(year: 2000, name: "John"), (year: 2005, name: "Marie"), +## (year: 2010, name: "Jane")] +## +## proc myCmp(x, y: People): int = +## if x.name < y.name: -1 else: 1 +## +## # Sorting with custom proc +## a.sort(myCmp) +## assert a == @[(year: 2010, name: "Jane"), (year: 2000, name: "John"), +## (year: 2005, name: "Marie")] +## +## +## See also +## ======== +## * `sequtils module<sequtils.html>`_ for working with the built-in seq type +## * `tables module<tables.html>`_ for sorting tables type SortOrder* = enum Descending, Ascending proc `*`*(x: int, order: SortOrder): int {.inline.} = - ## flips ``x`` if ``order == Descending``. + ## Flips ``x`` if ``order == Descending``. ## If ``order == Ascending`` then ``x`` is returned. ## ## ``x`` is supposed to be the result of a comparator, i.e. ## | ``< 0`` for *less than*, ## | ``== 0`` for *equal*, ## | ``> 0`` for *greater than*. + runnableExamples: + assert `*`(-123, Descending) == 123 + assert `*`(123, Descending) == -123 + assert `*`(-123, Ascending) == -123 + assert `*`(123, Ascending) == 123 var y = order.ord - 1 result = (x xor y) - y @@ -31,28 +71,44 @@ template fillImpl[T](a: var openArray[T], first, last: int, value: T) = inc(x) proc fill*[T](a: var openArray[T], first, last: Natural, value: T) = - ## fills the slice ``a[first..last]`` with ``value``. + ## Fills the slice ``a[first..last]`` with ``value``. + ## + ## If an invalid range is passed, it raises IndexError. runnableExamples: - var a: array[6, int] - a.fill(1, 3, 9) - doAssert a == [0, 9, 9, 9, 0, 0] + var a: array[6, int] + a.fill(1, 3, 9) + assert a == [0, 9, 9, 9, 0, 0] + a.fill(3, 5, 7) + assert a == [0, 9, 9, 7, 7, 7] + doAssertRaises(IndexError, a.fill(1, 7, 9)) fillImpl(a, first, last, value) proc fill*[T](a: var openArray[T], value: T) = - ## fills the container ``a`` with ``value``. + ## Fills the container ``a`` with ``value``. runnableExamples: - var a: array[6, int] - a.fill(9) - doAssert a == [9, 9, 9, 9, 9, 9] + var a: array[6, int] + a.fill(9) + assert a == [9, 9, 9, 9, 9, 9] + a.fill(4) + assert a == [4, 4, 4, 4, 4, 4] fillImpl(a, 0, a.high, value) proc reverse*[T](a: var openArray[T], first, last: Natural) = - ## reverses the slice ``a[first..last]``. + ## Reverses the slice ``a[first..last]``. + ## + ## If an invalid range is passed, it raises IndexError. + ## + ## **See also:** + ## * `reversed proc<#reversed,openArray[T],Natural,int>`_ reverse a slice and returns a ``seq[T]`` + ## * `reversed proc<#reversed,openArray[T]>`_ reverse and returns a ``seq[T]`` runnableExamples: - var a = [1, 2, 3, 4, 5, 6] - a.reverse(1, 3) - doAssert a == [1, 4, 3, 2, 5, 6] + var a = [1, 2, 3, 4, 5, 6] + a.reverse(1, 3) + assert a == [1, 4, 3, 2, 5, 6] + a.reverse(1, 3) + assert a == [1, 2, 3, 4, 5, 6] + doAssertRaises(IndexError, a.reverse(1, 7)) var x = first var y = last while x < y: @@ -61,20 +117,32 @@ proc reverse*[T](a: var openArray[T], first, last: Natural) = inc(x) proc reverse*[T](a: var openArray[T]) = - ## reverses the contents of the container ``a``. + ## Reverses the contents of the container ``a``. + ## + ## **See also:** + ## * `reversed proc<#reversed,openArray[T],Natural,int>`_ reverse a slice and returns a ``seq[T]`` + ## * `reversed proc<#reversed,openArray[T]>`_ reverse and returns a ``seq[T]`` runnableExamples: - var a = [1, 2, 3, 4, 5, 6] - a.reverse() - doAssert a == [6, 5, 4, 3, 2, 1] + var a = [1, 2, 3, 4, 5, 6] + a.reverse() + assert a == [6, 5, 4, 3, 2, 1] + a.reverse() + assert a == [1, 2, 3, 4, 5, 6] reverse(a, 0, max(0, a.high)) proc reversed*[T](a: openArray[T], first: Natural, last: int): seq[T] = - ## returns the reverse of the slice ``a[first..last]``. + ## Returns the reverse of the slice ``a[first..last]``. + ## + ## If an invalid range is passed, it raises IndexError. + ## + ## **See also:** + ## * `reverse proc<#reverse,openArray[T],Natural,Natural>`_ reverse a slice + ## * `reverse proc<#reverse,openArray[T]>`_ runnableExamples: - let - a = [1, 2, 3, 4, 5, 6] - b = reversed(a, 1, 3) - doAssert b == @[4, 3, 2] + let + a = [1, 2, 3, 4, 5, 6] + b = a.reversed(1, 3) + assert b == @[4, 3, 2] assert last >= first-1 var i = last - first var x = first.int @@ -85,12 +153,16 @@ proc reversed*[T](a: openArray[T], first: Natural, last: int): seq[T] = inc(x) proc reversed*[T](a: openArray[T]): seq[T] = - ## returns the reverse of the container ``a``. + ## Returns the reverse of the container ``a``. + ## + ## **See also:** + ## * `reverse proc<#reverse,openArray[T],Natural,Natural>`_ reverse a slice + ## * `reverse proc<#reverse,openArray[T]>`_ runnableExamples: - let - a = [1, 2, 3, 4, 5, 6] - b = reversed(a) - doAssert b == @[6, 5, 4, 3, 2, 1] + let + a = [1, 2, 3, 4, 5, 6] + b = reversed(a) + assert b == @[6, 5, 4, 3, 2, 1] reversed(a, 0, a.high) proc binarySearch*[T, K](a: openArray[T], key: K, @@ -99,6 +171,9 @@ proc binarySearch*[T, K](a: openArray[T], key: K, ## ## ``cmp`` is the comparator function to use, the expected return values are ## the same as that of system.cmp. + runnableExamples: + assert binarySearch(["a","b","c","d"], "d", system.cmp[string]) == 3 + assert binarySearch(["a","b","d","c"], "d", system.cmp[string]) == 2 if a.len == 0: return -1 @@ -141,31 +216,41 @@ proc binarySearch*[T, K](a: openArray[T], key: K, proc binarySearch*[T](a: openArray[T], key: T): int = ## Binary search for ``key`` in ``a``. Returns -1 if not found. + runnableExamples: + assert binarySearch([0, 1, 2, 3, 4], 4) == 4 + assert binarySearch([0, 1, 4, 2, 3], 4) == 2 binarySearch(a, key, cmp[T]) proc smartBinarySearch*[T](a: openArray[T], key: T): int {.deprecated.} = - ## **Deprecated since version 0.18.1**; Use ``binarySearch`` instead. + ## **Deprecated since version 0.18.1**; Use `binarySearch proc + ## <#binarySearch,openArray[T],T>`_ instead. binarySearch(a, key, cmp[T]) const onlySafeCode = true proc lowerBound*[T, K](a: openArray[T], key: K, cmp: proc(x: T, k: K): int {.closure.}): int = - ## returns a position to the first element in the ``a`` that is greater than + ## Returns a position to the first element in the ``a`` that is greater than ## ``key``, or last if no such element is found. ## In other words if you have a sorted sequence and you call ## ``insert(thing, elm, lowerBound(thing, elm))`` ## the sequence will still be sorted. ## - ## The first version uses ``cmp`` to compare the elements. - ## The expected return values are the same as that of ``system.cmp``. - ## The second version uses the default comparison function ``cmp``. + ## If an invalid range is passed, it raises IndexError. ## - ## .. code-block:: nim + ## The version uses ``cmp`` to compare the elements. + ## The expected return values are the same as that of ``system.cmp``. ## - ## var arr = @[1,2,3,5,6,7,8,9] - ## arr.insert(4, arr.lowerBound(4)) - ## # after running the above arr is `[1,2,3,4,5,6,7,8,9]` + ## **See also:** + ## * `upperBound proc<#upperBound,openArray[T],K,proc(T,K)>`_ sorted by ``cmp`` in the specified order + ## * `upperBound proc<#upperBound,openArray[T],T>`_ + runnableExamples: + var arr = @[1,2,3,5,6,7,8,9] + assert arr.lowerBound(3, system.cmp[int]) == 2 + assert arr.lowerBound(4, system.cmp[int]) == 3 + assert arr.lowerBound(5, system.cmp[int]) == 3 + arr.insert(4, arr.lowerBound(4, system.cmp[int])) + assert arr == [1,2,3,4,5,6,7,8,9] result = a.low var count = a.high - a.low + 1 var step, pos: int @@ -179,23 +264,40 @@ proc lowerBound*[T, K](a: openArray[T], key: K, cmp: proc(x: T, k: K): int {.clo count = step proc lowerBound*[T](a: openArray[T], key: T): int = lowerBound(a, key, cmp[T]) + ## Returns a position to the first element in the ``a`` that is greater than + ## ``key``, or last if no such element is found. + ## In other words if you have a sorted sequence and you call + ## ``insert(thing, elm, lowerBound(thing, elm))`` + ## the sequence will still be sorted. + ## + ## The version uses the default comparison function ``cmp``. + ## + ## **See also:** + ## * `upperBound proc<#upperBound,openArray[T],K,proc(T,K)>`_ sorted by ``cmp`` in the specified order + ## * `upperBound proc<#upperBound,openArray[T],T>`_ proc upperBound*[T, K](a: openArray[T], key: K, cmp: proc(x: T, k: K): int {.closure.}): int = - ## returns a position to the first element in the ``a`` that is not less + ## Returns a position to the first element in the ``a`` that is not less ## (i.e. greater or equal to) than ``key``, or last if no such element is found. ## In other words if you have a sorted sequence and you call ## ``insert(thing, elm, upperBound(thing, elm))`` ## the sequence will still be sorted. ## - ## The first version uses ``cmp`` to compare the elements. The expected - ## return values are the same as that of ``system.cmp``. - ## The second version uses the default comparison function ``cmp``. + ## If an invalid range is passed, it raises IndexError. ## - ## .. code-block:: nim + ## The version uses ``cmp`` to compare the elements. The expected + ## return values are the same as that of ``system.cmp``. ## - ## var arr = @[1,2,3,4,6,7,8,9] - ## arr.insert(5, arr.upperBound(4)) - ## # after running the above arr is `[1,2,3,4,5,6,7,8,9]` + ## **See also:** + ## * `lowerBound proc<#lowerBound,openArray[T],K,proc(T,K)>`_ sorted by ``cmp`` in the specified order + ## * `lowerBound proc<#lowerBound,openArray[T],T>`_ + runnableExamples: + var arr = @[1,2,3,5,6,7,8,9] + assert arr.upperBound(2, system.cmp[int]) == 2 + assert arr.upperBound(3, system.cmp[int]) == 3 + assert arr.upperBound(4, system.cmp[int]) == 3 + arr.insert(4, arr.upperBound(3, system.cmp[int])) + assert arr == [1,2,3,4,5,6,7,8,9] result = a.low var count = a.high - a.low + 1 var step, pos: int @@ -209,6 +311,17 @@ proc upperBound*[T, K](a: openArray[T], key: K, cmp: proc(x: T, k: K): int {.clo count = step proc upperBound*[T](a: openArray[T], key: T): int = upperBound(a, key, cmp[T]) + ## Returns a position to the first element in the ``a`` that is not less + ## (i.e. greater or equal to) than ``key``, or last if no such element is found. + ## In other words if you have a sorted sequence and you call + ## ``insert(thing, elm, upperBound(thing, elm))`` + ## the sequence will still be sorted. + ## + ## The version uses the default comparison function ``cmp``. + ## + ## **See also:** + ## * `lowerBound proc<#lowerBound,openArray[T],K,proc(T,K)>`_ sorted by ``cmp`` in the specified order + ## * `lowerBound proc<#lowerBound,openArray[T],T>`_ template `<-` (a, b) = when false: @@ -263,6 +376,7 @@ func sort*[T](a: var openArray[T], ## Default Nim sort (an implementation of merge sort). The sorting ## is guaranteed to be stable and the worst case is guaranteed to ## be O(n log n). + ## ## The current implementation uses an iterative ## mergesort to achieve this. It uses a temporary sequence of ## length ``a.len div 2``. If you do not wish to provide your own @@ -272,7 +386,6 @@ func sort*[T](a: var openArray[T], ## .. code-block:: nim ## ## sort(myIntArray, system.cmp[int]) - ## ## # do not use cmp[string] here as we want to use the specialized ## # overload: ## sort(myStrArray, system.cmp) @@ -286,6 +399,19 @@ func sort*[T](a: var openArray[T], ## result = cmp(x.surname, y.surname) ## if result == 0: ## result = cmp(x.name, y.name) + ## + ## **See also:** + ## * `sort proc<#sort,openArray[T]>`_ + ## * `sorted proc<#sorted,openArray[T],proc(T,T)>`_ sorted by ``cmp`` in the specified order + ## * `sorted proc<#sorted,openArray[T]>`_ + ## * `sortedByIt template<#sortedByIt.t,untyped,untyped>`_ + runnableExamples: + var d = ["boo", "fo", "barr", "qux"] + proc myCmp(x, y: string): int = + if x.len() > y.len() or x.len() == y.len(): 1 + else: -1 + sort(d, myCmp) + assert d == ["fo", "qux", "boo", "barr"] var n = a.len var b: seq[T] newSeq(b, n div 2) @@ -299,17 +425,30 @@ func sort*[T](a: var openArray[T], proc sort*[T](a: var openArray[T], order = SortOrder.Ascending) = sort[T](a, system.cmp[T], order) ## Shortcut version of ``sort`` that uses ``system.cmp[T]`` as the comparison function. + ## + ## **See also:** + ## * `sort func<#sort,openArray[T],proc(T,T)>`_ + ## * `sorted proc<#sorted,openArray[T],proc(T,T)>`_ sorted by ``cmp`` in the specified order + ## * `sorted proc<#sorted,openArray[T]>`_ + ## * `sortedByIt template<#sortedByIt.t,untyped,untyped>`_ proc sorted*[T](a: openArray[T], cmp: proc(x, y: T): int {.closure.}, order = SortOrder.Ascending): seq[T] = - ## returns ``a`` sorted by ``cmp`` in the specified ``order``. + ## Returns ``a`` sorted by ``cmp`` in the specified ``order``. + ## + ## **See also:** + ## * `sort func<#sort,openArray[T],proc(T,T)>`_ + ## * `sort proc<#sort,openArray[T]>`_ + ## * `sortedByIt template<#sortedByIt.t,untyped,untyped>`_ runnableExamples: - let - a = [2, 3, 1, 5, 4] - b = sorted(a, system.cmp) - c = sorted(a, system.cmp, Descending) - doAssert b == @[1, 2, 3, 4, 5] - doAssert c == @[5, 4, 3, 2, 1] + let + a = [2, 3, 1, 5, 4] + b = sorted(a, system.cmp[int]) + c = sorted(a, system.cmp[int], Descending) + d = sorted(["adam", "dande", "brian", "cat"], system.cmp[string]) + assert b == @[1, 2, 3, 4, 5] + assert c == @[5, 4, 3, 2, 1] + assert d == @["adam", "brian", "cat", "dande"] result = newSeq[T](a.len) for i in 0 .. a.high: result[i] = a[i] @@ -317,33 +456,48 @@ proc sorted*[T](a: openArray[T], cmp: proc(x, y: T): int {.closure.}, proc sorted*[T](a: openArray[T], order = SortOrder.Ascending): seq[T] = ## Shortcut version of ``sorted`` that uses ``system.cmp[T]`` as the comparison function. + ## + ## **See also:** + ## * `sort func<#sort,openArray[T],proc(T,T)>`_ + ## * `sort proc<#sort,openArray[T]>`_ + ## * `sortedByIt template<#sortedByIt.t,untyped,untyped>`_ + runnableExamples: + let + a = [2, 3, 1, 5, 4] + b = sorted(a) + c = sorted(a, Descending) + d = sorted(["adam", "dande", "brian", "cat"]) + assert b == @[1, 2, 3, 4, 5] + assert c == @[5, 4, 3, 2, 1] + assert d == @["adam", "brian", "cat", "dande"] sorted[T](a, system.cmp[T], order) template sortedByIt*(seq1, op: untyped): untyped = ## Convenience template around the ``sorted`` proc to reduce typing. ## ## The template injects the ``it`` variable which you can use directly in an - ## expression. Example: - ## - ## .. code-block:: nim - ## - ## type Person = tuple[name: string, age: int] - ## var - ## p1: Person = (name: "p1", age: 60) - ## p2: Person = (name: "p2", age: 20) - ## p3: Person = (name: "p3", age: 30) - ## p4: Person = (name: "p4", age: 30) - ## people = @[p1,p2,p4,p3] - ## - ## echo people.sortedByIt(it.name) + ## expression. ## ## Because the underlying ``cmp()`` is defined for tuples you can do - ## a nested sort like in the following example: - ## - ## .. code-block:: nim - ## - ## echo people.sortedByIt((it.age, it.name)) + ## a nested sort. ## + ## **See also:** + ## * `sort func<#sort,openArray[T],proc(T,T)>`_ + ## * `sort proc<#sort,openArray[T]>`_ + ## * `sorted proc<#sorted,openArray[T],proc(T,T)>`_ sorted by ``cmp`` in the specified order + ## * `sorted proc<#sorted,openArray[T]>`_ + runnableExamples: + type Person = tuple[name: string, age: int] + var + p1: Person = (name: "p1", age: 60) + p2: Person = (name: "p2", age: 20) + p3: Person = (name: "p3", age: 30) + p4: Person = (name: "p4", age: 30) + people = @[p1,p2,p4,p3] + + assert people.sortedByIt(it.name) == @[(name: "p1", age: 60), (name: "p2", age: 20), (name: "p3", age: 30), (name: "p4", age: 30)] + # Nested sort + assert people.sortedByIt((it.age, it.name)) == @[(name: "p2", age: 20), (name: "p3", age: 30), (name: "p4", age: 30), (name: "p1", age: 60)] var result = sorted(seq1, proc(x, y: type(seq1[0])): int = var it {.inject.} = x let a = op @@ -355,9 +509,25 @@ template sortedByIt*(seq1, op: untyped): untyped = func isSorted*[T](a: openArray[T], cmp: proc(x, y: T): int {.closure.}, order = SortOrder.Ascending): bool = - ## checks to see whether ``a`` is already sorted in ``order`` + ## Checks to see whether ``a`` is already sorted in ``order`` ## using ``cmp`` for the comparison. Parameters identical ## to ``sort``. + ## + ## **See also:** + ## * `isSorted proc<#isSorted,openArray[T]>`_ + runnableExamples: + let + a = [2, 3, 1, 5, 4] + b = [1, 2, 3, 4, 5] + c = [5, 4, 3, 2, 1] + d = ["adam", "brian", "cat", "dande"] + e = ["adam", "dande", "brian", "cat"] + assert isSorted(a) == false + assert isSorted(b) == true + assert isSorted(c) == false + assert isSorted(c, Descending) == true + assert isSorted(d) == true + assert isSorted(e) == false result = true for i in 0..<len(a)-1: if cmp(a[i],a[i+1]) * order > 0: @@ -365,11 +535,30 @@ func isSorted*[T](a: openArray[T], proc isSorted*[T](a: openarray[T], order = SortOrder.Ascending): bool = ## Shortcut version of ``isSorted`` that uses ``system.cmp[T]`` as the comparison function. + ## + ## **See also:** + ## * `isSorted func<#isSorted,openArray[T],proc(T,T)>`_ + runnableExamples: + let + a = [2, 3, 1, 5, 4] + b = [1, 2, 3, 4, 5] + c = [5, 4, 3, 2, 1] + d = ["adam", "brian", "cat", "dande"] + e = ["adam", "dande", "brian", "cat"] + assert isSorted(a) == false + assert isSorted(b) == true + assert isSorted(c) == false + assert isSorted(c, Descending) == true + assert isSorted(d) == true + assert isSorted(e) == false isSorted(a, system.cmp[T], order) proc product*[T](x: openArray[seq[T]]): seq[seq[T]] = - ## produces the Cartesian product of the array. Warning: complexity + ## Produces the Cartesian product of the array. Warning: complexity ## may explode. + runnableExamples: + assert product(@[@[1], @[2]]) == @[@[1, 2]] + assert product(@[@["A", "K"], @["Q"]]) == @[@["K", "Q"], @["A", "Q"]] result = newSeq[seq[T]]() if x.len == 0: return @@ -401,15 +590,26 @@ proc product*[T](x: openArray[seq[T]]): seq[seq[T]] = indexes[index] -= 1 proc nextPermutation*[T](x: var openarray[T]): bool {.discardable.} = - ## calculates the next lexicographic permutation, directly modifying ``x``. + ## Calculates the next lexicographic permutation, directly modifying ``x``. ## The result is whether a permutation happened, otherwise we have reached ## the last-ordered permutation. ## - ## .. code-block:: nim + ## If you start with an unsorted array/seq, the repeated permutations + ## will **not** give you all permutations but stop with last. ## - ## var v = @[0, 1, 2, 3, 4, 5, 6, 7, 8, 9] - ## v.nextPermutation() - ## echo v # @[0, 1, 2, 3, 4, 5, 6, 7, 9, 8] + ## **See also:** + ## * `prevPermutation proc<#prevPermutation,openArray[T]>`_ + runnableExamples: + var v = @[0, 1, 2, 3] + assert v.nextPermutation() == true + assert v == @[0, 1, 3, 2] + assert v.nextPermutation() == true + assert v == @[0, 2, 1, 3] + assert v.prevPermutation() == true + assert v == @[0, 1, 3, 2] + v = @[3, 2, 1, 0] + assert v.nextPermutation() == false + assert v == @[3, 2, 1, 0] if x.len < 2: return false @@ -430,15 +630,20 @@ proc nextPermutation*[T](x: var openarray[T]): bool {.discardable.} = result = true proc prevPermutation*[T](x: var openarray[T]): bool {.discardable.} = - ## calculates the previous lexicographic permutation, directly modifying + ## Calculates the previous lexicographic permutation, directly modifying ## ``x``. The result is whether a permutation happened, otherwise we have ## reached the first-ordered permutation. ## - ## .. code-block:: nim - ## - ## var v = @[0, 1, 2, 3, 4, 5, 6, 7, 9, 8] - ## v.prevPermutation() - ## echo v # @[0, 1, 2, 3, 4, 5, 6, 7, 8, 9] + ## **See also:** + ## * `nextPermutation proc<#nextPermutation,openArray[T]>`_ + runnableExamples: + var v = @[0, 1, 2, 3] + assert v.prevPermutation() == false + assert v == @[0, 1, 2, 3] + assert v.nextPermutation() == true + assert v == @[0, 1, 3, 2] + assert v.prevPermutation() == true + assert v == @[0, 1, 2, 3] if x.len < 2: return false @@ -542,7 +747,7 @@ proc rotatedInternal[T](arg: openarray[T]; first, middle, last: int): seq[T] = result[i] = arg[i] proc rotateLeft*[T](arg: var openarray[T]; slice: HSlice[int, int]; dist: int): int {.discardable.} = - ## performs a left rotation on a range of elements. If you want to rotate + ## Performs a left rotation on a range of elements. If you want to rotate ## right, use a negative ``dist``. Specifically, ``rotateLeft`` rotates ## the elements at ``slice`` by ``dist`` positions. ## @@ -553,6 +758,7 @@ proc rotateLeft*[T](arg: var openarray[T]; slice: HSlice[int, int]; dist: int): ## ## Elements outside of ``slice`` will be left unchanged. ## The time complexity is linear to ``slice.b - slice.a + 1``. + ## If an invalid range (``HSlice``) is passed, it raises IndexError. ## ## ``slice`` ## The indices of the element range that should be rotated. @@ -561,11 +767,18 @@ proc rotateLeft*[T](arg: var openarray[T]; slice: HSlice[int, int]; dist: int): ## The distance in amount of elements that the data should be rotated. ## Can be negative, can be any number. ## - ## .. code-block:: nim - ## - ## var list = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10] - ## list.rotateLeft(1 .. 8, 3) - ## doAssert list == [0, 4, 5, 6, 7, 8, 1, 2, 3, 9, 10] + ## **See also:** + ## * `rotateLeft proc<#rotateLeft,openArray[T],int>`_ for a version which rotates the whole container + ## * `rotatedLeft proc<#rotatedLeft,openArray[T],HSlice[int,int],int>`_ for a version which returns a ``seq[T]`` + runnableExamples: + var a = [0, 1, 2, 3, 4, 5] + a.rotateLeft(1 .. 4, 3) + assert a == [0, 4, 1, 2, 3, 5] + a.rotateLeft(1 .. 4, 3) + assert a == [0, 3, 4, 1, 2, 5] + a.rotateLeft(1 .. 4, -3) + assert a == [0, 4, 1, 2, 3, 5] + doAssertRaises(IndexError, a.rotateLeft(1 .. 7, 2)) let sliceLen = slice.b + 1 - slice.a let distLeft = ((dist mod sliceLen) + sliceLen) mod sliceLen arg.rotateInternal(slice.a, slice.a+distLeft, slice.b + 1) @@ -573,10 +786,18 @@ proc rotateLeft*[T](arg: var openarray[T]; slice: HSlice[int, int]; dist: int): proc rotateLeft*[T](arg: var openarray[T]; dist: int): int {.discardable.} = ## Default arguments for slice, so that this procedure operates on the entire ## ``arg``, and not just on a part of it. + ## + ## **See also:** + ## * `rotateLeft proc<#rotateLeft,openArray[T],HSlice[int,int],int>`_ for a version which rotates a range + ## * `rotatedLeft proc<#rotatedLeft,openArray[T],int>`_ for a version which returns a ``seq[T]`` runnableExamples: - var a = [1, 2, 3, 4, 5] - a.rotateLeft(2) - doAssert a == [3, 4, 5, 1, 2] + var a = [1, 2, 3, 4, 5] + a.rotateLeft(2) + assert a == [3, 4, 5, 1, 2] + a.rotateLeft(4) + assert a == [2, 3, 4, 5, 1] + a.rotateLeft(-6) + assert a == [1, 2, 3, 4, 5] let arglen = arg.len let distLeft = ((dist mod arglen) + arglen) mod arglen arg.rotateInternal(0, distLeft, arglen) @@ -584,6 +805,28 @@ proc rotateLeft*[T](arg: var openarray[T]; dist: int): int {.discardable.} = proc rotatedLeft*[T](arg: openarray[T]; slice: HSlice[int, int], dist: int): seq[T] = ## Same as ``rotateLeft``, just with the difference that it does ## not modify the argument. It creates a new ``seq`` instead. + ## + ## Elements outside of ``slice`` will be left unchanged. + ## If an invalid range (``HSlice``) is passed, it raises IndexError. + ## + ## ``slice`` + ## The indices of the element range that should be rotated. + ## + ## ``dist`` + ## The distance in amount of elements that the data should be rotated. + ## Can be negative, can be any number. + ## + ## **See also:** + ## * `rotateLeft proc<#rotateLeft,openArray[T],HSlice[int,int],int>`_ for the in-place version of this proc + ## * `rotatedLeft proc<#rotatedLeft,openArray[T],int>`_ for a version which rotates the whole container + runnableExamples: + var a = @[1, 2, 3, 4, 5] + a = rotatedLeft(a, 1 .. 4, 3) + assert a == @[1, 5, 2, 3, 4] + a = rotatedLeft(a, 1 .. 3, 2) + assert a == @[1, 3, 5, 2, 4] + a = rotatedLeft(a, 1 .. 3, -2) + assert a == @[1, 5, 2, 3, 4] let sliceLen = slice.b + 1 - slice.a let distLeft = ((dist mod sliceLen) + sliceLen) mod sliceLen arg.rotatedInternal(slice.a, slice.a+distLeft, slice.b+1) @@ -591,6 +834,18 @@ proc rotatedLeft*[T](arg: openarray[T]; slice: HSlice[int, int], dist: int): seq proc rotatedLeft*[T](arg: openarray[T]; dist: int): seq[T] = ## Same as ``rotateLeft``, just with the difference that it does ## not modify the argument. It creates a new ``seq`` instead. + ## + ## **See also:** + ## * `rotateLeft proc<#rotateLeft,openArray[T],int>`_ for the in-place version of this proc + ## * `rotatedLeft proc<#rotatedLeft,openArray[T],HSlice[int,int],int>`_ for a version which rotates a range + runnableExamples: + var a = @[1, 2, 3, 4, 5] + a = rotatedLeft(a, 2) + assert a == @[3, 4, 5, 1, 2] + a = rotatedLeft(a, 4) + assert a == @[2, 3, 4, 5, 1] + a = rotatedLeft(a, -6) + assert a == @[1, 2, 3, 4, 5] let arglen = arg.len let distLeft = ((dist mod arglen) + arglen) mod arglen arg.rotatedInternal(0, distLeft, arg.len) diff --git a/lib/pure/asyncdispatch.nim b/lib/pure/asyncdispatch.nim index 36319a317..5953ed975 100644 --- a/lib/pure/asyncdispatch.nim +++ b/lib/pure/asyncdispatch.nim @@ -1674,7 +1674,7 @@ template asyncAddrInfoLoop(addrInfo: ptr AddrInfo, fd: untyped, curFd = fdPerDomain[ord(domain)] if curFd == osInvalidSocket.AsyncFD: try: - curFd = newAsyncNativeSocket(domain, sockType, protocol) + curFd = createAsyncNativeSocket(domain, sockType, protocol) except: freeAddrInfo(addrInfo) closeUnusedFds() @@ -1806,47 +1806,6 @@ proc readAll*(future: FutureStream[string]): Future[string] {.async.} = else: break -proc recvLine*(socket: AsyncFD): Future[string] {.async, deprecated.} = - ## Reads a line of data from ``socket``. Returned future will complete once - ## a full line is read or an error occurs. - ## - ## If a full line is read ``\r\L`` is not - ## added to ``line``, however if solely ``\r\L`` is read then ``line`` - ## will be set to it. - ## - ## If the socket is disconnected, ``line`` will be set to ``""``. - ## - ## If the socket is disconnected in the middle of a line (before ``\r\L`` - ## is read) then line will be set to ``""``. - ## The partial line **will be lost**. - ## - ## **Warning**: This assumes that lines are delimited by ``\r\L``. - ## - ## **Note**: This procedure is mostly used for testing. You likely want to - ## use ``asyncnet.recvLine`` instead. - ## - ## **Deprecated since version 0.15.0**: Use ``asyncnet.recvLine()`` instead. - - template addNLIfEmpty(): typed = - if result.len == 0: - result.add("\c\L") - - result = "" - var c = "" - while true: - c = await recv(socket, 1) - if c.len == 0: - return "" - if c == "\r": - c = await recv(socket, 1) - assert c == "\l" - addNLIfEmpty() - return - elif c == "\L": - addNLIfEmpty() - return - add(result, c) - proc callSoon*(cbproc: proc ()) = ## Schedule `cbproc` to be called as soon as possible. ## The callback is called when control returns to the event loop. diff --git a/lib/pure/asyncftpclient.nim b/lib/pure/asyncftpclient.nim index 3d6a9a015..d28e9fb57 100644 --- a/lib/pure/asyncftpclient.nim +++ b/lib/pure/asyncftpclient.nim @@ -75,14 +75,54 @@ ## waitFor(main()) -import asyncdispatch, asyncnet, strutils, parseutils, os, times - -from ftpclient import FtpBaseObj, ReplyError, FtpEvent +import asyncdispatch, asyncnet, nativesockets, strutils, parseutils, os, times from net import BufferSize type - AsyncFtpClientObj* = FtpBaseObj[AsyncSocket] - AsyncFtpClient* = ref AsyncFtpClientObj + AsyncFtpClient* = ref object + csock*: AsyncSocket + dsock*: AsyncSocket + user*, pass*: string + address*: string + port*: Port + jobInProgress*: bool + job*: FTPJob + dsockConnected*: bool + + FTPJobType* = enum + JRetrText, JRetr, JStore + + FtpJob = ref object + prc: proc (ftp: AsyncFtpClient, async: bool): bool {.nimcall, gcsafe.} + case typ*: FTPJobType + of JRetrText: + lines: string + of JRetr, JStore: + file: File + filename: string + total: BiggestInt # In bytes. + progress: BiggestInt # In bytes. + oneSecond: BiggestInt # Bytes transferred in one second. + lastProgressReport: float # Time + toStore: string # Data left to upload (Only used with async) + + FTPEventType* = enum + EvTransferProgress, EvLines, EvRetr, EvStore + + FTPEvent* = object ## Event + filename*: string + case typ*: FTPEventType + of EvLines: + lines*: string ## Lines that have been transferred. + of EvRetr, EvStore: ## Retr/Store operation finished. + nil + of EvTransferProgress: + bytesTotal*: BiggestInt ## Bytes total. + bytesFinished*: BiggestInt ## Bytes transferred. + speed*: BiggestInt ## Speed in bytes/s + currentJob*: FTPJobType ## The current job being performed. + + ReplyError* = object of IOError ProgressChangedProc* = proc (total, progress: BiggestInt, speed: float): @@ -183,7 +223,7 @@ proc listDirs*(ftp: AsyncFtpClient, dir = ""): Future[seq[string]] {.async.} = ## Returns a list of filenames in the given directory. If ``dir`` is "", ## the current directory is used. If ``async`` is true, this ## function will return immediately and it will be your job to - ## use asyncio's ``poll`` to progress this operation. + ## use asyncdispatch's ``poll`` to progress this operation. await ftp.pasv() assertReply(await(ftp.send("NLST " & dir.normalizePathSep)), ["125", "150"]) diff --git a/lib/pure/asyncmacro.nim b/lib/pure/asyncmacro.nim index b18d20d55..f1e0aa568 100644 --- a/lib/pure/asyncmacro.nim +++ b/lib/pure/asyncmacro.nim @@ -245,6 +245,12 @@ proc asyncSingleProc(prc: NimNode): NimNode {.compileTime.} = var outerProcBody = newNimNode(nnkStmtList, prc.body) + # Extract the documentation comment from the original procedure declaration. + # Note that we're not removing it from the body in order not to make this + # transformation even more complex. + if prc.body.len > 1 and prc.body[0].kind == nnkCommentStmt: + outerProcBody.add(prc.body[0]) + # -> var retFuture = newFuture[T]() var retFutureSym = genSym(nskVar, "retFuture") var subRetType = diff --git a/lib/pure/bitops.nim b/lib/pure/bitops.nim index 3f213c5ea..d258a42de 100644 --- a/lib/pure/bitops.nim +++ b/lib/pure/bitops.nim @@ -8,7 +8,8 @@ # ## This module implements a series of low level methods for bit manipulation. -## By default, this module use compiler intrinsics to improve performance + +## By default, this module use compiler intrinsics where possible to improve performance ## on supported compilers: ``GCC``, ``LLVM_GCC``, ``CLANG``, ``VCC``, ``ICC``. ## ## The module will fallback to pure nim procs incase the backend is not supported. @@ -32,6 +33,63 @@ const useICC_builtins = defined(icc) and useBuiltins const useVCC_builtins = defined(vcc) and useBuiltins const arch64 = sizeof(int) == 8 +when defined(nimHasalignOf): + + import macros + + type BitsRange*[T] = range[0..sizeof(T)*8-1] + ## Returns a range with all bit positions for type ``T`` + + proc setMask*[T: SomeInteger](v: var T, mask: T) {.inline.} = + ## Returns ``v``, with all the ``1`` bits from ``mask`` set to 1 + v = v or mask + + proc clearMask*[T: SomeInteger](v: var T, mask: T) {.inline.} = + ## Returns ``v``, with all the ``1`` bits from ``mask`` set to 0 + v = v and not mask + + proc flipMask*[T: SomeInteger](v: var T, mask: T) {.inline.} = + ## Returns ``v``, with all the ``1`` bits from ``mask`` flipped + v = v xor mask + + proc setBit*[T: SomeInteger](v: var T, bit: BitsRange[T]) {.inline.} = + ## Returns ``v``, with the bit at position ``bit`` set to 1 + v.setMask(1.T shl bit) + + proc clearBit*[T: SomeInteger](v: var T, bit: BitsRange[T]) {.inline.} = + ## Returns ``v``, with the bit at position ``bit`` set to 0 + v.clearMask(1.T shl bit) + + proc flipBit*[T: SomeInteger](v: var T, bit: BitsRange[T]) {.inline.} = + ## Returns ``v``, with the bit at position ``bit`` flipped + v.flipMask(1.T shl bit) + + macro setBits*(v: typed, bits: varargs[typed]): untyped = + ## Returns ``v``, with the bits at positions ``bits`` set to 1 + bits.expectKind(nnkBracket) + result = newStmtList() + for bit in bits: + result.add newCall("setBit", v, bit) + + macro clearBits*(v: typed, bits: varargs[typed]): untyped = + ## Returns ``v``, with the bits at positions ``bits`` set to 0 + bits.expectKind(nnkBracket) + result = newStmtList() + for bit in bits: + result.add newCall("clearBit", v, bit) + + macro flipBits*(v: typed, bits: varargs[typed]): untyped = + ## Returns ``v``, with the bits at positions ``bits`` set to 0 + bits.expectKind(nnkBracket) + result = newStmtList() + for bit in bits: + result.add newCall("flipBit", v, bit) + + proc testBit*[T: SomeInteger](v: var T, bit: BitsRange[T]): bool {.inline.} = + ## Returns true if the bit in ``v`` at positions ``bit`` is set to 1 + let mask = 1.T shl bit + return (v and mask) == mask + # #### Pure Nim version #### proc firstSetBit_nim(x: uint32): int {.inline, nosideeffect.} = diff --git a/lib/pure/collections/heapqueue.nim b/lib/pure/collections/heapqueue.nim index 60869142e..cdb8db6e1 100644 --- a/lib/pure/collections/heapqueue.nim +++ b/lib/pure/collections/heapqueue.nim @@ -1,4 +1,3 @@ - # # # Nim's Runtime Library @@ -7,32 +6,74 @@ # See the file "copying.txt", included in this # distribution, for details about the copyright. -##[ Heap queue algorithm (a.k.a. priority queue). Ported from Python heapq. - -Heaps are arrays for which a[k] <= a[2*k+1] and a[k] <= a[2*k+2] for -all k, counting elements from 0. For the sake of comparison, -non-existing elements are considered to be infinite. The interesting -property of a heap is that a[0] is always its smallest element. - +##[ + The `heapqueue` module implements a + `heap data structure<https://en.wikipedia.org/wiki/Heap_(data_structure)>`_ + that can be used as a + `priority queue<https://en.wikipedia.org/wiki/Priority_queue>`_. + Heaps are arrays for which `a[k] <= a[2*k+1]` and `a[k] <= a[2*k+2]` for + all `k`, counting elements from 0. The interesting property of a heap is that + `a[0]` is always its smallest element. + + Basic usage + ----------- + .. code-block:: Nim + import heapqueue + + var heap = initHeapQueue[int]() + heap.push(8) + heap.push(2) + heap.push(5) + # The first element is the lowest element + assert heap[0] == 2 + # Remove and return the lowest element + assert heap.pop() == 2 + # The lowest element remaining is 5 + assert heap[0] == 5 + + Usage with custom object + ------------------------ + To use a `HeapQueue` with a custom object, the `<` operator must be + implemented. + + .. code-block:: Nim + import heapqueue + + type Job = object + priority: int + + proc `<`(a, b: Job): bool = a.priority < b.priority + + var jobs = initHeapQueue[Job]() + jobs.push(Job(priority: 1)) + jobs.push(Job(priority: 2)) + + assert jobs[0].priority == 1 ]## -type HeapQueue*[T] = distinct seq[T] +type HeapQueue*[T] = object + ## A heap queue, commonly known as a priority queue. + data: seq[T] -proc newHeapQueue*[T](): HeapQueue[T] {.inline.} = HeapQueue[T](newSeq[T]()) -proc newHeapQueue*[T](h: var HeapQueue[T]) {.inline.} = h = HeapQueue[T](newSeq[T]()) +proc initHeapQueue*[T](): HeapQueue[T] = + ## Create a new empty heap. + discard -proc len*[T](h: HeapQueue[T]): int {.inline.} = seq[T](h).len -proc `[]`*[T](h: HeapQueue[T], i: int): T {.inline.} = seq[T](h)[i] -proc `[]=`[T](h: var HeapQueue[T], i: int, v: T) {.inline.} = seq[T](h)[i] = v -proc add[T](h: var HeapQueue[T], v: T) {.inline.} = seq[T](h).add(v) +proc len*[T](heap: HeapQueue[T]): int {.inline.} = + ## Return the number of elements of `heap`. + heap.data.len + +proc `[]`*[T](heap: HeapQueue[T], i: Natural): T {.inline.} = + ## Access the i-th element of `heap`. + heap.data[i] proc heapCmp[T](x, y: T): bool {.inline.} = return (x < y) -# 'heap' is a heap at all indices >= startpos, except possibly for pos. pos -# is the index of a leaf with a possibly out-of-order value. Restore the -# heap invariant. proc siftdown[T](heap: var HeapQueue[T], startpos, p: int) = + ## 'heap' is a heap at all indices >= startpos, except possibly for pos. pos + ## is the index of a leaf with a possibly out-of-order value. Restore the + ## heap invariant. var pos = p var newitem = heap[pos] # Follow the path to the root, moving parents down until finding a place @@ -41,11 +82,11 @@ proc siftdown[T](heap: var HeapQueue[T], startpos, p: int) = let parentpos = (pos - 1) shr 1 let parent = heap[parentpos] if heapCmp(newitem, parent): - heap[pos] = parent + heap.data[pos] = parent pos = parentpos else: break - heap[pos] = newitem + heap.data[pos] = newitem proc siftup[T](heap: var HeapQueue[T], p: int) = let endpos = len(heap) @@ -60,48 +101,50 @@ proc siftup[T](heap: var HeapQueue[T], p: int) = if rightpos < endpos and not heapCmp(heap[childpos], heap[rightpos]): childpos = rightpos # Move the smaller child up. - heap[pos] = heap[childpos] + heap.data[pos] = heap[childpos] pos = childpos childpos = 2*pos + 1 # The leaf at pos is empty now. Put newitem there, and bubble it up # to its final resting place (by sifting its parents down). - heap[pos] = newitem + heap.data[pos] = newitem siftdown(heap, startpos, pos) proc push*[T](heap: var HeapQueue[T], item: T) = - ## Push item onto heap, maintaining the heap invariant. - (seq[T](heap)).add(item) + ## Push `item` onto heap, maintaining the heap invariant. + heap.data.add(item) siftdown(heap, 0, len(heap)-1) proc pop*[T](heap: var HeapQueue[T]): T = - ## Pop the smallest item off the heap, maintaining the heap invariant. - let lastelt = seq[T](heap).pop() + ## Pop and return the smallest item from `heap`, + ## maintaining the heap invariant. + let lastelt = heap.data.pop() if heap.len > 0: result = heap[0] - heap[0] = lastelt + heap.data[0] = lastelt siftup(heap, 0) else: result = lastelt -proc del*[T](heap: var HeapQueue[T], index: int) = - ## Removes element at `index`, maintaining the heap invariant. - swap(seq[T](heap)[^1], seq[T](heap)[index]) +proc del*[T](heap: var HeapQueue[T], index: Natural) = + ## Removes the element at `index` from `heap`, maintaining the heap invariant. + swap(heap.data[^1], heap.data[index]) let newLen = heap.len - 1 - seq[T](heap).setLen(newLen) + heap.data.setLen(newLen) if index < newLen: heap.siftup(index) proc replace*[T](heap: var HeapQueue[T], item: T): T = ## Pop and return the current smallest value, and add the new item. ## This is more efficient than pop() followed by push(), and can be - ## more appropriate when using a fixed-size heap. Note that the value - ## returned may be larger than item! That constrains reasonable uses of + ## more appropriate when using a fixed-size heap. Note that the value + ## returned may be larger than item! That constrains reasonable uses of ## this routine unless written as part of a conditional replacement: - + ## + ## .. code-block:: nim ## if item > heap[0]: ## item = replace(heap, item) result = heap[0] - heap[0] = item + heap.data[0] = item siftup(heap, 0) proc pushpop*[T](heap: var HeapQueue[T], item: T): T = @@ -111,6 +154,36 @@ proc pushpop*[T](heap: var HeapQueue[T], item: T): T = siftup(heap, 0) return item +proc clear*[T](heap: var HeapQueue[T]) = + ## Remove all elements from `heap`, making it empty. + runnableExamples: + var heap = initHeapQueue[int]() + heap.push(1) + heap.clear() + assert heap.len == 0 + heap.data.setLen(0) + +proc `$`*[T](heap: HeapQueue[T]): string = + ## Turn a heap into its string representation. + runnableExamples: + var heap = initHeapQueue[int]() + heap.push(1) + heap.push(2) + assert $heap == "[1, 2]" + result = "[" + for x in heap.data: + if result.len > 1: result.add(", ") + result.addQuoted(x) + result.add("]") + +proc newHeapQueue*[T](): HeapQueue[T] {.deprecated.} = + ## **Deprecated since v0.20.0:** use ``initHeapQueue`` instead. + initHeapQueue[T]() + +proc newHeapQueue*[T](heap: var HeapQueue[T]) {.deprecated.} = + ## **Deprecated since v0.20.0:** use ``clear`` instead. + heap.clear() + when isMainModule: proc toSortedSeq[T](h: HeapQueue[T]): seq[T] = var tmp = h @@ -119,7 +192,7 @@ when isMainModule: result.add(pop(tmp)) block: # Simple sanity test - var heap = newHeapQueue[int]() + var heap = initHeapQueue[int]() let data = [1, 3, 5, 7, 9, 2, 4, 6, 8, 0] for item in data: push(heap, item) @@ -127,27 +200,27 @@ when isMainModule: doAssert(heap.toSortedSeq == @[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]) block: # Test del - var heap = newHeapQueue[int]() + var heap = initHeapQueue[int]() let data = [1, 3, 5, 7, 9, 2, 4, 6, 8, 0] for item in data: push(heap, item) heap.del(0) doAssert(heap[0] == 1) - heap.del(seq[int](heap).find(7)) + heap.del(heap.data.find(7)) doAssert(heap.toSortedSeq == @[1, 2, 3, 4, 5, 6, 8, 9]) - heap.del(seq[int](heap).find(5)) + heap.del(heap.data.find(5)) doAssert(heap.toSortedSeq == @[1, 2, 3, 4, 6, 8, 9]) - heap.del(seq[int](heap).find(6)) + heap.del(heap.data.find(6)) doAssert(heap.toSortedSeq == @[1, 2, 3, 4, 8, 9]) - heap.del(seq[int](heap).find(2)) + heap.del(heap.data.find(2)) doAssert(heap.toSortedSeq == @[1, 3, 4, 8, 9]) block: # Test del last - var heap = newHeapQueue[int]() + var heap = initHeapQueue[int]() let data = [1, 2, 3] for item in data: push(heap, item) diff --git a/lib/pure/collections/intsets.nim b/lib/pure/collections/intsets.nim index 9d7950ea6..226401b92 100644 --- a/lib/pure/collections/intsets.nim +++ b/lib/pure/collections/intsets.nim @@ -7,13 +7,16 @@ # distribution, for details about the copyright. # -## The ``intsets`` module implements an efficient int set implemented as a +## The ``intsets`` module implements an efficient `int` set implemented as a ## `sparse bit set`:idx:. - -## **Note**: Currently the assignment operator ``=`` for ``intsets`` +## +## **Note**: Currently the assignment operator ``=`` for ``IntSet`` ## performs some rather meaningless shallow copy. Since Nim currently does -## not allow the assignment operator to be overloaded, use ``assign`` to -## get a deep copy. +## not allow the assignment operator to be overloaded, use `assign proc +## <#assign,IntSet,IntSet>`_ to get a deep copy. +## +## **See also:** +## * `sets module <sets.html>`_ for more general hash sets import @@ -40,7 +43,7 @@ type bits: array[0..IntsPerTrunk - 1, BitScalar] # a bit vector TrunkSeq = seq[PTrunk] - IntSet* = object ## an efficient set of 'int' implemented as a sparse bit set + IntSet* = object ## An efficient set of `int` implemented as a sparse bit set. elems: int # only valid for small numbers counter, max: int head: PTrunk @@ -96,18 +99,33 @@ proc intSetPut(t: var IntSet, key: int): PTrunk = t.head = result t.data[h] = result -proc contains*(s: IntSet, key: int): bool = - ## Returns true iff `key` is in `s`. +proc bitincl(s: var IntSet, key: int) {.inline.} = + var t = intSetPut(s, `shr`(key, TrunkShift)) + var u = key and TrunkMask + t.bits[`shr`(u, IntShift)] = t.bits[`shr`(u, IntShift)] or + `shl`(1, u and IntMask) + +proc exclImpl(s: var IntSet, key: int) = if s.elems <= s.a.len: for i in 0..<s.elems: - if s.a[i] == key: return true + if s.a[i] == key: + s.a[i] = s.a[s.elems-1] + dec s.elems + return else: var t = intSetGet(s, `shr`(key, TrunkShift)) if t != nil: var u = key and TrunkMask - result = (t.bits[`shr`(u, IntShift)] and `shl`(1, u and IntMask)) != 0 - else: - result = false + t.bits[`shr`(u, IntShift)] = t.bits[`shr`(u, IntShift)] and + not `shl`(1, u and IntMask) + +template dollarImpl(): untyped = + result = "{" + for key in items(s): + if result.len > 1: result.add(", ") + result.add($key) + result.add("}") + iterator items*(s: IntSet): int {.inline.} = ## Iterates over any included element of `s`. @@ -131,14 +149,62 @@ iterator items*(s: IntSet): int {.inline.} = inc(i) r = r.next -proc bitincl(s: var IntSet, key: int) {.inline.} = - var t = intSetPut(s, `shr`(key, TrunkShift)) - var u = key and TrunkMask - t.bits[`shr`(u, IntShift)] = t.bits[`shr`(u, IntShift)] or - `shl`(1, u and IntMask) + +proc initIntSet*: IntSet = + ## Returns an empty IntSet. + runnableExamples: + var a = initIntSet() + assert len(a) == 0 + + # newSeq(result.data, InitIntSetSize) + # result.max = InitIntSetSize-1 + result = IntSet( + elems: 0, + counter: 0, + max: 0, + head: nil, + data: when defined(nimNoNilSeqs): @[] else: nil) + # a: array[0..33, int] # profiling shows that 34 elements are enough + +proc contains*(s: IntSet, key: int): bool = + ## Returns true if `key` is in `s`. + ## + ## This allows the usage of `in` operator. + runnableExamples: + var a = initIntSet() + for x in [1, 3, 5]: + a.incl(x) + assert a.contains(3) + assert 3 in a + assert(not a.contains(8)) + assert 8 notin a + + if s.elems <= s.a.len: + for i in 0..<s.elems: + if s.a[i] == key: return true + else: + var t = intSetGet(s, `shr`(key, TrunkShift)) + if t != nil: + var u = key and TrunkMask + result = (t.bits[`shr`(u, IntShift)] and `shl`(1, u and IntMask)) != 0 + else: + result = false proc incl*(s: var IntSet, key: int) = ## Includes an element `key` in `s`. + ## + ## This doesn't do anything if `key` is already in `s`. + ## + ## See also: + ## * `excl proc <#excl,IntSet,int>`_ for excluding an element + ## * `incl proc <#incl,IntSet,IntSet>`_ for including other set + ## * `containsOrIncl proc <#containsOrIncl,IntSet,int>`_ + runnableExamples: + var a = initIntSet() + a.incl(3) + a.incl(3) + assert len(a) == 1 + if s.elems <= s.a.len: for i in 0..<s.elems: if s.a[i] == key: return @@ -156,40 +222,42 @@ proc incl*(s: var IntSet, key: int) = proc incl*(s: var IntSet, other: IntSet) = ## Includes all elements from `other` into `s`. - for item in other: incl(s, item) - -proc exclImpl(s: var IntSet, key: int) = - if s.elems <= s.a.len: - for i in 0..<s.elems: - if s.a[i] == key: - s.a[i] = s.a[s.elems-1] - dec s.elems - return - else: - var t = intSetGet(s, `shr`(key, TrunkShift)) - if t != nil: - var u = key and TrunkMask - t.bits[`shr`(u, IntShift)] = t.bits[`shr`(u, IntShift)] and - not `shl`(1, u and IntMask) - -proc excl*(s: var IntSet, key: int) = - ## Excludes `key` from the set `s`. - exclImpl(s, key) - -proc excl*(s: var IntSet, other: IntSet) = - ## Excludes all elements from `other` from `s`. - for item in other: excl(s, item) + ## + ## This is the in-place version of `s + other <#+,IntSet,IntSet>`_. + ## + ## See also: + ## * `excl proc <#excl,IntSet,IntSet>`_ for excluding other set + ## * `incl proc <#incl,IntSet,int>`_ for including an element + ## * `containsOrIncl proc <#containsOrIncl,IntSet,int>`_ + runnableExamples: + var + a = initIntSet() + b = initIntSet() + a.incl(1) + b.incl(5) + a.incl(b) + assert len(a) == 2 + assert 5 in a -proc missingOrExcl*(s: var IntSet, key: int) : bool = - ## Returns true if `s` does not contain `key`, otherwise - ## `key` is removed from `s` and false is returned. - var count = s.elems - exclImpl(s, key) - result = count == s.elems + for item in other: incl(s, item) proc containsOrIncl*(s: var IntSet, key: int): bool = - ## Returns true if `s` contains `key`, otherwise `key` is included in `s` - ## and false is returned. + ## Includes `key` in the set `s` and tells if `key` was already in `s`. + ## + ## The difference with regards to the `incl proc <#incl,IntSet,int>`_ is + ## that this proc returns `true` if `s` already contained `key`. The + ## proc will return `false` if `key` was added as a new value to `s` during + ## this call. + ## + ## See also: + ## * `incl proc <#incl,IntSet,int>`_ for including an element + ## * `missingOrExcl proc <#missingOrExcl,IntSet,int>`_ + runnableExamples: + var a = initIntSet() + assert a.containsOrIncl(3) == false + assert a.containsOrIncl(3) == true + assert a.containsOrIncl(4) == false + if s.elems <= s.a.len: for i in 0..<s.elems: if s.a[i] == key: @@ -208,25 +276,76 @@ proc containsOrIncl*(s: var IntSet, key: int): bool = incl(s, key) result = false -proc initIntSet*: IntSet = - ## Returns an empty IntSet. Example: +proc excl*(s: var IntSet, key: int) = + ## Excludes `key` from the set `s`. ## - ## .. code-block :: - ## var a = initIntSet() - ## a.incl(2) + ## This doesn't do anything if `key` is not found in `s`. + ## + ## See also: + ## * `incl proc <#incl,IntSet,int>`_ for including an element + ## * `excl proc <#excl,IntSet,IntSet>`_ for excluding other set + ## * `missingOrExcl proc <#missingOrExcl,IntSet,int>`_ + runnableExamples: + var a = initIntSet() + a.incl(3) + a.excl(3) + a.excl(3) + a.excl(99) + assert len(a) == 0 + exclImpl(s, key) - # newSeq(result.data, InitIntSetSize) - # result.max = InitIntSetSize-1 - result = IntSet( - elems: 0, - counter: 0, - max: 0, - head: nil, - data: when defined(nimNoNilSeqs): @[] else: nil) - # a: array[0..33, int] # profiling shows that 34 elements are enough +proc excl*(s: var IntSet, other: IntSet) = + ## Excludes all elements from `other` from `s`. + ## + ## This is the in-place version of `s - other <#-,IntSet,IntSet>`_. + ## + ## See also: + ## * `incl proc <#incl,IntSet,IntSet>`_ for including other set + ## * `excl proc <#excl,IntSet,int>`_ for excluding an element + ## * `missingOrExcl proc <#missingOrExcl,IntSet,int>`_ + runnableExamples: + var + a = initIntSet() + b = initIntSet() + a.incl(1) + a.incl(5) + b.incl(5) + a.excl(b) + assert len(a) == 1 + assert 5 notin a + + for item in other: excl(s, item) + +proc missingOrExcl*(s: var IntSet, key: int) : bool = + ## Excludes `key` in the set `s` and tells if `key` was already missing from `s`. + ## + ## The difference with regards to the `excl proc <#excl,IntSet,int>`_ is + ## that this proc returns `true` if `key` was missing from `s`. + ## The proc will return `false` if `key` was in `s` and it was removed + ## during this call. + ## + ## See also: + ## * `excl proc <#excl,IntSet,int>`_ for excluding an element + ## * `excl proc <#excl,IntSet,IntSet>`_ for excluding other set + ## * `containsOrIncl proc <#containsOrIncl,IntSet,int>`_ + runnableExamples: + var a = initIntSet() + a.incl(5) + assert a.missingOrExcl(5) == false + assert a.missingOrExcl(5) == true + + var count = s.elems + exclImpl(s, key) + result = count == s.elems proc clear*(result: var IntSet) = ## Clears the IntSet back to an empty state. + runnableExamples: + var a = initIntSet() + a.incl(5) + a.incl(7) + clear(a) + assert len(a) == 0 # setLen(result.data, InitIntSetSize) # for i in 0..InitIntSetSize-1: result.data[i] = nil @@ -243,8 +362,17 @@ proc clear*(result: var IntSet) = proc isNil*(x: IntSet): bool {.inline.} = x.head.isNil and x.elems == 0 proc assign*(dest: var IntSet, src: IntSet) = - ## copies `src` to `dest`. `dest` does not need to be initialized by - ## `initIntSet`. + ## Copies `src` to `dest`. + ## `dest` does not need to be initialized by `initIntSet proc <#initIntSet>`_. + runnableExamples: + var + a = initIntSet() + b = initIntSet() + b.incl(5) + b.incl(7) + a.assign(b) + assert len(a) == 2 + if src.elems <= src.a.len: when defined(nimNoNilSeqs): dest.data = @[] @@ -276,11 +404,33 @@ proc assign*(dest: var IntSet, src: IntSet) = proc union*(s1, s2: IntSet): IntSet = ## Returns the union of the sets `s1` and `s2`. + ## + ## The same as `s1 + s2 <#+,IntSet,IntSet>`_. + runnableExamples: + var + a = initIntSet() + b = initIntSet() + a.incl(1); a.incl(2); a.incl(3) + b.incl(3); b.incl(4); b.incl(5) + assert union(a, b).len == 5 + ## {1, 2, 3, 4, 5} + result.assign(s1) incl(result, s2) proc intersection*(s1, s2: IntSet): IntSet = ## Returns the intersection of the sets `s1` and `s2`. + ## + ## The same as `s1 * s2 <#*,IntSet,IntSet>`_. + runnableExamples: + var + a = initIntSet() + b = initIntSet() + a.incl(1); a.incl(2); a.incl(3) + b.incl(3); b.incl(4); b.incl(5) + assert intersection(a, b).len == 1 + ## {3} + result = initIntSet() for item in s1: if contains(s2, item): @@ -288,6 +438,17 @@ proc intersection*(s1, s2: IntSet): IntSet = proc difference*(s1, s2: IntSet): IntSet = ## Returns the difference of the sets `s1` and `s2`. + ## + ## The same as `s1 - s2 <#-,IntSet,IntSet>`_. + runnableExamples: + var + a = initIntSet() + b = initIntSet() + a.incl(1); a.incl(2); a.incl(3) + b.incl(3); b.incl(4); b.incl(5) + assert difference(a, b).len == 2 + ## {1, 2} + result = initIntSet() for item in s1: if not contains(s2, item): @@ -295,31 +456,50 @@ proc difference*(s1, s2: IntSet): IntSet = proc symmetricDifference*(s1, s2: IntSet): IntSet = ## Returns the symmetric difference of the sets `s1` and `s2`. + runnableExamples: + var + a = initIntSet() + b = initIntSet() + a.incl(1); a.incl(2); a.incl(3) + b.incl(3); b.incl(4); b.incl(5) + assert symmetricDifference(a, b).len == 4 + ## {1, 2, 4, 5} + result.assign(s1) for item in s2: if containsOrIncl(result, item): excl(result, item) proc `+`*(s1, s2: IntSet): IntSet {.inline.} = - ## Alias for `union(s1, s2) <#union>`_. + ## Alias for `union(s1, s2) <#union,IntSet,IntSet>`_. result = union(s1, s2) proc `*`*(s1, s2: IntSet): IntSet {.inline.} = - ## Alias for `intersection(s1, s2) <#intersection>`_. + ## Alias for `intersection(s1, s2) <#intersection,IntSet,IntSet>`_. result = intersection(s1, s2) proc `-`*(s1, s2: IntSet): IntSet {.inline.} = - ## Alias for `difference(s1, s2) <#difference>`_. + ## Alias for `difference(s1, s2) <#difference,IntSet,IntSet>`_. result = difference(s1, s2) proc disjoint*(s1, s2: IntSet): bool = ## Returns true if the sets `s1` and `s2` have no items in common. + runnableExamples: + var + a = initIntSet() + b = initIntSet() + a.incl(1); a.incl(2) + b.incl(2); b.incl(3) + assert disjoint(a, b) == false + b.excl(2) + assert disjoint(a, b) == true + for item in s1: if contains(s2, item): return false return true proc len*(s: IntSet): int {.inline.} = - ## Returns the number of keys in `s`. + ## Returns the number of elements in `s`. if s.elems < s.a.len: result = s.elems else: @@ -328,35 +508,59 @@ proc len*(s: IntSet): int {.inline.} = inc(result) proc card*(s: IntSet): int {.inline.} = - ## Alias for `len() <#len>` _. + ## Alias for `len() <#len,IntSet>`_. result = s.len() proc `<=`*(s1, s2: IntSet): bool = - ## Returns true iff `s1` is subset of `s2`. + ## Returns true if `s1` is subset of `s2`. + ## + ## A subset `s1` has all of its elements in `s2`, and `s2` doesn't necessarily + ## have more elements than `s1`. That is, `s1` can be equal to `s2`. + runnableExamples: + var + a = initIntSet() + b = initIntSet() + a.incl(1) + b.incl(1); b.incl(2) + assert a <= b + a.incl(2) + assert a <= b + a.incl(3) + assert(not (a <= b)) + for item in s1: if not s2.contains(item): return false return true proc `<`*(s1, s2: IntSet): bool = - ## Returns true iff `s1` is proper subset of `s2`. + ## Returns true if `s1` is proper subset of `s2`. + ## + ## A strict or proper subset `s1` has all of its elements in `s2`, but `s2` has + ## more elements than `s1`. + runnableExamples: + var + a = initIntSet() + b = initIntSet() + a.incl(1) + b.incl(1); b.incl(2) + assert a < b + a.incl(2) + assert(not (a < b)) return s1 <= s2 and not (s2 <= s1) proc `==`*(s1, s2: IntSet): bool = - ## Returns true if both `s` and `t` have the same members and set size. + ## Returns true if both `s1` and `s2` have the same elements and set size. return s1 <= s2 and s2 <= s1 -template dollarImpl(): untyped = - result = "{" - for key in items(s): - if result.len > 1: result.add(", ") - result.add($key) - result.add("}") - proc `$`*(s: IntSet): string = ## The `$` operator for int sets. + ## + ## Converts the set `s` to a string, mostly for logging and printing purposes. dollarImpl() + + when isMainModule: import sequtils, algorithm diff --git a/lib/pure/collections/lists.nim b/lib/pure/collections/lists.nim index 182eb8442..1fd32c9fa 100644 --- a/lib/pure/collections/lists.nim +++ b/lib/pure/collections/lists.nim @@ -93,25 +93,25 @@ type SinglyLinkedList*[T] = object ## A singly linked list. ## - ## Use `initSinglyLinkedList proc <#initSinglyLinkedList,>`_ to create + ## Use `initSinglyLinkedList proc <#initSinglyLinkedList>`_ to create ## a new empty list. head*, tail*: SinglyLinkedNode[T] DoublyLinkedList*[T] = object ## A doubly linked list. ## - ## Use `initDoublyLinkedList proc <#initDoublyLinkedList,>`_ to create + ## Use `initDoublyLinkedList proc <#initDoublyLinkedList>`_ to create ## a new empty list. head*, tail*: DoublyLinkedNode[T] SinglyLinkedRing*[T] = object ## A singly linked ring. ## - ## Use `initSinglyLinkedRing proc <#initSinglyLinkedRing,>`_ to create + ## Use `initSinglyLinkedRing proc <#initSinglyLinkedRing>`_ to create ## a new empty ring. head*, tail*: SinglyLinkedNode[T] DoublyLinkedRing*[T] = object ## A doubly linked ring. ## - ## Use `initDoublyLinkedRing proc <#initDoublyLinkedRing,>`_ to create + ## Use `initDoublyLinkedRing proc <#initDoublyLinkedRing>`_ to create ## a new empty ring. head*: DoublyLinkedNode[T] diff --git a/lib/pure/collections/sequtils.nim b/lib/pure/collections/sequtils.nim index b0d50bce2..253340379 100644 --- a/lib/pure/collections/sequtils.nim +++ b/lib/pure/collections/sequtils.nim @@ -846,6 +846,15 @@ template mapIt*(s: typed, op: untyped): untyped = result.add(op) result +template mapIt*(s, typ, op: untyped): untyped {.error: + "Use 'mapIt(seq1, op)' - without specifying the type of the returned seqence".} = + ## **Deprecated since version 0.12.0:** Use the `mapIt(seq1, op) template + ## <#mapIt.t,typed,untyped>`_ instead. + var result: seq[typ] = @[] + for it {.inject.} in items(s): + result.add(op) + result + template applyIt*(varSeq, op: untyped) = ## Convenience template around the mutable ``apply`` proc to reduce typing. ## diff --git a/lib/pure/collections/sets.nim b/lib/pure/collections/sets.nim index d1f941e92..5da5d9243 100644 --- a/lib/pure/collections/sets.nim +++ b/lib/pure/collections/sets.nim @@ -14,8 +14,38 @@ ## <manual.html#types-set-type>`_. Sets allow you to store any value that can be ## `hashed <hashes.html>`_ and they don't contain duplicate entries. ## -## **Note**: The data types declared here have *value semantics*: This means +## Common usages of sets: +## * removing duplicates from a container by converting it with `toSet proc +## <#toSet,openArray[A]>`_ (see also `sequtils.deduplicate proc +## <sequtils.html#deduplicate,openArray[T],bool>`_) +## * membership testing +## * mathematical operations on two sets, such as +## `union <#union,HashSet[A],HashSet[A]>`_, +## `intersection <#intersection,HashSet[A],HashSet[A]>`_, +## `difference <#difference,HashSet[A],HashSet[A]>`_, and +## `symmetric difference <#symmetricDifference,HashSet[A],HashSet[A]>`_ +## +## .. code-block:: +## echo toSet([9, 5, 1]) # {9, 1, 5} +## echo toOrderedSet([9, 5, 1]) # {9, 5, 1} +## +## let +## s1 = toSet([9, 5, 1]) +## s2 = toSet([3, 5, 7]) +## +## echo s1 + s2 # {9, 1, 3, 5, 7} +## echo s1 - s2 # {1, 9} +## echo s1 * s2 # {5} +## echo s1 -+- s2 # {9, 1, 3, 7} +## +## +## Note: The data types declared here have *value semantics*: This means ## that ``=`` performs a copy of the set. +## +## **See also:** +## * `intsets module <intsets.html>`_ for efficient int sets +## * `tables module <tables.html>`_ for hash tables + import hashes, math @@ -31,27 +61,24 @@ when not defined(nimhygiene): type KeyValuePair[A] = tuple[hcode: Hash, key: A] KeyValuePairSeq[A] = seq[KeyValuePair[A]] - HashSet* {.myShallow.}[A] = object ## \ + HashSet* {.myShallow.} [A] = object ## \ ## A generic hash set. ## - ## Use `init() <#init,HashSet[A],int>`_ or `initSet[type]() <#initSet>`_ + ## Use `init proc <#init,HashSet[A],int>`_ or `initSet proc <#initSet,int>`_ ## before calling other procs on it. data: KeyValuePairSeq[A] counter: int + +# ---------------------- helpers ----------------------------------- + +const growthFactor = 2 + template default[T](t: typedesc[T]): T = ## Used by clear methods to get a default value. var v: T v -proc clear*[A](s: var HashSet[A]) = - ## Clears the HashSet back to an empty state, without shrinking - ## any of the existing storage. O(n) where n is the size of the hash bucket. - s.counter = 0 - for i in 0..<s.data.len: - s.data[i].hcode = 0 - s.data[i].key = default(type(s.data[i].key)) - # hcode for real keys cannot be zero. hcode==0 signifies an empty slot. These # two procs retain clarity of that encoding without the space cost of an enum. proc isEmpty(hcode: Hash): bool {.inline.} = @@ -60,87 +87,6 @@ proc isEmpty(hcode: Hash): bool {.inline.} = proc isFilled(hcode: Hash): bool {.inline.} = result = hcode != 0 -proc isValid*[A](s: HashSet[A]): bool = - ## Returns `true` if the set has been initialized with `initSet <#initSet>`_. - ## - ## Most operations over an uninitialized set will crash at runtime and - ## `assert <system.html#assert>`_ in debug builds. You can use this proc in - ## your own procs to verify that sets passed to your procs are correctly - ## initialized. Example: - ## - ## .. code-block :: - ## proc savePreferences(options: HashSet[string]) = - ## assert options.isValid, "Pass an initialized set!" - ## # Do stuff here, may crash in release builds! - result = s.data.len > 0 - -proc len*[A](s: HashSet[A]): int = - ## Returns the number of keys in `s`. - ## - ## Due to an implementation detail you can call this proc on variables which - ## have not been initialized yet. The proc will return zero as the length - ## then. Example: - ## - ## .. code-block:: - ## - ## var values: HashSet[int] - ## assert(not values.isValid) - ## assert values.len == 0 - result = s.counter - -proc card*[A](s: HashSet[A]): int = - ## Alias for `len() <#len,TSet[A]>`_. - ## - ## Card stands for the `cardinality - ## <http://en.wikipedia.org/wiki/Cardinality>`_ of a set. - result = s.counter - -iterator items*[A](s: HashSet[A]): A = - ## Iterates over keys in the set `s`. - ## - ## If you need a sequence with the keys you can use `sequtils.toSeq() - ## <sequtils.html#toSeq>`_ on the iterator. Usage example: - ## - ## .. code-block:: - ## type - ## pair = tuple[a, b: int] - ## var - ## a, b = initSet[pair]() - ## a.incl((2, 3)) - ## a.incl((3, 2)) - ## a.incl((2, 3)) - ## for x, y in a.items: - ## b.incl((x - 2, y + 1)) - ## assert a.len == 2 - ## echo b - ## # --> {(a: 1, b: 3), (a: 0, b: 4)} - assert s.isValid, "The set needs to be initialized." - for h in 0..high(s.data): - if isFilled(s.data[h].hcode): yield s.data[h].key - -proc hash*[A](s: HashSet[A]): Hash = - ## hashing of HashSet - assert s.isValid, "The set needs to be initialized." - for h in 0..high(s.data): - result = result xor s.data[h].hcode - result = !$result - -const - growthFactor = 2 - -proc mustRehash(length, counter: int): bool {.inline.} = - assert(length > counter) - result = (length * 2 < counter * 3) or (length - counter < 4) - -proc rightSize*(count: Natural): int {.inline.} = - ## Return the value of `initialSize` to support `count` items. - ## - ## If more items are expected to be added, simply add that - ## expected extra amount to the parameter before calling this. - ## - ## Internally, we want mustRehash(rightSize(x), x) == false. - result = nextPowerOfTwo(count * 3 div 2 + 4) - proc nextTry(h, maxHash: Hash): Hash {.inline.} = result = (h + 1) and maxHash @@ -176,38 +122,6 @@ proc rawGetKnownHC[A](s: HashSet[A], key: A, hc: Hash): int {.inline.} = proc rawGet[A](s: HashSet[A], key: A, hc: var Hash): int {.inline.} = rawGetImpl() -proc `[]`*[A](s: var HashSet[A], key: A): var A = - ## returns the element that is actually stored in 's' which has the same - ## value as 'key' or raises the ``KeyError`` exception. This is useful - ## when one overloaded 'hash' and '==' but still needs reference semantics - ## for sharing. - assert s.isValid, "The set needs to be initialized." - var hc: Hash - var index = rawGet(s, key, hc) - if index >= 0: result = s.data[index].key - else: - when compiles($key): - raise newException(KeyError, "key not found: " & $key) - else: - raise newException(KeyError, "key not found") - -proc contains*[A](s: HashSet[A], key: A): bool = - ## Returns true iff `key` is in `s`. - ## - ## Example: - ## - ## .. code-block:: - ## var values = initSet[int]() - ## assert(not values.contains(2)) - ## values.incl(2) - ## assert values.contains(2) - ## values.excl(2) - ## assert(not values.contains(2)) - assert s.isValid, "The set needs to be initialized." - var hc: Hash - var index = rawGet(s, key, hc) - result = index >= 0 - proc rawInsert[A](s: var HashSet[A], data: var KeyValuePairSeq[A], key: A, hc: Hash, h: Hash) = rawInsertImpl() @@ -243,34 +157,6 @@ template containsOrInclImpl() {.dirty.} = rawInsert(s, s.data, key, hc, -1 - index) inc(s.counter) -proc incl*[A](s: var HashSet[A], key: A) = - ## Includes an element `key` in `s`. - ## - ## This doesn't do anything if `key` is already in `s`. Example: - ## - ## .. code-block:: - ## var values = initSet[int]() - ## values.incl(2) - ## values.incl(2) - ## assert values.len == 1 - assert s.isValid, "The set needs to be initialized." - inclImpl() - -proc incl*[A](s: var HashSet[A], other: HashSet[A]) = - ## Includes all elements from `other` into `s`. - ## - ## Example: - ## - ## .. code-block:: - ## var values = initSet[int]() - ## values.incl(2) - ## var others = toSet([6, 7]) - ## values.incl(others) - ## assert values.len == 3 - assert s.isValid, "The set `s` needs to be initialized." - assert other.isValid, "The set `other` needs to be initialized." - for item in other: incl(s, item) - template doWhile(a, b) = while true: b @@ -302,51 +188,279 @@ proc exclImpl[A](s: var HashSet[A], key: A) : bool {. inline .} = r = s.data[i].hcode and msk # "home" location of key@i shallowCopy(s.data[j], s.data[i]) # data[i] will be marked EMPTY next loop -proc missingOrExcl*[A](s: var HashSet[A], key: A): bool = - ## Excludes `key` in the set `s` and tells if `key` was removed from `s`. +proc mustRehash(length, counter: int): bool {.inline.} = + assert(length > counter) + result = (length * 2 < counter * 3) or (length - counter < 4) + +template dollarImpl() {.dirty.} = + result = "{" + for key in items(s): + if result.len > 1: result.add(", ") + result.addQuoted(key) + result.add("}") + +proc rightSize*(count: Natural): int {.inline.} + + + + + + + + +# --------------------------------------------------------------------- +# ------------------------------ HashSet ------------------------------ +# --------------------------------------------------------------------- + + +proc init*[A](s: var HashSet[A], initialSize=64) = + ## Initializes a hash set. ## - ## The difference with regards to the `excl() <#excl,TSet[A],A>`_ proc is - ## that this proc returns `true` if `key` was not present in `s`. Example: + ## The `initialSize` parameter needs to be a power of two (default: 64). + ## If you need to accept runtime values for this, you can use + ## `math.nextPowerOfTwo proc <math.html#nextPowerOfTwo>`_ or `rightSize proc + ## <#rightSize,Natural>`_ from this module. ## - ## .. code-block:: - ## var s = toSet([2, 3, 6, 7]) - ## assert s.missingOrExcl(4) == true - ## assert s.missingOrExcl(6) == false - exclImpl(s, key) + ## All set variables must be initialized before + ## use with other procs from this module, with the exception of `isValid proc + ## <#isValid,HashSet[A]>`_ and `len() <#len,HashSet[A]>`_. + ## + ## You can call this proc on a previously initialized hash set, which will + ## discard all its values. This might be more convenient than iterating over + ## existing values and calling `excl() <#excl,HashSet[A],A>`_ on them. + ## + ## See also: + ## * `initSet proc <#initSet,int>`_ + ## * `toSet proc <#toSet,openArray[A]>`_ + runnableExamples: + var a: HashSet[int] + assert(not a.isValid) + init(a) + assert a.isValid + + assert isPowerOfTwo(initialSize) + s.counter = 0 + newSeq(s.data, initialSize) + +proc initSet*[A](initialSize=64): HashSet[A] = + ## Wrapper around `init proc <#init,HashSet[A],int>`_ for initialization of + ## hash sets. + ## + ## Returns an empty hash set you can assign directly in ``var`` blocks in a + ## single line. + ## + ## See also: + ## * `toSet proc <#toSet,openArray[A]>`_ + runnableExamples: + var a = initSet[int]() + assert a.isValid + a.incl(3) + assert len(a) == 1 + result.init(initialSize) + +proc toSet*[A](keys: openArray[A]): HashSet[A] = + ## Creates a new hash set that contains the members of the given + ## collection (seq, array, or string) `keys`. + ## + ## Duplicates are removed. + ## + ## See also: + ## * `initSet proc <#initSet,int>`_ + runnableExamples: + let + a = toSet([5, 3, 2]) + b = toSet("abracadabra") + assert len(a) == 3 + ## a == {2, 3, 5} + assert len(b) == 5 + ## b == {'a', 'b', 'c', 'd', 'r'} + + result = initSet[A](rightSize(keys.len)) + for key in items(keys): result.incl(key) + +proc isValid*[A](s: HashSet[A]): bool = + ## Returns `true` if the set has been initialized (with `initSet proc + ## <#initSet,int>`_ or `init proc <#init,HashSet[A],int>`_). + ## + ## Most operations over an uninitialized set will crash at runtime and + ## `assert <system.html#assert>`_ in debug builds. You can use this proc in + ## your own procs to verify that sets passed to your procs are correctly + ## initialized. + ## + ## **Examples:** + ## + ## .. code-block :: + ## proc savePreferences(options: HashSet[string]) = + ## assert options.isValid, "Pass an initialized set!" + ## # Do stuff here, may crash in release builds! + result = s.data.len > 0 + +proc `[]`*[A](s: var HashSet[A], key: A): var A = + ## Returns the element that is actually stored in `s` which has the same + ## value as `key` or raises the ``KeyError`` exception. + ## + ## This is useful when one overloaded `hash` and `==` but still needs + ## reference semantics for sharing. + assert s.isValid, "The set needs to be initialized." + var hc: Hash + var index = rawGet(s, key, hc) + if index >= 0: result = s.data[index].key + else: + when compiles($key): + raise newException(KeyError, "key not found: " & $key) + else: + raise newException(KeyError, "key not found") + +proc contains*[A](s: HashSet[A], key: A): bool = + ## Returns true if `key` is in `s`. + ## + ## This allows the usage of `in` operator. + ## + ## See also: + ## * `incl proc <#incl,HashSet[A],A>`_ + ## * `containsOrIncl proc <#containsOrIncl,HashSet[A],A>`_ + runnableExamples: + var values = initSet[int]() + assert(not values.contains(2)) + assert 2 notin values + + values.incl(2) + assert values.contains(2) + assert 2 in values + + assert s.isValid, "The set needs to be initialized." + var hc: Hash + var index = rawGet(s, key, hc) + result = index >= 0 + +proc incl*[A](s: var HashSet[A], key: A) = + ## Includes an element `key` in `s`. + ## + ## This doesn't do anything if `key` is already in `s`. + ## + ## See also: + ## * `excl proc <#excl,HashSet[A],A>`_ for excluding an element + ## * `incl proc <#incl,HashSet[A],HashSet[A]>`_ for including other set + ## * `containsOrIncl proc <#containsOrIncl,HashSet[A],A>`_ + runnableExamples: + var values = initSet[int]() + values.incl(2) + values.incl(2) + assert values.len == 1 + + assert s.isValid, "The set needs to be initialized." + inclImpl() + +proc incl*[A](s: var HashSet[A], other: HashSet[A]) = + ## Includes all elements from `other` set into `s` (must be declared as `var`). + ## + ## This is the in-place version of `s + other <#+,HashSet[A],HashSet[A]>`_. + ## + ## See also: + ## * `excl proc <#excl,HashSet[A],HashSet[A]>`_ for excluding other set + ## * `incl proc <#incl,HashSet[A],A>`_ for including an element + ## * `containsOrIncl proc <#containsOrIncl,HashSet[A],A>`_ + runnableExamples: + var + values = toSet([1, 2, 3]) + others = toSet([3, 4, 5]) + values.incl(others) + assert values.len == 5 + + assert s.isValid, "The set `s` needs to be initialized." + assert other.isValid, "The set `other` needs to be initialized." + for item in other: incl(s, item) + +proc containsOrIncl*[A](s: var HashSet[A], key: A): bool = + ## Includes `key` in the set `s` and tells if `key` was already in `s`. + ## + ## The difference with regards to the `incl proc <#incl,HashSet[A],A>`_ is + ## that this proc returns `true` if `s` already contained `key`. The + ## proc will return `false` if `key` was added as a new value to `s` during + ## this call. + ## + ## See also: + ## * `incl proc <#incl,HashSet[A],A>`_ for including an element + ## * `incl proc <#incl,HashSet[A],HashSet[A]>`_ for including other set + ## * `missingOrExcl proc <#missingOrExcl,HashSet[A],A>`_ + runnableExamples: + var values = initSet[int]() + assert values.containsOrIncl(2) == false + assert values.containsOrIncl(2) == true + assert values.containsOrIncl(3) == false + + assert s.isValid, "The set needs to be initialized." + containsOrInclImpl() proc excl*[A](s: var HashSet[A], key: A) = ## Excludes `key` from the set `s`. ## - ## This doesn't do anything if `key` is not found in `s`. Example: - ## - ## .. code-block:: - ## var s = toSet([2, 3, 6, 7]) - ## s.excl(2) - ## s.excl(2) - ## assert s.len == 3 + ## This doesn't do anything if `key` is not found in `s`. + ## + ## See also: + ## * `incl proc <#incl,HashSet[A],A>`_ for including an element + ## * `excl proc <#excl,HashSet[A],HashSet[A]>`_ for excluding other set + ## * `missingOrExcl proc <#missingOrExcl,HashSet[A],A>`_ + runnableExamples: + var s = toSet([2, 3, 6, 7]) + s.excl(2) + s.excl(2) + assert s.len == 3 discard exclImpl(s, key) proc excl*[A](s: var HashSet[A], other: HashSet[A]) = - ## Excludes everything in `other` from `s`. - ## - ## Example: - ## - ## .. code-block:: - ## var - ## numbers = toSet([1, 2, 3, 4, 5]) - ## even = toSet([2, 4, 6, 8]) - ## numbers.excl(even) - ## echo numbers - ## # --> {1, 3, 5} + ## Excludes all elements of `other` set from `s`. + ## + ## This is the in-place version of `s - other <#-,HashSet[A],HashSet[A]>`_. + ## + ## See also: + ## * `incl proc <#incl,HashSet[A],HashSet[A]>`_ for including other set + ## * `excl proc <#excl,HashSet[A],A>`_ for excluding an element + ## * `missingOrExcl proc <#missingOrExcl,HashSet[A],A>`_ + runnableExamples: + var + numbers = toSet([1, 2, 3, 4, 5]) + even = toSet([2, 4, 6, 8]) + numbers.excl(even) + assert len(numbers) == 3 + ## numbers == {1, 3, 5} + assert s.isValid, "The set `s` needs to be initialized." assert other.isValid, "The set `other` needs to be initialized." for item in other: discard exclImpl(s, item) +proc missingOrExcl*[A](s: var HashSet[A], key: A): bool = + ## Excludes `key` in the set `s` and tells if `key` was already missing from `s`. + ## + ## The difference with regards to the `excl proc <#excl,HashSet[A],A>`_ is + ## that this proc returns `true` if `key` was missing from `s`. + ## The proc will return `false` if `key` was in `s` and it was removed + ## during this call. + ## + ## See also: + ## * `excl proc <#excl,HashSet[A],A>`_ for excluding an element + ## * `excl proc <#excl,HashSet[A],HashSet[A]>`_ for excluding other set + ## * `containsOrIncl proc <#containsOrIncl,HashSet[A],A>`_ + runnableExamples: + var s = toSet([2, 3, 6, 7]) + assert s.missingOrExcl(4) == true + assert s.missingOrExcl(6) == false + assert s.missingOrExcl(6) == true + exclImpl(s, key) + proc pop*[A](s: var HashSet[A]): A = ## Remove and return an arbitrary element from the set `s`. ## ## Raises KeyError if the set `s` is empty. ## + ## See also: + ## * `clear proc <#clear,HashSet[A]>`_ + runnableExamples: + var s = toSet([2, 1]) + assert s.pop == 1 + assert s.pop == 2 + doAssertRaises(KeyError, echo s.pop) + for h in 0..high(s.data): if isFilled(s.data[h].hcode): result = s.data[h].key @@ -354,103 +468,64 @@ proc pop*[A](s: var HashSet[A]): A = return result raise newException(KeyError, "set is empty") -proc containsOrIncl*[A](s: var HashSet[A], key: A): bool = - ## Includes `key` in the set `s` and tells if `key` was added to `s`. +proc clear*[A](s: var HashSet[A]) = + ## Clears the HashSet back to an empty state, without shrinking + ## any of the existing storage. ## - ## The difference with regards to the `incl() <#incl,TSet[A],A>`_ proc is - ## that this proc returns `true` if `key` was already present in `s`. The - ## proc will return false if `key` was added as a new value to `s` during - ## this call. Example: + ## `O(n)` operation, where `n` is the size of the hash bucket. ## - ## .. code-block:: - ## var values = initSet[int]() - ## assert values.containsOrIncl(2) == false - ## assert values.containsOrIncl(2) == true - assert s.isValid, "The set needs to be initialized." - containsOrInclImpl() + ## See also: + ## * `pop proc <#pop,HashSet[A]>`_ + runnableExamples: + var s = toSet([3, 5, 7]) + clear(s) + assert len(s) == 0 -proc init*[A](s: var HashSet[A], initialSize=64) = - ## Initializes a hash set. - ## - ## The `initialSize` parameter needs to be a power of two. You can use - ## `math.nextPowerOfTwo() <math.html#nextPowerOfTwo>`_ or `rightSize` to - ## guarantee that at runtime. All set variables must be initialized before - ## use with other procs from this module with the exception of `isValid() - ## <#isValid,TSet[A]>`_ and `len() <#len,TSet[A]>`_. - ## - ## You can call this proc on a previously initialized hash set, which will - ## discard all its values. This might be more convenient than iterating over - ## existing values and calling `excl() <#excl,TSet[A],A>`_ on them. Example: - ## - ## .. code-block :: - ## var a: HashSet[int] - ## a.init(4) - ## a.incl(2) - ## a.init - ## assert a.len == 0 and a.isValid - assert isPowerOfTwo(initialSize) s.counter = 0 - newSeq(s.data, initialSize) + for i in 0..<s.data.len: + s.data[i].hcode = 0 + s.data[i].key = default(type(s.data[i].key)) -proc initSet*[A](initialSize=64): HashSet[A] = - ## Wrapper around `init() <#init,TSet[A],int>`_ for initialization of hash - ## sets. - ## - ## Returns an empty hash set you can assign directly in ``var`` blocks in a - ## single line. Example: +proc len*[A](s: HashSet[A]): int = + ## Returns the number of elements in `s`. ## - ## .. code-block :: - ## var a = initSet[int](4) - ## a.incl(2) - result.init(initialSize) + ## Due to an implementation detail you can call this proc on variables which + ## have not been initialized yet. The proc will return zero as the length + ## then. + runnableExamples: + var a: HashSet[string] + assert len(a) == 0 + let s = toSet([3, 5, 7]) + assert len(s) == 3 + result = s.counter -proc toSet*[A](keys: openArray[A]): HashSet[A] = - ## Creates a new hash set that contains the given `keys`. - ## - ## Example: +proc card*[A](s: HashSet[A]): int = + ## Alias for `len() <#len,HashSet[A]>`_. ## - ## .. code-block:: - ## var numbers = toSet([1, 2, 3, 4, 5]) - ## assert numbers.contains(2) - ## assert numbers.contains(4) - result = initSet[A](rightSize(keys.len)) - for key in items(keys): result.incl(key) - -template dollarImpl() {.dirty.} = - result = "{" - for key in items(s): - if result.len > 1: result.add(", ") - result.addQuoted(key) - result.add("}") + ## Card stands for the `cardinality + ## <http://en.wikipedia.org/wiki/Cardinality>`_ of a set. + result = s.counter -proc `$`*[A](s: HashSet[A]): string = - ## Converts the set `s` to a string, mostly for logging purposes. - ## - ## Don't use this proc for serialization, the representation may change at - ## any moment and values are not escaped. Example: - ## - ## Example: - ## - ## .. code-block:: - ## echo toSet([2, 4, 5]) - ## # --> {2, 4, 5} - ## echo toSet(["no", "esc'aping", "is \" provided"]) - ## # --> {no, esc'aping, is " provided} - assert s.isValid, "The set needs to be initialized." - dollarImpl() proc union*[A](s1, s2: HashSet[A]): HashSet[A] = ## Returns the union of the sets `s1` and `s2`. ## - ## The union of two sets is represented mathematically as *A ∪ B* and is the - ## set of all objects that are members of `s1`, `s2` or both. Example: + ## The same as `s1 + s2 <#+,HashSet[A],HashSet[A]>`_. ## - ## .. code-block:: - ## var - ## a = toSet(["a", "b"]) - ## b = toSet(["b", "c"]) - ## c = union(a, b) - ## assert c == toSet(["a", "b", "c"]) + ## The union of two sets is represented mathematically as *A ∪ B* and is the + ## set of all objects that are members of `s1`, `s2` or both. + ## + ## See also: + ## * `intersection proc <#intersection,HashSet[A],HashSet[A]>`_ + ## * `difference proc <#difference,HashSet[A],HashSet[A]>`_ + ## * `symmetricDifference proc <#symmetricDifference,HashSet[A],HashSet[A]>`_ + runnableExamples: + let + a = toSet(["a", "b"]) + b = toSet(["b", "c"]) + c = union(a, b) + assert c == toSet(["a", "b", "c"]) + assert s1.isValid, "The set `s1` needs to be initialized." assert s2.isValid, "The set `s2` needs to be initialized." result = s1 @@ -459,16 +534,23 @@ proc union*[A](s1, s2: HashSet[A]): HashSet[A] = proc intersection*[A](s1, s2: HashSet[A]): HashSet[A] = ## Returns the intersection of the sets `s1` and `s2`. ## + ## The same as `s1 * s2 <#*,HashSet[A],HashSet[A]>`_. + ## ## The intersection of two sets is represented mathematically as *A ∩ B* and ## is the set of all objects that are members of `s1` and `s2` at the same - ## time. Example: - ## - ## .. code-block:: - ## var - ## a = toSet(["a", "b"]) - ## b = toSet(["b", "c"]) - ## c = intersection(a, b) - ## assert c == toSet(["b"]) + ## time. + ## + ## See also: + ## * `union proc <#union,HashSet[A],HashSet[A]>`_ + ## * `difference proc <#difference,HashSet[A],HashSet[A]>`_ + ## * `symmetricDifference proc <#symmetricDifference,HashSet[A],HashSet[A]>`_ + runnableExamples: + let + a = toSet(["a", "b"]) + b = toSet(["b", "c"]) + c = intersection(a, b) + assert c == toSet(["b"]) + assert s1.isValid, "The set `s1` needs to be initialized." assert s2.isValid, "The set `s2` needs to be initialized." result = initSet[A](min(s1.data.len, s2.data.len)) @@ -478,16 +560,22 @@ proc intersection*[A](s1, s2: HashSet[A]): HashSet[A] = proc difference*[A](s1, s2: HashSet[A]): HashSet[A] = ## Returns the difference of the sets `s1` and `s2`. ## + ## The same as `s1 - s2 <#-,HashSet[A],HashSet[A]>`_. + ## ## The difference of two sets is represented mathematically as *A \ B* and is ## the set of all objects that are members of `s1` and not members of `s2`. - ## Example: ## - ## .. code-block:: - ## var - ## a = toSet(["a", "b"]) - ## b = toSet(["b", "c"]) - ## c = difference(a, b) - ## assert c == toSet(["a"]) + ## See also: + ## * `union proc <#union,HashSet[A],HashSet[A]>`_ + ## * `intersection proc <#intersection,HashSet[A],HashSet[A]>`_ + ## * `symmetricDifference proc <#symmetricDifference,HashSet[A],HashSet[A]>`_ + runnableExamples: + let + a = toSet(["a", "b"]) + b = toSet(["b", "c"]) + c = difference(a, b) + assert c == toSet(["a"]) + assert s1.isValid, "The set `s1` needs to be initialized." assert s2.isValid, "The set `s2` needs to be initialized." result = initSet[A]() @@ -498,16 +586,23 @@ proc difference*[A](s1, s2: HashSet[A]): HashSet[A] = proc symmetricDifference*[A](s1, s2: HashSet[A]): HashSet[A] = ## Returns the symmetric difference of the sets `s1` and `s2`. ## + ## The same as `s1 -+- s2 <#-+-,HashSet[A],HashSet[A]>`_. + ## ## The symmetric difference of two sets is represented mathematically as *A △ ## B* or *A ⊖ B* and is the set of all objects that are members of `s1` or - ## `s2` but not both at the same time. Example: - ## - ## .. code-block:: - ## var - ## a = toSet(["a", "b"]) - ## b = toSet(["b", "c"]) - ## c = symmetricDifference(a, b) - ## assert c == toSet(["a", "c"]) + ## `s2` but not both at the same time. + ## + ## See also: + ## * `union proc <#union,HashSet[A],HashSet[A]>`_ + ## * `intersection proc <#intersection,HashSet[A],HashSet[A]>`_ + ## * `difference proc <#difference,HashSet[A],HashSet[A]>`_ + runnableExamples: + let + a = toSet(["a", "b"]) + b = toSet(["b", "c"]) + c = symmetricDifference(a, b) + assert c == toSet(["a", "c"]) + assert s1.isValid, "The set `s1` needs to be initialized." assert s2.isValid, "The set `s2` needs to be initialized." result = s1 @@ -515,32 +610,31 @@ proc symmetricDifference*[A](s1, s2: HashSet[A]): HashSet[A] = if containsOrIncl(result, item): excl(result, item) proc `+`*[A](s1, s2: HashSet[A]): HashSet[A] {.inline.} = - ## Alias for `union(s1, s2) <#union>`_. + ## Alias for `union(s1, s2) <#union,HashSet[A],HashSet[A]>`_. result = union(s1, s2) proc `*`*[A](s1, s2: HashSet[A]): HashSet[A] {.inline.} = - ## Alias for `intersection(s1, s2) <#intersection>`_. + ## Alias for `intersection(s1, s2) <#intersection,HashSet[A],HashSet[A]>`_. result = intersection(s1, s2) proc `-`*[A](s1, s2: HashSet[A]): HashSet[A] {.inline.} = - ## Alias for `difference(s1, s2) <#difference>`_. + ## Alias for `difference(s1, s2) <#difference,HashSet[A],HashSet[A]>`_. result = difference(s1, s2) proc `-+-`*[A](s1, s2: HashSet[A]): HashSet[A] {.inline.} = - ## Alias for `symmetricDifference(s1, s2) <#symmetricDifference>`_. + ## Alias for `symmetricDifference(s1, s2) + ## <#symmetricDifference,HashSet[A],HashSet[A]>`_. result = symmetricDifference(s1, s2) proc disjoint*[A](s1, s2: HashSet[A]): bool = - ## Returns true iff the sets `s1` and `s2` have no items in common. - ## - ## Example: - ## - ## .. code-block:: - ## var - ## a = toSet(["a", "b"]) - ## b = toSet(["b", "c"]) - ## assert disjoint(a, b) == false - ## assert disjoint(a, b - a) == true + ## Returns `true` if the sets `s1` and `s2` have no items in common. + runnableExamples: + let + a = toSet(["a", "b"]) + b = toSet(["b", "c"]) + assert disjoint(a, b) == false + assert disjoint(a, b - a) == true + assert s1.isValid, "The set `s1` needs to be initialized." assert s2.isValid, "The set `s2` needs to be initialized." for item in s1: @@ -551,30 +645,29 @@ proc `<`*[A](s, t: HashSet[A]): bool = ## Returns true if `s` is a strict or proper subset of `t`. ## ## A strict or proper subset `s` has all of its members in `t` but `t` has - ## more elements than `s`. Example: - ## - ## .. code-block:: - ## var - ## a = toSet(["a", "b"]) - ## b = toSet(["b", "c"]) - ## c = intersection(a, b) - ## assert c < a and c < b - ## assert((a < a) == false) + ## more elements than `s`. + runnableExamples: + let + a = toSet(["a", "b"]) + b = toSet(["b", "c"]) + c = intersection(a, b) + assert c < a and c < b + assert(not (a < a)) s.counter != t.counter and s <= t proc `<=`*[A](s, t: HashSet[A]): bool = - ## Returns true if `s` is subset of `t`. + ## Returns true if `s` is a subset of `t`. ## ## A subset `s` has all of its members in `t` and `t` doesn't necessarily - ## have more members than `s`. That is, `s` can be equal to `t`. Example: - ## - ## .. code-block:: - ## var - ## a = toSet(["a", "b"]) - ## b = toSet(["b", "c"]) - ## c = intersection(a, b) - ## assert c <= a and c <= b - ## assert((a <= a)) + ## have more members than `s`. That is, `s` can be equal to `t`. + runnableExamples: + let + a = toSet(["a", "b"]) + b = toSet(["b", "c"]) + c = intersection(a, b) + assert c <= a and c <= b + assert a <= a + result = false if s.counter > t.counter: return result = true @@ -585,90 +678,109 @@ proc `<=`*[A](s, t: HashSet[A]): bool = proc `==`*[A](s, t: HashSet[A]): bool = ## Returns true if both `s` and `t` have the same members and set size. + runnableExamples: + var + a = toSet([1, 2]) + b = toSet([2, 1]) + assert a == b + s.counter == t.counter and s <= t + +proc map*[A, B](data: HashSet[A], op: proc (x: A): B {.closure.}): HashSet[B] = + ## Returns a new set after applying `op` pric on each of the elements of + ##`data` set. ## - ## Example: + ## You can use this proc to transform the elements from a set. + runnableExamples: + let + a = toSet([1, 2, 3]) + b = a.map(proc (x: int): string = $x) + assert b == toSet(["1", "2", "3"]) + + result = initSet[B]() + for item in data: result.incl(op(item)) + +proc hash*[A](s: HashSet[A]): Hash = + ## Hashing of HashSet. + assert s.isValid, "The set needs to be initialized." + for h in 0..high(s.data): + result = result xor s.data[h].hcode + result = !$result + +proc `$`*[A](s: HashSet[A]): string = + ## Converts the set `s` to a string, mostly for logging and printing purposes. + ## + ## Don't use this proc for serialization, the representation may change at + ## any moment and values are not escaped. + ## + ## **Examples:** ## ## .. code-block:: - ## var - ## a = toSet([1, 2]) - ## b = toSet([1]) - ## b.incl(2) - ## assert a == b - s.counter == t.counter and s <= t + ## echo toSet([2, 4, 5]) + ## # --> {2, 4, 5} + ## echo toSet(["no", "esc'aping", "is \" provided"]) + ## # --> {no, esc'aping, is " provided} + assert s.isValid, "The set needs to be initialized." + dollarImpl() -proc map*[A, B](data: HashSet[A], op: proc (x: A): B {.closure.}): HashSet[B] = - ## Returns a new set after applying `op` on each of the elements of `data`. +proc rightSize*(count: Natural): int {.inline.} = + ## Return the value of `initialSize` to support `count` items. + ## + ## If more items are expected to be added, simply add that + ## expected extra amount to the parameter before calling this. + ## + ## Internally, we want `mustRehash(rightSize(x), x) == false`. + result = nextPowerOfTwo(count * 3 div 2 + 4) + + + +iterator items*[A](s: HashSet[A]): A = + ## Iterates over elements of the set `s`. ## - ## You can use this proc to transform the elements from a set. Example: + ## If you need a sequence with the elelments you can use `sequtils.toSeq + ## template <sequtils.html#toSeq.t,untyped>`_. ## ## .. code-block:: - ## var a = toSet([1, 2, 3]) - ## var b = a.map(proc (x: int): string = $x) - ## assert b == toSet(["1", "2", "3"]) - result = initSet[B]() - for item in data: result.incl(op(item)) + ## type + ## pair = tuple[a, b: int] + ## var + ## a, b = initSet[pair]() + ## a.incl((2, 3)) + ## a.incl((3, 2)) + ## a.incl((2, 3)) + ## for x, y in a.items: + ## b.incl((x - 2, y + 1)) + ## assert a.len == 2 + ## echo b + ## # --> {(a: 1, b: 3), (a: 0, b: 4)} + assert s.isValid, "The set needs to be initialized." + for h in 0..high(s.data): + if isFilled(s.data[h].hcode): yield s.data[h].key + + + + -# ------------------------------ ordered set ------------------------------ + + + +# --------------------------------------------------------------------- +# --------------------------- OrderedSet ------------------------------ +# --------------------------------------------------------------------- type OrderedKeyValuePair[A] = tuple[ hcode: Hash, next: int, key: A] OrderedKeyValuePairSeq[A] = seq[OrderedKeyValuePair[A]] - OrderedSet* {.myShallow.}[A] = object ## \ + OrderedSet* {.myShallow.} [A] = object ## \ ## A generic hash set that remembers insertion order. ## - ## Use `init() <#init,OrderedSet[A],int>`_ or `initOrderedSet[type]() - ## <#initOrderedSet>`_ before calling other procs on it. + ## Use `init proc <#init,OrderedSet[A],int>`_ or `initOrderedSet proc + ## <#initOrderedSet,int>`_ before calling other procs on it. data: OrderedKeyValuePairSeq[A] counter, first, last: int -proc clear*[A](s: var OrderedSet[A]) = - ## Clears the OrderedSet back to an empty state, without shrinking - ## any of the existing storage. O(n) where n is the size of the hash bucket. - s.counter = 0 - s.first = -1 - s.last = -1 - for i in 0..<s.data.len: - s.data[i].hcode = 0 - s.data[i].next = 0 - s.data[i].key = default(type(s.data[i].key)) - - -proc isValid*[A](s: OrderedSet[A]): bool = - ## Returns `true` if the ordered set has been initialized with `initSet - ## <#initOrderedSet>`_. - ## - ## Most operations over an uninitialized ordered set will crash at runtime - ## and `assert <system.html#assert>`_ in debug builds. You can use this proc - ## in your own procs to verify that ordered sets passed to your procs are - ## correctly initialized. Example: - ## - ## .. code-block:: - ## proc saveTarotCards(cards: OrderedSet[int]) = - ## assert cards.isValid, "Pass an initialized set!" - ## # Do stuff here, may crash in release builds! - result = s.data.len > 0 - -proc len*[A](s: OrderedSet[A]): int {.inline.} = - ## Returns the number of keys in `s`. - ## - ## Due to an implementation detail you can call this proc on variables which - ## have not been initialized yet. The proc will return zero as the length - ## then. Example: - ## - ## .. code-block:: - ## - ## var values: OrderedSet[int] - ## assert(not values.isValid) - ## assert values.len == 0 - result = s.counter -proc card*[A](s: OrderedSet[A]): int {.inline.} = - ## Alias for `len() <#len,TOrderedSet[A]>`_. - ## - ## Card stands for the `cardinality - ## <http://en.wikipedia.org/wiki/Cardinality>`_ of a set. - result = s.counter +# ---------------------- helpers ----------------------------------- template forAllOrderedPairs(yieldStmt: untyped) {.dirty.} = var h = s.first @@ -680,61 +792,12 @@ template forAllOrderedPairs(yieldStmt: untyped) {.dirty.} = inc(idx) h = nxt -iterator items*[A](s: OrderedSet[A]): A = - ## Iterates over keys in the ordered set `s` in insertion order. - ## - ## If you need a sequence with the keys you can use `sequtils.toSeq() - ## <sequtils.html#toSeq>`_ on the iterator. Usage example: - ## - ## .. code-block:: - ## var a = initOrderedSet[int]() - ## for value in [9, 2, 1, 5, 1, 8, 4, 2]: - ## a.incl(value) - ## for value in a.items: - ## echo "Got ", value - ## # --> Got 9 - ## # --> Got 2 - ## # --> Got 1 - ## # --> Got 5 - ## # --> Got 8 - ## # --> Got 4 - assert s.isValid, "The set needs to be initialized." - forAllOrderedPairs: - yield s.data[h].key - -proc hash*[A](s: OrderedSet[A]): Hash = - ## hashing of OrderedSet - assert s.isValid, "The set needs to be initialized." - forAllOrderedPairs: - result = result !& s.data[h].hcode - result = !$result - -iterator pairs*[A](s: OrderedSet[A]): tuple[a: int, b: A] = - assert s.isValid, "The set needs to be initialized" - forAllOrderedPairs: - yield (idx, s.data[h].key) - proc rawGetKnownHC[A](s: OrderedSet[A], key: A, hc: Hash): int {.inline.} = rawGetKnownHCImpl() proc rawGet[A](s: OrderedSet[A], key: A, hc: var Hash): int {.inline.} = rawGetImpl() -proc contains*[A](s: OrderedSet[A], key: A): bool = - ## Returns true iff `key` is in `s`. - ## - ## Example: - ## - ## .. code-block:: - ## var values = initOrderedSet[int]() - ## assert(not values.contains(2)) - ## values.incl(2) - ## assert values.contains(2) - assert s.isValid, "The set needs to be initialized." - var hc: Hash - var index = rawGet(s, key, hc) - result = index >= 0 - proc rawInsert[A](s: var OrderedSet[A], data: var OrderedKeyValuePairSeq[A], key: A, hc: Hash, h: Hash) = rawInsertImpl() @@ -757,33 +820,7 @@ proc enlarge[A](s: var OrderedSet[A]) = rawInsert(s, s.data, n[h].key, n[h].hcode, j) h = nxt -proc incl*[A](s: var OrderedSet[A], key: A) = - ## Includes an element `key` in `s`. - ## - ## This doesn't do anything if `key` is already in `s`. Example: - ## - ## .. code-block:: - ## var values = initOrderedSet[int]() - ## values.incl(2) - ## values.incl(2) - ## assert values.len == 1 - assert s.isValid, "The set needs to be initialized." - inclImpl() - -proc incl*[A](s: var HashSet[A], other: OrderedSet[A]) = - ## Includes all elements from `other` into `s`. - ## - ## Example: - ## - ## .. code-block:: - ## var values = initOrderedSet[int]() - ## values.incl(2) - ## var others = toOrderedSet([6, 7]) - ## values.incl(others) - ## assert values.len == 3 - assert s.isValid, "The set `s` needs to be initialized." - assert other.isValid, "The set `other` needs to be initialized." - for item in other: incl(s, item) +proc isValid*[A](s: OrderedSet[A]): bool proc exclImpl[A](s: var OrderedSet[A], key: A) : bool {. inline .} = assert s.isValid, "The set needs to be initialized." @@ -806,65 +843,37 @@ proc exclImpl[A](s: var OrderedSet[A], key: A) : bool {. inline .} = rawInsert(s, s.data, n[h].key, n[h].hcode, j) h = nxt -proc missingOrExcl*[A](s: var OrderedSet[A], key: A): bool = - ## Excludes `key` in the set `s` and tells if `key` was removed from `s`. Efficiency: O(n). - ## - ## The difference with regards to the `excl() <#excl,TOrderedSet[A],A>`_ proc is - ## that this proc returns `true` if `key` was not present in `s`. Example: - ## - ## .. code-block:: - ## var s = toOrderedSet([2, 3, 6, 7]) - ## assert s.missingOrExcl(4) == true - ## assert s.missingOrExcl(6) == false - exclImpl(s, key) -proc excl*[A](s: var OrderedSet[A], key: A) = - ## Excludes `key` from the set `s`. Efficiency: O(n). - ## - ## This doesn't do anything if `key` is not found in `s`. Example: - ## - ## .. code-block:: - ## var s = toOrderedSet([2, 3, 6, 7]) - ## s.excl(2) - ## s.excl(2) - ## assert s.len == 3 - discard exclImpl(s, key) +# ----------------------------------------------------------------------- + -proc containsOrIncl*[A](s: var OrderedSet[A], key: A): bool = - ## Includes `key` in the set `s` and tells if `key` was added to `s`. - ## - ## The difference with regards to the `incl() <#incl,TOrderedSet[A],A>`_ proc - ## is that this proc returns `true` if `key` was already present in `s`. The - ## proc will return false if `key` was added as a new value to `s` during - ## this call. Example: - ## - ## .. code-block:: - ## var values = initOrderedSet[int]() - ## assert values.containsOrIncl(2) == false - ## assert values.containsOrIncl(2) == true - assert s.isValid, "The set needs to be initialized." - containsOrInclImpl() proc init*[A](s: var OrderedSet[A], initialSize=64) = ## Initializes an ordered hash set. ## - ## The `initialSize` parameter needs to be a power of two. You can use - ## `math.nextPowerOfTwo() <math.html#nextPowerOfTwo>`_ or `rightSize` to - ## guarantee that at runtime. All set variables must be initialized before - ## use with other procs from this module with the exception of `isValid() - ## <#isValid,TOrderedSet[A]>`_ and `len() <#len,TOrderedSet[A]>`_. + ## The `initialSize` parameter needs to be a power of two (default: 64). + ## If you need to accept runtime values for this, you can use + ## `math.nextPowerOfTwo proc <math.html#nextPowerOfTwo>`_ or `rightSize proc + ## <#rightSize,Natural>`_ from this module. ## - ## You can call this proc on a previously initialized ordered hash set to - ## discard its values. At the moment this is the only proc to remove elements - ## from an ordered hash set. Example: + ## All set variables must be initialized before + ## use with other procs from this module, with the exception of `isValid proc + ## <#isValid,HashSet[A]>`_ and `len() <#len,HashSet[A]>`_. ## - ## .. code-block :: - ## var a: OrderedSet[int] - ## a.init(4) - ## a.incl(2) - ## a.init - ## assert a.len == 0 and a.isValid + ## You can call this proc on a previously initialized hash set, which will + ## discard all its values. This might be more convenient than iterating over + ## existing values and calling `excl() <#excl,HashSet[A],A>`_ on them. + ## + ## See also: + ## * `initOrderedSet proc <#initOrderedSet,int>`_ + ## * `toOrderedSet proc <#toOrderedSet,openArray[A]>`_ + runnableExamples: + var a: OrderedSet[int] + assert(not a.isValid) + init(a) + assert a.isValid + assert isPowerOfTwo(initialSize) s.counter = 0 s.first = -1 @@ -872,47 +881,215 @@ proc init*[A](s: var OrderedSet[A], initialSize=64) = newSeq(s.data, initialSize) proc initOrderedSet*[A](initialSize=64): OrderedSet[A] = - ## Wrapper around `init() <#init,TOrderedSet[A],int>`_ for initialization of + ## Wrapper around `init proc <#init,OrderedSet[A],int>`_ for initialization of ## ordered hash sets. ## - ## Returns an empty ordered hash set you can assign directly in ``var`` - ## blocks in a single line. Example: + ## Returns an empty ordered hash set you can assign directly in ``var`` blocks + ## in a single line. ## - ## .. code-block :: - ## var a = initOrderedSet[int](4) - ## a.incl(2) + ## See also: + ## * `toOrderedSet proc <#toOrderedSet,openArray[A]>`_ + runnableExamples: + var a = initOrderedSet[int]() + assert a.isValid + a.incl(3) + assert len(a) == 1 result.init(initialSize) proc toOrderedSet*[A](keys: openArray[A]): OrderedSet[A] = - ## Creates a new ordered hash set that contains the given `keys`. - ## - ## Example: - ## - ## .. code-block:: - ## var numbers = toOrderedSet([1, 2, 3, 4, 5]) - ## assert numbers.contains(2) - ## assert numbers.contains(4) + ## Creates a new hash set that contains the members of the given + ## collection (seq, array, or string) `keys`. + ## + ## Duplicates are removed. + ## + ## See also: + ## * `initOrderedSet proc <#initOrderedSet,int>`_ + runnableExamples: + let + a = toOrderedSet([5, 3, 2]) + b = toOrderedSet("abracadabra") + assert len(a) == 3 + ## a == {5, 3, 2} # different than in HashSet + assert len(b) == 5 + ## b == {'a', 'b', 'r', 'c', 'd'} # different than in HashSet + result = initOrderedSet[A](rightSize(keys.len)) for key in items(keys): result.incl(key) -proc `$`*[A](s: OrderedSet[A]): string = - ## Converts the ordered hash set `s` to a string, mostly for logging purposes. +proc isValid*[A](s: OrderedSet[A]): bool = + ## Returns `true` if the set has been initialized (with `initSet proc + ## <#initOrderedSet,int>`_ or `init proc <#init,OrderedSet[A],int>`_). ## - ## Don't use this proc for serialization, the representation may change at - ## any moment and values are not escaped. Example: + ## Most operations over an uninitialized set will crash at runtime and + ## `assert <system.html#assert>`_ in debug builds. You can use this proc in + ## your own procs to verify that sets passed to your procs are correctly + ## initialized. ## - ## Example: + ## **Examples:** ## - ## .. code-block:: - ## echo toOrderedSet([2, 4, 5]) - ## # --> {2, 4, 5} - ## echo toOrderedSet(["no", "esc'aping", "is \" provided"]) - ## # --> {no, esc'aping, is " provided} + ## .. code-block :: + ## proc savePreferences(options: OrderedSet[string]) = + ## assert options.isValid, "Pass an initialized set!" + ## # Do stuff here, may crash in release builds! + result = s.data.len > 0 + +proc contains*[A](s: OrderedSet[A], key: A): bool = + ## Returns true if `key` is in `s`. + ## + ## This allows the usage of `in` operator. + ## + ## See also: + ## * `incl proc <#incl,OrderedSet[A],A>`_ + ## * `containsOrIncl proc <#containsOrIncl,OrderedSet[A],A>`_ + runnableExamples: + var values = initOrderedSet[int]() + assert(not values.contains(2)) + assert 2 notin values + + values.incl(2) + assert values.contains(2) + assert 2 in values + assert s.isValid, "The set needs to be initialized." - dollarImpl() + var hc: Hash + var index = rawGet(s, key, hc) + result = index >= 0 + +proc incl*[A](s: var OrderedSet[A], key: A) = + ## Includes an element `key` in `s`. + ## + ## This doesn't do anything if `key` is already in `s`. + ## + ## See also: + ## * `excl proc <#excl,OrderedSet[A],A>`_ for excluding an element + ## * `incl proc <#incl,HashSet[A],OrderedSet[A]>`_ for including other set + ## * `containsOrIncl proc <#containsOrIncl,OrderedSet[A],A>`_ + runnableExamples: + var values = initOrderedSet[int]() + values.incl(2) + values.incl(2) + assert values.len == 1 + + assert s.isValid, "The set needs to be initialized." + inclImpl() + +proc incl*[A](s: var HashSet[A], other: OrderedSet[A]) = + ## Includes all elements from the OrderedSet `other` into + ## HashSet `s` (must be declared as `var`). + ## + ## See also: + ## * `incl proc <#incl,OrderedSet[A],A>`_ for including an element + ## * `containsOrIncl proc <#containsOrIncl,OrderedSet[A],A>`_ + runnableExamples: + var + values = toSet([1, 2, 3]) + others = toOrderedSet([3, 4, 5]) + values.incl(others) + assert values.len == 5 + assert s.isValid, "The set `s` needs to be initialized." + assert other.isValid, "The set `other` needs to be initialized." + for item in other: incl(s, item) + +proc containsOrIncl*[A](s: var OrderedSet[A], key: A): bool = + ## Includes `key` in the set `s` and tells if `key` was already in `s`. + ## + ## The difference with regards to the `incl proc <#incl,OrderedSet[A],A>`_ is + ## that this proc returns `true` if `s` already contained `key`. The + ## proc will return false if `key` was added as a new value to `s` during + ## this call. + ## + ## See also: + ## * `incl proc <#incl,OrderedSet[A],A>`_ for including an element + ## * `missingOrExcl proc <#missingOrExcl,OrderedSet[A],A>`_ + runnableExamples: + var values = initOrderedSet[int]() + assert values.containsOrIncl(2) == false + assert values.containsOrIncl(2) == true + assert values.containsOrIncl(3) == false + + assert s.isValid, "The set needs to be initialized." + containsOrInclImpl() + +proc excl*[A](s: var OrderedSet[A], key: A) = + ## Excludes `key` from the set `s`. Efficiency: `O(n)`. + ## + ## This doesn't do anything if `key` is not found in `s`. + ## + ## See also: + ## * `incl proc <#incl,OrderedSet[A],A>`_ for including an element + ## * `missingOrExcl proc <#missingOrExcl,OrderedSet[A],A>`_ + runnableExamples: + var s = toOrderedSet([2, 3, 6, 7]) + s.excl(2) + s.excl(2) + assert s.len == 3 + discard exclImpl(s, key) + +proc missingOrExcl*[A](s: var OrderedSet[A], key: A): bool = + ## Excludes `key` in the set `s` and tells if `key` was already missing from `s`. + ## Efficiency: O(n). + ## + ## The difference with regards to the `excl proc <#excl,OrderedSet[A],A>`_ is + ## that this proc returns `true` if `key` was missing from `s`. + ## The proc will return `false` if `key` was in `s` and it was removed + ## during this call. + ## + ## See also: + ## * `excl proc <#excl,OrderedSet[A],A>`_ + ## * `containsOrIncl proc <#containsOrIncl,OrderedSet[A],A>`_ + runnableExamples: + var s = toOrderedSet([2, 3, 6, 7]) + assert s.missingOrExcl(4) == true + assert s.missingOrExcl(6) == false + assert s.missingOrExcl(6) == true + exclImpl(s, key) + +proc clear*[A](s: var OrderedSet[A]) = + ## Clears the OrderedSet back to an empty state, without shrinking + ## any of the existing storage. + ## + ## `O(n)` operation where `n` is the size of the hash bucket. + runnableExamples: + var s = toOrderedSet([3, 5, 7]) + clear(s) + assert len(s) == 0 + + s.counter = 0 + s.first = -1 + s.last = -1 + for i in 0..<s.data.len: + s.data[i].hcode = 0 + s.data[i].next = 0 + s.data[i].key = default(type(s.data[i].key)) + +proc len*[A](s: OrderedSet[A]): int {.inline.} = + ## Returns the number of elements in `s`. + ## + ## Due to an implementation detail you can call this proc on variables which + ## have not been initialized yet. The proc will return zero as the length + ## then. + runnableExamples: + var a: OrderedSet[string] + assert len(a) == 0 + let s = toSet([3, 5, 7]) + assert len(s) == 3 + result = s.counter + +proc card*[A](s: OrderedSet[A]): int {.inline.} = + ## Alias for `len() <#len,OrderedSet[A]>`_. + ## + ## Card stands for the `cardinality + ## <http://en.wikipedia.org/wiki/Cardinality>`_ of a set. + result = s.counter proc `==`*[A](s, t: OrderedSet[A]): bool = ## Equality for ordered sets. + runnableExamples: + let + a = toOrderedSet([1, 2]) + b = toOrderedSet([2, 1]) + assert(not (a == b)) + if s.counter != t.counter: return false var h = s.first var g = t.first @@ -929,6 +1106,74 @@ proc `==`*[A](s, t: OrderedSet[A]): bool = g = nxg result = compared == s.counter +proc hash*[A](s: OrderedSet[A]): Hash = + ## Hashing of OrderedSet. + assert s.isValid, "The set needs to be initialized." + forAllOrderedPairs: + result = result !& s.data[h].hcode + result = !$result + +proc `$`*[A](s: OrderedSet[A]): string = + ## Converts the ordered hash set `s` to a string, mostly for logging and + ## printing purposes. + ## + ## Don't use this proc for serialization, the representation may change at + ## any moment and values are not escaped. + ## + ## **Examples:** + ## + ## .. code-block:: + ## echo toOrderedSet([2, 4, 5]) + ## # --> {2, 4, 5} + ## echo toOrderedSet(["no", "esc'aping", "is \" provided"]) + ## # --> {no, esc'aping, is " provided} + assert s.isValid, "The set needs to be initialized." + dollarImpl() + + + +iterator items*[A](s: OrderedSet[A]): A = + ## Iterates over keys in the ordered set `s` in insertion order. + ## + ## If you need a sequence with the elelments you can use `sequtils.toSeq + ## template <sequtils.html#toSeq.t,untyped>`_. + ## + ## .. code-block:: + ## var a = initOrderedSet[int]() + ## for value in [9, 2, 1, 5, 1, 8, 4, 2]: + ## a.incl(value) + ## for value in a.items: + ## echo "Got ", value + ## # --> Got 9 + ## # --> Got 2 + ## # --> Got 1 + ## # --> Got 5 + ## # --> Got 8 + ## # --> Got 4 + assert s.isValid, "The set needs to be initialized." + forAllOrderedPairs: + yield s.data[h].key + + +iterator pairs*[A](s: OrderedSet[A]): tuple[a: int, b: A] = + ## Iterates through (position, value) tuples of OrderedSet `s`. + runnableExamples: + let a = toOrderedSet("abracadabra") + var p = newSeq[(int, char)]() + for x in pairs(a): + p.add(x) + assert p == @[(0, 'a'), (1, 'b'), (2, 'r'), (3, 'c'), (4, 'd')] + + assert s.isValid, "The set needs to be initialized" + forAllOrderedPairs: + yield (idx, s.data[h].key) + + + +# ----------------------------------------------------------------------- + + + when isMainModule and not defined(release): proc testModule() = ## Internal micro test to validate docstrings and such. diff --git a/lib/pure/httpclient.nim b/lib/pure/httpclient.nim index 269ae476f..e5c0c6ac4 100644 --- a/lib/pure/httpclient.nim +++ b/lib/pure/httpclient.nim @@ -17,6 +17,7 @@ ## ``http://google.com``: ## ## .. code-block:: Nim +## import httpClient ## var client = newHttpClient() ## echo client.getContent("http://google.com") ## @@ -24,6 +25,7 @@ ## ``AsyncHttpClient``: ## ## .. code-block:: Nim +## import httpClient ## var client = newAsyncHttpClient() ## echo await client.getContent("http://google.com") ## diff --git a/lib/pure/json.nim b/lib/pure/json.nim index cf979e2d0..ffb8f4f35 100644 --- a/lib/pure/json.nim +++ b/lib/pure/json.nim @@ -625,7 +625,7 @@ proc escapeJsonUnquoted*(s: string; result: var string) = of '\r': result.add("\\r") of '"': result.add("\\\"") of '\0'..'\7': result.add("\\u000" & $ord(c)) - of '\14'..'\31': result.add("\\u00" & $ord(c)) + of '\14'..'\31': result.add("\\u00" & toHex(ord(c), 2)) of '\\': result.add("\\\\") else: result.add(c) @@ -1328,6 +1328,12 @@ proc createConstructor(typeSym, jsonNode: NimNode): NimNode = let obj = getType(typeSym[1]) result = processType(newIdentNode(typeName), obj, jsonNode, true) + of "range": + let typeNode = typeSym + # Deduce the base type from one of the endpoints + let baseType = getType(typeNode[1]) + + result = createConstructor(baseType, jsonNode) of "seq": let seqT = typeSym[1] let forLoopI = genSym(nskForVar, "i") @@ -1686,9 +1692,9 @@ when isMainModule: doAssert(parsed2{"repository", "description"}.str=="IRC Library for Haskell", "Couldn't fetch via multiply nested key using {}") doAssert escapeJsonUnquoted("\10Foo🎃barÄ") == "\\nFoo🎃barÄ" - doAssert escapeJsonUnquoted("\0\7\20") == "\\u0000\\u0007\\u0020" # for #7887 + doAssert escapeJsonUnquoted("\0\7\20") == "\\u0000\\u0007\\u0014" # for #7887 doAssert escapeJson("\10Foo🎃barÄ") == "\"\\nFoo🎃barÄ\"" - doAssert escapeJson("\0\7\20") == "\"\\u0000\\u0007\\u0020\"" # for #7887 + doAssert escapeJson("\0\7\20") == "\"\\u0000\\u0007\\u0014\"" # for #7887 # Test with extra data when not defined(js): @@ -1721,3 +1727,21 @@ when isMainModule: foo = js.to Foo doAssert(foo.b == "abc") + + # Generate constructors for range[T] types + block: + type + Q1 = range[0..10] + Q2 = range[0'i8..10'i8] + Q3 = range[0'u16..10'u16] + X = object + m1: Q1 + m2: Q2 + m3: Q3 + + let + obj = X(m1: 1, m2: 2'i8, m3: 3'u16) + jsonObj = %obj + desObj = to(jsonObj, type(obj)) + + doAssert(desObj == obj) diff --git a/lib/pure/memfiles.nim b/lib/pure/memfiles.nim index d1a006eee..277c4ddb6 100644 --- a/lib/pure/memfiles.nim +++ b/lib/pure/memfiles.nim @@ -372,9 +372,8 @@ proc `==`*(x, y: MemSlice): bool = proc `$`*(ms: MemSlice): string {.inline.} = ## Return a Nim string built from a MemSlice. - var buf = newString(ms.size) - copyMem(addr(buf[0]), ms.data, ms.size) - result = buf + result.setLen(ms.size) + copyMem(addr(result[0]), ms.data, ms.size) iterator memSlices*(mfile: MemFile, delim='\l', eat='\r'): MemSlice {.inline.} = ## Iterates over [optional `eat`] `delim`-delimited slices in MemFile `mfile`. diff --git a/lib/pure/parsecsv.nim b/lib/pure/parsecsv.nim index 796114d37..e0c4f38a4 100644 --- a/lib/pure/parsecsv.nim +++ b/lib/pure/parsecsv.nim @@ -10,13 +10,18 @@ ## This module implements a simple high performance `CSV`:idx: ## (`comma separated value`:idx:) parser. ## -## Example: How to use the parser -## ============================== +## Basic usage +## =========== ## ## .. code-block:: nim -## import os, parsecsv, streams +## import parsecsv +## from os import paramStr +## from streams import newFileStream +## ## var s = newFileStream(paramStr(1), fmRead) -## if s == nil: quit("cannot open the file" & paramStr(1)) +## if s == nil: +## quit("cannot open the file" & paramStr(1)) +## ## var x: CsvParser ## open(x, s, paramStr(1)) ## while readRow(x): @@ -26,11 +31,11 @@ ## close(x) ## ## For CSV files with a header row, the header can be read and then used as a -## reference for item access with `rowEntry <#rowEntry.CsvParser.string>`_: +## reference for item access with `rowEntry <#rowEntry,CsvParser,string>`_: ## ## .. code-block:: nim ## import parsecsv -## import os +## ## # Prepare a file ## let content = """One,Two,Three,Four ## 1,2,3,4 @@ -47,24 +52,40 @@ ## for col in items(p.headers): ## echo "##", col, ":", p.rowEntry(col), "##" ## p.close() +## +## See also +## ======== +## +## * `streams module <streams.html>`_ for using +## `open proc <#open,CsvParser,Stream,string,Char,Char,Char>`_ +## and other stream processing (like `close proc <streams.html#close,Stream>`_) +## * `parseopt module <parseopt.html>`_ for a command line parser +## * `parsecfg module <parsecfg.html>`_ for a configuration file parser +## * `parsexml module <parsexml.html>`_ for a XML / HTML parser +## * `parsesql module <parsesql.html>`_ for a SQL parser +## * `other parsers <lib.html#pure-libraries-parsers>`_ for other parsers import lexbase, streams type - CsvRow* = seq[string] ## a row in a CSV file - CsvParser* = object of BaseLexer ## the parser object. - row*: CsvRow ## the current row + CsvRow* = seq[string] ## A row in a CSV file. + CsvParser* = object of BaseLexer ## The parser object. + ## + ## It consists of two public fields: + ## * `row` is the current row + ## * `headers` are the columns that are defined in the csv file + ## (read using `readHeaderRow <#readHeaderRow,CsvParser>`_). + ## Used with `rowEntry <#rowEntry,CsvParser,string>`_). + row*: CsvRow filename: string sep, quote, esc: char skipWhite: bool currRow: int - headers*: seq[string] ## The columns that are defined in the csv file - ## (read using `readHeaderRow <#readHeaderRow.CsvParser>`_). - ## Used with `rowEntry <#rowEntry.CsvParser.string>`_). + headers*: seq[string] - CsvError* = object of IOError ## exception that is raised if - ## a parsing error occurs + CsvError* = object of IOError ## An exception that is raised if + ## a parsing error occurs. proc raiseEInvalidCsv(filename: string, line, col: int, msg: string) {.noreturn.} = @@ -82,7 +103,7 @@ proc error(my: CsvParser, pos: int, msg: string) = proc open*(my: var CsvParser, input: Stream, filename: string, separator = ',', quote = '"', escape = '\0', skipInitialSpace = false) = - ## initializes the parser with an input stream. `Filename` is only used + ## Initializes the parser with an input stream. `Filename` is only used ## for nice error messages. The parser's behaviour can be controlled by ## the diverse optional parameters: ## - `separator`: character used to separate fields @@ -94,6 +115,18 @@ proc open*(my: var CsvParser, input: Stream, filename: string, ## two `quote` characters are parsed one literal `quote` character. ## - `skipInitialSpace`: If true, whitespace immediately following the ## `separator` is ignored. + ## + ## See also: + ## * `open proc <#open,CsvParser,string,Char,Char,Char>`_ which creates the + ## file stream for you + runnableExamples: + import streams + var strm = newStringStream("One,Two,Three\n1,2,3\n10,20,30") + var parser: CsvParser + parser.open(strm, "tmp.csv") + parser.close() + strm.close() + lexbase.open(my, input) my.filename = filename my.sep = separator @@ -106,7 +139,16 @@ proc open*(my: var CsvParser, input: Stream, filename: string, proc open*(my: var CsvParser, filename: string, separator = ',', quote = '"', escape = '\0', skipInitialSpace = false) = - ## same as the other `open` but creates the file stream for you. + ## Similar to the `other open proc<#open,CsvParser,Stream,string,Char,Char,Char>`_, + ## but creates the file stream for you. + runnableExamples: + from os import removeFile + writeFile("tmp.csv", "One,Two,Three\n1,2,3\n10,20,300") + var parser: CsvParser + parser.open("tmp.csv") + parser.close() + removeFile("tmp.csv") + var s = newFileStream(filename, fmRead) if s == nil: my.error(0, "cannot open: " & filename) open(my, s, filename, separator, @@ -159,17 +201,66 @@ proc parseField(my: var CsvParser, a: var string) = my.bufpos = pos proc processedRows*(my: var CsvParser): int = - ## returns number of the processed rows + ## Returns number of the processed rows. + ## + ## But even if `readRow <#readRow,CsvParser,int>`_ arrived at EOF then + ## processed rows counter is incremented. + runnableExamples: + import streams + + var strm = newStringStream("One,Two,Three\n1,2,3") + var parser: CsvParser + parser.open(strm, "tmp.csv") + doAssert parser.readRow() + doAssert parser.processedRows() == 1 + doAssert parser.readRow() + doAssert parser.processedRows() == 2 + ## Even if `readRow` arrived at EOF then `processedRows` is incremented. + doAssert parser.readRow() == false + doAssert parser.processedRows() == 3 + doAssert parser.readRow() == false + doAssert parser.processedRows() == 4 + parser.close() + strm.close() + return my.currRow proc readRow*(my: var CsvParser, columns = 0): bool = - ## reads the next row; if `columns` > 0, it expects the row to have + ## Reads the next row; if `columns` > 0, it expects the row to have ## exactly this many columns. Returns false if the end of the file ## has been encountered else true. ## ## Blank lines are skipped. + runnableExamples: + import streams + var strm = newStringStream("One,Two,Three\n1,2,3\n\n10,20,30") + var parser: CsvParser + parser.open(strm, "tmp.csv") + doAssert parser.readRow() + doAssert parser.row == @["One", "Two", "Three"] + doAssert parser.readRow() + doAssert parser.row == @["1", "2", "3"] + ## Blank lines are skipped. + doAssert parser.readRow() + doAssert parser.row == @["10", "20", "30"] + + var emptySeq: seq[string] + doAssert parser.readRow() == false + doAssert parser.row == emptySeq + doAssert parser.readRow() == false + doAssert parser.row == emptySeq + + parser.close() + strm.close() + var col = 0 # current column let oldpos = my.bufpos + # skip initial empty lines #8365 + while true: + case my.buf[my.bufpos] + of '\c': my.bufpos = handleCR(my, my.bufpos) + of '\l': my.bufpos = handleLF(my, my.bufpos) + else: break while my.buf[my.bufpos] != '\0': let oldlen = my.row.len if oldlen < col+1: @@ -200,12 +291,31 @@ proc readRow*(my: var CsvParser, columns = 0): bool = inc(my.currRow) proc close*(my: var CsvParser) {.inline.} = - ## closes the parser `my` and its associated input stream. + ## Closes the parser `my` and its associated input stream. lexbase.close(my) proc readHeaderRow*(my: var CsvParser) = ## Reads the first row and creates a look-up table for column numbers - ## See also `rowEntry <#rowEntry.CsvParser.string>`_. + ## See also: + ## * `rowEntry proc <#rowEntry,CsvParser,string>`_ + runnableExamples: + import streams + + var strm = newStringStream("One,Two,Three\n1,2,3") + var parser: CsvParser + parser.open(strm, "tmp.csv") + + parser.readHeaderRow() + doAssert parser.headers == @["One", "Two", "Three"] + doAssert parser.row == @["One", "Two", "Three"] + + doAssert parser.readRow() + doAssert parser.headers == @["One", "Two", "Three"] + doAssert parser.row == @["1", "2", "3"] + + parser.close() + strm.close() + let present = my.readRow() if present: my.headers = my.row @@ -213,8 +323,23 @@ proc readHeaderRow*(my: var CsvParser) = proc rowEntry*(my: var CsvParser, entry: string): var string = ## Acceses a specified `entry` from the current row. ## - ## Assumes that `readHeaderRow <#readHeaderRow.CsvParser>`_ has already been + ## Assumes that `readHeaderRow <#readHeaderRow,CsvParser>`_ has already been ## called. + runnableExamples: + import streams + var strm = newStringStream("One,Two,Three\n1,2,3\n\n10,20,30") + var parser: CsvParser + parser.open(strm, "tmp.csv") + ## Need calling `readHeaderRow`. + parser.readHeaderRow() + doAssert parser.readRow() + doAssert parser.rowEntry("One") == "1" + doAssert parser.rowEntry("Two") == "2" + doAssert parser.rowEntry("Three") == "3" + ## `parser.rowEntry("NotExistEntry")` causes SIGSEGV fault. + parser.close() + strm.close() + let index = my.headers.find(entry) if index >= 0: result = my.row[index] @@ -235,7 +360,7 @@ when isMainModule: import os import strutils block: # Tests for reading the header row - let content = "One,Two,Three,Four\n1,2,3,4\n10,20,30,40,\n100,200,300,400\n" + let content = "\nOne,Two,Three,Four\n1,2,3,4\n10,20,30,40,\n100,200,300,400\n" writeFile("temp.csv", content) var p: CsvParser @@ -262,4 +387,3 @@ when isMainModule: # Tidy up removeFile("temp.csv") - diff --git a/lib/pure/parseopt.nim b/lib/pure/parseopt.nim index 06f32f032..0f8f8197c 100644 --- a/lib/pure/parseopt.nim +++ b/lib/pure/parseopt.nim @@ -11,23 +11,141 @@ ## It supports one convenience iterator over all command line options and some ## lower-level features. ## -## Supported syntax with default empty ``shortNoVal``/``longNoVal``: +## Supported Syntax +## ================ ## -## 1. short options - ``-abcd``, where a, b, c, d are names -## 2. long option - ``--foo:bar``, ``--foo=bar`` or ``--foo`` -## 3. argument - everything else +## The following syntax is supported when arguments for the ``shortNoVal`` and +## ``longNoVal`` parameters, which are +## `described later<#shortnoval-and-longnoval>`_, are not provided: ## -## When ``shortNoVal``/``longNoVal`` are non-empty then the ':' and '=' above -## are still accepted, but become optional. Note that these option key sets -## must be updated along with the set of option keys taking no value, but -## keys which do take values need no special updates as their set evolves. +## 1. Short options: ``-abcd``, ``-e:5``, ``-e=5`` +## 2. Long options: ``--foo:bar``, ``--foo=bar``, ``--foo`` +## 3. Arguments: everything that does not start with a ``-`` ## -## When option values begin with ':' or '=' they need to be doubled up (as in +## These three kinds of tokens are enumerated in the +## `CmdLineKind enum<#CmdLineKind>`_. +## +## When option values begin with ':' or '=', they need to be doubled up (as in ## ``--delim::``) or alternated (as in ``--delim=:``). ## -## The common ``--`` non-option argument delimiter appears as an empty string -## long option key. ``OptParser.cmd``, ``OptParser.pos``, and -## ``os.parseCmdLine`` may be used to complete parsing in that case. +## The ``--`` option, commonly used to denote that every token that follows is +## an argument, is interpreted as a long option, and its name is the empty +## string. +## +## Parsing +## ======= +## +## Use an `OptParser<#OptParser>`_ to parse command line options. It can be +## created with `initOptParser<#initOptParser,string,set[char],seq[string]>`_, +## and `next<#next,OptParser>`_ advances the parser by one token. +## +## For each token, the parser's ``kind``, ``key``, and ``val`` fields give +## information about that token. If the token is a long or short option, ``key`` +## is the option's name, and ``val`` is either the option's value, if provided, +## or the empty string. For arguments, the ``key`` field contains the argument +## itself, and ``val`` is unused. To check if the end of the command line has +## been reached, check if ``kind`` is equal to ``cmdEnd``. +## +## Here is an example: +## +## .. code-block:: +## import parseopt +## +## var p = initOptParser("-ab -e:5 --foo --bar=20 file.txt") +## while true: +## p.next() +## case p.kind +## of cmdEnd: break +## of cmdShortOption, cmdLongOption: +## if p.val == "": +## echo "Option: ", p.key +## else: +## echo "Option and value: ", p.key, ", ", p.val +## of cmdArgument: +## echo "Argument: ", p.key +## +## # Output: +## # Option: a +## # Option: b +## # Option and value: e, 5 +## # Option: foo +## # Option and value: bar, 20 +## # Argument: file.txt +## +## The `getopt iterator<#getopt.i,OptParser>`_, which is provided for +## convenience, can be used to iterate through all command line options as well. +## +## ``shortNoVal`` and ``longNoVal`` +## ================================ +## +## The optional ``shortNoVal`` and ``longNoVal`` parameters present in +## `initOptParser<#initOptParser,string,set[char],seq[string]>`_ are for +## specifying which short and long options do not accept values. +## +## When ``shortNoVal`` is non-empty, users are not required to separate short +## options and their values with a ':' or '=' since the parser knows which +## options accept values and which ones do not. This behavior also applies for +## long options if ``longNoVal`` is non-empty. For short options, ``-j4`` +## becomes supported syntax, and for long options, ``--foo bar`` becomes +## supported. This is in addition to the `previously mentioned +## syntax<#supported-syntax>`_. Users can still separate options and their +## values with ':' or '=', but that becomes optional. +## +## As more options which do not accept values are added to your program, +## remember to amend ``shortNoVal`` and ``longNoVal`` accordingly. +## +## The following example illustrates the difference between having an empty +## ``shortNoVal`` and ``longNoVal``, which is the default, and providing +## arguments for those two parameters: +## +## .. code-block:: +## import parseopt +## +## proc printToken(kind: CmdLineKind, key: string, val: string) = +## case kind +## of cmdEnd: doAssert(false) # Doesn't happen with getopt() +## of cmdShortOption, cmdLongOption: +## if val == "": +## echo "Option: ", key +## else: +## echo "Option and value: ", key, ", ", val +## of cmdArgument: +## echo "Argument: ", key +## +## let cmdLine = "-j4 --first bar" +## +## var emptyNoVal = initOptParser(cmdLine) +## for kind, key, val in emptyNoVal.getopt(): +## printToken(kind, key, val) +## +## # Output: +## # Option: j +## # Option: 4 +## # Option: first +## # Argument: bar +## +## var withNoVal = initOptParser(cmdLine, shortNoVal = {'c'}, +## longNoVal = @["second"]) +## for kind, key, val in withNoVal.getopt(): +## printToken(kind, key, val) +## +## # Output: +## # Option and value: j, 4 +## # Option and value: first, bar +## +## See also +## ======== +## +## * `os module<os.html>`_ for lower-level command line parsing procs +## * `parseutils module<parseutils.html>`_ for helpers that parse tokens, +## numbers, identifiers, etc. +## * `strutils module<strutils.html>`_ for common string handling operations +## * `json module<json.html>`_ for a JSON parser +## * `parsecfg module<parsecfg.html>`_ for a configuration file parser +## * `parsecsv module<parsecsv.html>`_ for a simple CSV (comma separated value) +## parser +## * `parsexml module<parsexml.html>`_ for a XML / HTML parser +## * `other parsers<lib.html#pure-libraries-parsers>`_ for more parsers {.push debugger: off.} @@ -37,23 +155,26 @@ import os, strutils type - CmdLineKind* = enum ## the detected command line token - cmdEnd, ## end of command line reached - cmdArgument, ## argument detected - cmdLongOption, ## a long option ``--option`` detected - cmdShortOption ## a short option ``-c`` detected + CmdLineKind* = enum ## The detected command line token. + cmdEnd, ## End of command line reached + cmdArgument, ## An argument such as a filename + cmdLongOption, ## A long option such as --option + cmdShortOption ## A short option such as -c OptParser* = - object of RootObj ## this object implements the command line parser - pos*: int # ..empty key or subcmd cmdArg & handle specially + object of RootObj ## Implementation of the command line parser. + ## + ## To initialize it, use the + ## `initOptParser proc<#initOptParser,string,set[char],seq[string]>`_. + pos*: int inShortState: bool allowWhitespaceAfterColon: bool shortNoVal: set[char] longNoVal: seq[string] cmds: seq[string] idx: int - kind*: CmdLineKind ## the dected command line token - key*, val*: TaintedString ## key and value pair; ``key`` is the option - ## or the argument, ``value`` is not "" if + kind*: CmdLineKind ## The detected command line token + key*, val*: TaintedString ## Key and value pair; the key is the option + ## or the argument, and the value is not "" if ## the option was given a value proc parseWord(s: string, i: int, w: var string, @@ -79,13 +200,24 @@ when declared(os.paramCount): proc initOptParser*(cmdline = "", shortNoVal: set[char]={}, longNoVal: seq[string] = @[]; allowWhitespaceAfterColon = true): OptParser = - ## inits the option parser. If ``cmdline == ""``, the real command line - ## (as provided by the ``OS`` module) is taken. If ``shortNoVal`` is - ## provided command users do not need to delimit short option keys and - ## values with a ':' or '='. If ``longNoVal`` is provided command users do - ## not need to delimit long option keys and values with a ':' or '=' - ## (though they still need at least a space). In both cases, ':' or '=' - ## may still be used if desired. They just become optional. + ## Initializes the command line parser. + ## + ## If ``cmdline == ""``, the real command line as provided by the + ## ``os`` module is retrieved instead. + ## + ## ``shortNoVal`` and ``longNoVal`` are used to specify which options + ## do not take values. See the `documentation about these + ## parameters<#shortnoval-and-longnoval>`_ for more information on + ## how this affects parsing. + ## + ## See also: + ## * `getopt iterator<#getopt.i,OptParser>`_ + runnableExamples: + var p = initOptParser() + p = initOptParser("--left --debug:3 -l -r:2") + p = initOptParser("--left --debug:3 -l -r:2", + shortNoVal = {'l'}, longNoVal = @["left"]) + result.pos = 0 result.idx = 0 result.inShortState = false @@ -106,9 +238,21 @@ when declared(os.paramCount): proc initOptParser*(cmdline: seq[TaintedString], shortNoVal: set[char]={}, longNoVal: seq[string] = @[]; allowWhitespaceAfterColon = true): OptParser = - ## inits the option parser. If ``cmdline.len == 0``, the real command line - ## (as provided by the ``OS`` module) is taken. ``shortNoVal`` and - ## ``longNoVal`` behavior is the same as for ``initOptParser(string,...)``. + ## Initializes the command line parser. + ## + ## If ``cmdline.len == 0``, the real command line as provided by the + ## ``os`` module is retrieved instead. Behavior of the other parameters + ## remains the same as in `initOptParser(string, ...) + ## <#initOptParser,string,set[char],seq[string]>`_. + ## + ## See also: + ## * `getopt iterator<#getopt.i,seq[TaintedString],set[char],seq[string]>`_ + runnableExamples: + var p = initOptParser() + p = initOptParser(@["--left", "--debug:3", "-l", "-r:2"]) + p = initOptParser(@["--left", "--debug:3", "-l", "-r:2"], + shortNoVal = {'l'}, longNoVal = @["left"]) + result.pos = 0 result.idx = 0 result.inShortState = false @@ -153,8 +297,21 @@ proc handleShortOption(p: var OptParser; cmd: string) = inc p.idx proc next*(p: var OptParser) {.rtl, extern: "npo$1".} = - ## parses the first or next option; ``p.kind`` describes what token has been - ## parsed. ``p.key`` and ``p.val`` are set accordingly. + ## Parses the next token. + ## + ## ``p.kind`` describes what kind of token has been parsed. ``p.key`` and + ## ``p.val`` are set accordingly. + runnableExamples: + var p = initOptParser("--left -r:2 file.txt") + p.next() + doAssert p.kind == cmdLongOption and p.key == "left" + p.next() + doAssert p.kind == cmdShortOption and p.key == "r" and p.val == "2" + p.next() + doAssert p.kind == cmdArgument and p.key == "file.txt" + p.next() + doAssert p.kind == cmdEnd + if p.idx >= p.cmds.len: p.kind = cmdEnd return @@ -209,20 +366,61 @@ proc next*(p: var OptParser) {.rtl, extern: "npo$1".} = when declared(os.paramCount): proc cmdLineRest*(p: OptParser): TaintedString {.rtl, extern: "npo$1".} = - ## retrieves the rest of the command line that has not been parsed yet. + ## Retrieves the rest of the command line that has not been parsed yet. + ## + ## See also: + ## * `remainingArgs proc<#remainingArgs,OptParser>`_ + ## + ## **Examples:** + ## + ## .. code-block:: + ## var p = initOptParser("--left -r:2 -- foo.txt bar.txt") + ## while true: + ## p.next() + ## if p.kind == cmdLongOption and p.key == "": # Look for "--" + ## break + ## else: continue + ## doAssert p.cmdLineRest == "foo.txt bar.txt" result = p.cmds[p.idx .. ^1].quoteShellCommand.TaintedString proc remainingArgs*(p: OptParser): seq[TaintedString] {.rtl, extern: "npo$1".} = - ## retrieves the rest of the command line that has not been parsed yet. + ## Retrieves a sequence of the arguments that have not been parsed yet. + ## + ## See also: + ## * `cmdLineRest proc<#cmdLineRest,OptParser>`_ + ## + ## **Examples:** + ## + ## .. code-block:: + ## var p = initOptParser("--left -r:2 -- foo.txt bar.txt") + ## while true: + ## p.next() + ## if p.kind == cmdLongOption and p.key == "": # Look for "--" + ## break + ## else: continue + ## doAssert p.remainingArgs == @["foo.txt", "bar.txt"] result = @[] for i in p.idx..<p.cmds.len: result.add TaintedString(p.cmds[i]) iterator getopt*(p: var OptParser): tuple[kind: CmdLineKind, key, val: TaintedString] = - ## This is an convenience iterator for iterating over the given OptParser object. - ## Example: + ## Convenience iterator for iterating over the given + ## `OptParser<#OptParser>`_. + ## + ## There is no need to check for ``cmdEnd`` while iterating. ## - ## .. code-block:: nim + ## See also: + ## * `initOptParser proc<#initOptParser,string,set[char],seq[string]>`_ + ## + ## **Examples:** + ## + ## .. code-block:: + ## # these are placeholders, of course + ## proc writeHelp() = discard + ## proc writeVersion() = discard + ## + ## var filename: string ## var p = initOptParser("--left --debug:3 -l -r:2") + ## ## for kind, key, val in p.getopt(): ## case kind ## of cmdArgument: @@ -233,7 +431,7 @@ iterator getopt*(p: var OptParser): tuple[kind: CmdLineKind, key, val: TaintedSt ## of "version", "v": writeVersion() ## of cmdEnd: assert(false) # cannot happen ## if filename == "": - ## # no filename has been given, so we show the help: + ## # no filename has been given, so we show the help ## writeHelp() p.pos = 0 p.idx = 0 @@ -246,15 +444,34 @@ when declared(initOptParser): iterator getopt*(cmdline: seq[TaintedString] = commandLineParams(), shortNoVal: set[char]={}, longNoVal: seq[string] = @[]): tuple[kind: CmdLineKind, key, val: TaintedString] = - ## This is an convenience iterator for iterating over command line arguments. - ## This creates a new OptParser. See the above ``getopt(var OptParser)`` - ## example for using default empty ``NoVal`` parameters. This example is - ## for the same option keys as that example but here option key-value - ## separators become optional for command users: + ## Convenience iterator for iterating over command line arguments. + ## + ## This creates a new `OptParser<#OptParser>`_. If no command line + ## arguments are provided, the real command line as provided by the + ## ``os`` module is retrieved instead. + ## + ## ``shortNoVal`` and ``longNoVal`` are used to specify which options + ## do not take values. See the `documentation about these + ## parameters<#shortnoval-and-longnoval>`_ for more information on + ## how this affects parsing. + ## + ## There is no need to check for ``cmdEnd`` while iterating. ## - ## .. code-block:: nim - ## for kind, key, val in getopt(shortNoVal = { 'l' }, - ## longNoVal = @[ "left" ]): + ## See also: + ## * `initOptParser proc<#initOptParser,seq[TaintedString],set[char],seq[string]>`_ + ## + ## **Examples:** + ## + ## .. code-block:: + ## + ## # these are placeholders, of course + ## proc writeHelp() = discard + ## proc writeVersion() = discard + ## + ## var filename: string + ## let params = @["--left", "--debug:3", "-l", "-r:2"] + ## + ## for kind, key, val in getopt(params): ## case kind ## of cmdArgument: ## filename = key @@ -264,8 +481,8 @@ when declared(initOptParser): ## of "version", "v": writeVersion() ## of cmdEnd: assert(false) # cannot happen ## if filename == "": + ## # no filename has been written, so we show the help ## writeHelp() - ## var p = initOptParser(cmdline, shortNoVal=shortNoVal, longNoVal=longNoVal) while true: next(p) diff --git a/lib/pure/parseopt2.nim b/lib/pure/parseopt2.nim deleted file mode 100644 index 9fd6cd2c7..000000000 --- a/lib/pure/parseopt2.nim +++ /dev/null @@ -1,155 +0,0 @@ -# -# -# Nim's Runtime Library -# (c) Copyright 2015 Andreas Rumpf -# -# See the file "copying.txt", included in this -# distribution, for details about the copyright. -# - -## This module provides the standard Nim command line parser. -## It supports one convenience iterator over all command line options and some -## lower-level features. -## -## Supported syntax: -## -## 1. short options - ``-abcd``, where a, b, c, d are names -## 2. long option - ``--foo:bar``, ``--foo=bar`` or ``--foo`` -## 3. argument - everything else - -{.deprecated: "Use the 'parseopt' module instead".} -{.push debugger: off.} - -include "system/inclrtl" - -import - os, strutils - -type - CmdLineKind* = enum ## the detected command line token - cmdEnd, ## end of command line reached - cmdArgument, ## argument detected - cmdLongOption, ## a long option ``--option`` detected - cmdShortOption ## a short option ``-c`` detected - OptParser* = - object of RootObj ## this object implements the command line parser - cmd: seq[string] - pos: int - remainingShortOptions: string - kind*: CmdLineKind ## the detected command line token - key*, val*: TaintedString ## key and value pair; ``key`` is the option - ## or the argument, ``value`` is not "" if - ## the option was given a value - -proc initOptParser*(cmdline: seq[string]): OptParser {.rtl.} = - ## Initalizes option parses with cmdline. cmdline should not contain - ## argument 0 - program name. - ## If cmdline.len == 0 default to current command line arguments. - result.remainingShortOptions = "" - when not defined(createNimRtl): - if cmdline.len == 0: - result.cmd = commandLineParams() - return - else: - assert cmdline != nil, "Cannot determine command line arguments." - - result.cmd = @cmdline - -when not defined(createNimRtl): - proc initOptParser*(): OptParser = - ## Initializes option parser from current command line arguments. - return initOptParser(commandLineParams()) - -proc next*(p: var OptParser) {.rtl, extern: "npo2$1".} - -proc nextOption(p: var OptParser, token: string, allowEmpty: bool) = - for splitchar in [':', '=']: - if splitchar in token: - let pos = token.find(splitchar) - p.key = token[0..pos-1] - p.val = token[pos+1..token.len-1] - return - - p.key = token - if allowEmpty: - p.val = "" - else: - p.remainingShortOptions = token[0..token.len-1] - p.next() - -proc next(p: var OptParser) = - if p.remainingShortOptions.len != 0: - p.kind = cmdShortOption - p.key = TaintedString(p.remainingShortOptions[0..0]) - p.val = "" - p.remainingShortOptions = p.remainingShortOptions[1..p.remainingShortOptions.len-1] - return - - if p.pos >= p.cmd.len: - p.kind = cmdEnd - return - - let token = p.cmd[p.pos] - p.pos += 1 - - if token.startsWith("--"): - p.kind = cmdLongOption - nextOption(p, token[2..token.len-1], allowEmpty=true) - elif token.startsWith("-"): - p.kind = cmdShortOption - nextOption(p, token[1..token.len-1], allowEmpty=true) - else: - p.kind = cmdArgument - p.key = token - p.val = "" - -proc cmdLineRest*(p: OptParser): TaintedString {.rtl, extern: "npo2$1".} = - ## Returns the part of command line string that has not been parsed yet, - ## properly quoted. - return p.cmd[p.pos..p.cmd.len-1].quoteShellCommand - -type - GetoptResult* = tuple[kind: CmdLineKind, key, val: TaintedString] - -iterator getopt*(p: var OptParser): GetoptResult = - ## This is an convenience iterator for iterating over the given OptParser object. - ## Example: - ## - ## .. code-block:: nim - ## var p = initOptParser("--left --debug:3 -l=4 -r:2") - ## for kind, key, val in p.getopt(): - ## case kind - ## of cmdArgument: - ## filename = key - ## of cmdLongOption, cmdShortOption: - ## case key - ## of "help", "h": writeHelp() - ## of "version", "v": writeVersion() - ## of cmdEnd: assert(false) # cannot happen - ## if filename == "": - ## # no filename has been given, so we show the help: - ## writeHelp() - p.pos = 0 - while true: - next(p) - if p.kind == cmdEnd: break - yield (p.kind, p.key, p.val) - -when declared(paramCount): - iterator getopt*(): GetoptResult = - ## This is an convenience iterator for iterating over the command line arguments. - ## This create a new OptParser object. - ## See above for a more detailed example - ## - ## .. code-block:: nim - ## for kind, key, val in getopt(): - ## # this will iterate over all arguments passed to the cmdline. - ## continue - ## - var p = initOptParser() - while true: - next(p) - if p.kind == cmdEnd: break - yield (p.kind, p.key, p.val) - -{.pop.} diff --git a/lib/pure/uri.nim b/lib/pure/uri.nim index bfb411fc8..e49bfb3c6 100644 --- a/lib/pure/uri.nim +++ b/lib/pure/uri.nim @@ -33,8 +33,7 @@ ## import uri ## let res = parseUri("sftp://127.0.0.1:4343") ## if isAbsolute(res): -## echo "Connect to port: " & res.port -## # --> Connect to port: 4343 +## assert res.port == "4343" ## else: ## echo "Wrong format" @@ -189,7 +188,7 @@ proc parseUri*(uri: string, result: var Uri) = ## ## **See also:** ## * `Uri type <#Uri>`_ for available fields in the URI type - ## * `initUri proc <#initUri,>`_ for initializing a URI + ## * `initUri proc <#initUri>`_ for initializing a URI runnableExamples: var res = initUri() parseUri("https://nim-lang.org/docs/manual.html", res) @@ -343,9 +342,9 @@ proc combine*(uris: varargs[Uri]): Uri = ## **See also:** ## * `/ proc <#/,Uri,string>`_ for building URIs runnableExamples: - let foo = combine(parseUri("https://nim-lang.org/blog.html"), parseUri("/install.html")) + let foo = combine(parseUri("https://nim-lang.org/"), parseUri("docs/"), parseUri("manual.html")) assert foo.hostname == "nim-lang.org" - assert foo.path == "/install.html" + assert foo.path == "/docs/manual.html" result = uris[0] for i in 1 ..< uris.len: result = combine(result, uris[i]) diff --git a/lib/pure/xmltree.nim b/lib/pure/xmltree.nim index b99d46bc8..e1d24ea42 100644 --- a/lib/pure/xmltree.nim +++ b/lib/pure/xmltree.nim @@ -7,21 +7,52 @@ # distribution, for details about the copyright. # -## A simple XML tree. +## A simple XML tree generator. +## +## .. code-block:: +## import xmltree +## +## var g = newElement("myTag") +## g.add newText("some text") +## g.add newComment("this is comment") +## +## var h = newElement("secondTag") +## h.add newEntity("some entity") +## +## let att = {"key1": "first value", "key2": "second value"}.toXmlAttributes +## let k = newXmlTree("treeTag", [g, h], att) +## +## echo k +## # <treeTag key2="second value" key1="first value"> +## # <myTag>some text<!-- this is comment --></myTag> +## # <secondTag>&some entity;</secondTag> +## # </treeTag> +## +## +## **See also:** +## * `xmlparser module <xmlparser.html>`_ for high-level XML parsing +## * `parsexml module <parsexml.html>`_ for low-level XML parsing +## * `htmlgen module <htmlgen.html>`_ for html code generator import macros, strtabs, strutils type - XmlNode* = ref XmlNodeObj ## an XML tree consists of ``XmlNode``'s. + XmlNode* = ref XmlNodeObj ## An XML tree consisting of XML nodes. + ## + ## Use `newXmlTree proc <#newXmlTree,string,openArray[XmlNode],XmlAttributes>`_ + ## for creating a new tree. - XmlNodeKind* = enum ## different kinds of ``XmlNode``'s + XmlNodeKind* = enum ## Different kinds of XML nodes. xnText, ## a text element xnElement, ## an element with 0 or more children xnCData, ## a CDATA node xnEntity, ## an entity (like ``&thing;``) xnComment ## an XML comment - XmlAttributes* = StringTableRef ## an alias for a string to string mapping + XmlAttributes* = StringTableRef ## An alias for a string to string mapping. + ## + ## Use `toXmlAttributes proc <#toXmlAttributes,varargs[tuple[string,string]]>`_ + ## to create `XmlAttributes`. XmlNodeObj {.acyclic.} = object case k: XmlNodeKind # private, use the kind() proc to read this field. @@ -33,67 +64,203 @@ type fAttr: XmlAttributes fClientData: int ## for other clients +const + xmlHeader* = "<?xml version=\"1.0\" encoding=\"UTF-8\" ?>\n" + ## Header to use for complete XML output. + proc newXmlNode(kind: XmlNodeKind): XmlNode = - ## creates a new ``XmlNode``. + ## Creates a new ``XmlNode``. new(result) result.k = kind proc newElement*(tag: string): XmlNode = - ## creates a new ``PXmlNode`` of kind ``xnText`` with the given `tag`. + ## Creates a new ``XmlNode`` of kind ``xnElement`` with the given `tag`. + ## + ## See also: + ## * `newXmlTree proc <#newXmlTree,string,openArray[XmlNode],XmlAttributes>`_ + ## * [<> macro](#<>.m,untyped) + runnableExamples: + var a = newElement("firstTag") + a.add newElement("childTag") + assert a.kind == xnElement + assert $a == "<firstTag><childTag /></firstTag>" + result = newXmlNode(xnElement) result.fTag = tag result.s = @[] - # init attributes lazily to safe memory + # init attributes lazily to save memory proc newText*(text: string): XmlNode = - ## creates a new ``PXmlNode`` of kind ``xnText`` with the text `text`. + ## Creates a new ``XmlNode`` of kind ``xnText`` with the text `text`. + runnableExamples: + var b = newText("my text") + assert b.kind == xnText + assert $b == "my text" + result = newXmlNode(xnText) result.fText = text proc newComment*(comment: string): XmlNode = - ## creates a new ``PXmlNode`` of kind ``xnComment`` with the text `comment`. + ## Creates a new ``XmlNode`` of kind ``xnComment`` with the text `comment`. + runnableExamples: + var c = newComment("my comment") + assert c.kind == xnComment + assert $c == "<!-- my comment -->" + result = newXmlNode(xnComment) result.fText = comment proc newCData*(cdata: string): XmlNode = - ## creates a new ``PXmlNode`` of kind ``xnComment`` with the text `cdata`. + ## Creates a new ``XmlNode`` of kind ``xnCData`` with the text `cdata`. + runnableExamples: + var d = newCData("my cdata") + assert d.kind == xnCData + assert $d == "<![CDATA[my cdata]]>" + result = newXmlNode(xnCData) result.fText = cdata proc newEntity*(entity: string): XmlNode = - ## creates a new ``PXmlNode`` of kind ``xnEntity`` with the text `entity`. + ## Creates a new ``XmlNode`` of kind ``xnEntity`` with the text `entity`. + runnableExamples: + var e = newEntity("my entity") + assert e.kind == xnEntity + assert $e == "&my entity;" + result = newXmlNode(xnEntity) result.fText = entity +proc newXmlTree*(tag: string, children: openArray[XmlNode], + attributes: XmlAttributes = nil): XmlNode = + ## Creates a new XML tree with `tag`, `children` and `attributes`. + ## + ## See also: + ## * `newElement proc <#newElement,string>`_ + ## * [<> macro](#<>.m,untyped) + runnableExamples: + from strutils import unindent + var g = newElement("myTag") + g.add newText("some text") + g.add newComment("this is comment") + var h = newElement("secondTag") + h.add newEntity("some entity") + let att = {"key1": "first value", "key2": "second value"}.toXmlAttributes + let k = newXmlTree("treeTag", [g, h], att) + assert ($k).unindent == """<treeTag key2="second value" key1="first value"> + <myTag>some text<!-- this is comment --></myTag> + <secondTag>&some entity;</secondTag> + </treeTag>""".unindent + + result = newXmlNode(xnElement) + result.fTag = tag + newSeq(result.s, children.len) + for i in 0..children.len-1: result.s[i] = children[i] + result.fAttr = attributes + proc text*(n: XmlNode): string {.inline.} = - ## gets the associated text with the node `n`. `n` can be a CDATA, Text, - ## comment, or entity node. + ## Gets the associated text with the node `n`. + ## + ## `n` can be a CDATA, Text, comment, or entity node. + ## + ## See also: + ## * `text= proc <#text=,XmlNode,string>`_ for text setter + ## * `tag proc <#tag,XmlNode>`_ for tag getter + ## * `tag= proc <#tag=,XmlNode,string>`_ for tag setter + ## * `innerText proc <#innerText,XmlNode>`_ + runnableExamples: + var c = newComment("my comment") + assert $c == "<!-- my comment -->" + assert c.text == "my comment" + assert n.k in {xnText, xnComment, xnCData, xnEntity} result = n.fText proc `text=`*(n: XmlNode, text: string){.inline.} = - ## sets the associated text with the node `n`. `n` can be a CDATA, Text, - ## comment, or entity node. + ## Sets the associated text with the node `n`. + ## + ## `n` can be a CDATA, Text, comment, or entity node. + ## + ## See also: + ## * `text proc <#text,XmlNode>`_ for text getter + ## * `tag proc <#tag,XmlNode>`_ for tag getter + ## * `tag= proc <#tag=,XmlNode,string>`_ for tag setter + runnableExamples: + var e = newEntity("my entity") + assert $e == "&my entity;" + e.text = "a new entity text" + assert $e == "&a new entity text;" + assert n.k in {xnText, xnComment, xnCData, xnEntity} n.fText = text +proc tag*(n: XmlNode): string {.inline.} = + ## Gets the tag name of `n`. + ## + ## `n` has to be an ``xnElement`` node. + ## + ## See also: + ## * `text proc <#text,XmlNode>`_ for text getter + ## * `text= proc <#text=,XmlNode,string>`_ for text setter + ## * `tag= proc <#tag=,XmlNode,string>`_ for tag setter + ## * `innerText proc <#innerText,XmlNode>`_ + runnableExamples: + var a = newElement("firstTag") + a.add newElement("childTag") + assert $a == "<firstTag><childTag /></firstTag>" + assert a.tag == "firstTag" + + assert n.k == xnElement + result = n.fTag + +proc `tag=`*(n: XmlNode, tag: string) {.inline.} = + ## Sets the tag name of `n`. + ## + ## `n` has to be an ``xnElement`` node. + ## + ## See also: + ## * `text proc <#text,XmlNode>`_ for text getter + ## * `text= proc <#text=,XmlNode,string>`_ for text setter + ## * `tag proc <#tag,XmlNode>`_ for tag getter + runnableExamples: + var a = newElement("firstTag") + a.add newElement("childTag") + assert $a == "<firstTag><childTag /></firstTag>" + a.tag = "newTag" + assert $a == "<newTag><childTag /></newTag>" + + assert n.k == xnElement + n.fTag = tag + proc rawText*(n: XmlNode): string {.inline.} = - ## returns the underlying 'text' string by reference. + ## Returns the underlying 'text' string by reference. + ## ## This is only used for speed hacks. shallowCopy(result, n.fText) proc rawTag*(n: XmlNode): string {.inline.} = - ## returns the underlying 'tag' string by reference. + ## Returns the underlying 'tag' string by reference. + ## ## This is only used for speed hacks. shallowCopy(result, n.fTag) proc innerText*(n: XmlNode): string = - ## gets the inner text of `n`: + ## Gets the inner text of `n`: ## ## - If `n` is `xnText` or `xnEntity`, returns its content. ## - If `n` is `xnElement`, runs recursively on each child node and ## concatenates the results. ## - Otherwise returns an empty string. + ## + ## See also: + ## * `text proc <#text,XmlNode>`_ + runnableExamples: + var f = newElement("myTag") + f.add newText("my text") + f.add newComment("my comment") + f.add newEntity("my entity") + assert $f == "<myTag>my text<!-- my comment -->&my entity;</myTag>" + assert innerText(f) == "my textmy entity" + proc worker(res: var string, n: XmlNode) = case n.k of xnText, xnEntity: @@ -107,89 +274,218 @@ proc innerText*(n: XmlNode): string = result = "" worker(result, n) -proc tag*(n: XmlNode): string {.inline.} = - ## gets the tag name of `n`. `n` has to be an ``xnElement`` node. - assert n.k == xnElement - result = n.fTag - -proc `tag=`*(n: XmlNode, tag: string) {.inline.} = - ## sets the tag name of `n`. `n` has to be an ``xnElement`` node. - assert n.k == xnElement - n.fTag = tag - proc add*(father, son: XmlNode) {.inline.} = - ## adds the child `son` to `father`. + ## Adds the child `son` to `father`. + ## + ## See also: + ## * `insert proc <#insert,XmlNode,XmlNode,int>`_ + ## * `delete proc <#delete,XmlNode,Natural>`_ + runnableExamples: + var f = newElement("myTag") + f.add newText("my text") + f.add newElement("sonTag") + f.add newEntity("my entity") + assert $f == "<myTag>my text<sonTag />&my entity;</myTag>" add(father.s, son) proc insert*(father, son: XmlNode, index: int) {.inline.} = - ## insert the child `son` to a given position in `father`. + ## Insert the child `son` to a given position in `father`. + ## + ## `father` and `son` must be of `xnElement` kind. + ## + ## See also: + ## * `add proc <#add,XmlNode,XmlNode>`_ + ## * `delete proc <#delete,XmlNode,Natural>`_ + runnableExamples: + from strutils import unindent + var f = newElement("myTag") + f.add newElement("first") + f.insert(newElement("second"), 0) + assert ($f).unindent == "<myTag>\n<second />\n<first />\n</myTag>" + assert father.k == xnElement and son.k == xnElement if len(father.s) > index: insert(father.s, son, index) else: insert(father.s, son, len(father.s)) +proc delete*(n: XmlNode, i: Natural) {.noSideEffect.} = + ## Delete the `i`'th child of `n`. + ## + ## See also: + ## * `add proc <#add,XmlNode,XmlNode>`_ + ## * `insert proc <#insert,XmlNode,XmlNode,int>`_ + runnableExamples: + var f = newElement("myTag") + f.add newElement("first") + f.insert(newElement("second"), 0) + f.delete(0) + assert $f == "<myTag><first /></myTag>" + + assert n.k == xnElement + n.s.delete(i) + proc len*(n: XmlNode): int {.inline.} = - ## returns the number `n`'s children. + ## Returns the number of `n`'s children. + runnableExamples: + var f = newElement("myTag") + f.add newElement("first") + f.insert(newElement("second"), 0) + assert len(f) == 2 if n.k == xnElement: result = len(n.s) proc kind*(n: XmlNode): XmlNodeKind {.inline.} = - ## returns `n`'s kind. + ## Returns `n`'s kind. + runnableExamples: + var a = newElement("firstTag") + assert a.kind == xnElement + var b = newText("my text") + assert b.kind == xnText result = n.k proc `[]`* (n: XmlNode, i: int): XmlNode {.inline.} = - ## returns the `i`'th child of `n`. - assert n.k == xnElement - result = n.s[i] + ## Returns the `i`'th child of `n`. + runnableExamples: + var f = newElement("myTag") + f.add newElement("first") + f.insert(newElement("second"), 0) + assert $f[1] == "<first />" + assert $f[0] == "<second />" -proc delete*(n: XmlNode, i: Natural) {.noSideEffect.} = - ## delete the `i`'th child of `n`. assert n.k == xnElement - n.s.delete(i) + result = n.s[i] proc `[]`* (n: var XmlNode, i: int): var XmlNode {.inline.} = - ## returns the `i`'th child of `n` so that it can be modified + ## Returns the `i`'th child of `n` so that it can be modified. assert n.k == xnElement result = n.s[i] iterator items*(n: XmlNode): XmlNode {.inline.} = - ## iterates over any child of `n`. + ## Iterates over any child of `n`. + ## + ## **Examples:** + ## + ## .. code-block:: + ## var g = newElement("myTag") + ## g.add newText("some text") + ## g.add newComment("this is comment") + ## + ## var h = newElement("secondTag") + ## h.add newEntity("some entity") + ## g.add h + ## + ## assert $g == "<myTag>some text<!-- this is comment --><secondTag>&some entity;</secondTag></myTag>" + ## for x in g: # the same as `for x in items(g):` + ## echo x + ## + ## # some text + ## # <!-- this is comment --> + ## # <secondTag>&some entity;<![CDATA[some cdata]]></secondTag> + assert n.k == xnElement for i in 0 .. n.len-1: yield n[i] iterator mitems*(n: var XmlNode): var XmlNode {.inline.} = - ## iterates over any child of `n`. + ## Iterates over any child of `n` so that it can be modified. assert n.k == xnElement for i in 0 .. n.len-1: yield n[i] +proc toXmlAttributes*(keyValuePairs: varargs[tuple[key, val: string]]): XmlAttributes = + ## Converts `{key: value}` pairs into `XmlAttributes`. + runnableExamples: + let att = {"key1": "first value", "key2": "second value"}.toXmlAttributes + var j = newElement("myTag") + j.attrs = att + assert $j == """<myTag key2="second value" key1="first value" />""" + + newStringTable(keyValuePairs) + proc attrs*(n: XmlNode): XmlAttributes {.inline.} = - ## gets the attributes belonging to `n`. + ## Gets the attributes belonging to `n`. + ## ## Returns `nil` if attributes have not been initialised for this node. + ## + ## See also: + ## * `attrs= proc <#attrs=,XmlNode,XmlAttributes>`_ for XmlAttributes setter + ## * `attrsLen proc <#attrsLen,XmlNode>`_ for numbef of attributes + ## * `attr proc <#attr,XmlNode,string>`_ for finding an attribute + runnableExamples: + var j = newElement("myTag") + assert j.attrs == nil + let att = {"key1": "first value", "key2": "second value"}.toXmlAttributes + j.attrs = att + assert j.attrs == att + assert n.k == xnElement result = n.fAttr proc `attrs=`*(n: XmlNode, attr: XmlAttributes) {.inline.} = - ## sets the attributes belonging to `n`. + ## Sets the attributes belonging to `n`. + ## + ## See also: + ## * `attrs proc <#attrs,XmlNode>`_ for XmlAttributes getter + ## * `attrsLen proc <#attrsLen,XmlNode>`_ for numbef of attributes + ## * `attr proc <#attr,XmlNode,string>`_ for finding an attribute + runnableExamples: + var j = newElement("myTag") + assert j.attrs == nil + let att = {"key1": "first value", "key2": "second value"}.toXmlAttributes + j.attrs = att + assert j.attrs == att + assert n.k == xnElement n.fAttr = attr proc attrsLen*(n: XmlNode): int {.inline.} = - ## returns the number of `n`'s attributes. + ## Returns the number of `n`'s attributes. + ## + ## See also: + ## * `attrs proc <#attrs,XmlNode>`_ for XmlAttributes getter + ## * `attrs= proc <#attrs=,XmlNode,XmlAttributes>`_ for XmlAttributes setter + ## * `attr proc <#attr,XmlNode,string>`_ for finding an attribute + runnableExamples: + var j = newElement("myTag") + assert j.attrsLen == 0 + let att = {"key1": "first value", "key2": "second value"}.toXmlAttributes + j.attrs = att + assert j.attrsLen == 2 + assert n.k == xnElement if not isNil(n.fAttr): result = len(n.fAttr) +proc attr*(n: XmlNode, name: string): string = + ## Finds the first attribute of `n` with a name of `name`. + ## Returns "" on failure. + ## + ## See also: + ## * `attrs proc <#attrs,XmlNode>`_ for XmlAttributes getter + ## * `attrs= proc <#attrs=,XmlNode,XmlAttributes>`_ for XmlAttributes setter + ## * `attrsLen proc <#attrsLen,XmlNode>`_ for numbef of attributes + runnableExamples: + var j = newElement("myTag") + let att = {"key1": "first value", "key2": "second value"}.toXmlAttributes + j.attrs = att + assert j.attr("key1") == "first value" + assert j.attr("key2") == "second value" + + assert n.kind == xnElement + if n.attrs == nil: return "" + return n.attrs.getOrDefault(name) + proc clientData*(n: XmlNode): int {.inline.} = - ## gets the client data of `n`. The client data field is used by the HTML - ## parser and generator. + ## Gets the client data of `n`. + ## + ## The client data field is used by the HTML parser and generator. result = n.fClientData proc `clientData=`*(n: XmlNode, data: int) {.inline.} = - ## sets the client data of `n`. The client data field is used by the HTML - ## parser and generator. + ## Sets the client data of `n`. + ## + ## The client data field is used by the HTML parser and generator. n.fClientData = data proc addEscaped*(result: var string, s: string) = - ## same as ``result.add(escape(s))``, but more efficient. + ## The same as `result.add(escape(s)) <#escape,string>`_, but more efficient. for c in items(s): case c of '<': result.add("<") @@ -201,7 +497,8 @@ proc addEscaped*(result: var string, s: string) = else: result.add(c) proc escape*(s: string): string = - ## escapes `s` for inclusion into an XML document. + ## Escapes `s` for inclusion into an XML document. + ## ## Escapes these characters: ## ## ------------ ------------------- @@ -214,6 +511,8 @@ proc escape*(s: string): string = ## ``'`` ``'`` ## ``/`` ``/`` ## ------------ ------------------- + ## + ## You can also use `addEscaped proc <#addEscaped,string,string>`_. result = newStringOfCap(s.len) addEscaped(result, s) @@ -223,14 +522,22 @@ proc addIndent(result: var string, indent: int, addNewLines: bool) = for i in 1..indent: result.add(' ') proc noWhitespace(n: XmlNode): bool = - #for i in 1..n.len-1: - # if n[i].kind != n[0].kind: return true for i in 0..n.len-1: if n[i].kind in {xnText, xnEntity}: return true proc add*(result: var string, n: XmlNode, indent = 0, indWidth = 2, addNewLines=true) = - ## adds the textual representation of `n` to `result`. + ## Adds the textual representation of `n` to string `result`. + runnableExamples: + var + a = newElement("firstTag") + b = newText("my text") + c = newComment("my comment") + s = "" + s.add(c) + s.add(a) + s.add(b) + assert s == "<!-- my comment --><firstTag />my text" proc addEscapedAttr(result: var string, s: string) = # `addEscaped` alternative with less escaped characters. @@ -291,24 +598,76 @@ proc add*(result: var string, n: XmlNode, indent = 0, indWidth = 2, result.add(n.fText) result.add(';') -const - xmlHeader* = "<?xml version=\"1.0\" encoding=\"UTF-8\" ?>\n" - ## header to use for complete XML output - proc `$`*(n: XmlNode): string = - ## converts `n` into its string representation. No ``<$xml ...$>`` declaration - ## is produced, so that the produced XML fragments are composable. + ## Converts `n` into its string representation. + ## + ## No ``<$xml ...$>`` declaration is produced, so that the produced + ## XML fragments are composable. result = "" result.add(n) -proc newXmlTree*(tag: string, children: openArray[XmlNode], - attributes: XmlAttributes = nil): XmlNode = - ## creates a new XML tree with `tag`, `children` and `attributes` - result = newXmlNode(xnElement) - result.fTag = tag - newSeq(result.s, children.len) - for i in 0..children.len-1: result.s[i] = children[i] - result.fAttr = attributes +proc child*(n: XmlNode, name: string): XmlNode = + ## Finds the first child element of `n` with a name of `name`. + ## Returns `nil` on failure. + runnableExamples: + var f = newElement("myTag") + f.add newElement("firstSon") + f.add newElement("secondSon") + f.add newElement("thirdSon") + assert $(f.child("secondSon")) == "<secondSon />" + + assert n.kind == xnElement + for i in items(n): + if i.kind == xnElement: + if i.tag == name: + return i + +proc findAll*(n: XmlNode, tag: string, result: var seq[XmlNode]) = + ## Iterates over all the children of `n` returning those matching `tag`. + ## + ## Found nodes satisfying the condition will be appended to the `result` + ## sequence. + runnableExamples: + var + b = newElement("good") + c = newElement("bad") + d = newElement("bad") + e = newElement("good") + b.add newText("b text") + c.add newText("c text") + d.add newText("d text") + e.add newText("e text") + let a = newXmlTree("father", [b, c, d, e]) + var s = newSeq[XmlNode]() + a.findAll("good", s) + assert $s == "@[<good>b text</good>, <good>e text</good>]" + + assert n.k == xnElement + for child in n.items(): + if child.k != xnElement: + continue + if child.tag == tag: + result.add(child) + child.findAll(tag, result) + +proc findAll*(n: XmlNode, tag: string): seq[XmlNode] = + ## A shortcut version to assign in let blocks. + runnableExamples: + var + b = newElement("good") + c = newElement("bad") + d = newElement("bad") + e = newElement("good") + b.add newText("b text") + c.add newText("c text") + d.add newText("d text") + e.add newText("e text") + let a = newXmlTree("father", [b, c, d, e]) + assert $(a.findAll("good")) == "@[<good>b text</good>, <good>e text</good>]" + assert $(a.findAll("bad")) == "@[<bad>c text</bad>, <bad>d text</bad>]" + + newSeq(result, 0) + findAll(n, tag, result) proc xmlConstructor(a: NimNode): NimNode {.compileTime.} = if a.kind == nnkCall: @@ -346,56 +705,6 @@ macro `<>`*(x: untyped): untyped = ## result = xmlConstructor(x) -proc child*(n: XmlNode, name: string): XmlNode = - ## Finds the first child element of `n` with a name of `name`. - ## Returns `nil` on failure. - assert n.kind == xnElement - for i in items(n): - if i.kind == xnElement: - if i.tag == name: - return i - -proc attr*(n: XmlNode, name: string): string = - ## Finds the first attribute of `n` with a name of `name`. - ## Returns "" on failure. - assert n.kind == xnElement - if n.attrs == nil: return "" - return n.attrs.getOrDefault(name) - -proc findAll*(n: XmlNode, tag: string, result: var seq[XmlNode]) = - ## Iterates over all the children of `n` returning those matching `tag`. - ## - ## Found nodes satisfying the condition will be appended to the `result` - ## sequence, which can't be nil or the proc will crash. Usage example: - ## - ## .. code-block:: - ## var - ## html: XmlNode - ## tags: seq[XmlNode] = @[] - ## - ## html = buildHtml() - ## findAll(html, "img", tags) - ## for imgTag in tags: - ## process(imgTag) - assert n.k == xnElement - for child in n.items(): - if child.k != xnElement: - continue - if child.tag == tag: - result.add(child) - child.findAll(tag, result) - -proc findAll*(n: XmlNode, tag: string): seq[XmlNode] = - ## Shortcut version to assign in let blocks. Example: - ## - ## .. code-block:: - ## var html: XmlNode - ## - ## html = buildHtml(html) - ## for imgTag in html.findAll("img"): - ## process(imgTag) - newSeq(result, 0) - findAll(n, tag, result) when isMainModule: assert """<a href="http://nim-lang.org">Nim rules.</a>""" == |