diff options
author | Andy Davidoff <disruptek@users.noreply.github.com> | 2019-04-16 03:42:54 -0400 |
---|---|---|
committer | Andreas Rumpf <rumpf_a@web.de> | 2019-04-16 09:42:54 +0200 |
commit | 01f09567c43031d3d35a54c8856d79f6cd1d4bf7 (patch) | |
tree | 214b30c3a0c6244c9dab0dffcf631259131107db | |
parent | 374a85bb9cf952cd3ad65f04f1aef75cc321ad22 (diff) | |
download | Nim-01f09567c43031d3d35a54c8856d79f6cd1d4bf7.tar.gz |
faster CountTable sort(), optional SortOrder (#11010)
* use existing sort for CountTable, and add SortOrder options to CountTable, OrderedTable sort(s) * add some tests, runnables, etc. * fix runnable imports
-rw-r--r-- | lib/pure/collections/tables.nim | 57 | ||||
-rw-r--r-- | tests/collections/ttables.nim | 45 |
2 files changed, 60 insertions, 42 deletions
diff --git a/lib/pure/collections/tables.nim b/lib/pure/collections/tables.nim index 84ec422d4..50c727be1 100644 --- a/lib/pure/collections/tables.nim +++ b/lib/pure/collections/tables.nim @@ -216,7 +216,7 @@ ## * `hashes module<hashes.html>`_ for helper functions for hashing -import hashes, math +import hashes, math, algorithm include "system/inclrtl" @@ -1467,7 +1467,7 @@ proc clear*[A, B](t: var OrderedTable[A, B]) = t.first = -1 t.last = -1 -proc sort*[A, B](t: var OrderedTable[A, B], cmp: proc (x,y: (A, B)): int) = +proc sort*[A, B](t: var OrderedTable[A, B], cmp: proc (x,y: (A, B)): int, order = SortOrder.Ascending) = ## Sorts ``t`` according to the function ``cmp``. ## ## This modifies the internal list @@ -1475,12 +1475,15 @@ proc sort*[A, B](t: var OrderedTable[A, B], cmp: proc (x,y: (A, B)): int) = ## call but key lookup and insertions remain possible after ``sort`` (in ## contrast to the `sort proc<#sort,CountTable[A]>`_ for count tables). runnableExamples: + import algorithm var a = initOrderedTable[char, int]() for i, c in "cab": a[c] = 10*i doAssert a == {'c': 0, 'a': 10, 'b': 20}.toOrderedTable a.sort(system.cmp) doAssert a == {'a': 10, 'b': 20, 'c': 0}.toOrderedTable + a.sort(system.cmp, order=SortOrder.Descending) + doAssert a == {'c': 0, 'b': 20, 'a': 10}.toOrderedTable var list = t.first var @@ -1508,7 +1511,7 @@ proc sort*[A, B](t: var OrderedTable[A, B], cmp: proc (x,y: (A, B)): int) = elif qsize == 0 or q < 0: e = p; p = t.data[p].next; dec(psize) elif cmp((t.data[p].key, t.data[p].val), - (t.data[q].key, t.data[q].val)) <= 0: + (t.data[q].key, t.data[q].val)) * order <= 0: e = p; p = t.data[p].next; dec(psize) else: e = q; q = t.data[q].next; dec(qsize) @@ -1875,7 +1878,7 @@ proc clear*[A, B](t: var OrderedTableRef[A, B]) = doAssert len(a) == 0 clear(t[]) -proc sort*[A, B](t: OrderedTableRef[A, B], cmp: proc (x,y: (A, B)): int) = +proc sort*[A, B](t: OrderedTableRef[A, B], cmp: proc (x,y: (A, B)): int, order = SortOrder.Ascending) = ## Sorts ``t`` according to the function ``cmp``. ## ## This modifies the internal list @@ -1883,13 +1886,16 @@ proc sort*[A, B](t: OrderedTableRef[A, B], cmp: proc (x,y: (A, B)): int) = ## call but key lookup and insertions remain possible after ``sort`` (in ## contrast to the `sort proc<#sort,CountTableRef[A]>`_ for count tables). runnableExamples: + import algorithm var a = newOrderedTable[char, int]() for i, c in "cab": a[c] = 10*i doAssert a == {'c': 0, 'a': 10, 'b': 20}.newOrderedTable a.sort(system.cmp) doAssert a == {'a': 10, 'b': 20, 'c': 0}.newOrderedTable - t[].sort(cmp) + a.sort(system.cmp, order=SortOrder.Descending) + doAssert a == {'c': 0, 'b': 20, 'a': 10}.newOrderedTable + t[].sort(cmp, order=order) proc `$`*[A, B](t: OrderedTableRef[A, B]): string = ## The ``$`` operator for hash tables. Used internally when calling `echo` @@ -2199,30 +2205,27 @@ proc clear*[A](t: var CountTable[A]) = ## Resets the table so that it is empty. clearImpl() -proc sort*[A](t: var CountTable[A]) = - ## Sorts the count table so that the entry with the highest counter comes - ## first. +func ctCmp[T](a, b: tuple[key: T, val: int]): int = + result = system.cmp(a.val, b.val) + +proc sort*[A](t: var CountTable[A], order = SortOrder.Descending) = + ## Sorts the count table so that, by default, the entry with the + ## highest counter comes first. ## ## **This is destructive! You must not modify ``t`` afterwards!** ## ## You can use the iterators `pairs<#pairs.i,CountTable[A]>`_, ## `keys<#keys.i,CountTable[A]>`_, and `values<#values.i,CountTable[A]>`_ ## to iterate over ``t`` in the sorted order. - - # we use shellsort here; fast enough and simple - var h = 1 - while true: - h = 3 * h + 1 - if h >= high(t.data): break - while true: - h = h div 3 - for i in countup(h, high(t.data)): - var j = i - while t.data[j-h].val <= t.data[j].val: - swap(t.data[j], t.data[j-h]) - j = j-h - if j < h: break - if h == 1: break + runnableExamples: + import algorithm, sequtils + var a = toCountTable("abracadabra") + doAssert a == "aaaaabbrrcd".toCountTable + a.sort() + doAssert toSeq(a.values) == @[5, 2, 2, 1, 1] + a.sort(SortOrder.Ascending) + doAssert toSeq(a.values) == @[1, 1, 2, 2, 5] + t.data.sort(cmp=ctCmp, order=order) proc merge*[A](s: var CountTable[A], t: CountTable[A]) = ## Merges the second table into the first one (must be declared as `var`). @@ -2467,16 +2470,16 @@ proc clear*[A](t: CountTableRef[A]) = ## Resets the table so that it is empty. clearImpl() -proc sort*[A](t: CountTableRef[A]) = - ## Sorts the count table so that the entry with the highest counter comes - ## first. +proc sort*[A](t: CountTableRef[A], order = SortOrder.Descending) = + ## Sorts the count table so that, by default, the entry with the + ## highest counter comes first. ## ## **This is destructive! You must not modify `t` afterwards!** ## ## You can use the iterators `pairs<#pairs.i,CountTableRef[A]>`_, ## `keys<#keys.i,CountTableRef[A]>`_, and `values<#values.i,CountTableRef[A]>`_ ## to iterate over ``t`` in the sorted order. - t[].sort + t[].sort(order=order) proc merge*[A](s, t: CountTableRef[A]) = ## Merges the second table into the first one. diff --git a/tests/collections/ttables.nim b/tests/collections/ttables.nim index 6798e5731..0a5a01367 100644 --- a/tests/collections/ttables.nim +++ b/tests/collections/ttables.nim @@ -8,7 +8,7 @@ And we get here ''' joinable: false """ -import hashes, sequtils, tables +import hashes, sequtils, tables, algorithm # test should not be joined because it takes too long. block tableadds: @@ -274,22 +274,30 @@ block tablesref: block countTableTest1: var s = data.toTable var t = newCountTable[string]() - for k in s.keys: t.inc(k) - for k in t.keys: assert t[k] == 1 - t.inc("90", 3) - t.inc("12", 2) - t.inc("34", 1) + var r = newCountTable[string]() + for x in [t, r]: + for k in s.keys: + x.inc(k) + assert x[k] == 1 + x.inc("90", 3) + x.inc("12", 2) + x.inc("34", 1) assert t.largest()[0] == "90" t.sort() - var i = 0 - for k, v in t.pairs: - case i - of 0: assert k == "90" and v == 4 - of 1: assert k == "12" and v == 3 - of 2: assert k == "34" and v == 2 - else: break - inc i + r.sort(SortOrder.Ascending) + var ps1 = toSeq t.pairs + var ps2 = toSeq r.pairs + ps2.reverse() + for ps in [ps1, ps2]: + var i = 0 + for (k, v) in ps: + case i + of 0: assert k == "90" and v == 4 + of 1: assert k == "12" and v == 3 + of 2: assert k == "34" and v == 2 + else: break + inc i block SyntaxTest: var x = newTable[int, string]({:}) @@ -308,13 +316,20 @@ block tablesref: var t = newOrderedTable[string, int](2) for key, val in items(data): t[key] = val for key, val in items(data): assert t[key] == val - t.sort(proc (x, y: tuple[key: string, val: int]): int = cmp(x.key, y.key)) + proc cmper(x, y: tuple[key: string, val: int]): int = cmp(x.key, y.key) + t.sort(cmper) var i = 0 # `pairs` needs to yield in sorted order: for key, val in pairs(t): doAssert key == sorteddata[i][0] doAssert val == sorteddata[i][1] inc(i) + t.sort(cmper, order=SortOrder.Descending) + i = 0 + for key, val in pairs(t): + doAssert key == sorteddata[high(data)-i][0] + doAssert val == sorteddata[high(data)-i][1] + inc(i) # check that lookup still works: for key, val in pairs(t): |