From 7ac1fc81fd119dba3739e05a89ad63fcee405ac1 Mon Sep 17 00:00:00 2001
From: c-blake <c-blake@users.noreply.github.com>
Date: Mon, 31 Dec 2018 08:52:51 -0500
Subject: Resolve things raised in https://github.com/nim-lang/Nim/issues/10081
 ? (#10084)

* Resolve things raised in https://github.com/nim-lang/Nim/issues/10081 ?
CDF is a standard ident in all things related to random numbers/sampling,
and full words "cumulativeDistributionFunction" would be silly long, in
this case, IMO.  We use lowercase `cdf` to make it not look like a type,
remove all looping from `sample` letting callers do it.  Besides just
side-stepping any `sampleSize` name choice, callers may want to filter
out samples anyway which this makes slightly simpler.

Also add two variants of `cumsum`, value return and in-place update
distinguished by the var-ness of the first argument.  Add tests for
`int` and `float` for both `cumsum` and the new `sample`.  (The sample
tests exercise the value return mode of `cumsum`.)

Functionality pre-this-PR `sample(a, w)` is now the almost as simple
`for i in 0..<n: sample(a, w.cumsum)`, but this new code factoring is
almost surely better.  The statistical tests pass, as before.

* Address Araq comment in https://github.com/nim-lang/Nim/pull/10084
We can always add in some `var` version later if desired to save
memory, but this change now at least firms up the `sample` interface.

* Rename `cumsum` -> `cumsummed` to honor NEP1 style.  Re-instate `cumsum` as
the in-place transformation.  Test both in `tests/stdlib/tmath.nim` and use
`cumsummed` in the example code for sample since that's a simpler example.

* Fix requests from https://github.com/nim-lang/Nim/pull/10084 :
  example in lib/pure/math.nim and comment whitespace in lib/pure/random.nim
---
 lib/pure/random.nim | 41 ++++++++++++++++++++---------------------
 1 file changed, 20 insertions(+), 21 deletions(-)

(limited to 'lib/pure/random.nim')

diff --git a/lib/pure/random.nim b/lib/pure/random.nim
index 26e6740ea..ee728ad4a 100644
--- a/lib/pure/random.nim
+++ b/lib/pure/random.nim
@@ -175,27 +175,26 @@ proc sample*[T](a: openArray[T]): T =
   ## returns a random element from openArray ``a`` using non-thread-safe state.
   result = a[rand(a.low..a.high)]
 
-proc sample*[T, U](r: var Rand; a: openArray[T], w: openArray[U], n=1): seq[T] =
-  ## Return a sample (with replacement) of size ``n`` from elements of ``a``
-  ## according to convertible-to-``float``, not necessarily normalized, and
-  ## non-negative weights ``w``.  Uses state in ``r``.  Must have sum ``w > 0.0``.
-  assert(w.len == a.len)
-  var cdf = newSeq[float](a.len)   # The *unnormalized* CDF
-  var tot = 0.0                    # Unnormalized is fine if we sample up to tot
-  for i, w in w:
-    assert(w >= 0)
-    tot += float(w)
-    cdf[i] = tot
-  assert(tot > 0.0)                # Need at least one non-zero weight
-  for i in 0 ..< n:
-    result.add(a[cdf.upperBound(r.rand(tot))])
-
-proc sample*[T, U](a: openArray[T], w: openArray[U], n=1): seq[T] =
-  ## Return a sample (with replacement) of size ``n`` from elements of ``a``
-  ## according to convertible-to-``float``, not necessarily normalized, and
-  ## non-negative weights ``w``.  Uses default non-thread-safe state.
-  state.sample(a, w, n)
-
+proc sample*[T, U](r: var Rand; a: openArray[T], cdf: openArray[U]): T =
+  ## Sample one element from openArray ``a`` when it has cumulative distribution
+  ## function (CDF) ``cdf`` (not necessarily normalized, any type of elements
+  ## convertible to ``float``). Uses state in ``r``. E.g.:
+  ##
+  ## .. code-block:: nim
+  ##   let val = [ "a", "b", "c", "d" ]  # some values
+  ##   var cnt = [1, 2, 3, 4]            # histogram of counts
+  ##   echo r.sample(val, cnt.cumsummed) # echo a sample
+  assert(cdf.len == a.len)              # Two basic sanity checks.
+  assert(float(cdf[^1]) > 0.0)
+  #While we could check cdf[i-1] <= cdf[i] for i in 1..cdf.len, that could get
+  #awfully expensive even in debugging modes.
+  let u = r.rand(float(cdf[^1]))
+  a[cdf.upperBound(U(u))]
+
+proc sample*[T, U](a: openArray[T], cdf: openArray[U]): T =
+  ## Like ``sample(var Rand; openArray[T], openArray[U])``, but uses default
+  ## non-thread-safe state.
+  state.sample(a, cdf)
 
 proc initRand*(seed: int64): Rand =
   ## Creates a new ``Rand`` state from ``seed``.
-- 
cgit 1.4.1-2-gfad0