diff options
author | c-blake <c-blake@users.noreply.github.com> | 2018-12-31 08:52:51 -0500 |
---|---|---|
committer | Andreas Rumpf <rumpf_a@web.de> | 2018-12-31 14:52:51 +0100 |
commit | 7ac1fc81fd119dba3739e05a89ad63fcee405ac1 (patch) | |
tree | 643b3764fcf5c2a9de12c45e50a1569108cb467a /tests/stdlib | |
parent | e87910197a209d02cf69bba3aacd9766723aeb37 (diff) | |
download | Nim-7ac1fc81fd119dba3739e05a89ad63fcee405ac1.tar.gz |
Resolve things raised in https://github.com/nim-lang/Nim/issues/10081 ? (#10084)
* Resolve things raised in https://github.com/nim-lang/Nim/issues/10081 ? CDF is a standard ident in all things related to random numbers/sampling, and full words "cumulativeDistributionFunction" would be silly long, in this case, IMO. We use lowercase `cdf` to make it not look like a type, remove all looping from `sample` letting callers do it. Besides just side-stepping any `sampleSize` name choice, callers may want to filter out samples anyway which this makes slightly simpler. Also add two variants of `cumsum`, value return and in-place update distinguished by the var-ness of the first argument. Add tests for `int` and `float` for both `cumsum` and the new `sample`. (The sample tests exercise the value return mode of `cumsum`.) Functionality pre-this-PR `sample(a, w)` is now the almost as simple `for i in 0..<n: sample(a, w.cumsum)`, but this new code factoring is almost surely better. The statistical tests pass, as before. * Address Araq comment in https://github.com/nim-lang/Nim/pull/10084 We can always add in some `var` version later if desired to save memory, but this change now at least firms up the `sample` interface. * Rename `cumsum` -> `cumsummed` to honor NEP1 style. Re-instate `cumsum` as the in-place transformation. Test both in `tests/stdlib/tmath.nim` and use `cumsummed` in the example code for sample since that's a simpler example. * Fix requests from https://github.com/nim-lang/Nim/pull/10084 : example in lib/pure/math.nim and comment whitespace in lib/pure/random.nim
Diffstat (limited to 'tests/stdlib')
-rw-r--r-- | tests/stdlib/tmath.nim | 57 |
1 files changed, 48 insertions, 9 deletions
diff --git a/tests/stdlib/tmath.nim b/tests/stdlib/tmath.nim index 7c1851e7a..544fa8dc0 100644 --- a/tests/stdlib/tmath.nim +++ b/tests/stdlib/tmath.nim @@ -4,6 +4,8 @@ discard """ [Suite] random float +[Suite] cumsum + [Suite] random sample [Suite] ^ @@ -74,29 +76,66 @@ suite "random float": var rand2:float = random(1000000.0) check rand1 != rand2 +suite "cumsum": + test "cumsum int seq return": + let counts = [ 1, 2, 3, 4 ] + check counts.cumsummed == [ 1, 3, 6, 10 ] + + test "cumsum float seq return": + let counts = [ 1.0, 2.0, 3.0, 4.0 ] + check counts.cumsummed == [ 1.0, 3.0, 6.0, 10.0 ] + + test "cumsum int in-place": + var counts = [ 1, 2, 3, 4 ] + counts.cumsum + check counts == [ 1, 3, 6, 10 ] + + test "cumsum float in-place": + var counts = [ 1.0, 2.0, 3.0, 4.0 ] + counts.cumsum + check counts == [ 1.0, 3.0, 6.0, 10.0 ] + suite "random sample": - test "non-uniform array sample": + test "non-uniform array sample unnormalized int CDF": let values = [ 10, 20, 30, 40, 50 ] # values - let weight = [ 4, 3, 2, 1, 0 ] # weights aka unnormalized probabilities - let weightSum = 10.0 # sum of weights + let counts = [ 4, 3, 2, 1, 0 ] # weights aka unnormalized probabilities var histo = initCountTable[int]() - for v in sample(values, weight, 5000): - histo.inc(v) - check histo.len == 4 # number of non-zero in `weight` + let cdf = counts.cumsummed # unnormalized CDF + for i in 0 ..< 5000: + histo.inc(sample(values, cdf)) + check histo.len == 4 # number of non-zero in `counts` # Any one bin is a binomial random var for n samples, each with prob p of # adding a count to k; E[k]=p*n, Var k=p*(1-p)*n, approximately Normal for # big n. So, P(abs(k - p*n)/sqrt(p*(1-p)*n))>3.0) =~ 0.0027, while # P(wholeTestFails) =~ 1 - P(binPasses)^4 =~ 1 - (1-0.0027)^4 =~ 0.01. - for i, w in weight: - if w == 0: + for i, c in counts: + if c == 0: check values[i] notin histo continue - let p = float(w) / float(weightSum) + let p = float(c) / float(cdf[^1]) let n = 5000.0 let expected = p * n let stdDev = sqrt(n * p * (1.0 - p)) check abs(float(histo[values[i]]) - expected) <= 3.0 * stdDev + test "non-uniform array sample normalized float CDF": + let values = [ 10, 20, 30, 40, 50 ] # values + let counts = [ 0.4, 0.3, 0.2, 0.1, 0 ] # probabilities + var histo = initCountTable[int]() + let cdf = counts.cumsummed # normalized CDF + for i in 0 ..< 5000: + histo.inc(sample(values, cdf)) + check histo.len == 4 # number of non-zero in ``counts`` + for i, c in counts: + if c == 0: + check values[i] notin histo + continue + let p = float(c) / float(cdf[^1]) + let n = 5000.0 + let expected = p * n + let stdDev = sqrt(n * p * (1.0 - p)) + # NOTE: like unnormalized int CDF test, P(wholeTestFails) =~ 0.01. + check abs(float(histo[values[i]]) - expected) <= 3.0 * stdDev suite "^": test "compiles for valid types": |