diff options
Diffstat (limited to 'lib')
-rw-r--r-- | lib/packages/docutils/highlite.nim | 36 | ||||
-rw-r--r-- | lib/pure/events.nim | 2 | ||||
-rw-r--r-- | lib/pure/math.nim | 62 | ||||
-rw-r--r-- | lib/pure/parseutils.nim | 7 | ||||
-rw-r--r-- | lib/pure/stats.nim | 348 | ||||
-rw-r--r-- | lib/pure/strutils.nim | 10 | ||||
-rw-r--r-- | lib/wrappers/linenoise/clinenoise.c | 26 |
7 files changed, 407 insertions, 84 deletions
diff --git a/lib/packages/docutils/highlite.nim b/lib/packages/docutils/highlite.nim index 640b8cd5a..1bc0af1b6 100644 --- a/lib/packages/docutils/highlite.nim +++ b/lib/packages/docutils/highlite.nim @@ -173,7 +173,41 @@ proc nimNextToken(g: var GeneralTokenizer) = while g.buf[pos] in {' ', '\x09'..'\x0D'}: inc(pos) of '#': g.kind = gtComment - while not (g.buf[pos] in {'\0', '\x0A', '\x0D'}): inc(pos) + inc(pos) + var isDoc = false + if g.buf[pos] == '#': + inc(pos) + isDoc = true + if g.buf[pos] == '[': + g.kind = gtLongComment + var nesting = 0 + while true: + case g.buf[pos] + of '\0': break + of '#': + if isDoc: + if g.buf[pos+1] == '#' and g.buf[pos+2] == '[': + inc nesting + elif g.buf[pos+1] == '[': + inc nesting + inc pos + of ']': + if isDoc: + if g.buf[pos+1] == '#' and g.buf[pos+2] == '#': + if nesting == 0: + inc(pos, 3) + break + dec nesting + elif g.buf[pos+1] == '#': + if nesting == 0: + inc(pos, 2) + break + dec nesting + inc pos + else: + inc pos + else: + while g.buf[pos] notin {'\0', '\x0A', '\x0D'}: inc(pos) of 'a'..'z', 'A'..'Z', '_', '\x80'..'\xFF': var id = "" while g.buf[pos] in SymChars + {'_'}: diff --git a/lib/pure/events.nim b/lib/pure/events.nim index 62800c5c8..23a8a2c58 100644 --- a/lib/pure/events.nim +++ b/lib/pure/events.nim @@ -57,7 +57,7 @@ proc addHandler*(handler: var EventHandler, fn: proc(e: EventArgs) {.closure.}) proc removeHandler*(handler: var EventHandler, fn: proc(e: EventArgs) {.closure.}) = ## Removes the callback from the specified event handler. - for i in countup(0, len(handler.handlers) -1): + for i in countup(0, len(handler.handlers)-1): if fn == handler.handlers[i]: handler.handlers.del(i) break diff --git a/lib/pure/math.nim b/lib/pure/math.nim index 391a880ae..b0104336e 100644 --- a/lib/pure/math.nim +++ b/lib/pure/math.nim @@ -118,26 +118,6 @@ proc sum*[T](x: openArray[T]): T {.noSideEffect.} = ## If `x` is empty, 0 is returned. for i in items(x): result = result + i -template toFloat(f: float): float = f - -proc mean*[T](x: openArray[T]): float {.noSideEffect.} = - ## Computes the mean of the elements in `x`, which are first converted to floats. - ## If `x` is empty, NaN is returned. - ## ``toFloat(x: T): float`` must be defined. - for i in items(x): result = result + toFloat(i) - result = result / toFloat(len(x)) - -proc variance*[T](x: openArray[T]): float {.noSideEffect.} = - ## Computes the variance of the elements in `x`. - ## If `x` is empty, NaN is returned. - ## ``toFloat(x: T): float`` must be defined. - result = 0.0 - var m = mean(x) - for i in items(x): - var diff = toFloat(i) - m - result = result + diff*diff - result = result / toFloat(len(x)) - proc random*(max: int): int {.benign.} ## Returns a random number in the range 0..max-1. The sequence of ## random number is always the same, unless `randomize` is called @@ -376,48 +356,6 @@ proc random*[T](a: openArray[T]): T = ## returns a random element from the openarray `a`. result = a[random(a.low..a.len)] -type - RunningStat* = object ## an accumulator for statistical data - n*: int ## number of pushed data - sum*, min*, max*, mean*: float ## self-explaining - oldM, oldS, newS: float - -{.deprecated: [TFloatClass: FloatClass, TRunningStat: RunningStat].} - -proc push*(s: var RunningStat, x: float) = - ## pushes a value `x` for processing - inc(s.n) - # See Knuth TAOCP vol 2, 3rd edition, page 232 - if s.n == 1: - s.min = x - s.max = x - s.oldM = x - s.mean = x - s.oldS = 0.0 - else: - if s.min > x: s.min = x - if s.max < x: s.max = x - s.mean = s.oldM + (x - s.oldM)/toFloat(s.n) - s.newS = s.oldS + (x - s.oldM)*(x - s.mean) - - # set up for next iteration: - s.oldM = s.mean - s.oldS = s.newS - s.sum = s.sum + x - -proc push*(s: var RunningStat, x: int) = - ## pushes a value `x` for processing. `x` is simply converted to ``float`` - ## and the other push operation is called. - push(s, toFloat(x)) - -proc variance*(s: RunningStat): float = - ## computes the current variance of `s` - if s.n > 1: result = s.newS / (toFloat(s.n - 1)) - -proc standardDeviation*(s: RunningStat): float = - ## computes the current standard deviation of `s` - result = sqrt(variance(s)) - {.pop.} {.pop.} diff --git a/lib/pure/parseutils.nim b/lib/pure/parseutils.nim index b3708838a..698bde42a 100644 --- a/lib/pure/parseutils.nim +++ b/lib/pure/parseutils.nim @@ -25,7 +25,7 @@ const proc toLower(c: char): char {.inline.} = result = if c in {'A'..'Z'}: chr(ord(c)-ord('A')+ord('a')) else: c -proc parseHex*(s: string, number: var int, start = 0): int {. +proc parseHex*(s: string, number: var int, start = 0; maxLen = 0): int {. rtl, extern: "npuParseHex", noSideEffect.} = ## Parses a hexadecimal number and stores its value in ``number``. ## @@ -45,11 +45,14 @@ proc parseHex*(s: string, number: var int, start = 0): int {. ## discard parseHex("0x38", value) ## assert value == -200 ## + ## If 'maxLen==0' the length of the hexadecimal number has no + ## upper bound. Not more than ```maxLen`` characters are parsed. var i = start var foundDigit = false if s[i] == '0' and (s[i+1] == 'x' or s[i+1] == 'X'): inc(i, 2) elif s[i] == '#': inc(i) - while true: + let last = if maxLen == 0: s.len else: i+maxLen + while i < last: case s[i] of '_': discard of '0'..'9': diff --git a/lib/pure/stats.nim b/lib/pure/stats.nim new file mode 100644 index 000000000..ec4cd182b --- /dev/null +++ b/lib/pure/stats.nim @@ -0,0 +1,348 @@ +# +# +# Nim's Runtime Library +# (c) Copyright 2015 Andreas Rumpf +# +# See the file "copying.txt", included in this +# distribution, for details about the copyright. +# +## Statistical analysis framework for performing +## basic statistical analysis of data. +## The data is analysed in a single pass, when a data value +## is pushed to the ``RunningStat`` or ``RunningRegress`` objects +## +## ``RunningStat`` calculates for a single data set +## - n (data count) +## - min (smallest value) +## - max (largest value) +## - sum +## - mean +## - variance +## - varianceS (sample var) +## - standardDeviation +## - standardDeviationS (sample stddev) +## - skewness (the third statistical moment) +## - kurtosis (the fourth statistical moment) +## +## ``RunningRegress`` calculates for two sets of data +## - n +## - slope +## - intercept +## - correlation +## +## Procs have been provided to calculate statistics on arrays and sequences. +## +## However, if more than a single statistical calculation is required, it is more +## efficient to push the data once to the RunningStat object, and +## call the numerous statistical procs for the RunningStat object. +## +## .. code-block:: Nim +## +## var rs: RunningStat +## rs.push(MySeqOfData) +## rs.mean() +## rs.variance() +## rs.skewness() +## rs.kurtosis() + +from math import FloatClass, sqrt, pow, round + +{.push debugger:off .} # the user does not want to trace a part + # of the standard library! +{.push checks:off, line_dir:off, stack_trace:off.} + +type + RunningStat* = object ## an accumulator for statistical data + n*: int ## number of pushed data + min*, max*, sum*: float ## self-explaining + mom1, mom2, mom3, mom4: float ## statistical moments, mom1 is mean + + + RunningRegress* = object ## an accumulator for regression calculations + n*: int ## number of pushed data + x_stats*: RunningStat ## stats for first set of data + y_stats*: RunningStat ## stats for second set of data + s_xy: float ## accumulated data for combined xy + +{.deprecated: [TFloatClass: FloatClass, TRunningStat: RunningStat].} + +# ----------- RunningStat -------------------------- +proc clear*(s: var RunningStat) = + ## reset `s` + s.n = 0 + s.min = toBiggestFloat(int.high) + s.max = 0.0 + s.sum = 0.0 + s.mom1 = 0.0 + s.mom2 = 0.0 + s.mom3 = 0.0 + s.mom4 = 0.0 + +proc push*(s: var RunningStat, x: float) = + ## pushes a value `x` for processing + if s.n == 0: s.min = x + inc(s.n) + # See Knuth TAOCP vol 2, 3rd edition, page 232 + if s.min > x: s.min = x + if s.max < x: s.max = x + s.sum += x + let n = toFloat(s.n) + let delta = x - s.mom1 + let delta_n = delta / toFloat(s.n) + let delta_n2 = delta_n * delta_n + let term1 = delta * delta_n * toFloat(s.n - 1) + s.mom4 += term1 * delta_n2 * (n*n - 3*n + 3) + + 6*delta_n2*s.mom2 - 4*delta_n*s.mom3 + s.mom3 += term1 * delta_n * (n - 2) - 3*delta_n*s.mom2 + s.mom2 += term1 + s.mom1 += delta_n + +proc push*(s: var RunningStat, x: int) = + ## pushes a value `x` for processing. + ## + ## `x` is simply converted to ``float`` + ## and the other push operation is called. + s.push(toFloat(x)) + +proc push*(s: var RunningStat, x: openarray[float|int]) = + ## pushes all values of `x` for processing. + ## + ## Int values of `x` are simply converted to ``float`` and + ## the other push operation is called. + for val in x: + s.push(val) + +proc mean*(s: RunningStat): float = + ## computes the current mean of `s` + result = s.mom1 + +proc variance*(s: RunningStat): float = + ## computes the current population variance of `s` + result = s.mom2 / toFloat(s.n) + +proc varianceS*(s: RunningStat): float = + ## computes the current sample variance of `s` + if s.n > 1: result = s.mom2 / toFloat(s.n - 1) + +proc standardDeviation*(s: RunningStat): float = + ## computes the current population standard deviation of `s` + result = sqrt(variance(s)) + +proc standardDeviationS*(s: RunningStat): float = + ## computes the current sample standard deviation of `s` + result = sqrt(varianceS(s)) + +proc skewness*(s: RunningStat): float = + ## computes the current population skewness of `s` + result = sqrt(toFloat(s.n)) * s.mom3 / pow(s.mom2, 1.5) + +proc skewnessS*(s: RunningStat): float = + ## computes the current sample skewness of `s` + let s2 = skewness(s) + result = sqrt(toFloat(s.n*(s.n-1)))*s2 / toFloat(s.n-2) + +proc kurtosis*(s: RunningStat): float = + ## computes the current population kurtosis of `s` + result = toFloat(s.n) * s.mom4 / (s.mom2 * s.mom2) - 3.0 + +proc kurtosisS*(s: RunningStat): float = + ## computes the current sample kurtosis of `s` + result = toFloat(s.n-1) / toFloat((s.n-2)*(s.n-3)) * + (toFloat(s.n+1)*kurtosis(s) + 6) + +proc `+`*(a, b: RunningStat): RunningStat = + ## combine two RunningStats. + ## + ## Useful if performing parallel analysis of data series + ## and need to re-combine parallel result sets + result.clear() + result.n = a.n + b.n + + let delta = b.mom1 - a.mom1 + let delta2 = delta*delta + let delta3 = delta*delta2 + let delta4 = delta2*delta2 + let n = toFloat(result.n) + + result.mom1 = (a.n.float*a.mom1 + b.n.float*b.mom1) / n + result.mom2 = a.mom2 + b.mom2 + delta2 * a.n.float * b.n.float / n + result.mom3 = a.mom3 + b.mom3 + + delta3 * a.n.float * b.n.float * (a.n.float - b.n.float)/(n*n); + result.mom3 += 3.0*delta * (a.n.float*b.mom2 - b.n.float*a.mom2) / n + result.mom4 = a.mom4 + b.mom4 + + delta4*a.n.float*b.n.float * toFloat(a.n*a.n - a.n*b.n + b.n*b.n) / + (n*n*n) + result.mom4 += 6.0*delta2 * (a.n.float*a.n.float*b.mom2 + b.n.float*b.n.float*a.mom2) / + (n*n) + + 4.0*delta*(a.n.float*b.mom3 - b.n.float*a.mom3) / n + result.max = max(a.max, b.max) + result.min = max(a.min, b.min) + +proc `+=`*(a: var RunningStat, b: RunningStat) {.inline.} = + ## add a second RunningStats `b` to `a` + a = a + b +# ---------------------- standalone array/seq stats --------------------- +proc mean*[T](x: openArray[T]): float = + ## computes the mean of `x` + var rs: RunningStat + rs.push(x) + result = rs.mean() + +proc variance*[T](x: openArray[T]): float = + ## computes the population variance of `x` + var rs: RunningStat + rs.push(x) + result = rs.variance() + +proc varianceS*[T](x: openArray[T]): float = + ## computes the sample variance of `x` + var rs: RunningStat + rs.push(x) + result = rs.varianceS() + +proc standardDeviation*[T](x: openArray[T]): float = + ## computes the population standardDeviation of `x` + var rs: RunningStat + rs.push(x) + result = rs.standardDeviation() + +proc standardDeviationS*[T](x: openArray[T]): float = + ## computes the sanple standardDeviation of `x` + var rs: RunningStat + rs.push(x) + result = rs.standardDeviationS() + +proc skewness*[T](x: openArray[T]): float = + ## computes the population skewness of `x` + var rs: RunningStat + rs.push(x) + result = rs.skewness() + +proc skewnessS*[T](x: openArray[T]): float = + ## computes the sample skewness of `x` + var rs: RunningStat + rs.push(x) + result = rs.skewnessS() + +proc kurtosis*[T](x: openArray[T]): float = + ## computes the population kurtosis of `x` + var rs: RunningStat + rs.push(x) + result = rs.kurtosis() + +proc kurtosisS*[T](x: openArray[T]): float = + ## computes the sample kurtosis of `x` + var rs: RunningStat + rs.push(x) + result = rs.kurtosisS() + +# ---------------------- Running Regression ----------------------------- + +proc clear*(r: var RunningRegress) = + ## reset `r` + r.x_stats.clear() + r.y_stats.clear() + r.s_xy = 0.0 + r.n = 0 + +proc push*(r: var RunningRegress, x, y: float) = + ## pushes two values `x` and `y` for processing + r.s_xy += (r.x_stats.mean() - x)*(r.y_stats.mean() - y)* + toFloat(r.n) / toFloat(r.n + 1) + r.x_stats.push(x) + r.y_stats.push(y) + inc(r.n) + +proc push*(r: var RunningRegress, x, y: int) {.inline.} = + ## pushes two values `x` and `y` for processing. + ## + ## `x` and `y` are converted to ``float`` + ## and the other push operation is called. + r.push(toFloat(x), toFloat(y)) + +proc push*(r: var RunningRegress, x, y: openarray[float|int]) = + ## pushes two sets of values `x` and `y` for processing. + assert(x.len == y.len) + for i in 0..<x.len: + r.push(x[i], y[i]) + +proc slope*(r: RunningRegress): float = + ## computes the current slope of `r` + let s_xx = r.x_stats.varianceS()*toFloat(r.n - 1) + result = r.s_xy / s_xx + +proc intercept*(r: RunningRegress): float = + ## computes the current intercept of `r` + result = r.y_stats.mean() - r.slope()*r.x_stats.mean() + +proc correlation*(r: RunningRegress): float = + ## computes the current correlation of the two data + ## sets pushed into `r` + let t = r.x_stats.standardDeviation() * r.y_stats.standardDeviation() + result = r.s_xy / ( toFloat(r.n) * t ) + +proc `+`*(a, b: RunningRegress): RunningRegress = + ## combine two `RunningRegress` objects. + ## + ## Useful if performing parallel analysis of data series + ## and need to re-combine parallel result sets + result.clear() + result.x_stats = a.x_stats + b.x_stats + result.y_stats = a.y_stats + b.y_stats + result.n = a.n + b.n + + let delta_x = b.x_stats.mean() - a.x_stats.mean() + let delta_y = b.y_stats.mean() - a.y_stats.mean() + result.s_xy = a.s_xy + b.s_xy + + toFloat(a.n*b.n)*delta_x*delta_y/toFloat(result.n) + +proc `+=`*(a: var RunningRegress, b: RunningRegress) = + ## add RunningRegress `b` to `a` + a = a + b + +{.pop.} +{.pop.} + +when isMainModule: + proc clean(x: float): float = + result = round(1.0e8*x).float * 1.0e-8 + + var rs: RunningStat + rs.push(@[1.0, 2.0, 1.0, 4.0, 1.0, 4.0, 1.0, 2.0]) + doAssert(rs.n == 8) + doAssert(clean(rs.mean) == 2.0) + doAssert(clean(rs.variance()) == 1.5) + doAssert(clean(rs.varianceS()) == 1.71428571) + doAssert(clean(rs.skewness()) == 0.81649658) + doAssert(clean(rs.skewnessS()) == 1.01835015) + doAssert(clean(rs.kurtosis()) == -1.0) + doAssert(clean(rs.kurtosisS()) == -0.7000000000000001) + + var rs1, rs2: RunningStat + rs1.push(@[1.0, 2.0, 1.0, 4.0]) + rs2.push(@[1.0, 4.0, 1.0, 2.0]) + let rs3 = rs1 + rs2 + doAssert(clean(rs3.mom2) == clean(rs.mom2)) + doAssert(clean(rs3.mom3) == clean(rs.mom3)) + doAssert(clean(rs3.mom4) == clean(rs.mom4)) + rs1 += rs2 + doAssert(clean(rs1.mom2) == clean(rs.mom2)) + doAssert(clean(rs1.mom3) == clean(rs.mom3)) + doAssert(clean(rs1.mom4) == clean(rs.mom4)) + rs1.clear() + rs1.push(@[1.0, 2.2, 1.4, 4.9]) + doAssert(rs1.sum == 9.5) + doAssert(rs1.mean() == 2.375) + + var rr: RunningRegress + rr.push(@[0.0,1.0,2.8,3.0,4.0], @[0.0,1.0,2.3,3.0,4.0]) + doAssert(rr.slope() == 0.9695585996955861) + doAssert(rr.intercept() == -0.03424657534246611) + doAssert(rr.correlation() == 0.9905100362239381) + var rr1, rr2: RunningRegress + rr1.push(@[0.0,1.0], @[0.0,1.0]) + rr2.push(@[2.8,3.0,4.0], @[2.3,3.0,4.0]) + let rr3 = rr1 + rr2 + doAssert(rr3.correlation() == rr.correlation()) + doAssert(clean(rr3.slope()) == clean(rr.slope())) + doAssert(clean(rr3.intercept()) == clean(rr.intercept())) diff --git a/lib/pure/strutils.nim b/lib/pure/strutils.nim index aa29bb073..a446f85b4 100644 --- a/lib/pure/strutils.nim +++ b/lib/pure/strutils.nim @@ -1210,22 +1210,21 @@ proc unescape*(s: string, prefix = "\"", suffix = "\""): string {.noSideEffect, ## If `s` does not begin with ``prefix`` and end with ``suffix`` a ## ValueError exception will be raised. result = newStringOfCap(s.len) - var i = 0 + var i = prefix.len if not s.startsWith(prefix): raise newException(ValueError, "String does not start with a prefix of: " & prefix) - inc(i) while true: if i == s.len-suffix.len: break case s[i] of '\\': case s[i+1]: of 'x': - inc i + inc i, 2 var c: int - i += parseutils.parseHex(s, c, i) + i += parseutils.parseHex(s, c, i, maxLen=2) result.add(chr(c)) - inc(i, 2) + dec i, 2 of '\\': result.add('\\') of '\'': @@ -1721,3 +1720,4 @@ when isMainModule: doAssert isUpper("ABC") doAssert(not isUpper("AAcc")) doAssert(not isUpper("A#$")) + doAssert(unescape(r"\x013", "", "") == "\x013") diff --git a/lib/wrappers/linenoise/clinenoise.c b/lib/wrappers/linenoise/clinenoise.c index b4ae32472..dd3aa736c 100644 --- a/lib/wrappers/linenoise/clinenoise.c +++ b/lib/wrappers/linenoise/clinenoise.c @@ -417,10 +417,10 @@ void linenoiseAddCompletion(linenoiseCompletions *lc, const char *str) { size_t len = strlen(str); char *copy, **cvec; - copy = malloc(len+1); + copy = (char*)malloc(len+1); if (copy == NULL) return; memcpy(copy,str,len+1); - cvec = realloc(lc->cvec,sizeof(char*)*(lc->len+1)); + cvec = (char**)realloc(lc->cvec,sizeof(char*)*(lc->len+1)); if (cvec == NULL) { free(copy); return; @@ -446,11 +446,11 @@ static void abInit(struct abuf *ab) { } static void abAppend(struct abuf *ab, const char *s, int len) { - char *new = realloc(ab->b,ab->len+len); + char *neww = (char*)realloc(ab->b,ab->len+len); - if (new == NULL) return; - memcpy(new+ab->len,s,len); - ab->b = new; + if (neww == NULL) return; + memcpy(neww+ab->len,s,len); + ab->b = neww; ab->len += len; } @@ -1016,7 +1016,7 @@ int linenoiseHistoryAdd(const char *line) { /* Initialization on first call. */ if (history == NULL) { - history = malloc(sizeof(char*)*history_max_len); + history = (char**)malloc(sizeof(char*)*history_max_len); if (history == NULL) return 0; memset(history,0,(sizeof(char*)*history_max_len)); } @@ -1043,14 +1043,14 @@ int linenoiseHistoryAdd(const char *line) { * just the latest 'len' elements if the new history length value is smaller * than the amount of items already inside the history. */ int linenoiseHistorySetMaxLen(int len) { - char **new; + char **neww; if (len < 1) return 0; if (history) { int tocopy = history_len; - new = malloc(sizeof(char*)*len); - if (new == NULL) return 0; + neww = (char**)malloc(sizeof(char*)*len); + if (neww == NULL) return 0; /* If we can't copy everything, free the elements we'll not use. */ if (len < tocopy) { @@ -1059,10 +1059,10 @@ int linenoiseHistorySetMaxLen(int len) { for (j = 0; j < tocopy-len; j++) free(history[j]); tocopy = len; } - memset(new,0,sizeof(char*)*len); - memcpy(new,history+(history_len-tocopy), sizeof(char*)*tocopy); + memset(neww,0,sizeof(char*)*len); + memcpy(neww,history+(history_len-tocopy), sizeof(char*)*tocopy); free(history); - history = new; + history = neww; } history_max_len = len; if (history_len > history_max_len) |