diff options
Diffstat (limited to 'lib/pure')
-rw-r--r-- | lib/pure/actors.nim | 14 | ||||
-rw-r--r-- | lib/pure/collections/critbits.nim | 302 | ||||
-rw-r--r-- | lib/pure/events.nim | 39 | ||||
-rwxr-xr-x | lib/pure/os.nim | 8 | ||||
-rwxr-xr-x | lib/pure/osproc.nim | 9 | ||||
-rwxr-xr-x | lib/pure/strutils.nim | 273 | ||||
-rw-r--r-- | lib/pure/subexes.nim | 380 |
7 files changed, 881 insertions, 144 deletions
diff --git a/lib/pure/actors.nim b/lib/pure/actors.nim index c07adfd93..d51b973b7 100644 --- a/lib/pure/actors.nim +++ b/lib/pure/actors.nim @@ -119,15 +119,25 @@ proc createActorPool*[TIn, TOut](a: var TActorPool[TIn, TOut], poolSize = 4) = proc sync*[TIn, TOut](a: var TActorPool[TIn, TOut], polling=50) = ## waits for every actor of `a` to finish with its work. Currently this is - ## implemented as polling every `polling` ms. This will change in a later + ## implemented as polling every `polling` ms and has a slight chance + ## of failing since we check for every actor to be in `ready` state and not + ## for messages still in ether. This will change in a later ## version, however. + var allReadyCount = 0 while true: var wait = false for i in 0..high(a.actors): if not a.actors[i].i.ready: wait = true + allReadyCount = 0 break - if not wait: break + if not wait: + # it's possible that some actor sent a message to some other actor but + # both appeared to be non-working as the message takes some time to + # arrive. We assume that this won't take longer than `polling` and + # simply attempt a second time and declare victory then. ;-) + inc allReadyCount + if allReadyCount > 1: break sleep(polling) proc terminate*[TIn, TOut](a: var TActorPool[TIn, TOut]) = diff --git a/lib/pure/collections/critbits.nim b/lib/pure/collections/critbits.nim new file mode 100644 index 000000000..a18c42e58 --- /dev/null +++ b/lib/pure/collections/critbits.nim @@ -0,0 +1,302 @@ +# +# +# Nimrod's Runtime Library +# (c) Copyright 2011 Andreas Rumpf +# +# See the file "copying.txt", included in this +# distribution, for details about the copyright. +# + +## This module implements a `crit bit tree`:idx: which is an efficient +## container for a set or a mapping of strings. Based on the excellent paper +## by Adam Langley. + +type + TNode[T] = object {.pure, final, acyclic.} + byte: int ## byte index of the difference + otherbits: char + case isLeaf: bool + of false: child: array[0..1, ref TNode[T]] + of true: + key: string + when T isnot void: + val: T + + PNode[T] = ref TNode[T] + TCritBitTree*[T] = object {. + pure, final.} ## The crit bit tree can either be used + ## as a mapping from strings to + ## some type ``T`` or as a set of + ## strings if ``T`` is void. + root: PNode[T] + count: int + +proc len*[T](c: TCritBitTree[T]): int = + ## returns the number of elements in `c` in O(1). + result = c.count + +proc rawGet[T](c: TCritBitTree[T], key: string): PNode[T] = + var it = c.root + while it != nil: + if not it.isLeaf: + let ch = if it.byte < key.len: key[it.byte] else: '\0' + let dir = (1 + (ch.ord or it.otherBits.ord)) shr 8 + it = it.child[dir] + else: + return if it.key == key: it else: nil + +proc contains*[T](c: TCritBitTree[T], key: string): bool {.inline.} = + ## returns true iff `c` contains the given `key`. + result = rawGet(c, key) != nil + +proc hasKey*[T](c: TCritBitTree[T], key: string): bool {.inline.} = + ## alias for `contains`. + result = rawGet(c, key) != nil + +proc rawInsert[T](c: var TCritBitTree[T], key: string): PNode[T] = + if c.root == nil: + new c.root + c.root.isleaf = true + c.root.key = key + result = c.root + else: + var it = c.root + while not it.isLeaf: + let ch = if it.byte < key.len: key[it.byte] else: '\0' + let dir = (1 + (ch.ord or it.otherBits.ord)) shr 8 + it = it.child[dir] + + var newOtherBits = 0 + var newByte = 0 + block blockX: + while newbyte < key.len: + if it.key[newbyte] != key[newbyte]: + newotherbits = it.key[newbyte].ord xor key[newbyte].ord + break blockX + inc newbyte + if it.key[newbyte] != '\0': + newotherbits = it.key[newbyte].ord + else: + return it + while (newOtherBits and (newOtherBits-1)) != 0: + newOtherBits = newOtherBits and (newOtherBits-1) + newOtherBits = newOtherBits xor 255 + let ch = it.key[newByte] + let dir = (1 + (ord(ch) or newOtherBits)) shr 8 + + var inner: PNode[T] + new inner + new result + result.isLeaf = true + result.key = key + inner.otherBits = chr(newOtherBits) + inner.byte = newByte + inner.child[1 - dir] = result + + var wherep = addr(c.root) + while true: + var p = wherep[] + if p.isLeaf: break + if p.byte > newByte: break + if p.byte == newByte and p.otherBits.ord > newOtherBits: break + let ch = if p.byte < key.len: key[p.byte] else: '\0' + let dir = (1 + (ch.ord or p.otherBits.ord)) shr 8 + wherep = addr(p.child[dir]) + inner.child[dir] = wherep[] + wherep[] = inner + inc c.count + +proc containsOrIncl*[T](c: var TCritBitTree[T], key: string, val: T): bool = + ## returns true iff `c` contains the given `key`. If the key does not exist + ## ``c[key] = val`` is performed. + let oldCount = c.count + var n = rawInsert(c, key) + result = c.count == oldCount + when T isnot void: + if not result: n.val = val + +proc containsOrIncl*(c: var TCritBitTree[void], key: string): bool = + ## returns true iff `c` contains the given `key`. If the key does not exist + ## it is inserted into `c`. + let oldCount = c.count + var n = rawInsert(c, key) + result = c.count == oldCount + +proc incl*(c: var TCritBitTree[void], key: string) = + ## includes `key` in `c`. + discard rawInsert(c, key) + +proc `[]=`*[T](c: var TCritBitTree[T], key: string, val: T) = + ## puts a (key, value)-pair into `t`. + var n = rawInsert(c, key) + n.val = val + +proc `[]`*[T](c: var TCritBitTree[T], key: string): T {.inline.} = + ## retrieves the value at ``c[key]``. If `key` is not in `t`, + ## default empty value for the type `B` is returned + ## and no exception is raised. One can check with ``hasKey`` whether the key + ## exists. + let n = rawGet(c, key) + if n != nil: result = n.val + +proc mget*[T](c: var TCritBitTree[T], key: string): var T {.inline.} = + ## retrieves the value at ``c[key]``. The value can be modified. + ## If `key` is not in `t`, the ``EInvalidKey`` exception is raised. + let n = rawGet(c, key) + if n != nil: result = n.val + else: raise newException(EInvalidKey, "key not found: " & $key) + +proc excl*[T](c: var TCritBitTree[T], key: string) = + ## removes `key` (and its associated value) from the set `c`. + ## If the `key` does not exist, nothing happens. + var p = c.root + var wherep = addr(c.root) + var whereq: ptr PNode = nil + if p == nil: return + var dir = 0 + var q: PNode + while not p.isLeaf: + whereq = wherep + q = p + let ch = if p.byte < key.len: key[p.byte] else: '\0' + dir = (1 + (ch.ord or p.otherBits.ord)) shr 8 + wherep = addr(p.child[dir]) + p = wherep[] + if p.key == key: + # else: not in tree at all + if whereq == nil: + c.root = nil + else: + whereq[] = q.child[1 - dir] + dec c.count + +iterator leaves[T](n: PNode[T]): PNode[T] = + if n != nil: + # XXX actually we could compute the necessary stack size in advance: + # it's rougly log2(c.count). + var stack = @[n] + while stack.len > 0: + var it = stack.pop + while not it.isLeaf: + stack.add(it.child[1]) + it = it.child[0] + assert(it != nil) + yield it + +iterator keys*[T](c: TCritBitTree[T]): string = + ## yields all keys in lexicographical order. + for x in leaves(c.root): yield x.key + +iterator values*[T](c: TCritBitTree[T]): T = + ## yields all values of `c` in the lexicographical order of the + ## corresponding keys. + for x in leaves(c.root): yield x.val + +iterator mvalues*[T](c: var TCritBitTree[T]): var T = + ## yields all values of `c` in the lexicographical order of the + ## corresponding keys. The values can be modified. + for x in leaves(c.root): yield x.val + +iterator items*[T](c: TCritBitTree[T]): string = + ## yields all keys in lexicographical order. + for x in leaves(c.root): yield x.key + +iterator pairs*[T](c: TCritBitTree[T]): tuple[key: string, val: T] = + ## yields all (key, value)-pairs of `c`. + for x in leaves(c.root): yield (x.key, x.val) + +iterator mpairs*[T](c: var TCritBitTree[T]): tuple[key: string, val: var T] = + ## yields all (key, value)-pairs of `c`. The yielded values can be modified. + for x in leaves(c.root): yield (x.key, x.val) + +proc allprefixedAux[T](c: TCritBitTree[T], key: string): PNode[T] = + var p = c.root + var top = p + if p != nil: + while not p.isLeaf: + var q = p + let ch = if p.byte < key.len: key[p.byte] else: '\0' + let dir = (1 + (ch.ord or p.otherBits.ord)) shr 8 + p = p.child[dir] + if q.byte < key.len: top = p + for i in 0 .. <key.len: + if p.key[i] != key[i]: return + result = top + +iterator itemsWithPrefix*[T](c: TCritBitTree[T], prefix: string): string = + ## yields all keys starting with `prefix`. + let top = allprefixedAux(c, prefix) + for x in leaves(top): yield x.key + +iterator keysWithPrefix*[T](c: TCritBitTree[T], prefix: string): string = + ## yields all keys starting with `prefix`. + let top = allprefixedAux(c, prefix) + for x in leaves(top): yield x.key + +iterator valuesWithPrefix*[T](c: TCritBitTree[T], prefix: string): T = + ## yields all values of `c` starting with `prefix` of the + ## corresponding keys. + let top = allprefixedAux(c, prefix) + for x in leaves(top): yield x.val + +iterator mvaluesWithPrefix*[T](c: var TCritBitTree[T], prefix: string): var T = + ## yields all values of `c` starting with `prefix` of the + ## corresponding keys. The values can be modified. + let top = allprefixedAux(c, prefix) + for x in leaves(top): yield x.val + +iterator pairsWithPrefix*[T](c: TCritBitTree[T], + prefix: string): tuple[key: string, val: T] = + ## yields all (key, value)-pairs of `c` starting with `prefix`. + let top = allprefixedAux(c, prefix) + for x in leaves(top): yield (x.key, x.val) + +iterator mpairsWithPrefix*[T](c: var TCritBitTree[T], + prefix: string): tuple[key: string, val: var T] = + ## yields all (key, value)-pairs of `c` starting with `prefix`. + ## The yielded values can be modified. + let top = allprefixedAux(c, prefix) + for x in leaves(top): yield (x.key, x.val) + +proc `$`*[T](c: TCritBitTree[T]): string = + ## turns `c` into a string representation. Example outputs: + ## ``{keyA: value, keyB: value}``, ``{:}`` + ## If `T` is void the outputs look like: + ## ``{keyA, keyB}``, ``{}``. + if c.len == 0: + when T is void: + result = "{}" + else: + result = "{:}" + else: + # an educated guess is better than nothing: + when T is void: + const avgItemLen = 8 + else: + const avgItemLen = 16 + result = newStringOfCap(c.count * avgItemLen) + result.add("{") + for key, val in pairs(c): + if result.len > 1: result.add(", ") + result.add($key) + when T isnot void: + result.add(": ") + result.add($val) + result.add("}") + +when isMainModule: + var r: TCritBitTree[void] + r.incl "abc" + r.incl "xyz" + r.incl "def" + r.incl "definition" + r.incl "prefix" + doAssert r.contains"def" + #r.del "def" + + for w in r.items: + echo w + + for w in r.allPrefixed("de"): + echo w + diff --git a/lib/pure/events.nim b/lib/pure/events.nim index a3a609b20..c3c4cb0dc 100644 --- a/lib/pure/events.nim +++ b/lib/pure/events.nim @@ -12,25 +12,27 @@ ## This module implements an event system that is not dependant on external ## graphical toolkits. It was originally called ``NimEE`` because ## it was inspired by Python's PyEE module. There are two ways you can use events: one is a python-inspired way; the other is more of a C-style way. +## ## .. code-block:: Nimrod -## var ee = initEventEmitter() -## var genericargs: TEventArgs -## proc handleevent(e: TEventArgs) = -## echo("Handled!") +## var ee = initEventEmitter() +## var genericargs: TEventArgs +## proc handleevent(e: TEventArgs) = +## echo("Handled!") ## -## # Python way -## ee.on("EventName", handleevent) -## ee.emit("EventName", genericargs) +## # Python way +## ee.on("EventName", handleevent) +## ee.emit("EventName", genericargs) ## -## # C/Java way -## # Declare a type -## type -## TSomeObject = object of TObject -## SomeEvent: TEventHandler -## var myobj: TSomeObject -## myobj.SomeEvent = initEventHandler("SomeEvent") -## myobj.SomeEvent.addHandler(handleevent) -## ee.emit(myobj.SomeEvent, genericargs) +## # C/Java way +## # Declare a type +## type +## TSomeObject = object of TObject +## SomeEvent: TEventHandler +## var myobj: TSomeObject +## myobj.SomeEvent = initEventHandler("SomeEvent") +## myobj.SomeEvent.addHandler(handleevent) +## ee.emit(myobj.SomeEvent, genericargs) + type TEventArgs* = object of TObject ## Base object for event arguments that are passed to callback functions. TEventHandler* = tuple[name: string, handlers: seq[proc(e:TEventArgs)]] ## An eventhandler for an event. @@ -57,6 +59,11 @@ proc removeHandler*(handler: var TEventHandler, func: proc(e: TEventArgs)) = handler.handlers.del(i) break +proc containsHandler*(handler: var TEventHandler, func: proc(e: TEventArgs)): bool = + ## Checks if a callback is registered to this event handler. + return handler.handlers.contains(func) + + proc clearHandlers*(handler: var TEventHandler) = ## Clears all of the callbacks from the event handler. setLen(handler.handlers, 0) diff --git a/lib/pure/os.nim b/lib/pure/os.nim index f22a53dd8..f96998831 100755 --- a/lib/pure/os.nim +++ b/lib/pure/os.nim @@ -554,7 +554,7 @@ proc cmpPaths*(pathA, pathB: string): int {. proc isAbsolute*(path: string): bool {.rtl, noSideEffect, extern: "nos$1".} = ## Checks whether a given `path` is absolute. ## - ## on Windows, network paths are considered absolute too. + ## On Windows, network paths are considered absolute too. when doslike: var len = len(path) result = (len > 1 and path[0] in {'/', '\\'}) or @@ -694,8 +694,8 @@ proc execShellCmd*(command: string): int {.rtl, extern: "nos$1".} = # iterator depends on ``environment``. var - envComputed: bool = false - environment: seq[string] = @[] + envComputed {.threadvar.}: bool + environment {.threadvar.}: seq[string] when defined(windows): # because we support Windows GUI applications, things get really @@ -705,6 +705,7 @@ when defined(windows): proc getEnvVarsC() = if not envComputed: + environment = @[] var env = getEnvironmentStringsA() e = env @@ -738,6 +739,7 @@ else: proc getEnvVarsC() = # retrieves the variables of char** env of C's main proc if not envComputed: + environment = @[] when useNSGetEnviron: var gEnv = NSGetEnviron()[] var i = 0 diff --git a/lib/pure/osproc.nim b/lib/pure/osproc.nim index dc107b382..510dff232 100755 --- a/lib/pure/osproc.nim +++ b/lib/pure/osproc.nim @@ -102,7 +102,7 @@ proc processID*(p: PProcess): int {.rtl, extern: "nosp$1".} = ## returns `p`'s process ID. return p.id -proc waitForExit*(p: PProcess): int {.rtl, extern: "nosp$1".} +proc waitForExit*(p: PProcess, timeout: int = -1): int {.rtl, extern: "nosp$1".} ## waits for the process to finish and returns `p`'s error code. proc peekExitCode*(p: PProcess): int @@ -382,8 +382,9 @@ when defined(Windows) and not defined(useNimRtl): if running(p): discard TerminateProcess(p.FProcessHandle, 0) - proc waitForExit(p: PProcess): int = - discard WaitForSingleObject(p.FProcessHandle, Infinite) + proc waitForExit(p: PProcess, timeout: int = -1): int = + discard WaitForSingleObject(p.FProcessHandle, timeout) + var res: int32 discard GetExitCodeProcess(p.FProcessHandle, res) result = res @@ -640,7 +641,7 @@ elif not defined(useNimRtl): if kill(-p.id, SIGKILL) != 0'i32: OSError() else: OSError() - proc waitForExit(p: PProcess): int = + proc waitForExit(p: PProcess, timeout: int = -1): int = #if waitPid(p.id, p.exitCode, 0) == int(p.id): # ``waitPid`` fails if the process is not running anymore. But then # ``running`` probably set ``p.exitCode`` for us. Since ``p.exitCode`` is diff --git a/lib/pure/strutils.nim b/lib/pure/strutils.nim index 46dcb3849..6d4544425 100755 --- a/lib/pure/strutils.nim +++ b/lib/pure/strutils.nim @@ -140,105 +140,6 @@ proc cmpIgnoreStyle*(a, b: string): int {.noSideEffect, {.pop.} -proc findNormalized(x: string, inArray: openarray[string]): int = - var i = 0 - while i < high(inArray): - if cmpIgnoreStyle(x, inArray[i]) == 0: return i - inc(i, 2) # incrementing by 1 would probably lead to a - # security hole... - return -1 - -proc addf*(s: var string, formatstr: string, a: openarray[string]) {. - noSideEffect, rtl, extern: "nsuAddf".} = - ## The same as ``add(s, formatstr % a)``, but more efficient. - const PatternChars = {'a'..'z', 'A'..'Z', '0'..'9', '\128'..'\255', '_'} - var i = 0 - var num = 0 - while i < len(formatstr): - if formatstr[i] == '$': - case formatstr[i+1] # again we use the fact that strings - # are zero-terminated here - of '#': - add s, a[num] - inc i, 2 - inc num - of '$': - add s, '$' - inc(i, 2) - of '1'..'9': - var j = 0 - inc(i) # skip $ - while formatstr[i] in Digits: - j = j * 10 + ord(formatstr[i]) - ord('0') - inc(i) - add s, a[j - 1] - of '{': - var j = i+1 - while formatstr[j] notin {'\0', '}'}: inc(j) - var x = findNormalized(substr(formatstr, i+2, j-1), a) - if x >= 0 and x < high(a): add s, a[x+1] - else: raise newException(EInvalidValue, "invalid format string") - i = j+1 - of 'a'..'z', 'A'..'Z', '\128'..'\255', '_': - var j = i+1 - while formatstr[j] in PatternChars: inc(j) - var x = findNormalized(substr(formatstr, i+1, j-1), a) - if x >= 0 and x < high(a): add s, a[x+1] - else: raise newException(EInvalidValue, "invalid format string") - i = j - else: raise newException(EInvalidValue, "invalid format string") - else: - add s, formatstr[i] - inc(i) - -proc `%` *(formatstr: string, a: openarray[string]): string {.noSideEffect, - rtl, extern: "nsuFormatOpenArray".} = - ## The `substitution`:idx: operator performs string substitutions in - ## `formatstr` and returns a modified `formatstr`. This is often called - ## `string interpolation`:idx:. - ## - ## This is best explained by an example: - ## - ## .. code-block:: nimrod - ## "$1 eats $2." % ["The cat", "fish"] - ## - ## Results in: - ## - ## .. code-block:: nimrod - ## "The cat eats fish." - ## - ## The substitution variables (the thing after the ``$``) are enumerated - ## from 1 to ``a.len``. - ## To produce a verbatim ``$``, use ``$$``. - ## The notation ``$#`` can be used to refer to the next substitution variable: - ## - ## .. code-block:: nimrod - ## "$# eats $#." % ["The cat", "fish"] - ## - ## Substitution variables can also be words (that is - ## ``[A-Za-z_]+[A-Za-z0-9_]*``) in which case the arguments in `a` with even - ## indices are keys and with odd indices are the corresponding values. - ## An example: - ## - ## .. code-block:: nimrod - ## "$animal eats $food." % ["animal", "The cat", "food", "fish"] - ## - ## Results in: - ## - ## .. code-block:: nimrod - ## "The cat eats fish." - ## - ## The variables are compared with `cmpIgnoreStyle`. `EInvalidValue` is - ## raised if an ill-formed format string has been passed to the `%` operator. - result = newStringOfCap(formatstr.len + a.len shl 4) - addf(result, formatstr, a) - -proc `%` *(formatstr, a: string): string {.noSideEffect, - rtl, extern: "nsuFormatSingleElem".} = - ## This is the same as ``formatstr % [a]``. - result = newStringOfCap(formatstr.len + a.len) - addf(result, formatstr, [a]) - proc strip*(s: string, leading = true, trailing = true): string {.noSideEffect, rtl, extern: "nsuStrip".} = ## Strips whitespace from `s` and returns the resulting string. @@ -467,16 +368,16 @@ proc ParseHexInt*(s: string): int {.noSideEffect, procvar, inc(i) of '\0': break else: raise newException(EInvalidValue, "invalid integer: " & s) - -proc parseBool*(s: string): bool = - ## Parses a value into a `bool`. If ``s`` is one of the following values: - ## ``y, yes, true, 1, on``, then returns `true`. If ``s`` is one of the - ## following values: ``n, no, false, 0, off``, then returns `false`. - ## If ``s`` is something else a ``EInvalidValue`` exception is raised. - case normalize(s) - of "y", "yes", "true", "1", "on": result = true - of "n", "no", "false", "0", "off": result = false - else: raise newException(EInvalidValue, "cannot interpret as a bool: " & s) + +proc parseBool*(s: string): bool = + ## Parses a value into a `bool`. If ``s`` is one of the following values: + ## ``y, yes, true, 1, on``, then returns `true`. If ``s`` is one of the + ## following values: ``n, no, false, 0, off``, then returns `false`. + ## If ``s`` is something else a ``EInvalidValue`` exception is raised. + case normalize(s) + of "y", "yes", "true", "1", "on": result = true + of "n", "no", "false", "0", "off": result = false + else: raise newException(EInvalidValue, "cannot interpret as a bool: " & s) proc repeatChar*(count: int, c: Char = ' '): string {.noSideEffect, rtl, extern: "nsuRepeatChar".} = @@ -709,7 +610,7 @@ proc find*(s, sub: string, start: int = 0): int {.noSideEffect, rtl, extern: "nsuFindStr".} = ## Searches for `sub` in `s` starting at position `start`. Searching is ## case-sensitive. If `sub` is not in `s`, -1 is returned. - var a: TSkipTable + var a {.noinit.}: TSkipTable preprocessSub(sub, a) result = findAux(s, sub, start, a) @@ -752,7 +653,7 @@ proc contains*(s: string, chars: set[char]): bool {.noSideEffect.} = proc replace*(s, sub: string, by = ""): string {.noSideEffect, rtl, extern: "nsuReplaceStr".} = ## Replaces `sub` in `s` by the string `by`. - var a: TSkipTable + var a {.noinit.}: TSkipTable result = "" preprocessSub(sub, a) var i = 0 @@ -921,7 +822,7 @@ proc editDistance*(a, b: string): int {.noSideEffect, inc(len2) var half = len1 shr 1 # initalize first row: - #var row = cast[ptr array[0..high(int) div 8, int]](alloc(len2 * sizeof(int))) + #var row = cast[ptr array[0..high(int) div 8, int]](alloc(len2*sizeof(int))) var row: seq[int] newSeq(row, len2) var e = s + len2 - 1 # end marker @@ -999,7 +900,7 @@ proc formatBiggestFloat*(f: BiggestFloat, format: TFloatFormat = ffDefault, ## after the decimal point for Nimrod's ``biggestFloat`` type. const floatFormatToChar: array[TFloatFormat, char] = ['g', 'f', 'e'] var - frmtstr: array[0..5, char] + frmtstr {.noinit.}: array[0..5, char] buf: array[0..80, char] frmtstr[0] = '%' frmtstr[1] = '#' @@ -1028,16 +929,150 @@ proc formatFloat*(f: float, format: TFloatFormat = ffDefault, ## after the decimal point for Nimrod's ``float`` type. result = formatBiggestFloat(f, format, precision) +proc formatSize*(bytes: biggestInt, decimalSep = '.'): string = + ## Rounds and formats `bytes`. Examples: + ## + ## .. code-block:: nimrod + ## + ## formatSize(1'i64 shl 31 + 300'i64) == "2.204GB" + ## formatSize(4096) == "4KB" + ## + template frmt(a, b, c: expr): expr = + let bs = $b + insertSep($a) & decimalSep & bs.substr(0, 2) & c + let gigabytes = bytes shr 30 + let megabytes = bytes shr 20 + let kilobytes = bytes shr 10 + if gigabytes != 0: + result = frmt(gigabytes, megabytes, "GB") + elif megabytes != 0: + result = frmt(megabytes, kilobytes, "MB") + elif kilobytes != 0: + result = frmt(kilobytes, bytes, "KB") + else: + result = insertSep($bytes) & "B" + +proc findNormalized(x: string, inArray: openarray[string]): int = + var i = 0 + while i < high(inArray): + if cmpIgnoreStyle(x, inArray[i]) == 0: return i + inc(i, 2) # incrementing by 1 would probably lead to a + # security hole... + return -1 + +proc addf*(s: var string, formatstr: string, a: openarray[string]) {. + noSideEffect, rtl, extern: "nsuAddf".} = + ## The same as ``add(s, formatstr % a)``, but more efficient. + const PatternChars = {'a'..'z', 'A'..'Z', '0'..'9', '\128'..'\255', '_'} + var i = 0 + var num = 0 + while i < len(formatstr): + if formatstr[i] == '$': + case formatstr[i+1] # again we use the fact that strings + # are zero-terminated here + of '#': + add s, a[num] + inc i, 2 + inc num + of '$': + add s, '$' + inc(i, 2) + of '1'..'9', '-': + var j = 0 + inc(i) # skip $ + var negative = formatstr[i] == '-' + if negative: inc i + while formatstr[i] in Digits: + j = j * 10 + ord(formatstr[i]) - ord('0') + inc(i) + if not negative: + add s, a[j - 1] + else: + add s, a[a.len - j] + of '{': + var j = i+1 + while formatstr[j] notin {'\0', '}'}: inc(j) + var x = findNormalized(substr(formatstr, i+2, j-1), a) + if x >= 0 and x < high(a): add s, a[x+1] + else: raise newException(EInvalidValue, "invalid format string") + i = j+1 + of 'a'..'z', 'A'..'Z', '\128'..'\255', '_': + var j = i+1 + while formatstr[j] in PatternChars: inc(j) + var x = findNormalized(substr(formatstr, i+1, j-1), a) + if x >= 0 and x < high(a): add s, a[x+1] + else: raise newException(EInvalidValue, "invalid format string") + i = j + else: + raise newException(EInvalidValue, "invalid format string") + else: + add s, formatstr[i] + inc(i) + +proc `%` *(formatstr: string, a: openarray[string]): string {.noSideEffect, + rtl, extern: "nsuFormatOpenArray".} = + ## The `substitution`:idx: operator performs string substitutions in + ## `formatstr` and returns a modified `formatstr`. This is often called + ## `string interpolation`:idx:. + ## + ## This is best explained by an example: + ## + ## .. code-block:: nimrod + ## "$1 eats $2." % ["The cat", "fish"] + ## + ## Results in: + ## + ## .. code-block:: nimrod + ## "The cat eats fish." + ## + ## The substitution variables (the thing after the ``$``) are enumerated + ## from 1 to ``a.len``. + ## To produce a verbatim ``$``, use ``$$``. + ## The notation ``$#`` can be used to refer to the next substitution + ## variable: + ## + ## .. code-block:: nimrod + ## "$# eats $#." % ["The cat", "fish"] + ## + ## Substitution variables can also be words (that is + ## ``[A-Za-z_]+[A-Za-z0-9_]*``) in which case the arguments in `a` with even + ## indices are keys and with odd indices are the corresponding values. + ## An example: + ## + ## .. code-block:: nimrod + ## "$animal eats $food." % ["animal", "The cat", "food", "fish"] + ## + ## Results in: + ## + ## .. code-block:: nimrod + ## "The cat eats fish." + ## + ## The variables are compared with `cmpIgnoreStyle`. `EInvalidValue` is + ## raised if an ill-formed format string has been passed to the `%` operator. + result = newStringOfCap(formatstr.len + a.len shl 4) + addf(result, formatstr, a) + +proc `%` *(formatstr, a: string): string {.noSideEffect, + rtl, extern: "nsuFormatSingleElem".} = + ## This is the same as ``formatstr % [a]``. + result = newStringOfCap(formatstr.len + a.len) + addf(result, formatstr, [a]) + {.pop.} when isMainModule: - assert align("abc", 4) == " abc" - assert align("a", 0) == "a" - assert align("1232", 6) == " 1232" + doAssert align("abc", 4) == " abc" + doAssert align("a", 0) == "a" + doAssert align("1232", 6) == " 1232" echo wordWrap(""" this is a long text -- muchlongerthan10chars and here it goes""", 10, false) - assert formatBiggestFloat(0.00000000001, ffDecimal, 11) == "0.00000000001" - assert formatBiggestFloat(0.00000000001, ffScientific, 1) == "1.0e-11" + doAssert formatBiggestFloat(0.00000000001, ffDecimal, 11) == "0.00000000001" + doAssert formatBiggestFloat(0.00000000001, ffScientific, 1) == "1.0e-11" + + doAssert "$# $3 $# $#" % ["a", "b", "c"] == "a c b c" + echo formatSize(1'i64 shl 31 + 300'i64) # == "4,GB" + echo formatSize(1'i64 shl 31) - assert "$# $3 $# $#" % ["a", "b", "c"] == "a c b c" + doAssert "$animal eats $food." % ["animal", "The cat", "food", "fish"] == + "The cat eats fish." diff --git a/lib/pure/subexes.nim b/lib/pure/subexes.nim new file mode 100644 index 000000000..363cf6d04 --- /dev/null +++ b/lib/pure/subexes.nim @@ -0,0 +1,380 @@ +# +# +# Nimrod's Runtime Library +# (c) Copyright 2011 Andreas Rumpf +# +# See the file "copying.txt", included in this +# distribution, for details about the copyright. +# + +## Nimrod support for `substitution expressions`:idx: (`subex`:idx:). +## +## .. include:: ../doc/subexes.txt +## + +{.push debugger:off .} # the user does not want to trace a part + # of the standard library! + +from strutils import parseInt, cmpIgnoreStyle, Digits +include "system/inclrtl" + + +proc findNormalized(x: string, inArray: openarray[string]): int = + var i = 0 + while i < high(inArray): + if cmpIgnoreStyle(x, inArray[i]) == 0: return i + inc(i, 2) # incrementing by 1 would probably lead to a + # security hole... + return -1 + +type + EInvalidSubex* = object of EInvalidValue ## exception that is raised for + ## an invalid subex + +proc raiseInvalidFormat(msg: string) {.noinline.} = + raise newException(EInvalidSubex, "invalid format string: " & msg) + +type + TFormatParser = object {.pure, final.} + f: cstring + num, i, lineLen: int + +template call(x: stmt) = + p.i = i + x + i = p.i + +template callNoLineLenTracking(x: stmt) = + let oldLineLen = p.lineLen + p.i = i + x + i = p.i + p.lineLen = oldLineLen + +proc getFormatArg(p: var TFormatParser, a: openArray[string]): int = + const PatternChars = {'a'..'z', 'A'..'Z', '0'..'9', '\128'..'\255', '_'} + var i = p.i + var f = p.f + case f[i] + of '#': + result = p.num + inc i + inc p.num + of '1'..'9', '-': + var j = 0 + var negative = f[i] == '-' + if negative: inc i + while f[i] in Digits: + j = j * 10 + ord(f[i]) - ord('0') + inc i + result = if not negative: j-1 else: a.len-j + of 'a'..'z', 'A'..'Z', '\128'..'\255', '_': + var name = "" + while f[i] in PatternChars: + name.add(f[i]) + inc(i) + result = findNormalized(name, a)+1 + of '$': + inc(i) + call: + result = getFormatArg(p, a) + result = parseInt(a[result])-1 + else: + raiseInvalidFormat("'#', '$', number or identifier expected") + if result >=% a.len: raiseInvalidFormat("index out of bounds: " & $result) + p.i = i + +proc scanDollar(p: var TFormatParser, a: openarray[string], s: var string) + +proc emitChar(p: var TFormatParser, x: var string, ch: char) {.inline.} = + x.add(ch) + if ch == '\L': p.lineLen = 0 + else: inc p.lineLen + +proc emitStrLinear(p: var TFormatParser, x: var string, y: string) {.inline.} = + for ch in items(y): emitChar(p, x, ch) + +proc emitStr(p: var TFormatParser, x: var string, y: string) {.inline.} = + x.add(y) + inc p.lineLen, y.len + +proc scanQuote(p: var TFormatParser, x: var string, toAdd: bool) = + var i = p.i+1 + var f = p.f + while true: + if f[i] == '\'': + inc i + if f[i] != '\'': break + inc i + if toAdd: emitChar(p, x, '\'') + elif f[i] == '\0': raiseInvalidFormat("closing \"'\" expected") + else: + if toAdd: emitChar(p, x, f[i]) + inc i + p.i = i + +proc scanBranch(p: var TFormatParser, a: openArray[string], + x: var string, choice: int) = + var i = p.i + var f = p.f + var c = 0 + var elsePart = i + var toAdd = choice == 0 + while true: + case f[i] + of ']': break + of '|': + inc i + elsePart = i + inc c + if toAdd: break + toAdd = choice == c + of '\'': + call: scanQuote(p, x, toAdd) + of '\0': raiseInvalidFormat("closing ']' expected") + else: + if toAdd: + if f[i] == '$': + inc i + call: scanDollar(p, a, x) + else: + emitChar(p, x, f[i]) + inc i + else: + inc i + if not toAdd and choice >= 0: + # evaluate 'else' part: + var last = i + i = elsePart + while true: + case f[i] + of '|', ']': break + of '\'': + call: scanQuote(p, x, true) + of '$': + inc i + call: scanDollar(p, a, x) + else: + emitChar(p, x, f[i]) + inc i + i = last + p.i = i+1 + +proc scanSlice(p: var TFormatParser, a: openarray[string]): tuple[x, y: int] = + var slice = false + var i = p.i + var f = p.f + + if f[i] == '{': inc i + else: raiseInvalidFormat("'{' expected") + if f[i] == '.' and f[i+1] == '.': + inc i, 2 + slice = true + else: + call: result.x = getFormatArg(p, a) + if f[i] == '.' and f[i+1] == '.': + inc i, 2 + slice = true + if slice: + if f[i] != '}': + call: result.y = getFormatArg(p, a) + else: + result.y = high(a) + else: + result.y = result.x + if f[i] != '}': raiseInvalidFormat("'}' expected") + inc i + p.i = i + +proc scanDollar(p: var TFormatParser, a: openarray[string], s: var string) = + var i = p.i + var f = p.f + case f[i] + of '$': + emitChar p, s, '$' + inc i + of '{': + call: + let (x, y) = scanSlice(p, a) + for j in x..y: emitStr p, s, a[j] + of '[': + inc i + var start = i + call: scanBranch(p, a, s, -1) + var x: int + if f[i] == '{': + inc i + call: x = getFormatArg(p, a) + if f[i] != '}': raiseInvalidFormat("'}' expected") + inc i + else: + call: x = getFormatArg(p, a) + var last = i + let choice = parseInt(a[x]) + i = start + call: scanBranch(p, a, s, choice) + i = last + of '\'': + var sep = "" + callNoLineLenTracking: scanQuote(p, sep, true) + if f[i] == '~': + # $' '~{1..3} + # insert space followed by 1..3 if not empty + inc i + call: + let (x, y) = scanSlice(p, a) + var L = 0 + for j in x..y: inc L, a[j].len + if L > 0: + emitStrLinear p, s, sep + for j in x..y: emitStr p, s, a[j] + else: + block StringJoin: + block OptionalLineLengthSpecifier: + var maxLen = 0 + case f[i] + of '0'..'9': + while f[i] in Digits: + maxLen = maxLen * 10 + ord(f[i]) - ord('0') + inc i + of '$': + # do not skip the '$' here for `getFormatArg`! + call: + maxLen = getFormatArg(p, a) + else: break OptionalLineLengthSpecifier + var indent = "" + case f[i] + of 'i': + inc i + callNoLineLenTracking: scanQuote(p, indent, true) + + call: + let (x, y) = scanSlice(p, a) + if maxLen < 1: emitStrLinear(p, s, indent) + var items = 1 + emitStr p, s, a[x] + for j in x+1..y: + emitStr p, s, sep + if items >= maxLen: + emitStrLinear p, s, indent + items = 0 + emitStr p, s, a[j] + inc items + of 'c': + inc i + callNoLineLenTracking: scanQuote(p, indent, true) + + call: + let (x, y) = scanSlice(p, a) + if p.lineLen + a[x].len > maxLen: emitStrLinear(p, s, indent) + emitStr p, s, a[x] + for j in x+1..y: + emitStr p, s, sep + if p.lineLen + a[j].len > maxLen: emitStrLinear(p, s, indent) + emitStr p, s, a[j] + + else: raiseInvalidFormat("unit 'c' (chars) or 'i' (items) expected") + break StringJoin + + call: + let (x, y) = scanSlice(p, a) + emitStr p, s, a[x] + for j in x+1..y: + emitStr p, s, sep + emitStr p, s, a[j] + else: + call: + var x = getFormatArg(p, a) + emitStr p, s, a[x] + p.i = i + + +type + TSubex* = distinct string ## string that contains a substitution expression + +proc subex*(s: string): TSubex = + ## constructs a *substitution expression* from `s`. Currently this performs + ## no syntax checking but this may change in later versions. + result = TSubex(s) + +proc addf*(s: var string, formatstr: TSubex, a: openarray[string]) {. + noSideEffect, rtl, extern: "nfrmtAddf".} = + ## The same as ``add(s, formatstr % a)``, but more efficient. + var p: TFormatParser + p.f = formatstr.string + var i = 0 + while i < len(formatstr.string): + if p.f[i] == '$': + inc i + call: scanDollar(p, a, s) + else: + emitChar(p, s, p.f[i]) + inc(i) + +proc `%` *(formatstr: TSubex, a: openarray[string]): string {.noSideEffect, + rtl, extern: "nfrmtFormatOpenArray".} = + ## The `substitution`:idx: operator performs string substitutions in + ## `formatstr` and returns a modified `formatstr`. This is often called + ## `string interpolation`:idx:. + ## + result = newStringOfCap(formatstr.string.len + a.len shl 4) + addf(result, formatstr, a) + +proc `%` *(formatstr: TSubex, a: string): string {.noSideEffect, + rtl, extern: "nfrmtFormatSingleElem".} = + ## This is the same as ``formatstr % [a]``. + result = newStringOfCap(formatstr.string.len + a.len) + addf(result, formatstr, [a]) + +{.pop.} + +when isMainModule: + + proc `%`(formatstr: string, a: openarray[string]): string = + result = newStringOfCap(formatstr.len + a.len shl 4) + addf(result, formatstr.TSubex, a) + + proc `%`(formatstr: string, a: string): string = + result = newStringOfCap(formatstr.len + a.len) + addf(result, formatstr.TSubex, [a]) + + + doAssert "$# $3 $# $#" % ["a", "b", "c"] == "a c b c" + doAssert "$animal eats $food." % ["animal", "The cat", "food", "fish"] == + "The cat eats fish." + + + doAssert "$[abc|def]# $3 $# $#" % ["17", "b", "c"] == "def c b c" + doAssert "$[abc|def]# $3 $# $#" % ["1", "b", "c"] == "def c b c" + doAssert "$[abc|def]# $3 $# $#" % ["0", "b", "c"] == "abc c b c" + doAssert "$[abc|def|]# $3 $# $#" % ["17", "b", "c"] == " c b c" + + doAssert "$[abc|def|]# $3 $# $#" % ["-9", "b", "c"] == " c b c" + doAssert "$1($', '{2..})" % ["f", "a", "b"] == "f(a, b)" + + doAssert "$[$1($', '{2..})|''''|fg'$3']1" % ["7", "a", "b"] == "fg$3" + + doAssert "$[$#($', '{#..})|''''|$3]1" % ["0", "a", "b"] == "0(a, b)" + doAssert "$' '~{..}" % "" == "" + doAssert "$' '~{..}" % "P0" == " P0" + doAssert "${$1}" % "1" == "1" + doAssert "${$$-1} $$1" % "1" == "1 $1" + + doAssert "$#($', '10c'\n '{#..})" % ["doAssert", "longishA", "longish"] == + """doAssert( + longishA, + longish)""" + + echo "type TMyEnum* = enum\n $', '2i'\n '{..}" % ["fieldA", + "fieldB", "FiledClkad", "fieldD", "fieldE", "longishFieldName"] + + doAssert subex"$1($', '{2..})" % ["f", "a", "b", "c"] == "f(a, b, c)" + + doAssert subex"$1 $[files|file|files]{1} copied" % ["1"] == "1 file copied" + + doAssert subex"$['''|'|''''|']']#" % "0" == "'|" + + echo subex("type\n TEnum = enum\n $', '40c'\n '{..}") % [ + "fieldNameA", "fieldNameB", "fieldNameC", "fieldNameD"] + + |