summary refs log tree commit diff stats
path: root/lib/pure/strutils.nim
diff options
context:
space:
mode:
Diffstat (limited to 'lib/pure/strutils.nim')
-rw-r--r--lib/pure/strutils.nim361
1 files changed, 226 insertions, 135 deletions
diff --git a/lib/pure/strutils.nim b/lib/pure/strutils.nim
index 8b3d36f66..81be7db17 100644
--- a/lib/pure/strutils.nim
+++ b/lib/pure/strutils.nim
@@ -70,16 +70,16 @@ runnableExamples:
 ##   easier substring extraction than regular expressions
 
 
-import parseutils
-from math import pow, floor, log10
-from algorithm import fill, reverse
+import std/parseutils
+from std/math import pow, floor, log10
+from std/algorithm import fill, reverse
 import std/enumutils
 
-from unicode import toLower, toUpper
+from std/unicode import toLower, toUpper
 export toLower, toUpper
 
 include "system/inclrtl"
-import std/private/since
+import std/private/[since, jsutils]
 from std/private/strimpl import cmpIgnoreStyleImpl, cmpIgnoreCaseImpl,
     startsWithImpl, endsWithImpl
 
@@ -129,11 +129,11 @@ const
     ## Not very useful by its own, you can use it to create *inverted* sets to
     ## make the `find func<#find,string,set[char],Natural,int>`_
     ## find **invalid** characters in strings. Example:
-    ##
-    ## .. code-block:: nim
+    ##   ```nim
     ##   let invalid = AllChars - Digits
     ##   doAssert "01234".find(invalid) == -1
     ##   doAssert "01A34".find(invalid) == 2
+    ##   ```
 
 func isAlphaAscii*(c: char): bool {.rtl, extern: "nsuIsAlphaAsciiChar".} =
   ## Checks whether or not character `c` is alphabetical.
@@ -334,9 +334,9 @@ func normalize*(s: string): string {.rtl, extern: "nsuNormalize".} =
 func cmpIgnoreCase*(a, b: string): int {.rtl, extern: "nsuCmpIgnoreCase".} =
   ## Compares two strings in a case insensitive manner. Returns:
   ##
-  ## | 0 if a == b
-  ## | < 0 if a < b
-  ## | > 0 if a > b
+  ## | `0` if a == b
+  ## | `< 0` if a < b
+  ## | `> 0` if a > b
   runnableExamples:
     doAssert cmpIgnoreCase("FooBar", "foobar") == 0
     doAssert cmpIgnoreCase("bar", "Foo") < 0
@@ -354,9 +354,9 @@ func cmpIgnoreStyle*(a, b: string): int {.rtl, extern: "nsuCmpIgnoreStyle".} =
   ##
   ## Returns:
   ##
-  ## | 0 if a == b
-  ## | < 0 if a < b
-  ## | > 0 if a > b
+  ## | `0` if a == b
+  ## | `< 0` if a < b
+  ## | `> 0` if a > b
   runnableExamples:
     doAssert cmpIgnoreStyle("foo_bar", "FooBar") == 0
     doAssert cmpIgnoreStyle("foo_bar_5", "FooBar4") > 0
@@ -366,11 +366,14 @@ func cmpIgnoreStyle*(a, b: string): int {.rtl, extern: "nsuCmpIgnoreStyle".} =
 # --------- Private templates for different split separators -----------
 
 func substrEq(s: string, pos: int, substr: string): bool =
-  var i = 0
+  # Always returns false for empty `substr`
   var length = substr.len
-  while i < length and pos+i < s.len and s[pos+i] == substr[i]:
-    inc i
-  return i == length
+  if length > 0:
+    var i = 0
+    while i < length and pos+i < s.len and s[pos+i] == substr[i]:
+      inc i
+    i == length
+  else: false
 
 template stringHasSep(s: string, index: int, seps: set[char]): bool =
   s[index] in seps
@@ -420,14 +423,12 @@ iterator split*(s: string, sep: char, maxsplit: int = -1): string =
   ##
   ## Substrings are separated by the character `sep`.
   ## The code:
-  ##
-  ## .. code-block:: nim
+  ##   ```nim
   ##   for word in split(";;this;is;an;;example;;;", ';'):
   ##     writeLine(stdout, word)
-  ##
+  ##   ```
   ## Results in:
-  ##
-  ## .. code-block::
+  ##   ```
   ##   ""
   ##   ""
   ##   "this"
@@ -438,6 +439,7 @@ iterator split*(s: string, sep: char, maxsplit: int = -1): string =
   ##   ""
   ##   ""
   ##   ""
+  ##   ```
   ##
   ## See also:
   ## * `rsplit iterator<#rsplit.i,string,char,int>`_
@@ -452,41 +454,49 @@ iterator split*(s: string, seps: set[char] = Whitespace,
   ##
   ## Substrings are separated by a substring containing only `seps`.
   ##
-  ## .. code-block:: nim
+  ##   ```nim
   ##   for word in split("this\lis an\texample"):
   ##     writeLine(stdout, word)
+  ##   ```
   ##
   ## ...generates this output:
   ##
-  ## .. code-block::
+  ##   ```
   ##   "this"
   ##   "is"
   ##   "an"
   ##   "example"
+  ##   ```
   ##
   ## And the following code:
   ##
-  ## .. code-block:: nim
+  ##   ```nim
   ##   for word in split("this:is;an$example", {';', ':', '$'}):
   ##     writeLine(stdout, word)
+  ##   ```
   ##
   ## ...produces the same output as the first example. The code:
   ##
-  ## .. code-block:: nim
+  ##   ```nim
   ##   let date = "2012-11-20T22:08:08.398990"
   ##   let separators = {' ', '-', ':', 'T'}
   ##   for number in split(date, separators):
   ##     writeLine(stdout, number)
+  ##   ```
   ##
   ## ...results in:
   ##
-  ## .. code-block::
+  ##   ```
   ##   "2012"
   ##   "11"
   ##   "20"
   ##   "22"
   ##   "08"
   ##   "08.398990"
+  ##   ```
+  ##
+  ##  .. note:: Empty separator set results in returning an original string,
+  ##   following the interpretation "split by no element".
   ##
   ## See also:
   ## * `rsplit iterator<#rsplit.i,string,set[char],int>`_
@@ -501,23 +511,30 @@ iterator split*(s: string, sep: string, maxsplit: int = -1): string =
   ## Substrings are separated by the string `sep`.
   ## The code:
   ##
-  ## .. code-block:: nim
+  ##   ```nim
   ##   for word in split("thisDATAisDATAcorrupted", "DATA"):
   ##     writeLine(stdout, word)
+  ##   ```
   ##
   ## Results in:
   ##
-  ## .. code-block::
+  ##   ```
   ##   "this"
   ##   "is"
   ##   "corrupted"
+  ##   ```
+  ##
+  ##  .. note:: Empty separator string results in returning an original string,
+  ##   following the interpretation "split by no element".
   ##
   ## See also:
   ## * `rsplit iterator<#rsplit.i,string,string,int,bool>`_
   ## * `splitLines iterator<#splitLines.i,string>`_
   ## * `splitWhitespace iterator<#splitWhitespace.i,string,int>`_
   ## * `split func<#split,string,string,int>`_
-  splitCommon(s, sep, maxsplit, sep.len)
+  let sepLen = if sep.len == 0: 1 # prevents infinite loop
+    else: sep.len
+  splitCommon(s, sep, maxsplit, sepLen)
 
 
 template rsplitCommon(s, sep, maxsplit, sepLen) =
@@ -548,17 +565,19 @@ iterator rsplit*(s: string, sep: char,
                  maxsplit: int = -1): string =
   ## Splits the string `s` into substrings from the right using a
   ## string separator. Works exactly the same as `split iterator
-  ## <#split.i,string,char,int>`_ except in reverse order.
+  ## <#split.i,string,char,int>`_ except in **reverse** order.
   ##
-  ## .. code-block:: nim
+  ##   ```nim
   ##   for piece in "foo:bar".rsplit(':'):
   ##     echo piece
+  ##   ```
   ##
   ## Results in:
   ##
-  ## .. code-block:: nim
+  ##   ```
   ##   "bar"
   ##   "foo"
+  ##   ```
   ##
   ## Substrings are separated from the right by the char `sep`.
   ##
@@ -573,20 +592,25 @@ iterator rsplit*(s: string, seps: set[char] = Whitespace,
                  maxsplit: int = -1): string =
   ## Splits the string `s` into substrings from the right using a
   ## string separator. Works exactly the same as `split iterator
-  ## <#split.i,string,char,int>`_ except in reverse order.
+  ## <#split.i,string,char,int>`_ except in **reverse** order.
   ##
-  ## .. code-block:: nim
+  ##   ```nim
   ##   for piece in "foo bar".rsplit(WhiteSpace):
   ##     echo piece
+  ##   ```
   ##
   ## Results in:
   ##
-  ## .. code-block:: nim
+  ##   ```
   ##   "bar"
   ##   "foo"
+  ##   ```
   ##
   ## Substrings are separated from the right by the set of chars `seps`
   ##
+  ##  .. note:: Empty separator set results in returning an original string,
+  ##   following the interpretation "split by no element".
+  ##
   ## See also:
   ## * `split iterator<#split.i,string,set[char],int>`_
   ## * `splitLines iterator<#splitLines.i,string>`_
@@ -598,44 +622,52 @@ iterator rsplit*(s: string, sep: string, maxsplit: int = -1,
                  keepSeparators: bool = false): string =
   ## Splits the string `s` into substrings from the right using a
   ## string separator. Works exactly the same as `split iterator
-  ## <#split.i,string,string,int>`_ except in reverse order.
+  ## <#split.i,string,string,int>`_ except in **reverse** order.
   ##
-  ## .. code-block:: nim
+  ##   ```nim
   ##   for piece in "foothebar".rsplit("the"):
   ##     echo piece
+  ##   ```
   ##
   ## Results in:
   ##
-  ## .. code-block:: nim
+  ##   ```
   ##   "bar"
   ##   "foo"
+  ##   ```
   ##
   ## Substrings are separated from the right by the string `sep`
   ##
+  ##  .. note:: Empty separator string results in returning an original string,
+  ##   following the interpretation "split by no element".
+  ##
   ## See also:
   ## * `split iterator<#split.i,string,string,int>`_
   ## * `splitLines iterator<#splitLines.i,string>`_
   ## * `splitWhitespace iterator<#splitWhitespace.i,string,int>`_
   ## * `rsplit func<#rsplit,string,string,int>`_
-  rsplitCommon(s, sep, maxsplit, sep.len)
+  let sepLen = if sep.len == 0: 1 # prevents infinite loop
+    else: sep.len
+  rsplitCommon(s, sep, maxsplit, sepLen)
 
 iterator splitLines*(s: string, keepEol = false): string =
   ## Splits the string `s` into its containing lines.
   ##
   ## Every `character literal <manual.html#lexical-analysis-character-literals>`_
   ## newline combination (CR, LF, CR-LF) is supported. The result strings
-  ## contain no trailing end of line characters unless parameter `keepEol`
+  ## contain no trailing end of line characters unless the parameter `keepEol`
   ## is set to `true`.
   ##
   ## Example:
   ##
-  ## .. code-block:: nim
+  ##   ```nim
   ##   for line in splitLines("\nthis\nis\nan\n\nexample\n"):
   ##     writeLine(stdout, line)
+  ##   ```
   ##
   ## Results in:
   ##
-  ## .. code-block:: nim
+  ##   ```nim
   ##   ""
   ##   "this"
   ##   "is"
@@ -643,6 +675,7 @@ iterator splitLines*(s: string, keepEol = false): string =
   ##   ""
   ##   "example"
   ##   ""
+  ##   ```
   ##
   ## See also:
   ## * `splitWhitespace iterator<#splitWhitespace.i,string,int>`_
@@ -675,16 +708,17 @@ iterator splitWhitespace*(s: string, maxsplit: int = -1): string =
   ##
   ## The following code:
   ##
-  ## .. code-block:: nim
+  ##   ```nim
   ##   let s = "  foo \t bar  baz  "
   ##   for ms in [-1, 1, 2, 3]:
   ##     echo "------ maxsplit = ", ms, ":"
   ##     for item in s.splitWhitespace(maxsplit=ms):
   ##       echo '"', item, '"'
+  ##   ```
   ##
   ## ...results in:
   ##
-  ## .. code-block::
+  ##   ```
   ##   ------ maxsplit = -1:
   ##   "foo"
   ##   "bar"
@@ -700,6 +734,7 @@ iterator splitWhitespace*(s: string, maxsplit: int = -1): string =
   ##   "foo"
   ##   "bar"
   ##   "baz"
+  ##   ```
   ##
   ## See also:
   ## * `splitLines iterator<#splitLines.i,string>`_
@@ -728,6 +763,9 @@ func split*(s: string, seps: set[char] = Whitespace, maxsplit: int = -1): seq[
   ## The same as the `split iterator <#split.i,string,set[char],int>`_ (see its
   ## documentation), but is a func that returns a sequence of substrings.
   ##
+  ##  .. note:: Empty separator set results in returning an original string,
+  ##   following the interpretation "split by no element".
+  ##
   ## See also:
   ## * `split iterator <#split.i,string,set[char],int>`_
   ## * `rsplit func<#rsplit,string,set[char],int>`_
@@ -736,6 +774,7 @@ func split*(s: string, seps: set[char] = Whitespace, maxsplit: int = -1): seq[
   runnableExamples:
     doAssert "a,b;c".split({',', ';'}) == @["a", "b", "c"]
     doAssert "".split({' '}) == @[""]
+    doAssert "empty seps return unsplit s".split({}) == @["empty seps return unsplit s"]
   accResult(split(s, seps, maxsplit))
 
 func split*(s: string, sep: string, maxsplit: int = -1): seq[string] {.rtl,
@@ -745,6 +784,9 @@ func split*(s: string, sep: string, maxsplit: int = -1): seq[string] {.rtl,
   ## Substrings are separated by the string `sep`. This is a wrapper around the
   ## `split iterator <#split.i,string,string,int>`_.
   ##
+  ##  .. note:: Empty separator string results in returning an original string,
+  ##   following the interpretation "split by no element".
+  ##
   ## See also:
   ## * `split iterator <#split.i,string,string,int>`_
   ## * `rsplit func<#rsplit,string,string,int>`_
@@ -757,14 +799,13 @@ func split*(s: string, sep: string, maxsplit: int = -1): seq[string] {.rtl,
     doAssert "a  largely    spaced sentence".split(" ") == @["a", "", "largely",
         "", "", "", "spaced", "sentence"]
     doAssert "a  largely    spaced sentence".split(" ", maxsplit = 1) == @["a", " largely    spaced sentence"]
-  doAssert(sep.len > 0)
-
+    doAssert "empty sep returns unsplit s".split("") == @["empty sep returns unsplit s"]
   accResult(split(s, sep, maxsplit))
 
 func rsplit*(s: string, sep: char, maxsplit: int = -1): seq[string] {.rtl,
     extern: "nsuRSplitChar".} =
   ## The same as the `rsplit iterator <#rsplit.i,string,char,int>`_, but is a func
-  ## that returns a sequence of substrings.
+  ## that returns a sequence of substrings in original order.
   ##
   ## A possible common use case for `rsplit` is path manipulation,
   ## particularly on systems that don't use a common delimiter.
@@ -772,13 +813,15 @@ func rsplit*(s: string, sep: char, maxsplit: int = -1): seq[string] {.rtl,
   ## For example, if a system had `#` as a delimiter, you could
   ## do the following to get the tail of the path:
   ##
-  ## .. code-block:: nim
+  ##   ```nim
   ##   var tailSplit = rsplit("Root#Object#Method#Index", '#', maxsplit=1)
+  ##   ```
   ##
   ## Results in `tailSplit` containing:
   ##
-  ## .. code-block:: nim
+  ##   ```nim
   ##   @["Root#Object#Method", "Index"]
+  ##   ```
   ##
   ## See also:
   ## * `rsplit iterator <#rsplit.i,string,char,int>`_
@@ -792,7 +835,7 @@ func rsplit*(s: string, seps: set[char] = Whitespace,
              maxsplit: int = -1): seq[string]
              {.rtl, extern: "nsuRSplitCharSet".} =
   ## The same as the `rsplit iterator <#rsplit.i,string,set[char],int>`_, but is a
-  ## func that returns a sequence of substrings.
+  ## func that returns a sequence of substrings in original order.
   ##
   ## A possible common use case for `rsplit` is path manipulation,
   ## particularly on systems that don't use a common delimiter.
@@ -800,13 +843,18 @@ func rsplit*(s: string, seps: set[char] = Whitespace,
   ## For example, if a system had `#` as a delimiter, you could
   ## do the following to get the tail of the path:
   ##
-  ## .. code-block:: nim
+  ##   ```nim
   ##   var tailSplit = rsplit("Root#Object#Method#Index", {'#'}, maxsplit=1)
+  ##   ```
   ##
   ## Results in `tailSplit` containing:
   ##
-  ## .. code-block:: nim
+  ##   ```nim
   ##   @["Root#Object#Method", "Index"]
+  ##   ```
+  ##
+  ##  .. note:: Empty separator set results in returning an original string,
+  ##   following the interpretation "split by no element".
   ##
   ## See also:
   ## * `rsplit iterator <#rsplit.i,string,set[char],int>`_
@@ -819,7 +867,7 @@ func rsplit*(s: string, seps: set[char] = Whitespace,
 func rsplit*(s: string, sep: string, maxsplit: int = -1): seq[string] {.rtl,
     extern: "nsuRSplitString".} =
   ## The same as the `rsplit iterator <#rsplit.i,string,string,int,bool>`_, but is a func
-  ## that returns a sequence of substrings.
+  ## that returns a sequence of substrings in original order.
   ##
   ## A possible common use case for `rsplit` is path manipulation,
   ## particularly on systems that don't use a common delimiter.
@@ -827,13 +875,18 @@ func rsplit*(s: string, sep: string, maxsplit: int = -1): seq[string] {.rtl,
   ## For example, if a system had `#` as a delimiter, you could
   ## do the following to get the tail of the path:
   ##
-  ## .. code-block:: nim
+  ##   ```nim
   ##   var tailSplit = rsplit("Root#Object#Method#Index", "#", maxsplit=1)
+  ##   ```
   ##
   ## Results in `tailSplit` containing:
   ##
-  ## .. code-block:: nim
+  ##   ```nim
   ##   @["Root#Object#Method", "Index"]
+  ##   ```
+  ##
+  ##  .. note:: Empty separator string results in returning an original string,
+  ##   following the interpretation "split by no element".
   ##
   ## See also:
   ## * `rsplit iterator <#rsplit.i,string,string,int,bool>`_
@@ -849,6 +902,7 @@ func rsplit*(s: string, sep: string, maxsplit: int = -1): seq[string] {.rtl,
     doAssert "".rsplit("Elon Musk") == @[""]
     doAssert "a  largely    spaced sentence".rsplit(" ") == @["a", "",
         "largely", "", "", "", "spaced", "sentence"]
+    doAssert "empty sep returns unsplit s".rsplit("") == @["empty sep returns unsplit s"]
   accResult(rsplit(s, sep, maxsplit))
   result.reverse()
 
@@ -944,14 +998,26 @@ func toHex*[T: SomeInteger](x: T, len: Positive): string =
     doAssert b.toHex(4) == "1001"
     doAssert toHex(62, 3) == "03E"
     doAssert toHex(-8, 6) == "FFFFF8"
-  toHexImpl(cast[BiggestUInt](x), len, x < 0)
+  whenJsNoBigInt64:
+    toHexImpl(cast[BiggestUInt](x), len, x < 0)
+  do:
+    when T is SomeSignedInt:
+      toHexImpl(cast[BiggestUInt](BiggestInt(x)), len, x < 0)
+    else:
+      toHexImpl(BiggestUInt(x), len, x < 0)
 
 func toHex*[T: SomeInteger](x: T): string =
   ## Shortcut for `toHex(x, T.sizeof * 2)`
   runnableExamples:
     doAssert toHex(1984'i64) == "00000000000007C0"
     doAssert toHex(1984'i16) == "07C0"
-  toHexImpl(cast[BiggestUInt](x), 2*sizeof(T), x < 0)
+  whenJsNoBigInt64:
+    toHexImpl(cast[BiggestUInt](x), 2*sizeof(T), x < 0)
+  do:
+    when T is SomeSignedInt:
+      toHexImpl(cast[BiggestUInt](BiggestInt(x)), 2*sizeof(T), x < 0)
+    else:
+      toHexImpl(BiggestUInt(x), 2*sizeof(T), x < 0)
 
 func toHex*(s: string): string {.rtl.} =
   ## Converts a bytes string to its hexadecimal representation.
@@ -1241,7 +1307,7 @@ func parseEnum*[T: enum](s: string): T =
   ## type contains multiple fields with the same string value.
   ##
   ## Raises `ValueError` for an invalid value in `s`. The comparison is
-  ## done in a style insensitive way.
+  ## done in a style insensitive way (first letter is still case-sensitive).
   runnableExamples:
     type
       MyEnum = enum
@@ -1261,7 +1327,7 @@ func parseEnum*[T: enum](s: string, default: T): T =
   ## type contains multiple fields with the same string value.
   ##
   ## Uses `default` for an invalid value in `s`. The comparison is done in a
-  ## style insensitive way.
+  ## style insensitive way (first letter is still case-sensitive).
   runnableExamples:
     type
       MyEnum = enum
@@ -1742,8 +1808,9 @@ func addSep*(dest: var string, sep = ", ", startLen: Natural = 0) {.inline.} =
   ##
   ## A shorthand for:
   ##
-  ## .. code-block:: nim
+  ##   ```nim
   ##   if dest.len > startLen: add(dest, sep)
+  ##   ```
   ##
   ## This is often useful for generating some code where the items need to
   ## be *separated* by `sep`. `sep` is only added if `dest` is longer than
@@ -1808,7 +1875,7 @@ func join*(a: openArray[string], sep: string = ""): string {.rtl,
   else:
     result = ""
 
-func join*[T: not string](a: openArray[T], sep: string = ""): string =
+proc join*[T: not string](a: openArray[T], sep: string = ""): string =
   ## Converts all elements in the container `a` to strings using `$`,
   ## and concatenates them with `sep`.
   runnableExamples:
@@ -1914,7 +1981,7 @@ func find*(s: string, sub: char, start: Natural = 0, last = -1): int {.rtl,
       if length > 0:
         let found = c_memchr(s[start].unsafeAddr, sub, cast[csize_t](length))
         if not found.isNil:
-          return cast[ByteAddress](found) -% cast[ByteAddress](s.cstring)
+          return cast[int](found) -% cast[int](s.cstring)
     else:
       findImpl()
 
@@ -1970,7 +2037,7 @@ func find*(s, sub: string, start: Natural = 0, last = -1): int {.rtl,
       if last < 0 and start < s.len and subLen != 0:
         let found = memmem(s[start].unsafeAddr, csize_t(s.len - start), sub.cstring, csize_t(subLen))
         result = if not found.isNil:
-            cast[ByteAddress](found) -% cast[ByteAddress](s.cstring)
+            cast[int](found) -% cast[int](s.cstring)
           else:
             -1
       else:
@@ -2214,17 +2281,31 @@ func replaceWord*(s, sub: string, by = ""): string {.rtl,
     add result, substr(s, i)
 
 func multiReplace*(s: string, replacements: varargs[(string, string)]): string =
-  ## Same as replace, but specialized for doing multiple replacements in a single
-  ## pass through the input string.
+  ## Same as `replace<#replace,string,string,string>`_, but specialized for
+  ## doing multiple replacements in a single pass through the input string.
+  ##
+  ## `multiReplace` scans the input string from left to right and replaces the
+  ## matching substrings in the same order as passed in the argument list.
   ##
-  ## `multiReplace` performs all replacements in a single pass, this means it
-  ## can be used to swap the occurrences of "a" and "b", for instance.
+  ## The implications of the order of scanning the string and matching the
+  ## replacements:
+  ##   - In case of multiple matches at a given position, the earliest
+  ##     replacement is applied.
+  ##   - Overlaps are not handled. After performing a replacement, the scan
+  ##     continues from the character after the matched substring. If the
+  ##     resulting string then contains a possible match starting in a newly
+  ##     placed substring, the additional replacement is not performed.
   ##
   ## If the resulting string is not longer than the original input string,
   ## only a single memory allocation is required.
   ##
-  ## The order of the replacements does matter. Earlier replacements are
-  ## preferred over later replacements in the argument list.
+  runnableExamples:
+    # Swapping occurrences of 'a' and 'b':
+    doAssert multireplace("abba", [("a", "b"), ("b", "a")]) == "baab"
+
+    # The second replacement ("ab") is matched and performed first, the scan then
+    # continues from 'c', so the "bc" replacement is never matched and thus skipped.
+    doAssert multireplace("abc", [("bc", "x"), ("ab", "_b")]) == "_bc"
   result = newStringOfCap(s.len)
   var i = 0
   var fastChk: set[char] = {}
@@ -2373,8 +2454,8 @@ func validIdentifier*(s: string): bool {.rtl, extern: "nsuValidIdentifier".} =
 
 # floating point formatting:
 when not defined(js):
-  func c_sprintf(buf, frmt: cstring): cint {.header: "<stdio.h>",
-                                     importc: "sprintf", varargs.}
+  func c_snprintf(buf: cstring, n: csize_t, frmt: cstring): cint {.header: "<stdio.h>",
+                                     importc: "snprintf", varargs.}
 
 type
   FloatFormatMode* = enum
@@ -2401,60 +2482,63 @@ func formatBiggestFloat*(f: BiggestFloat, format: FloatFormatMode = ffDefault,
     doAssert x.formatBiggestFloat() == "123.4560000000000"
     doAssert x.formatBiggestFloat(ffDecimal, 4) == "123.4560"
     doAssert x.formatBiggestFloat(ffScientific, 2) == "1.23e+02"
-  when defined(js):
-    var precision = precision
-    if precision == -1:
-      # use the same default precision as c_sprintf
-      precision = 6
-    var res: cstring
-    case format
-    of ffDefault:
-      {.emit: "`res` = `f`.toString();".}
-    of ffDecimal:
-      {.emit: "`res` = `f`.toFixed(`precision`);".}
-    of ffScientific:
-      {.emit: "`res` = `f`.toExponential(`precision`);".}
-    result = $res
-    if 1.0 / f == -Inf:
-      # JavaScript removes the "-" from negative Zero, add it back here
-      result = "-" & $res
-    for i in 0 ..< result.len:
-      # Depending on the locale either dot or comma is produced,
-      # but nothing else is possible:
-      if result[i] in {'.', ','}: result[i] = decimalSep
+  when nimvm:
+    discard "implemented in the vmops"
   else:
-    const floatFormatToChar: array[FloatFormatMode, char] = ['g', 'f', 'e']
-    var
-      frmtstr {.noinit.}: array[0..5, char]
-      buf {.noinit.}: array[0..2500, char]
-      L: cint
-    frmtstr[0] = '%'
-    if precision >= 0:
-      frmtstr[1] = '#'
-      frmtstr[2] = '.'
-      frmtstr[3] = '*'
-      frmtstr[4] = floatFormatToChar[format]
-      frmtstr[5] = '\0'
-      L = c_sprintf(cast[cstring](addr buf), cast[cstring](addr frmtstr), precision, f)
+    when defined(js):
+      var precision = precision
+      if precision == -1:
+        # use the same default precision as c_snprintf
+        precision = 6
+      var res: cstring
+      case format
+      of ffDefault:
+        {.emit: "`res` = `f`.toString();".}
+      of ffDecimal:
+        {.emit: "`res` = `f`.toFixed(`precision`);".}
+      of ffScientific:
+        {.emit: "`res` = `f`.toExponential(`precision`);".}
+      result = $res
+      if 1.0 / f == -Inf:
+        # JavaScript removes the "-" from negative Zero, add it back here
+        result = "-" & $res
+      for i in 0 ..< result.len:
+        # Depending on the locale either dot or comma is produced,
+        # but nothing else is possible:
+        if result[i] in {'.', ','}: result[i] = decimalSep
     else:
-      frmtstr[1] = floatFormatToChar[format]
-      frmtstr[2] = '\0'
-      L = c_sprintf(cast[cstring](addr buf), cast[cstring](addr frmtstr), f)
-    result = newString(L)
-    for i in 0 ..< L:
-      # Depending on the locale either dot or comma is produced,
-      # but nothing else is possible:
-      if buf[i] in {'.', ','}: result[i] = decimalSep
-      else: result[i] = buf[i]
-    when defined(windows):
-      # VS pre 2015 violates the C standard: "The exponent always contains at
-      # least two digits, and only as many more digits as necessary to
-      # represent the exponent." [C11 §7.21.6.1]
-      # The following post-processing fixes this behavior.
-      if result.len > 4 and result[^4] == '+' and result[^3] == '0':
-        result[^3] = result[^2]
-        result[^2] = result[^1]
-        result.setLen(result.len - 1)
+      const floatFormatToChar: array[FloatFormatMode, char] = ['g', 'f', 'e']
+      var
+        frmtstr {.noinit.}: array[0..5, char]
+        buf {.noinit.}: array[0..2500, char]
+        L: cint
+      frmtstr[0] = '%'
+      if precision >= 0:
+        frmtstr[1] = '#'
+        frmtstr[2] = '.'
+        frmtstr[3] = '*'
+        frmtstr[4] = floatFormatToChar[format]
+        frmtstr[5] = '\0'
+        L = c_snprintf(cast[cstring](addr buf), csize_t(2501), cast[cstring](addr frmtstr), precision, f)
+      else:
+        frmtstr[1] = floatFormatToChar[format]
+        frmtstr[2] = '\0'
+        L = c_snprintf(cast[cstring](addr buf), csize_t(2501), cast[cstring](addr frmtstr), f)
+      result = newString(L)
+      for i in 0 ..< L:
+        # Depending on the locale either dot or comma is produced,
+        # but nothing else is possible:
+        if buf[i] in {'.', ','}: result[i] = decimalSep
+        else: result[i] = buf[i]
+      when defined(windows):
+        # VS pre 2015 violates the C standard: "The exponent always contains at
+        # least two digits, and only as many more digits as necessary to
+        # represent the exponent." [C11 §7.21.6.1]
+        # The following post-processing fixes this behavior.
+        if result.len > 4 and result[^4] == '+' and result[^3] == '0':
+          result[^3] = result[^2]
+          result[^2] = result[^1]
+          result.setLen(result.len - 1)
 
 func formatFloat*(f: float, format: FloatFormatMode = ffDefault,
                   precision: range[-1..32] = 16; decimalSep = '.'): string {.
@@ -2587,13 +2671,13 @@ func formatEng*(f: BiggestFloat,
   ## decimal point or (if `trim` is true) the maximum number of digits to be
   ## shown.
   ##
-  ## .. code-block:: nim
-  ##
+  ##   ```nim
   ##    formatEng(0, 2, trim=false) == "0.00"
   ##    formatEng(0, 2) == "0"
   ##    formatEng(0.053, 0) == "53e-3"
   ##    formatEng(52731234, 2) == "52.73e6"
   ##    formatEng(-52731234, 2) == "-52.73e6"
+  ##    ```
   ##
   ## If `siPrefix` is set to true, the number will be displayed with the SI
   ## prefix corresponding to the exponent. For example 4100 will be displayed
@@ -2608,8 +2692,7 @@ func formatEng*(f: BiggestFloat,
   ## different to appending the unit to the result as the location of the space
   ## is altered depending on whether there is an exponent.
   ##
-  ## .. code-block:: nim
-  ##
+  ##   ```nim
   ##    formatEng(4100, siPrefix=true, unit="V") == "4.1 kV"
   ##    formatEng(4.1, siPrefix=true, unit="V") == "4.1 V"
   ##    formatEng(4.1, siPrefix=true) == "4.1" # Note lack of space
@@ -2619,6 +2702,7 @@ func formatEng*(f: BiggestFloat,
   ##    formatEng(4100) == "4.1e3"
   ##    formatEng(4100, unit="V") == "4.1e3 V"
   ##    formatEng(4100, unit="", useUnitSpace=true) == "4.1e3 " # Space with useUnitSpace=true
+  ##    ```
   ##
   ## `decimalSep` is used as the decimal separator.
   ##
@@ -2782,13 +2866,15 @@ func `%`*(formatstr: string, a: openArray[string]): string {.rtl,
   ##
   ## This is best explained by an example:
   ##
-  ## .. code-block:: nim
+  ##   ```nim
   ##   "$1 eats $2." % ["The cat", "fish"]
+  ##   ```
   ##
   ## Results in:
   ##
-  ## .. code-block:: nim
+  ##   ```nim
   ##   "The cat eats fish."
+  ##   ```
   ##
   ## The substitution variables (the thing after the `$`) are enumerated
   ## from 1 to `a.len`.
@@ -2796,21 +2882,24 @@ func `%`*(formatstr: string, a: openArray[string]): string {.rtl,
   ## The notation `$#` can be used to refer to the next substitution
   ## variable:
   ##
-  ## .. code-block:: nim
+  ##   ```nim
   ##   "$# eats $#." % ["The cat", "fish"]
+  ##   ```
   ##
   ## Substitution variables can also be words (that is
   ## `[A-Za-z_]+[A-Za-z0-9_]*`) in which case the arguments in `a` with even
   ## indices are keys and with odd indices are the corresponding values.
   ## An example:
   ##
-  ## .. code-block:: nim
+  ##   ```nim
   ##   "$animal eats $food." % ["animal", "The cat", "food", "fish"]
+  ##   ```
   ##
   ## Results in:
   ##
-  ## .. code-block:: nim
+  ##   ```nim
   ##   "The cat eats fish."
+  ##   ```
   ##
   ## The variables are compared with `cmpIgnoreStyle`. `ValueError` is
   ## raised if an ill-formed format string has been passed to the `%` operator.
@@ -2908,13 +2997,14 @@ iterator tokenize*(s: string, seps: set[char] = Whitespace): tuple[
   ## Substrings are separated by a substring containing only `seps`.
   ## Example:
   ##
-  ## .. code-block:: nim
+  ##   ```nim
   ##   for word in tokenize("  this is an  example  "):
   ##     writeLine(stdout, word)
+  ##   ```
   ##
   ## Results in:
   ##
-  ## .. code-block:: nim
+  ##   ```nim
   ##   ("  ", true)
   ##   ("this", false)
   ##   (" ", true)
@@ -2924,6 +3014,7 @@ iterator tokenize*(s: string, seps: set[char] = Whitespace): tuple[
   ##   ("  ", true)
   ##   ("example", false)
   ##   ("  ", true)
+  ##   ```
   var i = 0
   while true:
     var j = i