summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r--changelog.md3
-rw-r--r--doc/lib.rst4
-rw-r--r--lib/pure/strformat.nim590
-rw-r--r--web/website.ini2
4 files changed, 598 insertions, 1 deletions
diff --git a/changelog.md b/changelog.md
index 6007a2d46..b216b0f17 100644
--- a/changelog.md
+++ b/changelog.md
@@ -136,3 +136,6 @@ This now needs to be written as:
   Types that shadow procs and vice versa are marked as ambiguous (bug #6693).
 - ``yield`` (or ``await`` which is mapped to ``yield``) never worked reliably
   in an array, seq or object constructor and is now prevented at compile-time.
+- For string formatting / interpolation a new module
+  called [strformat](https://nim-lang.org/docs/strformat.html) has been added
+  to the stdlib.
diff --git a/doc/lib.rst b/doc/lib.rst
index 959c3ef9b..6eaf6c788 100644
--- a/doc/lib.rst
+++ b/doc/lib.rst
@@ -102,6 +102,10 @@ String handling
   case of a string, splitting a string into substrings, searching for
   substrings, replacing substrings.
 
+* `strformat <strformat.html>`_
+  Macro based standard string interpolation / formatting. Inspired by
+  Python's ``f``-strings.
+
 * `strmisc <strmisc.html>`_
   This module contains uncommon string handling operations that do not
   fit with the commonly used operations in strutils.
diff --git a/lib/pure/strformat.nim b/lib/pure/strformat.nim
new file mode 100644
index 000000000..b2198aa40
--- /dev/null
+++ b/lib/pure/strformat.nim
@@ -0,0 +1,590 @@
+#
+#
+#            Nim's Runtime Library
+#        (c) Copyright 2017 Nim contributors
+#
+#    See the file "copying.txt", included in this
+#    distribution, for details about the copyright.
+#
+
+##[
+String `interpolation`:idx: / `format`:idx: inspired by
+Python's ``f``-strings.
+
+Examples:
+
+.. code-block:: nim
+
+    doAssert fmt"""{"abc":>4}""" == " abc"
+    doAssert fmt"""{"abc":<4}""" == "abc "
+
+    doAssert fmt"{-12345:08}" == "-0012345"
+    doAssert fmt"{-1:3}" == "-1 "
+    doAssert fmt"{-1:03}" == "-01"
+    doAssert fmt"{16:#X}" == "0x10"
+
+    doAssert fmt"{123.456}" == "123.456"
+    doAssert fmt"{123.456:>9.3f}" == "  123.456"
+    doAssert fmt"{123.456:9.3f}" == "123.456  "
+    doAssert fmt"{123.456:>9.4f}" == " 123.4560"
+    doAssert fmt"{123.456:>9.0f}" == "     123."
+    doAssert fmt"{123.456:<9.4f}" == "123.4560 "
+
+    doAssert fmt"{123.456:e}" == "1.234560e+02"
+    doAssert fmt"{123.456:>13e}" == " 1.234560e+02"
+    doAssert fmt"{123.456:<13e}" == "1.234560e+02 "
+
+
+An expression like ``fmt"{key} is {value:arg} {{z}}"`` is transformed into:
+
+.. code-block:: nim
+  var temp = newStringOfCap(educatedCapGuess)
+  format(key, temp)
+  format(" is ", temp)
+  format(value, arg, temp)
+  format("{z}", temp)
+  temp
+
+Parts of the string that are enclosed in the curly braces are interpreted
+as Nim code, to escape an ``{`` or ``}`` double it.
+
+``fmt`` delegates most of the work to an open overloaded set
+of ``format`` procs. The required signature for a type ``T`` that supports
+formatting is usually ``proc format(x: T; result: var string)`` for efficiency
+but can also be ``proc format(x: T): string``. ``add`` and ``$`` procs are
+used as the fallback implementation.
+
+This is the concrete lookup algorithm that ``fmt`` uses:
+
+.. code-block:: nim
+
+  when compiles(format(arg, res)):
+    format(arg, res)
+  elif compiles(format(arg)):
+    res.add format(arg)
+  elif compiles(add(res, arg)):
+    res.add(arg)
+  else:
+    res.add($arg)
+
+
+The subexpression after the colon
+(``arg`` in ``fmt"{key} is {value:arg} {{z}}"``) is an optional argument
+passed to ``format``.
+
+If an optional argument is present the following lookup algorithm is used:
+
+.. code-block:: nim
+
+  when compiles(format(arg, option, res)):
+    format(arg, option, res)
+  else:
+    res.add format(arg, option)
+
+
+For strings and numeric types the optional argument is a so-called
+"standard format specifier".
+
+
+Standard format specifier
+=========================
+
+
+The general form of a standard format specifier is::
+
+  [[fill]align][#][0][minimumwidth][.precision][type]
+
+The brackets ([]) indicate an optional element.
+
+The optional align flag can be one of the following:
+
+'<'
+    Forces the field to be left-aligned within the available
+    space (This is the default.)
+
+'>'
+    Forces the field to be right-aligned within the available space.
+
+Note that unless a minimum field width is defined, the field width
+will always be the same size as the data to fill it, so that the alignment
+option has no meaning in this case.
+
+The optional 'fill' character defines the character to be used to pad
+the field to the minimum width. The fill character, if present, must be
+followed by an alignment flag.
+
+If the '#' character is present, integers use the 'alternate form' for formatting.
+This means that binary, octal, and hexadecimal output will be prefixed
+with '0b', '0o', and '0x', respectively.
+
+'width' is a decimal integer defining the minimum field width. If not specified,
+then the field width will be determined by the content.
+
+If the width field is preceded by a zero ('0') character, this enables
+zero-padding.
+
+The 'precision' is a decimal number indicating how many digits should be displayed
+after the decimal point in a floating point conversion. For non-numeric types the
+field indicates the maximum field size - in other words, how many characters will
+be used from the field content. The precision is ignored for integer conversions.
+
+Finally, the 'type' determines how the data should be presented.
+
+The available integer presentation types are:
+
+
+=================        ====================================================
+  Type                   Result
+=================        ====================================================
+``b``                    Binary. Outputs the number in base 2.
+``d``                    Decimal Integer. Outputs the number in base 10.
+``o``                    Octal format. Outputs the number in base 8.
+``x``                    Hex format. Outputs the number in base 16, using
+                         lower-case letters for the digits above 9.
+``X``                    Hex format. Outputs the number in base 16, using
+                         uppercase letters for the digits above 9.
+(None)                   the same as 'd'
+=================        ====================================================
+
+
+The available floating point presentation types are:
+
+=================        ====================================================
+  Type                   Result
+=================        ====================================================
+``e``                    Exponent notation. Prints the number in scientific
+                         notation using the letter 'e' to indicate the
+                         exponent.
+``E``                    Exponent notation. Same as 'e' except it converts
+                         the number to uppercase.
+``f``                    Fixed point. Displays the number as a fixed-point
+                         number.
+``F``                    Fixed point. Same as 'f' except it converts the
+                         number to uppercase.
+``g``                    General format. This prints the number as a
+                         fixed-point number, unless the number is too
+                         large, in which case it switches to 'e'
+                         exponent notation.
+``G``                    General format. Same as 'g' except switches to 'E'
+                         if the number gets too large.
+'' (None)                similar to 'g', except that it prints at least one
+                         digit after the decimal point.
+=================        ====================================================
+
+
+Future directions
+=================
+
+A curly expression with commas in it like ``{x, argA, argB}`` could be
+transformed to ``format(x, argA, argB, res)`` in order to support
+formatters that do not need to parse a custom language within a custom
+language but instead prefer to use Nim's existing syntax. This also
+helps in readability since there is only so much you can cram into
+single letter DSLs.
+
+]##
+
+import macros, parseutils, unicode
+import strutils
+
+template callFormat(res, arg) {.dirty.} =
+  # Expands to the statement that appends ``arg`` to the string ``res`` when
+  # no format option was given. The ``when`` branches are tried in order and
+  # the first one that applies is the one that gets compiled.
+  when arg is string:
+    # workaround in order to circumvent 'strutils.format' which matches
+    # too but doesn't adhere to our protocol.
+    res.add arg
+  elif compiles(format(arg, res)):
+    # A ``format`` overload that writes straight into ``res`` is available.
+    format(arg, res)
+  elif compiles(format(arg)):
+    # A ``format`` overload that returns a value is available; append it.
+    res.add format(arg)
+  elif compiles(add(res, arg)):
+    # ``arg`` can be appended to a string as is.
+    res.add(arg)
+  else:
+    # Last resort: stringify via ``$``.
+    res.add($arg)
+
+template callFormatOption(res, arg, option) {.dirty.} =
+  # Expands to the statement that appends ``arg``, formatted according to
+  # ``option``, to the string ``res``.
+  when compiles(format(arg, option, res)):
+    # A ``format`` overload that writes straight into ``res`` is available.
+    format(arg, option, res)
+  else:
+    # Otherwise ``format(arg, option)`` must yield something ``add`` accepts.
+    res.add format(arg, option)
+
+macro fmt*(pattern: string): untyped =
+  ## For a specification of the ``fmt`` macro, see the module level documentation.
+  ##
+  ## ``pattern`` must be a string literal. Malformed patterns (an unterminated
+  ## ``{...`` field or a lone ``}``) are reported as compile-time errors
+  ## located at ``pattern``.
+  runnableExamples:
+    template check(actual, expected: string) =
+      doAssert actual == expected
+
+    from strutils import toUpperAscii, repeat
+
+    # Basic tests
+    let s = "string"
+    check fmt"{0} {s}", "0 string"
+    check fmt"{s[0..2].toUpperAscii}", "STR"
+    check fmt"{-10:04}", "-010"
+    check fmt"{-10:<04}", "-010"
+    check fmt"{-10:>04}", "-010"
+    check fmt"0x{10:02X}", "0x0A"
+
+    check fmt"{10:#04X}", "0x0A"
+
+    check fmt"""{"test":#>5}""", "#test"
+    check fmt"""{"test":>5}""", " test"
+
+    check fmt"""{"test": <5}""", "test "
+    check fmt"""{"test":<5}""", "test "
+    check fmt"{1f:.3f}", "1.000"
+    check fmt"Hello, {s}!", "Hello, string!"
+
+    # Tests for identifiers without parenthesis
+    check fmt"{s} works{s}", "string worksstring"
+    check fmt"{s:>7}", " string"
+    doAssert(not compiles(fmt"{s_works}")) # parsed as identifier `s_works`
+
+    # Misc general tests
+    check fmt"{{}}", "{}"
+    check fmt"{0}%", "0%"
+    check fmt"{0}%asdf", "0%asdf"
+    check fmt("\n{\"\\n\"}\n"), "\n\n\n"
+    check fmt"""{"abc"}s""", "abcs"
+
+    # String tests
+    check fmt"""{"abc"}""", "abc"
+    check fmt"""{"abc":>4}""", " abc"
+    check fmt"""{"abc":<4}""", "abc "
+    check fmt"""{"":>4}""", "    "
+    check fmt"""{"":<4}""", "    "
+
+    # Int tests
+    check fmt"{12345}", "12345"
+    check fmt"{ - 12345}", "-12345"
+    check fmt"{12345:6}", "12345 "
+    check fmt"{12345:>6}", " 12345"
+    check fmt"{12345:4}", "12345"
+    check fmt"{12345:08}", "00012345"
+    check fmt"{-12345:08}", "-0012345"
+    check fmt"{0:0}", "0"
+    check fmt"{0:02}", "00"
+    check fmt"{-1:3}", "-1 "
+    check fmt"{-1:03}", "-01"
+    check fmt"{10}", "10"
+    check fmt"{16:#X}", "0x10"
+
+    # Hex tests
+    check fmt"{0:x}", "0"
+    check fmt"{-0:x}", "0"
+    check fmt"{255:x}", "ff"
+    check fmt"{255:X}", "FF"
+    check fmt"{-255:x}", "-ff"
+    check fmt"{-255:X}", "-FF"
+    check fmt"{255:x} uNaffeCteD CaSe", "ff uNaffeCteD CaSe"
+    check fmt"{255:X} uNaffeCteD CaSe", "FF uNaffeCteD CaSe"
+    check fmt"{255:>4x}", "  ff"
+    check fmt"{255:04x}", "00ff"
+    check fmt"{-255:>4x}", " -ff"
+    check fmt"{-255:04x}", "-0ff"
+
+    # Float tests
+    check fmt"{123.456}", "123.456"
+    check fmt"{-123.456}", "-123.456"
+    check fmt"{123.456:.3f}", "123.456"
+    check fmt"{-123.456:.3f}", "-123.456"
+    check fmt"{123.456:1g}", "123.456"
+    check fmt"{123.456:.1f}", "123.5"
+    check fmt"{123.456:.0f}", "123."
+    check fmt"{123.456:>9.3f}", "  123.456"
+    check fmt"{123.456:9.3f}", "123.456  "
+    check fmt"{123.456:>9.4f}", " 123.4560"
+    check fmt"{123.456:>9.0f}", "     123."
+    check fmt"{123.456:<9.4f}", "123.4560 "
+
+    # Float (scientific) tests
+    check fmt"{123.456:e}", "1.234560e+02"
+    check fmt"{123.456:>13e}", " 1.234560e+02"
+    check fmt"{123.456:<13e}", "1.234560e+02 "
+    check fmt"{123.456:.1e}", "1.2e+02"
+    check fmt"{123.456:.2e}", "1.23e+02"
+    check fmt"{123.456:.3e}", "1.235e+02"
+
+    # Note: times.format adheres to the format protocol. Test that this
+    # works:
+    import times
+
+    var nullTime: TimeInfo
+    check fmt"{nullTime:yyyy-mm-dd}", "0000-00-00"
+
+    # Unicode string tests
+    check fmt"""{"αβγ"}""", "αβγ"
+    check fmt"""{"αβγ":>5}""", "  αβγ"
+    check fmt"""{"αβγ":<5}""", "αβγ  "
+    check fmt"""a{"a"}α{"α"}€{"€"}𐍈{"𐍈"}""", "aaαα€€𐍈𐍈"
+    check fmt"""a{"a":2}α{"α":2}€{"€":2}𐍈{"𐍈":2}""", "aa αα €€ 𐍈𐍈 "
+    # Invalid unicode sequences should be handled as plain strings.
+    # Invalid examples taken from: https://stackoverflow.com/a/3886015/1804173
+    let invalidUtf8 = [
+      "\xc3\x28", "\xa0\xa1",
+      "\xe2\x28\xa1", "\xe2\x82\x28",
+      "\xf0\x28\x8c\xbc", "\xf0\x90\x28\xbc", "\xf0\x28\x8c\x28"
+    ]
+    for s in invalidUtf8:
+      check fmt"{s:>5}", repeat(" ", 5-s.len) & s
+
+  if pattern.kind notin {nnkStrLit..nnkTripleStrLit}:
+    error "fmt only works with string literals", pattern
+  let f = pattern.strVal
+  var i = 0
+  let res = genSym(nskVar, "fmtRes")
+  result = newNimNode(nnkStmtListExpr, lineInfoFrom=pattern)
+  # Capacity guess: the literal text plus ten characters per '{' in the pattern.
+  result.add newVarStmt(res, newCall(bindSym"newStringOfCap", newLit(f.len + count(f, '{')*10)))
+  var strlit = ""   # literal text collected since the last emitted statement
+  while i < f.len:
+    if f[i] == '{':
+      inc i
+      if i < f.len and f[i] == '{':
+        # "{{" is an escaped '{'.
+        inc i
+        strlit.add '{'
+      else:
+        # Flush the pending literal text before the interpolated expression.
+        if strlit.len > 0:
+          result.add newCall(bindSym"add", res, newLit(strlit))
+          strlit = ""
+
+        # The Nim expression runs up to the first ':' or '}'.
+        var subexpr = ""
+        while i < f.len and f[i] != '}' and f[i] != ':':
+          subexpr.add f[i]
+          inc i
+
+        # An optional format option follows the ':' and runs up to '}'.
+        var hasOptions = false
+        var options = ""
+        if i < f.len and f[i] == ':':
+          hasOptions = true
+          inc i
+          while i < f.len and f[i] != '}':
+            options.add f[i]
+            inc i
+
+        # Check the closing brace before parsing, so that an unterminated
+        # field is reported as such and not as a parse error of a partial
+        # expression. Every index is bounds-checked: the pattern may end here.
+        if i < f.len and f[i] == '}':
+          inc i
+        else:
+          error "invalid format string: missing '}'", pattern
+
+        let x = parseExpr(subexpr)
+        if hasOptions:
+          result.add getAst(callFormatOption(res, x, newLit(options)))
+        else:
+          result.add getAst(callFormat(res, x))
+    elif f[i] == '}':
+      if i+1 < f.len and f[i+1] == '}':
+        # "}}" is an escaped '}'.
+        strlit.add '}'
+        inc i, 2
+      else:
+        error "invalid format string: '}' instead of '}}'", pattern
+    else:
+      strlit.add f[i]
+      inc i
+  if strlit.len > 0:
+    result.add newCall(bindSym"add", res, newLit(strlit))
+  result.add res
+  when defined(debugFmtDsl):
+    echo repr result
+
+proc mkDigit(v: int, typ: char): string {.inline.} =
+  ## Returns the digit with value ``v`` as a one-character string. Values of
+  ## ten and above become letters: lower-case when ``typ`` is ``'x'``,
+  ## upper-case for every other ``typ``.
+  assert(v < 26)
+  let code =
+    if v < 10: ord('0') + v
+    elif typ == 'x': ord('a') + v - 10
+    else: ord('A') + v - 10
+  result = $chr(code)
+
+proc alignString*(s: string, minimumWidth: int; align = '<'; fill = ' '): string =
+  ## Pads ``s`` with ``fill`` until it is ``minimumWidth`` wide. With
+  ## ``align == '<'`` the padding is appended, with any other ``align`` it is
+  ## prepended. The width of ``s`` is counted in runes when ``s`` is valid
+  ## UTF-8 and in bytes otherwise. ``s`` is returned unchanged when it is
+  ## already wide enough or when ``minimumWidth`` is 0.
+  ##
+  ## This is only of interest if you want to write a custom ``format`` proc that
+  ## should support the standard format specifiers.
+  result = s
+  if minimumWidth != 0:
+    let width = if validateUtf8(s) == -1: runeLen(s) else: len(s)
+    let padLen = minimumWidth - width
+    if padLen > 0:
+      let padding = repeat(fill, padLen)
+      if align == '<':
+        result.add padding
+      else:
+        result = padding & s
+
+type
+  StandardFormatSpecifier* = object ## Type that describes "standard format specifiers".
+    fill*, align*: char             ## Desired fill and alignment.
+    when false:
+      # Not compiled in: the 'sign' field is not supported yet.
+      sign: char                     ## Desired sign.
+    alternateForm*: bool            ## Whether to prefix binary, octal and hex numbers
+                                    ## with ``0b``, ``0o``, ``0x``.
+    padWithZero*: bool              ## Whether to pad with zeros rather than spaces.
+    minimumWidth*, precision*: int  ## Desired minimum width and precision.
+    typ*: char                      ## Type like 'f', 'g' or 'd'.
+    endPosition*: int ## End position in the format specifier after
+                      ## ``parseStandardFormatSpecifier`` returned.
+
+proc formatInt(n: SomeNumber; radix: int; spec: StandardFormatSpecifier): string =
+  ## Converts ``n`` to a string in base ``radix``, laid out according to ``spec``.
+  ##
+  ## Unsigned integers are processed as ``uint64``; every other number is
+  ## converted to ``int64`` first. ``spec.typ`` selects the letter case of
+  ## digits above 9 (lower-case only for ``'x'``) and, together with
+  ## ``spec.alternateForm``, the ``0x``/``0b``/``0o`` prefix. With
+  ## ``spec.padWithZero`` zeros are inserted between sign/prefix and digits up
+  ## to ``spec.minimumWidth``; any remaining width is filled with ``spec.fill``
+  ## on the right for ``spec.align == '<'`` and on the left otherwise.
+  when n is SomeUnsignedInt:
+    var v = n.uint64
+    let negative = false
+  else:
+    var v = n.int64
+    let negative = v < 0
+
+  # Radix prefix of the alternate form ('#').
+  var prefix = ""
+  if spec.alternateForm:
+    case spec.typ
+    of 'X', 'x': prefix = "0x"
+    of 'b': prefix = "0b"
+    of 'o': prefix = "0o"
+    else: discard
+
+  if v == 0:
+    result = "0"
+  else:
+    result = ""
+    # ``v`` is deliberately not negated: ``-low(int64)`` is not representable.
+    # ``div``/``mod`` truncate towards zero, so a negative ``v`` only yields
+    # remainders that are <= 0 and ``abs`` of the remainder is the digit.
+    while v != type(v)(0):
+      let d = int(v mod type(v)(radix))
+      v = v div type(v)(radix)
+      result.add(mkDigit(abs(d), spec.typ))
+    # Digits were produced least significant first.
+    for idx in 0..<(result.len div 2):
+      swap result[idx], result[result.len - idx - 1]
+
+  if spec.padWithZero:
+    # Zeros go between the sign/prefix and the digits.
+    let zeros = spec.minimumWidth - result.len - prefix.len - ord(negative)
+    if zeros > 0:
+      result = repeat('0', zeros) & result
+
+  result = (if negative: "-" else: "") & prefix & result
+
+  let padLen = spec.minimumWidth - result.len
+  if padLen > 0:
+    if spec.align == '<':
+      result.add repeat(spec.fill, padLen)
+    else:
+      result = repeat(spec.fill, padLen) & result
+
+proc parseStandardFormatSpecifier*(s: string; start = 0;
+                                   ignoreUnknownSuffix = false): StandardFormatSpecifier =
+  ## Parses a "standard format specifier" from ``s``, beginning at index
+  ## ``start``. The accepted grammar is::
+  ##
+  ##   [[fill]align][#][0][minimumwidth][.precision][type]
+  ##
+  ## You only need this exported helper when writing a custom ``format`` proc
+  ## that should understand the standard format specifiers. The index at which
+  ## parsing stopped is stored in ``endPosition``. Text left over after the
+  ## ``type`` field raises ``ValueError`` unless ``ignoreUnknownSuffix`` is true.
+  const alignChars = {'<', '>'}
+  var pos = start
+
+  # [[fill]align] -- the defaults are space fill and left alignment.
+  result.fill = ' '
+  result.align = '<'
+  if pos + 1 < s.len and s[pos+1] in alignChars:
+    result.fill = s[pos]
+    result.align = s[pos+1]
+    inc pos, 2
+  elif pos < s.len and s[pos] in alignChars:
+    result.align = s[pos]
+    inc pos
+
+  when false:
+    # XXX Python inspired 'sign' not yet supported!
+    if pos < s.len and s[pos] in {'-', '+', ' '}:
+      result.sign = s[pos]
+      inc pos
+
+  # [#]
+  if pos < s.len and s[pos] == '#':
+    result.alternateForm = true
+    inc pos
+
+  # [0] -- only a '0' that is followed by another digit requests zero padding.
+  if pos+1 < s.len and s[pos] == '0' and s[pos+1] in {'0'..'9'}:
+    result.padWithZero = true
+    inc pos
+
+  # [minimumwidth]
+  let widthChars = parseSaturatedNatural(s, result.minimumWidth, pos)
+  inc pos, widthChars
+
+  # [.precision] -- -1 marks "no precision given".
+  if pos < s.len and s[pos] == '.':
+    inc pos
+    let precisionChars = parseSaturatedNatural(s, result.precision, pos)
+    inc pos, precisionChars
+  else:
+    result.precision = -1
+
+  # [type] -- a single ASCII letter.
+  if pos < s.len and s[pos] in {'A'..'Z', 'a'..'z'}:
+    result.typ = s[pos]
+    inc pos
+
+  result.endPosition = pos
+  if not ignoreUnknownSuffix and pos != s.len:
+    raise newException(ValueError,
+      "invalid format string, cannot parse: " & s[pos..^1])
+
+
+proc format*(value: SomeInteger; specifier: string; res: var string) =
+  ## Appends ``value``, formatted according to the standard format
+  ## ``specifier``, to ``res``. The ``fmt`` macro requires this overload to
+  ## exist; there is little reason to call it directly.
+  ##
+  ## Raises ``ValueError`` when the type character of ``specifier`` is not one
+  ## of 'x', 'X', 'b', 'd', 'o' or absent.
+  let spec = parseStandardFormatSpecifier(specifier)
+  let radix =
+    case spec.typ
+    of 'd', '\0': 10
+    of 'x', 'X': 16
+    of 'b': 2
+    of 'o': 8
+    else:
+      raise newException(ValueError,
+        "invalid type in format string for number, expected one " &
+        " of 'x', 'X', 'b', 'd', 'o' but got: " & spec.typ)
+  res.add formatInt(value, radix, spec)
+
+proc format*(value: SomeReal; specifier: string; res: var string) =
+  ## Standard format implementation for ``SomeReal``. It makes little
+  ## sense to call this directly, but it is required to exist
+  ## by the ``fmt`` macro.
+  ##
+  ## Appends ``value``, formatted according to ``specifier``, to ``res``.
+  ## A '0' in front of the width zero-pads the number between its sign and
+  ## its digits. Raises ``ValueError`` when the type character is not one of
+  ## 'e', 'E', 'f', 'F', 'g', 'G' or absent.
+  let spec = parseStandardFormatSpecifier(specifier)
+
+  var fmode = ffDefault
+  case spec.typ
+  of 'e', 'E':
+    fmode = ffScientific
+  of 'f', 'F':
+    fmode = ffDecimal
+  of 'g', 'G', '\0':
+    discard   # general format is the default mode
+  else:
+    raise newException(ValueError,
+      "invalid type in format string for number, expected one " &
+      " of 'e', 'E', 'f', 'F', 'g', 'G' but got: " & spec.typ)
+
+  var digits = formatBiggestFloat(value, fmode, spec.precision)
+  if spec.padWithZero:
+    # Honour the documented '0' flag: the zeros go after a leading '-', so
+    # that e.g. -1.5 with width 6 becomes "-001.5" and not "00-1.5".
+    let zeros = spec.minimumWidth - digits.len
+    if zeros > 0:
+      if digits.len > 0 and digits[0] == '-':
+        digits = "-" & repeat('0', zeros) & digits.substr(1)
+      else:
+        digits = repeat('0', zeros) & digits
+
+  let formatted = alignString(digits, spec.minimumWidth, spec.align, spec.fill)
+  # An upper-case type letter ('E', 'F', 'G') upper-cases the whole output.
+  if spec.typ in {'A'..'Z'}:
+    res.add toUpperAscii(formatted)
+  else:
+    res.add formatted
+
+proc format*(value: string; specifier: string; res: var string) =
+  ## Standard format implementation for ``string``. It makes little
+  ## sense to call this directly, but it is required to exist
+  ## by the ``fmt`` macro.
+  ##
+  ## Appends ``value``, formatted according to ``specifier``, to ``res``.
+  ## A precision, if given, is the maximum number of characters taken from
+  ## ``value``. Raises ``ValueError`` when the type character is neither 's'
+  ## nor absent.
+  let spec = parseStandardFormatSpecifier(specifier)
+  case spec.typ
+  of 's', '\0': discard
+  else:
+    raise newException(ValueError,
+      "invalid type in format string for string, expected 's', but got " &
+      spec.typ)
+
+  var text = value
+  if spec.precision >= 0:
+    # Truncate to at most ``precision`` characters. Like ``alignString``,
+    # count runes for valid UTF-8 and bytes for anything else.
+    if validateUtf8(text) == -1:
+      if spec.precision < runeLen(text):
+        text.setLen(runeOffset(text, spec.precision))
+    elif spec.precision < text.len:
+      text.setLen(spec.precision)
+  res.add alignString(text, spec.minimumWidth, spec.align, spec.fill)
diff --git a/web/website.ini b/web/website.ini
index a158e3b47..5560e67ea 100644
--- a/web/website.ini
+++ b/web/website.ini
@@ -64,7 +64,7 @@ srcdoc2: "pure/asyncfile;pure/asyncftpclient;pure/lenientops"
 srcdoc2: "pure/md5;pure/rationals"
 srcdoc2: "posix/posix;pure/distros;pure/oswalkdir"
 srcdoc2: "pure/collections/heapqueue"
-srcdoc2: "pure/fenv;impure/rdstdin"
+srcdoc2: "pure/fenv;impure/rdstdin;pure/strformat"
 srcdoc2: "pure/segfaults"
 srcdoc2: "pure/basic2d;pure/basic3d;pure/mersenne;pure/coro;pure/httpcore"
 srcdoc2: "pure/bitops;pure/nimtracker;pure/punycode;pure/volatile"