diff options
author | Araq <rumpf_a@web.de> | 2011-12-27 19:22:46 +0100 |
---|---|---|
committer | Araq <rumpf_a@web.de> | 2011-12-27 19:22:46 +0100 |
commit | b336bf4039f3bc428f0a6690bf448f1e0b447c4b (patch) | |
tree | 93224405b04a09225f7a278ae99c340d9d8faa4b /lib | |
parent | 76f91b90e2a411a6d2ca82f075f55abe63d8f6a5 (diff) | |
download | Nim-b336bf4039f3bc428f0a6690bf448f1e0b447c4b.tar.gz |
added support for advanced substitution expressions
Diffstat (limited to 'lib')
-rwxr-xr-x | lib/pure/strutils.nim | 232 | ||||
-rw-r--r-- | lib/pure/subexes.nim | 380 |
2 files changed, 501 insertions, 111 deletions
diff --git a/lib/pure/strutils.nim b/lib/pure/strutils.nim index 620e6d1c5..6d4544425 100755 --- a/lib/pure/strutils.nim +++ b/lib/pure/strutils.nim @@ -140,105 +140,6 @@ proc cmpIgnoreStyle*(a, b: string): int {.noSideEffect, {.pop.} -proc findNormalized(x: string, inArray: openarray[string]): int = - var i = 0 - while i < high(inArray): - if cmpIgnoreStyle(x, inArray[i]) == 0: return i - inc(i, 2) # incrementing by 1 would probably lead to a - # security hole... - return -1 - -proc addf*(s: var string, formatstr: string, a: openarray[string]) {. - noSideEffect, rtl, extern: "nsuAddf".} = - ## The same as ``add(s, formatstr % a)``, but more efficient. - const PatternChars = {'a'..'z', 'A'..'Z', '0'..'9', '\128'..'\255', '_'} - var i = 0 - var num = 0 - while i < len(formatstr): - if formatstr[i] == '$': - case formatstr[i+1] # again we use the fact that strings - # are zero-terminated here - of '#': - add s, a[num] - inc i, 2 - inc num - of '$': - add s, '$' - inc(i, 2) - of '1'..'9': - var j = 0 - inc(i) # skip $ - while formatstr[i] in Digits: - j = j * 10 + ord(formatstr[i]) - ord('0') - inc(i) - add s, a[j - 1] - of '{': - var j = i+1 - while formatstr[j] notin {'\0', '}'}: inc(j) - var x = findNormalized(substr(formatstr, i+2, j-1), a) - if x >= 0 and x < high(a): add s, a[x+1] - else: raise newException(EInvalidValue, "invalid format string") - i = j+1 - of 'a'..'z', 'A'..'Z', '\128'..'\255', '_': - var j = i+1 - while formatstr[j] in PatternChars: inc(j) - var x = findNormalized(substr(formatstr, i+1, j-1), a) - if x >= 0 and x < high(a): add s, a[x+1] - else: raise newException(EInvalidValue, "invalid format string") - i = j - else: raise newException(EInvalidValue, "invalid format string") - else: - add s, formatstr[i] - inc(i) - -proc `%` *(formatstr: string, a: openarray[string]): string {.noSideEffect, - rtl, extern: "nsuFormatOpenArray".} = - ## The `substitution`:idx: operator performs string substitutions in - ## `formatstr` and returns a modified `formatstr`. This is often called - ## `string interpolation`:idx:. - ## - ## This is best explained by an example: - ## - ## .. code-block:: nimrod - ## "$1 eats $2." % ["The cat", "fish"] - ## - ## Results in: - ## - ## .. code-block:: nimrod - ## "The cat eats fish." - ## - ## The substitution variables (the thing after the ``$``) are enumerated - ## from 1 to ``a.len``. - ## To produce a verbatim ``$``, use ``$$``. - ## The notation ``$#`` can be used to refer to the next substitution variable: - ## - ## .. code-block:: nimrod - ## "$# eats $#." % ["The cat", "fish"] - ## - ## Substitution variables can also be words (that is - ## ``[A-Za-z_]+[A-Za-z0-9_]*``) in which case the arguments in `a` with even - ## indices are keys and with odd indices are the corresponding values. - ## An example: - ## - ## .. code-block:: nimrod - ## "$animal eats $food." % ["animal", "The cat", "food", "fish"] - ## - ## Results in: - ## - ## .. code-block:: nimrod - ## "The cat eats fish." - ## - ## The variables are compared with `cmpIgnoreStyle`. `EInvalidValue` is - ## raised if an ill-formed format string has been passed to the `%` operator. - result = newStringOfCap(formatstr.len + a.len shl 4) - addf(result, formatstr, a) - -proc `%` *(formatstr, a: string): string {.noSideEffect, - rtl, extern: "nsuFormatSingleElem".} = - ## This is the same as ``formatstr % [a]``. - result = newStringOfCap(formatstr.len + a.len) - addf(result, formatstr, [a]) - proc strip*(s: string, leading = true, trailing = true): string {.noSideEffect, rtl, extern: "nsuStrip".} = ## Strips whitespace from `s` and returns the resulting string. @@ -467,16 +368,16 @@ proc ParseHexInt*(s: string): int {.noSideEffect, procvar, inc(i) of '\0': break else: raise newException(EInvalidValue, "invalid integer: " & s) - -proc parseBool*(s: string): bool = - ## Parses a value into a `bool`. If ``s`` is one of the following values: - ## ``y, yes, true, 1, on``, then returns `true`. If ``s`` is one of the - ## following values: ``n, no, false, 0, off``, then returns `false`. - ## If ``s`` is something else a ``EInvalidValue`` exception is raised. - case normalize(s) - of "y", "yes", "true", "1", "on": result = true - of "n", "no", "false", "0", "off": result = false - else: raise newException(EInvalidValue, "cannot interpret as a bool: " & s) + +proc parseBool*(s: string): bool = + ## Parses a value into a `bool`. If ``s`` is one of the following values: + ## ``y, yes, true, 1, on``, then returns `true`. If ``s`` is one of the + ## following values: ``n, no, false, 0, off``, then returns `false`. + ## If ``s`` is something else a ``EInvalidValue`` exception is raised. + case normalize(s) + of "y", "yes", "true", "1", "on": result = true + of "n", "no", "false", "0", "off": result = false + else: raise newException(EInvalidValue, "cannot interpret as a bool: " & s) proc repeatChar*(count: int, c: Char = ' '): string {.noSideEffect, rtl, extern: "nsuRepeatChar".} = @@ -921,7 +822,7 @@ proc editDistance*(a, b: string): int {.noSideEffect, inc(len2) var half = len1 shr 1 # initalize first row: - #var row = cast[ptr array[0..high(int) div 8, int]](alloc(len2 * sizeof(int))) + #var row = cast[ptr array[0..high(int) div 8, int]](alloc(len2*sizeof(int))) var row: seq[int] newSeq(row, len2) var e = s + len2 - 1 # end marker @@ -1033,7 +934,7 @@ proc formatSize*(bytes: biggestInt, decimalSep = '.'): string = ## ## .. code-block:: nimrod ## - ## formatSize(1'i64 shl 31 + 300'i64) == "4GB" + ## formatSize(1'i64 shl 31 + 300'i64) == "2.204GB" ## formatSize(4096) == "4KB" ## template frmt(a, b, c: expr): expr = @@ -1051,6 +952,112 @@ proc formatSize*(bytes: biggestInt, decimalSep = '.'): string = else: result = insertSep($bytes) & "B" +proc findNormalized(x: string, inArray: openarray[string]): int = + var i = 0 + while i < high(inArray): + if cmpIgnoreStyle(x, inArray[i]) == 0: return i + inc(i, 2) # incrementing by 1 would probably lead to a + # security hole... + return -1 + +proc addf*(s: var string, formatstr: string, a: openarray[string]) {. + noSideEffect, rtl, extern: "nsuAddf".} = + ## The same as ``add(s, formatstr % a)``, but more efficient. + const PatternChars = {'a'..'z', 'A'..'Z', '0'..'9', '\128'..'\255', '_'} + var i = 0 + var num = 0 + while i < len(formatstr): + if formatstr[i] == '$': + case formatstr[i+1] # again we use the fact that strings + # are zero-terminated here + of '#': + add s, a[num] + inc i, 2 + inc num + of '$': + add s, '$' + inc(i, 2) + of '1'..'9', '-': + var j = 0 + inc(i) # skip $ + var negative = formatstr[i] == '-' + if negative: inc i + while formatstr[i] in Digits: + j = j * 10 + ord(formatstr[i]) - ord('0') + inc(i) + if not negative: + add s, a[j - 1] + else: + add s, a[a.len - j] + of '{': + var j = i+1 + while formatstr[j] notin {'\0', '}'}: inc(j) + var x = findNormalized(substr(formatstr, i+2, j-1), a) + if x >= 0 and x < high(a): add s, a[x+1] + else: raise newException(EInvalidValue, "invalid format string") + i = j+1 + of 'a'..'z', 'A'..'Z', '\128'..'\255', '_': + var j = i+1 + while formatstr[j] in PatternChars: inc(j) + var x = findNormalized(substr(formatstr, i+1, j-1), a) + if x >= 0 and x < high(a): add s, a[x+1] + else: raise newException(EInvalidValue, "invalid format string") + i = j + else: + raise newException(EInvalidValue, "invalid format string") + else: + add s, formatstr[i] + inc(i) + +proc `%` *(formatstr: string, a: openarray[string]): string {.noSideEffect, + rtl, extern: "nsuFormatOpenArray".} = + ## The `substitution`:idx: operator performs string substitutions in + ## `formatstr` and returns a modified `formatstr`. This is often called + ## `string interpolation`:idx:. + ## + ## This is best explained by an example: + ## + ## .. code-block:: nimrod + ## "$1 eats $2." % ["The cat", "fish"] + ## + ## Results in: + ## + ## .. code-block:: nimrod + ## "The cat eats fish." + ## + ## The substitution variables (the thing after the ``$``) are enumerated + ## from 1 to ``a.len``. + ## To produce a verbatim ``$``, use ``$$``. + ## The notation ``$#`` can be used to refer to the next substitution + ## variable: + ## + ## .. code-block:: nimrod + ## "$# eats $#." % ["The cat", "fish"] + ## + ## Substitution variables can also be words (that is + ## ``[A-Za-z_]+[A-Za-z0-9_]*``) in which case the arguments in `a` with even + ## indices are keys and with odd indices are the corresponding values. + ## An example: + ## + ## .. code-block:: nimrod + ## "$animal eats $food." % ["animal", "The cat", "food", "fish"] + ## + ## Results in: + ## + ## .. code-block:: nimrod + ## "The cat eats fish." + ## + ## The variables are compared with `cmpIgnoreStyle`. `EInvalidValue` is + ## raised if an ill-formed format string has been passed to the `%` operator. + result = newStringOfCap(formatstr.len + a.len shl 4) + addf(result, formatstr, a) + +proc `%` *(formatstr, a: string): string {.noSideEffect, + rtl, extern: "nsuFormatSingleElem".} = + ## This is the same as ``formatstr % [a]``. + result = newStringOfCap(formatstr.len + a.len) + addf(result, formatstr, [a]) + {.pop.} when isMainModule: @@ -1066,3 +1073,6 @@ when isMainModule: echo formatSize(1'i64 shl 31 + 300'i64) # == "4,GB" echo formatSize(1'i64 shl 31) + doAssert "$animal eats $food." % ["animal", "The cat", "food", "fish"] == + "The cat eats fish." + diff --git a/lib/pure/subexes.nim b/lib/pure/subexes.nim new file mode 100644 index 000000000..363cf6d04 --- /dev/null +++ b/lib/pure/subexes.nim @@ -0,0 +1,380 @@ +# +# +# Nimrod's Runtime Library +# (c) Copyright 2011 Andreas Rumpf +# +# See the file "copying.txt", included in this +# distribution, for details about the copyright. +# + +## Nimrod support for `substitution expressions`:idx: (`subex`:idx:). +## +## .. include:: ../doc/subexes.txt +## + +{.push debugger:off .} # the user does not want to trace a part + # of the standard library! + +from strutils import parseInt, cmpIgnoreStyle, Digits +include "system/inclrtl" + + +proc findNormalized(x: string, inArray: openarray[string]): int = + var i = 0 + while i < high(inArray): + if cmpIgnoreStyle(x, inArray[i]) == 0: return i + inc(i, 2) # incrementing by 1 would probably lead to a + # security hole... + return -1 + +type + EInvalidSubex* = object of EInvalidValue ## exception that is raised for + ## an invalid subex + +proc raiseInvalidFormat(msg: string) {.noinline.} = + raise newException(EInvalidSubex, "invalid format string: " & msg) + +type + TFormatParser = object {.pure, final.} + f: cstring + num, i, lineLen: int + +template call(x: stmt) = + p.i = i + x + i = p.i + +template callNoLineLenTracking(x: stmt) = + let oldLineLen = p.lineLen + p.i = i + x + i = p.i + p.lineLen = oldLineLen + +proc getFormatArg(p: var TFormatParser, a: openArray[string]): int = + const PatternChars = {'a'..'z', 'A'..'Z', '0'..'9', '\128'..'\255', '_'} + var i = p.i + var f = p.f + case f[i] + of '#': + result = p.num + inc i + inc p.num + of '1'..'9', '-': + var j = 0 + var negative = f[i] == '-' + if negative: inc i + while f[i] in Digits: + j = j * 10 + ord(f[i]) - ord('0') + inc i + result = if not negative: j-1 else: a.len-j + of 'a'..'z', 'A'..'Z', '\128'..'\255', '_': + var name = "" + while f[i] in PatternChars: + name.add(f[i]) + inc(i) + result = findNormalized(name, a)+1 + of '$': + inc(i) + call: + result = getFormatArg(p, a) + result = parseInt(a[result])-1 + else: + raiseInvalidFormat("'#', '$', number or identifier expected") + if result >=% a.len: raiseInvalidFormat("index out of bounds: " & $result) + p.i = i + +proc scanDollar(p: var TFormatParser, a: openarray[string], s: var string) + +proc emitChar(p: var TFormatParser, x: var string, ch: char) {.inline.} = + x.add(ch) + if ch == '\L': p.lineLen = 0 + else: inc p.lineLen + +proc emitStrLinear(p: var TFormatParser, x: var string, y: string) {.inline.} = + for ch in items(y): emitChar(p, x, ch) + +proc emitStr(p: var TFormatParser, x: var string, y: string) {.inline.} = + x.add(y) + inc p.lineLen, y.len + +proc scanQuote(p: var TFormatParser, x: var string, toAdd: bool) = + var i = p.i+1 + var f = p.f + while true: + if f[i] == '\'': + inc i + if f[i] != '\'': break + inc i + if toAdd: emitChar(p, x, '\'') + elif f[i] == '\0': raiseInvalidFormat("closing \"'\" expected") + else: + if toAdd: emitChar(p, x, f[i]) + inc i + p.i = i + +proc scanBranch(p: var TFormatParser, a: openArray[string], + x: var string, choice: int) = + var i = p.i + var f = p.f + var c = 0 + var elsePart = i + var toAdd = choice == 0 + while true: + case f[i] + of ']': break + of '|': + inc i + elsePart = i + inc c + if toAdd: break + toAdd = choice == c + of '\'': + call: scanQuote(p, x, toAdd) + of '\0': raiseInvalidFormat("closing ']' expected") + else: + if toAdd: + if f[i] == '$': + inc i + call: scanDollar(p, a, x) + else: + emitChar(p, x, f[i]) + inc i + else: + inc i + if not toAdd and choice >= 0: + # evaluate 'else' part: + var last = i + i = elsePart + while true: + case f[i] + of '|', ']': break + of '\'': + call: scanQuote(p, x, true) + of '$': + inc i + call: scanDollar(p, a, x) + else: + emitChar(p, x, f[i]) + inc i + i = last + p.i = i+1 + +proc scanSlice(p: var TFormatParser, a: openarray[string]): tuple[x, y: int] = + var slice = false + var i = p.i + var f = p.f + + if f[i] == '{': inc i + else: raiseInvalidFormat("'{' expected") + if f[i] == '.' and f[i+1] == '.': + inc i, 2 + slice = true + else: + call: result.x = getFormatArg(p, a) + if f[i] == '.' and f[i+1] == '.': + inc i, 2 + slice = true + if slice: + if f[i] != '}': + call: result.y = getFormatArg(p, a) + else: + result.y = high(a) + else: + result.y = result.x + if f[i] != '}': raiseInvalidFormat("'}' expected") + inc i + p.i = i + +proc scanDollar(p: var TFormatParser, a: openarray[string], s: var string) = + var i = p.i + var f = p.f + case f[i] + of '$': + emitChar p, s, '$' + inc i + of '{': + call: + let (x, y) = scanSlice(p, a) + for j in x..y: emitStr p, s, a[j] + of '[': + inc i + var start = i + call: scanBranch(p, a, s, -1) + var x: int + if f[i] == '{': + inc i + call: x = getFormatArg(p, a) + if f[i] != '}': raiseInvalidFormat("'}' expected") + inc i + else: + call: x = getFormatArg(p, a) + var last = i + let choice = parseInt(a[x]) + i = start + call: scanBranch(p, a, s, choice) + i = last + of '\'': + var sep = "" + callNoLineLenTracking: scanQuote(p, sep, true) + if f[i] == '~': + # $' '~{1..3} + # insert space followed by 1..3 if not empty + inc i + call: + let (x, y) = scanSlice(p, a) + var L = 0 + for j in x..y: inc L, a[j].len + if L > 0: + emitStrLinear p, s, sep + for j in x..y: emitStr p, s, a[j] + else: + block StringJoin: + block OptionalLineLengthSpecifier: + var maxLen = 0 + case f[i] + of '0'..'9': + while f[i] in Digits: + maxLen = maxLen * 10 + ord(f[i]) - ord('0') + inc i + of '$': + # do not skip the '$' here for `getFormatArg`! + call: + maxLen = getFormatArg(p, a) + else: break OptionalLineLengthSpecifier + var indent = "" + case f[i] + of 'i': + inc i + callNoLineLenTracking: scanQuote(p, indent, true) + + call: + let (x, y) = scanSlice(p, a) + if maxLen < 1: emitStrLinear(p, s, indent) + var items = 1 + emitStr p, s, a[x] + for j in x+1..y: + emitStr p, s, sep + if items >= maxLen: + emitStrLinear p, s, indent + items = 0 + emitStr p, s, a[j] + inc items + of 'c': + inc i + callNoLineLenTracking: scanQuote(p, indent, true) + + call: + let (x, y) = scanSlice(p, a) + if p.lineLen + a[x].len > maxLen: emitStrLinear(p, s, indent) + emitStr p, s, a[x] + for j in x+1..y: + emitStr p, s, sep + if p.lineLen + a[j].len > maxLen: emitStrLinear(p, s, indent) + emitStr p, s, a[j] + + else: raiseInvalidFormat("unit 'c' (chars) or 'i' (items) expected") + break StringJoin + + call: + let (x, y) = scanSlice(p, a) + emitStr p, s, a[x] + for j in x+1..y: + emitStr p, s, sep + emitStr p, s, a[j] + else: + call: + var x = getFormatArg(p, a) + emitStr p, s, a[x] + p.i = i + + +type + TSubex* = distinct string ## string that contains a substitution expression + +proc subex*(s: string): TSubex = + ## constructs a *substitution expression* from `s`. Currently this performs + ## no syntax checking but this may change in later versions. + result = TSubex(s) + +proc addf*(s: var string, formatstr: TSubex, a: openarray[string]) {. + noSideEffect, rtl, extern: "nfrmtAddf".} = + ## The same as ``add(s, formatstr % a)``, but more efficient. + var p: TFormatParser + p.f = formatstr.string + var i = 0 + while i < len(formatstr.string): + if p.f[i] == '$': + inc i + call: scanDollar(p, a, s) + else: + emitChar(p, s, p.f[i]) + inc(i) + +proc `%` *(formatstr: TSubex, a: openarray[string]): string {.noSideEffect, + rtl, extern: "nfrmtFormatOpenArray".} = + ## The `substitution`:idx: operator performs string substitutions in + ## `formatstr` and returns a modified `formatstr`. This is often called + ## `string interpolation`:idx:. + ## + result = newStringOfCap(formatstr.string.len + a.len shl 4) + addf(result, formatstr, a) + +proc `%` *(formatstr: TSubex, a: string): string {.noSideEffect, + rtl, extern: "nfrmtFormatSingleElem".} = + ## This is the same as ``formatstr % [a]``. + result = newStringOfCap(formatstr.string.len + a.len) + addf(result, formatstr, [a]) + +{.pop.} + +when isMainModule: + + proc `%`(formatstr: string, a: openarray[string]): string = + result = newStringOfCap(formatstr.len + a.len shl 4) + addf(result, formatstr.TSubex, a) + + proc `%`(formatstr: string, a: string): string = + result = newStringOfCap(formatstr.len + a.len) + addf(result, formatstr.TSubex, [a]) + + + doAssert "$# $3 $# $#" % ["a", "b", "c"] == "a c b c" + doAssert "$animal eats $food." % ["animal", "The cat", "food", "fish"] == + "The cat eats fish." + + + doAssert "$[abc|def]# $3 $# $#" % ["17", "b", "c"] == "def c b c" + doAssert "$[abc|def]# $3 $# $#" % ["1", "b", "c"] == "def c b c" + doAssert "$[abc|def]# $3 $# $#" % ["0", "b", "c"] == "abc c b c" + doAssert "$[abc|def|]# $3 $# $#" % ["17", "b", "c"] == " c b c" + + doAssert "$[abc|def|]# $3 $# $#" % ["-9", "b", "c"] == " c b c" + doAssert "$1($', '{2..})" % ["f", "a", "b"] == "f(a, b)" + + doAssert "$[$1($', '{2..})|''''|fg'$3']1" % ["7", "a", "b"] == "fg$3" + + doAssert "$[$#($', '{#..})|''''|$3]1" % ["0", "a", "b"] == "0(a, b)" + doAssert "$' '~{..}" % "" == "" + doAssert "$' '~{..}" % "P0" == " P0" + doAssert "${$1}" % "1" == "1" + doAssert "${$$-1} $$1" % "1" == "1 $1" + + doAssert "$#($', '10c'\n '{#..})" % ["doAssert", "longishA", "longish"] == + """doAssert( + longishA, + longish)""" + + echo "type TMyEnum* = enum\n $', '2i'\n '{..}" % ["fieldA", + "fieldB", "FiledClkad", "fieldD", "fieldE", "longishFieldName"] + + doAssert subex"$1($', '{2..})" % ["f", "a", "b", "c"] == "f(a, b, c)" + + doAssert subex"$1 $[files|file|files]{1} copied" % ["1"] == "1 file copied" + + doAssert subex"$['''|'|''''|']']#" % "0" == "'|" + + echo subex("type\n TEnum = enum\n $', '40c'\n '{..}") % [ + "fieldNameA", "fieldNameB", "fieldNameC", "fieldNameD"] + + |