summary refs log tree commit diff stats
path: root/lib/pure/subexes.nim
diff options
context:
space:
mode:
Diffstat (limited to 'lib/pure/subexes.nim')
-rw-r--r--lib/pure/subexes.nim380
1 files changed, 380 insertions, 0 deletions
diff --git a/lib/pure/subexes.nim b/lib/pure/subexes.nim
new file mode 100644
index 000000000..363cf6d04
--- /dev/null
+++ b/lib/pure/subexes.nim
@@ -0,0 +1,380 @@
+#
+#
+#            Nimrod's Runtime Library
+#        (c) Copyright 2011 Andreas Rumpf
+#
+#    See the file "copying.txt", included in this
+#    distribution, for details about the copyright.
+#
+
+## Nimrod support for `substitution expressions`:idx: (`subex`:idx:).
+##
+## .. include:: ../doc/subexes.txt
+##
+
+{.push debugger:off .} # the user does not want to trace a part
+                       # of the standard library!
+
+from strutils import parseInt, cmpIgnoreStyle, Digits
+include "system/inclrtl"
+
+
+proc findNormalized(x: string, inArray: openarray[string]): int =
+  var i = 0
+  while i < high(inArray):
+    if cmpIgnoreStyle(x, inArray[i]) == 0: return i
+    inc(i, 2) # incrementing by 1 would probably lead to a
+              # security hole...
+  return -1
+
+type
+  EInvalidSubex* = object of EInvalidValue ## exception that is raised for
+                                           ## an invalid subex
+
+proc raiseInvalidFormat(msg: string) {.noinline.} =
+  raise newException(EInvalidSubex, "invalid format string: " & msg)
+
+type
+  TFormatParser = object {.pure, final.}
+    f: cstring
+    num, i, lineLen: int
+
+template call(x: stmt) =
+  p.i = i
+  x
+  i = p.i
+
+template callNoLineLenTracking(x: stmt) =
+  let oldLineLen = p.lineLen
+  p.i = i
+  x
+  i = p.i
+  p.lineLen = oldLineLen
+
+proc getFormatArg(p: var TFormatParser, a: openArray[string]): int =
+  const PatternChars = {'a'..'z', 'A'..'Z', '0'..'9', '\128'..'\255', '_'}
+  var i = p.i
+  var f = p.f
+  case f[i]
+  of '#':
+    result = p.num
+    inc i
+    inc p.num
+  of '1'..'9', '-':
+    var j = 0
+    var negative = f[i] == '-'
+    if negative: inc i
+    while f[i] in Digits:
+      j = j * 10 + ord(f[i]) - ord('0')
+      inc i
+    result = if not negative: j-1 else: a.len-j
+  of 'a'..'z', 'A'..'Z', '\128'..'\255', '_':
+    var name = ""
+    while f[i] in PatternChars: 
+      name.add(f[i])
+      inc(i)
+    result = findNormalized(name, a)+1
+  of '$':
+    inc(i)
+    call:
+      result = getFormatArg(p, a)
+    result = parseInt(a[result])-1
+  else:
+    raiseInvalidFormat("'#', '$', number or identifier expected")
+  if result >=% a.len: raiseInvalidFormat("index out of bounds: " & $result)
+  p.i = i
+
+proc scanDollar(p: var TFormatParser, a: openarray[string], s: var string)
+
+proc emitChar(p: var TFormatParser, x: var string, ch: char) {.inline.} =
+  x.add(ch)
+  if ch == '\L': p.lineLen = 0
+  else: inc p.lineLen
+
+proc emitStrLinear(p: var TFormatParser, x: var string, y: string) {.inline.} =
+  for ch in items(y): emitChar(p, x, ch)
+
+proc emitStr(p: var TFormatParser, x: var string, y: string) {.inline.} =
+  x.add(y)
+  inc p.lineLen, y.len
+
+proc scanQuote(p: var TFormatParser, x: var string, toAdd: bool) =
+  var i = p.i+1
+  var f = p.f
+  while true:
+    if f[i] == '\'':
+      inc i
+      if f[i] != '\'': break
+      inc i
+      if toAdd: emitChar(p, x, '\'')
+    elif f[i] == '\0': raiseInvalidFormat("closing \"'\" expected")
+    else:
+      if toAdd: emitChar(p, x, f[i])
+      inc i
+  p.i = i
+
+proc scanBranch(p: var TFormatParser, a: openArray[string],
+                x: var string, choice: int) =
+  var i = p.i
+  var f = p.f
+  var c = 0
+  var elsePart = i
+  var toAdd = choice == 0
+  while true:
+    case f[i]
+    of ']': break
+    of '|': 
+      inc i
+      elsePart = i
+      inc c
+      if toAdd: break
+      toAdd = choice == c
+    of '\'':
+      call: scanQuote(p, x, toAdd)
+    of '\0': raiseInvalidFormat("closing ']' expected")
+    else:
+      if toAdd:
+        if f[i] == '$':
+          inc i
+          call: scanDollar(p, a, x)
+        else:
+          emitChar(p, x, f[i])
+          inc i
+      else:
+        inc i
+  if not toAdd and choice >= 0:
+    # evaluate 'else' part:
+    var last = i
+    i = elsePart
+    while true:
+      case f[i]
+      of '|', ']': break
+      of '\'':
+        call: scanQuote(p, x, true)
+      of '$':
+        inc i
+        call: scanDollar(p, a, x)
+      else:
+        emitChar(p, x, f[i])
+        inc i
+    i = last
+  p.i = i+1
+
+proc scanSlice(p: var TFormatParser, a: openarray[string]): tuple[x, y: int] =
+  var slice = false
+  var i = p.i
+  var f = p.f
+  
+  if f[i] == '{': inc i
+  else: raiseInvalidFormat("'{' expected")
+  if f[i] == '.' and f[i+1] == '.':
+    inc i, 2
+    slice = true
+  else:
+    call: result.x = getFormatArg(p, a)
+    if f[i] == '.' and f[i+1] == '.':
+      inc i, 2
+      slice = true
+  if slice:
+    if f[i] != '}':
+      call: result.y = getFormatArg(p, a)
+    else:
+      result.y = high(a)
+  else:
+    result.y = result.x
+  if f[i] != '}': raiseInvalidFormat("'}' expected")
+  inc i
+  p.i = i
+  
+proc scanDollar(p: var TFormatParser, a: openarray[string], s: var string) =
+  var i = p.i
+  var f = p.f
+  case f[i]
+  of '$': 
+    emitChar p, s, '$'
+    inc i
+  of '{':
+    call:
+      let (x, y) = scanSlice(p, a)
+    for j in x..y: emitStr p, s, a[j]
+  of '[':
+    inc i
+    var start = i
+    call: scanBranch(p, a, s, -1)
+    var x: int
+    if f[i] == '{':
+      inc i
+      call: x = getFormatArg(p, a)
+      if f[i] != '}': raiseInvalidFormat("'}' expected")
+      inc i
+    else:
+      call: x = getFormatArg(p, a)
+    var last = i
+    let choice = parseInt(a[x])
+    i = start
+    call: scanBranch(p, a, s, choice)
+    i = last
+  of '\'':
+    var sep = ""
+    callNoLineLenTracking: scanQuote(p, sep, true)
+    if f[i] == '~':
+      # $' '~{1..3}
+      # insert space followed by 1..3 if not empty
+      inc i
+      call: 
+        let (x, y) = scanSlice(p, a)
+      var L = 0
+      for j in x..y: inc L, a[j].len
+      if L > 0:
+        emitStrLinear p, s, sep
+        for j in x..y: emitStr p, s, a[j]
+    else:
+      block StringJoin:
+        block OptionalLineLengthSpecifier:
+          var maxLen = 0
+          case f[i]
+          of '0'..'9':
+            while f[i] in Digits:
+              maxLen = maxLen * 10 + ord(f[i]) - ord('0')
+              inc i
+          of '$':
+            # do not skip the '$' here for `getFormatArg`!
+            call:
+              maxLen = getFormatArg(p, a)
+          else: break OptionalLineLengthSpecifier
+          var indent = ""
+          case f[i]
+          of 'i':
+            inc i
+            callNoLineLenTracking: scanQuote(p, indent, true)
+            
+            call:
+              let (x, y) = scanSlice(p, a)
+            if maxLen < 1: emitStrLinear(p, s, indent)
+            var items = 1
+            emitStr p, s, a[x]
+            for j in x+1..y:
+              emitStr p, s, sep
+              if items >= maxLen: 
+                emitStrLinear p, s, indent
+                items = 0
+              emitStr p, s, a[j]
+              inc items
+          of 'c':
+            inc i
+            callNoLineLenTracking: scanQuote(p, indent, true)
+            
+            call:
+              let (x, y) = scanSlice(p, a)
+            if p.lineLen + a[x].len > maxLen: emitStrLinear(p, s, indent)
+            emitStr p, s, a[x]
+            for j in x+1..y:
+              emitStr p, s, sep
+              if p.lineLen + a[j].len > maxLen: emitStrLinear(p, s, indent)
+              emitStr p, s, a[j]
+            
+          else: raiseInvalidFormat("unit 'c' (chars) or 'i' (items) expected")
+          break StringJoin
+
+        call:
+          let (x, y) = scanSlice(p, a)
+        emitStr p, s, a[x]
+        for j in x+1..y:
+          emitStr p, s, sep
+          emitStr p, s, a[j]
+  else:
+    call: 
+      var x = getFormatArg(p, a)
+    emitStr p, s, a[x]
+  p.i = i
+
+
+type
+  TSubex* = distinct string ## string that contains a substitution expression
+
+proc subex*(s: string): TSubex =
+  ## constructs a *substitution expression* from `s`. Currently this performs
+  ## no syntax checking but this may change in later versions.
+  result = TSubex(s)
+
+proc addf*(s: var string, formatstr: TSubex, a: openarray[string]) {.
+           noSideEffect, rtl, extern: "nfrmtAddf".} =
+  ## The same as ``add(s, formatstr % a)``, but more efficient.
+  var p: TFormatParser
+  p.f = formatstr.string
+  var i = 0
+  while i < len(formatstr.string):
+    if p.f[i] == '$':
+      inc i
+      call: scanDollar(p, a, s)
+    else:
+      emitChar(p, s, p.f[i])
+      inc(i)
+
+proc `%` *(formatstr: TSubex, a: openarray[string]): string {.noSideEffect,
+  rtl, extern: "nfrmtFormatOpenArray".} =
+  ## The `substitution`:idx: operator performs string substitutions in
+  ## `formatstr` and returns a modified `formatstr`. This is often called
+  ## `string interpolation`:idx:.
+  ##
+  result = newStringOfCap(formatstr.string.len + a.len shl 4)
+  addf(result, formatstr, a)
+
+proc `%` *(formatstr: TSubex, a: string): string {.noSideEffect,
+  rtl, extern: "nfrmtFormatSingleElem".} =
+  ## This is the same as ``formatstr % [a]``.
+  result = newStringOfCap(formatstr.string.len + a.len)
+  addf(result, formatstr, [a])
+
+{.pop.}
+
+when isMainModule:
+
+  proc `%`(formatstr: string, a: openarray[string]): string =
+    result = newStringOfCap(formatstr.len + a.len shl 4)
+    addf(result, formatstr.TSubex, a)
+
+  proc `%`(formatstr: string, a: string): string =
+    result = newStringOfCap(formatstr.len + a.len)
+    addf(result, formatstr.TSubex, [a])
+
+
+  doAssert "$# $3 $# $#" % ["a", "b", "c"] == "a c b c"
+  doAssert "$animal eats $food." % ["animal", "The cat", "food", "fish"] ==
+           "The cat eats fish."
+
+
+  doAssert "$[abc|def]# $3 $# $#" % ["17", "b", "c"] == "def c b c"
+  doAssert "$[abc|def]# $3 $# $#" % ["1", "b", "c"] == "def c b c"
+  doAssert "$[abc|def]# $3 $# $#" % ["0", "b", "c"] == "abc c b c"
+  doAssert "$[abc|def|]# $3 $# $#" % ["17", "b", "c"] == " c b c"
+
+  doAssert "$[abc|def|]# $3 $# $#" % ["-9", "b", "c"] == " c b c"
+  doAssert "$1($', '{2..})" % ["f", "a", "b"] == "f(a, b)"
+
+  doAssert "$[$1($', '{2..})|''''|fg'$3']1" % ["7", "a", "b"] == "fg$3"
+  
+  doAssert "$[$#($', '{#..})|''''|$3]1" % ["0", "a", "b"] == "0(a, b)"
+  doAssert "$' '~{..}" % "" == ""
+  doAssert "$' '~{..}" % "P0" == " P0"
+  doAssert "${$1}" % "1" == "1"
+  doAssert "${$$-1} $$1" % "1" == "1 $1"
+           
+  doAssert "$#($', '10c'\n    '{#..})" % ["doAssert", "longishA", "longish"] ==
+           """doAssert(
+    longishA, 
+    longish)"""
+  
+  echo "type TMyEnum* = enum\n  $', '2i'\n  '{..}" % ["fieldA", 
+    "fieldB", "FiledClkad", "fieldD", "fieldE", "longishFieldName"]
+  
+  doAssert subex"$1($', '{2..})" % ["f", "a", "b", "c"] == "f(a, b, c)"
+  
+  doAssert subex"$1 $[files|file|files]{1} copied" % ["1"] == "1 file copied"
+  
+  doAssert subex"$['''|'|''''|']']#" % "0" == "'|"
+  
+  echo subex("type\n  TEnum = enum\n    $', '40c'\n    '{..}") % [
+    "fieldNameA", "fieldNameB", "fieldNameC", "fieldNameD"]
+  
+