summary refs log tree commit diff stats
path: root/lib/pure/strutils.nim
diff options
context:
space:
mode:
Diffstat (limited to 'lib/pure/strutils.nim')
-rw-r--r--lib/pure/strutils.nim306
1 files changed, 301 insertions, 5 deletions
diff --git a/lib/pure/strutils.nim b/lib/pure/strutils.nim
index 86af6ac88..b8b3c77c7 100644
--- a/lib/pure/strutils.nim
+++ b/lib/pure/strutils.nim
@@ -15,6 +15,7 @@
 
 import parseutils
 from math import pow, round, floor, log10
+from algorithm import reverse
 
 {.deadCodeElim: on.}
 
@@ -325,7 +326,8 @@ proc toOctal*(c: char): string {.noSideEffect, rtl, extern: "nsuToOctal".} =
     result[i] = chr(val mod 8 + ord('0'))
     val = val div 8
 
-iterator split*(s: string, seps: set[char] = Whitespace, maxsplit: int = -1): string =
+iterator split*(s: string, seps: set[char] = Whitespace,
+                maxsplit: int = -1): string =
   ## Splits the string `s` into substrings using a group of separators.
   ##
   ## Substrings are separated by a substring containing only `seps`. Note
@@ -422,10 +424,13 @@ iterator split*(s: string, sep: char, maxsplit: int = -1): string =
       dec(splits)
       inc(last)
 
-proc substrEq(s: string, a, L: int, x: string): bool =
+proc substrEq(s: string, pos: int, substr: string): bool =
   var i = 0
-  while i < L and s[a+i] == x[i]: inc i
-  result = i == L
+  var length = substr.len
+  while i < length and s[pos+i] == substr[i]:
+    inc i
+
+  return i == length
 
 iterator split*(s: string, sep: string, maxsplit: int = -1): string =
   ## Splits the string `s` into substrings using a string separator.
@@ -433,10 +438,11 @@ iterator split*(s: string, sep: string, maxsplit: int = -1): string =
   ## Substrings are separated by the string `sep`.
   var last = 0
   var splits = maxsplit
+
   if len(s) > 0:
     while last <= len(s):
       var first = last
-      while last < len(s) and not s.substrEq(last, sep.len, sep):
+      while last < len(s) and not s.substrEq(last, sep):
         inc(last)
       if splits == 0: last = len(s)
       yield substr(s, first, last-1)
@@ -444,6 +450,108 @@ iterator split*(s: string, sep: string, maxsplit: int = -1): string =
       dec(splits)
       inc(last, sep.len)
 
+# --------- Private templates for different rsplit separators -----------
+
+template stringHasSep(s: string, index: int, seps: set[char]): bool =
+  s[index] in seps
+
+template stringHasSep(s: string, index: int, sep: char): bool =
+  s[index] == sep
+
+template stringHasSep(s: string, index: int, sep: string): bool =
+  s.substrEq(index, sep)
+
+template rsplitCommon(s, sep, maxsplit, sepLen) =
+  ## Common code for rsplit functions
+  var
+    last = s.len - 1
+    first = last
+    splits = maxsplit
+    startPos = 0
+
+  if len(s) > 0:
+    # go to -1 in order to get separators at the beginning
+    while first >= -1:
+      while first >= 0 and not stringHasSep(s, first, sep):
+        dec(first)
+
+      if splits == 0:
+        # No more splits means set first to the beginning
+        first = -1
+
+      if first == -1:
+        startPos = 0
+      else:
+        startPos = first + sepLen
+
+      yield substr(s, startPos, last)
+
+      if splits == 0:
+        break
+
+      dec(splits)
+      dec(first)
+
+      last = first
+
+iterator rsplit*(s: string, seps: set[char] = Whitespace,
+                 maxsplit: int = -1): string =
+  ## Splits the string `s` into substrings from the right using a
+  ## string separator. Works exactly the same as `split iterator
+  ## <#split.i,string,char>`_ except in reverse order.
+  ##
+  ## .. code-block:: nim
+  ##   for piece in "foo bar".rsplit(WhiteSpace):
+  ##     echo piece
+  ##
+  ## Results in:
+  ##
+  ## .. code-block:: nim
+  ##   "bar"
+  ##   "foo"
+  ##
+  ## Substrings are separated from the right by the set of chars `seps`
+
+  rsplitCommon(s, seps, maxsplit, 1)
+
+iterator rsplit*(s: string, sep: char,
+                 maxsplit: int = -1): string =
+  ## Splits the string `s` into substrings from the right using a
+  ## string separator. Works exactly the same as `split iterator
+  ## <#split.i,string,char>`_ except in reverse order.
+  ##
+  ## .. code-block:: nim
+  ##   for piece in "foo:bar".rsplit(':'):
+  ##     echo piece
+  ##
+  ## Results in:
+  ##
+  ## .. code-block:: nim
+  ##   "bar"
+  ##   "foo"
+  ##
+  ## Substrings are separated from the right by the char `sep`
+  rsplitCommon(s, sep, maxsplit, 1)
+
+iterator rsplit*(s: string, sep: string, maxsplit: int = -1,
+                 keepSeparators: bool = false): string =
+  ## Splits the string `s` into substrings from the right using a
+  ## string separator. Works exactly the same as `split iterator
+  ## <#split.i,string,string>`_ except in reverse order.
+  ##
+  ## .. code-block:: nim
+  ##   for piece in "foothebar".rsplit("the"):
+  ##     echo piece
+  ##
+  ## Results in:
+  ##
+  ## .. code-block:: nim
+  ##   "bar"
+  ##   "foo"
+  ##
+  ## Substrings are separated from the right by the string `sep`
+  rsplitCommon(s, sep, maxsplit, sep.len)
+
 iterator splitLines*(s: string): string =
   ## Splits the string `s` into its containing lines.
   ##
@@ -531,6 +639,73 @@ proc split*(s: string, sep: string, maxsplit: int = -1): seq[string] {.noSideEff
   ## `split iterator <#split.i,string,string>`_.
   accumulateResult(split(s, sep, maxsplit))
 
+proc rsplit*(s: string, seps: set[char] = Whitespace,
+             maxsplit: int = -1): seq[string]
+             {.noSideEffect, rtl, extern: "nsuRSplitCharSet".} =
+  ## The same as the `rsplit iterator <#rsplit.i,string,set[char]>`_, but is a
+  ## proc that returns a sequence of substrings.
+  ##
+  ## A possible common use case for `rsplit` is path manipulation,
+  ## particularly on systems that don't use a common delimiter.
+  ##
+  ## For example, if a system had `#` as a delimiter, you could
+  ## do the following to get the tail of the path:
+  ##
+  ## .. code-block:: nim
+  ##   var tailSplit = rsplit("Root#Object#Method#Index", {'#'}, maxsplit=1)
+  ##
+  ## Results in `tailSplit` containing:
+  ##
+  ## .. code-block:: nim
+  ##   @["Root#Object#Method", "Index"]
+  ##
+  accumulateResult(rsplit(s, seps, maxsplit))
+  result.reverse()
+
+proc rsplit*(s: string, sep: char, maxsplit: int = -1): seq[string]
+             {.noSideEffect, rtl, extern: "nsuRSplitChar".} =
+  ## The same as the `split iterator <#rsplit.i,string,char>`_, but is a proc
+  ## that returns a sequence of substrings.
+  ##
+  ## A possible common use case for `rsplit` is path manipulation,
+  ## particularly on systems that don't use a common delimiter.
+  ##
+  ## For example, if a system had `#` as a delimiter, you could
+  ## do the following to get the tail of the path:
+  ##
+  ## .. code-block:: nim
+  ##   var tailSplit = rsplit("Root#Object#Method#Index", '#', maxsplit=1)
+  ##
+  ## Results in `tailSplit` containing:
+  ##
+  ## .. code-block:: nim
+  ##   @["Root#Object#Method", "Index"]
+  ##
+  accumulateResult(rsplit(s, sep, maxsplit))
+  result.reverse()
+
+proc rsplit*(s: string, sep: string, maxsplit: int = -1): seq[string]
+             {.noSideEffect, rtl, extern: "nsuRSplitString".} =
+  ## The same as the `split iterator <#rsplit.i,string,string>`_, but is a proc
+  ## that returns a sequence of substrings.
+  ##
+  ## A possible common use case for `rsplit` is path manipulation,
+  ## particularly on systems that don't use a common delimiter.
+  ##
+  ## For example, if a system had `#` as a delimiter, you could
+  ## do the following to get the tail of the path:
+  ##
+  ## .. code-block:: nim
+  ##   var tailSplit = rsplit("Root#Object#Method#Index", "#", maxsplit=1)
+  ##
+  ## Results in `tailSplit` containing:
+  ##
+  ## .. code-block:: nim
+  ##   @["Root#Object#Method", "Index"]
+  ##
+  accumulateResult(rsplit(s, sep, maxsplit))
+  result.reverse()
+
 proc toHex*(x: BiggestInt, len: Positive): string {.noSideEffect,
   rtl, extern: "nsuToHex".} =
   ## Converts `x` to its hexadecimal representation.
@@ -1035,6 +1210,62 @@ proc rfind*(s: string, sub: char, start: int = -1): int {.noSideEffect,
     if sub == s[i]: return i
   return -1
 
+proc partition*(s: string, sep: string,
+                right: bool = false): (string, string, string)
+                {.noSideEffect, procvar, rtl, extern: "nsuPartitionStr".} =
+  ## Split the string at the first or last occurrence of `sep` into a 3-tuple
+  ##
+  ## Returns a 3 string tuple of (beforeSep, `sep`, afterSep) or
+  ## (`s`, "", "") if `sep` is not found and `right` is false or
+  ## ("", "", `s`) if `sep` is not found and `right` is true
+
+  let position = if right: s.rfind(sep) else: s.find(sep)
+
+  if position != -1:
+    let
+      beforeSep = s[0 ..< position]
+      afterSep = s[position + sep.len ..< s.len]
+
+    return (s[0 ..< position], sep, afterSep)
+
+  return if right: ("", "", s) else: (s, "", "")
+
+proc rpartition*(s: string, sep: string): (string, string, string)
+                {.noSideEffect, procvar, rtl, extern: "nsuRPartitionStr".} =
+  ## Split the string at the last occurrence of `sep` into a 3-tuple
+  ##
+  ## Returns a 3 string tuple of (beforeSep, `sep`, afterSep) or
+  ## ("", "", `s`) if `sep` is not found
+  return partition(s, sep, right = true)
+
+proc center*(s: string, width: int, fillChar: char = ' '): string {.
+  noSideEffect, rtl, extern: "nsuCenterString".} =
+  ## Return the contents of `s` centered in a string `width` long using
+  ## `fillChar` as padding.
+  ##
+  ## The original string is returned if `width` is less than or equal
+  ## to `s.len`.
+  if width <= s.len:
+    return s
+
+  result = newString(width)
+
+  # Left padding will be one fillChar
+  # smaller if there are an odd number
+  # of characters
+  let
+    charsLeft = (width - s.len)
+    leftPadding = charsLeft div 2
+
+  for i in 0 ..< width:
+    if i >= leftPadding and i < leftPadding + s.len:
+      # we are where the string should be located
+      result[i] = s[i-leftPadding]
+    else:
+      # we are either before or after where
+      # the string s should go
+      result[i] = fillChar
+
 proc count*(s: string, sub: string, overlapping: bool = false): int {.
   noSideEffect, rtl, extern: "nsuCountString".} =
   ## Count the occurrences of a substring `sub` in the string `s`.
@@ -1116,6 +1347,38 @@ proc replace*(s: string, sub, by: char): string {.noSideEffect,
     else: result[i] = s[i]
     inc(i)
 
+proc expandTabs*(s: string, tabSize: int = 8): string {.noSideEffect,
+  procvar, rtl, extern: "nsuExpandTabsStr".} =
+  ## Expand tab characters in `s` by `tabSize` spaces
+
+  if len(s) == 0:
+    return s
+
+  result = newStringOfCap(s.len + s.len shr 2)
+
+  var pos = 0
+
+  template addSpaces(n) =
+    for j in 0 ..< n:
+      result.add(' ')
+      pos += 1
+
+  for i in 0 ..< len(s):
+    let c = s[i]
+
+    if c == '\t':
+      let
+        denominator = if tabSize > 0: tabSize else: 1
+        numSpaces = tabSize - pos mod denominator
+
+      addSpaces(numSpaces)
+    else:
+      result.add(c)
+      pos += 1
+
+    if c == '\l':
+      pos = 0
+
 proc replaceWord*(s, sub: string, by = ""): string {.noSideEffect,
   rtl, extern: "nsuReplaceWord".} =
   ## Replaces `sub` in `s` by the string `by`.
@@ -1899,6 +2162,11 @@ when isMainModule:
 
   doAssert parseEnum("invalid enum value", enC) == enC
 
+  doAssert center("foo", 13) == "     foo     "
+  doAssert center("foo", 0) == "foo"
+  doAssert center("foo", 3, fillChar = 'a') == "foo"
+  doAssert center("foo", 10, fillChar = '\t') == "\t\t\tfoo\t\t\t\t"
+
   doAssert count("foofoofoo", "foofoo") == 1
   doAssert count("foofoofoo", "foofoo", overlapping = true) == 2
   doAssert count("foofoofoo", 'f') == 3
@@ -1967,6 +2235,34 @@ when isMainModule:
   doAssert(not isUpper("AAcc"))
   doAssert(not isUpper("A#$"))
 
+  doAssert expandTabs("\t", 4) == "    "
+  doAssert expandTabs("\tfoo\t", 4) == "    foo "
+  doAssert expandTabs("\tfoo\tbar", 4) == "    foo bar"
+  doAssert expandTabs("\tfoo\tbar\t", 4) == "    foo bar "
+  doAssert expandTabs("", 4) == ""
+  doAssert expandTabs("", 0) == ""
+  doAssert expandTabs("\t\t\t", 0) == ""
+
+  doAssert partition("foo:bar", ":") == ("foo", ":", "bar")
+  doAssert partition("foobarbar", "bar") == ("foo", "bar", "bar")
+  doAssert partition("foobarbar", "bank") == ("foobarbar", "", "")
+  doAssert partition("foobarbar", "foo") == ("", "foo", "barbar")
+  doAssert partition("foofoobar", "bar") == ("foofoo", "bar", "")
+
+  doAssert rpartition("foo:bar", ":") == ("foo", ":", "bar")
+  doAssert rpartition("foobarbar", "bar") == ("foobar", "bar", "")
+  doAssert rpartition("foobarbar", "bank") == ("", "", "foobarbar")
+  doAssert rpartition("foobarbar", "foo") == ("", "foo", "barbar")
+  doAssert rpartition("foofoobar", "bar") == ("foofoo", "bar", "")
+
+  doAssert rsplit("foo bar", seps=Whitespace) == @["foo", "bar"]
+  doAssert rsplit(" foo bar", seps=Whitespace, maxsplit=1) == @[" foo", "bar"]
+  doAssert rsplit(" foo bar ", seps=Whitespace, maxsplit=1) == @[" foo bar", ""]
+  doAssert rsplit(":foo:bar", sep=':') == @["", "foo", "bar"]
+  doAssert rsplit(":foo:bar", sep=':', maxsplit=2) == @["", "foo", "bar"]
+  doAssert rsplit(":foo:bar", sep=':', maxsplit=3) == @["", "foo", "bar"]
+  doAssert rsplit("foothebar", sep="the") == @["foo", "bar"]
+
   doAssert(unescape(r"\x013", "", "") == "\x013")
 
   doAssert join(["foo", "bar", "baz"]) == "foobarbaz"