summary refs log tree commit diff stats
path: root/lib/pure/parseutils.nim
diff options
context:
space:
mode:
Diffstat (limited to 'lib/pure/parseutils.nim')
-rw-r--r--lib/pure/parseutils.nim342
1 files changed, 342 insertions, 0 deletions
diff --git a/lib/pure/parseutils.nim b/lib/pure/parseutils.nim
new file mode 100644
index 000000000..c07b713de
--- /dev/null
+++ b/lib/pure/parseutils.nim
@@ -0,0 +1,342 @@
+#
+#
+#            Nim's Runtime Library
+#        (c) Copyright 2012 Andreas Rumpf
+#
+#    See the file "copying.txt", included in this
+#    distribution, for details about the copyright.
+#
+
+## This module contains helpers for parsing tokens, numbers, identifiers, etc.
+
+{.deadCodeElim: on.}
+
+{.push debugger:off .} # the user does not want to trace a part
+                       # of the standard library!
+
+include "system/inclrtl"
+
+const
+  # Private character classes used by the scanning procs of this module.
+  Whitespace = {' ', '\t', '\v', '\r', '\l', '\f'}
+    ## characters consumed by `skipWhitespace`
+  IdentChars = {'a'..'z', 'A'..'Z', '0'..'9', '_'}
+    ## characters accepted after the first character of an identifier
+  IdentStartChars = {'a'..'z', 'A'..'Z', '_'}
+    ## characters accepted as the first character of an identifier
+    ## (copied from strutils)
+
+proc toLower(c: char): char {.inline.} =
+  ## Maps an ASCII upper-case letter to its lower-case counterpart; every
+  ## other character is returned unchanged.
+  case c
+  of 'A'..'Z': result = chr(ord(c) + (ord('a') - ord('A')))
+  else: result = c
+
+proc parseHex*(s: string, number: var int, start = 0): int {.
+  rtl, extern: "npuParseHex", noSideEffect.} =
+  ## Parses a hexadecimal number starting at ``s[start]`` and shifts its
+  ## digits into ``number``.
+  ##
+  ## An optional ``0x``, ``0X`` or ``#`` prefix is accepted and underscores
+  ## are ignored. Returns the number of parsed characters (prefix and
+  ## underscores included) or 0 if no hexadecimal digit was found.
+  ##
+  ## ``number`` is *not* reset first: every digit is shifted into the value
+  ## it already holds, so make sure ``number`` is zero unless you want to
+  ## *chain* calls on purpose (the value will quickly overflow when doing
+  ## so). Example:
+  ##
+  ## .. code-block:: nim
+  ##   var value = 0
+  ##   discard parseHex("0x38", value)
+  ##   assert value == 56
+  ##   discard parseHex("0x34", value)
+  ##   assert value == 56 * 256 + 52
+  ##   value = -1
+  ##   discard parseHex("0x38", value)
+  ##   assert value == -200
+  ##
+  var pos = start
+  var sawDigit = false
+  # optional prefix: "#", "0x" or "0X"
+  if s[pos] == '#': inc(pos)
+  elif s[pos] == '0' and s[pos+1] in {'x', 'X'}: inc(pos, 2)
+  while true:
+    let c = s[pos]
+    if c != '_':
+      var digit: int
+      case c
+      of '0'..'9': digit = ord(c) - ord('0')
+      of 'a'..'f': digit = ord(c) - ord('a') + 10
+      of 'A'..'F': digit = ord(c) - ord('A') + 10
+      else: break # the first non-hex character ends the number
+      number = number shl 4 or digit
+      sawDigit = true
+    inc(pos)
+  if sawDigit: result = pos - start
+
+proc parseOct*(s: string, number: var int, start = 0): int  {.
+  rtl, extern: "npuParseOct", noSideEffect.} =
+  ## Parses an octal number starting at ``s[start]`` and shifts its digits
+  ## into ``number`` (which is not reset first). An optional ``0o`` or ``0O``
+  ## prefix is accepted and underscores are ignored. Returns the number of
+  ## parsed characters or 0 if no octal digit was found.
+  var pos = start
+  var sawDigit = false
+  if s[pos] == '0' and s[pos+1] in {'o', 'O'}: inc(pos, 2)
+  while s[pos] in {'0'..'7', '_'}:
+    if s[pos] != '_':
+      number = number shl 3 or (ord(s[pos]) - ord('0'))
+      sawDigit = true
+    inc(pos)
+  if sawDigit: result = pos - start
+
+proc parseIdent*(s: string, ident: var string, start = 0): int =
+  ## Parses an identifier starting at ``s[start]`` and stores it in
+  ## ``ident``. Returns the number of parsed characters, or 0 (leaving
+  ## ``ident`` untouched) if ``s[start]`` cannot begin an identifier.
+  if s[start] notin IdentStartChars: return 0
+  var stop = start + 1
+  while s[stop] in IdentChars: inc(stop)
+  ident = substr(s, start, stop-1)
+  result = stop - start
+
+proc parseIdent*(s: string, start = 0): string =
+  ## Parses an identifier starting at ``s[start]`` and returns it.
+  ## An empty string is returned if ``s[start]`` cannot begin an identifier.
+  if s[start] notin IdentStartChars: return ""
+  var stop = start + 1
+  while s[stop] in IdentChars: inc(stop)
+  result = substr(s, start, stop-1)
+
+proc parseToken*(s: string, token: var string, validChars: set[char],
+                 start = 0): int {.inline, deprecated.} =
+  ## Parses a token starting at ``s[start]`` and stores it in ``token``.
+  ## A token consists of the characters in `validChars`. Returns the number
+  ## of parsed characters; this is 0 (and ``token`` becomes empty) if
+  ## ``s[start]`` is not in `validChars` or `start` is not inside ``s``.
+  ##
+  ## **Deprecated since version 0.8.12**: Use ``parseWhile`` instead.
+  var i = start
+  # the explicit bound keeps the scan inside ``s`` even when `validChars`
+  # contains '\0'
+  while i < s.len and s[i] in validChars: inc(i)
+  result = i-start
+  token = substr(s, start, i-1)
+
+proc skipWhitespace*(s: string, start = 0): int {.inline.} =
+  ## Counts the whitespace characters found at ``s[start]`` and directly
+  ## after it. Returns that count, i.e. the number of characters to skip.
+  var pos = start
+  while s[pos] in Whitespace: inc(pos)
+  result = pos - start
+
+proc skip*(s, token: string, start = 0): int {.inline.} =
+  ## Checks whether `token` occurs in ``s`` at position `start`. Returns the
+  ## length of `token` if it does and 0 otherwise (an empty `token` always
+  ## yields 0).
+  var matched = 0
+  while matched < token.len:
+    # the first mismatch means there is no `token` here
+    if s[start+matched] != token[matched]: return 0
+    inc(matched)
+  result = matched
+  
+proc skipIgnoreCase*(s, token: string, start = 0): int =
+  ## Same as `skip`, but both sides are passed through `toLower` before
+  ## they are compared, so the case of the letters ``A``..``Z`` is ignored.
+  ## Returns the length of `token` on a match and 0 otherwise.
+  var matched = 0
+  while matched < token.len:
+    if toLower(s[start+matched]) != toLower(token[matched]): return 0
+    inc(matched)
+  result = matched
+  
+proc skipUntil*(s: string, until: set[char], start = 0): int {.inline.} =
+  ## Skips characters, beginning at ``s[start]``, until a character from
+  ## the set `until` or a ``'\0'`` (the end of the string) is found.
+  ## Returns the number of characters skipped.
+  var pos = start
+  while true:
+    let c = s[pos]
+    if c == '\0' or c in until: break
+    inc(pos)
+  result = pos - start
+
+proc skipUntil*(s: string, until: char, start = 0): int {.inline.} =
+  ## Skips characters, beginning at ``s[start]``, until the character
+  ## `until` or a ``'\0'`` (the end of the string) is found.
+  ## Returns the number of characters skipped.
+  var pos = start
+  while true:
+    let c = s[pos]
+    if c == until or c == '\0': break
+    inc(pos)
+  result = pos - start
+
+proc skipWhile*(s: string, toSkip: set[char], start = 0): int {.inline.} =
+  ## Skips characters, beginning at ``s[start]``, for as long as they are
+  ## members of the set `toSkip`; a ``'\0'`` always stops the scan.
+  ## Returns the number of characters skipped.
+  var pos = start
+  while true:
+    let c = s[pos]
+    if c notin toSkip or c == '\0': break
+    inc(pos)
+  result = pos - start
+
+proc parseUntil*(s: string, token: var string, until: set[char],
+                 start = 0): int {.inline.} =
+  ## Collects the characters from ``s[start]`` up to (but excluding) the
+  ## first character that is in the set `until`, or up to the end of ``s``,
+  ## and stores them in ``token``. Returns the number of collected
+  ## characters, which is 0 if nothing could be collected.
+  var stop = start
+  while stop < s.len:
+    if s[stop] in until: break
+    inc(stop)
+  token = substr(s, start, stop-1)
+  result = stop - start
+
+proc parseUntil*(s: string, token: var string, until: char,
+                 start = 0): int {.inline.} =
+  ## Collects the characters from ``s[start]`` up to (but excluding) the
+  ## first occurrence of the character `until`, or up to the end of ``s``,
+  ## and stores them in ``token``. Returns the number of collected
+  ## characters, which is 0 if nothing could be collected.
+  var stop = start
+  while stop < s.len:
+    if s[stop] == until: break
+    inc(stop)
+  token = substr(s, start, stop-1)
+  result = stop - start
+
+proc parseWhile*(s: string, token: var string, validChars: set[char],
+                 start = 0): int {.inline.} =
+  ## Parses a token starting at ``s[start]`` and stores it in ``token``.
+  ## A token consists of the characters in `validChars`. Returns the number
+  ## of parsed characters; this is 0 (and ``token`` becomes empty) if
+  ## ``s[start]`` is not in `validChars` or `start` is not inside ``s``.
+  var i = start
+  # the explicit bound keeps the scan inside ``s`` even when `validChars`
+  # contains '\0'
+  while i < s.len and s[i] in validChars: inc(i)
+  result = i-start
+  token = substr(s, start, i-1)
+
+proc captureBetween*(s: string, first: char, second = '\0', start = 0): string =
+  ## Finds the first occurrence of ``first`` at or after ``s[start]`` and
+  ## returns the text that follows it, up to (but excluding) the next
+  ## ``second``. If ``second`` is '\0', ``first`` is used as the closing
+  ## character as well.
+  let closing = if second == '\0': first else: second
+  # position right behind the opening character
+  let contentStart = start + skipUntil(s, first, start) + 1
+  result = ""
+  discard parseUntil(s, result, closing, contentStart)
+
+{.push overflowChecks: on.}
+# this must be compiled with overflow checking turned on:
+proc rawParseInt(s: string, b: var BiggestInt, start = 0): int =
+  ## Parses an optionally signed decimal integer starting at ``s[start]``.
+  ## Underscores following a digit are skipped. On success the value is
+  ## stored in ``b`` and the number of consumed characters is returned; if
+  ## no digit follows the optional sign, 0 is returned and ``b`` is left
+  ## untouched.
+  var
+    sign: BiggestInt = -1
+    i = start
+  if s[i] == '+': inc(i)
+  elif s[i] == '-':
+    inc(i)
+    sign = 1
+  if s[i] in {'0'..'9'}:
+    b = 0
+    while s[i] in {'0'..'9'}:
+      # the value is accumulated as a negative number and flipped at the
+      # end; 'sign' is therefore -1 for positive and 1 for negative input
+      b = b * 10 - (ord(s[i]) - ord('0'))
+      inc(i)
+      while s[i] == '_': inc(i) # underscores are allowed and ignored
+    b = b * sign
+    result = i - start
+{.pop.} # overflowChecks
+
+proc parseBiggestInt*(s: string, number: var BiggestInt, start = 0): int {.
+  rtl, extern: "npuParseBiggestInt", noSideEffect.} =
+  ## parses an integer starting at `start` and stores the value into `number`.
+  ## Result is the number of processed chars or 0 if there is no integer;
+  ## in the latter case `number` is not modified.
+  ## `EOverflow` is raised if an overflow occurs.
+  var res: BiggestInt
+  # use 'res' for exception safety (don't write to 'number' in case of an
+  # overflow exception):
+  result = rawParseInt(s, res, start)
+  # only store on success, so that a failed parse does not reset 'number'
+  # to 0 (this matches what parseInt and parseFloat do)
+  if result != 0:
+    number = res
+
+proc parseInt*(s: string, number: var int, start = 0): int {.
+  rtl, extern: "npuParseInt", noSideEffect.} =
+  ## Parses an integer starting at `start` and stores the value into
+  ## `number`. Returns the number of processed chars or 0 if there is no
+  ## integer, in which case `number` is left untouched.
+  ## `OverflowError` is raised if the value does not fit into an ``int``.
+  var parsed: BiggestInt
+  result = parseBiggestInt(s, parsed, start)
+  # the range check only matters where ``int`` is narrower than BiggestInt
+  let outOfRange = sizeof(int) <= 4 and
+                   (parsed < low(int) or parsed > high(int))
+  if outOfRange:
+    raise newException(OverflowError, "overflow")
+  if result != 0:
+    number = int(parsed)
+proc parseBiggestFloat*(s: string, number: var BiggestFloat, start = 0): int {.
+  magic: "ParseBiggestFloat", importc: "nimParseBiggestFloat", noSideEffect.}
+  ## Parses a float starting at `start` and stores the value into `number`.
+  ## Result is the number of processed chars or 0 if a parsing error
+  ## occurred.
+  ##
+  ## This declaration has no body in this module: the proc is bound through
+  ## the ``magic`` and ``importc`` pragmas above.
+
+proc parseFloat*(s: string, number: var float, start = 0): int {.
+  rtl, extern: "npuParseFloat", noSideEffect.} =
+  ## Parses a float starting at `start` and stores the value into `number`.
+  ## Returns the number of processed chars, or 0 if a parsing error
+  ## occurred; `number` is only written when parsing succeeded.
+  var parsed: BiggestFloat
+  let consumed = parseBiggestFloat(s, parsed, start)
+  if consumed != 0: number = parsed
+  result = consumed
+  
+type
+  InterpolatedKind* = enum   ## describes for `interpolatedFragments`
+                             ## which part of the interpolated string is
+                             ## yielded; for example in "str$$$var${expr}"
+    ikStr,                   ## ``str`` part: plain text outside of any
+                             ## ``$`` construct
+    ikDollar,                ## escaped ``$`` part: a ``$$`` in the input,
+                             ## yielded as a single ``$``
+    ikVar,                   ## ``var`` part: the identifier that follows
+                             ## a ``$``, yielded without the ``$``
+    ikExpr                   ## ``expr`` part: the text between ``${`` and
+                             ## the matching ``}``, yielded without them
+
+# the old name `TInterpolatedKind` is kept as a deprecated alias:
+{.deprecated: [TInterpolatedKind: InterpolatedKind].}
+
+iterator interpolatedFragments*(s: string): tuple[kind: InterpolatedKind,
+  value: string] =
+  ## Tokenizes the string `s` into substrings for interpolation purposes.
+  ##
+  ## Each yielded tuple holds the kind of the fragment and its text: plain
+  ## text (``ikStr``), the identifier of a ``$identifier`` (``ikVar``), the
+  ## content of a ``${...}`` (``ikExpr``, nested braces are allowed) or a
+  ## single ``$`` for a ``$$`` escape (``ikDollar``).
+  ##
+  ## Raises ``ValueError`` if a ``${`` has no closing ``}`` or if a ``$`` is
+  ## not followed by ``{``, ``$`` or the start of an identifier.
+  ##
+  ## Example:
+  ##
+  ## .. code-block:: nim
+  ##   for k, v in interpolatedFragments("  $this is ${an  example}  $$"):
+  ##     echo "(", k, ", \"", v, "\")"
+  ##
+  ## Results in:
+  ##
+  ## .. code-block:: nim
+  ##   (ikStr, "  ")
+  ##   (ikVar, "this")
+  ##   (ikStr, " is ")
+  ##   (ikExpr, "an  example")
+  ##   (ikStr, "  ")
+  ##   (ikDollar, "$")
+  # `i` is the index of the first character of the fragment to yield,
+  # `j` ends up one past the last character consumed for it.
+  var i = 0
+  var kind: InterpolatedKind
+  while true:
+    var j = i
+    if s[j] == '$':
+      if s[j+1] == '{':
+        # "${...}": scan to the matching '}' while tracking nested braces
+        inc j, 2
+        var nesting = 0
+        while true:
+          case s[j]
+          of '{': inc nesting
+          of '}':
+            if nesting == 0: 
+              inc j
+              break
+            dec nesting
+          of '\0':
+            # a '\0' was hit before the closing brace
+            raise newException(ValueError, 
+              "Expected closing '}': " & substr(s, i, s.high))
+          else: discard
+          inc j
+        inc i, 2 # skip ${
+        kind = ikExpr
+      elif s[j+1] in IdentStartChars:
+        # "$identifier": consume the identifier characters
+        inc j, 2
+        while s[j] in IdentChars: inc(j)
+        inc i # skip $
+        kind = ikVar
+      elif s[j+1] == '$':
+        # "$$": only the second '$' is part of the yielded fragment
+        inc j, 2
+        inc i # skip $
+        kind = ikDollar
+      else:
+        raise newException(ValueError, 
+          "Unable to parse a varible name at " & substr(s, i, s.high))
+    else:
+      # plain text: everything up to the next '$' or the end of `s`
+      while j < s.len and s[j] != '$': inc j
+      kind = ikStr
+    if j > i:
+      # do not copy the trailing } for ikExpr:
+      yield (kind, substr(s, i, j-1-ord(kind == ikExpr)))
+    else:
+      # nothing was consumed, so there is nothing left to tokenize
+      break
+    i = j
+
+when isMainModule:
+  import sequtils
+
+  # interpolatedFragments: a "{" without a leading "$" is plain text and
+  # nested braces inside "${...}" belong to the expression
+  block:
+    let fragments = toSeq(interpolatedFragments(
+      "$test{}  $this is ${an{  example}}  "))
+    assert fragments == @[
+      (ikVar, "test"), (ikStr, "{}  "), (ikVar, "this"),
+      (ikStr, " is "), (ikExpr, "an{  example}"), (ikStr, "  ")]
+
+  # parseHex: digits are shifted into the value that is already there
+  block:
+    var acc = 0
+    discard parseHex("0x38", acc)
+    assert acc == 56
+    discard parseHex("0x34", acc)
+    assert acc == 56 * 256 + 52
+    acc = -1
+    discard parseHex("0x38", acc)
+    assert acc == -200
+
+
+{.pop.}