#
#
# Nimrod's Runtime Library
# (c) Copyright 2011 Andreas Rumpf
#
# See the file "copying.txt", included in this
# distribution, for details about the copyright.
#
## This module contains helpers for parsing tokens, numbers, identifiers, etc.
{.deadCodeElim: on.}
{.push debugger:off .} # the user does not want to trace a part
# of the standard library!
include "system/inclrtl"
const
  # Character classes used by the scanning procs of this module.
  Whitespace = {' ', '\t', '\v', '\r', '\l', '\f'} # skipped by `skipWhitespace`
  IdentChars = {'a'..'z', 'A'..'Z', '0'..'9', '_'} # may follow an identifier's first char
  IdentStartChars = {'a'..'z', 'A'..'Z', '_'} # may start an identifier (no digits)
    ## copied from strutils
proc toLower(c: char): char {.inline.} =
  ## ASCII-only lower-casing: maps 'A'..'Z' onto 'a'..'z' and returns every
  ## other character unchanged.
  if c in {'A'..'Z'}:
    result = chr(ord(c) + (ord('a') - ord('A')))
  else:
    result = c
proc parseHex*(s: string, number: var int, start = 0): int {.
  rtl, extern: "npuParseHex", noSideEffect.} =
  ## Parses a hexadecimal number beginning at ``s[start]`` and stores its
  ## value in ``number``. Returns the number of the parsed characters or 0
  ## in case of an error (no hexadecimal digit found).
  ##
  ## An optional ``0x``, ``0X`` or ``#`` prefix is accepted and underscores
  ## between the digits are ignored. No overflow check is performed: digits
  ## are shifted in, so excess high bits are discarded.
  ##
  ## ``number`` is only written when at least one digit was parsed; its
  ## previous content never contributes to the result.
  var i = start
  var value = 0 # accumulate locally so stale bits in `number` cannot leak in
  var foundDigit = false
  if s[i] == '0' and (s[i+1] == 'x' or s[i+1] == 'X'): inc(i, 2)
  elif s[i] == '#': inc(i)
  while true:
    case s[i]
    of '_': nil
    of '0'..'9':
      value = value shl 4 or (ord(s[i]) - ord('0'))
      foundDigit = true
    of 'a'..'f':
      value = value shl 4 or (ord(s[i]) - ord('a') + 10)
      foundDigit = true
    of 'A'..'F':
      value = value shl 4 or (ord(s[i]) - ord('A') + 10)
      foundDigit = true
    else: break
    inc(i)
  if foundDigit:
    number = value
    result = i-start
proc parseOct*(s: string, number: var int, start = 0): int {.
  rtl, extern: "npuParseOct", noSideEffect.} =
  ## Parses an octal number beginning at ``s[start]`` and stores its value in
  ## ``number``. Returns the number of the parsed characters or 0 in case of
  ## an error (no octal digit found).
  ##
  ## An optional ``0o`` or ``0O`` prefix is accepted and underscores between
  ## the digits are ignored. No overflow check is performed.
  ##
  ## ``number`` is only written when at least one digit was parsed; its
  ## previous content never contributes to the result.
  var i = start
  var value = 0 # accumulate locally so stale bits in `number` cannot leak in
  var foundDigit = false
  if s[i] == '0' and (s[i+1] == 'o' or s[i+1] == 'O'): inc(i, 2)
  while true:
    case s[i]
    of '_': nil
    of '0'..'7':
      value = value shl 3 or (ord(s[i]) - ord('0'))
      foundDigit = true
    else: break
    inc(i)
  if foundDigit:
    number = value
    result = i-start
proc parseIdent*(s: string, ident: var string, start = 0): int =
  ## Parses the identifier that begins at ``s[start]`` and stores it in
  ## ``ident``. Returns the number of the parsed characters, or 0 (leaving
  ## ``ident`` untouched) if ``s[start]`` cannot start an identifier.
  if s[start] notin IdentStartChars: return 0
  # `last` is the index of the final character belonging to the identifier
  var last = start
  while s[last+1] in IdentChars: inc(last)
  ident = substr(s, start, last)
  result = last - start + 1
proc parseIdent*(s: string, start = 0): string =
  ## Parses the identifier that begins at ``s[start]`` and returns it.
  ## Returns an empty string if ``s[start]`` cannot start an identifier.
  if s[start] in IdentStartChars:
    # `stop` ends up one past the identifier's last character
    var stop = start + 1
    while s[stop] in IdentChars: inc(stop)
    result = substr(s, start, stop-1)
  else:
    result = ""
proc parseToken*(s: string, token: var string, validChars: set[char],
                 start = 0): int {.inline, deprecated.} =
  ## Parses a token beginning at ``s[start]`` and stores it in ``token``.
  ## Returns the number of the parsed characters or 0 in case of an error.
  ## A token consists of the characters in `validChars`.
  ##
  ## Scanning always stops at the end of ``s``, even when `validChars`
  ## contains ``'\0'``.
  ##
  ## **Deprecated since version 0.8.12**: Use ``parseWhile`` instead.
  var i = start
  # the explicit length bound keeps the loop inside the string for sets
  # that include the terminating zero
  while i < s.len and s[i] in validChars: inc(i)
  result = i-start
  token = substr(s, start, i-1)
proc skipWhitespace*(s: string, start = 0): int {.inline.} =
  ## Counts the run of whitespace characters that begins at ``s[start]``
  ## and returns its length (0 if ``s[start]`` is not whitespace).
  var pos = start
  while s[pos] in Whitespace: inc(pos)
  result = pos - start
proc skip*(s, token: string, start = 0): int {.inline.} =
  ## Checks whether `token` occurs in `s` at position `start`. Returns the
  ## length of `token` if it matches completely and 0 otherwise.
  var matched = 0
  while matched < token.len and s[start+matched] == token[matched]:
    inc(matched)
  # a partial match counts as no match at all
  if matched == token.len: result = matched
proc skipIgnoreCase*(s, token: string, start = 0): int =
  ## Checks whether `token` occurs in `s` at position `start`, comparing
  ## the characters after lower-casing both sides with `toLower`. Returns
  ## the length of `token` if it matches completely and 0 otherwise.
  var matched = 0
  while matched < token.len and
      toLower(s[start+matched]) == toLower(token[matched]):
    inc(matched)
  # a partial match counts as no match at all
  if matched == token.len: result = matched
proc skipUntil*(s: string, until: set[char], start = 0): int {.inline.} =
  ## Skips all characters, beginning at ``s[start]``, until one char from
  ## the set `until` or a ``'\0'`` is found.
  ## Returns number of characters skipped.
  var pos = start
  while s[pos] != '\0' and s[pos] notin until: inc(pos)
  result = pos - start
proc skipWhile*(s: string, toSkip: set[char], start = 0): int {.inline.} =
  ## Skips all characters, beginning at ``s[start]``, for as long as they
  ## are in the set `toSkip`; a ``'\0'`` always ends the run.
  ## Returns number of characters skipped.
  var pos = start
  while s[pos] != '\0' and s[pos] in toSkip: inc(pos)
  result = pos - start
proc parseUntil*(s: string, token: var string, until: set[char],
                 start = 0): int {.inline.} =
  ## Parses a token beginning at ``s[start]`` and stores it in ``token``.
  ## Returns the number of the parsed characters or 0 in case of an error.
  ## A token consists of the characters notin `until`.
  ##
  ## If no character from `until` occurs, the token extends to the end of
  ## ``s``.
  var i = start
  # without the length bound the scan would run past the end of the string
  # whenever no terminator from `until` is present
  while i < s.len and s[i] notin until: inc(i)
  result = i-start
  token = substr(s, start, i-1)
proc parseWhile*(s: string, token: var string, validChars: set[char],
                 start = 0): int {.inline.} =
  ## Parses a token beginning at ``s[start]`` and stores it in ``token``.
  ## Returns the number of the parsed characters or 0 in case of an error.
  ## A token consists of the characters in `validChars`.
  ##
  ## Scanning always stops at the end of ``s``, even when `validChars`
  ## contains ``'\0'`` (for instance a complement set).
  var i = start
  # the explicit length bound keeps the loop inside the string for sets
  # that include the terminating zero
  while i < s.len and s[i] in validChars: inc(i)
  result = i-start
  token = substr(s, start, i-1)
{.push overflowChecks: on.}
# Overflow checking has to be enabled for this proc: the checked arithmetic
# below is the only thing that detects numbers which are too large.
proc rawParseInt(s: string, b: var biggestInt, start = 0): int =
  ## Parses an optionally signed decimal integer beginning at ``s[start]``
  ## into `b`. Returns the number of consumed characters, or 0 (leaving `b`
  ## untouched) if no digit follows the optional sign.
  var
    pos = start
    factor: BiggestInt = -1 # final multiplier; -1 because `b` is built negated
  case s[pos]
  of '+': inc(pos)
  of '-':
    factor = 1
    inc(pos)
  else: nil
  if s[pos] in {'0'..'9'}:
    b = 0
    while s[pos] in {'0'..'9'}:
      # The value is accumulated as a negative number -- presumably so that
      # the most negative value can be represented while parsing.
      b = b * 10 - (ord(s[pos]) - ord('0'))
      inc(pos)
      while s[pos] == '_': inc(pos) # underscores are allowed and ignored
    b = b * factor
    result = pos - start
{.pop.} # overflowChecks
proc parseBiggestInt*(s: string, number: var biggestInt, start = 0): int {.
  rtl, extern: "npuParseBiggestInt", noSideEffect.} =
  ## Parses an integer starting at `start` and stores the value into
  ## `number`. The result is the number of processed chars or 0 if there is
  ## no integer. `EOverflow` is raised if an overflow occurs.
  ##
  ## This is the exported entry point for `rawParseInt`, which does the work.
  return rawParseInt(s, number, start)
proc parseInt*(s: string, number: var int, start = 0): int {.
  rtl, extern: "npuParseInt", noSideEffect.} =
  ## Parses an integer starting at `start` and stores the value into `number`.
  ## Result is the number of processed chars or 0 if there is no integer.
  ## `EOverflow` is raised if an overflow occurs.
  ##
  ## `number` is left untouched when no integer could be parsed, which
  ## matches the behaviour of `parseBiggestInt`.
  var res: biggestInt
  result = parseBiggestInt(s, res, start)
  # the range check only matters on targets where `int` is narrower than
  # `biggestInt`
  if (sizeof(int) <= 4) and
      ((res < low(int)) or (res > high(int))):
    raise newException(EOverflow, "overflow")
  elif result != 0:
    # only store a value that was actually parsed
    number = int(res)
proc parseBiggestFloat*(s: string, number: var biggestFloat, start = 0): int {.
  rtl, extern: "npuParseBiggestFloat", noSideEffect.} =
  ## Parses a float starting at `start` and stores the value into `number`.
  ## Result is the number of processed chars or 0 if a parsing error
  ## occurred.
  ##
  ## Accepted input: an optional sign followed by either ``nan`` / ``inf``
  ## (any letter case, not followed by an identifier character) or decimal
  ## digits with an optional fractional part and an optional exponent.
  ## Underscores after digits are ignored. `number` is reset to 0.0 before
  ## parsing starts, so it is modified even if parsing fails.
  var
    pos = start
    sign = 1.0
    negativeExp = false
    exponent = 0
    sawDigit = false
  number = 0.0
  if s[pos] == '+': inc(pos)
  elif s[pos] == '-':
    sign = -1.0
    inc(pos)
  # special literal: NaN (the sign is not applied to it)
  if s[pos] in {'N', 'n'}:
    if s[pos+1] in {'A', 'a'} and s[pos+2] in {'N', 'n'} and
        s[pos+3] notin IdentChars:
      number = NaN
      return pos+3 - start
    return 0
  # special literal: Inf
  if s[pos] in {'I', 'i'}:
    if s[pos+1] in {'N', 'n'} and s[pos+2] in {'F', 'f'} and
        s[pos+3] notin IdentChars:
      number = Inf*sign
      return pos+3 - start
    return 0
  # integer part
  while s[pos] in {'0'..'9'}:
    sawDigit = true
    number = number * 10.0 + toFloat(ord(s[pos]) - ord('0'))
    inc(pos)
    while s[pos] == '_': inc(pos)
  # fractional part
  if s[pos] == '.':
    var scale = 1.0
    inc(pos)
    while s[pos] in {'0'..'9'}:
      sawDigit = true
      number = number * 10.0 + toFloat(ord(s[pos]) - ord('0'))
      scale = scale * 10.0
      inc(pos)
      while s[pos] == '_': inc(pos)
    # dividing once at the end, instead of scaling every digit, preserves
    # precision
    number = number / scale
  # neither the integer nor the fractional part contained a digit
  if not sawDigit: return 0
  # exponent part
  if s[pos] in {'e', 'E'}:
    inc(pos)
    if s[pos] == '+':
      inc(pos)
    elif s[pos] == '-':
      negativeExp = true
      inc(pos)
    # an exponent marker must be followed by at least one digit
    if s[pos] notin {'0'..'9'}:
      return 0
    while s[pos] in {'0'..'9'}:
      exponent = exponent * 10 + ord(s[pos]) - ord('0')
      inc(pos)
      while s[pos] == '_': inc(pos)
  # apply the exponent: compute 10^exponent by repeated multiplication
  var power = 1.0
  for j in 1..exponent: power = power * 10.0
  if negativeExp: number = number / power
  else: number = number * power
  # apply the sign
  number = number * sign
  result = pos - start
proc parseFloat*(s: string, number: var float, start = 0): int {.
  rtl, extern: "npuParseFloat", noSideEffect.} =
  ## Parses a float starting at `start` and stores the value into `number`.
  ## Result is the number of processed chars or 0 if a parsing error
  ## occurred.
  ##
  ## The parsing is done by `parseBiggestFloat`; whatever it stored is
  ## copied into `number`, also when the result is 0.
  var parsed: biggestFloat
  result = parseBiggestFloat(s, parsed, start)
  number = parsed
proc isEscaped*(s: string, pos: int) : bool =
  ## Returns true if the character at ``s[pos]`` is preceded by an odd
  ## number of consecutive backslashes. `pos` must be a valid index
  ## into `s`.
  assert pos >= 0 and pos < s.len
  var k = pos - 1
  # walk backwards over the run of backslashes, flipping the parity each step
  while k >= 0 and s[k] == '\\':
    result = not result
    dec k
type
  # Kind of a fragment produced by `interpolatedFragments`:
  # `ikString` marks literal text between interpolations, `ikExpr` marks the
  # text of a ``$name`` or ``${...}`` interpolation.
  TInterpolatedKind* = enum
    ikString, ikExpr
  # One fragment: its kind together with the corresponding slice of the
  # input string.
  TInterpStrFragment* = tuple[kind: TInterpolatedKind, value: string]
iterator interpolatedFragments*(s: string): TInterpStrFragment =
  ## Splits `s` into its literal pieces and its interpolated expressions.
  ##
  ## An unescaped ``$`` that is not the last character starts an
  ## interpolation. It is followed either by an identifier (``$name``) or by
  ## a braced expression (``${...}``) which may contain nested, balanced
  ## braces. For every piece a ``(kind, value)`` tuple is yielded:
  ## `ikString` for literal text and `ikExpr` for the identifier or for the
  ## text between the outermost braces.
  ##
  ## A ``$`` or a brace preceded by an odd number of backslashes (see
  ## `isEscaped`) is not treated specially; the backslashes stay in the
  ## yielded text.
  ##
  ## Raises `EInvalidValue` if a ``$`` is followed by neither an identifier
  ## nor ``{``, or if a ``${`` expression is never closed.
  var
    i = 0
    tokenStart = 0 # index of the first char not yet yielded

  proc token(kind: TInterpolatedKind, value: string): TInterpStrFragment =
    result.kind = kind
    result.value = value

  while i < s.len:
    # The $ sign marks the start of an interpolation.
    #
    # It's followed either by a variable name or an opening bracket
    # (so it should be before the end of the string)
    # if the dollar sign is escaped, don't trigger interpolation
    if s[i] == '$' and i < (s.len - 1) and not isEscaped(s, i):
      # Interpolation starts here.
      # Return any string that we've run over so far.
      if i != tokenStart:
        yield token(ikString, s[tokenStart..i-1])

      if s[i+1] == '{':
        # Complex expression: ${foo(bar) in {1..100}}
        # Find closing bracket, while respecting any nested brackets
        inc i
        tokenStart = i + 1
        var
          brackets = {'{', '}'}
          nestingCount = 1
        while i < s.len:
          inc i, skipUntil(s, brackets, i+1) + 1
          # skipUntil also stops at the terminating zero: running off the
          # end means the expression was never closed
          if i >= s.len: break
          if not isEscaped(s, i):
            if s[i] == '}':
              dec nestingCount
              if nestingCount == 0: break
            elif s[i] == '{':
              inc nestingCount
        if nestingCount != 0:
          raise newException(EInvalidValue,
            "Expected closing '}': " & s[tokenStart-2..s.len-1])
        yield token(ikExpr, s[tokenStart..(i-1)])
        tokenStart = i + 1
      else:
        tokenStart = i + 1
        var identifier = parseIdent(s, i+1)
        if identifier.len > 0:
          inc i, identifier.len
          yield token(ikExpr, s[tokenStart..i])
          tokenStart = i + 1
        else:
          raise newException(EInvalidValue, "Unable to parse a varible name at " & s[i..s.len])
    inc i
  #end while

  # We've reached the end of the string without finding a new interpolation.
  # Return the last fragment of the string.
  if i != tokenStart:
    yield token(ikString, s[tokenStart..i-1])
{.pop.}