# # # Nimrod's Runtime Library # (c) Copyright 2011 Andreas Rumpf # # See the file "copying.txt", included in this # distribution, for details about the copyright. # ## This module contains helpers for parsing tokens, numbers, identifiers, etc. {.deadCodeElim: on.} {.push debugger:off .} # the user does not want to trace a part # of the standard library! include "system/inclrtl" const Whitespace = {' ', '\t', '\v', '\r', '\l', '\f'} IdentChars = {'a'..'z', 'A'..'Z', '0'..'9', '_'} IdentStartChars = {'a'..'z', 'A'..'Z', '_'} ## copied from strutils proc toLower(c: char): char {.inline.} = result = if c in {'A'..'Z'}: chr(ord(c)-ord('A')+ord('a')) else: c proc parseHex*(s: string, number: var int, start = 0): int {. rtl, extern: "npuParseHex", noSideEffect.} = ## parses a hexadecimal number and stores its value in ``number``. Returns ## the number of the parsed characters or 0 in case of an error. var i = start var foundDigit = false if s[i] == '0' and (s[i+1] == 'x' or s[i+1] == 'X'): inc(i, 2) elif s[i] == '#': inc(i) while true: case s[i] of '_': nil of '0'..'9': number = number shl 4 or (ord(s[i]) - ord('0')) foundDigit = true of 'a'..'f': number = number shl 4 or (ord(s[i]) - ord('a') + 10) foundDigit = true of 'A'..'F': number = number shl 4 or (ord(s[i]) - ord('A') + 10) foundDigit = true else: break inc(i) if foundDigit: result = i-start proc parseOct*(s: string, number: var int, start = 0): int {. rtl, extern: "npuParseOct", noSideEffect.} = ## parses an octal number and stores its value in ``number``. Returns ## the number of the parsed characters or 0 in case of an error. var i = start var foundDigit = false if s[i] == '0' and (s[i+1] == 'o' or s[i+1] == 'O'): inc(i, 2) while true: case s[i] of '_': nil of '0'..'7': number = number shl 3 or (ord(s[i]) - ord('0')) foundDigit = true else: break inc(i) if foundDigit: result = i-start proc parseIdent*(s: string, ident: var string, start = 0): int = ## parses an identifier and stores it in ``ident``. Returns ## the number of the parsed characters or 0 in case of an error. var i = start if s[i] in IdentStartChars: inc(i) while s[i] in IdentChars: inc(i) ident = substr(s, start, i-1) result = i-start proc parseIdent*(s: string, start = 0): string = ## parses an identifier and stores it in ``ident``. ## Returns the parsed identifier or an empty string in case of an error. result = "" var i = start if s[i] in IdentStartChars: inc(i) while s[i] in IdentChars: inc(i) result = substr(s, start, i-1) proc parseToken*(s: string, token: var string, validChars: set[char], start = 0): int {.inline, deprecated.} = ## parses a token and stores it in ``token``. Returns ## the number of the parsed characters or 0 in case of an error. A token ## consists of the characters in `validChars`. ## ## **Deprecated since version 0.8.12**: Use ``parseWhile`` instead. var i = start while s[i] in validChars: inc(i) result = i-start token = substr(s, start, i-1) proc skipWhitespace*(s: string, start = 0): int {.inline.} = ## skips the whitespace starting at ``s[start]``. Returns the number of ## skipped characters. while s[start+result] in Whitespace: inc(result) proc skip*(s, token: string, start = 0): int {.inline.} = while result < token.len and s[result+start] == token[result]: inc(result) if result != token.len: result = 0 proc skipIgnoreCase*(s, token: string, start = 0): int = while result < token.len and toLower(s[result+start]) == toLower(token[result]): inc(result) if result != token.len: result = 0 proc skipUntil*(s: string, until: set[char], start = 0): int {.inline.} = ## Skips all characters until one char from the set `token` is found. ## Returns number of characters skipped. while s[result+start] notin until and s[result+start] != '\0': inc(result) proc skipWhile*(s: string, toSkip: set[char], start = 0): int {.inline.} = ## Skips all characters while one char from the set `token` is found. ## Returns number of characters skipped. while s[result+start] in toSkip and s[result+start] != '\0': inc(result) proc parseUntil*(s: string, token: var string, until: set[char], start = 0): int {.inline.} = ## parses a token and stores it in ``token``. Returns ## the number of the parsed characters or 0 in case of an error. A token ## consists of the characters notin `until`. var i = start while s[i] notin until: inc(i) result = i-start token = substr(s, start, i-1) proc parseWhile*(s: string, token: var string, validChars: set[char], start = 0): int {.inline.} = ## parses a token and stores it in ``token``. Returns ## the number of the parsed characters or 0 in case of an error. A token ## consists of the characters in `validChars`. var i = start while s[i] in validChars: inc(i) result = i-start token = substr(s, start, i-1) {.push overflowChecks: on.} # this must be compiled with overflow checking turned on: proc rawParseInt(s: string, b: var biggestInt, start = 0): int = var sign: BiggestInt = -1 i = start if s[i] == '+': inc(i) elif s[i] == '-': inc(i) sign = 1 if s[i] in {'0'..'9'}: b = 0 while s[i] in {'0'..'9'}: b = b * 10 - (ord(s[i]) - ord('0')) inc(i) while s[i] == '_': inc(i) # underscores are allowed and ignored b = b * sign result = i - start {.pop.} # overflowChecks proc parseBiggestInt*(s: string, number: var biggestInt, start = 0): int {. rtl, extern: "npuParseBiggestInt", noSideEffect.} = ## parses an integer starting at `start` and stores the value into `number`. ## Result is the number of processed chars or 0 if there is no integer. ## `EOverflow` is raised if an overflow occurs. result = rawParseInt(s, number, start) proc parseInt*(s: string, number: var int, start = 0): int {. rtl, extern: "npuParseInt", noSideEffect.} = ## parses an integer starting at `start` and stores the value into `number`. ## Result is the number of processed chars or 0 if there is no integer. ## `EOverflow` is raised if an overflow occurs. var res: biggestInt result = parseBiggestInt(s, res, start) if (sizeof(int) <= 4) and ((res < low(int)) or (res > high(int))): raise newException(EOverflow, "overflow") else: number = int(res) proc parseBiggestFloat*(s: string, number: var biggestFloat, start = 0): int {. rtl, extern: "npuParseBiggestFloat", noSideEffect.} = ## parses a float starting at `start` and stores the value into `number`. ## Result is the number of processed chars or 0 if there occured a parsing ## error. var esign = 1.0 sign = 1.0 i = start exponent: int flags: int number = 0.0 if s[i] == '+': inc(i) elif s[i] == '-': sign = -1.0 inc(i) if s[i] == 'N' or s[i] == 'n': if s[i+1] == 'A' or s[i+1] == 'a': if s[i+2] == 'N' or s[i+2] == 'n': if s[i+3] notin IdentChars: number = NaN return i+3 - start return 0 if s[i] == 'I' or s[i] == 'i': if s[i+1] == 'N' or s[i+1] == 'n': if s[i+2] == 'F' or s[i+2] == 'f': if s[i+3] notin IdentChars: number = Inf*sign return i+3 - start return 0 while s[i] in {'0'..'9'}: # Read integer part flags = flags or 1 number = number * 10.0 + toFloat(ord(s[i]) - ord('0')) inc(i) while s[i] == '_': inc(i) # Decimal? if s[i] == '.': var hd = 1.0 inc(i) while s[i] in {'0'..'9'}: # Read fractional part flags = flags or 2 number = number * 10.0 + toFloat(ord(s[i]) - ord('0')) hd = hd * 10.0 inc(i) while s[i] == '_': inc(i) number = number / hd # this complicated way preserves precision # Again, read integer and fractional part if flags == 0: return 0 # Exponent? if s[i] in {'e', 'E'}: inc(i) if s[i] == '+': inc(i) elif s[i] == '-': esign = -1.0 inc(i) if s[i] notin {'0'..'9'}: return 0 while s[i] in {'0'..'9'}: exponent = exponent * 10 + ord(s[i]) - ord('0') inc(i) while s[i] == '_': inc(i) # Calculate Exponent var hd = 1.0 for j in 1..exponent: hd = hd * 10.0 if esign > 0.0: number = number * hd else: number = number / hd # evaluate sign number = number * sign result = i - start proc parseFloat*(s: string, number: var float, start = 0): int {. rtl, extern: "npuParseFloat", noSideEffect.} = ## parses a float starting at `start` and stores the value into `number`. ## Result is the number of processed chars or 0 if there occured a parsing ## error. var bf: biggestFloat result = parseBiggestFloat(s, bf, start) number = bf proc isEscaped*(s: string, pos: int) : bool = assert pos >= 0 and pos < s.len var backslashes = 0 j = pos - 1 while j >= 0: if s[j] == '\\': inc backslashes dec j else: break return backslashes mod 2 != 0 type TInterpolatedKind* = enum ikString, ikExpr TInterpStrFragment* = tuple[kind: TInterpolatedKind, value: string] iterator interpolatedFragments*(s: string): TInterpStrFragment = var i = 0 tokenStart = 0 proc token(kind: TInterpolatedKind, value: string): TInterpStrFragment = result.kind = kind result.value = value while i < s.len: # The $ sign marks the start of an interpolation. # # It's followed either by a varialbe name or an opening bracket # (so it should be before the end of the string) # if the dollar sign is escaped, don't trigger interpolation if s[i] == '$' and i < (s.len - 1) and not isEscaped(s, i): # Interpolation starts here. # Return any string that we've ran over so far. if i != tokenStart: yield token(ikString, s[tokenStart..i-1]) var next = s[i+1] if next == '{': # Complex expression: ${foo(bar) in {1..100}} # Find closing braket, while respecting any nested brackets inc i tokenStart = i + 1 var brackets = {'{', '}'} nestingCount = 1 while i < s.len: inc i, skipUntil(s, brackets, i+1) + 1 if not isEscaped(s, i): if s[i] == '}': dec nestingCount if nestingCount == 0: break else: inc nestingCount yield token(ikExpr, s[tokenStart..(i-1)]) tokenStart = i + 1 else: tokenStart = i + 1 var identifier = parseIdent(s, i+1) if identifier.len > 0: inc i, identifier.len yield token(ikExpr, s[tokenStart..i]) tokenStart = i + 1 else: raise newException(EInvalidValue, "Unable to parse a varible name at " & s[i..s.len]) inc i #end while # We've reached the end of the string without finding a new interpolation. # Return the last fragment at string. if i != tokenStart: yield token(ikString, s[tokenStart..i]) {.pop.}