# # # Nim's Runtime Library # (c) Copyright 2012 Andreas Rumpf # # See the file "copying.txt", included in this # distribution, for details about the copyright. # ## This module contains helpers for parsing tokens, numbers, identifiers, etc. {.deadCodeElim: on.} {.push debugger:off .} # the user does not want to trace a part # of the standard library! include "system/inclrtl" const Whitespace = {' ', '\t', '\v', '\r', '\l', '\f'} IdentChars = {'a'..'z', 'A'..'Z', '0'..'9', '_'} IdentStartChars = {'a'..'z', 'A'..'Z', '_'} ## copied from strutils proc toLower(c: char): char {.inline.} = result = if c in {'A'..'Z'}: chr(ord(c)-ord('A')+ord('a')) else: c proc parseHex*(s: string, number: var int, start = 0; maxLen = 0): int {. rtl, extern: "npuParseHex", noSideEffect.} = ## Parses a hexadecimal number and stores its value in ``number``. ## ## Returns the number of the parsed characters or 0 in case of an error. This ## proc is sensitive to the already existing value of ``number`` and will ## likely not do what you want unless you make sure ``number`` is zero. You ## can use this feature to *chain* calls, though the result int will quickly ## overflow. Example: ## ## .. code-block:: nim ## var value = 0 ## discard parseHex("0x38", value) ## assert value == 56 ## discard parseHex("0x34", value) ## assert value == 56 * 256 + 52 ## value = -1 ## discard parseHex("0x38", value) ## assert value == -200 ## ## If 'maxLen==0' the length of the hexadecimal number has no ## upper bound. Not more than ```maxLen`` characters are parsed. var i = start var foundDigit = false if s[i] == '0' and (s[i+1] == 'x' or s[i+1] == 'X'): inc(i, 2) elif s[i] == '#': inc(i) let last = if maxLen == 0: s.len else: i+maxLen while i < last: case s[i] of '_': discard of '0'..'9': number = number shl 4 or (ord(s[i]) - ord('0')) foundDigit = true of 'a'..'f': number = number shl 4 or (ord(s[i]) - ord('a') + 10) foundDigit = true of 'A'..'F': number = number shl 4 or (ord(s[i]) - ord('A') + 10) foundDigit = true else: break inc(i) if foundDigit: result = i-start proc parseOct*(s: string, number: var int, start = 0): int {. rtl, extern: "npuParseOct", noSideEffect.} = ## parses an octal number and stores its value in ``number``. Returns ## the number of the parsed characters or 0 in case of an error. var i = start var foundDigit = false if s[i] == '0' and (s[i+1] == 'o' or s[i+1] == 'O'): inc(i, 2) while true: case s[i] of '_': discard of '0'..'7': number = number shl 3 or (ord(s[i]) - ord('0')) foundDigit = true else: break inc(i) if foundDigit: result = i-start proc parseIdent*(s: string, ident: var string, start = 0): int = ## parses an identifier and stores it in ``ident``. Returns ## the number of the parsed characters or 0 in case of an error. var i = start if s[i] in IdentStartChars: inc(i) while s[i] in IdentChars: inc(i) ident = substr(s, start, i-1) result = i-start proc parseIdent*(s: string, start = 0): string = ## parses an identifier and stores it in ``ident``. ## Returns the parsed identifier or an empty string in case of an error. result = "" var i = start if s[i] in IdentStartChars: inc(i) while s[i] in IdentChars: inc(i) result = substr(s, start, i-1) proc parseToken*(s: string, token: var string, validChars: set[char], start = 0): int {.inline, deprecated.} = ## parses a token and stores it in ``token``. Returns ## the number of the parsed characters or 0 in case of an error. A token ## consists of the characters in `validChars`. ## ## **Deprecated since version 0.8.12**: Use ``parseWhile`` instead. var i = start while s[i] in validChars: inc(i) result = i-start token = substr(s, start, i-1) proc skipWhitespace*(s: string, start = 0): int {.inline.} = ## skips the whitespace starting at ``s[start]``. Returns the number of ## skipped characters. while s[start+result] in Whitespace: inc(result) proc skip*(s, token: string, start = 0): int {.inline.} = ## skips the `token` starting at ``s[start]``. Returns the length of `token` ## or 0 if there was no `token` at ``s[start]``. while result < token.len and s[result+start] == token[result]: inc(result) if result != token.len: result = 0 proc skipIgnoreCase*(s, token: string, start = 0): int = ## same as `skip` but case is ignored for token matching. while result < token.len and toLower(s[result+start]) == toLower(token[result]): inc(result) if result != token.len: result = 0 proc skipUntil*(s: string, until: set[char], start = 0): int {.inline.} = ## Skips all characters until one char from the set `until` is found ## or the end is reached. ## Returns number of characters skipped. while s[result+start] notin until and s[result+start] != '\0': inc(result) proc skipUntil*(s: string, until: char, start = 0): int {.inline.} = ## Skips all characters until the char `until` is found ## or the end is reached. ## Returns number of characters skipped. while s[result+start] != until and s[result+start] != '\0': inc(result) proc skipWhile*(s: string, toSkip: set[char], start = 0): int {.inline.} = ## Skips all characters while one char from the set `token` is found. ## Returns number of characters skipped. while s[result+start] in toSkip and s[result+start] != '\0': inc(result) proc parseUntil*(s: string, token: var string, until: set[char], start = 0): int {.inline.} = ## parses a token and stores it in ``token``. Returns ## the number of the parsed characters or 0 in case of an error. A token ## consists of the characters notin `until`. var i = start while i < s.len and s[i] notin until: inc(i) result = i-start token = substr(s, start, i-1) proc parseUntil*(s: string, token: var string, until: char, start = 0): int {.inline.} = ## parses a token and stores it in ``token``. Returns ## the number of the parsed characters or 0 in case of an error. A token ## consists of any character that is not the `until` character. var i = start while i < s.len and s[i] != until: inc(i) result = i-start token = substr(s, start, i-1) proc parseUntil*(s: string, token: var string, until: string, start = 0): int {.inline.} = ## parses a token and stores it in ``token``. Returns ## the number of the parsed characters or 0 in case of an error. A token ## consists of any character that comes before the `until` token. var i = start while i < s.len: if s[i] == until[0]: var u = 1 while i+u < s.len and u < until.len and s[i+u] == until[u]: inc u if u >= until.len: break inc(i) result = i-start token = substr(s, start, i-1) proc parseWhile*(s: string, token: var string, validChars: set[char], start = 0): int {.inline.} = ## parses a token and stores it in ``token``. Returns ## the number of the parsed characters or 0 in case of an error. A token ## consists of the characters in `validChars`. var i = start while s[i] in validChars: inc(i) result = i-start token = substr(s, start, i-1) proc captureBetween*(s: string, first: char, second = '\0', start = 0): string = ## Finds the first occurrence of ``first``, then returns everything from there ## up to ``second``(if ``second`` is '\0', then ``first`` is used). var i = skipUntil(s, first, start)+1+start result = "" discard s.parseUntil(result, if second == '\0': first else: second, i) {.push overflowChecks: on.} # this must be compiled with overflow checking turned on: proc rawParseInt(s: string, b: var BiggestInt, start = 0): int = var sign: BiggestInt = -1 i = start if s[i] == '+': inc(i) elif s[i] == '-': inc(i) sign = 1 if s[i] in {'0'..'9'}: b = 0 while s[i] in {'0'..'9'}: b = b * 10 - (ord(s[i]) - ord('0')) inc(i) while s[i] == '_': inc(i) # underscores are allowed and ignored b = b * sign result = i - start {.pop.} # overflowChecks proc parseBiggestInt*(s: string, number: var BiggestInt, start = 0): int {. rtl, extern: "npuParseBiggestInt", noSideEffect.} = ## parses an integer starting at `start` and stores the value into `number`. ## Result is the number of processed chars or 0 if there is no integer. ## `EOverflow` is raised if an overflow occurs. var res: BiggestInt # use 'res' for exception safety (don't write to 'number' in case of an # overflow exception): result = rawParseInt(s, res, start) number = res proc parseInt*(s: string, number: var int, start = 0): int {. rtl, extern: "npuParseInt", noSideEffect.} = ## parses an integer starting at `start` and stores the value into `number`. ## Result is the number of processed chars or 0 if there is no integer. ## `EOverflow` is raised if an overflow occurs. var res: BiggestInt result = parseBiggestInt(s, res, start) if (sizeof(int) <= 4) and ((res < low(int)) or (res > high(int))): raise newException(OverflowError, "overflow") elif result != 0: number = int(res) # overflowChecks doesn't work with uint64 proc rawParseUInt(s: string, b: var uint64, start = 0): int = var res = 0'u64 prev = 0'u64 i = start if s[i] == '+': inc(i) # Allow if s[i] in {'0'..'9'}: b = 0 while s[i] in {'0'..'9'}: prev = res res = res * 10 + (ord(s[i]) - ord('0')).uint64 if prev > res: return 0 # overflowChecks emulation inc(i) while s[i] == '_': inc(i) # underscores are allowed and ignored b = res result = i - start proc parseBiggestUInt*(s: string, number: var uint64, start = 0): int {. rtl, extern: "npuParseBiggestUInt", noSideEffect.} = ## parses an unsigned integer starting at `start` and stores the value ## into `number`. ## Result is the number of processed chars or 0 if there is no integer ## or overflow detected. var res: uint64 # use 'res' for exception safety (don't write to 'number' in case of an # overflow exception): result = rawParseUInt(s, res, start) number = res proc parseUInt*(s: string, number: var uint, start = 0): int {. rtl, extern: "npuParseUInt", noSideEffect.} = ## parses an unsigned integer starting at `start` and stores the value ## into `number`. ## Result is the number of processed chars or 0 if there is no integer or ## overflow detected. var res: uint64 result = parseBiggestUInt(s, res, start) if (sizeof(uint) <= 4) and (res > 0xFFFF_FFFF'u64): raise newException(OverflowError, "overflow") elif result != 0: number = uint(res) proc parseBiggestFloat*(s: string, number: var BiggestFloat, start = 0): int {. magic: "ParseBiggestFloat", importc: "nimParseBiggestFloat", noSideEffect.} ## parses a float starting at `start` and stores the value into `number`. ## Result is the number of processed chars or 0 if a parsing error ## occurred. proc parseFloat*(s: string, number: var float, start = 0): int {. rtl, extern: "npuParseFloat", noSideEffect.} = ## parses a float starting at `start` and stores the value into `number`. ## Result is the number of processed chars or 0 if there occurred a parsing ## error. var bf: BiggestFloat result = parseBiggestFloat(s, bf, start) if result != 0: number = bf type InterpolatedKind* = enum ## describes for `interpolatedFragments` ## which part of the interpolated string is ## yielded; for example in "str$$$var${expr}" ikStr, ## ``str`` part of the interpolated string ikDollar, ## escaped ``$`` part of the interpolated string ikVar, ## ``var`` part of the interpolated string ikExpr ## ``expr`` part of the interpolated string {.deprecated: [TInterpolatedKind: InterpolatedKind].} iterator interpolatedFragments*(s: string): tuple[kind: InterpolatedKind, value: string] = ## Tokenizes the string `s` into substrings for interpolation purposes. ## ## Example: ## ## .. code-block:: nim ## for k, v in interpolatedFragments(" $this is ${an example} $$"): ## echo "(", k, ", \"", v, "\")" ## ## Results in: ## ## .. code-block:: nim ## (ikString, " ") ## (ikExpr, "this") ## (ikString, " is ") ## (ikExpr, "an example") ## (ikString, " ") ## (ikDollar, "$") var i = 0 var kind: InterpolatedKind while true: var j = i if s[j] == '$': if s[j+1] == '{': inc j, 2 var nesting = 0 while true: case s[j] of '{': inc nesting of '}': if nesting == 0: inc j break dec nesting of '\0': raise newException(ValueError, "Expected closing '}': " & substr(s, i, s.high)) else: discard inc j inc i, 2 # skip ${ kind = ikExpr elif s[j+1] in IdentStartChars: inc j, 2 while s[j] in IdentChars: inc(j) inc i # skip $ kind = ikVar elif s[j+1] == '$': inc j, 2 inc i # skip $ kind = ikDollar else: raise newException(ValueError, "Unable to parse a varible name at " & substr(s, i, s.high)) else: while j < s.len and s[j] != '$': inc j kind = ikStr if j > i: # do not copy the trailing } for ikExpr: yield (kind, substr(s, i, j-1-ord(kind == ikExpr))) else: break i = j when isMainModule: import sequtils let input = "$test{} $this is ${an{ example}} " let expected = @[(ikVar, "test"), (ikStr, "{} "), (ikVar, "this"), (ikStr, " is "), (ikExpr, "an{ example}"), (ikStr, " ")] assert toSeq(interpolatedFragments(input)) == expected var value = 0 discard parseHex("0x38", value) assert value == 56 discard parseHex("0x34", value) assert value == 56 * 256 + 52 value = -1 discard parseHex("0x38", value) assert value == -200 {.pop.}