| author | Andrey Makarov <ph.makarov@gmail.com> | 2021-05-03 11:21:36 +0300 |
| --- | --- | --- |
| committer | GitHub <noreply@github.com> | 2021-05-03 10:21:36 +0200 |
| commit | 287f1170ba29398acaab979389c68faf79aca29c (patch) | |
| tree | 8da48b4015513fe8a927c926cf3d6e0c5e6358eb /lib/packages | |
| parent | 0dc534832e41612c488011b58c2dc9eb576fff3b (diff) | |
| download | Nim-287f1170ba29398acaab979389c68faf79aca29c.tar.gz | |
highlite: fix #17890 - tokenize Nim escape seq-s (#17919)
* highlite: fix #17890 - tokenize Nim escape seq-s
* Update tests/stdlib/thighlite.nim

Co-authored-by: Timothee Cour <timothee.cour2@gmail.com>
Diffstat (limited to 'lib/packages')
-rw-r--r-- | lib/packages/docutils/highlite.nim | 52
1 file changed, 33 insertions(+), 19 deletions(-)
diff --git a/lib/packages/docutils/highlite.nim b/lib/packages/docutils/highlite.nim
index c0f4c9760..d6ce274dd 100644
--- a/lib/packages/docutils/highlite.nim
+++ b/lib/packages/docutils/highlite.nim
@@ -190,31 +190,33 @@ proc nimNextToken(g: var GeneralTokenizer, keywords: openArray[string] = @[]) =
   var pos = g.pos
   g.start = g.pos
   if g.state == gtStringLit:
-    g.kind = gtStringLit
-    while true:
+    if g.buf[pos] == '\\':
+      g.kind = gtEscapeSequence
+      inc(pos)
       case g.buf[pos]
-      of '\\':
-        g.kind = gtEscapeSequence
+      of 'x', 'X':
         inc(pos)
+        if g.buf[pos] in hexChars: inc(pos)
+        if g.buf[pos] in hexChars: inc(pos)
+      of '0'..'9':
+        while g.buf[pos] in {'0'..'9'}: inc(pos)
+      of '\0':
+        g.state = gtNone
+      else: inc(pos)
+    else:
+      g.kind = gtStringLit
+      while true:
         case g.buf[pos]
-        of 'x', 'X':
+        of '\\':
+          break
+        of '\0', '\r', '\n':
+          g.state = gtNone
+          break
+        of '\"':
           inc(pos)
-          if g.buf[pos] in hexChars: inc(pos)
-          if g.buf[pos] in hexChars: inc(pos)
-        of '0'..'9':
-          while g.buf[pos] in {'0'..'9'}: inc(pos)
-        of '\0':
           g.state = gtNone
+          break
         else: inc(pos)
-        break
-      of '\0', '\r', '\n':
-        g.state = gtNone
-        break
-      of '\"':
-        inc(pos)
-        g.state = gtNone
-        break
-      else: inc(pos)
   else:
     case g.buf[pos]
     of ' ', '\t'..'\r':
@@ -985,6 +987,18 @@ proc getNextToken*(g: var GeneralTokenizer, lang: SourceLanguage) =
   of langPython: pythonNextToken(g)
   of langCmd: cmdNextToken(g)
 
+proc tokenize*(text: string, lang: SourceLanguage): seq[(string, TokenClass)] =
+  var g: GeneralTokenizer
+  initGeneralTokenizer(g, text)
+  var prevPos = 0
+  while true:
+    getNextToken(g, lang)
+    if g.kind == gtEof:
+      break
+    var s = text[prevPos ..< g.pos]
+    result.add (s, g.kind)
+    prevPos = g.pos
+
 when isMainModule:
   var keywords: seq[string]
   # Try to work running in both the subdir or at the root.
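For context, here is a minimal usage sketch of the `tokenize` proc introduced above. It is not the commit's test (that lives in tests/stdlib/thighlite.nim); it assumes the module is importable as `packages/docutils/highlite` and simply prints the (substring, TokenClass) pairs for a snippet whose string literal contains an escape sequence, which after this fix should come back as its own `gtEscapeSequence` token.

```nim
# Minimal sketch, not the commit's test file.
# Assumed import path for the stdlib module: packages/docutils/highlite.
import packages/docutils/highlite

# Nim source being highlighted: echo "hi\n"
let snippet = "echo \"hi\\n\""

# With this fix, the \n inside the string literal is emitted as a separate
# token of kind gtEscapeSequence instead of being folded into gtStringLit.
for (substr, kind) in tokenize(snippet, langNim):
  echo kind, ": ", substr
```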