diff options
-rw-r--r-- | lib/packages/docutils/highlite.nim | 52 | ||||
-rw-r--r-- | tests/stdlib/thighlite.nim | 13 |
2 files changed, 46 insertions, 19 deletions
diff --git a/lib/packages/docutils/highlite.nim b/lib/packages/docutils/highlite.nim index c0f4c9760..d6ce274dd 100644 --- a/lib/packages/docutils/highlite.nim +++ b/lib/packages/docutils/highlite.nim @@ -190,31 +190,33 @@ proc nimNextToken(g: var GeneralTokenizer, keywords: openArray[string] = @[]) = var pos = g.pos g.start = g.pos if g.state == gtStringLit: - g.kind = gtStringLit - while true: + if g.buf[pos] == '\\': + g.kind = gtEscapeSequence + inc(pos) case g.buf[pos] - of '\\': - g.kind = gtEscapeSequence + of 'x', 'X': inc(pos) + if g.buf[pos] in hexChars: inc(pos) + if g.buf[pos] in hexChars: inc(pos) + of '0'..'9': + while g.buf[pos] in {'0'..'9'}: inc(pos) + of '\0': + g.state = gtNone + else: inc(pos) + else: + g.kind = gtStringLit + while true: case g.buf[pos] - of 'x', 'X': + of '\\': + break + of '\0', '\r', '\n': + g.state = gtNone + break + of '\"': inc(pos) - if g.buf[pos] in hexChars: inc(pos) - if g.buf[pos] in hexChars: inc(pos) - of '0'..'9': - while g.buf[pos] in {'0'..'9'}: inc(pos) - of '\0': g.state = gtNone + break else: inc(pos) - break - of '\0', '\r', '\n': - g.state = gtNone - break - of '\"': - inc(pos) - g.state = gtNone - break - else: inc(pos) else: case g.buf[pos] of ' ', '\t'..'\r': @@ -985,6 +987,18 @@ proc getNextToken*(g: var GeneralTokenizer, lang: SourceLanguage) = of langPython: pythonNextToken(g) of langCmd: cmdNextToken(g) +proc tokenize*(text: string, lang: SourceLanguage): seq[(string, TokenClass)] = + var g: GeneralTokenizer + initGeneralTokenizer(g, text) + var prevPos = 0 + while true: + getNextToken(g, lang) + if g.kind == gtEof: + break + var s = text[prevPos ..< g.pos] + result.add (s, g.kind) + prevPos = g.pos + when isMainModule: var keywords: seq[string] # Try to work running in both the subdir or at the root. diff --git a/tests/stdlib/thighlite.nim b/tests/stdlib/thighlite.nim new file mode 100644 index 000000000..f26ea93e4 --- /dev/null +++ b/tests/stdlib/thighlite.nim @@ -0,0 +1,13 @@ + +import unittest +import ../../lib/packages/docutils/highlite + +block: # Nim tokenizing" + test "string literals and escape seq": + check("\"ok1\\nok2\\nok3\"".tokenize(langNim) == + @[("\"ok1", gtStringLit), ("\\n", gtEscapeSequence), ("ok2", gtStringLit), + ("\\n", gtEscapeSequence), ("ok3\"", gtStringLit) + ]) + check("\"\"\"ok1\\nok2\\nok3\"\"\"".tokenize(langNim) == + @[("\"\"\"ok1\\nok2\\nok3\"\"\"", gtLongStringLit) + ]) |