| field | value | date |
|---|---|---|
| author | Xabier Bello <xbello@users.noreply.github.com> | 2020-12-08 10:44:07 +0100 |
| committer | GitHub <noreply@github.com> | 2020-12-08 10:44:07 +0100 |
| commit | 4c533b2d92162980dc8fac2332b0eb0739dbda25 (patch) | |
| tree | 51d81b1fd9f6aa309eb151deaf819a4dd3499d6c /lib | |
| parent | 2297b9623843bb4c1dbd201a8fb57f9ec334c164 (diff) | |
| download | Nim-4c533b2d92162980dc8fac2332b0eb0739dbda25.tar.gz | |
Added Python to highlite module. (#16286)
* Added Python to the highlite module.
* Added the keywords for Python.
* Refactored 'pythonLikeNextToken' into 'nimNextToken':
  - A `lang` property was added to the GeneralTokenizer object; it is set in `getNextToken`.
  - `nimNextToken` accepts a `keywords` parameter, used for languages other than Nim.
  - Multiline comments are recognized only for `langNim`.
Diffstat (limited to 'lib')
-rw-r--r-- | lib/packages/docutils/highlite.nim | 44 |
1 file changed, 30 insertions, 14 deletions
```diff
diff --git a/lib/packages/docutils/highlite.nim b/lib/packages/docutils/highlite.nim
index df74dd980..fcc42add0 100644
--- a/lib/packages/docutils/highlite.nim
+++ b/lib/packages/docutils/highlite.nim
@@ -45,6 +45,9 @@ import
 from algorithm import binarySearch
 
 type
+  SourceLanguage* = enum
+    langNone, langNim, langCpp, langCsharp, langC, langJava,
+    langYaml, langPython
   TokenClass* = enum
     gtEof, gtNone, gtWhitespace, gtDecNumber, gtBinNumber, gtHexNumber,
     gtOctNumber, gtFloatNumber, gtIdentifier, gtKeyword, gtStringLit,
@@ -59,14 +62,11 @@ type
     buf: cstring
     pos: int
     state: TokenClass
-
-  SourceLanguage* = enum
-    langNone, langNim, langCpp, langCsharp, langC, langJava,
-    langYaml
+    lang: SourceLanguage
 
 const
   sourceLanguageToStr*: array[SourceLanguage, string] = ["none",
-    "Nim", "C++", "C#", "C", "Java", "Yaml"]
+    "Nim", "C++", "C#", "C", "Java", "Yaml", "Python"]
   tokenClassToStr*: array[TokenClass, string] = ["Eof", "None", "Whitespace",
     "DecNumber", "BinNumber", "HexNumber", "OctNumber", "FloatNumber",
     "Identifier", "Keyword", "StringLit", "LongStringLit", "CharLit",
@@ -101,6 +101,7 @@ proc initGeneralTokenizer*(g: var GeneralTokenizer, buf: cstring) =
   g.start = 0
   g.length = 0
   g.state = low(TokenClass)
+  g.lang = low(SourceLanguage)
   var pos = 0 # skip initial whitespace:
   while g.buf[pos] in {' ', '\x09'..'\x0D'}: inc(pos)
   g.pos = pos
@@ -161,7 +162,13 @@ const
   OpChars  = {'+', '-', '*', '/', '\\', '<', '>', '!', '?', '^', '.',
               '|', '=', '%', '&', '$', '@', '~', ':'}
 
-proc nimNextToken(g: var GeneralTokenizer) =
+proc isKeyword(x: openArray[string], y: string): int =
+  binarySearch(x, y)
+
+proc isKeywordIgnoreCase(x: openArray[string], y: string): int =
+  binarySearch(x, y, cmpIgnoreCase)
+
+proc nimNextToken(g: var GeneralTokenizer, keywords: openArray[string] = @[]) =
   const
     hexChars = {'0'..'9', 'A'..'F', 'a'..'f', '_'}
     octChars = {'0'..'7', '_'}
@@ -207,7 +214,7 @@ proc nimNextToken(g: var GeneralTokenizer) =
     if g.buf[pos] == '#':
       inc(pos)
       isDoc = true
-    if g.buf[pos] == '[':
+    if g.buf[pos] == '[' and g.lang == langNim:
       g.kind = gtLongComment
       var nesting = 0
       while true:
@@ -265,7 +272,10 @@ proc nimNextToken(g: var GeneralTokenizer) =
         inc(pos)
       if g.buf[pos] == '\"': inc(pos)
     else:
-      g.kind = nimGetKeyword(id)
+      if g.lang == langNim:
+        g.kind = nimGetKeyword(id)
+      elif isKeyword(keywords, id) >= 0:
+        g.kind = gtKeyword
   of '0':
     inc(pos)
     case g.buf[pos]
@@ -394,12 +404,6 @@ proc generalStrLit(g: var GeneralTokenizer, position: int): int =
       inc(pos)
   result = pos
 
-proc isKeyword(x: openArray[string], y: string): int =
-  binarySearch(x, y)
-
-proc isKeywordIgnoreCase(x: openArray[string], y: string): int =
-  binarySearch(x, y, cmpIgnoreCase)
-
 type
   TokenizerFlag = enum
     hasPreprocessor, hasNestedComments
@@ -886,7 +890,18 @@ proc yamlNextToken(g: var GeneralTokenizer) =
   g.length = pos - g.pos
   g.pos = pos
 
+proc pythonNextToken(g: var GeneralTokenizer) =
+  const
+    keywords: array[0..34, string] = [
+      "False", "None", "True", "and", "as", "assert", "async", "await",
+      "break", "class", "continue", "def", "del", "elif", "else", "except",
+      "finally", "for", "from", "global", "if", "import", "in", "is", "lambda",
+      "nonlocal", "not", "or", "pass", "raise", "return", "try", "while",
+      "with", "yield"]
+  nimNextToken(g, keywords)
+
 proc getNextToken*(g: var GeneralTokenizer, lang: SourceLanguage) =
+  g.lang = lang
   case lang
   of langNone: assert false
   of langNim: nimNextToken(g)
@@ -895,6 +910,7 @@ proc getNextToken*(g: var GeneralTokenizer, lang: SourceLanguage) =
   of langC: cNextToken(g)
   of langJava: javaNextToken(g)
   of langYaml: yamlNextToken(g)
+  of langPython: pythonNextToken(g)
 
 when isMainModule:
   var keywords: seq[string]
```
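
For context (not part of the commit), here is a minimal usage sketch of the extended highlighter. It assumes the stdlib import path `packages/docutils/highlite`, a string-taking overload of `initGeneralTokenizer`, the exported `kind`/`start`/`length` fields and `deinitGeneralTokenizer`, alongside the `getNextToken`, `tokenClassToStr` and `langPython` symbols visible in the diff above:

```nim
# Sketch: tokenize a small Python snippet via the new langPython branch.
import packages/docutils/highlite  # assumed path for lib/packages/docutils/highlite.nim

let pySample = """
def greet(name):
    # say hello
    return "hello " + name
"""

var tok: GeneralTokenizer
initGeneralTokenizer(tok, pySample)  # assumed string overload of the cstring proc
while true:
  getNextToken(tok, langPython)      # sets tok.lang, then dispatches to pythonNextToken
  if tok.kind == gtEof: break
  # Each token reports its class plus its start/length in the buffer;
  # Python keywords such as `def` and `return` come back as gtKeyword.
  echo tokenClassToStr[tok.kind], " @ ", tok.start, " len ", tok.length
deinitGeneralTokenizer(tok)
```

Note that the `keywords` list handed to `nimNextToken` is looked up with `binarySearch` via `isKeyword`, so it must stay sorted; the Python keyword array added by this commit satisfies that.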