From 4c533b2d92162980dc8fac2332b0eb0739dbda25 Mon Sep 17 00:00:00 2001
From: Xabier Bello
Date: Tue, 8 Dec 2020 10:44:07 +0100
Subject: Added Python to highlite module. (#16286)

* Added Python to highlite module.

* Added the keywords for Python.

* Refactored 'pythonLikeNextToken' into 'nimNextToken':

  - `lang` property added to the GeneralTokenizer object; it is set in
    `getNextToken`.
  - `nimNextToken` accepts a `keywords` parameter, used for languages other
    than Nim.
  - Multiline comments are recognized only for `langNim`.
---
 lib/packages/docutils/highlite.nim | 44 ++++++++++++++++++++++++++++++--------------
 1 file changed, 30 insertions(+), 14 deletions(-)

diff --git a/lib/packages/docutils/highlite.nim b/lib/packages/docutils/highlite.nim
index df74dd980..fcc42add0 100644
--- a/lib/packages/docutils/highlite.nim
+++ b/lib/packages/docutils/highlite.nim
@@ -45,6 +45,9 @@ import
 from algorithm import binarySearch
 
 type
+  SourceLanguage* = enum
+    langNone, langNim, langCpp, langCsharp, langC, langJava,
+    langYaml, langPython
   TokenClass* = enum
     gtEof, gtNone, gtWhitespace, gtDecNumber, gtBinNumber, gtHexNumber,
     gtOctNumber, gtFloatNumber, gtIdentifier, gtKeyword, gtStringLit,
@@ -59,14 +62,11 @@ type
     buf: cstring
     pos: int
     state: TokenClass
-
-  SourceLanguage* = enum
-    langNone, langNim, langCpp, langCsharp, langC, langJava,
-    langYaml
+    lang: SourceLanguage
 
 const
   sourceLanguageToStr*: array[SourceLanguage, string] = ["none",
-    "Nim", "C++", "C#", "C", "Java", "Yaml"]
+    "Nim", "C++", "C#", "C", "Java", "Yaml", "Python"]
   tokenClassToStr*: array[TokenClass, string] = ["Eof", "None", "Whitespace",
     "DecNumber", "BinNumber", "HexNumber", "OctNumber", "FloatNumber",
     "Identifier", "Keyword", "StringLit", "LongStringLit", "CharLit",
@@ -101,6 +101,7 @@ proc initGeneralTokenizer*(g: var GeneralTokenizer, buf: cstring) =
   g.start = 0
   g.length = 0
   g.state = low(TokenClass)
+  g.lang = low(SourceLanguage)
   var pos = 0        # skip initial whitespace:
   while g.buf[pos] in {' ', '\x09'..'\x0D'}: inc(pos)
   g.pos = pos
@@ -161,7 +162,13 @@ const
   OpChars = {'+', '-', '*', '/', '\\', '<', '>', '!', '?', '^', '.',
     '|', '=', '%', '&', '$', '@', '~', ':'}
 
-proc nimNextToken(g: var GeneralTokenizer) =
+proc isKeyword(x: openArray[string], y: string): int =
+  binarySearch(x, y)
+
+proc isKeywordIgnoreCase(x: openArray[string], y: string): int =
+  binarySearch(x, y, cmpIgnoreCase)
+
+proc nimNextToken(g: var GeneralTokenizer, keywords: openArray[string] = @[]) =
   const
     hexChars = {'0'..'9', 'A'..'F', 'a'..'f', '_'}
     octChars = {'0'..'7', '_'}
@@ -207,7 +214,7 @@ proc nimNextToken(g: var GeneralTokenizer) =
     if g.buf[pos] == '#':
       inc(pos)
       isDoc = true
-    if g.buf[pos] == '[':
+    if g.buf[pos] == '[' and g.lang == langNim:
       g.kind = gtLongComment
       var nesting = 0
       while true:
@@ -265,7 +272,10 @@ proc nimNextToken(g: var GeneralTokenizer) =
         inc(pos)
       if g.buf[pos] == '\"': inc(pos)
     else:
-      g.kind = nimGetKeyword(id)
+      if g.lang == langNim:
+        g.kind = nimGetKeyword(id)
+      elif isKeyword(keywords, id) >= 0:
+        g.kind = gtKeyword
   of '0':
     inc(pos)
     case g.buf[pos]
@@ -394,12 +404,6 @@ proc generalStrLit(g: var GeneralTokenizer, position: int): int =
       inc(pos)
   result = pos
 
-proc isKeyword(x: openArray[string], y: string): int =
-  binarySearch(x, y)
-
-proc isKeywordIgnoreCase(x: openArray[string], y: string): int =
-  binarySearch(x, y, cmpIgnoreCase)
-
 type
   TokenizerFlag = enum
     hasPreprocessor, hasNestedComments
@@ -886,7 +890,18 @@ proc yamlNextToken(g: var GeneralTokenizer) =
   g.length = pos - g.pos
   g.pos = pos
 
+proc pythonNextToken(g: var GeneralTokenizer) =
+  const
+    keywords: array[0..34, string] = [
+      "False", "None", "True", "and", "as", "assert", "async", "await",
+      "break", "class", "continue", "def", "del", "elif", "else", "except",
+      "finally", "for", "from", "global", "if", "import", "in", "is", "lambda",
+      "nonlocal", "not", "or", "pass", "raise", "return", "try", "while",
+      "with", "yield"]
+  nimNextToken(g, keywords)
+
 proc getNextToken*(g: var GeneralTokenizer, lang: SourceLanguage) =
+  g.lang = lang
   case lang
   of langNone: assert false
   of langNim: nimNextToken(g)
@@ -895,6 +910,7 @@ proc getNextToken*(g: var GeneralTokenizer, lang: SourceLanguage) =
   of langC: cNextToken(g)
   of langJava: javaNextToken(g)
   of langYaml: yamlNextToken(g)
+  of langPython: pythonNextToken(g)
 
 when isMainModule:
   var keywords: seq[string]
--
cgit 1.4.1-2-gfad0
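
Editor's note: a minimal usage sketch (not part of the patch) of how the new langPython path
could be driven from user code, relying only on the API visible in this diff
(initGeneralTokenizer, getNextToken, tokenClassToStr, langPython, and the exported
GeneralTokenizer fields kind/start/length). The import path, the sample snippet, and the
string-slicing step are illustrative assumptions, not something this commit adds.

  import packages/docutils/highlite   # assumed stdlib import path for the module

  let code = "def fib(n):\n  return n if n < 2 else fib(n - 1) + fib(n - 2)\n"

  var g: GeneralTokenizer
  initGeneralTokenizer(g, code)        # buf parameter is a cstring; a Nim string converts implicitly
  while true:
    getNextToken(g, langPython)        # sets g.lang, then dispatches to pythonNextToken
    if g.kind == gtEof: break
    # g.start and g.length delimit the current token inside the input buffer,
    # so we slice the original string to print the token text.
    echo tokenClassToStr[g.kind], ": ", code[g.start ..< g.start + g.length]

Because the Python keywords are looked up with isKeyword (a binarySearch wrapper), the
keywords array passed to nimNextToken has to stay lexicographically sorted, which the
list above is (uppercase "False"/"None"/"True" sort before the lowercase keywords).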