Added Python to highlite module. (#16286)

* Added Python to highlite module.
* Added the keywords for Python.
* Refactored 'pythonLikeNextToken' into 'nimNextToken':
  - A `lang` property was added to the GeneralTokenizer object; it is set in `getNextToken`.
  - `nimNextToken` accepts a `keywords` parameter, used for languages other than Nim.
  - Multiline comments are available only for `langNim`.
author: Xabier Bello <xbello@users.noreply.github.com> 2020-12-08 10:44:07 +0100
committer: GitHub <noreply@github.com> 2020-12-08 10:44:07 +0100
commit: 4c533b2d92162980dc8fac2332b0eb0739dbda25 (patch)
tree: 51d81b1fd9f6aa309eb151deaf819a4dd3499d6c
parent: 2297b9623843bb4c1dbd201a8fb57f9ec334c164 (diff)
download: Nim-4c533b2d92162980dc8fac2332b0eb0739dbda25.tar.gz
2 files changed, 50 insertions, 18 deletions
diff --git a/lib/packages/docutils/highlite.nim b/lib/packages/docutils/highlite.nim
index df74dd980..fcc42add0 100644
--- a/lib/packages/docutils/highlite.nim
+++ b/lib/packages/docutils/highlite.nim
@@ -45,6 +45,9 @@ import
 from algorithm import binarySearch
 
 type
+  SourceLanguage* = enum
+    langNone, langNim, langCpp, langCsharp, langC, langJava,
+    langYaml, langPython
   TokenClass* = enum
     gtEof, gtNone, gtWhitespace, gtDecNumber, gtBinNumber, gtHexNumber,
     gtOctNumber, gtFloatNumber, gtIdentifier, gtKeyword, gtStringLit,
@@ -59,14 +62,11 @@ type
     buf: cstring
     pos: int
     state: TokenClass
-
-  SourceLanguage* = enum
-    langNone, langNim, langCpp, langCsharp, langC, langJava,
-    langYaml
+    lang: SourceLanguage
 
 const
   sourceLanguageToStr*: array[SourceLanguage, string] = ["none",
-    "Nim", "C++", "C#", "C", "Java", "Yaml"]
+    "Nim", "C++", "C#", "C", "Java", "Yaml", "Python"]
   tokenClassToStr*: array[TokenClass, string] = ["Eof", "None", "Whitespace",
     "DecNumber", "BinNumber", "HexNumber", "OctNumber", "FloatNumber",
     "Identifier", "Keyword", "StringLit", "LongStringLit", "CharLit",
@@ -101,6 +101,7 @@ proc initGeneralTokenizer*(g: var GeneralTokenizer, buf: cstring) =
   g.start = 0
   g.length = 0
   g.state = low(TokenClass)
+  g.lang = low(SourceLanguage)
   var pos = 0                     # skip initial whitespace:
   while g.buf[pos] in {' ', '\x09'..'\x0D'}: inc(pos)
   g.pos = pos
@@ -161,7 +162,13 @@ const
   OpChars  = {'+', '-', '*', '/', '\\', '<', '>', '!', '?', '^', '.',
               '|', '=', '%', '&', '$', '@', '~', ':'}
 
-proc nimNextToken(g: var GeneralTokenizer) =
+proc isKeyword(x: openArray[string], y: string): int =  # case-sensitive lookup of y in x
+  binarySearch(x, y)  # binary search, so x must be sorted; callers test the result with `>= 0`
+
+proc isKeywordIgnoreCase(x: openArray[string], y: string): int =  # like isKeyword, but ignoring case
+  binarySearch(x, y, cmpIgnoreCase)  # x must be sorted consistently with cmpIgnoreCase
+
+proc nimNextToken(g: var GeneralTokenizer, keywords: openArray[string] = @[]) =
   const
     hexChars = {'0'..'9', 'A'..'F', 'a'..'f', '_'}
     octChars = {'0'..'7', '_'}
@@ -207,7 +214,7 @@ proc nimNextToken(g: var GeneralTokenizer) =
       if g.buf[pos] == '#':
         inc(pos)
         isDoc = true
-      if g.buf[pos] == '[':
+      if g.buf[pos] == '[' and g.lang == langNim:
         g.kind = gtLongComment
         var nesting = 0
         while true:
@@ -265,7 +272,10 @@ proc nimNextToken(g: var GeneralTokenizer) =
             inc(pos)
           if g.buf[pos] == '\"': inc(pos)
       else:
-        g.kind = nimGetKeyword(id)
+        if g.lang == langNim:
+          g.kind = nimGetKeyword(id)
+        elif isKeyword(keywords, id) >= 0:
+          g.kind = gtKeyword
     of '0':
       inc(pos)
       case g.buf[pos]
@@ -394,12 +404,6 @@ proc generalStrLit(g: var GeneralTokenizer, position: int): int =
         inc(pos)
   result = pos
 
-proc isKeyword(x: openArray[string], y: string): int =
-  binarySearch(x, y)
-
-proc isKeywordIgnoreCase(x: openArray[string], y: string): int =
-  binarySearch(x, y, cmpIgnoreCase)
-
 type
   TokenizerFlag = enum
     hasPreprocessor, hasNestedComments
@@ -886,7 +890,18 @@ proc yamlNextToken(g: var GeneralTokenizer) =
   g.length = pos - g.pos
   g.pos = pos
 
+proc pythonNextToken(g: var GeneralTokenizer) =  # Python: nimNextToken driven by Python's keywords
+  const
+    keywords: array[0..34, string] = [  # keep sorted: nimNextToken looks these up via binarySearch
+      "False", "None", "True", "and", "as", "assert", "async", "await",
+      "break", "class", "continue", "def", "del", "elif", "else", "except",
+      "finally", "for", "from", "global", "if", "import", "in", "is", "lambda",
+      "nonlocal", "not", "or", "pass", "raise", "return", "try", "while",
+      "with", "yield"]
+  nimNextToken(g, keywords)
+
 proc getNextToken*(g: var GeneralTokenizer, lang: SourceLanguage) =
+  g.lang = lang
   case lang
   of langNone: assert false
   of langNim: nimNextToken(g)
@@ -895,6 +910,7 @@ proc getNextToken*(g: var GeneralTokenizer, lang: SourceLanguage) =
   of langC: cNextToken(g)
   of langJava: javaNextToken(g)
   of langYaml: yamlNextToken(g)
+  of langPython: pythonNextToken(g)
 
 when isMainModule:
   var keywords: seq[string]
diff --git a/tests/stdlib/trstgen.nim b/tests/stdlib/trstgen.nim
index 1bf73e146..8b12a89c4 100644
--- a/tests/stdlib/trstgen.nim
+++ b/tests/stdlib/trstgen.nim
@@ -188,13 +188,13 @@ not in table"""
     let input1 = """
 Test 2 column/4 rows table:
 ====   ===
-H0     H1 
+H0     H1
 ====   ===
-A0     A1 
+A0     A1
 ====   ===
-A2     A3 
+A2     A3
 ====   ===
-A4     A5 
+A4     A5
 ====   === """
     let output1 = rstToLatex(input1, {})
     assert "{|X|X|}" in output1  # 2 columns
@@ -360,3 +360,19 @@ Test1
     let output1l = rstToLatex(input1, {})
     assert "line block\\\\" in output1l
     assert "other line\\\\" in output1l
+
+suite "RST/Code highlight":
+  test "Basic Python code highlight":  # NOTE(review): expected HTML tags `arg` as Punctuation and leaves f_name/print unstyled; possibly a stale token kind for non-keyword identifiers, confirm intended
+    let pythonCode = """
+    .. code-block:: python
+
+      def f_name(arg=42):
+          print(f"{arg}")
+
+    """
+
+    let expected = """<blockquote><p><span class="Keyword">def</span> f_name<span class="Punctuation">(</span><span class="Punctuation">arg</span><span class="Operator">=</span><span class="DecNumber">42</span><span class="Punctuation">)</span><span class="Punctuation">:</span>
+    print<span class="Punctuation">(</span><span class="RawData">f&quot;{arg}&quot;</span><span class="Punctuation">)</span></p></blockquote>"""
+
+    check strip(rstToHtml(pythonCode, {}, newStringTable(modeCaseSensitive))) ==
+      strip(expected)
author	Xabier Bello <xbello@users.noreply.github.com>	2020-12-08 10:44:07 +0100
committer	GitHub <noreply@github.com>	2020-12-08 10:44:07 +0100
commit	4c533b2d92162980dc8fac2332b0eb0739dbda25 (patch)
tree	51d81b1fd9f6aa309eb151deaf819a4dd3499d6c
parent	2297b9623843bb4c1dbd201a8fb57f9ec334c164 (diff)
download	Nim-4c533b2d92162980dc8fac2332b0eb0739dbda25.tar.gz