add RST highlighting for command line / shells (also fixes #16858) (#17789)

author: Andrey Makarov <ph.makarov@gmail.com> 2021-04-21 17:57:54 +0300
committer: GitHub <noreply@github.com> 2021-04-21 16:57:54 +0200
commit: 8f79bc5f3dfdfd13ef6be1fbcb300872931801f8 (patch)
tree: a977691f76636db8eadab8dc68289205a04944a9 /lib/packages/docutils
parent: 80389b8053a3ddb2ef0f80b3129e91ce93bd0a51 (diff)
download: Nim-8f79bc5f3dfdfd13ef6be1fbcb300872931801f8.tar.gz
4 files changed, 125 insertions, 29 deletions
diff --git a/lib/packages/docutils/highlite.nim b/lib/packages/docutils/highlite.nim
index 94cb2ebde..c0f4c9760 100644
--- a/lib/packages/docutils/highlite.nim
+++ b/lib/packages/docutils/highlite.nim
@@ -37,6 +37,18 @@
 ## .. code:: Nim
 ##   for l in ["C", "c++", "jAvA", "Nim", "c#"]: echo getSourceLanguage(l)
 ##
+## There is also a `Cmd` pseudo-language supported, which is a simple generic
+## shell/cmdline tokenizer (UNIX shell/Powershell/Windows Command):
+## no escaping, no programming language constructs besides variable definition
+## at the beginning of line. It supports these operators:
+##
+## .. code:: Cmd
+##    &  &&  |  ||  (  )  ''  ""  ;  # for comments
+##
+## Instead of escaping always use quotes like here
+## `nimgrep --ext:'nim|nims' file.name`:cmd: shows how to input ``|``.
+## Any argument that contains ``.`` or ``/`` or ``\`` will be treated
+## as a file or directory.
 
 import
   strutils
@@ -45,7 +57,7 @@ from algorithm import binarySearch
 type
   SourceLanguage* = enum
     langNone, langNim, langCpp, langCsharp, langC, langJava,
-    langYaml, langPython
+    langYaml, langPython, langCmd
   TokenClass* = enum
     gtEof, gtNone, gtWhitespace, gtDecNumber, gtBinNumber, gtHexNumber,
     gtOctNumber, gtFloatNumber, gtIdentifier, gtKeyword, gtStringLit,
@@ -53,7 +65,7 @@ type
     gtOperator, gtPunctuation, gtComment, gtLongComment, gtRegularExpression,
     gtTagStart, gtTagEnd, gtKey, gtValue, gtRawData, gtAssembler,
     gtPreprocessor, gtDirective, gtCommand, gtRule, gtHyperlink, gtLabel,
-    gtReference, gtOther
+    gtReference, gtProgram, gtOption, gtOther
   GeneralTokenizer* = object of RootObj
     kind*: TokenClass
     start*, length*: int
@@ -64,14 +76,17 @@ type
 
 const
   sourceLanguageToStr*: array[SourceLanguage, string] = ["none",
-    "Nim", "C++", "C#", "C", "Java", "Yaml", "Python"]
+    "Nim", "C++", "C#", "C", "Java", "Yaml", "Python", "Cmd"]
   tokenClassToStr*: array[TokenClass, string] = ["Eof", "None", "Whitespace",
     "DecNumber", "BinNumber", "HexNumber", "OctNumber", "FloatNumber",
     "Identifier", "Keyword", "StringLit", "LongStringLit", "CharLit",
     "EscapeSequence", "Operator", "Punctuation", "Comment", "LongComment",
     "RegularExpression", "TagStart", "TagEnd", "Key", "Value", "RawData",
     "Assembler", "Preprocessor", "Directive", "Command", "Rule", "Hyperlink",
-    "Label", "Reference", "Other"]
+    "Label", "Reference",
+    # start from lower-case if there is a corresponding RST role (see rst.nim)
+    "program", "option",
+    "Other"]
 
   # The following list comes from doc/keywords.txt, make sure it is
   # synchronized with this array by running the module itself as a test case.
@@ -898,6 +913,65 @@ proc pythonNextToken(g: var GeneralTokenizer) =
       "with", "yield"]
   nimNextToken(g, keywords)
 
+proc cmdNextToken(g: var GeneralTokenizer) =
+  var pos = g.pos
+  g.start = g.pos
+  if g.state == low(TokenClass):
+    g.state = gtProgram
+  case g.buf[pos]
+  of ' ', '\t'..'\r':
+    g.kind = gtWhitespace
+    while g.buf[pos] in {' ', '\t'..'\r'}:
+      if g.buf[pos] == '\n':
+        g.state = gtProgram
+      inc(pos)
+  of '\'', '"':
+    g.kind = gtOption
+    let q = g.buf[pos]
+    inc(pos)
+    while g.buf[pos] notin {q, '\0'}:
+      inc(pos)
+    if g.buf[pos] == q: inc(pos)
+  of '#':
+    g.kind = gtComment
+    while g.buf[pos] notin {'\n', '\0'}:
+      inc(pos)
+  of '&', '|':
+    g.kind = gtOperator
+    inc(pos)
+    if g.buf[pos] == g.buf[pos-1]: inc(pos)
+    g.state = gtProgram
+  of '(':
+    g.kind = gtOperator
+    g.state = gtProgram
+    inc(pos)
+  of ')':
+    g.kind = gtOperator
+    inc(pos)
+  of ';':
+    g.state = gtProgram
+    g.kind = gtOperator
+    inc(pos)
+  of '\0': g.kind = gtEof
+  else:
+    if g.state == gtProgram:
+      g.kind = gtProgram
+      g.state = gtOption
+    else:
+      g.kind = gtOption
+    while g.buf[pos] notin {' ', '\t'..'\r', '&', '|', '(', ')', '\'', '"', '\0'}:
+      if g.buf[pos] == ';' and g.buf[pos+1] == ' ':
+        # (check space because ';' can be used inside arguments in Win bat)
+        break
+      if g.kind == gtOption and g.buf[pos] in {'/', '\\', '.'}:
+        g.kind = gtIdentifier  # for file/dir name
+      elif g.kind == gtProgram and g.buf[pos] == '=':
+        g.kind = gtIdentifier  # for env variable setting at beginning of line
+        g.state = gtProgram
+      inc(pos)
+  g.length = pos - g.pos
+  g.pos = pos
+
 proc getNextToken*(g: var GeneralTokenizer, lang: SourceLanguage) =
   g.lang = lang
   case lang
@@ -909,6 +983,7 @@ proc getNextToken*(g: var GeneralTokenizer, lang: SourceLanguage) =
   of langJava: javaNextToken(g)
   of langYaml: yamlNextToken(g)
   of langPython: pythonNextToken(g)
+  of langCmd: cmdNextToken(g)
 
 when isMainModule:
   var keywords: seq[string]
diff --git a/lib/packages/docutils/rst.nim b/lib/packages/docutils/rst.nim
index 7c4b92f88..cb65791ff 100644
--- a/lib/packages/docutils/rst.nim
+++ b/lib/packages/docutils/rst.nim
@@ -23,10 +23,10 @@
 ##
 ## Nim can output the result to HTML [#html]_ or Latex [#latex]_.
 ##
-## .. [#html] commands ``nim doc`` for ``*.nim`` files and
-##    ``nim rst2html`` for ``*.rst`` files
+## .. [#html] commands `nim doc`:cmd: for ``*.nim`` files and
+##    `nim rst2html`:cmd: for ``*.rst`` files
 ##
-## .. [#latex] command ``nim rst2tex`` for ``*.rst``.
+## .. [#latex] command `nim rst2tex`:cmd: for ``*.rst``.
 ##
 ## If you are new to RST please consider reading the following:
 ##
@@ -78,14 +78,21 @@
 ##
 ## * directives: ``code-block`` [cmp:Sphinx]_, ``title``,
 ##   ``index`` [cmp:Sphinx]_
-## * predefined roles ``:nim:`` (default), ``:c:`` (C programming language),
-##   ``:python:``, ``:yaml:``, ``:java:``, ``:cpp:`` (C++), ``:csharp`` (C#).
-##   That is every language that `highlite <highlite.html>`_ supports.
-##   They turn on appropriate syntax highlighting in inline code.
+## * predefined roles
+##   - ``:nim:`` (default), ``:c:`` (C programming language),
+##     ``:python:``, ``:yaml:``, ``:java:``, ``:cpp:`` (C++), ``:csharp`` (C#).
+##     That is every language that `highlite <highlite.html>`_ supports.
+##     They turn on appropriate syntax highlighting in inline code.
 ##
-##   .. Note:: default role for Nim files is ``:nim:``,
-##             for ``*.rst`` it's currently ``:literal:``.
+##     .. Note:: default role for Nim files is ``:nim:``,
+##               for ``*.rst`` it's currently ``:literal:``.
 ##
+##   - generic command line highlighting roles:
+##     - ``:cmd:`` for commands and common shells syntax
+##     - ``:program:`` for executable names [cmp:Sphinx]_
+##       (one can just use ``:cmd:`` on single word)
+##     - ``:option:`` for command line options [cmp:Sphinx]_
+##   - ``:tok:``, a role for highlighting of programming language tokens
 ## * ***triple emphasis*** (bold and italic) using \*\*\*
 ## * ``:idx:`` role for \`interpreted text\` to include the link to this
 ##   text into an index (example: `Nim index`_).
@@ -95,11 +102,11 @@
 ##     //compile   compile the project
 ##     //doc       generate documentation
 ##
-##   Here the dummy `//` will disappear, while options ``compile``
-##   and ``doc`` will be left in the final document.
+##   Here the dummy `//` will disappear, while options `compile`:option:
+##   and `doc`:option: will be left in the final document.
 ##
 ## .. [cmp:Sphinx] similar but different from the directives of
-##    Python `Sphinx directives`_ extensions
+##    Python `Sphinx directives`_ and `Sphinx roles`_ extensions
 ##
 ## .. _`extra features`:
 ##
@@ -144,7 +151,7 @@
 ## -----
 ##
 ## See `Nim DocGen Tools Guide <docgen.html>`_ for the details about
-## ``nim doc``, ``nim rst2html`` and ``nim rst2tex`` commands.
+## `nim doc`:cmd:, `nim rst2html`:cmd: and `nim rst2tex`:cmd: commands.
 ##
 ## See `packages/docutils/rstgen module <rstgen.html>`_ to know how to
 ## generate HTML or Latex strings to embed them into your documents.
@@ -156,6 +163,7 @@
 ## .. _RST roles list: https://docutils.sourceforge.io/docs/ref/rst/roles.html
 ## .. _Nim index: https://nim-lang.org/docs/theindex.html
 ## .. _Sphinx directives: https://www.sphinx-doc.org/en/master/usage/restructuredtext/directives.html
+## .. _Sphinx roles: https://www.sphinx-doc.org/en/master/usage/restructuredtext/roles.html
 
 import
   os, strutils, rstast, std/enumutils, algorithm, lists, sequtils,
@@ -530,7 +538,7 @@ proc defaultRole(options: RstParseOptions): string =
 
 # mirror highlite.nim sourceLanguageToStr with substitutions c++ cpp, c# csharp
 const supportedLanguages = ["nim", "yaml", "python", "java", "c",
-                            "cpp", "csharp"]
+                            "cpp", "csharp", "cmd"]
 
 proc whichRoleAux(sym: string): RstNodeKind =
   let r = sym.toLowerAscii
@@ -543,6 +551,7 @@ proc whichRoleAux(sym: string): RstNodeKind =
   of "sup", "superscript": result = rnSup
   # literal and code are the same in our implementation
   of "code": result = rnInlineLiteral
+  of "program", "option", "tok": result = rnCodeFragment
   # c++ currently can be spelled only as cpp, c# only as csharp
   elif r in supportedLanguages:
     result = rnInlineCode
@@ -1113,10 +1122,10 @@ proc toInlineCode(n: PRstNode, language: string): PRstNode =
   lb.add newLeaf(s)
   result.add lb
 
-proc toUnknownRole(n: PRstNode, roleName: string): PRstNode =
+proc toOtherRole(n: PRstNode, kind: RstNodeKind, roleName: string): PRstNode =
   let newN = newRstNode(rnInner, n.sons)
   let newSons = @[newN, newLeaf(roleName)]
-  result = newRstNode(rnUnknownRole, newSons)
+  result = newRstNode(kind, newSons)
 
 proc parsePostfix(p: var RstParser, n: PRstNode): PRstNode =
   var newKind = n.kind
@@ -1144,8 +1153,8 @@ proc parsePostfix(p: var RstParser, n: PRstNode): PRstNode =
     # a role:
     let (roleName, lastIdx) = getRefname(p, p.idx+1)
     newKind = whichRole(p, roleName)
-    if newKind == rnUnknownRole:
-      result = n.toUnknownRole(roleName)
+    if newKind in {rnUnknownRole, rnCodeFragment}:
+      result = n.toOtherRole(newKind, roleName)
     elif newKind == rnInlineCode:
       result = n.toInlineCode(language=roleName)
     else:
@@ -1417,8 +1426,8 @@ proc parseInline(p: var RstParser, father: PRstNode) =
       if k == rnInlineCode:
         n = n.toInlineCode(language=roleName)
       parseUntil(p, n, "`", false) # bug #17260
-      if k == rnUnknownRole:
-        n = n.toUnknownRole(roleName)
+      if k in {rnUnknownRole, rnCodeFragment}:
+        n = n.toOtherRole(k, roleName)
       father.add(n)
     elif isInlineMarkupStart(p, "`"):
       var n = newRstNode(rnInterpretedText)
diff --git a/lib/packages/docutils/rstast.nim b/lib/packages/docutils/rstast.nim
index 00f3f2b35..81e3ba6d9 100644
--- a/lib/packages/docutils/rstast.nim
+++ b/lib/packages/docutils/rstast.nim
@@ -56,7 +56,9 @@ type
                               #     * `file#id <file#id>'_
     rnSubstitutionDef,        # a definition of a substitution
     # Inline markup:
-    rnInlineCode,
+    rnInlineCode,             # interpreted text with code in a known language
+    rnCodeFragment,           # inline code for highlighting with the specified
+                              # class (which cannot be inferred from context)
     rnUnknownRole,            # interpreted text with an unknown role
     rnSub, rnSup, rnIdx,
     rnEmphasis,               # "*"
diff --git a/lib/packages/docutils/rstgen.nim b/lib/packages/docutils/rstgen.nim
index 1b9334a77..40ed88954 100644
--- a/lib/packages/docutils/rstgen.nim
+++ b/lib/packages/docutils/rstgen.nim
@@ -1198,7 +1198,8 @@ proc renderRstToOut(d: PDoc, n: PRstNode, result: var string) =
         "$1", result)
   of rnOptionGroup:
     renderAux(d, n,
-        "<div class=\"option-list-label\">$1</div>",
+        "<div class=\"option-list-label\"><tt><span class=\"option\">" &
+        "$1</span></tt></div>",
         "\\item[$1]", result)
   of rnDescription:
     renderAux(d, n, "<div class=\"option-list-description\">$1</div>",
@@ -1319,13 +1320,22 @@ proc renderRstToOut(d: PDoc, n: PRstNode, result: var string) =
     renderAux(d, n, "|$1|", "|$1|", result)
   of rnDirective:
     renderAux(d, n, "", "", result)
-  of rnUnknownRole:
+  of rnUnknownRole, rnCodeFragment:
     var tmp0 = ""
     var tmp1 = ""
     renderRstToOut(d, n.sons[0], tmp0)
     renderRstToOut(d, n.sons[1], tmp1)
-    dispA(d.target, result, "<span class=\"$2\">$1</span>", "\\span$2{$1}",
-          [tmp0, tmp1])
+    var class = tmp1
+    # don't allow missing role break latex compilation:
+    if d.target == outLatex and n.kind == rnUnknownRole: class = "Other"
+    if n.kind == rnCodeFragment:
+      dispA(d.target, result,
+            "<tt class=\"docutils literal\"><span class=\"pre $2\">" &
+              "$1</span></tt>",
+            "\\texttt{\\span$2{$1}}", [tmp0, class])
+    else:  # rnUnknownRole, not necessarily code/monospace font
+      dispA(d.target, result, "<span class=\"$2\">$1</span>", "\\span$2{$1}",
+            [tmp0, class])
   of rnSub: renderAux(d, n, "<sub>$1</sub>", "\\rstsub{$1}", result)
   of rnSup: renderAux(d, n, "<sup>$1</sup>", "\\rstsup{$1}", result)
   of rnEmphasis: renderAux(d, n, "<em>$1</em>", "\\emph{$1}", result)
author	Andrey Makarov <ph.makarov@gmail.com>	2021-04-21 17:57:54 +0300
committer	GitHub <noreply@github.com>	2021-04-21 16:57:54 +0200
commit	8f79bc5f3dfdfd13ef6be1fbcb300872931801f8 (patch)
tree	a977691f76636db8eadab8dc68289205a04944a9 /lib/packages/docutils
parent	80389b8053a3ddb2ef0f80b3129e91ce93bd0a51 (diff)
download	Nim-8f79bc5f3dfdfd13ef6be1fbcb300872931801f8.tar.gz