doc/rst2html: some few fixes for enumerated and bullet lists (#16295)

* fix bullet/enumarated lists with many blank lines * fix enumerated list parsing * fix parse failure when next line after list empty * implement arbitrary start of enumerator * check that enumerators are in order * remove redundant start=x if x=1 or a * add some doc on implemented features * update start in rst_examples.rst * allow upper-case letters + more docs
author: Andrey Makarov <ph.makarov@gmail.com> 2020-12-14 20:10:39 +0300
committer: GitHub <noreply@github.com> 2020-12-14 18:10:39 +0100
commit: e843492b1334ded1d8fcd0e0eb6dc94fcf970aba (patch)
tree: 6c6ba7c9fe1aadf6eb12f3ee0fe08d1edb2f2bf0 /lib/packages/docutils
parent: 2728711dd3dbe8b1c2cd7dd6f15f696b58b77ea6 (diff)
download: Nim-e843492b1334ded1d8fcd0e0eb6dc94fcf970aba.tar.gz
3 files changed, 161 insertions, 30 deletions
diff --git a/lib/packages/docutils/rst.nim b/lib/packages/docutils/rst.nim
index bc3efa272..709173526 100644
--- a/lib/packages/docutils/rst.nim
+++ b/lib/packages/docutils/rst.nim
@@ -11,6 +11,59 @@
 ## subset is implemented. Some features of the `markdown`:idx: wiki syntax are
 ## also supported.
 ##
+## Supported RST features:
+##
+## * body elements
+##   + sections
+##   + transitions
+##   + paragraphs
+##   + bullet lists using \+, \*, \-
+##   + enumerated lists using arabic numerals or alphabet
+##     characters:  1. ... 2. ... *or* a. ... b. ... *or* A. ... B. ...
+##   + definition lists
+##   + field lists
+##   + option lists
+##   + indented literal blocks
+##   + simple tables
+##   + directives
+##     - image, figure
+##     - code-block
+##     - substitution definitions: replace and image
+##     - ... a few more
+##   + comments
+## * inline markup
+##   + *emphasis*, **strong emphasis**, `interpreted text`,
+##     ``inline literals``, hyperlink references, substitution references,
+##     standalone hyperlinks
+##
+## Additional features:
+##
+## * ***triple emphasis*** (bold and italic) using \*\*\*
+##
+## Optional additional features, turned on by ``options: RstParseOption`` in
+## `rstParse proc <#rstParse,string,string,int,int,bool,RstParseOptions,FindFileHandler,MsgHandler>`_:
+##
+## * emoji / smiley symbols
+## * markdown tables
+## * markdown code blocks
+## * markdown links
+## * markdown headlines
+##
+## Limitations:
+##
+## * no Unicode support in character width calculations
+## * body elements
+##   - no roman numerals in enumerated lists
+##   - no quoted literal blocks
+##   - no doctest blocks
+##   - no grid tables
+##   - directives: no support for admonitions (notes, caution)
+##   - no footnotes & citations support
+##   - no inline internal targets
+## * inline markup
+##   - no simple-inline-markup
+##   - no embedded URI and aliases
+##
 ## **Note:** Import ``packages/docutils/rst`` to use this module
 
 import
@@ -569,7 +622,9 @@ proc match(p: RstParser, start: int, expr: string): bool =
   # 'p'              tkPunct
   # 'T'              always true
   # 'E'              whitespace, indent or eof
-  # 'e'              tkWord or '#' (for enumeration lists)
+  # 'e'              any enumeration sequence or '#' (for enumeration lists)
+  # 'x'              a..z or '#' (for enumeration lists)
+  # 'n'              0..9 or '#' (for enumeration lists)
   var i = 0
   var j = start
   var last = expr.len - 1
@@ -583,12 +638,16 @@ proc match(p: RstParser, start: int, expr: string): bool =
     of 'o': result = p.tok[j].kind == tkOther
     of 'T': result = true
     of 'E': result = p.tok[j].kind in {tkEof, tkWhite, tkIndent}
-    of 'e':
+    of 'e', 'x', 'n':
       result = p.tok[j].kind == tkWord or p.tok[j].symbol == "#"
       if result:
         case p.tok[j].symbol[0]
-        of 'a'..'z', 'A'..'Z', '#': result = p.tok[j].symbol.len == 1
-        of '0'..'9': result = allCharsInSet(p.tok[j].symbol, {'0'..'9'})
+        of '#': result = true
+        of 'a'..'z', 'A'..'Z':
+          result = expr[i] in {'e', 'x'} and p.tok[j].symbol.len == 1
+        of '0'..'9':
+          result = expr[i] in {'e', 'n'} and
+                     allCharsInSet(p.tok[j].symbol, {'0'..'9'})
         else: result = false
     else:
       var c = expr[i]
@@ -1465,33 +1524,55 @@ proc parseDefinitionList(p: var RstParser): PRstNode =
 
 proc parseEnumList(p: var RstParser): PRstNode =
   const
-    wildcards: array[0..2, string] = ["(e) ", "e) ", "e. "]
-    wildpos: array[0..2, int] = [1, 0, 0]
-  result = nil
+    wildcards: array[0..5, string] = ["(n) ", "n) ", "n. ",
+                                      "(x) ", "x) ", "x. "]
+      # enumerator patterns, where 'x' means letter and 'n' means number
+    wildToken: array[0..5, int] = [4, 3, 3, 4, 3, 3]  # number of tokens
+    wildIndex: array[0..5, int] = [1, 0, 0, 1, 0, 0]
+      # position of enumeration sequence (number/letter) in enumerator
+  result = newRstNode(rnEnumList)
+  let col = currentTok(p).col
   var w = 0
-  while w <= 2:
+  while w < wildcards.len:
     if match(p, p.idx, wildcards[w]): break
     inc w
-  if w <= 2:
-    var col = currentTok(p).col
-    result = newRstNode(rnEnumList)
-    inc p.idx, wildpos[w] + 3
-    var j = tokenAfterNewline(p)
-    if p.tok[j].col == currentTok(p).col or match(p, j, wildcards[w]):
-      pushInd(p, currentTok(p).col)
-      while true:
-        var item = newRstNode(rnEnumItem)
-        parseSection(p, item)
-        result.add(item)
-        if currentTok(p).kind == tkIndent and currentTok(p).ival == col and
-            match(p, p.idx + 1, wildcards[w]):
-          inc p.idx, wildpos[w] + 4
-        else:
+  assert w < wildcards.len
+  for i in 0 ..< wildToken[w]-1:  # add first enumerator with (, ), and .
+    if p.tok[p.idx + i].symbol == "#":
+      result.text.add "1"
+    else:
+      result.text.add p.tok[p.idx + i].symbol
+  var prevEnum = p.tok[p.idx + wildIndex[w]].symbol
+  inc p.idx, wildToken[w]
+  while true:
+    var item = newRstNode(rnEnumItem)
+    pushInd(p, currentTok(p).col)
+    parseSection(p, item)
+    popInd(p)
+    result.add(item)
+    if currentTok(p).kind == tkIndent and currentTok(p).ival == col and
+        match(p, p.idx+1, wildcards[w]):
+      let enumerator = p.tok[p.idx + 1 + wildIndex[w]].symbol
+      # check that it's in sequence: enumerator == next(prevEnum)
+      if "n" in wildcards[w]:  # arabic numeral
+        let prevEnumI = try: parseInt(prevEnum) except: 1
+        let curEnum =
+          if enumerator == "#": prevEnumI + 1
+          else: (try: parseInt(enumerator) except: 1)
+        if curEnum - prevEnumI != 1:
           break
-      popInd(p)
+        prevEnum = enumerator
+      else:  # a..z
+        let prevEnumI = ord(prevEnum[0])
+        let curEnum =
+          if enumerator == "#": prevEnumI + 1
+          else: ord(enumerator[0])
+        if curEnum - prevEnumI != 1:
+          break
+        prevEnum = $chr(curEnum)
+      inc p.idx, 1 + wildToken[w]
     else:
-      dec p.idx, wildpos[w] + 3
-      result = nil
+      break
 
 proc sonKind(father: PRstNode, i: int): RstNodeKind =
   result = rnLeaf
@@ -1511,6 +1592,8 @@ proc parseSection(p: var RstParser, result: PRstNode) =
         result.add(a)
         popInd(p)
       else:
+        while currentTok(p).kind != tkEof and nextTok(p).kind == tkIndent:
+          inc p.idx  # skip blank lines
         leave = true
         break
     if leave or currentTok(p).kind == tkEof: break
diff --git a/lib/packages/docutils/rstast.nim b/lib/packages/docutils/rstast.nim
index 8b2159aeb..5e2d21c04 100644
--- a/lib/packages/docutils/rstast.nim
+++ b/lib/packages/docutils/rstast.nim
@@ -69,7 +69,7 @@ type
   RstNode* {.acyclic, final.} = object ## an RST node's description
     kind*: RstNodeKind       ## the node's kind
     text*: string             ## valid for leafs in the AST; and the title of
-                              ## the document or the section
+                              ## the document or the section; and rnEnumList
     level*: int               ## valid for some node kinds
     sons*: RstNodeSeq        ## the node's sons
 
diff --git a/lib/packages/docutils/rstgen.nim b/lib/packages/docutils/rstgen.nim
index d88f2ed77..4d056a83e 100644
--- a/lib/packages/docutils/rstgen.nim
+++ b/lib/packages/docutils/rstgen.nim
@@ -1029,6 +1029,56 @@ proc renderField(d: PDoc, n: PRstNode, result: var string) =
   if not b:
     renderAux(d, n, "<tr>$1</tr>\n", "$1", result)
 
+proc renderEnumList(d: PDoc, n: PRstNode, result: var string) =
+  var
+    specifier = ""
+    specStart = ""
+    i1 = 0
+    pre = ""
+    i2 = n.text.len-1
+    post = ""
+  if n.text[0] == '(':
+    i1 = 1
+    pre = "("
+  if n.text[^1] == ')' or n.text[^1] == '.':
+    i2 = n.text.len-2
+    post = $n.text[^1]
+  let enumR = i1 .. i2  # enumerator range without surrounding (, ), .
+  if d.target == outLatex:
+    result.add ("\n%"&n.text&"\n")
+    # use enumerate parameters from package enumitem
+    if n.text[i1].isDigit:
+      var labelDef = ""
+      if pre != "" or post != "":
+        labelDef = "label=" & pre & "\\arabic*" & post & ","
+      if n.text[enumR] != "1":
+        specStart = "start=$1" % [n.text[enumR]]
+      if labelDef != "" or specStart != "":
+        specifier = "[$1$2]" % [labelDef, specStart]
+    else:
+      let (first, labelDef) =
+        if n.text[i1].isUpperAscii: ('A', "label=" & pre & "\\Alph*" & post)
+        else: ('a', "label=" & pre & "\\alph*" & post)
+      if n.text[i1] != first:
+        specStart = ",start=" & $(ord(n.text[i1]) - ord(first) + 1)
+      specifier = "[$1$2]" % [labelDef, specStart]
+  else:  # HTML
+    # TODO: implement enumerator formatting using pre and post ( and ) for HTML
+    if n.text[i1].isDigit:
+      if n.text[enumR] != "1":
+        specStart = " start=\"$1\"" % [n.text[enumR]]
+      specifier = "class=\"simple\"" & specStart
+    else:
+      let (first, labelDef) =
+        if n.text[i1].isUpperAscii: ('A', "class=\"upperalpha simple\"")
+        else: ('a', "class=\"loweralpha simple\"")
+      if n.text[i1] != first:
+        specStart = " start=\"$1\"" % [ $(ord(n.text[i1]) - ord(first) + 1) ]
+      specifier = labelDef & specStart
+  renderAux(d, n, "<ol " & specifier & ">$1</ol>\n",
+            "\\begin{enumerate}" & specifier & "$1\\end{enumerate}\n",
+            result)
+
 proc renderRstToOut(d: PDoc, n: PRstNode, result: var string) =
   if n == nil: return
   case n.kind
@@ -1042,9 +1092,7 @@ proc renderRstToOut(d: PDoc, n: PRstNode, result: var string) =
                     "\\begin{itemize}$1\\end{itemize}\n", result)
   of rnBulletItem, rnEnumItem:
     renderAux(d, n, "<li>$1</li>\n", "\\item $1\n", result)
-  of rnEnumList:
-    renderAux(d, n, "<ol class=\"simple\">$1</ol>\n",
-                    "\\begin{enumerate}$1\\end{enumerate}\n", result)
+  of rnEnumList: renderEnumList(d, n, result)
   of rnDefList:
     renderAux(d, n, "<dl class=\"docutils\">$1</dl>\n",
                        "\\begin{description}$1\\end{description}\n", result)
author	Andrey Makarov <ph.makarov@gmail.com>	2020-12-14 20:10:39 +0300
committer	GitHub <noreply@github.com>	2020-12-14 18:10:39 +0100
commit	e843492b1334ded1d8fcd0e0eb6dc94fcf970aba (patch)
tree	6c6ba7c9fe1aadf6eb12f3ee0fe08d1edb2f2bf0 /lib/packages/docutils
parent	2728711dd3dbe8b1c2cd7dd6f15f696b58b77ea6 (diff)
download	Nim-e843492b1334ded1d8fcd0e0eb6dc94fcf970aba.tar.gz