diff options
author | Andrey Makarov <ph.makarov@gmail.com> | 2020-12-14 20:10:39 +0300 |
---|---|---|
committer | GitHub <noreply@github.com> | 2020-12-14 18:10:39 +0100 |
commit | e843492b1334ded1d8fcd0e0eb6dc94fcf970aba (patch) | |
tree | 6c6ba7c9fe1aadf6eb12f3ee0fe08d1edb2f2bf0 /lib/packages/docutils | |
parent | 2728711dd3dbe8b1c2cd7dd6f15f696b58b77ea6 (diff) | |
download | Nim-e843492b1334ded1d8fcd0e0eb6dc94fcf970aba.tar.gz |
doc/rst2html: a few fixes for enumerated and bullet lists (#16295)
* fix bullet/enumerated lists with many blank lines * fix enumerated list parsing * fix parse failure when next line after list empty * implement arbitrary start of enumerator * check that enumerators are in order * remove redundant start=x if x=1 or a * add some doc on implemented features * update start in rst_examples.rst * allow upper-case letters + more docs
Diffstat (limited to 'lib/packages/docutils')
-rw-r--r-- | lib/packages/docutils/rst.nim | 135 | ||||
-rw-r--r-- | lib/packages/docutils/rstast.nim | 2 | ||||
-rw-r--r-- | lib/packages/docutils/rstgen.nim | 54 |
3 files changed, 161 insertions, 30 deletions
diff --git a/lib/packages/docutils/rst.nim b/lib/packages/docutils/rst.nim index bc3efa272..709173526 100644 --- a/lib/packages/docutils/rst.nim +++ b/lib/packages/docutils/rst.nim @@ -11,6 +11,59 @@ ## subset is implemented. Some features of the `markdown`:idx: wiki syntax are ## also supported. ## +## Supported RST features: +## +## * body elements +## + sections +## + transitions +## + paragraphs +## + bullet lists using \+, \*, \- +## + enumerated lists using arabic numerals or alphabet +## characters: 1. ... 2. ... *or* a. ... b. ... *or* A. ... B. ... +## + definition lists +## + field lists +## + option lists +## + indented literal blocks +## + simple tables +## + directives +## - image, figure +## - code-block +## - substitution definitions: replace and image +## - ... a few more +## + comments +## * inline markup +## + *emphasis*, **strong emphasis**, `interpreted text`, +## ``inline literals``, hyperlink references, substitution references, +## standalone hyperlinks +## +## Additional features: +## +## * ***triple emphasis*** (bold and italic) using \*\*\* +## +## Optional additional features, turned on by ``options: RstParseOption`` in +## `rstParse proc <#rstParse,string,string,int,int,bool,RstParseOptions,FindFileHandler,MsgHandler>`_: +## +## * emoji / smiley symbols +## * markdown tables +## * markdown code blocks +## * markdown links +## * markdown headlines +## +## Limitations: +## +## * no Unicode support in character width calculations +## * body elements +## - no roman numerals in enumerated lists +## - no quoted literal blocks +## - no doctest blocks +## - no grid tables +## - directives: no support for admonitions (notes, caution) +## - no footnotes & citations support +## - no inline internal targets +## * inline markup +## - no simple-inline-markup +## - no embedded URI and aliases +## ## **Note:** Import ``packages/docutils/rst`` to use this module import @@ -569,7 +622,9 @@ proc match(p: RstParser, start: int, expr: string): bool = # 'p' 
tkPunct # 'T' always true # 'E' whitespace, indent or eof - # 'e' tkWord or '#' (for enumeration lists) + # 'e' any enumeration sequence or '#' (for enumeration lists) + # 'x' a..z or '#' (for enumeration lists) + # 'n' 0..9 or '#' (for enumeration lists) var i = 0 var j = start var last = expr.len - 1 @@ -583,12 +638,16 @@ proc match(p: RstParser, start: int, expr: string): bool = of 'o': result = p.tok[j].kind == tkOther of 'T': result = true of 'E': result = p.tok[j].kind in {tkEof, tkWhite, tkIndent} - of 'e': + of 'e', 'x', 'n': result = p.tok[j].kind == tkWord or p.tok[j].symbol == "#" if result: case p.tok[j].symbol[0] - of 'a'..'z', 'A'..'Z', '#': result = p.tok[j].symbol.len == 1 - of '0'..'9': result = allCharsInSet(p.tok[j].symbol, {'0'..'9'}) + of '#': result = true + of 'a'..'z', 'A'..'Z': + result = expr[i] in {'e', 'x'} and p.tok[j].symbol.len == 1 + of '0'..'9': + result = expr[i] in {'e', 'n'} and + allCharsInSet(p.tok[j].symbol, {'0'..'9'}) else: result = false else: var c = expr[i] @@ -1465,33 +1524,55 @@ proc parseDefinitionList(p: var RstParser): PRstNode = proc parseEnumList(p: var RstParser): PRstNode = const - wildcards: array[0..2, string] = ["(e) ", "e) ", "e. "] - wildpos: array[0..2, int] = [1, 0, 0] - result = nil + wildcards: array[0..5, string] = ["(n) ", "n) ", "n. ", + "(x) ", "x) ", "x. 
"] + # enumerator patterns, where 'x' means letter and 'n' means number + wildToken: array[0..5, int] = [4, 3, 3, 4, 3, 3] # number of tokens + wildIndex: array[0..5, int] = [1, 0, 0, 1, 0, 0] + # position of enumeration sequence (number/letter) in enumerator + result = newRstNode(rnEnumList) + let col = currentTok(p).col var w = 0 - while w <= 2: + while w < wildcards.len: if match(p, p.idx, wildcards[w]): break inc w - if w <= 2: - var col = currentTok(p).col - result = newRstNode(rnEnumList) - inc p.idx, wildpos[w] + 3 - var j = tokenAfterNewline(p) - if p.tok[j].col == currentTok(p).col or match(p, j, wildcards[w]): - pushInd(p, currentTok(p).col) - while true: - var item = newRstNode(rnEnumItem) - parseSection(p, item) - result.add(item) - if currentTok(p).kind == tkIndent and currentTok(p).ival == col and - match(p, p.idx + 1, wildcards[w]): - inc p.idx, wildpos[w] + 4 - else: + assert w < wildcards.len + for i in 0 ..< wildToken[w]-1: # add first enumerator with (, ), and . + if p.tok[p.idx + i].symbol == "#": + result.text.add "1" + else: + result.text.add p.tok[p.idx + i].symbol + var prevEnum = p.tok[p.idx + wildIndex[w]].symbol + inc p.idx, wildToken[w] + while true: + var item = newRstNode(rnEnumItem) + pushInd(p, currentTok(p).col) + parseSection(p, item) + popInd(p) + result.add(item) + if currentTok(p).kind == tkIndent and currentTok(p).ival == col and + match(p, p.idx+1, wildcards[w]): + let enumerator = p.tok[p.idx + 1 + wildIndex[w]].symbol + # check that it's in sequence: enumerator == next(prevEnum) + if "n" in wildcards[w]: # arabic numeral + let prevEnumI = try: parseInt(prevEnum) except: 1 + let curEnum = + if enumerator == "#": prevEnumI + 1 + else: (try: parseInt(enumerator) except: 1) + if curEnum - prevEnumI != 1: break - popInd(p) + prevEnum = enumerator + else: # a..z + let prevEnumI = ord(prevEnum[0]) + let curEnum = + if enumerator == "#": prevEnumI + 1 + else: ord(enumerator[0]) + if curEnum - prevEnumI != 1: + break + prevEnum = 
$chr(curEnum) + inc p.idx, 1 + wildToken[w] else: - dec p.idx, wildpos[w] + 3 - result = nil + break proc sonKind(father: PRstNode, i: int): RstNodeKind = result = rnLeaf @@ -1511,6 +1592,8 @@ proc parseSection(p: var RstParser, result: PRstNode) = result.add(a) popInd(p) else: + while currentTok(p).kind != tkEof and nextTok(p).kind == tkIndent: + inc p.idx # skip blank lines leave = true break if leave or currentTok(p).kind == tkEof: break diff --git a/lib/packages/docutils/rstast.nim b/lib/packages/docutils/rstast.nim index 8b2159aeb..5e2d21c04 100644 --- a/lib/packages/docutils/rstast.nim +++ b/lib/packages/docutils/rstast.nim @@ -69,7 +69,7 @@ type RstNode* {.acyclic, final.} = object ## an RST node's description kind*: RstNodeKind ## the node's kind text*: string ## valid for leafs in the AST; and the title of - ## the document or the section + ## the document or the section; and rnEnumList level*: int ## valid for some node kinds sons*: RstNodeSeq ## the node's sons diff --git a/lib/packages/docutils/rstgen.nim b/lib/packages/docutils/rstgen.nim index d88f2ed77..4d056a83e 100644 --- a/lib/packages/docutils/rstgen.nim +++ b/lib/packages/docutils/rstgen.nim @@ -1029,6 +1029,56 @@ proc renderField(d: PDoc, n: PRstNode, result: var string) = if not b: renderAux(d, n, "<tr>$1</tr>\n", "$1", result) +proc renderEnumList(d: PDoc, n: PRstNode, result: var string) = + var + specifier = "" + specStart = "" + i1 = 0 + pre = "" + i2 = n.text.len-1 + post = "" + if n.text[0] == '(': + i1 = 1 + pre = "(" + if n.text[^1] == ')' or n.text[^1] == '.': + i2 = n.text.len-2 + post = $n.text[^1] + let enumR = i1 .. i2 # enumerator range without surrounding (, ), . 
+ if d.target == outLatex: + result.add ("\n%"&n.text&"\n") + # use enumerate parameters from package enumitem + if n.text[i1].isDigit: + var labelDef = "" + if pre != "" or post != "": + labelDef = "label=" & pre & "\\arabic*" & post & "," + if n.text[enumR] != "1": + specStart = "start=$1" % [n.text[enumR]] + if labelDef != "" or specStart != "": + specifier = "[$1$2]" % [labelDef, specStart] + else: + let (first, labelDef) = + if n.text[i1].isUpperAscii: ('A', "label=" & pre & "\\Alph*" & post) + else: ('a', "label=" & pre & "\\alph*" & post) + if n.text[i1] != first: + specStart = ",start=" & $(ord(n.text[i1]) - ord(first) + 1) + specifier = "[$1$2]" % [labelDef, specStart] + else: # HTML + # TODO: implement enumerator formatting using pre and post ( and ) for HTML + if n.text[i1].isDigit: + if n.text[enumR] != "1": + specStart = " start=\"$1\"" % [n.text[enumR]] + specifier = "class=\"simple\"" & specStart + else: + let (first, labelDef) = + if n.text[i1].isUpperAscii: ('A', "class=\"upperalpha simple\"") + else: ('a', "class=\"loweralpha simple\"") + if n.text[i1] != first: + specStart = " start=\"$1\"" % [ $(ord(n.text[i1]) - ord(first) + 1) ] + specifier = labelDef & specStart + renderAux(d, n, "<ol " & specifier & ">$1</ol>\n", + "\\begin{enumerate}" & specifier & "$1\\end{enumerate}\n", + result) + proc renderRstToOut(d: PDoc, n: PRstNode, result: var string) = if n == nil: return case n.kind @@ -1042,9 +1092,7 @@ proc renderRstToOut(d: PDoc, n: PRstNode, result: var string) = "\\begin{itemize}$1\\end{itemize}\n", result) of rnBulletItem, rnEnumItem: renderAux(d, n, "<li>$1</li>\n", "\\item $1\n", result) - of rnEnumList: - renderAux(d, n, "<ol class=\"simple\">$1</ol>\n", - "\\begin{enumerate}$1\\end{enumerate}\n", result) + of rnEnumList: renderEnumList(d, n, result) of rnDefList: renderAux(d, n, "<dl class=\"docutils\">$1</dl>\n", "\\begin{description}$1\\end{description}\n", result) |