diff options
author | Andrey Makarov <ph.makarov@gmail.com> | 2021-11-23 15:02:03 +0300 |
---|---|---|
committer | GitHub <noreply@github.com> | 2021-11-23 13:02:03 +0100 |
commit | 040d23e799e94aa3e0c6b254f1dc708b2eada96b (patch) | |
tree | 44f8ec7ce44b767ff4be207906bbabc3b1347144 /lib/packages/docutils | |
parent | 7772ca303cbd81cade30bb6b54659b1994e01164 (diff) | |
download | Nim-040d23e799e94aa3e0c6b254f1dc708b2eada96b.tar.gz |
implement RST & Markdown quote blocks (#19147)
* implement RST & Markdown quote blocks * compile with nim 1.0 * Fix indentation
Diffstat (limited to 'lib/packages/docutils')
-rw-r--r-- | lib/packages/docutils/rst.nim | 210 | ||||
-rw-r--r-- | lib/packages/docutils/rstast.nim | 9 | ||||
-rw-r--r-- | lib/packages/docutils/rstgen.nim | 29 |
3 files changed, 237 insertions, 11 deletions
diff --git a/lib/packages/docutils/rst.nim b/lib/packages/docutils/rst.nim index 2e908f4e5..4c28894fb 100644 --- a/lib/packages/docutils/rst.nim +++ b/lib/packages/docutils/rst.nim @@ -53,6 +53,8 @@ ## + field lists ## + option lists ## + indented literal blocks +## + quoted literal blocks +## + line blocks ## + simple tables ## + directives (see official documentation in `RST directives list`_): ## - ``image``, ``figure`` for including images and videos @@ -121,6 +123,7 @@ ## * Markdown code blocks ## * Markdown links ## * Markdown headlines +## * Markdown block quotes ## * using ``1`` as auto-enumerator in enumerated lists like RST ``#`` ## (auto-enumerator ``1`` can not be used with ``#`` in the same list) ## @@ -145,7 +148,7 @@ ## 2) Compatibility mode which is RST rules. ## ## .. Note:: in both modes the parser interpretes text between single -## backticks (code) identically: +## backticks (code) identically: ## backslash does not escape; the only exception: ``\`` folowed by ` ## does escape so that we can always input a single backtick ` in ## inline code. However that makes impossible to input code with @@ -156,13 +159,35 @@ ## ``\`` -- GOOD ## So single backticks can always be input: `\`` will turn to ` code ## +## .. Attention:: +## We don't support some obviously poor design choices of Markdown (or RST). +## +## - no support for the rule of 2 spaces causing a line break in Markdown +## (use RST "line blocks" syntax for making line breaks) +## +## - interpretation of Markdown block quotes is also slightly different, +## e.g. case +## +## :: +## +## >>> foo +## > bar +## >>baz +## +## is a single 3rd-level quote `foo bar baz` in original Markdown, while +## in Nim we naturally see it as 3rd-level quote `foo` + 1st level `bar` + +## 2nd level `baz`: +## +## >>> foo +## > bar +## >>baz +## ## Limitations ## ----------- ## ## * no Unicode support in character width calculations ## * body elements ## - no roman numerals in enumerated lists -## - no quoted literal blocks ## - no doctest blocks ## - no grid tables ## - some directives are missing (check official `RST directives list`_): @@ -472,6 +497,10 @@ type line: int # the last line of this style occurrence # (for error message) hasPeers: bool # has headings on the same level of hierarchy? + LiteralBlockKind = enum # RST-style literal blocks after `::` + lbNone, + lbIndentedLiteralBlock, + lbQuotedLiteralBlock LevelMap = seq[LevelInfo] # Saves for each possible title adornment # style its level in the current document. SubstitutionKind = enum @@ -1953,6 +1982,44 @@ proc parseLiteralBlock(p: var RstParser): PRstNode = inc p.idx result.add(n) +proc parseQuotedLiteralBlock(p: var RstParser): PRstNode = + result = newRstNodeA(p, rnLiteralBlock) + var n = newLeaf("") + if currentTok(p).kind == tkIndent: + var indent = currInd(p) + while currentTok(p).kind == tkIndent: inc p.idx # skip blank lines + var quoteSym = currentTok(p).symbol[0] + while true: + case currentTok(p).kind + of tkEof: + break + of tkIndent: + if currentTok(p).ival < indent: + break + elif currentTok(p).ival == indent: + if nextTok(p).kind == tkPunct and nextTok(p).symbol[0] == quoteSym: + n.text.add("\n") + inc p.idx + elif nextTok(p).kind == tkIndent: + break + else: + rstMessage(p, mwRstStyle, "no newline after quoted literal block") + break + else: + rstMessage(p, mwRstStyle, + "unexpected indentation in quoted literal block") + break + else: + n.text.add(currentTok(p).symbol) + inc p.idx + result.add(n) + +proc parseRstLiteralBlock(p: var RstParser, kind: LiteralBlockKind): PRstNode = + if kind == lbIndentedLiteralBlock: + result = parseLiteralBlock(p) + else: + result = parseQuotedLiteralBlock(p) + proc getLevel(p: var RstParser, c: char, hasOverline: bool): int = ## Returns (preliminary) heading level corresponding to `c` and ## `hasOverline`. If level does not exist, add it first. @@ -2023,6 +2090,33 @@ proc isLineBlock(p: RstParser): bool = p.tok[j].col > currentTok(p).col or p.tok[j].symbol == "\n" +proc isMarkdownBlockQuote(p: RstParser): bool = + result = currentTok(p).symbol[0] == '>' + +proc whichRstLiteralBlock(p: RstParser): LiteralBlockKind = + ## Checks that the following tokens are either Indented Literal Block or + ## Quoted Literal Block (which is not quite the same as Markdown quote block). + ## https://docutils.sourceforge.io/docs/ref/rst/restructuredtext.html#quoted-literal-blocks + if currentTok(p).symbol == "::" and nextTok(p).kind == tkIndent: + if currInd(p) > nextTok(p).ival: + result = lbNone + if currInd(p) < nextTok(p).ival: + result = lbIndentedLiteralBlock + elif currInd(p) == nextTok(p).ival: + var i = p.idx + 1 + while p.tok[i].kind == tkIndent: inc i + const validQuotingCharacters = { + '!', '"', '#', '$', '%', '&', '\'', '(', ')', '*', '+', ',', '-', + '.', '/', ':', ';', '<', '=', '>', '?', '@', '[', '\\', ']', '^', + '_', '`', '{', '|', '}', '~'} + if p.tok[i].kind in {tkPunct, tkAdornment} and + p.tok[i].symbol[0] in validQuotingCharacters: + result = lbQuotedLiteralBlock + else: + result = lbNone + else: + result = lbNone + proc predNL(p: RstParser): bool = result = true if p.idx > 0: @@ -2078,6 +2172,8 @@ proc whichSection(p: RstParser): RstNodeKind = elif match(p, p.idx + 1, " a"): result = rnTable elif currentTok(p).symbol == "|" and isLineBlock(p): result = rnLineBlock + elif roSupportMarkdown in p.s.options and isMarkdownBlockQuote(p): + result = rnMarkdownBlockQuote elif match(p, p.idx + 1, "i") and isAdornmentHeadline(p, p.idx): result = rnOverline else: @@ -2090,6 +2186,8 @@ proc whichSection(p: RstParser): RstNodeKind = result = rnMarkdownTable elif currentTok(p).symbol == "|" and isLineBlock(p): result = rnLineBlock + elif roSupportMarkdown in p.s.options and isMarkdownBlockQuote(p): + result = rnMarkdownBlockQuote elif match(p, tokenAfterNewline(p), "aI") and isAdornmentHeadline(p, tokenAfterNewline(p)): result = rnHeadline @@ -2143,6 +2241,102 @@ proc parseLineBlock(p: var RstParser): PRstNode = else: break +proc parseDoc(p: var RstParser): PRstNode {.gcsafe.} + +proc getQuoteSymbol(p: RstParser, idx: int): tuple[sym: string, depth: int, tokens: int] = + result = ("", 0, 0) + var i = idx + result.sym &= p.tok[i].symbol + result.depth += p.tok[i].symbol.len + inc result.tokens + inc i + while p.tok[i].kind == tkWhite and i+1 < p.tok.len and + p.tok[i+1].kind == tkPunct and p.tok[i+1].symbol[0] == '>': + result.sym &= p.tok[i].symbol + result.sym &= p.tok[i+1].symbol + result.depth += p.tok[i+1].symbol.len + inc result.tokens, 2 + inc i, 2 + +proc parseMarkdownQuoteSegment(p: var RstParser, curSym: string, col: int): + PRstNode = + ## We define *segment* as a group of lines that starts with exactly the + ## same quote symbol. If the following lines don't contain any `>` (*lazy* + ## continuation) they considered as continuation of the current segment. + var q: RstParser # to delete `>` at a start of line and then parse normally + initParser(q, p.s) + q.col = p.col + q.line = p.line + var minCol = int.high # minimum colum num in the segment + while true: # move tokens of segment from `p` to `q` skipping `curSym` + case currentTok(p).kind + of tkEof: + break + of tkIndent: + if nextTok(p).kind in {tkIndent, tkEof}: + break + else: + if nextTok(p).symbol[0] == '>': + var (quoteSym, _, quoteTokens) = getQuoteSymbol(p, p.idx + 1) + if quoteSym == curSym: # the segment continues + var iTok = tokenAfterNewline(p, p.idx+1) + if p.tok[iTok].kind notin {tkEof, tkIndent} and + p.tok[iTok].symbol[0] != '>': + rstMessage(p, mwRstStyle, + "two or more quoted lines are followed by unquoted line " & + $(curLine(p) + 1)) + break + q.tok.add currentTok(p) + var ival = currentTok(p).ival + quoteSym.len + inc p.idx, (1 + quoteTokens) # skip newline and > > > + if currentTok(p).kind == tkWhite: + ival += currentTok(p).symbol.len + inc p.idx + # fix up previous `tkIndent`s to ival (as if >>> were not there) + var j = q.tok.len - 1 + while j >= 0 and q.tok[j].kind == tkIndent: + q.tok[j].ival = ival + dec j + else: # next segment started + break + elif currentTok(p).ival < col: + break + else: # the segment continues, a case like: + # > beginning + # continuation + q.tok.add currentTok(p) + inc p.idx + else: + if currentTok(p).col < minCol: minCol = currentTok(p).col + q.tok.add currentTok(p) + inc p.idx + q.indentStack = @[minCol] + # if initial indentation `minCol` is > 0 then final newlines + # should be omitted so that parseDoc could advance to the end of tokens: + var j = q.tok.len - 1 + while q.tok[j].kind == tkIndent: dec j + q.tok.setLen (j+1) + q.tok.add Token(kind: tkEof, line: currentTok(p).line) + result = parseDoc(q) + +proc parseMarkdownBlockQuote(p: var RstParser): PRstNode = + var (curSym, quotationDepth, quoteTokens) = getQuoteSymbol(p, p.idx) + let col = currentTok(p).col + result = newRstNodeA(p, rnMarkdownBlockQuote) + inc p.idx, quoteTokens # skip first > + while true: + var item = newRstNode(rnMarkdownBlockQuoteItem) + item.quotationDepth = quotationDepth + if currentTok(p).kind == tkWhite: inc p.idx + item.add parseMarkdownQuoteSegment(p, curSym, col) + result.add(item) + if currentTok(p).kind == tkIndent and currentTok(p).ival == col and + nextTok(p).kind != tkEof and nextTok(p).symbol[0] == '>': + (curSym, quotationDepth, quoteTokens) = getQuoteSymbol(p, p.idx + 1) + inc p.idx, (1 + quoteTokens) # skip newline and > > > + else: + break + proc parseParagraph(p: var RstParser, result: PRstNode) = while true: case currentTok(p).kind @@ -2158,16 +2352,17 @@ proc parseParagraph(p: var RstParser, result: PRstNode) = result.add newLeaf(" ") of rnLineBlock: result.addIfNotNil(parseLineBlock(p)) + of rnMarkdownBlockQuote: + result.addIfNotNil(parseMarkdownBlockQuote(p)) else: break else: break of tkPunct: - if currentTok(p).symbol == "::" and - nextTok(p).kind == tkIndent and - currInd(p) < nextTok(p).ival: + if (let literalBlockKind = whichRstLiteralBlock(p); + literalBlockKind != lbNone): result.add newLeaf(":") inc p.idx # skip '::' - result.add(parseLiteralBlock(p)) + result.add(parseRstLiteralBlock(p, literalBlockKind)) break else: parseInline(p, result) @@ -2257,8 +2452,6 @@ proc getColumns(p: var RstParser, cols: var IntSeq) = # last column has no limit: cols[L - 1] = 32000 -proc parseDoc(p: var RstParser): PRstNode {.gcsafe.} - proc parseSimpleTable(p: var RstParser): PRstNode = var cols: IntSeq @@ -2585,6 +2778,7 @@ proc parseSection(p: var RstParser, result: PRstNode) = a = parseLiteralBlock(p) of rnBulletList: a = parseBulletList(p) of rnLineBlock: a = parseLineBlock(p) + of rnMarkdownBlockQuote: a = parseMarkdownBlockQuote(p) of rnDirective: a = parseDotDot(p) of rnEnumList: a = parseEnumList(p) of rnLeaf: rstMessage(p, meNewSectionExpected, "(syntax error)") diff --git a/lib/packages/docutils/rstast.nim b/lib/packages/docutils/rstast.nim index bc7b9a650..1d5da5e1c 100644 --- a/lib/packages/docutils/rstast.nim +++ b/lib/packages/docutils/rstast.nim @@ -32,7 +32,10 @@ type rnFieldName, # consisting of a field name ... rnFieldBody, # ... and a field body rnOptionList, rnOptionListItem, rnOptionGroup, rnOption, rnOptionString, - rnOptionArgument, rnDescription, rnLiteralBlock, rnQuotedLiteralBlock, + rnOptionArgument, rnDescription, rnLiteralBlock, + rnMarkdownBlockQuote, # a quote starting from punctuation like >>> + rnMarkdownBlockQuoteItem, # a quotation block, quote lines starting with + # the same number of chars rnLineBlock, # the | thingie rnLineBlockItem, # a son of rnLineBlock - one line inside it. # When `RstNode` lineIndent="\n" the line's empty @@ -101,6 +104,8 @@ type of rnFootnote, rnCitation, rnOptionListItem: order*: int ## footnote order (for auto-symbol footnotes and ## auto-numbered ones without a label) + of rnMarkdownBlockQuoteItem: + quotationDepth*: int ## number of characters in line prefix of rnRef, rnSubstitutionReferences, rnInterpretedText, rnField, rnInlineCode, rnCodeBlock, rnFootnoteRef: info*: TLineInfo ## To have line/column info for warnings at @@ -409,6 +414,8 @@ proc treeRepr*(node: PRstNode, indent=0): string = result.add " level=" & $node.level of rnFootnote, rnCitation, rnOptionListItem: result.add (if node.order == 0: "" else: " order=" & $node.order) + of rnMarkdownBlockQuoteItem: + result.add " quotationDepth=" & $node.quotationDepth else: discard result.add (if node.anchor == "": "" else: " anchor='" & node.anchor & "'") diff --git a/lib/packages/docutils/rstgen.nim b/lib/packages/docutils/rstgen.nim index 4cbd05379..dc6ca25c1 100644 --- a/lib/packages/docutils/rstgen.nim +++ b/lib/packages/docutils/rstgen.nim @@ -89,6 +89,7 @@ type onTestSnippet*: proc (d: var RstGenerator; filename, cmd: string; status: int; content: string) escMode*: EscapeMode + curQuotationDepth: int PDoc = var RstGenerator ## Alias to type less. @@ -167,6 +168,7 @@ proc initRstGenerator*(g: var RstGenerator, target: OutputTarget, g.currentSection = "" g.id = 0 g.escMode = emText + g.curQuotationDepth = 0 let fileParts = filename.splitFile if fileParts.ext == ".nim": g.currentSection = "Module " & fileParts.name @@ -1268,8 +1270,31 @@ proc renderRstToOut(d: PDoc, n: PRstNode, result: var string) = of rnLiteralBlock: renderAux(d, n, "<pre$2>$1</pre>\n", "\n\n$2\\begin{rstpre}\n$1\n\\end{rstpre}\n\n", result) - of rnQuotedLiteralBlock: - doAssert false, "renderRstToOut" + of rnMarkdownBlockQuote: + d.curQuotationDepth = 1 + var tmp = "" + renderAux(d, n, "$1", "$1", tmp) + let itemEnding = + if d.target == outHtml: "</blockquote>" else: "\\end{rstquote}" + tmp.add itemEnding.repeat(d.curQuotationDepth - 1) + dispA(d.target, result, + "<blockquote$2 class=\"markdown-quote\">$1</blockquote>\n", + "\n\\begin{rstquote}\n$2\n$1\\end{rstquote}\n", [tmp, n.anchor.idS]) + of rnMarkdownBlockQuoteItem: + let addQuotationDepth = n.quotationDepth - d.curQuotationDepth + var itemPrefix: string # start or ending (quotation grey bar on the left) + if addQuotationDepth >= 0: + let s = + if d.target == outHtml: "<blockquote class=\"markdown-quote\">" + else: "\\begin{rstquote}" + itemPrefix = s.repeat(addQuotationDepth) + else: + let s = + if d.target == outHtml: "</blockquote>" + else: "\\end{rstquote}" + itemPrefix = s.repeat(-addQuotationDepth) + renderAux(d, n, itemPrefix & "<p>$1</p>", itemPrefix & "\n$1", result) + d.curQuotationDepth = n.quotationDepth of rnLineBlock: if n.sons.len == 1 and n.sons[0].lineIndent == "\n": # whole line block is one empty line, no need to add extra spacing |