diff options
author | Andrey Makarov <ph.makarov@gmail.com> | 2022-09-11 20:52:43 +0300 |
---|---|---|
committer | GitHub <noreply@github.com> | 2022-09-11 13:52:43 -0400 |
commit | 088487f652638a745e8e7e440a8a3b381239597b (patch) | |
tree | 960d2b08b4d3f16520395d7d1239946fd9403edd /lib/packages/docutils | |
parent | 846cc746a2350ad3f845a4eb0ce97b864891cd35 (diff) | |
download | Nim-088487f652638a745e8e7e440a8a3b381239597b.tar.gz |
Implement Markdown definition lists (+ migration) (#20333)
Implements the Markdown definition-list extension adopted by several implementations, including:

* [Pandoc](https://pandoc.org/MANUAL.html#definition-lists)
* [kramdown](https://kramdown.gettalong.org/quickref.html#definition-lists)
* [PHP Markdown Extra](https://michelf.ca/projects/php-markdown/extra/#def-list)

Affected files have also been migrated.

RST definition lists are turned off for Markdown: this solves the problem of broken formatting mentioned in https://github.com/nim-lang/Nim/pull/20292.
Diffstat (limited to 'lib/packages/docutils')
-rw-r--r-- | lib/packages/docutils/rst.nim | 92 | ||||
-rw-r--r-- | lib/packages/docutils/rstast.nim | 2 | ||||
-rw-r--r-- | lib/packages/docutils/rstgen.nim | 2 |
3 files changed, 88 insertions, 8 deletions
diff --git a/lib/packages/docutils/rst.nim b/lib/packages/docutils/rst.nim index 0a997ecba..629245b4b 100644 --- a/lib/packages/docutils/rst.nim +++ b/lib/packages/docutils/rst.nim @@ -2064,7 +2064,22 @@ proc getWrappableIndent(p: RstParser): int = elif nextIndent >= currentTok(p).col: # may be a definition list [case.2] result = currentTok(p).col else: - result = nextIndent # [case.3] + result = nextIndent # allow parsing next lines [case.3] + +proc getMdBlockIndent(p: RstParser): int = + ## Markdown version of `getWrappableIndent`. + if currentTok(p).kind == tkIndent: + result = currentTok(p).ival + else: + var nextIndent = p.tok[tokenAfterNewline(p)-1].ival + # TODO: Markdown-compliant definition should allow nextIndent == currInd(p): + if nextIndent <= currInd(p): # parse only this line + result = currentTok(p).col + else: + result = nextIndent # allow parsing next lines [case.3] + +template isRst(p: RstParser): bool = roPreferMarkdown notin p.s.options +template isMd(p: RstParser): bool = roPreferMarkdown in p.s.options proc parseField(p: var RstParser): PRstNode = ## Returns a parsed rnField node. 
@@ -2309,6 +2324,39 @@ proc isDefList(p: RstParser): bool = p.tok[j].kind in {tkWord, tkOther, tkPunct} and p.tok[j - 2].symbol != "::" +proc `$`(t: Token): string = # for debugging only + result = "(" & $t.kind & " line=" & $t.line & " col=" & $t.col + if t.kind == tkIndent: result = result & " ival=" & $t.ival & ")" + else: result = result & " symbol=" & t.symbol & ")" + +proc skipNewlines(p: RstParser, j: int): int = + result = j + while p.tok[result].kind != tkEof and p.tok[result].kind == tkIndent: + inc result # skip blank lines + +proc skipNewlines(p: var RstParser) = + p.idx = skipNewlines(p, p.idx) + +const maxMdRelInd = 3 ## In Markdown: maximum indentation that does not yet + ## make the indented block a code + +proc isMdRelInd(outerInd, nestedInd: int): bool = + result = outerInd <= nestedInd and nestedInd <= outerInd + maxMdRelInd + +proc isMdDefBody(p: RstParser, j: int, termCol: int): bool = + let defCol = p.tok[j].col + result = p.tok[j].symbol == ":" and + isMdRelInd(termCol, defCol) and + p.tok[j+1].kind == tkWhite and + p.tok[j+2].kind in {tkWord, tkOther, tkPunct} + +proc isMdDefListItem(p: RstParser, idx: int): bool = + var j = tokenAfterNewline(p, idx) + j = skipNewlines(p, j) + let termCol = p.tok[j].col + result = isMdRelInd(currInd(p), termCol) and + isMdDefBody(p, j, termCol) + proc isOptionList(p: RstParser): bool = result = match(p, p.idx, "-w") or match(p, p.idx, "--w") or match(p, p.idx, "/w") or match(p, p.idx, "//w") @@ -2381,8 +2429,10 @@ proc whichSection(p: RstParser): RstNodeKind = result = rnEnumList elif isOptionList(p): result = rnOptionList - elif isDefList(p): + elif isRst(p) and isDefList(p): result = rnDefList + elif isMd(p) and isMdDefListItem(p, p.idx): + result = rnMdDefList else: result = rnParagraph of tkWord, tkOther, tkWhite: @@ -2391,7 +2441,9 @@ proc whichSection(p: RstParser): RstNodeKind = if isAdornmentHeadline(p, tokIdx): result = rnHeadline else: result = rnParagraph elif match(p, p.idx, "e) ") or match(p, 
p.idx, "e. "): result = rnEnumList - elif isDefList(p): result = rnDefList + elif isRst(p) and isDefList(p): result = rnDefList + elif isMd(p) and isMdDefListItem(p, p.idx): + result = rnMdDefList else: result = rnParagraph else: result = rnLeaf @@ -2921,6 +2973,36 @@ proc parseOptionList(p: var RstParser): PRstNode = if currentTok(p).kind != tkEof: dec p.idx # back to tkIndent break +proc parseMdDefinitionList(p: var RstParser): PRstNode = + ## Parses (Pandoc/kramdown/PHPextra) Mardkown definition lists. + result = newRstNodeA(p, rnMdDefList) + let termCol = currentTok(p).col + while true: + var item = newRstNode(rnDefItem) + var term = newRstNode(rnDefName) + parseLine(p, term) + skipNewlines(p) + inc p.idx, 2 # skip ":" and space + item.add(term) + while true: + var def = newRstNode(rnDefBody) + let indent = getMdBlockIndent(p) + pushInd(p, indent) + parseSection(p, def) + popInd(p) + item.add(def) + let j = skipNewlines(p, p.idx) + if isMdDefBody(p, j, termCol): # parse next definition body + p.idx = j + 2 # skip ":" and space + else: + break + result.add(item) + let j = skipNewlines(p, p.idx) + if p.tok[j].col == termCol and isMdDefListItem(p, j): + p.idx = j # parse next item + else: + break + proc parseDefinitionList(p: var RstParser): PRstNode = result = nil var j = tokenAfterNewline(p) - 1 @@ -3094,6 +3176,7 @@ proc parseSection(p: var RstParser, result: PRstNode) = of rnLeaf: rstMessage(p, meNewSectionExpected, "(syntax error)") of rnParagraph: discard of rnDefList: a = parseDefinitionList(p) + of rnMdDefList: a = parseMdDefinitionList(p) of rnFieldList: if p.idx > 0: dec p.idx a = parseFields(p) @@ -3120,9 +3203,6 @@ proc parseSectionWrapper(p: var RstParser): PRstNode = while result.kind == rnInner and result.len == 1: result = result.sons[0] -proc `$`(t: Token): string = - result = $t.kind & ' ' & t.symbol - proc parseDoc(p: var RstParser): PRstNode = result = parseSectionWrapper(p) if currentTok(p).kind != tkEof: diff --git 
a/lib/packages/docutils/rstast.nim b/lib/packages/docutils/rstast.nim index 05e4ec39e..e85bbfb98 100644 --- a/lib/packages/docutils/rstast.nim +++ b/lib/packages/docutils/rstast.nim @@ -27,7 +27,7 @@ type rnBulletItem, # a bullet item rnEnumList, # an enumerated list rnEnumItem, # an enumerated item - rnDefList, # a definition list + rnDefList, rnMdDefList, # a definition list (RST/Markdown) rnDefItem, # an item of a definition list consisting of ... rnDefName, # ... a name part ... rnDefBody, # ... and a body part ... diff --git a/lib/packages/docutils/rstgen.nim b/lib/packages/docutils/rstgen.nim index 1b5f9e78c..f5ff9aa03 100644 --- a/lib/packages/docutils/rstgen.nim +++ b/lib/packages/docutils/rstgen.nim @@ -1212,7 +1212,7 @@ proc renderRstToOut(d: PDoc, n: PRstNode, result: var string) = of rnBulletItem, rnEnumItem: renderAux(d, n, "<li$2>$1</li>\n", "\\item $2$1\n", result) of rnEnumList: renderEnumList(d, n, result) - of rnDefList: + of rnDefList, rnMdDefList: renderAux(d, n, "<dl$2 class=\"docutils\">$1</dl>\n", "\\begin{description}\n$2\n$1\\end{description}\n", result) of rnDefItem: renderAux(d, n, result) |