Implement Markdown definition lists (+ migration) (#20333)

Implements definition lists Markdown extension adopted in a few implementations including: * [Pandoc]( https://pandoc.org/MANUAL.html#definition-lists) * [kramdown]( https://kramdown.gettalong.org/quickref.html#definition-lists) * [PHP extra Markdown]( https://michelf.ca/projects/php-markdown/extra/#def-list) Also affected files have been migrated. RST definition lists are turned off for Markdown: this solves the problem of broken formatting mentioned in https://github.com/nim-lang/Nim/pull/20292.
author: Andrey Makarov <ph.makarov@gmail.com> 2022-09-11 20:52:43 +0300
committer: GitHub <noreply@github.com> 2022-09-11 13:52:43 -0400
commit: 088487f652638a745e8e7e440a8a3b381239597b (patch)
tree: 960d2b08b4d3f16520395d7d1239946fd9403edd /lib/packages/docutils
parent: 846cc746a2350ad3f845a4eb0ce97b864891cd35 (diff)
download: Nim-088487f652638a745e8e7e440a8a3b381239597b.tar.gz
3 files changed, 88 insertions, 8 deletions
diff --git a/lib/packages/docutils/rst.nim b/lib/packages/docutils/rst.nim
index 0a997ecba..629245b4b 100644
--- a/lib/packages/docutils/rst.nim
+++ b/lib/packages/docutils/rst.nim
@@ -2064,7 +2064,22 @@ proc getWrappableIndent(p: RstParser): int =
     elif nextIndent >= currentTok(p).col: # may be a definition list [case.2]
       result = currentTok(p).col
     else:
-      result = nextIndent                 #                          [case.3]
+      result = nextIndent                 # allow parsing next lines [case.3]
+
+proc getMdBlockIndent(p: RstParser): int =
+  ## Markdown version of `getWrappableIndent`.
+  if currentTok(p).kind == tkIndent:
+    result = currentTok(p).ival
+  else:
+    var nextIndent = p.tok[tokenAfterNewline(p)-1].ival
+    # TODO: Markdown-compliant definition should allow nextIndent == currInd(p):
+    if nextIndent <= currInd(p):           # parse only this line
+      result = currentTok(p).col
+    else:
+      result = nextIndent                 # allow parsing next lines [case.3]
+
+template isRst(p: RstParser): bool = roPreferMarkdown notin p.s.options
+template isMd(p: RstParser): bool = roPreferMarkdown in p.s.options
 
 proc parseField(p: var RstParser): PRstNode =
   ## Returns a parsed rnField node.
@@ -2309,6 +2324,39 @@ proc isDefList(p: RstParser): bool =
       p.tok[j].kind in {tkWord, tkOther, tkPunct} and
       p.tok[j - 2].symbol != "::"
 
+proc `$`(t: Token): string =  # for debugging only
+  result = "(" & $t.kind & " line=" & $t.line & " col=" & $t.col
+  if t.kind == tkIndent: result = result & " ival=" & $t.ival & ")"
+  else: result = result & " symbol=" & t.symbol & ")"
+
+proc skipNewlines(p: RstParser, j: int): int =
+  result = j
+  while p.tok[result].kind != tkEof and p.tok[result].kind == tkIndent:
+    inc result  # skip blank lines
+
+proc skipNewlines(p: var RstParser) =
+  p.idx = skipNewlines(p, p.idx)
+
+const maxMdRelInd = 3  ## In Markdown: maximum indentation that does not yet
+                       ## make the indented block a code
+
+proc isMdRelInd(outerInd, nestedInd: int): bool =
+  result = outerInd <= nestedInd and nestedInd <= outerInd + maxMdRelInd
+
+proc isMdDefBody(p: RstParser, j: int, termCol: int): bool =
+  let defCol = p.tok[j].col
+  result = p.tok[j].symbol == ":" and
+    isMdRelInd(termCol, defCol) and
+    p.tok[j+1].kind == tkWhite and
+    p.tok[j+2].kind in {tkWord, tkOther, tkPunct}
+
+proc isMdDefListItem(p: RstParser, idx: int): bool =
+  var j = tokenAfterNewline(p, idx)
+  j = skipNewlines(p, j)
+  let termCol = p.tok[j].col
+  result = isMdRelInd(currInd(p), termCol) and
+      isMdDefBody(p, j, termCol)
+
 proc isOptionList(p: RstParser): bool =
   result = match(p, p.idx, "-w") or match(p, p.idx, "--w") or
            match(p, p.idx, "/w") or match(p, p.idx, "//w")
@@ -2381,8 +2429,10 @@ proc whichSection(p: RstParser): RstNodeKind =
       result = rnEnumList
     elif isOptionList(p):
       result = rnOptionList
-    elif isDefList(p):
+    elif isRst(p) and isDefList(p):
       result = rnDefList
+    elif isMd(p) and isMdDefListItem(p, p.idx):
+      result = rnMdDefList
     else:
       result = rnParagraph
   of tkWord, tkOther, tkWhite:
@@ -2391,7 +2441,9 @@ proc whichSection(p: RstParser): RstNodeKind =
       if isAdornmentHeadline(p, tokIdx): result = rnHeadline
       else: result = rnParagraph
     elif match(p, p.idx, "e) ") or match(p, p.idx, "e. "): result = rnEnumList
-    elif isDefList(p): result = rnDefList
+    elif isRst(p) and isDefList(p): result = rnDefList
+    elif isMd(p) and isMdDefListItem(p, p.idx):
+      result = rnMdDefList
     else: result = rnParagraph
   else: result = rnLeaf
 
@@ -2921,6 +2973,36 @@ proc parseOptionList(p: var RstParser): PRstNode =
       if currentTok(p).kind != tkEof: dec p.idx  # back to tkIndent
       break
 
+proc parseMdDefinitionList(p: var RstParser): PRstNode =
+  ## Parses (Pandoc/kramdown/PHPextra) Mardkown definition lists.
+  result = newRstNodeA(p, rnMdDefList)
+  let termCol = currentTok(p).col
+  while true:
+    var item = newRstNode(rnDefItem)
+    var term = newRstNode(rnDefName)
+    parseLine(p, term)
+    skipNewlines(p)
+    inc p.idx, 2  # skip ":" and space
+    item.add(term)
+    while true:
+      var def = newRstNode(rnDefBody)
+      let indent = getMdBlockIndent(p)
+      pushInd(p, indent)
+      parseSection(p, def)
+      popInd(p)
+      item.add(def)
+      let j = skipNewlines(p, p.idx)
+      if isMdDefBody(p, j, termCol):  # parse next definition body
+        p.idx = j + 2  # skip ":" and space
+      else:
+        break
+    result.add(item)
+    let j = skipNewlines(p, p.idx)
+    if p.tok[j].col == termCol and isMdDefListItem(p, j):
+      p.idx = j  # parse next item
+    else:
+      break
+
 proc parseDefinitionList(p: var RstParser): PRstNode =
   result = nil
   var j = tokenAfterNewline(p) - 1
@@ -3094,6 +3176,7 @@ proc parseSection(p: var RstParser, result: PRstNode) =
     of rnLeaf: rstMessage(p, meNewSectionExpected, "(syntax error)")
     of rnParagraph: discard
     of rnDefList: a = parseDefinitionList(p)
+    of rnMdDefList: a = parseMdDefinitionList(p)
     of rnFieldList:
       if p.idx > 0: dec p.idx
       a = parseFields(p)
@@ -3120,9 +3203,6 @@ proc parseSectionWrapper(p: var RstParser): PRstNode =
   while result.kind == rnInner and result.len == 1:
     result = result.sons[0]
 
-proc `$`(t: Token): string =
-  result = $t.kind & ' ' & t.symbol
-
 proc parseDoc(p: var RstParser): PRstNode =
   result = parseSectionWrapper(p)
   if currentTok(p).kind != tkEof:
diff --git a/lib/packages/docutils/rstast.nim b/lib/packages/docutils/rstast.nim
index 05e4ec39e..e85bbfb98 100644
--- a/lib/packages/docutils/rstast.nim
+++ b/lib/packages/docutils/rstast.nim
@@ -27,7 +27,7 @@ type
     rnBulletItem,             # a bullet item
     rnEnumList,               # an enumerated list
     rnEnumItem,               # an enumerated item
-    rnDefList,                # a definition list
+    rnDefList, rnMdDefList,   # a definition list (RST/Markdown)
     rnDefItem,                # an item of a definition list consisting of ...
     rnDefName,                # ... a name part ...
     rnDefBody,                # ... and a body part ...
diff --git a/lib/packages/docutils/rstgen.nim b/lib/packages/docutils/rstgen.nim
index 1b5f9e78c..f5ff9aa03 100644
--- a/lib/packages/docutils/rstgen.nim
+++ b/lib/packages/docutils/rstgen.nim
@@ -1212,7 +1212,7 @@ proc renderRstToOut(d: PDoc, n: PRstNode, result: var string) =
   of rnBulletItem, rnEnumItem:
     renderAux(d, n, "<li$2>$1</li>\n", "\\item $2$1\n", result)
   of rnEnumList: renderEnumList(d, n, result)
-  of rnDefList:
+  of rnDefList, rnMdDefList:
     renderAux(d, n, "<dl$2 class=\"docutils\">$1</dl>\n",
                     "\\begin{description}\n$2\n$1\\end{description}\n", result)
   of rnDefItem: renderAux(d, n, result)
author	Andrey Makarov <ph.makarov@gmail.com>	2022-09-11 20:52:43 +0300
committer	GitHub <noreply@github.com>	2022-09-11 13:52:43 -0400
commit	088487f652638a745e8e7e440a8a3b381239597b (patch)
tree	960d2b08b4d3f16520395d7d1239946fd9403edd /lib/packages/docutils
parent	846cc746a2350ad3f845a4eb0ce97b864891cd35 (diff)
download	Nim-088487f652638a745e8e7e440a8a3b381239597b.tar.gz