diff options
author | Andrey Makarov <ph.makarov@gmail.com> | 2022-07-15 20:27:54 +0300 |
---|---|---|
committer | GitHub <noreply@github.com> | 2022-07-15 19:27:54 +0200 |
commit | 417b90a7e5b88bfc0ad1bfbbc81a3205c99e128e (patch) | |
tree | 676f65014d120cb19d5f8fa8b92cf27557a6f80a /lib | |
parent | f35c9cf73ddb3150ab6dbe449db4975866ee8a11 (diff) | |
download | Nim-417b90a7e5b88bfc0ad1bfbbc81a3205c99e128e.tar.gz |
Improve Markdown code blocks & start moving docs to Markdown style (#19954)
- add additional parameters parsing (other implementations will just ignore them). E.g. if in RST we have: .. code:: nim :test: "nim c $1" ... then in Markdown that will be: ```nim test="nim c $1" ... ``` - implement Markdown interpretation of additional indentation which is less than 4 spaces (>=4 spaces is a code block but it's not implemented yet). RST interpretes it as quoted block, for Markdown it's just normal paragraphs. - add separate `md2html` and `md2tex` commands. This is to separate Markdown behavior in cases when it diverges w.r.t. RST significantly — most conspicously like in the case of additional indentation above, and also currently the contradicting inline rule of Markdown is also turned on only in `md2html` and `md2tex`. **Rationale:** mixing Markdown and RST arbitrarily is a way to nowhere, we need to provide a way to fix the particular behavior. Note that still all commands have **both** Markdown and RST features **enabled**. In this PR `*.nim` files can be processed only in Markdown mode, while `md2html` is for `*.md` files and `rst2html` for `*.rst` files. - rename `*.rst` files to `.*md` as our current default behavior is already Markdown-ish - convert code blocks in `docgen.rst` to Markdown style as an example. Other code blocks will be converted in the follow-up PRs - fix indentation inside Markdown code blocks — additional indentation is preserved there - allow more than 3 backticks open/close blocks (tildas \~ are still not allowed to avoid conflict with RST adornment headings) see also https://github.com/nim-lang/RFCs/issues/355 - better error messages - (other) fix a bug that admonitions cannot be used in sandbox mode; fix annoying warning on line 2711
Diffstat (limited to 'lib')
-rw-r--r-- | lib/packages/docutils/highlite.nim | 4 | ||||
-rw-r--r-- | lib/packages/docutils/rst.nim | 151 |
2 files changed, 119 insertions, 36 deletions
diff --git a/lib/packages/docutils/highlite.nim b/lib/packages/docutils/highlite.nim index d36b2c877..3af94ab21 100644 --- a/lib/packages/docutils/highlite.nim +++ b/lib/packages/docutils/highlite.nim @@ -125,9 +125,7 @@ proc initGeneralTokenizer*(g: var GeneralTokenizer, buf: cstring) = g.length = 0 g.state = low(TokenClass) g.lang = low(SourceLanguage) - var pos = 0 # skip initial whitespace: - while g.buf[pos] in {' ', '\t'..'\r'}: inc(pos) - g.pos = pos + g.pos = 0 proc initGeneralTokenizer*(g: var GeneralTokenizer, buf: string) = initGeneralTokenizer(g, cstring(buf)) diff --git a/lib/packages/docutils/rst.nim b/lib/packages/docutils/rst.nim index cd5f26278..1721674c8 100644 --- a/lib/packages/docutils/rst.nim +++ b/lib/packages/docutils/rst.nim @@ -8,20 +8,23 @@ # ## ================================== -## rst +## packages/docutils/rst ## ================================== ## ## ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ## Nim-flavored reStructuredText and Markdown ## ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ## -## This module implements a `reStructuredText`:idx: (RST) parser. +## This module implements a `reStructuredText`:idx: (RST) and +## `Markdown`:idx: parser. ## A large subset is implemented with some limitations_ and ## `Nim-specific features`_. -## A few `extra features`_ of the `Markdown`:idx: syntax are -## also supported. +## Both Markdown and RST are mark-up languages whose goal is to +## typeset texts with complex structure, formatting and references +## using simple plaintext representation. ## -## Nim can output the result to HTML [#html]_ or Latex [#latex]_. +## This module is also embedded into Nim compiler; the compiler can output +## the result to HTML [#html]_ or Latex [#latex]_. ## ## .. [#html] commands `nim doc`:cmd: for ``*.nim`` files and ## `nim rst2html`:cmd: for ``*.rst`` files @@ -29,11 +32,13 @@ ## .. [#latex] commands `nim doc2tex`:cmd: for ``*.nim`` and ## `nim rst2tex`:cmd: for ``*.rst``. ## -## If you are new to RST please consider reading the following: +## If you are new to Markdown/RST please consider reading the following: ## -## 1) a short `quick introduction`_ -## 2) an `RST reference`_: a comprehensive cheatsheet for RST -## 3) a more formal 50-page `RST specification`_. +## 1) `Markdown Basic Syntax`_ +## 2) a long specification of Markdown: `CommonMark Spec`_ +## 3) a short `quick introduction`_ to RST +## 4) an `RST reference`_: a comprehensive cheatsheet for RST +## 5) a more formal 50-page `RST specification`_. ## ## Features ## -------- @@ -120,7 +125,13 @@ ## ## * emoji / smiley symbols ## * Markdown tables -## * Markdown code blocks +## * Markdown code blocks. For them the same additional arguments as for RST +## code blocks can be provided (e.g. `test` or `number-lines`) but with +## a one-line syntax like this:: +## +## ```nim test number-lines=10 +## echo "ok" +## ``` ## * Markdown links ## * Markdown headlines ## * Markdown block quotes @@ -211,6 +222,8 @@ ## See `packages/docutils/rstgen module <rstgen.html>`_ to know how to ## generate HTML or Latex strings to embed them into your documents. ## +## .. _Markdown Basic Syntax: https://docs.github.com/en/get-started/writing-on-github/getting-started-with-writing-and-formatting-on-github/basic-writing-and-formatting-syntax +## .. _CommonMark Spec: https://spec.commonmark.org/0.30 ## .. _quick introduction: https://docutils.sourceforge.io/docs/user/rst/quickstart.html ## .. _RST reference: https://docutils.sourceforge.io/docs/user/rst/quickref.html ## .. _RST specification: https://docutils.sourceforge.io/docs/ref/rst/restructuredtext.html @@ -253,6 +266,7 @@ type MsgKind* = enum ## the possible messages meCannotOpenFile = "cannot open '$1'", meExpected = "'$1' expected", + meMissingClosing = "$1", meGridTableNotImplemented = "grid table is not implemented", meMarkdownIllformedTable = "illformed delimiter row of a Markdown table", meIllformedTable = "Illformed table: $1", @@ -323,7 +337,10 @@ const ":geek:": "icon_e_geek", ":ugeek:": "icon_e_ugeek" } - SandboxDirAllowlist = ["image", "code", "code-block"] + SandboxDirAllowlist = [ + "image", "code", "code-block", "admonition", "attention", "caution", + "container", "contents", "danger", "default-role", "error", "figure", + "hint", "important", "index", "note", "role", "tip", "title", "warning"] type TokType = enum @@ -1616,35 +1633,89 @@ proc parseUntil(p: var RstParser, father: PRstNode, postfix: string, inc p.idx else: rstMessage(p, meExpected, postfix, line, col) +proc parseMarkdownCodeblockFields(p: var RstParser): PRstNode = + ## Parses additional (after language string) code block parameters + ## in a format *suggested* in the `CommonMark Spec`_ with handling of `"`. + if currentTok(p).kind == tkIndent: + result = nil + else: + result = newRstNode(rnFieldList) + while currentTok(p).kind != tkIndent: + if currentTok(p).kind == tkWhite: + inc p.idx + else: + let field = newRstNode(rnField) + var fieldName = "" + while currentTok(p).kind notin {tkWhite, tkIndent, tkEof} and + currentTok(p).symbol != "=": + fieldName.add currentTok(p).symbol + inc p.idx + field.add(newRstNode(rnFieldName, @[newLeaf(fieldName)])) + if currentTok(p).kind == tkWhite: inc p.idx + let fieldBody = newRstNode(rnFieldBody) + if currentTok(p).symbol == "=": + inc p.idx + if currentTok(p).kind == tkWhite: inc p.idx + var fieldValue = "" + if currentTok(p).symbol == "\"": + while true: + fieldValue.add currentTok(p).symbol + inc p.idx + if currentTok(p).kind == tkEof: + rstMessage(p, meExpected, "\"") + elif currentTok(p).symbol == "\"": + fieldValue.add "\"" + inc p.idx + break + else: + while currentTok(p).kind notin {tkWhite, tkIndent, tkEof}: + fieldValue.add currentTok(p).symbol + inc p.idx + fieldBody.add newLeaf(fieldValue) + field.add(fieldBody) + result.add(field) + proc parseMarkdownCodeblock(p: var RstParser): PRstNode = result = newRstNodeA(p, rnCodeBlock) + let line = curLine(p) + let baseCol = currentTok(p).col + let baseSym = currentTok(p).symbol # usually just ``` + inc p.idx result.info = lineInfo(p) var args = newRstNode(rnDirArg) + var fields: PRstNode = nil if currentTok(p).kind == tkWord: args.add(newLeaf(p)) inc p.idx + fields = parseMarkdownCodeblockFields(p) else: args = nil var n = newLeaf("") while true: - case currentTok(p).kind - of tkEof: - rstMessage(p, meExpected, "```") + if currentTok(p).kind == tkEof: + rstMessage(p, meMissingClosing, + "$1 (started at line $2)" % [baseSym, $line]) break - of tkPunct, tkAdornment: - if currentTok(p).symbol == "```": - inc p.idx - break - else: - n.text.add(currentTok(p).symbol) - inc p.idx + elif nextTok(p).kind in {tkPunct, tkAdornment} and + nextTok(p).symbol[0] == baseSym[0] and + nextTok(p).symbol.len >= baseSym.len: + inc p.idx, 2 + break + elif currentTok(p).kind == tkIndent: + n.text.add "\n" + if currentTok(p).ival > baseCol: + n.text.add " ".repeat(currentTok(p).ival - baseCol) + elif currentTok(p).ival < baseCol: + rstMessage(p, mwRstStyle, + "unexpected de-indentation in Markdown code block") + inc p.idx else: n.text.add(currentTok(p).symbol) inc p.idx var lb = newRstNode(rnLiteralBlock) lb.add(n) result.add(args) - result.add(PRstNode(nil)) + result.add(fields) result.add(lb) proc parseMarkdownLink(p: var RstParser; father: PRstNode): bool = @@ -1730,6 +1801,12 @@ proc parseFootnoteName(p: var RstParser, reference: bool): PRstNode = inc i p.idx = i +proc isMarkdownCodeBlock(p: RstParser): bool = + result = (roSupportMarkdown in p.s.options and + currentTok(p).kind in {tkPunct, tkAdornment} and + currentTok(p).symbol[0] == '`' and # tilde ~ is not supported + currentTok(p).symbol.len >= 3) + proc parseInline(p: var RstParser, father: PRstNode) = var n: PRstNode # to be used in `if` condition let saveIdx = p.idx @@ -1755,8 +1832,7 @@ proc parseInline(p: var RstParser, father: PRstNode) = addAnchorRst(p, name = linkName(n), refn = refn, reset = true, anchorType=manualInlineAnchor) father.add(n) - elif roSupportMarkdown in p.s.options and currentTok(p).symbol == "```": - inc p.idx + elif isMarkdownCodeBlock(p): father.add(parseMarkdownCodeblock(p)) elif isInlineMarkupStart(p, "``"): var n = newRstNode(rnInlineLiteral) @@ -1816,8 +1892,7 @@ proc parseInline(p: var RstParser, father: PRstNode) = return parseWordOrRef(p, father) of tkAdornment, tkOther, tkWhite: - if roSupportMarkdown in p.s.options and currentTok(p).symbol == "```": - inc p.idx + if isMarkdownCodeBlock(p): father.add(parseMarkdownCodeblock(p)) return if roSupportSmilies in p.s.options: @@ -2194,7 +2269,7 @@ proc findPipe(p: RstParser, start: int): bool = proc whichSection(p: RstParser): RstNodeKind = if currentTok(p).kind in {tkAdornment, tkPunct}: # for punctuation sequences that can be both tkAdornment and tkPunct - if roSupportMarkdown in p.s.options and currentTok(p).symbol == "```": + if isMarkdownCodeBlock(p): return rnCodeBlock elif currentTok(p).symbol == "::": return rnLiteralBlock @@ -2633,7 +2708,9 @@ proc parseSimpleTable(p: var RstParser): PRstNode = # fix rnTableDataCell -> rnTableHeaderCell for previous table rows: for nRow in 0 ..< result.sons.len: for nCell in 0 ..< result.sons[nRow].len: - result.sons[nRow].sons[nCell].kind = rnTableHeaderCell + template cell: PRstNode = result.sons[nRow].sons[nCell] + cell = PRstNode(kind: rnTableHeaderCell, sons: cell.sons, + span: cell.span, anchor: cell.anchor) if currentTok(p).kind == tkEof: break let tabRow = parseSimpleTableRow(p, cols, colChar) result.add tabRow @@ -2892,11 +2969,19 @@ proc parseSection(p: var RstParser, result: PRstNode) = if currInd(p) == currentTok(p).ival: inc p.idx elif currentTok(p).ival > currInd(p): - pushInd(p, currentTok(p).ival) - var a = newRstNodeA(p, rnBlockQuote) - parseSection(p, a) - result.add(a) - popInd(p) + if roPreferMarkdown in p.s.options: # Markdown => normal paragraphs + if currentTok(p).ival - currInd(p) >= 4: + rstMessage(p, mwRstStyle, + "Markdown indented code not implemented") + pushInd(p, currentTok(p).ival) + parseSection(p, result) + popInd(p) + else: # RST mode => block quotes + pushInd(p, currentTok(p).ival) + var a = newRstNodeA(p, rnBlockQuote) + parseSection(p, a) + result.add(a) + popInd(p) else: while currentTok(p).kind != tkEof and nextTok(p).kind == tkIndent: inc p.idx # skip blank lines |