diff options
Diffstat (limited to 'lib/packages/docutils/rstast.nim')
-rw-r--r-- | lib/packages/docutils/rstast.nim | 306 |
1 files changed, 231 insertions, 75 deletions
diff --git a/lib/packages/docutils/rstast.nim b/lib/packages/docutils/rstast.nim index 23233fd39..2bbb0d0b8 100644 --- a/lib/packages/docutils/rstast.nim +++ b/lib/packages/docutils/rstast.nim @@ -1,28 +1,33 @@ # # -# Nimrod's Runtime Library +# Nim's Runtime Library # (c) Copyright 2012 Andreas Rumpf # # See the file "copying.txt", included in this # distribution, for details about the copyright. # -## This module implements an AST for the `reStructuredText`:idx parser. +## This module implements an AST for the `reStructuredText`:idx: parser. + +import std/[strutils, json] + +when defined(nimPreviewSlimSystem): + import std/assertions -import strutils type - TRstNodeKind* = enum ## the possible node kinds of an PRstNode + RstNodeKind* = enum ## the possible node kinds of an PRstNode rnInner, # an inner node or a root rnHeadline, # a headline rnOverline, # an over- and underlined headline + rnMarkdownHeadline, # a Markdown headline rnTransition, # a transition (the ------------- <hr> thingie) rnParagraph, # a paragraph rnBulletList, # a bullet list rnBulletItem, # a bullet item rnEnumList, # an enumerated list rnEnumItem, # an enumerated item - rnDefList, # a definition list + rnDefList, rnMdDefList, # a definition list (RST/Markdown) rnDefItem, # an item of a definition list consisting of ... rnDefName, # ... a name part ... rnDefBody, # ... and a body part ... @@ -30,17 +35,29 @@ type rnField, # a field item rnFieldName, # consisting of a field name ... rnFieldBody, # ... and a field body - rnOptionList, rnOptionListItem, rnOptionGroup, rnOption, rnOptionString, - rnOptionArgument, rnDescription, rnLiteralBlock, rnQuotedLiteralBlock, + rnOptionList, rnOptionListItem, rnOptionGroup, rnOption, rnOptionString, + rnOptionArgument, rnDescription, rnLiteralBlock, + rnMarkdownBlockQuote, # a quote starting from punctuation like >>> + rnMarkdownBlockQuoteItem, # a quotation block, quote lines starting with + # the same number of chars rnLineBlock, # the | thingie - rnLineBlockItem, # sons of the | thing + rnLineBlockItem, # a son of rnLineBlock - one line inside it. + # When `RstNode` lineIndent="\n" the line's empty rnBlockQuote, # text just indented - rnTable, rnGridTable, rnTableRow, rnTableHeaderCell, rnTableDataCell, - rnLabel, # used for footnotes and other things + rnTable, rnGridTable, rnMarkdownTable, rnTableRow, rnTableHeaderCell, rnTableDataCell, rnFootnote, # a footnote - rnCitation, # similar to footnote - rnStandaloneHyperlink, rnHyperlink, rnRef, rnDirective, # a directive - rnDirArg, rnRaw, rnTitle, rnContents, rnImage, rnFigure, rnCodeBlock, + rnCitation, # similar to footnote, so use rnFootnote instead + rnFootnoteGroup, # footnote group - exists for a purely stylistic + # reason: to display a few footnotes as 1 block + rnStandaloneHyperlink, rnHyperlink, + rnRstRef, # RST reference like `section name`_ + rnPandocRef, # Pandoc Markdown reference like [section name] + rnInternalRef, rnFootnoteRef, + rnNimdocRef, # reference to automatically generated Nim symbol + rnDirective, # a general directive + rnDirArg, # a directive argument (for some directives). + # here are directives that are not rnDirective: + rnRaw, rnTitle, rnContents, rnImage, rnFigure, rnCodeBlock, rnAdmonition, rnRawHtml, rnRawLatex, rnContainer, # ``container`` directive rnIndex, # index directve: @@ -49,111 +66,169 @@ type # * `file#id <file#id>`_ # * `file#id <file#id>'_ rnSubstitutionDef, # a definition of a substitution - rnGeneralRole, # Inline markup: - rnSub, rnSup, rnIdx, + # Inline markup: + rnInlineCode, # interpreted text with code in a known language + rnCodeFragment, # inline code for highlighting with the specified + # class (which cannot be inferred from context) + rnUnknownRole, # interpreted text with an unknown role + rnSub, rnSup, rnIdx, rnEmphasis, # "*" rnStrongEmphasis, # "**" rnTripleEmphasis, # "***" - rnInterpretedText, # "`" + rnInterpretedText, # "`" an auxiliary role for parsing that will + # be converted into other kinds like rnInlineCode rnInlineLiteral, # "``" + rnInlineTarget, # "_`target`" rnSubstitutionReferences, # "|" rnSmiley, # some smiley + rnDefaultRole, # .. default-role:: code rnLeaf # a leaf; the node's text field contains the # leaf val + FileIndex* = distinct int32 + TLineInfo* = object + line*: uint16 + col*: int16 + fileIndex*: FileIndex + + PRstNode* = ref RstNode ## an RST node + RstNodeSeq* = seq[PRstNode] + RstNode* {.acyclic, final.} = object ## AST node (result of RST parsing) + case kind*: RstNodeKind ## the node's kind + of rnLeaf, rnSmiley: + text*: string ## string that is expected to be displayed + of rnEnumList: + labelFmt*: string ## label format like "(1)" + of rnLineBlockItem: + lineIndent*: string ## a few spaces or newline at the line beginning + of rnAdmonition: + adType*: string ## admonition type: "note", "caution", etc. This + ## text will set the style and also be displayed + of rnOverline, rnHeadline, rnMarkdownHeadline: + level*: int ## level of headings starting from 1 (main + ## chapter) to larger ones (minor sub-sections) + ## level=0 means it's document title or subtitle + of rnFootnote, rnCitation, rnOptionListItem: + order*: int ## footnote order (for auto-symbol footnotes and + ## auto-numbered ones without a label) + of rnMarkdownBlockQuoteItem: + quotationDepth*: int ## number of characters in line prefix + of rnRstRef, rnPandocRef, rnSubstitutionReferences, + rnInterpretedText, rnField, rnInlineCode, rnCodeBlock, rnFootnoteRef: + info*: TLineInfo ## To have line/column info for warnings at + ## nodes that are post-processed after parsing + of rnNimdocRef: + tooltip*: string + of rnTable, rnGridTable, rnMarkdownTable: + colCount*: int ## Number of (not-united) cells in the table + of rnTableRow: + endsHeader*: bool ## Is last row in the header of table? + of rnTableHeaderCell, rnTableDataCell: + span*: int ## Number of table columns that the cell occupies + else: + discard + anchor*: string ## anchor, internal link target + ## (aka HTML id tag, aka Latex label/hypertarget) + sons*: RstNodeSeq ## the node's sons - PRSTNode* = ref TRstNode ## an RST node - TRstNodeSeq* = seq[PRstNode] - TRSTNode* {.acyclic, final.} = object ## an RST node's description - kind*: TRstNodeKind ## the node's kind - text*: string ## valid for leafs in the AST; and the title of - ## the document or the section - level*: int ## valid for some node kinds - sons*: TRstNodeSeq ## the node's sons +proc `==`*(a, b: FileIndex): bool {.borrow.} -proc len*(n: PRstNode): int = +proc len*(n: PRstNode): int = result = len(n.sons) -proc newRstNode*(kind: TRstNodeKind): PRstNode = - new(result) - result.sons = @[] - result.kind = kind +proc newRstNode*(kind: RstNodeKind, sons: seq[PRstNode] = @[], + anchor = ""): PRstNode = + result = PRstNode(kind: kind, sons: sons, anchor: anchor) + +proc newRstNode*(kind: RstNodeKind, info: TLineInfo, + sons: seq[PRstNode] = @[]): PRstNode = + result = PRstNode(kind: kind, sons: sons) + result.info = info -proc newRstNode*(kind: TRstNodeKind, s: string): PRstNode = +proc newRstNode*(kind: RstNodeKind, s: string): PRstNode {.deprecated.} = + assert kind in {rnLeaf, rnSmiley} result = newRstNode(kind) result.text = s -proc lastSon*(n: PRstNode): PRstNode = +proc newRstLeaf*(s: string): PRstNode = + result = newRstNode(rnLeaf) + result.text = s + +proc lastSon*(n: PRstNode): PRstNode = result = n.sons[len(n.sons)-1] proc add*(father, son: PRstNode) = add(father.sons, son) -proc addIfNotNil*(father, son: PRstNode) = +proc add*(father: PRstNode; s: string) = + add(father.sons, newRstLeaf(s)) + +proc addIfNotNil*(father, son: PRstNode) = if son != nil: add(father, son) type - TRenderContext {.pure.} = object + RenderContext {.pure.} = object indent: int verbatim: int -proc renderRstToRst(d: var TRenderContext, n: PRstNode, result: var string) +proc renderRstToRst(d: var RenderContext, n: PRstNode, + result: var string) {.gcsafe.} -proc renderRstSons(d: var TRenderContext, n: PRstNode, result: var string) = - for i in countup(0, len(n) - 1): +proc renderRstSons(d: var RenderContext, n: PRstNode, result: var string) = + for i in countup(0, len(n) - 1): renderRstToRst(d, n.sons[i], result) - -proc renderRstToRst(d: var TRenderContext, n: PRstNode, result: var string) = + +proc renderRstToRst(d: var RenderContext, n: PRstNode, result: var string) = # this is needed for the index generation; it may also be useful for # debugging, but most code is already debugged... - const + const lvlToChar: array[0..8, char] = ['!', '=', '-', '~', '`', '<', '*', '|', '+'] if n == nil: return - var ind = repeatChar(d.indent) + var ind = spaces(d.indent) case n.kind - of rnInner: + of rnInner: renderRstSons(d, n, result) of rnHeadline: result.add("\n") result.add(ind) - + let oldLen = result.len renderRstSons(d, n, result) - let HeadlineLen = result.len - oldLen + let headlineLen = result.len - oldLen result.add("\n") result.add(ind) - result.add repeatChar(HeadlineLen, lvlToChar[n.level]) + result.add repeat(lvlToChar[n.level], headlineLen) of rnOverline: result.add("\n") result.add(ind) var headline = "" renderRstSons(d, n, headline) - - let lvl = repeatChar(headline.Len - d.indent, lvlToChar[n.level]) + + let lvl = repeat(lvlToChar[n.level], headline.len - d.indent) result.add(lvl) result.add("\n") result.add(headline) - + result.add("\n") result.add(ind) result.add(lvl) - of rnTransition: + of rnTransition: result.add("\n\n") result.add(ind) - result.add repeatChar(78-d.indent, '-') + result.add repeat('-', 78-d.indent) result.add("\n\n") of rnParagraph: result.add("\n\n") result.add(ind) renderRstSons(d, n, result) - of rnBulletItem: + of rnBulletItem: inc(d.indent, 2) var tmp = "" renderRstSons(d, n, tmp) - if tmp.len > 0: + if tmp.len > 0: result.add("\n") result.add(ind) result.add("* ") @@ -163,22 +238,22 @@ proc renderRstToRst(d: var TRenderContext, n: PRstNode, result: var string) = inc(d.indent, 4) var tmp = "" renderRstSons(d, n, tmp) - if tmp.len > 0: + if tmp.len > 0: result.add("\n") result.add(ind) result.add("(#) ") result.add(tmp) dec(d.indent, 4) - of rnOptionList, rnFieldList, rnDefList, rnDefItem, rnLineBlock, rnFieldName, - rnFieldBody, rnStandaloneHyperlink, rnBulletList, rnEnumList: + of rnOptionList, rnFieldList, rnDefList, rnDefItem, rnLineBlock, rnFieldName, + rnFieldBody, rnStandaloneHyperlink, rnBulletList, rnEnumList: renderRstSons(d, n, result) - of rnDefName: + of rnDefName: result.add("\n\n") result.add(ind) renderRstSons(d, n, result) of rnDefBody: inc(d.indent, 2) - if n.sons[0].kind != rnBulletList: + if n.sons[0].kind != rnBulletList: result.add("\n") result.add(ind) result.add(" ") @@ -187,20 +262,20 @@ proc renderRstToRst(d: var TRenderContext, n: PRstNode, result: var string) = of rnField: var tmp = "" renderRstToRst(d, n.sons[0], tmp) - + var L = max(tmp.len + 3, 30) inc(d.indent, L) - + result.add "\n" result.add ind result.add ':' result.add tmp result.add ':' - result.add repeatChar(L - tmp.len - 2) + result.add spaces(L - tmp.len - 2) renderRstToRst(d, n.sons[1], result) - + dec(d.indent, L) - of rnLineBlockItem: + of rnLineBlockItem: result.add("\n") result.add(ind) result.add("| ") @@ -209,39 +284,39 @@ proc renderRstToRst(d: var TRenderContext, n: PRstNode, result: var string) = inc(d.indent, 2) renderRstSons(d, n, result) dec(d.indent, 2) - of rnRef: + of rnRstRef: result.add("`") renderRstSons(d, n, result) result.add("`_") - of rnHyperlink: + of rnHyperlink: result.add('`') renderRstToRst(d, n.sons[0], result) result.add(" <") renderRstToRst(d, n.sons[1], result) result.add(">`_") - of rnGeneralRole: + of rnUnknownRole: result.add('`') renderRstToRst(d, n.sons[0],result) result.add("`:") renderRstToRst(d, n.sons[1],result) result.add(':') - of rnSub: + of rnSub: result.add('`') renderRstSons(d, n, result) result.add("`:sub:") - of rnSup: + of rnSup: result.add('`') renderRstSons(d, n, result) result.add("`:sup:") - of rnIdx: + of rnIdx: result.add('`') renderRstSons(d, n, result) result.add("`:idx:") - of rnEmphasis: + of rnEmphasis: result.add("*") renderRstSons(d, n, result) result.add("*") - of rnStrongEmphasis: + of rnStrongEmphasis: result.add("**") renderRstSons(d, n, result) result.add("**") @@ -249,11 +324,11 @@ proc renderRstToRst(d: var TRenderContext, n: PRstNode, result: var string) = result.add("***") renderRstSons(d, n, result) result.add("***") - of rnInterpretedText: + of rnInterpretedText: result.add('`') renderRstSons(d, n, result) result.add('`') - of rnInlineLiteral: + of rnInlineLiteral: inc(d.verbatim) result.add("``") renderRstSons(d, n, result) @@ -266,11 +341,11 @@ proc renderRstToRst(d: var TRenderContext, n: PRstNode, result: var string) = result.add("\\\\") # XXX: escape more special characters! else: result.add(n.text) - of rnIndex: + of rnIndex: result.add("\n\n") result.add(ind) result.add(".. index::\n") - + inc(d.indent, 3) if n.sons[2] != nil: renderRstSons(d, n.sons[2], result) dec(d.indent, 3) @@ -280,9 +355,90 @@ proc renderRstToRst(d: var TRenderContext, n: PRstNode, result: var string) = result.add(".. contents::") else: result.add("Error: cannot render: " & $n.kind) - + proc renderRstToRst*(n: PRstNode, result: var string) = ## renders `n` into its string representation and appends to `result`. - var d: TRenderContext + var d: RenderContext renderRstToRst(d, n, result) +proc renderRstToJsonNode(node: PRstNode): JsonNode = + result = + %[ + (key: "kind", val: %($node.kind)), + (key: "level", val: %BiggestInt(node.level)) + ] + if node.kind in {rnLeaf, rnSmiley} and node.text.len > 0: + result.add("text", %node.text) + if len(node.sons) > 0: + var accm = newSeq[JsonNode](len(node.sons)) + for i, son in node.sons: + accm[i] = renderRstToJsonNode(son) + result.add("sons", %accm) + +proc renderRstToJson*(node: PRstNode): string = + ## Writes the given RST node as JSON that is in the form + ## + ## { + ## "kind":string node.kind, + ## "text":optional string node.text, + ## "level":optional int node.level, + ## "sons":optional node array + ## } + renderRstToJsonNode(node).pretty + +proc renderRstToText*(node: PRstNode): string = + ## minimal text representation of markup node + const code = {rnCodeFragment, rnInterpretedText, rnInlineLiteral, rnInlineCode} + if node == nil: + return "" + case node.kind + of rnLeaf, rnSmiley: + result.add node.text + else: + if node.kind in code: result.add "`" + for i in 0 ..< node.sons.len: + if node.kind in {rnInlineCode, rnCodeBlock} and i == 0: + continue # omit language specifier + result.add renderRstToText(node.sons[i]) + if node.kind in code: result.add "`" + +proc treeRepr*(node: PRstNode, indent=0): string = + ## Writes the parsed RST `node` into an AST tree with compact string + ## representation in the format (one line per every sub-node): + ## ``indent - kind - [text|level|order|adType] - anchor (if non-zero)`` + ## (suitable for debugging of RST parsing). + if node == nil: + result.add " ".repeat(indent) & "[nil]\n" + return + result.add " ".repeat(indent) & $node.kind + case node.kind + of rnLeaf, rnSmiley: + result.add (if node.text == "": "" else: " '" & node.text & "'") + of rnEnumList: + result.add " labelFmt=" & node.labelFmt + of rnLineBlockItem: + var txt: string + if node.lineIndent == "\n": txt = " (blank line)" + else: txt = " lineIndent=" & $node.lineIndent.len + result.add txt + of rnAdmonition: + result.add " adType=" & node.adType + of rnHeadline, rnOverline, rnMarkdownHeadline: + result.add " level=" & $node.level + of rnFootnote, rnCitation, rnOptionListItem: + result.add (if node.order == 0: "" else: " order=" & $node.order) + of rnMarkdownBlockQuoteItem: + result.add " quotationDepth=" & $node.quotationDepth + of rnTable, rnGridTable, rnMarkdownTable: + result.add " colCount=" & $node.colCount + of rnTableHeaderCell, rnTableDataCell: + if node.span > 0: + result.add " span=" & $node.span + of rnTableRow: + if node.endsHeader: result.add " endsHeader" + else: + discard + result.add (if node.anchor == "": "" else: " anchor='" & node.anchor & "'") + result.add "\n" + for son in node.sons: + result.add treeRepr(son, indent=indent+2) |