RST tables: fix latex col number; allow less than three of `=` (#16040)

author: Andrey Makarov <ph.makarov@gmail.com> 2020-12-04 10:50:17 +0300
committer: GitHub <noreply@github.com> 2020-12-04 08:50:17 +0100
commit: 6877e0c8a3881e86ed40ddac3acba8926c8fc6ca (patch)
tree: 3eea74e0b0fd9d96c838cbfdec3bd935bb2953ea
parent: e4e5a0c65ab10f48746f8f9b2417e8ca69724a04 (diff)
download: Nim-6877e0c8a3881e86ed40ddac3acba8926c8fc6ca.tar.gz
5 files changed, 263 insertions, 31 deletions
diff --git a/doc/nep1.rst b/doc/nep1.rst
index 1ab45d680..73e4d8433 100644
--- a/doc/nep1.rst
+++ b/doc/nep1.rst
@@ -1,6 +1,6 @@
-==============================================
+==========================================================
 Nim Enhancement Proposal #1 - Standard Library Style Guide
-==============================================
+==========================================================
 :Author: Clay Sweetser, Dominik Picheta
 :Version: |nimversion|
 
diff --git a/lib/packages/docutils/rst.nim b/lib/packages/docutils/rst.nim
index a72ee17b6..bc3efa272 100644
--- a/lib/packages/docutils/rst.nim
+++ b/lib/packages/docutils/rst.nim
@@ -101,7 +101,12 @@ const
 
 type
   TokType = enum
-    tkEof, tkIndent, tkWhite, tkWord, tkAdornment, tkPunct, tkOther
+    tkEof, tkIndent,
+    tkWhite, tkWord,
+    tkAdornment,              # used for chapter adornment, transitions and
+                              # horizontal table borders
+    tkPunct,                  # one or many punctuation characters
+    tkOther
   Token = object              # a RST token
     kind*: TokType            # the type of the token
     ival*: int                # the indentation or parsed integer value
@@ -114,6 +119,7 @@ type
     bufpos*: int
     line*, col*, baseIndent*: int
     skipPounds*: bool
+    adornmentLine*: bool
 
 proc getThing(L: var Lexer, tok: var Token, s: set[char]) =
   tok.kind = tkWord
@@ -127,8 +133,26 @@ proc getThing(L: var Lexer, tok: var Token, s: set[char]) =
   inc L.col, pos - L.bufpos
   L.bufpos = pos
 
-proc getAdornment(L: var Lexer, tok: var Token) =
-  tok.kind = tkAdornment
+proc isCurrentLineAdornment(L: var Lexer): bool =
+  var pos = L.bufpos
+  let c = L.buf[pos]
+  while true:
+    inc pos
+    if L.buf[pos] in {'\c', '\l', '\0'}:
+      break
+    if c == '+':  # grid table
+      if L.buf[pos] notin {'-', '=', '+'}:
+        return false
+    else:  # section adornment or table horizontal border
+      if L.buf[pos] notin {c, ' ', '\t', '\v', '\f'}:
+        return false
+  result = true
+
+proc getPunctAdornment(L: var Lexer, tok: var Token) =
+  if L.adornmentLine:
+    tok.kind = tkAdornment
+  else:
+    tok.kind = tkPunct
   tok.line = L.line
   tok.col = L.col
   var pos = L.bufpos
@@ -139,6 +163,8 @@ proc getAdornment(L: var Lexer, tok: var Token) =
     if L.buf[pos] != c: break
   inc L.col, pos - L.bufpos
   L.bufpos = pos
+  if tok.symbol == "\\": tok.kind = tkPunct
+    # nim extension: standalone \ can not be adornment
 
 proc getBracket(L: var Lexer, tok: var Token) =
   tok.kind = tkPunct
@@ -189,6 +215,8 @@ proc getIndent(L: var Lexer, tok: var Token) =
 proc rawGetTok(L: var Lexer, tok: var Token) =
   tok.symbol = ""
   tok.ival = 0
+  if L.col == 0:
+    L.adornmentLine = false
   var c = L.buf[L.bufpos]
   case c
   of 'a'..'z', 'A'..'Z', '\x80'..'\xFF', '0'..'9':
@@ -200,11 +228,13 @@ proc rawGetTok(L: var Lexer, tok: var Token) =
       rawGetTok(L, tok)       # ignore spaces before \n
   of '\x0D', '\x0A':
     getIndent(L, tok)
+    L.adornmentLine = false
   of '!', '\"', '#', '$', '%', '&', '\'',  '*', '+', ',', '-', '.',
      '/', ':', ';', '<', '=', '>', '?', '@', '\\', '^', '_', '`',
      '|', '~':
-    getAdornment(L, tok)
-    if tok.symbol.len <= 3: tok.kind = tkPunct
+    if L.col == 0:
+      L.adornmentLine = L.isCurrentLineAdornment()
+    getPunctAdornment(L, tok)
   of '(', ')', '[', ']', '{', '}':
     getBracket(L, tok)
   else:
@@ -730,7 +760,7 @@ proc parseMarkdownCodeblock(p: var RstParser): PRstNode =
     of tkEof:
       rstMessage(p, meExpected, "```")
       break
-    of tkPunct:
+    of tkPunct, tkAdornment:
       if currentTok(p).symbol == "```":
         inc p.idx
         break
@@ -822,6 +852,10 @@ proc parseInline(p: var RstParser, father: PRstNode) =
         return
     parseUrl(p, father)
   of tkAdornment, tkOther, tkWhite:
+    if roSupportMarkdown in p.s.options and currentTok(p).symbol == "```":
+      inc p.idx
+      father.add(parseMarkdownCodeblock(p))
+      return
     if roSupportSmilies in p.s.options:
       let n = parseSmiley(p)
       if n != nil:
@@ -1011,6 +1045,18 @@ proc tokenAfterNewline(p: RstParser): int =
       break
     else: inc result
 
+proc isAdornmentHeadline(p: RstParser, adornmentIdx: int): bool =
+  var headlineLen = 0
+  if p.idx < adornmentIdx:  # underline
+    for i in p.idx ..< adornmentIdx-1:  # adornmentIdx-1 is a linebreak
+      headlineLen += p.tok[i].symbol.len
+  else:  # overline
+    var i = p.idx + 2
+    while p.tok[i].kind notin {tkEof, tkIndent}:
+      headlineLen += p.tok[i].symbol.len
+      inc i
+  return p.tok[adornmentIdx].symbol.len >= headlineLen
+
 proc isLineBlock(p: RstParser): bool =
   var j = tokenAfterNewline(p)
   result = currentTok(p).col == p.tok[j].col and p.tok[j].symbol == "|" or
@@ -1052,13 +1098,26 @@ proc findPipe(p: RstParser, start: int): bool =
     inc i
 
 proc whichSection(p: RstParser): RstNodeKind =
+  if currentTok(p).kind in {tkAdornment, tkPunct}:
+    # for punctuation sequences that can be both tkAdornment and tkPunct
+    if roSupportMarkdown in p.s.options and currentTok(p).symbol == "```":
+      return rnCodeBlock
+    elif currentTok(p).symbol == "::":
+      return rnLiteralBlock
+    elif currentTok(p).symbol == ".." and predNL(p):
+     return rnDirective
   case currentTok(p).kind
   of tkAdornment:
-    if match(p, p.idx + 1, "ii"): result = rnTransition
+    if match(p, p.idx + 1, "ii") and currentTok(p).symbol.len >= 4:
+      result = rnTransition
+    elif match(p, p.idx, "+a+"):
+      result = rnGridTable
+      rstMessage(p, meGridTableNotImplemented)
     elif match(p, p.idx + 1, " a"): result = rnTable
-    elif match(p, p.idx + 1, "i"): result = rnOverline
-    elif isMarkdownHeadline(p):
-      result = rnHeadline
+    elif currentTok(p).symbol == "|" and isLineBlock(p):
+      result = rnLineBlock
+    elif match(p, p.idx + 1, "i") and isAdornmentHeadline(p, p.idx):
+      result = rnOverline
     else:
       result = rnLeaf
   of tkPunct:
@@ -1067,27 +1126,18 @@ proc whichSection(p: RstParser): RstNodeKind =
     elif roSupportMarkdown in p.s.options and predNL(p) and
         match(p, p.idx, "| w") and findPipe(p, p.idx+3):
       result = rnMarkdownTable
-    elif currentTok(p).symbol == "```":
-      result = rnCodeBlock
+    elif currentTok(p).symbol == "|" and isLineBlock(p):
+      result = rnLineBlock
     elif match(p, tokenAfterNewline(p), "ai"):
       result = rnHeadline
-    elif currentTok(p).symbol == "::":
-      result = rnLiteralBlock
     elif predNL(p) and
         currentTok(p).symbol in ["+", "*", "-"] and nextTok(p).kind == tkWhite:
       result = rnBulletList
-    elif currentTok(p).symbol == "|" and isLineBlock(p):
-      result = rnLineBlock
-    elif currentTok(p).symbol == ".." and predNL(p):
-      result = rnDirective
     elif match(p, p.idx, ":w:") and predNL(p):
       # (currentTok(p).symbol == ":")
       result = rnFieldList
     elif match(p, p.idx, "(e) ") or match(p, p.idx, "e. "):
       result = rnEnumList
-    elif match(p, p.idx, "+a+"):
-      result = rnGridTable
-      rstMessage(p, meGridTableNotImplemented)
     elif isDefList(p):
       result = rnDefList
     elif isOptionList(p):
@@ -1095,7 +1145,10 @@ proc whichSection(p: RstParser): RstNodeKind =
     else:
       result = rnParagraph
   of tkWord, tkOther, tkWhite:
-    if match(p, tokenAfterNewline(p), "ai"): result = rnHeadline
+    let tokIdx = tokenAfterNewline(p)
+    if match(p, tokIdx, "ai"):
+      if isAdornmentHeadline(p, tokIdx): result = rnHeadline
+      else: result = rnParagraph
     elif match(p, p.idx, "e) ") or match(p, p.idx, "e. "): result = rnEnumList
     elif isDefList(p): result = rnDefList
     else: result = rnParagraph
diff --git a/lib/packages/docutils/rstgen.nim b/lib/packages/docutils/rstgen.nim
index d6bd26ad7..d88f2ed77 100644
--- a/lib/packages/docutils/rstgen.nim
+++ b/lib/packages/docutils/rstgen.nim
@@ -1009,7 +1009,8 @@ proc renderContainer(d: PDoc, n: PRstNode, result: var string) =
 
 proc texColumns(n: PRstNode): string =
   result = ""
-  for i in countup(1, len(n)): add(result, "|X")
+  let nColumns = if n.sons.len > 0: len(n.sons[0]) else: 1
+  for i in countup(1, nColumns): add(result, "|X")
 
 proc renderField(d: PDoc, n: PRstNode, result: var string) =
   var b = false
diff --git a/nimdoc/rst2html/source/rst_examples.rst b/nimdoc/rst2html/source/rst_examples.rst
index c8d15aef8..54f0124c8 100644
--- a/nimdoc/rst2html/source/rst_examples.rst
+++ b/nimdoc/rst2html/source/rst_examples.rst
@@ -1,6 +1,6 @@
-==========
+================
 Not a Nim Manual
-==========
+================
 
 :Authors: Andreas Rumpf, Zahary Karadjov
 :Version: |nimversion|
diff --git a/tests/stdlib/trstgen.nim b/tests/stdlib/trstgen.nim
index 946104e7c..1bf73e146 100644
--- a/tests/stdlib/trstgen.nim
+++ b/tests/stdlib/trstgen.nim
@@ -6,7 +6,7 @@ outputsub: ""
 
 import ../../lib/packages/docutils/rstgen
 import ../../lib/packages/docutils/rst
-import unittest, strtabs
+import unittest, strutils, strtabs
 
 suite "YAML syntax highlighting":
   test "Basics":
@@ -144,6 +144,12 @@ suite "YAML syntax highlighting":
   <span class="StringLit">?not a map key</span></pre>"""
 
 
+suite "RST/Markdown general":
+  test "RST emphasis":
+    assert rstToHtml("*Hello* **world**!", {},
+      newStringTable(modeStyleInsensitive)) ==
+      "<em>Hello</em> <strong>world</strong>!"
+
   test "Markdown links":
     let
       a = rstToHtml("(( [Nim](https://nim-lang.org/) ))", {roSupportMarkdown}, defaultConfig())
@@ -178,7 +184,179 @@ not in table"""
     assert output2 == """<table border="1" class="docutils"><tr><th>A1 header</th><th>A2</th></tr>
 </table>"""
 
+  test "RST tables":
+    let input1 = """
+Test 2 column/4 rows table:
+====   ===
+H0     H1 
+====   ===
+A0     A1 
+====   ===
+A2     A3 
+====   ===
+A4     A5 
+====   === """
+    let output1 = rstToLatex(input1, {})
+    assert "{|X|X|}" in output1  # 2 columns
+    assert count(output1, "\\\\") == 4  # 4 rows
+    for cell in ["H0", "H1", "A0", "A1", "A2", "A3", "A4", "A5"]:
+      assert cell in output1
+
+    let input2 = """
+Now test 3 columns / 2 rows, and also borders containing 4 =, 3 =, 1 = signs:
+
+====   ===  =
+H0     H1   H
+====   ===  =
+A0     A1   X
+       Ax   Y
+====   ===  = """
+    let output2 = rstToLatex(input2, {})
+    assert "{|X|X|X|}" in output2  # 3 columns
+    assert count(output2, "\\\\") == 2  # 2 rows
+    for cell in ["H0", "H1", "H", "A0", "A1", "X", "Ax", "Y"]:
+      assert cell in output2
+
+
+  test "RST adornments":
+    let input1 = """
+Check that a few punctuation symbols are not parsed as adornments:
+:word1: word2 .... word3 """
+    let output1 = rstToHtml(input1, {roSupportMarkdown}, defaultConfig())
+    discard output1
+
+  test "RST sections":
+    let input1 = """
+Long chapter name
+'''''''''''''''''''
+"""
+    let output1 = rstToHtml(input1, {roSupportMarkdown}, defaultConfig())
+    assert "Long chapter name" in output1 and "<h1" in output1
+
+    let input2 = """
+Short chapter name:
+
+ChA
+===
+"""
+    let output2 = rstToHtml(input2, {roSupportMarkdown}, defaultConfig())
+    assert "ChA" in output2 and "<h1" in output2
+
+    let input3 = """
+Very short chapter name:
+
+X
+~
+"""
+    let output3 = rstToHtml(input3, {roSupportMarkdown}, defaultConfig())
+    assert "X" in output3 and "<h1" in output3
+
+    let input4 = """
+Check that short underline is not enough to make section:
+
+Wrong chapter
+------------
+
+"""
+    let output4 = rstToHtml(input4, {roSupportMarkdown}, defaultConfig())
+    assert "Wrong chapter" in output4 and "<h1" notin output4
+
+    let input5 = """
+Check that punctuation after adornment and indent are not detected as adornment.
+
+Some chapter
+--------------
+
+  "punctuation symbols" """
+    let output5 = rstToHtml(input5, {roSupportMarkdown}, defaultConfig())
+    assert "&quot;punctuation symbols&quot;" in output5 and "<h1" in output5
+
+
+  test "RST links":
+    let input1 = """
+Want to learn about `my favorite programming language`_?
+
+.. _my favorite programming language: https://nim-lang.org"""
+    let output1 = rstToHtml(input1, {roSupportMarkdown}, defaultConfig())
+    assert "<a" in output1 and "href=\"https://nim-lang.org\"" in output1
+
+  test "RST transitions":
+    let input1 = """
+context1
+
+~~~~
+
+context2
+"""
+    let output1 = rstToHtml(input1, {roSupportMarkdown}, defaultConfig())
+    assert "<hr" in output1
+
+    let input2 = """
+This is too short to be a transition:
+
+---
+
+context2
+"""
+    let output2 = rstToHtml(input2, {roSupportMarkdown}, defaultConfig())
+    assert "<hr" notin output2
+
+  test "RST literal block":
+    let input1 = """
+Test literal block
+
+::
 
-assert rstToHtml("*Hello* **world**!", {},
-  newStringTable(modeStyleInsensitive)) ==
-  "<em>Hello</em> <strong>world</strong>!"
+  check """
+    let output1 = rstToHtml(input1, {roSupportMarkdown}, defaultConfig())
+    assert "<pre>" in output1
+
+  test "Markdown code block":
+    let input1 = """
+```
+let x = 1
+``` """
+    let output1 = rstToHtml(input1, {roSupportMarkdown}, defaultConfig())
+    assert "<pre" in output1 and "class=\"Keyword\"" notin output1
+
+    let input2 = """
+Parse the block with language specifier:
+```Nim
+let x = 1
+``` """
+    let output2 = rstToHtml(input2, {roSupportMarkdown}, defaultConfig())
+    assert "<pre" in output2 and "class=\"Keyword\"" in output2
+
+  test "RST comments":
+    let input1 = """
+Check that comment disappears:
+
+..
+  some comment """
+    let output1 = rstToHtml(input1, {roSupportMarkdown}, defaultConfig())
+    assert output1 == "Check that comment disappears:"
+
+  test "RST line blocks":
+    let input1 = """
+=====
+Test1
+=====
+
+|
+|
+| line block
+| other line
+
+"""
+    var option: bool
+    var rstGenera: RstGenerator
+    var output1: string
+    rstGenera.initRstGenerator(outHtml, defaultConfig(), "input", {})
+    rstGenera.renderRstToOut(rstParse(input1, "", 1, 1, option, {}), output1)
+    assert rstGenera.meta[metaTitle] == "Test1"
+      # check that title was not overwritten to '|'
+    assert "line block<br />" in output1
+    assert "other line<br />" in output1
+    let output1l = rstToLatex(input1, {})
+    assert "line block\\\\" in output1l
+    assert "other line\\\\" in output1l
author	Andrey Makarov <ph.makarov@gmail.com>	2020-12-04 10:50:17 +0300
committer	GitHub <noreply@github.com>	2020-12-04 08:50:17 +0100
commit	6877e0c8a3881e86ed40ddac3acba8926c8fc6ca (patch)
tree	3eea74e0b0fd9d96c838cbfdec3bd935bb2953ea
parent	e4e5a0c65ab10f48746f8f9b2417e8ca69724a04 (diff)
download	Nim-6877e0c8a3881e86ed40ddac3acba8926c8fc6ca.tar.gz