RST: improve simple tables (#19859)

* RST: improve simple tables * nim 1.0 gotchas * Still allow legacy boundaries like `----`
author: Andrey Makarov <ph.makarov@gmail.com> 2022-06-04 08:03:03 +0300
committer: GitHub <noreply@github.com> 2022-06-04 07:03:03 +0200
commit: 4341b06f65396c63eec3689d0f8bb00db26c362f (patch)
tree: fc79066b6f922e6112d32a351033cc071a3581d9 /lib/packages/docutils/rst.nim
parent: f7a13f62d634300c3cf68e36dd7926a6a235d52f (diff)
download: Nim-4341b06f65396c63eec3689d0f8bb00db26c362f.tar.gz
1 files changed, 158 insertions, 63 deletions
diff --git a/lib/packages/docutils/rst.nim b/lib/packages/docutils/rst.nim
index d4bfae48f..cd5f26278 100644
--- a/lib/packages/docutils/rst.nim
+++ b/lib/packages/docutils/rst.nim
@@ -255,6 +255,7 @@ type
     meExpected = "'$1' expected",
     meGridTableNotImplemented = "grid table is not implemented",
     meMarkdownIllformedTable = "illformed delimiter row of a Markdown table",
+    meIllformedTable = "Illformed table: $1",
     meNewSectionExpected = "new section expected $1",
     meGeneralParseError = "general parse error",
     meInvalidDirective = "invalid directive: '$1'",
@@ -2467,81 +2468,175 @@ proc parseOverline(p: var RstParser): PRstNode =
                anchorType=headlineAnchor)
 
 type
-  IntSeq = seq[int]
-  ColumnLimits = tuple
+  ColSpec = object
+    start, stop: int
+  RstCols = seq[ColSpec]
+  ColumnLimits = tuple  # for Markdown
     first, last: int
   ColSeq = seq[ColumnLimits]
 
+proc tokStart(p: RstParser, idx: int): int =
+  result = p.tok[idx].col
+
+proc tokStart(p: RstParser): int =
+  result = tokStart(p, p.idx)
+
+proc tokEnd(p: RstParser, idx: int): int =
+  result = p.tok[idx].col + p.tok[idx].symbol.len - 1
+
 proc tokEnd(p: RstParser): int =
-  result = currentTok(p).col + currentTok(p).symbol.len - 1
+  result = tokEnd(p, p.idx)
 
-proc getColumns(p: var RstParser, cols: var IntSeq) =
+proc getColumns(p: RstParser, cols: var RstCols, startIdx: int): int =
+  # Fills table column specification (or separator) `cols` and returns
+  # the next parser index after it.
   var L = 0
+  result = startIdx
   while true:
     inc L
     setLen(cols, L)
-    cols[L - 1] = tokEnd(p)
-    assert(currentTok(p).kind == tkAdornment)
-    inc p.idx
-    if currentTok(p).kind != tkWhite: break
-    inc p.idx
-    if currentTok(p).kind != tkAdornment: break
-  if currentTok(p).kind == tkIndent: inc p.idx
-  # last column has no limit:
-  cols[L - 1] = 32000
+    cols[L - 1].start = tokStart(p, result)
+    cols[L - 1].stop = tokEnd(p, result)
+    assert(p.tok[result].kind == tkAdornment)
+    inc result
+    if p.tok[result].kind != tkWhite: break
+    inc result
+    if p.tok[result].kind != tkAdornment: break
+  if p.tok[result].kind == tkIndent: inc result
 
-proc parseSimpleTable(p: var RstParser): PRstNode =
+proc checkColumns(p: RstParser, cols: RstCols) =
   var
-    cols: IntSeq
-    row: seq[string]
-    i, last, line: int
-    c: char
-    q: RstParser
-    a, b: PRstNode
+    i = p.idx
+    col = 0
+  if p.tok[i].symbol[0] != '=':
+    rstMessage(p, mwRstStyle,
+               "only tables with `=` columns specification are allowed")
+  for col in 0 ..< cols.len:
+    if tokEnd(p, i) != cols[col].stop:
+      rstMessage(p, meIllformedTable,
+                 "end of table column #$1 should end at position $2" % [
+                   $(col+1), $(cols[col].stop+ColRstOffset)],
+                 p.tok[i].line, tokEnd(p, i))
+    inc i
+    if col == cols.len - 1:
+      if p.tok[i].kind == tkWhite:
+        inc i
+      if p.tok[i].kind notin {tkIndent, tkEof}:
+        rstMessage(p, meIllformedTable, "extraneous column specification")
+    elif p.tok[i].kind == tkWhite:
+      inc i
+    else:
+      rstMessage(p, meIllformedTable, "no enough table columns",
+                 p.tok[i].line, p.tok[i].col)
+
+proc getSpans(p: RstParser, nextLine: int,
+              cols: RstCols, unitedCols: RstCols): seq[int] =
+  ## Calculates how many columns a joined cell occupies.
+  if unitedCols.len > 0:
+    result = newSeq[int](unitedCols.len)
+    var
+      iCell = 0
+      jCell = 0
+      uCell = 0
+    while jCell < cols.len:
+      if cols[jCell].stop < unitedCols[uCell].stop:
+        inc jCell
+      elif cols[jCell].stop == unitedCols[uCell].stop:
+        result[uCell] = jCell - iCell + 1
+        iCell = jCell + 1
+        jCell = jCell + 1
+        inc uCell
+      else:
+        rstMessage(p, meIllformedTable,
+                   "spanning underline does not match main table columns",
+                   p.tok[nextLine].line, p.tok[nextLine].col)
+
+proc parseSimpleTableRow(p: var RstParser, cols: RstCols, colChar: char): PRstNode =
+  ## Parses 1 row in RST simple table.
+  # Consider that columns may be spanning (united by using underline like ----):
+  let nextLine = tokenAfterNewline(p)
+  var unitedCols: RstCols
+  var afterSpan: int
+  if p.tok[nextLine].kind == tkAdornment and p.tok[nextLine].symbol[0] == '-':
+    afterSpan = getColumns(p, unitedCols, nextLine)
+    if unitedCols == cols and p.tok[nextLine].symbol[0] == colChar:
+      # legacy rst.nim compat.: allow punctuation like `----` in main boundaries
+      afterSpan = nextLine
+      unitedCols.setLen 0
+  else:
+    afterSpan = nextLine
+  template colEnd(i): int =
+    if i == cols.len - 1: high(int)  # last column has no limit
+    elif unitedCols.len > 0: unitedCols[i].stop else: cols[i].stop
+  template colStart(i): int =
+    if unitedCols.len > 0: unitedCols[i].start else: cols[i].start
+  var row = newSeq[string](if unitedCols.len > 0: unitedCols.len else: cols.len)
+  var spans: seq[int] = getSpans(p, nextLine, cols, unitedCols)
+
+  let line = currentTok(p).line
+  # Iterate over the lines a single cell may span:
+  while true:
+    var nCell = 0
+    # distribute tokens between cells in the current line:
+    while currentTok(p).kind notin {tkIndent, tkEof}:
+      if tokEnd(p) <= colEnd(nCell):
+        if tokStart(p) < colStart(nCell):
+          if currentTok(p).kind != tkWhite:
+            rstMessage(p, meIllformedTable,
+                       "this word crosses table column from the left")
+          else:
+            inc p.idx
+        else:
+          row[nCell].add(currentTok(p).symbol)
+          inc p.idx
+      else:
+        if tokStart(p) < colEnd(nCell) and currentTok(p).kind != tkWhite:
+          rstMessage(p, meIllformedTable,
+                     "this word crosses table column from the right")
+        inc nCell
+    if currentTok(p).kind == tkIndent: inc p.idx
+    if tokEnd(p) <= colEnd(0): break
+    # Continued current cells because the 1st column is empty.
+    if currentTok(p).kind in {tkEof, tkAdornment}:
+      break
+    for nCell in countup(1, high(row)): row[nCell].add('\n')
+  result = newRstNode(rnTableRow)
+  var q: RstParser
+  for uCell in 0 ..< row.len:
+    initParser(q, p.s)
+    q.col = colStart(uCell)
+    q.line = line - 1
+    getTokens(row[uCell], q.tok)
+    let cell = newRstNode(rnTableDataCell)
+    cell.span = if spans.len == 0: 0 else: spans[uCell]
+    cell.add(parseDoc(q))
+    result.add(cell)
+  if afterSpan > p.idx:
+    p.idx = afterSpan
+
+proc parseSimpleTable(p: var RstParser): PRstNode =
+  var cols: RstCols
   result = newRstNodeA(p, rnTable)
-  cols = @[]
-  row = @[]
-  a = nil
-  c = currentTok(p).symbol[0]
+  let startIdx = getColumns(p, cols, p.idx)
+  let colChar = currentTok(p).symbol[0]
+  checkColumns(p, cols)
+  p.idx = startIdx
+  result.colCount = cols.len
   while true:
     if currentTok(p).kind == tkAdornment:
-      last = tokenAfterNewline(p)
-      if p.tok[last].kind in {tkEof, tkIndent}:
+      checkColumns(p, cols)
+      p.idx = tokenAfterNewline(p)
+      if currentTok(p).kind in {tkEof, tkIndent}:
         # skip last adornment line:
-        p.idx = last
         break
-      getColumns(p, cols)
-      setLen(row, cols.len)
-      if a != nil:
-        for j in 0 ..< a.len:  # fix rnTableDataCell -> rnTableHeaderCell
-          a.sons[j] = newRstNode(rnTableHeaderCell, a.sons[j].sons)
+      if result.sons.len > 0: result.sons[^1].endsHeader = true
+      # fix rnTableDataCell -> rnTableHeaderCell for previous table rows:
+      for nRow in 0 ..< result.sons.len:
+        for nCell in 0 ..< result.sons[nRow].len:
+          result.sons[nRow].sons[nCell].kind = rnTableHeaderCell
     if currentTok(p).kind == tkEof: break
-    for j in countup(0, high(row)): row[j] = ""
-    # the following while loop iterates over the lines a single cell may span:
-    line = currentTok(p).line
-    while true:
-      i = 0
-      while currentTok(p).kind notin {tkIndent, tkEof}:
-        if tokEnd(p) <= cols[i]:
-          row[i].add(currentTok(p).symbol)
-          inc p.idx
-        else:
-          if currentTok(p).kind == tkWhite: inc p.idx
-          inc i
-      if currentTok(p).kind == tkIndent: inc p.idx
-      if tokEnd(p) <= cols[0]: break
-      if currentTok(p).kind in {tkEof, tkAdornment}: break
-      for j in countup(1, high(row)): row[j].add('\n')
-    a = newRstNode(rnTableRow)
-    for j in countup(0, high(row)):
-      initParser(q, p.s)
-      q.col = cols[j]
-      q.line = line - 1
-      getTokens(row[j], q.tok)
-      b = newRstNode(rnTableDataCell)
-      b.add(parseDoc(q))
-      a.add(b)
-    result.add(a)
+    let tabRow = parseSimpleTableRow(p, cols, colChar)
+    result.add tabRow
 
 proc readTableRow(p: var RstParser): ColSeq =
   if currentTok(p).symbol == "|": inc p.idx
@@ -2574,17 +2669,16 @@ proc isValidDelimiterRow(p: var RstParser, colNum: int): bool =
 proc parseMarkdownTable(p: var RstParser): PRstNode =
   var
     row: ColSeq
-    colNum: int
     a, b: PRstNode
     q: RstParser
   result = newRstNodeA(p, rnMarkdownTable)
 
   proc parseRow(p: var RstParser, cellKind: RstNodeKind, result: PRstNode) =
     row = readTableRow(p)
-    if colNum == 0: colNum = row.len # table header
-    elif row.len < colNum: row.setLen(colNum)
+    if result.colCount == 0: result.colCount = row.len # table header
+    elif row.len < result.colCount: row.setLen(result.colCount)
     a = newRstNode(rnTableRow)
-    for j in 0 ..< colNum:
+    for j in 0 ..< result.colCount:
       b = newRstNode(cellKind)
       initParser(q, p.s)
       q.col = p.col
@@ -2595,7 +2689,8 @@ proc parseMarkdownTable(p: var RstParser): PRstNode =
     result.add(a)
 
   parseRow(p, rnTableHeaderCell, result)
-  if not isValidDelimiterRow(p, colNum): rstMessage(p, meMarkdownIllformedTable)
+  if not isValidDelimiterRow(p, result.colCount):
+    rstMessage(p, meMarkdownIllformedTable)
   while predNL(p) and currentTok(p).symbol == "|":
     parseRow(p, rnTableDataCell, result)
author	Andrey Makarov <ph.makarov@gmail.com>	2022-06-04 08:03:03 +0300
committer	GitHub <noreply@github.com>	2022-06-04 07:03:03 +0200
commit	4341b06f65396c63eec3689d0f8bb00db26c362f (patch)
tree	fc79066b6f922e6112d32a351033cc071a3581d9 /lib/packages/docutils/rst.nim
parent	f7a13f62d634300c3cf68e36dd7926a6a235d52f (diff)
download	Nim-4341b06f65396c63eec3689d0f8bb00db26c362f.tar.gz