diff options
author | Andrey Makarov <ph.makarov@gmail.com> | 2022-06-04 08:03:03 +0300 |
---|---|---|
committer | GitHub <noreply@github.com> | 2022-06-04 07:03:03 +0200 |
commit | 4341b06f65396c63eec3689d0f8bb00db26c362f (patch) | |
tree | fc79066b6f922e6112d32a351033cc071a3581d9 /lib/packages/docutils/rst.nim | |
parent | f7a13f62d634300c3cf68e36dd7926a6a235d52f (diff) | |
download | Nim-4341b06f65396c63eec3689d0f8bb00db26c362f.tar.gz |
RST: improve simple tables (#19859)
* RST: improve simple tables * nim 1.0 gotchas * Still allow legacy boundaries like `----`
Diffstat (limited to 'lib/packages/docutils/rst.nim')
-rw-r--r-- | lib/packages/docutils/rst.nim | 221 |
1 files changed, 158 insertions, 63 deletions
diff --git a/lib/packages/docutils/rst.nim b/lib/packages/docutils/rst.nim index d4bfae48f..cd5f26278 100644 --- a/lib/packages/docutils/rst.nim +++ b/lib/packages/docutils/rst.nim @@ -255,6 +255,7 @@ type meExpected = "'$1' expected", meGridTableNotImplemented = "grid table is not implemented", meMarkdownIllformedTable = "illformed delimiter row of a Markdown table", + meIllformedTable = "Illformed table: $1", meNewSectionExpected = "new section expected $1", meGeneralParseError = "general parse error", meInvalidDirective = "invalid directive: '$1'", @@ -2467,81 +2468,175 @@ proc parseOverline(p: var RstParser): PRstNode = anchorType=headlineAnchor) type - IntSeq = seq[int] - ColumnLimits = tuple + ColSpec = object + start, stop: int + RstCols = seq[ColSpec] + ColumnLimits = tuple # for Markdown first, last: int ColSeq = seq[ColumnLimits] +proc tokStart(p: RstParser, idx: int): int = + result = p.tok[idx].col + +proc tokStart(p: RstParser): int = + result = tokStart(p, p.idx) + +proc tokEnd(p: RstParser, idx: int): int = + result = p.tok[idx].col + p.tok[idx].symbol.len - 1 + proc tokEnd(p: RstParser): int = - result = currentTok(p).col + currentTok(p).symbol.len - 1 + result = tokEnd(p, p.idx) -proc getColumns(p: var RstParser, cols: var IntSeq) = +proc getColumns(p: RstParser, cols: var RstCols, startIdx: int): int = + # Fills table column specification (or separator) `cols` and returns + # the next parser index after it. var L = 0 + result = startIdx while true: inc L setLen(cols, L) - cols[L - 1] = tokEnd(p) - assert(currentTok(p).kind == tkAdornment) - inc p.idx - if currentTok(p).kind != tkWhite: break - inc p.idx - if currentTok(p).kind != tkAdornment: break - if currentTok(p).kind == tkIndent: inc p.idx - # last column has no limit: - cols[L - 1] = 32000 + cols[L - 1].start = tokStart(p, result) + cols[L - 1].stop = tokEnd(p, result) + assert(p.tok[result].kind == tkAdornment) + inc result + if p.tok[result].kind != tkWhite: break + inc result + if p.tok[result].kind != tkAdornment: break + if p.tok[result].kind == tkIndent: inc result -proc parseSimpleTable(p: var RstParser): PRstNode = +proc checkColumns(p: RstParser, cols: RstCols) = var - cols: IntSeq - row: seq[string] - i, last, line: int - c: char - q: RstParser - a, b: PRstNode + i = p.idx + col = 0 + if p.tok[i].symbol[0] != '=': + rstMessage(p, mwRstStyle, + "only tables with `=` columns specification are allowed") + for col in 0 ..< cols.len: + if tokEnd(p, i) != cols[col].stop: + rstMessage(p, meIllformedTable, + "end of table column #$1 should end at position $2" % [ + $(col+1), $(cols[col].stop+ColRstOffset)], + p.tok[i].line, tokEnd(p, i)) + inc i + if col == cols.len - 1: + if p.tok[i].kind == tkWhite: + inc i + if p.tok[i].kind notin {tkIndent, tkEof}: + rstMessage(p, meIllformedTable, "extraneous column specification") + elif p.tok[i].kind == tkWhite: + inc i + else: + rstMessage(p, meIllformedTable, "no enough table columns", + p.tok[i].line, p.tok[i].col) + +proc getSpans(p: RstParser, nextLine: int, + cols: RstCols, unitedCols: RstCols): seq[int] = + ## Calculates how many columns a joined cell occupies. + if unitedCols.len > 0: + result = newSeq[int](unitedCols.len) + var + iCell = 0 + jCell = 0 + uCell = 0 + while jCell < cols.len: + if cols[jCell].stop < unitedCols[uCell].stop: + inc jCell + elif cols[jCell].stop == unitedCols[uCell].stop: + result[uCell] = jCell - iCell + 1 + iCell = jCell + 1 + jCell = jCell + 1 + inc uCell + else: + rstMessage(p, meIllformedTable, + "spanning underline does not match main table columns", + p.tok[nextLine].line, p.tok[nextLine].col) + +proc parseSimpleTableRow(p: var RstParser, cols: RstCols, colChar: char): PRstNode = + ## Parses 1 row in RST simple table. + # Consider that columns may be spanning (united by using underline like ----): + let nextLine = tokenAfterNewline(p) + var unitedCols: RstCols + var afterSpan: int + if p.tok[nextLine].kind == tkAdornment and p.tok[nextLine].symbol[0] == '-': + afterSpan = getColumns(p, unitedCols, nextLine) + if unitedCols == cols and p.tok[nextLine].symbol[0] == colChar: + # legacy rst.nim compat.: allow punctuation like `----` in main boundaries + afterSpan = nextLine + unitedCols.setLen 0 + else: + afterSpan = nextLine + template colEnd(i): int = + if i == cols.len - 1: high(int) # last column has no limit + elif unitedCols.len > 0: unitedCols[i].stop else: cols[i].stop + template colStart(i): int = + if unitedCols.len > 0: unitedCols[i].start else: cols[i].start + var row = newSeq[string](if unitedCols.len > 0: unitedCols.len else: cols.len) + var spans: seq[int] = getSpans(p, nextLine, cols, unitedCols) + + let line = currentTok(p).line + # Iterate over the lines a single cell may span: + while true: + var nCell = 0 + # distribute tokens between cells in the current line: + while currentTok(p).kind notin {tkIndent, tkEof}: + if tokEnd(p) <= colEnd(nCell): + if tokStart(p) < colStart(nCell): + if currentTok(p).kind != tkWhite: + rstMessage(p, meIllformedTable, + "this word crosses table column from the left") + else: + inc p.idx + else: + row[nCell].add(currentTok(p).symbol) + inc p.idx + else: + if tokStart(p) < colEnd(nCell) and currentTok(p).kind != tkWhite: + rstMessage(p, meIllformedTable, + "this word crosses table column from the right") + inc nCell + if currentTok(p).kind == tkIndent: inc p.idx + if tokEnd(p) <= colEnd(0): break + # Continued current cells because the 1st column is empty. + if currentTok(p).kind in {tkEof, tkAdornment}: + break + for nCell in countup(1, high(row)): row[nCell].add('\n') + result = newRstNode(rnTableRow) + var q: RstParser + for uCell in 0 ..< row.len: + initParser(q, p.s) + q.col = colStart(uCell) + q.line = line - 1 + getTokens(row[uCell], q.tok) + let cell = newRstNode(rnTableDataCell) + cell.span = if spans.len == 0: 0 else: spans[uCell] + cell.add(parseDoc(q)) + result.add(cell) + if afterSpan > p.idx: + p.idx = afterSpan + +proc parseSimpleTable(p: var RstParser): PRstNode = + var cols: RstCols result = newRstNodeA(p, rnTable) - cols = @[] - row = @[] - a = nil - c = currentTok(p).symbol[0] + let startIdx = getColumns(p, cols, p.idx) + let colChar = currentTok(p).symbol[0] + checkColumns(p, cols) + p.idx = startIdx + result.colCount = cols.len while true: if currentTok(p).kind == tkAdornment: - last = tokenAfterNewline(p) - if p.tok[last].kind in {tkEof, tkIndent}: + checkColumns(p, cols) + p.idx = tokenAfterNewline(p) + if currentTok(p).kind in {tkEof, tkIndent}: # skip last adornment line: - p.idx = last break - getColumns(p, cols) - setLen(row, cols.len) - if a != nil: - for j in 0 ..< a.len: # fix rnTableDataCell -> rnTableHeaderCell - a.sons[j] = newRstNode(rnTableHeaderCell, a.sons[j].sons) + if result.sons.len > 0: result.sons[^1].endsHeader = true + # fix rnTableDataCell -> rnTableHeaderCell for previous table rows: + for nRow in 0 ..< result.sons.len: + for nCell in 0 ..< result.sons[nRow].len: + result.sons[nRow].sons[nCell].kind = rnTableHeaderCell if currentTok(p).kind == tkEof: break - for j in countup(0, high(row)): row[j] = "" - # the following while loop iterates over the lines a single cell may span: - line = currentTok(p).line - while true: - i = 0 - while currentTok(p).kind notin {tkIndent, tkEof}: - if tokEnd(p) <= cols[i]: - row[i].add(currentTok(p).symbol) - inc p.idx - else: - if currentTok(p).kind == tkWhite: inc p.idx - inc i - if currentTok(p).kind == tkIndent: inc p.idx - if tokEnd(p) <= cols[0]: break - if currentTok(p).kind in {tkEof, tkAdornment}: break - for j in countup(1, high(row)): row[j].add('\n') - a = newRstNode(rnTableRow) - for j in countup(0, high(row)): - initParser(q, p.s) - q.col = cols[j] - q.line = line - 1 - getTokens(row[j], q.tok) - b = newRstNode(rnTableDataCell) - b.add(parseDoc(q)) - a.add(b) - result.add(a) + let tabRow = parseSimpleTableRow(p, cols, colChar) + result.add tabRow proc readTableRow(p: var RstParser): ColSeq = if currentTok(p).symbol == "|": inc p.idx @@ -2574,17 +2669,16 @@ proc isValidDelimiterRow(p: var RstParser, colNum: int): bool = proc parseMarkdownTable(p: var RstParser): PRstNode = var row: ColSeq - colNum: int a, b: PRstNode q: RstParser result = newRstNodeA(p, rnMarkdownTable) proc parseRow(p: var RstParser, cellKind: RstNodeKind, result: PRstNode) = row = readTableRow(p) - if colNum == 0: colNum = row.len # table header - elif row.len < colNum: row.setLen(colNum) + if result.colCount == 0: result.colCount = row.len # table header + elif row.len < result.colCount: row.setLen(result.colCount) a = newRstNode(rnTableRow) - for j in 0 ..< colNum: + for j in 0 ..< result.colCount: b = newRstNode(cellKind) initParser(q, p.s) q.col = p.col @@ -2595,7 +2689,8 @@ proc parseMarkdownTable(p: var RstParser): PRstNode = result.add(a) parseRow(p, rnTableHeaderCell, result) - if not isValidDelimiterRow(p, colNum): rstMessage(p, meMarkdownIllformedTable) + if not isValidDelimiterRow(p, result.colCount): + rstMessage(p, meMarkdownIllformedTable) while predNL(p) and currentTok(p).symbol == "|": parseRow(p, rnTableDataCell, result) |