#
#
#            Nim's Runtime Library
#        (c) Copyright 2012 Andreas Rumpf
#
#    See the file "copying.txt", included in this
#    distribution, for details about the copyright.
#

## This module implements a `reStructuredText`:idx: (RST) parser. A large
## subset is implemented. Some features of the `markdown`:idx: syntax are
## also supported. Nim can output the result to HTML (command ``rst2html``)
## or LaTeX (command ``rst2tex``).
##
## If you are new to RST please consider reading the following:
##
## 1) a short `quick introduction`_
## 2) an `RST reference`_: a comprehensive cheatsheet for RST
## 3) a more formal 50-page `RST specification`_.
##
## Supported standard RST features:
##
## * body elements
##   + sections
##   + transitions
##   + paragraphs
##   + bullet lists using \+, \*, \-
##   + enumerated lists using arabic numerals or alphabet
##     characters:  1. ... 2. ... *or* a. ... b. ... *or* A. ... B. ...
##   + definition lists
##   + field lists
##   + option lists
##   + indented literal blocks
##   + simple tables
##   + directives (see official documentation in `RST directives list`_):
##     - ``image``, ``figure`` for including images and videos
##     - ``code``
##     - ``contents`` (table of contents), ``container``, ``raw``
##     - ``include``
##     - admonitions: "attention", "caution", "danger", "error", "hint",
##       "important", "note", "tip", "warning", "admonition"
##     - substitution definitions: `replace` and `image`
##   + comments
## * inline markup
##   + *emphasis*, **strong emphasis**,
##     ``inline literals``, hyperlink references, substitution references,
##     standalone hyperlinks
##   + \`interpreted text\` with roles ``:literal:``, ``:strong:``,
##     ``:emphasis:``, ``:sub:``/``:subscript:``, ``:sup:``/``:supscript:``
##     (see `RST roles list`_ for description).
##
## Additional features:
##
## * directives: ``code-block``, ``title``, ``index``
## * ***triple emphasis*** (bold and italic) using \*\*\*
## * ``:idx:`` role for \`interpreted text\` to include the link to this
##   text into an index (example: `Nim index`_).
##
## Optional additional features, turned on by ``options: RstParseOption`` in
## `rstParse proc <#rstParse,string,string,int,int,bool,RstParseOptions,FindFileHandler,MsgHandler>`_:
##
## * emoji / smiley symbols
## * markdown tables
## * markdown code blocks
## * markdown links
## * markdown headlines
## * using ``1`` as auto-enumerator in enumerated lists like RST ``#``
##   (auto-enumerator ``1`` can not be used with ``#`` in the same list)
##
## .. Note:: By default Nim has ``roSupportMarkdown`` and
##    ``roSupportRawDirective`` turned **on**.
##
## .. warning:: Using Nim-specific features can cause other RST implementations
##   to fail on your document.
##
## Limitations:
##
## * no Unicode support in character width calculations
## * body elements
##   - no roman numerals in enumerated lists
##   - no quoted literal blocks
##   - no doctest blocks
##   - no grid tables
##   - some directives are missing (check official `RST directives list`_):
##     ``parsed-literal``, ``sidebar``, ``topic``, ``math``, ``rubric``,
##     ``epigraph``, ``highlights``, ``pull-quote``, ``compound``,
##     ``table``, ``csv-table``, ``list-table``, ``section-numbering``,
##     ``header``, ``footer``, ``meta``, ``class``
##     - no ``role`` directives and no custom interpreted text roles
##     - some standard roles are not supported (check `RST roles list`_)
##   - no footnotes & citations support
##   - no inline internal targets
## * inline markup
##   - no simple-inline-markup
##   - no embedded URI and aliases
##
## .. _quick introduction: https://docutils.sourceforge.io/docs/user/rst/quickstart.html
## .. _RST reference: https://docutils.sourceforge.io/docs/user/rst/quickref.html
## .. _RST specification: https://docutils.sourceforge.io/docs/ref/rst/restructuredtext.html
## .. _RST directives list: https://docutils.sourceforge.io/docs/ref/rst/directives.html
## .. _RST roles list: https://docutils.sourceforge.io/docs/ref/rst/roles.html
## .. _Nim index: https://nim-lang.org/docs/theindex.html
##
## See `Nim DocGen Tools Guide <docgen.html>`_ for the details about
## ``nim doc``, ``nim rst2html`` and ``nim rst2tex`` commands.
##
## .. note:: Import ``packages/docutils/rst`` to use this module.
##
## See also `packages/docutils/rstgen module <rstgen.html>`_.

import
  os, strutils, rstast

type
  RstParseOption* = enum     ## options for the RST parser
    roSkipPounds,             ## skip ``#`` at line beginning (documentation
                              ## embedded in Nim comments)
    roSupportSmilies,         ## make the RST parser support smilies like ``:)``
    roSupportRawDirective,    ## support the ``raw`` directive (don't support
                              ## it for sandboxing)
    roSupportMarkdown         ## support additional features of markdown

  RstParseOptions* = set[RstParseOption] ## the set of enabled parser options

  MsgClass* = enum ## severity of a message; the string value is the label
                   ## printed by `defaultMsgHandler`
    mcHint = "Hint",
    mcWarning = "Warning",
    mcError = "Error"

  # The string value of each `MsgKind` member is the message template; `$1`
  # is replaced by the message argument (see `defaultMsgHandler`). The
  # identifier prefix tells the severity: `me` = error, `mw` = warning.
  MsgKind* = enum          ## the possible messages
    meCannotOpenFile = "cannot open '$1'",
    meExpected = "'$1' expected",
    meGridTableNotImplemented = "grid table is not implemented",
    meMarkdownIllformedTable = "illformed delimiter row of a markdown table",
    meNewSectionExpected = "new section expected",
    meGeneralParseError = "general parse error",
    meInvalidDirective = "invalid directive: '$1'",
    mwRedefinitionOfLabel = "redefinition of label '$1'",
    mwUnknownSubstitution = "unknown substitution '$1'",
    mwUnsupportedLanguage = "language '$1' not supported",
    mwUnsupportedField = "field '$1' not supported"

  MsgHandler* = proc (filename: string, line, col: int, msgKind: MsgKind,
                       arg: string) {.closure, gcsafe.} ## what to do in case of an error
  FindFileHandler* = proc (filename: string): string {.closure, gcsafe.} ## \
    ## callback that resolves a file name to a path; `defaultFindFile`
    ## returns "" when the file does not exist

# Forward declarations of exported procs; their bodies follow later in this
# module, which lets earlier code call them.
proc rstnodeToRefname*(n: PRstNode): string
proc addNodes*(n: PRstNode): string
proc getFieldValue*(n: PRstNode, fieldname: string): string
proc getArgument*(n: PRstNode): string

# ----------------------------- scanner part --------------------------------

const
  # Characters that `rawGetTok` collects into a single word token
  # (ASCII letters and digits plus every byte >= 0x80; presumably so that
  # multi-byte UTF-8 sequences stay inside one word -- confirm).
  SymChars: set[char] = {'a'..'z', 'A'..'Z', '0'..'9', '\x80'..'\xFF'}
  # Every key of `Smilies` starts with one of these characters; `parseSmiley`
  # uses the set as a cheap pre-check before trying the table.
  SmileyStartChars: set[char] = {':', ';', '8'}
  # Maps the textual form of a smiley to the icon name that `parseSmiley`
  # stores as the text of the resulting `rnSmiley` node.
  Smilies = {
    ":D": "icon_e_biggrin",
    ":-D": "icon_e_biggrin",
    ":)": "icon_e_smile",
    ":-)": "icon_e_smile",
    ";)": "icon_e_wink",
    ";-)": "icon_e_wink",
    ":(": "icon_e_sad",
    ":-(": "icon_e_sad",
    ":o": "icon_e_surprised",
    ":-o": "icon_e_surprised",
    ":shock:": "icon_eek",
    ":?": "icon_e_confused",
    ":-?": "icon_e_confused",
    ":-/": "icon_e_confused",

    "8-)": "icon_cool",

    ":lol:": "icon_lol",
    ":x": "icon_mad",
    ":-x": "icon_mad",
    ":P": "icon_razz",
    ":-P": "icon_razz",
    ":oops:": "icon_redface",
    ":cry:": "icon_cry",
    ":evil:": "icon_evil",
    ":twisted:": "icon_twisted",
    ":roll:": "icon_rolleyes",
    ":!:": "icon_exclaim",

    ":?:": "icon_question",
    ":idea:": "icon_idea",
    ":arrow:": "icon_arrow",
    ":|": "icon_neutral",
    ":-|": "icon_neutral",
    ":mrgreen:": "icon_mrgreen",
    ":geek:": "icon_e_geek",
    ":ugeek:": "icon_e_ugeek"
  }

type
  TokType = enum
    tkEof, tkIndent,          # end of input; a newline plus the indentation
                              # of the following line
    tkWhite, tkWord,          # run of blanks/tabs; run of `SymChars`
    tkAdornment,              # used for chapter adornment, transitions and
                              # horizontal table borders
    tkPunct,                  # one or many punctuation characters
    tkOther                   # any single character not covered above
  Token = object              # a RST token
    kind*: TokType            # the type of the token
    ival*: int                # the indentation or parsed integer value
    symbol*: string           # the parsed symbol as string
    line*, col*: int          # line and column of the token

  TokenSeq = seq[Token]
  Lexer = object of RootObj
    buf*: cstring             # the text being scanned
    bufpos*: int              # index of the next unread character in `buf`
    line*, col*, baseIndent*: int # current line and column; `baseIndent` is
                              # subtracted from token columns and indentations
    skipPounds*: bool         # skip up to two leading '#' on every line
    adornmentLine*: bool      # true while scanning a line that
                              # `isCurrentLineAdornment` classified as adornment

proc getThing(L: var Lexer, tok: var Token, s: set[char]) =
  ## Scans a `tkWord` token: the character at the current position is taken
  ## unconditionally, followed by every subsequent character contained in `s`.
  ## Advances the lexer position and column past the token.
  let first = L.bufpos
  tok.kind = tkWord
  tok.line = L.line
  tok.col = L.col
  tok.symbol.add(L.buf[first])
  var stop = first + 1
  while L.buf[stop] in s:
    tok.symbol.add(L.buf[stop])
    inc stop
  inc L.col, stop - first
  L.bufpos = stop

proc isCurrentLineAdornment(L: var Lexer): bool =
  ## Tells whether the rest of the current line (starting at the lexer
  ## position) consists only of characters allowed in an adornment line.
  ##
  ## A line starting with '+' is treated as a grid table border and may only
  ## continue with '-', '=' or '+'. Any other line may only repeat its first
  ## character or contain blanks (space, tab, vertical tab, form feed).
  let first = L.buf[L.bufpos]
  let allowed: set[char] =
    if first == '+': {'-', '=', '+'}            # grid table
    else: {first, ' ', '\t', '\v', '\f'}        # section adornment / border
  var i = L.bufpos + 1
  while L.buf[i] notin {'\c', '\l', '\0'}:
    if L.buf[i] notin allowed:
      return false
    inc i
  result = true

proc getPunctAdornment(L: var Lexer, tok: var Token) =
  ## Scans a run of one repeated punctuation character. The token becomes
  ## `tkAdornment` when the lexer is inside an adornment line and `tkPunct`
  ## otherwise.
  let first = L.bufpos
  let ch = L.buf[first]
  tok.kind = if L.adornmentLine: tkAdornment else: tkPunct
  tok.line = L.line
  tok.col = L.col
  tok.symbol.add(ch)
  var stop = first + 1
  while L.buf[stop] == ch:
    tok.symbol.add(ch)
    inc stop
  inc L.col, stop - first
  L.bufpos = stop
  # nim extension: standalone \ can not be adornment
  if tok.symbol == "\\":
    tok.kind = tkPunct

proc getBracket(L: var Lexer, tok: var Token) =
  ## Scans exactly one character (a bracket) as a `tkPunct` token, so that
  ## consecutive brackets are never merged into one token.
  tok.symbol.add(L.buf[L.bufpos])
  tok.col = L.col
  tok.line = L.line
  tok.kind = tkPunct
  inc L.bufpos
  inc L.col

proc getIndentAux(L: var Lexer, start: int): int =
  ## Computes the indentation of the line that begins after the newline at
  ## `start` and returns it.
  ##
  ## * A space, vertical tab or form feed counts as one column; a tab advances
  ##   to the next multiple of 8.
  ## * With `L.skipPounds` up to two leading '#' are skipped first.
  ## * If the input ends after the whitespace the result is 0.
  ## * If the line is blank, the indentation of the following line is used
  ##   instead (recursive call).
  ##
  ## On return `L.bufpos` points just past the leading whitespace of the
  ## first line examined by the outermost call.
  var pos = start
  # skip the newline (but include it in the token!)
  if L.buf[pos] == '\x0D':
    if L.buf[pos + 1] == '\x0A': inc pos, 2
    else: inc pos
  elif L.buf[pos] == '\x0A':
    inc pos
  if L.skipPounds:
    if L.buf[pos] == '#': inc pos
    if L.buf[pos] == '#': inc pos
  while true:
    case L.buf[pos]
    of ' ', '\x0B', '\x0C':
      inc pos
      inc result
    of '\x09':
      inc pos
      # round up to the next tab stop (multiple of 8)
      result = result - (result mod 8) + 8
    else:
      break                   # EndOfFile also leaves the loop
  if L.buf[pos] == '\0':
    result = 0
  elif L.buf[pos] == '\x0A' or L.buf[pos] == '\x0D':
    # look at the next line for proper indentation:
    result = getIndentAux(L, pos)
  # This assignment runs after the recursive call and overrides the position
  # it stored, so for a blank line `bufpos` stays at that line's newline and
  # the lexer emits a further tkIndent token for it.
  L.bufpos = pos              # no need to set back buf

proc getIndent(L: var Lexer, tok: var Token) =
  ## Produces a `tkIndent` token for the newline at the lexer position.
  ## `tok.ival` is the indentation of the next line relative to
  ## `L.baseIndent` (never negative) and `tok.symbol` is a newline followed
  ## by that many spaces.
  let rawIndent = getIndentAux(L, L.bufpos)
  inc L.line
  L.col = rawIndent
  tok.kind = tkIndent         # skip the newline (but include it in the token!)
  tok.line = L.line
  tok.col = 0
  tok.ival = max(rawIndent - L.baseIndent, 0)
  tok.symbol = "\n" & spaces(tok.ival)

proc rawGetTok(L: var Lexer, tok: var Token) =
  ## Scans the next token at the lexer position into `tok`, dispatching on
  ## the first character. `tok.col` is finally made relative to
  ## `L.baseIndent` (never negative).
  tok.symbol = ""
  tok.ival = 0
  if L.col == 0:
    # a new line starts: forget the adornment state of the previous line
    L.adornmentLine = false
  var c = L.buf[L.bufpos]
  case c
  of 'a'..'z', 'A'..'Z', '\x80'..'\xFF', '0'..'9':
    getThing(L, tok, SymChars)
  of ' ', '\x09', '\x0B', '\x0C':
    getThing(L, tok, {' ', '\x09'})
    tok.kind = tkWhite
    if L.buf[L.bufpos] in {'\x0D', '\x0A'}:
      # trailing whitespace is dropped: the recursive call resets `tok` and
      # replaces it with the token for the newline
      rawGetTok(L, tok)       # ignore spaces before \n
  of '\x0D', '\x0A':
    getIndent(L, tok)
    L.adornmentLine = false
  of '!', '\"', '#', '$', '%', '&', '\'',  '*', '+', ',', '-', '.',
     '/', ':', ';', '<', '=', '>', '?', '@', '\\', '^', '_', '`',
     '|', '~':
    if L.col == 0:
      # only a line that starts with punctuation can be an adornment line
      L.adornmentLine = L.isCurrentLineAdornment()
    getPunctAdornment(L, tok)
  of '(', ')', '[', ']', '{', '}':
    getBracket(L, tok)
  else:
    tok.line = L.line
    tok.col = L.col
    if c == '\0':
      # end of input: the position is not advanced
      tok.kind = tkEof
    else:
      tok.kind = tkOther
      tok.symbol.add(c)
      inc L.bufpos
      inc L.col
  tok.col = max(tok.col - L.baseIndent, 0)

proc getTokens(buffer: string, skipPounds: bool, tokens: var TokenSeq): int =
  ## Tokenizes `buffer` and appends the tokens, ending with a `tkEof` token,
  ## to `tokens`.
  ##
  ## With `skipPounds` up to two leading '#' and the spaces after them are
  ## skipped; the number of skipped spaces becomes the lexer's base
  ## indentation. The result is the number of characters skipped this way.
  var L: Lexer
  L.buf = cstring(buffer)
  L.line = 0
  L.skipPounds = skipPounds
  # skip UTF-8 BOM
  if L.buf[0] == '\xEF' and L.buf[1] == '\xBB' and L.buf[2] == '\xBF':
    inc L.bufpos, 3
  if skipPounds:
    for _ in 1 .. 2:
      if L.buf[L.bufpos] == '#':
        inc L.bufpos
        inc result
    L.baseIndent = 0
    while L.buf[L.bufpos] == ' ':
      inc L.bufpos
      inc L.baseIndent
      inc result
  while true:
    tokens.setLen(tokens.len + 1)
    rawGetTok(L, tokens[^1])
    if tokens[^1].kind == tkEof: break
  if tokens[0].kind == tkWhite:
    # BUGFIX: leading whitespace of the very first line acts as indentation
    tokens[0].ival = tokens[0].symbol.len
    tokens[0].kind = tkIndent

type
  LevelMap = array[char, int]   # section level per adornment character
  Substitution = object         # a named node; used for both substitutions
                                # and references
    key*: string
    value*: PRstNode

  SharedState = object
    options: RstParseOptions    # parsing options
    uLevel, oLevel: int         # counters for the section levels
    subs: seq[Substitution]     # substitutions
    refs: seq[Substitution]     # references
    underlineToLevel: LevelMap  # Saves for each possible title adornment
                                # character its level in the
                                # current document.
                                # This is for single underline adornments.
    overlineToLevel: LevelMap   # Saves for each possible title adornment
                                # character its level in the current
                                # document.
                                # This is for over-underline adornments.
    msgHandler: MsgHandler      # How to handle errors.
    findFile: FindFileHandler   # How to find files.

  PSharedState = ref SharedState
  RstParser = object of RootObj
    idx*: int                   # index of the current token in `tok`
    tok*: TokenSeq              # the token stream being parsed
    s*: PSharedState            # state shared between parsers
    indentStack*: seq[int]      # stack of indentations; the top is the
                                # current one (see `currInd`)
    filename*: string           # file name reported in messages
    line*, col*: int            # offsets added to token positions when a
                                # message is reported (see `rstMessage`)
    hasToc*: bool

  EParseError* = object of ValueError # raised by `defaultMsgHandler` for
                                      # messages of class `mcError`

template currentTok(p: RstParser): Token =
  ## The token at the parser position.
  p.tok[p.idx]

template prevTok(p: RstParser): Token =
  ## The token immediately before the parser position.
  p.tok[p.idx - 1]

template nextTok(p: RstParser): Token =
  ## The token immediately after the parser position.
  p.tok[p.idx + 1]

proc whichMsgClass*(k: MsgKind): MsgClass =
  ## returns which message class `k` belongs to.
  ##
  ## Members prefixed with `me` are errors, members prefixed with `mw` are
  ## warnings.
  # `$k` yields the message text of the enum member, not its identifier, so
  # the class cannot be derived from the stringified value. The case below is
  # exhaustive on purpose: adding a new `MsgKind` member without classifying
  # it here is a compile-time error.
  case k
  of meCannotOpenFile, meExpected, meGridTableNotImplemented,
     meMarkdownIllformedTable, meNewSectionExpected, meGeneralParseError,
     meInvalidDirective:
    result = mcError
  of mwRedefinitionOfLabel, mwUnknownSubstitution, mwUnsupportedLanguage,
     mwUnsupportedField:
    result = mcWarning

proc defaultMsgHandler*(filename: string, line, col: int, msgkind: MsgKind,
                        arg: string) =
  ## Default message handler: formats the message as
  ## ``file(line, col) Class: text``. Errors are raised as `EParseError`;
  ## everything else is written to stdout.
  let severity = whichMsgClass(msgkind)
  let text = ($msgkind) % arg
  let message = "$1($2, $3) $4: $5" % [filename, $line, $col, $severity, text]
  case severity
  of mcError:
    raise newException(EParseError, message)
  of mcHint, mcWarning:
    writeLine(stdout, message)

proc defaultFindFile*(filename: string): string =
  ## Default file finder: returns `filename` unchanged when such a file
  ## exists and the empty string otherwise.
  result =
    if fileExists(filename): filename
    else: ""

proc newSharedState(options: RstParseOptions,
                    findFile: FindFileHandler,
                    msgHandler: MsgHandler): PSharedState =
  ## Creates the state shared between parsers. A nil `findFile` or
  ## `msgHandler` is replaced by `defaultFindFile` / `defaultMsgHandler`.
  new(result)
  result.options = options
  result.subs = @[]
  result.refs = @[]
  if isNil(findFile):
    result.findFile = defaultFindFile
  else:
    result.findFile = findFile
  if isNil(msgHandler):
    result.msgHandler = defaultMsgHandler
  else:
    result.msgHandler = msgHandler

proc findRelativeFile(p: RstParser; filename: string): string =
  ## Resolves `filename` relative to the directory of the file being parsed;
  ## if no such file exists the shared `findFile` callback decides.
  let candidate = splitFile(p.filename).dir / filename
  result =
    if fileExists(candidate): candidate
    else: p.s.findFile(filename)

proc rstMessage(p: RstParser, msgKind: MsgKind, arg: string) =
  ## Reports `msgKind` with argument `arg` at the position of the current
  ## token (offset by the parser's own line/column).
  let reportLine = p.line + currentTok(p).line
  let reportCol = p.col + currentTok(p).col
  p.s.msgHandler(p.filename, reportLine, reportCol, msgKind, arg)

proc rstMessage(p: RstParser, msgKind: MsgKind, arg: string, line, col: int) =
  ## Reports `msgKind` with argument `arg` at the explicit position
  ## `line`/`col` (offset by the parser's own line/column).
  let reportLine = p.line + line
  let reportCol = p.col + col
  p.s.msgHandler(p.filename, reportLine, reportCol, msgKind, arg)

proc rstMessage(p: RstParser, msgKind: MsgKind) =
  ## Reports `msgKind` at the position of the current token, using the
  ## token's text as the message argument.
  rstMessage(p, msgKind, currentTok(p).symbol,
             currentTok(p).line, currentTok(p).col)

proc currInd(p: RstParser): int =
  ## The indentation on top of the indentation stack.
  p.indentStack[^1]

proc pushInd(p: var RstParser, ind: int) =
  ## Makes `ind` the current indentation.
  add(p.indentStack, ind)

proc popInd(p: var RstParser) =
  ## Drops the current indentation; the bottom entry of the stack is never
  ## removed.
  if p.indentStack.len > 1:
    discard p.indentStack.pop()

proc initParser(p: var RstParser, sharedState: PSharedState) =
  ## Resets `p` to its initial state (no tokens, indentation stack `@[0]`,
  ## line 1, column 0) and attaches `sharedState` to it.
  p.s = sharedState
  p.filename = ""
  p.line = 1
  p.col = 0
  p.tok = @[]
  p.idx = 0
  p.indentStack = @[0]
  p.hasToc = false

proc addNodesAux(n: PRstNode, result: var string) =
  ## Appends the text of every leaf below `n`, in order, to `result`.
  if n.kind != rnLeaf:
    for i in countup(0, n.len - 1):
      addNodesAux(n.sons[i], result)
    return
  result.add(n.text)

proc addNodes(n: PRstNode): string =
  ## Returns the concatenated text of all leaves below `n`.
  addNodesAux(n, result)

proc rstnodeToRefnameAux(n: PRstNode, r: var string, b: var bool) =
  ## Appends the reference-name form of the text below `n` to `r`.
  ##
  ## Letters are lower-cased, digits are kept (a leading digit is prefixed
  ## with 'Z'), selected punctuation is spelled out as a word and any other
  ## character acts as a separator. `b` records a pending separator: it is
  ## emitted as a single '-' before the next appended piece.
  template flushSeparator() =
    if b:
      r.add('-')
      b = false

  template special(s) =
    flushSeparator()
    r.add(s)

  if n == nil: return
  if n.kind != rnLeaf:
    for i in countup(0, n.len - 1):
      rstnodeToRefnameAux(n.sons[i], r, b)
    return
  for ch in n.text:
    case ch
    of '0'..'9':
      flushSeparator()
      if r.len == 0: r.add('Z')
      r.add(ch)
    of 'a'..'z', '\128'..'\255':
      special ch
    of 'A'..'Z':
      special toLowerAscii(ch)
    of '$': special "dollar"
    of '%': special "percent"
    of '&': special "amp"
    of '^': special "roof"
    of '!': special "emark"
    of '?': special "qmark"
    of '*': special "star"
    of '+': special "plus"
    of '-': special "minus"
    of '/': special "slash"
    of '\\': special "backslash"
    of '=': special "eq"
    of '<': special "lt"
    of '>': special "gt"
    of '~': special "tilde"
    of ':': special "colon"
    of '.': special "dot"
    of '@': special "at"
    of '|': special "bar"
    else:
      # a separator is only remembered once something has been emitted
      if r.len > 0: b = true

proc rstnodeToRefname(n: PRstNode): string =
  ## Converts the text below `n` into a reference name
  ## (see `rstnodeToRefnameAux` for the rules).
  var pendingSeparator = false
  rstnodeToRefnameAux(n, result, pendingSeparator)

proc findSub(p: var RstParser, n: PRstNode): int =
  ## Returns the index of the substitution whose key equals the text of `n`,
  ## or -1 if there is none. An exact match is preferred; otherwise the
  ## first key that is equal according to `cmpIgnoreStyle` is taken.
  let wanted = addNodes(n)
  result = -1
  for i, sub in p.s.subs:
    if sub.key == wanted:
      return i
  # the spec says: if no exact match, try one without case distinction:
  for i, sub in p.s.subs:
    if cmpIgnoreStyle(wanted, sub.key) == 0:
      return i

proc setSub(p: var RstParser, key: string, value: PRstNode) =
  ## Defines substitution `key`; an existing definition with exactly the
  ## same key is overwritten.
  for sub in mitems(p.s.subs):
    if sub.key == key:
      sub.value = value
      return
  p.s.subs.add(Substitution(key: key, value: value))

proc setRef(p: var RstParser, key: string, value: PRstNode) =
  ## Defines reference `key`. An existing reference with the same key is
  ## overwritten; a `mwRedefinitionOfLabel` message is issued first when the
  ## old and the new value differ in text.
  for entry in mitems(p.s.refs):
    if entry.key != key: continue
    if entry.value.addNodes != value.addNodes:
      rstMessage(p, mwRedefinitionOfLabel, key)
    entry.value = value
    return
  p.s.refs.add(Substitution(key: key, value: value))

proc findRef(p: var RstParser, key: string): PRstNode =
  ## Returns the value of the reference named `key`, or nil if there is no
  ## such reference.
  result = nil
  for entry in p.s.refs:
    if entry.key == key:
      return entry.value

proc newLeaf(p: var RstParser): PRstNode =
  ## Creates a leaf node holding the text of the current token.
  newRstNode(rnLeaf, p.tok[p.idx].symbol)

proc getReferenceName(p: var RstParser, endStr: string): PRstNode =
  ## Collects tokens into an `rnInner` node until the punctuation token
  ## `endStr` is found, which is consumed. Any token other than a word,
  ## whitespace, punctuation or "other" token stops the scan with a
  ## `meExpected` message.
  result = newRstNode(rnInner)
  while true:
    let kind = currentTok(p).kind
    if kind notin {tkWord, tkOther, tkWhite, tkPunct}:
      rstMessage(p, meExpected, endStr)
      return
    if kind == tkPunct and currentTok(p).symbol == endStr:
      inc p.idx
      return
    result.add(newLeaf(p))
    inc p.idx

proc untilEol(p: var RstParser): PRstNode =
  ## Consumes all tokens up to (not including) the next `tkIndent` or
  ## `tkEof` token and returns them as leaves of an `rnInner` node.
  result = newRstNode(rnInner)
  while true:
    if currentTok(p).kind in {tkIndent, tkEof}: break
    result.add(newLeaf(p))
    inc p.idx

proc expect(p: var RstParser, tok: string) =
  ## Consumes the current token if its text is `tok`; otherwise reports
  ## `meExpected` and leaves the position unchanged.
  if currentTok(p).symbol != tok:
    rstMessage(p, meExpected, tok)
  else:
    inc p.idx

proc isInlineMarkupEnd(p: RstParser, markup: string): bool =
  ## Tells whether the current token is `markup` used as an inline markup
  ## end-string.
  # rst rules: https://docutils.sourceforge.io/docs/ref/rst/restructuredtext.html#inline-markup-recognition-rules
  if currentTok(p).symbol != markup: return false
  # Rule 2: the end-string must not be preceded by whitespace
  if prevTok(p).kind in {tkIndent, tkWhite}: return false
  # Rule 7: the end-string must be followed by whitespace, end of input or
  # one of the listed punctuation characters; backtick markup may also be
  # followed directly by a word
  let followedOk =
    nextTok(p).kind in {tkIndent, tkWhite, tkEof} or
    (markup in ["``", "`"] and
      nextTok(p).kind in {tkIndent, tkWhite, tkWord, tkEof}) or
    nextTok(p).symbol[0] in
      {'\'', '\"', ')', ']', '}', '>', '-', '/', '\\', ':', '.', ',', ';', '!', '?', '_'}
  if not followedOk: return false
  # Rule 4: a preceding backslash escapes the end-string (except for ``)
  let escaped = p.idx > 0 and markup != "``" and prevTok(p).symbol == "\\"
  result = not escaped

proc isInlineMarkupStart(p: RstParser, markup: string): bool =
  ## Tells whether the current token is `markup` used as an inline markup
  ## start-string.
  # rst rules: https://docutils.sourceforge.io/docs/ref/rst/restructuredtext.html#inline-markup-recognition-rules
  if currentTok(p).symbol != markup: return false
  let atFirstToken = p.idx == 0
  # Rule 6: the start-string must begin the input, follow whitespace or
  # follow one of the listed punctuation characters; backtick markup may
  # also follow a word directly
  let precededOk =
    atFirstToken or prevTok(p).kind in {tkIndent, tkWhite} or
    (markup in ["``", "`"] and
      prevTok(p).kind in {tkIndent, tkWhite, tkWord}) or
    prevTok(p).symbol[0] in {'\'', '\"', '(', '[', '{', '<', '-', '/', ':', '_'}
  if not precededOk: return false
  # Rule 1: the start-string must be followed by non-whitespace
  if nextTok(p).kind in {tkIndent, tkWhite, tkEof}: return false
  if atFirstToken: return true
  # Rule 4: a preceding backslash escapes the start-string
  if prevTok(p).symbol == "\\": return false
  # Rule 5: markup enclosed directly in matching quotes/brackets is not
  # recognized
  let opener = prevTok(p).symbol[0]
  let closer =
    case opener
    of '\'', '\"': opener
    of '(': ')'
    of '[': ']'
    of '{': '}'
    of '<': '>'
    else: '\0'
  result = closer == '\0' or nextTok(p).symbol[0] != closer

proc match(p: RstParser, start: int, expr: string): bool =
  ## Tells whether the tokens beginning at index `start` match the pattern
  ## `expr`. Every pattern element consumes exactly one token.
  # regular expressions are:
  # special char     exact match: a run of n equal characters matches one
  #                  punctuation/adornment token consisting of exactly n
  #                  such characters
  # 'w'              tkWord
  # ' '              tkWhite
  # 'a'              tkAdornment
  # 'i'              tkIndent
  # 'p'              tkPunct
  # 'o'              tkOther
  # 'T'              always true
  # 'E'              whitespace, indent or eof
  # 'e'              any enumeration sequence or '#' (for enumeration lists)
  # 'x'              a single letter a..z/A..Z or '#' (for enumeration lists)
  # 'n'              a number made of digits 0..9 or '#' (for enumeration lists)
  var i = 0
  var j = start
  var last = expr.len - 1
  while i <= last:
    case expr[i]
    of 'w': result = p.tok[j].kind == tkWord
    of ' ': result = p.tok[j].kind == tkWhite
    of 'i': result = p.tok[j].kind == tkIndent
    of 'p': result = p.tok[j].kind == tkPunct
    of 'a': result = p.tok[j].kind == tkAdornment
    of 'o': result = p.tok[j].kind == tkOther
    of 'T': result = true
    of 'E': result = p.tok[j].kind in {tkEof, tkWhite, tkIndent}
    of 'e', 'x', 'n':
      result = p.tok[j].kind == tkWord or p.tok[j].symbol == "#"
      if result:
        case p.tok[j].symbol[0]
        of '#': result = true
        of 'a'..'z', 'A'..'Z':
          result = expr[i] in {'e', 'x'} and p.tok[j].symbol.len == 1
        of '0'..'9':
          result = expr[i] in {'e', 'n'} and
                     allCharsInSet(p.tok[j].symbol, {'0'..'9'})
        else: result = false
    else:
      # literal character: count how often it is repeated in the pattern
      var c = expr[i]
      var length = 0
      while i <= last and expr[i] == c:
        inc i
        inc length
      # step back onto the last character of the run; `inc i` below then
      # moves to the next pattern element
      dec i
      result = p.tok[j].kind in {tkPunct, tkAdornment} and
          p.tok[j].symbol.len == length and p.tok[j].symbol[0] == c
    if not result: return
    inc j
    inc i
  result = true

proc fixupEmbeddedRef(n, a, b: PRstNode) =
  ## Splits the children of `n` at the last "<" leaf (the final child is
  ## never considered and never copied): children before the "<" go to `a`
  ## -- without the child directly before the "<" when that one starts with
  ## a space -- and the children after it go to `b`.
  var sep = -1
  var i = n.len - 2
  while i >= 0:
    if n.sons[i].text == "<":
      sep = i
      break
    dec i
  let skip =
    if sep > 0 and n.sons[sep - 1].text[0] == ' ': 2
    else: 1
  for k in 0 .. sep - skip: a.add(n.sons[k])
  for k in sep + 1 .. n.len - 2: b.add(n.sons[k])

proc parsePostfix(p: var RstParser, n: PRstNode): PRstNode =
  ## Handles what may follow an inline node `n` and returns the resulting
  ## node (`n` itself when nothing applies).
  ##
  ## * A trailing ``_`` or ``__`` turns `n` into a reference. If the text
  ##   ends in ``>`` right before the closing backtick it is treated as an
  ##   embedded URI: the result is an `rnHyperlink` (registered as a
  ##   reference under the link text), or an `rnStandaloneHyperlink` when no
  ##   link text precedes the URI.
  ## * A trailing ``:role:`` changes the kind of `n` for the supported roles;
  ##   any other role wraps `n` in an `rnGeneralRole` node together with a
  ##   leaf holding the role name.
  result = n
  if isInlineMarkupEnd(p, "_") or isInlineMarkupEnd(p, "__"):
    inc p.idx
    # tokens before the underscore: closing backtick preceded by '>'
    if p.tok[p.idx-2].symbol == "`" and p.tok[p.idx-3].symbol == ">":
      var a = newRstNode(rnInner)
      var b = newRstNode(rnInner)
      fixupEmbeddedRef(n, a, b)
      if a.len == 0:
        result = newRstNode(rnStandaloneHyperlink)
        result.add(b)
      else:
        result = newRstNode(rnHyperlink)
        result.add(a)
        result.add(b)
        setRef(p, rstnodeToRefname(a), b)
    elif n.kind == rnInterpretedText:
      n.kind = rnRef
    else:
      result = newRstNode(rnRef)
      result.add(n)
  elif match(p, p.idx, ":w:"):
    # a role:
    let role = nextTok(p).symbol
    case role
    of "idx": n.kind = rnIdx
    of "literal": n.kind = rnInlineLiteral
    of "strong": n.kind = rnStrongEmphasis
    of "emphasis": n.kind = rnEmphasis
    of "sub", "subscript": n.kind = rnSub
    of "sup", "superscript", "supscript":
      # "superscript" is the standard RST name of the role; the historical
      # spelling "supscript" stays accepted for backward compatibility
      n.kind = rnSup
    else:
      result = newRstNode(rnGeneralRole)
      n.kind = rnInner
      result.add(n)
      result.add(newRstNode(rnLeaf, role))
    # skip the three tokens ':', role name, ':'
    inc p.idx, 3

proc matchVerbatim(p: RstParser, start: int, expr: string): int =
  ## Checks whether the concatenated text of the tokens beginning at index
  ## `start` spells out `expr`. Returns the index of the first token after
  ## the match, or 0 when `expr` is not fully matched.
  var consumed = 0
  var cursor = start
  while consumed < expr.len and cursor < p.tok.len and
        continuesWith(expr, p.tok[cursor].symbol, consumed):
    inc consumed, p.tok[cursor].symbol.len
    inc cursor
  result =
    if consumed < expr.len: 0
    else: cursor

proc parseSmiley(p: var RstParser): PRstNode =
  ## Tries to read a smiley at the current position. On success the tokens
  ## are consumed and an `rnSmiley` node carrying the icon name is returned;
  ## otherwise the result is nil and the position is unchanged.
  if currentTok(p).symbol[0] notin SmileyStartChars: return nil
  for entry in Smilies:
    let stop = matchVerbatim(p, p.idx, entry[0])
    if stop > 0:
      p.idx = stop
      result = newRstNode(rnSmiley)
      result.text = entry[1]
      return

proc isUrl(p: RstParser, i: int): bool =
  ## Tells whether the tokens at index `i` start a URL: one of the known
  ## schemes followed by ``:``, ``//`` and a word.
  if p.tok[i+1].symbol != ":": return false
  if p.tok[i+2].symbol != "//": return false
  if p.tok[i+3].kind != tkWord: return false
  result = p.tok[i].symbol in ["http", "https", "ftp", "telnet", "file"]

proc parseUrl(p: var RstParser, father: PRstNode) =
  ## Parses the current word. If it starts a URL, all tokens of the URL are
  ## gathered into an `rnStandaloneHyperlink`; a punctuation token that is
  ## not followed by more URL material ends the URL and is left unconsumed.
  ## Otherwise the word becomes a leaf, turned into a reference by
  ## `parsePostfix` when an underscore follows.
  if not isUrl(p, p.idx):
    var leaf = newLeaf(p)
    inc p.idx
    if currentTok(p).symbol == "_": leaf = parsePostfix(p, leaf)
    father.add(leaf)
    return
  let link = newRstNode(rnStandaloneHyperlink)
  while true:
    let kind = currentTok(p).kind
    if kind notin {tkWord, tkAdornment, tkOther, tkPunct}: break
    if kind == tkPunct and
        nextTok(p).kind notin {tkWord, tkAdornment, tkOther, tkPunct}:
      break
    link.add(newLeaf(p))
    inc p.idx
  father.add(link)

proc parseBackslash(p: var RstParser, father: PRstNode) =
  ## Handles the punctuation token at the current position:
  ##
  ## * a double backslash yields one literal backslash leaf,
  ## * a single backslash escapes the next token, which is added as a leaf
  ##   unless it is whitespace (escaped whitespace is dropped),
  ## * anything else is added unchanged.
  assert(currentTok(p).kind == tkPunct)
  case currentTok(p).symbol
  of "\\\\":
    father.add(newRstNode(rnLeaf, "\\"))
    inc p.idx
  of "\\":
    # XXX: Unicode?
    inc p.idx
    if currentTok(p).kind != tkWhite: father.add(newLeaf(p))
    if currentTok(p).kind != tkEof: inc p.idx
  else:
    father.add(newLeaf(p))
    inc p.idx

proc parseUntil(p: var RstParser, father: PRstNode, postfix: string,
                interpretBackslash: bool) =
  ## Parses inline content up to the closing markup `postfix` and adds it to
  ## `father`. The current token (the opening markup) is skipped first and
  ## the closing markup is consumed.
  ##
  ## Whitespace and single line breaks are turned into one space each. With
  ## `interpretBackslash` punctuation is passed through `parseBackslash`,
  ## otherwise it is copied verbatim. A blank line or the end of input
  ## before the closing markup is reported as `meExpected` at the position
  ## of the opening markup, and parsing of this markup stops.
  let
    line = currentTok(p).line
    col = currentTok(p).col
  inc p.idx
  while true:
    case currentTok(p).kind
    of tkPunct:
      if isInlineMarkupEnd(p, postfix):
        inc p.idx
        break
      elif interpretBackslash:
        parseBackslash(p, father)
      else:
        father.add(newLeaf(p))
        inc p.idx
    of tkAdornment, tkWord, tkOther:
      father.add(newLeaf(p))
      inc p.idx
    of tkIndent:
      father.add(newRstNode(rnLeaf, " "))
      inc p.idx
      if currentTok(p).kind == tkIndent:
        # two indents in a row = blank line: the markup was never closed
        rstMessage(p, meExpected, postfix, line, col)
        break
    of tkWhite:
      father.add(newRstNode(rnLeaf, " "))
      inc p.idx
    else:
      # tkEof: the closing markup is missing. The position cannot advance
      # any further, so the loop has to be left here; otherwise a message
      # handler that does not raise would make it spin forever.
      rstMessage(p, meExpected, postfix, line, col)
      break

proc parseMarkdownCodeblock(p: var RstParser): PRstNode =
  ## Parses a markdown code block whose opening fence has already been
  ## consumed. An optional word right after the fence becomes the directive
  ## argument. Everything up to the closing fence is collected verbatim; a
  ## missing closing fence is reported as `meExpected`.
  ##
  ## The result is an `rnCodeBlock` with three children: the argument (or
  ## nil), nil, and an `rnLiteralBlock` holding the collected text.
  var args: PRstNode = nil
  if currentTok(p).kind == tkWord:
    args = newRstNode(rnDirArg)
    args.add(newLeaf(p))
    inc p.idx
  let body = newRstNode(rnLeaf, "")
  while true:
    if currentTok(p).kind == tkEof:
      rstMessage(p, meExpected, "```")
      break
    let closing = currentTok(p).kind in {tkPunct, tkAdornment} and
                  currentTok(p).symbol == "```"
    if not closing:
      body.text.add(currentTok(p).symbol)
    inc p.idx
    if closing: break
  let literal = newRstNode(rnLiteralBlock)
  literal.add(body)
  result = newRstNode(rnCodeBlock)
  result.add(args)
  result.add(PRstNode(nil))
  result.add(literal)

proc parseMarkdownLink(p: var RstParser; father: PRstNode): bool =
  ## Tries to parse a markdown link ``[description](target)`` starting at
  ## the current token. On success an `rnHyperlink` with the description and
  ## the target is added to `father`, the tokens are consumed and true is
  ## returned. On failure nothing is changed and false is returned.
  var desc = ""
  var link = ""
  var cursor = p.idx

  template collectUntil(closer, sink) =
    # gathers token text into `sink` up to the token `closer`; the link must
    # not span a line break or run into the end of input
    inc cursor # skip begin token
    while true:
      if p.tok[cursor].kind in {tkEof, tkIndent}: return false
      if p.tok[cursor].symbol == closer: break
      sink.add p.tok[cursor].symbol
      inc cursor
    inc cursor # skip end token

  collectUntil("]", desc)
  if p.tok[cursor].symbol != "(": return false
  collectUntil(")", link)
  let node = newRstNode(rnHyperlink)
  node.add desc
  node.add link
  # only commit if we detected no syntax error:
  father.add node
  p.idx = cursor
  result = true

proc parseInline(p: var RstParser, father: PRstNode) =
  ## Parses one inline element at the current position and adds the
  ## resulting node to `father`. Tokens of kind `tkIndent` and `tkEof` are
  ## left untouched.
  case currentTok(p).kind
  of tkPunct:
    # The order of the checks matters: longer markup is tried before its
    # prefixes ("***" before "**" before "*", "```" before "``" before "`").
    if isInlineMarkupStart(p, "***"):
      var n = newRstNode(rnTripleEmphasis)
      parseUntil(p, n, "***", true)
      father.add(n)
    elif isInlineMarkupStart(p, "**"):
      var n = newRstNode(rnStrongEmphasis)
      parseUntil(p, n, "**", true)
      father.add(n)
    elif isInlineMarkupStart(p, "*"):
      var n = newRstNode(rnEmphasis)
      parseUntil(p, n, "*", true)
      father.add(n)
    elif roSupportMarkdown in p.s.options and currentTok(p).symbol == "```":
      inc p.idx
      father.add(parseMarkdownCodeblock(p))
    elif isInlineMarkupStart(p, "``"):
      var n = newRstNode(rnInlineLiteral)
      # backslashes are kept verbatim inside inline literals
      parseUntil(p, n, "``", false)
      father.add(n)
    elif isInlineMarkupStart(p, "`"):
      var n = newRstNode(rnInterpretedText)
      parseUntil(p, n, "`", true)
      # a trailing `_`/`__` or `:role:` may change the node
      n = parsePostfix(p, n)
      father.add(n)
    elif isInlineMarkupStart(p, "|"):
      var n = newRstNode(rnSubstitutionReferences)
      parseUntil(p, n, "|", false)
      father.add(n)
    elif roSupportMarkdown in p.s.options and
        currentTok(p).symbol == "[" and nextTok(p).symbol != "[" and
        parseMarkdownLink(p, father):
      discard "parseMarkdownLink already processed it"
    else:
      if roSupportSmilies in p.s.options:
        let n = parseSmiley(p)
        if n != nil:
          father.add(n)
          return
      # plain punctuation, possibly a backslash escape
      parseBackslash(p, father)
  of tkWord:
    if roSupportSmilies in p.s.options:
      let n = parseSmiley(p)
      if n != nil:
        father.add(n)
        return
    parseUrl(p, father)
  of tkAdornment, tkOther, tkWhite:
    if roSupportMarkdown in p.s.options and currentTok(p).symbol == "```":
      inc p.idx
      father.add(parseMarkdownCodeblock(p))
      return
    if roSupportSmilies in p.s.options:
      let n = parseSmiley(p)
      if n != nil:
        father.add(n)
        return
    father.add(newLeaf(p))
    inc p.idx
  else: discard

proc getDirective(p: var RstParser): string =
  ## Reads a directive name of the form ``<space>name::`` at the current
  ## position and returns it in lower case; the name, the ``::`` and
  ## whitespace around the ``::`` are consumed. If there is no directive
  ## the result is "" and the position is unchanged.
  result = ""
  if not (currentTok(p).kind == tkWhite and nextTok(p).kind == tkWord):
    return
  let rollback = p.idx
  inc p.idx
  var name = currentTok(p).symbol
  inc p.idx
  while currentTok(p).kind in {tkWord, tkPunct, tkAdornment, tkOther} and
      currentTok(p).symbol != "::":
    name.add(currentTok(p).symbol)
    inc p.idx
  if currentTok(p).kind == tkWhite: inc p.idx
  if currentTok(p).symbol != "::":
    p.idx = rollback          # set back: not a directive
    return
  inc p.idx
  if currentTok(p).kind == tkWhite: inc p.idx
  result = name.toLowerAscii()

proc parseComment(p: var RstParser): PRstNode =
  ## Skips a comment and returns nil.
  ##
  ## * Text on the current line: the rest of the line is skipped.
  ## * An indent directly followed by another indent: an empty comment, only
  ##   the first indent is skipped.
  ## * Otherwise everything is skipped up to the end of input or the first
  ##   line indented less than the indent the comment started with.
  result = nil
  let kind = currentTok(p).kind
  if kind notin {tkIndent, tkEof}:
    while currentTok(p).kind notin {tkIndent, tkEof}: inc p.idx
  elif kind == tkIndent and nextTok(p).kind == tkIndent:
    inc p.idx              # empty comment
  else:
    let indent = currentTok(p).ival
    while currentTok(p).kind != tkEof and
        not (currentTok(p).kind == tkIndent and currentTok(p).ival < indent):
      inc p.idx

proc parseLine(p: var RstParser, father: PRstNode) =
  ## Parses inline elements into `father` until a token that is neither
  ## whitespace, a word, punctuation nor an "other" token is reached
  ## (i.e. an indent, an adornment or the end of input).
  while currentTok(p).kind in {tkWhite, tkWord, tkOther, tkPunct}:
    parseInline(p, father)

proc parseUntilNewline(p: var RstParser, father: PRstNode) =
  ## Parses inline content into `father` until an indent token or EOF is
  ## reached. Unlike `parseLine`, adornment tokens are consumed as well.
  while currentTok(p).kind notin {tkEof, tkIndent}:
    parseInline(p, father)

proc parseSection(p: var RstParser, result: PRstNode) {.gcsafe.}
proc parseField(p: var RstParser): PRstNode =
  ## Parses a single field and returns it as an rnField node.
  ##
  ## The returned node always has exactly two children: an rnFieldName
  ## followed by an rnFieldBody.
  let startCol = currentTok(p).col
  var name = newRstNode(rnFieldName)
  parseUntil(p, name, ":", false)
  let body = newRstNode(rnFieldBody)
  # the body may start on the same line as the field name ...
  if currentTok(p).kind != tkIndent: parseLine(p, body)
  # ... and may continue on following lines that are indented deeper than
  # the column the field started at
  if currentTok(p).kind == tkIndent and currentTok(p).ival > startCol:
    pushInd(p, currentTok(p).ival)
    parseSection(p, body)
    popInd(p)
  result = newRstNode(rnField)
  result.add(name)
  result.add(body)

proc parseFields(p: var RstParser): PRstNode =
  ## Parses a list of fields for a section or directive block.
  ##
  ## Returns ``nil`` when no field starts here; otherwise an rnFieldList
  ## node with one rnField child per parsed field. Subsequent fields must
  ## start at the same indentation as the first one.
  result = nil
  let atStart = p.idx == 0 and p.tok[0].symbol == ":"
  let afterIndent =
    currentTok(p).kind == tkIndent and nextTok(p).symbol == ":"
  if not (afterIndent or atStart): return
  let col = if atStart: currentTok(p).col else: currentTok(p).ival
  result = newRstNode(rnFieldList)
  if not atStart: inc p.idx  # step over the indent token
  while true:
    result.add(parseField(p))
    let another = currentTok(p).kind == tkIndent and
        currentTok(p).ival == col and nextTok(p).symbol == ":"
    if not another: break
    inc p.idx

proc getFieldValue*(n: PRstNode): string =
  ## Returns the value (the rendered body) of an ``rnField`` node.
  ##
  ## The proc asserts that `n` is an ``rnField`` with exactly two children,
  ## an ``rnFieldName`` and an ``rnFieldBody``. The result is stripped from
  ## leading/trailing whitespace; at minimum the empty string is returned.
  assert n.kind == rnField
  assert n.len == 2
  assert n.sons[0].kind == rnFieldName
  assert n.sons[1].kind == rnFieldBody
  let body = n.sons[1]
  result = strip(addNodes(body))

proc getFieldValue(n: PRstNode, fieldname: string): string =
  ## Looks up the field `fieldname` (compared with `cmpIgnoreStyle`) in the
  ## field list stored as the second child of `n`.
  ##
  ## Returns the field's body text, or ``"\x01\x01"`` if the field exists
  ## but its body is empty. When there is no field list or no such field
  ## the result is left at its default value.
  let fields = n.sons[1]
  # A non-fieldlist child used to be an internal error; it is now silently
  # ignored because we don't like internal errors here anymore as that
  # would break the forum!
  if fields == nil or fields.kind != rnFieldList: return
  for i in 0 ..< fields.len:
    let field = fields.sons[i]
    if cmpIgnoreStyle(addNodes(field.sons[0]), fieldname) != 0: continue
    result = addNodes(field.sons[1])
    if result == "": result = "\x01\x01" # indicates that the field exists
    return

proc getArgument(n: PRstNode): string =
  ## Returns the text of the first child of `n`, or ``""`` if that child is
  ## ``nil``.
  let arg = n.sons[0]
  result = if arg != nil: addNodes(arg) else: ""

proc parseDotDot(p: var RstParser): PRstNode {.gcsafe.}
proc parseLiteralBlock(p: var RstParser): PRstNode =
  ## Parses a literal block and returns an rnLiteralBlock node that holds a
  ## single rnLeaf child with the verbatim text.
  ##
  ## If the block starts with an indent token, all lines indented at least
  ## as deep as that token belong to the block and the indentation relative
  ## to it is preserved. Otherwise only the rest of the current line is
  ## taken.
  var leaf = newRstNode(rnLeaf, "")
  if currentTok(p).kind != tkIndent:
    while currentTok(p).kind notin {tkIndent, tkEof}:
      leaf.text.add(currentTok(p).symbol)
      inc p.idx
  else:
    let baseIndent = currentTok(p).ival
    inc p.idx
    while currentTok(p).kind != tkEof:
      if currentTok(p).kind == tkIndent:
        let lineIndent = currentTok(p).ival
        if lineIndent < baseIndent: break   # dedent ends the block
        leaf.text.add("\n")
        leaf.text.add(spaces(lineIndent - baseIndent))
      else:
        leaf.text.add(currentTok(p).symbol)
      inc p.idx
  result = newRstNode(rnLiteralBlock)
  result.add(leaf)

proc getLevel(map: var LevelMap, lvl: var int, c: char): int =
  ## Returns the level registered for adornment character `c`.
  ##
  ## A character that has no level yet (entry is 0) gets the next free one:
  ## `lvl` is incremented and stored in `map[c]`.
  result = map[c]
  if result == 0:
    inc lvl
    result = lvl
    map[c] = result

proc tokenAfterNewline(p: RstParser): int =
  ## Returns the index of the token that follows the next indent token, or
  ## the index of the EOF token if EOF is reached first. The scan starts at
  ## the current token.
  result = p.idx
  while p.tok[result].kind notin {tkEof, tkIndent}: inc result
  if p.tok[result].kind == tkIndent: inc result

proc isAdornmentHeadline(p: RstParser, adornmentIdx: int): bool =
  ## Checks that the adornment token at `adornmentIdx` is at least as long
  ## as the headline text it decorates.
  ##
  ## If the adornment comes after the current token it is an underline and
  ## the text is everything before it; otherwise it is an overline and the
  ## text is the line that follows.
  var titleLen = 0
  if p.idx >= adornmentIdx:  # overline
    var k = p.idx + 2
    while p.tok[k].kind notin {tkEof, tkIndent}:
      titleLen += p.tok[k].symbol.len
      inc k
  else:  # underline
    for k in p.idx ..< adornmentIdx-1:  # adornmentIdx-1 is a linebreak
      titleLen += p.tok[k].symbol.len
  result = p.tok[adornmentIdx].symbol.len >= titleLen

proc isLineBlock(p: RstParser): bool =
  ## Returns true if the first token of the next line is either indented
  ## deeper than the current token, or sits in the same column and is a
  ## ``|``.
  let j = tokenAfterNewline(p)
  let hereCol = currentTok(p).col
  let nextCol = p.tok[j].col
  if nextCol > hereCol:
    result = true
  else:
    result = nextCol == hereCol and p.tok[j].symbol == "|"

proc predNL(p: RstParser): bool =
  ## Returns true if the current token is the very first token, or if it is
  ## preceded by an indent token whose indentation equals the current
  ## indentation level.
  result = p.idx <= 0 or
      (prevTok(p).kind == tkIndent and prevTok(p).ival == currInd(p))

proc isDefList(p: RstParser): bool =
  ## Returns true if the next line is indented deeper than the current
  ## token, starts with a word/other/punctuation token and the current line
  ## does not end with ``::``.
  let j = tokenAfterNewline(p)
  if currentTok(p).col >= p.tok[j].col: return false
  if p.tok[j].kind notin {tkWord, tkOther, tkPunct}: return false
  # p.tok[j - 1] is the line break, so p.tok[j - 2] ends the current line
  result = p.tok[j - 2].symbol != "::"

proc isOptionList(p: RstParser): bool =
  ## Returns true if the tokens at the current position match one of the
  ## option patterns ``-w``, ``--w``, ``/w`` or ``//w``.
  result = false
  for pattern in ["-w", "--w", "/w", "//w"]:
    if match(p, p.idx, pattern): return true

proc isMarkdownHeadlinePattern(s: string): bool =
  ## Returns true if `s` consists of one to six ``#`` characters and of
  ## nothing else.
  if s.len notin 1..6: return false
  result = true
  for ch in s:
    if ch != '#':
      result = false
      break

proc isMarkdownHeadline(p: RstParser): bool =
  ## Returns true if Markdown support is enabled and the current position
  ## looks like ``# title``: a run of 1..6 ``#``, a whitespace token and
  ## then a word/other/punctuation token.
  result = roSupportMarkdown in p.s.options and
      isMarkdownHeadlinePattern(currentTok(p).symbol) and
      nextTok(p).kind == tkWhite and
      p.tok[p.idx+2].kind in {tkWord, tkOther, tkPunct}

proc findPipe(p: RstParser, start: int): bool =
  ## Returns true if a ``|`` token occurs between index `start` and the end
  ## of the line (the next indent or EOF token).
  var k = start
  while p.tok[k].symbol != "|":
    if p.tok[k].kind in {tkIndent, tkEof}: return false
    inc k
  result = true

proc whichSection(p: RstParser): RstNodeKind =
  ## Classifies the construct that starts at the current token and returns
  ## the corresponding node kind. The parser position is not changed (`p` is
  ## not a `var` parameter).
  ##
  ## The checks are tried in order, so the first matching construct wins.
  ## NOTE(review): the pattern strings passed to `match` (e.g. ``"ii"``,
  ## ``" a"``, ``"e) "``) are interpreted by `match`, which is defined
  ## elsewhere -- consult it for the meaning of the individual letters.
  if currentTok(p).kind in {tkAdornment, tkPunct}:
    # for punctuation sequences that can be both tkAdornment and tkPunct
    if roSupportMarkdown in p.s.options and currentTok(p).symbol == "```":
      return rnCodeBlock
    elif currentTok(p).symbol == "::":
      return rnLiteralBlock
    elif currentTok(p).symbol == ".." and predNL(p):
     return rnDirective
  case currentTok(p).kind
  of tkAdornment:
    # an adornment of 4+ characters matching "ii" right after it is a
    # transition
    if match(p, p.idx + 1, "ii") and currentTok(p).symbol.len >= 4:
      result = rnTransition
    elif match(p, p.idx, "+a+"):
      # grid tables are recognized but reported as not implemented
      result = rnGridTable
      rstMessage(p, meGridTableNotImplemented)
    elif match(p, p.idx + 1, " a"): result = rnTable
    elif currentTok(p).symbol == "|" and isLineBlock(p):
      result = rnLineBlock
    elif match(p, p.idx + 1, "i") and isAdornmentHeadline(p, p.idx):
      result = rnOverline
    else:
      result = rnLeaf
  of tkPunct:
    if isMarkdownHeadline(p):
      result = rnHeadline
    elif roSupportMarkdown in p.s.options and predNL(p) and
        match(p, p.idx, "| w") and findPipe(p, p.idx+3):
      result = rnMarkdownTable
    elif currentTok(p).symbol == "|" and isLineBlock(p):
      result = rnLineBlock
    elif match(p, tokenAfterNewline(p), "ai") and
        isAdornmentHeadline(p, tokenAfterNewline(p)):
      # the next line is an adornment that is long enough: underlined title
      result = rnHeadline
    elif predNL(p) and
        currentTok(p).symbol in ["+", "*", "-"] and nextTok(p).kind == tkWhite:
      result = rnBulletList
    elif match(p, p.idx, ":w:") and predNL(p):
      # (currentTok(p).symbol == ":")
      result = rnFieldList
    elif match(p, p.idx, "(e) ") or match(p, p.idx, "e) ") or
         match(p, p.idx, "e. "):
      result = rnEnumList
    elif isDefList(p):
      result = rnDefList
    elif isOptionList(p):
      result = rnOptionList
    else:
      result = rnParagraph
  of tkWord, tkOther, tkWhite:
    let tokIdx = tokenAfterNewline(p)
    if match(p, tokIdx, "ai"):
      # an adornment line follows; it only makes a headline if it is at
      # least as long as the text above it
      if isAdornmentHeadline(p, tokIdx): result = rnHeadline
      else: result = rnParagraph
    elif match(p, p.idx, "e) ") or match(p, p.idx, "e. "): result = rnEnumList
    elif isDefList(p): result = rnDefList
    else: result = rnParagraph
  else: result = rnLeaf

proc parseLineBlock(p: var RstParser): PRstNode =
  ## Parses a line block (lines introduced by ``| ``) into an rnLineBlock
  ## node with one rnLineBlockItem per line.
  ##
  ## Returns ``nil`` without consuming anything if the current token is not
  ## followed by whitespace.
  result = nil
  if nextTok(p).kind != tkWhite: return
  let barCol = currentTok(p).col
  result = newRstNode(rnLineBlock)
  # the content of an item is indented to the column right after "| "
  pushInd(p, p.tok[p.idx + 2].col)
  inc p.idx, 2
  while true:
    let item = newRstNode(rnLineBlockItem)
    parseSection(p, item)
    result.add(item)
    # another item follows if the next line starts with "| " in the same
    # column as the first bar
    let another = currentTok(p).kind == tkIndent and
        currentTok(p).ival == barCol and
        nextTok(p).symbol == "|" and p.tok[p.idx + 2].kind == tkWhite
    if not another: break
    inc p.idx, 3
  popInd(p)

proc parseParagraph(p: var RstParser, result: PRstNode) =
  ## Parses the inline content of a paragraph into `result`.
  ##
  ## The paragraph ends at a blank line, at a line with a different
  ## indentation, at a line that starts another kind of section, or after a
  ## literal block introduced by a trailing ``::``.
  while true:
    let kind = currentTok(p).kind
    if kind == tkIndent:
      if nextTok(p).kind == tkIndent:
        # blank line: end of paragraph
        inc p.idx
        break
      if currentTok(p).ival != currInd(p): break
      inc p.idx
      case whichSection(p)
      of rnParagraph, rnLeaf, rnHeadline, rnOverline, rnDirective:
        # the next line continues this paragraph; join with a space
        result.add(newRstNode(rnLeaf, " "))
      of rnLineBlock:
        result.addIfNotNil(parseLineBlock(p))
      else: break
    elif kind == tkPunct and currentTok(p).symbol == "::" and
        nextTok(p).kind == tkIndent and
        currInd(p) < nextTok(p).ival:
      # "text::" followed by an indented block: emit ":" and the block
      result.add(newRstNode(rnLeaf, ":"))
      inc p.idx            # skip '::'
      result.add(parseLiteralBlock(p))
      break
    elif kind in {tkPunct, tkWhite, tkWord, tkAdornment, tkOther}:
      parseInline(p, result)
    else:
      break

proc parseHeadline(p: var RstParser): PRstNode =
  ## Parses a headline and returns an rnHeadline node with its `level` set.
  ##
  ## For Markdown headlines the level is the number of ``#`` characters.
  ## For underlined headlines the level is looked up (and registered if
  ## new) by the first character of the underline.
  result = newRstNode(rnHeadline)
  if not isMarkdownHeadline(p):
    parseUntilNewline(p, result)
    assert(currentTok(p).kind == tkIndent)
    assert(nextTok(p).kind == tkAdornment)
    let adornChar = nextTok(p).symbol[0]
    inc p.idx, 2             # skip the line break and the underline
    result.level = getLevel(p.s.underlineToLevel, p.s.uLevel, adornChar)
  else:
    result.level = currentTok(p).symbol.len
    assert(nextTok(p).kind == tkWhite)
    inc p.idx, 2             # skip the '#' run and the whitespace
    parseUntilNewline(p, result)

type
  IntSeq = seq[int]                       # list of integer column limits
  ColumnLimits = tuple[first, last: int]  # a pair of `first`/`last` bounds
  ColSeq = seq[ColumnLimits]              # one ColumnLimits entry per cell

proc tokEnd(p: RstParser): int =
  ## Returns the column of the last character of the current token.
  result = currentTok(p).symbol.len + currentTok(p).col - 1

proc getColumns(p: var RstParser, cols: var IntSeq) =
  ## Reads a line of whitespace separated adornment tokens and stores the
  ## end column of each one in `cols`; previous contents of `cols` are
  ## discarded. The limit of the last column is then replaced by 32000. A
  ## trailing indent token is skipped.
  cols.setLen(0)
  while true:
    cols.add(tokEnd(p))
    assert(currentTok(p).kind == tkAdornment)
    inc p.idx
    if currentTok(p).kind != tkWhite: break
    inc p.idx
    if currentTok(p).kind != tkAdornment: break
  if currentTok(p).kind == tkIndent: inc p.idx
  # last column has no limit:
  cols[cols.high] = 32000

proc parseDoc(p: var RstParser): PRstNode {.gcsafe.}

proc parseSimpleTable(p: var RstParser): PRstNode =
  ## Parses a simple table (columns delimited by adornment lines) into an
  ## rnTable node whose children are rnTableRow nodes.
  ##
  ## Column limits are (re)read from every adornment line except the last.
  ## The text of each cell is collected per column, possibly over several
  ## lines, and then parsed as a document of its own by a sub-parser.
  ## When an adornment line follows a row, the cells of that row are marked
  ## as header cells.
  var
    cols: IntSeq
    row: seq[string]
    i, last, line: int
    c: char
    q: RstParser
    a, b: PRstNode
  result = newRstNode(rnTable)
  cols = @[]
  row = @[]
  a = nil
  # NOTE(review): `c` is assigned here but not read anywhere in this proc.
  c = currentTok(p).symbol[0]
  while true:
    if currentTok(p).kind == tkAdornment:
      last = tokenAfterNewline(p)
      if p.tok[last].kind in {tkEof, tkIndent}:
        # skip last adornment line:
        p.idx = last
        break
      getColumns(p, cols)
      setLen(row, cols.len)
      # `a` still holds the row parsed before this adornment line (if any):
      # turn its cells into header cells
      if a != nil:
        for j in 0 ..< a.len: a.sons[j].kind = rnTableHeaderCell
    if currentTok(p).kind == tkEof: break
    for j in countup(0, high(row)): row[j] = ""
    # the following while loop iterates over the lines a single cell may span:
    line = currentTok(p).line
    while true:
      i = 0
      # distribute the tokens of this line over the columns by comparing
      # the token's end column with the column limits
      while currentTok(p).kind notin {tkIndent, tkEof}:
        if tokEnd(p) <= cols[i]:
          row[i].add(currentTok(p).symbol)
          inc p.idx
        else:
          if currentTok(p).kind == tkWhite: inc p.idx
          inc i
      if currentTok(p).kind == tkIndent: inc p.idx
      # a token that ends within the first column starts a new row
      if tokEnd(p) <= cols[0]: break
      if currentTok(p).kind in {tkEof, tkAdornment}: break
      # otherwise the row continues: separate the lines in all columns but
      # the first with a line feed
      for j in countup(1, high(row)): row[j].add('\x0A')
    a = newRstNode(rnTableRow)
    for j in countup(0, high(row)):
      # parse the collected text of each cell with a fresh parser that
      # shares the state `p.s`
      initParser(q, p.s)
      q.col = cols[j]
      q.line = line - 1
      q.filename = p.filename
      q.col += getTokens(row[j], false, q.tok)
      b = newRstNode(rnTableDataCell)
      b.add(parseDoc(q))
      a.add(b)
    result.add(a)

proc readTableRow(p: var RstParser): ColSeq =
  ## Splits the current line of a Markdown table into cells and returns the
  ## token index range (`first` inclusive, `last` exclusive) of each cell.
  ##
  ## Cells are separated by ``|`` tokens that are not preceded by a
  ## backslash token; a leading ``|`` is skipped. Afterwards the parser is
  ## positioned at the start of the next line.
  const lineEnd = {tkIndent, tkEof}
  if currentTok(p).symbol == "|": inc p.idx
  while currentTok(p).kind notin lineEnd:
    let firstIdx = p.idx
    # advance to the next unescaped '|' or to the end of the line
    while currentTok(p).kind notin lineEnd and
        not (currentTok(p).symbol == "|" and prevTok(p).symbol != "\\"):
      inc p.idx
    result.add((first: firstIdx, last: p.idx))
    if currentTok(p).kind in lineEnd: break
    inc p.idx                # skip the separator
  p.idx = tokenAfterNewline(p)

proc getColContents(p: var RstParser, colLim: ColumnLimits): string =
  ## Concatenates the symbols of the tokens in the index range
  ## ``colLim.first ..< colLim.last`` and returns the text with
  ## leading/trailing whitespace stripped.
  var cell = ""
  for i in colLim.first ..< colLim.last:
    cell.add(p.tok[i].symbol)
  result = cell.strip

proc isValidDelimiterRow(p: var RstParser, colNum: int): bool =
  ## Reads the next table row and checks that it is a delimiter row: it
  ## must have exactly `colNum` cells, each at least 3 characters long and
  ## starting with ``--`` or ``:-``. The row is consumed in any case.
  let cells = readTableRow(p)
  result = cells.len == colNum
  if result:
    for lim in cells:
      let text = getColContents(p, lim)
      let dashed = text.startsWith("--") or text.startsWith(":-")
      if text.len < 3 or not dashed:
        return false

proc parseMarkdownTable(p: var RstParser): PRstNode =
  ## Parses a Markdown (pipe) table into an rnMarkdownTable node whose
  ## children are rnTableRow nodes.
  ##
  ## The first row is the header and determines the number of columns. It
  ## must be followed by a delimiter row, otherwise
  ## `meMarkdownIllformedTable` is reported. Every following line that
  ## starts with ``|`` is a data row.
  var
    row: ColSeq
    colNum: int
    a, b: PRstNode
    q: RstParser
  result = newRstNode(rnMarkdownTable)

  proc parseRow(p: var RstParser, cellKind: RstNodeKind, result: PRstNode) =
    # Parses one table line and appends it to `result` as an rnTableRow
    # with cells of kind `cellKind`. Uses the locals of the outer proc.
    row = readTableRow(p)
    if colNum == 0: colNum = row.len # table header
    elif row.len < colNum: row.setLen(colNum)
    a = newRstNode(rnTableRow)
    # only the first `colNum` cells are used; surplus cells are ignored
    for j in 0 ..< colNum:
      b = newRstNode(cellKind)
      # the cell text is parsed as a document by a fresh sub-parser
      initParser(q, p.s)
      q.col = p.col
      q.line = currentTok(p).line - 1
      q.filename = p.filename
      q.col += getTokens(getColContents(p, row[j]), false, q.tok)
      b.add(parseDoc(q))
      a.add(b)
    result.add(a)

  parseRow(p, rnTableHeaderCell, result)
  if not isValidDelimiterRow(p, colNum): rstMessage(p, meMarkdownIllformedTable)
  while predNL(p) and currentTok(p).symbol == "|":
    parseRow(p, rnTableDataCell, result)

proc parseTransition(p: var RstParser): PRstNode =
  ## Consumes a transition marker together with up to two indent tokens
  ## that follow it and returns an rnTransition node.
  result = newRstNode(rnTransition)
  inc p.idx
  var skipped = 0
  while skipped < 2 and currentTok(p).kind == tkIndent:
    inc p.idx
    inc skipped

proc parseOverline(p: var RstParser): PRstNode =
  ## Parses a title that has an adornment line above it and returns an
  ## rnOverline node with its `level` set.
  ##
  ## The title may span several lines as long as the continuation lines are
  ## indented deeper than the current indentation; the lines are joined
  ## with a single space. A closing adornment line is skipped if present.
  let adornChar = currentTok(p).symbol[0]
  inc p.idx, 2               # skip the overline and the line break
  result = newRstNode(rnOverline)
  while true:
    parseUntilNewline(p, result)
    if currentTok(p).kind != tkIndent: break
    inc p.idx
    # prevTok is the indent token we just stepped over
    if prevTok(p).ival <= currInd(p): break
    result.add(newRstNode(rnLeaf, " "))
  result.level = getLevel(p.s.overlineToLevel, p.s.oLevel, adornChar)
  if currentTok(p).kind == tkAdornment:
    inc p.idx                # XXX: check?
    if currentTok(p).kind == tkIndent: inc p.idx

proc parseBulletList(p: var RstParser): PRstNode =
  ## Parses a bullet list into an rnBulletList node with one rnBulletItem
  ## per item.
  ##
  ## Returns ``nil`` without consuming anything if the bullet is not
  ## followed by whitespace. All items must use the same bullet symbol and
  ## start in the same column as the first one.
  result = nil
  if nextTok(p).kind != tkWhite: return
  let bullet = currentTok(p).symbol
  let bulletCol = currentTok(p).col
  result = newRstNode(rnBulletList)
  # item content is indented to the column right after the bullet
  pushInd(p, p.tok[p.idx + 2].col)
  inc p.idx, 2
  while true:
    let item = newRstNode(rnBulletItem)
    parseSection(p, item)
    result.add(item)
    let another = currentTok(p).kind == tkIndent and
        currentTok(p).ival == bulletCol and
        nextTok(p).symbol == bullet and
        p.tok[p.idx + 2].kind == tkWhite
    if not another: break
    inc p.idx, 3             # skip line break, bullet and whitespace
  popInd(p)

proc parseOptionList(p: var RstParser): PRstNode =
  ## Parses an option list into an rnOptionList node.
  ##
  ## Each entry becomes an rnOptionListItem with two children: an
  ## rnOptionGroup holding the option tokens and an rnDescription holding
  ## the description. Parsing stops at the first line that does not look
  ## like an option (see `isOptionList`).
  result = newRstNode(rnOptionList)
  while isOptionList(p):
    var group = newRstNode(rnOptionGroup)
    let description = newRstNode(rnDescription)
    let item = newRstNode(rnOptionListItem)
    if match(p, p.idx, "//w"): inc p.idx
    # the option part ends at a whitespace token longer than one character
    # (or at the end of the line)
    while currentTok(p).kind notin {tkIndent, tkEof}:
      let isGap =
        currentTok(p).kind == tkWhite and currentTok(p).symbol.len > 1
      if isGap:
        inc p.idx
        break
      group.add(newLeaf(p))
      inc p.idx
    let lineBreak = tokenAfterNewline(p) - 1
    if lineBreak >= 0 and p.tok[lineBreak].kind == tkIndent and
        p.tok[lineBreak].ival > currInd(p):
      # the following line is indented deeper: multi-line description
      pushInd(p, p.tok[lineBreak].ival)
      parseSection(p, description)
      popInd(p)
    else:
      parseLine(p, description)
    if currentTok(p).kind == tkIndent: inc p.idx
    item.add(group)
    item.add(description)
    result.add(item)

proc parseDefinitionList(p: var RstParser): PRstNode =
  ## Parses a definition list into an rnDefList node whose children are
  ## rnDefItem nodes, each holding an rnDefName and an rnDefBody.
  ##
  ## Returns ``nil`` if nothing that qualifies as a definition item is found
  ## at the current position.
  result = nil
  # `j` is the index of the line break that ends the current line
  var j = tokenAfterNewline(p) - 1
  # the next line must be indented deeper than the current indentation and
  # the current line must not end with "::" (that would be a literal block)
  if j >= 1 and p.tok[j].kind == tkIndent and
      p.tok[j].ival > currInd(p) and p.tok[j - 1].symbol != "::":
    var col = currentTok(p).col
    result = newRstNode(rnDefList)
    while true:
      j = p.idx  # remember the start of the term for backtracking
      var a = newRstNode(rnDefName)
      parseLine(p, a)
      if currentTok(p).kind == tkIndent and
          currentTok(p).ival > currInd(p) and
          nextTok(p).symbol != "::" and
          nextTok(p).kind notin {tkIndent, tkEof}:
        # an indented, non-empty body follows the term
        pushInd(p, currentTok(p).ival)
        var b = newRstNode(rnDefBody)
        parseSection(p, b)
        var c = newRstNode(rnDefItem)
        c.add(a)
        c.add(b)
        result.add(c)
        popInd(p)
      else:
        # not a definition item: rewind to where the term started
        p.idx = j
        break
      if currentTok(p).kind == tkIndent and currentTok(p).ival == col:
        inc p.idx
        # look ahead: does the next line again start a definition item?
        j = tokenAfterNewline(p) - 1
        if j >= 1 and p.tok[j].kind == tkIndent and p.tok[j].ival > col and
            p.tok[j-1].symbol != "::" and p.tok[j+1].kind != tkIndent:
          discard
        else:
          break
    if result.len == 0: result = nil

proc parseEnumList(p: var RstParser): PRstNode =
  ## Parses an enumerated list into an rnEnumList node with one rnEnumItem
  ## child per item.
  ##
  ## The text of the first enumerator (with ``#`` replaced by ``1``) is
  ## stored in `result.text`. Following items are only accepted if they
  ## start in the same column, use the same enumerator pattern and continue
  ## the sequence (each enumerator must be exactly one greater than the
  ## previous one).
  const
    wildcards: array[0..5, string] = ["(n) ", "n) ", "n. ",
                                      "(x) ", "x) ", "x. "]
      # enumerator patterns, where 'x' means letter and 'n' means number
    wildToken: array[0..5, int] = [4, 3, 3, 4, 3, 3]  # number of tokens
    wildIndex: array[0..5, int] = [1, 0, 0, 1, 0, 0]
      # position of enumeration sequence (number/letter) in enumerator
  result = newRstNode(rnEnumList)
  let col = currentTok(p).col
  # find the enumerator pattern that is used by the first item
  var w = 0
  while w < wildcards.len:
    if match(p, p.idx, wildcards[w]): break
    inc w
  assert w < wildcards.len
  # with Markdown support a repeated `1` counts as auto-enumerator, too
  let autoEnums = if roSupportMarkdown in p.s.options: @["#", "1"] else: @["#"]
  var prevAE = ""  # so as not allow mixing auto-enumerators `1` and `#`
  var curEnum = 1
  for i in 0 ..< wildToken[w]-1:  # add first enumerator with (, ), and .
    if p.tok[p.idx + i].symbol == "#":
      prevAE = "#"
      result.text.add "1"
    else:
      result.text.add p.tok[p.idx + i].symbol
  var prevEnum = p.tok[p.idx + wildIndex[w]].symbol
  inc p.idx, wildToken[w]
  while true:
    var item = newRstNode(rnEnumItem)
    # the item body is indented to the column after the enumerator
    pushInd(p, currentTok(p).col)
    parseSection(p, item)
    popInd(p)
    result.add(item)
    if currentTok(p).kind == tkIndent and currentTok(p).ival == col and
        match(p, p.idx+1, wildcards[w]):
      let enumerator = p.tok[p.idx + 1 + wildIndex[w]].symbol
      # check that it's in sequence: enumerator == next(prevEnum)
      if "n" in wildcards[w]:  # arabic numeral
        # an enumerator that cannot be parsed as a number counts as 1
        let prevEnumI = try: parseInt(prevEnum) except: 1
        if enumerator in autoEnums:
          if prevAE != "" and enumerator != prevAE:
            break
          prevAE = enumerator
          curEnum = prevEnumI + 1
        else: curEnum = (try: parseInt(enumerator) except: 1)
        if curEnum - prevEnumI != 1:
          break
        prevEnum = enumerator
      else:  # a..z
        let prevEnumI = ord(prevEnum[0])
        if enumerator == "#": curEnum = prevEnumI + 1
        else: curEnum = ord(enumerator[0])
        if curEnum - prevEnumI != 1:
          break
        prevEnum = $chr(curEnum)
      # skip the line break and the enumerator tokens
      inc p.idx, 1 + wildToken[w]
    else:
      break

proc sonKind(father: PRstNode, i: int): RstNodeKind =
  ## Returns the kind of the `i`-th child of `father`, or ``rnLeaf`` if
  ## `father` has no child with that index.
  result = if i < father.len: father.sons[i].kind else: rnLeaf

proc parseSection(p: var RstParser, result: PRstNode) =
  ## Parses a sequence of body elements at the current indentation level and
  ## appends them to `result`.
  ##
  ## Deeper indented content is parsed recursively into an rnBlockQuote
  ## child. Parsing stops at EOF or when a line with a smaller indentation
  ## than the current level is reached.
  while true:
    var leave = false
    assert(p.idx >= 0)
    while currentTok(p).kind == tkIndent:
      if currInd(p) == currentTok(p).ival:
        # same level: just skip the line break
        inc p.idx
      elif currentTok(p).ival > currInd(p):
        # deeper level: nested block quote
        pushInd(p, currentTok(p).ival)
        var a = newRstNode(rnBlockQuote)
        parseSection(p, a)
        result.add(a)
        popInd(p)
      else:
        # dedent: this section is finished
        while currentTok(p).kind != tkEof and nextTok(p).kind == tkIndent:
          inc p.idx  # skip blank lines
        leave = true
        break
    if leave or currentTok(p).kind == tkEof: break
    var a: PRstNode = nil
    var k = whichSection(p)
    case k
    of rnLiteralBlock:
      inc p.idx              # skip '::'
      a = parseLiteralBlock(p)
    of rnBulletList: a = parseBulletList(p)
    of rnLineBlock: a = parseLineBlock(p)
    of rnDirective: a = parseDotDot(p)
    of rnEnumList: a = parseEnumList(p)
    of rnLeaf: rstMessage(p, meNewSectionExpected)
    of rnParagraph: discard
    of rnDefList: a = parseDefinitionList(p)
    of rnFieldList:
      # parseFields looks for the indent token in front of the ':' (unless
      # it is at the very first token), so step back onto it
      if p.idx > 0: dec p.idx
      a = parseFields(p)
    of rnTransition: a = parseTransition(p)
    of rnHeadline: a = parseHeadline(p)
    of rnOverline: a = parseOverline(p)
    of rnTable: a = parseSimpleTable(p)
    of rnMarkdownTable: a = parseMarkdownTable(p)
    of rnOptionList: a = parseOptionList(p)
    else:
      #InternalError("rst.parseSection()")
      discard
    # whatever produced no node is parsed as a paragraph; the exception is
    # a directive, which may legitimately yield nil
    if a == nil and k != rnDirective:
      a = newRstNode(rnParagraph)
      parseParagraph(p, a)
    result.addIfNotNil(a)
  # a leading paragraph that is not directly followed by another paragraph
  # is turned into an rnInner node
  if sonKind(result, 0) == rnParagraph and sonKind(result, 1) != rnParagraph:
    result.sons[0].kind = rnInner

proc parseSectionWrapper(p: var RstParser): PRstNode =
  ## Parses a section into a fresh rnInner node and returns it. As long as
  ## the node is an rnInner with exactly one child, that child is returned
  ## in its place.
  var root = newRstNode(rnInner)
  parseSection(p, root)
  while root.kind == rnInner and root.len == 1:
    root = root.sons[0]
  result = root

proc `$`(t: Token): string =
  ## Renders a token as its kind followed by a space and its symbol.
  result = $t.kind
  result.add(' ')
  result.add(t.symbol)

proc parseDoc(p: var RstParser): PRstNode =
  ## Parses a whole document. Reports `meGeneralParseError` if the parser
  ## did not arrive at the EOF token.
  result = parseSectionWrapper(p)
  if currentTok(p).kind == tkEof: return
  rstMessage(p, meGeneralParseError)

type
  # Flags that control how `parseDirective` reads a directive:
  # - hasArg: an rnDirArg node is created for the directive argument
  # - hasOptions: a field list following the directive is parsed as options
  # - argIsFile: the argument is read as a run of word/other/punctuation/
  #   adornment tokens (only looked at when hasArg is set)
  # - argIsWord: the argument is a single word, leading whitespace is
  #   skipped (only looked at when hasArg is set and argIsFile is not)
  DirFlag = enum
    hasArg, hasOptions, argIsFile, argIsWord
  DirFlags = set[DirFlag]
  # Signature of the procs that parse the content block of a directive.
  SectionParser = proc (p: var RstParser): PRstNode {.nimcall.}

proc parseDirective(p: var RstParser, flags: DirFlags): PRstNode =
  ## Parses arguments and options for a directive block.
  ##
  ## A directive block will always have three sons: the arguments for the
  ## directive (rnDirArg), the options (rnFieldList) and the directive
  ## content block. This proc parses the two first nodes, the 3rd is left to
  ## the outer `parseDirective` call.
  ##
  ## Both rnDirArg and rnFieldList children nodes might be nil, so you need to
  ## check them before accessing.
  result = newRstNode(rnDirective)
  var args: PRstNode = nil
  if hasArg in flags:
    args = newRstNode(rnDirArg)
    if argIsFile in flags:
      # take everything up to the first whitespace/indent/EOF token
      while currentTok(p).kind in {tkWord, tkOther, tkPunct, tkAdornment}:
        args.add(newLeaf(p))
        inc p.idx
    elif argIsWord in flags:
      while currentTok(p).kind == tkWhite: inc p.idx
      if currentTok(p).kind != tkWord:
        args = nil
      else:
        args.add(newLeaf(p))
        inc p.idx
    else:
      parseLine(p, args)
  result.add(args)
  var options: PRstNode = nil
  # options are only looked for on a following line that is indented by at
  # least 3 and starts with ':'
  if hasOptions in flags and currentTok(p).kind == tkIndent and
      currentTok(p).ival >= 3 and nextTok(p).symbol == ":":
    options = parseFields(p)
  result.add(options)

proc indFollows(p: RstParser): bool =
  ## Returns true if the current token is an indent token that is deeper
  ## than the current indentation level.
  if currentTok(p).kind != tkIndent: return false
  result = currentTok(p).ival > currInd(p)

proc parseDirective(p: var RstParser, flags: DirFlags,
                    contentParser: SectionParser): PRstNode =
  ## A helper proc that does main work for specific directive procs.
  ## Always returns a generic rnDirective tree with these 3 children:
  ##
  ## 1) rnDirArg
  ## 2) rnFieldList
  ## 3) a node returned by `contentParser`.
  ##
  ## .. warning:: Any of the 3 children may be nil.
  result = parseDirective(p, flags)
  # There is no content if no parser was given, or if the directive line is
  # finished and the following line is not indented deeper.
  let noContent = isNil(contentParser) or
      (currentTok(p).kind == tkIndent and not indFollows(p))
  if noContent:
    result.add(PRstNode(nil))
    return
  var contentIndent = p.tok[tokenAfterNewline(p)-1].ival
  if contentIndent <= currInd(p):  # parse only this line
    contentIndent = currentTok(p).col
  pushInd(p, contentIndent)
  let content = contentParser(p)
  popInd(p)
  result.add(content)

proc parseDirBody(p: var RstParser, contentParser: SectionParser): PRstNode =
  ## Parses the body of a directive with `contentParser` if a deeper
  ## indented block follows; otherwise the result keeps its default value.
  if not indFollows(p): return
  pushInd(p, currentTok(p).ival)
  result = contentParser(p)
  popInd(p)

proc dirInclude(p: var RstParser): PRstNode =
  ## Handles the ``include`` directive: reads the file given as argument and
  ## returns its parsed contents. If the file cannot be located,
  ## `meCannotOpenFile` is reported and ``nil`` is returned.
  ##
  ## The following options are recognized:
  ##
  ## :start-after: text to find in the external data file
  ##
  ##     Only the content after the first occurrence of the specified
  ##     text will be included. If text is not found inclusion will
  ##     start from beginning of the file
  ##
  ## :end-before: text to find in the external data file
  ##
  ##     Only the content before the first occurrence of the specified
  ##     text (but after any after text) will be included. If text is
  ##     not found inclusion will happen until the end of the file.
  ##
  ## :literal: flag
  ##
  ##     The entire file is returned as a single literal block instead of
  ##     being parsed.
  #encoding : name of text encoding
  #    The text encoding of the external data file. Defaults to the document's
  #    encoding (if specified).
  #
  result = nil
  var n = parseDirective(p, {hasArg, argIsFile, hasOptions}, nil)
  var filename = strip(addNodes(n.sons[0]))
  var path = p.findRelativeFile(filename)
  if path == "":
    rstMessage(p, meCannotOpenFile, filename)
  else:
    # XXX: error handling; recursive file inclusion!
    if getFieldValue(n, "literal") != "":
      result = newRstNode(rnLiteralBlock)
      result.add(newRstNode(rnLeaf, readFile(path)))
    else:
      let inputString = readFile(path).string()
      # index of the first character to include
      let startPosition =
        block:
          let searchFor = n.getFieldValue("start-after").strip()
          if searchFor != "":
            let pos = inputString.find(searchFor)
            if pos != -1: pos + searchFor.len
            else: 0
          else:
            0

      # index of the last character to include
      let endPosition =
        block:
          let searchFor = n.getFieldValue("end-before").strip()
          if searchFor != "":
            let pos = inputString.find(searchFor, start = startPosition)
            if pos != -1: pos - 1
            # Marker not found: include up to the end of the file. (Falling
            # back to 0 here would cut the text off after its first
            # character and yield an invalid slice whenever
            # `startPosition` is greater than 1.)
            else: inputString.len - 1
          else:
            inputString.len - 1

      var q: RstParser
      initParser(q, p.s)
      q.filename = path
      q.col += getTokens(
        inputString[startPosition..endPosition].strip(),
        false,
        q.tok)
      # workaround a GCC bug; more like the interior pointer bug?
      #if find(q.tok[high(q.tok)].symbol, "\0\x01\x02") > 0:
      #  InternalError("Too many binary zeros in include file")
      result = parseDoc(q)

proc dirCodeBlock(p: var RstParser, nimExtension = false): PRstNode =
  ## Parses a code block.
  ##
  ## Code blocks are rnDirective trees with a `kind` of rnCodeBlock. See the
  ## description of ``parseDirective`` for further structure information.
  ##
  ## Code blocks can come in two forms, the standard `code directive
  ## <http://docutils.sourceforge.net/docs/ref/rst/directives.html#code>`_ and
  ## the nim extension ``.. code-block::``. If the block is an extension, we
  ## want the default language syntax highlighting to be Nim, so we create a
  ## fake internal field to communicate with the generator. The field is named
  ## ``default-language``, which is unlikely to collide with a field specified
  ## by any random rst input file.
  ##
  ## As an extension this proc will process the ``file`` extension field and if
  ## present will replace the code block with the contents of the referenced
  ## file. If the referenced file cannot be located, `meCannotOpenFile` is
  ## reported and the code block keeps the content given in the document.
  result = parseDirective(p, {hasArg, hasOptions}, parseLiteralBlock)
  var filename = strip(getFieldValue(result, "file"))
  if filename != "":
    var path = p.findRelativeFile(filename)
    if path == "":
      rstMessage(p, meCannotOpenFile, filename)
    else:
      # Only read the file when it was found: the message handler may
      # return instead of raising, and reading the empty path would then
      # fail with an unrelated I/O error.
      var n = newRstNode(rnLiteralBlock)
      n.add(newRstNode(rnLeaf, readFile(path)))
      result.sons[2] = n

  # Extend the field block if we are using our custom Nim extension.
  if nimExtension:
    # Create a field block if the input block didn't have any.
    if result.sons[1].isNil: result.sons[1] = newRstNode(rnFieldList)
    assert result.sons[1].kind == rnFieldList
    # Hook the extra field and specify the Nim language as value.
    var extraNode = newRstNode(rnField)
    extraNode.add(newRstNode(rnFieldName))
    extraNode.add(newRstNode(rnFieldBody))
    extraNode.sons[0].add(newRstNode(rnLeaf, "default-language"))
    extraNode.sons[1].add(newRstNode(rnLeaf, "Nim"))
    result.sons[1].add(extraNode)

  result.kind = rnCodeBlock

proc dirContainer(p: var RstParser): PRstNode =
  ## Parses a ``container`` directive (argument plus section content) and
  ## returns the tree re-tagged as rnContainer.
  const flags: DirFlags = {hasArg}
  result = p.parseDirective(flags, parseSectionWrapper)
  assert(result.kind == rnDirective)
  assert(result.len == 3)
  result.kind = rnContainer

proc dirImage(p: var RstParser): PRstNode =
  ## Parses an ``image`` directive (file argument and options, no content)
  ## and returns the tree re-tagged as rnImage.
  const flags: DirFlags = {hasOptions, hasArg, argIsFile}
  result = p.parseDirective(flags, nil)
  result.kind = rnImage

proc dirFigure(p: var RstParser): PRstNode =
  ## Parses a ``figure`` directive (file argument, options and section
  ## content) and returns the tree re-tagged as rnFigure.
  const flags: DirFlags = {hasOptions, hasArg, argIsFile}
  result = p.parseDirective(flags, parseSectionWrapper)
  result.kind = rnFigure

proc dirTitle(p: var RstParser): PRstNode =
  ## Parses a ``title`` directive (argument only) and returns the tree
  ## re-tagged as rnTitle.
  const flags: DirFlags = {hasArg}
  result = p.parseDirective(flags, nil)
  result.kind = rnTitle

proc dirContents(p: var RstParser): PRstNode =
  ## Parses a ``contents`` directive (argument only) and returns the tree
  ## re-tagged as rnContents.
  const flags: DirFlags = {hasArg}
  result = p.parseDirective(flags, nil)
  result.kind = rnContents

proc dirIndex(p: var RstParser): PRstNode =
  ## Parses an ``index`` directive (section content only, no argument or
  ## options) and returns the tree re-tagged as rnIndex.
  const noFlags: DirFlags = {}
  result = p.parseDirective(noFlags, parseSectionWrapper)
  result.kind = rnIndex

proc dirAdmonition(p: var RstParser, d: string): PRstNode =
  ## Parses an admonition directive (section content only) and returns the
  ## tree re-tagged as rnAdmonition; the directive name `d` is stored in
  ## the node's `text`.
  const noFlags: DirFlags = {}
  result = p.parseDirective(noFlags, parseSectionWrapper)
  result.kind = rnAdmonition
  result.text = d

proc dirRawAux(p: var RstParser, result: var PRstNode, kind: RstNodeKind,
               contentParser: SectionParser) =
  ## Finishes a ``raw`` directive whose argument and options are already
  ## stored in `result`.
  ##
  ## Without a ``file`` option, `result` is re-tagged as `kind` and the
  ## directive body parsed by `contentParser` is appended. With a ``file``
  ## option, `result` is replaced by a new node of `kind` holding the file
  ## contents; if the file cannot be located, `meCannotOpenFile` is
  ## reported and `result` is left untouched.
  let filename = getFieldValue(result, "file")
  if filename.len == 0:
    result.kind = kind
    result.add(parseDirBody(p, contentParser))
    return
  let path = p.findRelativeFile(filename)
  if path.len == 0:
    rstMessage(p, meCannotOpenFile, filename)
    return
  let contents = readFile(path)
  result = newRstNode(kind)
  result.add(newRstNode(rnLeaf, contents))

proc dirRaw(p: var RstParser): PRstNode =
  ## Parses a ``raw`` directive.
  ##
  ## The optional argument selects the output format and is compared case
  ## insensitively: ``html`` yields an rnRawHtml node, ``latex`` an
  ## rnRawLatex node, anything else is reported as `meInvalidDirective`.
  ## Without an argument an rnRaw node is produced.
  ##
  ## The following options are recognized:
  ##
  ## file : string
  ##     The path of a raw data file to be included.
  result = parseDirective(p, {hasOptions, hasArg, argIsWord})
  if result.sons[0] == nil:
    dirRawAux(p, result, rnRaw, parseSectionWrapper)
    return
  let outFormat = result.sons[0].sons[0].text
  if cmpIgnoreCase(outFormat, "html") == 0:
    dirRawAux(p, result, rnRawHtml, parseLiteralBlock)
  elif cmpIgnoreCase(outFormat, "latex") == 0:
    dirRawAux(p, result, rnRawLatex, parseLiteralBlock)
  else:
    rstMessage(p, meInvalidDirective, outFormat)

proc selectDir(p: var RstParser, d: string): PRstNode =
  ## Dispatches to the parser of the directive named `d` (expected in lower
  ## case). Unknown directives -- and ``raw`` when `roSupportRawDirective`
  ## is not enabled -- are reported as `meInvalidDirective` and yield
  ## ``nil``.
  result = nil
  case d
  of "admonition", "attention", "caution", "danger", "error", "hint",
     "important", "note", "tip", "warning":
    result = dirAdmonition(p, d)
  of "code": result = dirCodeBlock(p)
  of "code-block": result = dirCodeBlock(p, nimExtension = true)
  of "container": result = dirContainer(p)
  of "contents": result = dirContents(p)
  of "figure": result = dirFigure(p)
  of "image": result = dirImage(p)
  of "include": result = dirInclude(p)
  of "index": result = dirIndex(p)
  of "title": result = dirTitle(p)
  of "raw":
    if roSupportRawDirective notin p.s.options:
      rstMessage(p, meInvalidDirective, d)
    else:
      result = dirRaw(p)
  else:
    rstMessage(p, meInvalidDirective, d)

proc parseDotDot(p: var RstParser): PRstNode =
  ## Parses an "explicit markup block", i.e. a construct that starts with
  ## ``..``: a directive, a hyperlink target, a substitution definition, a
  ## footnote/citation or -- if it is none of these -- a comment.
  ##
  ## Only directives produce a node; for the other constructs the
  ## reference/substitution is registered and ``nil`` is returned.
  result = nil
  let markerCol = currentTok(p).col
  inc p.idx
  let directive = getDirective(p)
  if directive != "":
    pushInd(p, markerCol)
    result = selectDir(p, directive)
    popInd(p)
  elif match(p, p.idx, " _"):
    # hyperlink target:
    inc p.idx, 2
    var refName = getReferenceName(p, ":")
    if currentTok(p).kind == tkWhite: inc p.idx
    var target = untilEol(p)
    setRef(p, rstnodeToRefname(refName), target)
  elif match(p, p.idx, " |"):
    # substitution definitions:
    inc p.idx, 2
    var subName = getReferenceName(p, "|")
    var replacement: PRstNode
    if currentTok(p).kind == tkWhite: inc p.idx
    if cmpIgnoreStyle(currentTok(p).symbol, "replace") == 0:
      inc p.idx
      expect(p, "::")
      replacement = untilEol(p)
    elif cmpIgnoreStyle(currentTok(p).symbol, "image") == 0:
      inc p.idx
      replacement = dirImage(p)
    else:
      rstMessage(p, meInvalidDirective, currentTok(p).symbol)
    setSub(p, addNodes(subName), replacement)
  elif match(p, p.idx, " ["):
    # footnotes, citations
    inc p.idx, 2
    var noteName = getReferenceName(p, "]")
    if currentTok(p).kind == tkWhite: inc p.idx
    var noteBody = untilEol(p)
    setRef(p, rstnodeToRefname(noteName), noteBody)
  else:
    result = parseComment(p)

proc resolveSubs(p: var RstParser, n: PRstNode): PRstNode =
  ## Walks the tree rooted at `n`, resolving substitution and hyperlink
  ## references, and returns the node that should take the place of `n`
  ## (which is `n` itself unless it was replaced). A `nil` input yields
  ## `nil`.
  ##
  ## * `rnSubstitutionReferences`: replaced by the stored substitution
  ##   value if `findSub` knows it; otherwise by a leaf holding the value of
  ##   the environment variable of the same name; if that is empty too,
  ##   `mwUnknownSubstitution` is reported.
  ## * `rnRef`: when `findRef` returns a target, `n` is turned into an
  ##   `rnInner` node and wrapped, together with the target, in a new
  ##   `rnHyperlink` node.
  ## * `rnLeaf`: left untouched.
  ## * `rnContents`: records on the parser that a table of contents is
  ##   present; children are not visited.
  ## * every other kind: each child is replaced by its resolved form.
  result = n
  if n.isNil: return
  case n.kind
  of rnSubstitutionReferences:
    let subIdx = findSub(p, n)
    if subIdx < 0:
      # Unknown substitution: try the process environment before warning.
      let name = addNodes(n)
      let envValue = getEnv(name)
      if envValue == "":
        rstMessage(p, mwUnknownSubstitution, name)
      else:
        result = newRstNode(rnLeaf, envValue)
    else:
      result = p.s.subs[subIdx].value
  of rnRef:
    let target = findRef(p, rstnodeToRefname(n))
    if not target.isNil:
      # The reference text becomes the first child, the target the second.
      n.kind = rnInner
      result = newRstNode(rnHyperlink)
      result.add(n)
      result.add(target)
  of rnLeaf:
    discard
  of rnContents:
    p.hasToc = true
  else:
    for i in 0 ..< n.len:
      n.sons[i] = resolveSubs(p, n.sons[i])

proc rstParse*(text, filename: string,
               line, column: int, hasToc: var bool,
               options: RstParseOptions,
               findFile: FindFileHandler = nil,
               msgHandler: MsgHandler = nil): PRstNode =
  ## Parses the RST source `text` and returns the root of the resulting
  ## node tree, with substitutions and references already resolved by
  ## `resolveSubs`.
  ##
  ## * `filename`, `line`: stored in the parser as-is.
  ## * `column`: added to the value returned by `getTokens` to form the
  ##   parser's column.
  ## * `hasToc`: output parameter; receives the parser's `hasToc` flag once
  ##   parsing and resolution are done.
  ## * `options`: parse options; `roSkipPounds` is additionally forwarded to
  ##   the tokenizer.
  ## * `findFile`, `msgHandler`: optional callbacks passed on to the shared
  ##   parser state.
  var parser: RstParser
  initParser(parser, newSharedState(options, findFile, msgHandler))
  parser.filename = filename
  parser.line = line
  # Tokenize into `parser.tok`; the returned count offsets the start column.
  let tokenCol = getTokens(text, roSkipPounds in options, parser.tok)
  parser.col = column + tokenCol
  let doc = parseDoc(parser)
  result = resolveSubs(parser, doc)
  hasToc = parser.hasToc