diff options
Diffstat (limited to 'rod/rst.nim')
-rwxr-xr-x | rod/rst.nim | 1680 |
1 files changed, 1680 insertions, 0 deletions
diff --git a/rod/rst.nim b/rod/rst.nim new file mode 100755 index 000000000..18ee3c78e --- /dev/null +++ b/rod/rst.nim @@ -0,0 +1,1680 @@ +# +# +# The Nimrod Compiler +# (c) Copyright 2009 Andreas Rumpf +# +# See the file "copying.txt", included in this +# distribution, for details about the copyright. +# + +# This module implements a *reStructuredText* parser. A large +# subset is provided. + +import + os, msgs, strutils, platform, nhashes, ropes, options + +type + TRstNodeKind* = enum + rnInner, # an inner node or a root + rnHeadline, # a headline + rnOverline, # an over- and underlined headline + rnTransition, # a transition (the ------------- <hr> thingie) + rnParagraph, # a paragraph + rnBulletList, # a bullet list + rnBulletItem, # a bullet item + rnEnumList, # an enumerated list + rnEnumItem, # an enumerated item + rnDefList, # a definition list + rnDefItem, # an item of a definition list consisting of ... + rnDefName, # ... a name part ... + rnDefBody, # ... and a body part ... + rnFieldList, # a field list + rnField, # a field item + rnFieldName, # consisting of a field name ... + rnFieldBody, # ... and a field body + rnOptionList, rnOptionListItem, rnOptionGroup, rnOption, rnOptionString, + rnOptionArgument, rnDescription, rnLiteralBlock, rnQuotedLiteralBlock, rnLineBlock, # + # the + # | + # thingie + rnLineBlockItem, # sons of the | thing + rnBlockQuote, # text just indented + rnTable, rnGridTable, rnTableRow, rnTableHeaderCell, rnTableDataCell, rnLabel, # + # used + # for + # footnotes + # and + # other + # things + rnFootnote, # a footnote + rnCitation, # similar to footnote + rnStandaloneHyperlink, rnHyperlink, rnRef, rnDirective, # a directive + rnDirArg, rnRaw, rnTitle, rnContents, rnImage, rnFigure, rnCodeBlock, rnContainer, # + # ``container`` + # directive + rnIndex, # index directive: + # .. 
index:: + # key + # * `file#id <file#id>`_ + # * `file#id <file#id>'_ + rnSubstitutionDef, # a definition of a substitution + rnGeneralRole, # Inline markup: + rnSub, rnSup, rnIdx, rnEmphasis, # "*" + rnStrongEmphasis, # "**" + rnInterpretedText, # "`" + rnInlineLiteral, # "``" + rnSubstitutionReferences, # "|" + rnLeaf # a leaf; the node's text field contains the leaf val + +const + rstnodekindToStr*: array[TRstNodeKind, string] = ["Inner", "Headline", + "Overline", "Transition", "Paragraph", "BulletList", "BulletItem", + "EnumList", "EnumItem", "DefList", "DefItem", "DefName", "DefBody", + "FieldList", "Field", "FieldName", "FieldBody", "OptionList", + "OptionListItem", "OptionGroup", "Option", "OptionString", "OptionArgument", + "Description", "LiteralBlock", "QuotedLiteralBlock", "LineBlock", + "LineBlockItem", "BlockQuote", "Table", "GridTable", "TableRow", + "TableHeaderCell", "TableDataCell", "Label", "Footnote", "Citation", + "StandaloneHyperlink", "Hyperlink", "Ref", "Directive", "DirArg", "Raw", + "Title", "Contents", "Image", "Figure", "CodeBlock", "Container", "Index", + "SubstitutionDef", "GeneralRole", "Sub", "Sup", "Idx", "Emphasis", + "StrongEmphasis", "InterpretedText", "InlineLiteral", + "SubstitutionReferences", "Leaf"] + +type # the syntax tree of RST: + PRSTNode* = ref TRstNode + TRstNodeSeq* = seq[PRstNode] + TRSTNode*{.acyclic, final.} = object + kind*: TRstNodeKind + text*: string # valid for leafs in the AST; and the title of + # the document or the section + level*: int # valid for some node kinds + sons*: TRstNodeSeq # the node's sons + + +proc rstParse*(text: string, # the text to be parsed + skipPounds: bool, filename: string, # for error messages + line, column: int, hasToc: var bool): PRstNode +proc rsonsLen*(n: PRstNode): int +proc newRstNode*(kind: TRstNodeKind): PRstNode +proc newRstNode*(kind: TRstNodeKind, s: string): PRstNode +proc addSon*(father, son: PRstNode) +proc rstnodeToRefname*(n: PRstNode): string +proc addNodes*(n: 
PRstNode): string +proc getFieldValue*(n: PRstNode, fieldname: string): string +proc getArgument*(n: PRstNode): string + # index handling: +proc setIndexPair*(index, key, val: PRstNode) +proc sortIndex*(a: PRstNode) +proc clearIndex*(index: PRstNode, filename: string) +# implementation +# ----------------------------- scanner part -------------------------------- + +const + SymChars: TCharSet = {'a'..'z', 'A'..'Z', '0'..'9', '\x80'..'\xFF'} + +type + TTokType = enum + tkEof, tkIndent, tkWhite, tkWord, tkAdornment, tkPunct, tkOther + TToken{.final.} = object # a RST token + kind*: TTokType # the type of the token + ival*: int # the indentation or parsed integer value + symbol*: string # the parsed symbol as string + line*, col*: int # line and column of the token + + TTokenSeq = seq[TToken] + TLexer = object of TObject + buf*: cstring + bufpos*: int + line*, col*, baseIndent*: int + skipPounds*: bool + + +proc getThing(L: var TLexer, tok: var TToken, s: TCharSet) = + var pos: int + tok.kind = tkWord + tok.line = L.line + tok.col = L.col + pos = L.bufpos + while True: + add(tok.symbol, L.buf[pos]) + inc(pos) + if not (L.buf[pos] in s): break + inc(L.col, pos - L.bufpos) + L.bufpos = pos + +proc getAdornment(L: var TLexer, tok: var TToken) = + var + pos: int + c: char + tok.kind = tkAdornment + tok.line = L.line + tok.col = L.col + pos = L.bufpos + c = L.buf[pos] + while True: + add(tok.symbol, L.buf[pos]) + inc(pos) + if L.buf[pos] != c: break + inc(L.col, pos - L.bufpos) + L.bufpos = pos + +proc getIndentAux(L: var TLexer, start: int): int = + var + buf: cstring + pos: int + pos = start + buf = L.buf # skip the newline (but include it in the token!) 
+ if buf[pos] == '\x0D': + if buf[pos + 1] == '\x0A': inc(pos, 2) + else: inc(pos) + elif buf[pos] == '\x0A': + inc(pos) + if L.skipPounds: + if buf[pos] == '#': inc(pos) + if buf[pos] == '#': inc(pos) + result = 0 + while True: + case buf[pos] + of ' ', '\x0B', '\x0C': + inc(pos) + inc(result) + of '\x09': + inc(pos) + result = result - (result mod 8) + 8 + else: + break # EndOfFile also leaves the loop + if buf[pos] == '\0': + result = 0 + elif (buf[pos] == '\x0A') or (buf[pos] == '\x0D'): + # look at the next line for proper indentation: + result = getIndentAux(L, pos) + L.bufpos = pos # no need to set back buf + +proc getIndent(L: var TLexer, tok: var TToken) = + inc(L.line) + tok.line = L.line + tok.col = 0 + tok.kind = tkIndent # skip the newline (but include it in the token!) + tok.ival = getIndentAux(L, L.bufpos) + L.col = tok.ival + tok.ival = max(tok.ival - L.baseIndent, 0) + tok.symbol = "\n" & repeatChar(tok.ival) + +proc rawGetTok(L: var TLexer, tok: var TToken) = + var c: Char + tok.symbol = "" + tok.ival = 0 + c = L.buf[L.bufpos] + case c + of 'a'..'z', 'A'..'Z', '\x80'..'\xFF', '0'..'9': + getThing(L, tok, SymChars) + of ' ', '\x09', '\x0B', '\x0C': + getThing(L, tok, {' ', '\x09'}) + tok.kind = tkWhite + if L.buf[L.bufpos] in {'\x0D', '\x0A'}: + rawGetTok(L, tok) # ignore spaces before \n + of '\x0D', '\x0A': + getIndent(L, tok) + of '!', '\"', '#', '$', '%', '&', '\'', '(', ')', '*', '+', ',', '-', '.', + '/', ':', ';', '<', '=', '>', '?', '@', '[', '\\', ']', '^', '_', '`', '{', + '|', '}', '~': + getAdornment(L, tok) + if len(tok.symbol) <= 3: tok.kind = tkPunct + else: + tok.line = L.line + tok.col = L.col + if c == '\0': + tok.kind = tkEof + else: + tok.kind = tkOther + add(tok.symbol, c) + inc(L.bufpos) + inc(L.col) + tok.col = max(tok.col - L.baseIndent, 0) + +proc getTokens(buffer: string, skipPounds: bool, tokens: var TTokenSeq) = + var + L: TLexer + length: int + length = len(tokens) + L.buf = cstring(buffer) + L.line = 1 # skip UTF-8 BOM 
+ if (L.buf[0] == '\xEF') and (L.buf[1] == '\xBB') and (L.buf[2] == '\xBF'): + inc(L.bufpos, 3) + L.skipPounds = skipPounds + if skipPounds: + if L.buf[L.bufpos] == '#': inc(L.bufpos) + if L.buf[L.bufpos] == '#': inc(L.bufpos) + L.baseIndent = 0 + while L.buf[L.bufpos] == ' ': + inc(L.bufpos) + inc(L.baseIndent) + while true: + inc(length) + setlen(tokens, length) + rawGetTok(L, tokens[length - 1]) + if tokens[length - 1].kind == tkEof: break + if tokens[0].kind == tkWhite: + # BUGFIX + tokens[0].ival = len(tokens[0].symbol) + tokens[0].kind = tkIndent + +proc addSon(father, son: PRstNode) = + var L: int + L = len(father.sons) + setlen(father.sons, L + 1) + father.sons[L] = son + +proc addSonIfNotNil(father, son: PRstNode) = + if son != nil: addSon(father, son) + +proc rsonsLen(n: PRstNode): int = + result = len(n.sons) + +proc newRstNode(kind: TRstNodeKind): PRstNode = + new(result) + result.sons = @ [] + result.kind = kind + +proc newRstNode(kind: TRstNodeKind, s: string): PRstNode = + result = newRstNode(kind) + result.text = s + +type + TLevelMap = array[Char, int] + TSubstitution{.final.} = object + key*: string + value*: PRstNode + + TSharedState{.final.} = object + uLevel*, oLevel*: int # counters for the section levels + subs*: seq[TSubstitution] # substitutions + refs*: seq[TSubstitution] # references + underlineToLevel*: TLevelMap # Saves for each possible title adornment character its level in the + # current document. This is for single underline adornments. + overlineToLevel*: TLevelMap # Saves for each possible title adornment character its level in the + # current document. This is for over-underline adornments. 
+ + PSharedState = ref TSharedState + TRstParser = object of TObject + idx*: int + tok*: TTokenSeq + s*: PSharedState + indentStack*: seq[int] + filename*: string + line*, col*: int + hasToc*: bool + + +proc newSharedState(): PSharedState = + new(result) + result.subs = @ [] + result.refs = @ [] + +proc tokInfo(p: TRstParser, tok: TToken): TLineInfo = + result = newLineInfo(p.filename, p.line + tok.line, p.col + tok.col) + +proc rstMessage(p: TRstParser, msgKind: TMsgKind, arg: string) = + liMessage(tokInfo(p, p.tok[p.idx]), msgKind, arg) + +proc rstMessage(p: TRstParser, msgKind: TMsgKind) = + liMessage(tokInfo(p, p.tok[p.idx]), msgKind, p.tok[p.idx].symbol) + +proc currInd(p: TRstParser): int = + result = p.indentStack[high(p.indentStack)] + +proc pushInd(p: var TRstParser, ind: int) = + var length: int + length = len(p.indentStack) + setlen(p.indentStack, length + 1) + p.indentStack[length] = ind + +proc popInd(p: var TRstParser) = + if len(p.indentStack) > 1: setlen(p.indentStack, len(p.indentStack) - 1) + +proc initParser(p: var TRstParser, sharedState: PSharedState) = + p.indentStack = @ [0] + p.tok = @ [] + p.idx = 0 + p.filename = "" + p.hasToc = false + p.col = 0 + p.line = 1 + p.s = sharedState + +proc addNodesAux(n: PRstNode, result: var string) = + if n.kind == rnLeaf: + add(result, n.text) + else: + for i in countup(0, rsonsLen(n) - 1): addNodesAux(n.sons[i], result) + +proc addNodes(n: PRstNode): string = + result = "" + addNodesAux(n, result) + +proc rstnodeToRefnameAux(n: PRstNode, r: var string, b: var bool) = + if n.kind == rnLeaf: + for i in countup(0, len(n.text) + 0 - 1): + case n.text[i] + of '0'..'9': + if b: + add(r, '-') + b = false + if len(r) == 0: add(r, 'Z') + add(r, n.text[i]) + of 'a'..'z': + if b: + add(r, '-') + b = false + add(r, n.text[i]) + of 'A'..'Z': + if b: + add(r, '-') + b = false + add(r, chr(ord(n.text[i]) - ord('A') + ord('a'))) + else: + if (len(r) > 0): b = true + else: + for i in countup(0, rsonsLen(n) - 1): 
rstnodeToRefnameAux(n.sons[i], r, b) + +proc rstnodeToRefname(n: PRstNode): string = + var b: bool + result = "" + b = false + rstnodeToRefnameAux(n, result, b) + +proc findSub(p: var TRstParser, n: PRstNode): int = + var key: string + key = addNodes(n) # the spec says: if no exact match, try one without case distinction: + for i in countup(0, high(p.s.subs)): + if key == p.s.subs[i].key: + return i + for i in countup(0, high(p.s.subs)): + if cmpIgnoreStyle(key, p.s.subs[i].key) == 0: + return i + result = - 1 + +proc setSub(p: var TRstParser, key: string, value: PRstNode) = + var length: int + length = len(p.s.subs) + for i in countup(0, length - 1): + if key == p.s.subs[i].key: + p.s.subs[i].value = value + return + setlen(p.s.subs, length + 1) + p.s.subs[length].key = key + p.s.subs[length].value = value + +proc setRef(p: var TRstParser, key: string, value: PRstNode) = + var length: int + length = len(p.s.refs) + for i in countup(0, length - 1): + if key == p.s.refs[i].key: + p.s.refs[i].value = value + rstMessage(p, warnRedefinitionOfLabel, key) + return + setlen(p.s.refs, length + 1) + p.s.refs[length].key = key + p.s.refs[length].value = value + +proc findRef(p: var TRstParser, key: string): PRstNode = + for i in countup(0, high(p.s.refs)): + if key == p.s.refs[i].key: + return p.s.refs[i].value + result = nil + +proc cmpNodes(a, b: PRstNode): int = + var x, y: PRstNode + assert(a.kind == rnDefItem) + assert(b.kind == rnDefItem) + x = a.sons[0] + y = b.sons[0] + result = cmpIgnoreStyle(addNodes(x), addNodes(y)) + +proc sortIndex(a: PRstNode) = + # we use shellsort here; fast and simple + var + N, j, h: int + v: PRstNode + assert(a.kind == rnDefList) + N = rsonsLen(a) + h = 1 + while true: + h = 3 * h + 1 + if h > N: break + while true: + h = h div 3 + for i in countup(h, N - 1): + v = a.sons[i] + j = i + while cmpNodes(a.sons[j - h], v) >= 0: + a.sons[j] = a.sons[j - h] + j = j - h + if j < h: break + a.sons[j] = v + if h == 1: break + +proc eqRstNodes(a, b: 
PRstNode): bool = + result = false + if a.kind != b.kind: return + if a.kind == rnLeaf: + result = a.text == b.text + else: + if rsonsLen(a) != rsonsLen(b): return + for i in countup(0, rsonsLen(a) - 1): + if not eqRstNodes(a.sons[i], b.sons[i]): return + result = true + +proc matchesHyperlink(h: PRstNode, filename: string): bool = + var s: string + if h.kind == rnInner: # this may happen in broken indexes! + assert(rsonsLen(h) == 1) + result = matchesHyperlink(h.sons[0], filename) + elif h.kind == rnHyperlink: + s = addNodes(h.sons[1]) + if startsWith(s, filename) and (s[len(filename) + 0] == '#'): result = true + else: result = false + else: + result = false + +proc clearIndex(index: PRstNode, filename: string) = + var + k, items, lastItem: int + val: PRstNode + assert(index.kind == rnDefList) + for i in countup(0, rsonsLen(index) - 1): + assert(index.sons[i].sons[1].kind == rnDefBody) + val = index.sons[i].sons[1].sons[0] + if val.kind == rnInner: val = val.sons[0] + if val.kind == rnBulletList: + items = rsonsLen(val) + lastItem = - 1 # save the last valid item index + for j in countup(0, rsonsLen(val) - 1): + if val.sons[j] == nil: + dec(items) + elif matchesHyperlink(val.sons[j].sons[0], filename): + val.sons[j] = nil + dec(items) + else: + lastItem = j + if items == 1: + index.sons[i].sons[1].sons[0] = val.sons[lastItem].sons[0] + elif items == 0: + index.sons[i] = nil + elif matchesHyperlink(val, filename): + index.sons[i] = nil + k = 0 + for i in countup(0, rsonsLen(index) - 1): + if index.sons[i] != nil: + if k != i: index.sons[k] = index.sons[i] + inc(k) + setlen(index.sons, k) + +proc setIndexPair(index, key, val: PRstNode) = + var e, a, b: PRstNode + # writeln(rstnodekindToStr[key.kind], ': ', rstnodekindToStr[val.kind]); + assert(index.kind == rnDefList) + assert(key.kind != rnDefName) + a = newRstNode(rnDefName) + addSon(a, key) + for i in countup(0, rsonsLen(index) - 1): + if eqRstNodes(index.sons[i].sons[0], a): + assert(index.sons[i].sons[1].kind 
== rnDefBody) + e = index.sons[i].sons[1].sons[0] + if e.kind != rnBulletList: + e = newRstNode(rnBulletList) + b = newRstNode(rnBulletItem) + addSon(b, index.sons[i].sons[1].sons[0]) + addSon(e, b) + index.sons[i].sons[1].sons[0] = e + b = newRstNode(rnBulletItem) + addSon(b, val) + addSon(e, b) + return # key already exists + e = newRstNode(rnDefItem) + assert(val.kind != rnDefBody) + b = newRstNode(rnDefBody) + addSon(b, val) + addSon(e, a) + addSon(e, b) + addSon(index, e) + +proc newLeaf(p: var TRstParser): PRstNode = + result = newRstNode(rnLeaf, p.tok[p.idx].symbol) + +proc getReferenceName(p: var TRstParser, endStr: string): PRstNode = + var res: PRstNode + res = newRstNode(rnInner) + while true: + case p.tok[p.idx].kind + of tkWord, tkOther, tkWhite: + addSon(res, newLeaf(p)) + of tkPunct: + if p.tok[p.idx].symbol == endStr: + inc(p.idx) + break + else: + addSon(res, newLeaf(p)) + else: + rstMessage(p, errXexpected, endStr) + break + inc(p.idx) + result = res + +proc untilEol(p: var TRstParser): PRstNode = + result = newRstNode(rnInner) + while not (p.tok[p.idx].kind in {tkIndent, tkEof}): + addSon(result, newLeaf(p)) + inc(p.idx) + +proc expect(p: var TRstParser, tok: string) = + if p.tok[p.idx].symbol == tok: inc(p.idx) + else: rstMessage(p, errXexpected, tok) + +proc isInlineMarkupEnd(p: TRstParser, markup: string): bool = + result = p.tok[p.idx].symbol == markup + if not result: + return # Rule 3: + result = not (p.tok[p.idx - 1].kind in {tkIndent, tkWhite}) + if not result: + return # Rule 4: + result = (p.tok[p.idx + 1].kind in {tkIndent, tkWhite, tkEof}) or + (p.tok[p.idx + 1].symbol[0] in + {'\'', '\"', ')', ']', '}', '>', '-', '/', '\\', ':', '.', ',', ';', '!', + '?', '_'}) + if not result: + return # Rule 7: + if p.idx > 0: + if (markup != "``") and (p.tok[p.idx - 1].symbol == "\\"): + result = false + +proc isInlineMarkupStart(p: TRstParser, markup: string): bool = + var c, d: Char + result = p.tok[p.idx].symbol == markup + if not result: + 
return # Rule 1: + result = (p.idx == 0) or (p.tok[p.idx - 1].kind in {tkIndent, tkWhite}) or + (p.tok[p.idx - 1].symbol[0] in + {'\'', '\"', '(', '[', '{', '<', '-', '/', ':', '_'}) + if not result: + return # Rule 2: + result = not (p.tok[p.idx + 1].kind in {tkIndent, tkWhite, tkEof}) + if not result: + return # Rule 5 & 7: + if p.idx > 0: + if p.tok[p.idx - 1].symbol == "\\": + result = false + else: + c = p.tok[p.idx - 1].symbol[0] + case c + of '\'', '\"': d = c + of '(': d = ')' + of '[': d = ']' + of '{': d = '}' + of '<': d = '>' + else: d = '\0' + if d != '\0': result = p.tok[p.idx + 1].symbol[0] != d + +proc parseBackslash(p: var TRstParser, father: PRstNode) = + assert(p.tok[p.idx].kind == tkPunct) + if p.tok[p.idx].symbol == "\\\\": + addSon(father, newRstNode(rnLeaf, "\\")) + inc(p.idx) + elif p.tok[p.idx].symbol == "\\": + # XXX: Unicode? + inc(p.idx) + if p.tok[p.idx].kind != tkWhite: addSon(father, newLeaf(p)) + inc(p.idx) + else: + addSon(father, newLeaf(p)) + inc(p.idx) + +proc match(p: TRstParser, start: int, expr: string): bool = + # regular expressions are: + # special char exact match + # 'w' tkWord + # ' ' tkWhite + # 'a' tkAdornment + # 'i' tkIndent + # 'p' tkPunct + # 'T' always true + # 'E' whitespace, indent or eof + # 'e' tkWord or '#' (for enumeration lists) + var + i, j, last, length: int + c: char + i = 0 + j = start + last = len(expr) + 0 - 1 + while i <= last: + case expr[i] + of 'w': + result = p.tok[j].kind == tkWord + of ' ': + result = p.tok[j].kind == tkWhite + of 'i': + result = p.tok[j].kind == tkIndent + of 'p': + result = p.tok[j].kind == tkPunct + of 'a': + result = p.tok[j].kind == tkAdornment + of 'o': + result = p.tok[j].kind == tkOther + of 'T': + result = true + of 'E': + result = p.tok[j].kind in {tkEof, tkWhite, tkIndent} + of 'e': + result = (p.tok[j].kind == tkWord) or (p.tok[j].symbol == "#") + if result: + case p.tok[j].symbol[0] + of 'a'..'z', 'A'..'Z': result = len(p.tok[j].symbol) == 1 + of '0'..'9': result = 
allCharsInSet(p.tok[j].symbol, {'0'..'9'}) + else: + nil + else: + c = expr[i] + length = 0 + while (i <= last) and (expr[i] == c): + inc(i) + inc(length) + dec(i) + result = (p.tok[j].kind in {tkPunct, tkAdornment}) and + (len(p.tok[j].symbol) == length) and (p.tok[j].symbol[0] == c) + if not result: return + inc(j) + inc(i) + result = true + +proc fixupEmbeddedRef(n, a, b: PRstNode) = + var sep, incr: int + sep = - 1 + for i in countdown(rsonsLen(n) - 2, 0): + if n.sons[i].text == "<": + sep = i + break + if (sep > 0) and (n.sons[sep - 1].text[0] == ' '): incr = 2 + else: incr = 1 + for i in countup(0, sep - incr): addSon(a, n.sons[i]) + for i in countup(sep + 1, rsonsLen(n) - 2): addSon(b, n.sons[i]) + +proc parsePostfix(p: var TRstParser, n: PRstNode): PRstNode = + var a, b: PRstNode + result = n + if isInlineMarkupEnd(p, "_"): + inc(p.idx) + if (p.tok[p.idx - 2].symbol == "`") and (p.tok[p.idx - 3].symbol == ">"): + a = newRstNode(rnInner) + b = newRstNode(rnInner) + fixupEmbeddedRef(n, a, b) + if rsonsLen(a) == 0: + result = newRstNode(rnStandaloneHyperlink) + addSon(result, b) + else: + result = newRstNode(rnHyperlink) + addSon(result, a) + addSon(result, b) + setRef(p, rstnodeToRefname(a), b) + elif n.kind == rnInterpretedText: + n.kind = rnRef + else: + result = newRstNode(rnRef) + addSon(result, n) + elif match(p, p.idx, ":w:"): + # a role: + if p.tok[p.idx + 1].symbol == "idx": + n.kind = rnIdx + elif p.tok[p.idx + 1].symbol == "literal": + n.kind = rnInlineLiteral + elif p.tok[p.idx + 1].symbol == "strong": + n.kind = rnStrongEmphasis + elif p.tok[p.idx + 1].symbol == "emphasis": + n.kind = rnEmphasis + elif (p.tok[p.idx + 1].symbol == "sub") or + (p.tok[p.idx + 1].symbol == "subscript"): + n.kind = rnSub + elif (p.tok[p.idx + 1].symbol == "sup") or + (p.tok[p.idx + 1].symbol == "supscript"): + n.kind = rnSup + else: + result = newRstNode(rnGeneralRole) + n.kind = rnInner + addSon(result, n) + addSon(result, newRstNode(rnLeaf, p.tok[p.idx + 1].symbol)) 
+ inc(p.idx, 3) + +proc isURL(p: TRstParser, i: int): bool = + result = (p.tok[i + 1].symbol == ":") and (p.tok[i + 2].symbol == "//") and + (p.tok[i + 3].kind == tkWord) and (p.tok[i + 4].symbol == ".") + +proc parseURL(p: var TRstParser, father: PRstNode) = + var n: PRstNode + #if p.tok[p.idx].symbol[strStart] = '<' then begin + if isURL(p, p.idx): + n = newRstNode(rnStandaloneHyperlink) + while true: + case p.tok[p.idx].kind + of tkWord, tkAdornment, tkOther: + nil + of tkPunct: + if not (p.tok[p.idx + 1].kind in + {tkWord, tkAdornment, tkOther, tkPunct}): + break + else: break + addSon(n, newLeaf(p)) + inc(p.idx) + addSon(father, n) + else: + n = newLeaf(p) + inc(p.idx) + if p.tok[p.idx].symbol == "_": n = parsePostfix(p, n) + addSon(father, n) + +proc parseUntil(p: var TRstParser, father: PRstNode, postfix: string, + interpretBackslash: bool) = + while true: + case p.tok[p.idx].kind + of tkPunct: + if isInlineMarkupEnd(p, postfix): + inc(p.idx) + break + elif interpretBackslash: + parseBackslash(p, father) + else: + addSon(father, newLeaf(p)) + inc(p.idx) + of tkAdornment, tkWord, tkOther: + addSon(father, newLeaf(p)) + inc(p.idx) + of tkIndent: + addSon(father, newRstNode(rnLeaf, " ")) + inc(p.idx) + if p.tok[p.idx].kind == tkIndent: + rstMessage(p, errXExpected, postfix) + break + of tkWhite: + addSon(father, newRstNode(rnLeaf, " ")) + inc(p.idx) + else: rstMessage(p, errXExpected, postfix) + +proc parseInline(p: var TRstParser, father: PRstNode) = + var n: PRstNode + case p.tok[p.idx].kind + of tkPunct: + if isInlineMarkupStart(p, "**"): + inc(p.idx) + n = newRstNode(rnStrongEmphasis) + parseUntil(p, n, "**", true) + addSon(father, n) + elif isInlineMarkupStart(p, "*"): + inc(p.idx) + n = newRstNode(rnEmphasis) + parseUntil(p, n, "*", true) + addSon(father, n) + elif isInlineMarkupStart(p, "``"): + inc(p.idx) + n = newRstNode(rnInlineLiteral) + parseUntil(p, n, "``", false) + addSon(father, n) + elif isInlineMarkupStart(p, "`"): + inc(p.idx) + n = 
newRstNode(rnInterpretedText) + parseUntil(p, n, "`", true) + n = parsePostfix(p, n) + addSon(father, n) + elif isInlineMarkupStart(p, "|"): + inc(p.idx) + n = newRstNode(rnSubstitutionReferences) + parseUntil(p, n, "|", false) + addSon(father, n) + else: + parseBackslash(p, father) + of tkWord: + parseURL(p, father) + of tkAdornment, tkOther, tkWhite: + addSon(father, newLeaf(p)) + inc(p.idx) + else: assert(false) + +proc getDirective(p: var TRstParser): string = + var j: int + if (p.tok[p.idx].kind == tkWhite) and (p.tok[p.idx + 1].kind == tkWord): + j = p.idx + inc(p.idx) + result = p.tok[p.idx].symbol + inc(p.idx) + while p.tok[p.idx].kind in {tkWord, tkPunct, tkAdornment, tkOther}: + if p.tok[p.idx].symbol == "::": break + add(result, p.tok[p.idx].symbol) + inc(p.idx) + if (p.tok[p.idx].kind == tkWhite): inc(p.idx) + if p.tok[p.idx].symbol == "::": + inc(p.idx) + if (p.tok[p.idx].kind == tkWhite): inc(p.idx) + else: + p.idx = j # set back + result = "" # error + else: + result = "" + +proc parseComment(p: var TRstParser): PRstNode = + var indent: int + case p.tok[p.idx].kind + of tkIndent, tkEof: + if p.tok[p.idx + 1].kind == tkIndent: + inc(p.idx) # empty comment + else: + indent = p.tok[p.idx].ival + while True: + case p.tok[p.idx].kind + of tkEof: + break + of tkIndent: + if (p.tok[p.idx].ival < indent): break + else: + nil + inc(p.idx) + else: + while not (p.tok[p.idx].kind in {tkIndent, tkEof}): inc(p.idx) + result = nil + +type + TDirKind = enum # must be ordered alphabetically! 
+ dkNone, dkAuthor, dkAuthors, dkCodeBlock, dkContainer, dkContents, dkFigure, + dkImage, dkInclude, dkIndex, dkRaw, dkTitle + +const + DirIds: array[0..11, string] = ["", "author", "authors", "code-block", + "container", "contents", "figure", "image", "include", "index", "raw", + "title"] + +proc getDirKind(s: string): TDirKind = + var i: int + i = binaryStrSearch(DirIds, s) + if i >= 0: result = TDirKind(i) + else: result = dkNone + +proc parseLine(p: var TRstParser, father: PRstNode) = + while True: + case p.tok[p.idx].kind + of tkWhite, tkWord, tkOther, tkPunct: parseInline(p, father) + else: break + +proc parseSection(p: var TRstParser, result: PRstNode) +proc parseField(p: var TRstParser): PRstNode = + var + col, indent: int + fieldname, fieldbody: PRstNode + result = newRstNode(rnField) + col = p.tok[p.idx].col + inc(p.idx) # skip : + fieldname = newRstNode(rnFieldname) + parseUntil(p, fieldname, ":", false) + fieldbody = newRstNode(rnFieldbody) + if p.tok[p.idx].kind != tkIndent: parseLine(p, fieldbody) + if p.tok[p.idx].kind == tkIndent: + indent = p.tok[p.idx].ival + if indent > col: + pushInd(p, indent) + parseSection(p, fieldbody) + popInd(p) + addSon(result, fieldname) + addSon(result, fieldbody) + +proc parseFields(p: var TRstParser): PRstNode = + var col: int + result = nil + if (p.tok[p.idx].kind == tkIndent) and (p.tok[p.idx + 1].symbol == ":"): + col = p.tok[p.idx].ival # BUGFIX! 
+ result = newRstNode(rnFieldList) + inc(p.idx) + while true: + addSon(result, parseField(p)) + if (p.tok[p.idx].kind == tkIndent) and (p.tok[p.idx].ival == col) and + (p.tok[p.idx + 1].symbol == ":"): + inc(p.idx) + else: + break + +proc getFieldValue(n: PRstNode, fieldname: string): string = + var f: PRstNode + result = "" + if n.sons[1] == nil: return + if (n.sons[1].kind != rnFieldList): + InternalError("getFieldValue (2): " & rstnodeKindToStr[n.sons[1].kind]) + for i in countup(0, rsonsLen(n.sons[1]) - 1): + f = n.sons[1].sons[i] + if cmpIgnoreStyle(addNodes(f.sons[0]), fieldname) == 0: + result = addNodes(f.sons[1]) + if result == "": + result = "\x01\x01" # indicates that the field exists + return + +proc getArgument(n: PRstNode): string = + if n.sons[0] == nil: result = "" + else: result = addNodes(n.sons[0]) + +proc parseDotDot(p: var TRstParser): PRstNode +proc parseLiteralBlock(p: var TRstParser): PRstNode = + var + indent: int + n: PRstNode + result = newRstNode(rnLiteralBlock) + n = newRstNode(rnLeaf, "") + if p.tok[p.idx].kind == tkIndent: + indent = p.tok[p.idx].ival + inc(p.idx) + while True: + case p.tok[p.idx].kind + of tkEof: + break + of tkIndent: + if (p.tok[p.idx].ival < indent): + break + else: + add(n.text, "\n") + add(n.text, repeatChar(p.tok[p.idx].ival - indent)) + inc(p.idx) + else: + add(n.text, p.tok[p.idx].symbol) + inc(p.idx) + else: + while not (p.tok[p.idx].kind in {tkIndent, tkEof}): + add(n.text, p.tok[p.idx].symbol) + inc(p.idx) + addSon(result, n) + +proc getLevel(map: var TLevelMap, lvl: var int, c: Char): int = + if map[c] == 0: + inc(lvl) + map[c] = lvl + result = map[c] + +proc tokenAfterNewline(p: TRstParser): int = + result = p.idx + while true: + case p.tok[result].kind + of tkEof: + break + of tkIndent: + inc(result) + break + else: inc(result) + +proc isLineBlock(p: TRstParser): bool = + var j: int + j = tokenAfterNewline(p) + result = (p.tok[p.idx].col == p.tok[j].col) and (p.tok[j].symbol == "|") or + (p.tok[j].col > 
p.tok[p.idx].col) + +proc predNL(p: TRstParser): bool = + result = true + if (p.idx > 0): + result = (p.tok[p.idx - 1].kind == tkIndent) and + (p.tok[p.idx - 1].ival == currInd(p)) + +proc isDefList(p: TRstParser): bool = + var j: int + j = tokenAfterNewline(p) + result = (p.tok[p.idx].col < p.tok[j].col) and + (p.tok[j].kind in {tkWord, tkOther, tkPunct}) and + (p.tok[j - 2].symbol != "::") + +proc whichSection(p: TRstParser): TRstNodeKind = + case p.tok[p.idx].kind + of tkAdornment: + if match(p, p.idx + 1, "ii"): result = rnTransition + elif match(p, p.idx + 1, " a"): result = rnTable + elif match(p, p.idx + 1, "i"): result = rnOverline + else: result = rnLeaf + of tkPunct: + if match(p, tokenAfterNewLine(p), "ai"): + result = rnHeadline + elif p.tok[p.idx].symbol == "::": + result = rnLiteralBlock + elif predNL(p) and + ((p.tok[p.idx].symbol == "+") or (p.tok[p.idx].symbol == "*") or + (p.tok[p.idx].symbol == "-")) and (p.tok[p.idx + 1].kind == tkWhite): + result = rnBulletList + elif (p.tok[p.idx].symbol == "|") and isLineBlock(p): + result = rnLineBlock + elif (p.tok[p.idx].symbol == "..") and predNL(p): + result = rnDirective + elif (p.tok[p.idx].symbol == ":") and predNL(p): + result = rnFieldList + elif match(p, p.idx, "(e) "): + result = rnEnumList + elif match(p, p.idx, "+a+"): + result = rnGridTable + rstMessage(p, errGridTableNotImplemented) + elif isDefList(p): + result = rnDefList + elif match(p, p.idx, "-w") or match(p, p.idx, "--w") or + match(p, p.idx, "/w"): + result = rnOptionList + else: + result = rnParagraph + of tkWord, tkOther, tkWhite: + if match(p, tokenAfterNewLine(p), "ai"): result = rnHeadline + elif isDefList(p): result = rnDefList + elif match(p, p.idx, "e) ") or match(p, p.idx, "e. 
"): result = rnEnumList
    else: result = rnParagraph
  else: result = rnLeaf

# Parses a line block (lines introduced by "| "). Returns nil when the
# current token is not followed by whitespace, i.e. this is no line block.
proc parseLineBlock(p: var TRstParser): PRstNode =
  var
    col: int
    item: PRstNode
  result = nil
  if p.tok[p.idx + 1].kind == tkWhite:
    # remember the column of the '|' so that follow-up items can be matched
    col = p.tok[p.idx].col
    result = newRstNode(rnLineBlock)
    pushInd(p, p.tok[p.idx + 2].col)
    inc(p.idx, 2)
    while true:
      item = newRstNode(rnLineBlockItem)
      parseSection(p, item)
      addSon(result, item)
      # another item follows if the next line starts at the same column
      # with "| ":
      if (p.tok[p.idx].kind == tkIndent) and (p.tok[p.idx].ival == col) and
          (p.tok[p.idx + 1].symbol == "|") and
          (p.tok[p.idx + 2].kind == tkWhite):
        inc(p.idx, 3)
      else:
        break
    popInd(p)

# Parses the inline content of a paragraph and appends it to `result`.
# Stops at a blank line (two consecutive tkIndent tokens), at a change of
# indentation, at the start of a different section kind or after a literal
# block introduced by a trailing '::'.
proc parseParagraph(p: var TRstParser, result: PRstNode) =
  while True:
    case p.tok[p.idx].kind
    of tkIndent:
      if p.tok[p.idx + 1].kind == tkIndent:
        # two indent tokens in a row: the paragraph ends here
        inc(p.idx)
        break
      elif (p.tok[p.idx].ival == currInd(p)):
        inc(p.idx)
        case whichSection(p)
        of rnParagraph, rnLeaf, rnHeadline, rnOverline, rnDirective:
          # the next line continues the paragraph; the line break becomes
          # a single space
          addSon(result, newRstNode(rnLeaf, " "))
        of rnLineBlock:
          addSonIfNotNil(result, parseLineBlock(p))
        else: break
      else:
        break
    of tkPunct:
      if (p.tok[p.idx].symbol == "::") and
          (p.tok[p.idx + 1].kind == tkIndent) and
          (currInd(p) < p.tok[p.idx + 1].ival):
        # '::' at the end of a line followed by a deeper indented line
        # introduces a literal block; a single ':' stays in the text
        addSon(result, newRstNode(rnLeaf, ":"))
        inc(p.idx)            # skip '::'
        addSon(result, parseLiteralBlock(p))
        break
      else:
        parseInline(p, result)
    of tkWhite, tkWord, tkAdornment, tkOther:
      parseInline(p, result)
    else: break

# Convenience wrapper: creates a new rnParagraph node and fills it via
# `parseParagraph`.
proc parseParagraphWrapper(p: var TRstParser): PRstNode =
  result = newRstNode(rnParagraph)
  parseParagraph(p, result)

# Parses an underlined headline. The title line must be followed by an
# indent token and an adornment token (asserted). The node's level is
# obtained from `getLevel` using the first character of the adornment.
proc parseHeadline(p: var TRstParser): PRstNode =
  var c: Char
  result = newRstNode(rnHeadline)
  parseLine(p, result)
  assert(p.tok[p.idx].kind == tkIndent)
  assert(p.tok[p.idx + 1].kind == tkAdornment)
  c = p.tok[p.idx + 1].symbol[0]
  inc(p.idx, 2)
  result.level = getLevel(p.s.underlineToLevel, p.s.uLevel, c)

type
  TIntSeq = seq[int]

# Returns the `col` of the current token plus the length of its symbol
# minus one (presumably the column of the token's last character).
proc tokEnd(p: TRstParser): int =
  result = p.tok[p.idx].col + len(p.tok[p.idx].symbol) - 1

# Reads the adornment line of a simple table and stores the end position of
# every adornment run in `cols`. The last entry is always replaced by 32000
# so that the last column has no right limit.
proc getColumns(p: var TRstParser, cols: var TIntSeq) =
  var L: int
  L = 0
  while true:
    inc(L)
    setlen(cols, L)
    cols[L - 1] = tokEnd(p)
    assert(p.tok[p.idx].kind == tkAdornment)
    inc(p.idx)
    if p.tok[p.idx].kind != tkWhite: break
    inc(p.idx)
    if p.tok[p.idx].kind != tkAdornment: break
  if p.tok[p.idx].kind == tkIndent:
    inc(p.idx)
  # last column has no limit:
  cols[L - 1] = 32000

proc parseDoc(p: var TRstParser): PRstNode

# Parses a simple table (column borders given by adornment lines). Every
# cell's text is collected as a string and parsed by a nested parser into an
# rnTableDataCell. The cells of the row that precedes a further adornment
# line are turned into rnTableHeaderCell nodes.
proc parseSimpleTable(p: var TRstParser): PRstNode =
  var
    cols: TIntSeq
    row: seq[string]
    i, last, line: int
    q: TRstParser
    a, b: PRstNode
  result = newRstNode(rnTable)
  cols = @ []
  row = @ []
  a = nil
  while true:
    if p.tok[p.idx].kind == tkAdornment:
      last = tokenAfterNewline(p)
      if p.tok[last].kind in {tkEof, tkIndent}:
        # skip last adornment line:
        p.idx = last
        break
      getColumns(p, cols)
      setlen(row, len(cols))
      if a != nil:
        # an adornment line after a row marks that row as the table header
        for j in countup(0, rsonsLen(a) - 1): a.sons[j].kind = rnTableHeaderCell
    if p.tok[p.idx].kind == tkEof: break
    for j in countup(0, high(row)):
      row[j] = ""
    # the following while loop iterates over the lines a single cell may span:
    line = p.tok[p.idx].line
    while true:
      i = 0
      while not (p.tok[p.idx].kind in {tkIndent, tkEof}):
        if (tokEnd(p) <= cols[i]):
          add(row[i], p.tok[p.idx].symbol)
          inc(p.idx)
        else:
          # the token lies beyond column i: advance to the next column,
          # dropping the whitespace that separates the columns
          if p.tok[p.idx].kind == tkWhite: inc(p.idx)
          inc(i)
      if p.tok[p.idx].kind == tkIndent: inc(p.idx)
      # text in the first column starts a new row:
      if tokEnd(p) <= cols[0]: break
      if p.tok[p.idx].kind in {tkEof, tkAdornment}: break
      # continuation line: separate the lines of the cells with a line feed
      for j in countup(1, high(row)): add(row[j], '\x0A')
    a = newRstNode(rnTableRow)
    for j in countup(0, high(row)):
      # parse the cell text with a fresh parser that shares the state `p.s`
      initParser(q, p.s)
      q.col = cols[j]
      q.line = line - 1
      q.filename = p.filename
      getTokens(row[j], false, q.tok)
      b = newRstNode(rnTableDataCell)
      addSon(b, parseDoc(q))
      addSon(a, b)
    addSon(result, a)

# Parses a transition: skips the current token and up to two directly
# following indent tokens.
proc parseTransition(p: var TRstParser): PRstNode =
  result = newRstNode(rnTransition)
  inc(p.idx)
  if p.tok[p.idx].kind == tkIndent: inc(p.idx)
  if p.tok[p.idx].kind == tkIndent: inc(p.idx)

# Parses an over- and underlined headline. Title lines indented deeper than
# the current indentation are joined with a single space. The level is
# obtained from `getLevel` using the first character of the overline.
proc parseOverline(p: var TRstParser): PRstNode =
  var c: char
  c = p.tok[p.idx].symbol[0]
  inc(p.idx, 2)
  result = newRstNode(rnOverline)
  while true:
    parseLine(p, result)
    if p.tok[p.idx].kind == tkIndent:
      inc(p.idx)
      # p.idx - 1 is the indent token that has just been skipped
      if p.tok[p.idx - 1].ival > currInd(p):
        addSon(result, newRstNode(rnLeaf, " "))
      else:
        break
    else:
      break
  result.level = getLevel(p.s.overlineToLevel, p.s.oLevel, c)
  if p.tok[p.idx].kind == tkAdornment:
    inc(p.idx)                # XXX: check?
    if p.tok[p.idx].kind == tkIndent: inc(p.idx)

# Parses a bullet list. Returns nil when the bullet token is not followed by
# whitespace. Further items must start at the same column with the same
# bullet symbol.
proc parseBulletList(p: var TRstParser): PRstNode =
  var
    bullet: string
    col: int
    item: PRstNode
  result = nil
  if p.tok[p.idx + 1].kind == tkWhite:
    bullet = p.tok[p.idx].symbol
    col = p.tok[p.idx].col
    result = newRstNode(rnBulletList)
    pushInd(p, p.tok[p.idx + 2].col)
    inc(p.idx, 2)
    while true:
      item = newRstNode(rnBulletItem)
      parseSection(p, item)
      addSon(result, item)
      if (p.tok[p.idx].kind == tkIndent) and (p.tok[p.idx].ival == col) and
          (p.tok[p.idx + 1].symbol == bullet) and
          (p.tok[p.idx + 2].kind == tkWhite):
        inc(p.idx, 3)
      else:
        break
    popInd(p)

# Parses an option list. Each item (rnOptionListItem) consists of an
# rnOptionGroup holding the option tokens and an rnDescription. The option
# group ends at a whitespace token that is longer than one character.
proc parseOptionList(p: var TRstParser): PRstNode =
  var
    a, b, c: PRstNode
    j: int
  result = newRstNode(rnOptionList)
  while true:
    if match(p, p.idx, "-w") or match(p, p.idx, "--w") or
        match(p, p.idx, "/w"):
      a = newRstNode(rnOptionGroup)
      b = newRstNode(rnDescription)
      c = newRstNode(rnOptionListItem)
      while not (p.tok[p.idx].kind in {tkIndent, tkEof}):
        if (p.tok[p.idx].kind == tkWhite) and (len(p.tok[p.idx].symbol) > 1):
          inc(p.idx)
          break
        addSon(a, newLeaf(p))
        inc(p.idx)
      j = tokenAfterNewline(p)
      # a description that continues on deeper indented lines is parsed as
      # a section of its own; otherwise it is just the rest of the line
      if (j > 0) and (p.tok[j - 1].kind == tkIndent) and
          (p.tok[j - 1].ival > currInd(p)):
        pushInd(p, p.tok[j - 1].ival)
        parseSection(p, b)
        popInd(p)
      else:
        parseLine(p, b)
      if (p.tok[p.idx].kind == tkIndent):
        inc(p.idx)
      addSon(c, a)
      addSon(c, b)
      addSon(result, c)
    else:
      break

# Parses a definition list: a name line followed by a deeper indented body.
# Returns nil if the current position does not start a definition list or if
# no item could be parsed.
proc parseDefinitionList(p: var TRstParser): PRstNode =
  var
    j, col: int
    a, b, c: PRstNode
  result = nil
  j = tokenAfterNewLine(p) - 1
  if (j >= 1) and (p.tok[j].kind == tkIndent) and
      (p.tok[j].ival > currInd(p)) and (p.tok[j - 1].symbol != "::"):
    col = p.tok[p.idx].col
    result = newRstNode(rnDefList)
    while true:
      j = p.idx               # remembered so that the name line can be undone
      a = newRstNode(rnDefName)
      parseLine(p, a)
      if (p.tok[p.idx].kind == tkIndent) and
          (p.tok[p.idx].ival > currInd(p)) and
          (p.tok[p.idx + 1].symbol != "::") and
          not (p.tok[p.idx + 1].kind in {tkIndent, tkEof}):
        pushInd(p, p.tok[p.idx].ival)
        b = newRstNode(rnDefBody)
        parseSection(p, b)
        c = newRstNode(rnDefItem)
        addSon(c, a)
        addSon(c, b)
        addSon(result, c)
        popInd(p)
      else:
        # not a definition item after all: restore the position
        p.idx = j
        break
      if (p.tok[p.idx].kind == tkIndent) and (p.tok[p.idx].ival == col):
        inc(p.idx)
        j = tokenAfterNewLine(p) - 1
        # continue only if the next line again looks like a definition item
        if (j >= 1) and (p.tok[j].kind == tkIndent) and (p.tok[j].ival > col) and
            (p.tok[j - 1].symbol != "::") and (p.tok[j + 1].kind != tkIndent):
          nil
        else:
          break
    if rsonsLen(result) == 0: result = nil

# Parses an enumerated list whose items start with one of the patterns
# "(e) ", "e) " or "e. ". Returns nil (with the position restored) if the
# text does not turn out to be an enumerated list.
proc parseEnumList(p: var TRstParser): PRstNode =
  const
    wildcards: array[0..2, string] = ["(e) ", "e) ", "e. "]
    wildpos: array[0..2, int] = [1, 0, 0]
  var
    w, col, j: int
    item: PRstNode
  result = nil
  # determine which enumerator pattern matches:
  w = 0
  while w <= 2:
    if match(p, p.idx, wildcards[w]): break
    inc(w)
  if w <= 2:
    col = p.tok[p.idx].col
    result = newRstNode(rnEnumList)
    # skip the enumerator; wildpos accounts for the extra '(' token
    inc(p.idx, wildpos[w] + 3)
    j = tokenAfterNewLine(p)
    if (p.tok[j].col == p.tok[p.idx].col) or match(p, j, wildcards[w]):
      pushInd(p, p.tok[p.idx].col)
      while true:
        item = newRstNode(rnEnumItem)
        parseSection(p, item)
        addSon(result, item)
        if (p.tok[p.idx].kind == tkIndent) and (p.tok[p.idx].ival == col) and
            match(p, p.idx + 1, wildcards[w]):
          # skip the indent token and the enumerator
          inc(p.idx, wildpos[w] + 4)
        else:
          break
      popInd(p)
    else:
      # undo the skipping of the enumerator
      dec(p.idx, wildpos[w] + 3)
      result = nil

# Returns the kind of the `i`-th son of `father`, or rnLeaf if `father` has
# fewer than `i + 1` sons.
proc sonKind(father: PRstNode, i: int): TRstNodeKind =
  result = rnLeaf
  if i < rsonsLen(father): result = father.sons[i].kind

# Parses a sequence of body elements at the current indentation level and
# appends them to `result`. Deeper indented text becomes an rnBlockQuote;
# a lower indentation or the end of the input ends the section.
proc parseSection(p: var TRstParser, result: PRstNode) =
  var
    a: PRstNode
    k: TRstNodeKind
    leave: bool
  while true:
    leave = false
    assert(p.idx >= 0)
    while p.tok[p.idx].kind == tkIndent:
      if currInd(p) == p.tok[p.idx].ival:
        inc(p.idx)
      elif p.tok[p.idx].ival > currInd(p):
        pushInd(p, p.tok[p.idx].ival)
        a = newRstNode(rnBlockQuote)
        parseSection(p, a)
        addSon(result, a)
        popInd(p)
      else:
        leave = true
        break
    if leave: break
    if p.tok[p.idx].kind == tkEof: break
    a = nil
    k = whichSection(p)
    case k
    of rnLiteralBlock:
      inc(p.idx)              # skip '::'
      a = parseLiteralBlock(p)
    of rnBulletList:
      a = parseBulletList(p)
    of rnLineblock:
      a = parseLineBlock(p)
    of rnDirective:
      a = parseDotDot(p)
    of rnEnumList:
      a = parseEnumList(p)
    of rnLeaf:
      rstMessage(p, errNewSectionExpected)
    of rnParagraph:
      nil
    of rnDefList:
      a = parseDefinitionList(p)
    of rnFieldList:
      # parseFields is entered on the indent token in front of the ':'
      # (compare the check in parseDirective), so step back onto it. The
      # step is guarded so that the index cannot become negative, which the
      # assertion at the top of this loop forbids.
      # NOTE(review): at index 0 there is no preceding indent token; confirm
      # that parseFields then returns nil so that the text is parsed as a
      # paragraph below.
      if p.idx > 0: dec(p.idx)
      a = parseFields(p)
    of rnTransition:
      a = parseTransition(p)
    of rnHeadline:
      a = parseHeadline(p)
    of rnOverline:
      a = parseOverline(p)
    of rnTable:
      a = parseSimpleTable(p)
    of rnOptionList:
      a = parseOptionList(p)
    else: InternalError("rst.parseSection()")
    # fall back to a paragraph if the specialised parser did not produce a
    # node; directives may legitimately yield nil
    if (a == nil) and (k != rnDirective):
      a = newRstNode(rnParagraph)
      parseParagraph(p, a)
    addSonIfNotNil(result, a)
  # a leading paragraph that is not followed by another paragraph is turned
  # into a plain inner node:
  if (sonKind(result, 0) == rnParagraph) and
      (sonKind(result, 1) != rnParagraph):
    result.sons[0].kind = rnInner

# Parses a section into a new rnInner node; inner nodes that have exactly
# one son are replaced by that son.
proc parseSectionWrapper(p: var TRstParser): PRstNode =
  result = newRstNode(rnInner)
  parseSection(p, result)
  while (result.kind == rnInner) and (rsonsLen(result) == 1):
    result = result.sons[0]

# Parses a whole document; reports errGeneralParseError if the parser did
# not reach the end of the input.
proc parseDoc(p: var TRstParser): PRstNode =
  result = parseSectionWrapper(p)
  if p.tok[p.idx].kind != tkEof: rstMessage(p, errGeneralParseError)

type
  TDirFlag = enum
    hasArg, hasOptions, argIsFile
  TDirFlags = set[TDirFlag]
  TSectionParser = proc (p: var TRstParser): PRstNode

# Parses the common parts of a directive. The resulting rnDirective node
# always gets three sons, each of which may be nil:
#   sons[0]: the argument (only if `hasArg` is in `flags`)
#   sons[1]: the options as parsed by `parseFields` (only if `hasOptions`)
#   sons[2]: the content as parsed by `contentParser` (only if it is not nil
#            and deeper indented content follows)
# With `argIsFile` the argument consists of the directly following word,
# punctation, adornment and other tokens instead of the rest of the line.
proc parseDirective(p: var TRstParser, flags: TDirFlags,
                    contentParser: TSectionParser): PRstNode =
  var args, options, content: PRstNode
  result = newRstNode(rnDirective)
  args = nil
  options = nil
  if hasArg in flags:
    args = newRstNode(rnDirArg)
    if argIsFile in flags:
      while True:
        case p.tok[p.idx].kind
        of tkWord, tkOther, tkPunct, tkAdornment:
          addSon(args, newLeaf(p))
          inc(p.idx)
        else: break
    else:
      parseLine(p, args)
  addSon(result, args)
  if hasOptions in flags:
    if (p.tok[p.idx].kind == tkIndent) and (p.tok[p.idx].ival >= 3) and
        (p.tok[p.idx + 1].symbol == ":"):
      options = parseFields(p)
  addSon(result, options)
  if (not isNil(contentParser)) and (p.tok[p.idx].kind == tkIndent) and
      (p.tok[p.idx].ival > currInd(p)):
    pushInd(p, p.tok[p.idx].ival)
    content = contentParser(p)
    popInd(p)
    addSon(result, content)
  else:
    addSon(result, nil)

proc dirInclude(p: var TRstParser): PRstNode =
  #
  #The following options are recognized:
  #
  #start-after : text to find in the external data file
  #    Only the content after the first occurrence of the specified text will
  #    be included.
  #end-before : text to find in the external data file
  #    Only the content before the first occurrence of the specified text
  #    (but after any after text) will be included.
  #literal : flag (empty)
  #    The entire included text is inserted into the document as a single
  #    literal block (useful for program listings).
  #encoding : name of text encoding
  #    The text encoding of the external data file. Defaults to the document's
  #    encoding (if specified).
  #
  # Of these options the code below only evaluates ``literal``. Returns nil
  # if the file cannot be found.
  var
    n: PRstNode
    filename, path: string
    q: TRstParser
  result = nil
  n = parseDirective(p, {hasArg, argIsFile, hasOptions}, nil)
  filename = strip(addNodes(n.sons[0]))
  path = findFile(filename)
  if path == "":
    rstMessage(p, errCannotOpenFile, filename)
  else:
    # XXX: error handling; recursive file inclusion!
    if getFieldValue(n, "literal") != "":
      result = newRstNode(rnLiteralBlock)
      addSon(result, newRstNode(rnLeaf, readFile(path)))
    else:
      # parse the included file with a fresh parser that shares the state
      initParser(q, p.s)
      q.filename = filename
      getTokens(readFile(path), false, q.tok)
      # workaround a GCC bug:
      if find(q.tok[high(q.tok)].symbol, "\0\x01\x02") > 0:
        InternalError("Too many binary zeros in include file")
      result = parseDoc(q)

# Parses a code block directive. If the ``file`` option is given, the
# directive's content (sons[2]) is replaced by a literal block holding the
# contents of that file.
proc dirCodeBlock(p: var TRstParser): PRstNode =
  var
    n: PRstNode
    filename, path: string
  result = parseDirective(p, {hasArg, hasOptions}, parseLiteralBlock)
  filename = strip(getFieldValue(result, "file"))
  if filename != "":
    path = findFile(filename)
    if path == "":
      rstMessage(p, errCannotOpenFile, filename)
    else:
      # only read the file if it has been found (as dirInclude and dirRaw
      # do); otherwise the parsed content is left untouched
      n = newRstNode(rnLiteralBlock)
      addSon(n, newRstNode(rnLeaf, readFile(path)))
      result.sons[2] = n
  result.kind = rnCodeBlock

# Parses a ``container`` directive: an argument plus a nested section.
proc dirContainer(p: var TRstParser): PRstNode =
  result = parseDirective(p, {hasArg}, parseSectionWrapper)
  assert(result.kind == rnDirective)
  assert(rsonsLen(result) == 3)
  result.kind = rnContainer

# Parses an image directive: a file argument plus options, no content.
proc dirImage(p: var TRstParser): PRstNode =
  result = parseDirective(p, {hasOptions, hasArg, argIsFile}, nil)
  result.kind = rnImage

# Parses a figure directive: like an image, but with a nested section as
# content.
proc dirFigure(p: var TRstParser): PRstNode =
  result = parseDirective(p, {hasOptions, hasArg, argIsFile},
                          parseSectionWrapper)
  result.kind = rnFigure

# Parses a title directive: only an argument.
proc dirTitle(p: var TRstParser): PRstNode =
  result = parseDirective(p, {hasArg}, nil)
  result.kind = rnTitle

# Parses a contents directive: only an argument.
proc dirContents(p: var TRstParser): PRstNode =
  result = parseDirective(p, {hasArg}, nil)
  result.kind = rnContents

# Parses an index directive: no argument, no options, a nested section as
# content.
proc dirIndex(p: var TRstParser): PRstNode =
  result = parseDirective(p, {}, parseSectionWrapper)
  result.kind = rnIndex

proc dirRaw(p: var TRstParser): PRstNode =
  #
  #The following options are recognized:
  #
  #file : string (newlines removed)
  #    The local filesystem path of a raw data file to be included.
  #url : string (whitespace removed)
  #    An Internet URL reference to a raw data file to be included.
  #encoding : name of text encoding
  #    The text encoding of the external raw data (file or URL).
  #    Defaults to the document's encoding (if specified).
  #
  # Of these options the code below only evaluates ``file``: if the file is
  # found, the result is a new rnRaw node whose only son is a leaf with the
  # file's contents.
  var filename, path, f: string
  result = parseDirective(p, {hasOptions}, parseSectionWrapper)
  result.kind = rnRaw
  filename = getFieldValue(result, "file")
  if filename != "":
    path = findFile(filename)
    if path == "":
      rstMessage(p, errCannotOpenFile, filename)
    else:
      f = readFile(path)
      result = newRstNode(rnRaw)
      addSon(result, newRstNode(rnLeaf, f))

# Parses everything that starts with '..': directives, hyperlink targets,
# substitution definitions, footnotes/citations and comments. Targets,
# substitutions and footnotes are only registered (via setRef/setSub); for
# these nil is returned.
proc parseDotDot(p: var TRstParser): PRstNode =
  var
    d: string
    col: int
    a, b: PRstNode
  result = nil
  col = p.tok[p.idx].col
  inc(p.idx)
  d = getDirective(p)
  if d != "":
    pushInd(p, col)
    case getDirKind(d)
    of dkInclude: result = dirInclude(p)
    of dkImage: result = dirImage(p)
    of dkFigure: result = dirFigure(p)
    of dkTitle: result = dirTitle(p)
    of dkContainer: result = dirContainer(p)
    of dkContents: result = dirContents(p)
    of dkRaw: result = dirRaw(p)
    of dkCodeblock: result = dirCodeBlock(p)
    of dkIndex: result = dirIndex(p)
    else: rstMessage(p, errInvalidDirectiveX, d)
    popInd(p)
  elif match(p, p.idx, " _"):
    # hyperlink target:
    inc(p.idx, 2)
    a = getReferenceName(p, ":")
    if p.tok[p.idx].kind == tkWhite: inc(p.idx)
    b = untilEol(p)
    setRef(p, rstnodeToRefname(a), b)
  elif match(p, p.idx, " |"):
    # substitution definitions:
    inc(p.idx, 2)
    a = getReferenceName(p, "|")
    if p.tok[p.idx].kind == tkWhite: inc(p.idx)
    if cmpIgnoreStyle(p.tok[p.idx].symbol, "replace") == 0:
      inc(p.idx)
      expect(p, "::")
      b = untilEol(p)
    elif cmpIgnoreStyle(p.tok[p.idx].symbol, "image") == 0:
      inc(p.idx)
      b = dirImage(p)
    else:
      rstMessage(p, errInvalidDirectiveX, p.tok[p.idx].symbol)
    setSub(p, addNodes(a), b)
  elif match(p, p.idx, " ["):
    # footnotes, citations
    inc(p.idx, 2)
    a = getReferenceName(p, "]")
    if p.tok[p.idx].kind == tkWhite: inc(p.idx)
    b = untilEol(p)
    setRef(p, rstnodeToRefname(a), b)
  else:
    result = parseComment(p)

# Resolves substitution references and references in the tree `n` and
# returns the (possibly replaced) node:
# * a substitution reference is replaced by its registered value or, if
#   there is none, by a leaf with the value of the environment variable of
#   the same name; an unknown substitution yields a warning
# * a reference with a registered target becomes an rnHyperlink node
# * an rnContents node sets `p.hasToc`
# * the sons of all other non-leaf nodes are processed recursively
proc resolveSubs(p: var TRstParser, n: PRstNode): PRstNode =
  var
    x: int
    y: PRstNode
    e, key: string
  result = n
  if n == nil: return
  case n.kind
  of rnSubstitutionReferences:
    x = findSub(p, n)
    if x >= 0:
      result = p.s.subs[x].value
    else:
      key = addNodes(n)
      e = getEnv(key)
      if e != "": result = newRstNode(rnLeaf, e)
      else: rstMessage(p, warnUnknownSubstitutionX, key)
  of rnRef:
    y = findRef(p, rstnodeToRefname(n))
    if y != nil:
      # the hyperlink gets the reference text (as an inner node) and the
      # target as its sons
      result = newRstNode(rnHyperlink)
      n.kind = rnInner
      addSon(result, n)
      addSon(result, y)
  of rnLeaf:
    nil
  of rnContents:
    p.hasToc = true
  else:
    for i in countup(0, rsonsLen(n) - 1): n.sons[i] = resolveSubs(p, n.sons[i])

# Parses `text` as reStructuredText and returns the syntax tree with all
# substitutions and references resolved. `line` and `column` are stored in
# the parser as its start position; `hasToc` is set to whether the document
# contains a contents directive.
proc rstParse(text: string, # the text to be parsed
              skipPounds: bool, filename: string, # for error messages
              line, column: int, hasToc: var bool): PRstNode =
  var p: TRstParser
  if isNil(text): rawMessage(errCannotOpenFile, filename)
  initParser(p, newSharedState())
  p.filename = filename
  p.line = line
  p.col = column
  getTokens(text, skipPounds, p.tok)
  result = resolveSubs(p, parseDoc(p))
  hasToc = p.hasToc