//
//
//           The Nimrod Compiler
//        (c) Copyright 2009 Andreas Rumpf
//
//    See the file "copying.txt", included in this
//    distribution, for details about the copyright.
//
unit rst;

// This module implements a *reStructuredText* parser. A large
// subset is provided.

interface

{$include 'config.inc'}

uses
  nsystem, nos, msgs, strutils, platform, nhashes, ropes, charsets, options;

type
  // The kinds of nodes that make up the RST syntax tree.
  TRstNodeKind = (
    rnInner,               // an inner node or a root
    rnHeadline,            // a headline
    rnOverline,            // an over- and underlined headline
    rnTransition,          // a transition (the ------------- thingie)
    rnParagraph,           // a paragraph
    rnBulletList,          // a bullet list
    rnBulletItem,          // a bullet item
    rnEnumList,            // an enumerated list
    rnEnumItem,            // an enumerated item
    rnDefList,             // a definition list
    rnDefItem,             // an item of a definition list consisting of ...
    rnDefName,             // ... a name part ...
    rnDefBody,             // ... and a body part ...
    rnFieldList,           // a field list
    rnField,               // a field item
    rnFieldName,           // consisting of a field name ...
    rnFieldBody,           // ... and a field body
    rnOptionList,
    rnOptionListItem,
    rnOptionGroup,
    rnOption,
    rnOptionString,
    rnOptionArgument,
    rnDescription,
    rnLiteralBlock,
    rnQuotedLiteralBlock,
    rnLineBlock,           // the | thingie
    rnLineBlockItem,       // sons of the | thing
    rnBlockQuote,          // text just indented
    rnTable,
    rnGridTable,
    rnTableRow,
    rnTableHeaderCell,
    rnTableDataCell,
    rnLabel,               // used for footnotes and other things
    rnFootnote,            // a footnote
    rnCitation,            // similar to footnote
    rnStandaloneHyperlink,
    rnHyperlink,
    rnRef,
    rnDirective,           // a directive
    rnDirArg,
    rnRaw,
    rnTitle,
    rnContents,
    rnImage,
    rnFigure,
    rnCodeBlock,
    rnContainer,           // ``container`` directive
    rnIndex,               // index directive:
                           // ..
index:: // key // * `file#id `_ // * `file#id '_ rnSubstitutionDef, // a definition of a substitution rnGeneralRole, // Inline markup: rnSub, rnSup, rnIdx, rnEmphasis, // "*" rnStrongEmphasis, // "**" rnInterpretedText, // "`" rnInlineLiteral, // "``" rnSubstitutionReferences, // "|" rnLeaf // a leaf; the node's text field contains the leaf val ); const rstnodekindToStr: array [TRstNodeKind] of string = ( 'Inner', 'Headline', 'Overline', 'Transition', 'Paragraph', 'BulletList', 'BulletItem', 'EnumList', 'EnumItem', 'DefList', 'DefItem', 'DefName', 'DefBody', 'FieldList', 'Field', 'FieldName', 'FieldBody', 'OptionList', 'OptionListItem', 'OptionGroup', 'Option', 'OptionString', 'OptionArgument', 'Description', 'LiteralBlock', 'QuotedLiteralBlock', 'LineBlock', 'LineBlockItem', 'BlockQuote', 'Table', 'GridTable', 'TableRow', 'TableHeaderCell', 'TableDataCell', 'Label', 'Footnote', 'Citation', 'StandaloneHyperlink', 'Hyperlink', 'Ref', 'Directive', 'DirArg', 'Raw', 'Title', 'Contents', 'Image', 'Figure', 'CodeBlock', 'Container', 'Index', 'SubstitutionDef', 'GeneralRole', 'Sub', 'Sup', 'Idx', 'Emphasis', 'StrongEmphasis', 'InterpretedText', 'InlineLiteral', 'SubstitutionReferences', 'Leaf' ); type // the syntax tree of RST: PRSTNode = ^TRstNode; TRstNodeSeq = array of PRstNode; TRSTNode = record kind: TRstNodeKind; text: string; // valid for leafs in the AST; and the title of // the document or the section level: int; // valid for some node kinds sons: TRstNodeSeq; // the node's sons end {@acyclic}; function rstParse(const text: string; // the text to be parsed skipPounds: bool; const filename: string; // for error messages line, column: int; var hasToc: bool): PRstNode; function rsonsLen(n: PRstNode): int; function newRstNode(kind: TRstNodeKind): PRstNode; overload; function newRstNode(kind: TRstNodeKind; const s: string): PRstNode; overload; procedure addSon(father, son: PRstNode); function rstnodeToRefname(n: PRstNode): string; function addNodes(n: PRstNode): 
string; function getFieldValue(n: PRstNode; const fieldname: string): string; function getArgument(n: PRstNode): string; // index handling: procedure setIndexPair(index, key, val: PRstNode); procedure sortIndex(a: PRstNode); procedure clearIndex(index: PRstNode; const filename: string); implementation // ----------------------------- scanner part -------------------------------- const SymChars: TCharSet = ['a'..'z', 'A'..'Z', '0'..'9', #128..#255]; type TTokType = (tkEof, tkIndent, tkWhite, tkWord, tkAdornment, tkPunct, tkOther); TToken = record // a RST token kind: TTokType; // the type of the token ival: int; // the indentation or parsed integer value symbol: string; // the parsed symbol as string line, col: int; // line and column of the token end; TTokenSeq = array of TToken; TLexer = object(NObject) buf: PChar; bufpos: int; line, col, baseIndent: int; skipPounds: bool; end; procedure getThing(var L: TLexer; var tok: TToken; const s: TCharSet); var pos: int; begin tok.kind := tkWord; tok.line := L.line; tok.col := L.col; pos := L.bufpos; while True do begin addChar(tok.symbol, L.buf[pos]); inc(pos); if not (L.buf[pos] in s) then break end; inc(L.col, pos - L.bufpos); L.bufpos := pos; end; procedure getAdornment(var L: TLexer; var tok: TToken); var pos: int; c: char; begin tok.kind := tkAdornment; tok.line := L.line; tok.col := L.col; pos := L.bufpos; c := L.buf[pos]; while True do begin addChar(tok.symbol, L.buf[pos]); inc(pos); if L.buf[pos] <> c then break end; inc(L.col, pos - L.bufpos); L.bufpos := pos end; function getIndentAux(var L: TLexer; start: int): int; var buf: PChar; pos: int; begin pos := start; buf := L.buf; // skip the newline (but include it in the token!) 
if buf[pos] = #13 then begin if buf[pos+1] = #10 then inc(pos, 2) else inc(pos); end else if buf[pos] = #10 then inc(pos); if L.skipPounds then begin if buf[pos] = '#' then inc(pos); if buf[pos] = '#' then inc(pos); end; result := 0; while True do begin case buf[pos] of ' ', #11, #12: begin inc(pos); inc(result); end; #9: begin inc(pos); result := result - (result mod 8) + 8; end; else break; // EndOfFile also leaves the loop end; end; if buf[pos] = #0 then result := 0 else if (buf[pos] = #10) or (buf[pos] = #13) then begin // look at the next line for proper indentation: result := getIndentAux(L, pos); end; L.bufpos := pos; // no need to set back buf end; procedure getIndent(var L: TLexer; var tok: TToken); begin inc(L.line); tok.line := L.line; tok.col := 0; tok.kind := tkIndent; // skip the newline (but include it in the token!) tok.ival := getIndentAux(L, L.bufpos); L.col := tok.ival; tok.ival := max(tok.ival - L.baseIndent, 0); tok.symbol := nl +{&} repeatChar(tok.ival); end; procedure rawGetTok(var L: TLexer; var tok: TToken); var c: Char; begin tok.symbol := ''; tok.ival := 0; c := L.buf[L.bufpos]; case c of 'a'..'z', 'A'..'Z', #128..#255, '0'..'9': getThing(L, tok, SymChars); ' ', #9, #11, #12: begin getThing(L, tok, {@set}[' ', #9]); tok.kind := tkWhite; if L.buf[L.bufpos] in [#13, #10] then rawGetTok(L, tok); // ignore spaces before \n end; #13, #10: getIndent(L, tok); '!', '"', '#', '$', '%', '&', '''', '(', ')', '*', '+', ',', '-', '.', '/', ':', ';', '<', '=', '>', '?', '@', '[', '\', ']', '^', '_', '`', '{', '|', '}', '~': begin getAdornment(L, tok); if length(tok.symbol) <= 3 then tok.kind := tkPunct; end; else begin tok.line := L.line; tok.col := L.col; if c = #0 then tok.kind := tkEof else begin tok.kind := tkOther; addChar(tok.symbol, c); inc(L.bufpos); inc(L.col); end end end; tok.col := max(tok.col - L.baseIndent, 0); end; procedure getTokens(const buffer: string; skipPounds: bool; var tokens: TTokenSeq); var L: TLexer; len: int; begin {@ignore} 
fillChar(L, sizeof(L), 0); {@emit} len := length(tokens); L.buf := PChar(buffer); L.line := 1; // skip UTF-8 BOM if (L.buf[0] = #239) and (L.buf[1] = #187) and (L.buf[2] = #191) then inc(L.bufpos, 3); L.skipPounds := skipPounds; if skipPounds then begin if L.buf[L.bufpos] = '#' then inc(L.bufpos); if L.buf[L.bufpos] = '#' then inc(L.bufpos); L.baseIndent := 0; while L.buf[L.bufpos] = ' ' do begin inc(L.bufpos); inc(L.baseIndent); end end; while true do begin inc(len); setLength(tokens, len); rawGetTok(L, tokens[len-1]); if tokens[len-1].kind = tkEof then break; end; if tokens[0].kind = tkWhite then begin // BUGFIX tokens[0].ival := length(tokens[0].symbol); tokens[0].kind := tkIndent end end; // -------------------------------------------------------------------------- procedure addSon(father, son: PRstNode); var L: int; begin L := length(father.sons); setLength(father.sons, L+1); father.sons[L] := son; end; procedure addSonIfNotNil(father, son: PRstNode); begin if son <> nil then addSon(father, son); end; function rsonsLen(n: PRstNode): int; begin result := length(n.sons) end; function newRstNode(kind: TRstNodeKind): PRstNode; overload; begin new(result); {@ignore} fillChar(result^, sizeof(result^), 0); {@emit result.sons := @[]; } result.kind := kind; end; function newRstNode(kind: TRstNodeKind; const s: string): PRstNode; overload; begin result := newRstNode(kind); result.text := s; end; // --------------------------------------------------------------------------- type TLevelMap = array [Char] of int; TSubstitution = record key: string; value: PRstNode; end; TSharedState = record uLevel, oLevel: int; // counters for the section levels subs: array of TSubstitution; // substitutions refs: array of TSubstitution; // references underlineToLevel: TLevelMap; // Saves for each possible title adornment character its level in the // current document. This is for single underline adornments. 
overlineToLevel: TLevelMap; // Saves for each possible title adornment character its level in the // current document. This is for over-underline adornments. end; PSharedState = ^TSharedState; TRstParser = object(NObject) idx: int; tok: TTokenSeq; s: PSharedState; indentStack: array of int; filename: string; line, col: int; hasToc: bool; end; function newSharedState(): PSharedState; begin new(result); {@ignore} fillChar(result^, sizeof(result^), 0); {@emit} {@emit result.subs := @[];} {@emit result.refs := @[];} end; function tokInfo(const p: TRstParser; const tok: TToken): TLineInfo; begin result := newLineInfo(p.filename, p.line+tok.line, p.col+tok.col); end; procedure rstMessage(const p: TRstParser; msgKind: TMsgKind; const arg: string); overload; begin liMessage(tokInfo(p, p.tok[p.idx]), msgKind, arg); end; procedure rstMessage(const p: TRstParser; msgKind: TMsgKind); overload; begin liMessage(tokInfo(p, p.tok[p.idx]), msgKind, p.tok[p.idx].symbol); end; function currInd(const p: TRstParser): int; begin result := p.indentStack[high(p.indentStack)]; end; procedure pushInd(var p: TRstParser; ind: int); var len: int; begin len := length(p.indentStack); setLength(p.indentStack, len+1); p.indentStack[len] := ind; end; procedure popInd(var p: TRstParser); begin if length(p.indentStack) > 1 then setLength(p.indentStack, length(p.indentStack)-1); end; procedure initParser(var p: TRstParser; sharedState: PSharedState); begin {@ignore} fillChar(p, sizeof(p), 0); p.tok := nil; p.indentStack := nil; pushInd(p, 0); {@emit p.indentStack := @[0];} {@emit p.tok := @[];} p.idx := 0; p.filename := ''; p.hasToc := false; p.col := 0; p.line := 1; p.s := sharedState; end; // --------------------------------------------------------------- procedure addNodesAux(n: PRstNode; var result: string); var i: int; begin if n.kind = rnLeaf then add(result, n.text) else begin for i := 0 to rsonsLen(n)-1 do addNodesAux(n.sons[i], result) end end; function addNodes(n: PRstNode): string; begin 
result := ''; addNodesAux(n, result); end; procedure rstnodeToRefnameAux(n: PRstNode; var r: string; var b: bool); var i: int; begin if n.kind = rnLeaf then begin for i := strStart to length(n.text)+strStart-1 do begin case n.text[i] of '0'..'9': begin if b then begin addChar(r, '-'); b := false; end; // BUGFIX: HTML id's cannot start with a digit if length(r) = 0 then addChar(r, 'Z'); addChar(r, n.text[i]) end; 'a'..'z': begin if b then begin addChar(r, '-'); b := false; end; addChar(r, n.text[i]) end; 'A'..'Z': begin if b then begin addChar(r, '-'); b := false; end; addChar(r, chr(ord(n.text[i]) - ord('A') + ord('a'))); end; else if (length(r) > 0) then b := true; end end end else begin for i := 0 to rsonsLen(n)-1 do rstnodeToRefnameAux(n.sons[i], r, b) end end; function rstnodeToRefname(n: PRstNode): string; var b: bool; begin result := ''; b := false; rstnodeToRefnameAux(n, result, b); end; function findSub(var p: TRstParser; n: PRstNode): int; var key: string; i: int; begin key := addNodes(n); // the spec says: if no exact match, try one without case distinction: for i := 0 to high(p.s.subs) do if key = p.s.subs[i].key then begin result := i; exit end; for i := 0 to high(p.s.subs) do if cmpIgnoreStyle(key, p.s.subs[i].key) = 0 then begin result := i; exit end; result := -1 end; procedure setSub(var p: TRstParser; const key: string; value: PRstNode); var i, len: int; begin len := length(p.s.subs); for i := 0 to len-1 do if key = p.s.subs[i].key then begin p.s.subs[i].value := value; exit end; setLength(p.s.subs, len+1); p.s.subs[len].key := key; p.s.subs[len].value := value; end; procedure setRef(var p: TRstParser; const key: string; value: PRstNode); var i, len: int; begin len := length(p.s.refs); for i := 0 to len-1 do if key = p.s.refs[i].key then begin p.s.refs[i].value := value; rstMessage(p, warnRedefinitionOfLabel, key); exit end; setLength(p.s.refs, len+1); p.s.refs[len].key := key; p.s.refs[len].value := value; end; function findRef(var p: TRstParser; 
const key: string): PRstNode;
// (tail of findRef) Returns the node stored under exactly `key` in the
// shared reference table, or nil if there is no such entry.
var
  i: int;
begin
  for i := 0 to high(p.s.refs) do
    if key = p.s.refs[i].key then begin
      result := p.s.refs[i].value;
      exit
    end;
  result := nil
end;

function cmpNodes(a, b: PRstNode): int;
// Compares two rnDefItem nodes by the flattened text of their first son,
// using cmpIgnoreStyle. This is the ordering used by sortIndex.
var
  x, y: PRstNode;
begin
  assert(a.kind = rnDefItem);
  assert(b.kind = rnDefItem);
  x := a.sons[0];
  y := b.sons[0];
  result := cmpIgnoreStyle(addNodes(x), addNodes(y))
end;

procedure sortIndex(a: PRstNode);
// Sorts the sons of the rnDefList `a` in place, ordered by cmpNodes.
// we use shellsort here; fast and simple
var
  N, i, j, h: int;
  v: PRstNode;
begin
  assert(a.kind = rnDefList);
  N := rsonsLen(a);
  // find the first gap of the sequence 1, 4, 13, ... that exceeds N
  h := 1;
  repeat h := 3*h+1; until h > N;
  repeat
    h := h div 3;
    // gapped insertion sort for the current gap h
    for i := h to N-1 do begin
      v := a.sons[i];
      j := i;
      while cmpNodes(a.sons[j-h], v) >= 0 do begin
        a.sons[j] := a.sons[j-h];
        j := j - h;
        if j < h then break
      end;
      a.sons[j] := v;
    end;
  until h = 1
end;

function eqRstNodes(a, b: PRstNode): bool;
// Structural equality of two trees: the kinds must match; leaves compare
// their text, all other nodes compare their sons pairwise.
var
  i: int;
begin
  result := false;
  if a.kind <> b.kind then exit;
  if a.kind = rnLeaf then
    result := a.text = b.text
  else begin
    if rsonsLen(a) <> rsonsLen(b) then exit;
    for i := 0 to rsonsLen(a)-1 do
      if not eqRstNodes(a.sons[i], b.sons[i]) then exit;
    result := true
  end
end;

function matchesHyperlink(h: PRstNode; const filename: string): bool;
// Returns true if `h` is (an rnInner wrapper around) an rnHyperlink whose
// target text, the flattened second son, has the form `filename#...`.
var
  s: string;
begin
  if h.kind = rnInner then begin
    assert(rsonsLen(h) = 1);
    result := matchesHyperlink(h.sons[0], filename)
  end
  else if h.kind = rnHyperlink then begin
    s := addNodes(h.sons[1]);
    // The length test comes first so that the character access below stays
    // inside `s` when the target is exactly `filename` with no '#' part.
    // (As in the original expression, this relies on short-circuit `and`.)
    result := (length(s) > length(filename)) and startsWith(s, filename)
      and (s[length(filename)+strStart] = '#')
  end
  else // this may happen in broken indexes!
    result := false
end;

procedure clearIndex(index: PRstNode; const filename: string);
// Removes from the rnDefList `index` every value that is a hyperlink into
// `filename` (see matchesHyperlink). Entries that lose all of their values
// are dropped; a bullet list left with one item is replaced by that item.
var
  i, j, k, items, lastItem: int;
  val: PRstNode;
begin
  assert(index.kind = rnDefList);
  for i := 0 to rsonsLen(index)-1 do begin
    assert(index.sons[i].sons[1].kind = rnDefBody);
    val := index.sons[i].sons[1].sons[0];
    if val.kind = rnInner then val := val.sons[0];
    if val.kind = rnBulletList then begin
      items := rsonsLen(val);
      lastItem := -1; // save the last valid item index
      for j := 0 to rsonsLen(val)-1 do begin
        if val.sons[j] = nil then
          dec(items)
        else if matchesHyperlink(val.sons[j].sons[0], filename) then begin
          val.sons[j] := nil;
          dec(items)
        end
        else
          lastItem := j
      end;
      if items = 1 then // remove bullet list:
        index.sons[i].sons[1].sons[0] := val.sons[lastItem].sons[0]
      else if items = 0 then
        index.sons[i] := nil
    end
    else if matchesHyperlink(val, filename) then
      index.sons[i] := nil
  end;
  // remove nil nodes:
  k := 0;
  for i := 0 to rsonsLen(index)-1 do begin
    if index.sons[i] <> nil then begin
      if k <> i then index.sons[k] := index.sons[i];
      inc(k)
    end
  end;
  setLength(index.sons, k);
end;

procedure setIndexPair(index, key, val: PRstNode);
// Adds the pair (key, val) to the rnDefList `index`. If an entry with an
// equal key already exists, `val` is appended to that entry's bullet list
// (an existing single value is first wrapped into a bullet list).
// Otherwise a new rnDefItem holding the key and the value is appended.
var
  i: int;
  e, a, b: PRstNode;
begin
  // writeln(rstnodekindToStr[key.kind], ': ', rstnodekindToStr[val.kind]);
  assert(index.kind = rnDefList);
  assert(key.kind <> rnDefName);
  a := newRstNode(rnDefName);
  addSon(a, key);

  for i := 0 to rsonsLen(index)-1 do begin
    if eqRstNodes(index.sons[i].sons[0], a) then begin
      assert(index.sons[i].sons[1].kind = rnDefBody);
      e := index.sons[i].sons[1].sons[0];
      if e.kind <> rnBulletList then begin
        e := newRstNode(rnBulletList);
        b := newRstNode(rnBulletItem);
        addSon(b, index.sons[i].sons[1].sons[0]);
        addSon(e, b);
        index.sons[i].sons[1].sons[0] := e;
      end;
      b := newRstNode(rnBulletItem);
      addSon(b, val);
      addSon(e, b);
      exit // key already exists
    end
  end;
  e := newRstNode(rnDefItem);
  assert(val.kind <> rnDefBody);
  b := newRstNode(rnDefBody);
  addSon(b, val);
  addSon(e, a);
  addSon(e, b);
  addSon(index, e);
end;

//
// ---------------------------------------------------------------------------

function newLeaf(var p: TRstParser): PRstNode;
// Creates a leaf node holding the symbol of the current token.
begin
  result := newRstNode(rnLeaf);
  result.text := p.tok[p.idx].symbol
end;

function getReferenceName(var p: TRstParser; const endStr: string): PRstNode;
// Gathers tokens into an rnInner node until the punctuation token `endStr`
// is reached; that token is consumed but not stored. Any token that is not
// a word, whitespace, "other" or punctuation is reported via errXexpected.
begin
  result := newRstNode(rnInner);
  while true do begin
    if p.tok[p.idx].kind in [tkWord, tkOther, tkWhite, tkPunct] then begin
      if (p.tok[p.idx].kind = tkPunct)
      and (p.tok[p.idx].symbol = endStr) then begin
        inc(p.idx); // step over the terminator
        break
      end;
      addSon(result, newLeaf(p));
      inc(p.idx)
    end
    else begin
      rstMessage(p, errXexpected, endStr);
      break
    end
  end
end;

function untilEol(var p: TRstParser): PRstNode;
// Collects every token up to (not including) the next tkIndent or tkEof
// into a fresh rnInner node.
begin
  result := newRstNode(rnInner);
  while (p.tok[p.idx].kind <> tkIndent) and (p.tok[p.idx].kind <> tkEof) do
  begin
    addSon(result, newLeaf(p));
    inc(p.idx);
  end
end;

procedure expect(var p: TRstParser; const tok: string);
// Skips the current token if its symbol is `tok`; reports errXexpected
// otherwise (the position is then left unchanged).
begin
  if p.tok[p.idx].symbol <> tok then
    rstMessage(p, errXexpected, tok)
  else
    inc(p.idx)
end;

(*
  From the specification:

  The inline markup start-string and end-string recognition rules are as
  follows. If any of the conditions are not met, the start-string or
  end-string will not be recognized or processed.

   1. Inline markup start-strings must start a text block or be immediately
      preceded by whitespace or one of: ' " ( [ { < - / :
   2. Inline markup start-strings must be immediately followed by
      non-whitespace.
   3. Inline markup end-strings must be immediately preceded by
      non-whitespace.
   4. Inline markup end-strings must end a text block or be immediately
      followed by whitespace or one of: ' " ) ] } > - / : . , ; ! ? \
   5. If an inline markup start-string is immediately preceded by a single or
      double quote, "(", "[", "{", or "<", it must not be immediately followed
      by the corresponding single or double quote, ")", "]", "}", or ">".
   6. An inline markup end-string must be separated by at least one character
      from the start-string.
   7.
An unescaped backslash preceding a start-string or end-string will disable markup recognition, except for the end-string of inline literals. See Escaping Mechanism above for details. *) function isInlineMarkupEnd(const p: TRstParser; const markup: string): bool; begin result := p.tok[p.idx].symbol = markup; if not result then exit; // Rule 3: result := not (p.tok[p.idx-1].kind in [tkIndent, tkWhite]); if not result then exit; // Rule 4: result := (p.tok[p.idx+1].kind in [tkIndent, tkWhite, tkEof]) or (p.tok[p.idx+1].symbol[strStart] in ['''', '"', ')', ']', '}', '>', '-', '/', '\', ':', '.', ',', ';', '!', '?', '_']); if not result then exit; // Rule 7: if p.idx > 0 then begin if (markup <> '``') and (p.tok[p.idx-1].symbol = '\'+'') then begin result := false end end end; function isInlineMarkupStart(const p: TRstParser; const markup: string): bool; var c, d: Char; begin result := p.tok[p.idx].symbol = markup; if not result then exit; // Rule 1: result := (p.idx = 0) or (p.tok[p.idx-1].kind in [tkIndent, tkWhite]) or (p.tok[p.idx-1].symbol[strStart] in ['''', '"', '(', '[', '{', '<', '-', '/', ':', '_']); if not result then exit; // Rule 2: result := not (p.tok[p.idx+1].kind in [tkIndent, tkWhite, tkEof]); if not result then exit; // Rule 5 & 7: if p.idx > 0 then begin if p.tok[p.idx-1].symbol = '\'+'' then result := false else begin c := p.tok[p.idx-1].symbol[strStart]; case c of '''', '"': d := c; '(': d := ')'; '[': d := ']'; '{': d := '}'; '<': d := '>'; else d := #0; end; if d <> #0 then result := p.tok[p.idx+1].symbol[strStart] <> d; end end end; procedure parseBackslash(var p: TRstParser; father: PRstNode); begin assert(p.tok[p.idx].kind = tkPunct); if p.tok[p.idx].symbol = '\\' then begin addSon(father, newRstNode(rnLeaf, '\'+'')); inc(p.idx); end else if p.tok[p.idx].symbol = '\'+'' then begin // XXX: Unicode? 
inc(p.idx); if p.tok[p.idx].kind <> tkWhite then addSon(father, newLeaf(p)); inc(p.idx); end else begin addSon(father, newLeaf(p)); inc(p.idx) end end; function match(const p: TRstParser; start: int; const expr: string): bool; // regular expressions are: // special char exact match // 'w' tkWord // ' ' tkWhite // 'a' tkAdornment // 'i' tkIndent // 'p' tkPunct // 'T' always true // 'E' whitespace, indent or eof // 'e' tkWord or '#' (for enumeration lists) var i, j, last, len: int; c: char; begin i := strStart; j := start; last := length(expr)+strStart-1; while i <= last do begin case expr[i] of 'w': result := p.tok[j].kind = tkWord; ' ': result := p.tok[j].kind = tkWhite; 'i': result := p.tok[j].kind = tkIndent; 'p': result := p.tok[j].kind = tkPunct; 'a': result := p.tok[j].kind = tkAdornment; 'o': result := p.tok[j].kind = tkOther; 'T': result := true; 'E': result := p.tok[j].kind in [tkEof, tkWhite, tkIndent]; 'e': begin result := (p.tok[j].kind = tkWord) or (p.tok[j].symbol = '#'+''); if result then case p.tok[j].symbol[strStart] of 'a'..'z', 'A'..'Z': result := length(p.tok[j].symbol) = 1; '0'..'9': result := allCharsInSet(p.tok[j].symbol, ['0'..'9']); else begin end end end else begin c := expr[i]; len := 0; while (i <= last) and (expr[i] = c) do begin inc(i); inc(len) end; dec(i); result := (p.tok[j].kind in [tkPunct, tkAdornment]) and (length(p.tok[j].symbol) = len) and (p.tok[j].symbol[strStart] = c); end end; if not result then exit; inc(j); inc(i) end; result := true end; procedure fixupEmbeddedRef(n, a, b: PRstNode); var i, sep, incr: int; begin sep := -1; for i := rsonsLen(n)-2 downto 0 do if n.sons[i].text = '<'+'' then begin sep := i; break end; if (sep > 0) and (n.sons[sep-1].text[strStart] = ' ') then incr := 2 else incr := 1; for i := 0 to sep-incr do addSon(a, n.sons[i]); for i := sep+1 to rsonsLen(n)-2 do addSon(b, n.sons[i]); end; function parsePostfix(var p: TRstParser; n: PRstNode): PRstNode; var a, b: PRstNode; begin result := n; if 
isInlineMarkupEnd(p, '_'+'') then begin inc(p.idx); if (p.tok[p.idx-2].symbol ='`'+'') and (p.tok[p.idx-3].symbol = '>'+'') then begin a := newRstNode(rnInner); b := newRstNode(rnInner); fixupEmbeddedRef(n, a, b); if rsonsLen(a) = 0 then begin result := newRstNode(rnStandaloneHyperlink); addSon(result, b); end else begin result := newRstNode(rnHyperlink); addSon(result, a); addSon(result, b); setRef(p, rstnodeToRefname(a), b); end end else if n.kind = rnInterpretedText then n.kind := rnRef else begin result := newRstNode(rnRef); addSon(result, n); end; end else if match(p, p.idx, ':w:') then begin // a role: if p.tok[p.idx+1].symbol = 'idx' then n.kind := rnIdx else if p.tok[p.idx+1].symbol = 'literal' then n.kind := rnInlineLiteral else if p.tok[p.idx+1].symbol = 'strong' then n.kind := rnStrongEmphasis else if p.tok[p.idx+1].symbol = 'emphasis' then n.kind := rnEmphasis else if (p.tok[p.idx+1].symbol = 'sub') or (p.tok[p.idx+1].symbol = 'subscript') then n.kind := rnSub else if (p.tok[p.idx+1].symbol = 'sup') or (p.tok[p.idx+1].symbol = 'supscript') then n.kind := rnSup else begin result := newRstNode(rnGeneralRole); n.kind := rnInner; addSon(result, n); addSon(result, newRstNode(rnLeaf, p.tok[p.idx+1].symbol)); end; inc(p.idx, 3) end end; function isURL(const p: TRstParser; i: int): bool; begin result := (p.tok[i+1].symbol = ':'+'') and (p.tok[i+2].symbol = '//') and (p.tok[i+3].kind = tkWord) and (p.tok[i+4].symbol = '.'+'') end; procedure parseURL(var p: TRstParser; father: PRstNode); var n: PRstNode; begin //if p.tok[p.idx].symbol[strStart] = '<' then begin if isURL(p, p.idx) then begin n := newRstNode(rnStandaloneHyperlink); while true do begin case p.tok[p.idx].kind of tkWord, tkAdornment, tkOther: begin end; tkPunct: begin if not (p.tok[p.idx+1].kind in [tkWord, tkAdornment, tkOther, tkPunct]) then break end else break end; addSon(n, newLeaf(p)); inc(p.idx); end; addSon(father, n); end else begin n := newLeaf(p); inc(p.idx); if p.tok[p.idx].symbol = '_'+'' 
then n := parsePostfix(p, n); addSon(father, n); end end; procedure parseUntil(var p: TRstParser; father: PRstNode; const postfix: string; interpretBackslash: bool); begin while true do begin case p.tok[p.idx].kind of tkPunct: begin if isInlineMarkupEnd(p, postfix) then begin inc(p.idx); break; end else if interpretBackslash then parseBackslash(p, father) else begin addSon(father, newLeaf(p)); inc(p.idx); end end; tkAdornment, tkWord, tkOther: begin addSon(father, newLeaf(p)); inc(p.idx); end; tkIndent: begin addSon(father, newRstNode(rnLeaf, ' '+'')); inc(p.idx); if p.tok[p.idx].kind = tkIndent then begin rstMessage(p, errXExpected, postfix); break end end; tkWhite: begin addSon(father, newRstNode(rnLeaf, ' '+'')); inc(p.idx); end else rstMessage(p, errXExpected, postfix); end end end; procedure parseInline(var p: TRstParser; father: PRstNode); var n: PRstNode; begin case p.tok[p.idx].kind of tkPunct: begin if isInlineMarkupStart(p, '**') then begin inc(p.idx); n := newRstNode(rnStrongEmphasis); parseUntil(p, n, '**', true); addSon(father, n); end else if isInlineMarkupStart(p, '*'+'') then begin inc(p.idx); n := newRstNode(rnEmphasis); parseUntil(p, n, '*'+'', true); addSon(father, n); end else if isInlineMarkupStart(p, '``') then begin inc(p.idx); n := newRstNode(rnInlineLiteral); parseUntil(p, n, '``', false); addSon(father, n); end else if isInlineMarkupStart(p, '`'+'') then begin inc(p.idx); n := newRstNode(rnInterpretedText); parseUntil(p, n, '`'+'', true); n := parsePostfix(p, n); addSon(father, n); end else if isInlineMarkupStart(p, '|'+'') then begin inc(p.idx); n := newRstNode(rnSubstitutionReferences); parseUntil(p, n, '|'+'', false); addSon(father, n); end else begin parseBackslash(p, father); end; end; tkWord: parseURL(p, father); tkAdornment, tkOther, tkWhite: begin addSon(father, newLeaf(p)); inc(p.idx); end else assert(false); end end; function getDirective(var p: TRstParser): string; var j: int; begin if (p.tok[p.idx].kind = tkWhite) and 
(p.tok[p.idx+1].kind = tkWord) then begin j := p.idx; inc(p.idx); result := p.tok[p.idx].symbol; inc(p.idx); while p.tok[p.idx].kind in [tkWord, tkPunct, tkAdornment, tkOther] do begin if p.tok[p.idx].symbol = '::' then break; add(result, p.tok[p.idx].symbol); inc(p.idx); end; if (p.tok[p.idx].kind = tkWhite) then inc(p.idx); if p.tok[p.idx].symbol = '::' then begin inc(p.idx); if (p.tok[p.idx].kind = tkWhite) then inc(p.idx); end else begin p.idx := j; // set back result := '' // error end end else result := ''; end; function parseComment(var p: TRstParser): PRstNode; var indent: int; begin case p.tok[p.idx].kind of tkIndent, tkEof: begin if p.tok[p.idx+1].kind = tkIndent then begin inc(p.idx); // empty comment end else begin indent := p.tok[p.idx].ival; while True do begin case p.tok[p.idx].kind of tkEof: break; tkIndent: begin if (p.tok[p.idx].ival < indent) then break; end else begin end end; inc(p.idx) end end end else while not (p.tok[p.idx].kind in [tkIndent, tkEof]) do inc(p.idx); end; result := nil; end; type TDirKind = ( // must be ordered alphabetically! 
dkNone, dkAuthor, dkAuthors, dkCodeBlock, dkContainer, dkContents, dkFigure, dkImage, dkInclude, dkIndex, dkRaw, dkTitle ); const DirIds: array [0..11] of string = ( '', 'author', 'authors', 'code-block', 'container', 'contents', 'figure', 'image', 'include', 'index', 'raw', 'title' ); function getDirKind(const s: string): TDirKind; var i: int; begin i := binaryStrSearch(DirIds, s); if i >= 0 then result := TDirKind(i) else result := dkNone end; procedure parseLine(var p: TRstParser; father: PRstNode); begin while True do begin case p.tok[p.idx].kind of tkWhite, tkWord, tkOther, tkPunct: parseInline(p, father); else break; end end end; procedure parseSection(var p: TRstParser; result: PRstNode); forward; function parseField(var p: TRstParser): PRstNode; var col, indent: int; fieldname, fieldbody: PRstNode; begin result := newRstNode(rnField); col := p.tok[p.idx].col; inc(p.idx); // skip : fieldname := newRstNode(rnFieldname); parseUntil(p, fieldname, ':'+'', false); fieldbody := newRstNode(rnFieldbody); if p.tok[p.idx].kind <> tkIndent then parseLine(p, fieldbody); if p.tok[p.idx].kind = tkIndent then begin indent := p.tok[p.idx].ival; if indent > col then begin pushInd(p, indent); parseSection(p, fieldbody); popInd(p); end end; addSon(result, fieldname); addSon(result, fieldbody); end; function parseFields(var p: TRstParser): PRstNode; var col: int; begin result := nil; if (p.tok[p.idx].kind = tkIndent) and (p.tok[p.idx+1].symbol = ':'+'') then begin col := p.tok[p.idx].ival; // BUGFIX! 
result := newRstNode(rnFieldList); inc(p.idx); while true do begin addSon(result, parseField(p)); if (p.tok[p.idx].kind = tkIndent) and (p.tok[p.idx].ival = col) and (p.tok[p.idx+1].symbol = ':'+'') then inc(p.idx) else break end end end; function getFieldValue(n: PRstNode; const fieldname: string): string; var i: int; f: PRstNode; begin result := ''; if n.sons[1] = nil then exit; if (n.sons[1].kind <> rnFieldList) then InternalError('getFieldValue (2): ' + rstnodeKindToStr[n.sons[1].kind]); for i := 0 to rsonsLen(n.sons[1])-1 do begin f := n.sons[1].sons[i]; if cmpIgnoreStyle(addNodes(f.sons[0]), fieldname) = 0 then begin result := addNodes(f.sons[1]); if result = '' then result := #1#1; // indicates that the field exists exit end end end; function getArgument(n: PRstNode): string; begin if n.sons[0] = nil then result := '' else result := addNodes(n.sons[0]); end; function parseDotDot(var p: TRstParser): PRstNode; forward; function parseLiteralBlock(var p: TRstParser): PRstNode; var indent: int; n: PRstNode; begin result := newRstNode(rnLiteralBlock); n := newRstNode(rnLeaf, ''); if p.tok[p.idx].kind = tkIndent then begin indent := p.tok[p.idx].ival; inc(p.idx); while True do begin case p.tok[p.idx].kind of tkEof: break; tkIndent: begin if (p.tok[p.idx].ival < indent) then begin break; end else begin add(n.text, nl); add(n.text, repeatChar(p.tok[p.idx].ival - indent)); inc(p.idx) end end else begin add(n.text, p.tok[p.idx].symbol); inc(p.idx) end end end end else begin while not (p.tok[p.idx].kind in [tkIndent, tkEof]) do begin add(n.text, p.tok[p.idx].symbol); inc(p.idx) end end; addSon(result, n); end; function getLevel(var map: TLevelMap; var lvl: int; c: Char): int; begin if map[c] = 0 then begin inc(lvl); map[c] := lvl; end; result := map[c] end; function tokenAfterNewline(const p: TRstParser): int; begin result := p.idx; while true do case p.tok[result].kind of tkEof: break; tkIndent: begin inc(result); break end; else inc(result) end end; // 
// ---------------------------------------------------------------------------

// Decides whether the current '|' token starts a line block.
// ``j`` is the index returned by tokenAfterNewline(p) (presumably the first
// token of the next line -- TODO confirm against its definition).
// True if that token is a '|' in the same column as the current token, or if
// it sits in a column to the right of the current token.
function isLineBlock(const p: TRstParser): bool;
var
  j: int;
begin
  j := tokenAfterNewline(p);
  // `and` binds tighter than `or`: (same column and '|') or (deeper column)
  result := (p.tok[p.idx].col = p.tok[j].col) and (p.tok[j].symbol = '|'+'')
    or (p.tok[j].col > p.tok[p.idx].col)
end;

// True if the parser is at the very first token, or if the previous token is
// a tkIndent whose value equals the current indentation (currInd).
function predNL(const p: TRstParser): bool;
begin
  result := true;
  if (p.idx > 0) then
    result := (p.tok[p.idx-1].kind = tkIndent)
      and (p.tok[p.idx-1].ival = currInd(p))
end;

// True if the token at tokenAfterNewline(p) lies in a column to the right of
// the current token, is a word/other/punctuation token, and the token two
// positions before it is not '::'.
function isDefList(const p: TRstParser): bool;
var
  j: int;
begin
  j := tokenAfterNewline(p);
  result := (p.tok[p.idx].col < p.tok[j].col)
    and (p.tok[j].kind in [tkWord, tkOther, tkPunct])
    and (p.tok[j-2].symbol <> '::');
end;

// Classifies the construct that starts at the current token and returns the
// node kind of that construct. The decision is driven by the kind of the
// current token plus a series of ``match`` patterns / helper predicates that
// are tried in order; the first one that holds wins.
// Side effect: reports errGridTableNotImplemented when a grid table is seen.
// NOTE(review): the `'x'+''` spelling presumably forces a string (not char)
// literal for the Pascal-to-Nimrod translation -- confirm.
function whichSection(const p: TRstParser): TRstNodeKind;
begin
  case p.tok[p.idx].kind of
    tkAdornment: begin
      if match(p, p.idx+1, 'ii') then result := rnTransition
      else if match(p, p.idx+1, ' a') then result := rnTable
      else if match(p, p.idx+1, 'i'+'') then result := rnOverline
      else result := rnLeaf
    end;
    tkPunct: begin
      if match(p, tokenAfterNewLine(p), 'ai') then
        result := rnHeadline
      else if p.tok[p.idx].symbol = '::' then
        result := rnLiteralBlock
      else if predNL(p)
          and ((p.tok[p.idx].symbol = '+'+'') or
               (p.tok[p.idx].symbol = '*'+'') or
               (p.tok[p.idx].symbol = '-'+''))
          and (p.tok[p.idx+1].kind = tkWhite) then
        result := rnBulletList
      else if (p.tok[p.idx].symbol = '|'+'') and isLineBlock(p) then
        result := rnLineBlock
      else if (p.tok[p.idx].symbol = '..') and predNL(p) then
        result := rnDirective
      else if (p.tok[p.idx].symbol = ':'+'') and predNL(p) then
        result := rnFieldList
      else if match(p, p.idx, '(e) ') then
        result := rnEnumList
      else if match(p, p.idx, '+a+') then begin
        result := rnGridTable;
        rstMessage(p, errGridTableNotImplemented);
      end
      else if isDefList(p) then
        result := rnDefList
      else if match(p, p.idx, '-w') or match(p, p.idx, '--w')
          or match(p, p.idx, '/w') then
        result := rnOptionList
      else
        result := rnParagraph
    end;
    tkWord, tkOther, tkWhite: begin
      if match(p, tokenAfterNewLine(p), 'ai') then
        result := rnHeadline
      else if isDefList(p) then
        result := rnDefList
      else if match(p, p.idx, 'e) ') or match(p, p.idx, 'e. ') then
        result := rnEnumList
      else
        result := rnParagraph;
    end;
    else result := rnLeaf;
  end
end;

// Parses a line block ('|' items). Returns nil without consuming anything
// unless the current token is followed by a tkWhite token. Otherwise returns
// an rnLineBlock node with one rnLineBlockItem son per item; an item is
// continued as long as an indent of the starting column is followed by
// '|' and whitespace.
function parseLineBlock(var p: TRstParser): PRstNode;
var
  col: int;
  item: PRstNode;
begin
  result := nil;
  if p.tok[p.idx+1].kind = tkWhite then begin
    col := p.tok[p.idx].col;
    result := newRstNode(rnLineBlock);
    // the item's content is parsed relative to the column after "| "
    pushInd(p, p.tok[p.idx+2].col);
    inc(p.idx, 2);
    while true do begin
      item := newRstNode(rnLineBlockItem);
      parseSection(p, item);
      addSon(result, item);
      if (p.tok[p.idx].kind = tkIndent) and (p.tok[p.idx].ival = col)
          and (p.tok[p.idx+1].symbol = '|'+'')
          and (p.tok[p.idx+2].kind = tkWhite) then
        inc(p.idx, 3) // skip indent, '|' and the whitespace
      else
        break;
    end;
    popInd(p);
  end;
end;

// Parses paragraph content into ``result`` (the caller supplies the node).
// Stops at a double indent (the first of the two is consumed), at an indent
// that does not match the current indentation, at a construct that
// whichSection does not treat as paragraph continuation, after a trailing
// '::' literal block, or at any token kind not listed below.
procedure parseParagraph(var p: TRstParser; result: PRstNode);
begin
  while True do begin
    case p.tok[p.idx].kind of
      tkIndent: begin
        if p.tok[p.idx+1].kind = tkIndent then begin
          inc(p.idx);
          break
        end
        else if (p.tok[p.idx].ival = currInd(p)) then begin
          inc(p.idx);
          case whichSection(p) of
            rnParagraph, rnLeaf, rnHeadline, rnOverline, rnDirective:
              // the line break inside a paragraph becomes a single blank
              addSon(result, newRstNode(rnLeaf, ' '+''));
            rnLineBlock: addSonIfNotNil(result, parseLineBlock(p));
            else break;
          end;
        end
        else break
      end;
      tkPunct: begin
        if (p.tok[p.idx].symbol = '::') and (p.tok[p.idx+1].kind = tkIndent)
            and (currInd(p) < p.tok[p.idx+1].ival) then begin
          // '::' followed by a deeper indent: emit ':' and a literal block
          addSon(result, newRstNode(rnLeaf, ':'+''));
          inc(p.idx); // skip '::'
          addSon(result, parseLiteralBlock(p));
          break
        end
        else
          parseInline(p, result)
      end;
      tkWhite, tkWord, tkAdornment, tkOther: parseInline(p, result);
      else break;
    end
  end
end;

// Creates a fresh rnParagraph node and fills it via parseParagraph.
function parseParagraphWrapper(var p: TRstParser): PRstNode;
begin
  result := newRstNode(rnParagraph);
  parseParagraph(p, result);
end;

// Parses an underlined headline: the title line, then an indent token and
// an adornment token (both asserted). The node's level is looked up via
// getLevel from the first character of the adornment.
function parseHeadline(var p: TRstParser): PRstNode;
var
  c: Char;
begin
  result := newRstNode(rnHeadline);
  parseLine(p, result);
  assert(p.tok[p.idx].kind = tkIndent);
  assert(p.tok[p.idx+1].kind = tkAdornment);
  c := p.tok[p.idx+1].symbol[strStart];
  inc(p.idx, 2);
  result.level := getLevel(p.s.underlineToLevel, p.s.uLevel, c);
end;

type
  TIntSeq = array of int;

// Column of the last character of the current token.
function tokEnd(const p: TRstParser): int;
begin
  result := p.tok[p.idx].col + length(p.tok[p.idx].symbol) - 1;
end;

// Reads a table adornment line and stores the end column (tokEnd) of every
// adornment token in ``cols``; ``cols`` is resized as needed. Adornment
// tokens are expected to be separated by tkWhite tokens. A trailing tkIndent
// is consumed. The last entry is overwritten with 32000 so that the last
// column is effectively unbounded.
procedure getColumns(var p: TRstParser; var cols: TIntSeq);
var
  L: int;
begin
  L := 0;
  while true do begin
    inc(L);
    setLength(cols, L);
    cols[L-1] := tokEnd(p);
    assert(p.tok[p.idx].kind = tkAdornment);
    inc(p.idx);
    if p.tok[p.idx].kind <> tkWhite then break;
    inc(p.idx);
    if p.tok[p.idx].kind <> tkAdornment then break
  end;
  if p.tok[p.idx].kind = tkIndent then inc(p.idx);
  // last column has no limit:
  cols[L-1] := 32000;
end;

function parseDoc(var p: TRstParser): PRstNode; forward;

// Parses a simple table into an rnTable node with one rnTableRow per row.
// Each adornment line (re)defines the column boundaries via getColumns; the
// cells of the row parsed before such a line are re-tagged as header cells.
// The text of every cell is collected into a string and then parsed by a
// separate parser instance via parseDoc.
function parseSimpleTable(var p: TRstParser): PRstNode;
var
  cols: TIntSeq;
  row: array of string;
  j, i, last, line: int;
  c: Char;
  q: TRstParser;
  a, b: PRstNode;
begin
  result := newRstNode(rnTable);
{@ignore}
  cols := nil;
  row := nil;
{@emit cols := @[];}
{@emit row := @[];}
  a := nil;
  c := p.tok[p.idx].symbol[strStart];
  while true do begin
    if p.tok[p.idx].kind = tkAdornment then begin
      last := tokenAfterNewline(p);
      if p.tok[last].kind in [tkEof, tkIndent] then begin
        // skip last adornment line:
        p.idx := last;
        break
      end;
      getColumns(p, cols);
      setLength(row, length(cols));
      // an adornment line after a row marks that row as the table header
      if a <> nil then
        for j := 0 to rsonsLen(a)-1 do a.sons[j].kind := rnTableHeaderCell;
    end;
    if p.tok[p.idx].kind = tkEof then break;
    for j := 0 to high(row) do row[j] := '';
    // the following while loop iterates over the lines a single cell may span:
    line := p.tok[p.idx].line;
    while true do begin
      i := 0;
      while not (p.tok[p.idx].kind in [tkIndent, tkEof]) do begin
        if (tokEnd(p) <= cols[i]) then begin
          add(row[i], p.tok[p.idx].symbol);
          inc(p.idx);
        end
        else begin
          // token lies beyond column i: advance to the next column
          if p.tok[p.idx].kind = tkWhite then inc(p.idx);
          inc(i)
        end
      end;
      if p.tok[p.idx].kind = tkIndent then inc(p.idx);
      // a token inside the first column starts a new row
      if tokEnd(p) <= cols[0] then break;
      if p.tok[p.idx].kind in [tkEof, tkAdornment] then break;
      for j := 1 to high(row) do addChar(row[j], #10);
    end;
    // process all the cells:
    a := newRstNode(rnTableRow);
    for j := 0 to high(row) do begin
      initParser(q, p.s);
      q.col := cols[j];
      q.line := line-1;
      q.filename := p.filename;
      getTokens(row[j], false, q.tok);
      b := newRstNode(rnTableDataCell);
      addSon(b, parseDoc(q));
      addSon(a, b);
    end;
    addSon(result, a);
  end;
end;

// Returns an rnTransition node; consumes the current token and up to two
// tkIndent tokens that follow it.
function parseTransition(var p: TRstParser): PRstNode;
begin
  result := newRstNode(rnTransition);
  inc(p.idx);
  if p.tok[p.idx].kind = tkIndent then inc(p.idx);
  if p.tok[p.idx].kind = tkIndent then inc(p.idx);
end;

// Parses an over- and underlined headline. The two tokens at the start (the
// overline adornment and the token after it) are skipped, then title lines
// are read; a following line only continues the title if its indent is
// greater than the current indentation. The level is looked up via getLevel
// from the first character of the overline. A closing adornment token and a
// tkIndent after it are consumed if present.
function parseOverline(var p: TRstParser): PRstNode;
var
  c: char;
begin
  c := p.tok[p.idx].symbol[strStart];
  inc(p.idx, 2);
  result := newRstNode(rnOverline);
  while true do begin
    parseLine(p, result);
    if p.tok[p.idx].kind = tkIndent then begin
      inc(p.idx);
      if p.tok[p.idx-1].ival > currInd(p) then
        addSon(result, newRstNode(rnLeaf, ' '+''))
      else
        break
    end
    else break
  end;
  result.level := getLevel(p.s.overlineToLevel, p.s.oLevel, c);
  if p.tok[p.idx].kind = tkAdornment then begin
    inc(p.idx); // XXX: check?
    if p.tok[p.idx].kind = tkIndent then inc(p.idx);
  end
end;

// Parses a bullet list. Returns nil without consuming anything unless the
// bullet token is followed by a tkWhite token. Otherwise returns an
// rnBulletList node with one rnBulletItem son per item; the list continues
// as long as an indent of the starting column is followed by the same
// bullet symbol and whitespace.
function parseBulletList(var p: TRstParser): PRstNode;
var
  bullet: string;
  col: int;
  item: PRstNode;
begin
  result := nil;
  if p.tok[p.idx+1].kind = tkWhite then begin
    bullet := p.tok[p.idx].symbol;
    col := p.tok[p.idx].col;
    result := newRstNode(rnBulletList);
    pushInd(p, p.tok[p.idx+2].col);
    inc(p.idx, 2);
    while true do begin
      item := newRstNode(rnBulletItem);
      parseSection(p, item);
      addSon(result, item);
      if (p.tok[p.idx].kind = tkIndent) and (p.tok[p.idx].ival = col)
          and (p.tok[p.idx+1].symbol = bullet)
          and (p.tok[p.idx+2].kind = tkWhite) then
        inc(p.idx, 3) // skip indent, bullet and the whitespace
      else
        break;
    end;
    popInd(p);
  end;
end;

// Parses an option list into an rnOptionList node. Every item becomes an
// rnOptionListItem with two sons: an rnOptionGroup (the tokens up to the
// first whitespace token longer than one character) and an rnDescription.
// The description is parsed as a section if the next line is indented
// deeper than the current indentation, otherwise as a single line.
function parseOptionList(var p: TRstParser): PRstNode;
var
  a, b, c: PRstNode;
  j: int;
begin
  result := newRstNode(rnOptionList);
  while true do begin
    if match(p, p.idx, '-w') or match(p, p.idx, '--w')
        or match(p, p.idx, '/w') then begin
      a := newRstNode(rnOptionGroup);
      b := newRstNode(rnDescription);
      c := newRstNode(rnOptionListItem);
      while not (p.tok[p.idx].kind in [tkIndent, tkEof]) do begin
        // a run of two or more blanks separates option and description
        if (p.tok[p.idx].kind = tkWhite)
            and (length(p.tok[p.idx].symbol) > 1) then begin
          inc(p.idx);
          break
        end;
        addSon(a, newLeaf(p));
        inc(p.idx);
      end;
      j := tokenAfterNewline(p);
      if (j > 0) and (p.tok[j-1].kind = tkIndent)
          and (p.tok[j-1].ival > currInd(p)) then begin
        pushInd(p, p.tok[j-1].ival);
        parseSection(p, b);
        popInd(p);
      end
      else begin
        parseLine(p, b);
      end;
      if (p.tok[p.idx].kind = tkIndent) then inc(p.idx);
      addSon(c, a);
      addSon(c, b);
      addSon(result, c);
    end
    else break;
  end
end;

// Parses a definition list. Returns nil if the line after the current one
// is not indented deeper than the current indentation, if the token before
// that indent is '::', or if no item could be parsed. Otherwise returns an
// rnDefList node whose sons are rnDefItem nodes, each holding an rnDefName
// and an rnDefBody. If a term line turns out not to be followed by a body,
// the token index is reset to the start of that line.
function parseDefinitionList(var p: TRstParser): PRstNode;
var
  j, col: int;
  a, b, c: PRstNode;
begin
  result := nil;
  j := tokenAfterNewLine(p)-1;
  if (j >= 1) and (p.tok[j].kind = tkIndent)
      and (p.tok[j].ival > currInd(p))
      and (p.tok[j-1].symbol <> '::') then begin
    col := p.tok[p.idx].col;
    result := newRstNode(rnDefList);
    while true do begin
      j := p.idx; // remember the start of the term for backtracking
      a := newRstNode(rnDefName);
      parseLine(p, a);
      //writeln('after def line: ', p.tok[p.idx].ival :1, ' ', col : 1);
      if (p.tok[p.idx].kind = tkIndent)
          and (p.tok[p.idx].ival > currInd(p))
          and (p.tok[p.idx+1].symbol <> '::')
          and not (p.tok[p.idx+1].kind in [tkIndent, tkEof]) then begin
        pushInd(p, p.tok[p.idx].ival);
        b := newRstNode(rnDefBody);
        parseSection(p, b);
        c := newRstNode(rnDefItem);
        addSon(c, a);
        addSon(c, b);
        addSon(result, c);
        popInd(p);
      end
      else begin
        p.idx := j;
        break
      end;
      if (p.tok[p.idx].kind = tkIndent) and (p.tok[p.idx].ival = col) then begin
        inc(p.idx);
        j := tokenAfterNewLine(p)-1;
        // continue only if another term/body pair seems to follow
        if (j >= 1) and (p.tok[j].kind = tkIndent) and (p.tok[j].ival > col)
            and (p.tok[j-1].symbol <> '::')
            and (p.tok[j+1].kind <> tkIndent) then begin
        end
        else break
      end
    end;
    if rsonsLen(result) = 0 then result := nil
  end
end;

// Parses an enumerated list whose items start with one of the patterns
// '(e) ', 'e) ' or 'e. '. Returns nil (with the token index restored) if
// none of the patterns matches or if the following line neither starts in
// the column of the item text nor matches the same pattern. Otherwise
// returns an rnEnumList node with one rnEnumItem son per item.
function parseEnumList(var p: TRstParser): PRstNode;
const
  wildcards: array [0..2] of string = ('(e) ', 'e) ', 'e. ');
  // number of extra tokens the enumerator occupies for each pattern
  wildpos: array [0..2] of int = (1, 0, 0);
var
  w, col, j: int;
  item: PRstNode;
begin
  result := nil;
  w := 0;
  while w <= 2 do begin
    if match(p, p.idx, wildcards[w]) then break;
    inc(w);
  end;
  if w <= 2 then begin
    col := p.tok[p.idx].col;
    result := newRstNode(rnEnumList);
    inc(p.idx, wildpos[w]+3);
    j := tokenAfterNewLine(p);
    if (p.tok[j].col = p.tok[p.idx].col) or match(p, j, wildcards[w]) then begin
      pushInd(p, p.tok[p.idx].col);
      while true do begin
        item := newRstNode(rnEnumItem);
        parseSection(p, item);
        addSon(result, item);
        if (p.tok[p.idx].kind = tkIndent) and (p.tok[p.idx].ival = col)
            and match(p, p.idx+1, wildcards[w]) then
          inc(p.idx, wildpos[w]+4) // indent token plus the enumerator
        else
          break
      end;
      popInd(p);
    end
    else begin
      // not an enumerated list after all: undo the advance
      dec(p.idx, wildpos[w]+3);
      result := nil
    end
  end
end;

// Kind of the i-th son of ``father``; rnLeaf if there is no such son.
function sonKind(father: PRstNode; i: int): TRstNodeKind;
begin
  result := rnLeaf;
  if i < rsonsLen(father) then result := father.sons[i].kind;
end;

// Parses a sequence of body elements at the current indentation level and
// appends them to ``result``. Deeper indented material is parsed recursively
// into an rnBlockQuote node; a shallower indent or tkEof ends the section.
// Whenever the specialised parser yields nil (and the construct is not a
// directive) the material is parsed as a paragraph instead.
// Finally, if the first son is a paragraph and the second one is not, the
// first son's kind is changed to rnInner.
procedure parseSection(var p: TRstParser; result: PRstNode);
var
  a: PRstNode;
  k: TRstNodeKind;
  leave: bool;
begin
  while true do begin
    leave := false;
    assert(p.idx >= 0);
    while p.tok[p.idx].kind = tkIndent do begin
      if currInd(p) = p.tok[p.idx].ival then begin
        inc(p.idx);
      end
      else if p.tok[p.idx].ival > currInd(p) then begin
        pushInd(p, p.tok[p.idx].ival);
        a := newRstNode(rnBlockQuote);
        parseSection(p, a);
        addSon(result, a);
        popInd(p);
      end
      else begin
        leave := true;
        break;
      end
    end;
    if leave then break;
    if p.tok[p.idx].kind = tkEof then break;
    a := nil;
    k := whichSection(p);
    case k of
      rnLiteralBlock: begin
        inc(p.idx); // skip '::'
        a := parseLiteralBlock(p);
      end;
      rnBulletList: a := parseBulletList(p);
      rnLineblock: a := parseLineBlock(p);
      rnDirective: a := parseDotDot(p);
      rnEnumList: a := parseEnumList(p);
      rnLeaf: begin
        rstMessage(p, errNewSectionExpected);
      end;
      rnParagraph: begin end;
      rnDefList: a := parseDefinitionList(p);
      rnFieldList: begin
        dec(p.idx);
        a := parseFields(p);
      end;
      rnTransition: a := parseTransition(p);
      rnHeadline: a := parseHeadline(p);
      rnOverline: a := parseOverline(p);
      rnTable: a := parseSimpleTable(p);
      rnOptionList: a := parseOptionList(p);
      else InternalError('rst.parseSection()');
    end;
    // directives may legitimately produce no node; everything else falls
    // back to a paragraph
    if (a = nil) and (k <> rnDirective) then begin
      a := newRstNode(rnParagraph);
      parseParagraph(p, a);
    end;
    addSonIfNotNil(result, a);
  end;
  if (sonKind(result, 0) = rnParagraph)
      and (sonKind(result, 1) <> rnParagraph) then
    result.sons[0].kind := rnInner;
end;

// Parses a section into a fresh rnInner node and then strips rnInner nodes
// that have exactly one son, returning the innermost remaining node.
function parseSectionWrapper(var p: TRstParser): PRstNode;
begin
  result := newRstNode(rnInner);
  parseSection(p, result);
  while (result.kind = rnInner) and (rsonsLen(result) = 1) do
    result := result.sons[0]
end;

// Parses all tokens of ``p``; reports errGeneralParseError if parsing
// stopped before the tkEof token.
function parseDoc(var p: TRstParser): PRstNode;
begin
  result := parseSectionWrapper(p);
  if p.tok[p.idx].kind <> tkEof then rstMessage(p, errGeneralParseError);
end;

type
  TDirFlag = (hasArg, hasOptions, argIsFile);
  TDirFlags = set of TDirFlag;
  TSectionParser = function (var p: TRstParser): PRstNode;

// Generic directive parser. Always returns an rnDirective node with exactly
// three sons (each may be nil):
//   sons[0]  the argument (rnDirArg), parsed only if ``hasArg`` is set; with
//            ``argIsFile`` only word/other/punct/adornment tokens are taken,
//            otherwise the rest of the line is parsed
//   sons[1]  the options (result of parseFields), parsed only if
//            ``hasOptions`` is set and an indent >= 3 followed by ':' is seen
//   sons[2]  the content, produced by ``contentParser`` if it is assigned
//            and the content is indented deeper than the current indentation
function parseDirective(var p: TRstParser; flags: TDirFlags;
                        contentParser: TSectionParser): PRstNode;
var
  args, options, content: PRstNode;
begin
  result := newRstNode(rnDirective);
  args := nil;
  options := nil;
  if hasArg in flags then begin
    args := newRstNode(rnDirArg);
    if argIsFile in flags then begin
      while True do begin
        case p.tok[p.idx].kind of
          tkWord, tkOther, tkPunct, tkAdornment: begin
            addSon(args, newLeaf(p));
            inc(p.idx);
          end;
          else break;
        end
      end
    end
    else begin
      parseLine(p, args);
    end
  end;
  addSon(result, args);
  if hasOptions in flags then begin
    if (p.tok[p.idx].kind = tkIndent) and (p.tok[p.idx].ival >= 3)
        and (p.tok[p.idx+1].symbol = ':'+'') then
      options := parseFields(p);
  end;
  addSon(result, options);
  if (assigned(contentParser)) and (p.tok[p.idx].kind = tkIndent)
      and (p.tok[p.idx].ival > currInd(p)) then begin
    pushInd(p, p.tok[p.idx].ival);
    content := contentParser(p);
    popInd(p);
    addSon(result, content)
  end
  else
    addSon(result, nil);
end;

// Handles the ``include`` directive. The file named by the argument is
// located with findFile; if it cannot be found errCannotOpenFile is reported
// and nil is returned. With a non-empty ``literal`` option the file content
// becomes a single rnLiteralBlock, otherwise the file is tokenized and
// parsed with a separate parser that shares the state ``p.s``.
// NOTE(review): of the options listed below only ``literal`` is consulted
// in this routine.
function dirInclude(var p: TRstParser): PRstNode;
(*
The following options are recognized:

start-after : text to find in the external data file
    Only the content after the first occurrence of the specified text will
    be included.
end-before : text to find in the external data file
    Only the content before the first occurrence of the specified text
    (but after any after text) will be included.
literal : flag (empty)
    The entire included text is inserted into the document as a single
    literal block (useful for program listings).
encoding : name of text encoding
    The text encoding of the external data file. Defaults to the document's
    encoding (if specified).
*)
var
  n: PRstNode;
  filename, path: string;
  q: TRstParser;
begin
  result := nil;
  n := parseDirective(p, {@set}[hasArg, argIsFile, hasOptions], nil);
  filename := strip(addNodes(n.sons[0]));
  path := findFile(filename);
  if path = '' then
    rstMessage(p, errCannotOpenFile, filename)
  else begin
    // XXX: error handling; recursive file inclusion!
    if getFieldValue(n, 'literal') <> '' then begin
      result := newRstNode(rnLiteralBlock);
      addSon(result, newRstNode(rnLeaf, readFile(path)));
    end
    else begin
      initParser(q, p.s);
      q.filename := filename;
      getTokens(readFile(path), false, q.tok);
      // workaround a GCC bug:
      if find(q.tok[high(q.tok)].symbol, #0#1#2) > 0 then begin
        InternalError('Too many binary zeros in include file');
      end;
      result := parseDoc(q);
    end
  end
end;

// Handles the ``code-block`` directive; the result has kind rnCodeBlock.
// If a ``file`` option is given, the content (sons[2]) is replaced by a
// literal block holding the content of that file. If the file cannot be
// found, errCannotOpenFile is reported and the parsed content is left
// untouched (no attempt is made to read from an empty path).
function dirCodeBlock(var p: TRstParser): PRstNode;
var
  n: PRstNode;
  filename, path: string;
begin
  result := parseDirective(p, {@set}[hasArg, hasOptions], parseLiteralBlock);
  filename := strip(getFieldValue(result, 'file'));
  if filename <> '' then begin
    path := findFile(filename);
    if path = '' then
      rstMessage(p, errCannotOpenFile, filename)
    else begin
      n := newRstNode(rnLiteralBlock);
      addSon(n, newRstNode(rnLeaf, readFile(path)));
      result.sons[2] := n;
    end
  end;
  result.kind := rnCodeBlock;
end;

// Handles the ``container`` directive: argument plus section content;
// the result has kind rnContainer.
function dirContainer(var p: TRstParser): PRstNode;
begin
  result := parseDirective(p, {@set}[hasArg], parseSectionWrapper);
  assert(result.kind = rnDirective);
  assert(rsonsLen(result) = 3);
  result.kind := rnContainer;
end;

// Handles the ``image`` directive: file argument and options, no content;
// the result has kind rnImage.
function dirImage(var p: TRstParser): PRstNode;
begin
  result := parseDirective(p, {@set}[hasOptions, hasArg, argIsFile], nil);
  result.kind := rnImage
end;

// Handles the ``figure`` directive: file argument, options and section
// content; the result has kind rnFigure.
function dirFigure(var p: TRstParser): PRstNode;
begin
  result := parseDirective(p, {@set}[hasOptions, hasArg, argIsFile],
                           parseSectionWrapper);
  result.kind := rnFigure
end;

// Handles the ``title`` directive: argument only; the result has kind
// rnTitle.
function dirTitle(var p: TRstParser): PRstNode;
begin
  result := parseDirective(p, {@set}[hasArg], nil);
  result.kind := rnTitle
end;

// Handles the ``contents`` directive: argument only; the result has kind
// rnContents.
function dirContents(var p: TRstParser): PRstNode;
begin
  result := parseDirective(p, {@set}[hasArg], nil);
  result.kind := rnContents
end;

// Handles the ``index`` directive: section content only; the result has
// kind rnIndex.
function dirIndex(var p: TRstParser): PRstNode;
begin
  result := parseDirective(p, {@set}[], parseSectionWrapper);
  result.kind := rnIndex
end;

// Handles the ``raw`` directive; the result has kind rnRaw. If a ``file``
// option is given and the file is found, the result is replaced by a new
// rnRaw node whose only son is a leaf with the file's content; if the file
// is not found errCannotOpenFile is reported.
// NOTE(review): of the options listed below only ``file`` is consulted in
// this routine.
function dirRaw(var p: TRstParser): PRstNode;
(*
The following options are recognized:

file : string (newlines removed)
    The local filesystem path of a raw data file to be included.
url : string (whitespace removed)
    An Internet URL reference to a raw data file to be included.
encoding : name of text encoding
    The text encoding of the external raw data (file or URL).
    Defaults to the document's encoding (if specified).
*)
var
  filename, path, f: string;
begin
  result := parseDirective(p, {@set}[hasOptions], parseSectionWrapper);
  result.kind := rnRaw;
  filename := getFieldValue(result, 'file');
  if filename <> '' then begin
    path := findFile(filename);
    if path = '' then
      rstMessage(p, errCannotOpenFile, filename)
    else begin
      f := readFile(path);
      result := newRstNode(rnRaw);
      addSon(result, newRstNode(rnLeaf, f));
    end
  end
end;

// Parses an explicit markup block (one that starts with '..'):
//   * a directive: dispatched on getDirKind; unknown names are reported
//     with errInvalidDirectiveX
//   * ' _'  a hyperlink target, registered via setRef
//   * ' |'  a substitution definition (``replace`` or ``image``),
//           registered via setSub
//   * ' ['  a footnote or citation, registered via setRef
//   * anything else is handed to parseComment
// Only directives and comments produce a node; the other forms return nil.
function parseDotDot(var p: TRstParser): PRstNode;
var
  d: string;
  col: int;
  a, b: PRstNode;
begin
  result := nil;
  // ``b`` must not reach setSub uninitialised when the substitution names an
  // unsupported directive
  b := nil;
  col := p.tok[p.idx].col;
  inc(p.idx);
  d := getDirective(p);
  if d <> '' then begin
    pushInd(p, col);
    case getDirKind(d) of
      dkInclude: result := dirInclude(p);
      dkImage: result := dirImage(p);
      dkFigure: result := dirFigure(p);
      dkTitle: result := dirTitle(p);
      dkContainer: result := dirContainer(p);
      dkContents: result := dirContents(p);
      dkRaw: result := dirRaw(p);
      dkCodeblock: result := dirCodeBlock(p);
      dkIndex: result := dirIndex(p);
      else rstMessage(p, errInvalidDirectiveX, d);
    end;
    popInd(p);
  end
  else if match(p, p.idx, ' _') then begin
    // hyperlink target:
    inc(p.idx, 2);
    a := getReferenceName(p, ':'+'');
    if p.tok[p.idx].kind = tkWhite then inc(p.idx);
    b := untilEol(p);
    setRef(p, rstnodeToRefname(a), b);
  end
  else if match(p, p.idx, ' |') then begin
    // substitution definitions:
    inc(p.idx, 2);
    a := getReferenceName(p, '|'+'');
    if p.tok[p.idx].kind = tkWhite then inc(p.idx);
    if cmpIgnoreStyle(p.tok[p.idx].symbol, 'replace') = 0 then begin
      inc(p.idx);
      expect(p, '::');
      b := untilEol(p);
    end
    else if cmpIgnoreStyle(p.tok[p.idx].symbol, 'image') = 0 then begin
      inc(p.idx);
      b := dirImage(p);
    end
    else
      rstMessage(p, errInvalidDirectiveX, p.tok[p.idx].symbol);
    setSub(p, addNodes(a), b);
  end
  else if match(p, p.idx, ' [') then begin
    // footnotes, citations
    inc(p.idx, 2);
    a := getReferenceName(p, ']'+'');
    if p.tok[p.idx].kind = tkWhite then inc(p.idx);
    b := untilEol(p);
    setRef(p, rstnodeToRefname(a), b);
  end
  else
    result := parseComment(p);
end;

// Post-processes the tree rooted at ``n`` and returns the (possibly
// replaced) node:
//   * a substitution reference is replaced by its registered value, or else
//     by a leaf holding the environment variable of the same name; if both
//     are missing warnUnknownSubstitutionX is reported
//   * a reference with a known target is wrapped into an rnHyperlink node
//     (the reference itself becomes rnInner)
//   * an rnContents node sets ``p.hasToc``
//   * leaves are left alone; all other nodes are processed recursively
function resolveSubs(var p: TRstParser; n: PRstNode): PRstNode;
var
  i, x: int;
  y: PRstNode;
  e, key: string;
begin
  result := n;
  if n = nil then exit;
  case n.kind of
    rnSubstitutionReferences: begin
      x := findSub(p, n);
      if x >= 0 then
        result := p.s.subs[x].value
      else begin
        key := addNodes(n);
        e := getEnv(key);
        if e <> '' then
          result := newRstNode(rnLeaf, e)
        else
          rstMessage(p, warnUnknownSubstitutionX, key);
      end
    end;
    rnRef: begin
      y := findRef(p, rstnodeToRefname(n));
      if y <> nil then begin
        result := newRstNode(rnHyperlink);
        n.kind := rnInner;
        addSon(result, n);
        addSon(result, y);
      end
    end;
    rnLeaf: begin end;
    rnContents: p.hasToc := true;
    else begin
      for i := 0 to rsonsLen(n)-1 do
        n.sons[i] := resolveSubs(p, n.sons[i]);
    end
  end
end;

// Entry point: tokenizes and parses ``text`` and resolves substitutions and
// references in the resulting tree. ``filename``, ``line`` and ``column``
// are stored in the parser (the signature notes ``filename`` is used for
// error messages). ``hasToc`` receives the parser's hasToc flag, which
// resolveSubs sets when it meets an rnContents node. A nil ``text`` is
// reported as errCannotOpenFile.
function rstParse(const text: string; // the text to be parsed
                  skipPounds: bool;
                  const filename: string; // for error messages
                  line, column: int;
                  var hasToc: bool): PRstNode;
var
  p: TRstParser;
begin
  if isNil(text) then
    rawMessage(errCannotOpenFile, filename);
  initParser(p, newSharedState());
  p.filename := filename;
  p.line := line;
  p.col := column;
  getTokens(text, skipPounds, p.tok);
  result := resolveSubs(p, parseDoc(p));
  hasToc := p.hasToc;
end;

end.