//
//
// The Nimrod Compiler
// (c) Copyright 2009 Andreas Rumpf
//
// See the file "copying.txt", included in this
// distribution, for details about the copyright.
//
unit rst;
// This module implements a *reStructuredText* parser. A large
// subset is provided.
interface
{$include 'config.inc'}
uses
nsystem, nos, msgs, strutils, platform, nhashes, ropes, charsets, options;
type
  // All node kinds that can occur in the syntax tree of an RST document.
  // NOTE: ``rstnodekindToStr`` is an array indexed by this enumeration, so
  // the order and number of values here must stay in sync with that table.
  TRstNodeKind = (
    rnInner,             // an inner node or a root
    rnHeadline,          // a headline
    rnOverline,          // an over- and underlined headline
    rnTransition,        // a transition (the ------------- thingie)
    rnParagraph,         // a paragraph

    rnBulletList,        // a bullet list
    rnBulletItem,        // a bullet item
    rnEnumList,          // an enumerated list
    rnEnumItem,          // an enumerated item

    rnDefList,           // a definition list
    rnDefItem,           // an item of a definition list consisting of ...
    rnDefName,           // ... a name part ...
    rnDefBody,           // ... and a body part ...

    rnFieldList,         // a field list
    rnField,             // a field item
    rnFieldName,         // consisting of a field name ...
    rnFieldBody,         // ... and a field body

    rnOptionList,
    rnOptionListItem,
    rnOptionGroup,
    rnOption,
    rnOptionString,
    rnOptionArgument,
    rnDescription,

    rnLiteralBlock,
    rnQuotedLiteralBlock,

    rnLineBlock,         // the | thingie
    rnLineBlockItem,     // sons of the | thing

    rnBlockQuote,        // text just indented

    rnTable,
    rnGridTable,
    rnTableRow,
    rnTableHeaderCell,
    rnTableDataCell,

    rnLabel,             // used for footnotes and other things
    rnFootnote,          // a footnote
    rnCitation,          // similar to footnote

    rnStandaloneHyperlink,
    rnHyperlink,
    rnRef,
    rnDirective,         // a directive
    rnDirArg,
    rnRaw,
    rnTitle,
    rnContents,
    rnImage,
    rnFigure,
    rnCodeBlock,
    rnContainer,         // ``container`` directive
    rnIndex,             // index directive:
                         // .. index::
                         //   key
                         //     * `file#id <file#id>`_
                         //     * `file#id <file#id>`_
    rnSubstitutionDef,   // a definition of a substitution
    rnGeneralRole,

    // Inline markup:
    rnSub,
    rnSup,
    rnIdx,
    rnEmphasis,          // "*"
    rnStrongEmphasis,    // "**"
    rnInterpretedText,   // "`"
    rnInlineLiteral,     // "``"
    rnSubstitutionReferences, // "|"

    rnLeaf               // a leaf; the node's text field contains the leaf val
  );
const
// Printable name for every node kind. The array is indexed by TRstNodeKind,
// so the entries must be listed in exactly the same order as the enumeration
// values (each name is the enum identifier without its ``rn`` prefix).
rstnodekindToStr: array [TRstNodeKind] of string = (
'Inner', 'Headline', 'Overline', 'Transition', 'Paragraph',
'BulletList', 'BulletItem', 'EnumList', 'EnumItem', 'DefList', 'DefItem',
'DefName', 'DefBody', 'FieldList', 'Field', 'FieldName', 'FieldBody',
'OptionList', 'OptionListItem', 'OptionGroup', 'Option', 'OptionString',
'OptionArgument', 'Description', 'LiteralBlock', 'QuotedLiteralBlock',
'LineBlock', 'LineBlockItem', 'BlockQuote', 'Table', 'GridTable',
'TableRow', 'TableHeaderCell', 'TableDataCell', 'Label', 'Footnote',
'Citation', 'StandaloneHyperlink', 'Hyperlink', 'Ref', 'Directive',
'DirArg', 'Raw', 'Title', 'Contents', 'Image', 'Figure', 'CodeBlock',
'Container', 'Index', 'SubstitutionDef', 'GeneralRole',
'Sub', 'Sup', 'Idx', 'Emphasis', 'StrongEmphasis', 'InterpretedText',
'InlineLiteral', 'SubstitutionReferences', 'Leaf'
);
type
// the syntax tree of RST:
// A tree node is a heap-allocated record referenced through PRSTNode; the
// children are kept in a dynamic array (``sons``).
PRSTNode = ^TRstNode;
TRstNodeSeq = array of PRstNode;
TRSTNode = record
kind: TRstNodeKind;
text: string; // valid for leafs in the AST; and the title of
// the document or the section
level: int; // valid for some node kinds
sons: TRstNodeSeq; // the node's sons
end {@acyclic};
// Entry point of the parser. NOTE(review): the implementation is not part of
// this section of the file; the parameter comments below are the original
// author's.
function rstParse(const text: string; // the text to be parsed
skipPounds: bool;
const filename: string; // for error messages
line, column: int;
var hasToc: bool): PRstNode;
// Returns the number of sons of `n`.
function rsonsLen(n: PRstNode): int;
// Allocates a new node of the given kind; the second overload also sets the
// node's `text` field to `s`.
function newRstNode(kind: TRstNodeKind): PRstNode; overload;
function newRstNode(kind: TRstNodeKind; const s: string): PRstNode; overload;
// Appends `son` to the sons of `father`.
procedure addSon(father, son: PRstNode);
// Builds a reference name from the leaf texts below `n`: letters are
// lower-cased, runs of other characters become a single '-', and a 'Z' is
// prepended if the name would start with a digit.
function rstnodeToRefname(n: PRstNode): string;
// Concatenates the texts of all leaves below `n`.
function addNodes(n: PRstNode): string;
// Looks up `fieldname` (compared with cmpIgnoreStyle) in the field list stored
// in n.sons[1]. Returns '' if the field is missing and #1#1 if the field
// exists but has an empty body.
function getFieldValue(n: PRstNode; const fieldname: string): string;
// Returns the text of n.sons[0], or '' if that son is nil.
function getArgument(n: PRstNode): string;
// index handling:
// Adds the pair (key, val) to the definition list `index`; a key that already
// exists gets its values collected in a bullet list.
procedure setIndexPair(index, key, val: PRstNode);
// Sorts the items of the definition list `a` by the text of their names.
procedure sortIndex(a: PRstNode);
// Removes from `index` all hyperlinks whose target starts with `filename`
// followed by '#'.
procedure clearIndex(index: PRstNode; const filename: string);
implementation
// ----------------------------- scanner part --------------------------------
const
// Characters that may form a word token: ASCII letters, digits and every
// byte >= 128 (so non-ASCII bytes are always treated as word characters).
SymChars: TCharSet = ['a'..'z', 'A'..'Z', '0'..'9', #128..#255];
type
// Token classes produced by the scanner. getAdornment downgrades a run of
// at most 3 identical punctuation characters from tkAdornment to tkPunct
// (see rawGetTok).
TTokType = (tkEof, tkIndent, tkWhite, tkWord, tkAdornment, tkPunct, tkOther);
TToken = record // a RST token
kind: TTokType; // the type of the token
ival: int; // the indentation or parsed integer value
symbol: string; // the parsed symbol as string
line, col: int; // line and column of the token
end;
TTokenSeq = array of TToken;
// Scanner state.
TLexer = object(NObject)
buf: PChar; // the text being scanned; scanning stops at a #0 character
bufpos: int; // current read position in `buf`
line, col, baseIndent: int; // current line/column; `baseIndent` is
// subtracted from indentations and columns
skipPounds: bool; // if set, up to two leading '#' per line are skipped
end;
procedure getThing(var L: TLexer; var tok: TToken; const s: TCharSet);
// Scans a token that starts at the current position: the first character is
// always consumed, further characters are consumed as long as they are in `s`.
// The token is marked as tkWord; callers may change the kind afterwards.
var
  cur: int;
begin
  tok.kind := tkWord;
  tok.line := L.line;
  tok.col := L.col;
  cur := L.bufpos;
  repeat
    addChar(tok.symbol, L.buf[cur]);
    inc(cur);
  until not (L.buf[cur] in s);
  // advance the lexer by the number of consumed characters
  inc(L.col, cur - L.bufpos);
  L.bufpos := cur;
end;
procedure getAdornment(var L: TLexer; var tok: TToken);
// Scans a run of one repeated character (the character at the current
// position) and stores it in `tok` as a tkAdornment token.
var
  cur: int;
  fill: char;
begin
  tok.kind := tkAdornment;
  tok.line := L.line;
  tok.col := L.col;
  cur := L.bufpos;
  fill := L.buf[cur];
  repeat
    addChar(tok.symbol, L.buf[cur]);
    inc(cur);
  until L.buf[cur] <> fill;
  // advance the lexer by the length of the run
  inc(L.col, cur - L.bufpos);
  L.bufpos := cur
end;
// Skips the newline at position `start` and measures the indentation of the
// line that follows. Returns 0 at the end of the buffer. If the following
// line is blank, the indentation of the next line is returned instead
// (computed recursively). As a side effect L.bufpos is set.
function getIndentAux(var L: TLexer; start: int): int;
var
buf: PChar;
pos: int;
begin
pos := start;
buf := L.buf;
// skip the newline (but include it in the token!)
if buf[pos] = #13 then begin
if buf[pos+1] = #10 then inc(pos, 2) else inc(pos);
end
else if buf[pos] = #10 then inc(pos);
// with skipPounds up to two '#' at the start of the line are ignored
if L.skipPounds then begin
if buf[pos] = '#' then inc(pos);
if buf[pos] = '#' then inc(pos);
end;
result := 0;
while True do begin
case buf[pos] of
' ', #11, #12: begin
inc(pos);
inc(result);
end;
#9: begin
// a tab advances to the next multiple of 8
inc(pos);
result := result - (result mod 8) + 8;
end;
else break; // EndOfFile also leaves the loop
end;
end;
if buf[pos] = #0 then result := 0
else if (buf[pos] = #10) or (buf[pos] = #13) then begin
// look at the next line for proper indentation:
result := getIndentAux(L, pos);
end;
// This assignment also runs after the recursive call has returned, so
// L.bufpos ends up at the position computed by the outermost call (i.e. at
// the newline of a blank line, which is then scanned again by the lexer).
L.bufpos := pos; // no need to set back buf
end;
procedure getIndent(var L: TLexer; var tok: TToken);
// Produces a tkIndent token for the newline at the current position. The
// token's `ival` is the indentation of the following line relative to
// L.baseIndent (never negative); its symbol is a newline followed by that
// many characters produced by repeatChar.
var
  rawIndent: int;
begin
  inc(L.line);
  tok.kind := tkIndent;
  tok.line := L.line;
  tok.col := 0;
  // skip the newline (but include it in the token!)
  rawIndent := getIndentAux(L, L.bufpos);
  L.col := rawIndent; // the lexer column is not reduced by baseIndent
  tok.ival := max(rawIndent - L.baseIndent, 0);
  tok.symbol := nl +{&} repeatChar(tok.ival);
end;
// Scans the next token at L.bufpos into `tok`, dispatching on the first
// character. The token's column is made relative to L.baseIndent at the end.
procedure rawGetTok(var L: TLexer; var tok: TToken);
var
c: Char;
begin
tok.symbol := '';
tok.ival := 0;
c := L.buf[L.bufpos];
case c of
'a'..'z', 'A'..'Z', #128..#255, '0'..'9': getThing(L, tok, SymChars);
' ', #9, #11, #12: begin
// whitespace: the first character is always consumed, then only blanks
// and tabs continue the token
getThing(L, tok, {@set}[' ', #9]);
tok.kind := tkWhite;
// trailing whitespace is dropped: the recursive call resets `tok` and
// scans the newline instead
if L.buf[L.bufpos] in [#13, #10] then
rawGetTok(L, tok); // ignore spaces before \n
end;
#13, #10: getIndent(L, tok);
'!', '"', '#', '$', '%', '&', '''',
'(', ')', '*', '+', ',', '-', '.', '/',
':', ';', '<', '=', '>', '?', '@', '[', '\', ']',
'^', '_', '`', '{', '|', '}', '~': begin
// a run of identical punctuation characters; runs of up to 3 characters
// are ordinary punctuation, longer ones stay adornments
getAdornment(L, tok);
if length(tok.symbol) <= 3 then tok.kind := tkPunct;
end;
else begin
tok.line := L.line;
tok.col := L.col;
if c = #0 then
// end of buffer: nothing is consumed
tok.kind := tkEof
else begin
// any other single character
tok.kind := tkOther;
addChar(tok.symbol, c);
inc(L.bufpos);
inc(L.col);
end
end
end;
tok.col := max(tok.col - L.baseIndent, 0);
end;
// Tokenizes `buffer` and appends the tokens to `tokens`; the last token
// appended is always tkEof. With `skipPounds` up to two leading '#'
// characters and the blanks after them are skipped; the number of skipped
// blanks becomes the lexer's base indentation.
procedure getTokens(const buffer: string; skipPounds: bool;
var tokens: TTokenSeq);
var
L: TLexer;
len: int;
begin
{@ignore}
fillChar(L, sizeof(L), 0);
{@emit}
len := length(tokens);
L.buf := PChar(buffer);
L.line := 1;
// skip UTF-8 BOM
if (L.buf[0] = #239) and (L.buf[1] = #187) and (L.buf[2] = #191) then
inc(L.bufpos, 3);
L.skipPounds := skipPounds;
if skipPounds then begin
if L.buf[L.bufpos] = '#' then inc(L.bufpos);
if L.buf[L.bufpos] = '#' then inc(L.bufpos);
L.baseIndent := 0;
while L.buf[L.bufpos] = ' ' do begin
inc(L.bufpos);
inc(L.baseIndent);
end
end;
// grow the sequence by one token at a time until tkEof has been scanned
while true do begin
inc(len);
setLength(tokens, len);
rawGetTok(L, tokens[len-1]);
if tokens[len-1].kind = tkEof then break;
end;
// leading whitespace at the very start is turned into an indentation token
// whose value is the length of the whitespace
if tokens[0].kind = tkWhite then begin // BUGFIX
tokens[0].ival := length(tokens[0].symbol);
tokens[0].kind := tkIndent
end
end;
// --------------------------------------------------------------------------
procedure addSon(father, son: PRstNode);
// Appends `son` as the last son of `father`.
begin
  setLength(father.sons, length(father.sons)+1);
  father.sons[high(father.sons)] := son;
end;
procedure addSonIfNotNil(father, son: PRstNode);
// Like addSon, but silently ignores a nil `son`.
begin
  if son = nil then exit;
  addSon(father, son);
end;
function rsonsLen(n: PRstNode): int;
// Number of sons of `n`.
begin
  result := high(n.sons) + 1
end;
// Allocates a new node of the given kind. All other fields are cleared (the
// Pascal build zero-fills the record; the directive block below emits an
// empty `sons` sequence for the translated build).
function newRstNode(kind: TRstNodeKind): PRstNode; overload;
begin
new(result);
{@ignore}
fillChar(result^, sizeof(result^), 0);
{@emit
result.sons := @[];
}
result.kind := kind;
end;
function newRstNode(kind: TRstNodeKind; const s: string): PRstNode; overload;
// Allocates a new node of the given kind whose `text` is `s`.
var
  node: PRstNode;
begin
  node := newRstNode(kind);
  node.text := s;
  result := node;
end;
// ---------------------------------------------------------------------------
type
// Maps an adornment character to its section level; 0 means "not seen yet"
// (see getLevel).
TLevelMap = array [Char] of int;
// A (key, node) pair; used for both substitutions and references.
TSubstitution = record
key: string;
value: PRstNode;
end;
// State that is referenced by a parser through a pointer (`TRstParser.s`).
TSharedState = record
uLevel, oLevel: int; // counters for the section levels
subs: array of TSubstitution; // substitutions
refs: array of TSubstitution; // references
underlineToLevel: TLevelMap;
// Saves for each possible title adornment character its level in the
// current document. This is for single underline adornments.
overlineToLevel: TLevelMap;
// Saves for each possible title adornment character its level in the
// current document. This is for over-underline adornments.
end;
PSharedState = ^TSharedState;
// The parser state.
TRstParser = object(NObject)
idx: int; // index of the current token in `tok`
tok: TTokenSeq; // the token sequence being parsed
s: PSharedState; // shared state (substitutions, references, levels)
indentStack: array of int; // stack of indentations; the top is the current one
filename: string; // used for the positions of messages
line, col: int; // offsets added to token positions in messages
hasToc: bool;
end;
// Allocates a fresh, cleared shared state: the Pascal build zero-fills the
// record, the directive blocks below emit empty `subs` and `refs` sequences
// for the translated build.
function newSharedState(): PSharedState;
begin
new(result);
{@ignore}
fillChar(result^, sizeof(result^), 0);
{@emit}
{@emit
result.subs := @[];}
{@emit
result.refs := @[];}
end;
function tokInfo(const p: TRstParser; const tok: TToken): TLineInfo;
// Computes the line information of `tok`: the token's position is relative,
// so the parser's line and column offsets are added.
var
  absLine, absCol: int;
begin
  absLine := p.line + tok.line;
  absCol := p.col + tok.col;
  result := newLineInfo(p.filename, absLine, absCol);
end;
procedure rstMessage(const p: TRstParser; msgKind: TMsgKind;
                     const arg: string); overload;
// Reports a message with argument `arg` at the position of the current token.
var
  info: TLineInfo;
begin
  info := tokInfo(p, p.tok[p.idx]);
  liMessage(info, msgKind, arg);
end;
procedure rstMessage(const p: TRstParser; msgKind: TMsgKind); overload;
// Reports a message at the position of the current token; the token's symbol
// is used as the message argument.
begin
  rstMessage(p, msgKind, p.tok[p.idx].symbol);
end;
function currInd(const p: TRstParser): int;
// The current indentation, i.e. the top of the indentation stack.
var
  top: int;
begin
  top := length(p.indentStack) - 1;
  result := p.indentStack[top];
end;
procedure pushInd(var p: TRstParser; ind: int);
// Pushes `ind` onto the indentation stack.
begin
  setLength(p.indentStack, length(p.indentStack)+1);
  p.indentStack[high(p.indentStack)] := ind;
end;
procedure popInd(var p: TRstParser);
// Pops the top of the indentation stack; the bottom entry is never removed.
var
  depth: int;
begin
  depth := length(p.indentStack);
  if depth <= 1 then exit;
  setLength(p.indentStack, depth-1);
end;
// Resets `p` to its initial state: no tokens, an indentation stack that
// contains only 0, empty filename, line 1, column 0, and `sharedState` as the
// shared state. The directive blocks provide the Pascal-only and the
// translated variants of the container initialisation.
procedure initParser(var p: TRstParser; sharedState: PSharedState);
begin
{@ignore}
fillChar(p, sizeof(p), 0);
p.tok := nil;
p.indentStack := nil;
pushInd(p, 0);
{@emit
p.indentStack := @[0];}
{@emit
p.tok := @[];}
p.idx := 0;
p.filename := '';
p.hasToc := false;
p.col := 0;
p.line := 1;
p.s := sharedState;
end;
// ---------------------------------------------------------------
procedure addNodesAux(n: PRstNode; var result: string);
// Appends the texts of all leaves below `n` (in tree order) to `result`.
var
  k: int;
begin
  if n.kind = rnLeaf then begin
    add(result, n.text);
    exit
  end;
  for k := 0 to rsonsLen(n)-1 do
    addNodesAux(n.sons[k], result)
end;
function addNodes(n: PRstNode): string;
// Returns the concatenated texts of all leaves below `n`.
var
  buf: string;
begin
  buf := '';
  addNodesAux(n, buf);
  result := buf;
end;
procedure rstnodeToRefnameAux(n: PRstNode; var r: string; var b: bool);
// Appends the reference-name form of the leaves below `n` to `r`. Only ASCII
// letters (lower-cased) and digits are kept; `b` records that other
// characters were skipped since the last kept one, in which case a single
// '-' is inserted before the next kept character.
var
  i: int;
  ch: char;
begin
  if n.kind <> rnLeaf then begin
    for i := 0 to rsonsLen(n)-1 do rstnodeToRefnameAux(n.sons[i], r, b);
    exit
  end;
  for i := strStart to length(n.text)+strStart-1 do begin
    ch := n.text[i];
    if ch in ['0'..'9', 'a'..'z', 'A'..'Z'] then begin
      if b then begin addChar(r, '-'); b := false; end;
      if ch in ['A'..'Z'] then
        addChar(r, chr(ord(ch) - ord('A') + ord('a')))
      else begin
        // BUGFIX: HTML id's cannot start with a digit
        if (ch in ['0'..'9']) and (length(r) = 0) then addChar(r, 'Z');
        addChar(r, ch)
      end
    end
    else if length(r) > 0 then
      b := true // separators at the very start are dropped
  end
end;
function rstnodeToRefname(n: PRstNode): string;
// Returns the reference name for the text below `n` (see
// rstnodeToRefnameAux for the exact transformation).
var
  pendingDash: bool;
begin
  pendingDash := false;
  result := '';
  rstnodeToRefnameAux(n, result, pendingDash);
end;
function findSub(var p: TRstParser; n: PRstNode): int;
// Returns the index of the substitution whose key is the text of `n`, or -1
// if there is none.
var
  wanted: string;
  k: int;
begin
  wanted := addNodes(n);
  result := -1;
  // the spec says: if no exact match, try one without case distinction:
  for k := 0 to high(p.s.subs) do
    if p.s.subs[k].key = wanted then begin
      result := k; break
    end;
  if result >= 0 then exit;
  for k := 0 to high(p.s.subs) do
    if cmpIgnoreStyle(wanted, p.s.subs[k].key) = 0 then begin
      result := k; break
    end
end;
procedure setSub(var p: TRstParser; const key: string; value: PRstNode);
// Defines the substitution `key`; an existing definition with exactly the
// same key is overwritten.
var
  slot, count: int;
begin
  count := length(p.s.subs);
  slot := 0;
  while (slot < count) and (p.s.subs[slot].key <> key) do inc(slot);
  if slot = count then begin
    // not found: append a new entry
    setLength(p.s.subs, count+1);
    p.s.subs[slot].key := key;
  end;
  p.s.subs[slot].value := value;
end;
procedure setRef(var p: TRstParser; const key: string; value: PRstNode);
// Defines the reference `key`. If the key already exists its value is
// replaced and a "redefinition of label" warning is reported.
var
  slot, count: int;
begin
  count := length(p.s.refs);
  slot := 0;
  while (slot < count) and (p.s.refs[slot].key <> key) do inc(slot);
  if slot < count then begin
    p.s.refs[slot].value := value;
    rstMessage(p, warnRedefinitionOfLabel, key);
  end
  else begin
    setLength(p.s.refs, count+1);
    p.s.refs[count].key := key;
    p.s.refs[count].value := value;
  end
end;
function findRef(var p: TRstParser; const key: string): PRstNode;
// Returns the node registered for the reference `key` (exact match), or nil.
var
  k: int;
begin
  result := nil;
  k := 0;
  while k <= high(p.s.refs) do begin
    if p.s.refs[k].key = key then begin
      result := p.s.refs[k].value;
      break
    end;
    inc(k)
  end
end;
function cmpNodes(a, b: PRstNode): int;
// Compares two definition items by the text of their first son, using
// cmpIgnoreStyle.
begin
  assert(a.kind = rnDefItem);
  assert(b.kind = rnDefItem);
  result := cmpIgnoreStyle(addNodes(a.sons[0]), addNodes(b.sons[0]))
end;
// Sorts the sons of the definition list `a` in place, ordered by cmpNodes
// (i.e. by the text of each item's name).
procedure sortIndex(a: PRstNode);
// we use shellsort here; fast and simple
var
N, i, j, h: int;
v: PRstNode;
begin
assert(a.kind = rnDefList);
N := rsonsLen(a);
// find the first gap of the sequence 1, 4, 13, ... that exceeds N
h := 1; repeat h := 3*h+1; until h > N;
repeat
h := h div 3;
// insertion sort with gap h
for i := h to N-1 do begin
v := a.sons[i]; j := i;
// i >= h holds here, so j-h is a valid index on the first test
while cmpNodes(a.sons[j-h], v) >= 0 do begin
a.sons[j] := a.sons[j-h]; j := j - h;
if j < h then break
end;
a.sons[j] := v;
end;
until h = 1
end;
function eqRstNodes(a, b: PRstNode): bool;
// Structural equality of two trees: the kinds must match, leaves are compared
// by text, inner nodes by comparing their sons pairwise.
var
  k, count: int;
begin
  result := false;
  if a.kind <> b.kind then exit;
  if a.kind = rnLeaf then begin
    result := a.text = b.text;
    exit
  end;
  count := rsonsLen(a);
  if count <> rsonsLen(b) then exit;
  for k := 0 to count-1 do
    if not eqRstNodes(a.sons[k], b.sons[k]) then exit;
  result := true
end;
function matchesHyperlink(h: PRstNode; const filename: string): bool;
// True if `h` is a hyperlink (possibly wrapped in rnInner nodes with a single
// son) whose target text starts with `filename` immediately followed by '#'.
var
  target: string;
begin
  case h.kind of
    rnInner: begin
      assert(rsonsLen(h) = 1);
      result := matchesHyperlink(h.sons[0], filename)
    end;
    rnHyperlink: begin
      target := addNodes(h.sons[1]);
      result := startsWith(target, filename)
                and (target[length(filename)+strStart] = '#')
    end;
    else
      result := false // this may happen in broken indexes!
  end
end;
// Removes from the definition list `index` every value that is a hyperlink
// into `filename` (see matchesHyperlink). Items whose values have all been
// removed are deleted from the list.
procedure clearIndex(index: PRstNode; const filename: string);
var
i, j, k, items, lastItem: int;
val: PRstNode;
begin
assert(index.kind = rnDefList);
for i := 0 to rsonsLen(index)-1 do begin
assert(index.sons[i].sons[1].kind = rnDefBody);
val := index.sons[i].sons[1].sons[0];
// look through a wrapping rnInner node
if val.kind = rnInner then val := val.sons[0];
if val.kind = rnBulletList then begin
// several values: remove the matching ones by setting them to nil and
// count how many remain
items := rsonsLen(val);
lastItem := -1; // save the last valid item index
for j := 0 to rsonsLen(val)-1 do begin
if val.sons[j] = nil then
dec(items)
else if matchesHyperlink(val.sons[j].sons[0], filename) then begin
val.sons[j] := nil;
dec(items)
end
else lastItem := j
end;
if items = 1 then // remove bullet list:
index.sons[i].sons[1].sons[0] := val.sons[lastItem].sons[0]
else if items = 0 then
index.sons[i] := nil
// with more than one remaining value the list is kept; removed entries
// stay in it as nil sons
end
else if matchesHyperlink(val, filename) then
index.sons[i] := nil
end;
// remove nil nodes:
k := 0;
for i := 0 to rsonsLen(index)-1 do begin
if index.sons[i] <> nil then begin
if k <> i then index.sons[k] := index.sons[i];
inc(k)
end
end;
setLength(index.sons, k);
end;
// Adds the pair (key, val) to the definition list `index`. If an item with an
// equal name already exists, `val` is appended to that item's values (a
// single existing value is first wrapped into a bullet list); otherwise a
// new definition item is appended.
procedure setIndexPair(index, key, val: PRstNode);
var
i: int;
e, a, b: PRstNode;
begin
// writeln(rstnodekindToStr[key.kind], ': ', rstnodekindToStr[val.kind]);
assert(index.kind = rnDefList);
assert(key.kind <> rnDefName);
// wrap the key into a name node so that it can be compared with the names
// already stored in the index
a := newRstNode(rnDefName);
addSon(a, key);
for i := 0 to rsonsLen(index)-1 do begin
if eqRstNodes(index.sons[i].sons[0], a) then begin
assert(index.sons[i].sons[1].kind = rnDefBody);
e := index.sons[i].sons[1].sons[0];
if e.kind <> rnBulletList then begin
// only one value so far: move it into a new bullet list
e := newRstNode(rnBulletList);
b := newRstNode(rnBulletItem);
addSon(b, index.sons[i].sons[1].sons[0]);
addSon(e, b);
index.sons[i].sons[1].sons[0] := e;
end;
b := newRstNode(rnBulletItem);
addSon(b, val);
addSon(e, b);
exit // key already exists
end
end;
// new key: append a definition item consisting of the name and a body
e := newRstNode(rnDefItem);
assert(val.kind <> rnDefBody);
b := newRstNode(rnDefBody);
addSon(b, val);
addSon(e, a);
addSon(e, b);
addSon(index, e);
end;
// ---------------------------------------------------------------------------
function newLeaf(var p: TRstParser): PRstNode;
// Creates a leaf node whose text is the symbol of the current token. The
// token is not consumed.
begin
  result := newRstNode(rnLeaf);
  result.text := p.tok[p.idx].symbol
end;
function getReferenceName(var p: TRstParser; const endStr: string): PRstNode;
// Collects tokens into an rnInner node until the punctuation token `endStr`
// is found (which is consumed). Any token that is not a word, whitespace,
// punctuation or "other" token ends the name with an error message.
begin
  result := newRstNode(rnInner);
  while true do begin
    if p.tok[p.idx].kind in [tkWord, tkOther, tkWhite] then
      addSon(result, newLeaf(p))
    else if p.tok[p.idx].kind = tkPunct then begin
      if p.tok[p.idx].symbol = endStr then begin
        inc(p.idx);
        break
      end;
      addSon(result, newLeaf(p))
    end
    else begin
      rstMessage(p, errXexpected, endStr);
      break
    end;
    inc(p.idx);
  end
end;
function untilEol(var p: TRstParser): PRstNode;
// Consumes all tokens up to (but not including) the next indentation or EOF
// token and returns them as leaves of an rnInner node.
begin
  result := newRstNode(rnInner);
  while true do begin
    if p.tok[p.idx].kind in [tkIndent, tkEof] then break;
    addSon(result, newLeaf(p));
    inc(p.idx);
  end
end;
procedure expect(var p: TRstParser; const tok: string);
// Consumes the current token if its symbol is `tok`; otherwise reports an
// "expected" error and leaves the position unchanged.
begin
  if p.tok[p.idx].symbol <> tok then
    rstMessage(p, errXexpected, tok)
  else
    inc(p.idx)
end;
(*
From the specification:
The inline markup start-string and end-string recognition rules are as
follows. If any of the conditions are not met, the start-string or end-string
will not be recognized or processed.
1. Inline markup start-strings must start a text block or be immediately
preceded by whitespace or one of: ' " ( [ { < - / :
2. Inline markup start-strings must be immediately followed by
non-whitespace.
3. Inline markup end-strings must be immediately preceded by non-whitespace.
4. Inline markup end-strings must end a text block or be immediately
followed by whitespace or one of: ' " ) ] } > - / : . , ; ! ? \
5. If an inline markup start-string is immediately preceded by a single or
double quote, "(", "[", "{", or "<", it must not be immediately followed
by the corresponding single or double quote, ")", "]", "}", or ">".
6. An inline markup end-string must be separated by at least one character
from the start-string.
7. An unescaped backslash preceding a start-string or end-string will
disable markup recognition, except for the end-string of inline literals.
See Escaping Mechanism above for details.
*)
function isInlineMarkupEnd(const p: TRstParser; const markup: string): bool;
// Returns true if the current token is `markup` and qualifies as an inline
// markup end-string according to rules 3, 4 and 7 of the specification
// quoted in this file. Nothing is consumed.
begin
  result := p.tok[p.idx].symbol = markup;
  if not result then exit;
  // An end-string needs a preceding token (rules 3 and 7 both inspect it);
  // without this guard p.tok[p.idx-1] would be read out of bounds for the
  // very first token.
  if p.idx = 0 then begin
    result := false;
    exit
  end;
  // Rule 3:
  result := not (p.tok[p.idx-1].kind in [tkIndent, tkWhite]);
  if not result then exit;
  // Rule 4 (this implementation additionally accepts a following '_'):
  result := (p.tok[p.idx+1].kind in [tkIndent, tkWhite, tkEof])
    or (p.tok[p.idx+1].symbol[strStart] in ['''', '"', ')', ']', '}', '>',
                                            '-', '/', '\', ':', '.', ',',
                                            ';', '!', '?', '_']);
  if not result then exit;
  // Rule 7 (a preceding backslash disables the end-string, except for
  // inline literals):
  if (markup <> '``') and (p.tok[p.idx-1].symbol = '\'+'') then
    result := false
end;
function isInlineMarkupStart(const p: TRstParser; const markup: string): bool;
// Returns true if the current token is `markup` and qualifies as an inline
// markup start-string according to rules 1, 2, 5 and 7 of the specification
// quoted in this file. Nothing is consumed.
var
  before, closing: Char;
begin
  result := false;
  if p.tok[p.idx].symbol <> markup then exit;
  // Rule 1 (this implementation additionally accepts a preceding '_'):
  if (p.idx > 0)
     and not (p.tok[p.idx-1].kind in [tkIndent, tkWhite])
     and not (p.tok[p.idx-1].symbol[strStart] in ['''', '"', '(', '[', '{',
                                                  '<', '-', '/', ':', '_'])
  then exit;
  // Rule 2:
  if p.tok[p.idx+1].kind in [tkIndent, tkWhite, tkEof] then exit;
  result := true;
  if p.idx = 0 then exit; // nothing in front: rules 5 and 7 cannot apply
  // Rule 7:
  if p.tok[p.idx-1].symbol = '\'+'' then begin
    result := false;
    exit
  end;
  // Rule 5: find the character that would close the preceding one
  before := p.tok[p.idx-1].symbol[strStart];
  case before of
    '''', '"': closing := before;
    '(': closing := ')';
    '[': closing := ']';
    '{': closing := '}';
    '<': closing := '>';
    else closing := #0;
  end;
  if closing <> #0 then
    result := p.tok[p.idx+1].symbol[strStart] <> closing;
end;
procedure parseBackslash(var p: TRstParser; father: PRstNode);
// Handles the punctuation token at the current position:
// * a double backslash yields a leaf containing a single backslash;
// * a single backslash escapes the token that follows: that token is added
//   as a plain leaf, unless it is whitespace, which is dropped;
// * any other punctuation is added as a leaf unchanged.
begin
  assert(p.tok[p.idx].kind = tkPunct);
  if p.tok[p.idx].symbol = '\\' then begin
    addSon(father, newRstNode(rnLeaf, '\'+''));
    inc(p.idx);
  end
  else if p.tok[p.idx].symbol = '\'+'' then begin
    // XXX: Unicode?
    inc(p.idx);
    if p.tok[p.idx].kind <> tkWhite then addSon(father, newLeaf(p));
    // A backslash may be the last token of the input: never step over the
    // EOF token, otherwise the callers would index past the token sequence.
    if p.tok[p.idx].kind <> tkEof then inc(p.idx);
  end
  else begin
    addSon(father, newLeaf(p));
    inc(p.idx)
  end
end;
// Tests whether the tokens starting at index `start` match the pattern
// `expr`. Every pattern element matches exactly one token; matching stops
// with `false` at the first element that does not match.
function match(const p: TRstParser; start: int; const expr: string): bool;
// regular expressions are:
// special char exact match
// 'w' tkWord
// ' ' tkWhite
// 'a' tkAdornment
// 'i' tkIndent
// 'p' tkPunct
// 'o' tkOther
// 'T' always true
// 'E' whitespace, indent or eof
// 'e' tkWord or '#' (for enumeration lists)
var
i, j, last, len: int;
c: char;
begin
i := strStart;
j := start;
last := length(expr)+strStart-1;
while i <= last do begin
case expr[i] of
'w': result := p.tok[j].kind = tkWord;
' ': result := p.tok[j].kind = tkWhite;
'i': result := p.tok[j].kind = tkIndent;
'p': result := p.tok[j].kind = tkPunct;
'a': result := p.tok[j].kind = tkAdornment;
'o': result := p.tok[j].kind = tkOther;
'T': result := true;
'E': result := p.tok[j].kind in [tkEof, tkWhite, tkIndent];
'e': begin
// an enumerator: '#', a single letter, or a token consisting of digits
// only; words starting with any other character are accepted as they are
result := (p.tok[j].kind = tkWord) or (p.tok[j].symbol = '#'+'');
if result then
case p.tok[j].symbol[strStart] of
'a'..'z', 'A'..'Z': result := length(p.tok[j].symbol) = 1;
'0'..'9': result := allCharsInSet(p.tok[j].symbol, ['0'..'9']);
else begin end
end
end
else begin
// any other character: a run of `len` identical pattern characters
// matches one punctuation/adornment token of exactly that length that
// starts with this character
c := expr[i];
len := 0;
while (i <= last) and (expr[i] = c) do begin inc(i); inc(len) end;
dec(i);
result := (p.tok[j].kind in [tkPunct, tkAdornment])
and (length(p.tok[j].symbol) = len)
and (p.tok[j].symbol[strStart] = c);
end
end;
if not result then exit;
inc(j);
inc(i)
end;
result := true
end;
procedure fixupEmbeddedRef(n, a, b: PRstNode);
// Splits the sons of `n` at the last '<' leaf (the last son of `n` is never
// considered): the sons in front of it go to `a` (without one directly
// preceding leaf that starts with a blank), the sons after it, except the
// last son of `n`, go to `b`. Without a '<' leaf everything but the last son
// goes to `b`.
var
  k, ltIdx, lastOfName: int;
begin
  // search backwards for the '<' leaf
  ltIdx := rsonsLen(n)-2;
  while (ltIdx >= 0) and (n.sons[ltIdx].text <> '<'+'') do dec(ltIdx);
  if ltIdx < 0 then ltIdx := -1; // not found
  lastOfName := ltIdx-1;
  if (ltIdx > 0) and (n.sons[ltIdx-1].text[strStart] = ' ') then
    dec(lastOfName);
  for k := 0 to lastOfName do addSon(a, n.sons[k]);
  for k := ltIdx+1 to rsonsLen(n)-2 do addSon(b, n.sons[k]);
end;
function parsePostfix(var p: TRstParser; n: PRstNode): PRstNode;
// Handles what may follow an inline element `n`:
// * a trailing '_' turns `n` into a reference; if the text ended in ">`" it
//   is treated as a hyperlink with an embedded target (which is also
//   registered as a reference when a name part is present);
// * a trailing ":role:" changes the kind of `n` according to the role name;
//   unknown roles wrap `n` in an rnGeneralRole node together with the name.
// Returns the resulting node (either `n` itself or a new node containing it).
var
  a, b: PRstNode;
  role: string;
begin
  result := n;
  if isInlineMarkupEnd(p, '_'+'') then begin
    inc(p.idx);
    if (p.tok[p.idx-2].symbol ='`'+'')
    and (p.tok[p.idx-3].symbol = '>'+'') then begin
      a := newRstNode(rnInner);
      b := newRstNode(rnInner);
      fixupEmbeddedRef(n, a, b);
      if rsonsLen(a) = 0 then begin
        result := newRstNode(rnStandaloneHyperlink);
        addSon(result, b);
      end
      else begin
        result := newRstNode(rnHyperlink);
        addSon(result, a);
        addSon(result, b);
        setRef(p, rstnodeToRefname(a), b);
      end
    end
    else if n.kind = rnInterpretedText then
      n.kind := rnRef
    else begin
      result := newRstNode(rnRef);
      addSon(result, n);
    end;
  end
  else if match(p, p.idx, ':w:') then begin
    // a role:
    role := p.tok[p.idx+1].symbol;
    if role = 'idx' then
      n.kind := rnIdx
    else if role = 'literal' then
      n.kind := rnInlineLiteral
    else if role = 'strong' then
      n.kind := rnStrongEmphasis
    else if role = 'emphasis' then
      n.kind := rnEmphasis
    else if (role = 'sub') or (role = 'subscript') then
      n.kind := rnSub
    else if (role = 'sup') or (role = 'superscript')
         or (role = 'supscript') then
      // 'superscript' is the standard role name; the misspelled 'supscript'
      // is still accepted for existing documents
      n.kind := rnSup
    else begin
      result := newRstNode(rnGeneralRole);
      n.kind := rnInner;
      addSon(result, n);
      addSon(result, newRstNode(rnLeaf, role));
    end;
    inc(p.idx, 3)
  end
end;
function isURL(const p: TRstParser; i: int): bool;
// True if the token at index `i` is followed by the tokens ':', '//', a
// word and '.', i.e. it looks like the scheme of a URL.
begin
  result := false;
  if p.tok[i+1].symbol <> ':'+'' then exit;
  if p.tok[i+2].symbol <> '//' then exit;
  if p.tok[i+3].kind <> tkWord then exit;
  result := p.tok[i+4].symbol = '.'+''
end;
procedure parseURL(var p: TRstParser; father: PRstNode);
// Parses the word at the current position. If it starts a URL (see isURL) all
// tokens belonging to the URL are collected into a standalone hyperlink;
// otherwise the word becomes a leaf, which is turned into a reference when
// it is directly followed by '_'.
var
  node: PRstNode;
  k: TTokType;
begin
  if not isURL(p, p.idx) then begin
    node := newLeaf(p);
    inc(p.idx);
    if p.tok[p.idx].symbol = '_'+'' then node := parsePostfix(p, node);
    addSon(father, node);
    exit
  end;
  node := newRstNode(rnStandaloneHyperlink);
  while true do begin
    k := p.tok[p.idx].kind;
    if k = tkPunct then begin
      // punctuation only belongs to the URL if more of the URL follows
      if not (p.tok[p.idx+1].kind in [tkWord, tkAdornment, tkOther, tkPunct])
      then break
    end
    else if not (k in [tkWord, tkAdornment, tkOther]) then
      break;
    addSon(node, newLeaf(p));
    inc(p.idx);
  end;
  addSon(father, node);
end;
procedure parseUntil(var p: TRstParser; father: PRstNode;
                     const postfix: string; interpretBackslash: bool);
// Adds tokens as leaves to `father` until the inline markup end-string
// `postfix` is found (which is consumed). Whitespace and single line breaks
// become a single blank. With `interpretBackslash` punctuation is passed
// through parseBackslash so that escapes are processed. A blank line or the
// end of the input before `postfix` is reported as an error and ends the
// scan.
begin
  while true do begin
    case p.tok[p.idx].kind of
      tkPunct: begin
        if isInlineMarkupEnd(p, postfix) then begin
          inc(p.idx);
          break;
        end
        else if interpretBackslash then
          parseBackslash(p, father)
        else begin
          addSon(father, newLeaf(p));
          inc(p.idx);
        end
      end;
      tkAdornment, tkWord, tkOther: begin
        addSon(father, newLeaf(p));
        inc(p.idx);
      end;
      tkIndent: begin
        addSon(father, newRstNode(rnLeaf, ' '+''));
        inc(p.idx);
        if p.tok[p.idx].kind = tkIndent then begin
          // two indentation tokens in a row: a blank line
          rstMessage(p, errXExpected, postfix);
          break
        end
      end;
      tkWhite: begin
        addSon(father, newRstNode(rnLeaf, ' '+''));
        inc(p.idx);
      end
      else begin
        // end of input: leave the loop like the other error paths do, so
        // that the scan terminates even if the message handler returns
        rstMessage(p, errXExpected, postfix);
        break
      end
    end
  end
end;
procedure parseInline(var p: TRstParser; father: PRstNode);
// Parses one inline element at the current position and adds it to `father`:
// a word (possibly a URL or reference), a plain token, or - for punctuation -
// one of the inline markup constructs. Punctuation that does not start
// inline markup is handed to parseBackslash.
var
  node: PRstNode;
begin
  case p.tok[p.idx].kind of
    tkWord: parseURL(p, father);
    tkAdornment, tkOther, tkWhite: begin
      addSon(father, newLeaf(p));
      inc(p.idx);
    end;
    tkPunct: begin
      // the longer start-strings have to be tried before the shorter ones
      if isInlineMarkupStart(p, '**') then begin
        node := newRstNode(rnStrongEmphasis);
        inc(p.idx);
        parseUntil(p, node, '**', true);
      end
      else if isInlineMarkupStart(p, '*'+'') then begin
        node := newRstNode(rnEmphasis);
        inc(p.idx);
        parseUntil(p, node, '*'+'', true);
      end
      else if isInlineMarkupStart(p, '``') then begin
        node := newRstNode(rnInlineLiteral);
        inc(p.idx);
        parseUntil(p, node, '``', false);
      end
      else if isInlineMarkupStart(p, '`'+'') then begin
        node := newRstNode(rnInterpretedText);
        inc(p.idx);
        parseUntil(p, node, '`'+'', true);
        node := parsePostfix(p, node);
      end
      else if isInlineMarkupStart(p, '|'+'') then begin
        node := newRstNode(rnSubstitutionReferences);
        inc(p.idx);
        parseUntil(p, node, '|'+'', false);
      end
      else begin
        // no inline markup: parseBackslash adds the leaf itself
        parseBackslash(p, father);
        exit
      end;
      addSon(father, node);
    end
    else assert(false);
  end
end;
function getDirective(var p: TRstParser): string;
// Tries to parse " name::" at the current position and returns the directive
// name (the concatenated symbols in front of the '::'). Whitespace around
// the '::' is skipped. If no directive is found, '' is returned and the
// position is left unchanged.
var
  startIdx: int;
begin
  result := '';
  if (p.tok[p.idx].kind <> tkWhite) or (p.tok[p.idx+1].kind <> tkWord) then
    exit;
  startIdx := p.idx;
  inc(p.idx);
  result := p.tok[p.idx].symbol;
  inc(p.idx);
  // the name may consist of several tokens (e.g. words joined by '-')
  while (p.tok[p.idx].kind in [tkWord, tkPunct, tkAdornment, tkOther])
        and (p.tok[p.idx].symbol <> '::') do begin
    add(result, p.tok[p.idx].symbol);
    inc(p.idx);
  end;
  if p.tok[p.idx].kind = tkWhite then inc(p.idx);
  if p.tok[p.idx].symbol <> '::' then begin
    p.idx := startIdx; // set back
    result := '';      // error
    exit
  end;
  inc(p.idx);
  if p.tok[p.idx].kind = tkWhite then inc(p.idx);
end;
function parseComment(var p: TRstParser): PRstNode;
// Skips a comment and always returns nil.
// * If the comment text starts on the current line, the rest of the line is
//   skipped.
// * If the current token is an indentation, either the comment is empty (a
//   blank line follows) or everything up to the first line that is indented
//   less than this one is skipped.
// * At the end of the input there is nothing to skip.
var
  indent: int;
begin
  case p.tok[p.idx].kind of
    tkEof: begin
      // EOF is the last token: p.tok[p.idx+1] must not be inspected here
    end;
    tkIndent: begin
      if p.tok[p.idx+1].kind = tkIndent then begin
        inc(p.idx);
        // empty comment
      end
      else begin
        indent := p.tok[p.idx].ival;
        while True do begin
          case p.tok[p.idx].kind of
            tkEof: break;
            tkIndent: begin
              if (p.tok[p.idx].ival < indent) then break;
            end
            else begin end
          end;
          inc(p.idx)
        end
      end
    end
    else
      while not (p.tok[p.idx].kind in [tkIndent, tkEof]) do inc(p.idx);
  end;
  result := nil;
end;
type
// The known directives. getDirKind maps a name to a kind by binary search in
// DirIds and converts the found index to TDirKind, so both lists must be in
// the same (sorted) order.
TDirKind = ( // must be ordered alphabetically!
dkNone, dkAuthor, dkAuthors, dkCodeBlock, dkContainer,
dkContents, dkFigure, dkImage, dkInclude, dkIndex, dkRaw, dkTitle
);
const
// Directive names, parallel to TDirKind; the empty string stands for dkNone.
DirIds: array [0..11] of string = (
'', 'author', 'authors', 'code-block', 'container',
'contents', 'figure', 'image', 'include', 'index', 'raw', 'title'
);
function getDirKind(const s: string): TDirKind;
// Maps a directive name to its kind; unknown names yield dkNone.
var
  pos: int;
begin
  result := dkNone;
  pos := binaryStrSearch(DirIds, s);
  if pos >= 0 then result := TDirKind(pos)
end;
procedure parseLine(var p: TRstParser; father: PRstNode);
// Parses inline elements into `father` until a token is reached that is
// neither whitespace, a word, punctuation nor an "other" token.
begin
  while p.tok[p.idx].kind in [tkWhite, tkWord, tkOther, tkPunct] do
    parseInline(p, father)
end;
// Forward declaration: parseSection is used by the routines below before its
// implementation (which is not part of this section of the file).
procedure parseSection(var p: TRstParser; result: PRstNode); forward;
function parseField(var p: TRstParser): PRstNode;
// Parses one field (":name: body") starting at the opening ':' and returns
// an rnField node with the field name and the field body as its two sons.
// The body consists of the rest of the line plus any following lines that
// are indented deeper than the column of the opening ':'.
var
  startCol: int;
  nameNode, bodyNode: PRstNode;
begin
  startCol := p.tok[p.idx].col;
  inc(p.idx); // skip :
  nameNode := newRstNode(rnFieldname);
  parseUntil(p, nameNode, ':'+'', false);
  bodyNode := newRstNode(rnFieldbody);
  if p.tok[p.idx].kind <> tkIndent then
    parseLine(p, bodyNode);
  if (p.tok[p.idx].kind = tkIndent)
     and (p.tok[p.idx].ival > startCol) then begin
    pushInd(p, p.tok[p.idx].ival);
    parseSection(p, bodyNode);
    popInd(p);
  end;
  result := newRstNode(rnField);
  addSon(result, nameNode);
  addSon(result, bodyNode);
end;
function parseFields(var p: TRstParser): PRstNode;
// Parses a field list that starts on the next line: the current token has to
// be an indentation followed by ':'. Returns nil if there is no field list.
// The list continues as long as further lines with the same indentation
// start with ':'.
var
  listCol: int;
begin
  result := nil;
  if p.tok[p.idx].kind <> tkIndent then exit;
  if p.tok[p.idx+1].symbol <> ':'+'' then exit;
  listCol := p.tok[p.idx].ival; // BUGFIX!
  result := newRstNode(rnFieldList);
  repeat
    inc(p.idx); // skip the indentation in front of the field
    addSon(result, parseField(p));
  until not ((p.tok[p.idx].kind = tkIndent)
             and (p.tok[p.idx].ival = listCol)
             and (p.tok[p.idx+1].symbol = ':'+''))
end;
function getFieldValue(n: PRstNode; const fieldname: string): string;
// Returns the text of the field `fieldname` from the field list stored in
// n.sons[1]. Names are compared with cmpIgnoreStyle. The result is '' if
// there is no field list or no such field, and #1#1 if the field exists but
// its body is empty.
var
  k: int;
  fields, field: PRstNode;
begin
  result := '';
  fields := n.sons[1];
  if fields = nil then exit;
  if fields.kind <> rnFieldList then
    InternalError('getFieldValue (2): ' + rstnodeKindToStr[fields.kind]);
  for k := 0 to rsonsLen(fields)-1 do begin
    field := fields.sons[k];
    if cmpIgnoreStyle(addNodes(field.sons[0]), fieldname) = 0 then begin
      result := addNodes(field.sons[1]);
      if result = '' then result := #1#1; // indicates that the field exists
      exit
    end
  end
end;
function getArgument(n: PRstNode): string;
// Returns the text of the first son of `n`, or '' if that son is nil.
begin
  result := '';
  if n.sons[0] <> nil then result := addNodes(n.sons[0]);
end;
// Forward declaration; the implementation is not part of this section of the
// file.
function parseDotDot(var p: TRstParser): PRstNode; forward;
function parseLiteralBlock(var p: TRstParser): PRstNode;
// Parses a literal block and returns an rnLiteralBlock node with a single
// leaf that holds the raw text. If the current token is an indentation, the
// block consists of all following lines that are indented at least as deep;
// that indentation is removed from every line. Otherwise the block is just
// the rest of the current line.
var
  baseIndent: int;
  leaf: PRstNode;
begin
  leaf := newRstNode(rnLeaf, '');
  if p.tok[p.idx].kind <> tkIndent then begin
    while not (p.tok[p.idx].kind in [tkIndent, tkEof]) do begin
      add(leaf.text, p.tok[p.idx].symbol);
      inc(p.idx)
    end
  end
  else begin
    baseIndent := p.tok[p.idx].ival;
    inc(p.idx);
    while p.tok[p.idx].kind <> tkEof do begin
      if p.tok[p.idx].kind = tkIndent then begin
        // a line that is indented less ends the block
        if p.tok[p.idx].ival < baseIndent then break;
        add(leaf.text, nl);
        add(leaf.text, repeatChar(p.tok[p.idx].ival - baseIndent));
      end
      else
        add(leaf.text, p.tok[p.idx].symbol);
      inc(p.idx)
    end
  end;
  result := newRstNode(rnLiteralBlock);
  addSon(result, leaf);
end;
function getLevel(var map: TLevelMap; var lvl: int; c: Char): int;
// Returns the section level of the adornment character `c`. A character
// without a level yet (entry 0) is assigned the next level: `lvl` is
// incremented and stored in `map`.
begin
  result := map[c];
  if result <> 0 then exit;
  inc(lvl);
  map[c] := lvl;
  result := lvl
end;
function tokenAfterNewline(const p: TRstParser): int;
// Returns the index of the first token after the next indentation token
// (searching from the current position), or the index of the EOF token if
// the input ends first.
begin
  result := p.idx;
  while not (p.tok[result].kind in [tkEof, tkIndent]) do inc(result);
  if p.tok[result].kind = tkIndent then inc(result)
end;
// ---------------------------------------------------------------------------
function isLineBlock(const p: TRstParser): bool;
// True if the first token of the next line is either in a deeper column than
// the current token, or a '|' in the same column.
var
  next: int;
begin
  next := tokenAfterNewline(p);
  if p.tok[next].col > p.tok[p.idx].col then
    result := true
  else
    result := (p.tok[p.idx].col = p.tok[next].col)
              and (p.tok[next].symbol = '|'+'')
end;
function predNL(const p: TRstParser): bool;
// True if the current token is the very first token, or if it is preceded by
// an indentation token whose value equals the current indentation.
begin
  if p.idx <= 0 then
    result := true
  else
    result := (p.tok[p.idx-1].kind = tkIndent)
              and (p.tok[p.idx-1].ival = currInd(p))
end;
function isDefList(const p: TRstParser): bool;
// True if the next line is indented deeper than the current token, starts
// with a word, punctuation or "other" token, and the current line does not
// end with '::'.
var
  next: int;
begin
  next := tokenAfterNewline(p);
  result := false;
  if p.tok[p.idx].col >= p.tok[next].col then exit;
  if not (p.tok[next].kind in [tkWord, tkOther, tkPunct]) then exit;
  // p.tok[next-1] is the indentation token, so next-2 is the last token of
  // the current line
  result := p.tok[next-2].symbol <> '::'
end;
function whichSection(const p: TRstParser): TRstNodeKind;
// Classifies the construct that starts at the current token and returns
// the matching node kind. The parser position is not changed.
// The tests are tried in order and the first match wins; rnLeaf is returned
// when the token kind cannot start a section (or an adornment line matches
// none of the known patterns).
// Detecting a grid table additionally reports errGridTableNotImplemented.
var
  sym: string;
begin
  case p.tok[p.idx].kind of
    tkAdornment: begin
      result := rnLeaf;
      if match(p, p.idx+1, 'ii') then result := rnTransition
      else if match(p, p.idx+1, ' a') then result := rnTable
      else if match(p, p.idx+1, 'i'+'') then result := rnOverline
    end;
    tkPunct: begin
      sym := p.tok[p.idx].symbol;
      if match(p, tokenAfterNewLine(p), 'ai') then
        result := rnHeadline
      else if sym = '::' then
        result := rnLiteralBlock
      else if predNL(p)
          and ((sym = '+'+'') or (sym = '*'+'') or (sym = '-'+''))
          and (p.tok[p.idx+1].kind = tkWhite) then
        result := rnBulletList
      else if (sym = '|'+'') and isLineBlock(p) then
        result := rnLineBlock
      else if (sym = '..') and predNL(p) then
        result := rnDirective
      else if (sym = ':'+'') and predNL(p) then
        result := rnFieldList
      else if match(p, p.idx, '(e) ') then
        result := rnEnumList
      else if match(p, p.idx, '+a+') then begin
        result := rnGridTable;
        rstMessage(p, errGridTableNotImplemented);
      end
      else if isDefList(p) then
        result := rnDefList
      else if match(p, p.idx, '-w') or match(p, p.idx, '--w')
          or match(p, p.idx, '/w') then
        result := rnOptionList
      else
        result := rnParagraph
    end;
    tkWord, tkOther, tkWhite: begin
      result := rnParagraph;
      if match(p, tokenAfterNewLine(p), 'ai') then
        result := rnHeadline
      else if isDefList(p) then
        result := rnDefList
      else if match(p, p.idx, 'e) ') or match(p, p.idx, 'e. ') then
        result := rnEnumList
    end;
    else result := rnLeaf;
  end
end;
function parseLineBlock(var p: TRstParser): PRstNode;
// Parses a line block; the current token is expected to be the leading
// '|'. Returns nil without consuming anything if the token after it is
// not whitespace.
// Each line becomes an rnLineBlockItem parsed with parseSection. The block
// continues while a following line starts in the same column with '|'
// followed by whitespace.
var
  col: int;
  item: PRstNode;
  more: bool;
begin
  result := nil;
  if p.tok[p.idx+1].kind <> tkWhite then exit;
  col := p.tok[p.idx].col;
  result := newRstNode(rnLineBlock);
  // the item's content is indented to the column of the text after '| '
  pushInd(p, p.tok[p.idx+2].col);
  inc(p.idx, 2);
  repeat
    item := newRstNode(rnLineBlockItem);
    parseSection(p, item);
    addSon(result, item);
    more := (p.tok[p.idx].kind = tkIndent) and (p.tok[p.idx].ival = col)
        and (p.tok[p.idx+1].symbol = '|'+'')
        and (p.tok[p.idx+2].kind = tkWhite);
    if more then inc(p.idx, 3);   // skip indent, '|' and the whitespace
  until not more;
  popInd(p);
end;
procedure parseParagraph(var p: TRstParser; result: PRstNode);
// Appends the inline content of a paragraph to `result` and stops at the
// paragraph's end. Note that `result` is an ordinary parameter here (this
// is a procedure), i.e. the node to fill is supplied by the caller.
//
// The loop ends on:
//   * two consecutive tkIndent tokens (presumably a blank line); the first
//     one is consumed,
//   * a tkIndent whose indentation differs from the current indentation,
//   * a line at the current indentation that `whichSection` classifies as
//     something that cannot continue a paragraph (the tkIndent has already
//     been consumed in that case),
//   * a '::' followed by a deeper indented line, which is parsed as a
//     literal block belonging to this paragraph,
//   * any other token kind (e.g. tkEof).
begin
while True do begin
case p.tok[p.idx].kind of
tkIndent: begin
if p.tok[p.idx+1].kind = tkIndent then begin
inc(p.idx);
break
end
else if (p.tok[p.idx].ival = currInd(p)) then begin
inc(p.idx);
// the line break inside a paragraph is represented by a single space leaf
case whichSection(p) of
rnParagraph, rnLeaf, rnHeadline, rnOverline, rnDirective:
addSon(result, newRstNode(rnLeaf, ' '+''));
rnLineBlock: addSonIfNotNil(result, parseLineBlock(p));
else break;
end;
end
else break
end;
tkPunct: begin
// '::' at the end of a line, followed by a deeper indented line: emit a
// single ':' and parse the indented text as a literal block
if (p.tok[p.idx].symbol = '::') and (p.tok[p.idx+1].kind = tkIndent)
and (currInd(p) < p.tok[p.idx+1].ival) then begin
addSon(result, newRstNode(rnLeaf, ':'+''));
inc(p.idx); // skip '::'
addSon(result, parseLiteralBlock(p));
break
end
else
parseInline(p, result)
end;
tkWhite, tkWord, tkAdornment, tkOther:
parseInline(p, result);
else break;
end
end
end;
function parseParagraphWrapper(var p: TRstParser): PRstNode;
// Function-style wrapper around parseParagraph: creates a fresh
// rnParagraph node, lets parseParagraph fill it and returns it.
var
  para: PRstNode;
begin
  para := newRstNode(rnParagraph);
  parseParagraph(p, para);
  result := para;
end;
function parseHeadline(var p: TRstParser): PRstNode;
// Parses an underlined headline: the title line followed by a line break
// and an adornment line (both asserted). The first character of the
// adornment determines the headline level via `getLevel` and the shared
// underline-to-level map.
begin
  result := newRstNode(rnHeadline);
  parseLine(p, result);
  assert(p.tok[p.idx].kind = tkIndent);
  assert(p.tok[p.idx+1].kind = tkAdornment);
  result.level := getLevel(p.s.underlineToLevel, p.s.uLevel,
                           p.tok[p.idx+1].symbol[strStart]);
  inc(p.idx, 2);   // skip the line break and the adornment
end;
type
// Dynamic array of ints; getColumns and parseSimpleTable use it to hold the
// right-hand column limit of every table column.
TIntSeq = array of int;
function tokEnd(const p: TRstParser): int;
// Column of the last character of the current token
// (start column + symbol length - 1).
begin
  result := p.tok[p.idx].col - 1 + length(p.tok[p.idx].symbol);
end;
procedure getColumns(var p: TRstParser; var cols: TIntSeq);
// Reads one table adornment line (adornment tokens separated by single
// whitespace tokens) and stores the end column of every adornment in
// `cols`, replacing its previous contents. A trailing tkIndent is
// consumed. The last entry is then overwritten with 32000 so that the last
// column is effectively unbounded.
var
  count: int;
begin
  count := 0;
  repeat
    inc(count);
    setLength(cols, count);
    cols[count-1] := tokEnd(p);
    assert(p.tok[p.idx].kind = tkAdornment);
    inc(p.idx);
    if p.tok[p.idx].kind <> tkWhite then break;
    inc(p.idx);
  until p.tok[p.idx].kind <> tkAdornment;
  if p.tok[p.idx].kind = tkIndent then inc(p.idx);
  // last column has no limit:
  cols[high(cols)] := 32000;
end;
// Forward declaration: parseSimpleTable parses every table cell with
// parseDoc, which is implemented further down in this unit.
function parseDoc(var p: TRstParser): PRstNode; forward;
function parseSimpleTable(var p: TRstParser): PRstNode;
// Parses a simple table into an rnTable node.
//
// Every table row becomes an rnTableRow with one rnTableDataCell per
// column. The text belonging to a cell is first collected as a string
// (a cell may span several lines) and is then tokenized and parsed on its
// own with a fresh sub-parser `q` that shares the state `p.s`.
//
// Adornment lines delimit the table: each one that is followed by more
// content redefines the column limits (getColumns) and turns the cells of
// the row parsed just before it into rnTableHeaderCell nodes. An adornment
// line followed directly by a line break or EOF ends the table.
//
// NOTE(review): the local `c` is assigned below but never read in this
// function.
var
cols: TIntSeq;
row: array of string;
j, i, last, line: int;
c: Char;
q: TRstParser;
a, b: PRstNode;
begin
result := newRstNode(rnTable);
{@ignore}
cols := nil;
row := nil;
{@emit
cols := @[];}
{@emit
row := @[];}
a := nil;
c := p.tok[p.idx].symbol[strStart];
while true do begin
if p.tok[p.idx].kind = tkAdornment then begin
last := tokenAfterNewline(p);
if p.tok[last].kind in [tkEof, tkIndent] then begin
// skip last adornment line:
p.idx := last; break
end;
getColumns(p, cols);
setLength(row, length(cols));
// `a` still refers to the row parsed before this adornment line
if a <> nil then
for j := 0 to rsonsLen(a)-1 do a.sons[j].kind := rnTableHeaderCell;
end;
if p.tok[p.idx].kind = tkEof then break;
for j := 0 to high(row) do row[j] := '';
// the following while loop iterates over the lines a single cell may span:
line := p.tok[p.idx].line;
while true do begin
i := 0;
// distribute the tokens of this line over the columns by their end column
while not (p.tok[p.idx].kind in [tkIndent, tkEof]) do begin
if (tokEnd(p) <= cols[i]) then begin
add(row[i], p.tok[p.idx].symbol);
inc(p.idx);
end
else begin
// token lies beyond column i: move on to the next column and drop a
// separating whitespace token
if p.tok[p.idx].kind = tkWhite then inc(p.idx);
inc(i)
end
end;
if p.tok[p.idx].kind = tkIndent then inc(p.idx);
// a token inside the first column starts a new row
if tokEnd(p) <= cols[0] then break;
if p.tok[p.idx].kind in [tkEof, tkAdornment] then break;
// continuation line: separate it from the previous line in every column
// but the first
for j := 1 to high(row) do addChar(row[j], #10);
end;
// process all the cells:
a := newRstNode(rnTableRow);
for j := 0 to high(row) do begin
initParser(q, p.s);
q.col := cols[j];
q.line := line-1;
q.filename := p.filename;
getTokens(row[j], false, q.tok);
b := newRstNode(rnTableDataCell);
addSon(b, parseDoc(q));
addSon(a, b);
end;
addSon(result, a);
end;
end;
function parseTransition(var p: TRstParser): PRstNode;
// Creates an rnTransition node, skips the current (adornment) token and
// then up to two tkIndent tokens that follow it.
var
  i: int;
begin
  result := newRstNode(rnTransition);
  inc(p.idx);
  for i := 1 to 2 do
    if p.tok[p.idx].kind = tkIndent then inc(p.idx);
end;
function parseOverline(var p: TRstParser): PRstNode;
// Parses a headline that has an overline. The current token is the overline
// adornment; it and the token after it are skipped. Title lines are read
// with parseLine; a following line that is indented deeper than the
// current indentation continues the title (joined by a single space leaf).
// The level is derived from the first character of the overline. A
// trailing adornment token and one tkIndent after it are consumed if
// present.
var
  adorn: char;
  continued: bool;
begin
  adorn := p.tok[p.idx].symbol[strStart];
  inc(p.idx, 2);
  result := newRstNode(rnOverline);
  repeat
    parseLine(p, result);
    if p.tok[p.idx].kind <> tkIndent then break;
    inc(p.idx);
    continued := p.tok[p.idx-1].ival > currInd(p);
    if continued then
      addSon(result, newRstNode(rnLeaf, ' '+''));
  until not continued;
  result.level := getLevel(p.s.overlineToLevel, p.s.oLevel, adorn);
  if p.tok[p.idx].kind = tkAdornment then begin
    inc(p.idx); // XXX: check?
    if p.tok[p.idx].kind = tkIndent then inc(p.idx);
  end
end;
function parseBulletList(var p: TRstParser): PRstNode;
// Parses a bullet list; the current token is the bullet symbol. Returns
// nil without consuming anything if the bullet is not followed by
// whitespace.
// Each item is parsed with parseSection. The list continues while a
// following line starts in the same column with the same bullet symbol
// followed by whitespace.
var
  marker: string;
  col: int;
  item: PRstNode;
  again: bool;
begin
  result := nil;
  if p.tok[p.idx+1].kind <> tkWhite then exit;
  marker := p.tok[p.idx].symbol;
  col := p.tok[p.idx].col;
  result := newRstNode(rnBulletList);
  // item content is indented to the column of the text after the bullet
  pushInd(p, p.tok[p.idx+2].col);
  inc(p.idx, 2);
  repeat
    item := newRstNode(rnBulletItem);
    parseSection(p, item);
    addSon(result, item);
    again := (p.tok[p.idx].kind = tkIndent) and (p.tok[p.idx].ival = col)
        and (p.tok[p.idx+1].symbol = marker)
        and (p.tok[p.idx+2].kind = tkWhite);
    if again then inc(p.idx, 3);  // skip indent, bullet and whitespace
  until not again;
  popInd(p);
end;
function parseOptionList(var p: TRstParser): PRstNode;
// Parses consecutive option list entries (lines matching '-w', '--w' or
// '/w') into an rnOptionList.
// Every entry is an rnOptionListItem with two sons: an rnOptionGroup that
// holds the option tokens and an rnDescription. The option part ends at
// the first whitespace token longer than one character or at the end of
// the line. If the following line is indented deeper than the current
// indentation, the description is parsed as a section at that indentation;
// otherwise only the rest of the line is taken.
var
  group, descr, entry: PRstNode;
  lineStart: int;
begin
  result := newRstNode(rnOptionList);
  while match(p, p.idx, '-w')
      or match(p, p.idx, '--w')
      or match(p, p.idx, '/w') do begin
    group := newRstNode(rnOptionGroup);
    descr := newRstNode(rnDescription);
    entry := newRstNode(rnOptionListItem);
    while not (p.tok[p.idx].kind in [tkIndent, tkEof]) do begin
      if (p.tok[p.idx].kind = tkWhite)
          and (length(p.tok[p.idx].symbol) > 1) then begin
        inc(p.idx);
        break
      end;
      addSon(group, newLeaf(p));
      inc(p.idx);
    end;
    lineStart := tokenAfterNewline(p);
    if (lineStart > 0) and (p.tok[lineStart-1].kind = tkIndent)
        and (p.tok[lineStart-1].ival > currInd(p)) then begin
      pushInd(p, p.tok[lineStart-1].ival);
      parseSection(p, descr);
      popInd(p);
    end
    else
      parseLine(p, descr);
    if p.tok[p.idx].kind = tkIndent then inc(p.idx);
    addSon(entry, group);
    addSon(entry, descr);
    addSon(result, entry);
  end
end;
function parseDefinitionList(var p: TRstParser): PRstNode;
// Parses a definition list into an rnDefList whose sons are rnDefItem
// nodes, each consisting of an rnDefName and an rnDefBody.
//
// Returns nil if the construct at the current position does not look like
// a definition list (the next line must be indented deeper than the
// current indentation and the token in front of that line break must not
// be '::'), or if no item could be parsed.
var
j, col: int;
a, b, c: PRstNode;
begin
result := nil;
// j: index of the token in front of the next line's first token (the
// tkIndent, when a line break was found)
j := tokenAfterNewLine(p)-1;
if (j >= 1) and (p.tok[j].kind = tkIndent)
and (p.tok[j].ival > currInd(p)) and (p.tok[j-1].symbol <> '::') then begin
col := p.tok[p.idx].col;
result := newRstNode(rnDefList);
while true do begin
// remember the start of the item so that the parser can be reset if this
// turns out not to be a definition
j := p.idx;
a := newRstNode(rnDefName);
parseLine(p, a);
//writeln('after def line: ', p.tok[p.idx].ival :1, ' ', col : 1);
if (p.tok[p.idx].kind = tkIndent)
and (p.tok[p.idx].ival > currInd(p))
and (p.tok[p.idx+1].symbol <> '::')
and not (p.tok[p.idx+1].kind in [tkIndent, tkEof]) then begin
// the body is parsed at the indentation of its first line
pushInd(p, p.tok[p.idx].ival);
b := newRstNode(rnDefBody);
parseSection(p, b);
c := newRstNode(rnDefItem);
addSon(c, a);
addSon(c, b);
addSon(result, c);
popInd(p);
end
else begin
// not a definition: rewind to the start of the line and stop
p.idx := j;
break
end;
if (p.tok[p.idx].kind = tkIndent) and (p.tok[p.idx].ival = col) then begin
inc(p.idx);
// look ahead: continue only if the line that starts here is again followed
// by a deeper indented, non-empty line that is not introduced by '::'.
// Note that the tkIndent has already been consumed when the loop is left
// here.
j := tokenAfterNewLine(p)-1;
if (j >= 1) and (p.tok[j].kind = tkIndent)
and (p.tok[j].ival > col)
and (p.tok[j-1].symbol <> '::')
and (p.tok[j+1].kind <> tkIndent) then begin end
else break
end
end;
if rsonsLen(result) = 0 then result := nil
end
end;
function parseEnumList(var p: TRstParser): PRstNode;
// Parses an enumerated list whose enumerator has one of the forms
// '(e) ', 'e) ' or 'e. '. Returns nil if the current position matches none
// of them.
// After the enumerator has been skipped, the list is only accepted if the
// next line either starts in the column of the item text or matches the
// same enumerator form; otherwise the parser position is restored and nil
// is returned.
// Items are parsed with parseSection; the list continues while a following
// line starts in the enumerator's column with the same enumerator form.
const
  wildcards: array [0..2] of string = ('(e) ', 'e) ', 'e. ');
  wildpos: array [0..2] of int = (1, 0, 0);
var
  w, col, nextLine, skip: int;
  item: PRstNode;
  more: bool;
begin
  result := nil;
  // find the first enumerator form that matches at the current position
  w := 0;
  while (w <= 2) and (not match(p, p.idx, wildcards[w])) do inc(w);
  if w > 2 then exit;
  skip := wildpos[w]+3;          // number of tokens the enumerator occupies
  col := p.tok[p.idx].col;
  result := newRstNode(rnEnumList);
  inc(p.idx, skip);
  nextLine := tokenAfterNewLine(p);
  if (p.tok[nextLine].col <> p.tok[p.idx].col)
      and (not match(p, nextLine, wildcards[w])) then begin
    // not an enumerated list after all: undo the skip
    dec(p.idx, skip);
    result := nil;
    exit
  end;
  pushInd(p, p.tok[p.idx].col);
  repeat
    item := newRstNode(rnEnumItem);
    parseSection(p, item);
    addSon(result, item);
    more := (p.tok[p.idx].kind = tkIndent)
        and (p.tok[p.idx].ival = col)
        and match(p, p.idx+1, wildcards[w]);
    if more then inc(p.idx, skip+1);   // the indent plus the enumerator
  until not more;
  popInd(p);
end;
function sonKind(father: PRstNode; i: int): TRstNodeKind;
// Kind of the `i`-th son of `father`; rnLeaf if `i` is not below the
// number of sons.
begin
  if i < rsonsLen(father) then
    result := father.sons[i].kind
  else
    result := rnLeaf;
end;
procedure parseSection(var p: TRstParser; result: PRstNode);
// Parses a sequence of block level constructs at the current indentation
// and appends them as sons to `result` (an ordinary parameter: the node to
// fill is supplied by the caller).
//
// Parsing stops at EOF or at a line that is indented less than the current
// indentation. Lines indented deeper than the current indentation are
// parsed recursively and wrapped into an rnBlockQuote.
//
// Whenever the construct specific parser yields nil (and the construct is
// not a directive) the text is parsed as a plain paragraph instead; this
// also covers rnParagraph itself and rnLeaf (for which
// errNewSectionExpected is reported first).
var
a: PRstNode;
k: TRstNodeKind;
leave: bool;
begin
while true do begin
leave := false;
assert(p.idx >= 0);
// handle the line breaks in front of the next construct
while p.tok[p.idx].kind = tkIndent do begin
if currInd(p) = p.tok[p.idx].ival then begin
inc(p.idx);
end
else if p.tok[p.idx].ival > currInd(p) then begin
pushInd(p, p.tok[p.idx].ival);
a := newRstNode(rnBlockQuote);
parseSection(p, a);
addSon(result, a);
popInd(p);
end
else begin
// dedent: this section is finished; the tkIndent is left unconsumed
leave := true;
break;
end
end;
if leave then break;
if p.tok[p.idx].kind = tkEof then break;
a := nil;
k := whichSection(p);
case k of
rnLiteralBlock: begin
inc(p.idx); // skip '::'
a := parseLiteralBlock(p);
end;
rnBulletList: a := parseBulletList(p);
rnLineblock: a := parseLineBlock(p);
rnDirective: a := parseDotDot(p);
rnEnumList: a := parseEnumList(p);
rnLeaf: begin
rstMessage(p, errNewSectionExpected);
end;
rnParagraph: begin end;
rnDefList: a := parseDefinitionList(p);
rnFieldList: begin
// step back one token before handing over to parseFields
// (presumably onto the tkIndent that parseFields expects -- TODO confirm)
dec(p.idx);
a := parseFields(p);
end;
rnTransition: a := parseTransition(p);
rnHeadline: a := parseHeadline(p);
rnOverline: a := parseOverline(p);
rnTable: a := parseSimpleTable(p);
rnOptionList: a := parseOptionList(p);
else InternalError('rst.parseSection()');
end;
// fallback: anything not handled above is a paragraph; a directive that
// produced no node is deliberately not turned into a paragraph
if (a = nil) and (k <> rnDirective) then begin
a := newRstNode(rnParagraph);
parseParagraph(p, a);
end;
addSonIfNotNil(result, a);
end;
// a leading paragraph that is not followed by another paragraph is turned
// into a plain rnInner node (sonKind yields rnLeaf for a missing son)
if (sonKind(result, 0) = rnParagraph)
and (sonKind(result, 1) <> rnParagraph) then
result.sons[0].kind := rnInner;
end;
function parseSectionWrapper(var p: TRstParser): PRstNode;
// Parses a section into a fresh rnInner node. rnInner nodes that have
// exactly one son are then unwrapped repeatedly, so the innermost
// meaningful node is returned.
var
  node: PRstNode;
begin
  node := newRstNode(rnInner);
  parseSection(p, node);
  while (node.kind = rnInner) and (rsonsLen(node) = 1) do
    node := node.sons[0];
  result := node
end;
function parseDoc(var p: TRstParser): PRstNode;
// Parses the whole token stream of `p` and returns the resulting tree.
// Reports errGeneralParseError if parsing stopped before the tkEof token.
begin
  result := parseSectionWrapper(p);
  if p.tok[p.idx].kind = tkEof then exit;
  rstMessage(p, errGeneralParseError);
end;
type
// Flags that tell parseDirective which parts a directive has:
//   hasArg     - an argument follows the directive name
//   hasOptions - a field list with options may follow
//   argIsFile  - the argument is collected token by token from consecutive
//                word, "other", punctuation and adornment tokens instead of
//                being parsed as a line
TDirFlag = (hasArg, hasOptions, argIsFile);
TDirFlags = set of TDirFlag;
// Signature of the parser that parseDirective calls for the indented
// content of a directive.
TSectionParser = function (var p: TRstParser): PRstNode;
function parseDirective(var p: TRstParser; flags: TDirFlags;
                        contentParser: TSectionParser): PRstNode;
// Generic directive parser. The returned rnDirective node always has
// exactly three sons, any of which may be nil:
//   sons[0] - the argument (rnDirArg), present if `hasArg` is in `flags`
//   sons[1] - the options as returned by parseFields, parsed only if
//             `hasOptions` is in `flags` and an indented line (indentation
//             of at least 3) starting with ':' follows
//   sons[2] - the content, parsed with `contentParser` if one is given and
//             the next line is indented deeper than the current indentation
var
  args, options, content: PRstNode;
begin
  result := newRstNode(rnDirective);
  args := nil;
  options := nil;
  content := nil;

  if hasArg in flags then begin
    args := newRstNode(rnDirArg);
    if not (argIsFile in flags) then
      parseLine(p, args)
    else
      // a file name: take tokens up to the first whitespace/line end
      while p.tok[p.idx].kind in [tkWord, tkOther, tkPunct, tkAdornment] do
      begin
        addSon(args, newLeaf(p));
        inc(p.idx);
      end
  end;
  addSon(result, args);

  if (hasOptions in flags)
      and (p.tok[p.idx].kind = tkIndent) and (p.tok[p.idx].ival >= 3)
      and (p.tok[p.idx+1].symbol = ':'+'') then
    options := parseFields(p);
  addSon(result, options);

  if (assigned(contentParser)) and (p.tok[p.idx].kind = tkIndent)
      and (p.tok[p.idx].ival > currInd(p)) then begin
    pushInd(p, p.tok[p.idx].ival);
    content := contentParser(p);
    popInd(p);
  end;
  addSon(result, content);
end;
function dirInclude(var p: TRstParser): PRstNode;
(*
The following options are recognized:
start-after : text to find in the external data file
Only the content after the first occurrence of the specified text will
be included.
end-before : text to find in the external data file
Only the content before the first occurrence of the specified text
(but after any after text) will be included.
literal : flag (empty)
The entire included text is inserted into the document as a single
literal block (useful for program listings).
encoding : name of text encoding
The text encoding of the external data file. Defaults to the document's
encoding (if specified).
*)
// Of the options listed above this implementation only looks at `literal`.
// Returns nil (after reporting errCannotOpenFile) if the file is not found;
// otherwise either a literal block with the file's text or the tree
// obtained by parsing the file with a sub-parser sharing the state `p.s`.
var
  n: PRstNode;
  filename, path: string;
  q: TRstParser;
begin
  result := nil;
  n := parseDirective(p, {@set}[hasArg, argIsFile, hasOptions], nil);
  filename := strip(addNodes(n.sons[0]));
  path := findFile(filename);
  if path = '' then begin
    rstMessage(p, errCannotOpenFile, filename);
    exit
  end;
  // XXX: error handling; recursive file inclusion!
  if getFieldValue(n, 'literal') = '' then begin
    initParser(q, p.s);
    q.filename := filename;
    getTokens(readFile(path), false, q.tok);
    // workaround a GCC bug:
    if find(q.tok[high(q.tok)].symbol, #0#1#2) > 0 then begin
      InternalError('Too many binary zeros in include file');
    end;
    result := parseDoc(q);
  end
  else begin
    result := newRstNode(rnLiteralBlock);
    addSon(result, newRstNode(rnLeaf, readFile(path)));
  end
end;
function dirCodeBlock(var p: TRstParser): PRstNode;
// Parses a code block directive. The directive's content is parsed as a
// literal block and the resulting node gets the kind rnCodeBlock.
// If a `file` option is given, the content (sons[2]) is replaced by a
// literal block holding the text of that file. If the file cannot be
// found, errCannotOpenFile is reported and the parsed content is kept
// (no attempt is made to read from an empty path), matching the handling
// in dirInclude and dirRaw.
var
  n: PRstNode;
  filename, path: string;
begin
  result := parseDirective(p, {@set}[hasArg, hasOptions], parseLiteralBlock);
  filename := strip(getFieldValue(result, 'file'));
  if filename <> '' then begin
    path := findFile(filename);
    if path = '' then
      rstMessage(p, errCannotOpenFile, filename)
    else begin
      n := newRstNode(rnLiteralBlock);
      addSon(n, newRstNode(rnLeaf, readFile(path)));
      result.sons[2] := n;
    end
  end;
  result.kind := rnCodeBlock;
end;
function dirContainer(var p: TRstParser): PRstNode;
// ``container`` directive: takes an argument, its content is parsed as a
// section. The directive node is re-tagged as rnContainer.
var
  node: PRstNode;
begin
  node := parseDirective(p, {@set}[hasArg], parseSectionWrapper);
  assert(node.kind = rnDirective);
  assert(rsonsLen(node) = 3);
  node.kind := rnContainer;
  result := node;
end;
function dirImage(var p: TRstParser): PRstNode;
// Image directive: a file argument plus options, no content. The directive
// node is re-tagged as rnImage.
var
  node: PRstNode;
begin
  node := parseDirective(p, {@set}[hasOptions, hasArg, argIsFile], nil);
  node.kind := rnImage;
  result := node
end;
function dirFigure(var p: TRstParser): PRstNode;
// Figure directive: a file argument plus options; the content is parsed as
// a section. The directive node is re-tagged as rnFigure.
var
  node: PRstNode;
begin
  node := parseDirective(p, {@set}[hasOptions, hasArg, argIsFile],
                         parseSectionWrapper);
  node.kind := rnFigure;
  result := node
end;
function dirTitle(var p: TRstParser): PRstNode;
// Title directive: only an argument, no options and no content. The
// directive node is re-tagged as rnTitle.
var
  node: PRstNode;
begin
  node := parseDirective(p, {@set}[hasArg], nil);
  node.kind := rnTitle;
  result := node
end;
function dirContents(var p: TRstParser): PRstNode;
// Contents directive: only an argument, no options and no content. The
// directive node is re-tagged as rnContents.
var
  node: PRstNode;
begin
  node := parseDirective(p, {@set}[hasArg], nil);
  node.kind := rnContents;
  result := node
end;
function dirIndex(var p: TRstParser): PRstNode;
// Index directive: neither argument nor options; the content is parsed as
// a section. The directive node is re-tagged as rnIndex.
var
  node: PRstNode;
begin
  node := parseDirective(p, {@set}[], parseSectionWrapper);
  node.kind := rnIndex;
  result := node
end;
function dirRaw(var p: TRstParser): PRstNode;
(*
The following options are recognized:
file : string (newlines removed)
The local filesystem path of a raw data file to be included.
url : string (whitespace removed)
An Internet URL reference to a raw data file to be included.
encoding : name of text encoding
The text encoding of the external raw data (file or URL).
Defaults to the document's encoding (if specified).
*)
// Of the options listed above this implementation only looks at `file`.
// Without a `file` option the parsed directive (re-tagged as rnRaw) is
// returned. With it, a new rnRaw node holding the file's text as its only
// son replaces the directive node; if the file cannot be found,
// errCannotOpenFile is reported and the directive node is returned.
var
  filename, path, data: string;
begin
  result := parseDirective(p, {@set}[hasOptions], parseSectionWrapper);
  result.kind := rnRaw;
  filename := getFieldValue(result, 'file');
  if filename = '' then exit;
  path := findFile(filename);
  if path = '' then begin
    rstMessage(p, errCannotOpenFile, filename);
    exit
  end;
  data := readFile(path);
  result := newRstNode(rnRaw);
  addSon(result, newRstNode(rnLeaf, data));
end;
function parseDotDot(var p: TRstParser): PRstNode;
// Parses a construct introduced by '..'; the current token is the '..'.
//
//   * a known directive: dispatched to the matching dirXXX routine with the
//     column of the '..' pushed as indentation; an unknown directive name
//     is reported with errInvalidDirectiveX and yields nil
//   * ' _'  hyperlink target: registered with setRef, yields nil
//   * ' |'  substitution definition (`replace` or `image`): registered with
//           setSub, yields nil
//   * ' ['  footnote or citation: registered with setRef, yields nil
//   * anything else is parsed as a comment
var
  d: string;
  col: int;
  a, b: PRstNode;
begin
  result := nil;
  col := p.tok[p.idx].col;
  inc(p.idx);
  d := getDirective(p);
  if d <> '' then begin
    pushInd(p, col);
    case getDirKind(d) of
      dkInclude: result := dirInclude(p);
      dkImage: result := dirImage(p);
      dkFigure: result := dirFigure(p);
      dkTitle: result := dirTitle(p);
      dkContainer: result := dirContainer(p);
      dkContents: result := dirContents(p);
      dkRaw: result := dirRaw(p);
      dkCodeblock: result := dirCodeBlock(p);
      dkIndex: result := dirIndex(p);
      else rstMessage(p, errInvalidDirectiveX, d);
    end;
    popInd(p);
  end
  else if match(p, p.idx, ' _') then begin
    // hyperlink target:
    inc(p.idx, 2);
    a := getReferenceName(p, ':'+'');
    if p.tok[p.idx].kind = tkWhite then inc(p.idx);
    b := untilEol(p);
    setRef(p, rstnodeToRefname(a), b);
  end
  else if match(p, p.idx, ' |') then begin
    // substitution definitions:
    inc(p.idx, 2);
    a := getReferenceName(p, '|'+'');
    if p.tok[p.idx].kind = tkWhite then inc(p.idx);
    if cmpIgnoreStyle(p.tok[p.idx].symbol, 'replace') = 0 then begin
      inc(p.idx);
      expect(p, '::');
      b := untilEol(p);
    end
    else if cmpIgnoreStyle(p.tok[p.idx].symbol, 'image') = 0 then begin
      inc(p.idx);
      b := dirImage(p);
    end
    else begin
      rstMessage(p, errInvalidDirectiveX, p.tok[p.idx].symbol);
      // make sure a well-defined value is registered below if the message
      // routine returns; `b` would otherwise be uninitialized
      b := nil;
    end;
    setSub(p, addNodes(a), b);
  end
  else if match(p, p.idx, ' [') then begin
    // footnotes, citations
    inc(p.idx, 2);
    a := getReferenceName(p, ']'+'');
    if p.tok[p.idx].kind = tkWhite then inc(p.idx);
    b := untilEol(p);
    setRef(p, rstnodeToRefname(a), b);
  end
  else
    result := parseComment(p);
end;
function resolveSubs(var p: TRstParser; n: PRstNode): PRstNode;
// Post-processing pass over the parsed tree; returns the node that should
// replace `n` (which is `n` itself unless stated otherwise).
//   * substitution reference: replaced by the registered substitution
//     value; failing that by a leaf holding the value of the environment
//     variable of that name; failing that warnUnknownSubstitutionX is
//     reported
//   * reference with a known target: wrapped into an rnHyperlink whose sons
//     are the reference (re-tagged rnInner) and the target
//   * contents node: records in `p.hasToc` that the document has a TOC
//   * leaf: left alone
//   * everything else: the sons are processed recursively in place
var
  k, subIdx: int;
  target: PRstNode;
  envVal, subName: string;
begin
  result := n;
  if n = nil then exit;
  case n.kind of
    rnLeaf: begin end;
    rnContents: p.hasToc := true;
    rnSubstitutionReferences: begin
      subIdx := findSub(p, n);
      if subIdx < 0 then begin
        subName := addNodes(n);
        envVal := getEnv(subName);
        if envVal = '' then
          rstMessage(p, warnUnknownSubstitutionX, subName)
        else
          result := newRstNode(rnLeaf, envVal);
      end
      else
        result := p.s.subs[subIdx].value
    end;
    rnRef: begin
      target := findRef(p, rstnodeToRefname(n));
      if target <> nil then begin
        result := newRstNode(rnHyperlink);
        n.kind := rnInner;
        addSon(result, n);
        addSon(result, target);
      end
    end;
    else begin
      for k := 0 to rsonsLen(n)-1 do
        n.sons[k] := resolveSubs(p, n.sons[k]);
    end
  end
end;
function rstParse(const text: string; // the text to be parsed
                  skipPounds: bool;
                  const filename: string; // for error messages
                  line, column: int;
                  var hasToc: bool): PRstNode;
// Entry point of the parser: tokenizes `text`, parses it with a fresh
// shared state and resolves substitutions and references in the result.
// `line` and `column` are stored in the parser as its starting position.
// `hasToc` receives the parser's `hasToc` flag after processing.
// A nil `text` is reported with errCannotOpenFile.
var
  parser: TRstParser;
  doc: PRstNode;
begin
  if isNil(text) then
    rawMessage(errCannotOpenFile, filename);
  initParser(parser, newSharedState());
  parser.filename := filename;
  parser.line := line;
  parser.col := column;
  getTokens(text, skipPounds, parser.tok);
  doc := parseDoc(parser);
  result := resolveSubs(parser, doc);
  hasToc := parser.hasToc;
end;
end.