diff options
author | Araq <rumpf_a@web.de> | 2011-04-12 01:13:42 +0200 |
---|---|---|
committer | Araq <rumpf_a@web.de> | 2011-04-12 01:13:42 +0200 |
commit | cd292568d775d55d9abb51e962882ecda12c03a9 (patch) | |
tree | 85451f0e1f17dc0463350915f12bdd0a82a73455 /nim/rst.pas | |
parent | 46c41e43690cba9bc1caff6a994bb6915df8a1b7 (diff) | |
download | Nim-cd292568d775d55d9abb51e962882ecda12c03a9.tar.gz |
big repo cleanup
Diffstat (limited to 'nim/rst.pas')
-rwxr-xr-x | nim/rst.pas | 2184 |
1 files changed, 0 insertions, 2184 deletions
diff --git a/nim/rst.pas b/nim/rst.pas deleted file mode 100755 index 89ef2c501..000000000 --- a/nim/rst.pas +++ /dev/null @@ -1,2184 +0,0 @@ -// -// -// The Nimrod Compiler -// (c) Copyright 2009 Andreas Rumpf -// -// See the file "copying.txt", included in this -// distribution, for details about the copyright. -// -unit rst; - -// This module implements a *reStructuredText* parser. A larget -// subset is provided. - -interface - -{$include 'config.inc'} - -uses - nsystem, nos, msgs, strutils, platform, nhashes, ropes, charsets, options; - -type - TRstNodeKind = ( - rnInner, // an inner node or a root - rnHeadline, // a headline - rnOverline, // an over- and underlined headline - rnTransition, // a transition (the ------------- <hr> thingie) - rnParagraph, // a paragraph - - rnBulletList, // a bullet list - rnBulletItem, // a bullet item - rnEnumList, // an enumerated list - rnEnumItem, // an enumerated item - - rnDefList, // a definition list - rnDefItem, // an item of a definition list consisting of ... - rnDefName, // ... a name part ... - rnDefBody, // ... and a body part ... - - rnFieldList, // a field list - rnField, // a field item - rnFieldName, // consisting of a field name ... - rnFieldBody, // ... and a field body - - rnOptionList, - rnOptionListItem, - rnOptionGroup, - rnOption, - rnOptionString, - rnOptionArgument, - rnDescription, - - rnLiteralBlock, - rnQuotedLiteralBlock, - - rnLineBlock, // the | thingie - rnLineBlockItem, // sons of the | thing - - rnBlockQuote, // text just indented - - rnTable, - rnGridTable, - rnTableRow, - rnTableHeaderCell, - rnTableDataCell, - - rnLabel, // used for footnotes and other things - rnFootnote, // a footnote - - rnCitation, // similar to footnote - - rnStandaloneHyperlink, - rnHyperlink, - rnRef, - rnDirective, // a directive - rnDirArg, - rnRaw, - rnTitle, - rnContents, - rnImage, - rnFigure, - rnCodeBlock, - rnContainer, // ``container`` directive - rnIndex, // index directve: - // .. index:: - // key - // * `file#id <file#id>`_ - // * `file#id <file#id>'_ - - rnSubstitutionDef, // a definition of a substitution - - rnGeneralRole, - // Inline markup: - rnSub, - rnSup, - rnIdx, - rnEmphasis, // "*" - rnStrongEmphasis, // "**" - rnInterpretedText, // "`" - rnInlineLiteral, // "``" - rnSubstitutionReferences, // "|" - - rnLeaf // a leaf; the node's text field contains the leaf val - ); -const - rstnodekindToStr: array [TRstNodeKind] of string = ( - 'Inner', 'Headline', 'Overline', 'Transition', 'Paragraph', - 'BulletList', 'BulletItem', 'EnumList', 'EnumItem', 'DefList', 'DefItem', - 'DefName', 'DefBody', 'FieldList', 'Field', 'FieldName', 'FieldBody', - 'OptionList', 'OptionListItem', 'OptionGroup', 'Option', 'OptionString', - 'OptionArgument', 'Description', 'LiteralBlock', 'QuotedLiteralBlock', - 'LineBlock', 'LineBlockItem', 'BlockQuote', 'Table', 'GridTable', - 'TableRow', 'TableHeaderCell', 'TableDataCell', 'Label', 'Footnote', - 'Citation', 'StandaloneHyperlink', 'Hyperlink', 'Ref', 'Directive', - 'DirArg', 'Raw', 'Title', 'Contents', 'Image', 'Figure', 'CodeBlock', - 'Container', 'Index', 'SubstitutionDef', 'GeneralRole', - 'Sub', 'Sup', 'Idx', 'Emphasis', 'StrongEmphasis', 'InterpretedText', - 'InlineLiteral', 'SubstitutionReferences', 'Leaf' - ); - -type - // the syntax tree of RST: - PRSTNode = ^TRstNode; - TRstNodeSeq = array of PRstNode; - TRSTNode = record - kind: TRstNodeKind; - text: string; // valid for leafs in the AST; and the title of - // the document or the section - level: int; // valid for some node kinds - sons: TRstNodeSeq; // the node's sons - end {@acyclic}; - - -function rstParse(const text: string; // the text to be parsed - skipPounds: bool; - const filename: string; // for error messages - line, column: int; - var hasToc: bool): PRstNode; -function rsonsLen(n: PRstNode): int; -function newRstNode(kind: TRstNodeKind): PRstNode; overload; -function newRstNode(kind: TRstNodeKind; const s: string): PRstNode; overload; -procedure addSon(father, son: PRstNode); - -function rstnodeToRefname(n: PRstNode): string; - -function addNodes(n: PRstNode): string; - -function getFieldValue(n: PRstNode; const fieldname: string): string; -function getArgument(n: PRstNode): string; - -// index handling: -procedure setIndexPair(index, key, val: PRstNode); -procedure sortIndex(a: PRstNode); -procedure clearIndex(index: PRstNode; const filename: string); - - -implementation - -// ----------------------------- scanner part -------------------------------- - -const - SymChars: TCharSet = ['a'..'z', 'A'..'Z', '0'..'9', #128..#255]; - -type - TTokType = (tkEof, tkIndent, tkWhite, tkWord, tkAdornment, tkPunct, tkOther); - TToken = record // a RST token - kind: TTokType; // the type of the token - ival: int; // the indentation or parsed integer value - symbol: string; // the parsed symbol as string - line, col: int; // line and column of the token - end; - TTokenSeq = array of TToken; - TLexer = object(NObject) - buf: PChar; - bufpos: int; - line, col, baseIndent: int; - skipPounds: bool; - end; - -procedure getThing(var L: TLexer; var tok: TToken; const s: TCharSet); -var - pos: int; -begin - tok.kind := tkWord; - tok.line := L.line; - tok.col := L.col; - pos := L.bufpos; - while True do begin - addChar(tok.symbol, L.buf[pos]); - inc(pos); - if not (L.buf[pos] in s) then break - end; - inc(L.col, pos - L.bufpos); - L.bufpos := pos; -end; - -procedure getAdornment(var L: TLexer; var tok: TToken); -var - pos: int; - c: char; -begin - tok.kind := tkAdornment; - tok.line := L.line; - tok.col := L.col; - pos := L.bufpos; - c := L.buf[pos]; - while True do begin - addChar(tok.symbol, L.buf[pos]); - inc(pos); - if L.buf[pos] <> c then break - end; - inc(L.col, pos - L.bufpos); - L.bufpos := pos -end; - -function getIndentAux(var L: TLexer; start: int): int; -var - buf: PChar; - pos: int; -begin - pos := start; - buf := L.buf; - // skip the newline (but include it in the token!) - if buf[pos] = #13 then begin - if buf[pos+1] = #10 then inc(pos, 2) else inc(pos); - end - else if buf[pos] = #10 then inc(pos); - if L.skipPounds then begin - if buf[pos] = '#' then inc(pos); - if buf[pos] = '#' then inc(pos); - end; - result := 0; - while True do begin - case buf[pos] of - ' ', #11, #12: begin - inc(pos); - inc(result); - end; - #9: begin - inc(pos); - result := result - (result mod 8) + 8; - end; - else break; // EndOfFile also leaves the loop - end; - end; - if buf[pos] = #0 then result := 0 - else if (buf[pos] = #10) or (buf[pos] = #13) then begin - // look at the next line for proper indentation: - result := getIndentAux(L, pos); - end; - L.bufpos := pos; // no need to set back buf -end; - -procedure getIndent(var L: TLexer; var tok: TToken); -begin - inc(L.line); - tok.line := L.line; - tok.col := 0; - tok.kind := tkIndent; - // skip the newline (but include it in the token!) - tok.ival := getIndentAux(L, L.bufpos); - L.col := tok.ival; - tok.ival := max(tok.ival - L.baseIndent, 0); - tok.symbol := nl +{&} repeatChar(tok.ival); -end; - -procedure rawGetTok(var L: TLexer; var tok: TToken); -var - c: Char; -begin - tok.symbol := ''; - tok.ival := 0; - c := L.buf[L.bufpos]; - case c of - 'a'..'z', 'A'..'Z', #128..#255, '0'..'9': getThing(L, tok, SymChars); - ' ', #9, #11, #12: begin - getThing(L, tok, {@set}[' ', #9]); - tok.kind := tkWhite; - if L.buf[L.bufpos] in [#13, #10] then - rawGetTok(L, tok); // ignore spaces before \n - end; - #13, #10: getIndent(L, tok); - '!', '"', '#', '$', '%', '&', '''', - '(', ')', '*', '+', ',', '-', '.', '/', - ':', ';', '<', '=', '>', '?', '@', '[', '\', ']', - '^', '_', '`', '{', '|', '}', '~': begin - getAdornment(L, tok); - if length(tok.symbol) <= 3 then tok.kind := tkPunct; - end; - else begin - tok.line := L.line; - tok.col := L.col; - if c = #0 then - tok.kind := tkEof - else begin - tok.kind := tkOther; - addChar(tok.symbol, c); - inc(L.bufpos); - inc(L.col); - end - end - end; - tok.col := max(tok.col - L.baseIndent, 0); -end; - -procedure getTokens(const buffer: string; skipPounds: bool; - var tokens: TTokenSeq); -var - L: TLexer; - len: int; -begin -{@ignore} - fillChar(L, sizeof(L), 0); -{@emit} - len := length(tokens); - L.buf := PChar(buffer); - L.line := 1; - // skip UTF-8 BOM - if (L.buf[0] = #239) and (L.buf[1] = #187) and (L.buf[2] = #191) then - inc(L.bufpos, 3); - L.skipPounds := skipPounds; - if skipPounds then begin - if L.buf[L.bufpos] = '#' then inc(L.bufpos); - if L.buf[L.bufpos] = '#' then inc(L.bufpos); - L.baseIndent := 0; - while L.buf[L.bufpos] = ' ' do begin - inc(L.bufpos); - inc(L.baseIndent); - end - end; - while true do begin - inc(len); - setLength(tokens, len); - rawGetTok(L, tokens[len-1]); - if tokens[len-1].kind = tkEof then break; - end; - if tokens[0].kind = tkWhite then begin // BUGFIX - tokens[0].ival := length(tokens[0].symbol); - tokens[0].kind := tkIndent - end -end; - -// -------------------------------------------------------------------------- - -procedure addSon(father, son: PRstNode); -var - L: int; -begin - L := length(father.sons); - setLength(father.sons, L+1); - father.sons[L] := son; -end; - -procedure addSonIfNotNil(father, son: PRstNode); -begin - if son <> nil then addSon(father, son); -end; - -function rsonsLen(n: PRstNode): int; -begin - result := length(n.sons) -end; - -function newRstNode(kind: TRstNodeKind): PRstNode; overload; -begin - new(result); -{@ignore} - fillChar(result^, sizeof(result^), 0); -{@emit - result.sons := @[]; -} - result.kind := kind; -end; - -function newRstNode(kind: TRstNodeKind; const s: string): PRstNode; overload; -begin - result := newRstNode(kind); - result.text := s; -end; - -// --------------------------------------------------------------------------- -type - TLevelMap = array [Char] of int; - TSubstitution = record - key: string; - value: PRstNode; - end; - TSharedState = record - uLevel, oLevel: int; // counters for the section levels - subs: array of TSubstitution; // substitutions - refs: array of TSubstitution; // references - underlineToLevel: TLevelMap; - // Saves for each possible title adornment character its level in the - // current document. This is for single underline adornments. - overlineToLevel: TLevelMap; - // Saves for each possible title adornment character its level in the - // current document. This is for over-underline adornments. - end; - PSharedState = ^TSharedState; - TRstParser = object(NObject) - idx: int; - tok: TTokenSeq; - s: PSharedState; - indentStack: array of int; - filename: string; - line, col: int; - hasToc: bool; - end; - -function newSharedState(): PSharedState; -begin - new(result); -{@ignore} - fillChar(result^, sizeof(result^), 0); -{@emit} - {@emit - result.subs := @[];} - {@emit - result.refs := @[];} -end; - -function tokInfo(const p: TRstParser; const tok: TToken): TLineInfo; -begin - result := newLineInfo(p.filename, p.line+tok.line, p.col+tok.col); -end; - -procedure rstMessage(const p: TRstParser; msgKind: TMsgKind; - const arg: string); overload; -begin - liMessage(tokInfo(p, p.tok[p.idx]), msgKind, arg); -end; - -procedure rstMessage(const p: TRstParser; msgKind: TMsgKind); overload; -begin - liMessage(tokInfo(p, p.tok[p.idx]), msgKind, p.tok[p.idx].symbol); -end; - -function currInd(const p: TRstParser): int; -begin - result := p.indentStack[high(p.indentStack)]; -end; - -procedure pushInd(var p: TRstParser; ind: int); -var - len: int; -begin - len := length(p.indentStack); - setLength(p.indentStack, len+1); - p.indentStack[len] := ind; -end; - -procedure popInd(var p: TRstParser); -begin - if length(p.indentStack) > 1 then - setLength(p.indentStack, length(p.indentStack)-1); -end; - -procedure initParser(var p: TRstParser; sharedState: PSharedState); -begin - {@ignore} - fillChar(p, sizeof(p), 0); - p.tok := nil; - p.indentStack := nil; - pushInd(p, 0); - {@emit - p.indentStack := @[0];} - {@emit - p.tok := @[];} - p.idx := 0; - p.filename := ''; - p.hasToc := false; - p.col := 0; - p.line := 1; - p.s := sharedState; -end; - -// --------------------------------------------------------------- - -procedure addNodesAux(n: PRstNode; var result: string); -var - i: int; -begin - if n.kind = rnLeaf then - add(result, n.text) - else begin - for i := 0 to rsonsLen(n)-1 do - addNodesAux(n.sons[i], result) - end -end; - -function addNodes(n: PRstNode): string; -begin - result := ''; - addNodesAux(n, result); -end; - -procedure rstnodeToRefnameAux(n: PRstNode; var r: string; var b: bool); -var - i: int; -begin - if n.kind = rnLeaf then begin - for i := strStart to length(n.text)+strStart-1 do begin - case n.text[i] of - '0'..'9': begin - if b then begin addChar(r, '-'); b := false; end; - // BUGFIX: HTML id's cannot start with a digit - if length(r) = 0 then addChar(r, 'Z'); - addChar(r, n.text[i]) - end; - 'a'..'z': begin - if b then begin addChar(r, '-'); b := false; end; - addChar(r, n.text[i]) - end; - 'A'..'Z': begin - if b then begin addChar(r, '-'); b := false; end; - addChar(r, chr(ord(n.text[i]) - ord('A') + ord('a'))); - end; - else if (length(r) > 0) then b := true; - end - end - end - else begin - for i := 0 to rsonsLen(n)-1 do rstnodeToRefnameAux(n.sons[i], r, b) - end -end; - -function rstnodeToRefname(n: PRstNode): string; -var - b: bool; -begin - result := ''; - b := false; - rstnodeToRefnameAux(n, result, b); -end; - -function findSub(var p: TRstParser; n: PRstNode): int; -var - key: string; - i: int; -begin - key := addNodes(n); - // the spec says: if no exact match, try one without case distinction: - for i := 0 to high(p.s.subs) do - if key = p.s.subs[i].key then begin - result := i; exit - end; - for i := 0 to high(p.s.subs) do - if cmpIgnoreStyle(key, p.s.subs[i].key) = 0 then begin - result := i; exit - end; - result := -1 -end; - -procedure setSub(var p: TRstParser; const key: string; value: PRstNode); -var - i, len: int; -begin - len := length(p.s.subs); - for i := 0 to len-1 do - if key = p.s.subs[i].key then begin - p.s.subs[i].value := value; exit - end; - setLength(p.s.subs, len+1); - p.s.subs[len].key := key; - p.s.subs[len].value := value; -end; - -procedure setRef(var p: TRstParser; const key: string; value: PRstNode); -var - i, len: int; -begin - len := length(p.s.refs); - for i := 0 to len-1 do - if key = p.s.refs[i].key then begin - p.s.refs[i].value := value; - rstMessage(p, warnRedefinitionOfLabel, key); - exit - end; - setLength(p.s.refs, len+1); - p.s.refs[len].key := key; - p.s.refs[len].value := value; -end; - -function findRef(var p: TRstParser; const key: string): PRstNode; -var - i: int; -begin - for i := 0 to high(p.s.refs) do - if key = p.s.refs[i].key then begin - result := p.s.refs[i].value; exit - end; - result := nil -end; - -function cmpNodes(a, b: PRstNode): int; -var - x, y: PRstNode; -begin - assert(a.kind = rnDefItem); - assert(b.kind = rnDefItem); - x := a.sons[0]; - y := b.sons[0]; - result := cmpIgnoreStyle(addNodes(x), addNodes(y)) -end; - -procedure sortIndex(a: PRstNode); -// we use shellsort here; fast and simple -var - N, i, j, h: int; - v: PRstNode; -begin - assert(a.kind = rnDefList); - N := rsonsLen(a); - h := 1; repeat h := 3*h+1; until h > N; - repeat - h := h div 3; - for i := h to N-1 do begin - v := a.sons[i]; j := i; - while cmpNodes(a.sons[j-h], v) >= 0 do begin - a.sons[j] := a.sons[j-h]; j := j - h; - if j < h then break - end; - a.sons[j] := v; - end; - until h = 1 -end; - -function eqRstNodes(a, b: PRstNode): bool; -var - i: int; -begin - result := false; - if a.kind <> b.kind then exit; - if a.kind = rnLeaf then - result := a.text = b.text - else begin - if rsonsLen(a) <> rsonsLen(b) then exit; - for i := 0 to rsonsLen(a)-1 do - if not eqRstNodes(a.sons[i], b.sons[i]) then exit; - result := true - end -end; - -function matchesHyperlink(h: PRstNode; const filename: string): bool; -var - s: string; -begin - if h.kind = rnInner then begin - assert(rsonsLen(h) = 1); - result := matchesHyperlink(h.sons[0], filename) - end - else if h.kind = rnHyperlink then begin - s := addNodes(h.sons[1]); - if startsWith(s, filename) and (s[length(filename)+strStart] = '#') then - result := true - else - result := false - end - else // this may happen in broken indexes! - result := false -end; - -procedure clearIndex(index: PRstNode; const filename: string); -var - i, j, k, items, lastItem: int; - val: PRstNode; -begin - assert(index.kind = rnDefList); - for i := 0 to rsonsLen(index)-1 do begin - assert(index.sons[i].sons[1].kind = rnDefBody); - val := index.sons[i].sons[1].sons[0]; - if val.kind = rnInner then val := val.sons[0]; - if val.kind = rnBulletList then begin - items := rsonsLen(val); - lastItem := -1; // save the last valid item index - for j := 0 to rsonsLen(val)-1 do begin - if val.sons[j] = nil then - dec(items) - else if matchesHyperlink(val.sons[j].sons[0], filename) then begin - val.sons[j] := nil; - dec(items) - end - else lastItem := j - end; - if items = 1 then // remove bullet list: - index.sons[i].sons[1].sons[0] := val.sons[lastItem].sons[0] - else if items = 0 then - index.sons[i] := nil - end - else if matchesHyperlink(val, filename) then - index.sons[i] := nil - end; - // remove nil nodes: - k := 0; - for i := 0 to rsonsLen(index)-1 do begin - if index.sons[i] <> nil then begin - if k <> i then index.sons[k] := index.sons[i]; - inc(k) - end - end; - setLength(index.sons, k); -end; - -procedure setIndexPair(index, key, val: PRstNode); -var - i: int; - e, a, b: PRstNode; -begin - // writeln(rstnodekindToStr[key.kind], ': ', rstnodekindToStr[val.kind]); - assert(index.kind = rnDefList); - assert(key.kind <> rnDefName); - a := newRstNode(rnDefName); - addSon(a, key); - - for i := 0 to rsonsLen(index)-1 do begin - if eqRstNodes(index.sons[i].sons[0], a) then begin - assert(index.sons[i].sons[1].kind = rnDefBody); - e := index.sons[i].sons[1].sons[0]; - if e.kind <> rnBulletList then begin - e := newRstNode(rnBulletList); - b := newRstNode(rnBulletItem); - addSon(b, index.sons[i].sons[1].sons[0]); - addSon(e, b); - index.sons[i].sons[1].sons[0] := e; - end; - b := newRstNode(rnBulletItem); - addSon(b, val); - addSon(e, b); - - exit // key already exists - end - end; - e := newRstNode(rnDefItem); - assert(val.kind <> rnDefBody); - b := newRstNode(rnDefBody); - addSon(b, val); - addSon(e, a); - addSon(e, b); - addSon(index, e); -end; - -// --------------------------------------------------------------------------- - -function newLeaf(var p: TRstParser): PRstNode; -begin - result := newRstNode(rnLeaf, p.tok[p.idx].symbol) -end; - -function getReferenceName(var p: TRstParser; const endStr: string): PRstNode; -var - res: PRstNode; -begin - res := newRstNode(rnInner); - while true do begin - case p.tok[p.idx].kind of - tkWord, tkOther, tkWhite: addSon(res, newLeaf(p)); - tkPunct: - if p.tok[p.idx].symbol = endStr then begin inc(p.idx); break end - else addSon(res, newLeaf(p)); - else begin - rstMessage(p, errXexpected, endStr); - break - end - end; - inc(p.idx); - end; - result := res; -end; - -function untilEol(var p: TRstParser): PRstNode; -begin - result := newRstNode(rnInner); - while not (p.tok[p.idx].kind in [tkIndent, tkEof]) do begin - addSon(result, newLeaf(p)); inc(p.idx); - end -end; - -procedure expect(var p: TRstParser; const tok: string); -begin - if p.tok[p.idx].symbol = tok then inc(p.idx) - else rstMessage(p, errXexpected, tok) -end; - -(* - From the specification: - - The inline markup start-string and end-string recognition rules are as - follows. If any of the conditions are not met, the start-string or end-string - will not be recognized or processed. - - 1. Inline markup start-strings must start a text block or be immediately - preceded by whitespace or one of: ' " ( [ { < - / : - 2. Inline markup start-strings must be immediately followed by - non-whitespace. - 3. Inline markup end-strings must be immediately preceded by non-whitespace. - 4. Inline markup end-strings must end a text block or be immediately - followed by whitespace or one of: ' " ) ] } > - / : . , ; ! ? \ - 5. If an inline markup start-string is immediately preceded by a single or - double quote, "(", "[", "{", or "<", it must not be immediately followed - by the corresponding single or double quote, ")", "]", "}", or ">". - 6. An inline markup end-string must be separated by at least one character - from the start-string. - 7. An unescaped backslash preceding a start-string or end-string will - disable markup recognition, except for the end-string of inline literals. - See Escaping Mechanism above for details. -*) -function isInlineMarkupEnd(const p: TRstParser; const markup: string): bool; -begin - result := p.tok[p.idx].symbol = markup; - if not result then exit; - // Rule 3: - result := not (p.tok[p.idx-1].kind in [tkIndent, tkWhite]); - if not result then exit; - // Rule 4: - result := (p.tok[p.idx+1].kind in [tkIndent, tkWhite, tkEof]) - or (p.tok[p.idx+1].symbol[strStart] in ['''', '"', ')', ']', '}', '>', - '-', '/', '\', ':', '.', ',', - ';', '!', '?', '_']); - if not result then exit; - // Rule 7: - if p.idx > 0 then begin - if (markup <> '``') and (p.tok[p.idx-1].symbol = '\'+'') then begin - result := false - end - end -end; - -function isInlineMarkupStart(const p: TRstParser; const markup: string): bool; -var - c, d: Char; -begin - result := p.tok[p.idx].symbol = markup; - if not result then exit; - // Rule 1: - result := (p.idx = 0) or (p.tok[p.idx-1].kind in [tkIndent, tkWhite]) - or (p.tok[p.idx-1].symbol[strStart] in ['''', '"', '(', '[', '{', '<', - '-', '/', ':', '_']); - if not result then exit; - // Rule 2: - result := not (p.tok[p.idx+1].kind in [tkIndent, tkWhite, tkEof]); - if not result then exit; - // Rule 5 & 7: - if p.idx > 0 then begin - if p.tok[p.idx-1].symbol = '\'+'' then - result := false - else begin - c := p.tok[p.idx-1].symbol[strStart]; - case c of - '''', '"': d := c; - '(': d := ')'; - '[': d := ']'; - '{': d := '}'; - '<': d := '>'; - else d := #0; - end; - if d <> #0 then - result := p.tok[p.idx+1].symbol[strStart] <> d; - end - end -end; - -procedure parseBackslash(var p: TRstParser; father: PRstNode); -begin - assert(p.tok[p.idx].kind = tkPunct); - if p.tok[p.idx].symbol = '\\' then begin - addSon(father, newRstNode(rnLeaf, '\'+'')); - inc(p.idx); - end - else if p.tok[p.idx].symbol = '\'+'' then begin - // XXX: Unicode? - inc(p.idx); - if p.tok[p.idx].kind <> tkWhite then addSon(father, newLeaf(p)); - inc(p.idx); - end - else begin - addSon(father, newLeaf(p)); - inc(p.idx) - end -end; - -function match(const p: TRstParser; start: int; const expr: string): bool; -// regular expressions are: -// special char exact match -// 'w' tkWord -// ' ' tkWhite -// 'a' tkAdornment -// 'i' tkIndent -// 'p' tkPunct -// 'T' always true -// 'E' whitespace, indent or eof -// 'e' tkWord or '#' (for enumeration lists) -var - i, j, last, len: int; - c: char; -begin - i := strStart; - j := start; - last := length(expr)+strStart-1; - while i <= last do begin - case expr[i] of - 'w': result := p.tok[j].kind = tkWord; - ' ': result := p.tok[j].kind = tkWhite; - 'i': result := p.tok[j].kind = tkIndent; - 'p': result := p.tok[j].kind = tkPunct; - 'a': result := p.tok[j].kind = tkAdornment; - 'o': result := p.tok[j].kind = tkOther; - 'T': result := true; - 'E': result := p.tok[j].kind in [tkEof, tkWhite, tkIndent]; - 'e': begin - result := (p.tok[j].kind = tkWord) or (p.tok[j].symbol = '#'+''); - if result then - case p.tok[j].symbol[strStart] of - 'a'..'z', 'A'..'Z': result := length(p.tok[j].symbol) = 1; - '0'..'9': result := allCharsInSet(p.tok[j].symbol, ['0'..'9']); - else begin end - end - end - else begin - c := expr[i]; - len := 0; - while (i <= last) and (expr[i] = c) do begin inc(i); inc(len) end; - dec(i); - result := (p.tok[j].kind in [tkPunct, tkAdornment]) - and (length(p.tok[j].symbol) = len) - and (p.tok[j].symbol[strStart] = c); - end - end; - if not result then exit; - inc(j); - inc(i) - end; - result := true -end; - -procedure fixupEmbeddedRef(n, a, b: PRstNode); -var - i, sep, incr: int; -begin - sep := -1; - for i := rsonsLen(n)-2 downto 0 do - if n.sons[i].text = '<'+'' then begin sep := i; break end; - if (sep > 0) and (n.sons[sep-1].text[strStart] = ' ') then incr := 2 - else incr := 1; - for i := 0 to sep-incr do addSon(a, n.sons[i]); - for i := sep+1 to rsonsLen(n)-2 do addSon(b, n.sons[i]); -end; - -function parsePostfix(var p: TRstParser; n: PRstNode): PRstNode; -var - a, b: PRstNode; -begin - result := n; - if isInlineMarkupEnd(p, '_'+'') then begin - inc(p.idx); - if (p.tok[p.idx-2].symbol ='`'+'') - and (p.tok[p.idx-3].symbol = '>'+'') then begin - a := newRstNode(rnInner); - b := newRstNode(rnInner); - fixupEmbeddedRef(n, a, b); - if rsonsLen(a) = 0 then begin - result := newRstNode(rnStandaloneHyperlink); - addSon(result, b); - end - else begin - result := newRstNode(rnHyperlink); - addSon(result, a); - addSon(result, b); - setRef(p, rstnodeToRefname(a), b); - end - end - else if n.kind = rnInterpretedText then - n.kind := rnRef - else begin - result := newRstNode(rnRef); - addSon(result, n); - end; - end - else if match(p, p.idx, ':w:') then begin - // a role: - if p.tok[p.idx+1].symbol = 'idx' then - n.kind := rnIdx - else if p.tok[p.idx+1].symbol = 'literal' then - n.kind := rnInlineLiteral - else if p.tok[p.idx+1].symbol = 'strong' then - n.kind := rnStrongEmphasis - else if p.tok[p.idx+1].symbol = 'emphasis' then - n.kind := rnEmphasis - else if (p.tok[p.idx+1].symbol = 'sub') - or (p.tok[p.idx+1].symbol = 'subscript') then - n.kind := rnSub - else if (p.tok[p.idx+1].symbol = 'sup') - or (p.tok[p.idx+1].symbol = 'supscript') then - n.kind := rnSup - else begin - result := newRstNode(rnGeneralRole); - n.kind := rnInner; - addSon(result, n); - addSon(result, newRstNode(rnLeaf, p.tok[p.idx+1].symbol)); - end; - inc(p.idx, 3) - end -end; - -function isURL(const p: TRstParser; i: int): bool; -begin - result := (p.tok[i+1].symbol = ':'+'') and (p.tok[i+2].symbol = '//') - and (p.tok[i+3].kind = tkWord) and (p.tok[i+4].symbol = '.'+'') -end; - -procedure parseURL(var p: TRstParser; father: PRstNode); -var - n: PRstNode; -begin - //if p.tok[p.idx].symbol[strStart] = '<' then begin - if isURL(p, p.idx) then begin - n := newRstNode(rnStandaloneHyperlink); - while true do begin - case p.tok[p.idx].kind of - tkWord, tkAdornment, tkOther: begin end; - tkPunct: begin - if not (p.tok[p.idx+1].kind in [tkWord, tkAdornment, tkOther, tkPunct]) - then break - end - else break - end; - addSon(n, newLeaf(p)); - inc(p.idx); - end; - addSon(father, n); - end - else begin - n := newLeaf(p); - inc(p.idx); - if p.tok[p.idx].symbol = '_'+'' then n := parsePostfix(p, n); - addSon(father, n); - end -end; - -procedure parseUntil(var p: TRstParser; father: PRstNode; - const postfix: string; interpretBackslash: bool); -begin - while true do begin - case p.tok[p.idx].kind of - tkPunct: begin - if isInlineMarkupEnd(p, postfix) then begin - inc(p.idx); - break; - end - else if interpretBackslash then - parseBackslash(p, father) - else begin - addSon(father, newLeaf(p)); - inc(p.idx); - end - end; - tkAdornment, tkWord, tkOther: begin - addSon(father, newLeaf(p)); - inc(p.idx); - end; - tkIndent: begin - addSon(father, newRstNode(rnLeaf, ' '+'')); - inc(p.idx); - if p.tok[p.idx].kind = tkIndent then begin - rstMessage(p, errXExpected, postfix); - break - end - end; - tkWhite: begin - addSon(father, newRstNode(rnLeaf, ' '+'')); - inc(p.idx); - end - else - rstMessage(p, errXExpected, postfix); - end - end -end; - -procedure parseInline(var p: TRstParser; father: PRstNode); -var - n: PRstNode; -begin - case p.tok[p.idx].kind of - tkPunct: begin - if isInlineMarkupStart(p, '**') then begin - inc(p.idx); - n := newRstNode(rnStrongEmphasis); - parseUntil(p, n, '**', true); - addSon(father, n); - end - else if isInlineMarkupStart(p, '*'+'') then begin - inc(p.idx); - n := newRstNode(rnEmphasis); - parseUntil(p, n, '*'+'', true); - addSon(father, n); - end - else if isInlineMarkupStart(p, '``') then begin - inc(p.idx); - n := newRstNode(rnInlineLiteral); - parseUntil(p, n, '``', false); - addSon(father, n); - end - else if isInlineMarkupStart(p, '`'+'') then begin - inc(p.idx); - n := newRstNode(rnInterpretedText); - parseUntil(p, n, '`'+'', true); - n := parsePostfix(p, n); - addSon(father, n); - end - else if isInlineMarkupStart(p, '|'+'') then begin - inc(p.idx); - n := newRstNode(rnSubstitutionReferences); - parseUntil(p, n, '|'+'', false); - addSon(father, n); - end - else begin - parseBackslash(p, father); - end; - end; - tkWord: parseURL(p, father); - tkAdornment, tkOther, tkWhite: begin - addSon(father, newLeaf(p)); - inc(p.idx); - end - else assert(false); - end -end; - -function getDirective(var p: TRstParser): string; -var - j: int; -begin - if (p.tok[p.idx].kind = tkWhite) and (p.tok[p.idx+1].kind = tkWord) then begin - j := p.idx; - inc(p.idx); - result := p.tok[p.idx].symbol; - inc(p.idx); - while p.tok[p.idx].kind in [tkWord, tkPunct, tkAdornment, tkOther] do begin - if p.tok[p.idx].symbol = '::' then break; - add(result, p.tok[p.idx].symbol); - inc(p.idx); - end; - if (p.tok[p.idx].kind = tkWhite) then inc(p.idx); - if p.tok[p.idx].symbol = '::' then begin - inc(p.idx); - if (p.tok[p.idx].kind = tkWhite) then inc(p.idx); - end - else begin - p.idx := j; // set back - result := '' // error - end - end - else - result := ''; -end; - -function parseComment(var p: TRstParser): PRstNode; -var - indent: int; -begin - case p.tok[p.idx].kind of - tkIndent, tkEof: begin - if p.tok[p.idx+1].kind = tkIndent then begin - inc(p.idx); - // empty comment - end - else begin - indent := p.tok[p.idx].ival; - while True do begin - case p.tok[p.idx].kind of - tkEof: break; - tkIndent: begin - if (p.tok[p.idx].ival < indent) then break; - end - else begin end - end; - inc(p.idx) - end - end - end - else - while not (p.tok[p.idx].kind in [tkIndent, tkEof]) do inc(p.idx); - end; - result := nil; -end; - -type - TDirKind = ( // must be ordered alphabetically! - dkNone, dkAuthor, dkAuthors, dkCodeBlock, dkContainer, - dkContents, dkFigure, dkImage, dkInclude, dkIndex, dkRaw, dkTitle - ); -const - DirIds: array [0..11] of string = ( - '', 'author', 'authors', 'code-block', 'container', - 'contents', 'figure', 'image', 'include', 'index', 'raw', 'title' - ); - -function getDirKind(const s: string): TDirKind; -var - i: int; -begin - i := binaryStrSearch(DirIds, s); - if i >= 0 then result := TDirKind(i) - else result := dkNone -end; - -procedure parseLine(var p: TRstParser; father: PRstNode); -begin - while True do begin - case p.tok[p.idx].kind of - tkWhite, tkWord, tkOther, tkPunct: parseInline(p, father); - else break; - end - end -end; - -procedure parseSection(var p: TRstParser; result: PRstNode); forward; - -function parseField(var p: TRstParser): PRstNode; -var - col, indent: int; - fieldname, fieldbody: PRstNode; -begin - result := newRstNode(rnField); - col := p.tok[p.idx].col; - inc(p.idx); // skip : - fieldname := newRstNode(rnFieldname); - parseUntil(p, fieldname, ':'+'', false); - fieldbody := newRstNode(rnFieldbody); - - if p.tok[p.idx].kind <> tkIndent then - parseLine(p, fieldbody); - if p.tok[p.idx].kind = tkIndent then begin - indent := p.tok[p.idx].ival; - if indent > col then begin - pushInd(p, indent); - parseSection(p, fieldbody); - popInd(p); - end - end; - addSon(result, fieldname); - addSon(result, fieldbody); -end; - -function parseFields(var p: TRstParser): PRstNode; -var - col: int; -begin - result := nil; - if (p.tok[p.idx].kind = tkIndent) - and (p.tok[p.idx+1].symbol = ':'+'') then begin - col := p.tok[p.idx].ival; // BUGFIX! - result := newRstNode(rnFieldList); - inc(p.idx); - while true do begin - addSon(result, parseField(p)); - if (p.tok[p.idx].kind = tkIndent) and (p.tok[p.idx].ival = col) - and (p.tok[p.idx+1].symbol = ':'+'') then inc(p.idx) - else break - end - end -end; - -function getFieldValue(n: PRstNode; const fieldname: string): string; -var - i: int; - f: PRstNode; -begin - result := ''; - if n.sons[1] = nil then exit; - if (n.sons[1].kind <> rnFieldList) then - InternalError('getFieldValue (2): ' + rstnodeKindToStr[n.sons[1].kind]); - for i := 0 to rsonsLen(n.sons[1])-1 do begin - f := n.sons[1].sons[i]; - if cmpIgnoreStyle(addNodes(f.sons[0]), fieldname) = 0 then begin - result := addNodes(f.sons[1]); - if result = '' then result := #1#1; // indicates that the field exists - exit - end - end -end; - -function getArgument(n: PRstNode): string; -begin - if n.sons[0] = nil then result := '' - else result := addNodes(n.sons[0]); -end; - -function parseDotDot(var p: TRstParser): PRstNode; forward; - -function parseLiteralBlock(var p: TRstParser): PRstNode; -var - indent: int; - n: PRstNode; -begin - result := newRstNode(rnLiteralBlock); - n := newRstNode(rnLeaf, ''); - if p.tok[p.idx].kind = tkIndent then begin - indent := p.tok[p.idx].ival; - inc(p.idx); - while True do begin - case p.tok[p.idx].kind of - tkEof: break; - tkIndent: begin - if (p.tok[p.idx].ival < indent) then begin - break; - end - else begin - add(n.text, nl); - add(n.text, repeatChar(p.tok[p.idx].ival - indent)); - inc(p.idx) - end - end - else begin - add(n.text, p.tok[p.idx].symbol); - inc(p.idx) - end - end - end - end - else begin - while not (p.tok[p.idx].kind in [tkIndent, tkEof]) do begin - add(n.text, p.tok[p.idx].symbol); - inc(p.idx) - end - end; - addSon(result, n); -end; - -function getLevel(var map: TLevelMap; var lvl: int; c: Char): int; -begin - if map[c] = 0 then begin - inc(lvl); - map[c] := lvl; - end; - result := map[c] -end; - -function tokenAfterNewline(const p: TRstParser): int; -begin - result := p.idx; - while true do - case p.tok[result].kind of - tkEof: break; - tkIndent: begin inc(result); break end; - else inc(result) - end -end; - -// --------------------------------------------------------------------------- - -function isLineBlock(const p: TRstParser): bool; -var - j: int; -begin - j := tokenAfterNewline(p); - result := (p.tok[p.idx].col = p.tok[j].col) and (p.tok[j].symbol = '|'+'') - or (p.tok[j].col > p.tok[p.idx].col) -end; - -function predNL(const p: TRstParser): bool; -begin - result := true; - if (p.idx > 0) then - result := (p.tok[p.idx-1].kind = tkIndent) - and (p.tok[p.idx-1].ival = currInd(p)) -end; - -function isDefList(const p: TRstParser): bool; -var - j: int; -begin - j := tokenAfterNewline(p); - result := (p.tok[p.idx].col < p.tok[j].col) - and (p.tok[j].kind in [tkWord, tkOther, tkPunct]) - and (p.tok[j-2].symbol <> '::'); -end; - -function whichSection(const p: TRstParser): TRstNodeKind; -begin - case p.tok[p.idx].kind of - tkAdornment: begin - if match(p, p.idx+1, 'ii') then result := rnTransition - else if match(p, p.idx+1, ' a') then result := rnTable - else if match(p, p.idx+1, 'i'+'') then result := rnOverline - else result := rnLeaf - end; - tkPunct: begin - if match(p, tokenAfterNewLine(p), 'ai') then - result := rnHeadline - else if p.tok[p.idx].symbol = '::' then - result := rnLiteralBlock - else if predNL(p) - and ((p.tok[p.idx].symbol = '+'+'') or - (p.tok[p.idx].symbol = '*'+'') or - (p.tok[p.idx].symbol = '-'+'')) - and (p.tok[p.idx+1].kind = tkWhite) then - result := rnBulletList - else if (p.tok[p.idx].symbol = '|'+'') and isLineBlock(p) then - result := rnLineBlock - else if (p.tok[p.idx].symbol = '..') and predNL(p) then - result := rnDirective - else if (p.tok[p.idx].symbol = ':'+'') and predNL(p) then - result := rnFieldList - else if match(p, p.idx, '(e) ') then - result := rnEnumList - else if match(p, p.idx, '+a+') then begin - result := rnGridTable; - rstMessage(p, errGridTableNotImplemented); - end - else if isDefList(p) then - result := rnDefList - else if match(p, p.idx, '-w') or match(p, p.idx, '--w') - or match(p, p.idx, '/w') then - result := rnOptionList - else - result := rnParagraph - end; - tkWord, tkOther, tkWhite: begin - if match(p, tokenAfterNewLine(p), 'ai') then - result := rnHeadline - else if isDefList(p) then - result := rnDefList - else if match(p, p.idx, 'e) ') or match(p, p.idx, 'e. ') then - result := rnEnumList - else - result := rnParagraph; - end; - else result := rnLeaf; - end -end; - -function parseLineBlock(var p: TRstParser): PRstNode; -var - col: int; - item: PRstNode; -begin - result := nil; - if p.tok[p.idx+1].kind = tkWhite then begin - col := p.tok[p.idx].col; - result := newRstNode(rnLineBlock); - pushInd(p, p.tok[p.idx+2].col); - inc(p.idx, 2); - while true do begin - item := newRstNode(rnLineBlockItem); - parseSection(p, item); - addSon(result, item); - if (p.tok[p.idx].kind = tkIndent) and (p.tok[p.idx].ival = col) - and (p.tok[p.idx+1].symbol = '|'+'') - and (p.tok[p.idx+2].kind = tkWhite) then inc(p.idx, 3) - else break; - end; - popInd(p); - end; -end; - -procedure parseParagraph(var p: TRstParser; result: PRstNode); -begin - while True do begin - case p.tok[p.idx].kind of - tkIndent: begin - if p.tok[p.idx+1].kind = tkIndent then begin - inc(p.idx); - break - end - else if (p.tok[p.idx].ival = currInd(p)) then begin - inc(p.idx); - case whichSection(p) of - rnParagraph, rnLeaf, rnHeadline, rnOverline, rnDirective: - addSon(result, newRstNode(rnLeaf, ' '+'')); - rnLineBlock: addSonIfNotNil(result, parseLineBlock(p)); - else break; - end; - end - else break - end; - tkPunct: begin - if (p.tok[p.idx].symbol = '::') and (p.tok[p.idx+1].kind = tkIndent) - and (currInd(p) < p.tok[p.idx+1].ival) then begin - addSon(result, newRstNode(rnLeaf, ':'+'')); - inc(p.idx); // skip '::' - addSon(result, parseLiteralBlock(p)); - break - end - else - parseInline(p, result) - end; - tkWhite, tkWord, tkAdornment, tkOther: - parseInline(p, result); - else break; - end - end -end; - -function parseParagraphWrapper(var p: TRstParser): PRstNode; -begin - result := newRstNode(rnParagraph); - parseParagraph(p, result); -end; - -function parseHeadline(var p: TRstParser): PRstNode; -var - c: Char; -begin - result := newRstNode(rnHeadline); - parseLine(p, result); - assert(p.tok[p.idx].kind = tkIndent); - assert(p.tok[p.idx+1].kind = tkAdornment); - c := p.tok[p.idx+1].symbol[strStart]; - inc(p.idx, 2); - result.level := getLevel(p.s.underlineToLevel, p.s.uLevel, c); -end; - -type - TIntSeq = array of int; - -function tokEnd(const p: TRstParser): int; -begin - result := p.tok[p.idx].col + length(p.tok[p.idx].symbol) - 1; -end; - -procedure getColumns(var p: TRstParser; var cols: TIntSeq); -var - L: int; -begin - L := 0; - while true do begin - inc(L); - setLength(cols, L); - cols[L-1] := tokEnd(p); - assert(p.tok[p.idx].kind = tkAdornment); - inc(p.idx); - if p.tok[p.idx].kind <> tkWhite then break; - inc(p.idx); - if p.tok[p.idx].kind <> tkAdornment then break - end; - if p.tok[p.idx].kind = tkIndent then inc(p.idx); - // last column has no limit: - cols[L-1] := 32000; -end; - -function parseDoc(var p: TRstParser): PRstNode; forward; - -function parseSimpleTable(var p: TRstParser): PRstNode; -var - cols: TIntSeq; - row: array of string; - j, i, last, line: int; - c: Char; - q: TRstParser; - a, b: PRstNode; -begin - result := newRstNode(rnTable); -{@ignore} - cols := nil; - row := nil; -{@emit - cols := @[];} -{@emit - row := @[];} - a := nil; - c := p.tok[p.idx].symbol[strStart]; - while true do begin - if p.tok[p.idx].kind = tkAdornment then begin - last := tokenAfterNewline(p); - if p.tok[last].kind in [tkEof, tkIndent] then begin - // skip last adornment line: - p.idx := last; break - end; - getColumns(p, cols); - setLength(row, length(cols)); - if a <> nil then - for j := 0 to rsonsLen(a)-1 do a.sons[j].kind := rnTableHeaderCell; - end; - if p.tok[p.idx].kind = tkEof then break; - for j := 0 to high(row) do row[j] := ''; - // the following while loop iterates over the lines a single cell may span: - line := p.tok[p.idx].line; - while true do begin - i := 0; - while not (p.tok[p.idx].kind in [tkIndent, tkEof]) do begin - if (tokEnd(p) <= cols[i]) then begin - add(row[i], p.tok[p.idx].symbol); - inc(p.idx); - end - else begin - if p.tok[p.idx].kind = tkWhite then inc(p.idx); - inc(i) - end - end; - if p.tok[p.idx].kind = tkIndent then inc(p.idx); - if tokEnd(p) <= cols[0] then break; - if p.tok[p.idx].kind in [tkEof, tkAdornment] then break; - for j := 1 to high(row) do addChar(row[j], #10); - end; - // process all the cells: - a := newRstNode(rnTableRow); - for j := 0 to high(row) do begin - initParser(q, p.s); - q.col := cols[j]; - q.line := line-1; - q.filename := p.filename; - getTokens(row[j], false, q.tok); - b := newRstNode(rnTableDataCell); - addSon(b, parseDoc(q)); - addSon(a, b); - end; - addSon(result, a); - end; -end; - -function parseTransition(var p: TRstParser): PRstNode; -begin - result := newRstNode(rnTransition); - inc(p.idx); - if p.tok[p.idx].kind = tkIndent then inc(p.idx); - if p.tok[p.idx].kind = tkIndent then inc(p.idx); -end; - -function parseOverline(var p: TRstParser): PRstNode; -var - c: char; -begin - c := p.tok[p.idx].symbol[strStart]; - inc(p.idx, 2); - result := newRstNode(rnOverline); - while true do begin - parseLine(p, result); - if p.tok[p.idx].kind = tkIndent then begin - inc(p.idx); - if p.tok[p.idx-1].ival > currInd(p) then - addSon(result, newRstNode(rnLeaf, ' '+'')) - else - break - end - else break - end; - result.level := getLevel(p.s.overlineToLevel, p.s.oLevel, c); - if p.tok[p.idx].kind = tkAdornment then begin - inc(p.idx); // XXX: check? - if p.tok[p.idx].kind = tkIndent then inc(p.idx); - end -end; - -function parseBulletList(var p: TRstParser): PRstNode; -var - bullet: string; - col: int; - item: PRstNode; -begin - result := nil; - if p.tok[p.idx+1].kind = tkWhite then begin - bullet := p.tok[p.idx].symbol; - col := p.tok[p.idx].col; - result := newRstNode(rnBulletList); - pushInd(p, p.tok[p.idx+2].col); - inc(p.idx, 2); - while true do begin - item := newRstNode(rnBulletItem); - parseSection(p, item); - addSon(result, item); - if (p.tok[p.idx].kind = tkIndent) and (p.tok[p.idx].ival = col) - and (p.tok[p.idx+1].symbol = bullet) - and (p.tok[p.idx+2].kind = tkWhite) then inc(p.idx, 3) - else break; - end; - popInd(p); - end; -end; - -function parseOptionList(var p: TRstParser): PRstNode; -var - a, b, c: PRstNode; - j: int; -begin - result := newRstNode(rnOptionList); - while true do begin - if match(p, p.idx, '-w') - or match(p, p.idx, '--w') - or match(p, p.idx, '/w') then begin - a := newRstNode(rnOptionGroup); - b := newRstNode(rnDescription); - c := newRstNode(rnOptionListItem); - while not (p.tok[p.idx].kind in [tkIndent, tkEof]) do begin - if (p.tok[p.idx].kind = tkWhite) - and (length(p.tok[p.idx].symbol) > 1) then begin - inc(p.idx); break - end; - addSon(a, newLeaf(p)); - inc(p.idx); - end; - j := tokenAfterNewline(p); - if (j > 0) and (p.tok[j-1].kind = tkIndent) - and (p.tok[j-1].ival > currInd(p)) then begin - pushInd(p, p.tok[j-1].ival); - parseSection(p, b); - popInd(p); - end - else begin - parseLine(p, b); - end; - if (p.tok[p.idx].kind = tkIndent) then inc(p.idx); - addSon(c, a); - addSon(c, b); - addSon(result, c); - end - else break; - end -end; - -function parseDefinitionList(var p: TRstParser): PRstNode; -var - j, col: int; - a, b, c: PRstNode; -begin - result := nil; - j := tokenAfterNewLine(p)-1; - if (j >= 1) and (p.tok[j].kind = tkIndent) - and (p.tok[j].ival > currInd(p)) and (p.tok[j-1].symbol <> '::') then begin - col := p.tok[p.idx].col; - result := newRstNode(rnDefList); - while true do begin - j := p.idx; - a := newRstNode(rnDefName); - parseLine(p, a); - //writeln('after def line: ', p.tok[p.idx].ival :1, ' ', col : 1); - if (p.tok[p.idx].kind = tkIndent) - and (p.tok[p.idx].ival > currInd(p)) - and (p.tok[p.idx+1].symbol <> '::') - and not (p.tok[p.idx+1].kind in [tkIndent, tkEof]) then begin - pushInd(p, p.tok[p.idx].ival); - b := newRstNode(rnDefBody); - parseSection(p, b); - c := newRstNode(rnDefItem); - addSon(c, a); - addSon(c, b); - addSon(result, c); - popInd(p); - end - else begin - p.idx := j; - break - end; - if (p.tok[p.idx].kind = tkIndent) and (p.tok[p.idx].ival = col) then begin - inc(p.idx); - j := tokenAfterNewLine(p)-1; - if (j >= 1) and (p.tok[j].kind = tkIndent) - and (p.tok[j].ival > col) - and (p.tok[j-1].symbol <> '::') - and (p.tok[j+1].kind <> tkIndent) then begin end - else break - end - end; - if rsonsLen(result) = 0 then result := nil - end -end; - -function parseEnumList(var p: TRstParser): PRstNode; -const - wildcards: array [0..2] of string = ('(e) ', 'e) ', 'e. '); - wildpos: array [0..2] of int = (1, 0, 0); -var - w, col, j: int; - item: PRstNode; -begin - result := nil; - w := 0; - while w <= 2 do begin - if match(p, p.idx, wildcards[w]) then break; - inc(w); - end; - if w <= 2 then begin - col := p.tok[p.idx].col; - result := newRstNode(rnEnumList); - inc(p.idx, wildpos[w]+3); - j := tokenAfterNewLine(p); - if (p.tok[j].col = p.tok[p.idx].col) or match(p, j, wildcards[w]) then begin - pushInd(p, p.tok[p.idx].col); - while true do begin - item := newRstNode(rnEnumItem); - parseSection(p, item); - addSon(result, item); - if (p.tok[p.idx].kind = tkIndent) - and (p.tok[p.idx].ival = col) - and match(p, p.idx+1, wildcards[w]) then - inc(p.idx, wildpos[w]+4) - else - break - end; - popInd(p); - end - else begin - dec(p.idx, wildpos[w]+3); - result := nil - end - end -end; - -function sonKind(father: PRstNode; i: int): TRstNodeKind; -begin - result := rnLeaf; - if i < rsonsLen(father) then result := father.sons[i].kind; -end; - -procedure parseSection(var p: TRstParser; result: PRstNode); -var - a: PRstNode; - k: TRstNodeKind; - leave: bool; -begin - while true do begin - leave := false; - assert(p.idx >= 0); - while p.tok[p.idx].kind = tkIndent do begin - if currInd(p) = p.tok[p.idx].ival then begin - inc(p.idx); - end - else if p.tok[p.idx].ival > currInd(p) then begin - pushInd(p, p.tok[p.idx].ival); - a := newRstNode(rnBlockQuote); - parseSection(p, a); - addSon(result, a); - popInd(p); - end - else begin - leave := true; - break; - end - end; - if leave then break; - if p.tok[p.idx].kind = tkEof then break; - a := nil; - k := whichSection(p); - case k of - rnLiteralBlock: begin - inc(p.idx); // skip '::' - a := parseLiteralBlock(p); - end; - rnBulletList: a := parseBulletList(p); - rnLineblock: a := parseLineBlock(p); - rnDirective: a := parseDotDot(p); - rnEnumList: a := parseEnumList(p); - rnLeaf: begin - rstMessage(p, errNewSectionExpected); - end; - rnParagraph: begin end; - rnDefList: a := parseDefinitionList(p); - rnFieldList: begin - dec(p.idx); - a := parseFields(p); - end; - rnTransition: a := parseTransition(p); - rnHeadline: a := parseHeadline(p); - rnOverline: a := parseOverline(p); - rnTable: a := parseSimpleTable(p); - rnOptionList: a := parseOptionList(p); - else InternalError('rst.parseSection()'); - end; - if (a = nil) and (k <> rnDirective) then begin - a := newRstNode(rnParagraph); - parseParagraph(p, a); - end; - addSonIfNotNil(result, a); - end; - if (sonKind(result, 0) = rnParagraph) - and (sonKind(result, 1) <> rnParagraph) then - result.sons[0].kind := rnInner; -end; - -function parseSectionWrapper(var p: TRstParser): PRstNode; -begin - result := newRstNode(rnInner); - parseSection(p, result); - while (result.kind = rnInner) and (rsonsLen(result) = 1) do - result := result.sons[0] -end; - -function parseDoc(var p: TRstParser): PRstNode; -begin - result := parseSectionWrapper(p); - if p.tok[p.idx].kind <> tkEof then - rstMessage(p, errGeneralParseError); -end; - -type - TDirFlag = (hasArg, hasOptions, argIsFile); - TDirFlags = set of TDirFlag; - TSectionParser = function (var p: TRstParser): PRstNode; - -function parseDirective(var p: TRstParser; flags: TDirFlags; - contentParser: TSectionParser): PRstNode; -var - args, options, content: PRstNode; -begin - result := newRstNode(rnDirective); - args := nil; - options := nil; - if hasArg in flags then begin - args := newRstNode(rnDirArg); - if argIsFile in flags then begin - while True do begin - case p.tok[p.idx].kind of - tkWord, tkOther, tkPunct, tkAdornment: begin - addSon(args, newLeaf(p)); - inc(p.idx); - end; - else break; - end - end - end - else begin - parseLine(p, args); - end - end; - addSon(result, args); - if hasOptions in flags then begin - if (p.tok[p.idx].kind = tkIndent) and (p.tok[p.idx].ival >= 3) - and (p.tok[p.idx+1].symbol = ':'+'') then - options := parseFields(p); - end; - addSon(result, options); - if (assigned(contentParser)) and (p.tok[p.idx].kind = tkIndent) - and (p.tok[p.idx].ival > currInd(p)) then begin - pushInd(p, p.tok[p.idx].ival); - content := contentParser(p); - popInd(p); - addSon(result, content) - end - else - addSon(result, nil); -end; - -function dirInclude(var p: TRstParser): PRstNode; -(* -The following options are recognized: - -start-after : text to find in the external data file - Only the content after the first occurrence of the specified text will - be included. -end-before : text to find in the external data file - Only the content before the first occurrence of the specified text - (but after any after text) will be included. -literal : flag (empty) - The entire included text is inserted into the document as a single - literal block (useful for program listings). -encoding : name of text encoding - The text encoding of the external data file. Defaults to the document's - encoding (if specified). -*) -var - n: PRstNode; - filename, path: string; - q: TRstParser; -begin - result := nil; - n := parseDirective(p, {@set}[hasArg, argIsFile, hasOptions], nil); - filename := strip(addNodes(n.sons[0])); - path := findFile(filename); - if path = '' then - rstMessage(p, errCannotOpenFile, filename) - else begin - // XXX: error handling; recursive file inclusion! - if getFieldValue(n, 'literal') <> '' then begin - result := newRstNode(rnLiteralBlock); - addSon(result, newRstNode(rnLeaf, readFile(path))); - end - else begin - initParser(q, p.s); - q.filename := filename; - getTokens(readFile(path), false, q.tok); - // workaround a GCC bug: - if find(q.tok[high(q.tok)].symbol, #0#1#2) > 0 then begin - InternalError('Too many binary zeros in include file'); - end; - result := parseDoc(q); - end - end -end; - -function dirCodeBlock(var p: TRstParser): PRstNode; -var - n: PRstNode; - filename, path: string; -begin - result := parseDirective(p, {@set}[hasArg, hasOptions], parseLiteralBlock); - filename := strip(getFieldValue(result, 'file')); - if filename <> '' then begin - path := findFile(filename); - if path = '' then rstMessage(p, errCannotOpenFile, filename); - n := newRstNode(rnLiteralBlock); - addSon(n, newRstNode(rnLeaf, readFile(path))); - result.sons[2] := n; - end; - result.kind := rnCodeBlock; -end; - -function dirContainer(var p: TRstParser): PRstNode; -begin - result := parseDirective(p, {@set}[hasArg], parseSectionWrapper); - assert(result.kind = rnDirective); - assert(rsonsLen(result) = 3); - result.kind := rnContainer; -end; - -function dirImage(var p: TRstParser): PRstNode; -begin - result := parseDirective(p, {@set}[hasOptions, hasArg, argIsFile], nil); - result.kind := rnImage -end; - -function dirFigure(var p: TRstParser): PRstNode; -begin - result := parseDirective(p, {@set}[hasOptions, hasArg, argIsFile], - parseSectionWrapper); - result.kind := rnFigure -end; - -function dirTitle(var p: TRstParser): PRstNode; -begin - result := parseDirective(p, {@set}[hasArg], nil); - result.kind := rnTitle -end; - -function dirContents(var p: TRstParser): PRstNode; -begin - result := parseDirective(p, {@set}[hasArg], nil); - result.kind := rnContents -end; - -function dirIndex(var p: TRstParser): PRstNode; -begin - result := parseDirective(p, {@set}[], parseSectionWrapper); - result.kind := rnIndex -end; - -function dirRaw(var p: TRstParser): PRstNode; -(* -The following options are recognized: - -file : string (newlines removed) - The local filesystem path of a raw data file to be included. -url : string (whitespace removed) - An Internet URL reference to a raw data file to be included. -encoding : name of text encoding - The text encoding of the external raw data (file or URL). - Defaults to the document's encoding (if specified). -*) -var - filename, path, f: string; -begin - result := parseDirective(p, {@set}[hasOptions], parseSectionWrapper); - result.kind := rnRaw; - filename := getFieldValue(result, 'file'); - if filename <> '' then begin - path := findFile(filename); - if path = '' then - rstMessage(p, errCannotOpenFile, filename) - else begin - f := readFile(path); - result := newRstNode(rnRaw); - addSon(result, newRstNode(rnLeaf, f)); - end - end -end; - -function parseDotDot(var p: TRstParser): PRstNode; -var - d: string; - col: int; - a, b: PRstNode; -begin - result := nil; - col := p.tok[p.idx].col; - inc(p.idx); - d := getDirective(p); - if d <> '' then begin - pushInd(p, col); - case getDirKind(d) of - dkInclude: result := dirInclude(p); - dkImage: result := dirImage(p); - dkFigure: result := dirFigure(p); - dkTitle: result := dirTitle(p); - dkContainer: result := dirContainer(p); - dkContents: result := dirContents(p); - dkRaw: result := dirRaw(p); - dkCodeblock: result := dirCodeBlock(p); - dkIndex: result := dirIndex(p); - else rstMessage(p, errInvalidDirectiveX, d); - end; - popInd(p); - end - else if match(p, p.idx, ' _') then begin - // hyperlink target: - inc(p.idx, 2); - a := getReferenceName(p, ':'+''); - if p.tok[p.idx].kind = tkWhite then inc(p.idx); - b := untilEol(p); - setRef(p, rstnodeToRefname(a), b); - end - else if match(p, p.idx, ' |') then begin - // substitution definitions: - inc(p.idx, 2); - a := getReferenceName(p, '|'+''); - if p.tok[p.idx].kind = tkWhite then inc(p.idx); - if cmpIgnoreStyle(p.tok[p.idx].symbol, 'replace') = 0 then begin - inc(p.idx); - expect(p, '::'); - b := untilEol(p); - end - else if cmpIgnoreStyle(p.tok[p.idx].symbol, 'image') = 0 then begin - inc(p.idx); - b := dirImage(p); - end - else - rstMessage(p, errInvalidDirectiveX, p.tok[p.idx].symbol); - setSub(p, addNodes(a), b); - end - else if match(p, p.idx, ' [') then begin - // footnotes, citations - inc(p.idx, 2); - a := getReferenceName(p, ']'+''); - if p.tok[p.idx].kind = tkWhite then inc(p.idx); - b := untilEol(p); - setRef(p, rstnodeToRefname(a), b); - end - else - result := parseComment(p); -end; - -function resolveSubs(var p: TRstParser; n: PRstNode): PRstNode; -var - i, x: int; - y: PRstNode; - e, key: string; -begin - result := n; - if n = nil then exit; - case n.kind of - rnSubstitutionReferences: begin - x := findSub(p, n); - if x >= 0 then result := p.s.subs[x].value - else begin - key := addNodes(n); - e := getEnv(key); - if e <> '' then result := newRstNode(rnLeaf, e) - else rstMessage(p, warnUnknownSubstitutionX, key); - end - end; - rnRef: begin - y := findRef(p, rstnodeToRefname(n)); - if y <> nil then begin - result := newRstNode(rnHyperlink); - n.kind := rnInner; - addSon(result, n); - addSon(result, y); - end - end; - rnLeaf: begin end; - rnContents: p.hasToc := true; - else begin - for i := 0 to rsonsLen(n)-1 do - n.sons[i] := resolveSubs(p, n.sons[i]); - end - end -end; - -function rstParse(const text: string; // the text to be parsed - skipPounds: bool; - const filename: string; // for error messages - line, column: int; - var hasToc: bool): PRstNode; -var - p: TRstParser; -begin - if isNil(text) then - rawMessage(errCannotOpenFile, filename); - initParser(p, newSharedState()); - p.filename := filename; - p.line := line; - p.col := column; - getTokens(text, skipPounds, p.tok); - result := resolveSubs(p, parseDoc(p)); - hasToc := p.hasToc; -end; - -end. |