From c5d2e421ce4ec77c080f1d3efb9a44bcda7be4fc Mon Sep 17 00:00:00 2001 From: Dominik Picheta Date: Mon, 25 Feb 2013 18:51:20 +0000 Subject: Moved packages to lib directory. --- compiler/nimrod.cfg | 2 +- lib/packages/docutils/highlite.nim | 537 ++++++++++++ lib/packages/docutils/rst.nim | 1639 ++++++++++++++++++++++++++++++++++++ lib/packages/docutils/rstast.nim | 288 +++++++ lib/packages/docutils/rstgen.nim | 695 +++++++++++++++ packages/docutils/highlite.nim | 537 ------------ packages/docutils/rst.nim | 1639 ------------------------------------ packages/docutils/rstast.nim | 288 ------- packages/docutils/rstgen.nim | 695 --------------- 9 files changed, 3160 insertions(+), 3160 deletions(-) create mode 100755 lib/packages/docutils/highlite.nim create mode 100755 lib/packages/docutils/rst.nim create mode 100644 lib/packages/docutils/rstast.nim create mode 100644 lib/packages/docutils/rstgen.nim delete mode 100755 packages/docutils/highlite.nim delete mode 100755 packages/docutils/rst.nim delete mode 100644 packages/docutils/rstast.nim delete mode 100644 packages/docutils/rstgen.nim diff --git a/compiler/nimrod.cfg b/compiler/nimrod.cfg index aa49729b9..a0e59aa09 100755 --- a/compiler/nimrod.cfg +++ b/compiler/nimrod.cfg @@ -6,7 +6,7 @@ hint[XDeclaredButNotUsed]:off path:"llvm" path:"$projectPath/.." -path:"$nimrod/packages/docutils" +path:"$lib/packages/docutils" define:booting diff --git a/lib/packages/docutils/highlite.nim b/lib/packages/docutils/highlite.nim new file mode 100755 index 000000000..21dd1543a --- /dev/null +++ b/lib/packages/docutils/highlite.nim @@ -0,0 +1,537 @@ +# +# +# Nimrod's Runtime Library +# (c) Copyright 2012 Andreas Rumpf +# +# See the file "copying.txt", included in this +# distribution, for details about the copyright. +# + +## Source highlighter for programming or markup languages. +## Currently only few languages are supported, other languages may be added. +## The interface supports one language nested in another. 
+ +import + strutils + +type + TTokenClass* = enum + gtEof, gtNone, gtWhitespace, gtDecNumber, gtBinNumber, gtHexNumber, + gtOctNumber, gtFloatNumber, gtIdentifier, gtKeyword, gtStringLit, + gtLongStringLit, gtCharLit, gtEscapeSequence, # escape sequence like \xff + gtOperator, gtPunctation, gtComment, gtLongComment, gtRegularExpression, + gtTagStart, gtTagEnd, gtKey, gtValue, gtRawData, gtAssembler, + gtPreprocessor, gtDirective, gtCommand, gtRule, gtHyperlink, gtLabel, + gtReference, gtOther + TGeneralTokenizer* = object of TObject + kind*: TTokenClass + start*, length*: int + buf: cstring + pos: int + state: TTokenClass + + TSourceLanguage* = enum + langNone, langNimrod, langCpp, langCsharp, langC, langJava + +const + sourceLanguageToStr*: array[TSourceLanguage, string] = ["none", "Nimrod", + "C++", "C#", "C", "Java"] + tokenClassToStr*: array[TTokenClass, string] = ["Eof", "None", "Whitespace", + "DecNumber", "BinNumber", "HexNumber", "OctNumber", "FloatNumber", + "Identifier", "Keyword", "StringLit", "LongStringLit", "CharLit", + "EscapeSequence", "Operator", "Punctation", "Comment", "LongComment", + "RegularExpression", "TagStart", "TagEnd", "Key", "Value", "RawData", + "Assembler", "Preprocessor", "Directive", "Command", "Rule", "Hyperlink", + "Label", "Reference", "Other"] + + nimrodKeywords = slurp("doc/keywords.txt").split + +proc getSourceLanguage*(name: string): TSourceLanguage = + for i in countup(succ(low(TSourceLanguage)), high(TSourceLanguage)): + if cmpIgnoreStyle(name, sourceLanguageToStr[i]) == 0: + return i + result = langNone + +proc initGeneralTokenizer*(g: var TGeneralTokenizer, buf: string) = + g.buf = cstring(buf) + g.kind = low(TTokenClass) + g.start = 0 + g.length = 0 + g.state = low(TTokenClass) + var pos = 0 # skip initial whitespace: + while g.buf[pos] in {' ', '\x09'..'\x0D'}: inc(pos) + g.pos = pos + +proc deinitGeneralTokenizer*(g: var TGeneralTokenizer) = + nil + +proc nimGetKeyword(id: string): TTokenClass = + for k in 
nimrodKeywords: + if cmpIgnoreStyle(id, k) == 0: return gtKeyword + result = gtIdentifier + when false: + var i = getIdent(id) + if (i.id >= ord(tokKeywordLow) - ord(tkSymbol)) and + (i.id <= ord(tokKeywordHigh) - ord(tkSymbol)): + result = gtKeyword + else: + result = gtIdentifier + +proc nimNumberPostfix(g: var TGeneralTokenizer, position: int): int = + var pos = position + if g.buf[pos] == '\'': + inc(pos) + case g.buf[pos] + of 'f', 'F': + g.kind = gtFloatNumber + inc(pos) + if g.buf[pos] in {'0'..'9'}: inc(pos) + if g.buf[pos] in {'0'..'9'}: inc(pos) + of 'i', 'I': + inc(pos) + if g.buf[pos] in {'0'..'9'}: inc(pos) + if g.buf[pos] in {'0'..'9'}: inc(pos) + else: + nil + result = pos + +proc nimNumber(g: var TGeneralTokenizer, position: int): int = + const decChars = {'0'..'9', '_'} + var pos = position + g.kind = gtDecNumber + while g.buf[pos] in decChars: inc(pos) + if g.buf[pos] == '.': + g.kind = gtFloatNumber + inc(pos) + while g.buf[pos] in decChars: inc(pos) + if g.buf[pos] in {'e', 'E'}: + g.kind = gtFloatNumber + inc(pos) + if g.buf[pos] in {'+', '-'}: inc(pos) + while g.buf[pos] in decChars: inc(pos) + result = nimNumberPostfix(g, pos) + +const + OpChars = {'+', '-', '*', '/', '\\', '<', '>', '!', '?', '^', '.', + '|', '=', '%', '&', '$', '@', '~', ':', '\x80'..'\xFF'} + +proc nimNextToken(g: var TGeneralTokenizer) = + const + hexChars = {'0'..'9', 'A'..'F', 'a'..'f', '_'} + octChars = {'0'..'7', '_'} + binChars = {'0'..'1', '_'} + SymChars = {'a'..'z', 'A'..'Z', '0'..'9', '\x80'..'\xFF'} + var pos = g.pos + g.start = g.pos + if g.state == gtStringLit: + g.kind = gtStringLit + while true: + case g.buf[pos] + of '\\': + g.kind = gtEscapeSequence + inc(pos) + case g.buf[pos] + of 'x', 'X': + inc(pos) + if g.buf[pos] in hexChars: inc(pos) + if g.buf[pos] in hexChars: inc(pos) + of '0'..'9': + while g.buf[pos] in {'0'..'9'}: inc(pos) + of '\0': + g.state = gtNone + else: inc(pos) + break + of '\0', '\x0D', '\x0A': + g.state = gtNone + break + of '\"': + 
inc(pos) + g.state = gtNone + break + else: inc(pos) + else: + case g.buf[pos] + of ' ', '\x09'..'\x0D': + g.kind = gtWhitespace + while g.buf[pos] in {' ', '\x09'..'\x0D'}: inc(pos) + of '#': + g.kind = gtComment + while not (g.buf[pos] in {'\0', '\x0A', '\x0D'}): inc(pos) + of 'a'..'z', 'A'..'Z', '_', '\x80'..'\xFF': + var id = "" + while g.buf[pos] in SymChars + {'_'}: + add(id, g.buf[pos]) + inc(pos) + if (g.buf[pos] == '\"'): + if (g.buf[pos + 1] == '\"') and (g.buf[pos + 2] == '\"'): + inc(pos, 3) + g.kind = gtLongStringLit + while true: + case g.buf[pos] + of '\0': + break + of '\"': + inc(pos) + if g.buf[pos] == '\"' and g.buf[pos+1] == '\"' and + g.buf[pos+2] != '\"': + inc(pos, 2) + break + else: inc(pos) + else: + g.kind = gtRawData + inc(pos) + while not (g.buf[pos] in {'\0', '\x0A', '\x0D'}): + if g.buf[pos] == '"' and g.buf[pos+1] != '"': break + inc(pos) + if g.buf[pos] == '\"': inc(pos) + else: + g.kind = nimGetKeyword(id) + of '0': + inc(pos) + case g.buf[pos] + of 'b', 'B': + inc(pos) + while g.buf[pos] in binChars: inc(pos) + pos = nimNumberPostfix(g, pos) + of 'x', 'X': + inc(pos) + while g.buf[pos] in hexChars: inc(pos) + pos = nimNumberPostfix(g, pos) + of 'o', 'O': + inc(pos) + while g.buf[pos] in octChars: inc(pos) + pos = nimNumberPostfix(g, pos) + else: pos = nimNumber(g, pos) + of '1'..'9': + pos = nimNumber(g, pos) + of '\'': + inc(pos) + g.kind = gtCharLit + while true: + case g.buf[pos] + of '\0', '\x0D', '\x0A': + break + of '\'': + inc(pos) + break + of '\\': + inc(pos, 2) + else: inc(pos) + of '\"': + inc(pos) + if (g.buf[pos] == '\"') and (g.buf[pos + 1] == '\"'): + inc(pos, 2) + g.kind = gtLongStringLit + while true: + case g.buf[pos] + of '\0': + break + of '\"': + inc(pos) + if g.buf[pos] == '\"' and g.buf[pos+1] == '\"' and + g.buf[pos+2] != '\"': + inc(pos, 2) + break + else: inc(pos) + else: + g.kind = gtStringLit + while true: + case g.buf[pos] + of '\0', '\x0D', '\x0A': + break + of '\"': + inc(pos) + break + of '\\': + 
g.state = g.kind + break + else: inc(pos) + of '(', ')', '[', ']', '{', '}', '`', ':', ',', ';': + inc(pos) + g.kind = gtPunctation + of '\0': + g.kind = gtEof + else: + if g.buf[pos] in OpChars: + g.kind = gtOperator + while g.buf[pos] in OpChars: inc(pos) + else: + inc(pos) + g.kind = gtNone + g.length = pos - g.pos + if g.kind != gtEof and g.length <= 0: + assert false, "nimNextToken: produced an empty token" + g.pos = pos + +proc generalNumber(g: var TGeneralTokenizer, position: int): int = + const decChars = {'0'..'9'} + var pos = position + g.kind = gtDecNumber + while g.buf[pos] in decChars: inc(pos) + if g.buf[pos] == '.': + g.kind = gtFloatNumber + inc(pos) + while g.buf[pos] in decChars: inc(pos) + if g.buf[pos] in {'e', 'E'}: + g.kind = gtFloatNumber + inc(pos) + if g.buf[pos] in {'+', '-'}: inc(pos) + while g.buf[pos] in decChars: inc(pos) + result = pos + +proc generalStrLit(g: var TGeneralTokenizer, position: int): int = + const + decChars = {'0'..'9'} + hexChars = {'0'..'9', 'A'..'F', 'a'..'f'} + var pos = position + g.kind = gtStringLit + var c = g.buf[pos] + inc(pos) # skip " or ' + while true: + case g.buf[pos] + of '\0': + break + of '\\': + inc(pos) + case g.buf[pos] + of '\0': + break + of '0'..'9': + while g.buf[pos] in decChars: inc(pos) + of 'x', 'X': + inc(pos) + if g.buf[pos] in hexChars: inc(pos) + if g.buf[pos] in hexChars: inc(pos) + else: inc(pos, 2) + else: + if g.buf[pos] == c: + inc(pos) + break + else: + inc(pos) + result = pos + +proc isKeyword(x: openarray[string], y: string): int = + var a = 0 + var b = len(x) - 1 + while a <= b: + var mid = (a + b) div 2 + var c = cmp(x[mid], y) + if c < 0: + a = mid + 1 + elif c > 0: + b = mid - 1 + else: + return mid + result = - 1 + +proc isKeywordIgnoreCase(x: openarray[string], y: string): int = + var a = 0 + var b = len(x) - 1 + while a <= b: + var mid = (a + b) div 2 + var c = cmpIgnoreCase(x[mid], y) + if c < 0: + a = mid + 1 + elif c > 0: + b = mid - 1 + else: + return mid + result = - 
1 + +type + TTokenizerFlag = enum + hasPreprocessor, hasNestedComments + TTokenizerFlags = set[TTokenizerFlag] + +proc clikeNextToken(g: var TGeneralTokenizer, keywords: openarray[string], + flags: TTokenizerFlags) = + const + hexChars = {'0'..'9', 'A'..'F', 'a'..'f'} + octChars = {'0'..'7'} + binChars = {'0'..'1'} + symChars = {'A'..'Z', 'a'..'z', '0'..'9', '_', '\x80'..'\xFF'} + var pos = g.pos + g.start = g.pos + if g.state == gtStringLit: + g.kind = gtStringLit + while true: + case g.buf[pos] + of '\\': + g.kind = gtEscapeSequence + inc(pos) + case g.buf[pos] + of 'x', 'X': + inc(pos) + if g.buf[pos] in hexChars: inc(pos) + if g.buf[pos] in hexChars: inc(pos) + of '0'..'9': + while g.buf[pos] in {'0'..'9'}: inc(pos) + of '\0': + g.state = gtNone + else: inc(pos) + break + of '\0', '\x0D', '\x0A': + g.state = gtNone + break + of '\"': + inc(pos) + g.state = gtNone + break + else: inc(pos) + else: + case g.buf[pos] + of ' ', '\x09'..'\x0D': + g.kind = gtWhitespace + while g.buf[pos] in {' ', '\x09'..'\x0D'}: inc(pos) + of '/': + inc(pos) + if g.buf[pos] == '/': + g.kind = gtComment + while not (g.buf[pos] in {'\0', '\x0A', '\x0D'}): inc(pos) + elif g.buf[pos] == '*': + g.kind = gtLongComment + var nested = 0 + inc(pos) + while true: + case g.buf[pos] + of '*': + inc(pos) + if g.buf[pos] == '/': + inc(pos) + if nested == 0: break + of '/': + inc(pos) + if g.buf[pos] == '*': + inc(pos) + if hasNestedComments in flags: inc(nested) + of '\0': + break + else: inc(pos) + of '#': + inc(pos) + if hasPreprocessor in flags: + g.kind = gtPreprocessor + while g.buf[pos] in {' ', '\t'}: inc(pos) + while g.buf[pos] in symChars: inc(pos) + else: + g.kind = gtOperator + of 'a'..'z', 'A'..'Z', '_', '\x80'..'\xFF': + var id = "" + while g.buf[pos] in SymChars: + add(id, g.buf[pos]) + inc(pos) + if isKeyword(keywords, id) >= 0: g.kind = gtKeyword + else: g.kind = gtIdentifier + of '0': + inc(pos) + case g.buf[pos] + of 'b', 'B': + inc(pos) + while g.buf[pos] in binChars: inc(pos) + 
if g.buf[pos] in {'A'..'Z', 'a'..'z'}: inc(pos) + of 'x', 'X': + inc(pos) + while g.buf[pos] in hexChars: inc(pos) + if g.buf[pos] in {'A'..'Z', 'a'..'z'}: inc(pos) + of '0'..'7': + inc(pos) + while g.buf[pos] in octChars: inc(pos) + if g.buf[pos] in {'A'..'Z', 'a'..'z'}: inc(pos) + else: + pos = generalNumber(g, pos) + if g.buf[pos] in {'A'..'Z', 'a'..'z'}: inc(pos) + of '1'..'9': + pos = generalNumber(g, pos) + if g.buf[pos] in {'A'..'Z', 'a'..'z'}: inc(pos) + of '\'': + pos = generalStrLit(g, pos) + g.kind = gtCharLit + of '\"': + inc(pos) + g.kind = gtStringLit + while true: + case g.buf[pos] + of '\0': + break + of '\"': + inc(pos) + break + of '\\': + g.state = g.kind + break + else: inc(pos) + of '(', ')', '[', ']', '{', '}', ':', ',', ';', '.': + inc(pos) + g.kind = gtPunctation + of '\0': + g.kind = gtEof + else: + if g.buf[pos] in OpChars: + g.kind = gtOperator + while g.buf[pos] in OpChars: inc(pos) + else: + inc(pos) + g.kind = gtNone + g.length = pos - g.pos + if g.kind != gtEof and g.length <= 0: + assert false, "clikeNextToken: produced an empty token" + g.pos = pos + +proc cNextToken(g: var TGeneralTokenizer) = + const + keywords: array[0..36, string] = ["_Bool", "_Complex", "_Imaginary", "auto", + "break", "case", "char", "const", "continue", "default", "do", "double", + "else", "enum", "extern", "float", "for", "goto", "if", "inline", "int", + "long", "register", "restrict", "return", "short", "signed", "sizeof", + "static", "struct", "switch", "typedef", "union", "unsigned", "void", + "volatile", "while"] + clikeNextToken(g, keywords, {hasPreprocessor}) + +proc cppNextToken(g: var TGeneralTokenizer) = + const + keywords: array[0..47, string] = ["asm", "auto", "break", "case", "catch", + "char", "class", "const", "continue", "default", "delete", "do", "double", + "else", "enum", "extern", "float", "for", "friend", "goto", "if", + "inline", "int", "long", "new", "operator", "private", "protected", + "public", "register", "return", "short", 
"signed", "sizeof", "static", + "struct", "switch", "template", "this", "throw", "try", "typedef", + "union", "unsigned", "virtual", "void", "volatile", "while"] + clikeNextToken(g, keywords, {hasPreprocessor}) + +proc csharpNextToken(g: var TGeneralTokenizer) = + const + keywords: array[0..76, string] = ["abstract", "as", "base", "bool", "break", + "byte", "case", "catch", "char", "checked", "class", "const", "continue", + "decimal", "default", "delegate", "do", "double", "else", "enum", "event", + "explicit", "extern", "false", "finally", "fixed", "float", "for", + "foreach", "goto", "if", "implicit", "in", "int", "interface", "internal", + "is", "lock", "long", "namespace", "new", "null", "object", "operator", + "out", "override", "params", "private", "protected", "public", "readonly", + "ref", "return", "sbyte", "sealed", "short", "sizeof", "stackalloc", + "static", "string", "struct", "switch", "this", "throw", "true", "try", + "typeof", "uint", "ulong", "unchecked", "unsafe", "ushort", "using", + "virtual", "void", "volatile", "while"] + clikeNextToken(g, keywords, {hasPreprocessor}) + +proc javaNextToken(g: var TGeneralTokenizer) = + const + keywords: array[0..52, string] = ["abstract", "assert", "boolean", "break", + "byte", "case", "catch", "char", "class", "const", "continue", "default", + "do", "double", "else", "enum", "extends", "false", "final", "finally", + "float", "for", "goto", "if", "implements", "import", "instanceof", "int", + "interface", "long", "native", "new", "null", "package", "private", + "protected", "public", "return", "short", "static", "strictfp", "super", + "switch", "synchronized", "this", "throw", "throws", "transient", "true", + "try", "void", "volatile", "while"] + clikeNextToken(g, keywords, {}) + +proc getNextToken*(g: var TGeneralTokenizer, lang: TSourceLanguage) = + case lang + of langNone: assert false + of langNimrod: nimNextToken(g) + of langCpp: cppNextToken(g) + of langCsharp: csharpNextToken(g) + of langC: 
cNextToken(g) + of langJava: javaNextToken(g) + diff --git a/lib/packages/docutils/rst.nim b/lib/packages/docutils/rst.nim new file mode 100755 index 000000000..b22bdf6ce --- /dev/null +++ b/lib/packages/docutils/rst.nim @@ -0,0 +1,1639 @@ +# +# +# Nimrod's Runtime Library +# (c) Copyright 2012 Andreas Rumpf +# +# See the file "copying.txt", included in this +# distribution, for details about the copyright. +# + +## This module implements a `reStructuredText`:idx parser. A large +## subset is implemented. Some features of the `markdown`:idx: wiki syntax are +## also supported. + +import + os, strutils, rstast + +type + TRstParseOption* = enum ## options for the RST parser + roSkipPounds, ## skip ``#`` at line beginning (documentation + ## embedded in Nimrod comments) + roSupportSmilies, ## make the RST parser support smilies like ``:)`` + roSupportRawDirective, ## support the ``raw`` directive (don't support + ## it for sandboxing) + roSupportMarkdown ## support additional features of markdown + + TRstParseOptions* = set[TRstParseOption] + + TMsgClass* = enum + mcHint = "Hint", + mcWarning = "Warning", + mcError = "Error" + + TMsgKind* = enum ## the possible messages + meCannotOpenFile, + meExpected, + meGridTableNotImplemented, + meNewSectionExpected, + meGeneralParseError, + meInvalidDirective, + mwRedefinitionOfLabel, + mwUnknownSubstitution, + mwUnsupportedLanguage + + TMsgHandler* = proc (filename: string, line, col: int, msgKind: TMsgKind, + arg: string) {.nimcall.} ## what to do in case of an error + TFindFileHandler* = proc (filename: string): string {.nimcall.} + +const + messages: array [TMsgKind, string] = [ + meCannotOpenFile: "cannot open '$1'", + meExpected: "'$1' expected", + meGridTableNotImplemented: "grid table is not implemented", + meNewSectionExpected: "new section expected", + meGeneralParseError: "general parse error", + meInvalidDirective: "invalid directive: '$1'", + mwRedefinitionOfLabel: "redefinition of label '$1'", + 
mwUnknownSubstitution: "unknown substitution '$1'", + mwUnsupportedLanguage: "language '$1' not supported" + ] + +proc rstnodeToRefname*(n: PRstNode): string +proc addNodes*(n: PRstNode): string +proc getFieldValue*(n: PRstNode, fieldname: string): string +proc getArgument*(n: PRstNode): string + +# ----------------------------- scanner part -------------------------------- + +const + SymChars: TCharSet = {'a'..'z', 'A'..'Z', '0'..'9', '\x80'..'\xFF'} + SmileyStartChars: TCharSet = {':', ';', '8'} + Smilies = { + ":D": "icon_e_biggrin", + ":-D": "icon_e_biggrin", + ":)": "icon_e_smile", + ":-)": "icon_e_smile", + ";)": "icon_e_wink", + ";-)": "icon_e_wink", + ":(": "icon_e_sad", + ":-(": "icon_e_sad", + ":o": "icon_e_surprised", + ":-o": "icon_e_surprised", + ":shock:": "icon_eek", + ":?": "icon_e_confused", + ":-?": "icon_e_confused", + ":-/": "icon_e_confused", + + "8-)": "icon_cool", + + ":lol:": "icon_lol", + ":x": "icon_mad", + ":-x": "icon_mad", + ":P": "icon_razz", + ":-P": "icon_razz", + ":oops:": "icon_redface", + ":cry:": "icon_cry", + ":evil:": "icon_evil", + ":twisted:": "icon_twisted", + ":roll:": "icon_rolleyes", + ":!:": "icon_exclaim", + + ":?:": "icon_question", + ":idea:": "icon_idea", + ":arrow:": "icon_arrow", + ":|": "icon_neutral", + ":-|": "icon_neutral", + ":mrgreen:": "icon_mrgreen", + ":geek:": "icon_e_geek", + ":ugeek:": "icon_e_ugeek" + } + +type + TTokType = enum + tkEof, tkIndent, tkWhite, tkWord, tkAdornment, tkPunct, tkOther + TToken{.final.} = object # a RST token + kind*: TTokType # the type of the token + ival*: int # the indentation or parsed integer value + symbol*: string # the parsed symbol as string + line*, col*: int # line and column of the token + + TTokenSeq = seq[TToken] + TLexer = object of TObject + buf*: cstring + bufpos*: int + line*, col*, baseIndent*: int + skipPounds*: bool + + +proc getThing(L: var TLexer, tok: var TToken, s: TCharSet) = + tok.kind = tkWord + tok.line = L.line + tok.col = L.col + var pos = 
L.bufpos + while True: + add(tok.symbol, L.buf[pos]) + inc(pos) + if L.buf[pos] notin s: break + inc(L.col, pos - L.bufpos) + L.bufpos = pos + +proc getAdornment(L: var TLexer, tok: var TToken) = + tok.kind = tkAdornment + tok.line = L.line + tok.col = L.col + var pos = L.bufpos + var c = L.buf[pos] + while True: + add(tok.symbol, L.buf[pos]) + inc(pos) + if L.buf[pos] != c: break + inc(L.col, pos - L.bufpos) + L.bufpos = pos + +proc getIndentAux(L: var TLexer, start: int): int = + var pos = start + var buf = L.buf + # skip the newline (but include it in the token!) + if buf[pos] == '\x0D': + if buf[pos + 1] == '\x0A': inc(pos, 2) + else: inc(pos) + elif buf[pos] == '\x0A': + inc(pos) + if L.skipPounds: + if buf[pos] == '#': inc(pos) + if buf[pos] == '#': inc(pos) + while True: + case buf[pos] + of ' ', '\x0B', '\x0C': + inc(pos) + inc(result) + of '\x09': + inc(pos) + result = result - (result mod 8) + 8 + else: + break # EndOfFile also leaves the loop + if buf[pos] == '\0': + result = 0 + elif (buf[pos] == '\x0A') or (buf[pos] == '\x0D'): + # look at the next line for proper indentation: + result = getIndentAux(L, pos) + L.bufpos = pos # no need to set back buf + +proc getIndent(L: var TLexer, tok: var TToken) = + inc(L.line) + tok.line = L.line + tok.col = 0 + tok.kind = tkIndent # skip the newline (but include it in the token!) 
+ tok.ival = getIndentAux(L, L.bufpos) + L.col = tok.ival + tok.ival = max(tok.ival - L.baseIndent, 0) + tok.symbol = "\n" & repeatChar(tok.ival) + +proc rawGetTok(L: var TLexer, tok: var TToken) = + tok.symbol = "" + tok.ival = 0 + var c = L.buf[L.bufpos] + case c + of 'a'..'z', 'A'..'Z', '\x80'..'\xFF', '0'..'9': + getThing(L, tok, SymChars) + of ' ', '\x09', '\x0B', '\x0C': + getThing(L, tok, {' ', '\x09'}) + tok.kind = tkWhite + if L.buf[L.bufpos] in {'\x0D', '\x0A'}: + rawGetTok(L, tok) # ignore spaces before \n + of '\x0D', '\x0A': + getIndent(L, tok) + of '!', '\"', '#', '$', '%', '&', '\'', '(', ')', '*', '+', ',', '-', '.', + '/', ':', ';', '<', '=', '>', '?', '@', '[', '\\', ']', '^', '_', '`', '{', + '|', '}', '~': + getAdornment(L, tok) + if len(tok.symbol) <= 3: tok.kind = tkPunct + else: + tok.line = L.line + tok.col = L.col + if c == '\0': + tok.kind = tkEof + else: + tok.kind = tkOther + add(tok.symbol, c) + inc(L.bufpos) + inc(L.col) + tok.col = max(tok.col - L.baseIndent, 0) + +proc getTokens(buffer: string, skipPounds: bool, tokens: var TTokenSeq) = + var L: TLexer + var length = len(tokens) + L.buf = cstring(buffer) + L.line = 1 # skip UTF-8 BOM + if (L.buf[0] == '\xEF') and (L.buf[1] == '\xBB') and (L.buf[2] == '\xBF'): + inc(L.bufpos, 3) + L.skipPounds = skipPounds + if skipPounds: + if L.buf[L.bufpos] == '#': inc(L.bufpos) + if L.buf[L.bufpos] == '#': inc(L.bufpos) + L.baseIndent = 0 + while L.buf[L.bufpos] == ' ': + inc(L.bufpos) + inc(L.baseIndent) + while true: + inc(length) + setlen(tokens, length) + rawGetTok(L, tokens[length - 1]) + if tokens[length - 1].kind == tkEof: break + if tokens[0].kind == tkWhite: + # BUGFIX + tokens[0].ival = len(tokens[0].symbol) + tokens[0].kind = tkIndent + +type + TLevelMap = array[Char, int] + TSubstitution{.final.} = object + key*: string + value*: PRstNode + + TSharedState {.final.} = object + options: TRstParseOptions # parsing options + uLevel, oLevel: int # counters for the section levels + subs: 
seq[TSubstitution] # substitutions + refs: seq[TSubstitution] # references + underlineToLevel: TLevelMap # Saves for each possible title adornment + # character its level in the + # current document. + # This is for single underline adornments. + overlineToLevel: TLevelMap # Saves for each possible title adornment + # character its level in the current + # document. + # This is for over-underline adornments. + msgHandler: TMsgHandler # How to handle errors. + findFile: TFindFileHandler # How to find files. + + PSharedState = ref TSharedState + TRstParser = object of TObject + idx*: int + tok*: TTokenSeq + s*: PSharedState + indentStack*: seq[int] + filename*: string + line*, col*: int + hasToc*: bool + + EParseError* = object of EInvalidValue + +proc whichMsgClass*(k: TMsgKind): TMsgClass = + ## returns which message class `k` belongs to. + case ($k)[1] + of 'e', 'E': result = mcError + of 'w', 'W': result = mcWarning + of 'h', 'H': result = mcHint + else: assert false, "msgkind does not fit naming scheme" + +proc defaultMsgHandler*(filename: string, line, col: int, msgkind: TMsgKind, + arg: string) {.procvar.} = + let mc = msgKind.whichMsgClass + let a = messages[msgKind] % arg + let message = "$1($2, $3) $4: $5" % [filename, $line, $col, $mc, a] + if mc == mcError: raise newException(EParseError, message) + else: Writeln(stdout, message) + +proc defaultFindFile*(filename: string): string {.procvar.} = + if existsFile(filename): result = filename + else: result = "" + +proc newSharedState(options: TRstParseOptions, + findFile: TFindFileHandler, + msgHandler: TMsgHandler): PSharedState = + new(result) + result.subs = @[] + result.refs = @[] + result.options = options + result.msgHandler = if isNil(msgHandler): defaultMsgHandler else: msgHandler + result.findFile = if isNil(findFile): defaultFindFile else: findFile + +proc rstMessage(p: TRstParser, msgKind: TMsgKind, arg: string) = + p.s.msgHandler(p.filename, p.line + p.tok[p.idx].line, + p.col + p.tok[p.idx].col, 
msgKind, arg) + +proc rstMessage(p: TRstParser, msgKind: TMsgKind, arg: string, line, col: int) = + p.s.msgHandler(p.filename, p.line + line, + p.col + col, msgKind, arg) + +proc rstMessage(p: TRstParser, msgKind: TMsgKind) = + p.s.msgHandler(p.filename, p.line + p.tok[p.idx].line, + p.col + p.tok[p.idx].col, msgKind, + p.tok[p.idx].symbol) + +when false: + proc corrupt(p: TRstParser) = + assert p.indentStack[0] == 0 + for i in 1 .. high(p.indentStack): assert p.indentStack[i] < 1_000 + +proc currInd(p: TRstParser): int = + result = p.indentStack[high(p.indentStack)] + +proc pushInd(p: var TRstParser, ind: int) = + add(p.indentStack, ind) + +proc popInd(p: var TRstParser) = + if len(p.indentStack) > 1: setlen(p.indentStack, len(p.indentStack) - 1) + +proc initParser(p: var TRstParser, sharedState: PSharedState) = + p.indentStack = @[0] + p.tok = @[] + p.idx = 0 + p.filename = "" + p.hasToc = false + p.col = 0 + p.line = 1 + p.s = sharedState + +proc addNodesAux(n: PRstNode, result: var string) = + if n.kind == rnLeaf: + add(result, n.text) + else: + for i in countup(0, len(n) - 1): addNodesAux(n.sons[i], result) + +proc addNodes(n: PRstNode): string = + result = "" + addNodesAux(n, result) + +proc rstnodeToRefnameAux(n: PRstNode, r: var string, b: var bool) = + if n.kind == rnLeaf: + for i in countup(0, len(n.text) - 1): + case n.text[i] + of '0'..'9': + if b: + add(r, '-') + b = false + if len(r) == 0: add(r, 'Z') + add(r, n.text[i]) + of 'a'..'z': + if b: + add(r, '-') + b = false + add(r, n.text[i]) + of 'A'..'Z': + if b: + add(r, '-') + b = false + add(r, chr(ord(n.text[i]) - ord('A') + ord('a'))) + else: + if (len(r) > 0): b = true + else: + for i in countup(0, len(n) - 1): rstnodeToRefnameAux(n.sons[i], r, b) + +proc rstnodeToRefname(n: PRstNode): string = + result = "" + var b = false + rstnodeToRefnameAux(n, result, b) + +proc findSub(p: var TRstParser, n: PRstNode): int = + var key = addNodes(n) + # the spec says: if no exact match, try one without case 
distinction: + for i in countup(0, high(p.s.subs)): + if key == p.s.subs[i].key: + return i + for i in countup(0, high(p.s.subs)): + if cmpIgnoreStyle(key, p.s.subs[i].key) == 0: + return i + result = -1 + +proc setSub(p: var TRstParser, key: string, value: PRstNode) = + var length = len(p.s.subs) + for i in countup(0, length - 1): + if key == p.s.subs[i].key: + p.s.subs[i].value = value + return + setlen(p.s.subs, length + 1) + p.s.subs[length].key = key + p.s.subs[length].value = value + +proc setRef(p: var TRstParser, key: string, value: PRstNode) = + var length = len(p.s.refs) + for i in countup(0, length - 1): + if key == p.s.refs[i].key: + if p.s.refs[i].value.addNodes != value.addNodes: + rstMessage(p, mwRedefinitionOfLabel, key) + + p.s.refs[i].value = value + return + setlen(p.s.refs, length + 1) + p.s.refs[length].key = key + p.s.refs[length].value = value + +proc findRef(p: var TRstParser, key: string): PRstNode = + for i in countup(0, high(p.s.refs)): + if key == p.s.refs[i].key: + return p.s.refs[i].value + +proc newLeaf(p: var TRstParser): PRstNode = + result = newRstNode(rnLeaf, p.tok[p.idx].symbol) + +proc getReferenceName(p: var TRstParser, endStr: string): PRstNode = + var res = newRstNode(rnInner) + while true: + case p.tok[p.idx].kind + of tkWord, tkOther, tkWhite: + add(res, newLeaf(p)) + of tkPunct: + if p.tok[p.idx].symbol == endStr: + inc(p.idx) + break + else: + add(res, newLeaf(p)) + else: + rstMessage(p, meExpected, endStr) + break + inc(p.idx) + result = res + +proc untilEol(p: var TRstParser): PRstNode = + result = newRstNode(rnInner) + while not (p.tok[p.idx].kind in {tkIndent, tkEof}): + add(result, newLeaf(p)) + inc(p.idx) + +proc expect(p: var TRstParser, tok: string) = + if p.tok[p.idx].symbol == tok: inc(p.idx) + else: rstMessage(p, meExpected, tok) + +proc isInlineMarkupEnd(p: TRstParser, markup: string): bool = + result = p.tok[p.idx].symbol == markup + if not result: + return # Rule 3: + result = not (p.tok[p.idx - 1].kind in 
{tkIndent, tkWhite}) + if not result: + return # Rule 4: + result = (p.tok[p.idx + 1].kind in {tkIndent, tkWhite, tkEof}) or + (p.tok[p.idx + 1].symbol[0] in + {'\'', '\"', ')', ']', '}', '>', '-', '/', '\\', ':', '.', ',', ';', '!', + '?', '_'}) + if not result: + return # Rule 7: + if p.idx > 0: + if (markup != "``") and (p.tok[p.idx - 1].symbol == "\\"): + result = false + +proc isInlineMarkupStart(p: TRstParser, markup: string): bool = + var d: Char + result = p.tok[p.idx].symbol == markup + if not result: + return # Rule 1: + result = (p.idx == 0) or (p.tok[p.idx - 1].kind in {tkIndent, tkWhite}) or + (p.tok[p.idx - 1].symbol[0] in + {'\'', '\"', '(', '[', '{', '<', '-', '/', ':', '_'}) + if not result: + return # Rule 2: + result = not (p.tok[p.idx + 1].kind in {tkIndent, tkWhite, tkEof}) + if not result: + return # Rule 5 & 7: + if p.idx > 0: + if p.tok[p.idx - 1].symbol == "\\": + result = false + else: + var c = p.tok[p.idx - 1].symbol[0] + case c + of '\'', '\"': d = c + of '(': d = ')' + of '[': d = ']' + of '{': d = '}' + of '<': d = '>' + else: d = '\0' + if d != '\0': result = p.tok[p.idx + 1].symbol[0] != d + +proc match(p: TRstParser, start: int, expr: string): bool = + # regular expressions are: + # special char exact match + # 'w' tkWord + # ' ' tkWhite + # 'a' tkAdornment + # 'i' tkIndent + # 'p' tkPunct + # 'T' always true + # 'E' whitespace, indent or eof + # 'e' tkWord or '#' (for enumeration lists) + var i = 0 + var j = start + var last = len(expr) - 1 + while i <= last: + case expr[i] + of 'w': result = p.tok[j].kind == tkWord + of ' ': result = p.tok[j].kind == tkWhite + of 'i': result = p.tok[j].kind == tkIndent + of 'p': result = p.tok[j].kind == tkPunct + of 'a': result = p.tok[j].kind == tkAdornment + of 'o': result = p.tok[j].kind == tkOther + of 'T': result = true + of 'E': result = p.tok[j].kind in {tkEof, tkWhite, tkIndent} + of 'e': + result = (p.tok[j].kind == tkWord) or (p.tok[j].symbol == "#") + if result: + case 
proc parsePostfix(p: var TRstParser, n: PRstNode): PRstNode =
  ## Handles what may follow an interpreted-text / word node `n`:
  ##
  ## * a trailing ``_`` turns `n` into a reference; when the text ended with
  ##   ``>`` followed by a backtick the embedded ``<...>`` target is split
  ##   off by `fixupEmbeddedRef` and a hyperlink node is built instead,
  ## * a trailing ``:role:`` retags `n` with the node kind of that role, or
  ##   wraps it in an `rnGeneralRole` node for unknown role names.
  ##
  ## Returns the node that should be added to the parent (either `n` itself,
  ## possibly with a changed kind, or a new wrapper node).
  result = n
  if isInlineMarkupEnd(p, "_"):
    inc(p.idx)
    if p.tok[p.idx-2].symbol == "`" and p.tok[p.idx-3].symbol == ">":
      # embedded target: `text <target>`_
      var a = newRstNode(rnInner)
      var b = newRstNode(rnInner)
      fixupEmbeddedRef(n, a, b)
      if len(a) == 0:
        # no link text in front of the target
        result = newRstNode(rnStandaloneHyperlink)
        add(result, b)
      else:
        result = newRstNode(rnHyperlink)
        add(result, a)
        add(result, b)
        setRef(p, rstnodeToRefname(a), b)
    elif n.kind == rnInterpretedText:
      n.kind = rnRef
    else:
      result = newRstNode(rnRef)
      add(result, n)
  elif match(p, p.idx, ":w:"):
    # a role:
    let role = p.tok[p.idx + 1].symbol
    case role
    of "idx": n.kind = rnIdx
    of "literal": n.kind = rnInlineLiteral
    of "strong": n.kind = rnStrongEmphasis
    of "emphasis": n.kind = rnEmphasis
    of "sub", "subscript": n.kind = rnSub
    of "sup", "superscript", "supscript":
      # "superscript" is the real role name; the historical misspelling
      # "supscript" is still accepted so existing documents keep working.
      n.kind = rnSup
    else:
      result = newRstNode(rnGeneralRole)
      n.kind = rnInner
      add(result, n)
      add(result, newRstNode(rnLeaf, role))
    inc(p.idx, 3) # skip the three tokens of ':' role ':'
proc parseUntil(p: var TRstParser, father: PRstNode, postfix: string,
                interpretBackslash: bool) =
  ## Appends leaf nodes to `father` for every token up to the point where
  ## `isInlineMarkupEnd(p, postfix)` holds; the terminating token is skipped.
  ##
  ## * `interpretBackslash` - when true, punctuation tokens are routed
  ##   through `parseBackslash`; otherwise they are added verbatim.
  ##
  ## A single line break inside the markup is turned into one space. Two
  ## consecutive indentation tokens, or any token kind not listed below
  ## (i.e. the end of the input), are reported as `meExpected` and end the
  ## scan.
  let
    line = p.tok[p.idx].line
    col = p.tok[p.idx].col
  while true:
    case p.tok[p.idx].kind
    of tkPunct:
      if isInlineMarkupEnd(p, postfix):
        inc(p.idx)
        break
      elif interpretBackslash:
        parseBackslash(p, father)
      else:
        add(father, newLeaf(p))
        inc(p.idx)
    of tkAdornment, tkWord, tkOther:
      add(father, newLeaf(p))
      inc(p.idx)
    of tkIndent:
      add(father, newRstNode(rnLeaf, " "))
      inc(p.idx)
      if p.tok[p.idx].kind == tkIndent:
        rstMessage(p, meExpected, postfix)
        break
    of tkWhite:
      add(father, newRstNode(rnLeaf, " "))
      inc(p.idx)
    else:
      rstMessage(p, meExpected, postfix, line, col)
      # The index is not advanced in this branch, so without leaving the
      # loop a message handler that returns would make us spin forever.
      break
proc parseDelimited(p: var TRstParser, kind: TRstNodeKind, postfix: string,
                    interpretBackslash: bool): PRstNode =
  ## Skips the opening markup token and collects everything up to `postfix`
  ## into a fresh node of the given `kind`.
  inc(p.idx)
  result = newRstNode(kind)
  parseUntil(p, result, postfix, interpretBackslash)

proc addSmileyIfAny(p: var TRstParser, father: PRstNode): bool =
  ## When smilies are enabled and `parseSmiley` yields a node, appends that
  ## node to `father` and returns true; otherwise returns false.
  if roSupportSmilies in p.s.options:
    let smiley = parseSmiley(p)
    if smiley != nil:
      add(father, smiley)
      result = true

proc parseInline(p: var TRstParser, father: PRstNode) =
  ## Parses one inline element starting at the current token and appends the
  ## resulting node to `father`. Tokens of any kind not handled below are
  ## left untouched.
  case p.tok[p.idx].kind
  of tkPunct:
    # the longest markup delimiters have to be tried first
    if isInlineMarkupStart(p, "***"):
      add(father, parseDelimited(p, rnTripleEmphasis, "***", true))
    elif isInlineMarkupStart(p, "**"):
      add(father, parseDelimited(p, rnStrongEmphasis, "**", true))
    elif isInlineMarkupStart(p, "*"):
      add(father, parseDelimited(p, rnEmphasis, "*", true))
    elif roSupportMarkdown in p.s.options and p.tok[p.idx].symbol == "```":
      inc(p.idx)
      add(father, parseMarkdownCodeblock(p))
    elif isInlineMarkupStart(p, "``"):
      add(father, parseDelimited(p, rnInlineLiteral, "``", false))
    elif isInlineMarkupStart(p, "`"):
      # interpreted text may carry a role or reference postfix
      let interpreted = parseDelimited(p, rnInterpretedText, "`", true)
      add(father, parsePostfix(p, interpreted))
    elif isInlineMarkupStart(p, "|"):
      add(father, parseDelimited(p, rnSubstitutionReferences, "|", false))
    elif not addSmileyIfAny(p, father):
      parseBackslash(p, father)
  of tkWord:
    if not addSmileyIfAny(p, father):
      parseURL(p, father)
  of tkAdornment, tkOther, tkWhite:
    if not addSmileyIfAny(p, father):
      add(father, newLeaf(p))
      inc(p.idx)
  else: nil
proc parseComment(p: var TRstParser): PRstNode =
  ## Skips over a comment and always returns nil, i.e. no node is produced.
  ##
  ## * Comment text on the current line: skip to the next indentation token
  ##   or the end of input.
  ## * An indentation token directly followed by another one: an empty
  ##   comment, only the first indentation token is consumed.
  ## * Otherwise: skip forward until the end of input or an indentation
  ##   token whose `ival` is smaller than that of the starting token.
  let first = p.tok[p.idx].kind
  if first notin {tkIndent, tkEof}:
    while p.tok[p.idx].kind notin {tkIndent, tkEof}: inc(p.idx)
  elif first == tkIndent and p.tok[p.idx + 1].kind == tkIndent:
    inc(p.idx)                # empty comment
  else:
    let indent = p.tok[p.idx].ival
    while p.tok[p.idx].kind != tkEof and
        not (p.tok[p.idx].kind == tkIndent and p.tok[p.idx].ival < indent):
      inc(p.idx)
  result = nil
proc getFieldValue(n: PRstNode, fieldname: string): string =
  ## Looks up `fieldname` (compared with `cmpIgnoreStyle`) in the field list
  ## stored as the second son of `n`.
  ##
  ## Returns "" when there is no field list or no such field, the rendered
  ## field body when the field is present, and "\x01\x01" when the field is
  ## present but its body renders to the empty string.
  result = ""
  let fields = n.sons[1]
  # A second son that is not a field list used to be an internal error;
  # it is tolerated silently because raising here would break the forum.
  if fields == nil or fields.kind != rnFieldList: return
  for field in items(fields.sons):
    if cmpIgnoreStyle(addNodes(field.sons[0]), fieldname) != 0: continue
    result = addNodes(field.sons[1])
    if result == "": result = "\x01\x01" # indicates that the field exists
    return
proc isLineBlock(p: TRstParser): bool =
  ## Decides from the first token of the following line whether the current
  ## token belongs to a line block: true when that token starts in a later
  ## column than the current token, or when it starts in the same column
  ## and is a ``|``.
  let
    following = tokenAfterNewline(p)
    curCol = p.tok[p.idx].col
    nextCol = p.tok[following].col
  if nextCol > curCol:
    result = true
  else:
    result = (nextCol == curCol) and (p.tok[following].symbol == "|")
proc parseLineBlock(p: var TRstParser): PRstNode =
  ## Parses a line block. Returns nil without consuming anything when the
  ## current token is not followed by whitespace; otherwise returns an
  ## `rnLineBlock` node with one `rnLineBlockItem` son per ``| `` line.
  if p.tok[p.idx + 1].kind != tkWhite: return
  let barCol = p.tok[p.idx].col
  result = newRstNode(rnLineBlock)
  # the content of every item is indented to the column after "| "
  pushInd(p, p.tok[p.idx + 2].col)
  inc(p.idx, 2)
  while true:
    let entry = newRstNode(rnLineBlockItem)
    parseSection(p, entry)
    add(result, entry)
    # another item follows if the next line starts with "| " in `barCol`
    let another = (p.tok[p.idx].kind == tkIndent) and
        (p.tok[p.idx].ival == barCol) and
        (p.tok[p.idx + 1].symbol == "|") and
        (p.tok[p.idx + 2].kind == tkWhite)
    if not another: break
    inc(p.idx, 3)
  popInd(p)
proc parseSimpleTable(p: var TRstParser): PRstNode =
  ## Parses a table whose columns are delimited by adornment lines and
  ## returns an `rnTable` node with one `rnTableRow` son per row.
  ##
  ## The text of every cell is collected into a string and then parsed as a
  ## document of its own by a separate parser that shares `p.s`.
  var
    cols: TIntSeq             # end column of every table column
    row: seq[string]          # accumulated text per cell of the current row
    i, last, line: int
    c: Char
    q: TRstParser             # sub-parser used for the cell contents
    a, b: PRstNode            # current row / current cell
  result = newRstNode(rnTable)
  cols = @[]
  row = @[]
  a = nil
  # NOTE(review): `c` is assigned here but not read again in this proc.
  c = p.tok[p.idx].symbol[0]
  while true:
    if p.tok[p.idx].kind == tkAdornment:
      last = tokenAfterNewline(p)
      if p.tok[last].kind in {tkEof, tkIndent}:
        # skip last adornment line:
        p.idx = last
        break
      # an adornment line (re)defines the column boundaries
      getColumns(p, cols)
      setlen(row, len(cols))
      if a != nil:
        # the row built before this adornment line becomes a header row
        for j in 0..len(a)-1: a.sons[j].kind = rnTableHeaderCell
    if p.tok[p.idx].kind == tkEof: break
    for j in countup(0, high(row)): row[j] = ""
    # the following while loop iterates over the lines a single cell may span:
    line = p.tok[p.idx].line
    while true:
      i = 0
      while not (p.tok[p.idx].kind in {tkIndent, tkEof}):
        if (tokEnd(p) <= cols[i]):
          # token still ends within column `i`
          add(row[i], p.tok[p.idx].symbol)
          inc(p.idx)
        else:
          # move on to the next column, dropping the separating whitespace
          if p.tok[p.idx].kind == tkWhite: inc(p.idx)
          inc(i)
      if p.tok[p.idx].kind == tkIndent: inc(p.idx)
      # a token ending within the first column starts a new row
      if tokEnd(p) <= cols[0]: break
      if p.tok[p.idx].kind in {tkEof, tkAdornment}: break
      # continuation line: separate it by a newline in all but the first cell
      for j in countup(1, high(row)): add(row[j], '\x0A')
    a = newRstNode(rnTableRow)
    for j in countup(0, high(row)):
      initParser(q, p.s)
      q.col = cols[j]
      q.line = line - 1
      q.filename = p.filename
      getTokens(row[j], false, q.tok)
      b = newRstNode(rnTableDataCell)
      add(b, parseDoc(q))
      add(a, b)
    add(result, a)
proc parseBulletList(p: var TRstParser): PRstNode =
  ## Parses a bullet list. Returns nil without consuming anything when the
  ## current (bullet) token is not followed by whitespace; otherwise returns
  ## an `rnBulletList` node with one `rnBulletItem` son per item.
  ##
  ## Further items must start in the same column and use the same bullet
  ## symbol as the first one.
  if p.tok[p.idx + 1].kind != tkWhite: return
  let
    marker = p.tok[p.idx].symbol
    markerCol = p.tok[p.idx].col
  result = newRstNode(rnBulletList)
  # item bodies are indented to the column of the text after the bullet
  pushInd(p, p.tok[p.idx + 2].col)
  inc(p.idx, 2)
  while true:
    let entry = newRstNode(rnBulletItem)
    parseSection(p, entry)
    add(result, entry)
    let another = (p.tok[p.idx].kind == tkIndent) and
        (p.tok[p.idx].ival == markerCol) and
        (p.tok[p.idx + 1].symbol == marker) and
        (p.tok[p.idx + 2].kind == tkWhite)
    if not another: break
    inc(p.idx, 3) # skip indentation, bullet and whitespace
  popInd(p)
proc parseEnumList(p: var TRstParser): PRstNode =
  ## Parses an enumerated list whose items start with one of the patterns
  ## in `wildcards`. Returns nil (with `p.idx` restored) when no pattern
  ## matches at the current token or when the following line neither starts
  ## in the column of the item text nor with the same pattern; otherwise
  ## returns an `rnEnumList` node with one `rnEnumItem` son per item.
  const
    wildcards: array[0..2, string] = ["(e) ", "e) ", "e. "]
    wildpos: array[0..2, int] = [1, 0, 0]
  result = nil
  # find the first enumerator pattern that matches here
  var w = -1
  for k in countup(0, 2):
    if match(p, p.idx, wildcards[k]):
      w = k
      break
  if w < 0: return
  let
    col = p.tok[p.idx].col
    enumLen = wildpos[w] + 3  # number of tokens making up the enumerator
  inc(p.idx, enumLen)
  let j = tokenAfterNewLine(p)
  if (p.tok[j].col != p.tok[p.idx].col) and not match(p, j, wildcards[w]):
    # not an enumerated list after all: undo the skip
    dec(p.idx, enumLen)
    return
  result = newRstNode(rnEnumList)
  pushInd(p, p.tok[p.idx].col)
  while true:
    let entry = newRstNode(rnEnumItem)
    parseSection(p, entry)
    add(result, entry)
    let another = (p.tok[p.idx].kind == tkIndent) and
        (p.tok[p.idx].ival == col) and match(p, p.idx + 1, wildcards[w])
    if not another: break
    inc(p.idx, enumLen + 1) # indentation token plus the enumerator
  popInd(p)
proc parseDirective(p: var TRstParser, flags: TDirFlags): PRstNode =
  ## Parses the argument and option part of a directive and returns an
  ## `rnDirective` node with exactly two sons:
  ##
  ## 0. the argument (`rnDirArg`), or nil when `hasArg` is not set or an
  ##    `argIsWord` argument is missing,
  ## 1. the option field list, or nil when `hasOptions` is not set or no
  ##    field list follows.
  result = newRstNode(rnDirective)
  var arg: PRstNode = nil
  if hasArg in flags:
    arg = newRstNode(rnDirArg)
    if argIsFile in flags:
      # a file name: everything up to the first whitespace/line end
      while p.tok[p.idx].kind in {tkWord, tkOther, tkPunct, tkAdornment}:
        add(arg, newLeaf(p))
        inc(p.idx)
    elif argIsWord in flags:
      while p.tok[p.idx].kind == tkWhite: inc(p.idx)
      if p.tok[p.idx].kind != tkWord:
        arg = nil
      else:
        add(arg, newLeaf(p))
        inc(p.idx)
    else:
      parseLine(p, arg)
  add(result, arg)
  var fields: PRstNode = nil
  if (hasOptions in flags) and (p.tok[p.idx].kind == tkIndent) and
      (p.tok[p.idx].ival >= 3) and (p.tok[p.idx + 1].symbol == ":"):
    fields = parseFields(p)
  add(result, fields)
proc parseDirBody(p: var TRstParser, contentParser: TSectionParser): PRstNode =
  ## Parses the body of a directive with `contentParser`, using the
  ## indentation of the body as the current indentation while doing so.
  ## Returns nil when no more deeply indented block follows.
  if not indFollows(p): return
  pushInd(p, p.tok[p.idx].ival)
  result = contentParser(p)
  popInd(p)
proc dirRaw(p: var TRstParser): PRstNode =
  ## Parses a ``raw`` directive.
  ##
  ## The following options are recognized:
  ##
  ## file : string (newlines removed)
  ##     The local filesystem path of a raw data file to be included.
  ##
  ## The optional argument selects the output format and has to be ``html``
  ## or ``latex`` (compared case-insensitively); any other word is reported
  ## as `meInvalidDirective`. Without an argument the body is parsed as
  ## ordinary content.
  result = parseDirective(p, {hasOptions, hasArg, argIsWord})
  if result.sons[0] != nil:
    # With `argIsWord` the argument node holds exactly one leaf: the word.
    let format = result.sons[0].sons[0].text
    if cmpIgnoreCase(format, "html") == 0:
      dirRawAux(p, result, rnRawHtml, parseLiteralBlock)
    elif cmpIgnoreCase(format, "latex") == 0:
      dirRawAux(p, result, rnRawLatex, parseLiteralBlock)
    else:
      # Report the offending word itself; the `rnDirArg` container node
      # carries no text of its own.
      rstMessage(p, meInvalidDirective, format)
  else:
    dirRawAux(p, result, rnRaw, parseSectionWrapper)
proc resolveSubs(p: var TRstParser, n: PRstNode): PRstNode =
  ## Resolves substitution references and named references in the tree
  ## rooted at `n` and returns the (possibly replaced) node.
  ##
  ## * A substitution reference is replaced by its definition; if there is
  ##   none, by a leaf holding the environment variable of the same name;
  ##   if that is empty too, `mwUnknownSubstitution` is reported and the
  ##   node is kept.
  ## * A reference with a known target becomes an `rnHyperlink` whose sons
  ##   are the original node (retagged `rnInner`) and the target.
  ## * An `rnContents` node sets `p.hasToc`.
  ## * Leaves are kept; for all other kinds the sons are resolved in place.
  result = n
  if n == nil: return
  case n.kind
  of rnSubstitutionReferences:
    let subIdx = findSub(p, n)
    if subIdx >= 0:
      result = p.s.subs[subIdx].value
    else:
      let key = addNodes(n)
      let envValue = getEnv(key)
      if envValue == "": rstMessage(p, mwUnknownSubstitution, key)
      else: result = newRstNode(rnLeaf, envValue)
  of rnRef:
    let target = findRef(p, rstnodeToRefname(n))
    if target != nil:
      result = newRstNode(rnHyperlink)
      n.kind = rnInner
      add(result, n)
      add(result, target)
  of rnLeaf:
    nil
  of rnContents:
    p.hasToc = true
  else:
    for i in 0 .. len(n) - 1:
      n.sons[i] = resolveSubs(p, n.sons[i])
thingie) + rnParagraph, # a paragraph + rnBulletList, # a bullet list + rnBulletItem, # a bullet item + rnEnumList, # an enumerated list + rnEnumItem, # an enumerated item + rnDefList, # a definition list + rnDefItem, # an item of a definition list consisting of ... + rnDefName, # ... a name part ... + rnDefBody, # ... and a body part ... + rnFieldList, # a field list + rnField, # a field item + rnFieldName, # consisting of a field name ... + rnFieldBody, # ... and a field body + rnOptionList, rnOptionListItem, rnOptionGroup, rnOption, rnOptionString, + rnOptionArgument, rnDescription, rnLiteralBlock, rnQuotedLiteralBlock, + rnLineBlock, # the | thingie + rnLineBlockItem, # sons of the | thing + rnBlockQuote, # text just indented + rnTable, rnGridTable, rnTableRow, rnTableHeaderCell, rnTableDataCell, + rnLabel, # used for footnotes and other things + rnFootnote, # a footnote + rnCitation, # similar to footnote + rnStandaloneHyperlink, rnHyperlink, rnRef, rnDirective, # a directive + rnDirArg, rnRaw, rnTitle, rnContents, rnImage, rnFigure, rnCodeBlock, + rnRawHtml, rnRawLatex, + rnContainer, # ``container`` directive + rnIndex, # index directve: + # .. 
proc renderRstSons(d: var TRenderContext, n: PRstNode, result: var string) =
  ## Renders every son of `n`, in order, by passing it to `renderRstToRst`
  ## with the same context `d` and output string `result`.
  for son in items(n.sons):
    renderRstToRst(d, son, result)
+ const + lvlToChar: array[0..8, char] = ['!', '=', '-', '~', '`', '<', '*', '|', '+'] + if n == nil: return + var ind = repeatChar(d.indent) + case n.kind + of rnInner: + renderRstSons(d, n, result) + of rnHeadline: + result.add("\n") + result.add(ind) + + let oldLen = result.len + renderRstSons(d, n, result) + let HeadlineLen = result.len - oldLen + + result.add("\n") + result.add(ind) + result.add repeatChar(HeadlineLen, lvlToChar[n.level]) + of rnOverline: + result.add("\n") + result.add(ind) + + var headline = "" + renderRstSons(d, n, headline) + + let lvl = repeatChar(headline.Len - d.indent, lvlToChar[n.level]) + result.add(lvl) + result.add("\n") + result.add(headline) + + result.add("\n") + result.add(ind) + result.add(lvl) + of rnTransition: + result.add("\n\n") + result.add(ind) + result.add repeatChar(78-d.indent, '-') + result.add("\n\n") + of rnParagraph: + result.add("\n\n") + result.add(ind) + renderRstSons(d, n, result) + of rnBulletItem: + inc(d.indent, 2) + var tmp = "" + renderRstSons(d, n, tmp) + if tmp.len > 0: + result.add("\n") + result.add(ind) + result.add("* ") + result.add(tmp) + dec(d.indent, 2) + of rnEnumItem: + inc(d.indent, 4) + var tmp = "" + renderRstSons(d, n, tmp) + if tmp.len > 0: + result.add("\n") + result.add(ind) + result.add("(#) ") + result.add(tmp) + dec(d.indent, 4) + of rnOptionList, rnFieldList, rnDefList, rnDefItem, rnLineBlock, rnFieldName, + rnFieldBody, rnStandaloneHyperlink, rnBulletList, rnEnumList: + renderRstSons(d, n, result) + of rnDefName: + result.add("\n\n") + result.add(ind) + renderRstSons(d, n, result) + of rnDefBody: + inc(d.indent, 2) + if n.sons[0].kind != rnBulletList: + result.add("\n") + result.add(ind) + result.add(" ") + renderRstSons(d, n, result) + dec(d.indent, 2) + of rnField: + var tmp = "" + renderRstToRst(d, n.sons[0], tmp) + + var L = max(tmp.len + 3, 30) + inc(d.indent, L) + + result.add "\n" + result.add ind + result.add ':' + result.add tmp + result.add ':' + result.add repeatChar(L 
- tmp.len - 2) + renderRstToRst(d, n.sons[1], result) + + dec(d.indent, L) + of rnLineBlockItem: + result.add("\n") + result.add(ind) + result.add("| ") + renderRstSons(d, n, result) + of rnBlockQuote: + inc(d.indent, 2) + renderRstSons(d, n, result) + dec(d.indent, 2) + of rnRef: + result.add("`") + renderRstSons(d, n, result) + result.add("`_") + of rnHyperlink: + result.add('`') + renderRstToRst(d, n.sons[0], result) + result.add(" <") + renderRstToRst(d, n.sons[1], result) + result.add(">`_") + of rnGeneralRole: + result.add('`') + renderRstToRst(d, n.sons[0],result) + result.add("`:") + renderRstToRst(d, n.sons[1],result) + result.add(':') + of rnSub: + result.add('`') + renderRstSons(d, n, result) + result.add("`:sub:") + of rnSup: + result.add('`') + renderRstSons(d, n, result) + result.add("`:sup:") + of rnIdx: + result.add('`') + renderRstSons(d, n, result) + result.add("`:idx:") + of rnEmphasis: + result.add("*") + renderRstSons(d, n, result) + result.add("*") + of rnStrongEmphasis: + result.add("**") + renderRstSons(d, n, result) + result.add("**") + of rnTripleEmphasis: + result.add("***") + renderRstSons(d, n, result) + result.add("***") + of rnInterpretedText: + result.add('`') + renderRstSons(d, n, result) + result.add('`') + of rnInlineLiteral: + inc(d.verbatim) + result.add("``") + renderRstSons(d, n, result) + result.add("``") + dec(d.verbatim) + of rnSmiley: + result.add(n.text) + of rnLeaf: + if d.verbatim == 0 and n.text == "\\": + result.add("\\\\") # XXX: escape more special characters! + else: + result.add(n.text) + of rnIndex: + result.add("\n\n") + result.add(ind) + result.add(".. index::\n") + + inc(d.indent, 3) + if n.sons[2] != nil: renderRstSons(d, n.sons[2], result) + dec(d.indent, 3) + of rnContents: + result.add("\n\n") + result.add(ind) + result.add(".. 
contents::") + else: + result.add("Error: cannot render: " & $n.kind) + +proc renderRstToRst*(n: PRstNode, result: var string) = + ## renders `n` into its string representation and appends to `result`. + var d: TRenderContext + renderRstToRst(d, n, result) + diff --git a/lib/packages/docutils/rstgen.nim b/lib/packages/docutils/rstgen.nim new file mode 100644 index 000000000..53bd8188e --- /dev/null +++ b/lib/packages/docutils/rstgen.nim @@ -0,0 +1,695 @@ +# +# +# Nimrod's Runtime Library +# (c) Copyright 2012 Andreas Rumpf +# +# See the file "copying.txt", included in this +# distribution, for details about the copyright. +# + +## This module implements a generator of HTML/Latex from `reStructuredText`:idx. + +import strutils, os, hashes, strtabs, rstast, rst, highlite + +const + HtmlExt = "html" + IndexExt* = ".idx" + +type + TOutputTarget* = enum ## which document type to generate + outHtml, # output is HTML + outLatex # output is Latex + + TTocEntry{.final.} = object + n*: PRstNode + refname*, header*: string + + TMetaEnum* = enum + metaNone, metaTitle, metaSubtitle, metaAuthor, metaVersion + + TRstGenerator* = object of TObject + target*: TOutputTarget + config*: PStringTable + splitAfter*: int # split too long entries in the TOC + tocPart*: seq[TTocEntry] + hasToc*: bool + theIndex: string + options*: TRstParseOptions + findFile*: TFindFileHandler + msgHandler*: TMsgHandler + filename*: string + meta*: array[TMetaEnum, string] + + PDoc = var TRstGenerator + +proc initRstGenerator*(g: var TRstGenerator, target: TOutputTarget, + config: PStringTable, filename: string, + options: TRstParseOptions, + findFile: TFindFileHandler, + msgHandler: TMsgHandler) = + g.config = config + g.target = target + g.tocPart = @[] + g.filename = filename + g.splitAfter = 20 + g.theIndex = "" + g.options = options + g.findFile = findFile + g.msgHandler = msgHandler + + let s = config["split.item.toc"] + if s != "": g.splitAfter = parseInt(s) + for i in low(g.meta)..high(g.meta): 
g.meta[i] = "" + +proc writeIndexFile*(g: var TRstGenerator, outfile: string) = + if g.theIndex.len > 0: writeFile(outfile, g.theIndex) + +proc addXmlChar(dest: var string, c: Char) = + case c + of '&': add(dest, "&") + of '<': add(dest, "<") + of '>': add(dest, ">") + of '\"': add(dest, """) + else: add(dest, c) + +proc addRtfChar(dest: var string, c: Char) = + case c + of '{': add(dest, "\\{") + of '}': add(dest, "\\}") + of '\\': add(dest, "\\\\") + else: add(dest, c) + +proc addTexChar(dest: var string, c: Char) = + case c + of '_': add(dest, "\\_") + of '{': add(dest, "\\symbol{123}") + of '}': add(dest, "\\symbol{125}") + of '[': add(dest, "\\symbol{91}") + of ']': add(dest, "\\symbol{93}") + of '\\': add(dest, "\\symbol{92}") + of '$': add(dest, "\\$") + of '&': add(dest, "\\&") + of '#': add(dest, "\\#") + of '%': add(dest, "\\%") + of '~': add(dest, "\\symbol{126}") + of '@': add(dest, "\\symbol{64}") + of '^': add(dest, "\\symbol{94}") + of '`': add(dest, "\\symbol{96}") + else: add(dest, c) + +var splitter*: string = "" + +proc escChar*(target: TOutputTarget, dest: var string, c: Char) {.inline.} = + case target + of outHtml: addXmlChar(dest, c) + of outLatex: addTexChar(dest, c) + +proc nextSplitPoint*(s: string, start: int): int = + result = start + while result < len(s) + 0: + case s[result] + of '_': return + of 'a'..'z': + if result + 1 < len(s) + 0: + if s[result + 1] in {'A'..'Z'}: return + else: nil + inc(result) + dec(result) # last valid index + +proc esc*(target: TOutputTarget, s: string, splitAfter = -1): string = + result = "" + if splitAfter >= 0: + var partLen = 0 + var j = 0 + while j < len(s): + var k = nextSplitPoint(s, j) + if (splitter != " ") or (partLen + k - j + 1 > splitAfter): + partLen = 0 + add(result, splitter) + for i in countup(j, k): escChar(target, result, s[i]) + inc(partLen, k - j + 1) + j = k + 1 + else: + for i in countup(0, len(s) - 1): escChar(target, result, s[i]) + + +proc disp(target: TOutputTarget, xml, tex: 
string): string = + if target != outLatex: result = xml + else: result = tex + +proc dispF(target: TOutputTarget, xml, tex: string, + args: varargs[string]): string = + if target != outLatex: result = xml % args + else: result = tex % args + +proc dispA(target: TOutputTarget, dest: var string, + xml, tex: string, args: varargs[string]) = + if target != outLatex: addf(dest, xml, args) + else: addf(dest, tex, args) + +proc renderRstToOut*(d: PDoc, n: PRstNode, result: var string) + +proc renderAux(d: PDoc, n: PRstNode, result: var string) = + for i in countup(0, len(n)-1): renderRstToOut(d, n.sons[i], result) + +proc renderAux(d: PDoc, n: PRstNode, frmtA, frmtB: string, result: var string) = + var tmp = "" + for i in countup(0, len(n)-1): renderRstToOut(d, n.sons[i], tmp) + if d.target != outLatex: + result.addf(frmtA, [tmp]) + else: + result.addf(frmtB, [tmp]) + +# ---------------- index handling -------------------------------------------- + +proc setIndexTerm*(d: PDoc, id, term: string) = + d.theIndex.add(term) + d.theIndex.add('\t') + let htmlFile = changeFileExt(extractFilename(d.filename), HtmlExt) + d.theIndex.add(htmlFile) + d.theIndex.add('#') + d.theIndex.add(id) + d.theIndex.add("\n") + +proc hash(n: PRstNode): int = + if n.kind == rnLeaf: + result = hash(n.text) + elif n.len > 0: + result = hash(n.sons[0]) + for i in 1 .. 
$2", "$2\\label{$1}", + [id, term]) + +type + TIndexEntry {.pure, final.} = object + keyword: string + link: string + +proc cmp(a, b: TIndexEntry): int = + result = cmpIgnoreStyle(a.keyword, b.keyword) + +proc `<-`(a: var TIndexEntry, b: TIndexEntry) = + shallowCopy a.keyword, b.keyword + shallowCopy a.link, b.link + +proc sortIndex(a: var openArray[TIndexEntry]) = + # we use shellsort here; fast and simple + let N = len(a) + var h = 1 + while true: + h = 3 * h + 1 + if h > N: break + while true: + h = h div 3 + for i in countup(h, N - 1): + var v: TIndexEntry + v <- a[i] + var j = i + while cmp(a[j-h], v) >= 0: + a[j] <- a[j-h] + j = j-h + if j < h: break + a[j] <- v + if h == 1: break + +proc mergeIndexes*(dir: string): string = + ## merges all index files in `dir` and returns the generated index as HTML. + ## The result is no full HTML for flexibility. + var a: seq[TIndexEntry] + newSeq(a, 15_000) + setLen(a, 0) + var L = 0 + for kind, path in walkDir(dir): + if kind == pcFile and path.endsWith(IndexExt): + for line in lines(path): + let s = line.find('\t') + if s < 0: continue + setLen(a, L+1) + a[L].keyword = line.substr(0, s-1) + a[L].link = line.substr(s+1) + inc L + sortIndex(a) + result = "" + var i = 0 + while i < L: + result.addf("
$1
    \n", + [a[i].keyword]) + var j = i + while j < L and a[i].keyword == a[j].keyword: + result.addf( + "
  • $1
  • \n", + [a[j].link]) + inc j + result.add("
\n") + i = j + +# ---------------------------------------------------------------------------- + +proc renderHeadline(d: PDoc, n: PRstNode, result: var string) = + var tmp = "" + for i in countup(0, len(n) - 1): renderRstToOut(d, n.sons[i], tmp) + var refname = rstnodeToRefname(n) + if d.hasToc: + var length = len(d.tocPart) + setlen(d.tocPart, length + 1) + d.tocPart[length].refname = refname + d.tocPart[length].n = n + d.tocPart[length].header = tmp + + dispA(d.target, result, + "$3", + "\\rsth$4{$3}\\label{$2}\n", [$n.level, + d.tocPart[length].refname, tmp, + $chr(n.level - 1 + ord('A'))]) + else: + dispA(d.target, result, "$3", + "\\rsth$4{$3}\\label{$2}\n", [ + $n.level, refname, tmp, + $chr(n.level - 1 + ord('A'))]) + +proc renderOverline(d: PDoc, n: PRstNode, result: var string) = + if d.meta[metaTitle].len == 0: + for i in countup(0, len(n)-1): + renderRstToOut(d, n.sons[i], d.meta[metaTitle]) + elif d.meta[metaSubtitle].len == 0: + for i in countup(0, len(n)-1): + renderRstToOut(d, n.sons[i], d.meta[metaSubtitle]) + else: + var tmp = "" + for i in countup(0, len(n) - 1): renderRstToOut(d, n.sons[i], tmp) + dispA(d.target, result, "
$3
", + "\\rstov$4{$3}\\label{$2}\n", [$n.level, + rstnodeToRefname(n), tmp, $chr(n.level - 1 + ord('A'))]) + + +proc renderTocEntry(d: PDoc, e: TTocEntry, result: var string) = + dispA(d.target, result, + "
  • $2
  • \n", + "\\item\\label{$1_toc} $2\\ref{$1}\n", [e.refname, e.header]) + +proc renderTocEntries*(d: PDoc, j: var int, lvl: int, result: var string) = + var tmp = "" + while j <= high(d.tocPart): + var a = abs(d.tocPart[j].n.level) + if a == lvl: + renderTocEntry(d, d.tocPart[j], tmp) + inc(j) + elif a > lvl: + renderTocEntries(d, j, a, tmp) + else: + break + if lvl > 1: + dispA(d.target, result, "
      $1
    ", + "\\begin{enumerate}$1\\end{enumerate}", [tmp]) + else: + result.add(tmp) + +proc renderImage(d: PDoc, n: PRstNode, result: var string) = + var options = "" + var s = getFieldValue(n, "scale") + if s != "": dispA(d.target, options, " scale=\"$1\"", " scale=$1", [strip(s)]) + + s = getFieldValue(n, "height") + if s != "": dispA(d.target, options, " height=\"$1\"", " height=$1", [strip(s)]) + + s = getFieldValue(n, "width") + if s != "": dispA(d.target, options, " width=\"$1\"", " width=$1", [strip(s)]) + + s = getFieldValue(n, "alt") + if s != "": dispA(d.target, options, " alt=\"$1\"", "", [strip(s)]) + + s = getFieldValue(n, "align") + if s != "": dispA(d.target, options, " align=\"$1\"", "", [strip(s)]) + + if options.len > 0: options = dispF(d.target, "$1", "[$1]", [options]) + + dispA(d.target, result, "", "\\includegraphics$2{$1}", + [getArgument(n), options]) + if len(n) >= 3: renderRstToOut(d, n.sons[2], result) + +proc renderSmiley(d: PDoc, n: PRstNode, result: var string) = + dispA(d.target, result, + """""", + "\\includegraphics{$1}", [n.text]) + +proc renderCodeBlock(d: PDoc, n: PRstNode, result: var string) = + if n.sons[2] == nil: return + var m = n.sons[2].sons[0] + assert m.kind == rnLeaf + var langstr = strip(getArgument(n)) + var lang: TSourceLanguage + if langstr == "": + lang = langNimrod # default language + else: + lang = getSourceLanguage(langstr) + + dispA(d.target, result, "
    ", "\\begin{rstpre}\n", [])
    +  if lang == langNone:
    +    d.msgHandler(d.filename, 1, 0, mwUnsupportedLanguage, langstr)
    +    result.add(m.text)
    +  else:
    +    var g: TGeneralTokenizer
    +    initGeneralTokenizer(g, m.text)
    +    while true: 
    +      getNextToken(g, lang)
    +      case g.kind
    +      of gtEof: break 
    +      of gtNone, gtWhitespace: 
    +        add(result, substr(m.text, g.start, g.length + g.start - 1))
    +      else:
    +        dispA(d.target, result, "$1", "\\span$2{$1}", [
    +          esc(d.target, substr(m.text, g.start, g.length+g.start-1)),
    +          tokenClassToStr[g.kind]])
    +    deinitGeneralTokenizer(g)
    +  dispA(d.target, result, "
    ", "\n\\end{rstpre}\n") + +proc renderContainer(d: PDoc, n: PRstNode, result: var string) = + var tmp = "" + renderRstToOut(d, n.sons[2], tmp) + var arg = strip(getArgument(n)) + if arg == "": + dispA(d.target, result, "
    $1
    ", "$1", [tmp]) + else: + dispA(d.target, result, "
    $2
    ", "$2", [arg, tmp]) + +proc texColumns(n: PRstNode): string = + result = "" + for i in countup(1, len(n)): add(result, "|X") + +proc renderField(d: PDoc, n: PRstNode, result: var string) = + var b = false + if d.target == outLatex: + var fieldname = addNodes(n.sons[0]) + var fieldval = esc(d.target, strip(addNodes(n.sons[1]))) + if cmpIgnoreStyle(fieldname, "author") == 0 or + cmpIgnoreStyle(fieldname, "authors") == 0: + if d.meta[metaAuthor].len == 0: + d.meta[metaAuthor] = fieldval + b = true + elif cmpIgnoreStyle(fieldName, "version") == 0: + if d.meta[metaVersion].len == 0: + d.meta[metaVersion] = fieldval + b = true + if not b: + renderAux(d, n, "$1\n", "$1", result) + +proc renderRstToOut(d: PDoc, n: PRstNode, result: var string) = + if n == nil: return + case n.kind + of rnInner: renderAux(d, n, result) + of rnHeadline: renderHeadline(d, n, result) + of rnOverline: renderOverline(d, n, result) + of rnTransition: renderAux(d, n, "
    \n", "\\hrule\n", result) + of rnParagraph: renderAux(d, n, "

    $1

    \n", "$1\n\n", result) + of rnBulletList: + renderAux(d, n, "
      $1
    \n", + "\\begin{itemize}$1\\end{itemize}\n", result) + of rnBulletItem, rnEnumItem: + renderAux(d, n, "
  • $1
  • \n", "\\item $1\n", result) + of rnEnumList: + renderAux(d, n, "
      $1
    \n", + "\\begin{enumerate}$1\\end{enumerate}\n", result) + of rnDefList: + renderAux(d, n, "
    $1
    \n", + "\\begin{description}$1\\end{description}\n", result) + of rnDefItem: renderAux(d, n, result) + of rnDefName: renderAux(d, n, "
    $1
    \n", "\\item[$1] ", result) + of rnDefBody: renderAux(d, n, "
    $1
    \n", "$1\n", result) + of rnFieldList: + var tmp = "" + for i in countup(0, len(n) - 1): + renderRstToOut(d, n.sons[i], tmp) + if tmp.len != 0: + dispA(d.target, result, + "" & + "" & + "" & + "$1" & + "
    ", + "\\begin{description}$1\\end{description}\n", + [tmp]) + of rnField: renderField(d, n, result) + of rnFieldName: + renderAux(d, n, "$1:", "\\item[$1:]", result) + of rnFieldBody: + renderAux(d, n, "$1", " $1\n", result) + of rnIndex: + renderRstToOut(d, n.sons[2], result) + of rnOptionList: + renderAux(d, n, "$1
    ", + "\\begin{description}\n$1\\end{description}\n", result) + of rnOptionListItem: + renderAux(d, n, "$1\n", "$1", result) + of rnOptionGroup: + renderAux(d, n, "$1", "\\item[$1]", result) + of rnDescription: + renderAux(d, n, "$1\n", " $1\n", result) + of rnOption, rnOptionString, rnOptionArgument: + doAssert false, "renderRstToOut" + of rnLiteralBlock: + renderAux(d, n, "
    $1
    \n", + "\\begin{rstpre}\n$1\n\\end{rstpre}\n", result) + of rnQuotedLiteralBlock: + doAssert false, "renderRstToOut" + of rnLineBlock: + renderAux(d, n, "

    $1

    ", "$1\n\n", result) + of rnLineBlockItem: + renderAux(d, n, "$1
    ", "$1\\\\\n", result) + of rnBlockQuote: + renderAux(d, n, "

    $1

    \n", + "\\begin{quote}$1\\end{quote}\n", result) + of rnTable, rnGridTable: + renderAux(d, n, + "$1
    ", + "\\begin{table}\\begin{rsttab}{" & + texColumns(n) & "|}\n\\hline\n$1\\end{rsttab}\\end{table}", result) + of rnTableRow: + if len(n) >= 1: + if d.target == outLatex: + #var tmp = "" + renderRstToOut(d, n.sons[0], result) + for i in countup(1, len(n) - 1): + result.add(" & ") + renderRstToOut(d, n.sons[i], result) + result.add("\\\\\n\\hline\n") + else: + result.add("") + renderAux(d, n, result) + result.add("\n") + of rnTableDataCell: + renderAux(d, n, "$1", "$1", result) + of rnTableHeaderCell: + renderAux(d, n, "$1", "\\textbf{$1}", result) + of rnLabel: + doAssert false, "renderRstToOut" # used for footnotes and other + of rnFootnote: + doAssert false, "renderRstToOut" # a footnote + of rnCitation: + doAssert false, "renderRstToOut" # similar to footnote + of rnRef: + var tmp = "" + renderAux(d, n, tmp) + dispA(d.target, result, "$1", + "$1\\ref{$2}", [tmp, rstnodeToRefname(n)]) + of rnStandaloneHyperlink: + renderAux(d, n, + "$1", + "\\href{$1}{$1}", result) + of rnHyperlink: + var tmp0 = "" + var tmp1 = "" + renderRstToOut(d, n.sons[0], tmp0) + renderRstToOut(d, n.sons[1], tmp1) + dispA(d.target, result, "$1", + "\\href{$2}{$1}", + [tmp0, tmp1]) + of rnDirArg, rnRaw: renderAux(d, n, result) + of rnRawHtml: + if d.target != outLatex: + result.add addNodes(lastSon(n)) + of rnRawLatex: + if d.target == outLatex: + result.add addNodes(lastSon(n)) + + of rnImage, rnFigure: renderImage(d, n, result) + of rnCodeBlock: renderCodeBlock(d, n, result) + of rnContainer: renderContainer(d, n, result) + of rnSubstitutionReferences, rnSubstitutionDef: + renderAux(d, n, "|$1|", "|$1|", result) + of rnDirective: + renderAux(d, n, "", "", result) + of rnGeneralRole: + var tmp0 = "" + var tmp1 = "" + renderRstToOut(d, n.sons[0], tmp0) + renderRstToOut(d, n.sons[1], tmp1) + dispA(d.target, result, "$1", "\\span$2{$1}", + [tmp0, tmp1]) + of rnSub: renderAux(d, n, "$1", "\\rstsub{$1}", result) + of rnSup: renderAux(d, n, "$1", "\\rstsup{$1}", result) + of rnEmphasis: 
renderAux(d, n, "$1", "\\emph{$1}", result) + of rnStrongEmphasis: + renderAux(d, n, "$1", "\\textbf{$1}", result) + of rnTripleEmphasis: + renderAux(d, n, "$1", + "\\textbf{emph{$1}}", result) + of rnInterpretedText: + renderAux(d, n, "$1", "\\emph{$1}", result) + of rnIdx: + renderIndexTerm(d, n, result) + of rnInlineLiteral: + renderAux(d, n, + "$1", + "\\texttt{$1}", result) + of rnSmiley: renderSmiley(d, n, result) + of rnLeaf: result.add(esc(d.target, n.text)) + of rnContents: d.hasToc = true + of rnTitle: + d.meta[metaTitle] = "" + renderRstToOut(d, n.sons[0], d.meta[metaTitle]) + +# ----------------------------------------------------------------------------- + +proc getVarIdx(varnames: openarray[string], id: string): int = + for i in countup(0, high(varnames)): + if cmpIgnoreStyle(varnames[i], id) == 0: + return i + result = -1 + +proc formatNamedVars*(frmt: string, varnames: openarray[string], + varvalues: openarray[string]): string = + var i = 0 + var L = len(frmt) + result = "" + var num = 0 + while i < L: + if frmt[i] == '$': + inc(i) # skip '$' + case frmt[i] + of '#': + add(result, varvalues[num]) + inc(num) + inc(i) + of '$': + add(result, "$") + inc(i) + of '0'..'9': + var j = 0 + while true: + j = (j * 10) + Ord(frmt[i]) - ord('0') + inc(i) + if i > L-1 or frmt[i] notin {'0'..'9'}: break + if j > high(varvalues) + 1: + raise newException(EInvalidValue, "invalid index: " & $j) + num = j + add(result, varvalues[j - 1]) + of 'A'..'Z', 'a'..'z', '\x80'..'\xFF': + var id = "" + while true: + add(id, frmt[i]) + inc(i) + if frmt[i] notin {'A'..'Z', '_', 'a'..'z', '\x80'..'\xFF'}: break + var idx = getVarIdx(varnames, id) + if idx >= 0: + add(result, varvalues[idx]) + else: + raise newException(EInvalidValue, "unknown substitution var: " & id) + of '{': + var id = "" + inc(i) + while frmt[i] != '}': + if frmt[i] == '\0': + raise newException(EInvalidValue, "'}' expected") + add(id, frmt[i]) + inc(i) + inc(i) # skip } + # search for the variable: + var idx 
= getVarIdx(varnames, id) + if idx >= 0: add(result, varvalues[idx]) + else: + raise newException(EInvalidValue, "unknown substitution var: " & id) + else: + raise newException(EInvalidValue, "unknown substitution: $" & $frmt[i]) + var start = i + while i < L: + if frmt[i] != '$': inc(i) + else: break + if i-1 >= start: add(result, substr(frmt, start, i - 1)) + + +proc defaultConfig*(): PStringTable = + ## creates a default configuration for HTML generation. + result = newStringTable(modeStyleInsensitive) + + template setConfigVar(key, val: expr) = + result[key] = val + + setConfigVar("split.item.toc", "20") + setConfigVar("doc.section", """ +
    +

    $sectionTitle

    +
    +$content +
    +""") + setConfigVar("doc.section.toc", """ +
  • + $sectionTitle +
      + $content +
    +
  • +""") + setConfigVar("doc.item", """ +
    $header
    +
    +$desc +
    +""") + setConfigVar("doc.item.toc", """ +
  • $name
  • +""") + setConfigVar("doc.toc", """ +""") + setConfigVar("doc.body_toc", """ +$tableofcontents +
    +$moduledesc +$content +
    +""") + setConfigVar("doc.body_no_toc", "$moduledesc $content") + setConfigVar("doc.file", "$content") + +# ---------- forum --------------------------------------------------------- + +proc rstToHtml*(s: string, options: TRstParseOptions, + config: PStringTable): string = + ## exported for *nimforum*. + + proc myFindFile(filename: string): string = + # we don't find any files in online mode: + result = "" + + const filen = "input" + var d: TRstGenerator + initRstGenerator(d, outHtml, config, filen, options, myFindFile, + rst.defaultMsgHandler) + var dummyHasToc = false + var rst = rstParse(s, filen, 0, 1, dummyHasToc, options) + result = "" + renderRstToOut(d, rst, result) + \ No newline at end of file diff --git a/packages/docutils/highlite.nim b/packages/docutils/highlite.nim deleted file mode 100755 index 21dd1543a..000000000 --- a/packages/docutils/highlite.nim +++ /dev/null @@ -1,537 +0,0 @@ -# -# -# Nimrod's Runtime Library -# (c) Copyright 2012 Andreas Rumpf -# -# See the file "copying.txt", included in this -# distribution, for details about the copyright. -# - -## Source highlighter for programming or markup languages. -## Currently only few languages are supported, other languages may be added. -## The interface supports one language nested in another. 
- -import - strutils - -type - TTokenClass* = enum - gtEof, gtNone, gtWhitespace, gtDecNumber, gtBinNumber, gtHexNumber, - gtOctNumber, gtFloatNumber, gtIdentifier, gtKeyword, gtStringLit, - gtLongStringLit, gtCharLit, gtEscapeSequence, # escape sequence like \xff - gtOperator, gtPunctation, gtComment, gtLongComment, gtRegularExpression, - gtTagStart, gtTagEnd, gtKey, gtValue, gtRawData, gtAssembler, - gtPreprocessor, gtDirective, gtCommand, gtRule, gtHyperlink, gtLabel, - gtReference, gtOther - TGeneralTokenizer* = object of TObject - kind*: TTokenClass - start*, length*: int - buf: cstring - pos: int - state: TTokenClass - - TSourceLanguage* = enum - langNone, langNimrod, langCpp, langCsharp, langC, langJava - -const - sourceLanguageToStr*: array[TSourceLanguage, string] = ["none", "Nimrod", - "C++", "C#", "C", "Java"] - tokenClassToStr*: array[TTokenClass, string] = ["Eof", "None", "Whitespace", - "DecNumber", "BinNumber", "HexNumber", "OctNumber", "FloatNumber", - "Identifier", "Keyword", "StringLit", "LongStringLit", "CharLit", - "EscapeSequence", "Operator", "Punctation", "Comment", "LongComment", - "RegularExpression", "TagStart", "TagEnd", "Key", "Value", "RawData", - "Assembler", "Preprocessor", "Directive", "Command", "Rule", "Hyperlink", - "Label", "Reference", "Other"] - - nimrodKeywords = slurp("doc/keywords.txt").split - -proc getSourceLanguage*(name: string): TSourceLanguage = - for i in countup(succ(low(TSourceLanguage)), high(TSourceLanguage)): - if cmpIgnoreStyle(name, sourceLanguageToStr[i]) == 0: - return i - result = langNone - -proc initGeneralTokenizer*(g: var TGeneralTokenizer, buf: string) = - g.buf = cstring(buf) - g.kind = low(TTokenClass) - g.start = 0 - g.length = 0 - g.state = low(TTokenClass) - var pos = 0 # skip initial whitespace: - while g.buf[pos] in {' ', '\x09'..'\x0D'}: inc(pos) - g.pos = pos - -proc deinitGeneralTokenizer*(g: var TGeneralTokenizer) = - nil - -proc nimGetKeyword(id: string): TTokenClass = - for k in 
nimrodKeywords: - if cmpIgnoreStyle(id, k) == 0: return gtKeyword - result = gtIdentifier - when false: - var i = getIdent(id) - if (i.id >= ord(tokKeywordLow) - ord(tkSymbol)) and - (i.id <= ord(tokKeywordHigh) - ord(tkSymbol)): - result = gtKeyword - else: - result = gtIdentifier - -proc nimNumberPostfix(g: var TGeneralTokenizer, position: int): int = - var pos = position - if g.buf[pos] == '\'': - inc(pos) - case g.buf[pos] - of 'f', 'F': - g.kind = gtFloatNumber - inc(pos) - if g.buf[pos] in {'0'..'9'}: inc(pos) - if g.buf[pos] in {'0'..'9'}: inc(pos) - of 'i', 'I': - inc(pos) - if g.buf[pos] in {'0'..'9'}: inc(pos) - if g.buf[pos] in {'0'..'9'}: inc(pos) - else: - nil - result = pos - -proc nimNumber(g: var TGeneralTokenizer, position: int): int = - const decChars = {'0'..'9', '_'} - var pos = position - g.kind = gtDecNumber - while g.buf[pos] in decChars: inc(pos) - if g.buf[pos] == '.': - g.kind = gtFloatNumber - inc(pos) - while g.buf[pos] in decChars: inc(pos) - if g.buf[pos] in {'e', 'E'}: - g.kind = gtFloatNumber - inc(pos) - if g.buf[pos] in {'+', '-'}: inc(pos) - while g.buf[pos] in decChars: inc(pos) - result = nimNumberPostfix(g, pos) - -const - OpChars = {'+', '-', '*', '/', '\\', '<', '>', '!', '?', '^', '.', - '|', '=', '%', '&', '$', '@', '~', ':', '\x80'..'\xFF'} - -proc nimNextToken(g: var TGeneralTokenizer) = - const - hexChars = {'0'..'9', 'A'..'F', 'a'..'f', '_'} - octChars = {'0'..'7', '_'} - binChars = {'0'..'1', '_'} - SymChars = {'a'..'z', 'A'..'Z', '0'..'9', '\x80'..'\xFF'} - var pos = g.pos - g.start = g.pos - if g.state == gtStringLit: - g.kind = gtStringLit - while true: - case g.buf[pos] - of '\\': - g.kind = gtEscapeSequence - inc(pos) - case g.buf[pos] - of 'x', 'X': - inc(pos) - if g.buf[pos] in hexChars: inc(pos) - if g.buf[pos] in hexChars: inc(pos) - of '0'..'9': - while g.buf[pos] in {'0'..'9'}: inc(pos) - of '\0': - g.state = gtNone - else: inc(pos) - break - of '\0', '\x0D', '\x0A': - g.state = gtNone - break - of '\"': - 
inc(pos) - g.state = gtNone - break - else: inc(pos) - else: - case g.buf[pos] - of ' ', '\x09'..'\x0D': - g.kind = gtWhitespace - while g.buf[pos] in {' ', '\x09'..'\x0D'}: inc(pos) - of '#': - g.kind = gtComment - while not (g.buf[pos] in {'\0', '\x0A', '\x0D'}): inc(pos) - of 'a'..'z', 'A'..'Z', '_', '\x80'..'\xFF': - var id = "" - while g.buf[pos] in SymChars + {'_'}: - add(id, g.buf[pos]) - inc(pos) - if (g.buf[pos] == '\"'): - if (g.buf[pos + 1] == '\"') and (g.buf[pos + 2] == '\"'): - inc(pos, 3) - g.kind = gtLongStringLit - while true: - case g.buf[pos] - of '\0': - break - of '\"': - inc(pos) - if g.buf[pos] == '\"' and g.buf[pos+1] == '\"' and - g.buf[pos+2] != '\"': - inc(pos, 2) - break - else: inc(pos) - else: - g.kind = gtRawData - inc(pos) - while not (g.buf[pos] in {'\0', '\x0A', '\x0D'}): - if g.buf[pos] == '"' and g.buf[pos+1] != '"': break - inc(pos) - if g.buf[pos] == '\"': inc(pos) - else: - g.kind = nimGetKeyword(id) - of '0': - inc(pos) - case g.buf[pos] - of 'b', 'B': - inc(pos) - while g.buf[pos] in binChars: inc(pos) - pos = nimNumberPostfix(g, pos) - of 'x', 'X': - inc(pos) - while g.buf[pos] in hexChars: inc(pos) - pos = nimNumberPostfix(g, pos) - of 'o', 'O': - inc(pos) - while g.buf[pos] in octChars: inc(pos) - pos = nimNumberPostfix(g, pos) - else: pos = nimNumber(g, pos) - of '1'..'9': - pos = nimNumber(g, pos) - of '\'': - inc(pos) - g.kind = gtCharLit - while true: - case g.buf[pos] - of '\0', '\x0D', '\x0A': - break - of '\'': - inc(pos) - break - of '\\': - inc(pos, 2) - else: inc(pos) - of '\"': - inc(pos) - if (g.buf[pos] == '\"') and (g.buf[pos + 1] == '\"'): - inc(pos, 2) - g.kind = gtLongStringLit - while true: - case g.buf[pos] - of '\0': - break - of '\"': - inc(pos) - if g.buf[pos] == '\"' and g.buf[pos+1] == '\"' and - g.buf[pos+2] != '\"': - inc(pos, 2) - break - else: inc(pos) - else: - g.kind = gtStringLit - while true: - case g.buf[pos] - of '\0', '\x0D', '\x0A': - break - of '\"': - inc(pos) - break - of '\\': - 
g.state = g.kind - break - else: inc(pos) - of '(', ')', '[', ']', '{', '}', '`', ':', ',', ';': - inc(pos) - g.kind = gtPunctation - of '\0': - g.kind = gtEof - else: - if g.buf[pos] in OpChars: - g.kind = gtOperator - while g.buf[pos] in OpChars: inc(pos) - else: - inc(pos) - g.kind = gtNone - g.length = pos - g.pos - if g.kind != gtEof and g.length <= 0: - assert false, "nimNextToken: produced an empty token" - g.pos = pos - -proc generalNumber(g: var TGeneralTokenizer, position: int): int = - const decChars = {'0'..'9'} - var pos = position - g.kind = gtDecNumber - while g.buf[pos] in decChars: inc(pos) - if g.buf[pos] == '.': - g.kind = gtFloatNumber - inc(pos) - while g.buf[pos] in decChars: inc(pos) - if g.buf[pos] in {'e', 'E'}: - g.kind = gtFloatNumber - inc(pos) - if g.buf[pos] in {'+', '-'}: inc(pos) - while g.buf[pos] in decChars: inc(pos) - result = pos - -proc generalStrLit(g: var TGeneralTokenizer, position: int): int = - const - decChars = {'0'..'9'} - hexChars = {'0'..'9', 'A'..'F', 'a'..'f'} - var pos = position - g.kind = gtStringLit - var c = g.buf[pos] - inc(pos) # skip " or ' - while true: - case g.buf[pos] - of '\0': - break - of '\\': - inc(pos) - case g.buf[pos] - of '\0': - break - of '0'..'9': - while g.buf[pos] in decChars: inc(pos) - of 'x', 'X': - inc(pos) - if g.buf[pos] in hexChars: inc(pos) - if g.buf[pos] in hexChars: inc(pos) - else: inc(pos, 2) - else: - if g.buf[pos] == c: - inc(pos) - break - else: - inc(pos) - result = pos - -proc isKeyword(x: openarray[string], y: string): int = - var a = 0 - var b = len(x) - 1 - while a <= b: - var mid = (a + b) div 2 - var c = cmp(x[mid], y) - if c < 0: - a = mid + 1 - elif c > 0: - b = mid - 1 - else: - return mid - result = - 1 - -proc isKeywordIgnoreCase(x: openarray[string], y: string): int = - var a = 0 - var b = len(x) - 1 - while a <= b: - var mid = (a + b) div 2 - var c = cmpIgnoreCase(x[mid], y) - if c < 0: - a = mid + 1 - elif c > 0: - b = mid - 1 - else: - return mid - result = - 
1 - -type - TTokenizerFlag = enum - hasPreprocessor, hasNestedComments - TTokenizerFlags = set[TTokenizerFlag] - -proc clikeNextToken(g: var TGeneralTokenizer, keywords: openarray[string], - flags: TTokenizerFlags) = - const - hexChars = {'0'..'9', 'A'..'F', 'a'..'f'} - octChars = {'0'..'7'} - binChars = {'0'..'1'} - symChars = {'A'..'Z', 'a'..'z', '0'..'9', '_', '\x80'..'\xFF'} - var pos = g.pos - g.start = g.pos - if g.state == gtStringLit: - g.kind = gtStringLit - while true: - case g.buf[pos] - of '\\': - g.kind = gtEscapeSequence - inc(pos) - case g.buf[pos] - of 'x', 'X': - inc(pos) - if g.buf[pos] in hexChars: inc(pos) - if g.buf[pos] in hexChars: inc(pos) - of '0'..'9': - while g.buf[pos] in {'0'..'9'}: inc(pos) - of '\0': - g.state = gtNone - else: inc(pos) - break - of '\0', '\x0D', '\x0A': - g.state = gtNone - break - of '\"': - inc(pos) - g.state = gtNone - break - else: inc(pos) - else: - case g.buf[pos] - of ' ', '\x09'..'\x0D': - g.kind = gtWhitespace - while g.buf[pos] in {' ', '\x09'..'\x0D'}: inc(pos) - of '/': - inc(pos) - if g.buf[pos] == '/': - g.kind = gtComment - while not (g.buf[pos] in {'\0', '\x0A', '\x0D'}): inc(pos) - elif g.buf[pos] == '*': - g.kind = gtLongComment - var nested = 0 - inc(pos) - while true: - case g.buf[pos] - of '*': - inc(pos) - if g.buf[pos] == '/': - inc(pos) - if nested == 0: break - of '/': - inc(pos) - if g.buf[pos] == '*': - inc(pos) - if hasNestedComments in flags: inc(nested) - of '\0': - break - else: inc(pos) - of '#': - inc(pos) - if hasPreprocessor in flags: - g.kind = gtPreprocessor - while g.buf[pos] in {' ', '\t'}: inc(pos) - while g.buf[pos] in symChars: inc(pos) - else: - g.kind = gtOperator - of 'a'..'z', 'A'..'Z', '_', '\x80'..'\xFF': - var id = "" - while g.buf[pos] in SymChars: - add(id, g.buf[pos]) - inc(pos) - if isKeyword(keywords, id) >= 0: g.kind = gtKeyword - else: g.kind = gtIdentifier - of '0': - inc(pos) - case g.buf[pos] - of 'b', 'B': - inc(pos) - while g.buf[pos] in binChars: inc(pos) - 
if g.buf[pos] in {'A'..'Z', 'a'..'z'}: inc(pos) - of 'x', 'X': - inc(pos) - while g.buf[pos] in hexChars: inc(pos) - if g.buf[pos] in {'A'..'Z', 'a'..'z'}: inc(pos) - of '0'..'7': - inc(pos) - while g.buf[pos] in octChars: inc(pos) - if g.buf[pos] in {'A'..'Z', 'a'..'z'}: inc(pos) - else: - pos = generalNumber(g, pos) - if g.buf[pos] in {'A'..'Z', 'a'..'z'}: inc(pos) - of '1'..'9': - pos = generalNumber(g, pos) - if g.buf[pos] in {'A'..'Z', 'a'..'z'}: inc(pos) - of '\'': - pos = generalStrLit(g, pos) - g.kind = gtCharLit - of '\"': - inc(pos) - g.kind = gtStringLit - while true: - case g.buf[pos] - of '\0': - break - of '\"': - inc(pos) - break - of '\\': - g.state = g.kind - break - else: inc(pos) - of '(', ')', '[', ']', '{', '}', ':', ',', ';', '.': - inc(pos) - g.kind = gtPunctation - of '\0': - g.kind = gtEof - else: - if g.buf[pos] in OpChars: - g.kind = gtOperator - while g.buf[pos] in OpChars: inc(pos) - else: - inc(pos) - g.kind = gtNone - g.length = pos - g.pos - if g.kind != gtEof and g.length <= 0: - assert false, "clikeNextToken: produced an empty token" - g.pos = pos - -proc cNextToken(g: var TGeneralTokenizer) = - const - keywords: array[0..36, string] = ["_Bool", "_Complex", "_Imaginary", "auto", - "break", "case", "char", "const", "continue", "default", "do", "double", - "else", "enum", "extern", "float", "for", "goto", "if", "inline", "int", - "long", "register", "restrict", "return", "short", "signed", "sizeof", - "static", "struct", "switch", "typedef", "union", "unsigned", "void", - "volatile", "while"] - clikeNextToken(g, keywords, {hasPreprocessor}) - -proc cppNextToken(g: var TGeneralTokenizer) = - const - keywords: array[0..47, string] = ["asm", "auto", "break", "case", "catch", - "char", "class", "const", "continue", "default", "delete", "do", "double", - "else", "enum", "extern", "float", "for", "friend", "goto", "if", - "inline", "int", "long", "new", "operator", "private", "protected", - "public", "register", "return", "short", 
"signed", "sizeof", "static", - "struct", "switch", "template", "this", "throw", "try", "typedef", - "union", "unsigned", "virtual", "void", "volatile", "while"] - clikeNextToken(g, keywords, {hasPreprocessor}) - -proc csharpNextToken(g: var TGeneralTokenizer) = - const - keywords: array[0..76, string] = ["abstract", "as", "base", "bool", "break", - "byte", "case", "catch", "char", "checked", "class", "const", "continue", - "decimal", "default", "delegate", "do", "double", "else", "enum", "event", - "explicit", "extern", "false", "finally", "fixed", "float", "for", - "foreach", "goto", "if", "implicit", "in", "int", "interface", "internal", - "is", "lock", "long", "namespace", "new", "null", "object", "operator", - "out", "override", "params", "private", "protected", "public", "readonly", - "ref", "return", "sbyte", "sealed", "short", "sizeof", "stackalloc", - "static", "string", "struct", "switch", "this", "throw", "true", "try", - "typeof", "uint", "ulong", "unchecked", "unsafe", "ushort", "using", - "virtual", "void", "volatile", "while"] - clikeNextToken(g, keywords, {hasPreprocessor}) - -proc javaNextToken(g: var TGeneralTokenizer) = - const - keywords: array[0..52, string] = ["abstract", "assert", "boolean", "break", - "byte", "case", "catch", "char", "class", "const", "continue", "default", - "do", "double", "else", "enum", "extends", "false", "final", "finally", - "float", "for", "goto", "if", "implements", "import", "instanceof", "int", - "interface", "long", "native", "new", "null", "package", "private", - "protected", "public", "return", "short", "static", "strictfp", "super", - "switch", "synchronized", "this", "throw", "throws", "transient", "true", - "try", "void", "volatile", "while"] - clikeNextToken(g, keywords, {}) - -proc getNextToken*(g: var TGeneralTokenizer, lang: TSourceLanguage) = - case lang - of langNone: assert false - of langNimrod: nimNextToken(g) - of langCpp: cppNextToken(g) - of langCsharp: csharpNextToken(g) - of langC: 
cNextToken(g) - of langJava: javaNextToken(g) - diff --git a/packages/docutils/rst.nim b/packages/docutils/rst.nim deleted file mode 100755 index b22bdf6ce..000000000 --- a/packages/docutils/rst.nim +++ /dev/null @@ -1,1639 +0,0 @@ -# -# -# Nimrod's Runtime Library -# (c) Copyright 2012 Andreas Rumpf -# -# See the file "copying.txt", included in this -# distribution, for details about the copyright. -# - -## This module implements a `reStructuredText`:idx parser. A large -## subset is implemented. Some features of the `markdown`:idx: wiki syntax are -## also supported. - -import - os, strutils, rstast - -type - TRstParseOption* = enum ## options for the RST parser - roSkipPounds, ## skip ``#`` at line beginning (documentation - ## embedded in Nimrod comments) - roSupportSmilies, ## make the RST parser support smilies like ``:)`` - roSupportRawDirective, ## support the ``raw`` directive (don't support - ## it for sandboxing) - roSupportMarkdown ## support additional features of markdown - - TRstParseOptions* = set[TRstParseOption] - - TMsgClass* = enum - mcHint = "Hint", - mcWarning = "Warning", - mcError = "Error" - - TMsgKind* = enum ## the possible messages - meCannotOpenFile, - meExpected, - meGridTableNotImplemented, - meNewSectionExpected, - meGeneralParseError, - meInvalidDirective, - mwRedefinitionOfLabel, - mwUnknownSubstitution, - mwUnsupportedLanguage - - TMsgHandler* = proc (filename: string, line, col: int, msgKind: TMsgKind, - arg: string) {.nimcall.} ## what to do in case of an error - TFindFileHandler* = proc (filename: string): string {.nimcall.} - -const - messages: array [TMsgKind, string] = [ - meCannotOpenFile: "cannot open '$1'", - meExpected: "'$1' expected", - meGridTableNotImplemented: "grid table is not implemented", - meNewSectionExpected: "new section expected", - meGeneralParseError: "general parse error", - meInvalidDirective: "invalid directive: '$1'", - mwRedefinitionOfLabel: "redefinition of label '$1'", - mwUnknownSubstitution: 
"unknown substitution '$1'", - mwUnsupportedLanguage: "language '$1' not supported" - ] - -proc rstnodeToRefname*(n: PRstNode): string -proc addNodes*(n: PRstNode): string -proc getFieldValue*(n: PRstNode, fieldname: string): string -proc getArgument*(n: PRstNode): string - -# ----------------------------- scanner part -------------------------------- - -const - SymChars: TCharSet = {'a'..'z', 'A'..'Z', '0'..'9', '\x80'..'\xFF'} - SmileyStartChars: TCharSet = {':', ';', '8'} - Smilies = { - ":D": "icon_e_biggrin", - ":-D": "icon_e_biggrin", - ":)": "icon_e_smile", - ":-)": "icon_e_smile", - ";)": "icon_e_wink", - ";-)": "icon_e_wink", - ":(": "icon_e_sad", - ":-(": "icon_e_sad", - ":o": "icon_e_surprised", - ":-o": "icon_e_surprised", - ":shock:": "icon_eek", - ":?": "icon_e_confused", - ":-?": "icon_e_confused", - ":-/": "icon_e_confused", - - "8-)": "icon_cool", - - ":lol:": "icon_lol", - ":x": "icon_mad", - ":-x": "icon_mad", - ":P": "icon_razz", - ":-P": "icon_razz", - ":oops:": "icon_redface", - ":cry:": "icon_cry", - ":evil:": "icon_evil", - ":twisted:": "icon_twisted", - ":roll:": "icon_rolleyes", - ":!:": "icon_exclaim", - - ":?:": "icon_question", - ":idea:": "icon_idea", - ":arrow:": "icon_arrow", - ":|": "icon_neutral", - ":-|": "icon_neutral", - ":mrgreen:": "icon_mrgreen", - ":geek:": "icon_e_geek", - ":ugeek:": "icon_e_ugeek" - } - -type - TTokType = enum - tkEof, tkIndent, tkWhite, tkWord, tkAdornment, tkPunct, tkOther - TToken{.final.} = object # a RST token - kind*: TTokType # the type of the token - ival*: int # the indentation or parsed integer value - symbol*: string # the parsed symbol as string - line*, col*: int # line and column of the token - - TTokenSeq = seq[TToken] - TLexer = object of TObject - buf*: cstring - bufpos*: int - line*, col*, baseIndent*: int - skipPounds*: bool - - -proc getThing(L: var TLexer, tok: var TToken, s: TCharSet) = - tok.kind = tkWord - tok.line = L.line - tok.col = L.col - var pos = L.bufpos - while True: - 
add(tok.symbol, L.buf[pos]) - inc(pos) - if L.buf[pos] notin s: break - inc(L.col, pos - L.bufpos) - L.bufpos = pos - -proc getAdornment(L: var TLexer, tok: var TToken) = - tok.kind = tkAdornment - tok.line = L.line - tok.col = L.col - var pos = L.bufpos - var c = L.buf[pos] - while True: - add(tok.symbol, L.buf[pos]) - inc(pos) - if L.buf[pos] != c: break - inc(L.col, pos - L.bufpos) - L.bufpos = pos - -proc getIndentAux(L: var TLexer, start: int): int = - var pos = start - var buf = L.buf - # skip the newline (but include it in the token!) - if buf[pos] == '\x0D': - if buf[pos + 1] == '\x0A': inc(pos, 2) - else: inc(pos) - elif buf[pos] == '\x0A': - inc(pos) - if L.skipPounds: - if buf[pos] == '#': inc(pos) - if buf[pos] == '#': inc(pos) - while True: - case buf[pos] - of ' ', '\x0B', '\x0C': - inc(pos) - inc(result) - of '\x09': - inc(pos) - result = result - (result mod 8) + 8 - else: - break # EndOfFile also leaves the loop - if buf[pos] == '\0': - result = 0 - elif (buf[pos] == '\x0A') or (buf[pos] == '\x0D'): - # look at the next line for proper indentation: - result = getIndentAux(L, pos) - L.bufpos = pos # no need to set back buf - -proc getIndent(L: var TLexer, tok: var TToken) = - inc(L.line) - tok.line = L.line - tok.col = 0 - tok.kind = tkIndent # skip the newline (but include it in the token!) 
- tok.ival = getIndentAux(L, L.bufpos) - L.col = tok.ival - tok.ival = max(tok.ival - L.baseIndent, 0) - tok.symbol = "\n" & repeatChar(tok.ival) - -proc rawGetTok(L: var TLexer, tok: var TToken) = - tok.symbol = "" - tok.ival = 0 - var c = L.buf[L.bufpos] - case c - of 'a'..'z', 'A'..'Z', '\x80'..'\xFF', '0'..'9': - getThing(L, tok, SymChars) - of ' ', '\x09', '\x0B', '\x0C': - getThing(L, tok, {' ', '\x09'}) - tok.kind = tkWhite - if L.buf[L.bufpos] in {'\x0D', '\x0A'}: - rawGetTok(L, tok) # ignore spaces before \n - of '\x0D', '\x0A': - getIndent(L, tok) - of '!', '\"', '#', '$', '%', '&', '\'', '(', ')', '*', '+', ',', '-', '.', - '/', ':', ';', '<', '=', '>', '?', '@', '[', '\\', ']', '^', '_', '`', '{', - '|', '}', '~': - getAdornment(L, tok) - if len(tok.symbol) <= 3: tok.kind = tkPunct - else: - tok.line = L.line - tok.col = L.col - if c == '\0': - tok.kind = tkEof - else: - tok.kind = tkOther - add(tok.symbol, c) - inc(L.bufpos) - inc(L.col) - tok.col = max(tok.col - L.baseIndent, 0) - -proc getTokens(buffer: string, skipPounds: bool, tokens: var TTokenSeq) = - var L: TLexer - var length = len(tokens) - L.buf = cstring(buffer) - L.line = 1 # skip UTF-8 BOM - if (L.buf[0] == '\xEF') and (L.buf[1] == '\xBB') and (L.buf[2] == '\xBF'): - inc(L.bufpos, 3) - L.skipPounds = skipPounds - if skipPounds: - if L.buf[L.bufpos] == '#': inc(L.bufpos) - if L.buf[L.bufpos] == '#': inc(L.bufpos) - L.baseIndent = 0 - while L.buf[L.bufpos] == ' ': - inc(L.bufpos) - inc(L.baseIndent) - while true: - inc(length) - setlen(tokens, length) - rawGetTok(L, tokens[length - 1]) - if tokens[length - 1].kind == tkEof: break - if tokens[0].kind == tkWhite: - # BUGFIX - tokens[0].ival = len(tokens[0].symbol) - tokens[0].kind = tkIndent - -type - TLevelMap = array[Char, int] - TSubstitution{.final.} = object - key*: string - value*: PRstNode - - TSharedState {.final.} = object - options: TRstParseOptions # parsing options - uLevel, oLevel: int # counters for the section levels - subs: 
seq[TSubstitution] # substitutions - refs: seq[TSubstitution] # references - underlineToLevel: TLevelMap # Saves for each possible title adornment - # character its level in the - # current document. - # This is for single underline adornments. - overlineToLevel: TLevelMap # Saves for each possible title adornment - # character its level in the current - # document. - # This is for over-underline adornments. - msgHandler: TMsgHandler # How to handle errors. - findFile: TFindFileHandler # How to find files. - - PSharedState = ref TSharedState - TRstParser = object of TObject - idx*: int - tok*: TTokenSeq - s*: PSharedState - indentStack*: seq[int] - filename*: string - line*, col*: int - hasToc*: bool - - EParseError* = object of EInvalidValue - -proc whichMsgClass*(k: TMsgKind): TMsgClass = - ## returns which message class `k` belongs to. - case ($k)[1] - of 'e', 'E': result = mcError - of 'w', 'W': result = mcWarning - of 'h', 'H': result = mcHint - else: assert false, "msgkind does not fit naming scheme" - -proc defaultMsgHandler*(filename: string, line, col: int, msgkind: TMsgKind, - arg: string) {.procvar.} = - let mc = msgKind.whichMsgClass - let a = messages[msgKind] % arg - let message = "$1($2, $3) $4: $5" % [filename, $line, $col, $mc, a] - if mc == mcError: raise newException(EParseError, message) - else: Writeln(stdout, message) - -proc defaultFindFile*(filename: string): string {.procvar.} = - if existsFile(filename): result = filename - else: result = "" - -proc newSharedState(options: TRstParseOptions, - findFile: TFindFileHandler, - msgHandler: TMsgHandler): PSharedState = - new(result) - result.subs = @[] - result.refs = @[] - result.options = options - result.msgHandler = if isNil(msgHandler): defaultMsgHandler else: msgHandler - result.findFile = if isNil(findFile): defaultFindFile else: findFile - -proc rstMessage(p: TRstParser, msgKind: TMsgKind, arg: string) = - p.s.msgHandler(p.filename, p.line + p.tok[p.idx].line, - p.col + p.tok[p.idx].col, 
msgKind, arg) - -proc rstMessage(p: TRstParser, msgKind: TMsgKind, arg: string, line, col: int) = - p.s.msgHandler(p.filename, p.line + line, - p.col + col, msgKind, arg) - -proc rstMessage(p: TRstParser, msgKind: TMsgKind) = - p.s.msgHandler(p.filename, p.line + p.tok[p.idx].line, - p.col + p.tok[p.idx].col, msgKind, - p.tok[p.idx].symbol) - -when false: - proc corrupt(p: TRstParser) = - assert p.indentStack[0] == 0 - for i in 1 .. high(p.indentStack): assert p.indentStack[i] < 1_000 - -proc currInd(p: TRstParser): int = - result = p.indentStack[high(p.indentStack)] - -proc pushInd(p: var TRstParser, ind: int) = - add(p.indentStack, ind) - -proc popInd(p: var TRstParser) = - if len(p.indentStack) > 1: setlen(p.indentStack, len(p.indentStack) - 1) - -proc initParser(p: var TRstParser, sharedState: PSharedState) = - p.indentStack = @[0] - p.tok = @[] - p.idx = 0 - p.filename = "" - p.hasToc = false - p.col = 0 - p.line = 1 - p.s = sharedState - -proc addNodesAux(n: PRstNode, result: var string) = - if n.kind == rnLeaf: - add(result, n.text) - else: - for i in countup(0, len(n) - 1): addNodesAux(n.sons[i], result) - -proc addNodes(n: PRstNode): string = - result = "" - addNodesAux(n, result) - -proc rstnodeToRefnameAux(n: PRstNode, r: var string, b: var bool) = - if n.kind == rnLeaf: - for i in countup(0, len(n.text) - 1): - case n.text[i] - of '0'..'9': - if b: - add(r, '-') - b = false - if len(r) == 0: add(r, 'Z') - add(r, n.text[i]) - of 'a'..'z': - if b: - add(r, '-') - b = false - add(r, n.text[i]) - of 'A'..'Z': - if b: - add(r, '-') - b = false - add(r, chr(ord(n.text[i]) - ord('A') + ord('a'))) - else: - if (len(r) > 0): b = true - else: - for i in countup(0, len(n) - 1): rstnodeToRefnameAux(n.sons[i], r, b) - -proc rstnodeToRefname(n: PRstNode): string = - result = "" - var b = false - rstnodeToRefnameAux(n, result, b) - -proc findSub(p: var TRstParser, n: PRstNode): int = - var key = addNodes(n) - # the spec says: if no exact match, try one without case 
distinction: - for i in countup(0, high(p.s.subs)): - if key == p.s.subs[i].key: - return i - for i in countup(0, high(p.s.subs)): - if cmpIgnoreStyle(key, p.s.subs[i].key) == 0: - return i - result = -1 - -proc setSub(p: var TRstParser, key: string, value: PRstNode) = - var length = len(p.s.subs) - for i in countup(0, length - 1): - if key == p.s.subs[i].key: - p.s.subs[i].value = value - return - setlen(p.s.subs, length + 1) - p.s.subs[length].key = key - p.s.subs[length].value = value - -proc setRef(p: var TRstParser, key: string, value: PRstNode) = - var length = len(p.s.refs) - for i in countup(0, length - 1): - if key == p.s.refs[i].key: - if p.s.refs[i].value.addNodes != value.addNodes: - rstMessage(p, mwRedefinitionOfLabel, key) - - p.s.refs[i].value = value - return - setlen(p.s.refs, length + 1) - p.s.refs[length].key = key - p.s.refs[length].value = value - -proc findRef(p: var TRstParser, key: string): PRstNode = - for i in countup(0, high(p.s.refs)): - if key == p.s.refs[i].key: - return p.s.refs[i].value - -proc newLeaf(p: var TRstParser): PRstNode = - result = newRstNode(rnLeaf, p.tok[p.idx].symbol) - -proc getReferenceName(p: var TRstParser, endStr: string): PRstNode = - var res = newRstNode(rnInner) - while true: - case p.tok[p.idx].kind - of tkWord, tkOther, tkWhite: - add(res, newLeaf(p)) - of tkPunct: - if p.tok[p.idx].symbol == endStr: - inc(p.idx) - break - else: - add(res, newLeaf(p)) - else: - rstMessage(p, meExpected, endStr) - break - inc(p.idx) - result = res - -proc untilEol(p: var TRstParser): PRstNode = - result = newRstNode(rnInner) - while not (p.tok[p.idx].kind in {tkIndent, tkEof}): - add(result, newLeaf(p)) - inc(p.idx) - -proc expect(p: var TRstParser, tok: string) = - if p.tok[p.idx].symbol == tok: inc(p.idx) - else: rstMessage(p, meExpected, tok) - -proc isInlineMarkupEnd(p: TRstParser, markup: string): bool = - result = p.tok[p.idx].symbol == markup - if not result: - return # Rule 3: - result = not (p.tok[p.idx - 1].kind in 
{tkIndent, tkWhite}) - if not result: - return # Rule 4: - result = (p.tok[p.idx + 1].kind in {tkIndent, tkWhite, tkEof}) or - (p.tok[p.idx + 1].symbol[0] in - {'\'', '\"', ')', ']', '}', '>', '-', '/', '\\', ':', '.', ',', ';', '!', - '?', '_'}) - if not result: - return # Rule 7: - if p.idx > 0: - if (markup != "``") and (p.tok[p.idx - 1].symbol == "\\"): - result = false - -proc isInlineMarkupStart(p: TRstParser, markup: string): bool = - var d: Char - result = p.tok[p.idx].symbol == markup - if not result: - return # Rule 1: - result = (p.idx == 0) or (p.tok[p.idx - 1].kind in {tkIndent, tkWhite}) or - (p.tok[p.idx - 1].symbol[0] in - {'\'', '\"', '(', '[', '{', '<', '-', '/', ':', '_'}) - if not result: - return # Rule 2: - result = not (p.tok[p.idx + 1].kind in {tkIndent, tkWhite, tkEof}) - if not result: - return # Rule 5 & 7: - if p.idx > 0: - if p.tok[p.idx - 1].symbol == "\\": - result = false - else: - var c = p.tok[p.idx - 1].symbol[0] - case c - of '\'', '\"': d = c - of '(': d = ')' - of '[': d = ']' - of '{': d = '}' - of '<': d = '>' - else: d = '\0' - if d != '\0': result = p.tok[p.idx + 1].symbol[0] != d - -proc match(p: TRstParser, start: int, expr: string): bool = - # regular expressions are: - # special char exact match - # 'w' tkWord - # ' ' tkWhite - # 'a' tkAdornment - # 'i' tkIndent - # 'p' tkPunct - # 'T' always true - # 'E' whitespace, indent or eof - # 'e' tkWord or '#' (for enumeration lists) - var i = 0 - var j = start - var last = len(expr) - 1 - while i <= last: - case expr[i] - of 'w': result = p.tok[j].kind == tkWord - of ' ': result = p.tok[j].kind == tkWhite - of 'i': result = p.tok[j].kind == tkIndent - of 'p': result = p.tok[j].kind == tkPunct - of 'a': result = p.tok[j].kind == tkAdornment - of 'o': result = p.tok[j].kind == tkOther - of 'T': result = true - of 'E': result = p.tok[j].kind in {tkEof, tkWhite, tkIndent} - of 'e': - result = (p.tok[j].kind == tkWord) or (p.tok[j].symbol == "#") - if result: - case 
p.tok[j].symbol[0] - of 'a'..'z', 'A'..'Z': result = len(p.tok[j].symbol) == 1 - of '0'..'9': result = allCharsInSet(p.tok[j].symbol, {'0'..'9'}) - else: nil - else: - var c = expr[i] - var length = 0 - while (i <= last) and (expr[i] == c): - inc(i) - inc(length) - dec(i) - result = (p.tok[j].kind in {tkPunct, tkAdornment}) and - (len(p.tok[j].symbol) == length) and (p.tok[j].symbol[0] == c) - if not result: return - inc(j) - inc(i) - result = true - -proc fixupEmbeddedRef(n, a, b: PRstNode) = - var sep = - 1 - for i in countdown(len(n) - 2, 0): - if n.sons[i].text == "<": - sep = i - break - var incr = if (sep > 0) and (n.sons[sep - 1].text[0] == ' '): 2 else: 1 - for i in countup(0, sep - incr): add(a, n.sons[i]) - for i in countup(sep + 1, len(n) - 2): add(b, n.sons[i]) - -proc parsePostfix(p: var TRstParser, n: PRstNode): PRstNode = - result = n - if isInlineMarkupEnd(p, "_"): - inc(p.idx) - if p.tok[p.idx-2].symbol == "`" and p.tok[p.idx-3].symbol == ">": - var a = newRstNode(rnInner) - var b = newRstNode(rnInner) - fixupEmbeddedRef(n, a, b) - if len(a) == 0: - result = newRstNode(rnStandaloneHyperlink) - add(result, b) - else: - result = newRstNode(rnHyperlink) - add(result, a) - add(result, b) - setRef(p, rstnodeToRefname(a), b) - elif n.kind == rnInterpretedText: - n.kind = rnRef - else: - result = newRstNode(rnRef) - add(result, n) - elif match(p, p.idx, ":w:"): - # a role: - if p.tok[p.idx + 1].symbol == "idx": - n.kind = rnIdx - elif p.tok[p.idx + 1].symbol == "literal": - n.kind = rnInlineLiteral - elif p.tok[p.idx + 1].symbol == "strong": - n.kind = rnStrongEmphasis - elif p.tok[p.idx + 1].symbol == "emphasis": - n.kind = rnEmphasis - elif (p.tok[p.idx + 1].symbol == "sub") or - (p.tok[p.idx + 1].symbol == "subscript"): - n.kind = rnSub - elif (p.tok[p.idx + 1].symbol == "sup") or - (p.tok[p.idx + 1].symbol == "supscript"): - n.kind = rnSup - else: - result = newRstNode(rnGeneralRole) - n.kind = rnInner - add(result, n) - add(result, newRstNode(rnLeaf, 
p.tok[p.idx + 1].symbol)) - inc(p.idx, 3) - -proc matchVerbatim(p: TRstParser, start: int, expr: string): int = - result = start - var j = 0 - while j < expr.len and continuesWith(expr, p.tok[result].symbol, j): - inc j, p.tok[result].symbol.len - inc result - if j < expr.len: result = 0 - -proc parseSmiley(p: var TRstParser): PRstNode = - if p.tok[p.idx].symbol[0] notin SmileyStartChars: return - for key, val in items(smilies): - let m = matchVerbatim(p, p.idx, key) - if m > 0: - p.idx = m - result = newRstNode(rnSmiley) - result.text = val - return - -when false: - const - urlChars = {'A'..'Z', 'a'..'z', '0'..'9', ':', '#', '@', '%', '/', ';', - '$', '(', ')', '~', '_', '?', '+', '-', '=', '\\', '.', '&', - '\128'..'\255'} - -proc isURL(p: TRstParser, i: int): bool = - result = (p.tok[i+1].symbol == ":") and (p.tok[i+2].symbol == "//") and - (p.tok[i+3].kind == tkWord) and - (p.tok[i].symbol in ["http", "https", "ftp", "telnet", "file"]) - -proc parseURL(p: var TRstParser, father: PRstNode) = - #if p.tok[p.idx].symbol[strStart] == '<': - if isURL(p, p.idx): - var n = newRstNode(rnStandaloneHyperlink) - while true: - case p.tok[p.idx].kind - of tkWord, tkAdornment, tkOther: nil - of tkPunct: - if p.tok[p.idx+1].kind notin {tkWord, tkAdornment, tkOther, tkPunct}: - break - else: break - add(n, newLeaf(p)) - inc(p.idx) - add(father, n) - else: - var n = newLeaf(p) - inc(p.idx) - if p.tok[p.idx].symbol == "_": n = parsePostfix(p, n) - add(father, n) - -proc parseBackslash(p: var TRstParser, father: PRstNode) = - assert(p.tok[p.idx].kind == tkPunct) - if p.tok[p.idx].symbol == "\\\\": - add(father, newRstNode(rnLeaf, "\\")) - inc(p.idx) - elif p.tok[p.idx].symbol == "\\": - # XXX: Unicode? 
- inc(p.idx) - if p.tok[p.idx].kind != tkWhite: add(father, newLeaf(p)) - if p.tok[p.idx].kind != tkEof: inc(p.idx) - else: - add(father, newLeaf(p)) - inc(p.idx) - -when false: - proc parseAdhoc(p: var TRstParser, father: PRstNode, verbatim: bool) = - if not verbatim and isURL(p, p.idx): - var n = newRstNode(rnStandaloneHyperlink) - while true: - case p.tok[p.idx].kind - of tkWord, tkAdornment, tkOther: nil - of tkPunct: - if p.tok[p.idx+1].kind notin {tkWord, tkAdornment, tkOther, tkPunct}: - break - else: break - add(n, newLeaf(p)) - inc(p.idx) - add(father, n) - elif not verbatim and roSupportSmilies in p.sharedState.options: - let n = parseSmiley(p) - if s != nil: - add(father, n) - else: - var n = newLeaf(p) - inc(p.idx) - if p.tok[p.idx].symbol == "_": n = parsePostfix(p, n) - add(father, n) - -proc parseUntil(p: var TRstParser, father: PRstNode, postfix: string, - interpretBackslash: bool) = - let - line = p.tok[p.idx].line - col = p.tok[p.idx].col - while true: - case p.tok[p.idx].kind - of tkPunct: - if isInlineMarkupEnd(p, postfix): - inc(p.idx) - break - elif interpretBackslash: - parseBackslash(p, father) - else: - add(father, newLeaf(p)) - inc(p.idx) - of tkAdornment, tkWord, tkOther: - add(father, newLeaf(p)) - inc(p.idx) - of tkIndent: - add(father, newRstNode(rnLeaf, " ")) - inc(p.idx) - if p.tok[p.idx].kind == tkIndent: - rstMessage(p, meExpected, postfix) - break - of tkWhite: - add(father, newRstNode(rnLeaf, " ")) - inc(p.idx) - else: rstMessage(p, meExpected, postfix, line, col) - -proc parseMarkdownCodeblock(p: var TRstParser): PRstNode = - var args = newRstNode(rnDirArg) - if p.tok[p.idx].kind == tkWord: - add(args, newLeaf(p)) - inc(p.idx) - else: - args = nil - var n = newRstNode(rnLeaf, "") - while true: - case p.tok[p.idx].kind - of tkEof: - rstMessage(p, meExpected, "```") - break - of tkPunct: - if p.tok[p.idx].symbol == "```": - inc(p.idx) - break - else: - add(n.text, p.tok[p.idx].symbol) - inc(p.idx) - else: - add(n.text, 
p.tok[p.idx].symbol) - inc(p.idx) - var lb = newRstNode(rnLiteralBlock) - add(lb, n) - result = newRstNode(rnCodeBlock) - add(result, args) - add(result, nil) - add(result, lb) - -proc parseInline(p: var TRstParser, father: PRstNode) = - case p.tok[p.idx].kind - of tkPunct: - if isInlineMarkupStart(p, "***"): - inc(p.idx) - var n = newRstNode(rnTripleEmphasis) - parseUntil(p, n, "***", true) - add(father, n) - elif isInlineMarkupStart(p, "**"): - inc(p.idx) - var n = newRstNode(rnStrongEmphasis) - parseUntil(p, n, "**", true) - add(father, n) - elif isInlineMarkupStart(p, "*"): - inc(p.idx) - var n = newRstNode(rnEmphasis) - parseUntil(p, n, "*", true) - add(father, n) - elif roSupportMarkdown in p.s.options and p.tok[p.idx].symbol == "```": - inc(p.idx) - add(father, parseMarkdownCodeblock(p)) - elif isInlineMarkupStart(p, "``"): - inc(p.idx) - var n = newRstNode(rnInlineLiteral) - parseUntil(p, n, "``", false) - add(father, n) - elif isInlineMarkupStart(p, "`"): - inc(p.idx) - var n = newRstNode(rnInterpretedText) - parseUntil(p, n, "`", true) - n = parsePostfix(p, n) - add(father, n) - elif isInlineMarkupStart(p, "|"): - inc(p.idx) - var n = newRstNode(rnSubstitutionReferences) - parseUntil(p, n, "|", false) - add(father, n) - else: - if roSupportSmilies in p.s.options: - let n = parseSmiley(p) - if n != nil: - add(father, n) - return - parseBackslash(p, father) - of tkWord: - if roSupportSmilies in p.s.options: - let n = parseSmiley(p) - if n != nil: - add(father, n) - return - parseUrl(p, father) - of tkAdornment, tkOther, tkWhite: - if roSupportSmilies in p.s.options: - let n = parseSmiley(p) - if n != nil: - add(father, n) - return - add(father, newLeaf(p)) - inc(p.idx) - else: nil - -proc getDirective(p: var TRstParser): string = - if p.tok[p.idx].kind == tkWhite and p.tok[p.idx+1].kind == tkWord: - var j = p.idx - inc(p.idx) - result = p.tok[p.idx].symbol - inc(p.idx) - while p.tok[p.idx].kind in {tkWord, tkPunct, tkAdornment, tkOther}: - if 
p.tok[p.idx].symbol == "::": break - add(result, p.tok[p.idx].symbol) - inc(p.idx) - if p.tok[p.idx].kind == tkWhite: inc(p.idx) - if p.tok[p.idx].symbol == "::": - inc(p.idx) - if (p.tok[p.idx].kind == tkWhite): inc(p.idx) - else: - p.idx = j # set back - result = "" # error - else: - result = "" - -proc parseComment(p: var TRstParser): PRstNode = - case p.tok[p.idx].kind - of tkIndent, tkEof: - if p.tok[p.idx].kind != tkEof and p.tok[p.idx + 1].kind == tkIndent: - inc(p.idx) # empty comment - else: - var indent = p.tok[p.idx].ival - while True: - case p.tok[p.idx].kind - of tkEof: - break - of tkIndent: - if (p.tok[p.idx].ival < indent): break - else: - nil - inc(p.idx) - else: - while p.tok[p.idx].kind notin {tkIndent, tkEof}: inc(p.idx) - result = nil - -type - TDirKind = enum # must be ordered alphabetically! - dkNone, dkAuthor, dkAuthors, dkCodeBlock, dkContainer, dkContents, - dkFigure, dkImage, dkInclude, dkIndex, dkRaw, dkTitle - -const - DirIds: array[0..11, string] = ["", "author", "authors", "code-block", - "container", "contents", "figure", "image", "include", "index", "raw", - "title"] - -proc getDirKind(s: string): TDirKind = - let i = find(DirIds, s) - if i >= 0: result = TDirKind(i) - else: result = dkNone - -proc parseLine(p: var TRstParser, father: PRstNode) = - while True: - case p.tok[p.idx].kind - of tkWhite, tkWord, tkOther, tkPunct: parseInline(p, father) - else: break - -proc parseSection(p: var TRstParser, result: PRstNode) -proc parseField(p: var TRstParser): PRstNode = - result = newRstNode(rnField) - var col = p.tok[p.idx].col - inc(p.idx) # skip : - var fieldname = newRstNode(rnFieldname) - parseUntil(p, fieldname, ":", false) - var fieldbody = newRstNode(rnFieldbody) - if p.tok[p.idx].kind != tkIndent: parseLine(p, fieldbody) - if p.tok[p.idx].kind == tkIndent: - var indent = p.tok[p.idx].ival - if indent > col: - pushInd(p, indent) - parseSection(p, fieldbody) - popInd(p) - add(result, fieldname) - add(result, fieldbody) - -proc 
parseFields(p: var TRstParser): PRstNode = - result = nil - var atStart = p.idx == 0 and p.tok[0].symbol == ":" - if (p.tok[p.idx].kind == tkIndent) and (p.tok[p.idx + 1].symbol == ":") or - atStart: - var col = if atStart: p.tok[p.idx].col else: p.tok[p.idx].ival - result = newRstNode(rnFieldList) - if not atStart: inc(p.idx) - while true: - add(result, parseField(p)) - if (p.tok[p.idx].kind == tkIndent) and (p.tok[p.idx].ival == col) and - (p.tok[p.idx + 1].symbol == ":"): - inc(p.idx) - else: - break - -proc getFieldValue(n: PRstNode, fieldname: string): string = - result = "" - if n.sons[1] == nil: return - if (n.sons[1].kind != rnFieldList): - #InternalError("getFieldValue (2): " & $n.sons[1].kind) - # We don't like internal errors here anymore as that would break the forum! - return - for i in countup(0, len(n.sons[1]) - 1): - var f = n.sons[1].sons[i] - if cmpIgnoreStyle(addNodes(f.sons[0]), fieldname) == 0: - result = addNodes(f.sons[1]) - if result == "": result = "\x01\x01" # indicates that the field exists - return - -proc getArgument(n: PRstNode): string = - if n.sons[0] == nil: result = "" - else: result = addNodes(n.sons[0]) - -proc parseDotDot(p: var TRstParser): PRstNode -proc parseLiteralBlock(p: var TRstParser): PRstNode = - result = newRstNode(rnLiteralBlock) - var n = newRstNode(rnLeaf, "") - if p.tok[p.idx].kind == tkIndent: - var indent = p.tok[p.idx].ival - inc(p.idx) - while True: - case p.tok[p.idx].kind - of tkEof: - break - of tkIndent: - if (p.tok[p.idx].ival < indent): - break - else: - add(n.text, "\n") - add(n.text, repeatChar(p.tok[p.idx].ival - indent)) - inc(p.idx) - else: - add(n.text, p.tok[p.idx].symbol) - inc(p.idx) - else: - while not (p.tok[p.idx].kind in {tkIndent, tkEof}): - add(n.text, p.tok[p.idx].symbol) - inc(p.idx) - add(result, n) - -proc getLevel(map: var TLevelMap, lvl: var int, c: Char): int = - if map[c] == 0: - inc(lvl) - map[c] = lvl - result = map[c] - -proc tokenAfterNewline(p: TRstParser): int = - result = 
p.idx - while true: - case p.tok[result].kind - of tkEof: - break - of tkIndent: - inc(result) - break - else: inc(result) - -proc isLineBlock(p: TRstParser): bool = - var j = tokenAfterNewline(p) - result = (p.tok[p.idx].col == p.tok[j].col) and (p.tok[j].symbol == "|") or - (p.tok[j].col > p.tok[p.idx].col) - -proc predNL(p: TRstParser): bool = - result = true - if p.idx > 0: - result = p.tok[p.idx-1].kind == tkIndent and - p.tok[p.idx-1].ival == currInd(p) - -proc isDefList(p: TRstParser): bool = - var j = tokenAfterNewline(p) - result = (p.tok[p.idx].col < p.tok[j].col) and - (p.tok[j].kind in {tkWord, tkOther, tkPunct}) and - (p.tok[j - 2].symbol != "::") - -proc isOptionList(p: TRstParser): bool = - result = match(p, p.idx, "-w") or match(p, p.idx, "--w") or - match(p, p.idx, "/w") or match(p, p.idx, "//w") - -proc whichSection(p: TRstParser): TRstNodeKind = - case p.tok[p.idx].kind - of tkAdornment: - if match(p, p.idx + 1, "ii"): result = rnTransition - elif match(p, p.idx + 1, " a"): result = rnTable - elif match(p, p.idx + 1, "i"): result = rnOverline - else: result = rnLeaf - of tkPunct: - if match(p, tokenAfterNewLine(p), "ai"): - result = rnHeadline - elif p.tok[p.idx].symbol == "::": - result = rnLiteralBlock - elif predNL(p) and - ((p.tok[p.idx].symbol == "+") or (p.tok[p.idx].symbol == "*") or - (p.tok[p.idx].symbol == "-")) and (p.tok[p.idx + 1].kind == tkWhite): - result = rnBulletList - elif (p.tok[p.idx].symbol == "|") and isLineBlock(p): - result = rnLineBlock - elif (p.tok[p.idx].symbol == "..") and predNL(p): - result = rnDirective - elif match(p, p.idx, ":w:") and predNL(p): - # (p.tok[p.idx].symbol == ":") - result = rnFieldList - elif match(p, p.idx, "(e) "): - result = rnEnumList - elif match(p, p.idx, "+a+"): - result = rnGridTable - rstMessage(p, meGridTableNotImplemented) - elif isDefList(p): - result = rnDefList - elif isOptionList(p): - result = rnOptionList - else: - result = rnParagraph - of tkWord, tkOther, tkWhite: - if match(p, 
tokenAfterNewLine(p), "ai"): result = rnHeadline - elif match(p, p.idx, "e) ") or match(p, p.idx, "e. "): result = rnEnumList - elif isDefList(p): result = rnDefList - else: result = rnParagraph - else: result = rnLeaf - -proc parseLineBlock(p: var TRstParser): PRstNode = - result = nil - if p.tok[p.idx + 1].kind == tkWhite: - var col = p.tok[p.idx].col - result = newRstNode(rnLineBlock) - pushInd(p, p.tok[p.idx + 2].col) - inc(p.idx, 2) - while true: - var item = newRstNode(rnLineBlockItem) - parseSection(p, item) - add(result, item) - if (p.tok[p.idx].kind == tkIndent) and (p.tok[p.idx].ival == col) and - (p.tok[p.idx + 1].symbol == "|") and - (p.tok[p.idx + 2].kind == tkWhite): - inc(p.idx, 3) - else: - break - popInd(p) - -proc parseParagraph(p: var TRstParser, result: PRstNode) = - while True: - case p.tok[p.idx].kind - of tkIndent: - if p.tok[p.idx + 1].kind == tkIndent: - inc(p.idx) - break - elif (p.tok[p.idx].ival == currInd(p)): - inc(p.idx) - case whichSection(p) - of rnParagraph, rnLeaf, rnHeadline, rnOverline, rnDirective: - add(result, newRstNode(rnLeaf, " ")) - of rnLineBlock: - addIfNotNil(result, parseLineBlock(p)) - else: break - else: - break - of tkPunct: - if (p.tok[p.idx].symbol == "::") and - (p.tok[p.idx + 1].kind == tkIndent) and - (currInd(p) < p.tok[p.idx + 1].ival): - add(result, newRstNode(rnLeaf, ":")) - inc(p.idx) # skip '::' - add(result, parseLiteralBlock(p)) - break - else: - parseInline(p, result) - of tkWhite, tkWord, tkAdornment, tkOther: - parseInline(p, result) - else: break - -proc parseHeadline(p: var TRstParser): PRstNode = - result = newRstNode(rnHeadline) - parseLine(p, result) - assert(p.tok[p.idx].kind == tkIndent) - assert(p.tok[p.idx + 1].kind == tkAdornment) - var c = p.tok[p.idx + 1].symbol[0] - inc(p.idx, 2) - result.level = getLevel(p.s.underlineToLevel, p.s.uLevel, c) - -type - TIntSeq = seq[int] - -proc tokEnd(p: TRstParser): int = - result = p.tok[p.idx].col + len(p.tok[p.idx].symbol) - 1 - -proc getColumns(p: 
var TRstParser, cols: var TIntSeq) = - var L = 0 - while true: - inc(L) - setlen(cols, L) - cols[L - 1] = tokEnd(p) - assert(p.tok[p.idx].kind == tkAdornment) - inc(p.idx) - if p.tok[p.idx].kind != tkWhite: break - inc(p.idx) - if p.tok[p.idx].kind != tkAdornment: break - if p.tok[p.idx].kind == tkIndent: inc(p.idx) - # last column has no limit: - cols[L - 1] = 32000 - -proc parseDoc(p: var TRstParser): PRstNode - -proc parseSimpleTable(p: var TRstParser): PRstNode = - var - cols: TIntSeq - row: seq[string] - i, last, line: int - c: Char - q: TRstParser - a, b: PRstNode - result = newRstNode(rnTable) - cols = @[] - row = @[] - a = nil - c = p.tok[p.idx].symbol[0] - while true: - if p.tok[p.idx].kind == tkAdornment: - last = tokenAfterNewline(p) - if p.tok[last].kind in {tkEof, tkIndent}: - # skip last adornment line: - p.idx = last - break - getColumns(p, cols) - setlen(row, len(cols)) - if a != nil: - for j in 0..len(a)-1: a.sons[j].kind = rnTableHeaderCell - if p.tok[p.idx].kind == tkEof: break - for j in countup(0, high(row)): row[j] = "" - # the following while loop iterates over the lines a single cell may span: - line = p.tok[p.idx].line - while true: - i = 0 - while not (p.tok[p.idx].kind in {tkIndent, tkEof}): - if (tokEnd(p) <= cols[i]): - add(row[i], p.tok[p.idx].symbol) - inc(p.idx) - else: - if p.tok[p.idx].kind == tkWhite: inc(p.idx) - inc(i) - if p.tok[p.idx].kind == tkIndent: inc(p.idx) - if tokEnd(p) <= cols[0]: break - if p.tok[p.idx].kind in {tkEof, tkAdornment}: break - for j in countup(1, high(row)): add(row[j], '\x0A') - a = newRstNode(rnTableRow) - for j in countup(0, high(row)): - initParser(q, p.s) - q.col = cols[j] - q.line = line - 1 - q.filename = p.filename - getTokens(row[j], false, q.tok) - b = newRstNode(rnTableDataCell) - add(b, parseDoc(q)) - add(a, b) - add(result, a) - -proc parseTransition(p: var TRstParser): PRstNode = - result = newRstNode(rnTransition) - inc(p.idx) - if p.tok[p.idx].kind == tkIndent: inc(p.idx) - if 
p.tok[p.idx].kind == tkIndent: inc(p.idx) - -proc parseOverline(p: var TRstParser): PRstNode = - var c = p.tok[p.idx].symbol[0] - inc(p.idx, 2) - result = newRstNode(rnOverline) - while true: - parseLine(p, result) - if p.tok[p.idx].kind == tkIndent: - inc(p.idx) - if p.tok[p.idx - 1].ival > currInd(p): - add(result, newRstNode(rnLeaf, " ")) - else: - break - else: - break - result.level = getLevel(p.s.overlineToLevel, p.s.oLevel, c) - if p.tok[p.idx].kind == tkAdornment: - inc(p.idx) # XXX: check? - if p.tok[p.idx].kind == tkIndent: inc(p.idx) - -proc parseBulletList(p: var TRstParser): PRstNode = - result = nil - if p.tok[p.idx + 1].kind == tkWhite: - var bullet = p.tok[p.idx].symbol - var col = p.tok[p.idx].col - result = newRstNode(rnBulletList) - pushInd(p, p.tok[p.idx + 2].col) - inc(p.idx, 2) - while true: - var item = newRstNode(rnBulletItem) - parseSection(p, item) - add(result, item) - if (p.tok[p.idx].kind == tkIndent) and (p.tok[p.idx].ival == col) and - (p.tok[p.idx + 1].symbol == bullet) and - (p.tok[p.idx + 2].kind == tkWhite): - inc(p.idx, 3) - else: - break - popInd(p) - -proc parseOptionList(p: var TRstParser): PRstNode = - result = newRstNode(rnOptionList) - while true: - if isOptionList(p): - var a = newRstNode(rnOptionGroup) - var b = newRstNode(rnDescription) - var c = newRstNode(rnOptionListItem) - if match(p, p.idx, "//w"): inc(p.idx) - while not (p.tok[p.idx].kind in {tkIndent, tkEof}): - if (p.tok[p.idx].kind == tkWhite) and (len(p.tok[p.idx].symbol) > 1): - inc(p.idx) - break - add(a, newLeaf(p)) - inc(p.idx) - var j = tokenAfterNewline(p) - if (j > 0) and (p.tok[j - 1].kind == tkIndent) and - (p.tok[j - 1].ival > currInd(p)): - pushInd(p, p.tok[j - 1].ival) - parseSection(p, b) - popInd(p) - else: - parseLine(p, b) - if (p.tok[p.idx].kind == tkIndent): inc(p.idx) - add(c, a) - add(c, b) - add(result, c) - else: - break - -proc parseDefinitionList(p: var TRstParser): PRstNode = - result = nil - var j = tokenAfterNewLine(p) - 1 - if (j >= 
1) and (p.tok[j].kind == tkIndent) and - (p.tok[j].ival > currInd(p)) and (p.tok[j - 1].symbol != "::"): - var col = p.tok[p.idx].col - result = newRstNode(rnDefList) - while true: - j = p.idx - var a = newRstNode(rnDefName) - parseLine(p, a) - if (p.tok[p.idx].kind == tkIndent) and - (p.tok[p.idx].ival > currInd(p)) and - (p.tok[p.idx + 1].symbol != "::") and - not (p.tok[p.idx + 1].kind in {tkIndent, tkEof}): - pushInd(p, p.tok[p.idx].ival) - var b = newRstNode(rnDefBody) - parseSection(p, b) - var c = newRstNode(rnDefItem) - add(c, a) - add(c, b) - add(result, c) - popInd(p) - else: - p.idx = j - break - if (p.tok[p.idx].kind == tkIndent) and (p.tok[p.idx].ival == col): - inc(p.idx) - j = tokenAfterNewLine(p) - 1 - if j >= 1 and p.tok[j].kind == tkIndent and p.tok[j].ival > col and - p.tok[j-1].symbol != "::" and p.tok[j+1].kind != tkIndent: - nil - else: - break - if len(result) == 0: result = nil - -proc parseEnumList(p: var TRstParser): PRstNode = - const - wildcards: array[0..2, string] = ["(e) ", "e) ", "e. 
"] - wildpos: array[0..2, int] = [1, 0, 0] - result = nil - var w = 0 - while w <= 2: - if match(p, p.idx, wildcards[w]): break - inc(w) - if w <= 2: - var col = p.tok[p.idx].col - result = newRstNode(rnEnumList) - inc(p.idx, wildpos[w] + 3) - var j = tokenAfterNewLine(p) - if (p.tok[j].col == p.tok[p.idx].col) or match(p, j, wildcards[w]): - pushInd(p, p.tok[p.idx].col) - while true: - var item = newRstNode(rnEnumItem) - parseSection(p, item) - add(result, item) - if (p.tok[p.idx].kind == tkIndent) and (p.tok[p.idx].ival == col) and - match(p, p.idx + 1, wildcards[w]): - inc(p.idx, wildpos[w] + 4) - else: - break - popInd(p) - else: - dec(p.idx, wildpos[w] + 3) - result = nil - -proc sonKind(father: PRstNode, i: int): TRstNodeKind = - result = rnLeaf - if i < len(father): result = father.sons[i].kind - -proc parseSection(p: var TRstParser, result: PRstNode) = - while true: - var leave = false - assert(p.idx >= 0) - while p.tok[p.idx].kind == tkIndent: - if currInd(p) == p.tok[p.idx].ival: - inc(p.idx) - elif p.tok[p.idx].ival > currInd(p): - pushInd(p, p.tok[p.idx].ival) - var a = newRstNode(rnBlockQuote) - parseSection(p, a) - add(result, a) - popInd(p) - else: - leave = true - break - if leave or p.tok[p.idx].kind == tkEof: break - var a: PRstNode = nil - var k = whichSection(p) - case k - of rnLiteralBlock: - inc(p.idx) # skip '::' - a = parseLiteralBlock(p) - of rnBulletList: a = parseBulletList(p) - of rnLineblock: a = parseLineBlock(p) - of rnDirective: a = parseDotDot(p) - of rnEnumList: a = parseEnumList(p) - of rnLeaf: rstMessage(p, meNewSectionExpected) - of rnParagraph: nil - of rnDefList: a = parseDefinitionList(p) - of rnFieldList: - if p.idx > 0: dec(p.idx) - a = parseFields(p) - of rnTransition: a = parseTransition(p) - of rnHeadline: a = parseHeadline(p) - of rnOverline: a = parseOverline(p) - of rnTable: a = parseSimpleTable(p) - of rnOptionList: a = parseOptionList(p) - else: - #InternalError("rst.parseSection()") - nil - if a == nil and k != 
rnDirective: - a = newRstNode(rnParagraph) - parseParagraph(p, a) - addIfNotNil(result, a) - if sonKind(result, 0) == rnParagraph and sonKind(result, 1) != rnParagraph: - result.sons[0].kind = rnInner - -proc parseSectionWrapper(p: var TRstParser): PRstNode = - result = newRstNode(rnInner) - parseSection(p, result) - while (result.kind == rnInner) and (len(result) == 1): - result = result.sons[0] - -proc `$`(t: TToken): string = - result = $t.kind & ' ' & (if isNil(t.symbol): "NIL" else: t.symbol) - -proc parseDoc(p: var TRstParser): PRstNode = - result = parseSectionWrapper(p) - if p.tok[p.idx].kind != tkEof: - when false: - assert isAllocatedPtr(cast[pointer](p.tok)) - for i in 0 .. high(p.tok): - assert isNil(p.tok[i].symbol) or - isAllocatedPtr(cast[pointer](p.tok[i].symbol)) - echo "index: ", p.idx, " length: ", high(p.tok), "##", - p.tok[p.idx-1], p.tok[p.idx], p.tok[p.idx+1] - #assert isAllocatedPtr(cast[pointer](p.indentStack)) - rstMessage(p, meGeneralParseError) - -type - TDirFlag = enum - hasArg, hasOptions, argIsFile, argIsWord - TDirFlags = set[TDirFlag] - TSectionParser = proc (p: var TRstParser): PRstNode {.nimcall.} - -proc parseDirective(p: var TRstParser, flags: TDirFlags): PRstNode = - result = newRstNode(rnDirective) - var args: PRstNode = nil - var options: PRstNode = nil - if hasArg in flags: - args = newRstNode(rnDirArg) - if argIsFile in flags: - while True: - case p.tok[p.idx].kind - of tkWord, tkOther, tkPunct, tkAdornment: - add(args, newLeaf(p)) - inc(p.idx) - else: break - elif argIsWord in flags: - while p.tok[p.idx].kind == tkWhite: inc(p.idx) - if p.tok[p.idx].kind == tkWord: - add(args, newLeaf(p)) - inc(p.idx) - else: - args = nil - else: - parseLine(p, args) - add(result, args) - if hasOptions in flags: - if (p.tok[p.idx].kind == tkIndent) and (p.tok[p.idx].ival >= 3) and - (p.tok[p.idx + 1].symbol == ":"): - options = parseFields(p) - add(result, options) - -proc indFollows(p: TRstParser): bool = - result = p.tok[p.idx].kind == 
tkIndent and p.tok[p.idx].ival > currInd(p) - -proc parseDirective(p: var TRstParser, flags: TDirFlags, - contentParser: TSectionParser): PRstNode = - result = parseDirective(p, flags) - if not isNil(contentParser) and indFollows(p): - pushInd(p, p.tok[p.idx].ival) - var content = contentParser(p) - popInd(p) - add(result, content) - else: - add(result, nil) - -proc parseDirBody(p: var TRstParser, contentParser: TSectionParser): PRstNode = - if indFollows(p): - pushInd(p, p.tok[p.idx].ival) - result = contentParser(p) - popInd(p) - -proc dirInclude(p: var TRstParser): PRstNode = - # - #The following options are recognized: - # - #start-after : text to find in the external data file - # Only the content after the first occurrence of the specified text will - # be included. - #end-before : text to find in the external data file - # Only the content before the first occurrence of the specified text - # (but after any after text) will be included. - #literal : flag (empty) - # The entire included text is inserted into the document as a single - # literal block (useful for program listings). - #encoding : name of text encoding - # The text encoding of the external data file. Defaults to the document's - # encoding (if specified). - # - result = nil - var n = parseDirective(p, {hasArg, argIsFile, hasOptions}, nil) - var filename = strip(addNodes(n.sons[0])) - var path = p.s.findFile(filename) - if path == "": - rstMessage(p, meCannotOpenFile, filename) - else: - # XXX: error handling; recursive file inclusion! - if getFieldValue(n, "literal") != "": - result = newRstNode(rnLiteralBlock) - add(result, newRstNode(rnLeaf, readFile(path))) - else: - var q: TRstParser - initParser(q, p.s) - q.filename = filename - getTokens(readFile(path), false, q.tok) - # workaround a GCC bug; more like the interior pointer bug? 
- #if find(q.tok[high(q.tok)].symbol, "\0\x01\x02") > 0: - # InternalError("Too many binary zeros in include file") - result = parseDoc(q) - -proc dirCodeBlock(p: var TRstParser): PRstNode = - result = parseDirective(p, {hasArg, hasOptions}, parseLiteralBlock) - var filename = strip(getFieldValue(result, "file")) - if filename != "": - var path = p.s.findFile(filename) - if path == "": rstMessage(p, meCannotOpenFile, filename) - var n = newRstNode(rnLiteralBlock) - add(n, newRstNode(rnLeaf, readFile(path))) - result.sons[2] = n - result.kind = rnCodeBlock - -proc dirContainer(p: var TRstParser): PRstNode = - result = parseDirective(p, {hasArg}, parseSectionWrapper) - assert(result.kind == rnDirective) - assert(len(result) == 3) - result.kind = rnContainer - -proc dirImage(p: var TRstParser): PRstNode = - result = parseDirective(p, {hasOptions, hasArg, argIsFile}, nil) - result.kind = rnImage - -proc dirFigure(p: var TRstParser): PRstNode = - result = parseDirective(p, {hasOptions, hasArg, argIsFile}, - parseSectionWrapper) - result.kind = rnFigure - -proc dirTitle(p: var TRstParser): PRstNode = - result = parseDirective(p, {hasArg}, nil) - result.kind = rnTitle - -proc dirContents(p: var TRstParser): PRstNode = - result = parseDirective(p, {hasArg}, nil) - result.kind = rnContents - -proc dirIndex(p: var TRstParser): PRstNode = - result = parseDirective(p, {}, parseSectionWrapper) - result.kind = rnIndex - -proc dirRawAux(p: var TRstParser, result: var PRstNode, kind: TRstNodeKind, - contentParser: TSectionParser) = - var filename = getFieldValue(result, "file") - if filename.len > 0: - var path = p.s.findFile(filename) - if path.len == 0: - rstMessage(p, meCannotOpenFile, filename) - else: - var f = readFile(path) - result = newRstNode(kind) - add(result, newRstNode(rnLeaf, f)) - else: - result.kind = kind - add(result, parseDirBody(p, contentParser)) - -proc dirRaw(p: var TRstParser): PRstNode = - # - #The following options are recognized: - # - #file : string 
(newlines removed) - # The local filesystem path of a raw data file to be included. - # - # html - # latex - result = parseDirective(p, {hasOptions, hasArg, argIsWord}) - if result.sons[0] != nil: - if cmpIgnoreCase(result.sons[0].sons[0].text, "html") == 0: - dirRawAux(p, result, rnRawHtml, parseLiteralBlock) - elif cmpIgnoreCase(result.sons[0].sons[0].text, "latex") == 0: - dirRawAux(p, result, rnRawLatex, parseLiteralBlock) - else: - rstMessage(p, meInvalidDirective, result.sons[0].text) - else: - dirRawAux(p, result, rnRaw, parseSectionWrapper) - -proc parseDotDot(p: var TRstParser): PRstNode = - result = nil - var col = p.tok[p.idx].col - inc(p.idx) - var d = getDirective(p) - if d != "": - pushInd(p, col) - case getDirKind(d) - of dkInclude: result = dirInclude(p) - of dkImage: result = dirImage(p) - of dkFigure: result = dirFigure(p) - of dkTitle: result = dirTitle(p) - of dkContainer: result = dirContainer(p) - of dkContents: result = dirContents(p) - of dkRaw: - if roSupportRawDirective in p.s.options: - result = dirRaw(p) - else: - rstMessage(p, meInvalidDirective, d) - of dkCodeblock: result = dirCodeBlock(p) - of dkIndex: result = dirIndex(p) - else: rstMessage(p, meInvalidDirective, d) - popInd(p) - elif match(p, p.idx, " _"): - # hyperlink target: - inc(p.idx, 2) - var a = getReferenceName(p, ":") - if p.tok[p.idx].kind == tkWhite: inc(p.idx) - var b = untilEol(p) - setRef(p, rstnodeToRefname(a), b) - elif match(p, p.idx, " |"): - # substitution definitions: - inc(p.idx, 2) - var a = getReferenceName(p, "|") - var b: PRstNode - if p.tok[p.idx].kind == tkWhite: inc(p.idx) - if cmpIgnoreStyle(p.tok[p.idx].symbol, "replace") == 0: - inc(p.idx) - expect(p, "::") - b = untilEol(p) - elif cmpIgnoreStyle(p.tok[p.idx].symbol, "image") == 0: - inc(p.idx) - b = dirImage(p) - else: - rstMessage(p, meInvalidDirective, p.tok[p.idx].symbol) - setSub(p, addNodes(a), b) - elif match(p, p.idx, " ["): - # footnotes, citations - inc(p.idx, 2) - var a = 
getReferenceName(p, "]") - if p.tok[p.idx].kind == tkWhite: inc(p.idx) - var b = untilEol(p) - setRef(p, rstnodeToRefname(a), b) - else: - result = parseComment(p) - -proc resolveSubs(p: var TRstParser, n: PRstNode): PRstNode = - result = n - if n == nil: return - case n.kind - of rnSubstitutionReferences: - var x = findSub(p, n) - if x >= 0: - result = p.s.subs[x].value - else: - var key = addNodes(n) - var e = getEnv(key) - if e != "": result = newRstNode(rnLeaf, e) - else: rstMessage(p, mwUnknownSubstitution, key) - of rnRef: - var y = findRef(p, rstnodeToRefname(n)) - if y != nil: - result = newRstNode(rnHyperlink) - n.kind = rnInner - add(result, n) - add(result, y) - of rnLeaf: - nil - of rnContents: - p.hasToc = true - else: - for i in countup(0, len(n) - 1): n.sons[i] = resolveSubs(p, n.sons[i]) - -proc rstParse*(text, filename: string, - line, column: int, hasToc: var bool, - options: TRstParseOptions, - findFile: TFindFileHandler = nil, - msgHandler: TMsgHandler = nil): PRstNode = - var p: TRstParser - initParser(p, newSharedState(options, findFile, msgHandler)) - p.filename = filename - p.line = line - p.col = column - getTokens(text, roSkipPounds in options, p.tok) - result = resolveSubs(p, parseDoc(p)) - hasToc = p.hasToc diff --git a/packages/docutils/rstast.nim b/packages/docutils/rstast.nim deleted file mode 100644 index 23233fd39..000000000 --- a/packages/docutils/rstast.nim +++ /dev/null @@ -1,288 +0,0 @@ -# -# -# Nimrod's Runtime Library -# (c) Copyright 2012 Andreas Rumpf -# -# See the file "copying.txt", included in this -# distribution, for details about the copyright. -# - -## This module implements an AST for the `reStructuredText`:idx parser. - -import strutils - -type - TRstNodeKind* = enum ## the possible node kinds of an PRstNode - rnInner, # an inner node or a root - rnHeadline, # a headline - rnOverline, # an over- and underlined headline - rnTransition, # a transition (the -------------
    thingie) - rnParagraph, # a paragraph - rnBulletList, # a bullet list - rnBulletItem, # a bullet item - rnEnumList, # an enumerated list - rnEnumItem, # an enumerated item - rnDefList, # a definition list - rnDefItem, # an item of a definition list consisting of ... - rnDefName, # ... a name part ... - rnDefBody, # ... and a body part ... - rnFieldList, # a field list - rnField, # a field item - rnFieldName, # consisting of a field name ... - rnFieldBody, # ... and a field body - rnOptionList, rnOptionListItem, rnOptionGroup, rnOption, rnOptionString, - rnOptionArgument, rnDescription, rnLiteralBlock, rnQuotedLiteralBlock, - rnLineBlock, # the | thingie - rnLineBlockItem, # sons of the | thing - rnBlockQuote, # text just indented - rnTable, rnGridTable, rnTableRow, rnTableHeaderCell, rnTableDataCell, - rnLabel, # used for footnotes and other things - rnFootnote, # a footnote - rnCitation, # similar to footnote - rnStandaloneHyperlink, rnHyperlink, rnRef, rnDirective, # a directive - rnDirArg, rnRaw, rnTitle, rnContents, rnImage, rnFigure, rnCodeBlock, - rnRawHtml, rnRawLatex, - rnContainer, # ``container`` directive - rnIndex, # index directve: - # .. 
index:: - # key - # * `file#id `_ - # * `file#id '_ - rnSubstitutionDef, # a definition of a substitution - rnGeneralRole, # Inline markup: - rnSub, rnSup, rnIdx, - rnEmphasis, # "*" - rnStrongEmphasis, # "**" - rnTripleEmphasis, # "***" - rnInterpretedText, # "`" - rnInlineLiteral, # "``" - rnSubstitutionReferences, # "|" - rnSmiley, # some smiley - rnLeaf # a leaf; the node's text field contains the - # leaf val - - - PRSTNode* = ref TRstNode ## an RST node - TRstNodeSeq* = seq[PRstNode] - TRSTNode* {.acyclic, final.} = object ## an RST node's description - kind*: TRstNodeKind ## the node's kind - text*: string ## valid for leafs in the AST; and the title of - ## the document or the section - level*: int ## valid for some node kinds - sons*: TRstNodeSeq ## the node's sons - -proc len*(n: PRstNode): int = - result = len(n.sons) - -proc newRstNode*(kind: TRstNodeKind): PRstNode = - new(result) - result.sons = @[] - result.kind = kind - -proc newRstNode*(kind: TRstNodeKind, s: string): PRstNode = - result = newRstNode(kind) - result.text = s - -proc lastSon*(n: PRstNode): PRstNode = - result = n.sons[len(n.sons)-1] - -proc add*(father, son: PRstNode) = - add(father.sons, son) - -proc addIfNotNil*(father, son: PRstNode) = - if son != nil: add(father, son) - - -type - TRenderContext {.pure.} = object - indent: int - verbatim: int - -proc renderRstToRst(d: var TRenderContext, n: PRstNode, result: var string) - -proc renderRstSons(d: var TRenderContext, n: PRstNode, result: var string) = - for i in countup(0, len(n) - 1): - renderRstToRst(d, n.sons[i], result) - -proc renderRstToRst(d: var TRenderContext, n: PRstNode, result: var string) = - # this is needed for the index generation; it may also be useful for - # debugging, but most code is already debugged... 
- const - lvlToChar: array[0..8, char] = ['!', '=', '-', '~', '`', '<', '*', '|', '+'] - if n == nil: return - var ind = repeatChar(d.indent) - case n.kind - of rnInner: - renderRstSons(d, n, result) - of rnHeadline: - result.add("\n") - result.add(ind) - - let oldLen = result.len - renderRstSons(d, n, result) - let HeadlineLen = result.len - oldLen - - result.add("\n") - result.add(ind) - result.add repeatChar(HeadlineLen, lvlToChar[n.level]) - of rnOverline: - result.add("\n") - result.add(ind) - - var headline = "" - renderRstSons(d, n, headline) - - let lvl = repeatChar(headline.Len - d.indent, lvlToChar[n.level]) - result.add(lvl) - result.add("\n") - result.add(headline) - - result.add("\n") - result.add(ind) - result.add(lvl) - of rnTransition: - result.add("\n\n") - result.add(ind) - result.add repeatChar(78-d.indent, '-') - result.add("\n\n") - of rnParagraph: - result.add("\n\n") - result.add(ind) - renderRstSons(d, n, result) - of rnBulletItem: - inc(d.indent, 2) - var tmp = "" - renderRstSons(d, n, tmp) - if tmp.len > 0: - result.add("\n") - result.add(ind) - result.add("* ") - result.add(tmp) - dec(d.indent, 2) - of rnEnumItem: - inc(d.indent, 4) - var tmp = "" - renderRstSons(d, n, tmp) - if tmp.len > 0: - result.add("\n") - result.add(ind) - result.add("(#) ") - result.add(tmp) - dec(d.indent, 4) - of rnOptionList, rnFieldList, rnDefList, rnDefItem, rnLineBlock, rnFieldName, - rnFieldBody, rnStandaloneHyperlink, rnBulletList, rnEnumList: - renderRstSons(d, n, result) - of rnDefName: - result.add("\n\n") - result.add(ind) - renderRstSons(d, n, result) - of rnDefBody: - inc(d.indent, 2) - if n.sons[0].kind != rnBulletList: - result.add("\n") - result.add(ind) - result.add(" ") - renderRstSons(d, n, result) - dec(d.indent, 2) - of rnField: - var tmp = "" - renderRstToRst(d, n.sons[0], tmp) - - var L = max(tmp.len + 3, 30) - inc(d.indent, L) - - result.add "\n" - result.add ind - result.add ':' - result.add tmp - result.add ':' - result.add repeatChar(L 
- tmp.len - 2) - renderRstToRst(d, n.sons[1], result) - - dec(d.indent, L) - of rnLineBlockItem: - result.add("\n") - result.add(ind) - result.add("| ") - renderRstSons(d, n, result) - of rnBlockQuote: - inc(d.indent, 2) - renderRstSons(d, n, result) - dec(d.indent, 2) - of rnRef: - result.add("`") - renderRstSons(d, n, result) - result.add("`_") - of rnHyperlink: - result.add('`') - renderRstToRst(d, n.sons[0], result) - result.add(" <") - renderRstToRst(d, n.sons[1], result) - result.add(">`_") - of rnGeneralRole: - result.add('`') - renderRstToRst(d, n.sons[0],result) - result.add("`:") - renderRstToRst(d, n.sons[1],result) - result.add(':') - of rnSub: - result.add('`') - renderRstSons(d, n, result) - result.add("`:sub:") - of rnSup: - result.add('`') - renderRstSons(d, n, result) - result.add("`:sup:") - of rnIdx: - result.add('`') - renderRstSons(d, n, result) - result.add("`:idx:") - of rnEmphasis: - result.add("*") - renderRstSons(d, n, result) - result.add("*") - of rnStrongEmphasis: - result.add("**") - renderRstSons(d, n, result) - result.add("**") - of rnTripleEmphasis: - result.add("***") - renderRstSons(d, n, result) - result.add("***") - of rnInterpretedText: - result.add('`') - renderRstSons(d, n, result) - result.add('`') - of rnInlineLiteral: - inc(d.verbatim) - result.add("``") - renderRstSons(d, n, result) - result.add("``") - dec(d.verbatim) - of rnSmiley: - result.add(n.text) - of rnLeaf: - if d.verbatim == 0 and n.text == "\\": - result.add("\\\\") # XXX: escape more special characters! - else: - result.add(n.text) - of rnIndex: - result.add("\n\n") - result.add(ind) - result.add(".. index::\n") - - inc(d.indent, 3) - if n.sons[2] != nil: renderRstSons(d, n.sons[2], result) - dec(d.indent, 3) - of rnContents: - result.add("\n\n") - result.add(ind) - result.add(".. 
contents::") - else: - result.add("Error: cannot render: " & $n.kind) - -proc renderRstToRst*(n: PRstNode, result: var string) = - ## renders `n` into its string representation and appends to `result`. - var d: TRenderContext - renderRstToRst(d, n, result) - diff --git a/packages/docutils/rstgen.nim b/packages/docutils/rstgen.nim deleted file mode 100644 index 53bd8188e..000000000 --- a/packages/docutils/rstgen.nim +++ /dev/null @@ -1,695 +0,0 @@ -# -# -# Nimrod's Runtime Library -# (c) Copyright 2012 Andreas Rumpf -# -# See the file "copying.txt", included in this -# distribution, for details about the copyright. -# - -## This module implements a generator of HTML/Latex from `reStructuredText`:idx. - -import strutils, os, hashes, strtabs, rstast, rst, highlite - -const - HtmlExt = "html" - IndexExt* = ".idx" - -type - TOutputTarget* = enum ## which document type to generate - outHtml, # output is HTML - outLatex # output is Latex - - TTocEntry{.final.} = object - n*: PRstNode - refname*, header*: string - - TMetaEnum* = enum - metaNone, metaTitle, metaSubtitle, metaAuthor, metaVersion - - TRstGenerator* = object of TObject - target*: TOutputTarget - config*: PStringTable - splitAfter*: int # split too long entries in the TOC - tocPart*: seq[TTocEntry] - hasToc*: bool - theIndex: string - options*: TRstParseOptions - findFile*: TFindFileHandler - msgHandler*: TMsgHandler - filename*: string - meta*: array[TMetaEnum, string] - - PDoc = var TRstGenerator - -proc initRstGenerator*(g: var TRstGenerator, target: TOutputTarget, - config: PStringTable, filename: string, - options: TRstParseOptions, - findFile: TFindFileHandler, - msgHandler: TMsgHandler) = - g.config = config - g.target = target - g.tocPart = @[] - g.filename = filename - g.splitAfter = 20 - g.theIndex = "" - g.options = options - g.findFile = findFile - g.msgHandler = msgHandler - - let s = config["split.item.toc"] - if s != "": g.splitAfter = parseInt(s) - for i in low(g.meta)..high(g.meta): g.meta[i] = 
"" - -proc writeIndexFile*(g: var TRstGenerator, outfile: string) = - if g.theIndex.len > 0: writeFile(outfile, g.theIndex) - -proc addXmlChar(dest: var string, c: Char) = - case c - of '&': add(dest, "&") - of '<': add(dest, "<") - of '>': add(dest, ">") - of '\"': add(dest, """) - else: add(dest, c) - -proc addRtfChar(dest: var string, c: Char) = - case c - of '{': add(dest, "\\{") - of '}': add(dest, "\\}") - of '\\': add(dest, "\\\\") - else: add(dest, c) - -proc addTexChar(dest: var string, c: Char) = - case c - of '_': add(dest, "\\_") - of '{': add(dest, "\\symbol{123}") - of '}': add(dest, "\\symbol{125}") - of '[': add(dest, "\\symbol{91}") - of ']': add(dest, "\\symbol{93}") - of '\\': add(dest, "\\symbol{92}") - of '$': add(dest, "\\$") - of '&': add(dest, "\\&") - of '#': add(dest, "\\#") - of '%': add(dest, "\\%") - of '~': add(dest, "\\symbol{126}") - of '@': add(dest, "\\symbol{64}") - of '^': add(dest, "\\symbol{94}") - of '`': add(dest, "\\symbol{96}") - else: add(dest, c) - -var splitter*: string = "" - -proc escChar*(target: TOutputTarget, dest: var string, c: Char) {.inline.} = - case target - of outHtml: addXmlChar(dest, c) - of outLatex: addTexChar(dest, c) - -proc nextSplitPoint*(s: string, start: int): int = - result = start - while result < len(s) + 0: - case s[result] - of '_': return - of 'a'..'z': - if result + 1 < len(s) + 0: - if s[result + 1] in {'A'..'Z'}: return - else: nil - inc(result) - dec(result) # last valid index - -proc esc*(target: TOutputTarget, s: string, splitAfter = -1): string = - result = "" - if splitAfter >= 0: - var partLen = 0 - var j = 0 - while j < len(s): - var k = nextSplitPoint(s, j) - if (splitter != " ") or (partLen + k - j + 1 > splitAfter): - partLen = 0 - add(result, splitter) - for i in countup(j, k): escChar(target, result, s[i]) - inc(partLen, k - j + 1) - j = k + 1 - else: - for i in countup(0, len(s) - 1): escChar(target, result, s[i]) - - -proc disp(target: TOutputTarget, xml, tex: string): string 
= - if target != outLatex: result = xml - else: result = tex - -proc dispF(target: TOutputTarget, xml, tex: string, - args: varargs[string]): string = - if target != outLatex: result = xml % args - else: result = tex % args - -proc dispA(target: TOutputTarget, dest: var string, - xml, tex: string, args: varargs[string]) = - if target != outLatex: addf(dest, xml, args) - else: addf(dest, tex, args) - -proc renderRstToOut*(d: PDoc, n: PRstNode, result: var string) - -proc renderAux(d: PDoc, n: PRstNode, result: var string) = - for i in countup(0, len(n)-1): renderRstToOut(d, n.sons[i], result) - -proc renderAux(d: PDoc, n: PRstNode, frmtA, frmtB: string, result: var string) = - var tmp = "" - for i in countup(0, len(n)-1): renderRstToOut(d, n.sons[i], tmp) - if d.target != outLatex: - result.addf(frmtA, [tmp]) - else: - result.addf(frmtB, [tmp]) - -# ---------------- index handling -------------------------------------------- - -proc setIndexTerm*(d: PDoc, id, term: string) = - d.theIndex.add(term) - d.theIndex.add('\t') - let htmlFile = changeFileExt(extractFilename(d.filename), HtmlExt) - d.theIndex.add(htmlFile) - d.theIndex.add('#') - d.theIndex.add(id) - d.theIndex.add("\n") - -proc hash(n: PRstNode): int = - if n.kind == rnLeaf: - result = hash(n.text) - elif n.len > 0: - result = hash(n.sons[0]) - for i in 1 .. 
$2", "$2\\label{$1}", - [id, term]) - -type - TIndexEntry {.pure, final.} = object - keyword: string - link: string - -proc cmp(a, b: TIndexEntry): int = - result = cmpIgnoreStyle(a.keyword, b.keyword) - -proc `<-`(a: var TIndexEntry, b: TIndexEntry) = - shallowCopy a.keyword, b.keyword - shallowCopy a.link, b.link - -proc sortIndex(a: var openArray[TIndexEntry]) = - # we use shellsort here; fast and simple - let N = len(a) - var h = 1 - while true: - h = 3 * h + 1 - if h > N: break - while true: - h = h div 3 - for i in countup(h, N - 1): - var v: TIndexEntry - v <- a[i] - var j = i - while cmp(a[j-h], v) >= 0: - a[j] <- a[j-h] - j = j-h - if j < h: break - a[j] <- v - if h == 1: break - -proc mergeIndexes*(dir: string): string = - ## merges all index files in `dir` and returns the generated index as HTML. - ## The result is no full HTML for flexibility. - var a: seq[TIndexEntry] - newSeq(a, 15_000) - setLen(a, 0) - var L = 0 - for kind, path in walkDir(dir): - if kind == pcFile and path.endsWith(IndexExt): - for line in lines(path): - let s = line.find('\t') - if s < 0: continue - setLen(a, L+1) - a[L].keyword = line.substr(0, s-1) - a[L].link = line.substr(s+1) - inc L - sortIndex(a) - result = "" - var i = 0 - while i < L: - result.addf("
    $1
      \n", - [a[i].keyword]) - var j = i - while j < L and a[i].keyword == a[j].keyword: - result.addf( - "
    • $1
    • \n", - [a[j].link]) - inc j - result.add("
    \n") - i = j - -# ---------------------------------------------------------------------------- - -proc renderHeadline(d: PDoc, n: PRstNode, result: var string) = - var tmp = "" - for i in countup(0, len(n) - 1): renderRstToOut(d, n.sons[i], tmp) - var refname = rstnodeToRefname(n) - if d.hasToc: - var length = len(d.tocPart) - setlen(d.tocPart, length + 1) - d.tocPart[length].refname = refname - d.tocPart[length].n = n - d.tocPart[length].header = tmp - - dispA(d.target, result, - "$3", - "\\rsth$4{$3}\\label{$2}\n", [$n.level, - d.tocPart[length].refname, tmp, - $chr(n.level - 1 + ord('A'))]) - else: - dispA(d.target, result, "$3", - "\\rsth$4{$3}\\label{$2}\n", [ - $n.level, refname, tmp, - $chr(n.level - 1 + ord('A'))]) - -proc renderOverline(d: PDoc, n: PRstNode, result: var string) = - if d.meta[metaTitle].len == 0: - for i in countup(0, len(n)-1): - renderRstToOut(d, n.sons[i], d.meta[metaTitle]) - elif d.meta[metaSubtitle].len == 0: - for i in countup(0, len(n)-1): - renderRstToOut(d, n.sons[i], d.meta[metaSubtitle]) - else: - var tmp = "" - for i in countup(0, len(n) - 1): renderRstToOut(d, n.sons[i], tmp) - dispA(d.target, result, "
    $3
    ", - "\\rstov$4{$3}\\label{$2}\n", [$n.level, - rstnodeToRefname(n), tmp, $chr(n.level - 1 + ord('A'))]) - - -proc renderTocEntry(d: PDoc, e: TTocEntry, result: var string) = - dispA(d.target, result, - "
  • $2
  • \n", - "\\item\\label{$1_toc} $2\\ref{$1}\n", [e.refname, e.header]) - -proc renderTocEntries*(d: PDoc, j: var int, lvl: int, result: var string) = - var tmp = "" - while j <= high(d.tocPart): - var a = abs(d.tocPart[j].n.level) - if a == lvl: - renderTocEntry(d, d.tocPart[j], tmp) - inc(j) - elif a > lvl: - renderTocEntries(d, j, a, tmp) - else: - break - if lvl > 1: - dispA(d.target, result, "
      $1
    ", - "\\begin{enumerate}$1\\end{enumerate}", [tmp]) - else: - result.add(tmp) - -proc renderImage(d: PDoc, n: PRstNode, result: var string) = - var options = "" - var s = getFieldValue(n, "scale") - if s != "": dispA(d.target, options, " scale=\"$1\"", " scale=$1", [strip(s)]) - - s = getFieldValue(n, "height") - if s != "": dispA(d.target, options, " height=\"$1\"", " height=$1", [strip(s)]) - - s = getFieldValue(n, "width") - if s != "": dispA(d.target, options, " width=\"$1\"", " width=$1", [strip(s)]) - - s = getFieldValue(n, "alt") - if s != "": dispA(d.target, options, " alt=\"$1\"", "", [strip(s)]) - - s = getFieldValue(n, "align") - if s != "": dispA(d.target, options, " align=\"$1\"", "", [strip(s)]) - - if options.len > 0: options = dispF(d.target, "$1", "[$1]", [options]) - - dispA(d.target, result, "", "\\includegraphics$2{$1}", - [getArgument(n), options]) - if len(n) >= 3: renderRstToOut(d, n.sons[2], result) - -proc renderSmiley(d: PDoc, n: PRstNode, result: var string) = - dispA(d.target, result, - """""", - "\\includegraphics{$1}", [n.text]) - -proc renderCodeBlock(d: PDoc, n: PRstNode, result: var string) = - if n.sons[2] == nil: return - var m = n.sons[2].sons[0] - assert m.kind == rnLeaf - var langstr = strip(getArgument(n)) - var lang: TSourceLanguage - if langstr == "": - lang = langNimrod # default language - else: - lang = getSourceLanguage(langstr) - - dispA(d.target, result, "
    ", "\\begin{rstpre}\n", [])
    -  if lang == langNone:
    -    d.msgHandler(d.filename, 1, 0, mwUnsupportedLanguage, langstr)
    -    result.add(m.text)
    -  else:
    -    var g: TGeneralTokenizer
    -    initGeneralTokenizer(g, m.text)
    -    while true: 
    -      getNextToken(g, lang)
    -      case g.kind
    -      of gtEof: break 
    -      of gtNone, gtWhitespace: 
    -        add(result, substr(m.text, g.start, g.length + g.start - 1))
    -      else:
    -        dispA(d.target, result, "$1", "\\span$2{$1}", [
    -          esc(d.target, substr(m.text, g.start, g.length+g.start-1)),
    -          tokenClassToStr[g.kind]])
    -    deinitGeneralTokenizer(g)
    -  dispA(d.target, result, "
    ", "\n\\end{rstpre}\n") - -proc renderContainer(d: PDoc, n: PRstNode, result: var string) = - var tmp = "" - renderRstToOut(d, n.sons[2], tmp) - var arg = strip(getArgument(n)) - if arg == "": - dispA(d.target, result, "
    $1
    ", "$1", [tmp]) - else: - dispA(d.target, result, "
    $2
    ", "$2", [arg, tmp]) - -proc texColumns(n: PRstNode): string = - result = "" - for i in countup(1, len(n)): add(result, "|X") - -proc renderField(d: PDoc, n: PRstNode, result: var string) = - var b = false - if d.target == outLatex: - var fieldname = addNodes(n.sons[0]) - var fieldval = esc(d.target, strip(addNodes(n.sons[1]))) - if cmpIgnoreStyle(fieldname, "author") == 0 or - cmpIgnoreStyle(fieldname, "authors") == 0: - if d.meta[metaAuthor].len == 0: - d.meta[metaAuthor] = fieldval - b = true - elif cmpIgnoreStyle(fieldName, "version") == 0: - if d.meta[metaVersion].len == 0: - d.meta[metaVersion] = fieldval - b = true - if not b: - renderAux(d, n, "$1\n", "$1", result) - -proc renderRstToOut(d: PDoc, n: PRstNode, result: var string) = - if n == nil: return - case n.kind - of rnInner: renderAux(d, n, result) - of rnHeadline: renderHeadline(d, n, result) - of rnOverline: renderOverline(d, n, result) - of rnTransition: renderAux(d, n, "
    \n", "\\hrule\n", result) - of rnParagraph: renderAux(d, n, "

    $1

    \n", "$1\n\n", result) - of rnBulletList: - renderAux(d, n, "
      $1
    \n", - "\\begin{itemize}$1\\end{itemize}\n", result) - of rnBulletItem, rnEnumItem: - renderAux(d, n, "
  • $1
  • \n", "\\item $1\n", result) - of rnEnumList: - renderAux(d, n, "
      $1
    \n", - "\\begin{enumerate}$1\\end{enumerate}\n", result) - of rnDefList: - renderAux(d, n, "
    $1
    \n", - "\\begin{description}$1\\end{description}\n", result) - of rnDefItem: renderAux(d, n, result) - of rnDefName: renderAux(d, n, "
    $1
    \n", "\\item[$1] ", result) - of rnDefBody: renderAux(d, n, "
    $1
    \n", "$1\n", result) - of rnFieldList: - var tmp = "" - for i in countup(0, len(n) - 1): - renderRstToOut(d, n.sons[i], tmp) - if tmp.len != 0: - dispA(d.target, result, - "" & - "" & - "" & - "$1" & - "
    ", - "\\begin{description}$1\\end{description}\n", - [tmp]) - of rnField: renderField(d, n, result) - of rnFieldName: - renderAux(d, n, "$1:", "\\item[$1:]", result) - of rnFieldBody: - renderAux(d, n, "$1", " $1\n", result) - of rnIndex: - renderRstToOut(d, n.sons[2], result) - of rnOptionList: - renderAux(d, n, "$1
    ", - "\\begin{description}\n$1\\end{description}\n", result) - of rnOptionListItem: - renderAux(d, n, "$1\n", "$1", result) - of rnOptionGroup: - renderAux(d, n, "$1", "\\item[$1]", result) - of rnDescription: - renderAux(d, n, "$1\n", " $1\n", result) - of rnOption, rnOptionString, rnOptionArgument: - doAssert false, "renderRstToOut" - of rnLiteralBlock: - renderAux(d, n, "
    $1
    \n", - "\\begin{rstpre}\n$1\n\\end{rstpre}\n", result) - of rnQuotedLiteralBlock: - doAssert false, "renderRstToOut" - of rnLineBlock: - renderAux(d, n, "

    $1

    ", "$1\n\n", result) - of rnLineBlockItem: - renderAux(d, n, "$1
    ", "$1\\\\\n", result) - of rnBlockQuote: - renderAux(d, n, "

    $1

    \n", - "\\begin{quote}$1\\end{quote}\n", result) - of rnTable, rnGridTable: - renderAux(d, n, - "$1
    ", - "\\begin{table}\\begin{rsttab}{" & - texColumns(n) & "|}\n\\hline\n$1\\end{rsttab}\\end{table}", result) - of rnTableRow: - if len(n) >= 1: - if d.target == outLatex: - #var tmp = "" - renderRstToOut(d, n.sons[0], result) - for i in countup(1, len(n) - 1): - result.add(" & ") - renderRstToOut(d, n.sons[i], result) - result.add("\\\\\n\\hline\n") - else: - result.add("") - renderAux(d, n, result) - result.add("\n") - of rnTableDataCell: - renderAux(d, n, "$1", "$1", result) - of rnTableHeaderCell: - renderAux(d, n, "$1", "\\textbf{$1}", result) - of rnLabel: - doAssert false, "renderRstToOut" # used for footnotes and other - of rnFootnote: - doAssert false, "renderRstToOut" # a footnote - of rnCitation: - doAssert false, "renderRstToOut" # similar to footnote - of rnRef: - var tmp = "" - renderAux(d, n, tmp) - dispA(d.target, result, "$1", - "$1\\ref{$2}", [tmp, rstnodeToRefname(n)]) - of rnStandaloneHyperlink: - renderAux(d, n, - "$1", - "\\href{$1}{$1}", result) - of rnHyperlink: - var tmp0 = "" - var tmp1 = "" - renderRstToOut(d, n.sons[0], tmp0) - renderRstToOut(d, n.sons[1], tmp1) - dispA(d.target, result, "$1", - "\\href{$2}{$1}", - [tmp0, tmp1]) - of rnDirArg, rnRaw: renderAux(d, n, result) - of rnRawHtml: - if d.target != outLatex: - result.add addNodes(lastSon(n)) - of rnRawLatex: - if d.target == outLatex: - result.add addNodes(lastSon(n)) - - of rnImage, rnFigure: renderImage(d, n, result) - of rnCodeBlock: renderCodeBlock(d, n, result) - of rnContainer: renderContainer(d, n, result) - of rnSubstitutionReferences, rnSubstitutionDef: - renderAux(d, n, "|$1|", "|$1|", result) - of rnDirective: - renderAux(d, n, "", "", result) - of rnGeneralRole: - var tmp0 = "" - var tmp1 = "" - renderRstToOut(d, n.sons[0], tmp0) - renderRstToOut(d, n.sons[1], tmp1) - dispA(d.target, result, "$1", "\\span$2{$1}", - [tmp0, tmp1]) - of rnSub: renderAux(d, n, "$1", "\\rstsub{$1}", result) - of rnSup: renderAux(d, n, "$1", "\\rstsup{$1}", result) - of rnEmphasis: 
renderAux(d, n, "$1", "\\emph{$1}", result) - of rnStrongEmphasis: - renderAux(d, n, "$1", "\\textbf{$1}", result) - of rnTripleEmphasis: - renderAux(d, n, "$1", - "\\textbf{emph{$1}}", result) - of rnInterpretedText: - renderAux(d, n, "$1", "\\emph{$1}", result) - of rnIdx: - renderIndexTerm(d, n, result) - of rnInlineLiteral: - renderAux(d, n, - "$1", - "\\texttt{$1}", result) - of rnSmiley: renderSmiley(d, n, result) - of rnLeaf: result.add(esc(d.target, n.text)) - of rnContents: d.hasToc = true - of rnTitle: - d.meta[metaTitle] = "" - renderRstToOut(d, n.sons[0], d.meta[metaTitle]) - -# ----------------------------------------------------------------------------- - -proc getVarIdx(varnames: openarray[string], id: string): int = - for i in countup(0, high(varnames)): - if cmpIgnoreStyle(varnames[i], id) == 0: - return i - result = -1 - -proc formatNamedVars*(frmt: string, varnames: openarray[string], - varvalues: openarray[string]): string = - var i = 0 - var L = len(frmt) - result = "" - var num = 0 - while i < L: - if frmt[i] == '$': - inc(i) # skip '$' - case frmt[i] - of '#': - add(result, varvalues[num]) - inc(num) - inc(i) - of '$': - add(result, "$") - inc(i) - of '0'..'9': - var j = 0 - while true: - j = (j * 10) + Ord(frmt[i]) - ord('0') - inc(i) - if i > L-1 or frmt[i] notin {'0'..'9'}: break - if j > high(varvalues) + 1: - raise newException(EInvalidValue, "invalid index: " & $j) - num = j - add(result, varvalues[j - 1]) - of 'A'..'Z', 'a'..'z', '\x80'..'\xFF': - var id = "" - while true: - add(id, frmt[i]) - inc(i) - if frmt[i] notin {'A'..'Z', '_', 'a'..'z', '\x80'..'\xFF'}: break - var idx = getVarIdx(varnames, id) - if idx >= 0: - add(result, varvalues[idx]) - else: - raise newException(EInvalidValue, "unknown substitution var: " & id) - of '{': - var id = "" - inc(i) - while frmt[i] != '}': - if frmt[i] == '\0': - raise newException(EInvalidValue, "'}' expected") - add(id, frmt[i]) - inc(i) - inc(i) # skip } - # search for the variable: - var idx 
= getVarIdx(varnames, id) - if idx >= 0: add(result, varvalues[idx]) - else: - raise newException(EInvalidValue, "unknown substitution var: " & id) - else: - raise newException(EInvalidValue, "unknown substitution: $" & $frmt[i]) - var start = i - while i < L: - if frmt[i] != '$': inc(i) - else: break - if i-1 >= start: add(result, substr(frmt, start, i - 1)) - - -proc defaultConfig*(): PStringTable = - ## creates a default configuration for HTML generation. - result = newStringTable(modeStyleInsensitive) - - template setConfigVar(key, val: expr) = - result[key] = val - - setConfigVar("split.item.toc", "20") - setConfigVar("doc.section", """ -
    -

    $sectionTitle

    -
    -$content -
    -""") - setConfigVar("doc.section.toc", """ -
  • - $sectionTitle -
      - $content -
    -
  • -""") - setConfigVar("doc.item", """ -
    $header
    -
    -$desc -
    -""") - setConfigVar("doc.item.toc", """ -
  • $name
  • -""") - setConfigVar("doc.toc", """ -""") - setConfigVar("doc.body_toc", """ -$tableofcontents -
    -$moduledesc -$content -
    -""") - setConfigVar("doc.body_no_toc", "$moduledesc $content") - setConfigVar("doc.file", "$content") - -# ---------- forum --------------------------------------------------------- - -proc rstToHtml*(s: string, options: TRstParseOptions, - config: PStringTable): string = - ## exported for *nimforum*. - - proc myFindFile(filename: string): string = - # we don't find any files in online mode: - result = "" - - const filen = "input" - var d: TRstGenerator - initRstGenerator(d, outHtml, config, filen, options, myFindFile, - rst.defaultMsgHandler) - var dummyHasToc = false - var rst = rstParse(s, filen, 0, 1, dummyHasToc, options) - result = "" - renderRstToOut(d, rst, result) - \ No newline at end of file -- cgit 1.4.1-2-gfad0