diff options
author | Araq <rumpf_a@web.de> | 2013-04-22 19:30:03 +0200 |
---|---|---|
committer | Araq <rumpf_a@web.de> | 2013-04-22 19:30:03 +0200 |
commit | 8dc9ad7ce3602c9a147b326fe74c656828c3f292 (patch) | |
tree | 9fdcbccb16ece69e00c7f475936579a764fa18b4 | |
parent | cc4250d746513e188784bcd5ec2a2924af40f239 (diff) | |
parent | adc75d020180da8b83fc3d22f5dfbc54d71060a8 (diff) | |
download | Nim-8dc9ad7ce3602c9a147b326fe74c656828c3f292.tar.gz |
Merge branch 'newparser' of github.com:Araq/Nimrod into newparser
-rw-r--r-- | compiler/docgen.nim | 2 | ||||
-rw-r--r-- | compiler/lexer.nim | 139 | ||||
-rw-r--r-- | compiler/msgs.nim | 7 | ||||
-rw-r--r-- | compiler/nimconf.nim | 2 | ||||
-rw-r--r-- | compiler/parser.nim | 964 | ||||
-rw-r--r-- | compiler/renderer.nim | 4 | ||||
-rw-r--r-- | compiler/sem.nim | 31 | ||||
-rw-r--r-- | compiler/semdata.nim | 2 | ||||
-rw-r--r-- | compiler/types.nim | 20 | ||||
-rw-r--r-- | doc/grammar.txt | 383 | ||||
-rw-r--r-- | doc/manual.txt | 300 | ||||
-rw-r--r-- | todo.txt | 29 |
12 files changed, 952 insertions, 931 deletions
diff --git a/compiler/docgen.nim b/compiler/docgen.nim index 2b7c567c6..9929b4bd9 100644 --- a/compiler/docgen.nim +++ b/compiler/docgen.nim @@ -237,7 +237,7 @@ proc genItem(d: PDoc, n, nameNode: PNode, k: TSymKind) = of tkSymbol: dispA(result, "<span class=\"Identifier\">$1</span>", "\\spanIdentifier{$1}", [toRope(esc(d.target, literal))]) - of tkInd, tkSad, tkDed, tkSpaces, tkInvalid: + of tkSpaces, tkInvalid: app(result, literal) of tkParLe, tkParRi, tkBracketLe, tkBracketRi, tkCurlyLe, tkCurlyRi, tkBracketDotLe, tkBracketDotRi, tkCurlyDotLe, tkCurlyDotRi, tkParDotLe, diff --git a/compiler/lexer.nim b/compiler/lexer.nim index 9cf5ccb2b..93649c888 100644 --- a/compiler/lexer.nim +++ b/compiler/lexer.nim @@ -1,7 +1,7 @@ # # # The Nimrod Compiler -# (c) Copyright 2012 Andreas Rumpf +# (c) Copyright 2013 Andreas Rumpf # # See the file "copying.txt", included in this # distribution, for details about the copyright. @@ -58,8 +58,7 @@ type tkParDotLe, tkParDotRi, # (. and .) tkComma, tkSemiColon, tkColon, tkColonColon, tkEquals, tkDot, tkDotDot, - tkOpr, tkComment, tkAccent, tkInd, tkSad, - tkDed, # pseudo token types used by the source renderers: + tkOpr, tkComment, tkAccent, tkSpaces, tkInfixOpr, tkPrefixOpr, tkPostfixOpr, TTokTypes* = set[TTokType] @@ -91,8 +90,8 @@ const ")", "[", "]", "{", "}", "[.", ".]", "{.", ".}", "(.", ".)", ",", ";", ":", "::", "=", ".", "..", - "tkOpr", "tkComment", "`", "[new indentation]", - "[same indentation]", "[dedentation]", "tkSpaces", "tkInfixOpr", + "tkOpr", "tkComment", "`", + "tkSpaces", "tkInfixOpr", "tkPrefixOpr", "tkPostfixOpr"] type @@ -102,7 +101,8 @@ type base2, base8, base16 TToken* = object # a Nimrod token tokType*: TTokType # the type of the token - indent*: int # the indentation; only valid if tokType = tkIndent + indent*: int # the indentation; != -1 if the token has been + # preceeded with indentation ident*: PIdent # the parsed identifier iNumber*: BiggestInt # the parsed integer literal fNumber*: BiggestFloat # 
the parsed floating point literal @@ -113,8 +113,6 @@ type TLexer* = object of TBaseLexer fileIdx*: int32 - indentStack*: seq[int] # the indentation stack - dedent*: int # counter for DED token generation indentAhead*: int # if > 0 an indendation has already been read # this is needed because scanning comments # needs so much look-ahead @@ -122,9 +120,6 @@ type var gLinesCompiled*: int # all lines that have been compiled -proc pushInd*(L: var TLexer, indent: int) - -proc popInd*(L: var TLexer) proc isKeyword*(kind: TTokType): bool proc openLexer*(lex: var TLexer, fileidx: int32, inputstream: PLLStream) proc rawGetTok*(L: var TLexer, tok: var TToken) @@ -154,31 +149,14 @@ proc isNimrodIdentifier*(s: string): bool = inc(i) result = true -proc pushInd(L: var TLexer, indent: int) = - var length = len(L.indentStack) - setlen(L.indentStack, length + 1) - if (indent > L.indentStack[length - 1]): - L.indentstack[length] = indent - else: - InternalError("pushInd") - -proc popInd(L: var TLexer) = - var length = len(L.indentStack) - setlen(L.indentStack, length - 1) - -proc findIdent(L: TLexer, indent: int): bool = - for i in countdown(len(L.indentStack) - 1, 0): - if L.indentStack[i] == indent: - return true - proc tokToStr*(tok: TToken): string = case tok.tokType of tkIntLit..tkInt64Lit: result = $tok.iNumber of tkFloatLit..tkFloat64Lit: result = $tok.fNumber of tkInvalid, tkStrLit..tkCharLit, tkComment: result = tok.literal - of tkParLe..tkColon, tkEof, tkInd, tkSad, tkDed, tkAccent: + of tkParLe..tkColon, tkEof, tkAccent: result = tokTypeToStr[tok.tokType] - else: + else: if tok.ident != nil: result = tok.ident.s else: @@ -216,7 +194,6 @@ proc fillToken(L: var TToken) = proc openLexer(lex: var TLexer, fileIdx: int32, inputstream: PLLStream) = openBaseLexer(lex, inputstream) - lex.indentStack = @[0] lex.fileIdx = fileIdx lex.indentAhead = - 1 inc(lex.Linenumber, inputstream.lineOffset) @@ -434,9 +411,10 @@ proc GetNumber(L: var TLexer): TToken = result.tokType = tkInt64Lit 
elif result.tokType != tkInt64Lit: lexMessage(L, errInvalidNumber, result.literal) - except EInvalidValue: lexMessage(L, errInvalidNumber, result.literal) - except EOverflow: lexMessage(L, errNumberOutOfRange, result.literal) - except EOutOfRange: lexMessage(L, errNumberOutOfRange, result.literal) + except EInvalidValue: + lexMessage(L, errInvalidNumber, result.literal) + except EOverflow, EOutOfRange: + lexMessage(L, errNumberOutOfRange, result.literal) L.bufpos = endpos proc handleHexChar(L: var TLexer, xi: var int) = @@ -651,24 +629,6 @@ proc getOperator(L: var TLexer, tok: var TToken) = Inc(pos) endOperator(L, tok, pos, h) -proc handleIndentation(L: var TLexer, tok: var TToken, indent: int) = - tok.indent = indent - var i = high(L.indentStack) - if indent > L.indentStack[i]: - tok.tokType = tkInd - elif indent == L.indentStack[i]: - tok.tokType = tkSad - else: - # check we have the indentation somewhere in the stack: - while (i >= 0) and (indent != L.indentStack[i]): - dec(i) - inc(L.dedent) - dec(L.dedent) - tok.tokType = tkDed - if i < 0: - tok.tokType = tkSad # for the parser it is better as SAD - lexMessage(L, errInvalidIndentation) - proc scanComment(L: var TLexer, tok: var TToken) = var pos = L.bufpos var buf = L.buf @@ -705,53 +665,45 @@ proc scanComment(L: var TLexer, tok: var TToken) = else: if buf[pos] > ' ': L.indentAhead = indent - inc(L.dedent) - break + break L.bufpos = pos -proc skip(L: var TLexer, tok: var TToken) = +proc skip(L: var TLexer, tok: var TToken) = var pos = L.bufpos var buf = L.buf - while true: + while true: case buf[pos] - of ' ': + of ' ': Inc(pos) - of Tabulator: + of Tabulator: lexMessagePos(L, errTabulatorsAreNotAllowed, pos) - inc(pos) # BUGFIX - of CR, LF: + inc(pos) + of CR, LF: pos = HandleCRLF(L, pos) buf = L.buf var indent = 0 - while buf[pos] == ' ': + while buf[pos] == ' ': Inc(pos) Inc(indent) - if (buf[pos] > ' '): - handleIndentation(L, tok, indent) - break - else: + if buf[pos] > ' ': + tok.indent = indent + break 
+ else: break # EndOfFile also leaves the loop L.bufpos = pos -proc rawGetTok(L: var TLexer, tok: var TToken) = +proc rawGetTok(L: var TLexer, tok: var TToken) = fillToken(tok) - if L.dedent > 0: - dec(L.dedent) - if L.indentAhead >= 0: - handleIndentation(L, tok, L.indentAhead) - L.indentAhead = - 1 - else: - tok.tokType = tkDed - return + if L.indentAhead >= 0: + tok.indent = L.indentAhead + L.indentAhead = -1 + else: + tok.indent = -1 skip(L, tok) - # got an documentation comment or tkIndent, return that: - if tok.toktype != tkInvalid: return var c = L.buf[L.bufpos] - if c in SymStartChars - {'r', 'R', 'l'}: + if c in SymStartChars - {'r', 'R', 'l'}: getSymbol(L, tok) - elif c in {'0'..'9'}: - tok = getNumber(L) - else: + else: case c of '#': scanComment(L, tok) @@ -769,10 +721,10 @@ proc rawGetTok(L: var TLexer, tok: var TToken) = of 'l': # if we parsed exactly one character and its a small L (l), this # is treated as a warning because it may be confused with the number 1 - if not (L.buf[L.bufpos + 1] in (SymChars + {'_'})): + if L.buf[L.bufpos+1] notin (SymChars + {'_'}): lexMessage(L, warnSmallLshouldNotBeUsed) getSymbol(L, tok) - of 'r', 'R': + of 'r', 'R': if L.buf[L.bufPos + 1] == '\"': Inc(L.bufPos) getString(L, tok, true) @@ -780,7 +732,7 @@ proc rawGetTok(L: var TLexer, tok: var TToken) = getSymbol(L, tok) of '(': Inc(L.bufpos) - if (L.buf[L.bufPos] == '.') and (L.buf[L.bufPos + 1] != '.'): + if L.buf[L.bufPos] == '.' and L.buf[L.bufPos+1] != '.': tok.toktype = tkParDotLe Inc(L.bufpos) else: @@ -790,29 +742,29 @@ proc rawGetTok(L: var TLexer, tok: var TToken) = Inc(L.bufpos) of '[': Inc(L.bufpos) - if (L.buf[L.bufPos] == '.') and (L.buf[L.bufPos + 1] != '.'): + if L.buf[L.bufPos] == '.' 
and L.buf[L.bufPos+1] != '.': tok.toktype = tkBracketDotLe Inc(L.bufpos) - else: + else: tok.toktype = tkBracketLe - of ']': + of ']': tok.toktype = tkBracketRi Inc(L.bufpos) - of '.': - if L.buf[L.bufPos + 1] == ']': + of '.': + if L.buf[L.bufPos+1] == ']': tok.tokType = tkBracketDotRi Inc(L.bufpos, 2) - elif L.buf[L.bufPos + 1] == '}': + elif L.buf[L.bufPos+1] == '}': tok.tokType = tkCurlyDotRi Inc(L.bufpos, 2) - elif L.buf[L.bufPos + 1] == ')': + elif L.buf[L.bufPos+1] == ')': tok.tokType = tkParDotRi Inc(L.bufpos, 2) else: getOperator(L, tok) of '{': Inc(L.bufpos) - if (L.buf[L.bufPos] == '.') and (L.buf[L.bufPos+1] != '.'): + if L.buf[L.bufPos] == '.' and L.buf[L.bufPos+1] != '.': tok.toktype = tkCurlyDotLe Inc(L.bufpos) else: @@ -838,13 +790,16 @@ proc rawGetTok(L: var TLexer, tok: var TToken) = tok.tokType = tkCharLit getCharacter(L, tok) tok.tokType = tkCharLit + of '0'..'9': + tok = getNumber(L) else: if c in OpChars: getOperator(L, tok) elif c == lexbase.EndOfFile: tok.toktype = tkEof + tok.indent = 0 else: - tok.literal = c & "" + tok.literal = $c tok.tokType = tkInvalid lexMessage(L, errInvalidToken, c & " (\\" & $(ord(c)) & ')') Inc(L.bufpos) diff --git a/compiler/msgs.nim b/compiler/msgs.nim index 8b45bf80c..4099d7622 100644 --- a/compiler/msgs.nim +++ b/compiler/msgs.nim @@ -711,7 +711,7 @@ var proc writeSurroundingSrc(info: TLineInfo) = const indent = " " - MsgWriteln(indent & info.sourceLine.data) + MsgWriteln(indent & info.sourceLine.ropeToStr) MsgWriteln(indent & repeatChar(info.col, ' ') & '^') proc liMessage(info: TLineInfo, msg: TMsgKind, arg: string, @@ -786,8 +786,9 @@ proc sourceLine*(i: TLineInfo): PRope = for line in lines(i.toFullPath): addSourceLine i.fileIndex, line.string - InternalAssert i.fileIndex < fileInfos.len and - i.line <= fileInfos[i.fileIndex].lines.len + InternalAssert i.fileIndex < fileInfos.len + # can happen if the error points to EOF: + if i.line > fileInfos[i.fileIndex].lines.len: return nil result = 
fileInfos[i.fileIndex].lines[i.line-1] diff --git a/compiler/nimconf.nim b/compiler/nimconf.nim index 0f0b76827..3bd97ccb2 100644 --- a/compiler/nimconf.nim +++ b/compiler/nimconf.nim @@ -19,7 +19,7 @@ import proc ppGetTok(L: var TLexer, tok: var TToken) = # simple filter rawGetTok(L, tok) - while tok.tokType in {tkInd, tkSad, tkDed, tkComment}: rawGetTok(L, tok) + while tok.tokType in {tkComment}: rawGetTok(L, tok) proc parseExpr(L: var TLexer, tok: var TToken): bool proc parseAtom(L: var TLexer, tok: var TToken): bool = diff --git a/compiler/parser.nim b/compiler/parser.nim index 769aa7a3e..175664a84 100644 --- a/compiler/parser.nim +++ b/compiler/parser.nim @@ -1,7 +1,7 @@ # # # The Nimrod Compiler -# (c) Copyright 2012 Andreas Rumpf +# (c) Copyright 2013 Andreas Rumpf # # See the file "copying.txt", included in this # distribution, for details about the copyright. @@ -12,17 +12,28 @@ # it uses several helper routines to keep the parser small. A special # efficient algorithm is used for the precedence levels. The parser here can # be seen as a refinement of the grammar, as it specifies how the AST is built -# from the grammar and how comments belong to the AST. +# from the grammar and how comments belong to the AST. 
+ + +# In fact the grammar is generated from this file: +when isMainModule: + import pegs + var outp = open("compiler/grammar.txt", fmWrite) + for line in lines("compiler/parser.nim"): + if line =~ peg" \s* '#| ' {.*}": + outp.writeln matches[0] + outp.close import - llstream, lexer, idents, strutils, ast, msgs + llstream, lexer, idents, strutils, ast, astalgo, msgs type TParser*{.final.} = object # a TParser object represents a module that # is being parsed + currInd: int # current indentation + firstTok: bool lex*: TLexer # the lexer that is used for parsing tok*: TToken # the current token - proc ParseAll*(p: var TParser): PNode proc openParser*(p: var TParser, filename: string, inputstream: PLLStream) @@ -68,6 +79,7 @@ proc OpenParser*(p: var TParser, fileIdx: int32, inputStream: PLLStream) = initToken(p.tok) OpenLexer(p.lex, fileIdx, inputstream) getTok(p) # read the first token + p.firstTok = true proc OpenParser*(p: var TParser, filename: string, inputStream: PLLStream) = openParser(p, filename.fileInfoIdx, inputStream) @@ -81,51 +93,64 @@ proc parMessage(p: TParser, msg: TMsgKind, arg: string = "") = proc parMessage(p: TParser, msg: TMsgKind, tok: TToken) = lexMessage(p.lex, msg, prettyTok(tok)) -proc skipComment(p: var TParser, node: PNode) = - if p.tok.tokType == tkComment: - if node != nil: +template withInd(p: expr, body: stmt) {.immediate.} = + let oldInd = p.currInd + p.currInd = p.tok.indent + body + p.currInd = oldInd + +template realInd(p): bool = p.tok.indent > p.currInd +template sameInd(p): bool = p.tok.indent == p.currInd +template sameOrNoInd(p): bool = p.tok.indent == p.currInd or p.tok.indent < 0 + +proc rawSkipComment(p: var TParser, node: PNode) = + if p.tok.tokType == tkComment: + if node != nil: if node.comment == nil: node.comment = "" add(node.comment, p.tok.literal) - else: + else: parMessage(p, errInternal, "skipComment") getTok(p) -proc skipInd(p: var TParser) = - if p.tok.tokType == tkInd: getTok(p) - -proc optPar(p: var TParser) = 
- if p.tok.tokType == tkSad or p.tok.tokType == tkInd: getTok(p) - -proc optInd(p: var TParser, n: PNode) = +proc skipComment(p: var TParser, node: PNode) = + if p.tok.indent < 0: rawSkipComment(p, node) + +proc skipInd(p: var TParser) = + if p.tok.indent >= 0: + if not realInd(p): parMessage(p, errInvalidIndentation) + +proc optPar(p: var TParser) = + if p.tok.indent >= 0: + if p.tok.indent < p.currInd: parMessage(p, errInvalidIndentation) + +proc optInd(p: var TParser, n: PNode) = skipComment(p, n) skipInd(p) -proc ExpectNl(p: TParser) = - if p.tok.tokType notin {tkEof, tkSad, tkInd, tkDed, tkComment}: - lexMessage(p.lex, errNewlineExpected, prettyTok(p.tok)) +proc getTokNoInd(p: var TParser) = + getTok(p) + if p.tok.indent >= 0: parMessage(p, errInvalidIndentation) -proc expectIdentOrKeyw(p: TParser) = - if p.tok.tokType != tkSymbol and not isKeyword(p.tok.tokType): +proc expectIdentOrKeyw(p: TParser) = + if p.tok.tokType != tkSymbol and not isKeyword(p.tok.tokType): lexMessage(p.lex, errIdentifierExpected, prettyTok(p.tok)) -proc ExpectIdent(p: TParser) = - if p.tok.tokType != tkSymbol: +proc ExpectIdent(p: TParser) = + if p.tok.tokType != tkSymbol: lexMessage(p.lex, errIdentifierExpected, prettyTok(p.tok)) -proc Eat(p: var TParser, TokType: TTokType) = +proc Eat(p: var TParser, TokType: TTokType) = if p.tok.TokType == TokType: getTok(p) else: lexMessage(p.lex, errTokenExpected, TokTypeToStr[tokType]) -proc parLineInfo(p: TParser): TLineInfo = +proc parLineInfo(p: TParser): TLineInfo = result = getLineInfo(p.lex) -proc indAndComment(p: var TParser, n: PNode) = - if p.tok.tokType == tkInd: - var info = parLineInfo(p) - getTok(p) - if p.tok.tokType == tkComment: skipComment(p, n) - else: LocalError(info, errInvalidIndentation) - else: +proc indAndComment(p: var TParser, n: PNode) = + if p.tok.indent > p.currInd: + if p.tok.tokType == tkComment: rawSkipComment(p, n) + else: parMessage(p, errInvalidIndentation) + else: skipComment(p, n) proc newNodeP(kind: 
TNodeKind, p: TParser): PNode = @@ -195,7 +220,41 @@ proc getPrecedence(tok: TToken): int = proc isOperator(tok: TToken): bool = result = getPrecedence(tok) >= 0 -proc parseSymbol(p: var TParser): PNode = +#| module = stmt ^* (';' / IND{=}) +#| +#| comma = ',' COMMENT? +#| semicolon = ';' COMMENT? +#| colon = ':' COMMENT? +#| colcom = ':' COMMENT? +#| +#| operator = OP0 | OP1 | OP2 | OP3 | OP4 | OP5 | OP6 | OP7 | OP8 | OP9 +#| | 'or' | 'xor' | 'and' +#| | 'is' | 'isnot' | 'in' | 'notin' | 'of' +#| | 'div' | 'mod' | 'shl' | 'shr' | 'not' | 'addr' | 'static' | '..' +#| +#| prefixOperator = operator +#| +#| optInd = COMMENT? +#| optPar = (IND{>} | IND{=})? +#| +#| simpleExpr = assignExpr (OP0 optInd assignExpr)* +#| assignExpr = orExpr (OP1 optInd orExpr)* +#| orExpr = andExpr (OP2 optInd andExpr)* +#| andExpr = cmpExpr (OP3 optInd cmpExpr)* +#| cmpExpr = sliceExpr (OP4 optInd sliceExpr)* +#| sliceExpr = ampExpr (OP5 optInd ampExpr)* +#| ampExpr = plusExpr (OP6 optInd plusExpr)* +#| plusExpr = mulExpr (OP7 optInd mulExpr)* +#| mulExpr = dollarExpr (OP8 optInd dollarExpr)* +#| dollarExpr = primary (OP9 optInd primary)* + +proc colcom(p: var TParser, n: PNode) = + eat(p, tkColon) + skipComment(p, n) + +proc parseSymbol(p: var TParser): PNode = + #| symbol = '`' (KEYW|IDENT|operator|'(' ')'|'[' ']'|'{' '}'|'='|literal)+ '`' + #| | IDENT case p.tok.tokType of tkSymbol: result = newIdentNodeP(p.tok.ident, p) @@ -231,30 +290,33 @@ proc parseSymbol(p: var TParser): PNode = parMessage(p, errIdentifierExpected, p.tok) break eat(p, tkAccent) - else: + else: parMessage(p, errIdentifierExpected, p.tok) getTok(p) # BUGFIX: We must consume a token here to prevent endless loops! 
result = ast.emptyNode proc indexExpr(p: var TParser): PNode = + #| indexExpr = expr result = parseExpr(p) proc indexExprList(p: var TParser, first: PNode, k: TNodeKind, endToken: TTokType): PNode = + #| indexExprList = indexExpr ^+ comma result = newNodeP(k, p) addSon(result, first) getTok(p) optInd(p, result) - while p.tok.tokType notin {endToken, tkEof, tkSad}: + while p.tok.tokType notin {endToken, tkEof}: var a = indexExpr(p) addSon(result, a) if p.tok.tokType != tkComma: break getTok(p) - optInd(p, a) + skipComment(p, a) optPar(p) eat(p, endToken) proc exprColonEqExpr(p: var TParser): PNode = + #| exprColonEqExpr = expr (':'|'=' expr)? var a = parseExpr(p) if p.tok.tokType == tkColon: result = newNodeP(nkExprColonExpr, p) @@ -272,6 +334,7 @@ proc exprColonEqExpr(p: var TParser): PNode = result = a proc exprList(p: var TParser, endTok: TTokType, result: PNode) = + #| exprList = expr ^+ comma getTok(p) optInd(p, result) while (p.tok.tokType != endTok) and (p.tok.tokType != tkEof): @@ -283,6 +346,7 @@ proc exprList(p: var TParser, endTok: TTokType, result: PNode) = eat(p, endTok) proc dotExpr(p: var TParser, a: PNode): PNode = + #| dotExpr = expr '.' optInd ('type' | 'addr' | symbol) var info = p.lex.getlineInfo getTok(p) optInd(p, a) @@ -301,40 +365,31 @@ proc dotExpr(p: var TParser, a: PNode): PNode = addSon(result, parseSymbol(p)) proc qualifiedIdent(p: var TParser): PNode = - result = parseSymbol(p) #optInd(p, result); + #| qualifiedIdent = symbol ('.' optInd ('type' | 'addr' | symbol))? 
+ result = parseSymbol(p) if p.tok.tokType == tkDot: result = dotExpr(p, result) -proc qualifiedIdentListAux(p: var TParser, endTok: TTokType, result: PNode) = - getTok(p) - optInd(p, result) - while (p.tok.tokType != endTok) and (p.tok.tokType != tkEof): - var a = qualifiedIdent(p) - addSon(result, a) #optInd(p, a); - if p.tok.tokType != tkComma: break - getTok(p) - optInd(p, a) - eat(p, endTok) - -proc exprColonEqExprListAux(p: var TParser, endTok: TTokType, result: PNode) = +proc exprColonEqExprListAux(p: var TParser, endTok: TTokType, result: PNode) = assert(endTok in {tkCurlyRi, tkCurlyDotRi, tkBracketRi, tkParRi}) getTok(p) optInd(p, result) - while (p.tok.tokType != endTok) and (p.tok.tokType != tkEof) and - (p.tok.tokType != tkSad) and (p.tok.tokType != tkInd): + while p.tok.tokType != endTok and p.tok.tokType != tkEof: var a = exprColonEqExpr(p) addSon(result, a) if p.tok.tokType != tkComma: break getTok(p) - optInd(p, a) + skipComment(p, a) optPar(p) eat(p, endTok) -proc exprColonEqExprList(p: var TParser, kind: TNodeKind, - endTok: TTokType): PNode = +proc exprColonEqExprList(p: var TParser, kind: TNodeKind, + endTok: TTokType): PNode = + #| exprColonEqExprList = exprColonEqExpr (comma exprColonEqExpr)* (comma)? 
result = newNodeP(kind, p) exprColonEqExprListAux(p, endTok, result) proc setOrTableConstr(p: var TParser): PNode = + #| setOrTableConstr = '{' ((exprColonEqExpr comma)* | ':' ) '}' result = newNodeP(nkCurly, p) getTok(p) # skip '{' optInd(p, result) @@ -342,17 +397,18 @@ proc setOrTableConstr(p: var TParser): PNode = getTok(p) # skip ':' result.kind = nkTableConstr else: - while p.tok.tokType notin {tkCurlyRi, tkEof, tkSad, tkInd}: + while p.tok.tokType notin {tkCurlyRi, tkEof}: var a = exprColonEqExpr(p) if a.kind == nkExprColonExpr: result.kind = nkTableConstr addSon(result, a) if p.tok.tokType != tkComma: break getTok(p) - optInd(p, a) + skipComment(p, a) optPar(p) eat(p, tkCurlyRi) # skip '}' proc parseCast(p: var TParser): PNode = + #| castExpr = 'cast' '[' optInd typeDesc optPar ']' '(' optInd expr optPar ')' result = newNodeP(nkCast, p) getTok(p) eat(p, tkBracketLe) @@ -366,15 +422,6 @@ proc parseCast(p: var TParser): PNode = optPar(p) eat(p, tkParRi) -proc parseAddr(p: var TParser): PNode = - result = newNodeP(nkAddr, p) - getTok(p) - eat(p, tkParLe) - optInd(p, result) - addSon(result, parseExpr(p)) - optPar(p) - eat(p, tkParRi) - proc setBaseFlags(n: PNode, base: TNumericalBase) = case base of base10: nil @@ -398,14 +445,26 @@ proc parseGStrLit(p: var TParser, a: PNode): PNode = result = a proc identOrLiteral(p: var TParser): PNode = + #| generalizedLit ::= GENERALIZED_STR_LIT | GENERALIZED_TRIPLESTR_LIT + #| identOrLiteral = generalizedLit | symbol + #| | INT_LIT | INT8_LIT | INT16_LIT | INT32_LIT | INT64_LIT + #| | UINT_LIT | UINT8_LIT | UINT16_LIT | UINT32_LIT | UINT64_LIT + #| | FLOAT_LIT | FLOAT32_LIT | FLOAT64_LIT + #| | STR_LIT | RSTR_LIT | TRIPLESTR_LIT + #| | CHAR_LIT + #| | NIL + #| | tupleConstr | arrayConstr | setOrTableConstr + #| | castExpr + #| tupleConstr = '(' optInd (exprColonEqExpr comma?)* optPar ')' + #| arrayConstr = '[' optInd (exprColonEqExpr comma?)* optPar ']' case p.tok.tokType - of tkSymbol: + of tkSymbol: result = 
newIdentNodeP(p.tok.ident, p) getTok(p) result = parseGStrLit(p, result) of tkAccent: result = parseSymbol(p) # literals - of tkIntLit: + of tkIntLit: result = newIntNodeP(nkIntLit, p.tok.iNumber, p) setBaseFlags(result, p.tok.base) getTok(p) @@ -493,8 +552,13 @@ proc identOrLiteral(p: var TParser): PNode = result = ast.emptyNode proc primarySuffix(p: var TParser, r: PNode): PNode = + #| primarySuffix = '(' (exprColonEqExpr comma?)* ')' doBlocks? + #| | doBlocks + #| | '.' optInd ('type' | 'addr' | symbol) generalizedLit? + #| | '[' optInd indexExprList optPar ']' + #| | '{' optInd indexExprList optPar '}' result = r - while true: + while p.tok.indent < 0: case p.tok.tokType of tkParLe: var a = result @@ -524,64 +588,68 @@ type proc primary(p: var TParser, mode: TPrimaryMode): PNode -proc lowestExprAux(p: var TParser, limit: int, mode: TPrimaryMode): PNode = +proc simpleExprAux(p: var TParser, limit: int, mode: TPrimaryMode): PNode = result = primary(p, mode) # expand while operators have priorities higher than 'limit' var opPrec = getPrecedence(p.tok) let modeB = if mode == pmTypeDef: pmTypeDesc else: mode - while opPrec >= limit: + # the operator itself must not start on a new line: + while opPrec >= limit and p.tok.indent < 0: var leftAssoc = ord(IsLeftAssociative(p.tok)) var a = newNodeP(nkInfix, p) var opNode = newIdentNodeP(p.tok.ident, p) # skip operator: getTok(p) - optInd(p, opNode) + optInd(p, opNode) # read sub-expression with higher priority: - var b = lowestExprAux(p, opPrec + leftAssoc, modeB) + var b = simpleExprAux(p, opPrec + leftAssoc, modeB) addSon(a, opNode) addSon(a, result) addSon(a, b) result = a opPrec = getPrecedence(p.tok) -proc lowestExpr(p: var TParser, mode = pmNormal): PNode = - result = lowestExprAux(p, -1, mode) - -proc parseIfExpr(p: var TParser, kind: TNodeKind): PNode = +proc simpleExpr(p: var TParser, mode = pmNormal): PNode = + result = simpleExprAux(p, -1, mode) + +proc parseIfExpr(p: var TParser, kind: TNodeKind): PNode = + #| 
condExpr = expr colcom expr optInd + #| ('elif' expr colcom expr optInd)* + #| 'else' colcom expr + #| ifExpr = 'if' condExpr + #| whenExpr = 'when' condExpr result = newNodeP(kind, p) - while true: + while true: getTok(p) # skip `if`, `elif` var branch = newNodeP(nkElifExpr, p) addSon(branch, parseExpr(p)) - eat(p, tkColon) - optInd(p, branch) + colcom(p, branch) addSon(branch, parseExpr(p)) optInd(p, branch) addSon(result, branch) if p.tok.tokType != tkElif: break var branch = newNodeP(nkElseExpr, p) eat(p, tkElse) - eat(p, tkColon) - optInd(p, branch) + colcom(p, branch) addSon(branch, parseExpr(p)) addSon(result, branch) -proc parsePragma(p: var TParser): PNode = +proc parsePragma(p: var TParser): PNode = + #| pragma = '{.' optInd (exprColonExpr comma?)* optPar ('.}' | '}') result = newNodeP(nkPragma, p) getTok(p) optInd(p, result) - while (p.tok.tokType != tkCurlyDotRi) and (p.tok.tokType != tkCurlyRi) and - (p.tok.tokType != tkEof) and (p.tok.tokType != tkSad): + while p.tok.tokType notin {tkCurlyDotRi, tkCurlyRi, tkEof}: var a = exprColonEqExpr(p) addSon(result, a) - if p.tok.tokType == tkComma: + if p.tok.tokType == tkComma: getTok(p) - optInd(p, a) + skipComment(p, a) optPar(p) if p.tok.tokType in {tkCurlyDotRi, tkCurlyRi}: getTok(p) else: parMessage(p, errTokenExpected, ".}") proc identVis(p: var TParser): PNode = - # identifier with visability + #| identVis = symbol opr? # postfix position var a = parseSymbol(p) if p.tok.tokType == tkOpr: result = newNodeP(nkPostfix, p) @@ -592,6 +660,7 @@ proc identVis(p: var TParser): PNode = result = a proc identWithPragma(p: var TParser): PNode = + #| identWithPragma = identVis pragma? 
var a = identVis(p) if p.tok.tokType == tkCurlyDotLe: result = newNodeP(nkPragmaExpr, p) @@ -599,14 +668,18 @@ proc identWithPragma(p: var TParser): PNode = addSon(result, parsePragma(p)) else: result = a - -type + +type TDeclaredIdentFlag = enum withPragma, # identifier may have pragma withBothOptional # both ':' and '=' parts are optional TDeclaredIdentFlags = set[TDeclaredIdentFlag] proc parseIdentColonEquals(p: var TParser, flags: TDeclaredIdentFlags): PNode = + #| declColonEquals = identWithPragma (comma identWithPragma)* comma? + #| (':' optInd typeDesc)? ('=' optInd expr)? + #| identColonEquals = ident (comma ident)* comma? + #| (':' optInd typeDesc)? ('=' optInd expr)?) var a: PNode result = newNodeP(nkIdentDefs, p) while true: @@ -635,53 +708,53 @@ proc parseIdentColonEquals(p: var TParser, flags: TDeclaredIdentFlags): PNode = else: addSon(result, ast.emptyNode) -proc parseTuple(p: var TParser, indentAllowed = false): PNode = +proc parseTuple(p: var TParser, indentAllowed = false): PNode = + #| inlTupleDecl = 'tuple' + #| [' optInd (identColonEquals (comma/semicolon)?)* optPar ']' + #| extTupleDecl = 'tuple' + #| COMMENT? (IND{>} identColonEquals (IND{=} identColonEquals)*)? 
result = newNodeP(nkTupleTy, p) getTok(p) if p.tok.tokType == tkBracketLe: getTok(p) optInd(p, result) - while (p.tok.tokType == tkSymbol) or (p.tok.tokType == tkAccent): + while p.tok.tokType in {tkSymbol, tkAccent}: var a = parseIdentColonEquals(p, {}) addSon(result, a) - if p.tok.tokType notin {tkComma, tkSemicolon}: break + if p.tok.tokType notin {tkComma, tkSemicolon}: break getTok(p) - optInd(p, a) + skipComment(p, a) optPar(p) eat(p, tkBracketRi) elif indentAllowed: skipComment(p, result) - if p.tok.tokType == tkInd: - pushInd(p.lex, p.tok.indent) - getTok(p) - skipComment(p, result) - while true: - case p.tok.tokType - of tkSad: - getTok(p) - of tkSymbol, tkAccent: - var a = parseIdentColonEquals(p, {}) - skipComment(p, a) - addSon(result, a) - of tkDed: - getTok(p) - break - of tkEof: - break - else: - parMessage(p, errIdentifierExpected, p.tok) - break - popInd(p.lex) - -proc parseParamList(p: var TParser, retColon = true): PNode = + if realInd(p): + withInd(p): + skipComment(p, result) + while true: + case p.tok.tokType + of tkSymbol, tkAccent: + var a = parseIdentColonEquals(p, {}) + skipComment(p, a) + addSon(result, a) + of tkEof: break + else: + parMessage(p, errIdentifierExpected, p.tok) + break + if not sameInd(p): break + +proc parseParamList(p: var TParser, retColon = true): PNode = + #| paramList = '(' identColonEquals ^* (comma/semicolon) ')' + #| paramListArrow = paramList? ('->' optInd typeDesc)? + #| paramListColon = paramList? (':' optInd typeDesc)? 
var a: PNode result = newNodeP(nkFormalParams, p) addSon(result, ast.emptyNode) # return type - if p.tok.tokType == tkParLe: + if p.tok.tokType == tkParLe and p.tok.indent < 0: getTok(p) optInd(p, result) - while true: - case p.tok.tokType #optInd(p, a); + while true: + case p.tok.tokType of tkSymbol, tkAccent: a = parseIdentColonEquals(p, {withBothOptional}) of tkParRi: @@ -692,21 +765,24 @@ proc parseParamList(p: var TParser, retColon = true): PNode = addSon(result, a) if p.tok.tokType notin {tkComma, tkSemicolon}: break getTok(p) - optInd(p, a) + skipComment(p, a) optPar(p) eat(p, tkParRi) let hasRet = if retColon: p.tok.tokType == tkColon else: p.tok.tokType == tkOpr and IdentEq(p.tok.ident, "->") - if hasRet: + if hasRet and p.tok.indent < 0: getTok(p) optInd(p, result) result.sons[0] = parseTypeDesc(p) proc optPragmas(p: var TParser): PNode = - if p.tok.tokType == tkCurlyDotLe: result = parsePragma(p) - else: result = ast.emptyNode + if p.tok.tokType == tkCurlyDotLe and (p.tok.indent < 0 or realInd(p)): + result = parsePragma(p) + else: + result = ast.emptyNode proc parseDoBlock(p: var TParser): PNode = + #| doBlock = 'do' paramListArrow pragmas? colcom stmt let info = parLineInfo(p) getTok(p) let params = parseParamList(p, retColon=false) @@ -718,26 +794,27 @@ proc parseDoBlock(p: var TParser): PNode = pragmas = pragmas) proc parseDoBlocks(p: var TParser, call: PNode) = - while p.tok.tokType == tkDo: + #| doBlocks = doBlock ^* IND{=} + if p.tok.tokType == tkDo: addSon(call, parseDoBlock(p)) - + while sameInd(p) and p.tok.tokType == tkDo: + addSon(call, parseDoBlock(p)) + proc parseProcExpr(p: var TParser, isExpr: bool): PNode = + #| procExpr = 'proc' paramListColon pragmas? ('=' COMMENT? stmt)? 
# either a proc type or a anonymous proc - var - pragmas, params: PNode - info: TLineInfo - info = parLineInfo(p) + let info = parLineInfo(p) getTok(p) - let hasSignature = p.tok.tokType in {tkParLe, tkColon} - params = parseParamList(p) - pragmas = optPragmas(p) + let hasSignature = p.tok.tokType in {tkParLe, tkColon} and p.tok.indent < 0 + let params = parseParamList(p) + let pragmas = optPragmas(p) if p.tok.tokType == tkEquals and isExpr: getTok(p) skipComment(p, result) result = newProcNode(nkLambda, info, parseStmt(p), params = params, pragmas = pragmas) - else: + else: result = newNodeI(nkProcTy, info) if hasSignature: addSon(result, params) @@ -752,7 +829,8 @@ proc isExprStart(p: TParser): bool = result = true else: result = false -proc parseTypeDescKAux(p: var TParser, kind: TNodeKind, mode: TPrimaryMode): PNode = +proc parseTypeDescKAux(p: var TParser, kind: TNodeKind, + mode: TPrimaryMode): PNode = result = newNodeP(kind, p) getTok(p) optInd(p, result) @@ -760,16 +838,15 @@ proc parseTypeDescKAux(p: var TParser, kind: TNodeKind, mode: TPrimaryMode): PNo addSon(result, primary(p, mode)) proc parseExpr(p: var TParser): PNode = - # - #expr ::= lowestExpr - # | 'if' expr ':' expr ('elif' expr ':' expr)* 'else' ':' expr - # | 'when' expr ':' expr ('elif' expr ':' expr)* 'else' ':' expr - # + #| expr = (ifExpr + #| | whenExpr + #| | caseExpr) + #| / simpleExpr case p.tok.tokType: of tkIf: result = parseIfExpr(p, nkIfExpr) of tkWhen: result = parseIfExpr(p, nkWhenExpr) of tkCase: result = parseCase(p) - else: result = lowestExpr(p) + else: result = simpleExpr(p) # XXX needs proper support: #of tkTry: result = parseTry(p) @@ -778,7 +855,13 @@ proc parseDistinct(p: var TParser): PNode proc parseEnum(p: var TParser): PNode proc primary(p: var TParser, mode: TPrimaryMode): PNode = - # prefix operator? 
+ #| typeKeyw = 'var' | 'ref' | 'ptr' | 'shared' | 'type' | 'tuple' + #| | 'proc' | 'iterator' | 'distinct' | 'object' | 'enum' + #| primary = typeKeyw typeDescK + #| / prefixOperator* identOrLiteral primarySuffix* + #| / 'addr' primary + #| / 'static' primary + #| / 'bind' primary if isOperator(p.tok): let isSigil = IsSigilLike(p.tok) result = newNodeP(nkPrefix, p) @@ -831,13 +914,13 @@ proc primary(p: var TParser, mode: TPrimaryMode): PNode = getTok(p) of tkAddr: result = newNodeP(nkAddr, p) - getTok(p) + getTokNoInd(p) addSon(result, primary(p, pmNormal)) of tkStatic: result = newNodeP(nkStaticExpr, p) - getTok(p) + getTokNoInd(p) addSon(result, primary(p, pmNormal)) - of tkBind: + of tkBind: result = newNodeP(nkBind, p) getTok(p) optInd(p, result) @@ -847,14 +930,32 @@ proc primary(p: var TParser, mode: TPrimaryMode): PNode = if mode != pmSkipSuffix: result = primarySuffix(p, result) -proc parseTypeDesc(p: var TParser): PNode = - result = lowestExpr(p, pmTypeDesc) +proc parseTypeDesc(p: var TParser): PNode = + #| typeDesc = simpleExpr + result = simpleExpr(p, pmTypeDesc) proc parseTypeDefAux(p: var TParser): PNode = - result = lowestExpr(p, pmTypeDef) + #| typeDefAux = simpleExpr + result = simpleExpr(p, pmTypeDef) + +proc makeCall(n: PNode): PNode = + if n.kind in nkCallKinds: + result = n + else: + result = newNodeI(nkCall, n.info) + result.add n proc parseExprStmt(p: var TParser): PNode = - var a = lowestExpr(p) + #| exprStmt = simpleExpr + #| (( '=' optInd expr ) + #| / ( expr ^+ comma + #| doBlocks + #| / ':' stmt? ( IND{=} 'of' exprList ':' stmt + #| | IND{=} 'elif' expr ':' stmt + #| | IND{=} 'except' exprList ':' stmt + #| | IND{=} 'else' ':' stmt )* + #| ))? 
+ var a = simpleExpr(p) if p.tok.tokType == tkEquals: getTok(p) optInd(p, result) @@ -863,33 +964,31 @@ proc parseExprStmt(p: var TParser): PNode = addSon(result, a) addSon(result, b) else: - var call = if a.kind == nkCall: a - else: newNode(nkCommand, a.info, @[a]) - while true: - if not isExprStart(p): break - var e = parseExpr(p) - addSon(call, e) - if p.tok.tokType != tkComma: break - getTok(p) - optInd(p, a) - if p.tok.tokType == tkDo: - parseDoBlocks(p, call) - return - result = if call.sonsLen <= 1: a - else: call - if p.tok.tokType == tkColon: - result = call + if p.tok.indent < 0 and isExprStart(p): + result = newNode(nkCommand, a.info, @[a]) + while true: + var e = parseExpr(p) + addSon(result, e) + if p.tok.tokType != tkComma: break + getTok(p) + optInd(p, result) + else: + result = a + if p.tok.tokType == tkDo and p.tok.indent < 0: + result = makeCall(result) + parseDoBlocks(p, result) + return result + if p.tok.tokType == tkColon and p.tok.indent < 0: + result = makeCall(result) getTok(p) skipComment(p, result) - if p.tok.tokType == tkSad: getTok(p) if p.tok.TokType notin {tkOf, tkElif, tkElse, tkExcept}: let body = parseStmt(p) addSon(result, newProcNode(nkDo, body.info, body)) - while true: - if p.tok.tokType == tkSad: getTok(p) + while sameInd(p): var b: PNode case p.tok.tokType - of tkOf: + of tkOf: b = newNodeP(nkOfBranch, p) exprList(p, tkColon, b) of tkElif: @@ -900,7 +999,7 @@ proc parseExprStmt(p: var TParser): PNode = eat(p, tkColon) of tkExcept: b = newNodeP(nkExceptBranch, p) - qualifiedIdentListAux(p, tkColon, b) + exprList(p, tkColon, b) skipComment(p, b) of tkElse: b = newNodeP(nkElse, p) @@ -912,6 +1011,9 @@ proc parseExprStmt(p: var TParser): PNode = if b.kind == nkElse: break proc parseImport(p: var TParser, kind: TNodeKind): PNode = + #| importStmt = 'import' optInd expr + #| ((comma expr)* + #| / 'except' optInd (expr ^+ comma)) result = newNodeP(kind, p) getTok(p) # skip `import` or `export` optInd(p, result) @@ -922,29 +1024,33 @@ 
proc parseImport(p: var TParser, kind: TNodeKind): PNode = result.kind = succ(kind) getTok(p) optInd(p, result) - while p.tok.tokType notin {tkEof, tkSad, tkDed}: + while true: + # was: while p.tok.tokType notin {tkEof, tkSad, tkDed}: a = parseExpr(p) if a.kind == nkEmpty: break addSon(result, a) if p.tok.tokType != tkComma: break getTok(p) optInd(p, a) - expectNl(p) + #expectNl(p) proc parseIncludeStmt(p: var TParser): PNode = + #| includeStmt = 'include' optInd expr ^+ comma result = newNodeP(nkIncludeStmt, p) getTok(p) # skip `import` or `include` optInd(p, result) - while p.tok.tokType notin {tkEof, tkSad, tkDed}: + while true: + # was: while p.tok.tokType notin {tkEof, tkSad, tkDed}: var a = parseExpr(p) if a.kind == nkEmpty: break addSon(result, a) if p.tok.tokType != tkComma: break getTok(p) optInd(p, a) - expectNl(p) + #expectNl(p) -proc parseFromStmt(p: var TParser): PNode = +proc parseFromStmt(p: var TParser): PNode = + #| fromStmt = 'from' expr 'import' optInd expr (comma expr)* result = newNodeP(nkFromStmt, p) getTok(p) # skip `from` optInd(p, result) @@ -952,38 +1058,41 @@ proc parseFromStmt(p: var TParser): PNode = addSon(result, a) #optInd(p, a); eat(p, tkImport) optInd(p, result) - while p.tok.tokType notin {tkEof, tkSad, tkDed}: + while true: + # p.tok.tokType notin {tkEof, tkSad, tkDed}: a = parseExpr(p) if a.kind == nkEmpty: break addSon(result, a) if p.tok.tokType != tkComma: break getTok(p) optInd(p, a) - expectNl(p) + #expectNl(p) proc parseReturnOrRaise(p: var TParser, kind: TNodeKind): PNode = + #| returnStmt = 'return' optInd expr? + #| raiseStmt = 'raise' optInd expr? + #| yieldStmt = 'yield' optInd expr? + #| discardStmt = 'discard' optInd expr? + #| breakStmt = 'break' optInd expr? + #| continueStmt = 'break' optInd expr? 
result = newNodeP(kind, p) getTok(p) - optInd(p, result) - case p.tok.tokType - of tkEof, tkSad, tkDed: addSon(result, ast.emptyNode) - else: addSon(result, parseExpr(p)) - -proc parseYieldOrDiscard(p: var TParser, kind: TNodeKind): PNode = - result = newNodeP(kind, p) - getTok(p) - optInd(p, result) - addSon(result, parseExpr(p)) - -proc parseBreakOrContinue(p: var TParser, kind: TNodeKind): PNode = - result = newNodeP(kind, p) - getTok(p) - optInd(p, result) - case p.tok.tokType - of tkEof, tkSad, tkDed: addSon(result, ast.emptyNode) - else: addSon(result, parseSymbol(p)) + if p.tok.tokType == tkComment: + skipComment(p, result) + addSon(result, ast.emptyNode) + elif p.tok.indent >= 0 and p.tok.indent <= p.currInd or + p.tok.tokType == tkEof: + # NL terminates: + addSon(result, ast.emptyNode) + else: + addSon(result, parseExpr(p)) proc parseIfOrWhen(p: var TParser, kind: TNodeKind): PNode = + #| condStmt = expr colcom stmt COMMENT? + #| (IND{=} 'elif' expr colcom stmt)* + #| (IND{=} 'else' colcom stmt)? 
+ #| ifStmt = 'if' condStmt + #| whenStmt = 'when' condStmt result = newNodeP(kind, p) while true: getTok(p) # skip `if`, `when`, `elif` @@ -995,8 +1104,8 @@ proc parseIfOrWhen(p: var TParser, kind: TNodeKind): PNode = addSon(branch, parseStmt(p)) skipComment(p, branch) addSon(result, branch) - if p.tok.tokType != tkElif: break - if p.tok.tokType == tkElse: + if p.tok.tokType != tkElif or not sameOrNoInd(p): break + if p.tok.tokType == tkElse and sameOrNoInd(p): var branch = newNodeP(nkElse, p) eat(p, tkElse) eat(p, tkColon) @@ -1004,17 +1113,24 @@ proc parseIfOrWhen(p: var TParser, kind: TNodeKind): PNode = addSon(branch, parseStmt(p)) addSon(result, branch) -proc parseWhile(p: var TParser): PNode = +proc parseWhile(p: var TParser): PNode = + #| whileStmt = 'while' expr colcom stmt result = newNodeP(nkWhileStmt, p) getTok(p) optInd(p, result) addSon(result, parseExpr(p)) - eat(p, tkColon) - skipComment(p, result) + colcom(p, result) addSon(result, parseStmt(p)) -proc parseCase(p: var TParser): PNode = - var +proc parseCase(p: var TParser): PNode = + #| ofBranch = 'of' exprList colcom stmt + #| ofBranches = ofBranch (IND{=} ofBranch)* + #| (IND{=} 'elif' expr colcom stmt)* + #| (IND{=} 'else' colcom stmt)? + #| caseStmt = 'case' expr ':'? COMMENT? 
+ #| (IND{>} ofBranches DED + #| | IND{=} ofBranches) + var b: PNode inElif= false wasIndented = false @@ -1024,57 +1140,57 @@ proc parseCase(p: var TParser): PNode = if p.tok.tokType == tkColon: getTok(p) skipComment(p, result) - if p.tok.tokType == tkInd: - pushInd(p.lex, p.tok.indent) - getTok(p) + let oldInd = p.currInd + if realInd(p): + p.currInd = p.tok.indent wasIndented = true - while true: - if p.tok.tokType == tkSad: getTok(p) + while sameInd(p): case p.tok.tokType - of tkOf: - if inElif: break + of tkOf: + if inElif: break b = newNodeP(nkOfBranch, p) exprList(p, tkColon, b) - of tkElif: + of tkElif: inElif = true b = newNodeP(nkElifBranch, p) getTok(p) optInd(p, b) addSon(b, parseExpr(p)) eat(p, tkColon) - of tkElse: + of tkElse: b = newNodeP(nkElse, p) getTok(p) eat(p, tkColon) - else: break + else: break skipComment(p, b) addSon(b, parseStmt(p)) addSon(result, b) if b.kind == nkElse: break if wasIndented: - if p.tok.tokType != tkEof: eat(p, tkDed) - popInd(p.lex) + p.currInd = oldInd -proc parseTry(p: var TParser): PNode = +proc parseTry(p: var TParser): PNode = + #| tryStmt = 'try' colcom stmt &(IND{=}? 'except'|'finally') + #| (IND{=}? 'except' exprList colcom stmt)* + #| (IND{=}? 'finally' colcom stmt)? 
result = newNodeP(nkTryStmt, p) getTok(p) eat(p, tkColon) skipComment(p, result) addSon(result, parseStmt(p)) var b: PNode = nil - while true: - if p.tok.tokType == tkSad: getTok(p) + while sameOrNoInd(p): case p.tok.tokType of tkExcept: b = newNodeP(nkExceptBranch, p) - qualifiedIdentListAux(p, tkColon, b) + exprList(p, tkColon, b) of tkFinally: b = newNodeP(nkFinally, p) - getTok(p) + getTokNoInd(p) eat(p, tkColon) - else: break + else: break skipComment(p, b) addSon(b, parseStmt(p)) addSon(result, b) @@ -1082,52 +1198,48 @@ proc parseTry(p: var TParser): PNode = if b == nil: parMessage(p, errTokenExpected, "except") proc parseExceptBlock(p: var TParser, kind: TNodeKind): PNode = + #| exceptBlock = 'except' colcom stmt result = newNodeP(kind, p) - getTok(p) - eat(p, tkColon) - skipComment(p, result) + getTokNoInd(p) + colcom(p, result) addSon(result, parseStmt(p)) -proc parseFor(p: var TParser): PNode = +proc parseFor(p: var TParser): PNode = + #| forStmt = 'for' symbol (comma symbol)* 'in' expr colcom stmt result = newNodeP(nkForStmt, p) - getTok(p) - optInd(p, result) + getTokNoInd(p) var a = parseSymbol(p) addSon(result, a) - while p.tok.tokType == tkComma: + while p.tok.tokType == tkComma: getTok(p) optInd(p, a) a = parseSymbol(p) addSon(result, a) eat(p, tkIn) addSon(result, parseExpr(p)) - eat(p, tkColon) - skipComment(p, result) + colcom(p, result) addSon(result, parseStmt(p)) proc parseBlock(p: var TParser): PNode = + #| blockStmt = 'block' symbol? 
colcom stmt result = newNodeP(nkBlockStmt, p) - getTok(p) - optInd(p, result) - case p.tok.tokType - of tkEof, tkSad, tkDed, tkColon: addSon(result, ast.emptyNode) + getTokNoInd(p) + if p.tok.tokType == tkColon: addSon(result, ast.emptyNode) else: addSon(result, parseSymbol(p)) - eat(p, tkColon) - skipComment(p, result) + colcom(p, result) addSon(result, parseStmt(p)) proc parseStatic(p: var TParser): PNode = + #| staticStmt = 'static' colcom stmt result = newNodeP(nkStaticStmt, p) - getTok(p) - optInd(p, result) - eat(p, tkColon) - skipComment(p, result) + getTokNoInd(p) + colcom(p, result) addSon(result, parseStmt(p)) -proc parseAsm(p: var TParser): PNode = +proc parseAsm(p: var TParser): PNode = + #| asmStmt = 'asm' pragma? (STR_LIT | RSTR_LIT | TRIPLE_STR_LIT) result = newNodeP(nkAsmStmt, p) - getTok(p) - optInd(p, result) + getTokNoInd(p) if p.tok.tokType == tkCurlyDotLe: addSon(result, parsePragma(p)) else: addSon(result, ast.emptyNode) case p.tok.tokType @@ -1141,7 +1253,8 @@ proc parseAsm(p: var TParser): PNode = return getTok(p) -proc parseGenericParam(p: var TParser): PNode = +proc parseGenericParam(p: var TParser): PNode = + #| genericParam = symbol (comma symbol)* (colon expr)? ('=' optInd expr)? 
var a: PNode result = newNodeP(nkIdentDefs, p) while true: @@ -1168,89 +1281,95 @@ proc parseGenericParam(p: var TParser): PNode = addSon(result, ast.emptyNode) proc parseGenericParamList(p: var TParser): PNode = + #| genericParamList = '[' optInd + #| genericParam ^* (comma/semicolon) optPar ']' result = newNodeP(nkGenericParams, p) getTok(p) optInd(p, result) - while (p.tok.tokType == tkSymbol) or (p.tok.tokType == tkAccent): + while p.tok.tokType in {tkSymbol, tkAccent}: var a = parseGenericParam(p) addSon(result, a) if p.tok.tokType notin {tkComma, tkSemicolon}: break getTok(p) - optInd(p, a) + skipComment(p, a) optPar(p) eat(p, tkBracketRi) proc parsePattern(p: var TParser): PNode = + #| pattern = '{' stmt '}' eat(p, tkCurlyLe) result = parseStmt(p) eat(p, tkCurlyRi) +proc validInd(p: var TParser): bool = + result = p.tok.indent < 0 or p.tok.indent > p.currInd + proc parseRoutine(p: var TParser, kind: TNodeKind): PNode = + #| indAndComment = (IND{>} COMMENT)? | COMMENT? + #| routine = optInd identVis pattern? genericParamList? + #| paramListColon pragma? ('=' COMMENT? stmt)? 
indAndComment result = newNodeP(kind, p) getTok(p) optInd(p, result) addSon(result, identVis(p)) - if p.tok.tokType == tkCurlyLe: addSon(result, parsePattern(p)) + if p.tok.tokType == tkCurlyLe and p.validInd: addSon(result, p.parsePattern) else: addSon(result, ast.emptyNode) - if p.tok.tokType == tkBracketLe: addSon(result, parseGenericParamList(p)) - else: addSon(result, ast.emptyNode) - addSon(result, parseParamList(p)) - if p.tok.tokType == tkCurlyDotLe: addSon(result, parsePragma(p)) + if p.tok.tokType == tkBracketLe and p.validInd: + result.add(p.parseGenericParamList) + else: + addSon(result, ast.emptyNode) + addSon(result, p.parseParamList) + if p.tok.tokType == tkCurlyDotLe and p.validInd: addSon(result, p.parsePragma) else: addSon(result, ast.emptyNode) # empty exception tracking: addSon(result, ast.emptyNode) - if p.tok.tokType == tkEquals: + if p.tok.tokType == tkEquals and p.validInd: getTok(p) skipComment(p, result) addSon(result, parseStmt(p)) - else: + else: addSon(result, ast.emptyNode) - indAndComment(p, result) # XXX: document this in the grammar! + indAndComment(p, result) proc newCommentStmt(p: var TParser): PNode = + #| commentStmt = COMMENT result = newNodeP(nkCommentStmt, p) result.info.line = result.info.line - int16(1) - int16(p.tok.iNumber) + getTok(p) -type +type TDefParser = proc (p: var TParser): PNode {.nimcall.} -proc parseSection(p: var TParser, kind: TNodeKind, - defparser: TDefParser): PNode = +proc parseSection(p: var TParser, kind: TNodeKind, + defparser: TDefParser): PNode = + #| section(p) = COMMENT? 
p / (IND{>} (p / COMMENT)^+IND{=} DED) result = newNodeP(kind, p) getTok(p) skipComment(p, result) - case p.tok.tokType - of tkInd: - pushInd(p.lex, p.tok.indent) - getTok(p) - skipComment(p, result) - while true: - case p.tok.tokType - of tkSad: - getTok(p) - of tkSymbol, tkAccent: - var a = defparser(p) - skipComment(p, a) - addSon(result, a) - of tkDed: - getTok(p) - break - of tkEof: - break # BUGFIX - of tkComment: - var a = newCommentStmt(p) - skipComment(p, a) - addSon(result, a) - else: - parMessage(p, errIdentifierExpected, p.tok) - break - popInd(p.lex) - of tkSymbol, tkAccent, tkParLe: + if realInd(p): + withInd(p): + skipComment(p, result) + while sameInd(p): + case p.tok.tokType + of tkSymbol, tkAccent: + var a = defparser(p) + skipComment(p, a) + addSon(result, a) + of tkComment: + var a = newCommentStmt(p) + addSon(result, a) + else: + parMessage(p, errIdentifierExpected, p.tok) + break + if result.len == 0: parMessage(p, errIdentifierExpected, p.tok) + elif p.tok.tokType in {tkSymbol, tkAccent, tkParLe} and p.tok.indent < 0: # tkParLe is allowed for ``var (x, y) = ...`` tuple parsing addSon(result, defparser(p)) - else: parMessage(p, errIdentifierExpected, p.tok) + else: + parMessage(p, errIdentifierExpected, p.tok) -proc parseConstant(p: var TParser): PNode = +proc parseConstant(p: var TParser): PNode = + #| constant = identWithPragma (colon typedesc)? '=' optInd expr indAndComment result = newNodeP(nkConstDef, p) addSon(result, identWithPragma(p)) if p.tok.tokType == tkColon: @@ -1262,60 +1381,73 @@ proc parseConstant(p: var TParser): PNode = eat(p, tkEquals) optInd(p, result) addSon(result, parseExpr(p)) - indAndComment(p, result) # XXX: special extension! + indAndComment(p, result) proc parseEnum(p: var TParser): PNode = - var a, b: PNode + #| enum = 'enum' optInd (symbol optInd ('=' optInd expr COMMENT?)? 
comma?)+ result = newNodeP(nkEnumTy, p) - a = nil getTok(p) addSon(result, ast.emptyNode) optInd(p, result) - while true: - case p.tok.tokType - of tkEof, tkSad, tkDed: break - else: a = parseSymbol(p) - optInd(p, a) - if p.tok.tokType == tkEquals: + while true: + var a = parseSymbol(p) + if p.tok.indent >= 0 and p.tok.indent <= p.currInd: + add(result, a) + break + if p.tok.tokType == tkEquals and p.tok.indent < 0: getTok(p) optInd(p, a) - b = a + var b = a a = newNodeP(nkEnumFieldDef, p) addSon(a, b) addSon(a, parseExpr(p)) skipComment(p, a) - if p.tok.tokType == tkComma: + if p.tok.tokType == tkComma and p.tok.indent < 0: getTok(p) - optInd(p, a) + rawSkipComment(p, a) + else: + skipComment(p, a) addSon(result, a) + if p.tok.indent >= 0 and p.tok.indent <= p.currInd or + p.tok.tokType == tkEof: + break if result.len <= 1: lexMessage(p.lex, errIdentifierExpected, prettyTok(p.tok)) proc parseObjectPart(p: var TParser): PNode proc parseObjectWhen(p: var TParser): PNode = + #| objectWhen = 'when' expr colcom objectPart COMMENT? + #| ('elif' expr colcom objectPart COMMENT?)* + #| ('else' colcom objectPart COMMENT?)? 
result = newNodeP(nkRecWhen, p) - while true: + while sameInd(p): getTok(p) # skip `when`, `elif` var branch = newNodeP(nkElifBranch, p) optInd(p, branch) addSon(branch, parseExpr(p)) - eat(p, tkColon) - skipComment(p, branch) + colcom(p, branch) addSon(branch, parseObjectPart(p)) skipComment(p, branch) addSon(result, branch) - if p.tok.tokType != tkElif: break - if p.tok.tokType == tkElse: + if p.tok.tokType != tkElif: break + if p.tok.tokType == tkElse and sameInd(p): var branch = newNodeP(nkElse, p) eat(p, tkElse) - eat(p, tkColon) - skipComment(p, branch) + colcom(p, branch) addSon(branch, parseObjectPart(p)) + skipComment(p, branch) addSon(result, branch) proc parseObjectCase(p: var TParser): PNode = + #| objectBranch = 'of' exprList colcom objectPart + #| objectBranches = objectBranch (IND{=} objectBranch)* + #| (IND{=} 'elif' expr colcom objectPart)* + #| (IND{=} 'else' colcom objectPart)? + #| objectCase = 'case' identWithPragma ':' typeDesc ':'? COMMENT? + #| (IND{>} objectBranches DED + #| | IND{=} objectBranches) result = newNodeP(nkRecCase, p) - getTok(p) + getTokNoInd(p) var a = newNodeP(nkIdentDefs, p) addSon(a, identWithPragma(p)) eat(p, tkColon) @@ -1325,12 +1457,11 @@ proc parseObjectCase(p: var TParser): PNode = if p.tok.tokType == tkColon: getTok(p) skipComment(p, result) var wasIndented = false - if p.tok.tokType == tkInd: - pushInd(p.lex, p.tok.indent) - getTok(p) + let oldInd = p.currInd + if realInd(p): + p.currInd = p.tok.indent wasIndented = true - while true: - if p.tok.tokType == tkSad: getTok(p) + while sameInd(p): var b: PNode case p.tok.tokType of tkOf: @@ -1348,72 +1479,79 @@ proc parseObjectCase(p: var TParser): PNode = fields = newNodeP(nkNilLit, p) # don't break further semantic checking addSon(b, fields) addSon(result, b) - if b.kind == nkElse: break + if b.kind == nkElse: break if wasIndented: - eat(p, tkDed) - popInd(p.lex) + p.currInd = oldInd proc parseObjectPart(p: var TParser): PNode = - case p.tok.tokType - of tkInd: + #| 
objectPart = IND{>} objectPart^+IND{=} DED + #| / objectWhen / objectCase / 'nil' / declColonEquals + if realInd(p): result = newNodeP(nkRecList, p) - pushInd(p.lex, p.tok.indent) - getTok(p) - skipComment(p, result) - while true: - case p.tok.tokType - of tkSad: - getTok(p) - of tkCase, tkWhen, tkSymbol, tkAccent, tkNil: - addSon(result, parseObjectPart(p)) - of tkDed: - getTok(p) - break - of tkEof: - break - else: - parMessage(p, errIdentifierExpected, p.tok) - break - popInd(p.lex) - of tkWhen: - result = parseObjectWhen(p) - of tkCase: - result = parseObjectCase(p) - of tkSymbol, tkAccent: - result = parseIdentColonEquals(p, {withPragma}) - skipComment(p, result) - of tkNil: - result = newNodeP(nkNilLit, p) - getTok(p) - else: result = ast.emptyNode + withInd(p): + rawSkipComment(p, result) + while sameInd(p): + case p.tok.tokType + of tkCase, tkWhen, tkSymbol, tkAccent, tkNil: + addSon(result, parseObjectPart(p)) + else: + parMessage(p, errIdentifierExpected, p.tok) + break + else: + case p.tok.tokType + of tkWhen: + result = parseObjectWhen(p) + of tkCase: + result = parseObjectCase(p) + of tkSymbol, tkAccent: + result = parseIdentColonEquals(p, {withPragma}) + skipComment(p, result) + of tkNil: + result = newNodeP(nkNilLit, p) + getTok(p) + else: + result = ast.emptyNode proc parseObject(p: var TParser): PNode = + #| object = 'object' pragma? ('of' typeDesc)? COMMENT? 
objectPart result = newNodeP(nkObjectTy, p) getTok(p) - if p.tok.tokType == tkCurlyDotLe: addSon(result, parsePragma(p)) - else: addSon(result, ast.emptyNode) - if p.tok.tokType == tkOf: + if p.tok.tokType == tkCurlyDotLe and p.validInd: + addSon(result, parsePragma(p)) + else: + addSon(result, ast.emptyNode) + if p.tok.tokType == tkOf and p.tok.indent < 0: var a = newNodeP(nkOfInherit, p) getTok(p) addSon(a, parseTypeDesc(p)) addSon(result, a) else: addSon(result, ast.emptyNode) - skipComment(p, result) + if p.tok.tokType == tkComment: + skipComment(p, result) + # an initial IND{>} HAS to follow: + if not realInd(p): + addSon(result, emptyNode) + return addSon(result, parseObjectPart(p)) proc parseDistinct(p: var TParser): PNode = + #| distinct = 'distinct' optInd typeDesc result = newNodeP(nkDistinctTy, p) getTok(p) optInd(p, result) addSon(result, parseTypeDesc(p)) proc parseTypeDef(p: var TParser): PNode = + #| typeDef = identWithPragma genericParamList? '=' optInd typeDefAux + #| indAndComment? result = newNodeP(nkTypeDef, p) addSon(result, identWithPragma(p)) - if p.tok.tokType == tkBracketLe: addSon(result, parseGenericParamList(p)) - else: addSon(result, ast.emptyNode) - if p.tok.tokType == tkEquals: + if p.tok.tokType == tkBracketLe and p.validInd: + addSon(result, parseGenericParamList(p)) + else: + addSon(result, ast.emptyNode) + if p.tok.tokType == tkEquals: getTok(p) optInd(p, result) addSon(result, parseTypeDefAux(p)) @@ -1421,16 +1559,17 @@ proc parseTypeDef(p: var TParser): PNode = addSon(result, ast.emptyNode) indAndComment(p, result) # special extension! 
-proc parseVarTuple(p: var TParser): PNode = +proc parseVarTuple(p: var TParser): PNode = + #| varTuple = '(' optInd identWithPragma ^+ comma optPar ')' '=' optInd expr result = newNodeP(nkVarTuple, p) getTok(p) # skip '(' optInd(p, result) - while (p.tok.tokType == tkSymbol) or (p.tok.tokType == tkAccent): + while p.tok.tokType in {tkSymbol, tkAccent}: var a = identWithPragma(p) addSon(result, a) if p.tok.tokType != tkComma: break getTok(p) - optInd(p, a) + skipComment(p, a) addSon(result, ast.emptyNode) # no type desc optPar(p) eat(p, tkParRi) @@ -1438,12 +1577,15 @@ proc parseVarTuple(p: var TParser): PNode = optInd(p, result) addSon(result, parseExpr(p)) -proc parseVariable(p: var TParser): PNode = +proc parseVariable(p: var TParser): PNode = + #| variable = (varTuple / identColonEquals) indAndComment if p.tok.tokType == tkParLe: result = parseVarTuple(p) else: result = parseIdentColonEquals(p, {withPragma}) - indAndComment(p, result) # special extension! + indAndComment(p, result) proc parseBind(p: var TParser, k: TNodeKind): PNode = + #| bindStmt = 'bind' optInd qualifiedIdent ^+ comma + #| mixinStmt = 'mixin' optInd qualifiedIdent ^+ comma result = newNodeP(k, p) getTok(p) optInd(p, result) @@ -1452,26 +1594,32 @@ proc parseBind(p: var TParser, k: TNodeKind): PNode = addSon(result, a) if p.tok.tokType != tkComma: break getTok(p) - optInd(p, a) - expectNl(p) + optInd(p, a) + #expectNl(p) proc parseStmtPragma(p: var TParser): PNode = + #| pragmaStmt = pragma (':' COMMENT? stmt)? result = parsePragma(p) - if p.tok.tokType == tkColon: + if p.tok.tokType == tkColon and p.tok.indent < 0: let a = result result = newNodeI(nkPragmaBlock, a.info) getTok(p) + skipComment(p, result) result.add a result.add parseStmt(p) proc simpleStmt(p: var TParser): PNode = + #| simpleStmt = ((returnStmt | raiseStmt | yieldStmt | discardStmt | breakStmt + #| | continueStmt | pragmaStmt | importStmt | exportStmt | fromStmt + #| | includeStmt | commentStmt) / exprStmt) COMMENT? 
+ #| case p.tok.tokType of tkReturn: result = parseReturnOrRaise(p, nkReturnStmt) of tkRaise: result = parseReturnOrRaise(p, nkRaiseStmt) of tkYield: result = parseReturnOrRaise(p, nkYieldStmt) of tkDiscard: result = parseReturnOrRaise(p, nkDiscardStmt) - of tkBreak: result = parseBreakOrContinue(p, nkBreakStmt) - of tkContinue: result = parseBreakOrContinue(p, nkContinueStmt) + of tkBreak: result = parseReturnOrRaise(p, nkBreakStmt) + of tkContinue: result = parseReturnOrRaise(p, nkContinueStmt) of tkCurlyDotLe: result = parseStmtPragma(p) of tkImport: result = parseImport(p, nkImportStmt) of tkExport: result = parseImport(p, nkExportStmt) @@ -1481,9 +1629,23 @@ proc simpleStmt(p: var TParser): PNode = else: if isExprStart(p): result = parseExprStmt(p) else: result = ast.emptyNode - if result.kind != nkEmpty: skipComment(p, result) + if result.kind notin {nkEmpty, nkCommentStmt}: skipComment(p, result) -proc complexOrSimpleStmt(p: var TParser): PNode = +proc complexOrSimpleStmt(p: var TParser): PNode = + #| complexOrSimpleStmt = (ifStmt | whenStmt | whileStmt + #| | tryStmt | finallyStmt | exceptStmt | forStmt + #| | blockStmt | staticStmt | asmStmt + #| | 'proc' routine + #| | 'method' routine + #| | 'iterator' routine + #| | 'macro' routine + #| | 'template' routine + #| | 'converter' routine + #| | 'type' section(typeDef) + #| | 'const' section(constant) + #| | ('let' | 'var') section(variable) + #| | bindStmt | mixinStmt) + #| / simpleStmt case p.tok.tokType of tkIf: result = parseIfOrWhen(p, nkIfStmt) of tkWhile: result = parseWhile(p) @@ -1510,26 +1672,27 @@ proc complexOrSimpleStmt(p: var TParser): PNode = of tkMixin: result = parseBind(p, nkMixinStmt) else: result = simpleStmt(p) -proc parseStmt(p: var TParser): PNode = - if p.tok.tokType == tkInd: +proc parseStmt(p: var TParser): PNode = + #| stmt = (IND{>} complexOrSimpleStmt^+(IND{=} / ';') DED) + #| / simpleStmt + if p.tok.indent > p.currInd: result = newNodeP(nkStmtList, p) - pushInd(p.lex, 
p.tok.indent) - getTok(p) - while true: - case p.tok.tokType - of tkSad, tkSemicolon: getTok(p) - of tkEof: break - of tkDed: - getTok(p) - break - else: + withInd(p): + while true: + if p.tok.indent == p.currInd: + nil + elif p.tok.tokType == tkSemicolon: + while p.tok.tokType == tkSemicolon: getTok(p) + else: + if p.tok.indent > p.currInd: + parMessage(p, errInvalidIndentation) + break var a = complexOrSimpleStmt(p) - if a.kind == nkEmpty: - # XXX this needs a proper analysis; - if isKeyword(p.tok.tokType): parMessage(p, errInvalidIndentation) - break - addSon(result, a) - popInd(p.lex) + if a.kind != nkEmpty: + addSon(result, a) + else: + parMessage(p, errExprExpected, p.tok) + getTok(p) else: # the case statement is only needed for better error messages: case p.tok.tokType @@ -1540,36 +1703,32 @@ proc parseStmt(p: var TParser): PNode = else: result = simpleStmt(p) if result.kind == nkEmpty: parMessage(p, errExprExpected, p.tok) - if p.tok.tokType == tkSemicolon: getTok(p) - if p.tok.tokType == tkSad: getTok(p) + while p.tok.tokType == tkSemicolon: getTok(p) proc parseAll(p: var TParser): PNode = result = newNodeP(nkStmtList, p) - while true: - case p.tok.tokType - of tkSad: getTok(p) - of tkDed, tkInd: - parMessage(p, errInvalidIndentation) + while p.tok.tokType != tkEof: + var a = complexOrSimpleStmt(p) + if a.kind != nkEmpty: + addSon(result, a) + else: + parMessage(p, errExprExpected, p.tok) + # bugfix: consume a token here to prevent an endless loop: getTok(p) - of tkEof: break - else: - var a = complexOrSimpleStmt(p) - if a.kind == nkEmpty: - parMessage(p, errExprExpected, p.tok) - # bugfix: consume a token here to prevent an endless loop: - getTok(p) - addSon(result, a) + if p.tok.indent != 0: + parMessage(p, errInvalidIndentation) -proc parseTopLevelStmt(p: var TParser): PNode = +proc parseTopLevelStmt(p: var TParser): PNode = result = ast.emptyNode - while true: + while true: + if p.tok.indent != 0: + if p.firstTok and p.tok.indent < 0: nil + else: 
parMessage(p, errInvalidIndentation) + p.firstTok = false case p.tok.tokType - of tkSad, tkSemicolon: getTok(p) - of tkDed, tkInd: - parMessage(p, errInvalidIndentation) - getTok(p) - of tkEof: break - else: + of tkSemicolon: getTok(p) + of tkEof: break + else: result = complexOrSimpleStmt(p) if result.kind == nkEmpty: parMessage(p, errExprExpected, p.tok) break @@ -1583,4 +1742,3 @@ proc parseString(s: string, filename: string = "", line: int = 0): PNode = result = parser.parseAll CloseParser(parser) - diff --git a/compiler/renderer.nim b/compiler/renderer.nim index b4ef52100..1333e40c4 100644 --- a/compiler/renderer.nim +++ b/compiler/renderer.nim @@ -81,13 +81,13 @@ proc addTok(g: var TSrcGen, kind: TTokType, s: string) = proc addPendingNL(g: var TSrcGen) = if g.pendingNL >= 0: - addTok(g, tkInd, "\n" & repeatChar(g.pendingNL)) + addTok(g, tkSpaces, "\n" & repeatChar(g.pendingNL)) g.lineLen = g.pendingNL g.pendingNL = - 1 proc putNL(g: var TSrcGen, indent: int) = if g.pendingNL >= 0: addPendingNL(g) - else: addTok(g, tkInd, "\n") + else: addTok(g, tkSpaces, "\n") g.pendingNL = indent g.lineLen = indent diff --git a/compiler/sem.nim b/compiler/sem.nim index 805af9e31..88249fedb 100644 --- a/compiler/sem.nim +++ b/compiler/sem.nim @@ -58,6 +58,37 @@ proc fitNode(c: PContext, formal: PType, arg: PNode): PNode = result = copyNode(arg) result.typ = formal +proc commonType*(x, y: PType): PType = + # new type relation that is used for array constructors, + # if expressions, etc.: + if x == nil: return y + var a = skipTypes(x, {tyGenericInst}) + var b = skipTypes(y, {tyGenericInst}) + result = x + if a.kind in {tyExpr, tyNil}: return y + elif b.kind in {tyExpr, tyNil}: return x + elif b.kind in {tyArray, tyArrayConstr, tySet, tySequence} and + a.kind == b.kind: + # check for seq[empty] vs. 
seq[int] + let idx = ord(b.kind in {tyArray, tyArrayConstr}) + if a.sons[idx].kind == tyEmpty: return y + #elif b.sons[idx].kind == tyEmpty: return x + else: + var k = tyNone + if a.kind in {tyRef, tyPtr}: + k = a.kind + if b.kind != a.kind: return x + a = a.sons[0] + b = b.sons[0] + if a.kind == tyObject and b.kind == tyObject: + result = commonSuperclass(a, b) + # this will trigger an error later: + if result.isNil: return x + if k != tyNone: + let r = result + result = NewType(k, r.owner) + result.addSonSkipIntLit(r) + proc isTopLevel(c: PContext): bool {.inline.} = result = c.tab.tos <= 2 diff --git a/compiler/semdata.nim b/compiler/semdata.nim index 4981b64c9..afce365f9 100644 --- a/compiler/semdata.nim +++ b/compiler/semdata.nim @@ -230,7 +230,7 @@ proc markIndirect*(c: PContext, s: PSym) {.inline.} = incl(s.flags, sfAddrTaken) # XXX add to 'c' for global analysis -proc illFormedAst*(n: PNode) = +proc illFormedAst*(n: PNode) = GlobalError(n.info, errIllFormedAstX, renderTree(n, {renderNoComments})) proc checkSonsLen*(n: PNode, length: int) = diff --git a/compiler/types.nim b/compiler/types.nim index 4b528d9a2..5b1da74d4 100644 --- a/compiler/types.nim +++ b/compiler/types.nim @@ -904,6 +904,26 @@ proc inheritanceDiff*(a, b: PType): int = inc(result) result = high(int) +proc commonSuperclass*(a, b: PType): PType = + # quick check: are they the same? 
+ if sameObjectTypes(a, b): return a + + # simple algorithm: we store all ancestors of 'a' in a ID-set and walk 'b' + # up until the ID is found: + assert a.kind == tyObject + assert b.kind == tyObject + var x = a + var ancestors = initIntSet() + while x != nil: + x = skipTypes(x, skipPtrs) + ancestors.incl(x.id) + x = x.sons[0] + var y = b + while y != nil: + y = skipTypes(y, skipPtrs) + if ancestors.contains(y.id): return y + y = y.sons[0] + proc typeAllowedAux(marker: var TIntSet, typ: PType, kind: TSymKind): bool proc typeAllowedNode(marker: var TIntSet, n: PNode, kind: TSymKind): bool = result = true diff --git a/doc/grammar.txt b/doc/grammar.txt index b95af89ef..59ae5b073 100644 --- a/doc/grammar.txt +++ b/doc/grammar.txt @@ -1,204 +1,181 @@ -module ::= ([COMMENT] [SAD] stmt)* - -comma ::= ',' [COMMENT] [IND] -semicolon ::= ';' [COMMENT] [IND] - -operator ::= OP0 | OP1 | OP2 | OP3 | OP4 | OP5 | OP6 | OP7 | OP8 | OP9 - | 'or' | 'xor' | 'and' - | 'is' | 'isnot' | 'in' | 'notin' | 'of' - | 'div' | 'mod' | 'shl' | 'shr' | 'not' | 'addr' | 'static' | '..' - -prefixOperator ::= operator - -optInd ::= [COMMENT] [IND] -optPar ::= [IND] | [SAD] - -lowestExpr ::= assignExpr (OP0 optInd assignExpr)* -assignExpr ::= orExpr (OP1 optInd orExpr)* -orExpr ::= andExpr (OP2 optInd andExpr)* -andExpr ::= cmpExpr (OP3 optInd cmpExpr)* -cmpExpr ::= sliceExpr (OP4 optInd sliceExpr)* -sliceExpr ::= ampExpr (OP5 optInd ampExpr)* -ampExpr ::= plusExpr (OP6 optInd plusExpr)* -plusExpr ::= mulExpr (OP7 optInd mulExpr)* -mulExpr ::= dollarExpr (OP8 optInd dollarExpr)* -dollarExpr ::= primary (OP9 optInd primary)* - -indexExpr ::= expr - -castExpr ::= 'cast' '[' optInd typeDesc optPar ']' '(' optInd expr optPar ')' -symbol ::= '`' (KEYWORD | IDENT | operator | '(' ')' | '[' ']' | '{' '}' - | '=' | literal)+ '`' - | IDENT - -primaryPrefix ::= (prefixOperator | 'bind') optInd -primarySuffix ::= '.' 
optInd symbol [generalizedLit] - | '(' optInd namedExprList optPar ')' - | '[' optInd [indexExpr (comma indexExpr)* [comma]] optPar ']' - | '{' optInd [indexExpr (comma indexExpr)* [comma]] optPar '}' - -primary ::= primaryPrefix* (symbol [generalizedLit] | - constructor | castExpr) - primarySuffix* - +module = stmt ^* (';' / IND{=}) +comma = ',' COMMENT? +semicolon = ';' COMMENT? +colon = ':' COMMENT? +colcom = ':' COMMENT? + +operator = OP0 | OP1 | OP2 | OP3 | OP4 | OP5 | OP6 | OP7 | OP8 | OP9 + | 'or' | 'xor' | 'and' + | 'is' | 'isnot' | 'in' | 'notin' | 'of' + | 'div' | 'mod' | 'shl' | 'shr' | 'not' | 'addr' | 'static' | '..' + +prefixOperator = operator + +optInd = COMMENT? +optPar = (IND{>} | IND{=})? + +simpleExpr = assignExpr (OP0 optInd assignExpr)* +assignExpr = orExpr (OP1 optInd orExpr)* +orExpr = andExpr (OP2 optInd andExpr)* +andExpr = cmpExpr (OP3 optInd cmpExpr)* +cmpExpr = sliceExpr (OP4 optInd sliceExpr)* +sliceExpr = ampExpr (OP5 optInd ampExpr)* +ampExpr = plusExpr (OP6 optInd plusExpr)* +plusExpr = mulExpr (OP7 optInd mulExpr)* +mulExpr = dollarExpr (OP8 optInd dollarExpr)* +dollarExpr = primary (OP9 optInd primary)* +symbol = '`' (KEYW|IDENT|operator|'(' ')'|'[' ']'|'{' '}'|'='|literal)+ '`' + | IDENT +indexExpr = expr +indexExprList = indexExpr ^+ comma +exprColonEqExpr = expr (':'|'=' expr)? +exprList = expr ^+ comma +dotExpr = expr '.' optInd ('type' | 'addr' | symbol) +qualifiedIdent = symbol ('.' optInd ('type' | 'addr' | symbol))? +exprColonEqExprList = exprColonEqExpr (comma exprColonEqExpr)* (comma)? 
+setOrTableConstr = '{' ((exprColonEqExpr comma)* | ':' ) '}' +castExpr = 'cast' '[' optInd typeDesc optPar ']' '(' optInd expr optPar ')' generalizedLit ::= GENERALIZED_STR_LIT | GENERALIZED_TRIPLESTR_LIT - -literal ::= INT_LIT | INT8_LIT | INT16_LIT | INT32_LIT | INT64_LIT - | UINT_LIT | UINT8_LIT | UINT16_LIT | UINT32_LIT | UINT64_LIT - | FLOAT_LIT | FLOAT32_LIT | FLOAT64_LIT - | STR_LIT | RSTR_LIT | TRIPLESTR_LIT - | CHAR_LIT - | NIL - -constructor ::= literal - | '[' optInd colonExprList optPar ']' - | '{' optInd ':' | colonExprList optPar '}' - | '(' optInd colonExprList optPar ')' - -colonExpr ::= expr [':' expr] -colonExprList ::= [colonExpr (comma colonExpr)* [comma]] - -namedExpr ::= expr ['=' expr] -namedExprList ::= [namedExpr (comma namedExpr)* [comma]] - -exprOrType ::= lowestExpr - | 'if' expr ':' expr ('elif' expr ':' expr)* 'else' ':' expr - | 'var' exprOrType - | 'ref' exprOrType - | 'ptr' exprOrType - | 'type' exprOrType - | 'tuple' tupleDesc - -expr ::= exprOrType - | 'proc' paramList [pragma] ['=' stmt] - | 'iterator' paramList [pragma] ['=' stmt] - -exprList ::= [expr (comma expr)* [comma]] - - -qualifiedIdent ::= symbol ['.' 
symbol] - -typeDesc ::= (exprOrType - | 'proc' paramList [pragma] - | 'iterator' paramList [pragma] ) - ['not' expr] # for now only 'not nil' suffix is supported - -macroStmt ::= ':' [stmt] ('of' [exprList] ':' stmt - |'elif' expr ':' stmt - |'except' exceptList ':' stmt )* - ['else' ':' stmt] - -pragmaBlock ::= pragma [':' stmt] - -simpleStmt ::= returnStmt - | yieldStmt - | discardStmt - | raiseStmt - | breakStmt - | continueStmt - | pragmaBlock - | importStmt - | fromStmt - | includeStmt - | exprStmt -complexStmt ::= ifStmt | whileStmt | caseStmt | tryStmt | forStmt - | blockStmt | staticStmt | asmStmt - | procDecl | iteratorDecl | macroDecl | templateDecl | methodDecl - | constSection | letSection | varSection - | typeSection | whenStmt | bindStmt - -indPush ::= IND # and push indentation onto the stack -indPop ::= # pop indentation from the stack - -stmt ::= simpleStmt [SAD] - | indPush (complexStmt | simpleStmt) - ([SAD] (complexStmt | simpleStmt))* - DED indPop - -exprStmt ::= lowestExpr ['=' expr | [expr (comma expr)*] [macroStmt]] -returnStmt ::= 'return' [expr] -yieldStmt ::= 'yield' expr -discardStmt ::= 'discard' expr -raiseStmt ::= 'raise' [expr] -breakStmt ::= 'break' [symbol] -continueStmt ::= 'continue' -ifStmt ::= 'if' expr ':' stmt ('elif' expr ':' stmt)* ['else' ':' stmt] -whenStmt ::= 'when' expr ':' stmt ('elif' expr ':' stmt)* ['else' ':' stmt] -caseStmt ::= 'case' expr [':'] ('of' exprList ':' stmt)* - ('elif' expr ':' stmt)* - ['else' ':' stmt] -whileStmt ::= 'while' expr ':' stmt -forStmt ::= 'for' symbol (comma symbol)* 'in' expr ':' stmt -exceptList ::= [qualifiedIdent (comma qualifiedIdent)*] - -tryStmt ::= 'try' ':' stmt - ('except' exceptList ':' stmt)* - ['finally' ':' stmt] -asmStmt ::= 'asm' [pragma] (STR_LIT | RSTR_LIT | TRIPLESTR_LIT) -blockStmt ::= 'block' [symbol] ':' stmt -staticStmt ::= 'static' ':' stmt -filename ::= symbol | STR_LIT | RSTR_LIT | TRIPLESTR_LIT -importStmt ::= 'import' filename (comma filename)* -includeStmt 
::= 'include' filename (comma filename)* -bindStmt ::= 'bind' qualifiedIdent (comma qualifiedIdent)* -fromStmt ::= 'from' filename 'import' symbol (comma symbol)* - -pragma ::= '{.' optInd (colonExpr [comma])* optPar ('.}' | '}') - -param ::= symbol (comma symbol)* (':' typeDesc ['=' expr] | '=' expr) -paramList ::= ['(' [param (comma|semicolon param)*] optPar ')'] [':' typeDesc] - -genericConstraint ::= 'object' | 'tuple' | 'enum' | 'proc' | 'ref' | 'ptr' - | 'var' | 'distinct' | 'iterator' | primary -genericConstraints ::= genericConstraint ( '|' optInd genericConstraint )* - -genericParam ::= symbol [':' genericConstraints] ['=' expr] -genericParams ::= '[' genericParam (comma|semicolon genericParam)* optPar ']' - - -routineDecl := symbol ['*'] [genericParams] paramList [pragma] ['=' stmt] -procDecl ::= 'proc' routineDecl -macroDecl ::= 'macro' routineDecl -iteratorDecl ::= 'iterator' routineDecl -templateDecl ::= 'template' routineDecl -methodDecl ::= 'method' routineDecl - -colonAndEquals ::= [':' typeDesc] '=' expr - -constDecl ::= symbol ['*'] [pragma] colonAndEquals [COMMENT | IND COMMENT] - | COMMENT -constSection ::= 'const' indPush constDecl (SAD constDecl)* DED indPop -letSection ::= 'let' indPush constDecl (SAD constDecl)* DED indPop - -typeDef ::= typeDesc | objectDef | enumDef | 'distinct' typeDesc - -objectField ::= symbol ['*'] [pragma] -objectIdentPart ::= objectField (comma objectField)* ':' typeDesc - [COMMENT|IND COMMENT] - -objectWhen ::= 'when' expr ':' [COMMENT] objectPart - ('elif' expr ':' [COMMENT] objectPart)* - ['else' ':' [COMMENT] objectPart] -objectCase ::= 'case' expr ':' typeDesc [COMMENT] - ('of' exprList ':' [COMMENT] objectPart)* - ['else' ':' [COMMENT] objectPart] - -objectPart ::= objectWhen | objectCase | objectIdentPart | 'nil' - | indPush objectPart (SAD objectPart)* DED indPop -tupleDesc ::= '[' optInd [param (comma|semicolon param)*] optPar ']' - -objectDef ::= 'object' [pragma] ['of' typeDesc] objectPart -enumField ::= 
symbol ['=' expr] -enumDef ::= 'enum' (enumField [comma] [COMMENT | IND COMMENT])+ - -typeDecl ::= COMMENT - | symbol ['*'] [genericParams] ['=' typeDef] [COMMENT | IND COMMENT] - -typeSection ::= 'type' indPush typeDecl (SAD typeDecl)* DED indPop - -colonOrEquals ::= ':' typeDesc ['=' expr] | '=' expr -varField ::= symbol ['*'] [pragma] -varPart ::= symbol (comma symbol)* colonOrEquals [COMMENT | IND COMMENT] -varSection ::= 'var' (varPart - | indPush (COMMENT|varPart) - (SAD (COMMENT|varPart))* DED indPop) +identOrLiteral = generalizedLit | symbol + | INT_LIT | INT8_LIT | INT16_LIT | INT32_LIT | INT64_LIT + | UINT_LIT | UINT8_LIT | UINT16_LIT | UINT32_LIT | UINT64_LIT + | FLOAT_LIT | FLOAT32_LIT | FLOAT64_LIT + | STR_LIT | RSTR_LIT | TRIPLESTR_LIT + | CHAR_LIT + | NIL + | tupleConstr | arrayConstr | setOrTableConstr + | castExpr +tupleConstr = '(' optInd (exprColonEqExpr comma?)* optPar ')' +arrayConstr = '[' optInd (exprColonEqExpr comma?)* optPar ']' +primarySuffix = '(' (exprColonEqExpr comma?)* ')' doBlocks? + | doBlocks + | '.' optInd ('type' | 'addr' | symbol) generalizedLit? + | '[' optInd indexExprList optPar ']' + | '{' optInd indexExprList optPar '}' +condExpr = expr colcom expr optInd + ('elif' expr colcom expr optInd)* + 'else' colcom expr +ifExpr = 'if' condExpr +whenExpr = 'when' condExpr +pragma = '{.' optInd (exprColonExpr comma?)* optPar ('.}' | '}') +identVis = symbol opr? # postfix position +identWithPragma = identVis pragma? +declColonEquals = identWithPragma (comma identWithPragma)* comma? + (':' optInd typeDesc)? ('=' optInd expr)? +identColonEquals = ident (comma ident)* comma? + (':' optInd typeDesc)? ('=' optInd expr)?) +inlTupleDecl = 'tuple' + [' optInd (identColonEquals (comma/semicolon)?)* optPar ']' +extTupleDecl = 'tuple' + COMMENT? (IND{>} identColonEquals (IND{=} identColonEquals)*)? +paramList = '(' identColonEquals ^* (comma/semicolon) ')' +paramListArrow = paramList? ('->' optInd typeDesc)? +paramListColon = paramList? 
(':' optInd typeDesc)? +doBlock = 'do' paramListArrow pragmas? colcom stmt +doBlocks = doBlock ^* IND{=} +procExpr = 'proc' paramListColon pragmas? ('=' COMMENT? stmt)? +expr = (ifExpr + | whenExpr + | caseExpr) + / simpleExpr +typeKeyw = 'var' | 'ref' | 'ptr' | 'shared' | 'type' | 'tuple' + | 'proc' | 'iterator' | 'distinct' | 'object' | 'enum' +primary = typeKeyw typeDescK + / prefixOperator* identOrLiteral primarySuffix* + / 'addr' primary + / 'static' primary + / 'bind' primary +typeDesc = simpleExpr +typeDefAux = simpleExpr +exprStmt = simpleExpr + (( '=' optInd expr ) + / ( expr ^+ comma + doBlocks + / ':' stmt? ( IND{=} 'of' exprList ':' stmt + | IND{=} 'elif' expr ':' stmt + | IND{=} 'except' exprList ':' stmt + | IND{=} 'else' ':' stmt )* + ))? +importStmt = 'import' optInd expr + ((comma expr)* + / 'except' optInd (expr ^+ comma)) +includeStmt = 'include' optInd expr ^+ comma +fromStmt = 'from' expr 'import' optInd expr (comma expr)* +returnStmt = 'return' optInd expr? +raiseStmt = 'raise' optInd expr? +yieldStmt = 'yield' optInd expr? +discardStmt = 'discard' optInd expr? +breakStmt = 'break' optInd expr? +continueStmt = 'continue' optInd expr? +condStmt = expr colcom stmt COMMENT? + (IND{=} 'elif' expr colcom stmt)* + (IND{=} 'else' colcom stmt)? +ifStmt = 'if' condStmt +whenStmt = 'when' condStmt +whileStmt = 'while' expr colcom stmt +ofBranch = 'of' exprList colcom stmt +ofBranches = ofBranch (IND{=} ofBranch)* + (IND{=} 'elif' expr colcom stmt)* + (IND{=} 'else' colcom stmt)? +caseStmt = 'case' expr ':'? COMMENT? + (IND{>} ofBranches DED + | IND{=} ofBranches) +tryStmt = 'try' colcom stmt &(IND{=}? 'except'|'finally') + (IND{=}? 'except' exprList colcom stmt)* + (IND{=}? 'finally' colcom stmt)? +exceptBlock = 'except' colcom stmt +forStmt = 'for' symbol (comma symbol)* 'in' expr colcom stmt +blockStmt = 'block' symbol? colcom stmt +staticStmt = 'static' colcom stmt +asmStmt = 'asm' pragma?
(STR_LIT | RSTR_LIT | TRIPLE_STR_LIT) +genericParam = symbol (comma symbol)* (colon expr)? ('=' optInd expr)? +genericParamList = '[' optInd + genericParam ^* (comma/semicolon) optPar ']' +pattern = '{' stmt '}' +indAndComment = (IND{>} COMMENT)? | COMMENT? +routine = optInd identVis pattern? genericParamList? + paramListColon pragma? ('=' COMMENT? stmt)? indAndComment +commentStmt = COMMENT +section(p) = COMMENT? p / (IND{>} (p / COMMENT)^+IND{=} DED) +constant = identWithPragma (colon typedesc)? '=' optInd expr indAndComment +enum = 'enum' optInd (symbol optInd ('=' optInd expr COMMENT?)? comma?)+ +objectWhen = 'when' expr colcom objectPart COMMENT? + ('elif' expr colcom objectPart COMMENT?)* + ('else' colcom objectPart COMMENT?)? +objectBranch = 'of' exprList colcom objectPart +objectBranches = objectBranch (IND{=} objectBranch)* + (IND{=} 'elif' expr colcom objectPart)* + (IND{=} 'else' colcom objectPart)? +objectCase = 'case' identWithPragma ':' typeDesc ':'? COMMENT? + (IND{>} objectBranches DED + | IND{=} objectBranches) +objectPart = IND{>} objectPart^+IND{=} DED + / objectWhen / objectCase / 'nil' / declColonEquals +object = 'object' pragma? ('of' typeDesc)? COMMENT? objectPart +distinct = 'distinct' optInd typeDesc +typeDef = identWithPragma genericParamList? '=' optInd typeDefAux + indAndComment? +varTuple = '(' optInd identWithPragma ^+ comma optPar ')' '=' optInd expr +variable = (varTuple / identColonEquals) indAndComment +bindStmt = 'bind' optInd qualifiedIdent ^+ comma +mixinStmt = 'mixin' optInd qualifiedIdent ^+ comma +pragmaStmt = pragma (':' COMMENT? stmt)? +simpleStmt = ((returnStmt | raiseStmt | yieldStmt | discardStmt | breakStmt + | continueStmt | pragmaStmt | importStmt | exportStmt | fromStmt + | includeStmt | commentStmt) / exprStmt) COMMENT? 
+complexOrSimpleStmt = (ifStmt | whenStmt | whileStmt + | tryStmt | finallyStmt | exceptStmt | forStmt + | blockStmt | staticStmt | asmStmt + | 'proc' routine + | 'method' routine + | 'iterator' routine + | 'macro' routine + | 'template' routine + | 'converter' routine + | 'type' section(typeDef) + | 'const' section(constant) + | ('let' | 'var') section(variable) + | bindStmt | mixinStmt) + / simpleStmt +stmt = (IND{>} complexOrSimpleStmt^+(IND{=} / ';') DED) + / simpleStmt diff --git a/doc/manual.txt b/doc/manual.txt index 8ca7b697e..3437e3e44 100644 --- a/doc/manual.txt +++ b/doc/manual.txt @@ -23,14 +23,25 @@ This document describes the lexis, the syntax, and the semantics of Nimrod. The language constructs are explained using an extended BNF, in which ``(a)*`` means 0 or more ``a``'s, ``a+`` means 1 or more ``a``'s, and -``(a)?`` means an optional *a*; an alternative spelling for optional parts is -``[a]``. The ``|`` symbol is used to mark alternatives -and has the lowest precedence. Parentheses may be used to group elements. +``(a)?`` means an optional *a*. Parentheses may be used to group elements. + +The ``|``, ``/`` symbols are used to mark alternatives and have the lowest +precedence. ``/`` is the ordered choice that requires the parser to try the +alternatives in the given order. ``/`` is often used to ensure the grammar +is not ambiguous. + Non-terminals start with a lowercase letter, abstract terminal symbols are in UPPERCASE. Verbatim terminal symbols (including keywords) are quoted with ``'``. An example:: - ifStmt ::= 'if' expr ':' stmts ('elif' expr ':' stmts)* ['else' stmts] + ifStmt = 'if' expr ':' stmts ('elif' expr ':' stmts)* ('else' stmts)? + +The binary ``^*`` operator is used as a shorthand for 0 or more occurrences +separated by its second argument; likewise ``^+`` means 1 or more +occurrences: ``a ^+ b`` is short for ``a (b a)*`` +and ``a ^* b`` is short for ``(a (b a)*)?``.
Example:: + + arrayConstructor = '[' expr ^* ',' ']' Other parts of Nimrod - like scoping rules or runtime semantics are only described in an informal manner for now. @@ -50,7 +61,7 @@ An `identifier`:idx: is a symbol declared as a name for a variable, type, procedure, etc. The region of the program over which a declaration applies is called the `scope`:idx: of the declaration. Scopes can be nested. The meaning of an identifier is determined by the smallest enclosing scope in which the -identifier is declared. +identifier is declared unless overloading resolution rules suggest otherwise. An expression specifies a computation that produces a value or location. Expressions that produce locations are called `l-values`:idx:. An l-value @@ -93,28 +104,31 @@ Nimrod's standard grammar describes an `indentation sensitive`:idx: language. This means that all the control structures are recognized by indentation. Indentation consists only of spaces; tabulators are not allowed. -The terminals ``IND`` (indentation), ``DED`` (dedentation) and ``SAD`` -(same indentation) are generated by the scanner, denoting an indentation. +The indentation handling is implemented as follows: The lexer annotates the +following token with the preceding number of spaces; indentation is not +a separate token. This trick allows parsing of Nimrod with only 1 token of +lookahead. + +The parser uses a stack of indentation levels: the stack consists of integers +counting the spaces. The indentation information is queried at strategic +places in the parser but ignored otherwise: The pseudo terminal ``IND{>}`` +denotes an indentation that consists of more spaces than the entry at the top +of the stack; ``IND{=}`` an indentation that has the same number of spaces. ``DED`` +is another pseudo terminal that describes the *action* of popping a value +from the stack, ``IND{>}`` then implies to push onto the stack. -These terminals are only generated for lines that are not empty.
+With this notation we can now easily define the core of the grammar: A block of +statements (simplified example):: -The parser and the scanner communicate over a stack which indentation terminal -should be generated: the stack consists of integers counting the spaces. The -stack is initialized with a zero on its top. The scanner reads from the stack: -If the current indentation token consists of more spaces than the entry at the -top of the stack, a ``IND`` token is generated, else if it consists of the same -number of spaces, a ``SAD`` token is generated. If it consists of fewer spaces, -a ``DED`` token is generated for any item on the stack that is greater than the -current. These items are later popped from the stack by the parser. At the end -of the file, a ``DED`` token is generated for each number remaining on the -stack that is larger than zero. + ifStmt = 'if' expr ':' stmt + (IND{=} 'elif' expr ':' stmt)* + (IND{=} 'else' ':' stmt)? + + simpleStmt = ifStmt / ... + + stmt = IND{>} stmt ^+ IND{=} DED # list of statements + / simpleStmt # or a simple statement -Because the grammar contains some optional ``IND`` tokens, the scanner cannot -push new indentation levels. This has to be done by the parser. The symbol -``indPush`` indicates that an ``IND`` token is expected; the current number of -leading spaces is pushed onto the stack by the parser. The symbol ``indPop`` -denotes that the parser pops an item from the indentation stack. No token is -consumed by ``indPop``. Comments @@ -416,11 +430,11 @@ and not the two tokens `{.`:tok:, `.}`:tok:. Syntax ====== -This section lists Nimrod's standard syntax in ENBF. How the parser receives -indentation tokens is already described in the `Lexical Analysis`_ section. +This section lists Nimrod's standard syntax. How the parser handles +the indentation is already described in the `Lexical Analysis`_ section. Nimrod allows user-definable operators. -Binary operators have 10 different levels of precedence. 
+Binary operators have 10 different levels of precedence. Relevant character ------------------ @@ -1040,7 +1054,7 @@ an ``object`` type or a ``ref object`` type: .. code-block:: nimrod var student = TStudent(name: "Anton", age: 5, id: 3) -For a ``ref object`` type ``new`` is invoked implicitly. +For a ``ref object`` type ``system.new`` is invoked implicitly. Object variants @@ -1701,44 +1715,20 @@ Statements and expressions ========================== Nimrod uses the common statement/expression paradigm: `Statements`:idx: do not -produce a value in contrast to expressions. Call expressions are statements. -If the called procedure returns a value, it is not a valid statement -as statements do not produce values. To evaluate an expression for -side-effects and throw its value away, one can use the ``discard`` statement. +produce a value in contrast to expressions. However, some expressions are +statements. Statements are separated into `simple statements`:idx: and `complex statements`:idx:. Simple statements are statements that cannot contain other statements like assignments, calls or the ``return`` statement; complex statements can contain other statements. To avoid the `dangling else problem`:idx:, complex -statements always have to be intended:: - - simpleStmt ::= returnStmt - | yieldStmt - | discardStmt - | raiseStmt - | breakStmt - | continueStmt - | pragma - | importStmt - | fromStmt - | includeStmt - | exprStmt - complexStmt ::= ifStmt | whileStmt | caseStmt | tryStmt | forStmt - | blockStmt | asmStmt - | procDecl | iteratorDecl | macroDecl | templateDecl - | constSection | letSection - | typeSection | whenStmt | varSection - +statements always have to be indented. The details can be found in the grammar. Discard statement ----------------- -Syntax:: - - discardStmt ::= 'discard' expr - Example: ..
code-block:: nimrod @@ -1766,16 +1756,6 @@ been declared with the `discardable`:idx: pragma: Var statement ------------- -Syntax:: - - colonOrEquals ::= ':' typeDesc ['=' expr] | '=' expr - varField ::= symbol ['*'] [pragma] - varPart ::= symbol (comma symbol)* [comma] colonOrEquals [COMMENT | IND COMMENT] - varSection ::= 'var' (varPart - | indPush (COMMENT|varPart) - (SAD (COMMENT|varPart))* DED indPop) - - `Var`:idx: statements declare new local and global variables and initialize them. A comma separated list of variables can be used to specify variables of the same type: @@ -1839,14 +1819,6 @@ For let variables the same pragmas are available as for ordinary variables. Const section ------------- -Syntax:: - - colonAndEquals ::= [':' typeDesc] '=' expr - - constDecl ::= symbol ['*'] [pragma] colonAndEquals [COMMENT | IND COMMENT] - | COMMENT - constSection ::= 'const' indPush constDecl (SAD constDecl)* DED indPop - `Constants`:idx: are symbols which are bound to a value. The constant's value cannot change. The compiler must be able to evaluate the expression in a constant declaration at compile time. @@ -1877,10 +1849,6 @@ they contain such a type. Static statement/expression --------------------------- -Syntax:: - staticExpr ::= 'static' '(' optInd expr optPar ')' - staticStmt ::= 'static' ':' stmt - A `static`:idx: statement/expression can be used to enforce compile time evaluation explicitly. Enforced compile time evaluation can even evaluate code that has side effects: @@ -1902,10 +1870,6 @@ support the FFI at compile time. If statement ------------ -Syntax:: - - ifStmt ::= 'if' expr ':' stmt ('elif' expr ':' stmt)* ['else' ':' stmt] - Example: .. code-block:: nimrod @@ -1932,12 +1896,6 @@ part, execution continues with the statement after the ``if`` statement. Case statement -------------- -Syntax:: - - caseStmt ::= 'case' expr [':'] ('of' sliceExprList ':' stmt)* - ('elif' expr ':' stmt)* - ['else' ':' stmt] - Example: .. 
code-block:: nimrod @@ -1998,10 +1956,6 @@ a list of its elements: When statement -------------- -Syntax:: - - whenStmt ::= 'when' expr ':' stmt ('elif' expr ':' stmt)* ['else' ':' stmt] - Example: .. code-block:: nimrod @@ -2032,10 +1986,6 @@ within ``object`` definitions. Return statement ---------------- -Syntax:: - - returnStmt ::= 'return' [expr] - Example: .. code-block:: nimrod @@ -2063,10 +2013,6 @@ variables, ``result`` is initialized to (binary) zero: Yield statement --------------- -Syntax:: - - yieldStmt ::= 'yield' expr - Example: .. code-block:: nimrod @@ -2083,10 +2029,6 @@ for further information. Block statement --------------- -Syntax:: - - blockStmt ::= 'block' [symbol] ':' stmt - Example: .. code-block:: nimrod @@ -2108,10 +2050,6 @@ block to specify which block is to leave. Break statement --------------- -Syntax:: - - breakStmt ::= 'break' [symbol] - Example: .. code-block:: nimrod @@ -2125,10 +2063,6 @@ absent, the innermost block is left. While statement --------------- -Syntax:: - - whileStmt ::= 'while' expr ':' stmt - Example: .. code-block:: nimrod @@ -2147,10 +2081,6 @@ so that they can be left with a ``break`` statement. Continue statement ------------------ -Syntax:: - - continueStmt ::= 'continue' - A `continue`:idx: statement leads to the immediate next iteration of the surrounding loop construct. It is only allowed within a loop. A continue statement is syntactic sugar for a nested block: @@ -2173,9 +2103,6 @@ Is equivalent to: Assembler statement ------------------- -Syntax:: - - asmStmt ::= 'asm' [pragma] (STR_LIT | RSTR_LIT | TRIPLESTR_LIT) The direct embedding of `assembler`:idx: code into Nimrod code is supported by the unsafe ``asm`` statement. Identifiers in the assembler code that refer to @@ -2203,8 +2130,7 @@ Example: var y = if x > 8: 9 else: 10 An if expression always results in a value, so the ``else`` part is -required. ``Elif`` parts are also allowed (but unlikely to be good -style). +required. 
``Elif`` parts are also allowed. When expression --------------- @@ -2311,18 +2237,8 @@ procedure declaration defines an identifier and associates it with a block of code. A procedure may call itself recursively. A parameter may be given a default value that is used if the caller does not provide a value for this parameter. -The syntax is:: - - param ::= symbol (comma symbol)* (':' typeDesc ['=' expr] | '=' expr) - paramList ::= ['(' [param (comma param)*] [SAD] ')'] [':' typeDesc] - genericParam ::= symbol [':' typeDesc] ['=' expr] - genericParams ::= '[' genericParam (comma genericParam)* [SAD] ']' - - procDecl ::= 'proc' symbol ['*'] [genericParams] paramList [pragma] - ['=' stmt] - -If the ``= stmt`` part is missing, it is a `forward`:idx: declaration. If +If the proc declaration has no body, it is a `forward`:idx: declaration. If the proc returns a value, the procedure body can access an implicitly declared variable named `result`:idx: that represents the return value. Procs can be overloaded. The overloading resolution algorithm tries to find the proc that is @@ -2417,24 +2333,14 @@ Do notation As a special more convenient notation, proc expressions involved in procedure calls can use the ``do`` keyword: -Syntax:: - primarySuffix ::= 'do' ['(' namedExprList ')'] ['->' typeDesc] ':' - -As a start, let's repeat the example from the previous section: - .. code-block:: nimrod - cities.sort do (x,y: string) -> int: + sort(cities) do (x,y: string) -> int: cmp(x.len, y.len) ``do`` is written after the parentheses enclosing the regular proc params. The proc expression represented by the do block is appended to them. -Again, let's see the equivalent of the previous example: - -.. code-block:: nimrod - sort(cities) do (x,y: string) -> int: - cmp(x.len, y.len) -Finally, more than one ``do`` block can appear in a single call: +More than one ``do`` block can appear in a single call: .. code-block:: nimrod proc performWithUndo(task: proc(), undo: proc()) = ... 
@@ -2635,30 +2541,16 @@ evaluation or dead code elimination do not work with methods. Iterators and the for statement =============================== -Syntax:: - - forStmt ::= 'for' symbol (comma symbol)* [comma] 'in' expr ':' stmt - - param ::= symbol (comma symbol)* [comma] ':' typeDesc - paramList ::= ['(' [param (comma param)* [comma]] ')'] [':' typeDesc] - - genericParam ::= symbol [':' typeDesc] - genericParams ::= '[' genericParam (comma genericParam)* [comma] ']' - - iteratorDecl ::= 'iterator' symbol ['*'] [genericParams] paramList [pragma] - ['=' stmt] - The `for`:idx: statement is an abstract mechanism to iterate over the elements of a container. It relies on an `iterator`:idx: to do so. Like ``while`` statements, ``for`` statements open an `implicit block`:idx:, so that they can be left with a ``break`` statement. -The ``for`` loop declares -iteration variables (``x`` in the example) - their scope reaches until the +The ``for`` loop declares iteration variables - their scope reaches until the end of the loop body. The iteration variables' types are inferred by the return type of the iterator. -An iterator is similar to a procedure, except that it is always called in the +An iterator is similar to a procedure, except that it can be called in the context of a ``for`` loop. Iterators provide a way to specify the iteration over an abstract type. A key role in the execution of a ``for`` loop plays the ``yield`` statement in the called iterator. Whenever a ``yield`` statement is @@ -2686,9 +2578,10 @@ The compiler generates code as if the programmer would have written this: echo(ch) inc(i) -If the iterator yields a tuple, there have to be as many iteration variables +If the iterator yields a tuple, there can be as many iteration variables as there are components in the tuple. The i'th iteration variable's type is -the type of the i'th component. +the type of the i'th component. In other words, implicit tuple unpacking in a +for loop context is supported. 
Implicit items/pairs invocations @@ -2792,23 +2685,10 @@ iterator that has already finished its work. Type sections ============= -Syntax:: - - typeDef ::= typeDesc | objectDef | enumDef - - genericParam ::= symbol [':' typeDesc] - genericParams ::= '[' genericParam (comma genericParam)* [comma] ']' - - typeDecl ::= COMMENT - | symbol ['*'] [genericParams] ['=' typeDef] [COMMENT|IND COMMENT] - - typeSection ::= 'type' indPush typeDecl (SAD typeDecl)* DED indPop - - Example: .. code-block:: nimrod - type # example demonstrates mutually recursive types + type # example demonstrating mutually recursive types PNode = ref TNode # a traced pointer to a TNode TNode = object le, ri: PNode # left and right subtrees @@ -2822,7 +2702,8 @@ Example: A `type`:idx: section begins with the ``type`` keyword. It contains multiple type definitions. A type definition binds a type to a name. Type definitions can be recursive or even mutually recursive. Mutually recursive types are only -possible within a single ``type`` section. +possible within a single ``type`` section. Nominal types like ``objects`` +or ``enums`` can only be defined in a ``type`` section. Exception handling @@ -2831,14 +2712,6 @@ Exception handling Try statement ------------- -Syntax:: - - qualifiedIdent ::= symbol ['.' symbol] - exceptList ::= [qualifiedIdent (comma qualifiedIdent)* [comma]] - tryStmt ::= 'try' ':' stmt - ('except' exceptList ':' stmt)* - ['finally' ':' stmt] - Example: .. code-block:: nimrod @@ -2863,15 +2736,14 @@ Example: close(f) - The statements after the `try`:idx: are executed in sequential order unless an exception ``e`` is raised. If the exception type of ``e`` matches any -of the list ``exceptlist`` the corresponding statements are executed. +listed in an ``except`` clause the corresponding statements are executed. The statements following the ``except`` clauses are called `exception handlers`:idx:. The empty `except`:idx: clause is executed if there is an exception that is -in no list. 
It is similar to an ``else`` clause in ``if`` statements. +not listed otherwise. It is similar to an ``else`` clause in ``if`` statements. If there is a `finally`:idx: clause, it is always executed after the exception handlers. @@ -2916,10 +2788,6 @@ statements. Example: Raise statement --------------- -Syntax:: - - raiseStmt ::= 'raise' [expr] - Example: .. code-block:: nimrod @@ -2948,17 +2816,21 @@ This allows for a Lisp-like `condition system`:idx:\: .. code-block:: nimrod var myFile = open("broken.txt", fmWrite) try: - onRaise(proc (e: ref E_Base): bool = + onRaise do (e: ref E_Base)-> bool: if e of EIO: stdout.writeln "ok, writing to stdout instead" else: # do raise other exceptions: result = true - ) myFile.writeln "writing to broken file" finally: myFile.close() +``OnRaise`` can only *filter* raised exceptions, it cannot transform one +exception into another. (Nor should ``onRaise`` raise an exception though +this is currently not enforced.) This restriction keeps the exception tracking +analysis sound. + Effect system ============= @@ -3447,10 +3319,6 @@ Symbol binding within templates happens after template instantiation: Bind statement -------------- -Syntax:: - - bindStmt ::= 'bind' IDENT (comma IDENT)* - Exporting a template is often a leaky abstraction as it can depend on symbols that are not visible from a client module. However, to compensate for this case, a `bind`:idx: statement can be used: It declares all identifiers @@ -3715,18 +3583,11 @@ Statement Macros ---------------- Statement macros are defined just as expression macros. However, they are -invoked by an expression following a colon:: - - exprStmt ::= lowestExpr ['=' expr | [expr (comma expr)* [comma]] [macroStmt]] - macroStmt ::= ':' [stmt] ('of' [sliceExprList] ':' stmt - | 'elif' expr ':' stmt - | 'except' exceptList ':' stmt )* - ['else' ':' stmt] +invoked by an expression following a colon. 
The following example outlines a macro that generates a lexical analyzer from regular expressions: - .. code-block:: nimrod import macros @@ -3799,7 +3660,7 @@ instantiation type using the param name: var tree = new(TBinaryTree[int]) When used with macros and .compileTime. procs on the other hand, the compiler -don't need to instantiate the code multiple times, because types then can be +does not need to instantiate the code multiple times, because types then can be manipulated using the unified internal symbol representation. In such context typedesc acts as any other type. One can create variables, store typedesc values inside containers and so on. For example, here is how one can create @@ -4358,13 +4219,6 @@ the compiler encounters any static error. Pragmas ======= -Syntax:: - - colonExpr ::= expr [':' expr] - colonExprList ::= [colonExpr (comma colonExpr)* [comma]] - - pragma ::= '{.' optInd (colonExpr [comma])* [SAD] ('.}' | '}') - Pragmas are Nimrod's method to give the compiler additional information / commands without introducing a massive number of new keywords. Pragmas are processed on the fly during semantic checking. Pragmas are enclosed in the @@ -4411,10 +4265,10 @@ calls to any base class destructors in both user-defined and generated destructors. A destructor is attached to the type it destructs; expressions of this type -can then only be used in *destructible contexts*: +can then only be used in *destructible contexts* and as parameters: .. code-block:: nimrod - type + type TMyObj = object x, y: int p: pointer @@ -4425,9 +4279,15 @@ can then only be used in *destructible contexts*: proc open: TMyObj = result = TMyObj(x: 1, y: 2, p: alloc(3)) + proc work(o: TMyObj) = + echo o.x + # No destructor invoked here for 'o' as 'o' is a parameter. 
+ proc main() = # destructor automatically invoked at the end of the scope: var x = open() + # valid: pass 'x' to some other proc: + work(x) # Error: usage of a type with a destructor in a non destructible context echo open() @@ -4849,8 +4709,8 @@ a dynamic library (``.dll`` files for Windows, ``lib*.so`` files for UNIX). The non-optional argument has to be the name of the dynamic library: .. code-block:: Nimrod - proc gtk_image_new(): PGtkWidget {. - cdecl, dynlib: "libgtk-x11-2.0.so", importc.} + proc gtk_image_new(): PGtkWidget + {.cdecl, dynlib: "libgtk-x11-2.0.so", importc.} In general, importing a dynamic library does not require any special linker options or linking with import libraries. This also implies that no *devel* @@ -4894,6 +4754,10 @@ strings, because they are precompiled. **Note**: Passing variables to the ``dynlib`` pragma will fail at runtime because of order of initialization problems. +**Note**: A ``dynlib`` import can be overridden with +the ``--dynlibOverride:name`` command line option. The Compiler User Guide +contains further information. + Dynlib pragma for export ------------------------ @@ -4971,7 +4835,7 @@ Nimrod supports the `actor model`:idx: of concurrency natively: type TMsgKind = enum mLine, mEof - TMsg = object {.pure, final.} + TMsg = object case k: TMsgKind of mEof: nil of mLine: data: string diff --git a/todo.txt b/todo.txt index 65a9127d1..8b1ecd358 100644 --- a/todo.txt +++ b/todo.txt @@ -7,7 +7,17 @@ version 0.9.2 - acyclic vs prunable; introduce GC hints - CGEN: ``restrict`` pragma + backend support; computed goto support - document NimMain and check whether it works for threading - +- parser/grammar: + * check that of branches can only receive even simpler expressions, don't + allow 'of (var x = 23; nkIdent)' + * allow (var x = 12; for i in ... 
; x) construct + * try except as an expression +- make use of commonType relation in expressions +- further expr/stmt unification: + - nkIfStmt vs nkIfExpr + - start with JS backend and support exprs everywhere + - then enhance C backend + - OR: do the temp stuff in transf Bugs ==== @@ -29,14 +39,13 @@ version 0.9.4 ============= - macros as type pragmas -- ``try`` as an expression - provide tool/API to track leaks/object counts - hybrid GC - use big blocks in the allocator - implement full 'not nil' checking - make 'bind' default for templates and introduce 'mixin'; special rule for ``[]=`` -- implicit deref for parameter matching; overloading based on 'var T' +- implicit deref for parameter matching - ``=`` should be overloadable; requires specialization for ``=``; general lift mechanism in the compiler is already implemented for 'fields' - lazy overloading resolution: @@ -54,9 +63,14 @@ version 0.9.X - improve the compiler as a service - better support for macros that rewrite procs - macros need access to types and symbols (partially implemented) -- rethink the syntax/grammar: - * parser is not strict enough with newlines - * change comment handling in the AST +- perhaps: change comment handling in the AST +- enforce 'simpleExpr' more often --> doesn't work; tkProc is + part of primary! +- the typeDesc/expr unification is weird and only necessary because of + the ambiguous a[T] construct: It would be easy to support a[expr] for + generics but require a[.typeDesc] if that's required; this would also + allow [.ref T.](x) for a more general type conversion construct; for + templates that would work too: T([.ref int]) Concurrency @@ -96,7 +110,8 @@ Not essential for 1.0.0 - mocking support with ``tyProxy`` that does: fallback for ``.`` operator - overloading of ``.``? Special case ``.=``? 
- allow implicit forward declarations of procs via a pragma (so that the - wrappers can deactivate it) + wrappers can deactivate it): better solution: introduce the notion of a + 'proc section' that is similar to a type section. - implement the "snoopResult" pragma; no, make a strutils with string append semantics instead ... - implement "closure tuple consists of a single 'ref'" optimization |