diff options
Diffstat (limited to 'compiler/parser.nim')
-rw-r--r-- | compiler/parser.nim | 164 |
1 files changed, 109 insertions, 55 deletions
diff --git a/compiler/parser.nim b/compiler/parser.nim index b1cfd7609..fbc57ebb6 100644 --- a/compiler/parser.nim +++ b/compiler/parser.nim @@ -17,6 +17,8 @@ # In fact the grammar is generated from this file: when isMainModule: + # Leave a note in grammar.txt that it is generated: + #| # This file is generated by compiler/parser.nim. import pegs var outp = open("doc/grammar.txt", fmWrite) for line in lines("compiler/parser.nim"): @@ -25,7 +27,7 @@ when isMainModule: outp.close import - llstream, lexer, idents, strutils, ast, astalgo, msgs + llstream, lexer, idents, strutils, ast, astalgo, msgs, options, configuration type TParser* = object # A TParser object represents a file that @@ -81,21 +83,21 @@ proc getTok(p: var TParser) = rawGetTok(p.lex, p.tok) p.hasProgress = true -proc openParser*(p: var TParser, fileIdx: int32, inputStream: PLLStream, - cache: IdentCache; +proc openParser*(p: var TParser, fileIdx: FileIndex, inputStream: PLLStream, + cache: IdentCache; config: ConfigRef; strongSpaces=false) = ## Open a parser, using the given arguments to set up its internal state. ## initToken(p.tok) - openLexer(p.lex, fileIdx, inputStream, cache) + openLexer(p.lex, fileIdx, inputStream, cache, config) getTok(p) # read the first token p.firstTok = true p.strongSpaces = strongSpaces proc openParser*(p: var TParser, filename: string, inputStream: PLLStream, - cache: IdentCache; + cache: IdentCache; config: ConfigRef; strongSpaces=false) = - openParser(p, filename.fileInfoIdx, inputStream, cache, strongSpaces) + openParser(p, fileInfoIdx(config, filename), inputStream, cache, config, strongSpaces) proc closeParser(p: var TParser) = ## Close a parser, freeing up its resources. @@ -105,9 +107,13 @@ proc parMessage(p: TParser, msg: TMsgKind, arg = "") = ## Produce and emit the parser message `arg` to output. lexMessageTok(p.lex, msg, p.tok, arg) -proc parMessage(p: TParser, msg: TMsgKind, tok: TToken) = +proc parMessage(p: TParser, msg: string, tok: TToken) = ## Produce and emit a parser message to output about the token `tok` - parMessage(p, msg, prettyTok(tok)) + parMessage(p, errGenerated, msg % prettyTok(tok)) + +proc parMessage(p: TParser, arg: string) = + ## Produce and emit the parser message `arg` to output. + lexMessageTok(p.lex, errGenerated, p.tok, arg) template withInd(p, body: untyped) = let oldInd = p.currInd @@ -123,7 +129,13 @@ proc rawSkipComment(p: var TParser, node: PNode) = if p.tok.tokType == tkComment: if node != nil: if node.comment == nil: node.comment = "" - add(node.comment, p.tok.literal) + when defined(nimpretty): + if p.tok.commentOffsetB > p.tok.commentOffsetA: + add node.comment, fileSection(p.lex.fileIdx, p.tok.commentOffsetA, p.tok.commentOffsetB) + else: + add node.comment, p.tok.literal + else: + add(node.comment, p.tok.literal) else: parMessage(p, errInternal, "skipComment") getTok(p) @@ -134,6 +146,12 @@ proc skipComment(p: var TParser, node: PNode) = proc flexComment(p: var TParser, node: PNode) = if p.tok.indent < 0 or realInd(p): rawSkipComment(p, node) +const + errInvalidIndentation = "invalid indentation" + errIdentifierExpected = "identifier expected, but got '$1'" + errExprExpected = "expression expected, but found '$1'" + errTokenExpected = "'$1' expected" + proc skipInd(p: var TParser) = if p.tok.indent >= 0: if not realInd(p): parMessage(p, errInvalidIndentation) @@ -152,11 +170,11 @@ proc getTokNoInd(p: var TParser) = proc expectIdentOrKeyw(p: TParser) = if p.tok.tokType != tkSymbol and not isKeyword(p.tok.tokType): - lexMessage(p.lex, errIdentifierExpected, prettyTok(p.tok)) + lexMessage(p.lex, errGenerated, errIdentifierExpected % prettyTok(p.tok)) proc expectIdent(p: TParser) = if p.tok.tokType != tkSymbol: - lexMessage(p.lex, errIdentifierExpected, prettyTok(p.tok)) + lexMessage(p.lex, errGenerated, errIdentifierExpected % prettyTok(p.tok)) proc eat(p: var TParser, tokType: TTokType) = ## Move the parser to the next token if the current token is of type @@ -164,7 +182,8 @@ proc eat(p: var TParser, tokType: TTokType) = if p.tok.tokType == tokType: getTok(p) else: - lexMessageTok(p.lex, errTokenExpected, p.tok, TokTypeToStr[tokType]) + lexMessage(p.lex, errGenerated, + "expected: '" & TokTypeToStr[tokType] & "', but got: '" & prettyTok(p.tok) & "'") proc parLineInfo(p: TParser): TLineInfo = ## Retrieve the line information associated with the parser's current state. @@ -260,13 +279,9 @@ proc isUnary(p: TParser): bool = proc checkBinary(p: TParser) {.inline.} = ## Check if the current parser token is a binary operator. # we don't check '..' here as that's too annoying - if p.strongSpaces and p.tok.tokType == tkOpr: + if p.tok.tokType == tkOpr: if p.tok.strongSpaceB > 0 and p.tok.strongSpaceA != p.tok.strongSpaceB: - parMessage(p, errGenerated, - "Number of spaces around '$#' not consistent" % - prettyTok(p.tok)) - elif p.tok.strongSpaceA notin {0,1,2,4,8}: - parMessage(p, errGenerated, "Number of spaces must be 0,1,2,4 or 8") + parMessage(p, warnInconsistentSpacing, prettyTok(p.tok)) #| module = stmt ^* (';' / IND{=}) #| @@ -387,20 +402,6 @@ proc exprList(p: var TParser, endTok: TTokType, result: PNode) = getTok(p) optInd(p, a) -proc dotExpr(p: var TParser, a: PNode): PNode = - #| dotExpr = expr '.' optInd symbol - var info = p.parLineInfo - getTok(p) - result = newNodeI(nkDotExpr, info) - optInd(p, result) - addSon(result, a) - addSon(result, parseSymbol(p, smAfterDot)) - -proc qualifiedIdent(p: var TParser): PNode = - #| qualifiedIdent = symbol ('.' optInd symbol)? - result = parseSymbol(p) - if p.tok.tokType == tkDot: result = dotExpr(p, result) - proc exprColonEqExprListAux(p: var TParser, endTok: TTokType, result: PNode) = assert(endTok in {tkCurlyRi, tkCurlyDotRi, tkBracketRi, tkParRi}) getTok(p) @@ -411,6 +412,9 @@ proc exprColonEqExprListAux(p: var TParser, endTok: TTokType, result: PNode) = addSon(result, a) if p.tok.tokType != tkComma: break getTok(p) + # (1,) produces a tuple expression + if endTok == tkParRi and p.tok.tokType == tkParRi and result.kind == nkPar: + result.kind = nkTupleConstr skipComment(p, a) optPar(p) eat(p, endTok) @@ -421,6 +425,33 @@ proc exprColonEqExprList(p: var TParser, kind: TNodeKind, result = newNodeP(kind, p) exprColonEqExprListAux(p, endTok, result) +proc dotExpr(p: var TParser, a: PNode): PNode = + #| dotExpr = expr '.' optInd (symbol | '[:' exprList ']') + #| explicitGenericInstantiation = '[:' exprList ']' ( '(' exprColonEqExpr ')' )? + var info = p.parLineInfo + getTok(p) + result = newNodeI(nkDotExpr, info) + optInd(p, result) + addSon(result, a) + addSon(result, parseSymbol(p, smAfterDot)) + if p.tok.tokType == tkBracketLeColon and p.tok.strongSpaceA <= 0: + var x = newNodeI(nkBracketExpr, p.parLineInfo) + # rewrite 'x.y[:z]()' to 'y[z](x)' + x.add result[1] + exprList(p, tkBracketRi, x) + eat(p, tkBracketRi) + var y = newNodeI(nkCall, p.parLineInfo) + y.add x + y.add result[0] + if p.tok.tokType == tkParLe and p.tok.strongSpaceA <= 0: + exprColonEqExprListAux(p, tkParRi, y) + result = y + +proc qualifiedIdent(p: var TParser): PNode = + #| qualifiedIdent = symbol ('.' optInd symbol)? + result = parseSymbol(p) + if p.tok.tokType == tkDot: result = dotExpr(p, result) + proc setOrTableConstr(p: var TParser): PNode = #| setOrTableConstr = '{' ((exprColonEqExpr comma)* | ':' ) '}' result = newNodeP(nkCurly, p) @@ -551,6 +582,9 @@ proc parsePar(p: var TParser): PNode = if p.tok.tokType == tkComma: getTok(p) skipComment(p, a) + # (1,) produces a tuple expression: + if p.tok.tokType == tkParRi: + result.kind = nkTupleConstr # progress guaranteed while p.tok.tokType != tkParRi and p.tok.tokType != tkEof: var a = exprColonEqExpr(p) @@ -679,10 +713,17 @@ proc namedParams(p: var TParser, callee: PNode, # progress guaranteed exprColonEqExprListAux(p, endTok, result) -proc commandParam(p: var TParser): PNode = +proc commandParam(p: var TParser, isFirstParam: var bool): PNode = result = parseExpr(p) if p.tok.tokType == tkDo: result = postExprBlocks(p, result) + elif p.tok.tokType == tkEquals and not isFirstParam: + let lhs = result + result = newNodeP(nkExprEqExpr, p) + getTok(p) + addSon(result, lhs) + addSon(result, parseExpr(p)) + isFirstParam = false proc primarySuffix(p: var TParser, r: PNode, baseIndent: int): PNode = #| primarySuffix = '(' (exprColonEqExpr comma?)* ')' doBlocks? @@ -717,17 +758,19 @@ proc primarySuffix(p: var TParser, r: PNode, baseIndent: int): PNode = # progress guaranteed somePar() result = namedParams(p, result, nkCurlyExpr, tkCurlyRi) - of tkSymbol, tkAccent, tkIntLit..tkCharLit, tkNil, tkCast, tkAddr, tkType: - if p.inPragma == 0: + of tkSymbol, tkAccent, tkIntLit..tkCharLit, tkNil, tkCast, tkAddr, tkType, + tkOpr, tkDotDot: + if p.inPragma == 0 and (isUnary(p) or p.tok.tokType notin {tkOpr, tkDotDot}): # actually parsing {.push hints:off.} as {.push(hints:off).} is a sweet # solution, but pragmas.nim can't handle that let a = result result = newNodeP(nkCommand, p) addSon(result, a) + var isFirstParam = true when true: # progress NOT guaranteed p.hasProgress = false - addSon result, commandParam(p) + addSon result, commandParam(p, isFirstParam) if not p.hasProgress: break else: while p.tok.tokType != tkEof: @@ -818,7 +861,6 @@ proc parseIfExpr(p: var TParser, kind: TNodeKind): PNode = if realInd(p): p.currInd = p.tok.indent wasIndented = true - echo result.info, " yes ", p.currInd addSon(branch, parseExpr(p)) result.add branch while sameInd(p) or not wasIndented: @@ -855,7 +897,7 @@ proc parsePragma(p: var TParser): PNode = skipComment(p, a) optPar(p) if p.tok.tokType in {tkCurlyDotRi, tkCurlyRi}: getTok(p) - else: parMessage(p, errTokenExpected, ".}") + else: parMessage(p, "expected '.}'") dec p.inPragma proc identVis(p: var TParser; allowDot=false): PNode = @@ -914,15 +956,15 @@ proc parseIdentColonEquals(p: var TParser, flags: TDeclaredIdentFlags): PNode = optInd(p, result) addSon(result, parseTypeDesc(p)) else: - addSon(result, ast.emptyNode) + addSon(result, newNodeP(nkEmpty, p)) if p.tok.tokType != tkEquals and withBothOptional notin flags: - parMessage(p, errColonOrEqualsExpected, p.tok) + parMessage(p, "':' or '=' expected, but got '$1'", p.tok) if p.tok.tokType == tkEquals: getTok(p) optInd(p, result) addSon(result, parseExpr(p)) else: - addSon(result, ast.emptyNode) + addSon(result, newNodeP(nkEmpty, p)) proc parseTuple(p: var TParser, indentAllowed = false): PNode = #| inlTupleDecl = 'tuple' @@ -962,6 +1004,8 @@ proc parseTuple(p: var TParser, indentAllowed = false): PNode = parMessage(p, errIdentifierExpected, p.tok) break if not sameInd(p): break + elif p.tok.tokType == tkParLe: + parMessage(p, errGenerated, "the syntax for tuple types is 'tuple[...]', not 'tuple(...)'") else: result = newNodeP(nkTupleClassTy, p) @@ -983,8 +1027,11 @@ proc parseParamList(p: var TParser, retColon = true): PNode = a = parseIdentColonEquals(p, {withBothOptional, withPragma}) of tkParRi: break + of tkVar: + parMessage(p, errGenerated, "the syntax is 'parameter: var T', not 'var parameter: T'") + break else: - parMessage(p, errTokenExpected, ")") + parMessage(p, "expected closing ')'") break addSon(result, a) if p.tok.tokType notin {tkComma, tkSemiColon}: break @@ -1061,6 +1108,7 @@ proc parseTypeDescKAux(p: var TParser, kind: TNodeKind, #| distinct = 'distinct' optInd typeDesc result = newNodeP(kind, p) getTok(p) + if p.tok.indent != -1 and p.tok.indent <= p.currInd: return optInd(p, result) if not isOperator(p.tok) and isExprStart(p): addSon(result, primary(p, mode)) @@ -1144,7 +1192,7 @@ proc primary(p: var TParser, mode: TPrimaryMode): PNode = if mode == pmTypeDef: result = parseTypeClass(p) else: - parMessage(p, errInvalidToken, p.tok) + parMessage(p, "the 'concept' keyword is only valid in 'type' sections") of tkStatic: let info = parLineInfo(p) getTokNoInd(p) @@ -1254,7 +1302,7 @@ proc postExprBlocks(p: var TParser, x: PNode): PNode = if nextBlock.kind == nkElse: break else: if openingParams.kind != nkEmpty: - parMessage(p, errTokenExpected, ":") + parMessage(p, "expected ':'") proc parseExprStmt(p: var TParser): PNode = #| exprStmt = simpleExpr @@ -1274,17 +1322,18 @@ proc parseExprStmt(p: var TParser): PNode = addSon(result, b) else: # simpleExpr parsed 'p a' from 'p a, b'? + var isFirstParam = false if p.tok.indent < 0 and p.tok.tokType == tkComma and a.kind == nkCommand: result = a while true: getTok(p) optInd(p, result) - addSon(result, commandParam(p)) + addSon(result, commandParam(p, isFirstParam)) if p.tok.tokType != tkComma: break elif p.tok.indent < 0 and isExprStart(p): result = newNode(nkCommand, a.info, @[a]) while true: - addSon(result, commandParam(p)) + addSon(result, commandParam(p, isFirstParam)) if p.tok.tokType != tkComma: break getTok(p) optInd(p, result) @@ -1489,7 +1538,7 @@ proc parseTry(p: var TParser; isExpr: bool): PNode = addSon(b, parseStmt(p)) addSon(result, b) if b.kind == nkFinally: break - if b == nil: parMessage(p, errTokenExpected, "except") + if b == nil: parMessage(p, "expected 'except'") proc parseExceptBlock(p: var TParser, kind: TNodeKind): PNode = #| exceptBlock = 'except' colcom stmt @@ -1544,7 +1593,7 @@ proc parseAsm(p: var TParser): PNode = of tkTripleStrLit: addSon(result, newStrNodeP(nkTripleStrLit, p.tok.literal, p)) else: - parMessage(p, errStringLiteralExpected) + parMessage(p, "the 'asm' statement takes a string literal") addSon(result, ast.emptyNode) return getTok(p) @@ -1675,7 +1724,7 @@ proc parseSection(p: var TParser, kind: TNodeKind, parMessage(p, errIdentifierExpected, p.tok) proc parseConstant(p: var TParser): PNode = - #| constant = identWithPragma (colon typedesc)? '=' optInd expr indAndComment + #| constant = identWithPragma (colon typeDesc)? '=' optInd expr indAndComment result = newNodeP(nkConstDef, p) addSon(result, identWithPragma(p)) if p.tok.tokType == tkColon: @@ -1723,7 +1772,7 @@ proc parseEnum(p: var TParser): PNode = p.tok.tokType == tkEof: break if result.len <= 1: - lexMessageTok(p.lex, errIdentifierExpected, p.tok, prettyTok(p.tok)) + parMessage(p, errIdentifierExpected, p.tok) proc parseObjectPart(p: var TParser): PNode proc parseObjectWhen(p: var TParser): PNode = @@ -2086,7 +2135,7 @@ proc parseStmt(p: var TParser): PNode = case p.tok.tokType of tkIf, tkWhile, tkCase, tkTry, tkFor, tkBlock, tkAsm, tkProc, tkFunc, tkIterator, tkMacro, tkType, tkConst, tkWhen, tkVar: - parMessage(p, errComplexStmtRequiresInd) + parMessage(p, "complex statement requires indentation") result = ast.emptyNode else: if p.inSemiStmtList > 0: @@ -2130,7 +2179,12 @@ proc parseTopLevelStmt(p: var TParser): PNode = if p.tok.indent != 0: if p.firstTok and p.tok.indent < 0: discard elif p.tok.tokType != tkSemiColon: - parMessage(p, errInvalidIndentation) + # special casing for better error messages: + if p.tok.tokType == tkOpr and p.tok.ident.s == "*": + parMessage(p, errGenerated, + "invalid indentation; an export marker '*' follows the declared identifier") + else: + parMessage(p, errInvalidIndentation) p.firstTok = false case p.tok.tokType of tkSemiColon: @@ -2144,8 +2198,8 @@ proc parseTopLevelStmt(p: var TParser): PNode = if result.kind == nkEmpty: parMessage(p, errExprExpected, p.tok) break -proc parseString*(s: string; cache: IdentCache; filename: string = ""; - line: int = 0; +proc parseString*(s: string; cache: IdentCache; config: ConfigRef; + filename: string = ""; line: int = 0; errorHandler: TErrorHandler = nil): PNode = ## Parses a string into an AST, returning the top node. ## `filename` and `line`, although optional, provide info so that the @@ -2158,7 +2212,7 @@ proc parseString*(s: string; cache: IdentCache; filename: string = ""; # XXX for now the builtin 'parseStmt/Expr' functions do not know about strong # spaces... parser.lex.errorHandler = errorHandler - openParser(parser, filename, stream, cache, false) + openParser(parser, filename, stream, cache, config, false) result = parser.parseAll closeParser(parser) |