diff options
Diffstat (limited to 'compiler/lexer.nim')
-rw-r--r-- | compiler/lexer.nim | 139 |
1 files changed, 47 insertions, 92 deletions
diff --git a/compiler/lexer.nim b/compiler/lexer.nim index bf9bf5343..6660ff65c 100644 --- a/compiler/lexer.nim +++ b/compiler/lexer.nim @@ -1,7 +1,7 @@ # # # The Nimrod Compiler -# (c) Copyright 2012 Andreas Rumpf +# (c) Copyright 2013 Andreas Rumpf # # See the file "copying.txt", included in this # distribution, for details about the copyright. @@ -58,8 +58,7 @@ type tkParDotLe, tkParDotRi, # (. and .) tkComma, tkSemiColon, tkColon, tkColonColon, tkEquals, tkDot, tkDotDot, - tkOpr, tkComment, tkAccent, tkInd, tkSad, - tkDed, # pseudo token types used by the source renderers: + tkOpr, tkComment, tkAccent, tkSpaces, tkInfixOpr, tkPrefixOpr, tkPostfixOpr, TTokTypes* = set[TTokType] @@ -91,8 +90,8 @@ const ")", "[", "]", "{", "}", "[.", ".]", "{.", ".}", "(.", ".)", ",", ";", ":", "::", "=", ".", "..", - "tkOpr", "tkComment", "`", "[new indentation]", - "[same indentation]", "[dedentation]", "tkSpaces", "tkInfixOpr", + "tkOpr", "tkComment", "`", + "tkSpaces", "tkInfixOpr", "tkPrefixOpr", "tkPostfixOpr"] type @@ -102,7 +101,8 @@ type base2, base8, base16 TToken* = object # a Nimrod token tokType*: TTokType # the type of the token - indent*: int # the indentation; only valid if tokType = tkIndent + indent*: int # the indentation; != -1 if the token has been + # preceeded with indentation ident*: PIdent # the parsed identifier iNumber*: BiggestInt # the parsed integer literal fNumber*: BiggestFloat # the parsed floating point literal @@ -113,8 +113,6 @@ type TLexer* = object of TBaseLexer fileIdx*: int32 - indentStack*: seq[int] # the indentation stack - dedent*: int # counter for DED token generation indentAhead*: int # if > 0 an indendation has already been read # this is needed because scanning comments # needs so much look-ahead @@ -122,9 +120,6 @@ type var gLinesCompiled*: int # all lines that have been compiled -proc pushInd*(L: var TLexer, indent: int) - -proc popInd*(L: var TLexer) proc isKeyword*(kind: TTokType): bool proc openLexer*(lex: var TLexer, fileidx: int32, inputstream: PLLStream) proc rawGetTok*(L: var TLexer, tok: var TToken) @@ -154,31 +149,14 @@ proc isNimrodIdentifier*(s: string): bool = inc(i) result = true -proc pushInd(L: var TLexer, indent: int) = - var length = len(L.indentStack) - setlen(L.indentStack, length + 1) - if (indent > L.indentStack[length - 1]): - L.indentstack[length] = indent - else: - InternalError("pushInd") - -proc popInd(L: var TLexer) = - var length = len(L.indentStack) - setlen(L.indentStack, length - 1) - -proc findIdent(L: TLexer, indent: int): bool = - for i in countdown(len(L.indentStack) - 1, 0): - if L.indentStack[i] == indent: - return true - proc tokToStr*(tok: TToken): string = case tok.tokType of tkIntLit..tkInt64Lit: result = $tok.iNumber of tkFloatLit..tkFloat64Lit: result = $tok.fNumber of tkInvalid, tkStrLit..tkCharLit, tkComment: result = tok.literal - of tkParLe..tkColon, tkEof, tkInd, tkSad, tkDed, tkAccent: + of tkParLe..tkColon, tkEof, tkAccent: result = tokTypeToStr[tok.tokType] - else: + else: if tok.ident != nil: result = tok.ident.s else: @@ -216,7 +194,6 @@ proc fillToken(L: var TToken) = proc openLexer(lex: var TLexer, fileIdx: int32, inputstream: PLLStream) = openBaseLexer(lex, inputstream) - lex.indentStack = @[0] lex.fileIdx = fileIdx lex.indentAhead = - 1 inc(lex.Linenumber, inputstream.lineOffset) @@ -434,9 +411,10 @@ proc GetNumber(L: var TLexer): TToken = result.tokType = tkInt64Lit elif result.tokType != tkInt64Lit: lexMessage(L, errInvalidNumber, result.literal) - except EInvalidValue: lexMessage(L, errInvalidNumber, result.literal) - except EOverflow: lexMessage(L, errNumberOutOfRange, result.literal) - except EOutOfRange: lexMessage(L, errNumberOutOfRange, result.literal) + except EInvalidValue: + lexMessage(L, errInvalidNumber, result.literal) + except EOverflow, EOutOfRange: + lexMessage(L, errNumberOutOfRange, result.literal) L.bufpos = endpos proc handleHexChar(L: var TLexer, xi: var int) = @@ -651,24 +629,6 @@ proc getOperator(L: var TLexer, tok: var TToken) = Inc(pos) endOperator(L, tok, pos, h) -proc handleIndentation(L: var TLexer, tok: var TToken, indent: int) = - tok.indent = indent - var i = high(L.indentStack) - if indent > L.indentStack[i]: - tok.tokType = tkInd - elif indent == L.indentStack[i]: - tok.tokType = tkSad - else: - # check we have the indentation somewhere in the stack: - while (i >= 0) and (indent != L.indentStack[i]): - dec(i) - inc(L.dedent) - dec(L.dedent) - tok.tokType = tkDed - if i < 0: - tok.tokType = tkSad # for the parser it is better as SAD - lexMessage(L, errInvalidIndentation) - proc scanComment(L: var TLexer, tok: var TToken) = var pos = L.bufpos var buf = L.buf @@ -705,53 +665,45 @@ proc scanComment(L: var TLexer, tok: var TToken) = else: if buf[pos] > ' ': L.indentAhead = indent - inc(L.dedent) - break + break L.bufpos = pos -proc skip(L: var TLexer, tok: var TToken) = +proc skip(L: var TLexer, tok: var TToken) = var pos = L.bufpos var buf = L.buf - while true: + while true: case buf[pos] - of ' ': + of ' ': Inc(pos) - of Tabulator: + of Tabulator: lexMessagePos(L, errTabulatorsAreNotAllowed, pos) - inc(pos) # BUGFIX - of CR, LF: + inc(pos) + of CR, LF: pos = HandleCRLF(L, pos) buf = L.buf var indent = 0 - while buf[pos] == ' ': + while buf[pos] == ' ': Inc(pos) Inc(indent) - if (buf[pos] > ' '): - handleIndentation(L, tok, indent) - break - else: + if buf[pos] > ' ': + tok.indent = indent + break + else: break # EndOfFile also leaves the loop L.bufpos = pos -proc rawGetTok(L: var TLexer, tok: var TToken) = +proc rawGetTok(L: var TLexer, tok: var TToken) = fillToken(tok) - if L.dedent > 0: - dec(L.dedent) - if L.indentAhead >= 0: - handleIndentation(L, tok, L.indentAhead) - L.indentAhead = - 1 - else: - tok.tokType = tkDed - return + if L.indentAhead >= 0: + tok.indent = L.indentAhead + L.indentAhead = -1 + else: + tok.indent = -1 skip(L, tok) - # got an documentation comment or tkIndent, return that: - if tok.toktype != tkInvalid: return var c = L.buf[L.bufpos] - if c in SymStartChars - {'r', 'R', 'l'}: + if c in SymStartChars - {'r', 'R', 'l'}: getSymbol(L, tok) - elif c in {'0'..'9'}: - tok = getNumber(L) - else: + else: case c of '#': scanComment(L, tok) @@ -769,10 +721,10 @@ proc rawGetTok(L: var TLexer, tok: var TToken) = of 'l': # if we parsed exactly one character and its a small L (l), this # is treated as a warning because it may be confused with the number 1 - if not (L.buf[L.bufpos + 1] in (SymChars + {'_'})): + if L.buf[L.bufpos+1] notin (SymChars + {'_'}): lexMessage(L, warnSmallLshouldNotBeUsed) getSymbol(L, tok) - of 'r', 'R': + of 'r', 'R': if L.buf[L.bufPos + 1] == '\"': Inc(L.bufPos) getString(L, tok, true) @@ -780,7 +732,7 @@ proc rawGetTok(L: var TLexer, tok: var TToken) = getSymbol(L, tok) of '(': Inc(L.bufpos) - if (L.buf[L.bufPos] == '.') and (L.buf[L.bufPos + 1] != '.'): + if L.buf[L.bufPos] == '.' and L.buf[L.bufPos+1] != '.': tok.toktype = tkParDotLe Inc(L.bufpos) else: @@ -790,29 +742,29 @@ proc rawGetTok(L: var TLexer, tok: var TToken) = Inc(L.bufpos) of '[': Inc(L.bufpos) - if (L.buf[L.bufPos] == '.') and (L.buf[L.bufPos + 1] != '.'): + if L.buf[L.bufPos] == '.' and L.buf[L.bufPos+1] != '.': tok.toktype = tkBracketDotLe Inc(L.bufpos) - else: + else: tok.toktype = tkBracketLe - of ']': + of ']': tok.toktype = tkBracketRi Inc(L.bufpos) - of '.': - if L.buf[L.bufPos + 1] == ']': + of '.': + if L.buf[L.bufPos+1] == ']': tok.tokType = tkBracketDotRi Inc(L.bufpos, 2) - elif L.buf[L.bufPos + 1] == '}': + elif L.buf[L.bufPos+1] == '}': tok.tokType = tkCurlyDotRi Inc(L.bufpos, 2) - elif L.buf[L.bufPos + 1] == ')': + elif L.buf[L.bufPos+1] == ')': tok.tokType = tkParDotRi Inc(L.bufpos, 2) else: getOperator(L, tok) of '{': Inc(L.bufpos) - if (L.buf[L.bufPos] == '.') and (L.buf[L.bufPos+1] != '.'): + if L.buf[L.bufPos] == '.' and L.buf[L.bufPos+1] != '.': tok.toktype = tkCurlyDotLe Inc(L.bufpos) else: @@ -838,13 +790,16 @@ proc rawGetTok(L: var TLexer, tok: var TToken) = tok.tokType = tkCharLit getCharacter(L, tok) tok.tokType = tkCharLit + of '0'..'9': + tok = getNumber(L) else: if c in OpChars: getOperator(L, tok) elif c == nimlexbase.EndOfFile: tok.toktype = tkEof + tok.indent = 0 else: - tok.literal = c & "" + tok.literal = $c tok.tokType = tkInvalid lexMessage(L, errInvalidToken, c & " (\\" & $(ord(c)) & ')') Inc(L.bufpos) |