diff options
author | Araq <rumpf_a@web.de> | 2013-04-20 01:59:39 +0200 |
---|---|---|
committer | Araq <rumpf_a@web.de> | 2013-04-20 01:59:39 +0200 |
commit | 2796121dd7410af45a2fbaf43cc7577799fb3157 (patch) | |
tree | c07d68dbaf2a6916aef0a03d6d898ab73c496d31 /compiler/lexer.nim | |
parent | 04216fc7500e6c74f41c8f5aa743fb43a1ee65da (diff) | |
download | Nim-2796121dd7410af45a2fbaf43cc7577799fb3157.tar.gz |
next steps for the new parser/grammar
Diffstat (limited to 'compiler/lexer.nim')
-rw-r--r-- | compiler/lexer.nim | 78 |
1 files changed, 37 insertions, 41 deletions
diff --git a/compiler/lexer.nim b/compiler/lexer.nim index f28702f5c..601828ed9 100644 --- a/compiler/lexer.nim +++ b/compiler/lexer.nim @@ -58,7 +58,7 @@ type tkParDotLe, tkParDotRi, # (. and .) tkComma, tkSemiColon, tkColon, tkColonColon, tkEquals, tkDot, tkDotDot, - tkOpr, tkComment, tkAccent, tkInd, + tkOpr, tkComment, tkAccent, tkSpaces, tkInfixOpr, tkPrefixOpr, tkPostfixOpr, TTokTypes* = set[TTokType] @@ -90,7 +90,7 @@ const ")", "[", "]", "{", "}", "[.", ".]", "{.", ".}", "(.", ".)", ",", ";", ":", "::", "=", ".", "..", - "tkOpr", "tkComment", "`", "[new indentation]", + "tkOpr", "tkComment", "`", "tkSpaces", "tkInfixOpr", "tkPrefixOpr", "tkPostfixOpr"] @@ -154,7 +154,7 @@ proc tokToStr*(tok: TToken): string = of tkIntLit..tkInt64Lit: result = $tok.iNumber of tkFloatLit..tkFloat64Lit: result = $tok.fNumber of tkInvalid, tkStrLit..tkCharLit, tkComment: result = tok.literal - of tkParLe..tkColon, tkEof, tkInd, tkAccent: + of tkParLe..tkColon, tkEof, tkAccent: result = tokTypeToStr[tok.tokType] else: if tok.ident != nil: @@ -411,9 +411,10 @@ proc GetNumber(L: var TLexer): TToken = result.tokType = tkInt64Lit elif result.tokType != tkInt64Lit: lexMessage(L, errInvalidNumber, result.literal) - except EInvalidValue: lexMessage(L, errInvalidNumber, result.literal) - except EOverflow: lexMessage(L, errNumberOutOfRange, result.literal) - except EOutOfRange: lexMessage(L, errNumberOutOfRange, result.literal) + except EInvalidValue: + lexMessage(L, errInvalidNumber, result.literal) + except EOverflow, EOutOfRange: + lexMessage(L, errNumberOutOfRange, result.literal) L.bufpos = endpos proc handleHexChar(L: var TLexer, xi: var int) = @@ -628,10 +629,6 @@ proc getOperator(L: var TLexer, tok: var TToken) = Inc(pos) endOperator(L, tok, pos, h) -proc handleIndentation(tok: var TToken, indent: int) {.inline.} = - tok.indent = indent - tok.tokType = tkInd - proc scanComment(L: var TLexer, tok: var TToken) = var pos = L.bufpos var buf = L.buf @@ -668,28 +665,28 @@ proc scanComment(L: var TLexer, tok: var TToken) = else: if buf[pos] > ' ': L.indentAhead = indent - break + break L.bufpos = pos -proc skip(L: var TLexer, tok: var TToken) = +proc skip(L: var TLexer, tok: var TToken) = var pos = L.bufpos var buf = L.buf - while true: + while true: case buf[pos] - of ' ': + of ' ': Inc(pos) - of Tabulator: + of Tabulator: lexMessagePos(L, errTabulatorsAreNotAllowed, pos) - inc(pos) # BUGFIX - of CR, LF: + inc(pos) + of CR, LF: pos = HandleCRLF(L, pos) buf = L.buf var indent = 0 - while buf[pos] == ' ': + while buf[pos] == ' ': Inc(pos) Inc(indent) - if (buf[pos] > ' '): - handleIndentation(tok, indent) + if buf[pos] > ' ': + tok.indent = indent break else: break # EndOfFile also leaves the loop @@ -698,18 +695,15 @@ proc skip(L: var TLexer, tok: var TToken) = proc rawGetTok(L: var TLexer, tok: var TToken) = fillToken(tok) if L.indentAhead >= 0: - handleIndentation(tok, L.indentAhead) - L.indentAhead = - 1 - return + tok.indent = L.indentAhead + L.indentAhead = -1 + else: + tok.indent = -1 skip(L, tok) - # got an documentation comment or tkIndent, return that: - if tok.toktype != tkInvalid: return var c = L.buf[L.bufpos] - if c in SymStartChars - {'r', 'R', 'l'}: + if c in SymStartChars - {'r', 'R', 'l'}: getSymbol(L, tok) - elif c in {'0'..'9'}: - tok = getNumber(L) - else: + else: case c of '#': scanComment(L, tok) @@ -727,10 +721,10 @@ proc rawGetTok(L: var TLexer, tok: var TToken) = of 'l': # if we parsed exactly one character and its a small L (l), this # is treated as a warning because it may be confused with the number 1 - if not (L.buf[L.bufpos + 1] in (SymChars + {'_'})): + if L.buf[L.bufpos+1] notin (SymChars + {'_'}): lexMessage(L, warnSmallLshouldNotBeUsed) getSymbol(L, tok) - of 'r', 'R': + of 'r', 'R': if L.buf[L.bufPos + 1] == '\"': Inc(L.bufPos) getString(L, tok, true) @@ -738,7 +732,7 @@ proc rawGetTok(L: var TLexer, tok: var TToken) = getSymbol(L, tok) of '(': Inc(L.bufpos) - if (L.buf[L.bufPos] == '.') and (L.buf[L.bufPos + 1] != '.'): + if L.buf[L.bufPos] == '.' and L.buf[L.bufPos+1] != '.': tok.toktype = tkParDotLe Inc(L.bufpos) else: @@ -748,29 +742,29 @@ proc rawGetTok(L: var TLexer, tok: var TToken) = Inc(L.bufpos) of '[': Inc(L.bufpos) - if (L.buf[L.bufPos] == '.') and (L.buf[L.bufPos + 1] != '.'): + if L.buf[L.bufPos] == '.' and L.buf[L.bufPos+1] != '.': tok.toktype = tkBracketDotLe Inc(L.bufpos) - else: + else: tok.toktype = tkBracketLe - of ']': + of ']': tok.toktype = tkBracketRi Inc(L.bufpos) - of '.': - if L.buf[L.bufPos + 1] == ']': + of '.': + if L.buf[L.bufPos+1] == ']': tok.tokType = tkBracketDotRi Inc(L.bufpos, 2) - elif L.buf[L.bufPos + 1] == '}': + elif L.buf[L.bufPos+1] == '}': tok.tokType = tkCurlyDotRi Inc(L.bufpos, 2) - elif L.buf[L.bufPos + 1] == ')': + elif L.buf[L.bufPos+1] == ')': tok.tokType = tkParDotRi Inc(L.bufpos, 2) else: getOperator(L, tok) of '{': Inc(L.bufpos) - if (L.buf[L.bufPos] == '.') and (L.buf[L.bufPos+1] != '.'): + if L.buf[L.bufPos] == '.' and L.buf[L.bufPos+1] != '.': tok.toktype = tkCurlyDotLe Inc(L.bufpos) else: @@ -796,13 +790,15 @@ proc rawGetTok(L: var TLexer, tok: var TToken) = tok.tokType = tkCharLit getCharacter(L, tok) tok.tokType = tkCharLit + of '0'..'9': + tok = getNumber(L) else: if c in OpChars: getOperator(L, tok) elif c == lexbase.EndOfFile: tok.toktype = tkEof else: - tok.literal = c & "" + tok.literal = $c tok.tokType = tkInvalid lexMessage(L, errInvalidToken, c & " (\\" & $(ord(c)) & ')') Inc(L.bufpos) |