diff options
Diffstat (limited to 'compiler/lexer.nim')
-rw-r--r-- | compiler/lexer.nim | 123 |
1 files changed, 53 insertions, 70 deletions
diff --git a/compiler/lexer.nim b/compiler/lexer.nim index 5eaa4c09f..0dd6245b0 100644 --- a/compiler/lexer.nim +++ b/compiler/lexer.nim @@ -318,17 +318,16 @@ template eatChar(L: var TLexer, t: var TToken) = proc getNumber(L: var TLexer, result: var TToken) = proc matchUnderscoreChars(L: var TLexer, tok: var TToken, chars: set[char]): Natural = var pos = L.bufpos # use registers for pos, buf - var buf = L.buf result = 0 while true: - if buf[pos] in chars: - add(tok.literal, buf[pos]) + if L.buf[pos] in chars: + add(tok.literal, L.buf[pos]) inc(pos) inc(result) else: break - if buf[pos] == '_': - if buf[pos+1] notin chars: + if L.buf[pos] == '_': + if L.buf[pos+1] notin chars: lexMessage(L, errGenerated, "only single underscores may occur in a token and token may not " & "end with an underscore: e.g. '1__1' and '1_' are invalid") @@ -339,9 +338,8 @@ proc getNumber(L: var TLexer, result: var TToken) = proc matchChars(L: var TLexer, tok: var TToken, chars: set[char]) = var pos = L.bufpos # use registers for pos, buf - var buf = L.buf - while buf[pos] in chars: - add(tok.literal, buf[pos]) + while L.buf[pos] in chars: + add(tok.literal, L.buf[pos]) inc(pos) L.bufpos = pos @@ -800,25 +798,23 @@ type proc getString(L: var TLexer, tok: var TToken, mode: StringMode) = var pos = L.bufpos - var buf = L.buf # put `buf` in a register var line = L.lineNumber # save linenumber for better error message tokenBegin(tok, pos - ord(mode == raw)) inc pos # skip " - if buf[pos] == '\"' and buf[pos+1] == '\"': + if L.buf[pos] == '\"' and L.buf[pos+1] == '\"': tok.tokType = tkTripleStrLit # long string literal: inc(pos, 2) # skip "" # skip leading newline: - if buf[pos] in {' ', '\t'}: + if L.buf[pos] in {' ', '\t'}: var newpos = pos+1 - while buf[newpos] in {' ', '\t'}: inc newpos - if buf[newpos] in {CR, LF}: pos = newpos + while L.buf[newpos] in {' ', '\t'}: inc newpos + if L.buf[newpos] in {CR, LF}: pos = newpos pos = handleCRLF(L, pos) - buf = L.buf while true: - case buf[pos] + case L.buf[pos] of '\"': - if buf[pos+1] == '\"' and buf[pos+2] == '\"' and - buf[pos+3] != '\"': + if L.buf[pos+1] == '\"' and L.buf[pos+2] == '\"' and + L.buf[pos+3] != '\"': tokenEndIgnore(tok, pos+2) L.bufpos = pos + 3 # skip the three """ break @@ -827,7 +823,6 @@ proc getString(L: var TLexer, tok: var TToken, mode: StringMode) = of CR, LF: tokenEndIgnore(tok, pos) pos = handleCRLF(L, pos) - buf = L.buf add(tok.literal, "\n") of nimlexbase.EndOfFile: tokenEndIgnore(tok, pos) @@ -838,16 +833,16 @@ proc getString(L: var TLexer, tok: var TToken, mode: StringMode) = L.bufpos = pos break else: - add(tok.literal, buf[pos]) + add(tok.literal, L.buf[pos]) inc(pos) else: # ordinary string literal if mode != normal: tok.tokType = tkRStrLit else: tok.tokType = tkStrLit while true: - var c = buf[pos] + var c = L.buf[pos] if c == '\"': - if mode != normal and buf[pos+1] == '\"': + if mode != normal and L.buf[pos+1] == '\"': inc(pos, 2) add(tok.literal, '"') else: @@ -885,10 +880,9 @@ proc getCharacter(L: var TLexer, tok: var TToken) = proc getSymbol(L: var TLexer, tok: var TToken) = var h: Hash = 0 var pos = L.bufpos - var buf = L.buf tokenBegin(tok, pos) while true: - var c = buf[pos] + var c = L.buf[pos] case c of 'a'..'z', '0'..'9', '\x80'..'\xFF': h = h !& ord(c) @@ -898,7 +892,7 @@ proc getSymbol(L: var TLexer, tok: var TToken) = h = h !& ord(c) inc(pos) of '_': - if buf[pos+1] notin SymChars: + if L.buf[pos+1] notin SymChars: lexMessage(L, errGenerated, "invalid token: trailing underscore") break inc(pos) @@ -923,11 +917,10 @@ proc endOperator(L: var TLexer, tok: var TToken, pos: int, proc getOperator(L: var TLexer, tok: var TToken) = var pos = L.bufpos - var buf = L.buf tokenBegin(tok, pos) var h: Hash = 0 while true: - var c = buf[pos] + var c = L.buf[pos] if c notin OpChars: break h = h !& ord(c) inc(pos) @@ -936,10 +929,10 @@ proc getOperator(L: var TLexer, tok: var TToken) = # advance pos but don't store it in L.bufpos so the next token (which might # be an operator too) gets the preceding spaces: tok.strongSpaceB = 0 - while buf[pos] == ' ': + while L.buf[pos] == ' ': inc pos inc tok.strongSpaceB - if buf[pos] in {CR, LF, nimlexbase.EndOfFile}: + if L.buf[pos] in {CR, LF, nimlexbase.EndOfFile}: tok.strongSpaceB = -1 proc getPrecedence*(tok: TToken, strongSpaces: bool): int = @@ -980,9 +973,8 @@ proc getPrecedence*(tok: TToken, strongSpaces: bool): int = proc newlineFollows*(L: TLexer): bool = var pos = L.bufpos - var buf = L.buf while true: - case buf[pos] + case L.buf[pos] of ' ', '\t': inc(pos) of CR, LF: @@ -990,49 +982,47 @@ proc newlineFollows*(L: TLexer): bool = break of '#': inc(pos) - if buf[pos] == '#': inc(pos) - if buf[pos] != '[': return true + if L.buf[pos] == '#': inc(pos) + if L.buf[pos] != '[': return true else: break proc skipMultiLineComment(L: var TLexer; tok: var TToken; start: int; isDoc: bool) = var pos = start - var buf = L.buf var toStrip = 0 tokenBegin(tok, pos) # detect the amount of indentation: if isDoc: toStrip = getColNumber(L, pos) - while buf[pos] == ' ': inc pos - if buf[pos] in {CR, LF}: + while L.buf[pos] == ' ': inc pos + if L.buf[pos] in {CR, LF}: pos = handleCRLF(L, pos) - buf = L.buf toStrip = 0 - while buf[pos] == ' ': + while L.buf[pos] == ' ': inc pos inc toStrip var nesting = 0 while true: - case buf[pos] + case L.buf[pos] of '#': if isDoc: - if buf[pos+1] == '#' and buf[pos+2] == '[': + if L.buf[pos+1] == '#' and L.buf[pos+2] == '[': inc nesting tok.literal.add '#' - elif buf[pos+1] == '[': + elif L.buf[pos+1] == '[': inc nesting inc pos of ']': if isDoc: - if buf[pos+1] == '#' and buf[pos+2] == '#': + if L.buf[pos+1] == '#' and L.buf[pos+2] == '#': if nesting == 0: tokenEndIgnore(tok, pos+2) inc(pos, 3) break dec nesting tok.literal.add ']' - elif buf[pos+1] == '#': + elif L.buf[pos+1] == '#': if nesting == 0: tokenEndIgnore(tok, pos+1) inc(pos, 2) @@ -1042,14 +1032,13 @@ proc skipMultiLineComment(L: var TLexer; tok: var TToken; start: int; of CR, LF: tokenEndIgnore(tok, pos) pos = handleCRLF(L, pos) - buf = L.buf # strip leading whitespace: when defined(nimpretty): tok.literal.add "\L" if isDoc: when not defined(nimpretty): tok.literal.add "\n" inc tok.iNumber var c = toStrip - while buf[pos] == ' ' and c > 0: + while L.buf[pos] == ' ' and c > 0: inc pos dec c of nimlexbase.EndOfFile: @@ -1057,7 +1046,7 @@ proc skipMultiLineComment(L: var TLexer; tok: var TToken; start: int; lexMessagePos(L, errGenerated, pos, "end of multiline comment expected") break else: - if isDoc or defined(nimpretty): tok.literal.add buf[pos] + if isDoc or defined(nimpretty): tok.literal.add L.buf[pos] inc(pos) L.bufpos = pos when defined(nimpretty): @@ -1065,49 +1054,47 @@ proc skipMultiLineComment(L: var TLexer; tok: var TToken; start: int; proc scanComment(L: var TLexer, tok: var TToken) = var pos = L.bufpos - var buf = L.buf tok.tokType = tkComment # iNumber contains the number of '\n' in the token tok.iNumber = 0 - assert buf[pos+1] == '#' + assert L.buf[pos+1] == '#' when defined(nimpretty): tok.commentOffsetA = L.offsetBase + pos - 1 - if buf[pos+2] == '[': + if L.buf[pos+2] == '[': skipMultiLineComment(L, tok, pos+3, true) return tokenBegin(tok, pos) inc(pos, 2) var toStrip = 0 - while buf[pos] == ' ': + while L.buf[pos] == ' ': inc pos inc toStrip while true: var lastBackslash = -1 - while buf[pos] notin {CR, LF, nimlexbase.EndOfFile}: - if buf[pos] == '\\': lastBackslash = pos+1 - add(tok.literal, buf[pos]) + while L.buf[pos] notin {CR, LF, nimlexbase.EndOfFile}: + if L.buf[pos] == '\\': lastBackslash = pos+1 + add(tok.literal, L.buf[pos]) inc(pos) tokenEndIgnore(tok, pos) pos = handleCRLF(L, pos) - buf = L.buf var indent = 0 - while buf[pos] == ' ': + while L.buf[pos] == ' ': inc(pos) inc(indent) - if buf[pos] == '#' and buf[pos+1] == '#': + if L.buf[pos] == '#' and L.buf[pos+1] == '#': tok.literal.add "\n" inc(pos, 2) var c = toStrip - while buf[pos] == ' ' and c > 0: + while L.buf[pos] == ' ' and c > 0: inc pos dec c inc tok.iNumber else: - if buf[pos] > ' ': + if L.buf[pos] > ' ': L.indentAhead = indent tokenEndIgnore(tok, pos) break @@ -1117,7 +1104,6 @@ proc scanComment(L: var TLexer, tok: var TToken) = proc skip(L: var TLexer, tok: var TToken) = var pos = L.bufpos - var buf = L.buf tokenBegin(tok, pos) tok.strongSpaceA = 0 when defined(nimpretty): @@ -1127,7 +1113,7 @@ proc skip(L: var TLexer, tok: var TToken) = tok.commentOffsetB = tok.commentOffsetA tok.line = -1 while true: - case buf[pos] + case L.buf[pos] of ' ': inc(pos) inc(tok.strongSpaceA) @@ -1137,13 +1123,12 @@ proc skip(L: var TLexer, tok: var TToken) = of CR, LF: tokenEndPrevious(tok, pos) pos = handleCRLF(L, pos) - buf = L.buf var indent = 0 while true: - if buf[pos] == ' ': + if L.buf[pos] == ' ': inc(pos) inc(indent) - elif buf[pos] == '#' and buf[pos+1] == '[': + elif L.buf[pos] == '#' and L.buf[pos+1] == '[': when defined(nimpretty): hasComment = true if tok.line < 0: @@ -1151,32 +1136,30 @@ proc skip(L: var TLexer, tok: var TToken) = commentIndent = indent skipMultiLineComment(L, tok, pos+2, false) pos = L.bufpos - buf = L.buf else: break tok.strongSpaceA = 0 when defined(nimpretty): - if buf[pos] == '#' and tok.line < 0: commentIndent = indent - if buf[pos] > ' ' and (buf[pos] != '#' or buf[pos+1] == '#'): + if L.buf[pos] == '#' and tok.line < 0: commentIndent = indent + if L.buf[pos] > ' ' and (L.buf[pos] != '#' or L.buf[pos+1] == '#'): tok.indent = indent L.currLineIndent = indent break of '#': # do not skip documentation comment: - if buf[pos+1] == '#': break + if L.buf[pos+1] == '#': break when defined(nimpretty): hasComment = true if tok.line < 0: tok.line = L.lineNumber - if buf[pos+1] == '[': + if L.buf[pos+1] == '[': skipMultiLineComment(L, tok, pos+2, false) pos = L.bufpos - buf = L.buf else: tokenBegin(tok, pos) - while buf[pos] notin {CR, LF, nimlexbase.EndOfFile}: - when defined(nimpretty): tok.literal.add buf[pos] + while L.buf[pos] notin {CR, LF, nimlexbase.EndOfFile}: + when defined(nimpretty): tok.literal.add L.buf[pos] inc(pos) tokenEndIgnore(tok, pos+1) when defined(nimpretty): |