diff options
Diffstat (limited to 'compiler/lexer.nim')
-rw-r--r-- | compiler/lexer.nim | 346 |
1 files changed, 173 insertions, 173 deletions
diff --git a/compiler/lexer.nim b/compiler/lexer.nim index c86762121..78266ef4d 100644 --- a/compiler/lexer.nim +++ b/compiler/lexer.nim @@ -10,51 +10,51 @@ # This scanner is handwritten for efficiency. I used an elegant buffering # scheme which I have not seen anywhere else: # We guarantee that a whole line is in the buffer. Thus only when scanning -# the \n or \r character we have to check wether we need to read in the next +# the \n or \r character we have to check wether we need to read in the next # chunk. (\n or \r already need special handling for incrementing the line # counter; choosing both \n and \r allows the scanner to properly read Unix, # DOS or Macintosh text files, even when it is not the native format. -import +import hashes, options, msgs, strutils, platform, idents, nimlexbase, llstream, wordrecg -const +const MaxLineLength* = 80 # lines longer than this lead to a warning numChars*: set[char] = {'0'..'9', 'a'..'z', 'A'..'Z'} SymChars*: set[char] = {'a'..'z', 'A'..'Z', '0'..'9', '\x80'..'\xFF'} SymStartChars*: set[char] = {'a'..'z', 'A'..'Z', '\x80'..'\xFF'} - OpChars*: set[char] = {'+', '-', '*', '/', '\\', '<', '>', '!', '?', '^', '.', + OpChars*: set[char] = {'+', '-', '*', '/', '\\', '<', '>', '!', '?', '^', '.', '|', '=', '%', '&', '$', '@', '~', ':', '\x80'..'\xFF'} # don't forget to update the 'highlite' module if these charsets should change -type - TTokType* = enum +type + TTokType* = enum tkInvalid, tkEof, # order is important here! tkSymbol, # keywords: - tkAddr, tkAnd, tkAs, tkAsm, tkAtomic, - tkBind, tkBlock, tkBreak, tkCase, tkCast, - tkConst, tkContinue, tkConverter, + tkAddr, tkAnd, tkAs, tkAsm, tkAtomic, + tkBind, tkBlock, tkBreak, tkCase, tkCast, + tkConcept, tkConst, tkContinue, tkConverter, tkDefer, tkDiscard, tkDistinct, tkDiv, tkDo, tkElif, tkElse, tkEnd, tkEnum, tkExcept, tkExport, tkFinally, tkFor, tkFrom, tkFunc, - tkGeneric, tkIf, tkImport, tkIn, tkInclude, tkInterface, + tkGeneric, tkIf, tkImport, tkIn, tkInclude, tkInterface, tkIs, tkIsnot, tkIterator, tkLet, - tkMacro, tkMethod, tkMixin, tkMod, tkNil, tkNot, tkNotin, - tkObject, tkOf, tkOr, tkOut, + tkMacro, tkMethod, tkMixin, tkMod, tkNil, tkNot, tkNotin, + tkObject, tkOf, tkOr, tkOut, tkProc, tkPtr, tkRaise, tkRef, tkReturn, tkShl, tkShr, tkStatic, - tkTemplate, - tkTry, tkTuple, tkType, tkUsing, + tkTemplate, + tkTry, tkTuple, tkType, tkUsing, tkVar, tkWhen, tkWhile, tkWith, tkWithout, tkXor, tkYield, # end of keywords tkIntLit, tkInt8Lit, tkInt16Lit, tkInt32Lit, tkInt64Lit, tkUIntLit, tkUInt8Lit, tkUInt16Lit, tkUInt32Lit, tkUInt64Lit, tkFloatLit, tkFloat32Lit, tkFloat64Lit, tkFloat128Lit, tkStrLit, tkRStrLit, tkTripleStrLit, - tkGStrLit, tkGTripleStrLit, tkCharLit, tkParLe, tkParRi, tkBracketLe, - tkBracketRi, tkCurlyLe, tkCurlyRi, + tkGStrLit, tkGTripleStrLit, tkCharLit, tkParLe, tkParRi, tkBracketLe, + tkBracketRi, tkCurlyLe, tkCurlyRi, tkBracketDotLe, tkBracketDotRi, # [. and .] tkCurlyDotLe, tkCurlyDotRi, # {. and .} tkParDotLe, tkParDotRi, # (. and .) @@ -62,27 +62,27 @@ type tkColon, tkColonColon, tkEquals, tkDot, tkDotDot, tkOpr, tkComment, tkAccent, tkSpaces, tkInfixOpr, tkPrefixOpr, tkPostfixOpr, - + TTokTypes* = set[TTokType] -const +const tokKeywordLow* = succ(tkSymbol) tokKeywordHigh* = pred(tkIntLit) - TokTypeToStr*: array[TTokType, string] = ["tkInvalid", "[EOF]", + TokTypeToStr*: array[TTokType, string] = ["tkInvalid", "[EOF]", "tkSymbol", - "addr", "and", "as", "asm", "atomic", - "bind", "block", "break", "case", "cast", - "const", "continue", "converter", + "addr", "and", "as", "asm", "atomic", + "bind", "block", "break", "case", "cast", + "concept", "const", "continue", "converter", "defer", "discard", "distinct", "div", "do", "elif", "else", "end", "enum", "except", "export", - "finally", "for", "from", "func", "generic", "if", + "finally", "for", "from", "func", "generic", "if", "import", "in", "include", "interface", "is", "isnot", "iterator", "let", - "macro", "method", "mixin", "mod", - "nil", "not", "notin", "object", "of", "or", - "out", "proc", "ptr", "raise", "ref", "return", + "macro", "method", "mixin", "mod", + "nil", "not", "notin", "object", "of", "or", + "out", "proc", "ptr", "raise", "ref", "return", "shl", "shr", "static", - "template", + "template", "try", "tuple", "type", "using", "var", "when", "while", "with", "without", "xor", "yield", @@ -90,7 +90,7 @@ const "tkUIntLit", "tkUInt8Lit", "tkUInt16Lit", "tkUInt32Lit", "tkUInt64Lit", "tkFloatLit", "tkFloat32Lit", "tkFloat64Lit", "tkFloat128Lit", "tkStrLit", "tkRStrLit", - "tkTripleStrLit", "tkGStrLit", "tkGTripleStrLit", "tkCharLit", "(", + "tkTripleStrLit", "tkGStrLit", "tkGTripleStrLit", "tkCharLit", "(", ")", "[", "]", "{", "}", "[.", ".]", "{.", ".}", "(.", ".)", ",", ";", ":", "::", "=", ".", "..", @@ -98,8 +98,8 @@ const "tkSpaces", "tkInfixOpr", "tkPrefixOpr", "tkPostfixOpr"] -type - TNumericalBase* = enum +type + TNumericalBase* = enum base10, # base10 is listed as the first element, # so that it is the correct default value base2, base8, base16 @@ -148,45 +148,45 @@ proc openLexer*(lex: var TLexer, filename: string, inputstream: PLLStream) = proc lexMessage*(L: TLexer, msg: TMsgKind, arg = "") -proc isKeyword(kind: TTokType): bool = +proc isKeyword(kind: TTokType): bool = result = (kind >= tokKeywordLow) and (kind <= tokKeywordHigh) proc isNimIdentifier*(s: string): bool = if s[0] in SymStartChars: var i = 1 while i < s.len: - if s[i] == '_': + if s[i] == '_': inc(i) if s[i] notin SymChars: return if s[i] notin SymChars: return inc(i) result = true -proc tokToStr*(tok: TToken): string = +proc tokToStr*(tok: TToken): string = case tok.tokType of tkIntLit..tkInt64Lit: result = $tok.iNumber of tkFloatLit..tkFloat64Lit: result = $tok.fNumber of tkInvalid, tkStrLit..tkCharLit, tkComment: result = tok.literal - of tkParLe..tkColon, tkEof, tkAccent: + of tkParLe..tkColon, tkEof, tkAccent: result = TokTypeToStr[tok.tokType] else: if tok.ident != nil: result = tok.ident.s - else: + else: internalError("tokToStr") result = "" - + proc prettyTok*(tok: TToken): string = if isKeyword(tok.tokType): result = "keyword " & tok.ident.s else: result = tokToStr(tok) - -proc printTok*(tok: TToken) = + +proc printTok*(tok: TToken) = msgWriteln($tok.line & ":" & $tok.col & "\t" & TokTypeToStr[tok.tokType] & " " & tokToStr(tok)) var dummyIdent: PIdent -proc initToken*(L: var TToken) = +proc initToken*(L: var TToken) = L.tokType = tkInvalid L.iNumber = 0 L.indent = 0 @@ -196,7 +196,7 @@ proc initToken*(L: var TToken) = L.base = base10 L.ident = dummyIdent -proc fillToken(L: var TToken) = +proc fillToken(L: var TToken) = L.tokType = tkInvalid L.iNumber = 0 L.indent = 0 @@ -205,22 +205,22 @@ proc fillToken(L: var TToken) = L.fNumber = 0.0 L.base = base10 L.ident = dummyIdent - -proc openLexer(lex: var TLexer, fileIdx: int32, inputstream: PLLStream) = + +proc openLexer(lex: var TLexer, fileIdx: int32, inputstream: PLLStream) = openBaseLexer(lex, inputstream) lex.fileIdx = fileidx lex.indentAhead = - 1 lex.currLineIndent = 0 - inc(lex.lineNumber, inputstream.lineOffset) + inc(lex.lineNumber, inputstream.lineOffset) -proc closeLexer(lex: var TLexer) = +proc closeLexer(lex: var TLexer) = inc(gLinesCompiled, lex.lineNumber) closeBaseLexer(lex) -proc getColumn(L: TLexer): int = +proc getColumn(L: TLexer): int = result = getColNumber(L, L.bufpos) -proc getLineInfo(L: TLexer): TLineInfo = +proc getLineInfo(L: TLexer): TLineInfo = result = newLineInfo(L.fileIdx, L.lineNumber, getColNumber(L, L.bufpos)) proc dispMessage(L: TLexer; info: TLineInfo; msg: TMsgKind; arg: string) = @@ -236,24 +236,24 @@ proc lexMessagePos(L: var TLexer, msg: TMsgKind, pos: int, arg = "") = var info = newLineInfo(L.fileIdx, L.lineNumber, pos - L.lineStart) L.dispMessage(info, msg, arg) -proc matchUnderscoreChars(L: var TLexer, tok: var TToken, chars: set[char]) = +proc matchUnderscoreChars(L: var TLexer, tok: var TToken, chars: set[char]) = var pos = L.bufpos # use registers for pos, buf var buf = L.buf - while true: - if buf[pos] in chars: + while true: + if buf[pos] in chars: add(tok.literal, buf[pos]) inc(pos) - else: - break - if buf[pos] == '_': - if buf[pos+1] notin chars: + else: + break + if buf[pos] == '_': + if buf[pos+1] notin chars: lexMessage(L, errInvalidToken, "_") break add(tok.literal, '_') inc(pos) L.bufpos = pos -proc matchTwoChars(L: TLexer, first: char, second: set[char]): bool = +proc matchTwoChars(L: TLexer, first: char, second: set[char]): bool = result = (L.buf[L.bufpos] == first) and (L.buf[L.bufpos + 1] in second) proc isFloatLiteral(s: string): bool = @@ -275,8 +275,8 @@ proc unsafeParseUInt(s: string, b: var BiggestInt, start = 0): int = result = i - start {.pop.} # overflowChecks -proc getNumber(L: var TLexer): TToken = - var +proc getNumber(L: var TLexer): TToken = + var pos, endpos: int xi: BiggestInt # get the base: @@ -290,15 +290,15 @@ proc getNumber(L: var TLexer): TToken = else: matchUnderscoreChars(L, result, {'0'..'9', 'b', 'B', 'o', 'c', 'C'}) eallowed = true - if (L.buf[L.bufpos] == '.') and (L.buf[L.bufpos + 1] in {'0'..'9'}): + if (L.buf[L.bufpos] == '.') and (L.buf[L.bufpos + 1] in {'0'..'9'}): add(result.literal, '.') inc(L.bufpos) matchUnderscoreChars(L, result, {'0'..'9'}) eallowed = true - if eallowed and L.buf[L.bufpos] in {'e', 'E'}: + if eallowed and L.buf[L.bufpos] in {'e', 'E'}: add(result.literal, 'e') inc(L.bufpos) - if L.buf[L.bufpos] in {'+', '-'}: + if L.buf[L.bufpos] in {'+', '-'}: add(result.literal, L.buf[L.bufpos]) inc(L.bufpos) matchUnderscoreChars(L, result, {'0'..'9'}) @@ -307,7 +307,7 @@ proc getNumber(L: var TLexer): TToken = if L.buf[endpos] == '\'': inc(endpos) L.bufpos = pos # restore position case L.buf[endpos] - of 'f', 'F': + of 'f', 'F': inc(endpos) if (L.buf[endpos] == '3') and (L.buf[endpos + 1] == '2'): result.tokType = tkFloat32Lit @@ -320,36 +320,36 @@ proc getNumber(L: var TLexer): TToken = (L.buf[endpos + 2] == '8'): result.tokType = tkFloat128Lit inc(endpos, 3) - else: + else: lexMessage(L, errInvalidNumber, result.literal & "'f" & L.buf[endpos]) - of 'i', 'I': + of 'i', 'I': inc(endpos) - if (L.buf[endpos] == '6') and (L.buf[endpos + 1] == '4'): + if (L.buf[endpos] == '6') and (L.buf[endpos + 1] == '4'): result.tokType = tkInt64Lit inc(endpos, 2) - elif (L.buf[endpos] == '3') and (L.buf[endpos + 1] == '2'): + elif (L.buf[endpos] == '3') and (L.buf[endpos + 1] == '2'): result.tokType = tkInt32Lit inc(endpos, 2) - elif (L.buf[endpos] == '1') and (L.buf[endpos + 1] == '6'): + elif (L.buf[endpos] == '1') and (L.buf[endpos + 1] == '6'): result.tokType = tkInt16Lit inc(endpos, 2) - elif (L.buf[endpos] == '8'): + elif (L.buf[endpos] == '8'): result.tokType = tkInt8Lit inc(endpos) - else: + else: lexMessage(L, errInvalidNumber, result.literal & "'i" & L.buf[endpos]) of 'u', 'U': inc(endpos) - if (L.buf[endpos] == '6') and (L.buf[endpos + 1] == '4'): + if (L.buf[endpos] == '6') and (L.buf[endpos + 1] == '4'): result.tokType = tkUInt64Lit inc(endpos, 2) - elif (L.buf[endpos] == '3') and (L.buf[endpos + 1] == '2'): + elif (L.buf[endpos] == '3') and (L.buf[endpos + 1] == '2'): result.tokType = tkUInt32Lit inc(endpos, 2) - elif (L.buf[endpos] == '1') and (L.buf[endpos + 1] == '6'): + elif (L.buf[endpos] == '1') and (L.buf[endpos + 1] == '6'): result.tokType = tkUInt16Lit inc(endpos, 2) - elif (L.buf[endpos] == '8'): + elif (L.buf[endpos] == '8'): result.tokType = tkUInt8Lit inc(endpos) else: @@ -357,45 +357,45 @@ proc getNumber(L: var TLexer): TToken = else: lexMessage(L, errInvalidNumber, result.literal & "'" & L.buf[endpos]) else: L.bufpos = pos # restore position - try: + try: if (L.buf[pos] == '0') and - (L.buf[pos + 1] in {'x', 'X', 'b', 'B', 'o', 'O', 'c', 'C'}): + (L.buf[pos + 1] in {'x', 'X', 'b', 'B', 'o', 'O', 'c', 'C'}): inc(pos, 2) xi = 0 # it may be a base prefix case L.buf[pos - 1] # now look at the optional type suffix: - of 'b', 'B': + of 'b', 'B': result.base = base2 - while true: + while true: case L.buf[pos] - of '2'..'9', '.': + of '2'..'9', '.': lexMessage(L, errInvalidNumber, result.literal) inc(pos) - of '_': - if L.buf[pos+1] notin {'0'..'1'}: + of '_': + if L.buf[pos+1] notin {'0'..'1'}: lexMessage(L, errInvalidToken, "_") break inc(pos) - of '0', '1': + of '0', '1': xi = `shl`(xi, 1) or (ord(L.buf[pos]) - ord('0')) inc(pos) - else: break - of 'o', 'c', 'C': + else: break + of 'o', 'c', 'C': result.base = base8 - while true: + while true: case L.buf[pos] - of '8'..'9', '.': + of '8'..'9', '.': lexMessage(L, errInvalidNumber, result.literal) inc(pos) - of '_': + of '_': if L.buf[pos+1] notin {'0'..'7'}: lexMessage(L, errInvalidToken, "_") break inc(pos) - of '0'..'7': + of '0'..'7': xi = `shl`(xi, 3) or (ord(L.buf[pos]) - ord('0')) inc(pos) - else: break - of 'O': + else: break + of 'O': lexMessage(L, errInvalidNumber, result.literal) of 'x', 'X': result.base = base16 @@ -415,7 +415,7 @@ proc getNumber(L: var TLexer): TToken = of 'A'..'F': xi = `shl`(xi, 4) or (ord(L.buf[pos]) - ord('A') + 10) inc(pos) - else: break + else: break else: internalError(getLineInfo(L), "getNumber") case result.tokType of tkIntLit, tkInt64Lit: result.iNumber = xi @@ -426,14 +426,14 @@ proc getNumber(L: var TLexer): TToken = of tkUInt8Lit: result.iNumber = BiggestInt(int8(toU8(int(xi)))) of tkUInt16Lit: result.iNumber = BiggestInt(toU16(int(xi))) of tkUInt32Lit: result.iNumber = BiggestInt(toU32(xi)) - of tkFloat32Lit: - result.fNumber = (cast[PFloat32](addr(xi)))[] + of tkFloat32Lit: + result.fNumber = (cast[PFloat32](addr(xi)))[] # note: this code is endian neutral! # XXX: Test this on big endian machine! - of tkFloat64Lit: result.fNumber = (cast[PFloat64](addr(xi)))[] + of tkFloat64Lit: result.fNumber = (cast[PFloat64](addr(xi)))[] else: internalError(getLineInfo(L), "getNumber") elif isFloatLiteral(result.literal) or (result.tokType == tkFloat32Lit) or - (result.tokType == tkFloat64Lit): + (result.tokType == tkFloat64Lit): result.fNumber = parseFloat(result.literal) if result.tokType == tkIntLit: result.tokType = tkFloatLit elif result.tokType == tkUint64Lit: @@ -461,69 +461,69 @@ proc getNumber(L: var TLexer): TToken = lexMessage(L, errNumberOutOfRange, result.literal) L.bufpos = endpos -proc handleHexChar(L: var TLexer, xi: var int) = +proc handleHexChar(L: var TLexer, xi: var int) = case L.buf[L.bufpos] - of '0'..'9': + of '0'..'9': xi = (xi shl 4) or (ord(L.buf[L.bufpos]) - ord('0')) inc(L.bufpos) - of 'a'..'f': + of 'a'..'f': xi = (xi shl 4) or (ord(L.buf[L.bufpos]) - ord('a') + 10) inc(L.bufpos) - of 'A'..'F': + of 'A'..'F': xi = (xi shl 4) or (ord(L.buf[L.bufpos]) - ord('A') + 10) inc(L.bufpos) else: discard -proc handleDecChars(L: var TLexer, xi: var int) = - while L.buf[L.bufpos] in {'0'..'9'}: +proc handleDecChars(L: var TLexer, xi: var int) = + while L.buf[L.bufpos] in {'0'..'9'}: xi = (xi * 10) + (ord(L.buf[L.bufpos]) - ord('0')) inc(L.bufpos) -proc getEscapedChar(L: var TLexer, tok: var TToken) = +proc getEscapedChar(L: var TLexer, tok: var TToken) = inc(L.bufpos) # skip '\' case L.buf[L.bufpos] - of 'n', 'N': + of 'n', 'N': if tok.tokType == tkCharLit: lexMessage(L, errNnotAllowedInCharacter) add(tok.literal, tnl) inc(L.bufpos) - of 'r', 'R', 'c', 'C': + of 'r', 'R', 'c', 'C': add(tok.literal, CR) inc(L.bufpos) - of 'l', 'L': + of 'l', 'L': add(tok.literal, LF) inc(L.bufpos) - of 'f', 'F': + of 'f', 'F': add(tok.literal, FF) inc(L.bufpos) - of 'e', 'E': + of 'e', 'E': add(tok.literal, ESC) inc(L.bufpos) - of 'a', 'A': + of 'a', 'A': add(tok.literal, BEL) inc(L.bufpos) - of 'b', 'B': + of 'b', 'B': add(tok.literal, BACKSPACE) inc(L.bufpos) - of 'v', 'V': + of 'v', 'V': add(tok.literal, VT) inc(L.bufpos) - of 't', 'T': + of 't', 'T': add(tok.literal, '\t') inc(L.bufpos) - of '\'', '\"': + of '\'', '\"': add(tok.literal, L.buf[L.bufpos]) inc(L.bufpos) - of '\\': + of '\\': add(tok.literal, '\\') inc(L.bufpos) - of 'x', 'X': + of 'x', 'X': inc(L.bufpos) var xi = 0 handleHexChar(L, xi) handleHexChar(L, xi) add(tok.literal, chr(xi)) - of '0'..'9': - if matchTwoChars(L, '0', {'0'..'9'}): + of '0'..'9': + if matchTwoChars(L, '0', {'0'..'9'}): lexMessage(L, warnOctalEscape) var xi = 0 handleDecChars(L, xi) @@ -540,7 +540,7 @@ proc newString(s: cstring, len: int): string = proc handleCRLF(L: var TLexer, pos: int): int = template registerLine = let col = L.getColNumber(pos) - + if col > MaxLineLength: lexMessagePos(L, hintLineTooLong, pos) @@ -548,7 +548,7 @@ proc handleCRLF(L: var TLexer, pos: int): int = let lineStart = cast[ByteAddress](L.buf) + L.lineStart let line = newString(cast[cstring](lineStart), col) addSourceLine(L.fileIdx, line) - + case L.buf[pos] of CR: registerLine() @@ -557,12 +557,12 @@ proc handleCRLF(L: var TLexer, pos: int): int = registerLine() result = nimlexbase.handleLF(L, pos) else: result = pos - -proc getString(L: var TLexer, tok: var TToken, rawMode: bool) = + +proc getString(L: var TLexer, tok: var TToken, rawMode: bool) = var pos = L.bufpos + 1 # skip " var buf = L.buf # put `buf` in a register var line = L.lineNumber # save linenumber for better error message - if buf[pos] == '\"' and buf[pos+1] == '\"': + if buf[pos] == '\"' and buf[pos+1] == '\"': tok.tokType = tkTripleStrLit # long string literal: inc(pos, 2) # skip "" # skip leading newline: @@ -572,105 +572,105 @@ proc getString(L: var TLexer, tok: var TToken, rawMode: bool) = if buf[newpos] in {CR, LF}: pos = newpos pos = handleCRLF(L, pos) buf = L.buf - while true: + while true: case buf[pos] - of '\"': + of '\"': if buf[pos+1] == '\"' and buf[pos+2] == '\"' and - buf[pos+3] != '\"': + buf[pos+3] != '\"': L.bufpos = pos + 3 # skip the three """ - break + break add(tok.literal, '\"') inc(pos) - of CR, LF: + of CR, LF: pos = handleCRLF(L, pos) buf = L.buf add(tok.literal, tnl) - of nimlexbase.EndOfFile: + of nimlexbase.EndOfFile: var line2 = L.lineNumber L.lineNumber = line lexMessagePos(L, errClosingTripleQuoteExpected, L.lineStart) L.lineNumber = line2 - break - else: + break + else: add(tok.literal, buf[pos]) inc(pos) - else: + else: # ordinary string literal if rawMode: tok.tokType = tkRStrLit else: tok.tokType = tkStrLit - while true: + while true: var c = buf[pos] - if c == '\"': + if c == '\"': if rawMode and buf[pos+1] == '\"': inc(pos, 2) add(tok.literal, '"') else: inc(pos) # skip '"' break - elif c in {CR, LF, nimlexbase.EndOfFile}: + elif c in {CR, LF, nimlexbase.EndOfFile}: lexMessage(L, errClosingQuoteExpected) - break - elif (c == '\\') and not rawMode: + break + elif (c == '\\') and not rawMode: L.bufpos = pos getEscapedChar(L, tok) pos = L.bufpos - else: + else: add(tok.literal, c) inc(pos) L.bufpos = pos -proc getCharacter(L: var TLexer, tok: var TToken) = +proc getCharacter(L: var TLexer, tok: var TToken) = inc(L.bufpos) # skip ' var c = L.buf[L.bufpos] case c of '\0'..pred(' '), '\'': lexMessage(L, errInvalidCharacterConstant) of '\\': getEscapedChar(L, tok) - else: + else: tok.literal = $c inc(L.bufpos) if L.buf[L.bufpos] != '\'': lexMessage(L, errMissingFinalQuote) inc(L.bufpos) # skip ' - -proc getSymbol(L: var TLexer, tok: var TToken) = + +proc getSymbol(L: var TLexer, tok: var TToken) = var h: THash = 0 var pos = L.bufpos var buf = L.buf - while true: + while true: var c = buf[pos] case c - of 'a'..'z', '0'..'9', '\x80'..'\xFF': + of 'a'..'z', '0'..'9', '\x80'..'\xFF': h = h !& ord(c) - of 'A'..'Z': + of 'A'..'Z': c = chr(ord(c) + (ord('a') - ord('A'))) # toLower() h = h !& ord(c) of '_': - if buf[pos+1] notin SymChars: + if buf[pos+1] notin SymChars: lexMessage(L, errInvalidToken, "_") break - else: break + else: break inc(pos) h = !$h tok.ident = getIdent(addr(L.buf[L.bufpos]), pos - L.bufpos, h) L.bufpos = pos if (tok.ident.id < ord(tokKeywordLow) - ord(tkSymbol)) or - (tok.ident.id > ord(tokKeywordHigh) - ord(tkSymbol)): + (tok.ident.id > ord(tokKeywordHigh) - ord(tkSymbol)): tok.tokType = tkSymbol - else: + else: tok.tokType = TTokType(tok.ident.id + ord(tkSymbol)) - + proc endOperator(L: var TLexer, tok: var TToken, pos: int, - hash: THash) {.inline.} = + hash: THash) {.inline.} = var h = !$hash tok.ident = getIdent(addr(L.buf[L.bufpos]), pos - L.bufpos, h) if (tok.ident.id < oprLow) or (tok.ident.id > oprHigh): tok.tokType = tkOpr else: tok.tokType = TTokType(tok.ident.id - oprLow + ord(tkColon)) L.bufpos = pos - -proc getOperator(L: var TLexer, tok: var TToken) = + +proc getOperator(L: var TLexer, tok: var TToken) = var pos = L.bufpos var buf = L.buf var h: THash = 0 - while true: + while true: var c = buf[pos] if c notin OpChars: break h = h !& ord(c) @@ -699,7 +699,7 @@ proc scanComment(L: var TLexer, tok: var TToken) = return else: lexMessagePos(L, warnDeprecated, pos, "use '## [' instead; '##['") - + tok.tokType = tkComment # iNumber contains the number of '\n' in the token tok.iNumber = 0 @@ -723,7 +723,7 @@ proc scanComment(L: var TLexer, tok: var TToken) = pos = handleCRLF(L, pos) buf = L.buf var indent = 0 - while buf[pos] == ' ': + while buf[pos] == ' ': inc(pos) inc(indent) @@ -738,7 +738,7 @@ proc scanComment(L: var TLexer, tok: var TToken) = when defined(nimfix): col = indent inc tok.iNumber else: - if buf[pos] > ' ': + if buf[pos] > ' ': L.indentAhead = indent break L.bufpos = pos @@ -801,10 +801,10 @@ proc rawGetTok(L: var TLexer, tok: var TToken) = getSymbol(L, tok) else: case c - of '#': + of '#': scanComment(L, tok) of '*': - # '*:' is unfortunately a special case, because it is two tokens in + # '*:' is unfortunately a special case, because it is two tokens in # 'var v*: int'. if L.buf[L.bufpos+1] == ':' and L.buf[L.bufpos+2] notin OpChars: var h = 0 !& ord('*') @@ -814,29 +814,29 @@ proc rawGetTok(L: var TLexer, tok: var TToken) = of ',': tok.tokType = tkComma inc(L.bufpos) - of 'l': + of 'l': # if we parsed exactly one character and its a small L (l), this # is treated as a warning because it may be confused with the number 1 if L.buf[L.bufpos+1] notin (SymChars + {'_'}): lexMessage(L, warnSmallLshouldNotBeUsed) getSymbol(L, tok) of 'r', 'R': - if L.buf[L.bufpos + 1] == '\"': + if L.buf[L.bufpos + 1] == '\"': inc(L.bufpos) getString(L, tok, true) - else: + else: getSymbol(L, tok) - of '(': + of '(': inc(L.bufpos) - if L.buf[L.bufpos] == '.' and L.buf[L.bufpos+1] != '.': + if L.buf[L.bufpos] == '.' and L.buf[L.bufpos+1] != '.': tok.tokType = tkParDotLe inc(L.bufpos) - else: + else: tok.tokType = tkParLe - of ')': + of ')': tok.tokType = tkParRi inc(L.bufpos) - of '[': + of '[': inc(L.bufpos) if L.buf[L.bufpos] == '.' and L.buf[L.bufpos+1] != '.': tok.tokType = tkBracketDotLe @@ -847,34 +847,34 @@ proc rawGetTok(L: var TLexer, tok: var TToken) = tok.tokType = tkBracketRi inc(L.bufpos) of '.': - if L.buf[L.bufpos+1] == ']': + if L.buf[L.bufpos+1] == ']': tok.tokType = tkBracketDotRi inc(L.bufpos, 2) - elif L.buf[L.bufpos+1] == '}': + elif L.buf[L.bufpos+1] == '}': tok.tokType = tkCurlyDotRi inc(L.bufpos, 2) - elif L.buf[L.bufpos+1] == ')': + elif L.buf[L.bufpos+1] == ')': tok.tokType = tkParDotRi inc(L.bufpos, 2) - else: + else: getOperator(L, tok) - of '{': + of '{': inc(L.bufpos) if L.buf[L.bufpos] == '.' and L.buf[L.bufpos+1] != '.': tok.tokType = tkCurlyDotLe inc(L.bufpos) - else: + else: tok.tokType = tkCurlyLe - of '}': + of '}': tok.tokType = tkCurlyRi inc(L.bufpos) - of ';': + of ';': tok.tokType = tkSemiColon inc(L.bufpos) - of '`': + of '`': tok.tokType = tkAccent inc(L.bufpos) - of '\"': + of '\"': # check for extended raw string literal: var rawMode = L.bufpos > 0 and L.buf[L.bufpos-1] in SymChars getString(L, tok, rawMode) @@ -889,7 +889,7 @@ proc rawGetTok(L: var TLexer, tok: var TToken) = of '0'..'9': tok = getNumber(L) else: - if c in OpChars: + if c in OpChars: getOperator(L, tok) elif c == nimlexbase.EndOfFile: tok.tokType = tkEof |