diff options
Diffstat (limited to 'compiler/lexer.nim')
-rw-r--r-- | compiler/lexer.nim | 430 |
1 files changed, 261 insertions, 169 deletions
diff --git a/compiler/lexer.nim b/compiler/lexer.nim index 78266ef4d..cea42ad1e 100644 --- a/compiler/lexer.nim +++ b/compiler/lexer.nim @@ -17,7 +17,7 @@ import hashes, options, msgs, strutils, platform, idents, nimlexbase, llstream, - wordrecg + wordrecg, etcpriv const MaxLineLength* = 80 # lines longer than this lead to a warning @@ -61,7 +61,7 @@ type tkComma, tkSemiColon, tkColon, tkColonColon, tkEquals, tkDot, tkDotDot, tkOpr, tkComment, tkAccent, - tkSpaces, tkInfixOpr, tkPrefixOpr, tkPostfixOpr, + tkSpaces, tkInfixOpr, tkPrefixOpr, tkPostfixOpr TTokTypes* = set[TTokType] @@ -131,33 +131,21 @@ type var gLinesCompiled*: int # all lines that have been compiled -proc isKeyword*(kind: TTokType): bool -proc openLexer*(lex: var TLexer, fileidx: int32, inputstream: PLLStream) -proc rawGetTok*(L: var TLexer, tok: var TToken) - # reads in the next token into tok and skips it - proc getLineInfo*(L: TLexer, tok: TToken): TLineInfo {.inline.} = newLineInfo(L.fileIdx, tok.line, tok.col) -proc closeLexer*(lex: var TLexer) -proc printTok*(tok: TToken) -proc tokToStr*(tok: TToken): string - -proc openLexer*(lex: var TLexer, filename: string, inputstream: PLLStream) = - openLexer(lex, filename.fileInfoIdx, inputstream) - -proc lexMessage*(L: TLexer, msg: TMsgKind, arg = "") - -proc isKeyword(kind: TTokType): bool = +proc isKeyword*(kind: TTokType): bool = result = (kind >= tokKeywordLow) and (kind <= tokKeywordHigh) proc isNimIdentifier*(s: string): bool = if s[0] in SymStartChars: var i = 1 - while i < s.len: + var sLen = s.len + while i < sLen: if s[i] == '_': inc(i) - if s[i] notin SymChars: return + elif isMagicIdentSeparatorRune(cstring s, i): + inc(i, magicIdentSeparatorRuneByteWidth) if s[i] notin SymChars: return inc(i) result = true @@ -206,14 +194,17 @@ proc fillToken(L: var TToken) = L.base = base10 L.ident = dummyIdent -proc openLexer(lex: var TLexer, fileIdx: int32, inputstream: PLLStream) = +proc openLexer*(lex: var TLexer, fileIdx: int32, inputstream: PLLStream) = openBaseLexer(lex, inputstream) lex.fileIdx = fileidx lex.indentAhead = - 1 lex.currLineIndent = 0 inc(lex.lineNumber, inputstream.lineOffset) -proc closeLexer(lex: var TLexer) = +proc openLexer*(lex: var TLexer, filename: string, inputstream: PLLStream) = + openLexer(lex, filename.fileInfoIdx, inputstream) + +proc closeLexer*(lex: var TLexer) = inc(gLinesCompiled, lex.lineNumber) closeBaseLexer(lex) @@ -229,30 +220,17 @@ proc dispMessage(L: TLexer; info: TLineInfo; msg: TMsgKind; arg: string) = else: L.errorHandler(info, msg, arg) -proc lexMessage(L: TLexer, msg: TMsgKind, arg = "") = +proc lexMessage*(L: TLexer, msg: TMsgKind, arg = "") = L.dispMessage(getLineInfo(L), msg, arg) +proc lexMessageTok*(L: TLexer, msg: TMsgKind, tok: TToken, arg = "") = + var info = newLineInfo(L.fileIdx, tok.line, tok.col) + L.dispMessage(info, msg, arg) + proc lexMessagePos(L: var TLexer, msg: TMsgKind, pos: int, arg = "") = var info = newLineInfo(L.fileIdx, L.lineNumber, pos - L.lineStart) L.dispMessage(info, msg, arg) -proc matchUnderscoreChars(L: var TLexer, tok: var TToken, chars: set[char]) = - var pos = L.bufpos # use registers for pos, buf - var buf = L.buf - while true: - if buf[pos] in chars: - add(tok.literal, buf[pos]) - inc(pos) - else: - break - if buf[pos] == '_': - if buf[pos+1] notin chars: - lexMessage(L, errInvalidToken, "_") - break - add(tok.literal, '_') - inc(pos) - L.bufpos = pos - proc matchTwoChars(L: TLexer, first: char, second: set[char]): bool = result = (L.buf[L.bufpos] == first) and (L.buf[L.bufpos + 1] in second) @@ -275,136 +253,200 @@ proc unsafeParseUInt(s: string, b: var BiggestInt, start = 0): int = result = i - start {.pop.} # overflowChecks + +template eatChar(L: var TLexer, t: var TToken, replacementChar: char) = + add(t.literal, replacementChar) + inc(L.bufpos) + +template eatChar(L: var TLexer, t: var TToken) = + add(t.literal, L.buf[L.bufpos]) + inc(L.bufpos) + proc getNumber(L: var TLexer): TToken = + proc matchUnderscoreChars(L: var TLexer, tok: var TToken, chars: set[char]) = + var pos = L.bufpos # use registers for pos, buf + var buf = L.buf + while true: + if buf[pos] in chars: + add(tok.literal, buf[pos]) + inc(pos) + else: + break + if buf[pos] == '_': + if buf[pos+1] notin chars: + lexMessage(L, errInvalidToken, "_") + break + add(tok.literal, '_') + inc(pos) + L.bufpos = pos + + proc matchChars(L: var TLexer, tok: var TToken, chars: set[char]) = + var pos = L.bufpos # use registers for pos, buf + var buf = L.buf + while buf[pos] in chars: + add(tok.literal, buf[pos]) + inc(pos) + L.bufpos = pos + + proc lexMessageLitNum(L: var TLexer, msg: TMsgKind, startpos: int) = + # Used to get slightly human friendlier err messages. + # Note: the erroneous 'O' char in the character set is intentional + const literalishChars = {'A'..'F', 'a'..'f', '0'..'9', 'X', 'x', 'o', 'O', + 'c', 'C', 'b', 'B', '_', '.', '\'', 'd', 'i', 'u'} + var msgPos = L.bufpos + var t: TToken + t.literal = "" + L.bufpos = startpos # Use L.bufpos as pos because of matchChars + matchChars(L, t, literalishChars) + # We must verify +/- specifically so that we're not past the literal + if L.buf[L.bufpos] in {'+', '-'} and + L.buf[L.bufpos - 1] in {'e', 'E'}: + add(t.literal, L.buf[L.bufpos]) + inc(L.bufpos) + matchChars(L, t, literalishChars) + if L.buf[L.bufpos] in {'\'', 'f', 'F', 'd', 'D', 'i', 'I', 'u', 'U'}: + inc(L.bufpos) + add(t.literal, L.buf[L.bufpos]) + matchChars(L, t, {'0'..'9'}) + L.bufpos = msgPos + lexMessage(L, msg, t.literal) + var - pos, endpos: int + startpos, endpos: int xi: BiggestInt - # get the base: + isBase10 = true + const + baseCodeChars = {'X', 'x', 'o', 'c', 'C', 'b', 'B'} + literalishChars = baseCodeChars + {'A'..'F', 'a'..'f', '0'..'9', '_', '\''} + floatTypes = {tkFloatLit, tkFloat32Lit, tkFloat64Lit, tkFloat128Lit} result.tokType = tkIntLit # int literal until we know better result.literal = "" - result.base = base10 # BUGFIX - pos = L.bufpos # make sure the literal is correct for error messages: - var eallowed = false - if L.buf[pos] == '0' and L.buf[pos+1] in {'X', 'x'}: - matchUnderscoreChars(L, result, {'A'..'F', 'a'..'f', '0'..'9', 'X', 'x'}) + result.base = base10 + startpos = L.bufpos + + # First stage: find out base, make verifications, build token literal string + if L.buf[L.bufpos] == '0' and L.buf[L.bufpos + 1] in baseCodeChars + {'O'}: + isBase10 = false + eatChar(L, result, '0') + case L.buf[L.bufpos] + of 'O': + lexMessageLitNum(L, errInvalidNumberOctalCode, startpos) + of 'x', 'X': + eatChar(L, result, 'x') + matchUnderscoreChars(L, result, {'0'..'9', 'a'..'f', 'A'..'F'}) + of 'o', 'c', 'C': + eatChar(L, result, 'c') + matchUnderscoreChars(L, result, {'0'..'7'}) + of 'b', 'B': + eatChar(L, result, 'b') + matchUnderscoreChars(L, result, {'0'..'1'}) + else: + internalError(getLineInfo(L), "getNumber") else: - matchUnderscoreChars(L, result, {'0'..'9', 'b', 'B', 'o', 'c', 'C'}) - eallowed = true - if (L.buf[L.bufpos] == '.') and (L.buf[L.bufpos + 1] in {'0'..'9'}): - add(result.literal, '.') - inc(L.bufpos) - matchUnderscoreChars(L, result, {'0'..'9'}) - eallowed = true - if eallowed and L.buf[L.bufpos] in {'e', 'E'}: - add(result.literal, 'e') - inc(L.bufpos) - if L.buf[L.bufpos] in {'+', '-'}: - add(result.literal, L.buf[L.bufpos]) - inc(L.bufpos) matchUnderscoreChars(L, result, {'0'..'9'}) + if (L.buf[L.bufpos] == '.') and (L.buf[L.bufpos + 1] in {'0'..'9'}): + result.tokType = tkFloat64Lit + eatChar(L, result, '.') + matchUnderscoreChars(L, result, {'0'..'9'}) + if L.buf[L.bufpos] in {'e', 'E'}: + result.tokType = tkFloat64Lit + eatChar(L, result, 'e') + if L.buf[L.bufpos] in {'+', '-'}: + eatChar(L, result) + matchUnderscoreChars(L, result, {'0'..'9'}) endpos = L.bufpos - if L.buf[endpos] in {'\'', 'f', 'F', 'i', 'I', 'u', 'U'}: - if L.buf[endpos] == '\'': inc(endpos) - L.bufpos = pos # restore position - case L.buf[endpos] + + # Second stage, find out if there's a datatype suffix and handle it + var postPos = endpos + if L.buf[postPos] in {'\'', 'f', 'F', 'd', 'D', 'i', 'I', 'u', 'U'}: + if L.buf[postPos] == '\'': + inc(postPos) + + case L.buf[postPos] of 'f', 'F': - inc(endpos) - if (L.buf[endpos] == '3') and (L.buf[endpos + 1] == '2'): + inc(postPos) + if (L.buf[postPos] == '3') and (L.buf[postPos + 1] == '2'): result.tokType = tkFloat32Lit - inc(endpos, 2) - elif (L.buf[endpos] == '6') and (L.buf[endpos + 1] == '4'): + inc(postPos, 2) + elif (L.buf[postPos] == '6') and (L.buf[postPos + 1] == '4'): result.tokType = tkFloat64Lit - inc(endpos, 2) - elif (L.buf[endpos] == '1') and - (L.buf[endpos + 1] == '2') and - (L.buf[endpos + 2] == '8'): + inc(postPos, 2) + elif (L.buf[postPos] == '1') and + (L.buf[postPos + 1] == '2') and + (L.buf[postPos + 2] == '8'): result.tokType = tkFloat128Lit - inc(endpos, 3) - else: - lexMessage(L, errInvalidNumber, result.literal & "'f" & L.buf[endpos]) + inc(postPos, 3) + else: # "f" alone defaults to float32 + result.tokType = tkFloat32Lit + of 'd', 'D': # ad hoc convenience shortcut for f64 + inc(postPos) + result.tokType = tkFloat64Lit of 'i', 'I': - inc(endpos) - if (L.buf[endpos] == '6') and (L.buf[endpos + 1] == '4'): + inc(postPos) + if (L.buf[postPos] == '6') and (L.buf[postPos + 1] == '4'): result.tokType = tkInt64Lit - inc(endpos, 2) - elif (L.buf[endpos] == '3') and (L.buf[endpos + 1] == '2'): + inc(postPos, 2) + elif (L.buf[postPos] == '3') and (L.buf[postPos + 1] == '2'): result.tokType = tkInt32Lit - inc(endpos, 2) - elif (L.buf[endpos] == '1') and (L.buf[endpos + 1] == '6'): + inc(postPos, 2) + elif (L.buf[postPos] == '1') and (L.buf[postPos + 1] == '6'): result.tokType = tkInt16Lit - inc(endpos, 2) - elif (L.buf[endpos] == '8'): + inc(postPos, 2) + elif (L.buf[postPos] == '8'): result.tokType = tkInt8Lit - inc(endpos) + inc(postPos) else: - lexMessage(L, errInvalidNumber, result.literal & "'i" & L.buf[endpos]) + lexMessageLitNum(L, errInvalidNumber, startpos) of 'u', 'U': - inc(endpos) - if (L.buf[endpos] == '6') and (L.buf[endpos + 1] == '4'): + inc(postPos) + if (L.buf[postPos] == '6') and (L.buf[postPos + 1] == '4'): result.tokType = tkUInt64Lit - inc(endpos, 2) - elif (L.buf[endpos] == '3') and (L.buf[endpos + 1] == '2'): + inc(postPos, 2) + elif (L.buf[postPos] == '3') and (L.buf[postPos + 1] == '2'): result.tokType = tkUInt32Lit - inc(endpos, 2) - elif (L.buf[endpos] == '1') and (L.buf[endpos + 1] == '6'): + inc(postPos, 2) + elif (L.buf[postPos] == '1') and (L.buf[postPos + 1] == '6'): result.tokType = tkUInt16Lit - inc(endpos, 2) - elif (L.buf[endpos] == '8'): + inc(postPos, 2) + elif (L.buf[postPos] == '8'): result.tokType = tkUInt8Lit - inc(endpos) + inc(postPos) else: result.tokType = tkUIntLit - else: lexMessage(L, errInvalidNumber, result.literal & "'" & L.buf[endpos]) - else: - L.bufpos = pos # restore position + else: + lexMessageLitNum(L, errInvalidNumber, startpos) + + # Is there still a literalish char awaiting? Then it's an error! + if L.buf[postPos] in literalishChars or + (L.buf[postPos] == '.' and L.buf[postPos + 1] in {'0'..'9'}): + lexMessageLitNum(L, errInvalidNumber, startpos) + + # Third stage, extract actual number + L.bufpos = startpos # restore position + var pos: int = startpos try: - if (L.buf[pos] == '0') and - (L.buf[pos + 1] in {'x', 'X', 'b', 'B', 'o', 'O', 'c', 'C'}): + if (L.buf[pos] == '0') and (L.buf[pos + 1] in baseCodeChars): inc(pos, 2) - xi = 0 # it may be a base prefix - case L.buf[pos - 1] # now look at the optional type suffix: + xi = 0 # it is a base prefix + + case L.buf[pos - 1] of 'b', 'B': result.base = base2 - while true: - case L.buf[pos] - of '2'..'9', '.': - lexMessage(L, errInvalidNumber, result.literal) - inc(pos) - of '_': - if L.buf[pos+1] notin {'0'..'1'}: - lexMessage(L, errInvalidToken, "_") - break - inc(pos) - of '0', '1': + while pos < endpos: + if L.buf[pos] != '_': xi = `shl`(xi, 1) or (ord(L.buf[pos]) - ord('0')) - inc(pos) - else: break + inc(pos) of 'o', 'c', 'C': result.base = base8 - while true: - case L.buf[pos] - of '8'..'9', '.': - lexMessage(L, errInvalidNumber, result.literal) - inc(pos) - of '_': - if L.buf[pos+1] notin {'0'..'7'}: - lexMessage(L, errInvalidToken, "_") - break - inc(pos) - of '0'..'7': + while pos < endpos: + if L.buf[pos] != '_': xi = `shl`(xi, 3) or (ord(L.buf[pos]) - ord('0')) - inc(pos) - else: break - of 'O': - lexMessage(L, errInvalidNumber, result.literal) + inc(pos) of 'x', 'X': result.base = base16 - while true: + while pos < endpos: case L.buf[pos] of '_': - if L.buf[pos+1] notin {'0'..'9', 'a'..'f', 'A'..'F'}: - lexMessage(L, errInvalidToken, "_") - break inc(pos) of '0'..'9': xi = `shl`(xi, 4) or (ord(L.buf[pos]) - ord('0')) @@ -415,51 +457,81 @@ proc getNumber(L: var TLexer): TToken = of 'A'..'F': xi = `shl`(xi, 4) or (ord(L.buf[pos]) - ord('A') + 10) inc(pos) - else: break - else: internalError(getLineInfo(L), "getNumber") + else: + break + else: + internalError(getLineInfo(L), "getNumber") + case result.tokType of tkIntLit, tkInt64Lit: result.iNumber = xi of tkInt8Lit: result.iNumber = BiggestInt(int8(toU8(int(xi)))) - of tkInt16Lit: result.iNumber = BiggestInt(toU16(int(xi))) - of tkInt32Lit: result.iNumber = BiggestInt(toU32(xi)) + of tkInt16Lit: result.iNumber = BiggestInt(int16(toU16(int(xi)))) + of tkInt32Lit: result.iNumber = BiggestInt(int32(toU32(int64(xi)))) of tkUIntLit, tkUInt64Lit: result.iNumber = xi - of tkUInt8Lit: result.iNumber = BiggestInt(int8(toU8(int(xi)))) - of tkUInt16Lit: result.iNumber = BiggestInt(toU16(int(xi))) - of tkUInt32Lit: result.iNumber = BiggestInt(toU32(xi)) + of tkUInt8Lit: result.iNumber = BiggestInt(uint8(toU8(int(xi)))) + of tkUInt16Lit: result.iNumber = BiggestInt(uint16(toU16(int(xi)))) + of tkUInt32Lit: result.iNumber = BiggestInt(uint32(toU32(int64(xi)))) of tkFloat32Lit: result.fNumber = (cast[PFloat32](addr(xi)))[] # note: this code is endian neutral! # XXX: Test this on big endian machine! of tkFloat64Lit: result.fNumber = (cast[PFloat64](addr(xi)))[] else: internalError(getLineInfo(L), "getNumber") - elif isFloatLiteral(result.literal) or (result.tokType == tkFloat32Lit) or - (result.tokType == tkFloat64Lit): - result.fNumber = parseFloat(result.literal) - if result.tokType == tkIntLit: result.tokType = tkFloatLit - elif result.tokType == tkUint64Lit: - xi = 0 - let len = unsafeParseUInt(result.literal, xi) - if len != result.literal.len or len == 0: - raise newException(ValueError, "invalid integer: " & $xi) - result.iNumber = xi + + # Bounds checks. Non decimal literals are allowed to overflow the range of + # the datatype as long as their pattern don't overflow _bitwise_, hence + # below checks of signed sizes against uint*.high is deliberate: + # (0x80'u8 = 128, 0x80'i8 = -128, etc == OK) + if result.tokType notin floatTypes: + let outOfRange = case result.tokType: + of tkUInt8Lit, tkUInt16Lit, tkUInt32Lit: result.iNumber != xi + of tkInt8Lit: (xi > BiggestInt(uint8.high)) + of tkInt16Lit: (xi > BiggestInt(uint16.high)) + of tkInt32Lit: (xi > BiggestInt(uint32.high)) + else: false + + if outOfRange: + #echo "out of range num: ", result.iNumber, " vs ", xi + lexMessageLitNum(L, errNumberOutOfRange, startpos) + else: - result.iNumber = parseBiggestInt(result.literal) + case result.tokType + of floatTypes: + result.fNumber = parseFloat(result.literal) + of tkUint64Lit: + xi = 0 + let len = unsafeParseUInt(result.literal, xi) + if len != result.literal.len or len == 0: + raise newException(ValueError, "invalid integer: " & $xi) + result.iNumber = xi + else: + result.iNumber = parseBiggestInt(result.literal) + + # Explicit bounds checks + let outOfRange = case result.tokType: + of tkInt8Lit: (result.iNumber < int8.low or result.iNumber > int8.high) + of tkUInt8Lit: (result.iNumber < BiggestInt(uint8.low) or + result.iNumber > BiggestInt(uint8.high)) + of tkInt16Lit: (result.iNumber < int16.low or result.iNumber > int16.high) + of tkUInt16Lit: (result.iNumber < BiggestInt(uint16.low) or + result.iNumber > BiggestInt(uint16.high)) + of tkInt32Lit: (result.iNumber < int32.low or result.iNumber > int32.high) + of tkUInt32Lit: (result.iNumber < BiggestInt(uint32.low) or + result.iNumber > BiggestInt(uint32.high)) + else: false + + if outOfRange: lexMessageLitNum(L, errNumberOutOfRange, startpos) + + # Promote int literal to int64? Not always necessary, but more consistent + if result.tokType == tkIntLit: if (result.iNumber < low(int32)) or (result.iNumber > high(int32)): - if result.tokType == tkIntLit: - result.tokType = tkInt64Lit - elif result.tokType in {tkInt8Lit, tkInt16Lit, tkInt32Lit}: - lexMessage(L, errNumberOutOfRange, result.literal) - elif result.tokType == tkInt8Lit and - (result.iNumber < int8.low or result.iNumber > int8.high): - lexMessage(L, errNumberOutOfRange, result.literal) - elif result.tokType == tkInt16Lit and - (result.iNumber < int16.low or result.iNumber > int16.high): - lexMessage(L, errNumberOutOfRange, result.literal) + result.tokType = tkInt64Lit + except ValueError: - lexMessage(L, errInvalidNumber, result.literal) + lexMessageLitNum(L, errInvalidNumber, startpos) except OverflowError, RangeError: - lexMessage(L, errNumberOutOfRange, result.literal) - L.bufpos = endpos + lexMessageLitNum(L, errNumberOutOfRange, startpos) + L.bufpos = postPos proc handleHexChar(L: var TLexer, xi: var int) = case L.buf[L.bufpos] @@ -632,23 +704,34 @@ proc getCharacter(L: var TLexer, tok: var TToken) = inc(L.bufpos) # skip ' proc getSymbol(L: var TLexer, tok: var TToken) = - var h: THash = 0 + var h: Hash = 0 var pos = L.bufpos var buf = L.buf while true: var c = buf[pos] case c of 'a'..'z', '0'..'9', '\x80'..'\xFF': - h = h !& ord(c) + if c == '\226' and + buf[pos+1] == '\128' and + buf[pos+2] == '\147': # It's a 'magic separator' en-dash Unicode + if buf[pos + magicIdentSeparatorRuneByteWidth] notin SymChars: + lexMessage(L, errInvalidToken, "–") + break + inc(pos, magicIdentSeparatorRuneByteWidth) + else: + h = h !& ord(c) + inc(pos) of 'A'..'Z': c = chr(ord(c) + (ord('a') - ord('A'))) # toLower() h = h !& ord(c) + inc(pos) of '_': if buf[pos+1] notin SymChars: lexMessage(L, errInvalidToken, "_") break + inc(pos) + else: break - inc(pos) h = !$h tok.ident = getIdent(addr(L.buf[L.bufpos]), pos - L.bufpos, h) L.bufpos = pos @@ -659,7 +742,7 @@ proc getSymbol(L: var TLexer, tok: var TToken) = tok.tokType = TTokType(tok.ident.id + ord(tkSymbol)) proc endOperator(L: var TLexer, tok: var TToken, pos: int, - hash: THash) {.inline.} = + hash: Hash) {.inline.} = var h = !$hash tok.ident = getIdent(addr(L.buf[L.bufpos]), pos - L.bufpos, h) if (tok.ident.id < oprLow) or (tok.ident.id > oprHigh): tok.tokType = tkOpr @@ -669,7 +752,7 @@ proc endOperator(L: var TLexer, tok: var TToken, pos: int, proc getOperator(L: var TLexer, tok: var TToken) = var pos = L.bufpos var buf = L.buf - var h: THash = 0 + var h: Hash = 0 while true: var c = buf[pos] if c notin OpChars: break @@ -785,7 +868,7 @@ proc skip(L: var TLexer, tok: var TToken) = break # EndOfFile also leaves the loop L.bufpos = pos -proc rawGetTok(L: var TLexer, tok: var TToken) = +proc rawGetTok*(L: var TLexer, tok: var TToken) = fillToken(tok) if L.indentAhead >= 0: tok.indent = L.indentAhead @@ -874,6 +957,15 @@ proc rawGetTok(L: var TLexer, tok: var TToken) = of '`': tok.tokType = tkAccent inc(L.bufpos) + of '_': + inc(L.bufpos) + if L.buf[L.bufpos] notin SymChars: + tok.tokType = tkSymbol + tok.ident = getIdent("_") + else: + tok.literal = $c + tok.tokType = tkInvalid + lexMessage(L, errInvalidToken, c & " (\\" & $(ord(c)) & ')') of '\"': # check for extended raw string literal: var rawMode = L.bufpos > 0 and L.buf[L.bufpos-1] in SymChars |