diff options
author | Oscar Campbell <oscar@campbell.nu> | 2015-06-09 14:38:54 +0200 |
---|---|---|
committer | Oscar Campbell <oscar@campbell.nu> | 2015-06-12 13:45:14 +0200 |
commit | a723362d85c077e572240d55301e715ff6dbcf64 (patch) | |
tree | dd41d273416d9f67d651396a3c0fae5ef1ce690b /compiler/lexer.nim | |
parent | 70e157d79e3953dcb122dac46e9e5ca5eacdfd99 (diff) | |
download | Nim-a723362d85c077e572240d55301e715ff6dbcf64.tar.gz |
Fix bug #1179 + some more found while testing.
- The char(-1) < char(0) is not covered in this PR - different beast. - Additionally to #1179, I found that non base-10 literals wheren't bounds checked at all.
Diffstat (limited to 'compiler/lexer.nim')
-rw-r--r-- | compiler/lexer.nim | 119 |
1 files changed, 72 insertions, 47 deletions
diff --git a/compiler/lexer.nim b/compiler/lexer.nim index 6b38ee062..09eca3100 100644 --- a/compiler/lexer.nim +++ b/compiler/lexer.nim @@ -262,14 +262,11 @@ template eatChar(L: var TLexer, t: var TToken) = add(t.literal, L.buf[L.bufpos]) inc(L.bufpos) -proc getNumber(L: var TLexer): TToken = - var - startpos, endpos: int - xi: BiggestInt - const literalishChars = { 'A'..'F', 'a'..'f', '0'..'9', 'X', 'x', 'o', 'c', - 'C', 'b', 'B', '_', '.', '\''} - const literalishCharsNoDot = literalishChars - {'.'} + + + +proc getNumber(L: var TLexer): TToken = proc matchUnderscoreChars(L: var TLexer, tok: var TToken, chars: set[char]) = var pos = L.bufpos # use registers for pos, buf var buf = L.buf @@ -318,14 +315,22 @@ proc getNumber(L: var TLexer): TToken = L.bufpos = msgPos lexMessage(L, msg, t.literal) + var + startpos, endpos: int + xi: BiggestInt + isBase10 = true + const + baseCodeChars = {'X', 'x', 'o', 'c', 'C', 'b', 'B'} + literalishChars = baseCodeChars + {'A'..'F', 'a'..'f', '0'..'9', '_', '\''} + floatTypes = {tkFloatLit, tkFloat32Lit, tkFloat64Lit, tkFloat128Lit} result.tokType = tkIntLit # int literal until we know better result.literal = "" result.base = base10 startpos = L.bufpos - var isAFloatLiteral = false + # First stage: find out base, make verifications, build token literal string - if L.buf[L.bufpos] == '0' and - L.buf[L.bufpos + 1] in {'X', 'x', 'o', 'O', 'c', 'C', 'b', 'B'}: + if L.buf[L.bufpos] == '0' and L.buf[L.bufpos + 1] in baseCodeChars + {'O'}: + isBase10 = false eatChar(L, result, '0') case L.buf[L.bufpos] of 'O': @@ -344,21 +349,23 @@ proc getNumber(L: var TLexer): TToken = else: matchUnderscoreChars(L, result, {'0'..'9'}) if (L.buf[L.bufpos] == '.') and (L.buf[L.bufpos + 1] in {'0'..'9'}): - isAFloatLiteral = true + result.tokType = tkFloat64Lit eatChar(L, result, '.') matchUnderscoreChars(L, result, {'0'..'9'}) if L.buf[L.bufpos] in {'e', 'E'}: - isAFloatLiteral = true + result.tokType = tkFloat64Lit eatChar(L, result, 'e') if L.buf[L.bufpos] in {'+', '-'}: eatChar(L, result) matchUnderscoreChars(L, result, {'0'..'9'}) endpos = L.bufpos - # Second stage, find out if there's a datatype postfix and handle it + + # Second stage, find out if there's a datatype suffix and handle it var postPos = endpos if L.buf[postPos] in {'\'', 'f', 'F', 'd', 'D', 'i', 'I', 'u', 'U'}: - if L.buf[postPos] == '\'': + if L.buf[postPos] == '\'': inc(postPos) + case L.buf[postPos] of 'f', 'F': inc(postPos) @@ -410,21 +417,23 @@ proc getNumber(L: var TLexer): TToken = inc(postPos) else: result.tokType = tkUIntLit - else: + else: lexMessageLitNum(L, errInvalidNumber, startpos) + # Is there still a literalish char awaiting? Then it's an error! - if L.buf[postPos] in literalishCharsNoDot or + if L.buf[postPos] in literalishChars or (L.buf[postPos] == '.' and L.buf[postPos + 1] in {'0'..'9'}): lexMessageLitNum(L, errInvalidNumber, startpos) + # Third stage, extract actual number L.bufpos = startpos # restore position var pos: int = startpos try: - if (L.buf[pos] == '0') and - (L.buf[pos + 1] in {'x', 'X', 'b', 'B', 'o', 'O', 'c', 'C'}): + if (L.buf[pos] == '0') and (L.buf[pos + 1] in baseCodeChars): inc(pos, 2) xi = 0 # it is a base prefix - case L.buf[pos - 1] # now look at the optional type suffix: + + case L.buf[pos - 1] of 'b', 'B': result.base = base2 while pos < endpos: @@ -452,48 +461,64 @@ proc getNumber(L: var TLexer): TToken = of 'A'..'F': xi = `shl`(xi, 4) or (ord(L.buf[pos]) - ord('A') + 10) inc(pos) - else: + else: break - else: + else: internalError(getLineInfo(L), "getNumber") + case result.tokType of tkIntLit, tkInt64Lit: result.iNumber = xi of tkInt8Lit: result.iNumber = BiggestInt(int8(toU8(int(xi)))) - of tkInt16Lit: result.iNumber = BiggestInt(toU16(int(xi))) - of tkInt32Lit: result.iNumber = BiggestInt(toU32(xi)) + of tkInt16Lit: result.iNumber = BiggestInt(int16(toU16(int(xi)))) + of tkInt32Lit: result.iNumber = BiggestInt(int32(toU32(int64(xi)))) of tkUIntLit, tkUInt64Lit: result.iNumber = xi - of tkUInt8Lit: result.iNumber = BiggestInt(int8(toU8(int(xi)))) - of tkUInt16Lit: result.iNumber = BiggestInt(toU16(int(xi))) - of tkUInt32Lit: result.iNumber = BiggestInt(toU32(xi)) + of tkUInt8Lit: result.iNumber = BiggestInt(uint8(toU8(int(xi)))) + of tkUInt16Lit: result.iNumber = BiggestInt(uint16(toU16(int(xi)))) + of tkUInt32Lit: result.iNumber = BiggestInt(uint32(toU32(int64(xi)))) of tkFloat32Lit: result.fNumber = (cast[PFloat32](addr(xi)))[] # note: this code is endian neutral! # XXX: Test this on big endian machine! of tkFloat64Lit: result.fNumber = (cast[PFloat64](addr(xi)))[] else: internalError(getLineInfo(L), "getNumber") - elif isAFloatLiteral or (result.tokType == tkFloat32Lit) or - (result.tokType == tkFloat64Lit): - result.fNumber = parseFloat(result.literal) - if result.tokType == tkIntLit: result.tokType = tkFloatLit - elif result.tokType == tkUint64Lit: - xi = 0 - let len = unsafeParseUInt(result.literal, xi) - if len != result.literal.len or len == 0: - raise newException(ValueError, "invalid integer: " & $xi) - result.iNumber = xi + + # Simple bounds check + if result.tokType notin floatTypes and xi != result.iNumber: + lexMessageLitNum(L, errNumberOutOfRange, startpos) + else: - result.iNumber = parseBiggestInt(result.literal) - if (result.iNumber < low(int32)) or (result.iNumber > high(int32)): - if result.tokType == tkIntLit: + case result.tokType + of floatTypes: + result.fNumber = parseFloat(result.literal) + of tkUint64Lit: + xi = 0 + let len = unsafeParseUInt(result.literal, xi) + if len != result.literal.len or len == 0: + raise newException(ValueError, "invalid integer: " & $xi) + result.iNumber = xi + else: + result.iNumber = parseBiggestInt(result.literal) + + # Explicit bounds checks + let outOfRange = case result.tokType: + of tkInt8Lit: (result.iNumber < int8.low or result.iNumber > int8.high) + of tkUInt8Lit: (result.iNumber < BiggestInt(uint8.low) or + result.iNumber > BiggestInt(uint8.high)) + of tkInt16Lit: (result.iNumber < int16.low or result.iNumber > int16.high) + of tkUInt16Lit: (result.iNumber < BiggestInt(uint16.low) or + result.iNumber > BiggestInt(uint16.high)) + of tkInt32Lit: (result.iNumber < int32.low or result.iNumber > int32.high) + of tkUInt32Lit: (result.iNumber < BiggestInt(uint32.low) or + result.iNumber > BiggestInt(uint32.high)) + else: false + + if outOfRange: lexMessageLitNum(L, errNumberOutOfRange, startpos) + + # Promote int literal to int64? Not always necessary, but more consistent + if result.tokType == tkIntLit: + if (result.iNumber < low(int32)) or (result.iNumber > high(int32)): result.tokType = tkInt64Lit - elif result.tokType in {tkInt8Lit, tkInt16Lit, tkInt32Lit}: - lexMessageLitNum(L, errNumberOutOfRange, startpos) - elif result.tokType == tkInt8Lit and - (result.iNumber < int8.low or result.iNumber > int8.high): - lexMessageLitNum(L, errNumberOutOfRange, startpos) - elif result.tokType == tkInt16Lit and - (result.iNumber < int16.low or result.iNumber > int16.high): - lexMessageLitNum(L, errNumberOutOfRange, startpos) + except ValueError: lexMessageLitNum(L, errInvalidNumber, startpos) except OverflowError, RangeError: |