diff options
author | Andreas Rumpf <rumpf_a@web.de> | 2015-06-13 19:49:32 +0200 |
---|---|---|
committer | Andreas Rumpf <rumpf_a@web.de> | 2015-06-13 19:49:32 +0200 |
commit | 35e922d18e0d24eb7c537debb85b8017eae2f186 (patch) | |
tree | 0a324f6f446fdda8587608785e79d4dc129f6593 | |
parent | ccf8c88f29655fe292a81792eb1e51e214a08360 (diff) | |
parent | 7ca61f484b67b231929432436ba9717494f1648f (diff) | |
download | Nim-35e922d18e0d24eb7c537debb85b8017eae2f186.tar.gz |
Merge pull request #2890 from ozra/fix-1179-unsigned-number-literals
Fix 1179 unsigned number literals
-rw-r--r-- | compiler/lexer.nim | 131 | ||||
-rw-r--r-- | tests/misc/tunsignedmisc.nim | 68 |
2 files changed, 152 insertions, 47 deletions
diff --git a/compiler/lexer.nim b/compiler/lexer.nim index 6b38ee062..5c7baf7d3 100644 --- a/compiler/lexer.nim +++ b/compiler/lexer.nim @@ -262,14 +262,11 @@ template eatChar(L: var TLexer, t: var TToken) = add(t.literal, L.buf[L.bufpos]) inc(L.bufpos) -proc getNumber(L: var TLexer): TToken = - var - startpos, endpos: int - xi: BiggestInt - const literalishChars = { 'A'..'F', 'a'..'f', '0'..'9', 'X', 'x', 'o', 'c', - 'C', 'b', 'B', '_', '.', '\''} - const literalishCharsNoDot = literalishChars - {'.'} + + + +proc getNumber(L: var TLexer): TToken = proc matchUnderscoreChars(L: var TLexer, tok: var TToken, chars: set[char]) = var pos = L.bufpos # use registers for pos, buf var buf = L.buf @@ -318,14 +315,22 @@ proc getNumber(L: var TLexer): TToken = L.bufpos = msgPos lexMessage(L, msg, t.literal) + var + startpos, endpos: int + xi: BiggestInt + isBase10 = true + const + baseCodeChars = {'X', 'x', 'o', 'c', 'C', 'b', 'B'} + literalishChars = baseCodeChars + {'A'..'F', 'a'..'f', '0'..'9', '_', '\''} + floatTypes = {tkFloatLit, tkFloat32Lit, tkFloat64Lit, tkFloat128Lit} result.tokType = tkIntLit # int literal until we know better result.literal = "" result.base = base10 startpos = L.bufpos - var isAFloatLiteral = false + # First stage: find out base, make verifications, build token literal string - if L.buf[L.bufpos] == '0' and - L.buf[L.bufpos + 1] in {'X', 'x', 'o', 'O', 'c', 'C', 'b', 'B'}: + if L.buf[L.bufpos] == '0' and L.buf[L.bufpos + 1] in baseCodeChars + {'O'}: + isBase10 = false eatChar(L, result, '0') case L.buf[L.bufpos] of 'O': @@ -344,21 +349,23 @@ proc getNumber(L: var TLexer): TToken = else: matchUnderscoreChars(L, result, {'0'..'9'}) if (L.buf[L.bufpos] == '.') and (L.buf[L.bufpos + 1] in {'0'..'9'}): - isAFloatLiteral = true + result.tokType = tkFloat64Lit eatChar(L, result, '.') matchUnderscoreChars(L, result, {'0'..'9'}) if L.buf[L.bufpos] in {'e', 'E'}: - isAFloatLiteral = true + result.tokType = tkFloat64Lit eatChar(L, result, 'e') if L.buf[L.bufpos] in {'+', '-'}: eatChar(L, result) matchUnderscoreChars(L, result, {'0'..'9'}) endpos = L.bufpos - # Second stage, find out if there's a datatype postfix and handle it + + # Second stage, find out if there's a datatype suffix and handle it var postPos = endpos if L.buf[postPos] in {'\'', 'f', 'F', 'd', 'D', 'i', 'I', 'u', 'U'}: - if L.buf[postPos] == '\'': + if L.buf[postPos] == '\'': inc(postPos) + case L.buf[postPos] of 'f', 'F': inc(postPos) @@ -410,21 +417,23 @@ proc getNumber(L: var TLexer): TToken = inc(postPos) else: result.tokType = tkUIntLit - else: + else: lexMessageLitNum(L, errInvalidNumber, startpos) + # Is there still a literalish char awaiting? Then it's an error! - if L.buf[postPos] in literalishCharsNoDot or + if L.buf[postPos] in literalishChars or (L.buf[postPos] == '.' and L.buf[postPos + 1] in {'0'..'9'}): lexMessageLitNum(L, errInvalidNumber, startpos) + # Third stage, extract actual number L.bufpos = startpos # restore position var pos: int = startpos try: - if (L.buf[pos] == '0') and - (L.buf[pos + 1] in {'x', 'X', 'b', 'B', 'o', 'O', 'c', 'C'}): + if (L.buf[pos] == '0') and (L.buf[pos + 1] in baseCodeChars): inc(pos, 2) xi = 0 # it is a base prefix - case L.buf[pos - 1] # now look at the optional type suffix: + + case L.buf[pos - 1] of 'b', 'B': result.base = base2 while pos < endpos: @@ -452,48 +461,76 @@ proc getNumber(L: var TLexer): TToken = of 'A'..'F': xi = `shl`(xi, 4) or (ord(L.buf[pos]) - ord('A') + 10) inc(pos) - else: + else: break - else: + else: internalError(getLineInfo(L), "getNumber") + case result.tokType of tkIntLit, tkInt64Lit: result.iNumber = xi of tkInt8Lit: result.iNumber = BiggestInt(int8(toU8(int(xi)))) - of tkInt16Lit: result.iNumber = BiggestInt(toU16(int(xi))) - of tkInt32Lit: result.iNumber = BiggestInt(toU32(xi)) + of tkInt16Lit: result.iNumber = BiggestInt(int16(toU16(int(xi)))) + of tkInt32Lit: result.iNumber = BiggestInt(int32(toU32(int64(xi)))) of tkUIntLit, tkUInt64Lit: result.iNumber = xi - of tkUInt8Lit: result.iNumber = BiggestInt(int8(toU8(int(xi)))) - of tkUInt16Lit: result.iNumber = BiggestInt(toU16(int(xi))) - of tkUInt32Lit: result.iNumber = BiggestInt(toU32(xi)) + of tkUInt8Lit: result.iNumber = BiggestInt(uint8(toU8(int(xi)))) + of tkUInt16Lit: result.iNumber = BiggestInt(uint16(toU16(int(xi)))) + of tkUInt32Lit: result.iNumber = BiggestInt(uint32(toU32(int64(xi)))) of tkFloat32Lit: result.fNumber = (cast[PFloat32](addr(xi)))[] # note: this code is endian neutral! # XXX: Test this on big endian machine! of tkFloat64Lit: result.fNumber = (cast[PFloat64](addr(xi)))[] else: internalError(getLineInfo(L), "getNumber") - elif isAFloatLiteral or (result.tokType == tkFloat32Lit) or - (result.tokType == tkFloat64Lit): - result.fNumber = parseFloat(result.literal) - if result.tokType == tkIntLit: result.tokType = tkFloatLit - elif result.tokType == tkUint64Lit: - xi = 0 - let len = unsafeParseUInt(result.literal, xi) - if len != result.literal.len or len == 0: - raise newException(ValueError, "invalid integer: " & $xi) - result.iNumber = xi + + # Bounds checks. Non decimal literals are allowed to overflow the range of + # the datatype as long as their pattern don't overflow _bitwise_, hence + # below checks of signed sizes against uint*.high is deliberate: + # (0x80'u8 = 128, 0x80'i8 = -128, etc == OK) + if result.tokType notin floatTypes: + let outOfRange = case result.tokType: + of tkUInt8Lit, tkUInt16Lit, tkUInt32Lit: result.iNumber != xi + of tkInt8Lit: (xi > BiggestInt(uint8.high)) + of tkInt16Lit: (xi > BiggestInt(uint16.high)) + of tkInt32Lit: (xi > BiggestInt(uint32.high)) + else: false + + if outOfRange: + echo "out of range num: ", result.iNumber, " vs ", xi + lexMessageLitNum(L, errNumberOutOfRange, startpos) + else: - result.iNumber = parseBiggestInt(result.literal) - if (result.iNumber < low(int32)) or (result.iNumber > high(int32)): - if result.tokType == tkIntLit: + case result.tokType + of floatTypes: + result.fNumber = parseFloat(result.literal) + of tkUint64Lit: + xi = 0 + let len = unsafeParseUInt(result.literal, xi) + if len != result.literal.len or len == 0: + raise newException(ValueError, "invalid integer: " & $xi) + result.iNumber = xi + else: + result.iNumber = parseBiggestInt(result.literal) + + # Explicit bounds checks + let outOfRange = case result.tokType: + of tkInt8Lit: (result.iNumber < int8.low or result.iNumber > int8.high) + of tkUInt8Lit: (result.iNumber < BiggestInt(uint8.low) or + result.iNumber > BiggestInt(uint8.high)) + of tkInt16Lit: (result.iNumber < int16.low or result.iNumber > int16.high) + of tkUInt16Lit: (result.iNumber < BiggestInt(uint16.low) or + result.iNumber > BiggestInt(uint16.high)) + of tkInt32Lit: (result.iNumber < int32.low or result.iNumber > int32.high) + of tkUInt32Lit: (result.iNumber < BiggestInt(uint32.low) or + result.iNumber > BiggestInt(uint32.high)) + else: false + + if outOfRange: lexMessageLitNum(L, errNumberOutOfRange, startpos) + + # Promote int literal to int64? Not always necessary, but more consistent + if result.tokType == tkIntLit: + if (result.iNumber < low(int32)) or (result.iNumber > high(int32)): result.tokType = tkInt64Lit - elif result.tokType in {tkInt8Lit, tkInt16Lit, tkInt32Lit}: - lexMessageLitNum(L, errNumberOutOfRange, startpos) - elif result.tokType == tkInt8Lit and - (result.iNumber < int8.low or result.iNumber > int8.high): - lexMessageLitNum(L, errNumberOutOfRange, startpos) - elif result.tokType == tkInt16Lit and - (result.iNumber < int16.low or result.iNumber > int16.high): - lexMessageLitNum(L, errNumberOutOfRange, startpos) + except ValueError: lexMessageLitNum(L, errInvalidNumber, startpos) except OverflowError, RangeError: diff --git a/tests/misc/tunsignedmisc.nim b/tests/misc/tunsignedmisc.nim new file mode 100644 index 000000000..e6a497a3d --- /dev/null +++ b/tests/misc/tunsignedmisc.nim @@ -0,0 +1,68 @@ +import unsigned + +discard """ + errormsg: "number 0x123'u8 out of valid range" +""" + +# Bug #1179 + +# Unsigneds + +# 8 bit +let ref1 = 128'u8 shr 7 +let hex1 = 0x80'u8 shr 7 +let oct1 = 0c200'u8 shr 7 +let dig1 = 0b10000000'u8 shr 7 + +doAssert(ref1 == 1) +doAssert(ref1 == hex1) +doAssert(ref1 == oct1) +doAssert(ref1 == dig1) + +# 16 bit +let ref2 = 32768'u16 shr 15 +let hex2 = 0x8000'u16 shr 15 +let oct2 = 0c100000'u16 shr 15 +let dig2 = 0b1000000000000000'u16 shr 15 + +doAssert(ref2 == 1) +doAssert(ref2 == hex2) +doAssert(ref2 == oct2) +doAssert(ref2 == dig2) + +# 32 bit +let ref3 = 2147483648'u32 shr 31 +let hex3 = 0x80000000'u32 shr 31 +let oct3 = 0c20000000000'u32 shr 31 +let dig3 = 0b10000000000000000000000000000000'u32 shr 31 + +doAssert(ref3 == 1) +doAssert(ref3 == hex3) +doAssert(ref3 == oct3) +doAssert(ref3 == dig3) + +# Below doesn't work for lexer stage errors... +# doAssert(compiles(0xFF'u8) == true) +# doAssert(compiles(0xFFF'u16) == true) +# doAssert(compiles(0x7FFF'i16) == true) + +# doAssert(compiles(0x123'u8) == false) +# doAssert(compiles(0x123'i8) == false) +# doAssert(compiles(0x123123'u16) == false) +# doAssert(compiles(0x123123'i16) == false) + +# Should compile # +let boundOkHex1 = 0xFF'u8 +let boundOkHex2 = 0xFFFF'u16 +let boundOkHex3 = 0x7FFF'i16 + +let boundOkHex4 = 0x80'i8 +let boundOkHex5 = 0xFF'i8 +let boundOkHex6 = 0xFFFF'i16 +let boundOkHex7 = 0x7FFF'i16 + +# Should _not_ compile # +let boundBreakingHex1 = 0x123'u8 +let boundBreakingHex2 = 0x123'i8 +let boundBreakingHex3 = 0x123123'u16 +let boundBreakingHex4 = 0x123123'i16 |