diff options
author | Andreas Rumpf <rumpf_a@web.de> | 2015-06-07 10:55:23 +0200 |
---|---|---|
committer | Andreas Rumpf <rumpf_a@web.de> | 2015-06-07 10:55:23 +0200 |
commit | 94b7da4297558dc85b047f6ddd91201895f54d0c (patch) | |
tree | 9cce66219dd1155782c8498233b4d3228f2027c3 | |
parent | cb1f1cfd521d3dbcf8467e655521016f9627e722 (diff) | |
parent | fef21e90031557bc00af80e3d2da98b28780ee2b (diff) | |
download | Nim-94b7da4297558dc85b047f6ddd91201895f54d0c.tar.gz |
Merge pull request #2645 from def-/builtin_overflow
Use builtin overflow functions of Clang and GCC (WIP, RFC)
-rw-r--r-- | lib/system/arithm.nim | 176 |
1 files changed, 122 insertions, 54 deletions
diff --git a/lib/system/arithm.nim b/lib/system/arithm.nim index ef153417c..907907e24 100644 --- a/lib/system/arithm.nim +++ b/lib/system/arithm.nim @@ -17,17 +17,114 @@ proc raiseOverflow {.compilerproc, noinline.} = proc raiseDivByZero {.compilerproc, noinline.} = sysFatal(DivByZeroError, "division by zero") -proc addInt64(a, b: int64): int64 {.compilerProc, inline.} = - result = a +% b - if (result xor a) >= int64(0) or (result xor b) >= int64(0): - return result - raiseOverflow() +when defined(builtinOverflow): +# Builtin compiler functions for improved performance + when sizeof(clong) == 8: + proc addInt64Overflow[T: int64|int](a, b: T, c: var T): bool {. + importc: "__builtin_saddl_overflow", nodecl, nosideeffect.} -proc subInt64(a, b: int64): int64 {.compilerProc, inline.} = - result = a -% b - if (result xor a) >= int64(0) or (result xor not b) >= int64(0): - return result - raiseOverflow() + proc subInt64Overflow[T: int64|int](a, b: T, c: var T): bool {. + importc: "__builtin_ssubl_overflow", nodecl, nosideeffect.} + + proc mulInt64Overflow[T: int64|int](a, b: T, c: var T): bool {. + importc: "__builtin_smull_overflow", nodecl, nosideeffect.} + + elif sizeof(clonglong) == 8: + proc addInt64Overflow[T: int64|int](a, b: T, c: var T): bool {. + importc: "__builtin_saddll_overflow", nodecl, nosideeffect.} + + proc subInt64Overflow[T: int64|int](a, b: T, c: var T): bool {. + importc: "__builtin_ssubll_overflow", nodecl, nosideeffect.} + + proc mulInt64Overflow[T: int64|int](a, b: T, c: var T): bool {. + importc: "__builtin_smulll_overflow", nodecl, nosideeffect.} + + when sizeof(int) == 8: + proc addIntOverflow(a, b: int, c: var int): bool {.inline.} = + addInt64Overflow(a, b, c) + + proc subIntOverflow(a, b: int, c: var int): bool {.inline.} = + subInt64Overflow(a, b, c) + + proc mulIntOverflow(a, b: int, c: var int): bool {.inline.} = + mulInt64Overflow(a, b, c) + + elif sizeof(int) == 4 and sizeof(cint) == 4: + proc addIntOverflow(a, b: int, c: var int): bool {. + importc: "__builtin_sadd_overflow", nodecl, nosideeffect.} + + proc subIntOverflow(a, b: int, c: var int): bool {. + importc: "__builtin_ssub_overflow", nodecl, nosideeffect.} + + proc mulIntOverflow(a, b: int, c: var int): bool {. + importc: "__builtin_smul_overflow", nodecl, nosideeffect.} + + proc addInt64(a, b: int64): int64 {.compilerProc, inline.} = + if addInt64Overflow(a, b, result): + raiseOverflow() + + proc subInt64(a, b: int64): int64 {.compilerProc, inline.} = + if subInt64Overflow(a, b, result): + raiseOverflow() + + proc mulInt64(a, b: int64): int64 {.compilerproc, inline.} = + if mulInt64Overflow(a, b, result): + raiseOverflow() +else: + proc addInt64(a, b: int64): int64 {.compilerProc, inline.} = + result = a +% b + if (result xor a) >= int64(0) or (result xor b) >= int64(0): + return result + raiseOverflow() + + proc subInt64(a, b: int64): int64 {.compilerProc, inline.} = + result = a -% b + if (result xor a) >= int64(0) or (result xor not b) >= int64(0): + return result + raiseOverflow() + + # + # This code has been inspired by Python's source code. + # The native int product x*y is either exactly right or *way* off, being + # just the last n bits of the true product, where n is the number of bits + # in an int (the delivered product is the true product plus i*2**n for + # some integer i). + # + # The native float64 product x*y is subject to three + # rounding errors: on a sizeof(int)==8 box, each cast to double can lose + # info, and even on a sizeof(int)==4 box, the multiplication can lose info. + # But, unlike the native int product, it's not in *range* trouble: even + # if sizeof(int)==32 (256-bit ints), the product easily fits in the + # dynamic range of a float64. So the leading 50 (or so) bits of the float64 + # product are correct. + # + # We check these two ways against each other, and declare victory if they're + # approximately the same. Else, because the native int product is the only + # one that can lose catastrophic amounts of information, it's the native int + # product that must have overflowed. + # + proc mulInt64(a, b: int64): int64 {.compilerproc.} = + var + resAsFloat, floatProd: float64 + result = a *% b + floatProd = toBiggestFloat(a) # conversion + floatProd = floatProd * toBiggestFloat(b) + resAsFloat = toBiggestFloat(result) + + # Fast path for normal case: small multiplicands, and no info + # is lost in either method. + if resAsFloat == floatProd: return result + + # Somebody somewhere lost info. Close enough, or way off? Note + # that a != 0 and b != 0 (else resAsFloat == floatProd == 0). + # The difference either is or isn't significant compared to the + # true value (of which floatProd is a good approximation). + + # abs(diff)/abs(prod) <= 1/32 iff + # 32 * abs(diff) <= abs(prod) -- 5 good bits is "close enough" + if 32.0 * abs(resAsFloat - floatProd) <= abs(floatProd): + return result + raiseOverflow() proc negInt64(a: int64): int64 {.compilerProc, inline.} = if a != low(int64): return -a @@ -51,50 +148,6 @@ proc modInt64(a, b: int64): int64 {.compilerProc, inline.} = raiseDivByZero() return a mod b -# -# This code has been inspired by Python's source code. -# The native int product x*y is either exactly right or *way* off, being -# just the last n bits of the true product, where n is the number of bits -# in an int (the delivered product is the true product plus i*2**n for -# some integer i). -# -# The native float64 product x*y is subject to three -# rounding errors: on a sizeof(int)==8 box, each cast to double can lose -# info, and even on a sizeof(int)==4 box, the multiplication can lose info. -# But, unlike the native int product, it's not in *range* trouble: even -# if sizeof(int)==32 (256-bit ints), the product easily fits in the -# dynamic range of a float64. So the leading 50 (or so) bits of the float64 -# product are correct. -# -# We check these two ways against each other, and declare victory if they're -# approximately the same. Else, because the native int product is the only -# one that can lose catastrophic amounts of information, it's the native int -# product that must have overflowed. -# -proc mulInt64(a, b: int64): int64 {.compilerproc.} = - var - resAsFloat, floatProd: float64 - result = a *% b - floatProd = toBiggestFloat(a) # conversion - floatProd = floatProd * toBiggestFloat(b) - resAsFloat = toBiggestFloat(result) - - # Fast path for normal case: small multiplicands, and no info - # is lost in either method. - if resAsFloat == floatProd: return result - - # Somebody somewhere lost info. Close enough, or way off? Note - # that a != 0 and b != 0 (else resAsFloat == floatProd == 0). - # The difference either is or isn't significant compared to the - # true value (of which floatProd is a good approximation). - - # abs(diff)/abs(prod) <= 1/32 iff - # 32 * abs(diff) <= abs(prod) -- 5 good bits is "close enough" - if 32.0 * abs(resAsFloat - floatProd) <= abs(floatProd): - return result - raiseOverflow() - - proc absInt(a: int): int {.compilerProc, inline.} = if a != low(int): if a >= 0: return a @@ -246,6 +299,21 @@ elif false: # asmVersion and (defined(gcc) or defined(llvm_gcc)): :"%edx" """ +when not declared(addInt) and defined(builtinOverflow): + proc addInt(a, b: int): int {.compilerProc, inline.} = + if addIntOverflow(a, b, result): + raiseOverflow() + +when not declared(subInt) and defined(builtinOverflow): + proc subInt(a, b: int): int {.compilerProc, inline.} = + if subIntOverflow(a, b, result): + raiseOverflow() + +when not declared(mulInt) and defined(builtinOverflow): + proc mulInt(a, b: int): int {.compilerProc, inline.} = + if mulIntOverflow(a, b, result): + raiseOverflow() + # Platform independent versions of the above (slower!) when not declared(addInt): proc addInt(a, b: int): int {.compilerProc, inline.} = |