From fef21e90031557bc00af80e3d2da98b28780ee2b Mon Sep 17 00:00:00 2001
From: def <dennis@felsin9.de>
Date: Fri, 8 May 2015 03:58:44 +0200
Subject: Use -d:builtinOverflow for builtin overflow checks of recent Clang
 and GCC >= 5.0.

It is probably better to keep the builtin overflow checks optional after
all: with them, GCC's generated code actually seems to be a bit slower,
while Clang's is 3 times faster.
---
 lib/system/arithm.nim | 251 ++++++++++++++++++++++----------------------------
 1 file changed, 110 insertions(+), 141 deletions(-)

(limited to 'lib/system')

diff --git a/lib/system/arithm.nim b/lib/system/arithm.nim
index e41372bc6..5455f0981 100644
--- a/lib/system/arithm.nim
+++ b/lib/system/arithm.nim
@@ -8,105 +8,123 @@
 #
 
 
-# Only clang has __has_builtin (so far)
-#
-# TODO: This is emitted at the wrong position so we don't actually have an
-#       emit. Could we add this to nimbase.h instead?
-{.emit: """#ifndef __has_builtin
-  #define __has_builtin(x) 0
-#endif""".}
+# simple integer arithmetic with overflow checking
+
+proc raiseOverflow {.compilerproc, noinline, noreturn.} =
+  # a single proc to reduce code size to a minimum
+  sysFatal(OverflowError, "over- or underflow")
 
+proc raiseDivByZero {.compilerproc, noinline, noreturn.} =
+  sysFatal(DivByZeroError, "division by zero")
+
+when defined(builtinOverflow):
 # Builtin compiler functions for improved performance
+  when sizeof(clong) == 8:
+    proc addInt64Overflow[T: int64|int](a, b: T, c: var T): bool {.
+      importc: "__builtin_saddl_overflow", nodecl, nosideeffect.}
 
-proc checkFunction(name: string): string =
-  "((__has_builtin(__builtin_" & name & "_overflow)) || __GNUC__ >= 5)"
+    proc subInt64Overflow[T: int64|int](a, b: T, c: var T): bool {.
+      importc: "__builtin_ssubl_overflow", nodecl, nosideeffect.}
 
-# TODO: This is totally ugly. But we can't reliably detect this from Nim,
-# especially with cross-compiling where the user may be using an older compiler
-# version. Switching this on/off manually with a define seems weird as well.
-when sizeof(clong) == 8:
-  const hasAddInt64Overflow = checkFunction("saddl")
-  proc addInt64Overflow[T: int64|int](a, b: T, c: var T): bool {.
-    importc: "__builtin_saddl_overflow", nodecl, nosideeffect.}
+    proc mulInt64Overflow[T: int64|int](a, b: T, c: var T): bool {.
+      importc: "__builtin_smull_overflow", nodecl, nosideeffect.}
 
-  const hasSubInt64Overflow = checkFunction("ssubl")
-  proc subInt64Overflow[T: int64|int](a, b: T, c: var T): bool {.
-    importc: "__builtin_ssubl_overflow", nodecl, nosideeffect.}
+  elif sizeof(clonglong) == 8:
+    proc addInt64Overflow[T: int64|int](a, b: T, c: var T): bool {.
+      importc: "__builtin_saddll_overflow", nodecl, nosideeffect.}
 
-  const hasMulInt64Overflow = checkFunction("smull")
-  proc mulInt64Overflow[T: int64|int](a, b: T, c: var T): bool {.
-    importc: "__builtin_smull_overflow", nodecl, nosideeffect.}
+    proc subInt64Overflow[T: int64|int](a, b: T, c: var T): bool {.
+      importc: "__builtin_ssubll_overflow", nodecl, nosideeffect.}
 
-elif sizeof(clonglong) == 8:
-  const hasAddInt64Overflow = checkFunction("saddll")
-  proc addInt64Overflow[T: int64|int](a, b: T, c: var T): bool {.
-    importc: "__builtin_saddll_overflow", nodecl, nosideeffect.}
+    proc mulInt64Overflow[T: int64|int](a, b: T, c: var T): bool {.
+      importc: "__builtin_smulll_overflow", nodecl, nosideeffect.}
 
-  const hasSubInt64Overflow = checkFunction("ssubll")
-  proc subInt64Overflow[T: int64|int](a, b: T, c: var T): bool {.
-    importc: "__builtin_ssubll_overflow", nodecl, nosideeffect.}
+  when sizeof(int) == 8:
+    proc addIntOverflow(a, b: int, c: var int): bool {.inline.} =
+      addInt64Overflow(a, b, c)
 
-  const hasMulInt64Overflow = checkFunction("smulll")
-  proc mulInt64Overflow[T: int64|int](a, b: T, c: var T): bool {.
-    importc: "__builtin_smulll_overflow", nodecl, nosideeffect.}
+    proc subIntOverflow(a, b: int, c: var int): bool {.inline.} =
+      subInt64Overflow(a, b, c)
 
-when sizeof(int) == 8:
-  const hasAddIntOverflow = hasAddInt64Overflow
-  proc addIntOverflow(a, b: int, c: var int): bool {.inline.} =
-    addInt64Overflow(a, b, c)
+    proc mulIntOverflow(a, b: int, c: var int): bool {.inline.} =
+      mulInt64Overflow(a, b, c)
 
-  const hasSubIntOverflow = hasSubInt64Overflow
-  proc subIntOverflow(a, b: int, c: var int): bool {.inline.} =
-    subInt64Overflow(a, b, c)
+  elif sizeof(int) == 4 and sizeof(cint) == 4:
+    proc addIntOverflow(a, b: int, c: var int): bool {.
+      importc: "__builtin_sadd_overflow", nodecl, nosideeffect.}
 
-  const hasMulIntOverflow = hasMulInt64Overflow
-  proc mulIntOverflow(a, b: int, c: var int): bool {.inline.} =
-    mulInt64Overflow(a, b, c)
+    proc subIntOverflow(a, b: int, c: var int): bool {.
+      importc: "__builtin_ssub_overflow", nodecl, nosideeffect.}
 
-elif sizeof(int) == 4 and sizeof(cint) == 4:
-  const hasAddIntOverflow = checkFunction("sadd")
-  proc addIntOverflow(a, b: int, c: var int): bool {.
-    importc: "__builtin_sadd_overflow", nodecl, nosideeffect.}
+    proc mulIntOverflow(a, b: int, c: var int): bool {.
+      importc: "__builtin_smul_overflow", nodecl, nosideeffect.}
 
-  const hasSubIntOverflow = checkFunction("ssub")
-  proc subIntOverflow(a, b: int, c: var int): bool {.
-    importc: "__builtin_ssub_overflow", nodecl, nosideeffect.}
+  proc addInt64(a, b: int64): int64 {.compilerProc, inline.} =
+    if addInt64Overflow(a, b, result):
+      raiseOverflow()
 
-  const hasMulIntOverflow = checkFunction("smul")
-  proc mulIntOverflow(a, b: int, c: var int): bool {.
-    importc: "__builtin_smul_overflow", nodecl, nosideeffect.}
+  proc subInt64(a, b: int64): int64 {.compilerProc, inline.} =
+    if subInt64Overflow(a, b, result):
+      raiseOverflow()
 
+  proc mulInt64(a, b: int64): int64 {.compilerproc, inline.} =
+    if mulInt64Overflow(a, b, result):
+      raiseOverflow()
+else:
+  proc addInt64(a, b: int64): int64 {.compilerProc, inline.} =
+    result = a +% b
+    if (result xor a) >= int64(0) or (result xor b) >= int64(0):
+      return result
+    raiseOverflow()
 
-# simple integer arithmetic with overflow checking
+  proc subInt64(a, b: int64): int64 {.compilerProc, inline.} =
+    result = a -% b
+    if (result xor a) >= int64(0) or (result xor not b) >= int64(0):
+      return result
+    raiseOverflow()
 
-proc raiseOverflow {.compilerproc, noinline, noreturn.} =
-  # a single proc to reduce code size to a minimum
-  sysFatal(OverflowError, "over- or underflow")
+  #
+  # This code has been inspired by Python's source code.
+  # The native int product x*y is either exactly right or *way* off, being
+  # just the last n bits of the true product, where n is the number of bits
+  # in an int (the delivered product is the true product plus i*2**n for
+  # some integer i).
+  #
+  # The native float64 product x*y is subject to three
+  # rounding errors: on a sizeof(int)==8 box, each cast to double can lose
+  # info, and even on a sizeof(int)==4 box, the multiplication can lose info.
+  # But, unlike the native int product, it's not in *range* trouble:  even
+  # if sizeof(int)==32 (256-bit ints), the product easily fits in the
+  # dynamic range of a float64. So the leading 50 (or so) bits of the float64
+  # product are correct.
+  #
+  # We check these two ways against each other, and declare victory if they're
+  # approximately the same. Else, because the native int product is the only
+  # one that can lose catastrophic amounts of information, it's the native int
+  # product that must have overflowed.
+  #
+  proc mulInt64(a, b: int64): int64 {.compilerproc.} =
+    var
+      resAsFloat, floatProd: float64
+    result = a *% b
+    floatProd = toBiggestFloat(a) # conversion
+    floatProd = floatProd * toBiggestFloat(b)
+    resAsFloat = toBiggestFloat(result)
 
-proc raiseDivByZero {.compilerproc, noinline, noreturn.} =
-  sysFatal(DivByZeroError, "division by zero")
+    # Fast path for normal case: small multiplicands, and no info
+    # is lost in either method.
+    if resAsFloat == floatProd: return result
 
-proc addInt64(a, b: int64): int64 {.compilerProc, inline.} =
-  {.emit: "#if `hasAddInt64Overflow`".}
-  if addInt64Overflow(a, b, result):
-    raiseOverflow()
-  {.emit: "#else".}
-  result = a +% b
-  if (result xor a) >= int64(0) or (result xor b) >= int64(0):
-    return result
-  raiseOverflow()
-  {.emit: "#endif".}
+    # Somebody somewhere lost info. Close enough, or way off? Note
+    # that a != 0 and b != 0 (else resAsFloat == floatProd == 0).
+    # The difference either is or isn't significant compared to the
+    # true value (of which floatProd is a good approximation).
 
-proc subInt64(a, b: int64): int64 {.compilerProc, inline.} =
-  {.emit: "#if `hasSubInt64Overflow`".}
-  if subInt64Overflow(a, b, result):
+    # abs(diff)/abs(prod) <= 1/32 iff
+    #   32 * abs(diff) <= abs(prod) -- 5 good bits is "close enough"
+    if 32.0 * abs(resAsFloat - floatProd) <= abs(floatProd):
+      return result
     raiseOverflow()
-  {.emit: "#else".}
-  result = a -% b
-  if (result xor a) >= int64(0) or (result xor not b) >= int64(0):
-    return result
-  raiseOverflow()
-  {.emit: "#endif".}
 
 proc negInt64(a: int64): int64 {.compilerProc, inline.} =
   if a != low(int64): return -a
@@ -130,55 +148,6 @@ proc modInt64(a, b: int64): int64 {.compilerProc, inline.} =
     raiseDivByZero()
   return a mod b
 
-#
-# This code has been inspired by Python's source code.
-# The native int product x*y is either exactly right or *way* off, being
-# just the last n bits of the true product, where n is the number of bits
-# in an int (the delivered product is the true product plus i*2**n for
-# some integer i).
-#
-# The native float64 product x*y is subject to three
-# rounding errors: on a sizeof(int)==8 box, each cast to double can lose
-# info, and even on a sizeof(int)==4 box, the multiplication can lose info.
-# But, unlike the native int product, it's not in *range* trouble:  even
-# if sizeof(int)==32 (256-bit ints), the product easily fits in the
-# dynamic range of a float64. So the leading 50 (or so) bits of the float64
-# product are correct.
-#
-# We check these two ways against each other, and declare victory if they're
-# approximately the same. Else, because the native int product is the only
-# one that can lose catastrophic amounts of information, it's the native int
-# product that must have overflowed.
-#
-proc mulInt64(a, b: int64): int64 {.compilerproc.} =
-  {.emit: "#if `hasMulInt64Overflow`".}
-  if mulInt64Overflow(a, b, result):
-    raiseOverflow()
-  {.emit: "#else".}
-  var
-    resAsFloat, floatProd: float64
-  result = a *% b
-  floatProd = toBiggestFloat(a) # conversion
-  floatProd = floatProd * toBiggestFloat(b)
-  resAsFloat = toBiggestFloat(result)
-
-  # Fast path for normal case: small multiplicands, and no info
-  # is lost in either method.
-  if resAsFloat == floatProd: return result
-
-  # Somebody somewhere lost info. Close enough, or way off? Note
-  # that a != 0 and b != 0 (else resAsFloat == floatProd == 0).
-  # The difference either is or isn't significant compared to the
-  # true value (of which floatProd is a good approximation).
-
-  # abs(diff)/abs(prod) <= 1/32 iff
-  #   32 * abs(diff) <= abs(prod) -- 5 good bits is "close enough"
-  if 32.0 * abs(resAsFloat - floatProd) <= abs(floatProd):
-    return result
-  raiseOverflow()
-  {.emit: "#endif".}
-
-
 proc absInt(a: int): int {.compilerProc, inline.} =
   if a != low(int):
     if a >= 0: return a
@@ -330,30 +299,35 @@ elif false: # asmVersion and (defined(gcc) or defined(llvm_gcc)):
             :"%edx"
     """
 
-# Platform independent versions of the above (slower!)
-when not declared(addInt):
+when not declared(addInt) and defined(builtinOverflow):
   proc addInt(a, b: int): int {.compilerProc, inline.} =
-    {.emit: "#if `hasAddIntOverflow`".}
     if addIntOverflow(a, b, result):
       raiseOverflow()
-    {.emit: "#else".}
+
+when not declared(subInt) and defined(builtinOverflow):
+  proc subInt(a, b: int): int {.compilerProc, inline.} =
+    if subIntOverflow(a, b, result):
+      raiseOverflow()
+
+when not declared(mulInt) and defined(builtinOverflow):
+  proc mulInt(a, b: int): int {.compilerProc, inline.} =
+    if mulIntOverflow(a, b, result):
+      raiseOverflow()
+
+# Platform independent versions of the above (slower!)
+when not declared(addInt):
+  proc addInt(a, b: int): int {.compilerProc, inline.} =
     result = a +% b
     if (result xor a) >= 0 or (result xor b) >= 0:
       return result
     raiseOverflow()
-    {.emit: "#endif".}
 
 when not declared(subInt):
   proc subInt(a, b: int): int {.compilerProc, inline.} =
-    {.emit: "#if `hasSubIntOverflow`".}
-    if subIntOverflow(a, b, result):
-      raiseOverflow()
-    {.emit: "#else".}
     result = a -% b
     if (result xor a) >= 0 or (result xor not b) >= 0:
       return result
     raiseOverflow()
-    {.emit: "#endif".}
 
 when not declared(negInt):
   proc negInt(a: int): int {.compilerProc, inline.} =
@@ -396,10 +370,6 @@ when not declared(mulInt):
   # native int product that must have overflowed.
   #
   proc mulInt(a, b: int): int {.compilerProc.} =
-    {.emit: "#if `hasMulIntOverflow`".}
-    if mulIntOverflow(a, b, result):
-      raiseOverflow()
-    {.emit: "#else".}
     var
       resAsFloat, floatProd: float
 
@@ -421,7 +391,6 @@ when not declared(mulInt):
     if 32.0 * abs(resAsFloat - floatProd) <= abs(floatProd):
       return result
     raiseOverflow()
-    {.emit: "#endif".}
 
 # We avoid setting the FPU control word here for compatibility with libraries
 # written in other languages.
-- 
cgit 1.4.1-2-gfad0