diff options
Diffstat (limited to 'lib/pure/bitops.nim')
-rw-r--r-- | lib/pure/bitops.nim | 79 |
1 files changed, 47 insertions, 32 deletions
diff --git a/lib/pure/bitops.nim b/lib/pure/bitops.nim index 033c54a6a..710c8590a 100644 --- a/lib/pure/bitops.nim +++ b/lib/pure/bitops.nim @@ -39,7 +39,8 @@ proc bitxor*[T: SomeInteger](x, y: T): T {.magic: "BitxorI", noSideEffect.} const useBuiltins = not defined(noIntrinsicsBitOpts) const noUndefined = defined(noUndefinedBitOpts) -const useGCC_builtins = (defined(gcc) or defined(llvm_gcc) or defined(clang)) and useBuiltins +const useGCC_builtins = (defined(gcc) or defined(llvm_gcc) or + defined(clang)) and useBuiltins const useICC_builtins = defined(icc) and useBuiltins const useVCC_builtins = defined(vcc) and useBuiltins const arch64 = sizeof(int) == 8 @@ -196,7 +197,8 @@ template parityImpl[T](value: T): int = when useGCC_builtins: # Returns the number of set 1-bits in value. proc builtin_popcount(x: cuint): cint {.importc: "__builtin_popcount", cdecl.} - proc builtin_popcountll(x: culonglong): cint {.importc: "__builtin_popcountll", cdecl.} + proc builtin_popcountll(x: culonglong): cint {. + importc: "__builtin_popcountll", cdecl.} # Returns the bit parity in value proc builtin_parity(x: cuint): cint {.importc: "__builtin_parity", cdecl.} @@ -216,17 +218,24 @@ when useGCC_builtins: elif useVCC_builtins: # Counts the number of one bits (population count) in a 16-, 32-, or 64-byte unsigned integer. - proc builtin_popcnt16(a2: uint16): uint16 {.importc: "__popcnt16" header: "<intrin.h>", noSideEffect.} - proc builtin_popcnt32(a2: uint32): uint32 {.importc: "__popcnt" header: "<intrin.h>", noSideEffect.} - proc builtin_popcnt64(a2: uint64): uint64 {.importc: "__popcnt64" header: "<intrin.h>", noSideEffect.} + proc builtin_popcnt16(a2: uint16): uint16 {. + importc: "__popcnt16"header: "<intrin.h>", noSideEffect.} + proc builtin_popcnt32(a2: uint32): uint32 {. + importc: "__popcnt"header: "<intrin.h>", noSideEffect.} + proc builtin_popcnt64(a2: uint64): uint64 {. + importc: "__popcnt64"header: "<intrin.h>", noSideEffect.} # Search the mask data from most significant bit (MSB) to least significant bit (LSB) for a set bit (1). - proc bitScanReverse(index: ptr culong, mask: culong): cuchar {.importc: "_BitScanReverse", header: "<intrin.h>", noSideEffect.} - proc bitScanReverse64(index: ptr culong, mask: uint64): cuchar {.importc: "_BitScanReverse64", header: "<intrin.h>", noSideEffect.} + proc bitScanReverse(index: ptr culong, mask: culong): cuchar {. + importc: "_BitScanReverse", header: "<intrin.h>", noSideEffect.} + proc bitScanReverse64(index: ptr culong, mask: uint64): cuchar {. + importc: "_BitScanReverse64", header: "<intrin.h>", noSideEffect.} # Search the mask data from least significant bit (LSB) to the most significant bit (MSB) for a set bit (1). - proc bitScanForward(index: ptr culong, mask: culong): cuchar {.importc: "_BitScanForward", header: "<intrin.h>", noSideEffect.} - proc bitScanForward64(index: ptr culong, mask: uint64): cuchar {.importc: "_BitScanForward64", header: "<intrin.h>", noSideEffect.} + proc bitScanForward(index: ptr culong, mask: culong): cuchar {. + importc: "_BitScanForward", header: "<intrin.h>", noSideEffect.} + proc bitScanForward64(index: ptr culong, mask: uint64): cuchar {. + importc: "_BitScanForward64", header: "<intrin.h>", noSideEffect.} template vcc_scan_impl(fnc: untyped; v: untyped): int = var index: culong @@ -238,16 +247,22 @@ elif useICC_builtins: # Intel compiler intrinsics: http://fulla.fnal.gov/intel/compiler_c/main_cls/intref_cls/common/intref_allia_misc.htm # see also: https://software.intel.com/en-us/node/523362 # Count the number of bits set to 1 in an integer a, and return that count in dst. - proc builtin_popcnt32(a: cint): cint {.importc: "_popcnt" header: "<immintrin.h>", noSideEffect.} - proc builtin_popcnt64(a: uint64): cint {.importc: "_popcnt64" header: "<immintrin.h>", noSideEffect.} + proc builtin_popcnt32(a: cint): cint {. + importc: "_popcnt"header: "<immintrin.h>", noSideEffect.} + proc builtin_popcnt64(a: uint64): cint {. + importc: "_popcnt64"header: "<immintrin.h>", noSideEffect.} # Returns the number of trailing 0-bits in x, starting at the least significant bit position. If x is 0, the result is undefined. - proc bitScanForward(p: ptr uint32, b: uint32): cuchar {.importc: "_BitScanForward", header: "<immintrin.h>", noSideEffect.} - proc bitScanForward64(p: ptr uint32, b: uint64): cuchar {.importc: "_BitScanForward64", header: "<immintrin.h>", noSideEffect.} + proc bitScanForward(p: ptr uint32, b: uint32): cuchar {. + importc: "_BitScanForward", header: "<immintrin.h>", noSideEffect.} + proc bitScanForward64(p: ptr uint32, b: uint64): cuchar {. + importc: "_BitScanForward64", header: "<immintrin.h>", noSideEffect.} # Returns the number of leading 0-bits in x, starting at the most significant bit position. If x is 0, the result is undefined. - proc bitScanReverse(p: ptr uint32, b: uint32): cuchar {.importc: "_BitScanReverse", header: "<immintrin.h>", noSideEffect.} - proc bitScanReverse64(p: ptr uint32, b: uint64): cuchar {.importc: "_BitScanReverse64", header: "<immintrin.h>", noSideEffect.} + proc bitScanReverse(p: ptr uint32, b: uint32): cuchar {. + importc: "_BitScanReverse", header: "<immintrin.h>", noSideEffect.} + proc bitScanReverse64(p: ptr uint32, b: uint64): cuchar {. + importc: "_BitScanReverse64", header: "<immintrin.h>", noSideEffect.} template icc_scan_impl(fnc: untyped; v: untyped): int = var index: uint32 @@ -266,21 +281,21 @@ proc countSetBits*(x: SomeInteger): int {.inline, noSideEffect.} = else: when useGCC_builtins: when sizeof(x) <= 4: result = builtin_popcount(x.cuint).int - else: result = builtin_popcountll(x.culonglong).int + else: result = builtin_popcountll(x.culonglong).int elif useVCC_builtins: when sizeof(x) <= 2: result = builtin_popcnt16(x.uint16).int elif sizeof(x) <= 4: result = builtin_popcnt32(x.uint32).int - elif arch64: result = builtin_popcnt64(x.uint64).int - else: result = builtin_popcnt32((x.uint64 and 0xFFFFFFFF'u64).uint32 ).int + - builtin_popcnt32((x.uint64 shr 32'u64).uint32 ).int + elif arch64: result = builtin_popcnt64(x.uint64).int + else: result = builtin_popcnt32((x.uint64 and 0xFFFFFFFF'u64).uint32).int + + builtin_popcnt32((x.uint64 shr 32'u64).uint32).int elif useICC_builtins: when sizeof(x) <= 4: result = builtin_popcnt32(x.cint).int - elif arch64: result = builtin_popcnt64(x.uint64).int - else: result = builtin_popcnt32((x.uint64 and 0xFFFFFFFF'u64).cint ).int + - builtin_popcnt32((x.uint64 shr 32'u64).cint ).int + elif arch64: result = builtin_popcnt64(x.uint64).int + else: result = builtin_popcnt32((x.uint64 and 0xFFFFFFFF'u64).cint).int + + builtin_popcnt32((x.uint64 shr 32'u64).cint).int else: when sizeof(x) <= 4: result = countSetBitsNim(x.uint32) - else: result = countSetBitsNim(x.uint64) + else: result = countSetBitsNim(x.uint64) proc popcount*(x: SomeInteger): int {.inline, noSideEffect.} = ## Alias for for countSetBits (Hamming weight.) @@ -298,10 +313,10 @@ proc parityBits*(x: SomeInteger): int {.inline, noSideEffect.} = else: when useGCC_builtins: when sizeof(x) <= 4: result = builtin_parity(x.uint32).int - else: result = builtin_parityll(x.uint64).int + else: result = builtin_parityll(x.uint64).int else: when sizeof(x) <= 4: result = parityImpl(x.uint32) - else: result = parityImpl(x.uint64) + else: result = parityImpl(x.uint64) proc firstSetBit*(x: SomeInteger): int {.inline, noSideEffect.} = ## Returns the 1-based index of the least significant set bit of x. @@ -321,7 +336,7 @@ proc firstSetBit*(x: SomeInteger): int {.inline, noSideEffect.} = return 0 when useGCC_builtins: when sizeof(x) <= 4: result = builtin_ffs(cast[cint](x.cuint)).int - else: result = builtin_ffsll(cast[clonglong](x.culonglong)).int + else: result = builtin_ffsll(cast[clonglong](x.culonglong)).int elif useVCC_builtins: when sizeof(x) <= 4: result = 1 + vcc_scan_impl(bitScanForward, x.culong) @@ -338,7 +353,7 @@ proc firstSetBit*(x: SomeInteger): int {.inline, noSideEffect.} = result = firstSetBitNim(x.uint64) else: when sizeof(x) <= 4: result = firstSetBitNim(x.uint32) - else: result = firstSetBitNim(x.uint64) + else: result = firstSetBitNim(x.uint64) proc fastLog2*(x: SomeInteger): int {.inline, noSideEffect.} = ## Quickly find the log base 2 of an integer. @@ -354,7 +369,7 @@ proc fastLog2*(x: SomeInteger): int {.inline, noSideEffect.} = else: when useGCC_builtins: when sizeof(x) <= 4: result = 31 - builtin_clz(x.uint32).int - else: result = 63 - builtin_clzll(x.uint64).int + else: result = 63 - builtin_clzll(x.uint64).int elif useVCC_builtins: when sizeof(x) <= 4: result = vcc_scan_impl(bitScanReverse, x.culong) @@ -371,7 +386,7 @@ proc fastLog2*(x: SomeInteger): int {.inline, noSideEffect.} = result = fastlog2Nim(x.uint64) else: when sizeof(x) <= 4: result = fastlog2Nim(x.uint32) - else: result = fastlog2Nim(x.uint64) + else: result = fastlog2Nim(x.uint64) proc countLeadingZeroBits*(x: SomeInteger): int {.inline, noSideEffect.} = ## Returns the number of leading zero bits in integer. @@ -387,10 +402,10 @@ proc countLeadingZeroBits*(x: SomeInteger): int {.inline, noSideEffect.} = else: when useGCC_builtins: when sizeof(x) <= 4: result = builtin_clz(x.uint32).int - (32 - sizeof(x)*8) - else: result = builtin_clzll(x.uint64).int + else: result = builtin_clzll(x.uint64).int else: when sizeof(x) <= 4: result = sizeof(x)*8 - 1 - fastlog2Nim(x.uint32) - else: result = sizeof(x)*8 - 1 - fastlog2Nim(x.uint64) + else: result = sizeof(x)*8 - 1 - fastlog2Nim(x.uint64) proc countTrailingZeroBits*(x: SomeInteger): int {.inline, noSideEffect.} = ## Returns the number of trailing zeros in integer. @@ -406,7 +421,7 @@ proc countTrailingZeroBits*(x: SomeInteger): int {.inline, noSideEffect.} = else: when useGCC_builtins: when sizeof(x) <= 4: result = builtin_ctz(x.uint32).int - else: result = builtin_ctzll(x.uint64).int + else: result = builtin_ctzll(x.uint64).int else: result = firstSetBit(x) - 1 |