From c8dda867f2a2607d4049759d09593a66b1697d02 Mon Sep 17 00:00:00 2001 From: flywind Date: Mon, 22 Mar 2021 07:36:48 +0800 Subject: close #11330 sets uses optimized countSetBits (#17334) * Update lib/pure/bitops.nim * Update lib/system/sets.nim * Apply suggestions from code review Co-authored-by: Andreas Rumpf --- lib/pure/bitops.nim | 88 +++++------------------------------------------------ 1 file changed, 8 insertions(+), 80 deletions(-) (limited to 'lib/pure/bitops.nim') diff --git a/lib/pure/bitops.nim b/lib/pure/bitops.nim index 57e3c7989..377602e75 100644 --- a/lib/pure/bitops.nim +++ b/lib/pure/bitops.nim @@ -27,6 +27,9 @@ import macros import std/private/since +from std/private/vmutils import forwardImpl, toUnsigned + + func bitnot*[T: SomeInteger](x: T): T {.magic: "BitnotI".} ## Computes the `bitwise complement` of the integer `x`. @@ -58,34 +61,6 @@ macro bitxor*[T: SomeInteger](x, y: T; z: varargs[T]): T = for extra in z: result = newCall(fn, result, extra) -const useBuiltins = not defined(noIntrinsicsBitOpts) -const noUndefined = defined(noUndefinedBitOpts) -const useGCC_builtins = (defined(gcc) or defined(llvm_gcc) or - defined(clang)) and useBuiltins -const useICC_builtins = defined(icc) and useBuiltins -const useVCC_builtins = defined(vcc) and useBuiltins -const arch64 = sizeof(int) == 8 -const useBuiltinsRotate = (defined(amd64) or defined(i386)) and - (defined(gcc) or defined(clang) or defined(vcc) or - (defined(icl) and not defined(cpp))) and useBuiltins - -template toUnsigned(x: int8): uint8 = cast[uint8](x) -template toUnsigned(x: int16): uint16 = cast[uint16](x) -template toUnsigned(x: int32): uint32 = cast[uint32](x) -template toUnsigned(x: int64): uint64 = cast[uint64](x) -template toUnsigned(x: int): uint = cast[uint](x) - -template forwardImpl(impl, arg) {.dirty.} = - when sizeof(x) <= 4: - when x is SomeSignedInt: - impl(cast[uint32](x.int32)) - else: - impl(x.uint32) - else: - when x is SomeSignedInt: - impl(cast[uint64](x.int64)) - else: - impl(x.uint64) type BitsRange*[T] = range[0..sizeof(T)*8-1] ## A range with all bit positions for type `T`. @@ -436,13 +411,12 @@ func fastlog2Nim(x: uint64): int {.inline.} = v = v or v shr 32 result = lookup[(v * 0x03F6EAF2CD271461'u64) shr 58].int -# sets.nim cannot import bitops, but bitops can use include -# system/sets to eliminate code duplication. sets.nim defines -# countBits32 and countBits64. import system/countbits_impl -template countSetBitsNim(n: uint32): int = countBits32(n) -template countSetBitsNim(n: uint64): int = countBits64(n) +const arch64 = sizeof(int) == 8 +const useBuiltinsRotate = (defined(amd64) or defined(i386)) and + (defined(gcc) or defined(clang) or defined(vcc) or + (defined(icl) and not defined(cpp))) and useBuiltins template parityImpl[T](value: T): int = # formula id from: https://graphics.stanford.edu/%7Eseander/bithacks.html#ParityParallel @@ -459,11 +433,6 @@ template parityImpl[T](value: T): int = when useGCC_builtins: - # Returns the number of set 1-bits in value. - proc builtin_popcount(x: cuint): cint {.importc: "__builtin_popcount", cdecl.} - proc builtin_popcountll(x: culonglong): cint {. - importc: "__builtin_popcountll", cdecl.} - # Returns the bit parity in value proc builtin_parity(x: cuint): cint {.importc: "__builtin_parity", cdecl.} proc builtin_parityll(x: culonglong): cint {.importc: "__builtin_parityll", cdecl.} @@ -481,14 +450,6 @@ when useGCC_builtins: proc builtin_ctzll(x: culonglong): cint {.importc: "__builtin_ctzll", cdecl.} elif useVCC_builtins: - # Counts the number of one bits (population count) in a 16-, 32-, or 64-byte unsigned integer. - func builtin_popcnt16(a2: uint16): uint16 {. - importc: "__popcnt16", header: "".} - func builtin_popcnt32(a2: uint32): uint32 {. - importc: "__popcnt", header: "".} - func builtin_popcnt64(a2: uint64): uint64 {. - importc: "__popcnt64", header: "".} - # Search the mask data from most significant bit (MSB) to least significant bit (LSB) for a set bit (1). func bitScanReverse(index: ptr culong, mask: culong): cuchar {. importc: "_BitScanReverse", header: "".} @@ -507,15 +468,6 @@ elif useVCC_builtins: index.int elif useICC_builtins: - - # Intel compiler intrinsics: http://fulla.fnal.gov/intel/compiler_c/main_cls/intref_cls/common/intref_allia_misc.htm - # see also: https://software.intel.com/en-us/node/523362 - # Count the number of bits set to 1 in an integer a, and return that count in dst. - func builtin_popcnt32(a: cint): cint {. - importc: "_popcnt", header: "".} - func builtin_popcnt64(a: uint64): cint {. - importc: "_popcnt64", header: "".} - # Returns the number of trailing 0-bits in x, starting at the least significant bit position. If x is 0, the result is undefined. func bitScanForward(p: ptr uint32, b: uint32): cuchar {. importc: "_BitScanForward", header: "".} @@ -533,37 +485,13 @@ elif useICC_builtins: discard fnc(index.addr, v) index.int - func countSetBits*(x: SomeInteger): int {.inline.} = ## Counts the set bits in an integer (also called `Hamming weight`:idx:). runnableExamples: doAssert countSetBits(0b0000_0011'u8) == 2 doAssert countSetBits(0b1010_1010'u8) == 4 - # TODO: figure out if ICC support _popcnt32/_popcnt64 on platform without POPCNT. - # like GCC and MSVC - when x is SomeSignedInt: - let x = x.toUnsigned - when nimvm: - result = forwardImpl(countSetBitsNim, x) - else: - when useGCC_builtins: - when sizeof(x) <= 4: result = builtin_popcount(x.cuint).int - else: result = builtin_popcountll(x.culonglong).int - elif useVCC_builtins: - when sizeof(x) <= 2: result = builtin_popcnt16(x.uint16).int - elif sizeof(x) <= 4: result = builtin_popcnt32(x.uint32).int - elif arch64: result = builtin_popcnt64(x.uint64).int - else: result = builtin_popcnt32((x.uint64 and 0xFFFFFFFF'u64).uint32).int + - builtin_popcnt32((x.uint64 shr 32'u64).uint32).int - elif useICC_builtins: - when sizeof(x) <= 4: result = builtin_popcnt32(x.cint).int - elif arch64: result = builtin_popcnt64(x.uint64).int - else: result = builtin_popcnt32((x.uint64 and 0xFFFFFFFF'u64).cint).int + - builtin_popcnt32((x.uint64 shr 32'u64).cint).int - else: - when sizeof(x) <= 4: result = countSetBitsNim(x.uint32) - else: result = countSetBitsNim(x.uint64) + result = countSetBitsImpl(x) func popcount*(x: SomeInteger): int {.inline.} = ## Alias for `countSetBits <#countSetBits,SomeInteger>`_ (Hamming weight). -- cgit 1.4.1-2-gfad0