diff options
-rw-r--r-- | lib/pure/bitops.nim | 383 | ||||
-rw-r--r-- | tests/stdlib/tbitops.nim | 168 | ||||
-rw-r--r-- | tests/stdlib/tbitops.nim.cfg | 1 | ||||
-rw-r--r-- | tests/stdlib/tbitops2.nim | 168 | ||||
-rw-r--r-- | tests/stdlib/tbitops2.nim.cfg | 2 |
5 files changed, 722 insertions, 0 deletions
diff --git a/lib/pure/bitops.nim b/lib/pure/bitops.nim new file mode 100644 index 000000000..d1207603d --- /dev/null +++ b/lib/pure/bitops.nim @@ -0,0 +1,383 @@ +# +# +# Nim's Runtime Library +# (c) Copyright 2017 Nim Authors +# +# See the file "copying.txt", included in this +# distribution, for details about the copyright. +# + +## This module implements a series of low level methods for bit manipulation. +## By default, this module use compiler intrinsics to improve performance +## on supported compilers: ``GCC``, ``LLVM_GCC``, ``CLANG``, ``VCC``, ``ICC``. +## +## The module will fallback to pure nim procs incase the backend is not supported. +## You can also use the flag `noIntrinsicsBitOpts` to disable compiler intrinsics. +## +## This module is also compatible with other backends: ``Javascript``, ``Nimscript`` +## as well as the ``compiletime VM``. +## +## As a result of using optimized function/intrinsics some functions can return +## undefined results if the input is invalid. You can use the flag `noUndefinedBitOpts` +## to force predictable behaviour for all input, causing a small performance hit. +## +## At this time only `fastLog2`, `firstSetBit, `countLeadingZeroBits`, `countTrailingZeroBits` +## may return undefined and/or platform dependant value if given invalid input. + + +const useBuiltins = not defined(noIntrinsicsBitOpts) +const noUndefined = defined(noUndefinedBitOpts) +const useGCC_builtins = (defined(gcc) or defined(llvm_gcc) or defined(clang)) and useBuiltins +const useICC_builtins = defined(icc) and useBuiltins +const useVCC_builtins = defined(vcc) and useBuiltins +const arch64 = sizeof(int) == 8 + +# #### Pure Nim version #### + +proc firstSetBit_nim(x: uint32): int {.inline, nosideeffect.} = + ## Returns the 1-based index of the least significant set bit of x, or if x is zero, returns zero. + # https://graphics.stanford.edu/%7Eseander/bithacks.html#ZerosOnRightMultLookup + const lookup: array[32, uint8] = [0'u8, 1, 28, 2, 29, 14, 24, 3, 30, 22, 20, 15, + 25, 17, 4, 8, 31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9] + var v = x.uint32 + var k = not v + 1 # get two's complement # cast[uint32](-cast[int32](v)) + result = 1 + lookup[uint32((v and k) * 0x077CB531'u32) shr 27].int + +proc firstSetBit_nim(x: uint64): int {.inline, nosideeffect.} = + ## Returns the 1-based index of the least significant set bit of x, or if x is zero, returns zero. + # https://graphics.stanford.edu/%7Eseander/bithacks.html#ZerosOnRightMultLookup + var v = uint64(x) + var k = uint32(v and 0xFFFFFFFF'u32) + if k == 0: + k = uint32(v shr 32'u32) and 0xFFFFFFFF'u32 + result = 32 + result += firstSetBit_nim(k) + +proc fastlog2_nim(x: uint32): int {.inline, nosideeffect.} = + ## Quickly find the log base 2 of a 32-bit or less integer. + # https://graphics.stanford.edu/%7Eseander/bithacks.html#IntegerLogDeBruijn + # https://stackoverflow.com/questions/11376288/fast-computing-of-log2-for-64-bit-integers + const lookup: array[32, uint8] = [0'u8, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, + 22, 25, 3, 30, 8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31] + var v = x.uint32 + v = v or v shr 1 # first round down to one less than a power of 2 + v = v or v shr 2 + v = v or v shr 4 + v = v or v shr 8 + v = v or v shr 16 + result = lookup[uint32(v * 0x07C4ACDD'u32) shr 27].int + +proc fastlog2_nim(x: uint64): int {.inline, nosideeffect.} = + ## Quickly find the log base 2 of a 64-bit integer. + # https://graphics.stanford.edu/%7Eseander/bithacks.html#IntegerLogDeBruijn + # https://stackoverflow.com/questions/11376288/fast-computing-of-log2-for-64-bit-integers + const lookup: array[64, uint8] = [0'u8, 58, 1, 59, 47, 53, 2, 60, 39, 48, 27, 54, + 33, 42, 3, 61, 51, 37, 40, 49, 18, 28, 20, 55, 30, 34, 11, 43, 14, 22, 4, 62, + 57, 46, 52, 38, 26, 32, 41, 50, 36, 17, 19, 29, 10, 13, 21, 56, 45, 25, 31, + 35, 16, 9, 12, 44, 24, 15, 8, 23, 7, 6, 5, 63] + var v = x.uint64 + v = v or v shr 1 # first round down to one less than a power of 2 + v = v or v shr 2 + v = v or v shr 4 + v = v or v shr 8 + v = v or v shr 16 + v = v or v shr 32 + result = lookup[(v * 0x03F6EAF2CD271461'u64) shr 58].int + + +proc countSetBits_nim(n: uint32): int {.inline, noSideEffect.} = + ## Counts the set bits in integer. (also called Hamming weight.) + # generic formula is from: https://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel + + var v = uint32(n) + v = v - ((v shr 1) and 0x55555555) + v = (v and 0x33333333) + ((v shr 2) and 0x33333333) + result = (((v + (v shr 4) and 0xF0F0F0F) * 0x1010101) shr 24).int + +proc countSetBits_nim(n: uint64): int {.inline, noSideEffect.} = + ## Counts the set bits in integer. (also called Hamming weight.) + # generic formula is from: https://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel + var v = uint64(n) + v = v - ((v shr 1'u64) and 0x5555555555555555'u64) + v = (v and 0x3333333333333333'u64) + ((v shr 2'u64) and 0x3333333333333333'u64) + v = (v + (v shr 4'u64) and 0x0F0F0F0F0F0F0F0F'u64) + result = ((v * 0x0101010101010101'u64) shr 56'u64).int + + +template parity_impl[T](value: T): int = + # formula id from: https://graphics.stanford.edu/%7Eseander/bithacks.html#ParityParallel + var v = value + when sizeof(T) == 8: + v = v xor (v shr 32) + when sizeof(T) >= 4: + v = v xor (v shr 16) + when sizeof(T) >= 2: + v = v xor (v shr 8) + v = v xor (v shr 4) + v = v and 0xf + ((0x6996'u shr v) and 1).int + + +when useGCC_builtins: + # Returns the number of set 1-bits in value. + proc builtin_popcount(x: cuint): cint {.importc: "__builtin_popcount", cdecl.} + proc builtin_popcountll(x: culonglong): cint {.importc: "__builtin_popcountll", cdecl.} + + # Returns the bit parity in value + proc builtin_parity(x: cuint): cint {.importc: "__builtin_parity", cdecl.} + proc builtin_parityll(x: culonglong): cint {.importc: "__builtin_parityll", cdecl.} + + # Returns one plus the index of the least significant 1-bit of x, or if x is zero, returns zero. + proc builtin_ffs(x: cint): cint {.importc: "__builtin_ffs", cdecl.} + proc builtin_ffsll(x: clonglong): cint {.importc: "__builtin_ffsll", cdecl.} + + # Returns the number of leading 0-bits in x, starting at the most significant bit position. If x is 0, the result is undefined. + proc builtin_clz(x: cuint): cint {.importc: "__builtin_clz", cdecl.} + proc builtin_clzll(x: culonglong): cint {.importc: "__builtin_clzll", cdecl.} + + # Returns the number of trailing 0-bits in x, starting at the least significant bit position. If x is 0, the result is undefined. + proc builtin_ctz(x: cuint): cint {.importc: "__builtin_ctz", cdecl.} + proc builtin_ctzll(x: culonglong): cint {.importc: "__builtin_ctzll", cdecl.} + +elif useVCC_builtins: + # Counts the number of one bits (population count) in a 16-, 32-, or 64-byte unsigned integer. + proc builtin_popcnt16(a2: uint16): uint16 {.importc: "__popcnt16" header: "<intrin.h>", nosideeffect.} + proc builtin_popcnt32(a2: uint32): uint32 {.importc: "__popcnt" header: "<intrin.h>", nosideeffect.} + proc builtin_popcnt64(a2: uint64): uint64 {.importc: "__popcnt64" header: "<intrin.h>", nosideeffect.} + + # Search the mask data from most significant bit (MSB) to least significant bit (LSB) for a set bit (1). + proc bitScanReverse(index: ptr culong, mask: culong): cuchar {.importc: "_BitScanReverse", header: "<intrin.h>", nosideeffect.} + proc bitScanReverse64(index: ptr culong, mask: uint64): cuchar {.importc: "_BitScanReverse64", header: "<intrin.h>", nosideeffect.} + + # Search the mask data from least significant bit (LSB) to the most significant bit (MSB) for a set bit (1). + proc bitScanForward(index: ptr culong, mask: culong): cuchar {.importc: "_BitScanForward", header: "<intrin.h>", nosideeffect.} + proc bitScanForward64(index: ptr culong, mask: uint64): cuchar {.importc: "_BitScanForward64", header: "<intrin.h>", nosideeffect.} + + template vcc_scan_impl(fnc: untyped; v: untyped): int = + var index: culong + discard fnc(index.addr, v) + index.int + +elif useICC_builtins: + + # Intel compiler intrinsics: http://fulla.fnal.gov/intel/compiler_c/main_cls/intref_cls/common/intref_allia_misc.htm + # see also: https://software.intel.com/en-us/node/523362 + # Count the number of bits set to 1 in an integer a, and return that count in dst. + proc builtin_popcnt32(a: cint): cint {.importc: "_popcnt" header: "<immintrin.h>", nosideeffect.} + proc builtin_popcnt64(a: uint64): cint {.importc: "_popcnt64" header: "<immintrin.h>", nosideeffect.} + + # Returns the number of trailing 0-bits in x, starting at the least significant bit position. If x is 0, the result is undefined. + proc bitScanForward(p: ptr uint32, b: uint32): cuchar {.importc: "_BitScanForward", header: "<immintrin.h>", nosideeffect.} + proc bitScanForward64(p: ptr uint32, b: uint64): cuchar {.importc: "_BitScanForward64", header: "<immintrin.h>", nosideeffect.} + + # Returns the number of leading 0-bits in x, starting at the most significant bit position. If x is 0, the result is undefined. + proc bitScanReverse(p: ptr uint32, b: uint32): cuchar {.importc: "_BitScanReverse", header: "<immintrin.h>", nosideeffect.} + proc bitScanReverse64(p: ptr uint32, b: uint64): cuchar {.importc: "_BitScanReverse64", header: "<immintrin.h>", nosideeffect.} + + template icc_scan_impl(fnc: untyped; v: untyped): int = + var index: uint32 + discard fnc(index.addr, v) + index.int + + +proc countSetBits*(x: SomeInteger): int {.inline, nosideeffect.} = + ## Counts the set bits in integer. (also called Hamming weight.) + # TODO: figure out if ICC support _popcnt32/_popcnt64 on platform without POPCNT. + # like GCC and MSVC + when nimvm: + when sizeof(x) <= 4: result = countSetBits_nim(x.uint32) + else: result = countSetBits_nim(x.uint64) + else: + when useGCC_builtins: + when sizeof(x) <= 4: result = builtin_popcount(x.cuint).int + else: result = builtin_popcountll(x.culonglong).int + elif useVCC_builtins: + when sizeof(x) <= 2: result = builtin_popcnt16(x.uint16).int + elif sizeof(x) <= 4: result = builtin_popcnt32(x.uint32).int + elif arch64: result = builtin_popcnt64(x.uint64).int + else: result = builtin_popcnt32((x.uint64 and 0xFFFFFFFF'u64).uint32 ).int + + builtin_popcnt32((x.uint64 shr 32'u64).uint32 ).int + elif useICC_builtins: + when sizeof(x) <= 4: result = builtin_popcnt32(x.cint).int + elif arch64: result = builtin_popcnt64(x.uint64).int + else: result = builtin_popcnt32((x.uint64 and 0xFFFFFFFF'u64).cint ).int + + builtin_popcnt32((x.uint64 shr 32'u64).cint ).int + else: + when sizeof(x) <= 4: result = countSetBits_nim(x.uint32) + else: result = countSetBits_nim(x.uint64) + +proc popcount*(x: SomeInteger): int {.inline, nosideeffect.} = + ## Alias for for countSetBits (Hamming weight.) + result = countSetBits(x) + +proc parityBits*(x: SomeInteger): int {.inline, nosideeffect.} = + ## Calculate the bit parity in integer. If number of 1-bit + ## is odd parity is 1, otherwise 0. + # Can be used a base if creating ASM version. + # https://stackoverflow.com/questions/21617970/how-to-check-if-value-has-even-parity-of-bits-or-odd + when nimvm: + when sizeof(x) <= 4: result = parity_impl(x.uint32) + else: result = parity_impl(x.uint64) + else: + when useGCC_builtins: + when sizeof(x) <= 4: result = builtin_parity(x.uint32).int + else: result = builtin_parityll(x.uint64).int + else: + when sizeof(x) <= 4: result = parity_impl(x.uint32) + else: result = parity_impl(x.uint64) + +proc firstSetBit*(x: SomeInteger): int {.inline, nosideeffect.} = + ## Returns the 1-based index of the least significant set bit of x. + ## If `x` is zero, when ``noUndefinedBitOpts`` is set, result is 0, + ## otherwise result is undefined. + # GCC builtin 'builtin_ffs' already handle zero input. + when nimvm: + when noUndefined: + if x == 0: + return 0 + when sizeof(x) <= 4: result = firstSetBit_nim(x.uint32) + else: result = firstSetBit_nim(x.uint64) + else: + when noUndefined and not useGCC_builtins: + if x == 0: + return 0 + when useGCC_builtins: + when sizeof(x) <= 4: result = builtin_ffs(cast[cint](x.cuint)).int + else: result = builtin_ffsll(cast[clonglong](x.culonglong)).int + elif useVCC_builtins: + when sizeof(x) <= 4: + result = 1 + vcc_scan_impl(bitScanForward, x.culong) + elif arch64: + result = 1 + vcc_scan_impl(bitScanForward64, x.uint64) + else: + result = firstSetBit_nim(x.uint64) + elif useICC_builtins: + when sizeof(x) <= 4: + result = 1 + icc_scan_impl(bitScanForward, x.uint32) + elif arch64: + result = 1 + icc_scan_impl(bitScanForward64, x.uint64) + else: + result = firstSetBit_nim(x.uint64) + else: + when sizeof(x) <= 4: result = firstSetBit_nim(x.uint32) + else: result = firstSetBit_nim(x.uint64) + +proc fastLog2*(x: SomeInteger): int {.inline, nosideeffect.} = + ## Quickly find the log base 2 of an integer. + ## If `x` is zero, when ``noUndefinedBitOpts`` is set, result is -1, + ## otherwise result is undefined. + when noUndefined: + if x == 0: + return -1 + when nimvm: + when sizeof(x) <= 4: result = fastlog2_nim(x.uint32) + else: result = fastlog2_nim(x.uint64) + else: + when useGCC_builtins: + when sizeof(x) <= 4: result = 31 - builtin_clz(x.uint32).int + else: result = 63 - builtin_clzll(x.uint64).int + elif useVCC_builtins: + when sizeof(x) <= 4: + result = vcc_scan_impl(bitScanReverse, x.culong) + elif arch64: + result = vcc_scan_impl(bitScanReverse64, x.uint64) + else: + result = fastlog2_nim(x.uint64) + elif useICC_builtins: + when sizeof(x) <= 4: + result = icc_scan_impl(bitScanReverse, x.uint32) + elif arch64: + result = icc_scan_impl(bitScanReverse64, x.uint64) + else: + result = fastlog2_nim(x.uint64) + else: + when sizeof(x) <= 4: result = fastlog2_nim(x.uint32) + else: result = fastlog2_nim(x.uint64) + +proc countLeadingZeroBits*(x: SomeInteger): int {.inline, nosideeffect.} = + ## Returns the number of leading zero bits in integer. + ## If `x` is zero, when ``noUndefinedBitOpts`` is set, result is 0, + ## otherwise result is undefined. + when noUndefined: + if x == 0: + return 0 + when nimvm: + when sizeof(x) <= 4: result = sizeof(x)*8 - 1 - fastlog2_nim(x.uint32) + else: result = sizeof(x)*8 - 1 - fastlog2_nim(x.uint64) + else: + when useGCC_builtins: + when sizeof(x) <= 4: result = builtin_clz(x.uint32).int - (32 - sizeof(x)*8) + else: result = builtin_clzll(x.uint64).int + else: + when sizeof(x) <= 4: result = sizeof(x)*8 - 1 - fastlog2_nim(x.uint32) + else: result = sizeof(x)*8 - 1 - fastlog2_nim(x.uint64) + +proc countTrailingZeroBits*(x: SomeInteger): int {.inline, nosideeffect.} = + ## Returns the number of trailing zeros in integer. + ## If `x` is zero, when ``noUndefinedBitOpts`` is set, result is 0, + ## otherwise result is undefined. + when noUndefined: + if x == 0: + return 0 + when nimvm: + result = firstSetBit(x) - 1 + else: + when useGCC_builtins: + when sizeof(x) <= 4: result = builtin_ctz(x.uint32).int + else: result = builtin_ctzll(x.uint64).int + else: + result = firstSetBit(x) - 1 + + +proc rotateLeftBits*(value: uint8; + amount: range[0..8]): uint8 {.inline, noSideEffect.} = + ## Left-rotate bits in a 8-bits value. + # using this form instead of the one below should handle any value + # out of range as well as negative values. + # result = (value shl amount) or (value shr (8 - amount)) + # taken from: https://en.wikipedia.org/wiki/Circular_shift#Implementing_circular_shifts + let amount = amount and 7 + result = (value shl amount) or (value shr ( (-amount) and 7)) + +proc rotateLeftBits*(value: uint16; + amount: range[0..16]): uint16 {.inline, noSideEffect.} = + ## Left-rotate bits in a 16-bits value. + let amount = amount and 15 + result = (value shl amount) or (value shr ( (-amount) and 15)) + +proc rotateLeftBits*(value: uint32; + amount: range[0..32]): uint32 {.inline, noSideEffect.} = + ## Left-rotate bits in a 32-bits value. + let amount = amount and 31 + result = (value shl amount) or (value shr ( (-amount) and 31)) + +proc rotateLeftBits*(value: uint64; + amount: range[0..64]): uint64 {.inline, noSideEffect.} = + ## Left-rotate bits in a 64-bits value. + let amount = amount and 63 + result = (value shl amount) or (value shr ( (-amount) and 63)) + + +proc rotateRightBits*(value: uint8; + amount: range[0..8]): uint8 {.inline, noSideEffect.} = + ## Right-rotate bits in a 8-bits value. + let amount = amount and 7 + result = (value shr amount) or (value shl ( (-amount) and 7)) + +proc rotateRightBits*(value: uint16; + amount: range[0..16]): uint16 {.inline, noSideEffect.} = + ## Right-rotate bits in a 16-bits value. + let amount = amount and 15 + result = (value shr amount) or (value shl ( (-amount) and 15)) + +proc rotateRightBits*(value: uint32; + amount: range[0..32]): uint32 {.inline, noSideEffect.} = + ## Right-rotate bits in a 32-bits value. + let amount = amount and 31 + result = (value shr amount) or (value shl ( (-amount) and 31)) + +proc rotateRightBits*(value: uint64; + amount: range[0..64]): uint64 {.inline, noSideEffect.} = + ## Right-rotate bits in a 64-bits value. + let amount = amount and 63 + result = (value shr amount) or (value shl ( (-amount) and 63)) diff --git a/tests/stdlib/tbitops.nim b/tests/stdlib/tbitops.nim new file mode 100644 index 000000000..8301256c4 --- /dev/null +++ b/tests/stdlib/tbitops.nim @@ -0,0 +1,168 @@ +discard """ + file: "tbitops.nim" + output: "OK" +""" +import bitops + + +proc main() = + const U8 = 0b0011_0010'u8 + const I8 = 0b0011_0010'i8 + const U16 = 0b00100111_00101000'u16 + const I16 = 0b00100111_00101000'i16 + const U32 = 0b11010101_10011100_11011010_01010000'u32 + const I32 = 0b11010101_10011100_11011010_01010000'i32 + const U64A = 0b01000100_00111111_01111100_10001010_10011001_01001000_01111010_00010001'u64 + const I64A = 0b01000100_00111111_01111100_10001010_10011001_01001000_01111010_00010001'i64 + const U64B = 0b00110010_11011101_10001111_00101000_00000000_00000000_00000000_00000000'u64 + const I64B = 0b00110010_11011101_10001111_00101000_00000000_00000000_00000000_00000000'i64 + + doAssert( U64A.fastLog2 == 62) + doAssert( I64A.fastLog2 == 62) + doAssert( U64A.countLeadingZeroBits == 1) + doAssert( I64A.countLeadingZeroBits == 1) + doAssert( U64A.countTrailingZeroBits == 0) + doAssert( I64A.countTrailingZeroBits == 0) + doAssert( U64A.firstSetBit == 1) + doAssert( I64A.firstSetBit == 1) + doAssert( U64A.parityBits == 1) + doAssert( I64A.parityBits == 1) + doAssert( U64A.countSetBits == 29) + doAssert( I64A.countSetBits == 29) + doAssert( U64A.rotateLeftBits(37) == 0b00101001_00001111_01000010_00101000_10000111_11101111_10010001_01010011'u64) + doAssert( U64A.rotateRightBits(37) == 0b01010100_11001010_01000011_11010000_10001010_00100001_11111011_11100100'u64) + + doAssert( U64B.firstSetBit == 36) + doAssert( I64B.firstSetBit == 36) + + doAssert( U32.fastLog2 == 31) + doAssert( I32.fastLog2 == 31) + doAssert( U32.countLeadingZeroBits == 0) + doAssert( I32.countLeadingZeroBits == 0) + doAssert( U32.countTrailingZeroBits == 4) + doAssert( I32.countTrailingZeroBits == 4) + doAssert( U32.firstSetBit == 5) + doAssert( I32.firstSetBit == 5) + doAssert( U32.parityBits == 0) + doAssert( I32.parityBits == 0) + doAssert( U32.countSetBits == 16) + doAssert( I32.countSetBits == 16) + doAssert( U32.rotateLeftBits(21) == 0b01001010_00011010_10110011_10011011'u32) + doAssert( U32.rotateRightBits(21) == 0b11100110_11010010_10000110_10101100'u32) + + doAssert( U16.fastLog2 == 13) + doAssert( I16.fastLog2 == 13) + doAssert( U16.countLeadingZeroBits == 2) + doAssert( I16.countLeadingZeroBits == 2) + doAssert( U16.countTrailingZeroBits == 3) + doAssert( I16.countTrailingZeroBits == 3) + doAssert( U16.firstSetBit == 4) + doAssert( I16.firstSetBit == 4) + doAssert( U16.parityBits == 0) + doAssert( I16.parityBits == 0) + doAssert( U16.countSetBits == 6) + doAssert( I16.countSetBits == 6) + doAssert( U16.rotateLeftBits(12) == 0b10000010_01110010'u16) + doAssert( U16.rotateRightBits(12) == 0b01110010_10000010'u16) + + doAssert( U8.fastLog2 == 5) + doAssert( I8.fastLog2 == 5) + doAssert( U8.countLeadingZeroBits == 2) + doAssert( I8.countLeadingZeroBits == 2) + doAssert( U8.countTrailingZeroBits == 1) + doAssert( I8.countTrailingZeroBits == 1) + doAssert( U8.firstSetBit == 2) + doAssert( I8.firstSetBit == 2) + doAssert( U8.parityBits == 1) + doAssert( I8.parityBits == 1) + doAssert( U8.countSetBits == 3) + doAssert( I8.countSetBits == 3) + doAssert( U8.rotateLeftBits(3) == 0b10010001'u8) + doAssert( U8.rotateRightBits(3) == 0b0100_0110'u8) + + static : + # test bitopts at compile time with vm + doAssert( U8.fastLog2 == 5) + doAssert( I8.fastLog2 == 5) + doAssert( U8.countLeadingZeroBits == 2) + doAssert( I8.countLeadingZeroBits == 2) + doAssert( U8.countTrailingZeroBits == 1) + doAssert( I8.countTrailingZeroBits == 1) + doAssert( U8.firstSetBit == 2) + doAssert( I8.firstSetBit == 2) + doAssert( U8.parityBits == 1) + doAssert( I8.parityBits == 1) + doAssert( U8.countSetBits == 3) + doAssert( I8.countSetBits == 3) + doAssert( U8.rotateLeftBits(3) == 0b10010001'u8) + doAssert( U8.rotateRightBits(3) == 0b0100_0110'u8) + + + + template test_undefined_impl(ffunc: untyped; expected: int; is_static: bool) = + doAssert( ffunc(0'u8) == expected) + doAssert( ffunc(0'i8) == expected) + doAssert( ffunc(0'u16) == expected) + doAssert( ffunc(0'i16) == expected) + doAssert( ffunc(0'u32) == expected) + doAssert( ffunc(0'i32) == expected) + doAssert( ffunc(0'u64) == expected) + doAssert( ffunc(0'i64) == expected) + + template test_undefined(ffunc: untyped; expected: int) = + test_undefined_impl(ffunc, expected, false) + static: + test_undefined_impl(ffunc, expected, true) + + when defined(noUndefinedBitOpts): + # check for undefined behavior with zero. + test_undefined(countSetBits, 0) + test_undefined(parityBits, 0) + test_undefined(firstSetBit, 0) + test_undefined(countLeadingZeroBits, 0) + test_undefined(countTrailingZeroBits, 0) + test_undefined(fastLog2, -1) + + # check for undefined behavior with rotate by zero. + doAssert( U8.rotateLeftBits(0) == U8) + doAssert( U8.rotateRightBits(0) == U8) + doAssert( U16.rotateLeftBits(0) == U16) + doAssert( U16.rotateRightBits(0) == U16) + doAssert( U32.rotateLeftBits(0) == U32) + doAssert( U32.rotateRightBits(0) == U32) + doAssert( U64A.rotateLeftBits(0) == U64A) + doAssert( U64A.rotateRightBits(0) == U64A) + + # check for undefined behavior with rotate by integer width. + doAssert( U8.rotateLeftBits(8) == U8) + doAssert( U8.rotateRightBits(8) == U8) + doAssert( U16.rotateLeftBits(16) == U16) + doAssert( U16.rotateRightBits(16) == U16) + doAssert( U32.rotateLeftBits(32) == U32) + doAssert( U32.rotateRightBits(32) == U32) + doAssert( U64A.rotateLeftBits(64) == U64A) + doAssert( U64A.rotateRightBits(64) == U64A) + + static: # check for undefined behavior with rotate by zero. + doAssert( U8.rotateLeftBits(0) == U8) + doAssert( U8.rotateRightBits(0) == U8) + doAssert( U16.rotateLeftBits(0) == U16) + doAssert( U16.rotateRightBits(0) == U16) + doAssert( U32.rotateLeftBits(0) == U32) + doAssert( U32.rotateRightBits(0) == U32) + doAssert( U64A.rotateLeftBits(0) == U64A) + doAssert( U64A.rotateRightBits(0) == U64A) + + # check for undefined behavior with rotate by integer width. + doAssert( U8.rotateLeftBits(8) == U8) + doAssert( U8.rotateRightBits(8) == U8) + doAssert( U16.rotateLeftBits(16) == U16) + doAssert( U16.rotateRightBits(16) == U16) + doAssert( U32.rotateLeftBits(32) == U32) + doAssert( U32.rotateRightBits(32) == U32) + doAssert( U64A.rotateLeftBits(64) == U64A) + doAssert( U64A.rotateRightBits(64) == U64A) + + echo "OK" + +main() diff --git a/tests/stdlib/tbitops.nim.cfg b/tests/stdlib/tbitops.nim.cfg new file mode 100644 index 000000000..f0d7668a7 --- /dev/null +++ b/tests/stdlib/tbitops.nim.cfg @@ -0,0 +1 @@ +-d:noUndefinedBitOps diff --git a/tests/stdlib/tbitops2.nim b/tests/stdlib/tbitops2.nim new file mode 100644 index 000000000..31952316c --- /dev/null +++ b/tests/stdlib/tbitops2.nim @@ -0,0 +1,168 @@ +discard """ + file: "tbitops.nim" + output: "OK" +""" +import bitops + + +proc main() = + const U8 = 0b0011_0010'u8 + const I8 = 0b0011_0010'i8 + const U16 = 0b00100111_00101000'u16 + const I16 = 0b00100111_00101000'i16 + const U32 = 0b11010101_10011100_11011010_01010000'u32 + const I32 = 0b11010101_10011100_11011010_01010000'i32 + const U64A = 0b01000100_00111111_01111100_10001010_10011001_01001000_01111010_00010001'u64 + const I64A = 0b01000100_00111111_01111100_10001010_10011001_01001000_01111010_00010001'i64 + const U64B = 0b00110010_11011101_10001111_00101000_00000000_00000000_00000000_00000000'u64 + const I64B = 0b00110010_11011101_10001111_00101000_00000000_00000000_00000000_00000000'i64 + + doAssert( U64A.fastLog2 == 62) + doAssert( I64A.fastLog2 == 62) + doAssert( U64A.countLeadingZeroBits == 1) + doAssert( I64A.countLeadingZeroBits == 1) + doAssert( U64A.countTrailingZeroBits == 0) + doAssert( I64A.countTrailingZeroBits == 0) + doAssert( U64A.firstSetBit == 1) + doAssert( I64A.firstSetBit == 1) + doAssert( U64A.parityBits == 1) + doAssert( I64A.parityBits == 1) + doAssert( U64A.countSetBits == 29) + doAssert( I64A.countSetBits == 29) + doAssert( U64A.rotateLeftBits(37) == 0b00101001_00001111_01000010_00101000_10000111_11101111_10010001_01010011'u64) + doAssert( U64A.rotateRightBits(37) == 0b01010100_11001010_01000011_11010000_10001010_00100001_11111011_11100100'u64) + + doAssert( U64B.firstSetBit == 36) + doAssert( I64B.firstSetBit == 36) + + doAssert( U32.fastLog2 == 31) + doAssert( I32.fastLog2 == 31) + doAssert( U32.countLeadingZeroBits == 0) + doAssert( I32.countLeadingZeroBits == 0) + doAssert( U32.countTrailingZeroBits == 4) + doAssert( I32.countTrailingZeroBits == 4) + doAssert( U32.firstSetBit == 5) + doAssert( I32.firstSetBit == 5) + doAssert( U32.parityBits == 0) + doAssert( I32.parityBits == 0) + doAssert( U32.countSetBits == 16) + doAssert( I32.countSetBits == 16) + doAssert( U32.rotateLeftBits(21) == 0b01001010_00011010_10110011_10011011'u32) + doAssert( U32.rotateRightBits(21) == 0b11100110_11010010_10000110_10101100'u32) + + doAssert( U16.fastLog2 == 13) + doAssert( I16.fastLog2 == 13) + doAssert( U16.countLeadingZeroBits == 2) + doAssert( I16.countLeadingZeroBits == 2) + doAssert( U16.countTrailingZeroBits == 3) + doAssert( I16.countTrailingZeroBits == 3) + doAssert( U16.firstSetBit == 4) + doAssert( I16.firstSetBit == 4) + doAssert( U16.parityBits == 0) + doAssert( I16.parityBits == 0) + doAssert( U16.countSetBits == 6) + doAssert( I16.countSetBits == 6) + doAssert( U16.rotateLeftBits(12) == 0b10000010_01110010'u16) + doAssert( U16.rotateRightBits(12) == 0b01110010_10000010'u16) + + doAssert( U8.fastLog2 == 5) + doAssert( I8.fastLog2 == 5) + doAssert( U8.countLeadingZeroBits == 2) + doAssert( I8.countLeadingZeroBits == 2) + doAssert( U8.countTrailingZeroBits == 1) + doAssert( I8.countTrailingZeroBits == 1) + doAssert( U8.firstSetBit == 2) + doAssert( I8.firstSetBit == 2) + doAssert( U8.parityBits == 1) + doAssert( I8.parityBits == 1) + doAssert( U8.countSetBits == 3) + doAssert( I8.countSetBits == 3) + doAssert( U8.rotateLeftBits(3) == 0b10010001'u8) + doAssert( U8.rotateRightBits(3) == 0b0100_0110'u8) + + static : + # test bitopts at compile time with vm + doAssert( U8.fastLog2 == 5) + doAssert( I8.fastLog2 == 5) + doAssert( U8.countLeadingZeroBits == 2) + doAssert( I8.countLeadingZeroBits == 2) + doAssert( U8.countTrailingZeroBits == 1) + doAssert( I8.countTrailingZeroBits == 1) + doAssert( U8.firstSetBit == 2) + doAssert( I8.firstSetBit == 2) + doAssert( U8.parityBits == 1) + doAssert( I8.parityBits == 1) + doAssert( U8.countSetBits == 3) + doAssert( I8.countSetBits == 3) + doAssert( U8.rotateLeftBits(3) == 0b10010001'u8) + doAssert( U8.rotateRightBits(3) == 0b0100_0110'u8) + + + + template test_undefined_impl(ffunc: untyped; expected: int; is_static: bool) = + doAssert( ffunc(0'u8) == expected) + doAssert( ffunc(0'i8) == expected) + doAssert( ffunc(0'u16) == expected) + doAssert( ffunc(0'i16) == expected) + doAssert( ffunc(0'u32) == expected) + doAssert( ffunc(0'i32) == expected) + doAssert( ffunc(0'u64) == expected) + doAssert( ffunc(0'i64) == expected) + + template test_undefined(ffunc: untyped; expected: int) = + test_undefined_impl(ffunc, expected, false) + static: + test_undefined_impl(ffunc, expected, true) + + when defined(noUndefinedBitOpts): + # check for undefined behavior with zero. + test_undefined(countSetBits, 0) + test_undefined(parityBits, 0) + test_undefined(firstSetBit, 0) + test_undefined(countLeadingZeroBits, 0) + test_undefined(countTrailingZeroBits, 0) + test_undefined(fastLog2, -1) + + # check for undefined behavior with rotate by zero. + doAssert( U8.rotateLeftBits(0) == U8) + doAssert( U8.rotateRightBits(0) == U8) + doAssert( U16.rotateLeftBits(0) == U16) + doAssert( U16.rotateRightBits(0) == U16) + doAssert( U32.rotateLeftBits(0) == U32) + doAssert( U32.rotateRightBits(0) == U32) + doAssert( U64A.rotateLeftBits(0) == U64A) + doAssert( U64A.rotateRightBits(0) == U64A) + + # check for undefined behavior with rotate by integer width. + doAssert( U8.rotateLeftBits(8) == U8) + doAssert( U8.rotateRightBits(8) == U8) + doAssert( U16.rotateLeftBits(16) == U16) + doAssert( U16.rotateRightBits(16) == U16) + doAssert( U32.rotateLeftBits(32) == U32) + doAssert( U32.rotateRightBits(32) == U32) + doAssert( U64A.rotateLeftBits(64) == U64A) + doAssert( U64A.rotateRightBits(64) == U64A) + + static: # check for undefined behavior with rotate by zero. + doAssert( U8.rotateLeftBits(0) == U8) + doAssert( U8.rotateRightBits(0) == U8) + doAssert( U16.rotateLeftBits(0) == U16) + doAssert( U16.rotateRightBits(0) == U16) + doAssert( U32.rotateLeftBits(0) == U32) + doAssert( U32.rotateRightBits(0) == U32) + doAssert( U64A.rotateLeftBits(0) == U64A) + doAssert( U64A.rotateRightBits(0) == U64A) + + # check for undefined behavior with rotate by integer width. + doAssert( U8.rotateLeftBits(8) == U8) + doAssert( U8.rotateRightBits(8) == U8) + doAssert( U16.rotateLeftBits(16) == U16) + doAssert( U16.rotateRightBits(16) == U16) + doAssert( U32.rotateLeftBits(32) == U32) + doAssert( U32.rotateRightBits(32) == U32) + doAssert( U64A.rotateLeftBits(64) == U64A) + doAssert( U64A.rotateRightBits(64) == U64A) + + echo "OK" + +main() diff --git a/tests/stdlib/tbitops2.nim.cfg b/tests/stdlib/tbitops2.nim.cfg new file mode 100644 index 000000000..e1cb77e82 --- /dev/null +++ b/tests/stdlib/tbitops2.nim.cfg @@ -0,0 +1,2 @@ +-d:noIntrinsicsBitOpts +-d:noUndefinedBitOps |