1 files changed, 70 insertions, 132 deletions
diff --git a/lib/pure/bitops.nim b/lib/pure/bitops.nim
index 377602e75..0d3351ee5 100644
--- a/lib/pure/bitops.nim
+++ b/lib/pure/bitops.nim
@@ -25,11 +25,9 @@
 ## At this time only `fastLog2`, `firstSetBit`, `countLeadingZeroBits` and `countTrailingZeroBits`
 ## may return undefined and/or platform dependent values if given invalid input.
 
-import macros
+import std/macros
 import std/private/since
-from std/private/vmutils import forwardImpl, toUnsigned
-
-
+from std/private/bitops_utils import forwardImpl, castToUnsigned
 
 func bitnot*[T: SomeInteger](x: T): T {.magic: "BitnotI".}
   ## Computes the `bitwise complement` of the integer `x`.
@@ -65,6 +63,12 @@ macro bitxor*[T: SomeInteger](x, y: T; z: varargs[T]): T =
 type BitsRange*[T] = range[0..sizeof(T)*8-1]
   ## A range with all bit positions for type `T`.
 
+template typeMasked[T: SomeInteger](x: T): T =
+  when defined(js):
+    T(x and ((0xffffffff_ffffffff'u shr (64 - sizeof(T) * 8))))
+  else:
+    x
+
 func bitsliced*[T: SomeInteger](v: T; slice: Slice[int]): T {.inline, since: (1, 3).} =
   ## Returns an extracted (and shifted) slice of bits from `v`.
   runnableExamples:
@@ -74,8 +78,8 @@ func bitsliced*[T: SomeInteger](v: T; slice: Slice[int]): T {.inline, since: (1,
 
   let
     upmost = sizeof(T) * 8 - 1
-    uv     = when v is SomeUnsignedInt: v else: v.toUnsigned
-  (uv shl (upmost - slice.b) shr (upmost - slice.b + slice.a)).T
+    uv     = v.castToUnsigned
+  ((uv shl (upmost - slice.b)).typeMasked shr (upmost - slice.b + slice.a)).T
 
 proc bitslice*[T: SomeInteger](v: var T; slice: Slice[int]) {.inline, since: (1, 3).} =
   ## Mutates `v` into an extracted (and shifted) slice of bits from `v`.
@@ -86,8 +90,8 @@ proc bitslice*[T: SomeInteger](v: var T; slice: Slice[int]) {.inline, since: (1,
 
   let
     upmost = sizeof(T) * 8 - 1
-    uv     = when v is SomeUnsignedInt: v else: v.toUnsigned
-  v = (uv shl (upmost - slice.b) shr (upmost - slice.b + slice.a)).T
+    uv     = v.castToUnsigned
+  v = ((uv shl (upmost - slice.b)).typeMasked shr (upmost - slice.b + slice.a)).T
 
 func toMask*[T: SomeInteger](slice: Slice[int]): T {.inline, since: (1, 3).} =
   ## Creates a bitmask based on a slice of bits.
@@ -97,11 +101,8 @@ func toMask*[T: SomeInteger](slice: Slice[int]): T {.inline, since: (1, 3).} =
 
   let
     upmost = sizeof(T) * 8 - 1
-    bitmask = when T is SomeUnsignedInt:
-                bitnot(0.T)
-              else:
-                bitnot(0.T).toUnsigned
-  (bitmask shl (upmost - slice.b + slice.a) shr (upmost - slice.b)).T
+    bitmask = bitnot(0.T).castToUnsigned
+  ((bitmask shl (upmost - slice.b + slice.a)).typeMasked shr (upmost - slice.b)).T
 
 proc masked*[T: SomeInteger](v, mask :T): T {.inline, since: (1, 3).} =
   ## Returns `v`, with only the `1` bits from `mask` matching those of
@@ -413,7 +414,6 @@ func fastlog2Nim(x: uint64): int {.inline.} =
 
 import system/countbits_impl
 
-const arch64 = sizeof(int) == 8
 const useBuiltinsRotate = (defined(amd64) or defined(i386)) and
                           (defined(gcc) or defined(clang) or defined(vcc) or
                            (defined(icl) and not defined(cpp))) and useBuiltins
@@ -451,33 +451,33 @@ when useGCC_builtins:
 
 elif useVCC_builtins:
   # Search the mask data from most significant bit (MSB) to least significant bit (LSB) for a set bit (1).
-  func bitScanReverse(index: ptr culong, mask: culong): cuchar {.
+  func bitScanReverse(index: ptr culong, mask: culong): uint8 {.
       importc: "_BitScanReverse", header: "<intrin.h>".}
-  func bitScanReverse64(index: ptr culong, mask: uint64): cuchar {.
+  func bitScanReverse64(index: ptr culong, mask: uint64): uint8 {.
       importc: "_BitScanReverse64", header: "<intrin.h>".}
 
   # Search the mask data from least significant bit (LSB) to the most significant bit (MSB) for a set bit (1).
-  func bitScanForward(index: ptr culong, mask: culong): cuchar {.
+  func bitScanForward(index: ptr culong, mask: culong): uint8 {.
       importc: "_BitScanForward", header: "<intrin.h>".}
-  func bitScanForward64(index: ptr culong, mask: uint64): cuchar {.
+  func bitScanForward64(index: ptr culong, mask: uint64): uint8 {.
       importc: "_BitScanForward64", header: "<intrin.h>".}
 
   template vcc_scan_impl(fnc: untyped; v: untyped): int =
-    var index: culong
+    var index {.inject.}: culong = 0
     discard fnc(index.addr, v)
     index.int
 
 elif useICC_builtins:
   # Returns the number of trailing 0-bits in x, starting at the least significant bit position. If x is 0, the result is undefined.
-  func bitScanForward(p: ptr uint32, b: uint32): cuchar {.
+  func bitScanForward(p: ptr uint32, b: uint32): uint8 {.
       importc: "_BitScanForward", header: "<immintrin.h>".}
-  func bitScanForward64(p: ptr uint32, b: uint64): cuchar {.
+  func bitScanForward64(p: ptr uint32, b: uint64): uint8 {.
       importc: "_BitScanForward64", header: "<immintrin.h>".}
 
   # Returns the number of leading 0-bits in x, starting at the most significant bit position. If x is 0, the result is undefined.
-  func bitScanReverse(p: ptr uint32, b: uint32): cuchar {.
+  func bitScanReverse(p: ptr uint32, b: uint32): uint8 {.
       importc: "_BitScanReverse", header: "<immintrin.h>".}
-  func bitScanReverse64(p: ptr uint32, b: uint64): cuchar {.
+  func bitScanReverse64(p: ptr uint32, b: uint64): uint8 {.
       importc: "_BitScanReverse64", header: "<immintrin.h>".}
 
   template icc_scan_impl(fnc: untyped; v: untyped): int =
@@ -508,8 +508,7 @@ func parityBits*(x: SomeInteger): int {.inline.} =
 
   # Can be used a base if creating ASM version.
   # https://stackoverflow.com/questions/21617970/how-to-check-if-value-has-even-parity-of-bits-or-odd
-  when x is SomeSignedInt:
-    let x = x.toUnsigned
+  let x = x.castToUnsigned
   when nimvm:
     result = forwardImpl(parityImpl, x)
   else:
@@ -532,8 +531,7 @@ func firstSetBit*(x: SomeInteger): int {.inline.} =
     doAssert firstSetBit(0b0000_1111'u8) == 1
 
   # GCC builtin 'builtin_ffs' already handle zero input.
-  when x is SomeSignedInt:
-    let x = x.toUnsigned
+  let x = x.castToUnsigned
   when nimvm:
     when noUndefined:
       if x == 0:
@@ -575,8 +573,7 @@ func fastLog2*(x: SomeInteger): int {.inline.} =
     doAssert fastLog2(0b0000_1000'u8) == 3
     doAssert fastLog2(0b0000_1111'u8) == 3
 
-  when x is SomeSignedInt:
-    let x = x.toUnsigned
+  let x = x.castToUnsigned
   when noUndefined:
     if x == 0:
       return -1
@@ -618,8 +615,7 @@ func countLeadingZeroBits*(x: SomeInteger): int {.inline.} =
     doAssert countLeadingZeroBits(0b0000_1000'u8) == 4
     doAssert countLeadingZeroBits(0b0000_1111'u8) == 4
 
-  when x is SomeSignedInt:
-    let x = x.toUnsigned
+  let x = x.castToUnsigned
   when noUndefined:
     if x == 0:
       return 0
@@ -647,8 +643,7 @@ func countTrailingZeroBits*(x: SomeInteger): int {.inline.} =
     doAssert countTrailingZeroBits(0b0000_1000'u8) == 3
     doAssert countTrailingZeroBits(0b0000_1111'u8) == 0
 
-  when x is SomeSignedInt:
-    let x = x.toUnsigned
+  let x = x.castToUnsigned
   when noUndefined:
     if x == 0:
       return 0
@@ -665,7 +660,7 @@ when useBuiltinsRotate:
   when defined(gcc):
     # GCC was tested until version 4.8.1 and intrinsics were present. Not tested
     # in previous versions.
-    func builtin_rotl8(value: cuchar, shift: cint): cuchar
+    func builtin_rotl8(value: uint8, shift: cint): uint8
                       {.importc: "__rolb", header: "<x86intrin.h>".}
     func builtin_rotl16(value: cushort, shift: cint): cushort
                        {.importc: "__rolw", header: "<x86intrin.h>".}
@@ -675,7 +670,7 @@ when useBuiltinsRotate:
       func builtin_rotl64(value: culonglong, shift: cint): culonglong
                          {.importc: "__rolq", header: "<x86intrin.h>".}
 
-    func builtin_rotr8(value: cuchar, shift: cint): cuchar
+    func builtin_rotr8(value: uint8, shift: cint): uint8
                       {.importc: "__rorb", header: "<x86intrin.h>".}
     func builtin_rotr16(value: cushort, shift: cint): cushort
                        {.importc: "__rorw", header: "<x86intrin.h>".}
@@ -691,7 +686,7 @@ when useBuiltinsRotate:
     # https://releases.llvm.org/8.0.0/tools/clang/docs/ReleaseNotes.html#non-comprehensive-list-of-changes-in-this-release
     # https://releases.llvm.org/8.0.0/tools/clang/docs/LanguageExtensions.html#builtin-rotateleft
     # source for correct declarations: https://github.com/llvm/llvm-project/blob/main/clang/include/clang/Basic/Builtins.def
-    func builtin_rotl8(value: cuchar, shift: cuchar): cuchar
+    func builtin_rotl8(value: uint8, shift: uint8): uint8
                       {.importc: "__builtin_rotateleft8", nodecl.}
     func builtin_rotl16(value: cushort, shift: cushort): cushort
                        {.importc: "__builtin_rotateleft16", nodecl.}
@@ -701,7 +696,7 @@ when useBuiltinsRotate:
       func builtin_rotl64(value: culonglong, shift: culonglong): culonglong
                          {.importc: "__builtin_rotateleft64", nodecl.}
 
-    func builtin_rotr8(value: cuchar, shift: cuchar): cuchar
+    func builtin_rotr8(value: uint8, shift: uint8): uint8
                       {.importc: "__builtin_rotateright8", nodecl.}
     func builtin_rotr16(value: cushort, shift: cushort): cushort
                        {.importc: "__builtin_rotateright16", nodecl.}
@@ -717,9 +712,9 @@ when useBuiltinsRotate:
     # https://docs.microsoft.com/en-us/cpp/intrinsics/rotl8-rotl16?view=msvc-160
     # https://docs.microsoft.com/en-us/cpp/intrinsics/rotr8-rotr16?view=msvc-160
     # https://docs.microsoft.com/en-us/cpp/c-runtime-library/reference/rotl-rotl64-rotr-rotr64?view=msvc-160
-    func builtin_rotl8(value: cuchar, shift: cuchar): cuchar
+    func builtin_rotl8(value: uint8, shift: uint8): uint8
                       {.importc: "_rotl8", header: "<intrin.h>".}
-    func builtin_rotl16(value: cushort, shift: cuchar): cushort
+    func builtin_rotl16(value: cushort, shift: uint8): cushort
                        {.importc: "_rotl16", header: "<intrin.h>".}
     func builtin_rotl32(value: cuint, shift: cint): cuint
                        {.importc: "_rotl", header: "<stdlib.h>".}
@@ -727,9 +722,9 @@ when useBuiltinsRotate:
       func builtin_rotl64(value: culonglong, shift: cint): culonglong
                          {.importc: "_rotl64", header: "<stdlib.h>".}
 
-    func builtin_rotr8(value: cuchar, shift: cuchar): cuchar
+    func builtin_rotr8(value: uint8, shift: uint8): uint8
                       {.importc: "_rotr8", header: "<intrin.h>".}
-    func builtin_rotr16(value: cushort, shift: cuchar): cushort
+    func builtin_rotr16(value: cushort, shift: uint8): cushort
                        {.importc: "_rotr16", header: "<intrin.h>".}
     func builtin_rotr32(value: cuint, shift: cint): cuint
                        {.importc: "_rotr", header: "<stdlib.h>".}
@@ -739,7 +734,7 @@ when useBuiltinsRotate:
   elif defined(icl):
     # Tested on Intel(R) C++ Intel(R) 64 Compiler Classic Version 2021.1.2 Build
     # 20201208_000000 x64 and x86. Not tested in previous versions.
-    func builtin_rotl8(value: cuchar, shift: cint): cuchar
+    func builtin_rotl8(value: uint8, shift: cint): uint8
                       {.importc: "__rolb", header: "<immintrin.h>".}
     func builtin_rotl16(value: cushort, shift: cint): cushort
                        {.importc: "__rolw", header: "<immintrin.h>".}
@@ -749,7 +744,7 @@ when useBuiltinsRotate:
       func builtin_rotl64(value: culonglong, shift: cint): culonglong
                          {.importc: "__rolq", header: "<immintrin.h>".}
 
-    func builtin_rotr8(value: cuchar, shift: cint): cuchar
+    func builtin_rotr8(value: uint8, shift: cint): uint8
                       {.importc: "__rorb", header: "<immintrin.h>".}
     func builtin_rotr16(value: cushort, shift: cint): cushort
                        {.importc: "__rorw", header: "<immintrin.h>".}
@@ -772,11 +767,13 @@ func rotr[T: SomeUnsignedInt](value: T, rot: int32): T {.inline.} =
   let rot = rot and mask
   (value shr rot) or (value shl ((-rot) and mask))
 
-func shiftTypeToImpl(size: static int, shift: int): auto {.inline.} =
+func shiftTypeTo(size: static int, shift: int): auto {.inline.} =
+  ## Returns the `shift` for the rotation according to the compiler and the
+  ## `size`.
   when (defined(vcc) and (size in [4, 8])) or defined(gcc) or defined(icl):
     cint(shift)
   elif (defined(vcc) and (size in [1, 2])) or (defined(clang) and size == 1):
-    cuchar(shift)
+    uint8(shift)
   elif defined(clang):
     when size == 2:
       cushort(shift)
@@ -785,118 +782,59 @@ func shiftTypeToImpl(size: static int, shift: int): auto {.inline.} =
     elif size == 8:
       culonglong(shift)
 
-template shiftTypeTo[T](value: T, shift: int): auto =
-  ## Returns the `shift` for the rotation according to the compiler and the size
-  ## of the` value`.
-  shiftTypeToImpl(sizeof(value), shift)
-
-func rotateLeftBits*(value: uint8, shift: range[0..8]): uint8 {.inline.} =
-  ## Left-rotate bits in a 8-bits value.
+func rotateLeftBits*[T: SomeUnsignedInt](value: T, shift: range[0..(sizeof(T) * 8)]): T {.inline.} =
+  ## Left-rotate bits in a `value`.
   runnableExamples:
     doAssert rotateLeftBits(0b0110_1001'u8, 4) == 0b1001_0110'u8
-
-  when nimvm:
-    rotl(value, shift.int32)
-  else:
-    when useBuiltinsRotate:
-      builtin_rotl8(value.cuchar, shiftTypeTo(value, shift)).uint8
-    else:
-      rotl(value, shift.int32)
-
-func rotateLeftBits*(value: uint16, shift: range[0..16]): uint16 {.inline.} =
-  ## Left-rotate bits in a 16-bits value.
-  runnableExamples:
     doAssert rotateLeftBits(0b00111100_11000011'u16, 8) ==
       0b11000011_00111100'u16
-
-  when nimvm:
-    rotl(value, shift.int32)
-  else:
-    when useBuiltinsRotate:
-      builtin_rotl16(value.cushort, shiftTypeTo(value, shift)).uint16
-    else:
-      rotl(value, shift.int32)
-
-func rotateLeftBits*(value: uint32, shift: range[0..32]): uint32 {.inline.} =
-  ## Left-rotate bits in a 32-bits value.
-  runnableExamples:
     doAssert rotateLeftBits(0b0000111111110000_1111000000001111'u32, 16) ==
       0b1111000000001111_0000111111110000'u32
-
-  when nimvm:
-    rotl(value, shift.int32)
-  else:
-    when useBuiltinsRotate:
-      builtin_rotl32(value.cuint, shiftTypeTo(value, shift)).uint32
-    else:
-      rotl(value, shift.int32)
-
-func rotateLeftBits*(value: uint64, shift: range[0..64]): uint64 {.inline.} =
-  ## Left-rotate bits in a 64-bits value.
-  runnableExamples:
     doAssert rotateLeftBits(0b00000000111111111111111100000000_11111111000000000000000011111111'u64, 32) ==
       0b11111111000000000000000011111111_00000000111111111111111100000000'u64
-
   when nimvm:
     rotl(value, shift.int32)
   else:
-    when useBuiltinsRotate and defined(amd64):
-      builtin_rotl64(value.culonglong, shiftTypeTo(value, shift)).uint64
+    when useBuiltinsRotate:
+      const size = sizeof(T)
+      when size == 1:
+        builtin_rotl8(value.uint8, shiftTypeTo(size, shift)).T
+      elif size == 2:
+        builtin_rotl16(value.cushort, shiftTypeTo(size, shift)).T
+      elif size == 4:
+        builtin_rotl32(value.cuint, shiftTypeTo(size, shift)).T
+      elif size == 8 and arch64:
+        builtin_rotl64(value.culonglong, shiftTypeTo(size, shift)).T
+      else:
+        rotl(value, shift.int32)
     else:
       rotl(value, shift.int32)
 
-func rotateRightBits*(value: uint8, shift: range[0..8]): uint8 {.inline.} =
-  ## Right-rotate bits in a 8-bits value.
+func rotateRightBits*[T: SomeUnsignedInt](value: T, shift: range[0..(sizeof(T) * 8)]): T {.inline.} =
+  ## Right-rotate bits in a `value`.
   runnableExamples:
     doAssert rotateRightBits(0b0110_1001'u8, 4) == 0b1001_0110'u8
-
-  when nimvm:
-    rotr(value, shift.int32)
-  else:
-    when useBuiltinsRotate:
-      builtin_rotr8(value.cuchar, shiftTypeTo(value, shift)).uint8
-    else:
-      rotr(value, shift.int32)
-
-func rotateRightBits*(value: uint16, shift: range[0..16]): uint16 {.inline.} =
-  ## Right-rotate bits in a 16-bits value.
-  runnableExamples:
     doAssert rotateRightBits(0b00111100_11000011'u16, 8) ==
       0b11000011_00111100'u16
-
-  when nimvm:
-    rotr(value, shift.int32)
-  else:
-    when useBuiltinsRotate:
-      builtin_rotr16(value.cushort, shiftTypeTo(value, shift)).uint16
-    else:
-      rotr(value, shift.int32)
-
-func rotateRightBits*(value: uint32, shift: range[0..32]): uint32 {.inline.} =
-  ## Right-rotate bits in a 32-bits value.
-  runnableExamples:
     doAssert rotateRightBits(0b0000111111110000_1111000000001111'u32, 16) ==
       0b1111000000001111_0000111111110000'u32
-
-  when nimvm:
-    rotr(value, shift.int32)
-  else:
-    when useBuiltinsRotate:
-      builtin_rotr32(value.cuint, shiftTypeTo(value, shift)).uint32
-    else:
-      rotr(value, shift.int32)
-
-func rotateRightBits*(value: uint64, shift: range[0..64]): uint64 {.inline.} =
-  ## Right-rotate bits in a 64-bits value.
-  runnableExamples:
     doAssert rotateRightBits(0b00000000111111111111111100000000_11111111000000000000000011111111'u64, 32) ==
       0b11111111000000000000000011111111_00000000111111111111111100000000'u64
-
   when nimvm:
     rotr(value, shift.int32)
   else:
-    when useBuiltinsRotate and defined(amd64):
-      builtin_rotr64(value.culonglong, shiftTypeTo(value, shift)).uint64
+    when useBuiltinsRotate:
+      const size = sizeof(T)
+      when size == 1:
+        builtin_rotr8(value.uint8, shiftTypeTo(size, shift)).T
+      elif size == 2:
+        builtin_rotr16(value.cushort, shiftTypeTo(size, shift)).T
+      elif size == 4:
+        builtin_rotr32(value.cuint, shiftTypeTo(size, shift)).T
+      elif size == 8 and arch64:
+        builtin_rotr64(value.culonglong, shiftTypeTo(size, shift)).T
+      else:
+        rotr(value, shift.int32)
     else:
       rotr(value, shift.int32)