5 files changed, 47 insertions, 62 deletions
diff --git a/compiler/vm.nim b/compiler/vm.nim
index e95a491fd..71fd2722b 100644
--- a/compiler/vm.nim
+++ b/compiler/vm.nim
@@ -1264,6 +1264,11 @@ proc rawExecute(c: PCtx, start: int, tos: PStackFrame): TFullReg =
     of opcNarrowU:
       decodeB(rkInt)
       regs[ra].intVal = regs[ra].intVal and ((1'i64 shl rb)-1)
+    of opcSignExtend:
+      # like opcNarrowS, but no out of range possible
+      decodeB(rkInt)
+      let imm = 64 - rb
+      regs[ra].intVal = ashr(regs[ra].intVal shl imm, imm)
     of opcIsNil:
       decodeB(rkInt)
       let node = regs[rb].node
diff --git a/compiler/vmdef.nim b/compiler/vmdef.nim
index a43f8dbba..58158a7cc 100644
--- a/compiler/vmdef.nim
+++ b/compiler/vmdef.nim
@@ -75,6 +75,7 @@ type
     opcSubStr, opcParseFloat, opcConv, opcCast,
     opcQuit,
     opcNarrowS, opcNarrowU,
+    opcSignExtend,
 
     opcAddStrCh,
     opcAddStrStr,
diff --git a/compiler/vmgen.nim b/compiler/vmgen.nim
index 7f3ca84ae..f513c59a7 100644
--- a/compiler/vmgen.nim
+++ b/compiler/vmgen.nim
@@ -986,7 +986,14 @@ proc genMagic(c: PCtx; n: PNode; dest: var TDest; m: TMagic) =
     c.freeTemp(tmp)
     c.freeTemp(tmp2)
 
-  of mShlI: genBinaryABCnarrowU(c, n, dest, opcShlInt)
+  of mShlI:
+    genBinaryABC(c, n, dest, opcShlInt)
+    # genNarrowU modified
+    let t = skipTypes(n.typ, abstractVar-{tyTypeDesc})
+    if t.kind in {tyUInt8..tyUInt32} or (t.kind == tyUInt and t.size < 8):
+      c.gABC(n, opcNarrowU, dest, TRegister(t.size*8))
+    elif t.kind in {tyInt8..tyInt32} or (t.kind == tyInt and t.size < 8):
+      c.gABC(n, opcSignExtend, dest, TRegister(t.size*8))
   of mAshrI: genBinaryABC(c, n, dest, opcAshrInt)
   of mBitandI: genBinaryABC(c, n, dest, opcBitandInt)
   of mBitorI: genBinaryABC(c, n, dest, opcBitorInt)
diff --git a/lib/pure/bitops.nim b/lib/pure/bitops.nim
index d258a42de..0eee3cd70 100644
--- a/lib/pure/bitops.nim
+++ b/lib/pure/bitops.nim
@@ -33,6 +33,18 @@ const useICC_builtins = defined(icc) and useBuiltins
 const useVCC_builtins = defined(vcc) and useBuiltins
 const arch64 = sizeof(int) == 8
 
+template forwardImpl(impl, arg) {.dirty.} =
+  when sizeof(x) <= 4:
+    when x is SomeSignedInt:
+      impl(cast[uint32](x.int32))
+    else:
+      impl(x.uint32)
+  else:
+    when x is SomeSignedInt:
+      impl(cast[uint64](x.int64))
+    else:
+      impl(x.uint64)
+
 when defined(nimHasalignOf):
 
   import macros
@@ -243,8 +255,7 @@ proc countSetBits*(x: SomeInteger): int {.inline, nosideeffect.} =
   # TODO: figure out if ICC support _popcnt32/_popcnt64 on platform without POPCNT.
   # like GCC and MSVC
   when nimvm:
-    when sizeof(x) <= 4: result = countSetBits_nim(x.uint32)
-    else:                result = countSetBits_nim(x.uint64)
+    result = forwardImpl(countSetBits_nim, x)
   else:
     when useGCC_builtins:
       when sizeof(x) <= 4: result = builtin_popcount(x.cuint).int
@@ -274,8 +285,7 @@ proc parityBits*(x: SomeInteger): int {.inline, nosideeffect.} =
   # Can be used a base if creating ASM version.
   # https://stackoverflow.com/questions/21617970/how-to-check-if-value-has-even-parity-of-bits-or-odd
   when nimvm:
-    when sizeof(x) <= 4: result = parity_impl(x.uint32)
-    else:                result = parity_impl(x.uint64)
+    result = forwardImpl(parity_impl, x)
   else:
     when useGCC_builtins:
       when sizeof(x) <= 4: result = builtin_parity(x.uint32).int
@@ -293,8 +303,7 @@ proc firstSetBit*(x: SomeInteger): int {.inline, nosideeffect.} =
     when noUndefined:
       if x == 0:
         return 0
-    when sizeof(x) <= 4: result = firstSetBit_nim(x.uint32)
-    else:                result = firstSetBit_nim(x.uint64)
+    result = forwardImpl(firstSetBit_nim, x)
   else:
     when noUndefined and not useGCC_builtins:
       if x == 0:
@@ -328,8 +337,7 @@ proc fastLog2*(x: SomeInteger): int {.inline, nosideeffect.} =
     if x == 0:
       return -1
   when nimvm:
-    when sizeof(x) <= 4: result = fastlog2_nim(x.uint32)
-    else:                result = fastlog2_nim(x.uint64)
+    result = forwardImpl(fastlog2_nim, x)
   else:
     when useGCC_builtins:
       when sizeof(x) <= 4: result = 31 - builtin_clz(x.uint32).int
@@ -360,8 +368,7 @@ proc countLeadingZeroBits*(x: SomeInteger): int {.inline, nosideeffect.} =
     if x == 0:
       return 0
   when nimvm:
-      when sizeof(x) <= 4: result = sizeof(x)*8 - 1 - fastlog2_nim(x.uint32)
-      else:                result = sizeof(x)*8 - 1 - fastlog2_nim(x.uint64)
+    result = sizeof(x)*8 - 1 - forwardImpl(fastlog2_nim, x)
   else:
     when useGCC_builtins:
       when sizeof(x) <= 4: result = builtin_clz(x.uint32).int - (32 - sizeof(x)*8)
diff --git a/tests/stdlib/tbitops.nim b/tests/stdlib/tbitops.nim
index b8b44703c..1cbab4870 100644
--- a/tests/stdlib/tbitops.nim
+++ b/tests/stdlib/tbitops.nim
@@ -1,9 +1,9 @@
 discard """
+  nimout: "OK"
   output: "OK"
 """
 import bitops
 
-
 proc main() =
   const U8 = 0b0011_0010'u8
   const I8 = 0b0011_0010'i8
@@ -79,25 +79,6 @@ proc main() =
   doAssert( U8.rotateLeftBits(3) == 0b10010001'u8)
   doAssert( U8.rotateRightBits(3) == 0b0100_0110'u8)
 
-  static :
-    # test bitopts at compile time with vm
-    doAssert( U8.fastLog2 == 5)
-    doAssert( I8.fastLog2 == 5)
-    doAssert( U8.countLeadingZeroBits == 2)
-    doAssert( I8.countLeadingZeroBits == 2)
-    doAssert( U8.countTrailingZeroBits == 1)
-    doAssert( I8.countTrailingZeroBits == 1)
-    doAssert( U8.firstSetBit == 2)
-    doAssert( I8.firstSetBit == 2)
-    doAssert( U8.parityBits == 1)
-    doAssert( I8.parityBits == 1)
-    doAssert( U8.countSetBits == 3)
-    doAssert( I8.countSetBits == 3)
-    doAssert( U8.rotateLeftBits(3) == 0b10010001'u8)
-    doAssert( U8.rotateRightBits(3) == 0b0100_0110'u8)
-
-
-
   template test_undefined_impl(ffunc: untyped; expected: int; is_static: bool) =
     doAssert( ffunc(0'u8) == expected)
     doAssert( ffunc(0'i8) == expected)
@@ -142,26 +123,6 @@ proc main() =
     doAssert( U64A.rotateLeftBits(64) == U64A)
     doAssert( U64A.rotateRightBits(64) == U64A)
 
-    static:    # check for undefined behavior with rotate by zero.
-      doAssert( U8.rotateLeftBits(0) == U8)
-      doAssert( U8.rotateRightBits(0) == U8)
-      doAssert( U16.rotateLeftBits(0) == U16)
-      doAssert( U16.rotateRightBits(0) == U16)
-      doAssert( U32.rotateLeftBits(0) == U32)
-      doAssert( U32.rotateRightBits(0) == U32)
-      doAssert( U64A.rotateLeftBits(0) == U64A)
-      doAssert( U64A.rotateRightBits(0) == U64A)
-
-      # check for undefined behavior with rotate by integer width.
-      doAssert( U8.rotateLeftBits(8) == U8)
-      doAssert( U8.rotateRightBits(8) == U8)
-      doAssert( U16.rotateLeftBits(16) == U16)
-      doAssert( U16.rotateRightBits(16) == U16)
-      doAssert( U32.rotateLeftBits(32) == U32)
-      doAssert( U32.rotateRightBits(32) == U32)
-      doAssert( U64A.rotateLeftBits(64) == U64A)
-      doAssert( U64A.rotateRightBits(64) == U64A)
-
   block:
     # mask operations
     var v: uint8
@@ -207,18 +168,22 @@ proc main() =
     var v: uint64
     v.setBit(63)
     doAssert v == 0b1000_0000_0000_0000_0000_0000_0000_0000_0000_0000_0000_0000_0000_0000_0000_0000'u64
-  block:
-    # Test if RangeError is thrown if indexing out of range
-    try:
-      var v: uint32
-      var i = 32
-      v.setBit(i)
-      doAssert false
-    except RangeError:
-      discard
-    except:
-      doAssert false
 
   echo "OK"
 
+block: # not ready for vm because exception is compile error
+  try:
+    var v: uint32
+    var i = 32
+    v.setBit(i)
+    doAssert false
+  except RangeError:
+    discard
+  except:
+    doAssert false
+
+
 main()
+static:
+  # test everything on vm as well
+  main()