summary refs log tree commit diff stats
path: root/lib/system/countbits_impl.nim
blob: 34969cb328a39e40210386e8990eb48ebf258114 (plain) (blame)
1
pre { line-height: 125%; }
td.linenos .normal { color: inherit; background-color: transparent; padding-left: 5px; padding-right: 5px; }
span.linenos { color: inherit; background-color: transparent; padding-left: 5px; padding-right: 5px; }
td.linenos .special { color: #000000; background-color: #ffffc0; padding-left: 5px; padding-right: 5px; }
span.linenos.special { color: #000000; background-color: #ffffc0; padding-left: 5px; padding-right: 5px; }
.highlight .hll { background-color: #ffffcc }
.highlight .c { color: #888888 } /* Comment */
.highlight .err { color: #a61717; background-color: #e3d2d2 } /* Error */
.highlight .k { color: #008800; font-weight: bold } /* Keyword */
.highlight .ch { color: #888888 } /* Comment.Hashbang */
.highlight .cm { color: #888888 } /* Comment.Multiline */
.highlight .cp { color: #cc0000; font-weight: bold } /* Comment.Preproc */
.highlight .cpf { color: #888888 } /* Comment.PreprocFile */
.highlight .c1 { color: #888888 } /* Comment.Single */
.highlight .cs { color: #cc0000; font-weight: bold; background-color: #fff0f0 } /* Comment.Special */
.highlight .gd { color: #000000; background-color: #ffdddd } /* Generic.Deleted */
.highlight .ge { font-style: italic } /* Generic.Emph */
.highlight .ges { font-weight: bold; font-style: italic } /* Generic.EmphStrong */
.highlight .gr { color: #aa0000 } /* Generic.Error */
.highlight .gh { color: #333333 } /* Generic.Heading */
.highlight .gi { color: #000000; background-color: #ddffdd } /* Generic.Inserted */
.highlight .go { color: #888888 } /* Generic.Output */
.highlight .gp { color: #555555 } /* Generic.Prompt */
.highlight .gs { font-weight: bold } /* Generic.Strong */
.highlight .gu { color: #666666 } /* Generic.Subheading */
.highlight .gt { color: #aa0000 } /* Generic.Traceback */
.highlight .kc { color: #008800; font-weight: bold } /* Keyword.Constant */
.highlight .kd { color: #008800; font-weight: bold } /* Keyword.Declaration */
.highlight .kn { color: #008800; font-weight: bold } /* Keyword.Namespace */
.highlight .kp { color: #008800 } /* Keyword.Pseudo */
.highlight .kr { color: #008800; font-weight: bold } /* Keyword.Reserved */
.highlight .kt { color: #888888; font-weight: bold } /* Keyword.Type */
.highlight .m { color: #0000DD; font-weight: bold } /* Literal.Number */
.highlight .s { color: #dd2200; background-color: #fff0f0 } /* Literal.String */
.highlight .na { color: #336699 } /* Name.Attribute */
.highlight .nb { color: #003388 } /* Name.Builtin */
.highlight .nc { color: #bb0066; font-weight: bold } /* Name.Class */
.highlight .no { color: #003366; font-weight: bold } /* Name.Constant */
.highlight .nd { color: #555555 } /* Name.Decorator */
.highlight .ne { color: #bb0066; font-weight: bold } /* Name.Exception */
.highlight .nf { color: #0066bb; font-weight: bold } /* Name.Function */
.highlight .nl
#
#
#            Nim's Runtime Library
#        (c) Copyright 2012 Andreas Rumpf
#
#    See the file "copying.txt", included in this
#    distribution, for details about the copyright.
#

## Contains the used algorithms for counting bits.

from std/private/bitops_utils import forwardImpl, castToUnsigned

const useBuiltins* = not defined(noIntrinsicsBitOpts)
const noUndefined* = defined(noUndefinedBitOpts)
const useGCC_builtins* = (defined(gcc) or defined(llvm_gcc) or
                         defined(clang)) and useBuiltins
const useICC_builtins* = defined(icc) and useBuiltins
const useVCC_builtins* = defined(vcc) and useBuiltins
const arch64* = sizeof(int) == 8

template countBitsImpl(n: uint32): int =
  # generic formula is from: https://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
  var v = uint32(n)
  v = v - ((v shr 1'u32) and 0x55555555'u32)
  v = (v and 0x33333333'u32) + ((v shr 2'u32) and 0x33333333'u32)
  (((v + (v shr 4'u32) and 0xF0F0F0F'u32) * 0x1010101'u32) shr 24'u32).int

template countBitsImpl(n: uint64): int =
  # generic formula is from: https://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
  var v = uint64(n)
  v = v - ((v shr 1'u64) and 0x5555555555555555'u64)
  v = (v and 0x3333333333333333'u64) + ((v shr 2'u64) and 0x3333333333333333'u64)
  v = (v + (v shr 4'u64) and 0x0F0F0F0F0F0F0F0F'u64)
  ((v * 0x0101010101010101'u64) shr 56'u64).int


when useGCC_builtins:
  # Returns the number of set 1-bits in value.
  proc builtin_popcount(x: cuint): cint {.importc: "__builtin_popcount", cdecl.}
  proc builtin_popcountll(x: culonglong): cint {.
      importc: "__builtin_popcountll", cdecl.}

elif useVCC_builtins:
  # Counts the number of one bits (population count) in a 16-, 32-, or 64-byte unsigned integer.
  func builtin_popcnt16(a2: uint16): uint16 {.
      importc: "__popcnt16", header: "<intrin.h>".}
  func builtin_popcnt32(a2: uint32): uint32 {.
      importc: "__popcnt", header: "<intrin.h>".}
  func builtin_popcnt64(a2: uint64): uint64 {.
      importc: "__popcnt64", header: "<intrin.h>".}

elif useICC_builtins:
  # Intel compiler intrinsics: http://fulla.fnal.gov/intel/compiler_c/main_cls/intref_cls/common/intref_allia_misc.htm
  # see also: https://software.intel.com/en-us/node/523362
  # Count the number of bits set to 1 in an integer a, and return that count in dst.
  func builtin_popcnt32(a: cint): cint {.
      importc: "_popcnt", header: "<immintrin.h>".}
  func builtin_popcnt64(a: uint64): cint {.
      importc: "_popcnt64", header: "<immintrin.h>".}


func countSetBitsImpl*(x: SomeInteger): int {.inline.} =
  ## Counts the set bits in an integer (also called `Hamming weight`:idx:).
  # TODO: figure out if ICC support _popcnt32/_popcnt64 on platform without POPCNT.
  # like GCC and MSVC
  let x = x.castToUnsigned
  when nimvm:
    result = forwardImpl(countBitsImpl, x)
  else:
    when useGCC_builtins:
      when sizeof(x) <= 4: result = builtin_popcount(x.cuint).int
      else: result = builtin_popcountll(x.culonglong).int
    elif useVCC_builtins:
      when sizeof(x) <= 2: result = builtin_popcnt16(x.uint16).int
      elif sizeof(x) <= 4: result = builtin_popcnt32(x.uint32).int
      elif arch64: result = builtin_popcnt64(x.uint64).int
      else: result = builtin_popcnt32((x.uint64 and 0xFFFFFFFF'u64).uint32).int +
                     builtin_popcnt32((x.uint64 shr 32'u64).uint32).int
    elif useICC_builtins:
      when sizeof(x) <= 4: result = builtin_popcnt32(x.cint).int
      elif arch64: result = builtin_popcnt64(x.uint64).int
      else: result = builtin_popcnt32((x.uint64 and 0xFFFFFFFF'u64).cint).int +
                     builtin_popcnt32((x.uint64 shr 32'u64).cint).int
    else:
      when sizeof(x) <= 4: result = countBitsImpl(x.uint32)
      else: result = countBitsImpl(x.uint64)

proc countBits32*(n: uint32): int {.compilerproc, inline.} =
  result = countSetBitsImpl(n)

proc countBits64*(n: uint64): int {.compilerproc, inline.} =
  result = countSetBitsImpl(n)