Diffstat (limited to 'lib/pure/concurrency'):

 -rw-r--r--  lib/pure/concurrency/atomics.nim     |  87
 -rw-r--r--  lib/pure/concurrency/cpuinfo.nim     | 155
 -rw-r--r--  lib/pure/concurrency/cpuload.nim     |   9
 -rw-r--r--  lib/pure/concurrency/threadpool.nim  |  29

 4 files changed, 155 insertions, 125 deletions
diff --git a/lib/pure/concurrency/atomics.nim b/lib/pure/concurrency/atomics.nim
index bdf1e8cc2..818f1b37a 100644
--- a/lib/pure/concurrency/atomics.nim
+++ b/lib/pure/concurrency/atomics.nim
@@ -10,6 +10,9 @@
 ## Types and operations for atomic operations and lockless algorithms.
 ##
 ## Unstable API.
+##
+## By default, C++ uses C11 atomic primitives. To use C++ `std::atomic`,
+## `-d:nimUseCppAtomics` can be defined.
 
 runnableExamples:
   # Atomic
@@ -50,8 +53,7 @@ runnableExamples:
   flag.clear(moRelaxed)
   assert not flag.testAndSet
 
-
-when defined(cpp) or defined(nimdoc):
+when (defined(cpp) and defined(nimUseCppAtomics)) or defined(nimdoc):
   # For the C++ backend, types and operations map directly to C++11 atomics.
   {.push, header: "<atomic>".}
 
@@ -211,8 +213,8 @@ else:
 
   # MSVC intrinsics
   proc interlockedExchange(location: pointer; desired: int8): int8 {.importc: "_InterlockedExchange8".}
-  proc interlockedExchange(location: pointer; desired: int16): int16 {.importc: "_InterlockedExchange".}
-  proc interlockedExchange(location: pointer; desired: int32): int32 {.importc: "_InterlockedExchange16".}
+  proc interlockedExchange(location: pointer; desired: int16): int16 {.importc: "_InterlockedExchange16".}
+  proc interlockedExchange(location: pointer; desired: int32): int32 {.importc: "_InterlockedExchange".}
   proc interlockedExchange(location: pointer; desired: int64): int64 {.importc: "_InterlockedExchange64".}
 
   proc interlockedCompareExchange(location: pointer; desired, expected: int8): int8 {.importc: "_InterlockedCompareExchange8".}
@@ -274,10 +276,17 @@ else:
       cast[T](interlockedXor(addr(location.value), cast[nonAtomicType(T)](value)))
 
 else:
-  {.push, header: "<stdatomic.h>".}
+  when defined(cpp):
+    {.push, header: "<atomic>".}
+    template maybeWrapStd(x: string): string =
+      "std::" & x
+  else:
+    {.push, header: "<stdatomic.h>".}
+    template maybeWrapStd(x: string): string =
+      x
 
   type
-    MemoryOrder* {.importc: "memory_order".} = enum
+    MemoryOrder* {.importc: "memory_order".maybeWrapStd.} = enum
       moRelaxed
       moConsume
       moAcquire
@@ -285,53 +294,59 @@ else:
       moAcquireRelease
       moSequentiallyConsistent
 
-  type
-    # Atomic* {.importcpp: "_Atomic('0)".} [T] = object
+  when defined(cpp):
+    type
+      # Atomic*[T] {.importcpp: "_Atomic('0)".} = object
 
-    AtomicInt8 {.importc: "_Atomic NI8", size: 1.} = object
-    AtomicInt16 {.importc: "_Atomic NI16", size: 2.} = object
-    AtomicInt32 {.importc: "_Atomic NI32", size: 4.} = object
-    AtomicInt64 {.importc: "_Atomic NI64", size: 8.} = object
+      AtomicInt8 {.importc: "std::atomic<NI8>".} = int8
+      AtomicInt16 {.importc: "std::atomic<NI16>".} = int16
+      AtomicInt32 {.importc: "std::atomic<NI32>".} = int32
+      AtomicInt64 {.importc: "std::atomic<NI64>".} = int64
+  else:
+    type
+      # Atomic*[T] {.importcpp: "_Atomic('0)".} = object
 
-  template atomicType*(T: typedesc[Trivial]): untyped =
-    # Maps the size of a trivial type to it's internal atomic type
-    when sizeof(T) == 1: AtomicInt8
-    elif sizeof(T) == 2: AtomicInt16
-    elif sizeof(T) == 4: AtomicInt32
-    elif sizeof(T) == 8: AtomicInt64
+      AtomicInt8 {.importc: "_Atomic NI8".} = int8
+      AtomicInt16 {.importc: "_Atomic NI16".} = int16
+      AtomicInt32 {.importc: "_Atomic NI32".} = int32
+      AtomicInt64 {.importc: "_Atomic NI64".} = int64
 
   type
-    AtomicFlag* {.importc: "atomic_flag", size: 1.} = object
+    AtomicFlag* {.importc: "atomic_flag".maybeWrapStd, size: 1.} = object
 
     Atomic*[T] = object
      when T is Trivial:
-        value: T.atomicType
+        # Maps the size of a trivial type to it's internal atomic type
+        when sizeof(T) == 1: value: AtomicInt8
+        elif sizeof(T) == 2: value: AtomicInt16
+        elif sizeof(T) == 4: value: AtomicInt32
+        elif sizeof(T) == 8: value: AtomicInt64
      else:
        nonAtomicValue: T
        guard: AtomicFlag
 
   #proc init*[T](location: var Atomic[T]; value: T): T {.importcpp: "atomic_init(@)".}
-  proc atomic_load_explicit[T, A](location: ptr A; order: MemoryOrder): T {.importc.}
-  proc atomic_store_explicit[T, A](location: ptr A; desired: T; order: MemoryOrder = moSequentiallyConsistent) {.importc.}
-  proc atomic_exchange_explicit[T, A](location: ptr A; desired: T; order: MemoryOrder = moSequentiallyConsistent): T {.importc.}
-  proc atomic_compare_exchange_strong_explicit[T, A](location: ptr A; expected: ptr T; desired: T; success, failure: MemoryOrder): bool {.importc.}
-  proc atomic_compare_exchange_weak_explicit[T, A](location: ptr A; expected: ptr T; desired: T; success, failure: MemoryOrder): bool {.importc.}
+  proc atomic_load_explicit[T, A](location: ptr A; order: MemoryOrder): T {.importc: "atomic_load_explicit".maybeWrapStd.}
+  proc atomic_store_explicit[T, A](location: ptr A; desired: T; order: MemoryOrder = moSequentiallyConsistent) {.importc: "atomic_store_explicit".maybeWrapStd.}
+  proc atomic_exchange_explicit[T, A](location: ptr A; desired: T; order: MemoryOrder = moSequentiallyConsistent): T {.importc: "atomic_exchange_explicit".maybeWrapStd.}
+  proc atomic_compare_exchange_strong_explicit[T, A](location: ptr A; expected: ptr T; desired: T; success, failure: MemoryOrder): bool {.importc: "atomic_compare_exchange_strong_explicit".maybeWrapStd.}
+  proc atomic_compare_exchange_weak_explicit[T, A](location: ptr A; expected: ptr T; desired: T; success, failure: MemoryOrder): bool {.importc: "atomic_compare_exchange_weak_explicit".maybeWrapStd.}
 
   # Numerical operations
-  proc atomic_fetch_add_explicit[T, A](location: ptr A; value: T; order: MemoryOrder = moSequentiallyConsistent): T {.importc.}
-  proc atomic_fetch_sub_explicit[T, A](location: ptr A; value: T; order: MemoryOrder = moSequentiallyConsistent): T {.importc.}
-  proc atomic_fetch_and_explicit[T, A](location: ptr A; value: T; order: MemoryOrder = moSequentiallyConsistent): T {.importc.}
-  proc atomic_fetch_or_explicit[T, A](location: ptr A; value: T; order: MemoryOrder = moSequentiallyConsistent): T {.importc.}
-  proc atomic_fetch_xor_explicit[T, A](location: ptr A; value: T; order: MemoryOrder = moSequentiallyConsistent): T {.importc.}
+  proc atomic_fetch_add_explicit[T, A](location: ptr A; value: T; order: MemoryOrder = moSequentiallyConsistent): T {.importc: "atomic_fetch_add_explicit".maybeWrapStd.}
+  proc atomic_fetch_sub_explicit[T, A](location: ptr A; value: T; order: MemoryOrder = moSequentiallyConsistent): T {.importc: "atomic_fetch_sub_explicit".maybeWrapStd.}
+  proc atomic_fetch_and_explicit[T, A](location: ptr A; value: T; order: MemoryOrder = moSequentiallyConsistent): T {.importc: "atomic_fetch_and_explicit".maybeWrapStd.}
+  proc atomic_fetch_or_explicit[T, A](location: ptr A; value: T; order: MemoryOrder = moSequentiallyConsistent): T {.importc: "atomic_fetch_or_explicit".maybeWrapStd.}
+  proc atomic_fetch_xor_explicit[T, A](location: ptr A; value: T; order: MemoryOrder = moSequentiallyConsistent): T {.importc: "atomic_fetch_xor_explicit".maybeWrapStd.}
 
   # Flag operations
   # var ATOMIC_FLAG_INIT {.importc, nodecl.}: AtomicFlag
   # proc init*(location: var AtomicFlag) {.inline.} = location = ATOMIC_FLAG_INIT
-  proc testAndSet*(location: var AtomicFlag; order: MemoryOrder = moSequentiallyConsistent): bool {.importc: "atomic_flag_test_and_set_explicit".}
-  proc clear*(location: var AtomicFlag; order: MemoryOrder = moSequentiallyConsistent) {.importc: "atomic_flag_clear_explicit".}
+  proc testAndSet*(location: var AtomicFlag; order: MemoryOrder = moSequentiallyConsistent): bool {.importc: "atomic_flag_test_and_set_explicit".maybeWrapStd.}
+  proc clear*(location: var AtomicFlag; order: MemoryOrder = moSequentiallyConsistent) {.importc: "atomic_flag_clear_explicit".maybeWrapStd.}
 
-  proc fence*(order: MemoryOrder) {.importc: "atomic_thread_fence".}
-  proc signalFence*(order: MemoryOrder) {.importc: "atomic_signal_fence".}
+  proc fence*(order: MemoryOrder) {.importc: "atomic_thread_fence".maybeWrapStd.}
+  proc signalFence*(order: MemoryOrder) {.importc: "atomic_signal_fence".maybeWrapStd.}
 
   {.pop.}
 
@@ -364,11 +379,11 @@ else:
       cast[T](atomic_fetch_xor_explicit(addr(location.value), cast[nonAtomicType(T)](value), order))
 
   template withLock[T: not Trivial](location: var Atomic[T]; order: MemoryOrder; body: untyped): untyped =
-    while location.guard.testAndSet(moAcquire): discard
+    while testAndSet(location.guard, moAcquire): discard
     try:
       body
     finally:
-      location.guard.clear(moRelease)
+      clear(location.guard, moRelease)
 
   proc load*[T: not Trivial](location: var Atomic[T]; order: MemoryOrder = moSequentiallyConsistent): T {.inline.} =
     withLock(location, order):
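The public API is the same for every backend; only the emitted C/C++ differs. A minimal usage sketch of the module this patch touches (with the C++ backend, `-d:nimUseCppAtomics` maps these onto `std::atomic` directly, as the new doc comment describes):

    import std/atomics

    var counter: Atomic[int]
    counter.store(0)
    discard counter.fetchAdd(1, moRelaxed)   # returns the previous value
    assert counter.load(moAcquire) == 1

    var flag: AtomicFlag
    if not flag.testAndSet(moAcquire):       # acquire a simple spin "lock"
      flag.clear(moRelease)                  # ... and release it again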
diff --git a/lib/pure/concurrency/cpuinfo.nim b/lib/pure/concurrency/cpuinfo.nim
index ee43b8e11..9bc3fd579 100644
--- a/lib/pure/concurrency/cpuinfo.nim
+++ b/lib/pure/concurrency/cpuinfo.nim
@@ -15,87 +15,96 @@ runnableExamples:
 
 include "system/inclrtl"
 
-when defined(posix) and not (defined(macosx) or defined(bsd)):
-  import posix
+when defined(js):
+  import std/jsffi
+  proc countProcessorsImpl(): int =
+    when defined(nodejs):
+      let jsOs = require("os")
+      let jsObj = jsOs.cpus().length
+    else:
+      # `navigator.hardwareConcurrency`
+      # works on browser as well as deno.
+      let navigator {.importcpp.}: JsObject
+      let jsObj = navigator.hardwareConcurrency
+    result = jsObj.to int
+else:
+  when defined(posix) and not (defined(macosx) or defined(bsd)):
+    import std/posix
 
-when defined(freebsd) or defined(macosx):
-  {.emit: "#include <sys/types.h>".}
+  when defined(windows):
+    import std/private/win_getsysteminfo
+
+  when defined(freebsd) or defined(macosx):
+    {.emit: "#include <sys/types.h>".}
+
+  when defined(openbsd) or defined(netbsd):
+    {.emit: "#include <sys/param.h>".}
+
+  when defined(macosx) or defined(bsd):
+    # we HAVE to emit param.h before sysctl.h so we cannot use .header here
+    # either. The amount of archaic bullshit in Poonix based OSes is just insane.
+    {.emit: "#include <sys/sysctl.h>".}
+    {.push nodecl.}
+    when defined(macosx):
+      proc sysctlbyname(name: cstring,
+        oldp: pointer, oldlenp: var csize_t,
+        newp: pointer, newlen: csize_t): cint {.importc.}
+    let
+      CTL_HW {.importc.}: cint
+      HW_NCPU {.importc.}: cint
+    proc sysctl[I: static[int]](name: var array[I, cint], namelen: cuint,
+      oldp: pointer, oldlenp: var csize_t,
+      newp: pointer, newlen: csize_t): cint {.importc.}
+    {.pop.}
 
-when defined(openbsd) or defined(netbsd):
-  {.emit: "#include <sys/param.h>".}
+  when defined(genode):
+    import genode/env
 
-when defined(macosx) or defined(bsd):
-  # we HAVE to emit param.h before sysctl.h so we cannot use .header here
-  # either. The amount of archaic bullshit in Poonix based OSes is just insane.
-  {.emit: "#include <sys/sysctl.h>".}
-  const
-    CTL_HW = 6
-    HW_AVAILCPU = 25
-    HW_NCPU = 3
-  proc sysctl(x: ptr array[0..3, cint], y: cint, z: pointer,
-              a: var csize_t, b: pointer, c: csize_t): cint {.
-    importc: "sysctl", nodecl.}
+    proc affinitySpaceTotal(env: GenodeEnvPtr): cuint {.
+      importcpp: "@->cpu().affinity_space().total()".}
 
-when defined(genode):
-  include genode/env
+  when defined(haiku):
+    type
+      SystemInfo {.importc: "system_info", header: "<OS.h>".} = object
+        cpuCount {.importc: "cpu_count".}: uint32
+
+    proc getSystemInfo(info: ptr SystemInfo): int32 {.importc: "get_system_info",
+      header: "<OS.h>".}
 
-  proc affinitySpaceTotal(env: GenodeEnvPtr): cuint {.
-    importcpp: "@->cpu().affinity_space().total()".}
+  proc countProcessorsImpl(): int {.inline.} =
+    when defined(windows):
+      var
+        si: SystemInfo
+      getSystemInfo(addr si)
+      result = int(si.dwNumberOfProcessors)
+    elif defined(macosx) or defined(bsd):
+      let dest = addr result
+      var len = sizeof(result).csize_t
+      when defined(macosx):
+        # alias of "hw.activecpu"
+        if sysctlbyname("hw.logicalcpu", dest, len, nil, 0) == 0:
+          return
+      var mib = [CTL_HW, HW_NCPU]
+      if sysctl(mib, 2, dest, len, nil, 0) == 0:
+        return
+    elif defined(hpux):
+      result = mpctl(MPC_GETNUMSPUS, nil, nil)
+    elif defined(irix):
+      var SC_NPROC_ONLN {.importc: "_SC_NPROC_ONLN", header: "<unistd.h>".}: cint
+      result = sysconf(SC_NPROC_ONLN)
+    elif defined(genode):
+      result = runtimeEnv.affinitySpaceTotal().int
+    elif defined(haiku):
+      var sysinfo: SystemInfo
+      if getSystemInfo(addr sysinfo) == 0:
+        result = sysinfo.cpuCount.int
+    else:
+      result = sysconf(SC_NPROCESSORS_ONLN)
+    if result < 0: result = 0
 
-when defined(haiku):
-  type
-    SystemInfo {.importc: "system_info", header: "<OS.h>".} = object
-      cpuCount {.importc: "cpu_count".}: uint32
-  proc getSystemInfo(info: ptr SystemInfo): int32 {.importc: "get_system_info",
-                                                    header: "<OS.h>".}
 
 
 proc countProcessors*(): int {.rtl, extern: "ncpi$1".} =
   ## Returns the number of the processors/cores the machine has.
   ## Returns 0 if it cannot be detected.
-  when defined(windows):
-    type
-      SYSTEM_INFO {.final, pure.} = object
-        u1: int32
-        dwPageSize: int32
-        lpMinimumApplicationAddress: pointer
-        lpMaximumApplicationAddress: pointer
-        dwActiveProcessorMask: ptr int32
-        dwNumberOfProcessors: int32
-        dwProcessorType: int32
-        dwAllocationGranularity: int32
-        wProcessorLevel: int16
-        wProcessorRevision: int16
-
-    proc GetSystemInfo(lpSystemInfo: var SYSTEM_INFO) {.stdcall, dynlib: "kernel32", importc: "GetSystemInfo".}
-
-    var
-      si: SYSTEM_INFO
-    GetSystemInfo(si)
-    result = si.dwNumberOfProcessors
-  elif defined(macosx) or defined(bsd):
-    var
-      mib: array[0..3, cint]
-      numCPU: int
-    mib[0] = CTL_HW
-    mib[1] = HW_AVAILCPU
-    var len = sizeof(numCPU).csize_t
-    discard sysctl(addr(mib), 2, addr(numCPU), len, nil, 0)
-    if numCPU < 1:
-      mib[1] = HW_NCPU
-      discard sysctl(addr(mib), 2, addr(numCPU), len, nil, 0)
-    result = numCPU
-  elif defined(hpux):
-    result = mpctl(MPC_GETNUMSPUS, nil, nil)
-  elif defined(irix):
-    var SC_NPROC_ONLN {.importc: "_SC_NPROC_ONLN", header: "<unistd.h>".}: cint
-    result = sysconf(SC_NPROC_ONLN)
-  elif defined(genode):
-    result = runtimeEnv.affinitySpaceTotal().int
-  elif defined(haiku):
-    var sysinfo: SystemInfo
-    if getSystemInfo(addr sysinfo) == 0:
-      result = sysinfo.cpuCount.int
-  else:
-    result = sysconf(SC_NPROCESSORS_ONLN)
-  if result <= 0: result = 0
+  countProcessorsImpl()
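The module's public surface is untouched by this refactor: all platform probing now lives in countProcessorsImpl (including the new JS branch), and countProcessors simply forwards to it. A minimal sketch of the unchanged API:

    import std/cpuinfo

    let cores = countProcessors()   # 0 means detection failed
    if cores > 0:
      echo "logical processors: ", cores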
diff --git a/lib/pure/concurrency/cpuload.nim b/lib/pure/concurrency/cpuload.nim
index 841d58d86..bfbf16721 100644
--- a/lib/pure/concurrency/cpuload.nim
+++ b/lib/pure/concurrency/cpuload.nim
@@ -13,11 +13,14 @@
 ## Unstable API.
 
 when defined(windows):
-  import winlean, os, strutils, math
+  import std/[winlean, os, strutils, math]
 
   proc `-`(a, b: FILETIME): int64 = a.rdFileTime - b.rdFileTime
 elif defined(linux):
-  from cpuinfo import countProcessors
+  from std/cpuinfo import countProcessors
+
+when defined(nimPreviewSlimSystem):
+  import std/syncio
 
 type
   ThreadPoolAdvice* = enum
@@ -84,7 +87,7 @@ proc advice*(s: var ThreadPoolState): ThreadPoolAdvice =
   inc s.calls
 
 when not defined(testing) and isMainModule and not defined(nimdoc):
-  import random
+  import std/random
 
   proc busyLoop() =
     while true:
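For orientation, a hedged sketch of how the advice API touched above is polled. The signature of advice and the ThreadPoolAdvice enum come from the hunk context; the enum values doNothing, doCreateThread and doShutdownThread are assumed from the module's purpose and are not shown in this diff, so treat the snippet as illustrative only:

    import std/cpuload                 # unstable API, used internally by threadpool

    var state: ThreadPoolState
    case advice(state)                 # samples recent CPU usage and counts the call
    of doNothing: discard
    of doCreateThread: echo "advice: start another worker thread"
    of doShutdownThread: echo "advice: shut a worker thread down"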
diff --git a/lib/pure/concurrency/threadpool.nim b/lib/pure/concurrency/threadpool.nim
index 9beb39522..06ed2fe54 100644
--- a/lib/pure/concurrency/threadpool.nim
+++ b/lib/pure/concurrency/threadpool.nim
@@ -7,21 +7,25 @@
 # distribution, for details about the copyright.
 #
 
+{.deprecated: "use the nimble packages `malebolgia`, `taskpools` or `weave` instead".}
+
 ## Implements Nim's `parallel & spawn statements <manual_experimental.html#parallel-amp-spawn>`_.
 ##
 ## Unstable API.
 ##
 ## See also
 ## ========
-## * `threads module <threads.html>`_ for basic thread support
-## * `channels module <channels_builtin.html>`_ for message passing support
+## * `threads module <typedthreads.html>`_ for basic thread support
 ## * `locks module <locks.html>`_ for locks and condition variables
 ## * `asyncdispatch module <asyncdispatch.html>`_ for asynchronous IO
 
 when not compileOption("threads"):
   {.error: "Threadpool requires --threads:on option.".}
 
-import cpuinfo, cpuload, locks, os
+import std/[cpuinfo, cpuload, locks, os]
+
+when defined(nimPreviewSlimSystem):
+  import std/[assertions, typedthreads, sysatomics]
 
 {.push stackTrace:off.}
 
@@ -52,17 +56,14 @@ proc signal(cv: var Semaphore) =
   release(cv.L)
   signal(cv.c)
 
-const CacheLineSize = 32 # true for most archs
+const CacheLineSize = 64 # true for most archs
 
 type
   Barrier {.compilerproc.} = object
     entered: int
     cv: Semaphore # Semaphore takes 3 words at least
-    when sizeof(int) < 8:
-      cacheAlign: array[CacheLineSize-4*sizeof(int), byte]
-    left: int
-    cacheAlign2: array[CacheLineSize-sizeof(int), byte]
-    interest: bool # whether the master is interested in the "all done" event
+    left {.align(CacheLineSize).}: int
+    interest {.align(CacheLineSize).} : bool # whether the master is interested in the "all done" event
 
 proc barrierEnter(b: ptr Barrier) {.compilerproc, inline.} =
   # due to the signaling between threads, it is ensured we are the only
@@ -103,7 +104,7 @@ type
     idx: int
 
   FlowVarBase* = ref FlowVarBaseObj ## Untyped base class for `FlowVar[T] <#FlowVar>`_.
-  FlowVarBaseObj = object of RootObj
+  FlowVarBaseObj {.acyclic.} = object of RootObj
     ready, usesSemaphore, awaited: bool
     cv: Semaphore # for 'blockUntilAny' support
     ai: ptr AwaitInfo
@@ -112,7 +113,7 @@ type
     # be RootRef here otherwise the wrong GC keeps track of it!
     owner: pointer # ptr Worker
 
-  FlowVarObj[T] = object of FlowVarBaseObj
+  FlowVarObj[T] {.acyclic.} = object of FlowVarBaseObj
     blob: T
 
   FlowVar*[T] {.compilerproc.} = ref FlowVarObj[T] ## A data flow variable.
@@ -451,19 +452,21 @@ proc preferSpawn*(): bool =
   ## <#spawnX.t>`_ instead.
   result = gSomeReady.counter > 0
 
-proc spawn*(call: sink typed) {.magic: "Spawn".}
+proc spawn*(call: sink typed) {.magic: "Spawn".} =
   ## Always spawns a new task, so that the `call` is never executed on
   ## the calling thread.
   ##
   ## `call` has to be a proc call `p(...)` where `p` is gcsafe and has a
   ## return type that is either `void` or compatible with `FlowVar[T]`.
+  discard "It uses `nimSpawn3` internally"
 
-proc pinnedSpawn*(id: ThreadId; call: sink typed) {.magic: "Spawn".}
+proc pinnedSpawn*(id: ThreadId; call: sink typed) {.magic: "Spawn".} =
   ## Always spawns a new task on the worker thread with `id`, so that
   ## the `call` is **always** executed on the thread.
   ##
   ## `call` has to be a proc call `p(...)` where `p` is gcsafe and has a
   ## return type that is either `void` or compatible with `FlowVar[T]`.
+  discard "It uses `nimSpawn4` internally"
 
 template spawnX*(call) =
   ## Spawns a new task if a CPU core is ready, otherwise executes the
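Finally, a minimal sketch of the now-deprecated module in use; compiling it with --threads:on will additionally emit the deprecation message added above, pointing at malebolgia, taskpools or weave. The `^` read and sync() calls are the module's usual helpers and are not part of this diff:

    import std/threadpool

    proc double(x: int): int = 2 * x

    let answer: FlowVar[int] = spawn double(21)   # runs on a worker thread
    echo ^answer                                  # `^` blocks until the result is ready
    sync()                                        # wait for all outstanding spawned tasks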