Diffstat (limited to 'lib/pure/concurrency')
-rw-r--r-- | lib/pure/concurrency/atomics.nim | 433
-rw-r--r-- | lib/pure/concurrency/cpuinfo.nim | 110
-rw-r--r-- | lib/pure/concurrency/cpuload.nim | 106
-rw-r--r-- | lib/pure/concurrency/threadpool.nim | 606
-rw-r--r-- | lib/pure/concurrency/threadpool.nim.cfg | 1
5 files changed, 1256 insertions, 0 deletions
diff --git a/lib/pure/concurrency/atomics.nim b/lib/pure/concurrency/atomics.nim new file mode 100644 index 000000000..818f1b37a --- /dev/null +++ b/lib/pure/concurrency/atomics.nim @@ -0,0 +1,433 @@ +# +# +# Nim's Runtime Library +# (c) Copyright 2018 Jörg Wollenschläger +# +# See the file "copying.txt", included in this +# distribution, for details about the copyright. +# + +## Types and operations for atomic operations and lockless algorithms. +## +## Unstable API. +## +## By default, C++ uses C11 atomic primitives. To use C++ `std::atomic`, +## `-d:nimUseCppAtomics` can be defined. + +runnableExamples: + # Atomic + var loc: Atomic[int] + loc.store(4) + assert loc.load == 4 + loc.store(2) + assert loc.load(moRelaxed) == 2 + loc.store(9) + assert loc.load(moAcquire) == 9 + loc.store(0, moRelease) + assert loc.load == 0 + + assert loc.exchange(7) == 0 + assert loc.load == 7 + + var expected = 7 + assert loc.compareExchange(expected, 5, moRelaxed, moRelaxed) + assert expected == 7 + assert loc.load == 5 + + assert not loc.compareExchange(expected, 12, moRelaxed, moRelaxed) + assert expected == 5 + assert loc.load == 5 + + assert loc.fetchAdd(1) == 5 + assert loc.fetchAdd(2) == 6 + assert loc.fetchSub(3) == 8 + + loc.atomicInc(1) + assert loc.load == 6 + + # AtomicFlag + var flag: AtomicFlag + + assert not flag.testAndSet + assert flag.testAndSet + flag.clear(moRelaxed) + assert not flag.testAndSet + +when (defined(cpp) and defined(nimUseCppAtomics)) or defined(nimdoc): + # For the C++ backend, types and operations map directly to C++11 atomics. + + {.push, header: "<atomic>".} + + type + MemoryOrder* {.importcpp: "std::memory_order".} = enum + ## Specifies how non-atomic operations can be reordered around atomic + ## operations. + + moRelaxed + ## No ordering constraints. Only the atomicity and ordering against + ## other atomic operations is guaranteed. + + moConsume + ## This ordering is currently discouraged as it's semantics are + ## being revised. Acquire operations should be preferred. + + moAcquire + ## When applied to a load operation, no reads or writes in the + ## current thread can be reordered before this operation. + + moRelease + ## When applied to a store operation, no reads or writes in the + ## current thread can be reorderd after this operation. + + moAcquireRelease + ## When applied to a read-modify-write operation, this behaves like + ## both an acquire and a release operation. + + moSequentiallyConsistent + ## Behaves like Acquire when applied to load, like Release when + ## applied to a store and like AcquireRelease when applied to a + ## read-modify-write operation. + ## Also guarantees that all threads observe the same total ordering + ## with other moSequentiallyConsistent operations. + + type + Atomic*[T] {.importcpp: "std::atomic", completeStruct.} = object + ## An atomic object with underlying type `T`. + raw: T + + AtomicFlag* {.importcpp: "std::atomic_flag", size: 1.} = object + ## An atomic boolean state. + + # Access operations + + proc load*[T](location: var Atomic[T]; order: MemoryOrder = moSequentiallyConsistent): T {.importcpp: "#.load(@)".} + ## Atomically obtains the value of the atomic object. + + proc store*[T](location: var Atomic[T]; desired: T; order: MemoryOrder = moSequentiallyConsistent) {.importcpp: "#.store(@)".} + ## Atomically replaces the value of the atomic object with the `desired` + ## value. 
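A minimal sketch of the release/acquire publication pattern that these memory orders and the `load`/`store` operations enable, assuming a separate program compiled with `--threads:on` that imports this module; the `payload`/`ready` variables, the helper procs and the `std/typedthreads` import are illustrative assumptions, not part of this file:

  import std/[atomics, typedthreads]

  var
    payload: Atomic[int]
    ready: Atomic[bool]

  proc producer() {.thread.} =
    payload.store(42, moRelaxed)      # write the data first
    ready.store(true, moRelease)      # then publish it; release pairs with the acquire below

  proc consumer() {.thread.} =
    while not ready.load(moAcquire):  # acquire: later reads cannot be reordered before this
      cpuRelax()
    assert payload.load(moRelaxed) == 42

  var prod, cons: Thread[void]
  createThread(prod, producer)
  createThread(cons, consumer)
  joinThreads(prod, cons)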
+ + proc exchange*[T](location: var Atomic[T]; desired: T; order: MemoryOrder = moSequentiallyConsistent): T {.importcpp: "#.exchange(@)".} + ## Atomically replaces the value of the atomic object with the `desired` + ## value and returns the old value. + + proc compareExchange*[T](location: var Atomic[T]; expected: var T; desired: T; order: MemoryOrder = moSequentiallyConsistent): bool {.importcpp: "#.compare_exchange_strong(@)".} + ## Atomically compares the value of the atomic object with the `expected` + ## value and performs exchange with the `desired` one if equal or load if + ## not. Returns true if the exchange was successful. + + proc compareExchange*[T](location: var Atomic[T]; expected: var T; desired: T; success, failure: MemoryOrder): bool {.importcpp: "#.compare_exchange_strong(@)".} + ## Same as above, but allows for different memory orders for success and + ## failure. + + proc compareExchangeWeak*[T](location: var Atomic[T]; expected: var T; desired: T; order: MemoryOrder = moSequentiallyConsistent): bool {.importcpp: "#.compare_exchange_weak(@)".} + ## Same as above, but is allowed to fail spuriously. + + proc compareExchangeWeak*[T](location: var Atomic[T]; expected: var T; desired: T; success, failure: MemoryOrder): bool {.importcpp: "#.compare_exchange_weak(@)".} + ## Same as above, but allows for different memory orders for success and + ## failure. + + # Numerical operations + + proc fetchAdd*[T: SomeInteger](location: var Atomic[T]; value: T; order: MemoryOrder = moSequentiallyConsistent): T {.importcpp: "#.fetch_add(@)".} + ## Atomically adds a `value` to the atomic integer and returns the + ## original value. + + proc fetchSub*[T: SomeInteger](location: var Atomic[T]; value: T; order: MemoryOrder = moSequentiallyConsistent): T {.importcpp: "#.fetch_sub(@)".} + ## Atomically subtracts a `value` to the atomic integer and returns the + ## original value. + + proc fetchAnd*[T: SomeInteger](location: var Atomic[T]; value: T; order: MemoryOrder = moSequentiallyConsistent): T {.importcpp: "#.fetch_and(@)".} + ## Atomically replaces the atomic integer with it's bitwise AND + ## with the specified `value` and returns the original value. + + proc fetchOr*[T: SomeInteger](location: var Atomic[T]; value: T; order: MemoryOrder = moSequentiallyConsistent): T {.importcpp: "#.fetch_or(@)".} + ## Atomically replaces the atomic integer with it's bitwise OR + ## with the specified `value` and returns the original value. + + proc fetchXor*[T: SomeInteger](location: var Atomic[T]; value: T; order: MemoryOrder = moSequentiallyConsistent): T {.importcpp: "#.fetch_xor(@)".} + ## Atomically replaces the atomic integer with it's bitwise XOR + ## with the specified `value` and returns the original value. + + # Flag operations + + proc testAndSet*(location: var AtomicFlag; order: MemoryOrder = moSequentiallyConsistent): bool {.importcpp: "#.test_and_set(@)".} + ## Atomically sets the atomic flag to true and returns the original value. + + proc clear*(location: var AtomicFlag; order: MemoryOrder = moSequentiallyConsistent) {.importcpp: "#.clear(@)".} + ## Atomically sets the value of the atomic flag to false. + + proc fence*(order: MemoryOrder) {.importcpp: "std::atomic_thread_fence(@)".} + ## Ensures memory ordering without using atomic operations. + + proc signalFence*(order: MemoryOrder) {.importcpp: "std::atomic_signal_fence(@)".} + ## Prevents reordering of accesses by the compiler as would fence, but + ## inserts no CPU instructions for memory ordering. 
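A hedged sketch of the compare-and-swap retry loop that `compareExchangeWeak` is intended for, again assuming a separate program using this module; `atomicMax` is a hypothetical helper, not part of this file:

  proc atomicMax(loc: var Atomic[int]; candidate: int) =
    # Store `candidate` into `loc` if it exceeds the current value.
    var current = loc.load(moRelaxed)
    while candidate > current:
      # compareExchangeWeak may fail spuriously; on failure it reloads the
      # actual value into `current`, so the loop re-checks the condition.
      if loc.compareExchangeWeak(current, candidate, moAcquireRelease, moRelaxed):
        break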
+ + {.pop.} + +else: + # For the C backend, atomics map to C11 built-ins on GCC and Clang for + # trivial Nim types. Other types are implemented using spin locks. + # This could be overcome by supporting advanced importc-patterns. + + # Since MSVC does not implement C11, we fall back to MS intrinsics + # where available. + + type + Trivial = SomeNumber | bool | enum | ptr | pointer + # A type that is known to be atomic and whose size is known at + # compile time to be 8 bytes or less + + template nonAtomicType*(T: typedesc[Trivial]): untyped = + # Maps types to integers of the same size + when sizeof(T) == 1: int8 + elif sizeof(T) == 2: int16 + elif sizeof(T) == 4: int32 + elif sizeof(T) == 8: int64 + + when defined(vcc): + + # TODO: Trivial types should be volatile and use VC's special volatile + # semantics for store and loads. + + type + MemoryOrder* = enum + moRelaxed + moConsume + moAcquire + moRelease + moAcquireRelease + moSequentiallyConsistent + + Atomic*[T] = object + when T is Trivial: + value: T.nonAtomicType + else: + nonAtomicValue: T + guard: AtomicFlag + + AtomicFlag* = distinct int8 + + {.push header: "<intrin.h>".} + + # MSVC intrinsics + proc interlockedExchange(location: pointer; desired: int8): int8 {.importc: "_InterlockedExchange8".} + proc interlockedExchange(location: pointer; desired: int16): int16 {.importc: "_InterlockedExchange16".} + proc interlockedExchange(location: pointer; desired: int32): int32 {.importc: "_InterlockedExchange".} + proc interlockedExchange(location: pointer; desired: int64): int64 {.importc: "_InterlockedExchange64".} + + proc interlockedCompareExchange(location: pointer; desired, expected: int8): int8 {.importc: "_InterlockedCompareExchange8".} + proc interlockedCompareExchange(location: pointer; desired, expected: int16): int16 {.importc: "_InterlockedCompareExchange16".} + proc interlockedCompareExchange(location: pointer; desired, expected: int32): int32 {.importc: "_InterlockedCompareExchange".} + proc interlockedCompareExchange(location: pointer; desired, expected: int64): int64 {.importc: "_InterlockedCompareExchange64".} + + proc interlockedAnd(location: pointer; value: int8): int8 {.importc: "_InterlockedAnd8".} + proc interlockedAnd(location: pointer; value: int16): int16 {.importc: "_InterlockedAnd16".} + proc interlockedAnd(location: pointer; value: int32): int32 {.importc: "_InterlockedAnd".} + proc interlockedAnd(location: pointer; value: int64): int64 {.importc: "_InterlockedAnd64".} + + proc interlockedOr(location: pointer; value: int8): int8 {.importc: "_InterlockedOr8".} + proc interlockedOr(location: pointer; value: int16): int16 {.importc: "_InterlockedOr16".} + proc interlockedOr(location: pointer; value: int32): int32 {.importc: "_InterlockedOr".} + proc interlockedOr(location: pointer; value: int64): int64 {.importc: "_InterlockedOr64".} + + proc interlockedXor(location: pointer; value: int8): int8 {.importc: "_InterlockedXor8".} + proc interlockedXor(location: pointer; value: int16): int16 {.importc: "_InterlockedXor16".} + proc interlockedXor(location: pointer; value: int32): int32 {.importc: "_InterlockedXor".} + proc interlockedXor(location: pointer; value: int64): int64 {.importc: "_InterlockedXor64".} + + proc fence(order: MemoryOrder): int64 {.importc: "_ReadWriteBarrier()".} + proc signalFence(order: MemoryOrder): int64 {.importc: "_ReadWriteBarrier()".} + + {.pop.} + + proc testAndSet*(location: var AtomicFlag; order: MemoryOrder = moSequentiallyConsistent): bool = + interlockedOr(addr(location), 1'i8) == 
1'i8 + proc clear*(location: var AtomicFlag; order: MemoryOrder = moSequentiallyConsistent) = + discard interlockedAnd(addr(location), 0'i8) + + proc load*[T: Trivial](location: var Atomic[T]; order: MemoryOrder = moSequentiallyConsistent): T {.inline.} = + cast[T](interlockedOr(addr(location.value), (nonAtomicType(T))0)) + proc store*[T: Trivial](location: var Atomic[T]; desired: T; order: MemoryOrder = moSequentiallyConsistent) {.inline.} = + discard interlockedExchange(addr(location.value), cast[nonAtomicType(T)](desired)) + + proc exchange*[T: Trivial](location: var Atomic[T]; desired: T; order: MemoryOrder = moSequentiallyConsistent): T {.inline.} = + cast[T](interlockedExchange(addr(location.value), cast[int64](desired))) + proc compareExchange*[T: Trivial](location: var Atomic[T]; expected: var T; desired: T; success, failure: MemoryOrder): bool {.inline.} = + cast[T](interlockedCompareExchange(addr(location.value), cast[nonAtomicType(T)](desired), cast[nonAtomicType(T)](expected))) == expected + proc compareExchange*[T: Trivial](location: var Atomic[T]; expected: var T; desired: T; order: MemoryOrder = moSequentiallyConsistent): bool {.inline.} = + compareExchange(location, expected, desired, order, order) + proc compareExchangeWeak*[T: Trivial](location: var Atomic[T]; expected: var T; desired: T; success, failure: MemoryOrder): bool {.inline.} = + compareExchange(location, expected, desired, success, failure) + proc compareExchangeWeak*[T: Trivial](location: var Atomic[T]; expected: var T; desired: T; order: MemoryOrder = moSequentiallyConsistent): bool {.inline.} = + compareExchangeWeak(location, expected, desired, order, order) + + proc fetchAdd*[T: SomeInteger](location: var Atomic[T]; value: T; order: MemoryOrder = moSequentiallyConsistent): T {.inline.} = + var currentValue = location.load() + while not compareExchangeWeak(location, currentValue, currentValue + value): discard + proc fetchSub*[T: SomeInteger](location: var Atomic[T]; value: T; order: MemoryOrder = moSequentiallyConsistent): T {.inline.} = + fetchAdd(location, -value, order) + proc fetchAnd*[T: SomeInteger](location: var Atomic[T]; value: T; order: MemoryOrder = moSequentiallyConsistent): T {.inline.} = + cast[T](interlockedAnd(addr(location.value), cast[nonAtomicType(T)](value))) + proc fetchOr*[T: SomeInteger](location: var Atomic[T]; value: T; order: MemoryOrder = moSequentiallyConsistent): T {.inline.} = + cast[T](interlockedOr(addr(location.value), cast[nonAtomicType(T)](value))) + proc fetchXor*[T: SomeInteger](location: var Atomic[T]; value: T; order: MemoryOrder = moSequentiallyConsistent): T {.inline.} = + cast[T](interlockedXor(addr(location.value), cast[nonAtomicType(T)](value))) + + else: + when defined(cpp): + {.push, header: "<atomic>".} + template maybeWrapStd(x: string): string = + "std::" & x + else: + {.push, header: "<stdatomic.h>".} + template maybeWrapStd(x: string): string = + x + + type + MemoryOrder* {.importc: "memory_order".maybeWrapStd.} = enum + moRelaxed + moConsume + moAcquire + moRelease + moAcquireRelease + moSequentiallyConsistent + + when defined(cpp): + type + # Atomic*[T] {.importcpp: "_Atomic('0)".} = object + + AtomicInt8 {.importc: "std::atomic<NI8>".} = int8 + AtomicInt16 {.importc: "std::atomic<NI16>".} = int16 + AtomicInt32 {.importc: "std::atomic<NI32>".} = int32 + AtomicInt64 {.importc: "std::atomic<NI64>".} = int64 + else: + type + # Atomic*[T] {.importcpp: "_Atomic('0)".} = object + + AtomicInt8 {.importc: "_Atomic NI8".} = int8 + AtomicInt16 {.importc: "_Atomic 
NI16".} = int16 + AtomicInt32 {.importc: "_Atomic NI32".} = int32 + AtomicInt64 {.importc: "_Atomic NI64".} = int64 + + type + AtomicFlag* {.importc: "atomic_flag".maybeWrapStd, size: 1.} = object + + Atomic*[T] = object + when T is Trivial: + # Maps the size of a trivial type to it's internal atomic type + when sizeof(T) == 1: value: AtomicInt8 + elif sizeof(T) == 2: value: AtomicInt16 + elif sizeof(T) == 4: value: AtomicInt32 + elif sizeof(T) == 8: value: AtomicInt64 + else: + nonAtomicValue: T + guard: AtomicFlag + + #proc init*[T](location: var Atomic[T]; value: T): T {.importcpp: "atomic_init(@)".} + proc atomic_load_explicit[T, A](location: ptr A; order: MemoryOrder): T {.importc: "atomic_load_explicit".maybeWrapStd.} + proc atomic_store_explicit[T, A](location: ptr A; desired: T; order: MemoryOrder = moSequentiallyConsistent) {.importc: "atomic_store_explicit".maybeWrapStd.} + proc atomic_exchange_explicit[T, A](location: ptr A; desired: T; order: MemoryOrder = moSequentiallyConsistent): T {.importc: "atomic_exchange_explicit".maybeWrapStd.} + proc atomic_compare_exchange_strong_explicit[T, A](location: ptr A; expected: ptr T; desired: T; success, failure: MemoryOrder): bool {.importc: "atomic_compare_exchange_strong_explicit".maybeWrapStd.} + proc atomic_compare_exchange_weak_explicit[T, A](location: ptr A; expected: ptr T; desired: T; success, failure: MemoryOrder): bool {.importc: "atomic_compare_exchange_weak_explicit".maybeWrapStd.} + + # Numerical operations + proc atomic_fetch_add_explicit[T, A](location: ptr A; value: T; order: MemoryOrder = moSequentiallyConsistent): T {.importc: "atomic_fetch_add_explicit".maybeWrapStd.} + proc atomic_fetch_sub_explicit[T, A](location: ptr A; value: T; order: MemoryOrder = moSequentiallyConsistent): T {.importc: "atomic_fetch_sub_explicit".maybeWrapStd.} + proc atomic_fetch_and_explicit[T, A](location: ptr A; value: T; order: MemoryOrder = moSequentiallyConsistent): T {.importc: "atomic_fetch_and_explicit".maybeWrapStd.} + proc atomic_fetch_or_explicit[T, A](location: ptr A; value: T; order: MemoryOrder = moSequentiallyConsistent): T {.importc: "atomic_fetch_or_explicit".maybeWrapStd.} + proc atomic_fetch_xor_explicit[T, A](location: ptr A; value: T; order: MemoryOrder = moSequentiallyConsistent): T {.importc: "atomic_fetch_xor_explicit".maybeWrapStd.} + + # Flag operations + # var ATOMIC_FLAG_INIT {.importc, nodecl.}: AtomicFlag + # proc init*(location: var AtomicFlag) {.inline.} = location = ATOMIC_FLAG_INIT + proc testAndSet*(location: var AtomicFlag; order: MemoryOrder = moSequentiallyConsistent): bool {.importc: "atomic_flag_test_and_set_explicit".maybeWrapStd.} + proc clear*(location: var AtomicFlag; order: MemoryOrder = moSequentiallyConsistent) {.importc: "atomic_flag_clear_explicit".maybeWrapStd.} + + proc fence*(order: MemoryOrder) {.importc: "atomic_thread_fence".maybeWrapStd.} + proc signalFence*(order: MemoryOrder) {.importc: "atomic_signal_fence".maybeWrapStd.} + + {.pop.} + + proc load*[T: Trivial](location: var Atomic[T]; order: MemoryOrder = moSequentiallyConsistent): T {.inline.} = + cast[T](atomic_load_explicit[nonAtomicType(T), typeof(location.value)](addr(location.value), order)) + proc store*[T: Trivial](location: var Atomic[T]; desired: T; order: MemoryOrder = moSequentiallyConsistent) {.inline.} = + atomic_store_explicit(addr(location.value), cast[nonAtomicType(T)](desired), order) + proc exchange*[T: Trivial](location: var Atomic[T]; desired: T; order: MemoryOrder = moSequentiallyConsistent): T {.inline.} = + 
cast[T](atomic_exchange_explicit(addr(location.value), cast[nonAtomicType(T)](desired), order)) + proc compareExchange*[T: Trivial](location: var Atomic[T]; expected: var T; desired: T; success, failure: MemoryOrder): bool {.inline.} = + atomic_compare_exchange_strong_explicit(addr(location.value), cast[ptr nonAtomicType(T)](addr(expected)), cast[nonAtomicType(T)](desired), success, failure) + proc compareExchange*[T: Trivial](location: var Atomic[T]; expected: var T; desired: T; order: MemoryOrder = moSequentiallyConsistent): bool {.inline.} = + compareExchange(location, expected, desired, order, order) + + proc compareExchangeWeak*[T: Trivial](location: var Atomic[T]; expected: var T; desired: T; success, failure: MemoryOrder): bool {.inline.} = + atomic_compare_exchange_weak_explicit(addr(location.value), cast[ptr nonAtomicType(T)](addr(expected)), cast[nonAtomicType(T)](desired), success, failure) + proc compareExchangeWeak*[T: Trivial](location: var Atomic[T]; expected: var T; desired: T; order: MemoryOrder = moSequentiallyConsistent): bool {.inline.} = + compareExchangeWeak(location, expected, desired, order, order) + + # Numerical operations + proc fetchAdd*[T: SomeInteger](location: var Atomic[T]; value: T; order: MemoryOrder = moSequentiallyConsistent): T {.inline.} = + cast[T](atomic_fetch_add_explicit(addr(location.value), cast[nonAtomicType(T)](value), order)) + proc fetchSub*[T: SomeInteger](location: var Atomic[T]; value: T; order: MemoryOrder = moSequentiallyConsistent): T {.inline.} = + cast[T](atomic_fetch_sub_explicit(addr(location.value), cast[nonAtomicType(T)](value), order)) + proc fetchAnd*[T: SomeInteger](location: var Atomic[T]; value: T; order: MemoryOrder = moSequentiallyConsistent): T {.inline.} = + cast[T](atomic_fetch_and_explicit(addr(location.value), cast[nonAtomicType(T)](value), order)) + proc fetchOr*[T: SomeInteger](location: var Atomic[T]; value: T; order: MemoryOrder = moSequentiallyConsistent): T {.inline.} = + cast[T](atomic_fetch_or_explicit(addr(location.value), cast[nonAtomicType(T)](value), order)) + proc fetchXor*[T: SomeInteger](location: var Atomic[T]; value: T; order: MemoryOrder = moSequentiallyConsistent): T {.inline.} = + cast[T](atomic_fetch_xor_explicit(addr(location.value), cast[nonAtomicType(T)](value), order)) + + template withLock[T: not Trivial](location: var Atomic[T]; order: MemoryOrder; body: untyped): untyped = + while testAndSet(location.guard, moAcquire): discard + try: + body + finally: + clear(location.guard, moRelease) + + proc load*[T: not Trivial](location: var Atomic[T]; order: MemoryOrder = moSequentiallyConsistent): T {.inline.} = + withLock(location, order): + result = location.nonAtomicValue + + proc store*[T: not Trivial](location: var Atomic[T]; desired: T; order: MemoryOrder = moSequentiallyConsistent) {.inline.} = + withLock(location, order): + location.nonAtomicValue = desired + + proc exchange*[T: not Trivial](location: var Atomic[T]; desired: T; order: MemoryOrder = moSequentiallyConsistent): T {.inline.} = + withLock(location, order): + result = location.nonAtomicValue + location.nonAtomicValue = desired + + proc compareExchange*[T: not Trivial](location: var Atomic[T]; expected: var T; desired: T; success, failure: MemoryOrder): bool {.inline.} = + withLock(location, success): + if location.nonAtomicValue != expected: + expected = location.nonAtomicValue + return false + expected = desired + swap(location.nonAtomicValue, expected) + return true + + proc compareExchangeWeak*[T: not Trivial](location: var 
Atomic[T]; expected: var T; desired: T; success, failure: MemoryOrder): bool {.inline.} = + compareExchange(location, expected, desired, success, failure) + + proc compareExchange*[T: not Trivial](location: var Atomic[T]; expected: var T; desired: T; order: MemoryOrder = moSequentiallyConsistent): bool {.inline.} = + compareExchange(location, expected, desired, order, order) + + proc compareExchangeWeak*[T: not Trivial](location: var Atomic[T]; expected: var T; desired: T; order: MemoryOrder = moSequentiallyConsistent): bool {.inline.} = + compareExchangeWeak(location, expected, desired, order, order) + +proc atomicInc*[T: SomeInteger](location: var Atomic[T]; value: T = 1) {.inline.} = + ## Atomically increments the atomic integer by some `value`. + discard location.fetchAdd(value) + +proc atomicDec*[T: SomeInteger](location: var Atomic[T]; value: T = 1) {.inline.} = + ## Atomically decrements the atomic integer by some `value`. + discard location.fetchSub(value) + +proc `+=`*[T: SomeInteger](location: var Atomic[T]; value: T) {.inline.} = + ## Atomically increments the atomic integer by some `value`. + discard location.fetchAdd(value) + +proc `-=`*[T: SomeInteger](location: var Atomic[T]; value: T) {.inline.} = + ## Atomically decrements the atomic integer by some `value`. + discard location.fetchSub(value) diff --git a/lib/pure/concurrency/cpuinfo.nim b/lib/pure/concurrency/cpuinfo.nim new file mode 100644 index 000000000..9bc3fd579 --- /dev/null +++ b/lib/pure/concurrency/cpuinfo.nim @@ -0,0 +1,110 @@ +# +# +# Nim's Runtime Library +# (c) Copyright 2015 Andreas Rumpf +# +# See the file "copying.txt", included in this +# distribution, for details about the copyright. +# + +## This module implements a proc to determine the number of CPUs / cores. + +runnableExamples: + doAssert countProcessors() > 0 + + +include "system/inclrtl" + +when defined(js): + import std/jsffi + proc countProcessorsImpl(): int = + when defined(nodejs): + let jsOs = require("os") + let jsObj = jsOs.cpus().length + else: + # `navigator.hardwareConcurrency` + # works on browser as well as deno. + let navigator{.importcpp.}: JsObject + let jsObj = navigator.hardwareConcurrency + result = jsObj.to int +else: + when defined(posix) and not (defined(macosx) or defined(bsd)): + import std/posix + + when defined(windows): + import std/private/win_getsysteminfo + + when defined(freebsd) or defined(macosx): + {.emit: "#include <sys/types.h>".} + + when defined(openbsd) or defined(netbsd): + {.emit: "#include <sys/param.h>".} + + when defined(macosx) or defined(bsd): + # we HAVE to emit param.h before sysctl.h so we cannot use .header here + # either. The amount of archaic bullshit in Poonix based OSes is just insane. + {.emit: "#include <sys/sysctl.h>".} + {.push nodecl.} + when defined(macosx): + proc sysctlbyname(name: cstring, + oldp: pointer, oldlenp: var csize_t, + newp: pointer, newlen: csize_t): cint {.importc.} + let + CTL_HW{.importc.}: cint + HW_NCPU{.importc.}: cint + proc sysctl[I: static[int]](name: var array[I, cint], namelen: cuint, + oldp: pointer, oldlenp: var csize_t, + newp: pointer, newlen: csize_t): cint {.importc.} + {.pop.} + + when defined(genode): + import genode/env + + proc affinitySpaceTotal(env: GenodeEnvPtr): cuint {. 
+ importcpp: "@->cpu().affinity_space().total()".} + + when defined(haiku): + type + SystemInfo {.importc: "system_info", header: "<OS.h>".} = object + cpuCount {.importc: "cpu_count".}: uint32 + + proc getSystemInfo(info: ptr SystemInfo): int32 {.importc: "get_system_info", + header: "<OS.h>".} + + proc countProcessorsImpl(): int {.inline.} = + when defined(windows): + var + si: SystemInfo + getSystemInfo(addr si) + result = int(si.dwNumberOfProcessors) + elif defined(macosx) or defined(bsd): + let dest = addr result + var len = sizeof(result).csize_t + when defined(macosx): + # alias of "hw.activecpu" + if sysctlbyname("hw.logicalcpu", dest, len, nil, 0) == 0: + return + var mib = [CTL_HW, HW_NCPU] + if sysctl(mib, 2, dest, len, nil, 0) == 0: + return + elif defined(hpux): + result = mpctl(MPC_GETNUMSPUS, nil, nil) + elif defined(irix): + var SC_NPROC_ONLN {.importc: "_SC_NPROC_ONLN", header: "<unistd.h>".}: cint + result = sysconf(SC_NPROC_ONLN) + elif defined(genode): + result = runtimeEnv.affinitySpaceTotal().int + elif defined(haiku): + var sysinfo: SystemInfo + if getSystemInfo(addr sysinfo) == 0: + result = sysinfo.cpuCount.int + else: + result = sysconf(SC_NPROCESSORS_ONLN) + if result < 0: result = 0 + + + +proc countProcessors*(): int {.rtl, extern: "ncpi$1".} = + ## Returns the number of the processors/cores the machine has. + ## Returns 0 if it cannot be detected. + countProcessorsImpl() diff --git a/lib/pure/concurrency/cpuload.nim b/lib/pure/concurrency/cpuload.nim new file mode 100644 index 000000000..bfbf16721 --- /dev/null +++ b/lib/pure/concurrency/cpuload.nim @@ -0,0 +1,106 @@ +# +# +# Nim's Runtime Library +# (c) Copyright 2015 Andreas Rumpf +# +# See the file "copying.txt", included in this +# distribution, for details about the copyright. +# + +## This module implements a helper for a thread pool to determine whether +## creating a thread is a good idea. +## +## Unstable API. + +when defined(windows): + import std/[winlean, os, strutils, math] + + proc `-`(a, b: FILETIME): int64 = a.rdFileTime - b.rdFileTime +elif defined(linux): + from std/cpuinfo import countProcessors + +when defined(nimPreviewSlimSystem): + import std/syncio + +type + ThreadPoolAdvice* = enum + doNothing, + doCreateThread, # create additional thread for throughput + doShutdownThread # too many threads are busy, shutdown one + + ThreadPoolState* = object + when defined(windows): + prevSysKernel, prevSysUser, prevProcKernel, prevProcUser: FILETIME + calls*: int + +proc advice*(s: var ThreadPoolState): ThreadPoolAdvice = + when defined(windows): + var + sysIdle, sysKernel, sysUser, + procCreation, procExit, procKernel, procUser: FILETIME + if getSystemTimes(sysIdle, sysKernel, sysUser) == 0 or + getProcessTimes(Handle(-1), procCreation, procExit, + procKernel, procUser) == 0: + return doNothing + if s.calls > 0: + let + sysKernelDiff = sysKernel - s.prevSysKernel + sysUserDiff = sysUser - s.prevSysUser + + procKernelDiff = procKernel - s.prevProcKernel + procUserDiff = procUser - s.prevProcUser + + sysTotal = sysKernelDiff + sysUserDiff + procTotal = procKernelDiff + procUserDiff + # total CPU usage < 85% --> create a new worker thread. + # Measurements show that 100% and often even 90% is not reached even + # if all my cores are busy. 
+ if sysTotal == 0 or procTotal.float / sysTotal.float < 0.85: + result = doCreateThread + s.prevSysKernel = sysKernel + s.prevSysUser = sysUser + s.prevProcKernel = procKernel + s.prevProcUser = procUser + elif defined(linux): + proc fscanf(c: File, frmt: cstring) {.varargs, importc, + header: "<stdio.h>".} + + var f: File + if f.open("/proc/loadavg"): + var b: float + var busy, total: int + fscanf(f,"%lf %lf %lf %ld/%ld", + addr b, addr b, addr b, addr busy, addr total) + f.close() + let cpus = countProcessors() + if busy-1 < cpus: + result = doCreateThread + elif busy-1 >= cpus*2: + result = doShutdownThread + else: + result = doNothing + else: + result = doNothing + else: + # XXX implement this for other OSes + result = doNothing + inc s.calls + +when not defined(testing) and isMainModule and not defined(nimdoc): + import std/random + + proc busyLoop() = + while true: + discard rand(80) + os.sleep(100) + + spawn busyLoop() + spawn busyLoop() + spawn busyLoop() + spawn busyLoop() + + var s: ThreadPoolState + + for i in 1 .. 70: + echo advice(s) + os.sleep(1000) diff --git a/lib/pure/concurrency/threadpool.nim b/lib/pure/concurrency/threadpool.nim new file mode 100644 index 000000000..06ed2fe54 --- /dev/null +++ b/lib/pure/concurrency/threadpool.nim @@ -0,0 +1,606 @@ +# +# +# Nim's Runtime Library +# (c) Copyright 2015 Andreas Rumpf +# +# See the file "copying.txt", included in this +# distribution, for details about the copyright. +# + +{.deprecated: "use the nimble packages `malebolgia`, `taskpools` or `weave` instead".} + +## Implements Nim's `parallel & spawn statements <manual_experimental.html#parallel-amp-spawn>`_. +## +## Unstable API. +## +## See also +## ======== +## * `threads module <typedthreads.html>`_ for basic thread support +## * `locks module <locks.html>`_ for locks and condition variables +## * `asyncdispatch module <asyncdispatch.html>`_ for asynchronous IO + +when not compileOption("threads"): + {.error: "Threadpool requires --threads:on option.".} + +import std/[cpuinfo, cpuload, locks, os] + +when defined(nimPreviewSlimSystem): + import std/[assertions, typedthreads, sysatomics] + +{.push stackTrace:off.} + +type + Semaphore = object + c: Cond + L: Lock + counter: int + +proc initSemaphore(cv: var Semaphore) = + initCond(cv.c) + initLock(cv.L) + +proc destroySemaphore(cv: var Semaphore) {.inline.} = + deinitCond(cv.c) + deinitLock(cv.L) + +proc blockUntil(cv: var Semaphore) = + acquire(cv.L) + while cv.counter <= 0: + wait(cv.c, cv.L) + dec cv.counter + release(cv.L) + +proc signal(cv: var Semaphore) = + acquire(cv.L) + inc cv.counter + release(cv.L) + signal(cv.c) + +const CacheLineSize = 64 # true for most archs + +type + Barrier {.compilerproc.} = object + entered: int + cv: Semaphore # Semaphore takes 3 words at least + left {.align(CacheLineSize).}: int + interest {.align(CacheLineSize).} : bool # whether the master is interested in the "all done" event + +proc barrierEnter(b: ptr Barrier) {.compilerproc, inline.} = + # due to the signaling between threads, it is ensured we are the only + # one with access to 'entered' so we don't need 'atomicInc' here: + inc b.entered + # also we need no 'fence' instructions here as soon 'nimArgsPassingDone' + # will be called which already will perform a fence for us. + +proc barrierLeave(b: ptr Barrier) {.compilerproc, inline.} = + atomicInc b.left + when not defined(x86): fence() + # We may not have seen the final value of b.entered yet, + # so we need to check for >= instead of ==. 
+ if b.interest and b.left >= b.entered: signal(b.cv) + +proc openBarrier(b: ptr Barrier) {.compilerproc, inline.} = + b.entered = 0 + b.left = 0 + b.interest = false + +proc closeBarrier(b: ptr Barrier) {.compilerproc.} = + fence() + if b.left != b.entered: + b.cv.initSemaphore() + fence() + b.interest = true + fence() + while b.left != b.entered: blockUntil(b.cv) + destroySemaphore(b.cv) + +{.pop.} + +# ---------------------------------------------------------------------------- + +type + AwaitInfo = object + cv: Semaphore + idx: int + + FlowVarBase* = ref FlowVarBaseObj ## Untyped base class for `FlowVar[T] <#FlowVar>`_. + FlowVarBaseObj {.acyclic.} = object of RootObj + ready, usesSemaphore, awaited: bool + cv: Semaphore # for 'blockUntilAny' support + ai: ptr AwaitInfo + idx: int + data: pointer # we incRef and unref it to keep it alive; note this MUST NOT + # be RootRef here otherwise the wrong GC keeps track of it! + owner: pointer # ptr Worker + + FlowVarObj[T] {.acyclic.} = object of FlowVarBaseObj + blob: T + + FlowVar*[T] {.compilerproc.} = ref FlowVarObj[T] ## A data flow variable. + + ToFreeQueue = object + len: int + lock: Lock + empty: Semaphore + data: array[128, pointer] + + WorkerProc = proc (thread, args: pointer) {.nimcall, gcsafe.} + Worker = object + taskArrived: Semaphore + taskStarted: Semaphore #\ + # task data: + f: WorkerProc + data: pointer + ready: bool # put it here for correct alignment! + initialized: bool # whether it has even been initialized + shutdown: bool # the pool requests to shut down this worker thread + q: ToFreeQueue + readyForTask: Semaphore + +const threadpoolWaitMs {.intdefine.}: int = 100 + +proc blockUntil*(fv: var FlowVarBaseObj) = + ## Waits until the value for `fv` arrives. + ## + ## Usually it is not necessary to call this explicitly. + if fv.usesSemaphore and not fv.awaited: + fv.awaited = true + blockUntil(fv.cv) + destroySemaphore(fv.cv) + +proc selectWorker(w: ptr Worker; fn: WorkerProc; data: pointer): bool = + if cas(addr w.ready, true, false): + w.data = data + w.f = fn + signal(w.taskArrived) + blockUntil(w.taskStarted) + result = true + +proc cleanFlowVars(w: ptr Worker) = + let q = addr(w.q) + acquire(q.lock) + for i in 0 ..< q.len: + GC_unref(cast[RootRef](q.data[i])) + #echo "GC_unref" + q.len = 0 + release(q.lock) + +proc wakeupWorkerToProcessQueue(w: ptr Worker) = + # we have to ensure it's us who wakes up the owning thread. + # This is quite horrible code, but it runs so rarely that it doesn't matter: + while not cas(addr w.ready, true, false): + cpuRelax() + discard + w.data = nil + w.f = proc (w, a: pointer) {.nimcall.} = + let w = cast[ptr Worker](w) + cleanFlowVars(w) + signal(w.q.empty) + signal(w.taskArrived) + +proc attach(fv: FlowVarBase; i: int): bool = + acquire(fv.cv.L) + if fv.cv.counter <= 0: + fv.idx = i + result = true + else: + result = false + release(fv.cv.L) + +proc finished(fv: var FlowVarBaseObj) = + doAssert fv.ai.isNil, "flowVar is still attached to an 'blockUntilAny'" + # we have to protect against the rare cases where the owner of the flowVar + # simply disregards the flowVar and yet the "flowVar" has not yet written + # anything to it: + blockUntil(fv) + if fv.data.isNil: return + let owner = cast[ptr Worker](fv.owner) + let q = addr(owner.q) + acquire(q.lock) + while not (q.len < q.data.len): + #echo "EXHAUSTED!" 
+ release(q.lock) + wakeupWorkerToProcessQueue(owner) + blockUntil(q.empty) + acquire(q.lock) + q.data[q.len] = cast[pointer](fv.data) + inc q.len + release(q.lock) + fv.data = nil + # the worker thread waits for "data" to be set to nil before shutting down + owner.data = nil + +proc `=destroy`[T](fv: var FlowVarObj[T]) = + finished(fv) + `=destroy`(fv.blob) + +proc nimCreateFlowVar[T](): FlowVar[T] {.compilerproc.} = + new(result) + +proc nimFlowVarCreateSemaphore(fv: FlowVarBase) {.compilerproc.} = + fv.cv.initSemaphore() + fv.usesSemaphore = true + +proc nimFlowVarSignal(fv: FlowVarBase) {.compilerproc.} = + if fv.ai != nil: + acquire(fv.ai.cv.L) + fv.ai.idx = fv.idx + inc fv.ai.cv.counter + release(fv.ai.cv.L) + signal(fv.ai.cv.c) + if fv.usesSemaphore: + signal(fv.cv) + +proc awaitAndThen*[T](fv: FlowVar[T]; action: proc (x: T) {.closure.}) = + ## Blocks until `fv` is available and then passes its value + ## to `action`. + ## + ## Note that due to Nim's parameter passing semantics, this + ## means that `T` doesn't need to be copied, so `awaitAndThen` can + ## sometimes be more efficient than the `^ proc <#^,FlowVar[T]>`_. + blockUntil(fv[]) + when defined(nimV2): + action(fv.blob) + elif T is string or T is seq: + action(cast[T](fv.data)) + elif T is ref: + {.error: "'awaitAndThen' not available for FlowVar[ref]".} + else: + action(fv.blob) + finished(fv[]) + +proc unsafeRead*[T](fv: FlowVar[ref T]): ptr T = + ## Blocks until the value is available and then returns this value. + blockUntil(fv[]) + when defined(nimV2): + result = cast[ptr T](fv.blob) + else: + result = cast[ptr T](fv.data) + finished(fv[]) + +proc `^`*[T](fv: FlowVar[T]): T = + ## Blocks until the value is available and then returns this value. + blockUntil(fv[]) + when not defined(nimV2) and (T is string or T is seq or T is ref): + deepCopy result, cast[T](fv.data) + else: + result = fv.blob + finished(fv[]) + +proc blockUntilAny*(flowVars: openArray[FlowVarBase]): int = + ## Awaits any of the given `flowVars`. Returns the index of one `flowVar` + ## for which a value arrived. + ## + ## A `flowVar` only supports one call to `blockUntilAny` at the same time. + ## That means if you `blockUntilAny([a,b])` and `blockUntilAny([b,c])` + ## the second call will only block until `c`. If there is no `flowVar` left + ## to be able to wait on, -1 is returned. + ## + ## **Note:** This results in non-deterministic behaviour and should be avoided. + var ai: AwaitInfo + ai.cv.initSemaphore() + var conflicts = 0 + result = -1 + for i in 0 .. flowVars.high: + if cas(addr flowVars[i].ai, nil, addr ai): + if not attach(flowVars[i], i): + result = i + break + else: + inc conflicts + if conflicts < flowVars.len: + if result < 0: + blockUntil(ai.cv) + result = ai.idx + for i in 0 .. flowVars.high: + discard cas(addr flowVars[i].ai, addr ai, nil) + destroySemaphore(ai.cv) + +proc isReady*(fv: FlowVarBase): bool = + ## Determines whether the specified `FlowVarBase`'s value is available. + ## + ## If `true`, awaiting `fv` will not block. + if fv.usesSemaphore and not fv.awaited: + acquire(fv.cv.L) + result = fv.cv.counter > 0 + release(fv.cv.L) + else: + result = true + +proc nimArgsPassingDone(p: pointer) {.compilerproc.} = + let w = cast[ptr Worker](p) + signal(w.taskStarted) + +const + MaxThreadPoolSize* {.intdefine.} = 256 ## Maximum size of the thread pool. 256 threads + ## should be good enough for anybody ;-) + MaxDistinguishedThread* {.intdefine.} = 32 ## Maximum number of "distinguished" threads. 
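A hedged usage sketch for the FlowVar API above, assuming a standalone program compiled with `--threads:on`; `slowSquare` is a hypothetical worker proc, and `spawn`, `^` and `sync` are declared further down in this module:

  import std/[threadpool, os]

  proc slowSquare(x: int): int =
    sleep(10)            # simulate work on a pool thread
    x * x

  let a = spawn slowSquare(3)
  let b = spawn slowSquare(4)
  echo (^a) + (^b)       # `^` blocks until each value has arrived; prints 25
  sync()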
+ +type + ThreadId* = range[0..MaxDistinguishedThread-1] ## A thread identifier. + +var + currentPoolSize: int + maxPoolSize = MaxThreadPoolSize + minPoolSize = 4 + gSomeReady: Semaphore + readyWorker: ptr Worker + +# A workaround for recursion deadlock issue +# https://github.com/nim-lang/Nim/issues/4597 +var + numSlavesLock: Lock + numSlavesRunning {.guard: numSlavesLock.}: int + numSlavesWaiting {.guard: numSlavesLock.}: int + isSlave {.threadvar.}: bool + +numSlavesLock.initLock + +gSomeReady.initSemaphore() + +proc slave(w: ptr Worker) {.thread.} = + isSlave = true + while true: + if w.shutdown: + w.shutdown = false + atomicDec currentPoolSize + while true: + if w.data != nil: + sleep(threadpoolWaitMs) + else: + # The flowvar finalizer ("finished()") set w.data to nil, so we can + # safely terminate the thread. + # + # TODO: look for scenarios in which the flowvar is never finalized, so + # a shut down thread gets stuck in this loop until the main thread exits. + break + break + when declared(atomicStoreN): + atomicStoreN(addr(w.ready), true, ATOMIC_SEQ_CST) + else: + w.ready = true + readyWorker = w + signal(gSomeReady) + blockUntil(w.taskArrived) + # XXX Somebody needs to look into this (why does this assertion fail + # in Visual Studio?) + when not defined(vcc) and not defined(tcc): assert(not w.ready) + + withLock numSlavesLock: + inc numSlavesRunning + + w.f(w, w.data) + + withLock numSlavesLock: + dec numSlavesRunning + + if w.q.len != 0: w.cleanFlowVars + +proc distinguishedSlave(w: ptr Worker) {.thread.} = + while true: + when declared(atomicStoreN): + atomicStoreN(addr(w.ready), true, ATOMIC_SEQ_CST) + else: + w.ready = true + signal(w.readyForTask) + blockUntil(w.taskArrived) + assert(not w.ready) + w.f(w, w.data) + if w.q.len != 0: w.cleanFlowVars + +var + workers: array[MaxThreadPoolSize, Thread[ptr Worker]] + workersData: array[MaxThreadPoolSize, Worker] + + distinguished: array[MaxDistinguishedThread, Thread[ptr Worker]] + distinguishedData: array[MaxDistinguishedThread, Worker] + +when defined(nimPinToCpu): + var gCpus: Natural + +proc setMinPoolSize*(size: range[1..MaxThreadPoolSize]) = + ## Sets the minimum thread pool size. The default value of this is 4. + minPoolSize = size + +proc setMaxPoolSize*(size: range[1..MaxThreadPoolSize]) = + ## Sets the maximum thread pool size. The default value of this + ## is `MaxThreadPoolSize <#MaxThreadPoolSize>`_. 
+ maxPoolSize = size + if currentPoolSize > maxPoolSize: + for i in maxPoolSize..currentPoolSize-1: + let w = addr(workersData[i]) + w.shutdown = true + +when defined(nimRecursiveSpawn): + var localThreadId {.threadvar.}: int + +proc activateWorkerThread(i: int) {.noinline.} = + workersData[i].taskArrived.initSemaphore() + workersData[i].taskStarted.initSemaphore() + workersData[i].initialized = true + workersData[i].q.empty.initSemaphore() + initLock(workersData[i].q.lock) + createThread(workers[i], slave, addr(workersData[i])) + when defined(nimRecursiveSpawn): + localThreadId = i+1 + when defined(nimPinToCpu): + if gCpus > 0: pinToCpu(workers[i], i mod gCpus) + +proc activateDistinguishedThread(i: int) {.noinline.} = + distinguishedData[i].taskArrived.initSemaphore() + distinguishedData[i].taskStarted.initSemaphore() + distinguishedData[i].initialized = true + distinguishedData[i].q.empty.initSemaphore() + initLock(distinguishedData[i].q.lock) + distinguishedData[i].readyForTask.initSemaphore() + createThread(distinguished[i], distinguishedSlave, addr(distinguishedData[i])) + +proc setup() = + let p = countProcessors() + when defined(nimPinToCpu): + gCpus = p + currentPoolSize = min(p, MaxThreadPoolSize) + readyWorker = addr(workersData[0]) + for i in 0..<currentPoolSize: activateWorkerThread(i) + +proc preferSpawn*(): bool = + ## Use this proc to determine quickly if a `spawn` or a direct call is + ## preferable. + ## + ## If it returns `true`, a `spawn` may make sense. In general + ## it is not necessary to call this directly; use the `spawnX template + ## <#spawnX.t>`_ instead. + result = gSomeReady.counter > 0 + +proc spawn*(call: sink typed) {.magic: "Spawn".} = + ## Always spawns a new task, so that the `call` is never executed on + ## the calling thread. + ## + ## `call` has to be a proc call `p(...)` where `p` is gcsafe and has a + ## return type that is either `void` or compatible with `FlowVar[T]`. + discard "It uses `nimSpawn3` internally" + +proc pinnedSpawn*(id: ThreadId; call: sink typed) {.magic: "Spawn".} = + ## Always spawns a new task on the worker thread with `id`, so that + ## the `call` is **always** executed on the thread. + ## + ## `call` has to be a proc call `p(...)` where `p` is gcsafe and has a + ## return type that is either `void` or compatible with `FlowVar[T]`. + discard "It uses `nimSpawn4` internally" + +template spawnX*(call) = + ## Spawns a new task if a CPU core is ready, otherwise executes the + ## call in the calling thread. + ## + ## Usually, it is advised to use the `spawn proc <#spawn,sinktyped>`_ + ## in order to not block the producer for an unknown amount of time. + ## + ## `call` has to be a proc call `p(...)` where `p` is gcsafe and has a + ## return type that is either 'void' or compatible with `FlowVar[T]`. + (if preferSpawn(): spawn call else: call) + +proc parallel*(body: untyped) {.magic: "Parallel".} + ## A parallel section can be used to execute a block in parallel. + ## + ## `body` has to be in a DSL that is a particular subset of the language. + ## + ## Please refer to `the manual <manual_experimental.html#parallel-amp-spawn>`_ + ## for further information. + +var + state: ThreadPoolState + stateLock: Lock + +initLock stateLock + +proc nimSpawn3(fn: WorkerProc; data: pointer) {.compilerproc.} = + # implementation of 'spawn' that is used by the code generator. 
+ while true: + if selectWorker(readyWorker, fn, data): return + for i in 0..<currentPoolSize: + if selectWorker(addr(workersData[i]), fn, data): return + + # determine what to do, but keep in mind this is expensive too: + # state.calls < maxPoolSize: warmup phase + # (state.calls and 127) == 0: periodic check + if state.calls < maxPoolSize or (state.calls and 127) == 0: + # ensure the call to 'advice' is atomic: + if tryAcquire(stateLock): + if currentPoolSize < minPoolSize: + if not workersData[currentPoolSize].initialized: + activateWorkerThread(currentPoolSize) + let w = addr(workersData[currentPoolSize]) + atomicInc currentPoolSize + if selectWorker(w, fn, data): + release(stateLock) + return + + case advice(state) + of doNothing: discard + of doCreateThread: + if currentPoolSize < maxPoolSize: + if not workersData[currentPoolSize].initialized: + activateWorkerThread(currentPoolSize) + let w = addr(workersData[currentPoolSize]) + atomicInc currentPoolSize + if selectWorker(w, fn, data): + release(stateLock) + return + # else we didn't succeed but some other thread, so do nothing. + of doShutdownThread: + if currentPoolSize > minPoolSize: + let w = addr(workersData[currentPoolSize-1]) + w.shutdown = true + # we don't free anything here. Too dangerous. + release(stateLock) + # else the acquire failed, but this means some + # other thread succeeded, so we don't need to do anything here. + when defined(nimRecursiveSpawn): + if localThreadId > 0: + # we are a worker thread, so instead of waiting for something which + # might as well never happen (see tparallel_quicksort), we run the task + # on the current thread instead. + var self = addr(workersData[localThreadId-1]) + fn(self, data) + blockUntil(self.taskStarted) + return + + if isSlave: + # Run under lock until `numSlavesWaiting` increment to avoid a + # race (otherwise two last threads might start waiting together) + withLock numSlavesLock: + if numSlavesRunning <= numSlavesWaiting + 1: + # All the other slaves are waiting + # If we wait now, we-re deadlocked until + # an external spawn happens ! + if currentPoolSize < maxPoolSize: + if not workersData[currentPoolSize].initialized: + activateWorkerThread(currentPoolSize) + let w = addr(workersData[currentPoolSize]) + atomicInc currentPoolSize + if selectWorker(w, fn, data): + return + else: + # There is no place in the pool. We're deadlocked. + # echo "Deadlock!" + discard + + inc numSlavesWaiting + + blockUntil(gSomeReady) + + if isSlave: + withLock numSlavesLock: + dec numSlavesWaiting + +var + distinguishedLock: Lock + +initLock distinguishedLock + +proc nimSpawn4(fn: WorkerProc; data: pointer; id: ThreadId) {.compilerproc.} = + acquire(distinguishedLock) + if not distinguishedData[id].initialized: + activateDistinguishedThread(id) + release(distinguishedLock) + while true: + if selectWorker(addr(distinguishedData[id]), fn, data): break + blockUntil(distinguishedData[id].readyForTask) + + +proc sync*() = + ## A simple barrier to wait for all `spawn`ed tasks. + ## + ## If you need more elaborate waiting, you have to use an explicit barrier. + while true: + var allReady = true + for i in 0 ..< currentPoolSize: + if not allReady: break + allReady = allReady and workersData[i].ready + if allReady: break + sleep(threadpoolWaitMs) + # We cannot "blockUntil(gSomeReady)" because workers may be shut down between + # the time we establish that some are not "ready" and the time we wait for a + # "signal(gSomeReady)" from inside "slave()" that can never come. 
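A hedged sketch of the fire-and-forget pattern that `spawnX` and `sync` support, assuming a standalone program compiled with `--threads:on`; `logLine` is a hypothetical proc:

  import std/threadpool

  proc logLine(i: int) =
    echo "task ", i

  for i in 1 .. 8:
    spawnX logLine(i)    # runs inline when no worker is ready, otherwise spawns
  sync()                 # coarse barrier: waits until all workers report ready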
+ +setup() diff --git a/lib/pure/concurrency/threadpool.nim.cfg b/lib/pure/concurrency/threadpool.nim.cfg new file mode 100644 index 000000000..aed303eef --- /dev/null +++ b/lib/pure/concurrency/threadpool.nim.cfg @@ -0,0 +1 @@ +--threads:on
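The pool-sizing procs can be combined with `countProcessors` from the cpuinfo module above; a hedged sketch, assuming a standalone program compiled with `--threads:on` (as this .cfg enforces for the module itself):

  import std/[threadpool, cpuinfo]

  let cores = countProcessors()     # 0 if detection fails
  if cores > 0:
    setMinPoolSize(min(cores, MaxThreadPoolSize))
    setMaxPoolSize(min(2 * cores, MaxThreadPoolSize))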