lib/system/atomics.nim


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319

#
#
#            Nim's Runtime Library
#        (c) Copyright 2015 Andreas Rumpf
#
#    See the file "copying.txt", included in this
#    distribution, for details about the copyright.
#

# Atomic operations for Nim.
# Stack-trace instrumentation is switched off for every routine declared
# below; the matching `{.pop.}` is the last statement of this file.
{.push stackTrace:off.}

# True when the C backend is gcc, llvm_gcc or clang. The branches below use it
# to select the `__atomic_*` builtins and the GNU inline-asm `cpuRelax`.
const someGcc = defined(gcc) or defined(llvm_gcc) or defined(clang)

when someGcc and hasThreadSupport:
  # Memory-model (ordering) argument taken by the atomic procs below. It is a
  # `distinct cint`, so a plain `cint` is not accepted in its place.
  type AtomMemModel* = distinct cint

  # Each constant below is imported from the compiler's predefined
  # `__ATOMIC_*` macro of the same name.
  var ATOMIC_RELAXED* {.importc: "__ATOMIC_RELAXED", nodecl.}: AtomMemModel
    ## No barriers or synchronization.
  var ATOMIC_CONSUME* {.importc: "__ATOMIC_CONSUME", nodecl.}: AtomMemModel
    ## Data dependency only, for both barrier and
    ## synchronization with another thread.
  var ATOMIC_ACQUIRE* {.importc: "__ATOMIC_ACQUIRE", nodecl.}: AtomMemModel
    ## Barrier to hoisting of code; synchronizes with
    ## release (or stronger)
    ## semantic stores from another thread.
  var ATOMIC_RELEASE* {.importc: "__ATOMIC_RELEASE", nodecl.}: AtomMemModel
    ## Barrier to sinking of code; synchronizes with
    ## acquire (or stronger)
    ## semantic loads from another thread.
  var ATOMIC_ACQ_REL* {.importc: "__ATOMIC_ACQ_REL", nodecl.}: AtomMemModel
    ## Full barrier in both directions; synchronizes
    ## with acquire loads
    ## and release stores in another thread.
  var ATOMIC_SEQ_CST* {.importc: "__ATOMIC_SEQ_CST", nodecl.}: AtomMemModel
    ## Full barrier in both directions; synchronizes
    ## with acquire loads
    ## and release stores in all threads.

  type
    AtomType* = SomeNumber|pointer|ptr|char|bool
      ## Type class of the types accepted by the generic atomic procs:
      ## any `SomeNumber`, untyped and typed pointers, `char` and `bool`.

  proc atomicLoadN*[T: AtomType](p: ptr T, mem: AtomMemModel): T {.
    importc: "__atomic_load_n", nodecl.}
    ## Atomic load (binds the `__atomic_load_n` builtin): returns the
    ## contents at `p`.
    ## Valid memory models: ATOMIC_RELAXED, ATOMIC_SEQ_CST, ATOMIC_ACQUIRE,
    ## ATOMIC_CONSUME.

  proc atomicLoad*[T: AtomType](p, ret: ptr T, mem: AtomMemModel) {.
    importc: "__atomic_load", nodecl.}
    ## Generic version of an atomic load (binds the `__atomic_load` builtin):
    ## the contents at `p` are returned through `ret`.

  proc atomicStoreN*[T: AtomType](p: ptr T, val: T, mem: AtomMemModel) {.
    importc: "__atomic_store_n", nodecl.}
    ## Atomic store (binds the `__atomic_store_n` builtin): writes `val`
    ## at `p`.
    ## Valid memory models: ATOMIC_RELAXED, ATOMIC_SEQ_CST and ATOMIC_RELEASE.

  proc atomicStore*[T: AtomType](p, val: ptr T, mem: AtomMemModel) {.
    importc: "__atomic_store", nodecl.}
    ## Generic version of an atomic store (binds the `__atomic_store`
    ## builtin): stores the value that `val` points to at `p`.

  proc atomicExchangeN*[T: AtomType](p: ptr T, val: T, mem: AtomMemModel): T {.
    importc: "__atomic_exchange_n", nodecl.}
    ## Atomic exchange (binds the `__atomic_exchange_n` builtin): writes
    ## `val` at `p` and returns the previous contents at `p`.
    ## Valid memory models: ATOMIC_RELAXED, ATOMIC_SEQ_CST, ATOMIC_ACQUIRE,
    ## ATOMIC_RELEASE, ATOMIC_ACQ_REL.

  proc atomicExchange*[T: AtomType](p, val, ret: ptr T, mem: AtomMemModel) {.
    importc: "__atomic_exchange", nodecl.}
    ## Generic version of an atomic exchange (binds the `__atomic_exchange`
    ## builtin): stores the contents at `val` at `p`; the original value
    ## at `p` is copied into `ret`.

  proc atomicCompareExchangeN*[T: AtomType](p, expected: ptr T, desired: T,
    weak: bool, success_memmodel: AtomMemModel, failure_memmodel: AtomMemModel): bool {.
    importc: "__atomic_compare_exchange_n", nodecl.}
    ## Atomic compare-and-exchange (binds the `__atomic_compare_exchange_n`
    ## builtin). Compares the contents at `p` with the contents at `expected`
    ## and, if they are equal, writes `desired` at `p`. If they are not equal,
    ## the current contents at `p` are written into `expected`.
    ##
    ## `weak` is true for the weak compare-exchange and false for the strong
    ## variation. Many targets only offer the strong variation and ignore the
    ## parameter. When in doubt, use the strong variation.
    ##
    ## Returns true if `desired` is written at `p`; the execution is then
    ## considered to conform to `success_memmodel`, for which any memory model
    ## may be used. Returns false otherwise, and the execution is considered
    ## to conform to `failure_memmodel`. That model cannot be ATOMIC_RELEASE
    ## nor ATOMIC_ACQ_REL, and it cannot be stronger than `success_memmodel`.

  proc atomicCompareExchange*[T: AtomType](p, expected, desired: ptr T,
    weak: bool, success_memmodel: AtomMemModel, failure_memmodel: AtomMemModel): bool {.
    importc: "__atomic_compare_exchange", nodecl.}
    ## Generic version of the atomic compare-and-exchange (binds the
    ## `__atomic_compare_exchange` builtin). Virtually identical to
    ## `atomicCompareExchangeN`, except that the desired value is also
    ## passed as a pointer.

  ## Perform the operation and return the new value; all memory models are
  ## valid. Each proc binds the `__atomic_<op>_fetch` builtin named in its
  ## `importc` pragma.
  proc atomicAddFetch*[T: AtomType](p: ptr T, val: T, mem: AtomMemModel): T {.
    importc: "__atomic_add_fetch", nodecl.}
  proc atomicSubFetch*[T: AtomType](p: ptr T, val: T, mem: AtomMemModel): T {.
    importc: "__atomic_sub_fetch", nodecl.}
  proc atomicOrFetch*[T: AtomType](p: ptr T, val: T, mem: AtomMemModel): T {.
    importc: "__atomic_or_fetch", nodecl.}
  proc atomicAndFetch*[T: AtomType](p: ptr T, val: T, mem: AtomMemModel): T {.
    importc: "__atomic_and_fetch", nodecl.}
  proc atomicXorFetch*[T: AtomType](p: ptr T, val: T, mem: AtomMemModel): T {.
    importc: "__atomic_xor_fetch", nodecl.}
  proc atomicNandFetch*[T: AtomType](p: ptr T, val: T, mem: AtomMemModel): T {.
    importc: "__atomic_nand_fetch", nodecl.}

  ## Perform the operation and return the old value; all memory models are
  ## valid. Each proc binds the `__atomic_fetch_<op>` builtin named in its
  ## `importc` pragma.
  proc atomicFetchAdd*[T: AtomType](p: ptr T, val: T, mem: AtomMemModel): T {.
    importc: "__atomic_fetch_add", nodecl.}
  proc atomicFetchSub*[T: AtomType](p: ptr T, val: T, mem: AtomMemModel): T {.
    importc: "__atomic_fetch_sub", nodecl.}
  proc atomicFetchOr*[T: AtomType](p: ptr T, val: T, mem: AtomMemModel): T {.
    importc: "__atomic_fetch_or", nodecl.}
  proc atomicFetchAnd*[T: AtomType](p: ptr T, val: T, mem: AtomMemModel): T {.
    importc: "__atomic_fetch_and", nodecl.}
  proc atomicFetchXor*[T: AtomType](p: ptr T, val: T, mem: AtomMemModel): T {.
    importc: "__atomic_fetch_xor", nodecl.}
  proc atomicFetchNand*[T: AtomType](p: ptr T, val: T, mem: AtomMemModel): T {.
    importc: "__atomic_fetch_nand", nodecl.}

  proc atomicTestAndSet*(p: pointer, mem: AtomMemModel): bool {.
    importc: "__atomic_test_and_set", nodecl.}
    ## Atomic test-and-set on the byte at `p` (binds the
    ## `__atomic_test_and_set` builtin). The byte is set to some
    ## implementation-defined nonzero "set" value, and the return value is
    ## true if and only if the previous contents were "set".
    ## All memory models are valid.

  proc atomicClear*(p: pointer, mem: AtomMemModel) {.
    importc: "__atomic_clear", nodecl.}
    ## Atomic clear at `p` (binds the `__atomic_clear` builtin).
    ## After the operation, the location at `p` contains 0.
    ## Valid memory models: ATOMIC_RELAXED, ATOMIC_SEQ_CST, ATOMIC_RELEASE.

  proc atomicThreadFence*(mem: AtomMemModel) {.
    importc: "__atomic_thread_fence", nodecl.}
    ## Synchronization fence between threads, based on the specified memory
    ## model (binds the `__atomic_thread_fence` builtin).
    ## All memory orders are valid.

  proc atomicSignalFence*(mem: AtomMemModel) {.
    importc: "__atomic_signal_fence", nodecl.}
    ## Synchronization fence between a thread and signal handlers based in
    ## the same thread (binds the `__atomic_signal_fence` builtin).
    ## All memory orders are valid.

  proc atomicAlwaysLockFree*(size: int, p: pointer): bool {.
    importc: "__atomic_always_lock_free", nodecl.}
    ## Returns true if objects of `size` bytes always generate lock-free
    ## atomic instructions for the target architecture (binds the
    ## `__atomic_always_lock_free` builtin). `size` must resolve to a
    ## compile-time constant, and the result also resolves to a compile-time
    ## constant.
    ## `p` is an optional pointer to the object that may be used to determine
    ## alignment. A value of 0 indicates typical alignment should be used.
    ## The compiler may also ignore this parameter.

  proc atomicIsLockFree*(size: int, p: pointer): bool {.
    importc: "__atomic_is_lock_free", nodecl.}
    ## Returns true if objects of `size` bytes always generate lock-free
    ## atomic instructions for the target architecture (binds the
    ## `__atomic_is_lock_free` builtin). If it is not known to be lock free,
    ## a call is made to a runtime routine named `__atomic_is_lock_free`.
    ## `p` is an optional pointer to the object that may be used to determine
    ## alignment. A value of 0 indicates typical alignment should be used.
    ## The compiler may also ignore this parameter.

  template fence*() =
    ## Expands to a thread fence issued with the ATOMIC_SEQ_CST memory model.
    atomicThreadFence(ATOMIC_SEQ_CST)
elif defined(vcc) and hasThreadSupport:
  # Visual C++ backend: `addAndFetch` binds the `_InterlockedExchangeAdd`
  # family from <intrin.h>, choosing the 64-bit intrinsic when `int` is
  # 8 bytes wide.
  # NOTE(review): `atomicInc`/`atomicDec` apply the operand to the result
  # again after calling this, which suggests the intrinsic yields the value
  # *before* the addition, unlike the non-threaded fallback `addAndFetch`.
  # Confirm against the MSVC documentation.
  when defined(cpp):
    # C++ needs explicit casts, hence the `importcpp` call patterns.
    when sizeof(int) == 8:
      proc addAndFetch*(p: ptr int, val: int): int {.
        importcpp: "_InterlockedExchangeAdd64(static_cast<NI volatile *>(#), #)",
        header: "<intrin.h>".}
    else:
      proc addAndFetch*(p: ptr int, val: int): int {.
        importcpp: "_InterlockedExchangeAdd(reinterpret_cast<long volatile *>(#), static_cast<long>(#))",
        header: "<intrin.h>".}
  else:
    # C backend: the intrinsics are bound directly by name.
    when sizeof(int) == 8:
      proc addAndFetch*(p: ptr int, val: int): int {.
        importc: "_InterlockedExchangeAdd64", header: "<intrin.h>".}
    else:
      proc addAndFetch*(p: ptr int, val: int): int {.
        importc: "_InterlockedExchangeAdd", header: "<intrin.h>".}

  # `fence` for the Visual C++ backend: binds `_ReadWriteBarrier` from
  # <intrin.h>.
  # NOTE(review): confirm whether this intrinsic also acts as a hardware
  # memory barrier or only constrains compiler reordering.
  proc fence*() {.importc: "_ReadWriteBarrier", header: "<intrin.h>".}

else:
  proc addAndFetch*(p: ptr int, val: int): int {.inline.} =
    ## Fallback used when neither the GCC-style nor the Visual C++ branch
    ## applies: adds `val` to the integer at `p` with ordinary, non-atomic
    ## loads and stores, and returns the updated value.
    let updated = p[] + val
    p[] = updated
    result = updated

proc atomicInc*(memLoc: var int, x: int = 1): int =
  ## Adds `x` (default 1) to `memLoc` and returns the new value.
  ##
  ## With a GCC-style compiler and thread support this is a single
  ## `atomicAddFetch` using ATOMIC_RELAXED. With Visual C++ and thread support
  ## it goes through `addAndFetch`. Otherwise it is a plain, non-atomic
  ## increment.
  when someGcc and hasThreadSupport:
    result = atomicAddFetch(addr(memLoc), x, ATOMIC_RELAXED)
  elif defined(vcc) and hasThreadSupport:
    # `x` is added once more to what `addAndFetch` handed back.
    let fetched = addAndFetch(addr(memLoc), x)
    result = fetched + x
  else:
    memLoc = memLoc + x
    result = memLoc

proc atomicDec*(memLoc: var int, x: int = 1): int =
  ## Subtracts `x` (default 1) from `memLoc` and returns the new value.
  ##
  ## With a GCC-style compiler and thread support this uses `atomicSubFetch`
  ## when it is declared, otherwise `atomicAddFetch` with `-x`, both with
  ## ATOMIC_RELAXED. With Visual C++ and thread support it goes through
  ## `addAndFetch`. Otherwise it is a plain, non-atomic decrement.
  when someGcc and hasThreadSupport:
    when declared(atomicSubFetch):
      result = atomicSubFetch(addr(memLoc), x, ATOMIC_RELAXED)
    else:
      result = atomicAddFetch(addr(memLoc), -x, ATOMIC_RELAXED)
  elif defined(vcc) and hasThreadSupport:
    # `x` is subtracted once more from what `addAndFetch` handed back.
    let fetched = addAndFetch(addr(memLoc), -x)
    result = fetched - x
  else:
    memLoc = memLoc - x
    result = memLoc

when defined(vcc):
  # Private bindings for the `_InterlockedCompareExchange*` intrinsics from
  # <intrin.h>, one per operand width (64, 32 and 8 bits). `cas` below picks
  # one of them by `sizeof(T)`.
  when defined(cpp):
    # C++ needs explicit pointer casts, hence the `importcpp` call patterns.
    proc interlockedCompareExchange64(p: pointer; exchange, comparand: int64): int64
      {.importcpp: "_InterlockedCompareExchange64(static_cast<NI64 volatile *>(#), #, #)", header: "<intrin.h>".}
    # The 32-bit intrinsic takes `long volatile *`. Casting to `NI volatile *`
    # gives a pointer type that C++ will not convert implicitly, so the cast
    # targets `long` directly, as the 32-bit `addAndFetch` binding does.
    proc interlockedCompareExchange32(p: pointer; exchange, comparand: int32): int32
      {.importcpp: "_InterlockedCompareExchange(static_cast<long volatile *>(#), #, #)", header: "<intrin.h>".}
    proc interlockedCompareExchange8(p: pointer; exchange, comparand: byte): byte
      {.importcpp: "_InterlockedCompareExchange8(static_cast<char volatile *>(#), #, #)", header: "<intrin.h>".}
  else:
    # C backend: the intrinsics are bound directly by name.
    proc interlockedCompareExchange64(p: pointer; exchange, comparand: int64): int64
      {.importc: "_InterlockedCompareExchange64", header: "<intrin.h>".}
    proc interlockedCompareExchange32(p: pointer; exchange, comparand: int32): int32
      {.importc: "_InterlockedCompareExchange", header: "<intrin.h>".}
    proc interlockedCompareExchange8(p: pointer; exchange, comparand: byte): byte
      {.importc: "_InterlockedCompareExchange8", header: "<intrin.h>".}

  proc cas*[T: bool|int|ptr](p: ptr T; oldValue, newValue: T): bool =
    ## Compare-and-swap for the Visual C++ backend. Calls the
    ## `interlockedCompareExchange*` binding that matches `sizeof(T)`
    ## (8, 4 or 1 bytes) with `newValue` as the exchange value and `oldValue`
    ## as the comparand, and returns true when the value handed back by the
    ## binding equals `oldValue`. Any other size is a compile-time error.
    when sizeof(T) == 8:
      let seen = interlockedCompareExchange64(p, cast[int64](newValue),
                                              cast[int64](oldValue))
      result = seen == cast[int64](oldValue)
    elif sizeof(T) == 4:
      let seen = interlockedCompareExchange32(p, cast[int32](newValue),
                                              cast[int32](oldValue))
      result = seen == cast[int32](oldValue)
    elif sizeof(T) == 1:
      let seen = interlockedCompareExchange8(p, cast[byte](newValue),
                                             cast[byte](oldValue))
      result = seen == cast[byte](oldValue)
    else:
      {.error: "invalid CAS instruction".}

elif defined(tcc):
  # `__tcc_cas` is a hand-written compare-and-swap for TinyCC built on
  # `lock cmpxchg`. `sete` stores 1 in `ret` exactly when the compared values
  # were equal, i.e. when the swap was performed. The helper therefore returns
  # `ret` as-is, so that `cas` yields true on success like the other backends.
  # The previous code returned 0 in that case, which inverted the result.
  # NOTE(review): the amd64 variant issues `cmpxchgq` (64-bit) while the C
  # parameters are declared `int`; confirm the operand width is intended.
  when defined(amd64):
    {.emit:"""
static int __tcc_cas(int *ptr, int oldVal, int newVal)
{
    unsigned char ret;
    __asm__ __volatile__ (
            "  lock\n"
            "  cmpxchgq %2,%1\n"
            "  sete %0\n"
            : "=q" (ret), "=m" (*ptr)
            : "r" (newVal), "m" (*ptr), "a" (oldVal)
            : "memory");

    return ret;
}
""".}
  else:
    #assert sizeof(int) == 4
    {.emit:"""
static int __tcc_cas(int *ptr, int oldVal, int newVal)
{
    unsigned char ret;
    __asm__ __volatile__ (
            "  lock\n"
            "  cmpxchgl %2,%1\n"
            "  sete %0\n"
            : "=q" (ret), "=m" (*ptr)
            : "r" (newVal), "m" (*ptr), "a" (oldVal)
            : "memory");

    return ret;
}
""".}

  # Binding for the C helper `__tcc_cas` emitted just above.
  proc tcc_cas(p: ptr int; oldValue, newValue: int): bool
    {.importc: "__tcc_cas", nodecl.}

  proc cas*[T: bool|int|ptr](p: ptr T; oldValue, newValue: T): bool =
    ## Compare-and-swap for the TinyCC backend: reinterprets the location and
    ## both values as `int` and returns whatever `__tcc_cas` reports.
    let
      target = cast[ptr int](p)
      expectedBits = cast[int](oldValue)
      desiredBits = cast[int](newValue)
    result = tcc_cas(target, expectedBits, desiredBits)
elif declared(atomicCompareExchangeN):
  proc cas*[T: bool|int|ptr](p: ptr T; oldValue, newValue: T): bool =
    ## Compare-and-swap built on `atomicCompareExchangeN`: the strong
    ## variation (`weak` is false) with ATOMIC_SEQ_CST for both the success
    ## and the failure memory model. `oldValue` is the expected value and
    ## `newValue` the desired one.
    result = atomicCompareExchangeN(p, unsafeAddr(oldValue), newValue,
                                    false, ATOMIC_SEQ_CST, ATOMIC_SEQ_CST)
else:
  # Last resort: bind the `__sync_bool_compare_and_swap` builtin directly.
  # This is valid for GCC and Intel C++.
  proc cas*[T: bool|int|ptr](p: ptr T; oldValue, newValue: T): bool
    {.importc: "__sync_bool_compare_and_swap", nodecl.}
  # XXX is this valid for 'int'?


# `cpuRelax` is chosen per target CPU and C compiler. The first matching
# branch wins, so the order of the branches matters.
when (defined(x86) or defined(amd64)) and defined(vcc):
  # x86/amd64 with Visual C++: bind `YieldProcessor` from <windows.h>.
  proc cpuRelax* {.importc: "YieldProcessor", header: "<windows.h>".}
elif (defined(x86) or defined(amd64)) and (someGcc or defined(bcc)):
  # x86/amd64 with a GCC-style compiler or bcc: emit a `pause` instruction
  # with a "memory" clobber.
  proc cpuRelax* {.inline.} =
    {.emit: """asm volatile("pause" ::: "memory");""".}
elif someGcc or defined(tcc):
  # Remaining GCC-style targets and tcc: an empty asm statement with a
  # "memory" clobber.
  proc cpuRelax* {.inline.} =
    {.emit: """asm volatile("" ::: "memory");""".}
elif defined(icl):
  # Intel compiler: bind `_mm_pause` from xmmintrin.h.
  proc cpuRelax* {.importc: "_mm_pause", header: "xmmintrin.h".}
elif false:
  # Disabled branch (the condition is the literal `false`): a sleep-based
  # variant that is never compiled in.
  from os import sleep

  proc cpuRelax* {.inline.} = os.sleep(1)

when hasThreadSupport and not declared(fence):
  # XXX fixme
  proc fence*() {.inline.} =
    ## Stand-in used when no backend-specific `fence` was declared above:
    ## performs a `cas` on a local dummy `bool` and ignores its outcome.
    var scratch: bool
    let swapped = cas(scratch.addr, false, true)
    discard swapped

{.pop.}