lib/system/atomics.nim


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
pre { line-height: 125%; }
td.linenos .normal { color: inherit; background-color: transparent; padding-left: 5px; padding-right: 5px; }
span.linenos { color: inherit; background-color: transparent; padding-left: 5px; padding-right: 5px; }
td.linenos .special { color: #000000; background-color: #ffffc0; padding-left: 5px; padding-right: 5px; }
span.linenos.special { color: #000000; background-color: #ffffc0; padding-left: 5px; padding-right: 5px; }
.highlight .hll { background-color: #ffffcc }
.highlight .c { color: #888888 } /* Comment */
.highlight .err { color: #a61717; background-color: #e3d2d2 } /* Error */
.highlight .k { color: #008800; font-weight: bold } /* Keyword */
.highlight .ch { color: #888888 } /* Comment.Hashbang */
.highlight .cm { color: #888888 } /* Comment.Multiline */
.highlight .cp { color: #cc0000; font-weight: bold } /* Comment.Preproc */
.highlight .cpf { color: #888888 } /* Comment.PreprocFile */
.highlight .c1 { color: #888888 } /* Comment.Single */
.highlight .cs { color: #cc0000; font-weight: bold; background-color: #fff0f0 } /* Comment.Special */
.highlight .gd { color: #000000; background-color: #ffdddd } /* Generic.Deleted */
.highlight .ge { font-style: italic } /* Generic.Emph */
.highlight .ges { font-weight: bold; font-style: italic } /* Generic.EmphStrong */
.highlight .gr { color: #aa0000 } /* Generic.Error */
.highlight .gh { color: #333333 } /* Generic.Heading */
.highlight .gi { color: #000000; background-color: #ddffdd } /* Generic.Inserted */
.highlight .go { color: #888888 } /* Generic.Output */
.highlight .gp { color: #555555 } /* Generic.Prompt */
.highlight .gs { font-weight: bold } /* Generic.Strong */
.highlight .gu { color: #666666 } /* Generic.Subheading */
.highlight .gt { color: #aa0000 } /* Generic.Traceback */
.highlight .kc { color: #008800; font-weight: bold } /* Keyword.Constant */
.highlight .kd { color: #008800; font-weight: bold } /* Keyword.Declaration */
.highlight .kn { color: #008800; font-weight: bold } /* Keyword.Namespace */
.highlight .kp { color: #008800 } /* Keyword.Pseudo */
.highlight .kr { color: #008800; font-weight: bold } /* Keyword.Reserved */
.highlight .kt { color: #888888; font-weight: bold } /* Keyword.Type */
.highlight .m { color: #0000DD; font-weight: bold } /* Literal.Number */
.highlight .s { color: #dd2200; background-color: #fff0f0 } /* Literal.String */
.highlight .na { color: #336699 } /* Name.Attribute */
.highlight .nb { color: #003388 } /* Name.Builtin */
.highlight .nc { color: #bb0066; font-weight: bold } /* Name.Class */
.highlight .no { color: #003366; font-weight: bold } /* Name.Constant */
.highlight .nd { color: #555555 } /* Name.Decorator */
.highlight .ne { color: #bb0066; font-weight: bold } /* Name.Exception */
.highlight .nf { color: #0066bb; font-weight: bold } /* Name.Function */
.highlight .nl { color: #336699; font-style: italic } /* Name.Label */
.highlight .nn { color: #bb0066; font-weight: bold } /* Name.Namespace */
.highlight .py { color: #336699; font-weight: bold } /* Name.Property */
.highlight .nt { color: #bb0066; font-weight: bold } /* Name.Tag */
.highlight .nv { color: #336699 } /* Name.Variable */
.highlight .ow { color: #008800 } /* Operator.Word */
.highlight .w { color: #bbbbbb } /* Text.Whitespace */
.highlight .mb { color: #0000DD; font-weight: bold } /* Literal.Number.Bin */
.highlight .mf { color: #0000DD; font-weight: bold } /* Literal.Number.Float */
.highlight .mh { color: #0000DD; font-weight: bold } /* Literal.Number.Hex */
.highlight .mi { color: #0000DD; font-weight: bold } /* Literal.Number.Integer */
.highlight .mo { color: #0000DD; font-weight: bold } /* Literal.Number.Oct */
.highlight .sa { color: #dd2200; background-color: #fff0f0 } /* Literal.String.Affix */
.highlight .sb { color: #dd2200; background-color: #fff0f0 } /* Literal.String.Backtick */
.highlight .sc { color: #dd2200; background-color: #fff0f0 } /* Literal.String.Char */
.highlight .dl { color: #dd2200; background-color: #fff0f0 } /* Literal.String.Delimiter */
.highlight .sd { color: #dd2200; background-color: #fff0f0 } /* Literal.String.Doc */
.highlight .s2 { color: #dd2200; background-color: #fff0f0 } /* Literal.String.Double */
.highlight .se { color: #0044dd; background-color: #fff0f0 } /* Literal.String.Escape */
.highlight .sh { color: #dd2200; background-color: #fff0f0 } /* Literal.String.Heredoc */
.highlight .si { color: #3333bb; background-color: #fff0f0 } /* Literal.String.Interpol */
.highlight .sx { color: #22bb22; background-color: #f0fff0 } /* Literal.String.Other */
.highlight .sr { color: #008800; background-color: #fff0ff } /* Literal.String.Regex */
.highlight .s1 { color: #dd2200; background-color: #fff0f0 } /* Literal.String.Single */
.highlight .ss { color: #aa6600; background-color: #fff0f0 } /* Literal.String.Symbol */
.highlight .bp { color: #003388 } /* Name.Builtin.Pseudo */
.highlight .fm { color: #0066bb; font-weight: bold } /* Name.Function.Magic */
.highlight .vc { color: #336699 } /* Name.Variable.Class */
.highlight .vg { color: #dd7700 } /* Name.Variable.Global */
.highlight .vi { color: #3333bb } /* Name.Variable.Instance */
.highlight .vm { color: #336699 } /* Name.Variable.Magic */
.highlight .il { color: #0000DD; font-weight: bold } /* Literal.Number.Integer.Long */
## print out a (global variable) string to stdout
#
# To run:
#   $ subx translate ex6.subx ex6
#   $ subx run ex6
#   Hello, world!

== code
# instruction                     effective address                                                   operand     displacement    immediate
# op          subop               mod             rm32          base        index         scale       r32
# 1-3 bytes   3 bits              2 bits          3 bits        3 bits      3 bits        2 bits      3 bits      0/1/2/4 bytes   0/1/2/4 bytes

  # write(stdout, x, size)
    # fd = 1 (stdout)
  bb/copy                                                                                                                         1/imm32           # copy 1 to EBX
    # buffer = address of x (the bytes to write out)
  b9/copy                                                                                                                         x/imm32           # copy address of x to ECX
    # count = number of bytes to write, loaded from the word at label 'size'
  8b/copy                         0/mod/indirect  5/rm32/.disp32                                      2/r32/EDX   size/disp32                       # copy *size to EDX
    # write(fd, x, size)
  b8/copy                                                                                                                         4/imm32/write     # copy 4 to EAX
  cd/syscall  0x80/imm8

  # exit(EBX)
  # NOTE(review): EBX is not reloaded here, so the exit status is presumably
  # still the 1 that was stored for the file descriptor above -- confirm.
  b8/copy                                                                                                                         1/imm32/exit      # copy 1 to EAX
  cd/syscall  0x80/imm8

== data
size:  # size of string: 32-bit little-endian byte count passed to write()
  0e 00 00 00  # 14
x:  # string to print (14 bytes of text, followed by a null that is not counted in 'size')
  48 65 6c 6c 6f 2c 20 77 6f 72 6c 64 21 0a       00
# h  e  l  l  o  ,  ␣  w  o  r  l  d  !  newline  null

# vim:ft=subx:nowrap
#
#
#            Nim's Runtime Library
#        (c) Copyright 2015 Andreas Rumpf
#
#    See the file "copying.txt", included in this
#    distribution, for details about the copyright.
#

# Atomic operations for Nim.
{.push stackTrace:off, profiler:off.}

# True when compiling with gcc, llvm_gcc or clang; selects the `__atomic_*`
# builtin bindings in this file.
const someGcc = defined(gcc) or defined(llvm_gcc) or defined(clang)
# True when compiling with vcc or clang_cl; selects the `<intrin.h>` based
# bindings in this file.
const someVcc = defined(vcc) or defined(clang_cl)

type
  AtomType* = SomeNumber|pointer|ptr|char|bool
    ## Type class of the types accepted by the generic atomic procs:
    ## any number type, `pointer`, any `ptr`, `char` and `bool`.

when someGcc:
  # Memory-model (ordering) argument taken by the `__atomic_*` bindings of this
  # branch. Declared as a distinct `cint`.
  type AtomMemModel* = distinct cint

  # Each variable below is bound by `importc` to the C macro of the same name;
  # `nodecl` means no C declaration is generated for it.
  var ATOMIC_RELAXED* {.importc: "__ATOMIC_RELAXED", nodecl.}: AtomMemModel
    ## No barriers or synchronization.
  var ATOMIC_CONSUME* {.importc: "__ATOMIC_CONSUME", nodecl.}: AtomMemModel
    ## Data dependency only, for both barrier and
    ## synchronization with another thread.
  var ATOMIC_ACQUIRE* {.importc: "__ATOMIC_ACQUIRE", nodecl.}: AtomMemModel
    ## Barrier to hoisting of code; synchronizes with
    ## release (or stronger)
    ## semantic stores from another thread.
  var ATOMIC_RELEASE* {.importc: "__ATOMIC_RELEASE", nodecl.}: AtomMemModel
    ## Barrier to sinking of code; synchronizes with
    ## acquire (or stronger)
    ## semantic loads from another thread.
  var ATOMIC_ACQ_REL* {.importc: "__ATOMIC_ACQ_REL", nodecl.}: AtomMemModel
    ## Full barrier in both directions; synchronizes
    ## with acquire loads
    ## and release stores in another thread.
  var ATOMIC_SEQ_CST* {.importc: "__ATOMIC_SEQ_CST", nodecl.}: AtomMemModel
    ## Full barrier in both directions; synchronizes
    ## with acquire loads
    ## and release stores in all threads.

  proc atomicLoadN*[T: AtomType](p: ptr T, mem: AtomMemModel): T {.
    importc: "__atomic_load_n", nodecl.}
    ## Binding of `__atomic_load_n`: atomic load that returns the contents at `p`.
    ## Valid memory models: ATOMIC_RELAXED, ATOMIC_SEQ_CST, ATOMIC_ACQUIRE,
    ## ATOMIC_CONSUME.

  proc atomicLoad*[T: AtomType](p, ret: ptr T, mem: AtomMemModel) {.
    importc: "__atomic_load", nodecl.}
    ## Binding of `__atomic_load`, the generic version of an atomic load.
    ## The contents at `p` are stored into the location `ret` points to.

  proc atomicStoreN*[T: AtomType](p: ptr T, val: T, mem: AtomMemModel) {.
    importc: "__atomic_store_n", nodecl.}
    ## Binding of `__atomic_store_n`: atomic store that writes `val` at `p`.
    ## Valid memory models: ATOMIC_RELAXED, ATOMIC_SEQ_CST and ATOMIC_RELEASE.

  proc atomicStore*[T: AtomType](p, val: ptr T, mem: AtomMemModel) {.
    importc: "__atomic_store", nodecl.}
    ## Binding of `__atomic_store`, the generic version of an atomic store.
    ## The value `val` points to is stored at `p`.

  proc atomicExchangeN*[T: AtomType](p: ptr T, val: T, mem: AtomMemModel): T {.
    importc: "__atomic_exchange_n", nodecl.}
    ## Binding of `__atomic_exchange_n`: atomic exchange that writes `val` at `p`
    ## and returns the previous contents at `p`.
    ## Valid memory models: ATOMIC_RELAXED, ATOMIC_SEQ_CST, ATOMIC_ACQUIRE,
    ## ATOMIC_RELEASE, ATOMIC_ACQ_REL.

  proc atomicExchange*[T: AtomType](p, val, ret: ptr T, mem: AtomMemModel) {.
    importc: "__atomic_exchange", nodecl.}
    ## Binding of `__atomic_exchange`, the generic version of an atomic exchange.
    ## The value `val` points to is stored at `p`; the original value at `p`
    ## is copied into the location `ret` points to.

  proc atomicCompareExchangeN*[T: AtomType](p, expected: ptr T, desired: T,
    weak: bool, success_memmodel: AtomMemModel, failure_memmodel: AtomMemModel): bool {.
    importc: "__atomic_compare_exchange_n", nodecl.}
    ## Binding of `__atomic_compare_exchange_n`: atomic compare and exchange.
    ## Compares the contents at `p` with the contents at `expected` and, if they
    ## are equal, writes `desired` at `p`. If they are not equal, the current
    ## contents at `p` are written into `expected`.
    ##
    ## `weak` is true for the weak compare-exchange and false for the strong
    ## variation. Many targets only offer the strong variation and ignore the
    ## parameter. When in doubt, use the strong variation.
    ##
    ## Returns true if `desired` was written at `p`; the execution is then
    ## considered to conform to `success_memmodel`, for which any memory model
    ## may be used. Returns false otherwise; the execution is then considered to
    ## conform to `failure_memmodel`, which cannot be ATOMIC_RELEASE nor
    ## ATOMIC_ACQ_REL and cannot be a stronger model than `success_memmodel`.

  proc atomicCompareExchange*[T: AtomType](p, expected, desired: ptr T,
    weak: bool, success_memmodel: AtomMemModel, failure_memmodel: AtomMemModel): bool {.
    importc: "__atomic_compare_exchange", nodecl.}
    ## Binding of `__atomic_compare_exchange`, the generic version of the
    ## compare and exchange. It is virtually identical to
    ## `atomicCompareExchangeN`, except that the desired value is also passed
    ## through a pointer.

  ## The `*Fetch` procs perform the operation and return the new value;
  ## all memory models are valid.
  # Each proc binds the `__atomic_<op>_fetch` builtin named in its `importc`.
  proc atomicAddFetch*[T: AtomType](p: ptr T, val: T, mem: AtomMemModel): T {.
    importc: "__atomic_add_fetch", nodecl.}
  proc atomicSubFetch*[T: AtomType](p: ptr T, val: T, mem: AtomMemModel): T {.
    importc: "__atomic_sub_fetch", nodecl.}
  proc atomicOrFetch*[T: AtomType](p: ptr T, val: T, mem: AtomMemModel): T {.
    importc: "__atomic_or_fetch", nodecl.}
  proc atomicAndFetch*[T: AtomType](p: ptr T, val: T, mem: AtomMemModel): T {.
    importc: "__atomic_and_fetch", nodecl.}
  proc atomicXorFetch*[T: AtomType](p: ptr T, val: T, mem: AtomMemModel): T {.
    importc: "__atomic_xor_fetch", nodecl.}
  proc atomicNandFetch*[T: AtomType](p: ptr T, val: T, mem: AtomMemModel): T {.
    importc: "__atomic_nand_fetch", nodecl.}

  ## The `Fetch*` procs perform the operation and return the old value;
  ## all memory models are valid.
  # Each proc binds the `__atomic_fetch_<op>` builtin named in its `importc`.
  proc atomicFetchAdd*[T: AtomType](p: ptr T, val: T, mem: AtomMemModel): T {.
    importc: "__atomic_fetch_add", nodecl.}
  proc atomicFetchSub*[T: AtomType](p: ptr T, val: T, mem: AtomMemModel): T {.
    importc: "__atomic_fetch_sub", nodecl.}
  proc atomicFetchOr*[T: AtomType](p: ptr T, val: T, mem: AtomMemModel): T {.
    importc: "__atomic_fetch_or", nodecl.}
  proc atomicFetchAnd*[T: AtomType](p: ptr T, val: T, mem: AtomMemModel): T {.
    importc: "__atomic_fetch_and", nodecl.}
  proc atomicFetchXor*[T: AtomType](p: ptr T, val: T, mem: AtomMemModel): T {.
    importc: "__atomic_fetch_xor", nodecl.}
  proc atomicFetchNand*[T: AtomType](p: ptr T, val: T, mem: AtomMemModel): T {.
    importc: "__atomic_fetch_nand", nodecl.}

  proc atomicTestAndSet*(p: pointer, mem: AtomMemModel): bool {.
    importc: "__atomic_test_and_set", nodecl.}
    ## Binding of `__atomic_test_and_set`: atomic test-and-set on the byte at `p`.
    ## The byte is set to some implementation defined nonzero "set" value and the
    ## return value is true if and only if the previous contents were "set".
    ## All memory models are valid.

  proc atomicClear*(p: pointer, mem: AtomMemModel) {.
    importc: "__atomic_clear", nodecl.}
    ## Binding of `__atomic_clear`: atomic clear operation at `p`.
    ## After the operation, the location at `p` contains 0.
    ## Valid memory models: ATOMIC_RELAXED, ATOMIC_SEQ_CST, ATOMIC_RELEASE.

  proc atomicThreadFence*(mem: AtomMemModel) {.
    importc: "__atomic_thread_fence", nodecl.}
    ## Binding of `__atomic_thread_fence`: a synchronization fence between
    ## threads based on the specified memory model. All memory orders are valid.

  proc atomicSignalFence*(mem: AtomMemModel) {.
    importc: "__atomic_signal_fence", nodecl.}
    ## Binding of `__atomic_signal_fence`: a synchronization fence between a
    ## thread and signal handlers based in the same thread.
    ## All memory orders are valid.

  proc atomicAlwaysLockFree*(size: int, p: pointer): bool {.
    importc: "__atomic_always_lock_free", nodecl.}
    ## Binding of `__atomic_always_lock_free`: returns true if objects of `size`
    ## bytes always generate lock free atomic instructions for the target
    ## architecture. `size` must resolve to a compile-time constant and the
    ## result also resolves to a compile-time constant.
    ## `p` is an optional pointer to the object that may be used to determine
    ## alignment. A value of 0 indicates typical alignment should be used.
    ## The compiler may also ignore this parameter.

  proc atomicIsLockFree*(size: int, p: pointer): bool {.
    importc: "__atomic_is_lock_free", nodecl.}
    ## Binding of `__atomic_is_lock_free`: returns true if objects of `size`
    ## bytes always generate lock free atomic instructions for the target
    ## architecture. If it is not known to be lock free, a call is made to a
    ## runtime routine named `__atomic_is_lock_free`.
    ## `p` is an optional pointer to the object that may be used to determine
    ## alignment. A value of 0 indicates typical alignment should be used.
    ## The compiler may also ignore this parameter.

  # Full fence: a thread fence with the ATOMIC_SEQ_CST memory model.
  template fence*() = atomicThreadFence(ATOMIC_SEQ_CST)
elif someVcc:
  # Memory-model argument for the atomic procs of this branch; a distinct
  # `cint` so that plain integers are not accepted by accident.
  type AtomMemModel* = distinct cint

  # The constants are exported, as their counterparts in the GCC branch are:
  # the exported `atomicLoadN` takes one of them as its `mem` argument, so
  # code outside this file must be able to name them.
  const
    ATOMIC_RELAXED* = 0.AtomMemModel  ## No barriers or synchronization.
    ATOMIC_CONSUME* = 1.AtomMemModel  ## Data-dependency ordering only.
    ATOMIC_ACQUIRE* = 2.AtomMemModel  ## Acquire ordering.
    ATOMIC_RELEASE* = 3.AtomMemModel  ## Release ordering.
    ATOMIC_ACQ_REL* = 4.AtomMemModel  ## Acquire and release ordering.
    ATOMIC_SEQ_CST* = 5.AtomMemModel  ## Sequentially consistent ordering.

  # Equality borrowed from `cint`; the `barrier` template compares memory
  # models with it at compile time.
  proc `==`(x1, x2: AtomMemModel): bool {.borrow.}

  # Bindings of the `<intrin.h>` barrier intrinsics named in each `importc`.
  proc readBarrier() {.importc: "_ReadBarrier", header: "<intrin.h>".}
  proc writeBarrier() {.importc: "_WriteBarrier", header: "<intrin.h>".}
  proc fence*() {.importc: "_ReadWriteBarrier", header: "<intrin.h>".}

  # Emits the barrier chosen for `mem`. The selection uses `when`, so `mem`
  # has to be known at compile time and only the selected call is generated:
  # RELAXED emits nothing, CONSUME a read barrier, ACQUIRE a write barrier and
  # RELEASE / ACQ_REL / SEQ_CST a read-write barrier.
  # NOTE(review): ACQUIRE mapping to `writeBarrier` looks surprising for a
  # load-side ordering -- confirm this is intended.
  template barrier(mem: AtomMemModel) =
    when mem == ATOMIC_RELAXED: discard
    elif mem == ATOMIC_CONSUME: readBarrier()
    elif mem == ATOMIC_ACQUIRE: writeBarrier()
    elif mem == ATOMIC_RELEASE: fence()
    elif mem == ATOMIC_ACQ_REL: fence()
    elif mem == ATOMIC_SEQ_CST: fence()

  proc atomicLoadN*[T: AtomType](p: ptr T, mem: static[AtomMemModel]): T =
    ## Reads the value stored at `p`, then emits the barrier that `barrier`
    ## selects for the compile-time memory model `mem`, and returns the value
    ## that was read.
    let loaded = p[]
    barrier(mem)
    loaded

  # `addAndFetch` for vcc: binds the `_InterlockedExchangeAdd` intrinsic whose
  # operand width matches `int` (the `64` variant when `sizeof(int) == 8`).
  # NOTE: despite the name, the intrinsic yields the value from *before* the
  # addition; `atomicInc`/`atomicDec` apply the delta to the result again to
  # obtain the new value.
  when defined(cpp):
    # The C++ backend needs explicit casts to the intrinsic's parameter types.
    when sizeof(int) == 8:
      proc addAndFetch*(p: ptr int, val: int): int {.
        importcpp: "_InterlockedExchangeAdd64(static_cast<NI volatile *>(#), #)",
        header: "<intrin.h>".}
    else:
      proc addAndFetch*(p: ptr int, val: int): int {.
        importcpp: "_InterlockedExchangeAdd(reinterpret_cast<long volatile *>(#), static_cast<long>(#))",
        header: "<intrin.h>".}
  else:
    when sizeof(int) == 8:
      proc addAndFetch*(p: ptr int, val: int): int {.
        importc: "_InterlockedExchangeAdd64", header: "<intrin.h>".}
    else:
      proc addAndFetch*(p: ptr int, val: int): int {.
        importc: "_InterlockedExchangeAdd", header: "<intrin.h>".}

else:
  proc addAndFetch*(p: ptr int, val: int): int {.inline.} =
    ## Plain (non-atomic) fallback: adds `val` to the integer at `p` and
    ## returns the updated value.
    p[].inc(val)
    p[]

proc atomicInc*(memLoc: var int, x: int = 1): int =
  ## Increments `memLoc` by `x` and returns the new value. With thread support
  ## the update uses the compiler's atomic add (gcc-like or vcc); otherwise it
  ## is a plain increment.
  when hasThreadSupport and someGcc:
    atomicAddFetch(addr memLoc, x, ATOMIC_SEQ_CST)
  elif hasThreadSupport and someVcc:
    # The result of addAndFetch has `x` applied once more to obtain the value
    # after the increment.
    addAndFetch(addr memLoc, x) + x
  else:
    inc(memLoc, x)
    memLoc

proc atomicDec*(memLoc: var int, x: int = 1): int =
  ## Decrements `memLoc` by `x` and returns the new value. With thread support
  ## the update uses the compiler's atomic subtract/add (gcc-like or vcc);
  ## otherwise it is a plain decrement.
  when hasThreadSupport and someGcc:
    when declared(atomicSubFetch):
      atomicSubFetch(addr memLoc, x, ATOMIC_SEQ_CST)
    else:
      atomicAddFetch(addr memLoc, -x, ATOMIC_SEQ_CST)
  elif hasThreadSupport and someVcc:
    # The result of addAndFetch has `x` subtracted once more to obtain the
    # value after the decrement.
    addAndFetch(addr memLoc, -x) - x
  else:
    dec(memLoc, x)
    memLoc

when someVcc:
  # Bindings of the `<intrin.h>` compare-exchange intrinsics for 64, 32 and
  # 8 bit operands. Parameter order follows the intrinsics:
  # (destination, exchange, comparand); the return value is whatever the
  # intrinsic returns, which `cas` compares against the comparand.
  when defined(cpp):
    # C++ performs no implicit conversion from `void *` to the intrinsics'
    # pointer parameters, so every import casts to the exact pointer type the
    # intrinsic declares.
    proc interlockedCompareExchange64(p: pointer; exchange, comparand: int64): int64
      {.importcpp: "_InterlockedCompareExchange64(static_cast<NI64 volatile *>(#), #, #)", header: "<intrin.h>".}
    # `_InterlockedCompareExchange` is declared with `long volatile *` (the
    # same type the `_InterlockedExchangeAdd` import casts to); `NI volatile *`
    # is a different pointer type and does not convert to it.
    proc interlockedCompareExchange32(p: pointer; exchange, comparand: int32): int32
      {.importcpp: "_InterlockedCompareExchange(static_cast<long volatile *>(#), #, #)", header: "<intrin.h>".}
    proc interlockedCompareExchange8(p: pointer; exchange, comparand: byte): byte
      {.importcpp: "_InterlockedCompareExchange8(static_cast<char volatile *>(#), #, #)", header: "<intrin.h>".}
  else:
    proc interlockedCompareExchange64(p: pointer; exchange, comparand: int64): int64
      {.importc: "_InterlockedCompareExchange64", header: "<intrin.h>".}
    proc interlockedCompareExchange32(p: pointer; exchange, comparand: int32): int32
      {.importc: "_InterlockedCompareExchange", header: "<intrin.h>".}
    proc interlockedCompareExchange8(p: pointer; exchange, comparand: byte): byte
      {.importc: "_InterlockedCompareExchange8", header: "<intrin.h>".}

  proc cas*[T: bool|int|ptr](p: ptr T; oldValue, newValue: T): bool =
    ## Compare-and-swap for vcc. Picks the interlocked compare-exchange whose
    ## operand width equals `sizeof(T)` (8, 4 or 1 bytes) and returns true when
    ## the value it reports equals `oldValue`. Any other size of `T` is a
    ## compile-time error.
    when sizeof(T) == 8:
      let expected = cast[int64](oldValue)
      interlockedCompareExchange64(p, cast[int64](newValue), expected) == expected
    elif sizeof(T) == 4:
      let expected = cast[int32](oldValue)
      interlockedCompareExchange32(p, cast[int32](newValue), expected) == expected
    elif sizeof(T) == 1:
      let expected = cast[byte](oldValue)
      interlockedCompareExchange8(p, cast[byte](newValue), expected) == expected
    else:
      {.error: "invalid CAS instruction".}

elif defined(tcc):
  # Emits a C helper `__tcc_cas` built on inline assembly: a `lock`-prefixed
  # `cmpxchg` of `newVal` into `*ptr` with `oldVal` as the comparand in the
  # "a" register, followed by `sete` to capture whether the exchange happened.
  # The helper returns that flag. The amd64 variant uses `cmpxchgq`, the other
  # variant `cmpxchgl`.
  # NOTE(review): the C prototype is written with `int` in both variants, while
  # the Nim import of `__tcc_cas` passes Nim `int` / `ptr int` -- confirm the
  # operand widths agree with `cmpxchgq` on amd64.
  when defined(amd64):
    {.emit:"""
static int __tcc_cas(int *ptr, int oldVal, int newVal)
{
    unsigned char ret;
    __asm__ __volatile__ (
            "  lock\n"
            "  cmpxchgq %2,%1\n"
            "  sete %0\n"
            : "=q" (ret), "=m" (*ptr)
            : "r" (newVal), "m" (*ptr), "a" (oldVal)
            : "memory");

    return ret;
}
""".}
  else:
    # Variant for the remaining targets; the original author's note below
    # suggests a 4-byte `int` is expected here.
    #assert sizeof(int) == 4
    {.emit:"""
static int __tcc_cas(int *ptr, int oldVal, int newVal)
{
    unsigned char ret;
    __asm__ __volatile__ (
            "  lock\n"
            "  cmpxchgl %2,%1\n"
            "  sete %0\n"
            : "=q" (ret), "=m" (*ptr)
            : "r" (newVal), "m" (*ptr), "a" (oldVal)
            : "memory");

    return ret;
}
""".}

  # Nim-side declaration of the emitted C helper `__tcc_cas`.
  proc tcc_cas(p: ptr int; oldValue, newValue: int): bool
    {.importc: "__tcc_cas", nodecl.}
  proc cas*[T: bool|int|ptr](p: ptr T; oldValue, newValue: T): bool =
    ## Compare-and-swap for tcc: reinterprets the location and both values as
    ## `int` and forwards them to the `__tcc_cas` helper, returning its result.
    let
      target = cast[ptr int](p)
      expected = cast[int](oldValue)
      desired = cast[int](newValue)
    tcc_cas(target, expected, desired)
elif declared(atomicCompareExchangeN):
  proc cas*[T: bool|int|ptr](p: ptr T; oldValue, newValue: T): bool =
    ## Compare-and-swap built on `atomicCompareExchangeN`: the strong variant
    ## (`weak = false`) with ATOMIC_SEQ_CST for both the success and the
    ## failure memory model. Returns the builtin's result.
    # The builtin takes the expected value by pointer, hence the address of
    # the `oldValue` parameter is passed.
    atomicCompareExchangeN(p, oldValue.unsafeAddr, newValue, false, ATOMIC_SEQ_CST, ATOMIC_SEQ_CST)
else:
  # Fallback when none of the branches above applies: bind `cas` directly to
  # the `__sync_bool_compare_and_swap` builtin.
  # this is valid for GCC and Intel C++
  proc cas*[T: bool|int|ptr](p: ptr T; oldValue, newValue: T): bool
    {.importc: "__sync_bool_compare_and_swap", nodecl.}
  # XXX is this valid for 'int'?


# `cpuRelax`: per-compiler / per-CPU definition selected at compile time.
# No `cpuRelax` is declared when none of the branches applies.
when (defined(x86) or defined(amd64)) and someVcc:
  # vcc on x86/amd64: bind `YieldProcessor` from <windows.h>.
  proc cpuRelax* {.importc: "YieldProcessor", header: "<windows.h>".}
elif (defined(x86) or defined(amd64)) and (someGcc or defined(bcc)):
  # gcc-like or bcc on x86/amd64: emit a `pause` instruction with a "memory"
  # clobber.
  proc cpuRelax* {.inline.} =
    {.emit: """asm volatile("pause" ::: "memory");""".}
elif someGcc or defined(tcc):
  # Other CPUs with a gcc-like compiler, or tcc: an empty asm statement with
  # a "memory" clobber only.
  proc cpuRelax* {.inline.} =
    {.emit: """asm volatile("" ::: "memory");""".}
elif defined(icl):
  # Intel compiler: bind `_mm_pause` from xmmintrin.h.
  proc cpuRelax* {.importc: "_mm_pause", header: "xmmintrin.h".}
elif false:
  # Disabled branch (the condition is the literal `false`): a sleep-based
  # variant kept for reference.
  from os import sleep

  proc cpuRelax* {.inline.} = os.sleep(1)

when hasThreadSupport and not declared(fence):
  # XXX fixme
  proc fence*() {.inline.} =
    ## Fallback used when no compiler-specific `fence` was declared: performs
    ## a `cas` on a throwaway local and discards the outcome.
    var scratch: bool
    discard cas(addr scratch, false, true)

{.pop.}