summary refs log tree commit diff stats
diff options
context:
space:
mode:
authorAndreas Rumpf <rumpf_a@web.de>2023-05-14 16:58:28 +0200
committerGitHub <noreply@github.com>2023-05-14 16:58:28 +0200
commitf4a9b258c34a83efb74d9dea6853880e47f45b06 (patch)
tree3dc2c3fcf0e3fc3325a69614056bb7772675a6ab
parent0ece98620f8d9d7b874262c75fa148970626d44d (diff)
downloadNim-f4a9b258c34a83efb74d9dea6853880e47f45b06.tar.gz
isolation spec update; WIP (#21843)
* isolation spec update; WIP

* wip

* docs update, WIP

* progress

* Update doc/manual.md
-rw-r--r--compiler/ast.nim5
-rw-r--r--compiler/condsyms.nim2
-rw-r--r--compiler/isolation_check.nim76
-rw-r--r--compiler/pragmas.nim9
-rw-r--r--compiler/wordrecg.nim1
-rw-r--r--doc/manual.md10
-rw-r--r--doc/manual_experimental.md123
-rw-r--r--lib/std/isolation.nim6
-rw-r--r--tests/isolate/tisolated_lock.nim67
9 files changed, 284 insertions, 15 deletions
diff --git a/compiler/ast.nim b/compiler/ast.nim
index 41e538742..3ed9bf2b2 100644
--- a/compiler/ast.nim
+++ b/compiler/ast.nim
@@ -515,7 +515,7 @@ type
     nfSkipFieldChecking # node skips field visable checking
 
   TNodeFlags* = set[TNodeFlag]
-  TTypeFlag* = enum   # keep below 32 for efficiency reasons (now: 46)
+  TTypeFlag* = enum   # keep below 32 for efficiency reasons (now: 47)
     tfVarargs,        # procedure has C styled varargs
                       # tyArray type represeting a varargs list
     tfNoSideEffect,   # procedure type does not allow side effects
@@ -585,6 +585,7 @@ type
     tfIsConstructor
     tfEffectSystemWorkaround
     tfIsOutParam
+    tfSendable
 
   TTypeFlags* = set[TTypeFlag]
 
@@ -634,7 +635,7 @@ const
   skError* = skUnknown
 
 var
-  eqTypeFlags* = {tfIterator, tfNotNil, tfVarIsPtr, tfGcSafe, tfNoSideEffect, tfIsOutParam}
+  eqTypeFlags* = {tfIterator, tfNotNil, tfVarIsPtr, tfGcSafe, tfNoSideEffect, tfIsOutParam, tfSendable}
     ## type flags that are essential for type equality.
     ## This is now a variable because for emulation of version:1.0 we
     ## might exclude {tfGcSafe, tfNoSideEffect}.
diff --git a/compiler/condsyms.nim b/compiler/condsyms.nim
index ea635e52e..9d9ba1605 100644
--- a/compiler/condsyms.nim
+++ b/compiler/condsyms.nim
@@ -154,4 +154,4 @@ proc initDefines*(symbols: StringTableRef) =
   defineSymbol("nimHasWarnBareExcept")
   defineSymbol("nimHasDup")
   defineSymbol("nimHasChecksums")
-
+  defineSymbol("nimHasSendable")
diff --git a/compiler/isolation_check.nim b/compiler/isolation_check.nim
index 2674605dc..273bfb7f9 100644
--- a/compiler/isolation_check.nim
+++ b/compiler/isolation_check.nim
@@ -72,6 +72,69 @@ proc isValueOnlyType(t: PType): bool =
   proc wrap(t: PType): bool {.nimcall.} = t.kind in {tyRef, tyPtr, tyVar, tyLent}
   result = not types.searchTypeFor(t, wrap)
 
+type
+  SearchResult = enum
+    NotFound, Abort, Found
+
+proc containsDangerousRefAux(t: PType; marker: var IntSet): SearchResult
+
+proc containsDangerousRefAux(n: PNode; marker: var IntSet): SearchResult =
+  result = NotFound
+  case n.kind
+  of nkRecList:
+    for i in 0..<n.len:
+      result = containsDangerousRefAux(n[i], marker)
+      if result == Found: return result
+  of nkRecCase:
+    assert(n[0].kind == nkSym)
+    result = containsDangerousRefAux(n[0], marker)
+    if result == Found: return result
+    for i in 1..<n.len:
+      case n[i].kind
+      of nkOfBranch, nkElse:
+        result = containsDangerousRefAux(lastSon(n[i]), marker)
+        if result == Found: return result
+      else: discard
+  of nkSym:
+    result = containsDangerousRefAux(n.sym.typ, marker)
+  else: discard
+
+proc containsDangerousRefAux(t: PType; marker: var IntSet): SearchResult =
+  result = NotFound
+  if t == nil: return result
+  if containsOrIncl(marker, t.id): return result
+
+  if t.kind == tyRef or (t.kind == tyProc and t.callConv == ccClosure):
+    result = Found
+  elif tfSendable in t.flags:
+    result = Abort
+  else:
+    # continue the type traversal:
+    result = NotFound
+
+  if result != NotFound: return result
+  case t.kind
+  of tyObject:
+    if t[0] != nil:
+      result = containsDangerousRefAux(t[0].skipTypes(skipPtrs), marker)
+    if result == NotFound: result = containsDangerousRefAux(t.n, marker)
+  of tyGenericInst, tyDistinct, tyAlias, tySink:
+    result = containsDangerousRefAux(lastSon(t), marker)
+  of tyArray, tySet, tyTuple, tySequence:
+    for i in 0..<t.len:
+      result = containsDangerousRefAux(t[i], marker)
+      if result == Found: return result
+  else:
+    discard
+
+proc containsDangerousRef(t: PType): bool =
+  # a `ref` type is "dangerous" if it occurs not within a type that is like `Isolated[T]`.
+  # For example:
+  # `ref int` # dangerous
+  # `Isolated[ref int]` # not dangerous
+  var marker = initIntSet()
+  result = containsDangerousRefAux(t, marker) == Found
+
 proc canAlias*(arg, ret: PType): bool =
   if isValueOnlyType(arg):
     # can alias only with addr(arg.x) and we don't care if it is not safe
@@ -80,12 +143,15 @@ proc canAlias*(arg, ret: PType): bool =
     var marker = initIntSet()
     result = canAlias(arg, ret, marker)
 
+const
+  SomeVar = {skForVar, skParam, skVar, skLet, skConst, skResult, skTemp}
+
 proc containsVariable(n: PNode): bool =
   case n.kind
   of nodesToIgnoreSet:
     result = false
   of nkSym:
-    result = n.sym.kind in {skForVar, skParam, skVar, skLet, skConst, skResult, skTemp}
+    result = n.sym.kind in SomeVar
   else:
     for ch in n:
       if containsVariable(ch): return true
@@ -109,7 +175,7 @@ proc checkIsolate*(n: PNode): bool =
           discard "fine, it is isolated already"
         else:
           let argType = n[i].typ
-          if argType != nil and not isCompileTimeOnly(argType) and containsTyRef(argType):
+          if argType != nil and not isCompileTimeOnly(argType) and containsDangerousRef(argType):
             if argType.canAlias(n.typ) or containsVariable(n[i]):
               # bug #19013: Alias information is not enough, we need to check for potential
               # "overlaps". I claim the problem can only happen by reading again from a location
@@ -143,6 +209,12 @@ proc checkIsolate*(n: PNode): bool =
         result = checkIsolate(n[^1])
       else:
         result = false
+    of nkSym:
+      result = true
+      if n.sym.kind in SomeVar:
+        let argType = n.typ
+        if argType != nil and not isCompileTimeOnly(argType) and containsDangerousRef(argType):
+          result = false
     else:
       # unanalysable expression:
       result = false
diff --git a/compiler/pragmas.nim b/compiler/pragmas.nim
index 1857ef3ce..6fe09921d 100644
--- a/compiler/pragmas.nim
+++ b/compiler/pragmas.nim
@@ -71,7 +71,8 @@ const
     wPure, wHeader, wCompilerProc, wCore, wFinal, wSize, wShallow,
     wIncompleteStruct, wCompleteStruct, wByCopy, wByRef,
     wInheritable, wGensym, wInject, wRequiresInit, wUnchecked, wUnion, wPacked,
-    wCppNonPod, wBorrow, wGcSafe, wPartial, wExplain, wPackage, wCodegenDecl}
+    wCppNonPod, wBorrow, wGcSafe, wPartial, wExplain, wPackage, wCodegenDecl,
+    wSendable}
   fieldPragmas* = declPragmas + {wGuard, wBitsize, wCursor,
     wRequiresInit, wNoalias, wAlign} - {wExportNims, wNodecl} # why exclude these?
   varPragmas* = declPragmas + {wVolatile, wRegister, wThreadVar,
@@ -1057,6 +1058,12 @@ proc singlePragma(c: PContext, sym: PSym, n: PNode, i: var int,
         if sym.typ != nil:
           incl(sym.typ.flags, tfThread)
           if sym.typ.callConv == ccClosure: sym.typ.callConv = ccNimCall
+      of wSendable:
+        noVal(c, it)
+        if sym != nil and sym.typ != nil:
+          incl(sym.typ.flags, tfSendable)
+        else:
+          invalidPragma(c, it)
       of wGcSafe:
         noVal(c, it)
         if sym != nil:
diff --git a/compiler/wordrecg.nim b/compiler/wordrecg.nim
index 3369d244e..8763a5250 100644
--- a/compiler/wordrecg.nim
+++ b/compiler/wordrecg.nim
@@ -113,6 +113,7 @@ type
 
     wInOut = "inout", wByCopy = "bycopy", wByRef = "byref", wOneWay = "oneway",
     wBitsize = "bitsize", wImportHidden = "all",
+    wSendable = "sendable"
 
   TSpecialWords* = set[TSpecialWord]
 
diff --git a/doc/manual.md b/doc/manual.md
index f3fe62c49..8075da18f 100644
--- a/doc/manual.md
+++ b/doc/manual.md
@@ -6889,7 +6889,7 @@ iterator in which case the overloading resolution takes place:
   var x = 4
   write(stdout, x) # not ambiguous: uses the module C's x
   ```
-Modules can share their name, however, when trying to qualify a identifier with the module name the compiler will fail with ambiguous identifier error. One can qualify the identifier by aliasing the module.
+Modules can share their name, however, when trying to qualify an identifier with the module name the compiler will fail with ambiguous identifier error. One can qualify the identifier by aliasing the module.
 
 
 ```nim
@@ -8581,10 +8581,8 @@ Threads
 The `--threads:on`:option: command-line switch is enabled by default. The [typedthreads module](typedthreads.html) module then contains several threading primitives. See [spawn](manual_experimental.html#parallel-amp-spawn) for
 further details.
 
-The only way to create a thread is via `spawn` or
-`createThread`. The invoked proc must not use `var` parameters nor must
-any of its parameters contain a `ref` or `closure` type. This enforces
-the *no heap sharing restriction*.
+The only ways to create a thread is via `spawn` or `createThread`.
+
 
 Thread pragma
 -------------
@@ -8703,7 +8701,7 @@ model low level lockfree mechanisms:
 The `locks` pragma takes a list of lock expressions `locks: [a, b, ...]`
 in order to support *multi lock* statements. Why these are essential is
 explained in the [lock levels](manual_experimental.md#lock-levels) section
-of experimental manual.
+of the experimental manual.
 
 
 ### Protecting general locations
diff --git a/doc/manual_experimental.md b/doc/manual_experimental.md
index 37453bc89..93d993879 100644
--- a/doc/manual_experimental.md
+++ b/doc/manual_experimental.md
@@ -2007,3 +2007,126 @@ The field is within a `case` section of an `object`.
 
 **Note**: The implementation of "strict case objects" is experimental but the concept
 is solid and it is expected that eventually this mode becomes the default in later versions.
+
+
+Threading under ARC/ORC
+=======================
+
+ARC/ORC supports a shared heap out of the box. This means that messages can be sent between
+threads without copies. However, without copying the data there is an inherent danger of
+data races. Data races are prevented at compile-time if it is enforced that
+only **isolated** subgraphs can be sent around.
+
+
+Isolation
+---------
+
+The standard library module `isolation.nim` provides a generic type `Isolated[T]` that
+captures the important notion that nothing else can reference the graph that is wrapped
+inside `Isolated[T]`. It is what a channel implementation should use in order to enforce
+the freedom of data races:
+
+  ```nim
+
+  proc send*[T](c: var Channel[T]; msg: sink Isolated[T])
+  proc recv*[T](c: var Channel[T]): T
+    ## Note: Returns T, not Isolated[T] for convenience.
+
+  proc recvIso*[T](c: var Channel[T]): Isolated[T]
+    ## remembers the data is Isolated[T].
+
+  ```
+
+In order to create an `Isolated` graph one has to use either `isolate` or `unsafeIsolate`.
+`unsafeIsolate` is as its name says unsafe and no checking is performed. It should be considered
+to be as dangerous as a `cast` operation.
+
+
+Construction must ensure that the invariant holds, namely that the wrapped `T`
+is free of external aliases into it. `isolate` ensures this invariant. It is
+inspired by Pony's `recover` construct:
+
+  ```nim
+
+  func isolate(x: sink T): Isolated[T] {.magic: "Isolate".}
+
+  ```
+
+
+As you can see, this is a new builtin because the check it performs on `x` is non-trivial:
+
+If `T` does not contain a `ref` or `closure` type, it is isolated. Else the syntactic
+structure of `x` is analyzed:
+
+- Literals like `nil`, `4`, `"abc"` are isolated.
+- A local variable or a routine parameter is isolated if either of these conditions is true:
+  1. Its type is annotated with the `.sendable` pragma. Note `Isolated[T]` is annotated as
+     `.sendable`.
+  2. Its type contains the potentially dangerous `ref` and `proc {.closure}` types
+     only in places that are protected via a `.sendable` container.
+
+- An array constructor `[x...]` is isolated if every element `x` is isolated.
+- An object constructor `Obj(x...)` is isolated if every element `x` is isolated.
+- An `if` or `case` expression is isolated if all possible values the expression
+  may return are isolated.
+- A type conversion `C(x)` is isolated if `x` is isolated. Analogous for `cast`
+  expressions.
+- A function call `f(x...)` is isolated if `f` is `.noSideEffect` and for every argument `x`:
+  - `x` is isolated **or**
+  - `f`'s return type cannot *alias* `x`'s type. This is checked via a form of alias analysis as explained in the next paragraph.
+
+
+
+Alias analysis
+--------------
+
+We start with an important, simple case that must be valid: Sending the result
+of `parseJson` to a channel. Since the signature
+is `func parseJson(input: string): JsonNode` it is easy to see that JsonNode
+can never simply be a view into `input` which is a `string`.
+
+A different case is the identity function `id`, `send id(myJsonGraph)` must be
+invalid because we do not know how many aliases into `myJsonGraph` exist
+elsewhere.
+
+In general type `A` can alias type `T` if:
+
+- `A` and `T` are the same types.
+- `A` is a distinct type derived from `T`.
+- `A` is a field inside `T` if `T` is a final object type.
+- `T` is an inheritable object type. (An inherited type could always contain
+  a `field: A`).
+- `T` is a closure type. Reason: `T`'s environment can contain a field of
+  type `A`.
+- `A` is the element type of `T` if `T` is an array, sequence or pointer type.
+
+
+
+
+Sendable pragma
+---------------
+
+A container type can be marked as `.sendable`. `.sendable` declares that the type
+encapsulates a `ref` type effectively so that a variable of this container type
+can be used in an `isolate` context:
+
+  ```nim
+
+  type
+    Isolated*[T] {.sendable.} = object ## Isolated data can only be moved, not copied.
+      value: T
+
+  proc `=copy`*[T](dest: var Isolated[T]; src: Isolated[T]) {.error.}
+
+  proc `=sink`*[T](dest: var Isolated[T]; src: Isolated[T]) {.inline.} =
+    # delegate to value's sink operation
+    `=sink`(dest.value, src.value)
+
+  proc `=destroy`*[T](dest: var Isolated[T]) {.inline.} =
+    # delegate to value's destroy operation
+    `=destroy`(dest.value)
+
+  ```
+
+The `.sendable` pragma itself is an experimenal, unchecked, unsafe annotation. It is
+currently only used by `Isolated[T]`.
diff --git a/lib/std/isolation.nim b/lib/std/isolation.nim
index 7d6ac6092..b03e00651 100644
--- a/lib/std/isolation.nim
+++ b/lib/std/isolation.nim
@@ -15,7 +15,7 @@
 ##
 
 type
-  Isolated*[T] = object ## Isolated data can only be moved, not copied.
+  Isolated*[T] {.sendable.} = object ## Isolated data can only be moved, not copied.
     value: T
 
 proc `=copy`*[T](dest: var Isolated[T]; src: Isolated[T]) {.error.}
@@ -38,9 +38,9 @@ func isolate*[T](value: sink T): Isolated[T] {.magic: "Isolate".} =
 
 func unsafeIsolate*[T](value: sink T): Isolated[T] =
   ## Creates an isolated subgraph from the expression `value`.
-  ## 
+  ##
   ## .. warning:: The proc doesn't check whether `value` is isolated.
-  ## 
+  ##
   Isolated[T](value: value)
 
 func extract*[T](src: var Isolated[T]): T =
diff --git a/tests/isolate/tisolated_lock.nim b/tests/isolate/tisolated_lock.nim
new file mode 100644
index 000000000..312abf0f6
--- /dev/null
+++ b/tests/isolate/tisolated_lock.nim
@@ -0,0 +1,67 @@
+discard """
+  cmd: "nim $target --threads:on $options $file"
+  action: "compile"
+"""
+
+import std / [os, locks, atomics, isolation]
+
+type
+  MyList {.acyclic.} = ref object
+    data: string
+    next: Isolated[MyList]
+
+template withMyLock*(a: Lock, body: untyped) =
+  acquire(a)
+  {.gcsafe.}:
+    try:
+      body
+    finally:
+      release(a)
+
+var head: Isolated[MyList]
+var headL: Lock
+
+var shouldStop: Atomic[bool]
+
+initLock headL
+
+proc send(x: sink string) =
+  withMyLock headL:
+    head = isolate MyList(data: x, next: move head)
+
+proc worker() {.thread.} =
+  var workItem = MyList(nil)
+  var echoed = 0
+  while true:
+    withMyLock headL:
+      var h = extract head
+      if h != nil:
+        workItem = h
+        # workitem is now isolated:
+        head = move h.next
+      else:
+        workItem = nil
+    # workItem is isolated, so we can access it outside
+    # the lock:
+    if workItem.isNil:
+      if shouldStop.load:
+        break
+      else:
+        # give producer time to breath:
+        os.sleep 30
+    else:
+      if echoed < 100:
+        echo workItem.data
+      inc echoed
+
+var thr: Thread[void]
+createThread(thr, worker)
+
+send "abc"
+send "def"
+for i in 0 ..< 10_000:
+  send "xzy"
+  send "zzz"
+shouldStop.store true
+
+joinThread(thr)