diff options
author | Andreas Rumpf <rumpf_a@web.de> | 2023-05-14 16:58:28 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2023-05-14 16:58:28 +0200 |
commit | f4a9b258c34a83efb74d9dea6853880e47f45b06 (patch) | |
tree | 3dc2c3fcf0e3fc3325a69614056bb7772675a6ab | |
parent | 0ece98620f8d9d7b874262c75fa148970626d44d (diff) | |
download | Nim-f4a9b258c34a83efb74d9dea6853880e47f45b06.tar.gz |
isolation spec update; WIP (#21843)
* isolation spec update; WIP * wip * docs update, WIP * progress * Update doc/manual.md
-rw-r--r-- | compiler/ast.nim | 5 | ||||
-rw-r--r-- | compiler/condsyms.nim | 2 | ||||
-rw-r--r-- | compiler/isolation_check.nim | 76 | ||||
-rw-r--r-- | compiler/pragmas.nim | 9 | ||||
-rw-r--r-- | compiler/wordrecg.nim | 1 | ||||
-rw-r--r-- | doc/manual.md | 10 | ||||
-rw-r--r-- | doc/manual_experimental.md | 123 | ||||
-rw-r--r-- | lib/std/isolation.nim | 6 | ||||
-rw-r--r-- | tests/isolate/tisolated_lock.nim | 67 |
9 files changed, 284 insertions, 15 deletions
diff --git a/compiler/ast.nim b/compiler/ast.nim index 41e538742..3ed9bf2b2 100644 --- a/compiler/ast.nim +++ b/compiler/ast.nim @@ -515,7 +515,7 @@ type nfSkipFieldChecking # node skips field visable checking TNodeFlags* = set[TNodeFlag] - TTypeFlag* = enum # keep below 32 for efficiency reasons (now: 46) + TTypeFlag* = enum # keep below 32 for efficiency reasons (now: 47) tfVarargs, # procedure has C styled varargs # tyArray type represeting a varargs list tfNoSideEffect, # procedure type does not allow side effects @@ -585,6 +585,7 @@ type tfIsConstructor tfEffectSystemWorkaround tfIsOutParam + tfSendable TTypeFlags* = set[TTypeFlag] @@ -634,7 +635,7 @@ const skError* = skUnknown var - eqTypeFlags* = {tfIterator, tfNotNil, tfVarIsPtr, tfGcSafe, tfNoSideEffect, tfIsOutParam} + eqTypeFlags* = {tfIterator, tfNotNil, tfVarIsPtr, tfGcSafe, tfNoSideEffect, tfIsOutParam, tfSendable} ## type flags that are essential for type equality. ## This is now a variable because for emulation of version:1.0 we ## might exclude {tfGcSafe, tfNoSideEffect}. diff --git a/compiler/condsyms.nim b/compiler/condsyms.nim index ea635e52e..9d9ba1605 100644 --- a/compiler/condsyms.nim +++ b/compiler/condsyms.nim @@ -154,4 +154,4 @@ proc initDefines*(symbols: StringTableRef) = defineSymbol("nimHasWarnBareExcept") defineSymbol("nimHasDup") defineSymbol("nimHasChecksums") - + defineSymbol("nimHasSendable") diff --git a/compiler/isolation_check.nim b/compiler/isolation_check.nim index 2674605dc..273bfb7f9 100644 --- a/compiler/isolation_check.nim +++ b/compiler/isolation_check.nim @@ -72,6 +72,69 @@ proc isValueOnlyType(t: PType): bool = proc wrap(t: PType): bool {.nimcall.} = t.kind in {tyRef, tyPtr, tyVar, tyLent} result = not types.searchTypeFor(t, wrap) +type + SearchResult = enum + NotFound, Abort, Found + +proc containsDangerousRefAux(t: PType; marker: var IntSet): SearchResult + +proc containsDangerousRefAux(n: PNode; marker: var IntSet): SearchResult = + result = NotFound + case n.kind + of nkRecList: + for i in 0..<n.len: + result = containsDangerousRefAux(n[i], marker) + if result == Found: return result + of nkRecCase: + assert(n[0].kind == nkSym) + result = containsDangerousRefAux(n[0], marker) + if result == Found: return result + for i in 1..<n.len: + case n[i].kind + of nkOfBranch, nkElse: + result = containsDangerousRefAux(lastSon(n[i]), marker) + if result == Found: return result + else: discard + of nkSym: + result = containsDangerousRefAux(n.sym.typ, marker) + else: discard + +proc containsDangerousRefAux(t: PType; marker: var IntSet): SearchResult = + result = NotFound + if t == nil: return result + if containsOrIncl(marker, t.id): return result + + if t.kind == tyRef or (t.kind == tyProc and t.callConv == ccClosure): + result = Found + elif tfSendable in t.flags: + result = Abort + else: + # continue the type traversal: + result = NotFound + + if result != NotFound: return result + case t.kind + of tyObject: + if t[0] != nil: + result = containsDangerousRefAux(t[0].skipTypes(skipPtrs), marker) + if result == NotFound: result = containsDangerousRefAux(t.n, marker) + of tyGenericInst, tyDistinct, tyAlias, tySink: + result = containsDangerousRefAux(lastSon(t), marker) + of tyArray, tySet, tyTuple, tySequence: + for i in 0..<t.len: + result = containsDangerousRefAux(t[i], marker) + if result == Found: return result + else: + discard + +proc containsDangerousRef(t: PType): bool = + # a `ref` type is "dangerous" if it occurs not within a type that is like `Isolated[T]`. + # For example: + # `ref int` # dangerous + # `Isolated[ref int]` # not dangerous + var marker = initIntSet() + result = containsDangerousRefAux(t, marker) == Found + proc canAlias*(arg, ret: PType): bool = if isValueOnlyType(arg): # can alias only with addr(arg.x) and we don't care if it is not safe @@ -80,12 +143,15 @@ proc canAlias*(arg, ret: PType): bool = var marker = initIntSet() result = canAlias(arg, ret, marker) +const + SomeVar = {skForVar, skParam, skVar, skLet, skConst, skResult, skTemp} + proc containsVariable(n: PNode): bool = case n.kind of nodesToIgnoreSet: result = false of nkSym: - result = n.sym.kind in {skForVar, skParam, skVar, skLet, skConst, skResult, skTemp} + result = n.sym.kind in SomeVar else: for ch in n: if containsVariable(ch): return true @@ -109,7 +175,7 @@ proc checkIsolate*(n: PNode): bool = discard "fine, it is isolated already" else: let argType = n[i].typ - if argType != nil and not isCompileTimeOnly(argType) and containsTyRef(argType): + if argType != nil and not isCompileTimeOnly(argType) and containsDangerousRef(argType): if argType.canAlias(n.typ) or containsVariable(n[i]): # bug #19013: Alias information is not enough, we need to check for potential # "overlaps". I claim the problem can only happen by reading again from a location @@ -143,6 +209,12 @@ proc checkIsolate*(n: PNode): bool = result = checkIsolate(n[^1]) else: result = false + of nkSym: + result = true + if n.sym.kind in SomeVar: + let argType = n.typ + if argType != nil and not isCompileTimeOnly(argType) and containsDangerousRef(argType): + result = false else: # unanalysable expression: result = false diff --git a/compiler/pragmas.nim b/compiler/pragmas.nim index 1857ef3ce..6fe09921d 100644 --- a/compiler/pragmas.nim +++ b/compiler/pragmas.nim @@ -71,7 +71,8 @@ const wPure, wHeader, wCompilerProc, wCore, wFinal, wSize, wShallow, wIncompleteStruct, wCompleteStruct, wByCopy, wByRef, wInheritable, wGensym, wInject, wRequiresInit, wUnchecked, wUnion, wPacked, - wCppNonPod, wBorrow, wGcSafe, wPartial, wExplain, wPackage, wCodegenDecl} + wCppNonPod, wBorrow, wGcSafe, wPartial, wExplain, wPackage, wCodegenDecl, + wSendable} fieldPragmas* = declPragmas + {wGuard, wBitsize, wCursor, wRequiresInit, wNoalias, wAlign} - {wExportNims, wNodecl} # why exclude these? varPragmas* = declPragmas + {wVolatile, wRegister, wThreadVar, @@ -1057,6 +1058,12 @@ proc singlePragma(c: PContext, sym: PSym, n: PNode, i: var int, if sym.typ != nil: incl(sym.typ.flags, tfThread) if sym.typ.callConv == ccClosure: sym.typ.callConv = ccNimCall + of wSendable: + noVal(c, it) + if sym != nil and sym.typ != nil: + incl(sym.typ.flags, tfSendable) + else: + invalidPragma(c, it) of wGcSafe: noVal(c, it) if sym != nil: diff --git a/compiler/wordrecg.nim b/compiler/wordrecg.nim index 3369d244e..8763a5250 100644 --- a/compiler/wordrecg.nim +++ b/compiler/wordrecg.nim @@ -113,6 +113,7 @@ type wInOut = "inout", wByCopy = "bycopy", wByRef = "byref", wOneWay = "oneway", wBitsize = "bitsize", wImportHidden = "all", + wSendable = "sendable" TSpecialWords* = set[TSpecialWord] diff --git a/doc/manual.md b/doc/manual.md index f3fe62c49..8075da18f 100644 --- a/doc/manual.md +++ b/doc/manual.md @@ -6889,7 +6889,7 @@ iterator in which case the overloading resolution takes place: var x = 4 write(stdout, x) # not ambiguous: uses the module C's x ``` -Modules can share their name, however, when trying to qualify a identifier with the module name the compiler will fail with ambiguous identifier error. One can qualify the identifier by aliasing the module. +Modules can share their name, however, when trying to qualify an identifier with the module name the compiler will fail with ambiguous identifier error. One can qualify the identifier by aliasing the module. ```nim @@ -8581,10 +8581,8 @@ Threads The `--threads:on`:option: command-line switch is enabled by default. The [typedthreads module](typedthreads.html) module then contains several threading primitives. See [spawn](manual_experimental.html#parallel-amp-spawn) for further details. -The only way to create a thread is via `spawn` or -`createThread`. The invoked proc must not use `var` parameters nor must -any of its parameters contain a `ref` or `closure` type. This enforces -the *no heap sharing restriction*. +The only ways to create a thread is via `spawn` or `createThread`. + Thread pragma ------------- @@ -8703,7 +8701,7 @@ model low level lockfree mechanisms: The `locks` pragma takes a list of lock expressions `locks: [a, b, ...]` in order to support *multi lock* statements. Why these are essential is explained in the [lock levels](manual_experimental.md#lock-levels) section -of experimental manual. +of the experimental manual. ### Protecting general locations diff --git a/doc/manual_experimental.md b/doc/manual_experimental.md index 37453bc89..93d993879 100644 --- a/doc/manual_experimental.md +++ b/doc/manual_experimental.md @@ -2007,3 +2007,126 @@ The field is within a `case` section of an `object`. **Note**: The implementation of "strict case objects" is experimental but the concept is solid and it is expected that eventually this mode becomes the default in later versions. + + +Threading under ARC/ORC +======================= + +ARC/ORC supports a shared heap out of the box. This means that messages can be sent between +threads without copies. However, without copying the data there is an inherent danger of +data races. Data races are prevented at compile-time if it is enforced that +only **isolated** subgraphs can be sent around. + + +Isolation +--------- + +The standard library module `isolation.nim` provides a generic type `Isolated[T]` that +captures the important notion that nothing else can reference the graph that is wrapped +inside `Isolated[T]`. It is what a channel implementation should use in order to enforce +the freedom of data races: + + ```nim + + proc send*[T](c: var Channel[T]; msg: sink Isolated[T]) + proc recv*[T](c: var Channel[T]): T + ## Note: Returns T, not Isolated[T] for convenience. + + proc recvIso*[T](c: var Channel[T]): Isolated[T] + ## remembers the data is Isolated[T]. + + ``` + +In order to create an `Isolated` graph one has to use either `isolate` or `unsafeIsolate`. +`unsafeIsolate` is as its name says unsafe and no checking is performed. It should be considered +to be as dangerous as a `cast` operation. + + +Construction must ensure that the invariant holds, namely that the wrapped `T` +is free of external aliases into it. `isolate` ensures this invariant. It is +inspired by Pony's `recover` construct: + + ```nim + + func isolate(x: sink T): Isolated[T] {.magic: "Isolate".} + + ``` + + +As you can see, this is a new builtin because the check it performs on `x` is non-trivial: + +If `T` does not contain a `ref` or `closure` type, it is isolated. Else the syntactic +structure of `x` is analyzed: + +- Literals like `nil`, `4`, `"abc"` are isolated. +- A local variable or a routine parameter is isolated if either of these conditions is true: + 1. Its type is annotated with the `.sendable` pragma. Note `Isolated[T]` is annotated as + `.sendable`. + 2. Its type contains the potentially dangerous `ref` and `proc {.closure}` types + only in places that are protected via a `.sendable` container. + +- An array constructor `[x...]` is isolated if every element `x` is isolated. +- An object constructor `Obj(x...)` is isolated if every element `x` is isolated. +- An `if` or `case` expression is isolated if all possible values the expression + may return are isolated. +- A type conversion `C(x)` is isolated if `x` is isolated. Analogous for `cast` + expressions. +- A function call `f(x...)` is isolated if `f` is `.noSideEffect` and for every argument `x`: + - `x` is isolated **or** + - `f`'s return type cannot *alias* `x`'s type. This is checked via a form of alias analysis as explained in the next paragraph. + + + +Alias analysis +-------------- + +We start with an important, simple case that must be valid: Sending the result +of `parseJson` to a channel. Since the signature +is `func parseJson(input: string): JsonNode` it is easy to see that JsonNode +can never simply be a view into `input` which is a `string`. + +A different case is the identity function `id`, `send id(myJsonGraph)` must be +invalid because we do not know how many aliases into `myJsonGraph` exist +elsewhere. + +In general type `A` can alias type `T` if: + +- `A` and `T` are the same types. +- `A` is a distinct type derived from `T`. +- `A` is a field inside `T` if `T` is a final object type. +- `T` is an inheritable object type. (An inherited type could always contain + a `field: A`). +- `T` is a closure type. Reason: `T`'s environment can contain a field of + type `A`. +- `A` is the element type of `T` if `T` is an array, sequence or pointer type. + + + + +Sendable pragma +--------------- + +A container type can be marked as `.sendable`. `.sendable` declares that the type +encapsulates a `ref` type effectively so that a variable of this container type +can be used in an `isolate` context: + + ```nim + + type + Isolated*[T] {.sendable.} = object ## Isolated data can only be moved, not copied. + value: T + + proc `=copy`*[T](dest: var Isolated[T]; src: Isolated[T]) {.error.} + + proc `=sink`*[T](dest: var Isolated[T]; src: Isolated[T]) {.inline.} = + # delegate to value's sink operation + `=sink`(dest.value, src.value) + + proc `=destroy`*[T](dest: var Isolated[T]) {.inline.} = + # delegate to value's destroy operation + `=destroy`(dest.value) + + ``` + +The `.sendable` pragma itself is an experimenal, unchecked, unsafe annotation. It is +currently only used by `Isolated[T]`. diff --git a/lib/std/isolation.nim b/lib/std/isolation.nim index 7d6ac6092..b03e00651 100644 --- a/lib/std/isolation.nim +++ b/lib/std/isolation.nim @@ -15,7 +15,7 @@ ## type - Isolated*[T] = object ## Isolated data can only be moved, not copied. + Isolated*[T] {.sendable.} = object ## Isolated data can only be moved, not copied. value: T proc `=copy`*[T](dest: var Isolated[T]; src: Isolated[T]) {.error.} @@ -38,9 +38,9 @@ func isolate*[T](value: sink T): Isolated[T] {.magic: "Isolate".} = func unsafeIsolate*[T](value: sink T): Isolated[T] = ## Creates an isolated subgraph from the expression `value`. - ## + ## ## .. warning:: The proc doesn't check whether `value` is isolated. - ## + ## Isolated[T](value: value) func extract*[T](src: var Isolated[T]): T = diff --git a/tests/isolate/tisolated_lock.nim b/tests/isolate/tisolated_lock.nim new file mode 100644 index 000000000..312abf0f6 --- /dev/null +++ b/tests/isolate/tisolated_lock.nim @@ -0,0 +1,67 @@ +discard """ + cmd: "nim $target --threads:on $options $file" + action: "compile" +""" + +import std / [os, locks, atomics, isolation] + +type + MyList {.acyclic.} = ref object + data: string + next: Isolated[MyList] + +template withMyLock*(a: Lock, body: untyped) = + acquire(a) + {.gcsafe.}: + try: + body + finally: + release(a) + +var head: Isolated[MyList] +var headL: Lock + +var shouldStop: Atomic[bool] + +initLock headL + +proc send(x: sink string) = + withMyLock headL: + head = isolate MyList(data: x, next: move head) + +proc worker() {.thread.} = + var workItem = MyList(nil) + var echoed = 0 + while true: + withMyLock headL: + var h = extract head + if h != nil: + workItem = h + # workitem is now isolated: + head = move h.next + else: + workItem = nil + # workItem is isolated, so we can access it outside + # the lock: + if workItem.isNil: + if shouldStop.load: + break + else: + # give producer time to breath: + os.sleep 30 + else: + if echoed < 100: + echo workItem.data + inc echoed + +var thr: Thread[void] +createThread(thr, worker) + +send "abc" +send "def" +for i in 0 ..< 10_000: + send "xzy" + send "zzz" +shouldStop.store true + +joinThread(thr) |