diff options
author | Andreas Rumpf <rumpf_a@web.de> | 2020-11-18 22:31:06 +0100 |
---|---|---|
committer | GitHub <noreply@github.com> | 2020-11-18 22:31:06 +0100 |
commit | b9eb91cbb5c8104955e595808d4b4d388894d406 (patch) | |
tree | a6a092c7a9aed055a33a73610e31ad88fbf87473 | |
parent | baaa19b92751598b6519a64eed2b1beb06b2e662 (diff) | |
download | Nim-b9eb91cbb5c8104955e595808d4b4d388894d406.tar.gz |
ORC: prepare for another patent-pending optimization (#15996)
* ORC: prepare for another patent-pending optimization * bugfix * '=copy' for refs can take a cyclic parameter for more ORC optimizations * ORC: exploit the common 'it = it.next' pattern * can't hurt to check for nil * use an algorithm that is not obviously broken * restore the test case * final cleanups for --gc:orc
-rw-r--r-- | changelog.md | 4 | ||||
-rw-r--r-- | compiler/injectdestructors.nim | 49 | ||||
-rw-r--r-- | compiler/liftdestructors.nim | 55 | ||||
-rw-r--r-- | lib/system/arc.nim | 17 | ||||
-rw-r--r-- | lib/system/cyclebreaker.nim | 4 | ||||
-rw-r--r-- | lib/system/orc.nim | 55 | ||||
-rw-r--r-- | tests/arc/tasyncleak.nim | 2 |
7 files changed, 147 insertions, 39 deletions
diff --git a/changelog.md b/changelog.md index ce06a23bc..7cbd4bb66 100644 --- a/changelog.md +++ b/changelog.md @@ -38,6 +38,10 @@ - Added `asyncdispatch.activeDescriptors` that returns the number of currently active async event handles/file descriptors +- ``--gc:orc`` is now 10% faster than previously for common workloads. If + you have trouble with its changed behavior, compile with ``-d:nimOldOrc``. + + - `os.FileInfo` (returned by `getFileInfo`) now contains `blockSize`, determining preferred I/O block size for this file object. diff --git a/compiler/injectdestructors.nim b/compiler/injectdestructors.nim index 7e64ebfdc..869e8ecc8 100644 --- a/compiler/injectdestructors.nim +++ b/compiler/injectdestructors.nim @@ -311,6 +311,43 @@ proc genSink(c: var Con; dest, ri: PNode, isDecl = false): PNode = # and copyMem(dest, source). This is efficient. result = newTree(nkStmtList, c.genDestroy(dest), newTree(nkFastAsgn, dest, ri)) +proc isCriticalLink(dest: PNode): bool {.inline.} = + #[ + Lins's idea that only "critical" links can introduce a cycle. This is + critical for the performance gurantees that we strive for: If you + traverse a data structure, no tracing will be performed at all. + ORC is about this promise: The GC only touches the memory that the + mutator touches too. + + These constructs cannot possibly create cycles:: + + local = ... + + new(x) + dest = ObjectConstructor(field: noalias(dest)) + + But since 'ObjectConstructor' is already moved into 'dest' all we really have + to look for is assignments to local variables. + ]# + result = dest.kind != nkSym + +proc finishCopy(c: var Con; result, dest: PNode; isFromSink: bool) = + if c.graph.config.selectedGC == gcOrc: + let t = dest.typ.skipTypes({tyGenericInst, tyAlias, tySink, tyDistinct}) + if cyclicType(t): + result.add boolLit(c.graph, result.info, isFromSink or isCriticalLink(dest)) + +proc genMarkCyclic(c: var Con; result, dest: PNode) = + if c.graph.config.selectedGC == gcOrc: + let t = dest.typ.skipTypes({tyGenericInst, tyAlias, tySink, tyDistinct}) + if cyclicType(t): + if t.kind == tyRef: + result.add callCodegenProc(c.graph, "nimMarkCyclic", dest.info, dest) + else: + let xenv = genBuiltin(c.graph, mAccessEnv, "accessEnv", dest) + xenv.typ = getSysType(c.graph, dest.info, tyPointer) + result.add callCodegenProc(c.graph, "nimMarkCyclic", dest.info, xenv) + proc genCopyNoCheck(c: var Con; dest, ri: PNode): PNode = let t = dest.typ.skipTypes({tyGenericInst, tyAlias, tySink}) result = c.genOp(t, attachedAsgn, dest, ri) @@ -390,7 +427,9 @@ proc destructiveMoveVar(n: PNode; c: var Con; s: var Scope): PNode = v.add(vpart) result.add v - let wasMovedCall = c.genWasMoved(skipConv(n)) + let nn = skipConv(n) + c.genMarkCyclic(result, nn) + let wasMovedCall = c.genWasMoved(nn) result.add wasMovedCall result.add tempAsNode @@ -405,6 +444,7 @@ proc passCopyToSink(n: PNode; c: var Con; s: var Scope): PNode = result.add c.genWasMoved(tmp) var m = c.genCopy(tmp, n) m.add p(n, c, s, normal) + c.finishCopy(m, n, isFromSink = true) result.add m if isLValue(n) and not isCapturedVar(n) and n.typ.skipTypes(abstractInst).kind != tyRef and c.inSpawn == 0: message(c.graph.config, n.info, hintPerformance, @@ -667,6 +707,7 @@ proc pRaiseStmt(n: PNode, c: var Con; s: var Scope): PNode = let tmp = c.getTemp(s, n[0].typ, n.info) var m = c.genCopyNoCheck(tmp, n[0]) m.add p(n[0], c, s, normal) + c.finishCopy(m, n[0], isFromSink = false) result = newTree(nkStmtList, c.genWasMoved(tmp), m) var toDisarm = n[0] if toDisarm.kind == nkStmtListExpr: toDisarm = toDisarm.lastSon @@ -941,16 +982,18 @@ proc moveOrCopy(dest, ri: PNode; c: var Con; s: var Scope, isDecl = false): PNod else: result = c.genCopy(dest, ri) result.add p(ri, c, s, consumed) + c.finishCopy(result, dest, isFromSink = false) of nkBracket: # array constructor if ri.len > 0 and isDangerousSeq(ri.typ): result = c.genCopy(dest, ri) result.add p(ri, c, s, consumed) + c.finishCopy(result, dest, isFromSink = false) else: result = c.genSink(dest, p(ri, c, s, consumed), isDecl) of nkObjConstr, nkTupleConstr, nkClosure, nkCharLit..nkNilLit: result = c.genSink(dest, p(ri, c, s, consumed), isDecl) - of nkSym: + of nkSym: if dest.kind == nkSym and dest.sym == ri.sym: # rule (self-assignment-removal): result = newNodeI(nkEmpty, dest.info) @@ -966,6 +1009,7 @@ proc moveOrCopy(dest, ri: PNode; c: var Con; s: var Scope, isDecl = false): PNod else: result = c.genCopy(dest, ri) result.add p(ri, c, s, consumed) + c.finishCopy(result, dest, isFromSink = false) of nkHiddenSubConv, nkHiddenStdConv, nkConv, nkObjDownConv, nkObjUpConv: result = c.genSink(dest, p(ri, c, s, sinkArg), isDecl) of nkStmtListExpr, nkBlockExpr, nkIfExpr, nkCaseStmt, nkTryStmt: @@ -983,6 +1027,7 @@ proc moveOrCopy(dest, ri: PNode; c: var Con; s: var Scope, isDecl = false): PNod else: result = c.genCopy(dest, ri) result.add p(ri, c, s, consumed) + c.finishCopy(result, dest, isFromSink = false) proc computeUninit(c: var Con) = if not c.uninitComputed: diff --git a/compiler/liftdestructors.nim b/compiler/liftdestructors.nim index 45a66e795..512fb6190 100644 --- a/compiler/liftdestructors.nim +++ b/compiler/liftdestructors.nim @@ -65,7 +65,7 @@ proc newAsgnStmt(le, ri: PNode): PNode = result[0] = le result[1] = ri -proc genBuiltin(g: ModuleGraph; magic: TMagic; name: string; i: PNode): PNode = +proc genBuiltin*(g: ModuleGraph; magic: TMagic; name: string; i: PNode): PNode = result = newNodeI(nkCall, i.info) result.add createMagic(g, name, magic).newSymNode result.add i @@ -248,6 +248,19 @@ proc fillBodyObjT(c: var TLiftCtx; t: PType, body, x, y: PNode) = else: fillBodyObjTImpl(c, t, body, x, y) +proc boolLit*(g: ModuleGraph; info: TLineInfo; value: bool): PNode = + result = newIntLit(g, info, ord value) + result.typ = getSysType(g, info, tyBool) + +proc getCycleParam(c: TLiftCtx): PNode = + assert c.kind == attachedAsgn + if c.fn.typ.len == 4: + result = c.fn.typ.n.lastSon + assert result.kind == nkSym + assert result.sym.name.s == "cyclic" + else: + result = boolLit(c.g, c.info, true) + proc newHookCall(c: var TLiftCtx; op: PSym; x, y: PNode): PNode = #if sfError in op.flags: # localError(c.config, x.info, "usage of '$1' is a user-defined error" % op.name.s) @@ -261,6 +274,13 @@ proc newHookCall(c: var TLiftCtx; op: PSym; x, y: PNode): PNode = result.add x if y != nil: result.add y + if op.typ.len == 4: + assert y != nil + if c.fn.typ.len == 4: + result.add getCycleParam(c) + else: + # assume the worst: A cycle is created: + result.add boolLit(c.g, y.info, true) proc newOpCall(c: var TLiftCtx; op: PSym; x: PNode): PNode = result = newNodeIT(nkCall, x.info, op.typ[0]) @@ -526,6 +546,12 @@ proc fillStrOp(c: var TLiftCtx; t: PType; body, x, y: PNode) = of attachedTrace: discard "strings are atomic and have no inner elements that are to trace" +proc cyclicType*(t: PType): bool = + case t.kind + of tyRef: result = types.canFormAcycle(t.lastSon) + of tyProc: result = t.callConv == ccClosure + else: result = false + proc atomicRefOp(c: var TLiftCtx; t: PType; body, x, y: PNode) = #[ bug #15753 is really subtle. Usually the classical write barrier for reference counting looks like this:: @@ -588,12 +614,13 @@ proc atomicRefOp(c: var TLiftCtx; t: PType; body, x, y: PNode) = body.add genIf(c, cond, actions) body.add newAsgnStmt(x, y) of attachedAsgn: - body.add genIf(c, y, callCodegenProc(c.g, - if isCyclic: "nimIncRefCyclic" else: "nimIncRef", c.info, y)) if isCyclic: + body.add genIf(c, y, callCodegenProc(c.g, + "nimIncRefCyclic", c.info, y, getCycleParam(c))) body.add newAsgnStmt(x, y) body.add genIf(c, cond, actions) else: + body.add genIf(c, y, callCodegenProc(c.g, "nimIncRef", c.info, y)) body.add genIf(c, cond, actions) body.add newAsgnStmt(x, y) of attachedDestructor: @@ -649,14 +676,13 @@ proc atomicClosureOp(c: var TLiftCtx; t: PType; body, x, y: PNode) = of attachedAsgn: let yenv = genBuiltin(c.g, mAccessEnv, "accessEnv", y) yenv.typ = getSysType(c.g, c.info, tyPointer) - let incRefProc = - if c.g.config.selectedGC == gcOrc: "nimIncRefCyclic" - else: "nimIncRef" - body.add genIf(c, yenv, callCodegenProc(c.g, incRefProc, c.info, yenv)) if isCyclic: + body.add genIf(c, yenv, callCodegenProc(c.g, "nimIncRefCyclic", c.info, yenv, getCycleParam(c))) body.add newAsgnStmt(x, y) body.add genIf(c, cond, actions) else: + body.add genIf(c, yenv, callCodegenProc(c.g, "nimIncRef", c.info, yenv)) + body.add genIf(c, cond, actions) body.add newAsgnStmt(x, y) of attachedDestructor: @@ -888,6 +914,13 @@ proc symPrototype(g: ModuleGraph; typ: PType; owner: PSym; kind: TTypeAttachedOp if kind notin {attachedDestructor, attachedDispose}: result.typ.addParam src + if kind == attachedAsgn and g.config.selectedGC == gcOrc and + cyclicType(typ.skipTypes(abstractInst)): + let cycleParam = newSym(skParam, getIdent(g.cache, "cyclic"), + nextId(idgen), result, info) + cycleParam.typ = getSysType(g, info, tyBool) + result.typ.addParam cycleParam + var n = newNodeI(nkProcDef, info, bodyPos+1) for i in 0..<n.len: n[i] = newNodeI(nkEmpty, info) n[namePos] = newSymNode(result) @@ -907,8 +940,8 @@ proc produceSym(g: ModuleGraph; c: PContext; typ: PType; kind: TTypeAttachedOp; if result == nil: result = symPrototype(g, typ, typ.owner, kind, info, idgen) - var a = TLiftCtx(info: info, g: g, kind: kind, c: c, asgnForType:typ, idgen: idgen) - a.fn = result + var a = TLiftCtx(info: info, g: g, kind: kind, c: c, asgnForType: typ, idgen: idgen, + fn: result) let dest = result.typ.n[1].sym let d = newDeref(newSymNode(dest)) @@ -943,8 +976,8 @@ proc produceDestructorForDiscriminator*(g: ModuleGraph; typ: PType; field: PSym, info: TLineInfo; idgen: IdGenerator): PSym = assert(typ.skipTypes({tyAlias, tyGenericInst}).kind == tyObject) result = symPrototype(g, field.typ, typ.owner, attachedDestructor, info, idgen) - var a = TLiftCtx(info: info, g: g, kind: attachedDestructor, asgnForType: typ, idgen: idgen) - a.fn = result + var a = TLiftCtx(info: info, g: g, kind: attachedDestructor, asgnForType: typ, idgen: idgen, + fn: result) a.asgnForType = typ a.filterDiscriminator = field a.addMemReset = true diff --git a/lib/system/arc.nim b/lib/system/arc.nim index 4c97f1aa0..9ee367543 100644 --- a/lib/system/arc.nim +++ b/lib/system/arc.nim @@ -75,6 +75,8 @@ when defined(nimArcDebug): elif defined(nimArcIds): var gRefId: int + const traceId = -1 + proc nimNewObj(size, alignment: int): pointer {.compilerRtl.} = let hdrSize = align(sizeof(RefHeader), alignment) let s = size + hdrSize @@ -126,13 +128,14 @@ proc nimIncRef(p: pointer) {.compilerRtl, inl.} = when traceCollector: cprintf("[INCREF] %p\n", head(p)) -proc unsureAsgnRef(dest: ptr pointer, src: pointer) {.inline.} = - # This is only used by the old RTTI mechanism and we know - # that 'dest[]' is nil and needs no destruction. Which is really handy - # as we cannot destroy the object reliably if it's an object of unknown - # compile-time type. - dest[] = src - if src != nil: nimIncRef src +when not defined(gcOrc) or defined(nimThinout): + proc unsureAsgnRef(dest: ptr pointer, src: pointer) {.inline.} = + # This is only used by the old RTTI mechanism and we know + # that 'dest[]' is nil and needs no destruction. Which is really handy + # as we cannot destroy the object reliably if it's an object of unknown + # compile-time type. + dest[] = src + if src != nil: nimIncRef src when not defined(nimscript) and defined(nimArcDebug): proc deallocatedRefId*(p: pointer): int = diff --git a/lib/system/cyclebreaker.nim b/lib/system/cyclebreaker.nim index c4d802a92..a3159f95c 100644 --- a/lib/system/cyclebreaker.nim +++ b/lib/system/cyclebreaker.nim @@ -70,10 +70,12 @@ template color(c): untyped = c.rc and colorMask template setColor(c, col) = c.rc = c.rc and not colorMask or col -proc nimIncRefCyclic(p: pointer) {.compilerRtl, inl.} = +proc nimIncRefCyclic(p: pointer; cyclic: bool) {.compilerRtl, inl.} = let h = head(p) inc h.rc, rcIncrement +proc nimMarkCyclic(p: pointer) {.compilerRtl, inl.} = discard + type GcEnv = object traceStack: CellSeq diff --git a/lib/system/orc.nim b/lib/system/orc.nim index 28f8e5808..b0497a836 100644 --- a/lib/system/orc.nim +++ b/lib/system/orc.nim @@ -21,8 +21,7 @@ const colBlack = 0b000 colGray = 0b001 colWhite = 0b010 - colPurple = 0b011 - isCycleCandidate = 0b100 # cell is marked as a cycle candidate + maybeCycle = 0b100 # possibly part of a cycle; this has to be a "sticky" bit jumpStackFlag = 0b1000 colorMask = 0b011 @@ -39,11 +38,29 @@ template setColor(c, col) = else: c.rc = c.rc and not colorMask or col -proc nimIncRefCyclic(p: pointer) {.compilerRtl, inl.} = +const + optimizedOrc = not defined(nimOldOrc) + +proc nimIncRefCyclic(p: pointer; cyclic: bool) {.compilerRtl, inl.} = let h = head(p) inc h.rc, rcIncrement - #h.setColor colPurple # mark as potential cycle! - h.setColor colBlack + when optimizedOrc: + if cyclic: + h.rc = h.rc or maybeCycle + +proc nimMarkCyclic(p: pointer) {.compilerRtl, inl.} = + when optimizedOrc: + if p != nil: + let h = head(p) + h.rc = h.rc or maybeCycle + +proc unsureAsgnRef(dest: ptr pointer, src: pointer) {.inline.} = + # This is only used by the old RTTI mechanism and we know + # that 'dest[]' is nil and needs no destruction. Which is really handy + # as we cannot destroy the object reliably if it's an object of unknown + # compile-time type. + dest[] = src + if src != nil: nimIncRefCyclic(src, true) const useJumpStack = false # for thavlak the jump stack doesn't improve the performance at all @@ -113,14 +130,15 @@ var proc unregisterCycle(s: Cell) = # swap with the last element. O(1) - let idx = s.rootIdx + let idx = s.rootIdx-1 when false: if idx >= roots.len or idx < 0: cprintf("[Bug!] %ld\n", idx) quit 1 roots.d[idx] = roots.d[roots.len-1] - roots.d[idx][0].rootIdx = idx + roots.d[idx][0].rootIdx = idx+1 dec roots.len + s.rootIdx = 0 proc scanBlack(s: Cell; desc: PNimTypeV2; j: var GcEnv) = #[ @@ -245,7 +263,7 @@ proc collectWhite(s: Cell; desc: PNimTypeV2; j: var GcEnv) = collectWhite(t) free(s) # watch out, a bug here! ]# - if s.color == colWhite and (s.rc and isCycleCandidate) == 0: + if s.color == colWhite and s.rootIdx == 0: orcAssert(j.traceStack.len == 0, "collectWhite: trace stack not empty") s.setColor(colBlack) @@ -253,7 +271,7 @@ proc collectWhite(s: Cell; desc: PNimTypeV2; j: var GcEnv) = trace(s, desc, j) while j.traceStack.len > 0: let (t, desc) = j.traceStack.pop() - if t.color == colWhite and (t.rc and isCycleCandidate) == 0: + if t.color == colWhite and t.rootIdx == 0: j.toFree.add(t, desc) t.setColor(colBlack) trace(t, desc, j) @@ -284,7 +302,7 @@ proc collectCyclesBacon(j: var GcEnv) = init j.toFree for i in 0 ..< roots.len: let s = roots.d[i][0] - s.rc = s.rc and not isCycleCandidate + s.rootIdx = 0 collectWhite(s, roots.d[i][1], j) for i in 0 ..< j.toFree.len: @@ -341,13 +359,14 @@ proc collectCycles() = getOccupiedMem()) proc registerCycle(s: Cell; desc: PNimTypeV2) = - s.rootIdx = roots.len + s.rootIdx = roots.len+1 if roots.d == nil: init(roots) add(roots, s, desc) if roots.len >= rootsThreshold: collectCycles() - #writeCell("[added root]", s) + when logOrc: + writeCell("[added root]", s, desc) proc GC_runOrc* = ## Forces a cycle collection pass. @@ -379,17 +398,19 @@ proc GC_disableMarkAndSweep*() = ## For ``--gc:orc`` an alias for ``GC_disableOrc``. GC_disableOrc() +when optimizedOrc: + template markedAsCyclic(s: Cell): bool = (s.rc and maybeCycle) != 0 +else: + template markedAsCyclic(s: Cell): bool = true + proc rememberCycle(isDestroyAction: bool; s: Cell; desc: PNimTypeV2) {.noinline.} = if isDestroyAction: - if (s.rc and isCycleCandidate) != 0: - s.rc = s.rc and not isCycleCandidate + if s.rootIdx > 0: unregisterCycle(s) else: # do not call 'rememberCycle' again unless this cell # got an 'incRef' event: - #s.setColor colGreen # XXX This is wrong! - if (s.rc and isCycleCandidate) == 0: - s.rc = s.rc or isCycleCandidate + if s.rootIdx == 0 and markedAsCyclic(s): s.setColor colBlack registerCycle(s, desc) diff --git a/tests/arc/tasyncleak.nim b/tests/arc/tasyncleak.nim index 72aa3d4c2..417b67edb 100644 --- a/tests/arc/tasyncleak.nim +++ b/tests/arc/tasyncleak.nim @@ -1,5 +1,5 @@ discard """ - outputsub: "(allocCount: 4016, deallocCount: 4014)" + outputsub: "(allocCount: 4014, deallocCount: 4012)" cmd: "nim c --gc:orc -d:nimAllocStats $file" """ |