summary refs log tree commit diff stats
diff options
context:
space:
mode:
authorAndreas Rumpf <rumpf_a@web.de>2020-11-18 22:31:06 +0100
committerGitHub <noreply@github.com>2020-11-18 22:31:06 +0100
commitb9eb91cbb5c8104955e595808d4b4d388894d406 (patch)
treea6a092c7a9aed055a33a73610e31ad88fbf87473
parentbaaa19b92751598b6519a64eed2b1beb06b2e662 (diff)
downloadNim-b9eb91cbb5c8104955e595808d4b4d388894d406.tar.gz
ORC: prepare for another patent-pending optimization (#15996)
* ORC: prepare for another patent-pending optimization

* bugfix

* '=copy' for refs can take a cyclic parameter for more ORC optimizations

* ORC: exploit the common 'it = it.next' pattern

* can't hurt to check for nil

* use an algorithm that is not obviously broken

* restore the test case

* final cleanups for --gc:orc
-rw-r--r--changelog.md4
-rw-r--r--compiler/injectdestructors.nim49
-rw-r--r--compiler/liftdestructors.nim55
-rw-r--r--lib/system/arc.nim17
-rw-r--r--lib/system/cyclebreaker.nim4
-rw-r--r--lib/system/orc.nim55
-rw-r--r--tests/arc/tasyncleak.nim2
7 files changed, 147 insertions, 39 deletions
diff --git a/changelog.md b/changelog.md
index ce06a23bc..7cbd4bb66 100644
--- a/changelog.md
+++ b/changelog.md
@@ -38,6 +38,10 @@
 - Added `asyncdispatch.activeDescriptors` that returns the number of currently
   active async event handles/file descriptors
 
+- ``--gc:orc`` is now 10% faster than previously for common workloads. If
+  you have trouble with its changed behavior, compile with ``-d:nimOldOrc``.
+
+
 - `os.FileInfo` (returned by `getFileInfo`) now contains `blockSize`,
   determining preferred I/O block size for this file object.
 
diff --git a/compiler/injectdestructors.nim b/compiler/injectdestructors.nim
index 7e64ebfdc..869e8ecc8 100644
--- a/compiler/injectdestructors.nim
+++ b/compiler/injectdestructors.nim
@@ -311,6 +311,43 @@ proc genSink(c: var Con; dest, ri: PNode, isDecl = false): PNode =
       # and copyMem(dest, source). This is efficient.
       result = newTree(nkStmtList, c.genDestroy(dest), newTree(nkFastAsgn, dest, ri))
 
+proc isCriticalLink(dest: PNode): bool {.inline.} =
+  #[
+  Lins's idea that only "critical" links can introduce a cycle. This is
+  critical for the performance gurantees that we strive for: If you
+  traverse a data structure, no tracing will be performed at all.
+  ORC is about this promise: The GC only touches the memory that the
+  mutator touches too.
+
+  These constructs cannot possibly create cycles::
+
+    local = ...
+
+    new(x)
+    dest = ObjectConstructor(field: noalias(dest))
+
+  But since 'ObjectConstructor' is already moved into 'dest' all we really have
+  to look for is assignments to local variables.
+  ]#
+  result = dest.kind != nkSym
+
+proc finishCopy(c: var Con; result, dest: PNode; isFromSink: bool) =
+  if c.graph.config.selectedGC == gcOrc:
+    let t = dest.typ.skipTypes({tyGenericInst, tyAlias, tySink, tyDistinct})
+    if cyclicType(t):
+      result.add boolLit(c.graph, result.info, isFromSink or isCriticalLink(dest))
+
+proc genMarkCyclic(c: var Con; result, dest: PNode) =
+  if c.graph.config.selectedGC == gcOrc:
+    let t = dest.typ.skipTypes({tyGenericInst, tyAlias, tySink, tyDistinct})
+    if cyclicType(t):
+      if t.kind == tyRef:
+        result.add callCodegenProc(c.graph, "nimMarkCyclic", dest.info, dest)
+      else:
+        let xenv = genBuiltin(c.graph, mAccessEnv, "accessEnv", dest)
+        xenv.typ = getSysType(c.graph, dest.info, tyPointer)
+        result.add callCodegenProc(c.graph, "nimMarkCyclic", dest.info, xenv)
+
 proc genCopyNoCheck(c: var Con; dest, ri: PNode): PNode =
   let t = dest.typ.skipTypes({tyGenericInst, tyAlias, tySink})
   result = c.genOp(t, attachedAsgn, dest, ri)
@@ -390,7 +427,9 @@ proc destructiveMoveVar(n: PNode; c: var Con; s: var Scope): PNode =
     v.add(vpart)
 
     result.add v
-    let wasMovedCall = c.genWasMoved(skipConv(n))
+    let nn = skipConv(n)
+    c.genMarkCyclic(result, nn)
+    let wasMovedCall = c.genWasMoved(nn)
     result.add wasMovedCall
     result.add tempAsNode
 
@@ -405,6 +444,7 @@ proc passCopyToSink(n: PNode; c: var Con; s: var Scope): PNode =
     result.add c.genWasMoved(tmp)
     var m = c.genCopy(tmp, n)
     m.add p(n, c, s, normal)
+    c.finishCopy(m, n, isFromSink = true)
     result.add m
     if isLValue(n) and not isCapturedVar(n) and n.typ.skipTypes(abstractInst).kind != tyRef and c.inSpawn == 0:
       message(c.graph.config, n.info, hintPerformance,
@@ -667,6 +707,7 @@ proc pRaiseStmt(n: PNode, c: var Con; s: var Scope): PNode =
       let tmp = c.getTemp(s, n[0].typ, n.info)
       var m = c.genCopyNoCheck(tmp, n[0])
       m.add p(n[0], c, s, normal)
+      c.finishCopy(m, n[0], isFromSink = false)
       result = newTree(nkStmtList, c.genWasMoved(tmp), m)
       var toDisarm = n[0]
       if toDisarm.kind == nkStmtListExpr: toDisarm = toDisarm.lastSon
@@ -941,16 +982,18 @@ proc moveOrCopy(dest, ri: PNode; c: var Con; s: var Scope, isDecl = false): PNod
     else:
       result = c.genCopy(dest, ri)
       result.add p(ri, c, s, consumed)
+      c.finishCopy(result, dest, isFromSink = false)
   of nkBracket:
     # array constructor
     if ri.len > 0 and isDangerousSeq(ri.typ):
       result = c.genCopy(dest, ri)
       result.add p(ri, c, s, consumed)
+      c.finishCopy(result, dest, isFromSink = false)
     else:
       result = c.genSink(dest, p(ri, c, s, consumed), isDecl)
   of nkObjConstr, nkTupleConstr, nkClosure, nkCharLit..nkNilLit:
     result = c.genSink(dest, p(ri, c, s, consumed), isDecl)
-  of nkSym:            
+  of nkSym:
     if dest.kind == nkSym and dest.sym == ri.sym:
       # rule (self-assignment-removal):
       result = newNodeI(nkEmpty, dest.info)
@@ -966,6 +1009,7 @@ proc moveOrCopy(dest, ri: PNode; c: var Con; s: var Scope, isDecl = false): PNod
     else:
       result = c.genCopy(dest, ri)
       result.add p(ri, c, s, consumed)
+      c.finishCopy(result, dest, isFromSink = false)
   of nkHiddenSubConv, nkHiddenStdConv, nkConv, nkObjDownConv, nkObjUpConv:
     result = c.genSink(dest, p(ri, c, s, sinkArg), isDecl)
   of nkStmtListExpr, nkBlockExpr, nkIfExpr, nkCaseStmt, nkTryStmt:
@@ -983,6 +1027,7 @@ proc moveOrCopy(dest, ri: PNode; c: var Con; s: var Scope, isDecl = false): PNod
     else:
       result = c.genCopy(dest, ri)
       result.add p(ri, c, s, consumed)
+      c.finishCopy(result, dest, isFromSink = false)
 
 proc computeUninit(c: var Con) =
   if not c.uninitComputed:
diff --git a/compiler/liftdestructors.nim b/compiler/liftdestructors.nim
index 45a66e795..512fb6190 100644
--- a/compiler/liftdestructors.nim
+++ b/compiler/liftdestructors.nim
@@ -65,7 +65,7 @@ proc newAsgnStmt(le, ri: PNode): PNode =
   result[0] = le
   result[1] = ri
 
-proc genBuiltin(g: ModuleGraph; magic: TMagic; name: string; i: PNode): PNode =
+proc genBuiltin*(g: ModuleGraph; magic: TMagic; name: string; i: PNode): PNode =
   result = newNodeI(nkCall, i.info)
   result.add createMagic(g, name, magic).newSymNode
   result.add i
@@ -248,6 +248,19 @@ proc fillBodyObjT(c: var TLiftCtx; t: PType, body, x, y: PNode) =
   else:
     fillBodyObjTImpl(c, t, body, x, y)
 
+proc boolLit*(g: ModuleGraph; info: TLineInfo; value: bool): PNode =
+  result = newIntLit(g, info, ord value)
+  result.typ = getSysType(g, info, tyBool)
+
+proc getCycleParam(c: TLiftCtx): PNode =
+  assert c.kind == attachedAsgn
+  if c.fn.typ.len == 4:
+    result = c.fn.typ.n.lastSon
+    assert result.kind == nkSym
+    assert result.sym.name.s == "cyclic"
+  else:
+    result = boolLit(c.g, c.info, true)
+
 proc newHookCall(c: var TLiftCtx; op: PSym; x, y: PNode): PNode =
   #if sfError in op.flags:
   #  localError(c.config, x.info, "usage of '$1' is a user-defined error" % op.name.s)
@@ -261,6 +274,13 @@ proc newHookCall(c: var TLiftCtx; op: PSym; x, y: PNode): PNode =
     result.add x
   if y != nil:
     result.add y
+  if op.typ.len == 4:
+    assert y != nil
+    if c.fn.typ.len == 4:
+      result.add getCycleParam(c)
+    else:
+      # assume the worst: A cycle is created:
+      result.add boolLit(c.g, y.info, true)
 
 proc newOpCall(c: var TLiftCtx; op: PSym; x: PNode): PNode =
   result = newNodeIT(nkCall, x.info, op.typ[0])
@@ -526,6 +546,12 @@ proc fillStrOp(c: var TLiftCtx; t: PType; body, x, y: PNode) =
   of attachedTrace:
     discard "strings are atomic and have no inner elements that are to trace"
 
+proc cyclicType*(t: PType): bool =
+  case t.kind
+  of tyRef: result = types.canFormAcycle(t.lastSon)
+  of tyProc: result = t.callConv == ccClosure
+  else: result = false
+
 proc atomicRefOp(c: var TLiftCtx; t: PType; body, x, y: PNode) =
   #[ bug #15753 is really subtle. Usually the classical write barrier for reference
   counting looks like this::
@@ -588,12 +614,13 @@ proc atomicRefOp(c: var TLiftCtx; t: PType; body, x, y: PNode) =
       body.add genIf(c, cond, actions)
       body.add newAsgnStmt(x, y)
   of attachedAsgn:
-    body.add genIf(c, y, callCodegenProc(c.g,
-        if isCyclic: "nimIncRefCyclic" else: "nimIncRef", c.info, y))
     if isCyclic:
+      body.add genIf(c, y, callCodegenProc(c.g,
+          "nimIncRefCyclic", c.info, y, getCycleParam(c)))
       body.add newAsgnStmt(x, y)
       body.add genIf(c, cond, actions)
     else:
+      body.add genIf(c, y, callCodegenProc(c.g, "nimIncRef", c.info, y))
       body.add genIf(c, cond, actions)
       body.add newAsgnStmt(x, y)
   of attachedDestructor:
@@ -649,14 +676,13 @@ proc atomicClosureOp(c: var TLiftCtx; t: PType; body, x, y: PNode) =
   of attachedAsgn:
     let yenv = genBuiltin(c.g, mAccessEnv, "accessEnv", y)
     yenv.typ = getSysType(c.g, c.info, tyPointer)
-    let incRefProc =
-      if c.g.config.selectedGC == gcOrc: "nimIncRefCyclic"
-      else: "nimIncRef"
-    body.add genIf(c, yenv, callCodegenProc(c.g, incRefProc, c.info, yenv))
     if isCyclic:
+      body.add genIf(c, yenv, callCodegenProc(c.g, "nimIncRefCyclic", c.info, yenv, getCycleParam(c)))
       body.add newAsgnStmt(x, y)
       body.add genIf(c, cond, actions)
     else:
+      body.add genIf(c, yenv, callCodegenProc(c.g, "nimIncRef", c.info, yenv))
+
       body.add genIf(c, cond, actions)
       body.add newAsgnStmt(x, y)
   of attachedDestructor:
@@ -888,6 +914,13 @@ proc symPrototype(g: ModuleGraph; typ: PType; owner: PSym; kind: TTypeAttachedOp
   if kind notin {attachedDestructor, attachedDispose}:
     result.typ.addParam src
 
+  if kind == attachedAsgn and g.config.selectedGC == gcOrc and
+      cyclicType(typ.skipTypes(abstractInst)):
+    let cycleParam = newSym(skParam, getIdent(g.cache, "cyclic"),
+                            nextId(idgen), result, info)
+    cycleParam.typ = getSysType(g, info, tyBool)
+    result.typ.addParam cycleParam
+
   var n = newNodeI(nkProcDef, info, bodyPos+1)
   for i in 0..<n.len: n[i] = newNodeI(nkEmpty, info)
   n[namePos] = newSymNode(result)
@@ -907,8 +940,8 @@ proc produceSym(g: ModuleGraph; c: PContext; typ: PType; kind: TTypeAttachedOp;
   if result == nil:
     result = symPrototype(g, typ, typ.owner, kind, info, idgen)
 
-  var a = TLiftCtx(info: info, g: g, kind: kind, c: c, asgnForType:typ, idgen: idgen)
-  a.fn = result
+  var a = TLiftCtx(info: info, g: g, kind: kind, c: c, asgnForType: typ, idgen: idgen,
+                   fn: result)
 
   let dest = result.typ.n[1].sym
   let d = newDeref(newSymNode(dest))
@@ -943,8 +976,8 @@ proc produceDestructorForDiscriminator*(g: ModuleGraph; typ: PType; field: PSym,
                                         info: TLineInfo; idgen: IdGenerator): PSym =
   assert(typ.skipTypes({tyAlias, tyGenericInst}).kind == tyObject)
   result = symPrototype(g, field.typ, typ.owner, attachedDestructor, info, idgen)
-  var a = TLiftCtx(info: info, g: g, kind: attachedDestructor, asgnForType: typ, idgen: idgen)
-  a.fn = result
+  var a = TLiftCtx(info: info, g: g, kind: attachedDestructor, asgnForType: typ, idgen: idgen,
+                   fn: result)
   a.asgnForType = typ
   a.filterDiscriminator = field
   a.addMemReset = true
diff --git a/lib/system/arc.nim b/lib/system/arc.nim
index 4c97f1aa0..9ee367543 100644
--- a/lib/system/arc.nim
+++ b/lib/system/arc.nim
@@ -75,6 +75,8 @@ when defined(nimArcDebug):
 elif defined(nimArcIds):
   var gRefId: int
 
+  const traceId = -1
+
 proc nimNewObj(size, alignment: int): pointer {.compilerRtl.} =
   let hdrSize = align(sizeof(RefHeader), alignment)
   let s = size + hdrSize
@@ -126,13 +128,14 @@ proc nimIncRef(p: pointer) {.compilerRtl, inl.} =
   when traceCollector:
     cprintf("[INCREF] %p\n", head(p))
 
-proc unsureAsgnRef(dest: ptr pointer, src: pointer) {.inline.} =
-  # This is only used by the old RTTI mechanism and we know
-  # that 'dest[]' is nil and needs no destruction. Which is really handy
-  # as we cannot destroy the object reliably if it's an object of unknown
-  # compile-time type.
-  dest[] = src
-  if src != nil: nimIncRef src
+when not defined(gcOrc) or defined(nimThinout):
+  proc unsureAsgnRef(dest: ptr pointer, src: pointer) {.inline.} =
+    # This is only used by the old RTTI mechanism and we know
+    # that 'dest[]' is nil and needs no destruction. Which is really handy
+    # as we cannot destroy the object reliably if it's an object of unknown
+    # compile-time type.
+    dest[] = src
+    if src != nil: nimIncRef src
 
 when not defined(nimscript) and defined(nimArcDebug):
   proc deallocatedRefId*(p: pointer): int =
diff --git a/lib/system/cyclebreaker.nim b/lib/system/cyclebreaker.nim
index c4d802a92..a3159f95c 100644
--- a/lib/system/cyclebreaker.nim
+++ b/lib/system/cyclebreaker.nim
@@ -70,10 +70,12 @@ template color(c): untyped = c.rc and colorMask
 template setColor(c, col) =
   c.rc = c.rc and not colorMask or col
 
-proc nimIncRefCyclic(p: pointer) {.compilerRtl, inl.} =
+proc nimIncRefCyclic(p: pointer; cyclic: bool) {.compilerRtl, inl.} =
   let h = head(p)
   inc h.rc, rcIncrement
 
+proc nimMarkCyclic(p: pointer) {.compilerRtl, inl.} = discard
+
 type
   GcEnv = object
     traceStack: CellSeq
diff --git a/lib/system/orc.nim b/lib/system/orc.nim
index 28f8e5808..b0497a836 100644
--- a/lib/system/orc.nim
+++ b/lib/system/orc.nim
@@ -21,8 +21,7 @@ const
   colBlack = 0b000
   colGray = 0b001
   colWhite = 0b010
-  colPurple = 0b011
-  isCycleCandidate = 0b100 # cell is marked as a cycle candidate
+  maybeCycle = 0b100 # possibly part of a cycle; this has to be a "sticky" bit
   jumpStackFlag = 0b1000
   colorMask = 0b011
 
@@ -39,11 +38,29 @@ template setColor(c, col) =
   else:
     c.rc = c.rc and not colorMask or col
 
-proc nimIncRefCyclic(p: pointer) {.compilerRtl, inl.} =
+const
+  optimizedOrc = not defined(nimOldOrc)
+
+proc nimIncRefCyclic(p: pointer; cyclic: bool) {.compilerRtl, inl.} =
   let h = head(p)
   inc h.rc, rcIncrement
-  #h.setColor colPurple # mark as potential cycle!
-  h.setColor colBlack
+  when optimizedOrc:
+    if cyclic:
+      h.rc = h.rc or maybeCycle
+
+proc nimMarkCyclic(p: pointer) {.compilerRtl, inl.} =
+  when optimizedOrc:
+    if p != nil:
+      let h = head(p)
+      h.rc = h.rc or maybeCycle
+
+proc unsureAsgnRef(dest: ptr pointer, src: pointer) {.inline.} =
+  # This is only used by the old RTTI mechanism and we know
+  # that 'dest[]' is nil and needs no destruction. Which is really handy
+  # as we cannot destroy the object reliably if it's an object of unknown
+  # compile-time type.
+  dest[] = src
+  if src != nil: nimIncRefCyclic(src, true)
 
 const
   useJumpStack = false # for thavlak the jump stack doesn't improve the performance at all
@@ -113,14 +130,15 @@ var
 
 proc unregisterCycle(s: Cell) =
   # swap with the last element. O(1)
-  let idx = s.rootIdx
+  let idx = s.rootIdx-1
   when false:
     if idx >= roots.len or idx < 0:
       cprintf("[Bug!] %ld\n", idx)
       quit 1
   roots.d[idx] = roots.d[roots.len-1]
-  roots.d[idx][0].rootIdx = idx
+  roots.d[idx][0].rootIdx = idx+1
   dec roots.len
+  s.rootIdx = 0
 
 proc scanBlack(s: Cell; desc: PNimTypeV2; j: var GcEnv) =
   #[
@@ -245,7 +263,7 @@ proc collectWhite(s: Cell; desc: PNimTypeV2; j: var GcEnv) =
         collectWhite(t)
       free(s) # watch out, a bug here!
   ]#
-  if s.color == colWhite and (s.rc and isCycleCandidate) == 0:
+  if s.color == colWhite and s.rootIdx == 0:
     orcAssert(j.traceStack.len == 0, "collectWhite: trace stack not empty")
 
     s.setColor(colBlack)
@@ -253,7 +271,7 @@ proc collectWhite(s: Cell; desc: PNimTypeV2; j: var GcEnv) =
     trace(s, desc, j)
     while j.traceStack.len > 0:
       let (t, desc) = j.traceStack.pop()
-      if t.color == colWhite and (t.rc and isCycleCandidate) == 0:
+      if t.color == colWhite and t.rootIdx == 0:
         j.toFree.add(t, desc)
         t.setColor(colBlack)
         trace(t, desc, j)
@@ -284,7 +302,7 @@ proc collectCyclesBacon(j: var GcEnv) =
   init j.toFree
   for i in 0 ..< roots.len:
     let s = roots.d[i][0]
-    s.rc = s.rc and not isCycleCandidate
+    s.rootIdx = 0
     collectWhite(s, roots.d[i][1], j)
 
   for i in 0 ..< j.toFree.len:
@@ -341,13 +359,14 @@ proc collectCycles() =
       getOccupiedMem())
 
 proc registerCycle(s: Cell; desc: PNimTypeV2) =
-  s.rootIdx = roots.len
+  s.rootIdx = roots.len+1
   if roots.d == nil: init(roots)
   add(roots, s, desc)
 
   if roots.len >= rootsThreshold:
     collectCycles()
-  #writeCell("[added root]", s)
+  when logOrc:
+    writeCell("[added root]", s, desc)
 
 proc GC_runOrc* =
   ## Forces a cycle collection pass.
@@ -379,17 +398,19 @@ proc GC_disableMarkAndSweep*() =
   ## For ``--gc:orc`` an alias for ``GC_disableOrc``.
   GC_disableOrc()
 
+when optimizedOrc:
+  template markedAsCyclic(s: Cell): bool = (s.rc and maybeCycle) != 0
+else:
+  template markedAsCyclic(s: Cell): bool = true
+
 proc rememberCycle(isDestroyAction: bool; s: Cell; desc: PNimTypeV2) {.noinline.} =
   if isDestroyAction:
-    if (s.rc and isCycleCandidate) != 0:
-      s.rc = s.rc and not isCycleCandidate
+    if s.rootIdx > 0:
       unregisterCycle(s)
   else:
     # do not call 'rememberCycle' again unless this cell
     # got an 'incRef' event:
-    #s.setColor colGreen  # XXX This is wrong!
-    if (s.rc and isCycleCandidate) == 0:
-      s.rc = s.rc or isCycleCandidate
+    if s.rootIdx == 0 and markedAsCyclic(s):
       s.setColor colBlack
       registerCycle(s, desc)
 
diff --git a/tests/arc/tasyncleak.nim b/tests/arc/tasyncleak.nim
index 72aa3d4c2..417b67edb 100644
--- a/tests/arc/tasyncleak.nim
+++ b/tests/arc/tasyncleak.nim
@@ -1,5 +1,5 @@
 discard """
-  outputsub: "(allocCount: 4016, deallocCount: 4014)"
+  outputsub: "(allocCount: 4014, deallocCount: 4012)"
   cmd: "nim c --gc:orc -d:nimAllocStats $file"
 """