author    | Araq <rumpf_a@web.de> | 2015-11-29 14:55:12 +0100
committer | Araq <rumpf_a@web.de> | 2015-12-01 00:53:30 +0100
commit    | a1739455d39507bb27c3a125d97ff9870037f772 (patch)
tree      | 57ec4404edf051ae03af798f0a49432850fdd026
parent    | cc6d9f69b4813f08809a223e967e4c00c8874144 (diff)
download  | Nim-a1739455d39507bb27c3a125d97ff9870037f772.tar.gz
first version of the new hard realtime GC
-rw-r--r-- | lib/system/gc2.nim | 1483
1 file changed, 478 insertions(+), 1005 deletions(-)
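The core of the patch is a new cell header encoding: reference count, tri-color mark state and ZCT membership now share a single refcount word, so the hot inc/dec path never disturbs the collector's bookkeeping bits. A rough standalone sketch of the encoding (not part of the patch; it mirrors the constants the diff introduces below):

  const
    rcIncrement = 0b1000    # one logical reference; low 3 bits untouched
    ZctFlag     = 0b0100    # cell is buffered in the zero count table
    colorMask   = 0b0011    # rcWhite = 0, rcGrey = 1, rcBlack = 2
    rcShift     = 3

  var rc = 0                       # fresh cell: RC 0, white, not in ZCT
  rc = rc or ZctFlag               # newObj parks it in the ZCT
  inc rc, rcIncrement              # a heap slot now references the cell
  assert rc shr rcShift == 1       # the logical reference count
  assert (rc and colorMask) == 0   # still white for the cycle collector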
diff --git a/lib/system/gc2.nim b/lib/system/gc2.nim index 4ca0d144f..78901b1dc 100644 --- a/lib/system/gc2.nim +++ b/lib/system/gc2.nim @@ -1,7 +1,7 @@ # # # Nim's Runtime Library -# (c) Copyright 2012 Andreas Rumpf +# (c) Copyright 2015 Andreas Rumpf # # See the file "copying.txt", included in this # distribution, for details about the copyright. @@ -9,13 +9,13 @@ # Garbage Collector # -# The basic algorithm is *Deferrent Reference Counting* with cycle detection. -# This is achieved by combining a Deutsch-Bobrow garbage collector -# together with Christoper's partial mark-sweep garbage collector. -# -# Special care has been taken to avoid recursion as far as possible to avoid -# stack overflows when traversing deep datastructures. It is well-suited -# for soft real time applications (like games). +# The basic algorithm is *Deferred Reference Counting* with an incremental mark +# and sweep GC to free cycles. It is hard realtime in that if you play +# according to its rules, no deadline will ever be missed. + +when defined(nimCoroutines): + import arch + {.push profiler:off.} const @@ -29,82 +29,30 @@ const when withRealTime and not declared(getTicks): include "system/timers" when defined(memProfiler): - proc nimProfile(requestedSize: int) + proc nimProfile(requestedSize: int) {.benign.} const - rcShift = 6 # the reference count is shifted so we can use - # the least significat bits for additinal flags: - - rcAlive = 0b00000 # object is reachable. - # color *black* in the original paper - - rcCycleCandidate = 0b00001 # possible root of a cycle. *purple* - - rcDecRefApplied = 0b00010 # the first dec-ref phase of the - # collector was already applied to this - # object. *gray* - - rcMaybeDead = 0b00011 # this object is a candidate for deletion - # during the collect cycles algorithm. - # *white*. - - rcReallyDead = 0b00100 # this is proved to be garbage - - rcRetiredBuffer = 0b00101 # this is a seq or string buffer that - # was replaced by a resize operation. - # see growObj for details - - rcColorMask = RefCount(0b00111) - - rcZct = 0b01000 # already added to ZCT - rcInCycleRoots = 0b10000 # already buffered as cycle candidate - rcHasStackRef = 0b100000 # the object had a stack ref in the last - # cycle collection - - rcMarkBit = rcHasStackRef # this is currently used for leak detection - # when traceGC is on - - rcBufferedAnywhere = rcZct or rcInCycleRoots - - rcIncrement = 1 shl rcShift # don't touch the color bits - -const - NewObjectsAreCycleRoots = true - # the alternative is to use the old strategy of adding cycle roots - # in incRef (in the compiler itself, this doesn't change much) - - IncRefRemovesCandidates = false - # this is safe only if we can reliably track the fact that the object - # has stack references. This could be easily done by adding another bit - # to the refcount field and setting it up in unmarkStackAndRegisters. - # The bit must also be set for new objects that are not rc1 and it must be - # examined in the decref loop in collectCycles. - # XXX: not implemented yet as tests didn't show any improvement from this - - MarkingSkipsAcyclicObjects = true - # Acyclic objects can be safely ignored in the mark and scan phases, - # because they cannot contribute to the internal count. - # XXX: if we generate specialized `markCyclic` and `markAcyclic` - # procs we can further optimize this as there won't be need for any - # checks in the code - - MinimumStackMarking = false - # Try to scan only the user stack and ignore the part of the stack - # belonging to the GC itself. 
see setStackTop for further info. - # XXX: still has problems in release mode in the compiler itself. - # investigate how it affects growObj - - CollectCyclesStats = false - + rcIncrement = 0b1000 # so that lowest 3 bits are not touched + rcWhite = 0b000 # cell is colored white + rcGrey = 0b001 # traditional color for incremental mark&sweep + rcBlack = 0b010 # marked as alive; direct children also have been processed + rcUnused = 0b011 + ZctFlag = 0b100 # in ZCT + rcShift = 3 # shift by rcShift to get the reference counter + colorMask = 0b011 type WalkOp = enum - waPush + waMarkGlobal, # part of the backup mark&sweep + waMarkGrey, + waZctDecRef #, waDebug - Finalizer {.compilerproc.} = proc (self: pointer) {.nimcall.} + Phase {.pure.} = enum + None, Marking, Sweeping + Finalizer {.compilerproc.} = proc (self: pointer) {.nimcall, benign.} # A ref type can have a finalizer that is called before the object's # storage is freed. - GcStat {.final, pure.} = object + GcStat = object stackScans: int # number of performed stack scans (for statistics) cycleCollections: int # number of performed full collections maxThreshold: int # max threshold that has been set @@ -113,134 +61,76 @@ type cycleTableSize: int # max entries in cycle table maxPause: int64 # max measured GC pause in nanoseconds - GcHeap {.final, pure.} = object # this contains the zero count and - # non-zero count table + GcStack = object + prev: ptr GcStack + next: ptr GcStack + starts: pointer + pos: pointer + maxStackSize: int + + GcHeap = object # this contains the zero count and + # non-zero count table + stack: ptr GcStack stackBottom: pointer - stackTop: pointer + phase: Phase cycleThreshold: int + when useCellIds: + idGenerator: int zct: CellSeq # the zero count table decStack: CellSeq # cells in the stack that are to decref again - cycleRoots: CellSeq - tempStack: CellSeq # temporary stack for recursion elimination - freeStack: CellSeq # objects ready to be freed + greyStack: CellSeq recGcLock: int # prevent recursion via finalizers; no thread lock - cycleRootsTrimIdx: int # Trimming is a light-weight collection of the - # cycle roots table that uses a cheap linear scan - # to find only possitively dead objects. - # One strategy is to perform it only for new objects - # allocated between the invocations of collectZCT. - # This index indicates the start of the range of - # such new objects within the table. 
when withRealTime: maxPause: Nanos # max allowed pause in nanoseconds; active if > 0 region: MemRegion # garbage collected region stat: GcStat -{.deprecated: [TWalkOp: WalkOp, TFinalizer: Finalizer, TGcStat: GcStat, - TGcHeap: GcHeap].} + additionalRoots: CellSeq # dummy roots for GC_ref/unref + var - gch* {.rtlThreadVar.}: GcHeap + gch {.rtlThreadVar.}: GcHeap when not defined(useNimRtl): instantiateForRegion(gch.region) -template acquire(gch: GcHeap) = - when hasThreadSupport and hasSharedHeap: - AcquireSys(HeapLock) - -template release(gch: GcHeap) = - when hasThreadSupport and hasSharedHeap: - releaseSys(HeapLock) - -template setColor(c: PCell, color) = - c.refcount = (c.refcount and not rcColorMask) or color - -template color(c: PCell): expr = - c.refcount and rcColorMask - -template isBitDown(c: PCell, bit): expr = - (c.refcount and bit) == 0 - -template isBitUp(c: PCell, bit): expr = - (c.refcount and bit) != 0 - -template setBit(c: PCell, bit): expr = - c.refcount = c.refcount or bit - -template isDead(c: Pcell): expr = - c.isBitUp(rcReallyDead) # also covers rcRetiredBuffer - -template clearBit(c: PCell, bit): expr = - c.refcount = c.refcount and (not RefCount(bit)) - -when debugGC: - var gcCollectionIdx = 0 - - proc colorStr(c: PCell): cstring = - let color = c.color - case color - of rcAlive: return "alive" - of rcMaybeDead: return "maybedead" - of rcCycleCandidate: return "candidate" - of rcDecRefApplied: return "marked" - of rcRetiredBuffer: return "retired" - of rcReallyDead: return "dead" - else: return "unknown?" - - proc inCycleRootsStr(c: PCell): cstring = - if c.isBitUp(rcInCycleRoots): result = "cycleroot" - else: result = "" - - proc inZctStr(c: PCell): cstring = - if c.isBitUp(rcZct): result = "zct" - else: result = "" - - proc writeCell*(msg: CString, c: PCell, force = false) = - var kind = -1 - if c.typ != nil: kind = ord(c.typ.kind) - when trackAllocationSource: - c_fprintf(c_stdout, "[GC %d] %s: %p %d rc=%ld %s %s %s from %s(%ld)\n", - gcCollectionIdx, - msg, c, kind, c.refcount shr rcShift, - c.colorStr, c.inCycleRootsStr, c.inZctStr, - c.filename, c.line) - else: - c_fprintf(c_stdout, "[GC] %s: %p %d rc=%ld\n", - msg, c, kind, c.refcount shr rcShift) - -proc addZCT(zct: var CellSeq, c: PCell) {.noinline.} = - if c.isBitDown(rcZct): - c.setBit rcZct - zct.add c - -template setStackTop(gch) = - # This must be called immediately after we enter the GC code - # to minimize the size of the scanned stack. The stack consumed - # by the GC procs may amount to 200-400 bytes depending on the - # build settings and this contributes to false-positives - # in the conservative stack marking - when MinimumStackMarking: - var stackTop {.volatile.}: pointer - gch.stackTop = addr(stackTop) - -template addCycleRoot(cycleRoots: var CellSeq, c: PCell) = - if c.color != rcCycleCandidate: - c.setColor rcCycleCandidate - - # the object may be buffered already. 
for example, consider: - # decref; incref; decref - if c.isBitDown(rcInCycleRoots): - c.setBit rcInCycleRoots - cycleRoots.add c +proc initGC() = + when not defined(useNimRtl): + when traceGC: + for i in low(CellState)..high(CellState): init(states[i]) + gch.cycleThreshold = InitialCycleThreshold + gch.stat.stackScans = 0 + gch.stat.cycleCollections = 0 + gch.stat.maxThreshold = 0 + gch.stat.maxStackSize = 0 + gch.stat.maxStackCells = 0 + gch.stat.cycleTableSize = 0 + # init the rt + init(gch.zct) + init(gch.decStack) + init(gch.additionalRoots) + init(gch.greyStack) + +template gcAssert(cond: bool, msg: string) = + when defined(useGcAssert): + if not cond: + echo "[GCASSERT] ", msg + GC_disable() + writeStackTrace() + quit 1 + +proc addZCT(s: var CellSeq, c: PCell) {.noinline.} = + if (c.refcount and ZctFlag) == 0: + c.refcount = c.refcount or ZctFlag + add(s, c) proc cellToUsr(cell: PCell): pointer {.inline.} = # convert object (=pointer to refcount) to pointer to userdata result = cast[pointer](cast[ByteAddress](cell)+%ByteAddress(sizeof(Cell))) -proc usrToCell*(usr: pointer): PCell {.inline.} = +proc usrToCell(usr: pointer): PCell {.inline.} = # convert pointer to userdata to object (=pointer to refcount) result = cast[PCell](cast[ByteAddress](usr)-%ByteAddress(sizeof(Cell))) -proc canbeCycleRoot(c: PCell): bool {.inline.} = +proc canBeCycleRoot(c: PCell): bool {.inline.} = result = ntfAcyclic notin c.typ.flags proc extGetCellType(c: pointer): PNimType {.compilerproc.} = @@ -254,14 +144,43 @@ proc internRefcount(p: pointer): int {.exportc: "getRefcount".} = when BitsPerPage mod (sizeof(int)*8) != 0: {.error: "(BitsPerPage mod BitsPerUnit) should be zero!".} +template color(c): expr = c.refCount and colorMask +template setColor(c, col) = + when col == rcWhite: + c.refcount = c.refcount and not colorMask + else: + c.refcount = c.refcount and not colorMask or col + +proc writeCell(msg: cstring, c: PCell) = + var kind = -1 + if c.typ != nil: kind = ord(c.typ.kind) + when leakDetector: + c_fprintf(c_stdout, "[GC] %s: %p %d rc=%ld from %s(%ld)\n", + msg, c, kind, c.refcount shr rcShift, c.filename, c.line) + else: + c_fprintf(c_stdout, "[GC] %s: %p %d rc=%ld; color=%ld\n", + msg, c, kind, c.refcount shr rcShift, c.color) + +template gcTrace(cell, state: expr): stmt {.immediate.} = + when traceGC: traceCell(cell, state) + # forward declarations: -proc collectCT(gch: var GcHeap) -proc isOnStack*(p: pointer): bool {.noinline.} -proc forAllChildren(cell: PCell, op: WalkOp) -proc doOperation(p: pointer, op: WalkOp) -proc forAllChildrenAux(dest: pointer, mt: PNimType, op: WalkOp) +proc collectCT(gch: var GcHeap) {.benign.} +proc isOnStack(p: pointer): bool {.noinline, benign.} +proc forAllChildren(cell: PCell, op: WalkOp) {.benign.} +proc doOperation(p: pointer, op: WalkOp) {.benign.} +proc forAllChildrenAux(dest: pointer, mt: PNimType, op: WalkOp) {.benign.} # we need the prototype here for debugging purposes +when hasThreadSupport and hasSharedHeap: + template `--`(x: expr): expr = atomicDec(x, rcIncrement) <% rcIncrement + template `++`(x: expr): stmt = discard atomicInc(x, rcIncrement) +else: + template `--`(x: expr): expr = + dec(x, rcIncrement) + x <% rcIncrement + template `++`(x: expr): stmt = inc(x, rcIncrement) + proc prepareDealloc(cell: PCell) = if cell.typ.finalizer != nil: # the finalizer could invoke something that @@ -273,246 +192,127 @@ proc prepareDealloc(cell: PCell) = (cast[Finalizer](cell.typ.finalizer))(cellToUsr(cell)) dec(gch.recGcLock) -when traceGC: - # traceGC is a 
special switch to enable extensive debugging - type - CellState = enum - csAllocated, csFreed - {.deprecated: [TCellState: CellState].} - var - states: array[CellState, CellSet] - - proc traceCell(c: PCell, state: CellState) = - case state - of csAllocated: - if c in states[csAllocated]: - writeCell("attempt to alloc an already allocated cell", c) - sysAssert(false, "traceCell 1") - excl(states[csFreed], c) - # writecell("allocated", c) - of csFreed: - if c in states[csFreed]: - writeCell("attempt to free a cell twice", c) - sysAssert(false, "traceCell 2") - if c notin states[csAllocated]: - writeCell("attempt to free not an allocated cell", c) - sysAssert(false, "traceCell 3") - excl(states[csAllocated], c) - # writecell("freed", c) - incl(states[state], c) - - proc computeCellWeight(c: PCell): int = - var x: CellSet - x.init - - let startLen = gch.tempStack.len - c.forAllChildren waPush - - while startLen != gch.tempStack.len: - dec gch.tempStack.len - var c = gch.tempStack.d[gch.tempStack.len] - if c in states[csFreed]: continue - inc result - if c notin x: - x.incl c - c.forAllChildren waPush - - template markChildrenRec(cell) = - let startLen = gch.tempStack.len - cell.forAllChildren waPush - let isMarked = cell.isBitUp(rcMarkBit) - while startLen != gch.tempStack.len: - dec gch.tempStack.len - var c = gch.tempStack.d[gch.tempStack.len] - if c in states[csFreed]: continue - if c.isBitDown(rcMarkBit): - c.setBit rcMarkBit - c.forAllChildren waPush - if c.isBitUp(rcMarkBit) and not isMarked: - writecell("cyclic cell", cell) - cprintf "Weight %d\n", cell.computeCellWeight - - proc writeLeakage(onlyRoots: bool) = - if onlyRoots: - for c in elements(states[csAllocated]): - if c notin states[csFreed]: - markChildrenRec(c) - var f = 0 - var a = 0 - for c in elements(states[csAllocated]): - inc a - if c in states[csFreed]: inc f - elif c.isBitDown(rcMarkBit): - writeCell("leak", c) - cprintf "Weight %d\n", c.computeCellWeight - cfprintf(cstdout, "Allocations: %ld; freed: %ld\n", a, f) - -template gcTrace(cell, state: expr): stmt {.immediate.} = - when logGC: writeCell($state, cell) - when traceGC: traceCell(cell, state) - -template WithHeapLock(blk: stmt): stmt = - when hasThreadSupport and hasSharedHeap: AcquireSys(HeapLock) - blk - when hasThreadSupport and hasSharedHeap: ReleaseSys(HeapLock) - proc rtlAddCycleRoot(c: PCell) {.rtl, inl.} = # we MUST access gch as a global here, because this crosses DLL boundaries! - WithHeapLock: addCycleRoot(gch.cycleRoots, c) + discard proc rtlAddZCT(c: PCell) {.rtl, inl.} = # we MUST access gch as a global here, because this crosses DLL boundaries! 
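Note how rtlAddCycleRoot now degenerates to a `discard`: the old cycle-candidate buffering is gone, and cycles are instead reclaimed by an incremental tri-color mark & sweep driven by gch.phase and the grey stack (see markS and markIncrementally further down). A simplified standalone model of the invariant the grey stack maintains, using a toy Cell type rather than the patch's PCell:

  type
    Color = enum white, grey, black
    Cell = ref object
      color: Color
      kids: seq[Cell]

  var greyStack: seq[Cell]

  proc markGrey(c: Cell) =
    # write barrier during the marking phase: a white cell that gains
    # a reference must be queued, or the sweep would free live data
    if c.color == white:
      c.color = grey
      greyStack.add c

  proc markStep(budget: int): bool =
    # trace at most `budget` cells, then yield; true when marking is done
    var n = budget
    while greyStack.len > 0 and n > 0:
      let c = greyStack.pop()
      if c.color == grey:
        for kid in c.kids: markGrey(kid)
        c.color = black
      dec n
    result = greyStack.len == 0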
- WithHeapLock: addZCT(gch.zct, c) + addZCT(gch.zct, c) -type - CyclicMode = enum - Cyclic, - Acyclic, - MaybeCyclic - - ReleaseType = enum - AddToZTC - FreeImmediately - - HeapType = enum - LocalHeap - SharedHeap -{.deprecated: [TCyclicMode: CyclicMode, TReleaseType: ReleaseType, - THeapType: HeapType].} - -template `++` (rc: RefCount, heapType: HeapType): stmt = - when heapType == SharedHeap: - discard atomicInc(rc, rcIncrement) - else: - inc rc, rcIncrement - -template `--`(rc: RefCount): expr = - dec rc, rcIncrement - rc <% rcIncrement - -template `--` (rc: RefCount, heapType: HeapType): expr = - (when heapType == SharedHeap: atomicDec(rc, rcIncrement) <% rcIncrement else: --rc) - -template doDecRef(cc: PCell, - heapType = LocalHeap, - cycleFlag = MaybeCyclic): stmt = - var c = cc - sysAssert(isAllocatedPtr(gch.region, c), "decRef: interiorPtr") - # XXX: move this elesewhere - - sysAssert(c.refcount >=% rcIncrement, "decRef") - if c.refcount--(heapType): - # this is the last reference from the heap - # add to a zero-count-table that will be matched against stack pointers +proc decRef(c: PCell) {.inline.} = + gcAssert(isAllocatedPtr(gch.region, c), "decRef: interiorPtr") + gcAssert(c.refcount >=% rcIncrement, "decRef") + if --c.refcount: rtlAddZCT(c) - else: - when cycleFlag != Acyclic: - if cycleFlag == Cyclic or canBeCycleRoot(c): - # a cycle may have been broken - rtlAddCycleRoot(c) - -template doIncRef(cc: PCell, - heapType = LocalHeap, - cycleFlag = MaybeCyclic): stmt = - var c = cc - c.refcount++(heapType) - when cycleFlag != Acyclic: - when NewObjectsAreCycleRoots: - if canbeCycleRoot(c): - addCycleRoot(gch.cycleRoots, c) - elif IncRefRemovesCandidates: - c.setColor rcAlive - # XXX: this is not really atomic enough! - -proc nimGCref(p: pointer) {.compilerProc, inline.} = doIncRef(usrToCell(p)) -proc nimGCunref(p: pointer) {.compilerProc, inline.} = doDecRef(usrToCell(p)) + +proc incRef(c: PCell) {.inline.} = + gcAssert(isAllocatedPtr(gch.region, c), "incRef: interiorPtr") + c.refcount = c.refcount +% rcIncrement + +proc nimGCref(p: pointer) {.compilerProc.} = + let cell = usrToCell(p) + incRef(cell) + add(gch.additionalRoots, cell) + +proc nimGCunref(p: pointer) {.compilerProc.} = + let cell = usrToCell(p) + decRef(cell) + var L = gch.additionalRoots.len-1 + var i = L + let d = gch.additionalRoots.d + while i >= 0: + if d[i] == cell: + d[i] = d[L] + dec gch.additionalRoots.len + break + dec(i) + +template markGrey(x: PCell) = + if x.color == rcWhite and gch.phase == Phase.Marking: + x.setColor(rcGrey) + add(gch.greyStack, x) + +proc GC_addCycleRoot*[T](p: ref T) {.inline.} = + ## adds 'p' to the cycle candidate set for the cycle collector. It is + ## necessary if you used the 'acyclic' pragma for optimization + ## purposes and need to break cycles manually. + rtlAddCycleRoot(usrToCell(cast[pointer](p))) proc nimGCunrefNoCycle(p: pointer) {.compilerProc, inline.} = sysAssert(allocInv(gch.region), "begin nimGCunrefNoCycle") var c = usrToCell(p) - sysAssert(isAllocatedPtr(gch.region, c), "nimGCunrefNoCycle: isAllocatedPtr") - if c.refcount--(LocalHeap): + gcAssert(isAllocatedPtr(gch.region, c), "nimGCunrefNoCycle: isAllocatedPtr") + if --c.refcount: rtlAddZCT(c) sysAssert(allocInv(gch.region), "end nimGCunrefNoCycle 2") sysAssert(allocInv(gch.region), "end nimGCunrefNoCycle 5") -template doAsgnRef(dest: PPointer, src: pointer, - heapType = LocalHeap, cycleFlag = MaybeCyclic): stmt = - sysAssert(not isOnStack(dest), "asgnRef") - # BUGFIX: first incRef then decRef! 
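The "first incRef then decRef" rule preserved above guards self-assignment: with dest[] == src and a reference count of 1, decrementing first would hand the only reference to the ZCT before the increment re-acquires it. A toy simulation of the two orderings (not the patch's code):

  var rc = 1                      # dest[] == src, uniquely referenced

  proc incRefT() = inc rc
  proc decRefT() =
    dec rc
    if rc == 0: echo "freed too early!"

  incRefT(); decRefT()            # correct order: rc stays 1, nothing freed
  # swapping the two calls would print "freed too early!" before the
  # increment runs -- exactly the bug the comment warns about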
- if src != nil: doIncRef(usrToCell(src), heapType, cycleFlag) - if dest[] != nil: doDecRef(usrToCell(dest[]), heapType, cycleFlag) - dest[] = src - proc asgnRef(dest: PPointer, src: pointer) {.compilerProc, inline.} = # the code generator calls this proc! - doAsgnRef(dest, src, LocalHeap, MaybeCyclic) + gcAssert(not isOnStack(dest), "asgnRef") + # BUGFIX: first incRef then decRef! + if src != nil: + let s = usrToCell(src) + incRef(s) + markGrey(s) + if dest[] != nil: decRef(usrToCell(dest[])) + dest[] = src proc asgnRefNoCycle(dest: PPointer, src: pointer) {.compilerProc, inline.} = # the code generator calls this proc if it is known at compile time that no # cycle is possible. - doAsgnRef(dest, src, LocalHeap, Acyclic) + gcAssert(not isOnStack(dest), "asgnRefNoCycle") + if src != nil: + var c = usrToCell(src) + ++c.refcount + markGrey(c) + if dest[] != nil: + var c = usrToCell(dest[]) + if --c.refcount: + rtlAddZCT(c) + dest[] = src proc unsureAsgnRef(dest: PPointer, src: pointer) {.compilerProc.} = # unsureAsgnRef updates the reference counters only if dest is not on the # stack. It is used by the code generator if it cannot decide wether a # reference is in the stack or not (this can happen for var parameters). if not isOnStack(dest): - if src != nil: doIncRef(usrToCell(src)) - # XXX we must detect a shared heap here - # better idea may be to just eliminate the need for unsureAsgnRef - # + if src != nil: + let s = usrToCell(src) + incRef(s) + markGrey(s) # XXX finally use assembler for the stack checking instead! # the test for '!= nil' is correct, but I got tired of the segfaults # resulting from the crappy stack checking: - if cast[int](dest[]) >=% PageSize: doDecRef(usrToCell(dest[])) + if cast[int](dest[]) >=% PageSize: decRef(usrToCell(dest[])) else: # can't be an interior pointer if it's a stack location! - sysAssert(interiorAllocatedPtr(gch.region, dest)==nil, - "stack loc AND interior pointer") + gcAssert(interiorAllocatedPtr(gch.region, dest) == nil, + "stack loc AND interior pointer") dest[] = src -when hasThreadSupport and hasSharedHeap: - # shared heap version of the above procs - proc asgnRefSh(dest: PPointer, src: pointer) {.compilerProc, inline.} = - doAsgnRef(dest, src, SharedHeap, MaybeCyclic) - - proc asgnRefNoCycleSh(dest: PPointer, src: pointer) {.compilerProc, inline.} = - doAsgnRef(dest, src, SharedHeap, Acyclic) +type + GlobalMarkerProc = proc () {.nimcall, benign.} +var + globalMarkersLen: int + globalMarkers: array[0.. 
7_000, GlobalMarkerProc] -proc initGC() = - when not defined(useNimRtl): - when traceGC: - for i in low(CellState)..high(CellState): init(states[i]) - gch.cycleThreshold = InitialCycleThreshold - gch.stat.stackScans = 0 - gch.stat.cycleCollections = 0 - gch.stat.maxThreshold = 0 - gch.stat.maxStackSize = 0 - gch.stat.maxStackCells = 0 - gch.stat.cycleTableSize = 0 - # init the rt - init(gch.zct) - init(gch.tempStack) - init(gch.freeStack) - init(gch.cycleRoots) - init(gch.decStack) +proc nimRegisterGlobalMarker(markerProc: GlobalMarkerProc) {.compilerProc.} = + if globalMarkersLen <= high(globalMarkers): + globalMarkers[globalMarkersLen] = markerProc + inc globalMarkersLen + else: + echo "[GC] cannot register global variable; too many global variables" + quit 1 -proc forAllSlotsAux(dest: pointer, n: ptr TNimNode, op: WalkOp) = +proc forAllSlotsAux(dest: pointer, n: ptr TNimNode, op: WalkOp) {.benign.} = var d = cast[ByteAddress](dest) case n.kind of nkSlot: forAllChildrenAux(cast[pointer](d +% n.offset), n.typ, op) of nkList: for i in 0..n.len-1: - # inlined for speed - if n.sons[i].kind == nkSlot: - if n.sons[i].typ.kind in {tyRef, tyString, tySequence}: - doOperation(cast[PPointer](d +% n.sons[i].offset)[], op) - else: - forAllChildrenAux(cast[pointer](d +% n.sons[i].offset), - n.sons[i].typ, op) - else: - forAllSlotsAux(dest, n.sons[i], op) + forAllSlotsAux(dest, n.sons[i], op) of nkCase: var m = selectBranch(dest, n) if m != nil: forAllSlotsAux(dest, m, op) @@ -533,9 +333,10 @@ proc forAllChildrenAux(dest: pointer, mt: PNimType, op: WalkOp) = else: discard proc forAllChildren(cell: PCell, op: WalkOp) = - sysAssert(cell != nil, "forAllChildren: 1") - sysAssert(cell.typ != nil, "forAllChildren: 2") - sysAssert cell.typ.kind in {tyRef, tySequence, tyString}, "forAllChildren: 3" + gcAssert(cell != nil, "forAllChildren: 1") + gcAssert(isAllocatedPtr(gch.region, cell), "forAllChildren: 2") + gcAssert(cell.typ != nil, "forAllChildren: 3") + gcAssert cell.typ.kind in {tyRef, tySequence, tyString}, "forAllChildren: 4" let marker = cell.typ.marker if marker != nil: marker(cellToUsr(cell), op.int) @@ -547,10 +348,9 @@ proc forAllChildren(cell: PCell, op: WalkOp) = var d = cast[ByteAddress](cellToUsr(cell)) var s = cast[PGenericSeq](d) if s != nil: - let baseAddr = d +% GenericSeqSize for i in 0..s.len-1: - forAllChildrenAux(cast[pointer](baseAddr +% i *% cell.typ.base.size), - cell.typ.base, op) + forAllChildrenAux(cast[pointer](d +% i *% cell.typ.base.size +% + GenericSeqSize), cell.typ.base, op) else: discard proc addNewObjToZCT(res: PCell, gch: var GcHeap) {.inline.} = @@ -571,7 +371,7 @@ proc addNewObjToZCT(res: PCell, gch: var GcHeap) {.inline.} = template replaceZctEntry(i: expr) = c = d[i] if c.refcount >=% rcIncrement: - c.clearBit(rcZct) + c.refcount = c.refcount and not ZctFlag d[i] = res return if L > 8: @@ -592,408 +392,302 @@ proc addNewObjToZCT(res: PCell, gch: var GcHeap) {.inline.} = for i in countdown(L-1, max(0, L-8)): var c = d[i] if c.refcount >=% rcIncrement: - c.clearBit(rcZct) + c.refcount = c.refcount and not ZctFlag d[i] = res return add(gch.zct, res) -proc rawNewObj(typ: PNimType, size: int, gch: var GcHeap, rc1 = false): pointer = +{.push stackTrace: off, profiler:off.} +proc gcInvariant*() = + sysAssert(allocInv(gch.region), "injected") + when declared(markForDebug): + markForDebug(gch) +{.pop.} + +proc rawNewObj(typ: PNimType, size: int, gch: var GcHeap): pointer = # generates a new object and sets its reference counter to 0 - acquire(gch) 
sysAssert(allocInv(gch.region), "rawNewObj begin") - sysAssert(typ.kind in {tyRef, tyString, tySequence}, "newObj: 1") - + gcAssert(typ.kind in {tyRef, tyString, tySequence}, "newObj: 1") collectCT(gch) - sysAssert(allocInv(gch.region), "rawNewObj after collect") - var res = cast[PCell](rawAlloc(gch.region, size + sizeof(Cell))) - sysAssert(allocInv(gch.region), "rawNewObj after rawAlloc") - - sysAssert((cast[ByteAddress](res) and (MemAlign-1)) == 0, "newObj: 2") - + gcAssert((cast[ByteAddress](res) and (MemAlign-1)) == 0, "newObj: 2") + # now it is buffered in the ZCT res.typ = typ - - when trackAllocationSource and not hasThreadSupport: - if framePtr != nil and framePtr.prev != nil and framePtr.prev.prev != nil: - res.filename = framePtr.prev.prev.filename - res.line = framePtr.prev.prev.line - else: - res.filename = "nofile" - - if rc1: - res.refcount = rcIncrement # refcount is 1 - else: - # its refcount is zero, so add it to the ZCT: - res.refcount = rcZct - addNewObjToZCT(res, gch) - - if NewObjectsAreCycleRoots and canBeCycleRoot(res): - res.setBit(rcInCycleRoots) - res.setColor rcCycleCandidate - gch.cycleRoots.add res - + when leakDetector and not hasThreadSupport: + if framePtr != nil and framePtr.prev != nil: + res.filename = framePtr.prev.filename + res.line = framePtr.prev.line + # refcount is zero, color is black, but mark it to be in the ZCT + res.refcount = ZctFlag sysAssert(isAllocatedPtr(gch.region, res), "newObj: 3") - + # its refcount is zero, so add it to the ZCT: + addNewObjToZCT(res, gch) when logGC: writeCell("new cell", res) gcTrace(res, csAllocated) - release(gch) + when useCellIds: + inc gch.idGenerator + res.id = gch.idGenerator result = cellToUsr(res) sysAssert(allocInv(gch.region), "rawNewObj end") {.pop.} -proc freeCell(gch: var GcHeap, c: PCell) = - # prepareDealloc(c) - gcTrace(c, csFreed) - - when reallyDealloc: rawDealloc(gch.region, c) - else: - sysAssert(c.typ != nil, "collectCycles") - zeroMem(c, sizeof(Cell)) - -template eraseAt(cells: var CellSeq, at: int): stmt = - cells.d[at] = cells.d[cells.len - 1] - dec cells.len - -template trimAt(roots: var CellSeq, at: int): stmt = - # This will remove a cycle root candidate during trimming. - # a candidate is removed either because it received a refup and - # it's no longer a candidate or because it received further refdowns - # and now it's dead for sure. - let c = roots.d[at] - c.clearBit(rcInCycleRoots) - roots.eraseAt(at) - if c.isBitUp(rcReallyDead) and c.refcount <% rcIncrement: - # This case covers both dead objects and retired buffers - # That's why we must also check the refcount (it may be - # kept possitive by stack references). - freeCell(gch, c) +proc newObjNoInit(typ: PNimType, size: int): pointer {.compilerRtl.} = + result = rawNewObj(typ, size, gch) + when defined(memProfiler): nimProfile(size) proc newObj(typ: PNimType, size: int): pointer {.compilerRtl.} = - setStackTop(gch) - result = rawNewObj(typ, size, gch, false) + result = rawNewObj(typ, size, gch) zeroMem(result, size) when defined(memProfiler): nimProfile(size) -proc newObjNoInit(typ: PNimType, size: int): pointer {.compilerRtl.} = - setStackTop(gch) - result = rawNewObj(typ, size, gch, false) - when defined(memProfiler): nimProfile(size) - proc newSeq(typ: PNimType, len: int): pointer {.compilerRtl.} = - setStackTop(gch) # `newObj` already uses locks, so no need for them here. 
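A fresh cell leaves rawNewObj with a logical count of 0 and only ZctFlag set; it survives solely because the conservative stack scan temporarily bumps its count before collectZCT runs. A compact standalone model of that zero-count-table discipline (toy names, not the real CellSeq API):

  type TestCell = ref object
    rc: int                       # packed as in the patch: count shl 3

  const
    ZctFlag = 0b100
    rcIncrement = 0b1000

  var zct: seq[TestCell]

  proc bufferInZct(c: TestCell) =
    if (c.rc and ZctFlag) == 0:   # buffer each cell at most once
      c.rc = c.rc or ZctFlag
      zct.add c

  proc drainZct() =
    while zct.len > 0:
      let c = zct.pop()
      c.rc = c.rc and not ZctFlag
      if c.rc < rcIncrement:      # neither heap nor stack kept it alive
        echo "freeing dead cell"  # real code: prepareDealloc + rawDealloc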
let size = addInt(mulInt(len, typ.base.size), GenericSeqSize) result = newObj(typ, size) cast[PGenericSeq](result).len = len cast[PGenericSeq](result).reserved = len + when defined(memProfiler): nimProfile(size) proc newObjRC1(typ: PNimType, size: int): pointer {.compilerRtl.} = - setStackTop(gch) - result = rawNewObj(typ, size, gch, true) + # generates a new object and sets its reference counter to 1 + sysAssert(allocInv(gch.region), "newObjRC1 begin") + gcAssert(typ.kind in {tyRef, tyString, tySequence}, "newObj: 1") + collectCT(gch) + sysAssert(allocInv(gch.region), "newObjRC1 after collectCT") + + var res = cast[PCell](rawAlloc(gch.region, size + sizeof(Cell))) + sysAssert(allocInv(gch.region), "newObjRC1 after rawAlloc") + sysAssert((cast[ByteAddress](res) and (MemAlign-1)) == 0, "newObj: 2") + # now it is buffered in the ZCT + res.typ = typ + when leakDetector and not hasThreadSupport: + if framePtr != nil and framePtr.prev != nil: + res.filename = framePtr.prev.filename + res.line = framePtr.prev.line + res.refcount = rcIncrement # refcount is 1 + sysAssert(isAllocatedPtr(gch.region, res), "newObj: 3") + when logGC: writeCell("new cell", res) + gcTrace(res, csAllocated) + when useCellIds: + inc gch.idGenerator + res.id = gch.idGenerator + result = cellToUsr(res) + zeroMem(result, size) + sysAssert(allocInv(gch.region), "newObjRC1 end") when defined(memProfiler): nimProfile(size) proc newSeqRC1(typ: PNimType, len: int): pointer {.compilerRtl.} = - setStackTop(gch) let size = addInt(mulInt(len, typ.base.size), GenericSeqSize) result = newObjRC1(typ, size) cast[PGenericSeq](result).len = len cast[PGenericSeq](result).reserved = len + when defined(memProfiler): nimProfile(size) proc growObj(old: pointer, newsize: int, gch: var GcHeap): pointer = - acquire(gch) collectCT(gch) var ol = usrToCell(old) sysAssert(ol.typ != nil, "growObj: 1") - sysAssert(ol.typ.kind in {tyString, tySequence}, "growObj: 2") + gcAssert(ol.typ.kind in {tyString, tySequence}, "growObj: 2") sysAssert(allocInv(gch.region), "growObj begin") var res = cast[PCell](rawAlloc(gch.region, newsize + sizeof(Cell))) - var elemSize = if ol.typ.kind != tyString: ol.typ.base.size - else: 1 + var elemSize = 1 + if ol.typ.kind != tyString: elemSize = ol.typ.base.size var oldsize = cast[PGenericSeq](old).len*elemSize + GenericSeqSize - - # XXX: This should happen outside - # call user-defined move code - # call user-defined default constructor copyMem(res, ol, oldsize + sizeof(Cell)) zeroMem(cast[pointer](cast[ByteAddress](res)+% oldsize +% sizeof(Cell)), newsize-oldsize) - sysAssert((cast[ByteAddress](res) and (MemAlign-1)) == 0, "growObj: 3") - sysAssert(res.refcount shr rcShift <=% 1, "growObj: 4") - - when false: - if ol.isBitUp(rcZct): - var j = gch.zct.len-1 - var d = gch.zct.d - while j >= 0: - if d[j] == ol: - d[j] = res - break - dec(j) - - if ol.isBitUp(rcInCycleRoots): - for i in 0 .. <gch.cycleRoots.len: - if gch.cycleRoots.d[i] == ol: - eraseAt(gch.cycleRoots, i) - - freeCell(gch, ol) - - else: - # the new buffer inherits the GC state of the old one - if res.isBitUp(rcZct): gch.zct.add res - if res.isBitUp(rcInCycleRoots): gch.cycleRoots.add res - - # Pay attention to what's going on here! We're not releasing the old memory. - # This is because at this point there may be an interior pointer pointing - # into this buffer somewhere on the stack (due to `var` parameters now and - # and `let` and `var:var` stack locations in the future). - # We'll release the memory in the next GC cycle. 
If we release it here, - # we cannot guarantee that no memory will be corrupted when only safe - # language features are used. Accessing the memory after the seq/string - # has been invalidated may still result in logic errors in the user code. - # We may improve on that by protecting the page in debug builds or - # by providing a warning when we detect a stack pointer into it. - let bufferFlags = ol.refcount and rcBufferedAnywhere - if bufferFlags == 0: - # we need this in order to collect it safely later - ol.refcount = rcRetiredBuffer or rcZct - gch.zct.add ol - else: - ol.refcount = rcRetiredBuffer or bufferFlags - - when logGC: - writeCell("growObj old cell", ol) - writeCell("growObj new cell", res) - + # This can be wrong for intermediate temps that are nevertheless on the + # heap because of lambda lifting: + #gcAssert(res.refcount shr rcShift <=% 1, "growObj: 4") + when logGC: + writeCell("growObj old cell", ol) + writeCell("growObj new cell", res) + gcTrace(ol, csZctFreed) gcTrace(res, csAllocated) - release(gch) + when reallyDealloc: + sysAssert(allocInv(gch.region), "growObj before dealloc") + if ol.refcount shr rcShift <=% 1: + # free immediately to save space: + if (ol.refcount and ZctFlag) != 0: + var j = gch.zct.len-1 + var d = gch.zct.d + while j >= 0: + if d[j] == ol: + d[j] = res + break + dec(j) + rawDealloc(gch.region, ol) + else: + # we split the old refcount in 2 parts. XXX This is still not entirely + # correct if the pointer that receives growObj's result is on the stack. + # A better fix would be to emit the location specific write barrier for + # 'growObj', but this is lots of more work and who knows what new problems + # this would create. + res.refcount = rcIncrement + decRef(ol) + else: + sysAssert(ol.typ != nil, "growObj: 5") + zeroMem(ol, sizeof(Cell)) + when useCellIds: + inc gch.idGenerator + res.id = gch.idGenerator result = cellToUsr(res) sysAssert(allocInv(gch.region), "growObj end") when defined(memProfiler): nimProfile(newsize-oldsize) proc growObj(old: pointer, newsize: int): pointer {.rtl.} = - setStackTop(gch) result = growObj(old, newsize, gch) {.push profiler:off.} -# ---------------- cycle collector ------------------------------------------- -proc doOperation(p: pointer, op: WalkOp) = - if p == nil: return - var c: PCell = usrToCell(p) - sysAssert(c != nil, "doOperation: 1") - gch.tempStack.add c +template takeStartTime(workPackageSize) {.dirty.} = + const workPackage = workPackageSize + when withRealTime: + var steps = workPackage + var t0: Ticks + if gch.maxPause > 0: t0 = getticks() -proc nimGCvisit(d: pointer, op: int) {.compilerRtl.} = - doOperation(d, WalkOp(op)) +template takeTime {.dirty.} = + when withRealTime: dec steps + +template checkTime {.dirty.} = + when withRealTime: + if steps == 0: + steps = workPackage + if gch.maxPause > 0: + let duration = getticks() - t0 + # the GC's measuring is not accurate and needs some cleanup actions + # (stack unmarking), so subtract some short amount of time in + # order to miss deadlines less often: + if duration >= gch.maxPause - 50_000: + return false -type - RecursionType = enum - FromChildren, - FromRoot -{.deprecated: [TRecursionType: RecursionType].} +# ---------------- cycle collector ------------------------------------------- -proc collectZCT(gch: var GcHeap): bool +proc freeCyclicCell(gch: var GcHeap, c: PCell) = + prepareDealloc(c) + gcTrace(c, csCycFreed) + when logGC: writeCell("cycle collector dealloc cell", c) + when reallyDealloc: + sysAssert(allocInv(gch.region), "free cyclic 
cell") + rawDealloc(gch.region, c) + else: + gcAssert(c.typ != nil, "freeCyclicCell") + zeroMem(c, sizeof(Cell)) -template pseudoRecursion(typ: RecursionType, body: stmt): stmt = - discard +proc sweep(gch: var GcHeap) = + # XXX make this incremental! + for x in allObjects(gch.region): + if isCell(x): + # cast to PCell is correct here: + var c = cast[PCell](x) + if c.color == rcWhite: freeCyclicCell(gch, c) + else: c.setColor(rcWhite) + +proc markS(gch: var GcHeap, c: PCell) = + if x.color != rcGrey: + x.setColor(rcGrey) + add(gch.greyStack, x) + #forAllChildren(c, waMarkGrey) + #x.setColor(rcBlack) + +proc markIncrementally(gch: var GcHeap): bool = + var L = addr(gch.greyStack.len) + takeStartTime(100) + while L[] > 0: + var c = gch.greyStack.d[0] + sysAssert(isAllocatedPtr(gch.region, c), "CollectZCT: isAllocatedPtr") + gch.greyStack.d[0] = gch.greyStack.d[L[] - 1] + dec(L[]) + takeTime() + if c.color == clGrey: + forAllChildren(c, waMarkGrey) + c.setColor(clBlack) + checkTime() + result = true -proc trimCycleRoots(gch: var GcHeap, startIdx = gch.cycleRootsTrimIdx) = - var i = startIdx - while i < gch.cycleRoots.len: - if gch.cycleRoots.d[i].color != rcCycleCandidate: - gch.cycleRoots.trimAt i - else: - inc i +proc markGlobals(gch: var GcHeap) = + for i in 0 .. < globalMarkersLen: globalMarkers[i]() - gch.cycleRootsTrimIdx = gch.cycleRoots.len +proc markLocals(gch: var GcHeap) = + var d = gch.decStack.d + for i in 0..<gch.decStack.len: + sysAssert isAllocatedPtr(gch.region, d[i]), "markLocals" + markS(gch, d[i]) -# we now use a much simpler and non-recursive algorithm for cycle removal -proc collectCycles(gch: var GcHeap) = - if gch.cycleRoots.len == 0: return - gch.stat.cycleTableSize = max(gch.stat.cycleTableSize, gch.cycleRoots.len) +when logGC: + var + cycleCheckA: array[100, PCell] + cycleCheckALen = 0 + + proc alreadySeen(c: PCell): bool = + for i in 0 .. 
<cycleCheckALen: + if cycleCheckA[i] == c: return true + if cycleCheckALen == len(cycleCheckA): + gcAssert(false, "cycle detection overflow") + quit 1 + cycleCheckA[cycleCheckALen] = c + inc cycleCheckALen + + proc debugGraph(s: PCell) = + if alreadySeen(s): + writeCell("child cell (already seen) ", s) + else: + writeCell("cell {", s) + forAllChildren(s, waDebug) + c_fprintf(c_stdout, "}\n") - when CollectCyclesStats: - let l0 = gch.cycleRoots.len - let tStart = getTicks() +proc doOperation(p: pointer, op: WalkOp) = + if p == nil: return + var c: PCell = usrToCell(p) + gcAssert(c != nil, "doOperation: 1") + # the 'case' should be faster than function pointers because of easy + # prediction: + case op + of waZctDecRef: + #if not isAllocatedPtr(gch.region, c): + # c_fprintf(c_stdout, "[GC] decref bug: %p", c) + gcAssert(isAllocatedPtr(gch.region, c), "decRef: waZctDecRef") + gcAssert(c.refcount >=% rcIncrement, "doOperation 2") + #c.refcount = c.refcount -% rcIncrement + when logGC: writeCell("decref (from doOperation)", c) + decRef(c) + #if c.refcount <% rcIncrement: addZCT(gch.zct, c) + of waMarkGlobal: + when hasThreadSupport: + # could point to a cell which we don't own and don't want to touch/trace + if isAllocatedPtr(gch.region, c): + markS(gch, c) + else: + markS(gch, c) + of waMarkGrey: + if x.color != rcGrey: + c.setColor(rcGrey) + add(gch.greyStack, c) + #of waDebug: debugGraph(c) - var - decrefs = 0 - increfs = 0 - collected = 0 - maybedeads = 0 - - template ignoreObject(c: PCell): expr = - # This controls which objects will be ignored in the mark and scan stages - (when MarkingSkipsAcyclicObjects: not canbeCycleRoot(c) else: false) - # not canbeCycleRoot(c) - # false - # c.isBitUp(rcHasStackRef) - - template earlyMarkAliveRec(cell) = - let startLen = gch.tempStack.len - cell.setColor rcAlive - cell.forAllChildren waPush - - while startLen != gch.tempStack.len: - dec gch.tempStack.len - var c = gch.tempStack.d[gch.tempStack.len] - if c.color != rcAlive: - c.setColor rcAlive - c.forAllChildren waPush - - template earlyMarkAlive(stackRoots) = - # This marks all objects reachable from the stack as alive before any - # of the other stages is executed. Such objects cannot be garbage and - # they don't need to participate in the recursive decref/incref. - for i in 0 .. 
<stackRoots.len: - var c = stackRoots.d[i] - # c.setBit rcHasStackRef - earlyMarkAliveRec(c) - - earlyMarkAlive(gch.decStack) - - when CollectCyclesStats: - let tAfterEarlyMarkAlive = getTicks() - - template recursiveDecRef(cell) = - let startLen = gch.tempStack.len - cell.setColor rcDecRefApplied - cell.forAllChildren waPush - - while startLen != gch.tempStack.len: - dec gch.tempStack.len - var c = gch.tempStack.d[gch.tempStack.len] - if ignoreObject(c): continue - - sysAssert(c.refcount >=% rcIncrement, "recursive dec ref") - dec c.refcount, rcIncrement - inc decrefs - if c.color != rcDecRefApplied: - c.setColor rcDecRefApplied - c.forAllChildren waPush - - template markRoots(roots) = - var i = 0 - while i < roots.len: - if roots.d[i].color == rcCycleCandidate: - recursiveDecRef(roots.d[i]) - inc i - else: - roots.trimAt i - - markRoots(gch.cycleRoots) - - when CollectCyclesStats: - let tAfterMark = getTicks() - c_printf "COLLECT CYCLES %d: %d/%d\n", gcCollectionIdx, gch.cycleRoots.len, l0 - - template recursiveMarkAlive(cell) = - let startLen = gch.tempStack.len - cell.setColor rcAlive - cell.forAllChildren waPush - - while startLen != gch.tempStack.len: - dec gch.tempStack.len - var c = gch.tempStack.d[gch.tempStack.len] - if ignoreObject(c): continue - inc c.refcount, rcIncrement - inc increfs - - if c.color != rcAlive: - c.setColor rcAlive - c.forAllChildren waPush - - template scanRoots(roots) = - for i in 0 .. <roots.len: - let startLen = gch.tempStack.len - gch.tempStack.add roots.d[i] - - while startLen != gch.tempStack.len: - dec gch.tempStack.len - var c = gch.tempStack.d[gch.tempStack.len] - if ignoreObject(c): continue - if c.color == rcDecRefApplied: - if c.refcount >=% rcIncrement: - recursiveMarkAlive(c) - else: - # note that this is not necessarily the ultimate - # destiny of the object. we may still mark it alive - # later if we encounter another node from where it's - # reachable. - c.setColor rcMaybeDead - inc maybedeads - c.forAllChildren waPush - - scanRoots(gch.cycleRoots) - - when CollectCyclesStats: - let tAfterScan = getTicks() - - template collectDead(roots) = - for i in 0 .. <roots.len: - var c = roots.d[i] - c.clearBit(rcInCycleRoots) - - let startLen = gch.tempStack.len - gch.tempStack.add c - - while startLen != gch.tempStack.len: - dec gch.tempStack.len - var c = gch.tempStack.d[gch.tempStack.len] - when MarkingSkipsAcyclicObjects: - if not canbeCycleRoot(c): - # This is an acyclic object reachable from a dead cyclic object - # We must do a normal decref here that may add the acyclic object - # to the ZCT - doDecRef(c, LocalHeap, Cyclic) - continue - if c.color == rcMaybeDead and not c.isBitUp(rcInCycleRoots): - c.setColor(rcReallyDead) - inc collected - c.forAllChildren waPush - # we need to postpone the actual deallocation in order to allow - # the finalizers to run while the data structures are still intact - gch.freeStack.add c - prepareDealloc(c) - - for i in 0 .. 
<gch.freeStack.len: - freeCell(gch, gch.freeStack.d[i]) - - collectDead(gch.cycleRoots) - - when CollectCyclesStats: - let tFinal = getTicks() - cprintf "times:\n early mark alive: %d ms\n mark: %d ms\n scan: %d ms\n collect: %d ms\n decrefs: %d\n increfs: %d\n marked dead: %d\n collected: %d\n", - (tAfterEarlyMarkAlive - tStart) div 1_000_000, - (tAfterMark - tAfterEarlyMarkAlive) div 1_000_000, - (tAfterScan - tAfterMark) div 1_000_000, - (tFinal - tAfterScan) div 1_000_000, - decrefs, - increfs, - maybedeads, - collected - - deinit(gch.cycleRoots) - init(gch.cycleRoots) - - deinit(gch.freeStack) - init(gch.freeStack) - - when MarkingSkipsAcyclicObjects: - # Collect the acyclic objects that became unreachable due to collected - # cyclic objects. - discard collectZCT(gch) - # collectZCT may add new cycle candidates and we may decide to loop here - # if gch.cycleRoots.len > 0: repeat - -var gcDebugging* = false - -var seqdbg* : proc (s: PGenericSeq) {.cdecl.} +proc nimGCvisit(d: pointer, op: int) {.compilerRtl.} = + doOperation(d, WalkOp(op)) + +proc collectZCT(gch: var GcHeap): bool {.benign.} + +proc collectCycles(gch: var GcHeap): bool = + # ensure the ZCT 'color' is not used: + while gch.zct.len > 0: discard collectZCT(gch) + markGlobals(gch) + markLocals(gch) + if markIncrementally(gch): + gch.phase = Phase.Sweeping + sweep(gch) + gch.phase = Phase.None + result = true + else: + gch.phase = Phase.Marking proc gcMark(gch: var GcHeap, p: pointer) {.inline.} = # the addresses are not as cells on the stack, so turn them to cells: @@ -1005,235 +699,33 @@ proc gcMark(gch: var GcHeap, p: pointer) {.inline.} = var objStart = cast[PCell](interiorAllocatedPtr(gch.region, cell)) if objStart != nil: # mark the cell: - if objStart.color != rcReallyDead: - if gcDebugging: - # writeCell("marking ", objStart) - discard - else: - inc objStart.refcount, rcIncrement - gch.decStack.add objStart - else: - # With incremental clean-up, objects spend some time - # in various lists before being deallocated. - # We just found a reference on the stack to an object, - # which we have previously labeled as unreachable. - # This is either a bug in the GC or a pure accidental - # coincidence due to the conservative stack marking. - when debugGC: - # writeCell("marking dead object", objStart) - discard - when false: - if isAllocatedPtr(gch.region, cell): - sysAssert false, "allocated pointer but not interior?" - # mark the cell: - inc cell.refcount, rcIncrement - add(gch.decStack, cell) + objStart.refcount = objStart.refcount +% rcIncrement + add(gch.decStack, objStart) sysAssert(allocInv(gch.region), "gcMark end") -proc markThreadStacks(gch: var GcHeap) = - when hasThreadSupport and hasSharedHeap: - {.error: "not fully implemented".} - var it = threadList - while it != nil: - # mark registers: - for i in 0 .. high(it.registers): gcMark(gch, it.registers[i]) - var sp = cast[ByteAddress](it.stackBottom) - var max = cast[ByteAddress](it.stackTop) - # XXX stack direction? 
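gcMark above is the conservative root scan: any stack word that happens to be an interior pointer into the GC heap pins its cell by bumping the count, and unmarkStackAndRegisters later undoes the bump. In miniature, with a toy bounds check standing in for the real interiorAllocatedPtr query:

  proc looksLikeHeapPtr(p: pointer; lo, hi: uint): bool =
    # toy test only; the real code asks the allocator whether `p`
    # points into an allocated cell and maps it back to the cell start
    let a = cast[uint](p)
    a >= lo and a < hi

  var heapLo, heapHi: uint      # would come from the allocator's regions
  var decStack: seq[pointer]    # cells whose RC the scan bumped

  proc gcMarkToy(p: pointer) =
    if looksLikeHeapPtr(p, heapLo, heapHi):
      decStack.add p            # real code: refcount +% rcIncrement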
- # XXX unroll this loop: - while sp <=% max: - gcMark(gch, cast[PPointer](sp)[]) - sp = sp +% sizeof(pointer) - it = it.next - -# ----------------- stack management -------------------------------------- -# inspired from Smart Eiffel - -when defined(sparc): - const stackIncreases = false -elif defined(hppa) or defined(hp9000) or defined(hp9000s300) or - defined(hp9000s700) or defined(hp9000s800) or defined(hp9000s820): - const stackIncreases = true -else: - const stackIncreases = false - -when not defined(useNimRtl): - {.push stack_trace: off.} - proc setStackBottom(theStackBottom: pointer) = - #c_fprintf(c_stdout, "stack bottom: %p;\n", theStackBottom) - # the first init must be the one that defines the stack bottom: - if gch.stackBottom == nil: gch.stackBottom = theStackBottom - else: - var a = cast[ByteAddress](theStackBottom) # and not PageMask - PageSize*2 - var b = cast[ByteAddress](gch.stackBottom) - #c_fprintf(c_stdout, "old: %p new: %p;\n",gch.stackBottom,theStackBottom) - when stackIncreases: - gch.stackBottom = cast[pointer](min(a, b)) - else: - gch.stackBottom = cast[pointer](max(a, b)) - {.pop.} - -proc stackSize(): int {.noinline.} = - var stackTop {.volatile.}: pointer - result = abs(cast[int](addr(stackTop)) - cast[int](gch.stackBottom)) +include gc_common -var - jmpbufSize {.importc: "sizeof(jmp_buf)", nodecl.}: int - # a little hack to get the size of a JmpBuf in the generated C code - # in a platform independent way - -when defined(sparc): # For SPARC architecture. - proc isOnStack(p: pointer): bool = - var stackTop {.volatile.}: pointer - stackTop = addr(stackTop) - var b = cast[ByteAddress](gch.stackBottom) - var a = cast[ByteAddress](stackTop) - var x = cast[ByteAddress](p) - result = a <=% x and x <=% b - - proc markStackAndRegisters(gch: var GcHeap) {.noinline, cdecl.} = - when defined(sparcv9): - asm """"flushw \n" """ - else: - asm """"ta 0x3 ! ST_FLUSH_WINDOWS\n" """ - - var - max = gch.stackBottom - sp: PPointer - stackTop: array[0..1, pointer] - sp = addr(stackTop[0]) - # Addresses decrease as the stack grows. - while sp <= max: - gcMark(gch, sp[]) - sp = cast[PPointer](cast[ByteAddress](sp) +% sizeof(pointer)) - -elif defined(ELATE): - {.error: "stack marking code is to be written for this architecture".} - -elif stackIncreases: - # --------------------------------------------------------------------------- - # Generic code for architectures where addresses increase as the stack grows. - # --------------------------------------------------------------------------- - proc isOnStack(p: pointer): bool = - var stackTop {.volatile.}: pointer - stackTop = addr(stackTop) - var a = cast[ByteAddress](gch.stackBottom) - var b = cast[ByteAddress](stackTop) - var x = cast[ByteAddress](p) - result = a <=% x and x <=% b - - proc markStackAndRegisters(gch: var GcHeap) {.noinline, cdecl.} = - var registers: C_JmpBuf - if c_setjmp(registers) == 0'i32: # To fill the C stack with registers. - var max = cast[ByteAddress](gch.stackBottom) - var sp = cast[ByteAddress](addr(registers)) +% jmpbufSize -% sizeof(pointer) - # sp will traverse the JMP_BUF as well (jmp_buf size is added, - # otherwise sp would be below the registers structure). - while sp >=% max: - gcMark(gch, cast[PPointer](sp)[]) - sp = sp -% sizeof(pointer) - -else: - # --------------------------------------------------------------------------- - # Generic code for architectures where addresses decrease as the stack grows. 
- # --------------------------------------------------------------------------- - proc isOnStack(p: pointer): bool = - var stackTop {.volatile.}: pointer - stackTop = addr(stackTop) - var b = cast[ByteAddress](gch.stackBottom) - var a = cast[ByteAddress](stackTop) - var x = cast[ByteAddress](p) - result = a <=% x and x <=% b - - proc markStackAndRegisters(gch: var GcHeap) {.noinline, cdecl.} = - # We use a jmp_buf buffer that is in the C stack. - # Used to traverse the stack and registers assuming - # that 'setjmp' will save registers in the C stack. - type PStackSlice = ptr array [0..7, pointer] - var registers: C_JmpBuf - if c_setjmp(registers) == 0'i32: # To fill the C stack with registers. - when MinimumStackMarking: - # mark the registers - var jmpbufPtr = cast[ByteAddress](addr(registers)) - var jmpbufEnd = jmpbufPtr +% jmpbufSize - - while jmpbufPtr <=% jmpbufEnd: - gcMark(gch, cast[PPointer](jmpbufPtr)[]) - jmpbufPtr = jmpbufPtr +% sizeof(pointer) - - var sp = cast[ByteAddress](gch.stackTop) - else: - var sp = cast[ByteAddress](addr(registers)) - # mark the user stack - var max = cast[ByteAddress](gch.stackBottom) - # loop unrolled: - while sp <% max - 8*sizeof(pointer): - gcMark(gch, cast[PStackSlice](sp)[0]) - gcMark(gch, cast[PStackSlice](sp)[1]) - gcMark(gch, cast[PStackSlice](sp)[2]) - gcMark(gch, cast[PStackSlice](sp)[3]) - gcMark(gch, cast[PStackSlice](sp)[4]) - gcMark(gch, cast[PStackSlice](sp)[5]) - gcMark(gch, cast[PStackSlice](sp)[6]) - gcMark(gch, cast[PStackSlice](sp)[7]) - sp = sp +% sizeof(pointer)*8 - # last few entries: - while sp <=% max: - gcMark(gch, cast[PPointer](sp)[]) - sp = sp +% sizeof(pointer) - -# ---------------------------------------------------------------------------- -# end of non-portable code -# ---------------------------------------------------------------------------- - -proc releaseCell(gch: var GcHeap, cell: PCell) = - if cell.color != rcReallyDead: - prepareDealloc(cell) - cell.setColor rcReallyDead - - let l1 = gch.tempStack.len - cell.forAllChildren waPush - let l2 = gch.tempStack.len - for i in l1 .. <l2: - var cc = gch.tempStack.d[i] - if cc.refcount--(LocalHeap): - releaseCell(gch, cc) - else: - if canbeCycleRoot(cc): - addCycleRoot(gch.cycleRoots, cc) - - gch.tempStack.len = l1 - - if cell.isBitDown(rcBufferedAnywhere): - freeCell(gch, cell) - # else: - # This object is either buffered in the cycleRoots list and we'll leave - # it there to be collected in the next collectCycles or it's pending in - # the ZCT: - # (e.g. we are now cleaning the 15th object, but this one is 18th in the - # list. Note that this can happen only if we reached this point by the - # recursion). - # We can ignore it now as the ZCT cleaner will reach it soon. +proc markStackAndRegisters(gch: var GcHeap) {.noinline, cdecl.} = + forEachStackSlot(gch, gcMark) proc collectZCT(gch: var GcHeap): bool = - const workPackage = 100 + # Note: Freeing may add child objects to the ZCT! So essentially we do + # deep freeing, which is bad for incremental operation. In order to + # avoid a deep stack, we move objects to keep the ZCT small. + # This is performance critical! var L = addr(gch.zct.len) - - when withRealtime: - var steps = workPackage - var t0: Ticks - if gch.maxPause > 0: t0 = getticks() + takeStartTime(100) while L[] > 0: var c = gch.zct.d[0] - sysAssert c.isBitUp(rcZct), "collectZCT: rcZct missing!" 
- sysAssert(isAllocatedPtr(gch.region, c), "collectZCT: isAllocatedPtr") - + sysAssert(isAllocatedPtr(gch.region, c), "CollectZCT: isAllocatedPtr") # remove from ZCT: - c.clearBit(rcZct) + gcAssert((c.refcount and ZctFlag) == ZctFlag, "collectZCT") + + c.refcount = c.refcount and not ZctFlag gch.zct.d[0] = gch.zct.d[L[] - 1] dec(L[]) - when withRealtime: dec steps + takeTime() if c.refcount <% rcIncrement: # It may have a RC > 0, if it is in the hardware stack or # it has not been removed yet from the ZCT. This is because @@ -1241,92 +733,78 @@ proc collectZCT(gch: var GcHeap): bool = # as this might be too slow. # In any case, it should be removed from the ZCT. But not # freed. **KEEP THIS IN MIND WHEN MAKING THIS INCREMENTAL!** - if c.color == rcRetiredBuffer: - if c.isBitDown(rcInCycleRoots): - freeCell(gch, c) + when logGC: writeCell("zct dealloc cell", c) + gcTrace(c, csZctFreed) + # We are about to free the object, call the finalizer BEFORE its + # children are deleted as well, because otherwise the finalizer may + # access invalid memory. This is done by prepareDealloc(): + prepareDealloc(c) + forAllChildren(c, waZctDecRef) + when reallyDealloc: + sysAssert(allocInv(gch.region), "collectZCT: rawDealloc") + rawDealloc(gch.region, c) else: - # if c.color == rcReallyDead: writeCell("ReallyDead in ZCT?", c) - releaseCell(gch, c) - when withRealtime: - if steps == 0: - steps = workPackage - if gch.maxPause > 0: - let duration = getticks() - t0 - # the GC's measuring is not accurate and needs some cleanup actions - # (stack unmarking), so subtract some short amount of time in to - # order to miss deadlines less often: - if duration >= gch.maxPause - 50_000: - return false + sysAssert(c.typ != nil, "collectZCT 2") + zeroMem(c, sizeof(Cell)) + checkTime() result = true - gch.trimCycleRoots - #deInit(gch.zct) - #init(gch.zct) proc unmarkStackAndRegisters(gch: var GcHeap) = var d = gch.decStack.d - for i in 0 .. <gch.decStack.len: + for i in 0..gch.decStack.len-1: sysAssert isAllocatedPtr(gch.region, d[i]), "unmarkStackAndRegisters" - # XXX: just call doDecRef? 
- var c = d[i] - sysAssert c.typ != nil, "unmarkStackAndRegisters 2" - - if c.color == rcRetiredBuffer: - continue - - # XXX no need for an atomic dec here: - if c.refcount--(LocalHeap): - # the object survived only because of a stack reference - # it still doesn't have heap references - addZCT(gch.zct, c) - - if canbeCycleRoot(c): - # any cyclic object reachable from the stack can be turned into - # a leak if it's orphaned through the stack reference - # that's because the write-barrier won't be executed for stack - # locations - addCycleRoot(gch.cycleRoots, c) - + decRef(d[i]) gch.decStack.len = 0 proc collectCTBody(gch: var GcHeap) = - when withRealtime: + when withRealTime: let t0 = getticks() - when debugGC: inc gcCollectionIdx sysAssert(allocInv(gch.region), "collectCT: begin") - gch.stat.maxStackSize = max(gch.stat.maxStackSize, stackSize()) + when not defined(nimCoroutines): + gch.stat.maxStackSize = max(gch.stat.maxStackSize, stackSize()) sysAssert(gch.decStack.len == 0, "collectCT") prepareForInteriorPointerChecking(gch.region) markStackAndRegisters(gch) - markThreadStacks(gch) gch.stat.maxStackCells = max(gch.stat.maxStackCells, gch.decStack.len) inc(gch.stat.stackScans) if collectZCT(gch): when cycleGC: if getOccupiedMem(gch.region) >= gch.cycleThreshold or alwaysCycleGC: - collectCycles(gch) - sysAssert gch.zct.len == 0, "zct is not null after collect cycles" - inc(gch.stat.cycleCollections) - gch.cycleThreshold = max(InitialCycleThreshold, getOccupiedMem() * - CycleIncrease) - gch.stat.maxThreshold = max(gch.stat.maxThreshold, gch.cycleThreshold) + if collectCycles(gch): + inc(gch.stat.cycleCollections) + gch.cycleThreshold = max(InitialCycleThreshold, getOccupiedMem() * + CycleIncrease) + gch.stat.maxThreshold = max(gch.stat.maxThreshold, gch.cycleThreshold) unmarkStackAndRegisters(gch) sysAssert(allocInv(gch.region), "collectCT: end") - when withRealtime: + when withRealTime: let duration = getticks() - t0 gch.stat.maxPause = max(gch.stat.maxPause, duration) when defined(reportMissedDeadlines): if gch.maxPause > 0 and duration > gch.maxPause: c_fprintf(c_stdout, "[GC] missed deadline: %ld\n", duration) +when defined(nimCoroutines): + proc currentStackSizes(): int = + for stack in items(gch.stack): + result = result + stackSize(stack.starts, stack.pos) + proc collectCT(gch: var GcHeap) = - if (gch.zct.len >= ZctThreshold or (cycleGC and + # stackMarkCosts prevents some pathological behaviour: Stack marking + # becomes more expensive with large stacks and large stacks mean that + # cells with RC=0 are more likely to be kept alive by the stack. 
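The stackMarkCosts heuristic right below scales the ZCT trigger with the scanned stack size. Assuming the stock ZctThreshold of 500 cells (the constant is defined elsewhere in the runtime), the arithmetic works out as:

  const ZctThreshold = 500               # assumed value, defined elsewhere
  let stackBytes = 1_048_576             # e.g. a 1 MiB coroutine stack
  let stackMarkCosts = max(stackBytes div (16*sizeof(int)), ZctThreshold)
  echo stackMarkCosts   # 8192 on 64 bit: big stacks trigger collections later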
+ when defined(nimCoroutines): + let stackMarkCosts = max(currentStackSizes() div (16*sizeof(int)), ZctThreshold) + else: + let stackMarkCosts = max(stackSize() div (16*sizeof(int)), ZctThreshold) + if (gch.zct.len >= stackMarkCosts or (cycleGC and getOccupiedMem(gch.region)>=gch.cycleThreshold) or alwaysGC) and gch.recGcLock == 0: collectCTBody(gch) -when withRealtime: +when withRealTime: proc toNano(x: int): Nanos {.inline.} = result = x * 1000 @@ -1334,13 +812,11 @@ when withRealtime: gch.maxPause = MaxPauseInUs.toNano proc GC_step(gch: var GcHeap, us: int, strongAdvice: bool) = - acquire(gch) gch.maxPause = us.toNano if (gch.zct.len >= ZctThreshold or (cycleGC and getOccupiedMem(gch.region)>=gch.cycleThreshold) or alwaysGC) or strongAdvice: collectCTBody(gch) - release(gch) proc GC_step*(us: int, strongAdvice = false) = GC_step(gch, us, strongAdvice) @@ -1358,11 +834,7 @@ when not defined(useNimRtl): dec(gch.recGcLock) proc GC_setStrategy(strategy: GC_Strategy) = - case strategy - of gcThroughput: discard - of gcResponsiveness: discard - of gcOptimizeSpace: discard - of gcOptimizeTime: discard + discard proc GC_enableMarkAndSweep() = gch.cycleThreshold = InitialCycleThreshold @@ -1372,13 +844,10 @@ when not defined(useNimRtl): # set to the max value to suppress the cycle detector proc GC_fullCollect() = - setStackTop(gch) - acquire(gch) var oldThreshold = gch.cycleThreshold gch.cycleThreshold = 0 # forces cycle collection collectCT(gch) gch.cycleThreshold = oldThreshold - release(gch) proc GC_getStatistics(): string = GC_disable() @@ -1390,9 +859,13 @@ when not defined(useNimRtl): "[GC] max threshold: " & $gch.stat.maxThreshold & "\n" & "[GC] zct capacity: " & $gch.zct.cap & "\n" & "[GC] max cycle table size: " & $gch.stat.cycleTableSize & "\n" & - "[GC] max stack size: " & $gch.stat.maxStackSize & "\n" & "[GC] max pause time [ms]: " & $(gch.stat.maxPause div 1000_000) - when traceGC: writeLeakage(true) + when defined(nimCoroutines): + result = result & "[GC] number of stacks: " & $gch.stack.len & "\n" + for stack in items(gch.stack): + result = result & "[GC] stack " & stack.starts.repr & "[GC] max stack size " & $stack.maxStackSize & "\n" + else: + result = result & "[GC] max stack size: " & $gch.stat.maxStackSize & "\n" GC_enable() {.pop.} |
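For illustration, a hypothetical frame loop on top of the realtime surface this file ends with; GC_setMaxPause is inferred from the MaxPauseInUs.toNano hunk, and gameRunning/renderFrame are placeholders, not real APIs:

  proc gameRunning(): bool = true   # placeholder main-loop condition
  proc renderFrame() = discard      # placeholder per-frame work

  GC_disable()                # implicit collections off; we schedule them
  GC_setMaxPause(2_000)       # cap any pause at 2 ms (argument is in us)
  while gameRunning():
    renderFrame()
    GC_step(1_500, strongAdvice = false)  # donate up to 1.5 ms per frame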