diff options
Diffstat (limited to 'lib/system/gc.nim')
-rw-r--r-- | lib/system/gc.nim | 1035 |
1 files changed, 497 insertions, 538 deletions
diff --git a/lib/system/gc.nim b/lib/system/gc.nim index 0ab5f4d94..9289c7f55 100644 --- a/lib/system/gc.nim +++ b/lib/system/gc.nim @@ -1,7 +1,7 @@ # # -# Nimrod's Runtime Library -# (c) Copyright 2013 Andreas Rumpf +# Nim's Runtime Library +# (c) Copyright 2016 Andreas Rumpf # # See the file "copying.txt", included in this # distribution, for details about the copyright. @@ -9,27 +9,76 @@ # Garbage Collector # -# The basic algorithm is *Deferrent Reference Counting* with cycle detection. -# This is achieved by combining a Deutsch-Bobrow garbage collector -# together with Christoper's partial mark-sweep garbage collector. -# -# Special care has been taken to avoid recursion as far as possible to avoid -# stack overflows when traversing deep datastructures. It is well-suited -# for soft real time applications (like games). +# Refcounting + Mark&Sweep. Complex algorithms avoided. +# Been there, done that, didn't work. + +#[ + +A *cell* is anything that is traced by the GC +(sequences, refs, strings, closures). + +The basic algorithm is *Deferrent Reference Counting* with cycle detection. +References on the stack are not counted for better performance and easier C +code generation. + +Each cell has a header consisting of a RC and a pointer to its type +descriptor. However the program does not know about these, so they are placed at +negative offsets. In the GC code the type `PCell` denotes a pointer +decremented by the right offset, so that the header can be accessed easily. It +is extremely important that `pointer` is not confused with a `PCell`. + +In Nim the compiler cannot always know if a reference +is stored on the stack or not. This is caused by var parameters. +Consider this example: + + ```Nim + proc setRef(r: var ref TNode) = + new(r) + + proc usage = + var + r: ref TNode + setRef(r) # here we should not update the reference counts, because + # r is on the stack + setRef(r.left) # here we should update the refcounts! + ``` + +We have to decide at runtime whether the reference is on the stack or not. +The generated code looks roughly like this: + + ```C + void setref(TNode** ref) { + unsureAsgnRef(ref, newObj(TNode_TI, sizeof(TNode))) + } + void usage(void) { + setRef(&r) + setRef(&r->left) + } + ``` + +Note that for systems with a continuous stack (which most systems have) +the check whether the ref is on the stack is very cheap (only two +comparisons). +]# + {.push profiler:off.} const CycleIncrease = 2 # is a multiplicative increase - InitialCycleThreshold = 4*1024*1024 # X MB because cycle checking is slow - ZctThreshold = 500 # we collect garbage if the ZCT's size - # reaches this threshold - # this seems to be a good value + InitialCycleThreshold = when defined(nimCycleBreaker): high(int) + else: 4*1024*1024 # X MB because cycle checking is slow + InitialZctThreshold = 500 # we collect garbage if the ZCT's size + # reaches this threshold + # this seems to be a good value withRealTime = defined(useRealtimeGC) -when withRealTime and not defined(getTicks): +when withRealTime and not declared(getTicks): include "system/timers" when defined(memProfiler): - proc nimProfile(requestedSize: int) + proc nimProfile(requestedSize: int) {.benign.} + +when hasThreadSupport: + import std/sharedlist const rcIncrement = 0b1000 # so that lowest 3 bits are not touched @@ -41,188 +90,198 @@ const rcShift = 3 # shift by rcShift to get the reference counter colorMask = 0b011 type - TWalkOp = enum - waZctDecRef, waPush, waCycleDecRef, waMarkGray, waScan, waScanBlack, - waCollectWhite + WalkOp = enum + waMarkGlobal, # part of the backup/debug mark&sweep + waMarkPrecise, # part of the backup/debug mark&sweep + waZctDecRef, waPush + #, waDebug - TFinalizer {.compilerproc.} = proc (self: pointer) {.nimcall.} + Finalizer {.compilerproc.} = proc (self: pointer) {.nimcall, benign, raises: [].} # A ref type can have a finalizer that is called before the object's # storage is freed. - TGcStat {.final, pure.} = object + GcStat {.final, pure.} = object stackScans: int # number of performed stack scans (for statistics) cycleCollections: int # number of performed full collections maxThreshold: int # max threshold that has been set maxStackSize: int # max stack size maxStackCells: int # max stack cells in ``decStack`` - cycleTableSize: int # max entries in cycle table + cycleTableSize: int # max entries in cycle table maxPause: int64 # max measured GC pause in nanoseconds - - TGcHeap {.final, pure.} = object # this contains the zero count and - # non-zero count table - stackBottom: pointer + + GcStack {.final, pure.} = object + when nimCoroutines: + prev: ptr GcStack + next: ptr GcStack + maxStackSize: int # Used to track statistics because we can not use + # GcStat.maxStackSize when multiple stacks exist. + bottom: pointer + + when withRealTime or nimCoroutines: + pos: pointer # Used with `withRealTime` only for code clarity, see GC_Step(). + when withRealTime: + bottomSaved: pointer + + GcHeap {.final, pure.} = object # this contains the zero count and + # non-zero count table + stack: GcStack + when nimCoroutines: + activeStack: ptr GcStack # current executing coroutine stack. cycleThreshold: int - zct: TCellSeq # the zero count table - decStack: TCellSeq # cells in the stack that are to decref again - cycleRoots: TCellSet - tempStack: TCellSeq # temporary stack for recursion elimination + zctThreshold: int + when useCellIds: + idGenerator: int + zct: CellSeq # the zero count table + decStack: CellSeq # cells in the stack that are to decref again + tempStack: CellSeq # temporary stack for recursion elimination recGcLock: int # prevent recursion via finalizers; no thread lock when withRealTime: - maxPause: TNanos # max allowed pause in nanoseconds; active if > 0 - region: TMemRegion # garbage collected region - stat: TGcStat + maxPause: Nanos # max allowed pause in nanoseconds; active if > 0 + region: MemRegion # garbage collected region + stat: GcStat + marked: CellSet + additionalRoots: CellSeq # dummy roots for GC_ref/unref + when hasThreadSupport: + toDispose: SharedList[pointer] + gcThreadId: int var - gch {.rtlThreadVar.}: TGcHeap + gch {.rtlThreadVar.}: GcHeap when not defined(useNimRtl): - InstantiateForRegion(gch.region) - -template acquire(gch: TGcHeap) = - when hasThreadSupport and hasSharedHeap: - AcquireSys(HeapLock) - -template release(gch: TGcHeap) = - when hasThreadSupport and hasSharedHeap: - releaseSys(HeapLock) + instantiateForRegion(gch.region) template gcAssert(cond: bool, msg: string) = when defined(useGcAssert): if not cond: - echo "[GCASSERT] ", msg - quit 1 - -proc addZCT(s: var TCellSeq, c: PCell) {.noinline.} = + cstderr.rawWrite "[GCASSERT] " + cstderr.rawWrite msg + when defined(logGC): + cstderr.rawWrite "[GCASSERT] statistics:\L" + cstderr.rawWrite GC_getStatistics() + GC_disable() + writeStackTrace() + #var x: ptr int + #echo x[] + rawQuit 1 + +proc addZCT(s: var CellSeq, c: PCell) {.noinline.} = if (c.refcount and ZctFlag) == 0: c.refcount = c.refcount or ZctFlag add(s, c) proc cellToUsr(cell: PCell): pointer {.inline.} = # convert object (=pointer to refcount) to pointer to userdata - result = cast[pointer](cast[TAddress](cell)+%TAddress(sizeof(TCell))) + result = cast[pointer](cast[int](cell)+%ByteAddress(sizeof(Cell))) proc usrToCell(usr: pointer): PCell {.inline.} = # convert pointer to userdata to object (=pointer to refcount) - result = cast[PCell](cast[TAddress](usr)-%TAddress(sizeof(TCell))) - -proc canbeCycleRoot(c: PCell): bool {.inline.} = - result = ntfAcyclic notin c.typ.flags + result = cast[PCell](cast[int](usr)-%ByteAddress(sizeof(Cell))) proc extGetCellType(c: pointer): PNimType {.compilerproc.} = # used for code generation concerning debugging result = usrToCell(c).typ proc internRefcount(p: pointer): int {.exportc: "getRefcount".} = - result = int(usrToCell(p).refcount) shr rcShift + result = usrToCell(p).refcount shr rcShift # this that has to equals zero, otherwise we have to round up UnitsPerPage: when BitsPerPage mod (sizeof(int)*8) != 0: {.error: "(BitsPerPage mod BitsPerUnit) should be zero!".} -template color(c): expr = c.refCount and colorMask +template color(c): untyped = c.refCount and colorMask template setColor(c, col) = when col == rcBlack: - c.refcount = c.refCount and not colorMask + c.refcount = c.refcount and not colorMask else: - c.refcount = c.refCount and not colorMask or col + c.refcount = c.refcount and not colorMask or col + +when defined(logGC): + proc writeCell(msg: cstring, c: PCell) = + var kind = -1 + var typName: cstring = "nil" + if c.typ != nil: + kind = ord(c.typ.kind) + when defined(nimTypeNames): + if not c.typ.name.isNil: + typName = c.typ.name + + when leakDetector: + c_printf("[GC] %s: %p %d %s rc=%ld from %s(%ld)\n", + msg, c, kind, typName, c.refcount shr rcShift, c.filename, c.line) + else: + c_printf("[GC] %s: %p %d %s rc=%ld; thread=%ld\n", + msg, c, kind, typName, c.refcount shr rcShift, gch.gcThreadId) -proc writeCell(msg: CString, c: PCell) = - var kind = -1 - if c.typ != nil: kind = ord(c.typ.kind) - when leakDetector: - c_fprintf(c_stdout, "[GC] %s: %p %d rc=%ld from %s(%ld)\n", - msg, c, kind, c.refcount shr rcShift, c.filename, c.line) - else: - c_fprintf(c_stdout, "[GC] %s: %p %d rc=%ld; color=%ld\n", - msg, c, kind, c.refcount shr rcShift, c.color) +template logCell(msg: cstring, c: PCell) = + when defined(logGC): + writeCell(msg, c) -template gcTrace(cell, state: expr): stmt {.immediate.} = +template gcTrace(cell, state: untyped) = when traceGC: traceCell(cell, state) # forward declarations: -proc collectCT(gch: var TGcHeap) -proc IsOnStack*(p: pointer): bool {.noinline.} -proc forAllChildren(cell: PCell, op: TWalkOp) -proc doOperation(p: pointer, op: TWalkOp) -proc forAllChildrenAux(dest: Pointer, mt: PNimType, op: TWalkOp) +proc collectCT(gch: var GcHeap) {.benign, raises: [].} +proc isOnStack(p: pointer): bool {.noinline, benign, raises: [].} +proc forAllChildren(cell: PCell, op: WalkOp) {.benign, raises: [].} +proc doOperation(p: pointer, op: WalkOp) {.benign, raises: [].} +proc forAllChildrenAux(dest: pointer, mt: PNimType, op: WalkOp) {.benign, raises: [].} # we need the prototype here for debugging purposes -when hasThreadSupport and hasSharedHeap: - template `--`(x: expr): expr = atomicDec(x, rcIncrement) <% rcIncrement - template `++`(x: expr): stmt = discard atomicInc(x, rcIncrement) -else: - template `--`(x: expr): expr = - Dec(x, rcIncrement) - x <% rcIncrement - template `++`(x: expr): stmt = Inc(x, rcIncrement) - -proc prepareDealloc(cell: PCell) = - if cell.typ.finalizer != nil: - # the finalizer could invoke something that - # allocates memory; this could trigger a garbage - # collection. Since we are already collecting we - # prevend recursive entering here by a lock. - # XXX: we should set the cell's children to nil! - inc(gch.recGcLock) - (cast[TFinalizer](cell.typ.finalizer))(cellToUsr(cell)) - dec(gch.recGcLock) +proc incRef(c: PCell) {.inline.} = + gcAssert(isAllocatedPtr(gch.region, c), "incRef: interiorPtr") + c.refcount = c.refcount +% rcIncrement + # and not colorMask + logCell("incRef", c) -proc rtlAddCycleRoot(c: PCell) {.rtl, inl.} = - # we MUST access gch as a global here, because this crosses DLL boundaries! - when hasThreadSupport and hasSharedHeap: - AcquireSys(HeapLock) - if c.color != rcPurple: - c.setColor(rcPurple) - incl(gch.cycleRoots, c) - when hasThreadSupport and hasSharedHeap: - ReleaseSys(HeapLock) +proc nimGCref(p: pointer) {.compilerproc.} = + # we keep it from being collected by pretending it's not even allocated: + let c = usrToCell(p) + add(gch.additionalRoots, c) + incRef(c) proc rtlAddZCT(c: PCell) {.rtl, inl.} = # we MUST access gch as a global here, because this crosses DLL boundaries! - when hasThreadSupport and hasSharedHeap: - AcquireSys(HeapLock) addZCT(gch.zct, c) - when hasThreadSupport and hasSharedHeap: - ReleaseSys(HeapLock) proc decRef(c: PCell) {.inline.} = gcAssert(isAllocatedPtr(gch.region, c), "decRef: interiorPtr") gcAssert(c.refcount >=% rcIncrement, "decRef") - if --c.refcount: + c.refcount = c.refcount -% rcIncrement + if c.refcount <% rcIncrement: rtlAddZCT(c) - elif canBeCycleRoot(c): - # unfortunately this is necessary here too, because a cycle might just - # have been broken up and we could recycle it. - rtlAddCycleRoot(c) - #writeCell("decRef", c) - -proc incRef(c: PCell) {.inline.} = - gcAssert(isAllocatedPtr(gch.region, c), "incRef: interiorPtr") - c.refcount = c.refCount +% rcIncrement - # and not colorMask - #writeCell("incRef", c) - if canBeCycleRoot(c): - rtlAddCycleRoot(c) - -proc nimGCref(p: pointer) {.compilerProc, inline.} = incRef(usrToCell(p)) -proc nimGCunref(p: pointer) {.compilerProc, inline.} = decRef(usrToCell(p)) - -proc GC_addCycleRoot*[T](p: ref T) {.inline.} = - ## adds 'p' to the cycle candidate set for the cycle collector. It is - ## necessary if you used the 'acyclic' pragma for optimization - ## purposes and need to break cycles manually. - rtlAddCycleRoot(usrToCell(cast[pointer](p))) + logCell("decRef", c) + +proc nimGCunref(p: pointer) {.compilerproc.} = + let cell = usrToCell(p) + var L = gch.additionalRoots.len-1 + var i = L + let d = gch.additionalRoots.d + while i >= 0: + if d[i] == cell: + d[i] = d[L] + dec gch.additionalRoots.len + break + dec(i) + decRef(usrToCell(p)) + +include gc_common + +template beforeDealloc(gch: var GcHeap; c: PCell; msg: typed) = + when false: + for i in 0..gch.decStack.len-1: + if gch.decStack.d[i] == c: + sysAssert(false, msg) -proc nimGCunrefNoCycle(p: pointer) {.compilerProc, inline.} = +proc nimGCunrefNoCycle(p: pointer) {.compilerproc, inline.} = sysAssert(allocInv(gch.region), "begin nimGCunrefNoCycle") - var c = usrToCell(p) - gcAssert(isAllocatedPtr(gch.region, c), "nimGCunrefNoCycle: isAllocatedPtr") - if --c.refcount: - rtlAddZCT(c) - sysAssert(allocInv(gch.region), "end nimGCunrefNoCycle 2") + decRef(usrToCell(p)) sysAssert(allocInv(gch.region), "end nimGCunrefNoCycle 5") -proc asgnRef(dest: ppointer, src: pointer) {.compilerProc, inline.} = +proc nimGCunrefRC1(p: pointer) {.compilerproc, inline.} = + decRef(usrToCell(p)) + +proc asgnRef(dest: PPointer, src: pointer) {.compilerproc, inline.} = # the code generator calls this proc! gcAssert(not isOnStack(dest), "asgnRef") # BUGFIX: first incRef then decRef! @@ -230,23 +289,14 @@ proc asgnRef(dest: ppointer, src: pointer) {.compilerProc, inline.} = if dest[] != nil: decRef(usrToCell(dest[])) dest[] = src -proc asgnRefNoCycle(dest: ppointer, src: pointer) {.compilerProc, inline.} = - # the code generator calls this proc if it is known at compile time that no - # cycle is possible. - if src != nil: - var c = usrToCell(src) - ++c.refcount - if dest[] != nil: - var c = usrToCell(dest[]) - if --c.refcount: - rtlAddZCT(c) - dest[] = src +proc asgnRefNoCycle(dest: PPointer, src: pointer) {.compilerproc, inline, + deprecated: "old compiler compat".} = asgnRef(dest, src) -proc unsureAsgnRef(dest: ppointer, src: pointer) {.compilerProc.} = +proc unsureAsgnRef(dest: PPointer, src: pointer) {.compilerproc.} = # unsureAsgnRef updates the reference counters only if dest is not on the - # stack. It is used by the code generator if it cannot decide wether a + # stack. It is used by the code generator if it cannot decide whether a # reference is in the stack or not (this can happen for var parameters). - if not IsOnStack(dest): + if not isOnStack(dest): if src != nil: incRef(usrToCell(src)) # XXX finally use assembler for the stack checking instead! # the test for '!= nil' is correct, but I got tired of the segfaults @@ -254,15 +304,16 @@ proc unsureAsgnRef(dest: ppointer, src: pointer) {.compilerProc.} = if cast[int](dest[]) >=% PageSize: decRef(usrToCell(dest[])) else: # can't be an interior pointer if it's a stack location! - gcAssert(interiorAllocatedPtr(gch.region, dest) == nil, + gcAssert(interiorAllocatedPtr(gch.region, dest) == nil, "stack loc AND interior pointer") dest[] = src proc initGC() = when not defined(useNimRtl): when traceGC: - for i in low(TCellState)..high(TCellState): init(states[i]) + for i in low(CellState)..high(CellState): init(states[i]) gch.cycleThreshold = InitialCycleThreshold + gch.zctThreshold = InitialZctThreshold gch.stat.stackScans = 0 gch.stat.cycleCollections = 0 gch.stat.maxThreshold = 0 @@ -272,11 +323,22 @@ proc initGC() = # init the rt init(gch.zct) init(gch.tempStack) - init(gch.cycleRoots) init(gch.decStack) + init(gch.marked) + init(gch.additionalRoots) + when hasThreadSupport: + init(gch.toDispose) + gch.gcThreadId = atomicInc(gHeapidGenerator) - 1 + gcAssert(gch.gcThreadId >= 0, "invalid computed thread ID") -proc forAllSlotsAux(dest: pointer, n: ptr TNimNode, op: TWalkOp) = - var d = cast[TAddress](dest) +proc cellsetReset(s: var CellSet) = + deinit(s) + init(s) + +{.push stacktrace:off.} + +proc forAllSlotsAux(dest: pointer, n: ptr TNimNode, op: WalkOp) {.benign.} = + var d = cast[int](dest) case n.kind of nkSlot: forAllChildrenAux(cast[pointer](d +% n.offset), n.typ, op) of nkList: @@ -284,9 +346,9 @@ proc forAllSlotsAux(dest: pointer, n: ptr TNimNode, op: TWalkOp) = # inlined for speed if n.sons[i].kind == nkSlot: if n.sons[i].typ.kind in {tyRef, tyString, tySequence}: - doOperation(cast[ppointer](d +% n.sons[i].offset)[], op) + doOperation(cast[PPointer](d +% n.sons[i].offset)[], op) else: - forAllChildrenAux(cast[pointer](d +% n.sons[i].offset), + forAllChildrenAux(cast[pointer](d +% n.sons[i].offset), n.sons[i].typ, op) else: forAllSlotsAux(dest, n.sons[i], op) @@ -295,45 +357,45 @@ proc forAllSlotsAux(dest: pointer, n: ptr TNimNode, op: TWalkOp) = if m != nil: forAllSlotsAux(dest, m, op) of nkNone: sysAssert(false, "forAllSlotsAux") -proc forAllChildrenAux(dest: Pointer, mt: PNimType, op: TWalkOp) = - var d = cast[TAddress](dest) +proc forAllChildrenAux(dest: pointer, mt: PNimType, op: WalkOp) = + var d = cast[int](dest) if dest == nil: return # nothing to do if ntfNoRefs notin mt.flags: - case mt.Kind + case mt.kind of tyRef, tyString, tySequence: # leaf: - doOperation(cast[ppointer](d)[], op) + doOperation(cast[PPointer](d)[], op) of tyObject, tyTuple: forAllSlotsAux(dest, mt.node, op) of tyArray, tyArrayConstr, tyOpenArray: for i in 0..(mt.size div mt.base.size)-1: forAllChildrenAux(cast[pointer](d +% i *% mt.base.size), mt.base, op) - else: nil + else: discard -proc forAllChildren(cell: PCell, op: TWalkOp) = - gcAssert(cell != nil, "forAllChildren: 1") - gcAssert(cell.typ != nil, "forAllChildren: 2") - gcAssert cell.typ.kind in {tyRef, tySequence, tyString}, "forAllChildren: 3" +proc forAllChildren(cell: PCell, op: WalkOp) = + gcAssert(cell != nil, "forAllChildren: cell is nil") + gcAssert(isAllocatedPtr(gch.region, cell), "forAllChildren: pointer not part of the heap") + gcAssert(cell.typ != nil, "forAllChildren: cell.typ is nil") + gcAssert cell.typ.kind in {tyRef, tySequence, tyString}, "forAllChildren: unknown GC'ed type" let marker = cell.typ.marker if marker != nil: marker(cellToUsr(cell), op.int) else: - case cell.typ.Kind + case cell.typ.kind of tyRef: # common case forAllChildrenAux(cellToUsr(cell), cell.typ.base, op) of tySequence: - var d = cast[TAddress](cellToUsr(cell)) + var d = cast[int](cellToUsr(cell)) var s = cast[PGenericSeq](d) if s != nil: for i in 0..s.len-1: - forAllChildrenAux(cast[pointer](d +% i *% cell.typ.base.size +% - GenericSeqSize), cell.typ.base, op) - else: nil + forAllChildrenAux(cast[pointer](d +% align(GenericSeqSize, cell.typ.base.align) +% i *% cell.typ.base.size), cell.typ.base, op) + else: discard -proc addNewObjToZCT(res: PCell, gch: var TGcHeap) {.inline.} = +proc addNewObjToZCT(res: PCell, gch: var GcHeap) {.inline.} = # we check the last 8 entries (cache line) for a slot that could be reused. # In 63% of all cases we succeed here! But we have to optimize the heck # out of this small linear search so that ``newObj`` is not slowed down. - # + # # Slots to try cache hit # 1 32% # 4 59% @@ -344,7 +406,7 @@ proc addNewObjToZCT(res: PCell, gch: var TGcHeap) {.inline.} = var d = gch.zct.d when true: # loop unrolled for performance: - template replaceZctEntry(i: expr) = + template replaceZctEntry(i: untyped) = c = d[i] if c.refcount >=% rcIncrement: c.refcount = c.refcount and not ZctFlag @@ -373,121 +435,139 @@ proc addNewObjToZCT(res: PCell, gch: var TGcHeap) {.inline.} = return add(gch.zct, res) -proc rawNewObj(typ: PNimType, size: int, gch: var TGcHeap): pointer = +{.push stackTrace: off, profiler:off.} +proc gcInvariant*() = + sysAssert(allocInv(gch.region), "injected") + when declared(markForDebug): + markForDebug(gch) +{.pop.} + +template setFrameInfo(c: PCell) = + when leakDetector: + if framePtr != nil and framePtr.prev != nil: + c.filename = framePtr.prev.filename + c.line = framePtr.prev.line + else: + c.filename = nil + c.line = 0 + +proc rawNewObj(typ: PNimType, size: int, gch: var GcHeap): pointer = # generates a new object and sets its reference counter to 0 - acquire(gch) + incTypeSize typ, size + sysAssert(allocInv(gch.region), "rawNewObj begin") gcAssert(typ.kind in {tyRef, tyString, tySequence}, "newObj: 1") collectCT(gch) - sysAssert(allocInv(gch.region), "rawNewObj begin") - var res = cast[PCell](rawAlloc(gch.region, size + sizeof(TCell))) - gcAssert((cast[TAddress](res) and (MemAlign-1)) == 0, "newObj: 2") + var res = cast[PCell](rawAlloc(gch.region, size + sizeof(Cell))) + #gcAssert typ.kind in {tyString, tySequence} or size >= typ.base.size, "size too small" + gcAssert((cast[int](res) and (MemAlign-1)) == 0, "newObj: 2") # now it is buffered in the ZCT res.typ = typ - when leakDetector and not hasThreadSupport: - if framePtr != nil and framePtr.prev != nil: - res.filename = framePtr.prev.filename - res.line = framePtr.prev.line + setFrameInfo(res) # refcount is zero, color is black, but mark it to be in the ZCT res.refcount = ZctFlag sysAssert(isAllocatedPtr(gch.region, res), "newObj: 3") # its refcount is zero, so add it to the ZCT: addNewObjToZCT(res, gch) - when logGC: writeCell("new cell", res) + logCell("new cell", res) + track("rawNewObj", res, size) gcTrace(res, csAllocated) - release(gch) + when useCellIds: + inc gch.idGenerator + res.id = gch.idGenerator * 1000_000 + gch.gcThreadId result = cellToUsr(res) sysAssert(allocInv(gch.region), "rawNewObj end") -{.pop.} +{.pop.} # .stackTrace off +{.pop.} # .profiler off -proc newObj(typ: PNimType, size: int): pointer {.compilerRtl.} = +proc newObjNoInit(typ: PNimType, size: int): pointer {.compilerRtl.} = + result = rawNewObj(typ, size, gch) + when defined(memProfiler): nimProfile(size) + +proc newObj(typ: PNimType, size: int): pointer {.compilerRtl, noinline.} = result = rawNewObj(typ, size, gch) zeroMem(result, size) when defined(memProfiler): nimProfile(size) +{.push overflowChecks: on.} proc newSeq(typ: PNimType, len: int): pointer {.compilerRtl.} = # `newObj` already uses locks, so no need for them here. - let size = addInt(mulInt(len, typ.base.size), GenericSeqSize) + let size = align(GenericSeqSize, typ.base.align) + len * typ.base.size result = newObj(typ, size) cast[PGenericSeq](result).len = len cast[PGenericSeq](result).reserved = len when defined(memProfiler): nimProfile(size) +{.pop.} -proc newObjRC1(typ: PNimType, size: int): pointer {.compilerRtl.} = +proc newObjRC1(typ: PNimType, size: int): pointer {.compilerRtl, noinline.} = # generates a new object and sets its reference counter to 1 + incTypeSize typ, size sysAssert(allocInv(gch.region), "newObjRC1 begin") - acquire(gch) gcAssert(typ.kind in {tyRef, tyString, tySequence}, "newObj: 1") collectCT(gch) sysAssert(allocInv(gch.region), "newObjRC1 after collectCT") - - var res = cast[PCell](rawAlloc(gch.region, size + sizeof(TCell))) + + var res = cast[PCell](rawAlloc(gch.region, size + sizeof(Cell))) sysAssert(allocInv(gch.region), "newObjRC1 after rawAlloc") - sysAssert((cast[TAddress](res) and (MemAlign-1)) == 0, "newObj: 2") + sysAssert((cast[int](res) and (MemAlign-1)) == 0, "newObj: 2") # now it is buffered in the ZCT res.typ = typ - when leakDetector and not hasThreadSupport: - if framePtr != nil and framePtr.prev != nil: - res.filename = framePtr.prev.filename - res.line = framePtr.prev.line + setFrameInfo(res) res.refcount = rcIncrement # refcount is 1 sysAssert(isAllocatedPtr(gch.region, res), "newObj: 3") - when logGC: writeCell("new cell", res) + logCell("new cell", res) + track("newObjRC1", res, size) gcTrace(res, csAllocated) - release(gch) + when useCellIds: + inc gch.idGenerator + res.id = gch.idGenerator * 1000_000 + gch.gcThreadId result = cellToUsr(res) zeroMem(result, size) sysAssert(allocInv(gch.region), "newObjRC1 end") when defined(memProfiler): nimProfile(size) +{.push overflowChecks: on.} proc newSeqRC1(typ: PNimType, len: int): pointer {.compilerRtl.} = - let size = addInt(mulInt(len, typ.base.size), GenericSeqSize) + let size = align(GenericSeqSize, typ.base.align) + len * typ.base.size result = newObjRC1(typ, size) cast[PGenericSeq](result).len = len cast[PGenericSeq](result).reserved = len when defined(memProfiler): nimProfile(size) - -proc growObj(old: pointer, newsize: int, gch: var TGcHeap): pointer = - acquire(gch) +{.pop.} + +proc growObj(old: pointer, newsize: int, gch: var GcHeap): pointer = collectCT(gch) var ol = usrToCell(old) sysAssert(ol.typ != nil, "growObj: 1") gcAssert(ol.typ.kind in {tyString, tySequence}, "growObj: 2") sysAssert(allocInv(gch.region), "growObj begin") - var res = cast[PCell](rawAlloc(gch.region, newsize + sizeof(TCell))) - var elemSize = 1 - if ol.typ.kind != tyString: elemSize = ol.typ.base.size - - var oldsize = cast[PGenericSeq](old).len*elemSize + GenericSeqSize - copyMem(res, ol, oldsize + sizeof(TCell)) - zeroMem(cast[pointer](cast[TAddress](res)+% oldsize +% sizeof(TCell)), + var res = cast[PCell](rawAlloc(gch.region, newsize + sizeof(Cell))) + var elemSize,elemAlign = 1 + if ol.typ.kind != tyString: + elemSize = ol.typ.base.size + elemAlign = ol.typ.base.align + incTypeSize ol.typ, newsize + + var oldsize = align(GenericSeqSize, elemAlign) + cast[PGenericSeq](old).len * elemSize + copyMem(res, ol, oldsize + sizeof(Cell)) + zeroMem(cast[pointer](cast[int](res) +% oldsize +% sizeof(Cell)), newsize-oldsize) - sysAssert((cast[TAddress](res) and (MemAlign-1)) == 0, "growObj: 3") - sysAssert(res.refcount shr rcShift <=% 1, "growObj: 4") - #if res.refcount <% rcIncrement: - # add(gch.zct, res) - #else: # XXX: what to do here? - # decRef(ol) - if (ol.refcount and ZctFlag) != 0: - var j = gch.zct.len-1 - var d = gch.zct.d - while j >= 0: - if d[j] == ol: - d[j] = res - break - dec(j) - if canBeCycleRoot(ol): excl(gch.cycleRoots, ol) - when logGC: - writeCell("growObj old cell", ol) - writeCell("growObj new cell", res) + sysAssert((cast[int](res) and (MemAlign-1)) == 0, "growObj: 3") + # This can be wrong for intermediate temps that are nevertheless on the + # heap because of lambda lifting: + #gcAssert(res.refcount shr rcShift <=% 1, "growObj: 4") + logCell("growObj old cell", ol) + logCell("growObj new cell", res) gcTrace(ol, csZctFreed) gcTrace(res, csAllocated) - when reallyDealloc: rawDealloc(gch.region, ol) - else: - sysAssert(ol.typ != nil, "growObj: 5") - zeroMem(ol, sizeof(TCell)) - release(gch) + track("growObj old", ol, 0) + track("growObj new", res, newsize) + # since we steal the old seq's contents, we set the old length to 0. + cast[PGenericSeq](old).len = 0 + when useCellIds: + inc gch.idGenerator + res.id = gch.idGenerator * 1000_000 + gch.gcThreadId result = cellToUsr(res) sysAssert(allocInv(gch.region), "growObj end") when defined(memProfiler): nimProfile(newsize-oldsize) @@ -495,57 +575,72 @@ proc growObj(old: pointer, newsize: int, gch: var TGcHeap): pointer = proc growObj(old: pointer, newsize: int): pointer {.rtl.} = result = growObj(old, newsize, gch) -{.push profiler:off.} +{.push profiler:off, stackTrace:off.} # ---------------- cycle collector ------------------------------------------- -proc freeCyclicCell(gch: var TGcHeap, c: PCell) = +proc freeCyclicCell(gch: var GcHeap, c: PCell) = prepareDealloc(c) gcTrace(c, csCycFreed) - when logGC: writeCell("cycle collector dealloc cell", c) - when reallyDealloc: rawDealloc(gch.region, c) + track("cycle collector dealloc cell", c, 0) + logCell("cycle collector dealloc cell", c) + when reallyDealloc: + sysAssert(allocInv(gch.region), "free cyclic cell") + beforeDealloc(gch, c, "freeCyclicCell: stack trash") + rawDealloc(gch.region, c) else: gcAssert(c.typ != nil, "freeCyclicCell") - zeroMem(c, sizeof(TCell)) - -proc markGray(s: PCell) = - if s.color != rcGray: - setColor(s, rcGray) - forAllChildren(s, waMarkGray) - -proc scanBlack(s: PCell) = - s.setColor(rcBlack) - forAllChildren(s, waScanBlack) - -proc scan(s: PCell) = - if s.color == rcGray: - if s.refcount >=% rcIncrement: - scanBlack(s) - else: - s.setColor(rcWhite) - forAllChildren(s, waScan) - -proc collectWhite(s: PCell) = - if s.color == rcWhite and s notin gch.cycleRoots: - s.setcolor(rcBlack) - forAllChildren(s, waCollectWhite) - freeCyclicCell(gch, s) - -proc MarkRoots(gch: var TGcHeap) = - var tabSize = 0 - for s in elements(gch.cycleRoots): - #writeCell("markRoot", s) - inc tabSize - if s.color == rcPurple and s.refCount >=% rcIncrement: - markGray(s) + zeroMem(c, sizeof(Cell)) + +proc sweep(gch: var GcHeap) = + for x in allObjects(gch.region): + if isCell(x): + # cast to PCell is correct here: + var c = cast[PCell](x) + if c notin gch.marked: freeCyclicCell(gch, c) + +proc markS(gch: var GcHeap, c: PCell) = + gcAssert isAllocatedPtr(gch.region, c), "markS: foreign heap root detected A!" + incl(gch.marked, c) + gcAssert gch.tempStack.len == 0, "stack not empty!" + forAllChildren(c, waMarkPrecise) + while gch.tempStack.len > 0: + dec gch.tempStack.len + var d = gch.tempStack.d[gch.tempStack.len] + gcAssert isAllocatedPtr(gch.region, d), "markS: foreign heap root detected B!" + if not containsOrIncl(gch.marked, d): + forAllChildren(d, waMarkPrecise) + +proc markGlobals(gch: var GcHeap) {.raises: [].} = + if gch.gcThreadId == 0: + for i in 0 .. globalMarkersLen-1: globalMarkers[i]() + for i in 0 .. threadLocalMarkersLen-1: threadLocalMarkers[i]() + let d = gch.additionalRoots.d + for i in 0 .. gch.additionalRoots.len-1: markS(gch, d[i]) + +when logGC: + var + cycleCheckA: array[100, PCell] + cycleCheckALen = 0 + + proc alreadySeen(c: PCell): bool = + for i in 0 .. cycleCheckALen-1: + if cycleCheckA[i] == c: return true + if cycleCheckALen == len(cycleCheckA): + gcAssert(false, "cycle detection overflow") + rawQuit 1 + cycleCheckA[cycleCheckALen] = c + inc cycleCheckALen + + proc debugGraph(s: PCell) = + if alreadySeen(s): + writeCell("child cell (already seen) ", s) else: - excl(gch.cycleRoots, s) - # (s.color == rcBlack and rc == 0) as 1 condition: - if s.refcount == 0: - freeCyclicCell(gch, s) - gch.stat.cycleTableSize = max(gch.stat.cycleTableSize, tabSize) + writeCell("cell {", s) + forAllChildren(s, waDebug) + c_printf("}\n") -proc doOperation(p: pointer, op: TWalkOp) = +proc doOperation(p: pointer, op: WalkOp) = if p == nil: return var c: PCell = usrToCell(p) gcAssert(c != nil, "doOperation: 1") @@ -554,307 +649,145 @@ proc doOperation(p: pointer, op: TWalkOp) = case op of waZctDecRef: #if not isAllocatedPtr(gch.region, c): - # return - # c_fprintf(c_stdout, "[GC] decref bug: %p", c) + # c_printf("[GC] decref bug: %p", c) gcAssert(isAllocatedPtr(gch.region, c), "decRef: waZctDecRef") gcAssert(c.refcount >=% rcIncrement, "doOperation 2") - #c.refcount = c.refcount -% rcIncrement - when logGC: writeCell("decref (from doOperation)", c) + logCell("decref (from doOperation)", c) + track("waZctDecref", p, 0) decRef(c) - #if c.refcount <% rcIncrement: addZCT(gch.zct, c) of waPush: add(gch.tempStack, c) - of waCycleDecRef: - gcAssert(c.refcount >=% rcIncrement, "doOperation 3") - c.refcount = c.refcount -% rcIncrement - of waMarkGray: - gcAssert(c.refcount >=% rcIncrement, "waMarkGray") - c.refcount = c.refcount -% rcIncrement - markGray(c) - of waScan: scan(c) - of waScanBlack: - c.refcount = c.refcount +% rcIncrement - if c.color != rcBlack: - scanBlack(c) - of waCollectWhite: collectWhite(c) + of waMarkGlobal: + markS(gch, c) + of waMarkPrecise: + add(gch.tempStack, c) + #of waDebug: debugGraph(c) proc nimGCvisit(d: pointer, op: int) {.compilerRtl.} = - doOperation(d, TWalkOp(op)) - -proc CollectZCT(gch: var TGcHeap): bool + doOperation(d, WalkOp(op)) -proc collectRoots(gch: var TGcHeap) = - for s in elements(gch.cycleRoots): - excl(gch.cycleRoots, s) - collectWhite(s) +proc collectZCT(gch: var GcHeap): bool {.benign, raises: [].} -proc collectCycles(gch: var TGcHeap) = +proc collectCycles(gch: var GcHeap) {.raises: [].} = + when hasThreadSupport: + for c in gch.toDispose: + nimGCunref(c) # ensure the ZCT 'color' is not used: while gch.zct.len > 0: discard collectZCT(gch) - markRoots(gch) - # scanRoots: - for s in elements(gch.cycleRoots): scan(s) - collectRoots(gch) - - Deinit(gch.cycleRoots) - Init(gch.cycleRoots) - # alive cycles need to be kept in 'cycleRoots' if they are referenced - # from the stack; otherwise the write barrier will add the cycle root again - # anyway: - when false: - var d = gch.decStack.d - var cycleRootsLen = 0 - for i in 0..gch.decStack.len-1: - var c = d[i] - gcAssert isAllocatedPtr(gch.region, c), "addBackStackRoots" - gcAssert c.refcount >=% rcIncrement, "addBackStackRoots: dead cell" - if canBeCycleRoot(c): - #if c notin gch.cycleRoots: - inc cycleRootsLen - incl(gch.cycleRoots, c) - gcAssert c.typ != nil, "addBackStackRoots 2" - if cycleRootsLen != 0: - cfprintf(cstdout, "cycle roots: %ld\n", cycleRootsLen) - -proc gcMark(gch: var TGcHeap, p: pointer) {.inline.} = + cellsetReset(gch.marked) + var d = gch.decStack.d + for i in 0..gch.decStack.len-1: + sysAssert isAllocatedPtr(gch.region, d[i]), "collectCycles" + markS(gch, d[i]) + markGlobals(gch) + sweep(gch) + +proc gcMark(gch: var GcHeap, p: pointer) {.inline.} = # the addresses are not as cells on the stack, so turn them to cells: sysAssert(allocInv(gch.region), "gcMark begin") - var cell = usrToCell(p) - var c = cast[TAddress](cell) + var c = cast[int](p) if c >% PageSize: # fast check: does it look like a cell? - var objStart = cast[PCell](interiorAllocatedPtr(gch.region, cell)) + var objStart = cast[PCell](interiorAllocatedPtr(gch.region, p)) if objStart != nil: # mark the cell: - objStart.refcount = objStart.refcount +% rcIncrement + incRef(objStart) add(gch.decStack, objStart) when false: + let cell = usrToCell(p) if isAllocatedPtr(gch.region, cell): sysAssert false, "allocated pointer but not interior?" # mark the cell: - cell.refcount = cell.refcount +% rcIncrement + incRef(cell) add(gch.decStack, cell) sysAssert(allocInv(gch.region), "gcMark end") -proc markThreadStacks(gch: var TGcHeap) = - when hasThreadSupport and hasSharedHeap: - {.error: "not fully implemented".} - var it = threadList - while it != nil: - # mark registers: - for i in 0 .. high(it.registers): gcMark(gch, it.registers[i]) - var sp = cast[TAddress](it.stackBottom) - var max = cast[TAddress](it.stackTop) - # XXX stack direction? - # XXX unroll this loop: - while sp <=% max: - gcMark(gch, cast[ppointer](sp)[]) - sp = sp +% sizeof(pointer) - it = it.next - -# ----------------- stack management -------------------------------------- -# inspired from Smart Eiffel - -when defined(sparc): - const stackIncreases = false -elif defined(hppa) or defined(hp9000) or defined(hp9000s300) or - defined(hp9000s700) or defined(hp9000s800) or defined(hp9000s820): - const stackIncreases = true -else: - const stackIncreases = false - -when not defined(useNimRtl): - {.push stack_trace: off.} - proc setStackBottom(theStackBottom: pointer) = - #c_fprintf(c_stdout, "stack bottom: %p;\n", theStackBottom) - # the first init must be the one that defines the stack bottom: - if gch.stackBottom == nil: gch.stackBottom = theStackBottom - else: - var a = cast[TAddress](theStackBottom) # and not PageMask - PageSize*2 - var b = cast[TAddress](gch.stackBottom) - #c_fprintf(c_stdout, "old: %p new: %p;\n",gch.stackBottom,theStackBottom) - when stackIncreases: - gch.stackBottom = cast[pointer](min(a, b)) - else: - gch.stackBottom = cast[pointer](max(a, b)) - {.pop.} - -proc stackSize(): int {.noinline.} = - var stackTop {.volatile.}: pointer - result = abs(cast[int](addr(stackTop)) - cast[int](gch.stackBottom)) - -when defined(sparc): # For SPARC architecture. - proc isOnStack(p: pointer): bool = - var stackTop {.volatile.}: pointer - stackTop = addr(stackTop) - var b = cast[TAddress](gch.stackBottom) - var a = cast[TAddress](stackTop) - var x = cast[TAddress](p) - result = a <=% x and x <=% b - - proc markStackAndRegisters(gch: var TGcHeap) {.noinline, cdecl.} = - when defined(sparcv9): - asm """"flushw \n" """ - else: - asm """"ta 0x3 ! ST_FLUSH_WINDOWS\n" """ - - var - max = gch.stackBottom - sp: PPointer - stackTop: array[0..1, pointer] - sp = addr(stackTop[0]) - # Addresses decrease as the stack grows. - while sp <= max: - gcMark(gch, sp[]) - sp = cast[ppointer](cast[TAddress](sp) +% sizeof(pointer)) - -elif defined(ELATE): - {.error: "stack marking code is to be written for this architecture".} - -elif stackIncreases: - # --------------------------------------------------------------------------- - # Generic code for architectures where addresses increase as the stack grows. - # --------------------------------------------------------------------------- - proc isOnStack(p: pointer): bool = - var stackTop {.volatile.}: pointer - stackTop = addr(stackTop) - var a = cast[TAddress](gch.stackBottom) - var b = cast[TAddress](stackTop) - var x = cast[TAddress](p) - result = a <=% x and x <=% b - - var - jmpbufSize {.importc: "sizeof(jmp_buf)", nodecl.}: int - # a little hack to get the size of a TJmpBuf in the generated C code - # in a platform independant way - - proc markStackAndRegisters(gch: var TGcHeap) {.noinline, cdecl.} = - var registers: C_JmpBuf - if c_setjmp(registers) == 0'i32: # To fill the C stack with registers. - var max = cast[TAddress](gch.stackBottom) - var sp = cast[TAddress](addr(registers)) +% jmpbufSize -% sizeof(pointer) - # sp will traverse the JMP_BUF as well (jmp_buf size is added, - # otherwise sp would be below the registers structure). - while sp >=% max: - gcMark(gch, cast[ppointer](sp)[]) - sp = sp -% sizeof(pointer) - -else: - # --------------------------------------------------------------------------- - # Generic code for architectures where addresses decrease as the stack grows. - # --------------------------------------------------------------------------- - proc isOnStack(p: pointer): bool = - var stackTop {.volatile.}: pointer - stackTop = addr(stackTop) - var b = cast[TAddress](gch.stackBottom) - var a = cast[TAddress](stackTop) - var x = cast[TAddress](p) - result = a <=% x and x <=% b - - proc markStackAndRegisters(gch: var TGcHeap) {.noinline, cdecl.} = - # We use a jmp_buf buffer that is in the C stack. - # Used to traverse the stack and registers assuming - # that 'setjmp' will save registers in the C stack. - type PStackSlice = ptr array [0..7, pointer] - var registers: C_JmpBuf - if c_setjmp(registers) == 0'i32: # To fill the C stack with registers. - var max = cast[TAddress](gch.stackBottom) - var sp = cast[TAddress](addr(registers)) - # loop unrolled: - while sp <% max - 8*sizeof(pointer): - gcMark(gch, cast[PStackSlice](sp)[0]) - gcMark(gch, cast[PStackSlice](sp)[1]) - gcMark(gch, cast[PStackSlice](sp)[2]) - gcMark(gch, cast[PStackSlice](sp)[3]) - gcMark(gch, cast[PStackSlice](sp)[4]) - gcMark(gch, cast[PStackSlice](sp)[5]) - gcMark(gch, cast[PStackSlice](sp)[6]) - gcMark(gch, cast[PStackSlice](sp)[7]) - sp = sp +% sizeof(pointer)*8 - # last few entries: - while sp <=% max: - gcMark(gch, cast[ppointer](sp)[]) - sp = sp +% sizeof(pointer) - -# ---------------------------------------------------------------------------- -# end of non-portable code -# ---------------------------------------------------------------------------- - -proc CollectZCT(gch: var TGcHeap): bool = - # Note: Freeing may add child objects to the ZCT! So essentially we do - # deep freeing, which is bad for incremental operation. In order to +#[ + This method is conditionally marked with an attribute so that it gets ignored by the LLVM ASAN + (Address SANitizer) intrumentation as it will raise false errors due to the implementation of + garbage collection that is used by Nim. For more information, please see the documentation of + `CLANG_NO_SANITIZE_ADDRESS` in `lib/nimbase.h`. + ]# +proc markStackAndRegisters(gch: var GcHeap) {.noinline, cdecl, + codegenDecl: "CLANG_NO_SANITIZE_ADDRESS N_LIB_PRIVATE $# $#$#".} = + forEachStackSlot(gch, gcMark) + +proc collectZCT(gch: var GcHeap): bool = + # Note: Freeing may add child objects to the ZCT! So essentially we do + # deep freeing, which is bad for incremental operation. In order to # avoid a deep stack, we move objects to keep the ZCT small. # This is performance critical! const workPackage = 100 var L = addr(gch.zct.len) - - when withRealtime: + + when withRealTime: var steps = workPackage - var t0: TTicks + var t0: Ticks if gch.maxPause > 0: t0 = getticks() while L[] > 0: var c = gch.zct.d[0] sysAssert(isAllocatedPtr(gch.region, c), "CollectZCT: isAllocatedPtr") # remove from ZCT: gcAssert((c.refcount and ZctFlag) == ZctFlag, "collectZCT") - + c.refcount = c.refcount and not ZctFlag gch.zct.d[0] = gch.zct.d[L[] - 1] dec(L[]) - when withRealtime: dec steps - if c.refcount <% rcIncrement: + when withRealTime: dec steps + if c.refcount <% rcIncrement: # It may have a RC > 0, if it is in the hardware stack or # it has not been removed yet from the ZCT. This is because - # ``incref`` does not bother to remove the cell from the ZCT + # ``incref`` does not bother to remove the cell from the ZCT # as this might be too slow. # In any case, it should be removed from the ZCT. But not # freed. **KEEP THIS IN MIND WHEN MAKING THIS INCREMENTAL!** - if canBeCycleRoot(c): excl(gch.cycleRoots, c) - when logGC: writeCell("zct dealloc cell", c) + logCell("zct dealloc cell", c) + track("zct dealloc cell", c, 0) gcTrace(c, csZctFreed) # We are about to free the object, call the finalizer BEFORE its # children are deleted as well, because otherwise the finalizer may # access invalid memory. This is done by prepareDealloc(): prepareDealloc(c) forAllChildren(c, waZctDecRef) - when reallyDealloc: rawDealloc(gch.region, c) + when reallyDealloc: + sysAssert(allocInv(gch.region), "collectZCT: rawDealloc") + beforeDealloc(gch, c, "collectZCT: stack trash") + rawDealloc(gch.region, c) else: sysAssert(c.typ != nil, "collectZCT 2") - zeroMem(c, sizeof(TCell)) - when withRealtime: + zeroMem(c, sizeof(Cell)) + when withRealTime: if steps == 0: steps = workPackage if gch.maxPause > 0: let duration = getticks() - t0 - # the GC's measuring is not accurate and needs some cleanup actions + # the GC's measuring is not accurate and needs some cleanup actions # (stack unmarking), so subtract some short amount of time in # order to miss deadlines less often: if duration >= gch.maxPause - 50_000: return false result = true -proc unmarkStackAndRegisters(gch: var TGcHeap) = +proc unmarkStackAndRegisters(gch: var GcHeap) = var d = gch.decStack.d for i in 0..gch.decStack.len-1: sysAssert isAllocatedPtr(gch.region, d[i]), "unmarkStackAndRegisters" decRef(d[i]) - #var c = d[i] - # XXX no need for an atomic dec here: - #if --c.refcount: - # addZCT(gch.zct, c) - #sysAssert c.typ != nil, "unmarkStackAndRegisters 2" gch.decStack.len = 0 -proc collectCTBody(gch: var TGcHeap) = - when withRealtime: +proc collectCTBody(gch: var GcHeap) {.raises: [].} = + when withRealTime: let t0 = getticks() sysAssert(allocInv(gch.region), "collectCT: begin") - - gch.stat.maxStackSize = max(gch.stat.maxStackSize, stackSize()) + + when nimCoroutines: + for stack in gch.stack.items(): + gch.stat.maxStackSize = max(gch.stat.maxStackSize, stack.stackSize()) + else: + gch.stat.maxStackSize = max(gch.stat.maxStackSize, stackSize()) sysAssert(gch.decStack.len == 0, "collectCT") prepareForInteriorPointerChecking(gch.region) markStackAndRegisters(gch) - markThreadStacks(gch) gch.stat.maxStackCells = max(gch.stat.maxStackCells, gch.decStack.len) inc(gch.stat.stackScans) if collectZCT(gch): @@ -864,79 +797,97 @@ proc collectCTBody(gch: var TGcHeap) = #discard collectZCT(gch) inc(gch.stat.cycleCollections) gch.cycleThreshold = max(InitialCycleThreshold, getOccupiedMem() * - cycleIncrease) + CycleIncrease) gch.stat.maxThreshold = max(gch.stat.maxThreshold, gch.cycleThreshold) unmarkStackAndRegisters(gch) sysAssert(allocInv(gch.region), "collectCT: end") - - when withRealtime: + + when withRealTime: let duration = getticks() - t0 gch.stat.maxPause = max(gch.stat.maxPause, duration) when defined(reportMissedDeadlines): if gch.maxPause > 0 and duration > gch.maxPause: - c_fprintf(c_stdout, "[GC] missed deadline: %ld\n", duration) + c_printf("[GC] missed deadline: %ld\n", duration) -proc collectCT(gch: var TGcHeap) = - if (gch.zct.len >= ZctThreshold or (cycleGC and - getOccupiedMem(gch.region)>=gch.cycleThreshold) or alwaysGC) and +proc collectCT(gch: var GcHeap) = + if (gch.zct.len >= gch.zctThreshold or (cycleGC and + getOccupiedMem(gch.region)>=gch.cycleThreshold) or alwaysGC) and gch.recGcLock == 0: + when false: + prepareForInteriorPointerChecking(gch.region) + cellsetReset(gch.marked) + markForDebug(gch) collectCTBody(gch) + gch.zctThreshold = max(InitialZctThreshold, gch.zct.len * CycleIncrease) -when withRealtime: - proc toNano(x: int): TNanos {.inline.} = +proc GC_collectZct*() = + ## Collect the ZCT (zero count table). Unstable, experimental API for + ## testing purposes. + ## DO NOT USE! + collectCTBody(gch) + +when withRealTime: + proc toNano(x: int): Nanos {.inline.} = result = x * 1000 proc GC_setMaxPause*(MaxPauseInUs: int) = gch.maxPause = MaxPauseInUs.toNano - proc GC_step(gch: var TGcHeap, us: int, strongAdvice: bool) = - acquire(gch) + proc GC_step(gch: var GcHeap, us: int, strongAdvice: bool) = gch.maxPause = us.toNano - if (gch.zct.len >= ZctThreshold or (cycleGC and - getOccupiedMem(gch.region)>=gch.cycleThreshold) or alwaysGC) or + if (gch.zct.len >= gch.zctThreshold or (cycleGC and + getOccupiedMem(gch.region)>=gch.cycleThreshold) or alwaysGC) or strongAdvice: collectCTBody(gch) - release(gch) + gch.zctThreshold = max(InitialZctThreshold, gch.zct.len * CycleIncrease) + + proc GC_step*(us: int, strongAdvice = false, stackSize = -1) {.noinline.} = + if stackSize >= 0: + var stackTop {.volatile.}: pointer + gch.getActiveStack().pos = addr(stackTop) + + for stack in gch.stack.items(): + stack.bottomSaved = stack.bottom + when stackIncreases: + stack.bottom = cast[pointer]( + cast[int](stack.pos) - sizeof(pointer) * 6 - stackSize) + else: + stack.bottom = cast[pointer]( + cast[int](stack.pos) + sizeof(pointer) * 6 + stackSize) + + GC_step(gch, us, strongAdvice) - proc GC_step*(us: int, strongAdvice = false) = GC_step(gch, us, strongAdvice) + if stackSize >= 0: + for stack in gch.stack.items(): + stack.bottom = stack.bottomSaved when not defined(useNimRtl): - proc GC_disable() = - when hasThreadSupport and hasSharedHeap: - discard atomicInc(gch.recGcLock, 1) - else: - inc(gch.recGcLock) + proc GC_disable() = + inc(gch.recGcLock) proc GC_enable() = - if gch.recGcLock > 0: - when hasThreadSupport and hasSharedHeap: - discard atomicDec(gch.recGcLock, 1) - else: - dec(gch.recGcLock) + when defined(nimDoesntTrackDefects): + if gch.recGcLock <= 0: + raise newException(AssertionDefect, + "API usage error: GC_enable called but GC is already enabled") + dec(gch.recGcLock) - proc GC_setStrategy(strategy: TGC_Strategy) = - case strategy - of gcThroughput: nil - of gcResponsiveness: nil - of gcOptimizeSpace: nil - of gcOptimizeTime: nil + proc GC_setStrategy(strategy: GC_Strategy) = + discard proc GC_enableMarkAndSweep() = gch.cycleThreshold = InitialCycleThreshold proc GC_disableMarkAndSweep() = - gch.cycleThreshold = high(gch.cycleThreshold)-1 + gch.cycleThreshold = high(typeof(gch.cycleThreshold))-1 # set to the max value to suppress the cycle detector proc GC_fullCollect() = - acquire(gch) var oldThreshold = gch.cycleThreshold gch.cycleThreshold = 0 # forces cycle collection collectCT(gch) gch.cycleThreshold = oldThreshold - release(gch) proc GC_getStatistics(): string = - GC_disable() result = "[GC] total memory: " & $(getTotalMem()) & "\n" & "[GC] occupied memory: " & $(getOccupiedMem()) & "\n" & "[GC] stack scans: " & $gch.stat.stackScans & "\n" & @@ -945,8 +896,16 @@ when not defined(useNimRtl): "[GC] max threshold: " & $gch.stat.maxThreshold & "\n" & "[GC] zct capacity: " & $gch.zct.cap & "\n" & "[GC] max cycle table size: " & $gch.stat.cycleTableSize & "\n" & - "[GC] max stack size: " & $gch.stat.maxStackSize & "\n" & - "[GC] max pause time [ms]: " & $(gch.stat.maxPause div 1000_000) - GC_enable() + "[GC] max pause time [ms]: " & $(gch.stat.maxPause div 1000_000) & "\n" + when nimCoroutines: + result.add "[GC] number of stacks: " & $gch.stack.len & "\n" + for stack in items(gch.stack): + result.add "[GC] stack " & stack.bottom.repr & "[GC] max stack size " & cast[pointer](stack.maxStackSize).repr & "\n" + else: + # this caused memory leaks, see #10488 ; find a way without `repr` + # maybe using a local copy of strutils.toHex or snprintf + when defined(logGC): + result.add "[GC] stack bottom: " & gch.stack.bottom.repr + result.add "[GC] max stack size: " & $gch.stat.maxStackSize & "\n" -{.pop.} +{.pop.} # profiler: off, stackTrace: off |