diff options
Diffstat (limited to 'lib/system/oldgc.nim')
-rw-r--r-- | lib/system/oldgc.nim | 1044 |
1 files changed, 1044 insertions, 0 deletions
#
#
#            Nimrod's Runtime Library
#        (c) Copyright 2012 Andreas Rumpf
#
#    See the file "copying.txt", included in this
#    distribution, for details about the copyright.
#

#            Garbage Collector
#
# The basic algorithm is *Deferred Reference Counting* with cycle detection.
# This is achieved by combining a Deutsch-Bobrow garbage collector
# together with Christopher's partial mark-sweep garbage collector.
#
# Special care has been taken to avoid recursion as far as possible to avoid
# stack overflows when traversing deep datastructures. It is well-suited
# for soft real time applications (like games).
{.push profiler:off.}

const
  CycleIncrease = 2 # is a multiplicative increase
  InitialCycleThreshold = 4*1024*1024 # X MB because cycle checking is slow
  ZctThreshold = 500  # we collect garbage if the ZCT's size
                      # reaches this threshold
                      # this seems to be a good value
  withRealTime = defined(useRealtimeGC)

# ``collectCycles`` always times its phases with ``getTicks``, so the timers
# are needed whether or not the realtime GC is enabled. Include them exactly
# once; an additional unconditional include would redefine every timer symbol
# whenever they had already been pulled in.
when not defined(getTicks):
  include "system/timers"
when defined(memProfiler):
  proc nimProfile(requestedSize: int)

const
  rcIncrement = 0b1000 # so that lowest 3 bits are not touched
  # NOTE: Most colors are currently unused
  rcBlack = 0b000  # cell is colored black; in use or free
  rcGray = 0b001   # possible member of a cycle
  rcWhite = 0b010  # member of a garbage cycle
  rcPurple = 0b011 # possible root of a cycle
  rcZct = 0b100    # in ZCT
  rcRed = 0b101    # Candidate cycle undergoing sigma-computation
  rcOrange = 0b110 # Candidate cycle awaiting epoch boundary
  rcShift = 3      # shift by rcShift to get the reference counter
  colorMask = 0b111
type
  TWalkOp = enum
    waZctDecRef, waPush, waCycleDecRef

  TFinalizer {.compilerproc.} = proc (self: pointer) {.nimcall.}
    # A ref type can have a finalizer that is called before the object's
    # storage is freed.

  TGcStat {.final, pure.} = object
    stackScans: int          # number of performed stack scans (for statistics)
    cycleCollections: int    # number of performed full collections
    maxThreshold: int        # max threshold that has been set
    maxStackSize: int        # max stack size
    maxStackCells: int       # max stack cells in ``decStack``
    cycleTableSize: int      # max entries in cycle table
    maxPause: int64          # max measured GC pause in nanoseconds

  TGcHeap {.final, pure.} = object # this contains the zero count and
                                   # non-zero count table
    stackBottom: pointer
    cycleThreshold: int
    zct: TCellSeq            # the zero count table
    decStack: TCellSeq       # cells in the stack that are to decref again
    cycleRoots: TCellSet
    tempStack: TCellSeq      # temporary stack for recursion elimination
    recGcLock: int           # prevent recursion via finalizers; no thread lock
    when withRealTime:
      maxPause: TNanos       # max allowed pause in nanoseconds; active if > 0
    region: TMemRegion       # garbage collected region
    stat: TGcStat

var
  gch {.rtlThreadVar.}: TGcHeap

when not defined(useNimRtl):
  InstantiateForRegion(gch.region)

template acquire(gch: TGcHeap) =
  # takes the heap lock; a no-op unless threads share one heap
  when hasThreadSupport and hasSharedHeap:
    AcquireSys(HeapLock)

template release(gch: TGcHeap) =
  # releases the heap lock; a no-op unless threads share one heap
  when hasThreadSupport and hasSharedHeap:
    releaseSys(HeapLock)

proc addZCT(s: var TCellSeq, c: PCell) {.noinline.} =
  # appends `c` to `s` unless its rcZct bit is already set; the color bits
  # of `c` are replaced by rcZct so that it is not added a second time
  if (c.refcount and rcZct) == 0:
    c.refcount = c.refcount and not colorMask or rcZct
    add(s, c)

proc cellToUsr(cell: PCell): pointer {.inline.} =
  # convert object (=pointer to refcount) to pointer to userdata
  result = cast[pointer](cast[TAddress](cell)+%TAddress(sizeof(TCell)))

proc usrToCell(usr: pointer): PCell {.inline.} =
  # convert pointer to userdata to object (=pointer to refcount)
  result = cast[PCell](cast[TAddress](usr)-%TAddress(sizeof(TCell)))

proc canbeCycleRoot(c: PCell): bool {.inline.} =
  # true unless the cell's type is flagged as acyclic
  result = ntfAcyclic notin c.typ.flags

proc extGetCellType(c: pointer): PNimType {.compilerproc.} =
  # used for code generation concerning debugging
  result = usrToCell(c).typ

proc internRefcount(p: pointer): int {.exportc: "getRefcount".} =
  # the reference count of the cell behind `p` with the color bits removed
  result = int(usrToCell(p).refcount) shr rcShift

# this has to equal zero, otherwise we have to round up UnitsPerPage:
when BitsPerPage mod (sizeof(int)*8) != 0:
  {.error: "(BitsPerPage mod BitsPerUnit) should be zero!".}

when debugGC:
  proc writeCell(msg: CString, c: PCell) =
    # prints the cell's address, type kind (-1 if it has no type) and
    # reference count to stdout, prefixed with `msg`
    var kind = -1
    if c.typ != nil: kind = ord(c.typ.kind)
    when leakDetector:
      c_fprintf(c_stdout, "[GC] %s: %p %d rc=%ld from %s(%ld)\n",
                msg, c, kind, c.refcount shr rcShift, c.filename, c.line)
    else:
      c_fprintf(c_stdout, "[GC] %s: %p %d rc=%ld\n",
                msg, c, kind, c.refcount shr rcShift)

when traceGC:
  # traceGC is a special switch to enable extensive debugging
  type
    TCellState = enum
      csAllocated, csZctFreed, csCycFreed
  var
    states: array[TCellState, TCellSet]

  proc traceCell(c: PCell, state: TCellState) =
    # records the transition of `c` into `state` and asserts that the
    # transition is legal (no double alloc, no double free, ...)
    case state
    of csAllocated:
      if c in states[csAllocated]:
        writeCell("attempt to alloc an already allocated cell", c)
        sysAssert(false, "traceCell 1")
      excl(states[csCycFreed], c)
      excl(states[csZctFreed], c)
    of csZctFreed:
      if c in states[csZctFreed]:
        writeCell("attempt to free zct cell twice", c)
        sysAssert(false, "traceCell 2")
      if c in states[csCycFreed]:
        writeCell("attempt to free with zct, but already freed with cyc", c)
        sysAssert(false, "traceCell 3")
      if c notin states[csAllocated]:
        writeCell("attempt to free not an allocated cell", c)
        sysAssert(false, "traceCell 4")
      excl(states[csAllocated], c)
    of csCycFreed:
      if c notin states[csAllocated]:
        writeCell("attempt to free a not allocated cell", c)
        sysAssert(false, "traceCell 5")
      if c in states[csCycFreed]:
        writeCell("attempt to free cyc cell twice", c)
        sysAssert(false, "traceCell 6")
      if c in states[csZctFreed]:
        writeCell("attempt to free with cyc, but already freed with zct", c)
        sysAssert(false, "traceCell 7")
      excl(states[csAllocated], c)
    incl(states[state], c)

  proc writeLeakage() =
    # reports every cell that is still registered as allocated
    var z = 0
    var y = 0
    var e = 0
    for c in elements(states[csAllocated]):
      inc(e)
      if c in states[csZctFreed]: inc(z)
      elif c in states[csCycFreed]: inc(y)
      else: writeCell("leak", c)
    cfprintf(cstdout, "Allocations: %ld; ZCT freed: %ld; CYC freed: %ld\n",
             e, z, y)

template gcTrace(cell, state: expr): stmt {.immediate.} =
  when traceGC: traceCell(cell, state)

# forward declarations:
proc collectCT(gch: var TGcHeap)
proc IsOnStack*(p: pointer): bool {.noinline.}
proc forAllChildren(cell: PCell, op: TWalkOp)
proc doOperation(p: pointer, op: TWalkOp)
proc forAllChildrenAux(dest: Pointer, mt: PNimType, op: TWalkOp)
# we need the prototype here for debugging purposes

# `--x` decrements the counter by one reference and yields true if no
# reference is left; `++x` increments it by one reference.
when hasThreadSupport and hasSharedHeap:
  template `--`(x: expr): expr = atomicDec(x, rcIncrement) <% rcIncrement
  template `++`(x: expr): stmt = discard atomicInc(x, rcIncrement)
else:
  template `--`(x: expr): expr =
    Dec(x, rcIncrement)
    x <% rcIncrement
  template `++`(x: expr): stmt = Inc(x, rcIncrement)

proc prepareDealloc(cell: PCell) =
  # runs the finalizer of the cell's type, if there is one
  if cell.typ.finalizer != nil:
    # the finalizer could invoke something that
    # allocates memory; this could trigger a garbage
    # collection. Since we are already collecting we
    # prevent recursive entering here by a lock.
    # XXX: we should set the cell's children to nil!
    inc(gch.recGcLock)
    (cast[TFinalizer](cell.typ.finalizer))(cellToUsr(cell))
    dec(gch.recGcLock)

proc rtlAddCycleRoot(c: PCell) {.rtl, inl.} =
  # we MUST access gch as a global here, because this crosses DLL boundaries!
  when hasThreadSupport and hasSharedHeap:
    AcquireSys(HeapLock)
  incl(gch.cycleRoots, c)
  when hasThreadSupport and hasSharedHeap:
    ReleaseSys(HeapLock)

proc rtlAddZCT(c: PCell) {.rtl, inl.} =
  # we MUST access gch as a global here, because this crosses DLL boundaries!
  when hasThreadSupport and hasSharedHeap:
    AcquireSys(HeapLock)
  addZCT(gch.zct, c)
  when hasThreadSupport and hasSharedHeap:
    ReleaseSys(HeapLock)

proc decRef(c: PCell) {.inline.} =
  # removes one reference; a cell without references goes into the ZCT,
  # any other potentially cyclic cell is registered as a cycle root
  sysAssert(isAllocatedPtr(gch.region, c), "decRef: interiorPtr")
  sysAssert(c.refcount >=% rcIncrement, "decRef")
  if --c.refcount:
    rtlAddZCT(c)
  elif canBeCycleRoot(c):
    # unfortunately this is necessary here too, because a cycle might just
    # have been broken up and we could recycle it.
    rtlAddCycleRoot(c)

proc incRef(c: PCell) {.inline.} =
  # adds one reference and registers potentially cyclic cells as cycle roots
  sysAssert(isAllocatedPtr(gch.region, c), "incRef: interiorPtr")
  ++c.refcount
  if canBeCycleRoot(c):
    rtlAddCycleRoot(c)

proc nimGCref(p: pointer) {.compilerProc, inline.} = incRef(usrToCell(p))
proc nimGCunref(p: pointer) {.compilerProc, inline.} = decRef(usrToCell(p))

proc nimGCunrefNoCycle(p: pointer) {.compilerProc, inline.} =
  # like ``nimGCunref`` but never registers the cell as a cycle root
  sysAssert(allocInv(gch.region), "begin nimGCunrefNoCycle")
  var c = usrToCell(p)
  sysAssert(isAllocatedPtr(gch.region, c), "nimGCunrefNoCycle: isAllocatedPtr")
  if --c.refcount:
    rtlAddZCT(c)
    sysAssert(allocInv(gch.region), "end nimGCunrefNoCycle 2")
  sysAssert(allocInv(gch.region), "end nimGCunrefNoCycle 5")

proc asgnRef(dest: ppointer, src: pointer) {.compilerProc, inline.} =
  # the code generator calls this proc!
  sysAssert(not isOnStack(dest), "asgnRef")
  # BUGFIX: first incRef then decRef!
  if src != nil: incRef(usrToCell(src))
  if dest[] != nil: decRef(usrToCell(dest[]))
  dest[] = src

proc asgnRefNoCycle(dest: ppointer, src: pointer) {.compilerProc, inline.} =
  # the code generator calls this proc if it is known at compile time that no
  # cycle is possible.
  if src != nil:
    var c = usrToCell(src)
    ++c.refcount
  if dest[] != nil:
    var c = usrToCell(dest[])
    if --c.refcount:
      rtlAddZCT(c)
  dest[] = src

proc unsureAsgnRef(dest: ppointer, src: pointer) {.compilerProc.} =
  # unsureAsgnRef updates the reference counters only if dest is not on the
  # stack. It is used by the code generator if it cannot decide whether a
  # reference is in the stack or not (this can happen for var parameters).
  if not IsOnStack(dest):
    if src != nil: incRef(usrToCell(src))
    # XXX finally use assembler for the stack checking instead!
    # the test for '!= nil' is correct, but I got tired of the segfaults
    # resulting from the crappy stack checking:
    if cast[int](dest[]) >=% PageSize: decRef(usrToCell(dest[]))
  else:
    # can't be an interior pointer if it's a stack location!
    sysAssert(interiorAllocatedPtr(gch.region, dest)==nil,
              "stack loc AND interior pointer")
  dest[] = src

proc initGC() =
  # resets the statistics and initializes the GC's tables; does nothing
  # when the runtime lives in the shared library (useNimRtl)
  when not defined(useNimRtl):
    when traceGC:
      for i in low(TCellState)..high(TCellState): Init(states[i])
    gch.cycleThreshold = InitialCycleThreshold
    gch.stat.stackScans = 0
    gch.stat.cycleCollections = 0
    gch.stat.maxThreshold = 0
    gch.stat.maxStackSize = 0
    gch.stat.maxStackCells = 0
    gch.stat.cycleTableSize = 0
    # init the rt
    init(gch.zct)
    init(gch.tempStack)
    Init(gch.cycleRoots)
    Init(gch.decStack)

proc forAllSlotsAux(dest: pointer, n: ptr TNimNode, op: TWalkOp) =
  # applies `op` to every reference stored in the fields described by `n`
  var d = cast[TAddress](dest)
  case n.kind
  of nkSlot: forAllChildrenAux(cast[pointer](d +% n.offset), n.typ, op)
  of nkList:
    for i in 0..n.len-1:
      # inlined for speed
      if n.sons[i].kind == nkSlot:
        if n.sons[i].typ.kind in {tyRef, tyString, tySequence}:
          doOperation(cast[ppointer](d +% n.sons[i].offset)[], op)
        else:
          forAllChildrenAux(cast[pointer](d +% n.sons[i].offset),
                            n.sons[i].typ, op)
      else:
        forAllSlotsAux(dest, n.sons[i], op)
  of nkCase:
    var m = selectBranch(dest, n)
    if m != nil: forAllSlotsAux(dest, m, op)
  of nkNone: sysAssert(false, "forAllSlotsAux")

proc forAllChildrenAux(dest: Pointer, mt: PNimType, op: TWalkOp) =
  # applies `op` to every reference reachable from the value of type `mt`
  # that is stored at `dest`, without following the references themselves
  var d = cast[TAddress](dest)
  if dest == nil: return # nothing to do
  if ntfNoRefs notin mt.flags:
    case mt.Kind
    of tyRef, tyString, tySequence: # leaf:
      doOperation(cast[ppointer](d)[], op)
    of tyObject, tyTuple:
      forAllSlotsAux(dest, mt.node, op)
    of tyArray, tyArrayConstr, tyOpenArray:
      for i in 0..(mt.size div mt.base.size)-1:
        forAllChildrenAux(cast[pointer](d +% i *% mt.base.size), mt.base, op)
    else: nil

proc forAllChildren(cell: PCell, op: TWalkOp) =
  # applies `op` to the direct children of `cell`; a type specific marker
  # proc is preferred over the generic RTTI based traversal
  sysAssert(cell != nil, "forAllChildren: 1")
  sysAssert(cell.typ != nil, "forAllChildren: 2")
  sysAssert cell.typ.kind in {tyRef, tySequence, tyString}, "forAllChildren: 3"
  let marker = cell.typ.marker
  if marker != nil:
    marker(cellToUsr(cell), op.int)
  else:
    case cell.typ.Kind
    of tyRef: # common case
      forAllChildrenAux(cellToUsr(cell), cell.typ.base, op)
    of tySequence:
      var d = cast[TAddress](cellToUsr(cell))
      var s = cast[PGenericSeq](d)
      if s != nil:
        let baseAddr = d +% GenericSeqSize
        for i in 0..s.len-1:
          forAllChildrenAux(cast[pointer](baseAddr +% i *% cell.typ.base.size),
            cell.typ.base, op)
    else: nil

proc addNewObjToZCT(res: PCell, gch: var TGcHeap) {.inline.} =
  # we check the last 8 entries (cache line) for a slot that could be reused.
  # In 63% of all cases we succeed here! But we have to optimize the heck
  # out of this small linear search so that ``newObj`` is not slowed down.
  #
  # Slots to try          cache hit
  # 1                     32%
  # 4                     59%
  # 8                     63%
  # 16                    66%
  # all slots             68%
  var L = gch.zct.len
  var d = gch.zct.d
  when true:
    # loop unrolled for performance:
    template replaceZctEntry(i: expr) =
      c = d[i]
      if c.refcount >=% rcIncrement:
        c.refcount = c.refcount and not colorMask
        d[i] = res
        return
    if L > 8:
      var c: PCell
      replaceZctEntry(L-1)
      replaceZctEntry(L-2)
      replaceZctEntry(L-3)
      replaceZctEntry(L-4)
      replaceZctEntry(L-5)
      replaceZctEntry(L-6)
      replaceZctEntry(L-7)
      replaceZctEntry(L-8)
      add(gch.zct, res)
    else:
      d[L] = res
      inc(gch.zct.len)
  else:
    for i in countdown(L-1, max(0, L-8)):
      var c = d[i]
      if c.refcount >=% rcIncrement:
        c.refcount = c.refcount and not colorMask
        d[i] = res
        return
    add(gch.zct, res)

proc rawNewObj(typ: PNimType, size: int, gch: var TGcHeap): pointer =
  # generates a new object and sets its reference counter to 0
  acquire(gch)
  sysAssert(typ.kind in {tyRef, tyString, tySequence}, "newObj: 1")
  collectCT(gch)
  sysAssert(allocInv(gch.region), "rawNewObj begin")
  var res = cast[PCell](rawAlloc(gch.region, size + sizeof(TCell)))
  sysAssert((cast[TAddress](res) and (MemAlign-1)) == 0, "newObj: 2")
  # now it is buffered in the ZCT
  res.typ = typ
  when trackAllocationSource and not hasThreadSupport:
    if framePtr != nil and framePtr.prev != nil and framePtr.prev.prev != nil:
      res.filename = framePtr.prev.prev.filename
      res.line = framePtr.prev.prev.line
    else:
      res.filename = "nofile"
  res.refcount = rcZct # refcount is zero, but mark it to be in the ZCT
  sysAssert(isAllocatedPtr(gch.region, res), "newObj: 3")
  # its refcount is zero, so add it to the ZCT:
  addNewObjToZCT(res, gch)
  when logGC: writeCell("new cell", res)
  gcTrace(res, csAllocated)
  release(gch)
  result = cellToUsr(res)
  sysAssert(allocInv(gch.region), "rawNewObj end")

{.pop.}

proc newObj(typ: PNimType, size: int): pointer {.compilerRtl.} =
  # allocates a zeroed object of `size` bytes with a reference count of 0
  result = rawNewObj(typ, size, gch)
  zeroMem(result, size)
  when defined(memProfiler): nimProfile(size)

proc newSeq(typ: PNimType, len: int): pointer {.compilerRtl.} =
  # `newObj` already uses locks, so no need for them here.
  let size = addInt(mulInt(len, typ.base.size), GenericSeqSize)
  result = newObj(typ, size)
  cast[PGenericSeq](result).len = len
  cast[PGenericSeq](result).reserved = len
  # `newObj` has already reported this allocation to the memory profiler;
  # reporting it here again would count every sequence twice.

proc newObjRC1(typ: PNimType, size: int): pointer {.compilerRtl.} =
  # generates a new object and sets its reference counter to 1
  sysAssert(allocInv(gch.region), "newObjRC1 begin")
  acquire(gch)
  sysAssert(typ.kind in {tyRef, tyString, tySequence}, "newObj: 1")
  collectCT(gch)
  sysAssert(allocInv(gch.region), "newObjRC1 after collectCT")

  var res = cast[PCell](rawAlloc(gch.region, size + sizeof(TCell)))
  sysAssert(allocInv(gch.region), "newObjRC1 after rawAlloc")
  sysAssert((cast[TAddress](res) and (MemAlign-1)) == 0, "newObj: 2")
  # unlike in ``rawNewObj`` the cell is not put into the ZCT
  res.typ = typ
  when trackAllocationSource and not hasThreadSupport:
    if framePtr != nil and framePtr.prev != nil and framePtr.prev.prev != nil:
      res.filename = framePtr.prev.prev.filename
      res.line = framePtr.prev.prev.line
    else:
      res.filename = "nofile"
  res.refcount = rcIncrement # refcount is 1
  sysAssert(isAllocatedPtr(gch.region, res), "newObj: 3")
  when logGC: writeCell("new cell", res)
  gcTrace(res, csAllocated)
  release(gch)
  result = cellToUsr(res)
  zeroMem(result, size)
  sysAssert(allocInv(gch.region), "newObjRC1 end")
  when defined(memProfiler): nimProfile(size)

proc newSeqRC1(typ: PNimType, len: int): pointer {.compilerRtl.} =
  # generates a new sequence of `len` elements with a reference count of 1
  let size = addInt(mulInt(len, typ.base.size), GenericSeqSize)
  result = newObjRC1(typ, size)
  cast[PGenericSeq](result).len = len
  cast[PGenericSeq](result).reserved = len
  # `newObjRC1` has already reported this allocation to the memory profiler;
  # reporting it here again would count every sequence twice.

proc growObj(old: pointer, newsize: int, gch: var TGcHeap): pointer =
  # moves the string/sequence `old` into a new cell of `newsize` bytes,
  # zeroes the added part and releases the old cell
  acquire(gch)
  collectCT(gch)
  var ol = usrToCell(old)
  sysAssert(ol.typ != nil, "growObj: 1")
  sysAssert(ol.typ.kind in {tyString, tySequence}, "growObj: 2")
  sysAssert(allocInv(gch.region), "growObj begin")

  var res = cast[PCell](rawAlloc(gch.region, newsize + sizeof(TCell)))
  var elemSize = 1
  if ol.typ.kind != tyString: elemSize = ol.typ.base.size

  var oldsize = cast[PGenericSeq](old).len*elemSize + GenericSeqSize
  copyMem(res, ol, oldsize + sizeof(TCell))
  zeroMem(cast[pointer](cast[TAddress](res)+% oldsize +% sizeof(TCell)),
          newsize-oldsize)
  sysAssert((cast[TAddress](res) and (MemAlign-1)) == 0, "growObj: 3")
  sysAssert(res.refcount shr rcShift <=% 1, "growObj: 4")
  #if res.refcount <% rcIncrement:
  #  add(gch.zct, res)
  #else: # XXX: what to do here?
  #  decRef(ol)
  if (ol.refcount and colorMask) == rcZct:
    # the old cell is in the ZCT: let its entry point to the new cell
    var j = gch.zct.len-1
    var d = gch.zct.d
    while j >= 0:
      if d[j] == ol:
        d[j] = res
        break
      dec(j)
  if canBeCycleRoot(ol): excl(gch.cycleRoots, ol)
  when logGC:
    writeCell("growObj old cell", ol)
    writeCell("growObj new cell", res)
  gcTrace(ol, csZctFreed)
  gcTrace(res, csAllocated)
  when reallyDealloc: rawDealloc(gch.region, ol)
  else:
    sysAssert(ol.typ != nil, "growObj: 5")
    zeroMem(ol, sizeof(TCell))
  release(gch)
  result = cellToUsr(res)
  sysAssert(allocInv(gch.region), "growObj end")
  when defined(memProfiler): nimProfile(newsize-oldsize)

proc growObj(old: pointer, newsize: int): pointer {.rtl.} =
  result = growObj(old, newsize, gch)

{.push profiler:off.}

# ---------------- cycle collector -------------------------------------------

# counters of the most recent ``collectCycles`` run; only used for its report
var
  decrefs = 0
  increfs = 0
  marked = 0
  collected = 0

proc doOperation(p: pointer, op: TWalkOp) =
  # performs `op` on the cell that belongs to the user pointer `p`
  if p == nil: return
  var c: PCell = usrToCell(p)
  sysAssert(c != nil, "doOperation: 1")
  case op # faster than function pointers because of easy prediction
  of waZctDecRef:
    #if not isAllocatedPtr(gch.region, c):
    #  return
    #  c_fprintf(c_stdout, "[GC] decref bug: %p", c)
    sysAssert(isAllocatedPtr(gch.region, c), "decRef: waZctDecRef")
    sysAssert(c.refcount >=% rcIncrement, "doOperation 2")
    c.refcount = c.refcount -% rcIncrement
    when logGC: writeCell("decref (from doOperation)", c)
    if c.refcount <% rcIncrement: addZCT(gch.zct, c)
  of waPush:
    add(gch.tempStack, c)
  of waCycleDecRef:
    sysAssert(c.refcount >=% rcIncrement, "doOperation 3")
    c.refcount = c.refcount -% rcIncrement
    inc decrefs

proc nimGCvisit(d: pointer, op: int) {.compilerRtl.} =
  doOperation(d, TWalkOp(op))

# we now use a much simpler and non-recursive algorithm for cycle removal
proc collectCycles(gch: var TGcHeap) =
  # Works in three phases over ``gch.cycleRoots``:
  # 1. decrement the counter of every child of every root,
  # 2. restore the counters of everything reachable from a root that still
  #    has references left,
  # 3. free the roots whose counter stayed at zero.
  # Afterwards the root table is emptied.
  var tabSize = 0
  let tStart = getTicks()
  decrefs = 0
  increfs = 0
  marked = 0
  collected = 0

  # XXX: acyclic cutoff (specialized marker procs)
  # short trim cycle roots
  # long trim with threshold
  # don't add new objects to both zct and cycleroots?
  # leak detector with hash in rawNew / free
  #
  for c in elements(gch.cycleRoots):
    inc(tabSize)
    forallChildren(c, waCycleDecRef)
  if tabSize == 0: return
  gch.stat.cycleTableSize = max(gch.stat.cycleTableSize, tabSize)

  # NOTE(review): unconditional debug output; "%d" is paired with a Nimrod
  # int here and with tick differences below -- confirm the specifiers for
  # targets where these are wider than a C int.
  c_printf "COLLECT CYCLES: %d\n", tabSize
  let tAfterMark = getTicks()

  # restore reference counts (a depth-first traversal is needed):
  var marker: TCellSet
  Init(marker)
  for c in elements(gch.cycleRoots):
    if c.refcount >=% rcIncrement:
      inc marked
      if not containsOrIncl(marker, c):
        gch.tempStack.len = 0
        forAllChildren(c, waPush)
        while gch.tempStack.len > 0:
          dec(gch.tempStack.len)
          var d = gch.tempStack.d[gch.tempStack.len]
          d.refcount = d.refcount +% rcIncrement
          inc increfs
          if d in gch.cycleRoots and not containsOrIncl(marker, d):
            forAllChildren(d, waPush)
  # the marker set is only needed while restoring the counters; release it
  # here, otherwise its storage is lost on every cycle collection:
  Deinit(marker)

  let tAfterScan = getTicks()

  # remove cycles:
  for c in elements(gch.cycleRoots):
    if c.refcount <% rcIncrement:
      inc collected
      gch.tempStack.len = 0
      forAllChildren(c, waPush)
      while gch.tempStack.len > 0:
        dec(gch.tempStack.len)
        var d = gch.tempStack.d[gch.tempStack.len]
        if d.refcount <% rcIncrement:
          if d notin gch.cycleRoots: # d is leaf of c and not part of cycle
            addZCT(gch.zct, d)
            when logGC: writeCell("add to ZCT (from cycle collector)", d)
      prepareDealloc(c)
      gcTrace(c, csCycFreed)
      when logGC: writeCell("cycle collector dealloc cell", c)
      when reallyDealloc: rawDealloc(gch.region, c)
      else:
        sysAssert(c.typ != nil, "collectCycles")
        zeroMem(c, sizeof(TCell))

  let tFinal = getTicks()

  cprintf "times:\n mark: %d ms\n scan: %d ms\n collect: %d ms\n decrefs: %d\n increfs: %d\n marked: %d\n collected: %d\n",
    (tAfterMark - tStart) div 1_000_000,
    (tAfterScan - tAfterMark) div 1_000_000,
    (tFinal - tAfterScan) div 1_000_000,
    decrefs,
    increfs,
    marked,
    collected

  Deinit(gch.cycleRoots)
  Init(gch.cycleRoots)

var gcDebugging* = false
var vis*: proc (a: pointer, b: PNimType)

proc debugNode(n: ptr TNimNode) =
  # prints the name of `n` and, recursively, of all of its sons
  c_fprintf(c_stdout, "node %s\n", n.name)
  for i in 0..n.len-1:
    debugNode(n.sons[i])

proc debugTyp(x: PNimType) =
  # prints the kind of `x` followed by its node tree, if it has one
  c_fprintf(c_stdout, "type %d\n", x.kind)
  if x.node != nil:
    debugNode(x.node)

var seqdbg* : proc (s: PGenericSeq) {.cdecl.}

type
  TCyclicMode = enum
    Cyclic,
    Acyclic,
    MaybeCyclic

  TReleaseType = enum
    AddToZTC
    FreeImmediately

  THeapType = enum
    LocalHeap
    SharedHeap

template `++` (rc: TRefCount, heapType: THeapType): stmt =
  when heapType == SharedHeap:
    discard atomicInc(rc, rcIncrement)
  else:
    inc rc, rcIncrement

template `--`(rc: TRefCount): expr =
  dec rc, rcIncrement
  rc <% rcIncrement

template `--` (rc: TRefCount, heapType: THeapType): expr =
  (when heapType == SharedHeap: atomicDec(rc, rcIncrement) <% rcIncrement
   else: --rc)

template doDecRef(cc: PCell,
                 heapType = LocalHeap,
                 cycleFlag = MaybeCyclic): stmt =
  var c = cc
  sysAssert(isAllocatedPtr(gch.region, c), "decRef: interiorPtr")
  # XXX: move this elsewhere

  sysAssert(c.refcount >=% rcIncrement, "decRef")
  if c.refcount--(heapType):
    # this is the last reference from the heap
    # add to a zero-count-table that will be matched against stack pointers
    rtlAddZCT(c)
    # writeCell("decref to 0", c)
  else:
    when cycleFlag != Acyclic:
      if cycleFlag == Cyclic or canBeCycleRoot(c):
        # a cycle may have been broken
        rtlAddCycleRoot(c)

proc gcMark(gch: var TGcHeap, p: pointer) {.inline.} =
  # the addresses are not as cells on the stack, so turn them to cells:
  sysAssert(allocInv(gch.region), "gcMark begin")
  var cell = usrToCell(p)
  var c = cast[TAddress](cell)
  if c >% PageSize:
    # fast check: does it look like a cell?
    var objStart = cast[PCell](interiorAllocatedPtr(gch.region, cell))
    if objStart != nil:
      # mark the cell:
      if not gcDebugging:
        objStart.refcount = objStart.refcount +% rcIncrement
        add(gch.decStack, objStart)
    when false:
      if isAllocatedPtr(gch.region, cell):
        sysAssert false, "allocated pointer but not interior?"
        # mark the cell:
        cell.refcount = cell.refcount +% rcIncrement
        add(gch.decStack, cell)
  sysAssert(allocInv(gch.region), "gcMark end")

proc markThreadStacks(gch: var TGcHeap) =
  # marks the registers and stacks of all threads; only compiled in (and
  # then rejected as unfinished) when threads share one heap
  when hasThreadSupport and hasSharedHeap:
    {.error: "not fully implemented".}
    var it = threadList
    while it != nil:
      # mark registers:
      for i in 0 .. high(it.registers): gcMark(gch, it.registers[i])
      var sp = cast[TAddress](it.stackBottom)
      var max = cast[TAddress](it.stackTop)
      # XXX stack direction?
      # XXX unroll this loop:
      while sp <=% max:
        gcMark(gch, cast[ppointer](sp)[])
        sp = sp +% sizeof(pointer)
      it = it.next

# ----------------- stack management --------------------------------------
#  inspired from Smart Eiffel

when defined(sparc):
  const stackIncreases = false
elif defined(hppa) or defined(hp9000) or defined(hp9000s300) or
     defined(hp9000s700) or defined(hp9000s800) or defined(hp9000s820):
  const stackIncreases = true
else:
  const stackIncreases = false

when not defined(useNimRtl):
  {.push stack_trace: off.}
  proc setStackBottom(theStackBottom: pointer) =
    #c_fprintf(c_stdout, "stack bottom: %p;\n", theStackBottom)
    # the first init must be the one that defines the stack bottom:
    if gch.stackBottom == nil: gch.stackBottom = theStackBottom
    else:
      var a = cast[TAddress](theStackBottom) # and not PageMask - PageSize*2
      var b = cast[TAddress](gch.stackBottom)
      #c_fprintf(c_stdout, "old: %p new: %p;\n",gch.stackBottom,theStackBottom)
      when stackIncreases:
        gch.stackBottom = cast[pointer](min(a, b))
      else:
        gch.stackBottom = cast[pointer](max(a, b))
  {.pop.}

proc stackSize(): int {.noinline.} =
  # distance in bytes between a local of this proc and the stack bottom
  var stackTop {.volatile.}: pointer
  result = abs(cast[int](addr(stackTop)) - cast[int](gch.stackBottom))

when defined(sparc): # For SPARC architecture.
  proc isOnStack(p: pointer): bool =
    var stackTop {.volatile.}: pointer
    stackTop = addr(stackTop)
    var b = cast[TAddress](gch.stackBottom)
    var a = cast[TAddress](stackTop)
    var x = cast[TAddress](p)
    result = a <=% x and x <=% b

  proc markStackAndRegisters(gch: var TGcHeap) {.noinline, cdecl.} =
    when defined(sparcv9):
      asm """"flushw \n" """
    else:
      asm """"ta 0x3 ! ST_FLUSH_WINDOWS\n" """

    var
      max = gch.stackBottom
      sp: PPointer
      stackTop: array[0..1, pointer]
    sp = addr(stackTop[0])
    # Addresses decrease as the stack grows.
    while sp <= max:
      gcMark(gch, sp[])
      sp = cast[ppointer](cast[TAddress](sp) +% sizeof(pointer))

elif defined(ELATE):
  {.error: "stack marking code is to be written for this architecture".}

elif stackIncreases:
  # ---------------------------------------------------------------------------
  # Generic code for architectures where addresses increase as the stack grows.
  # ---------------------------------------------------------------------------
  proc isOnStack(p: pointer): bool =
    var stackTop {.volatile.}: pointer
    stackTop = addr(stackTop)
    var a = cast[TAddress](gch.stackBottom)
    var b = cast[TAddress](stackTop)
    var x = cast[TAddress](p)
    result = a <=% x and x <=% b

  var
    jmpbufSize {.importc: "sizeof(jmp_buf)", nodecl.}: int
      # a little hack to get the size of a TJmpBuf in the generated C code
      # in a platform independent way

  proc markStackAndRegisters(gch: var TGcHeap) {.noinline, cdecl.} =
    var registers: C_JmpBuf
    if c_setjmp(registers) == 0'i32: # To fill the C stack with registers.
      var max = cast[TAddress](gch.stackBottom)
      var sp = cast[TAddress](addr(registers)) +% jmpbufSize -% sizeof(pointer)
      # sp will traverse the JMP_BUF as well (jmp_buf size is added,
      # otherwise sp would be below the registers structure).
      while sp >=% max:
        gcMark(gch, cast[ppointer](sp)[])
        sp = sp -% sizeof(pointer)

else:
  # ---------------------------------------------------------------------------
  # Generic code for architectures where addresses decrease as the stack grows.
  # ---------------------------------------------------------------------------
  proc isOnStack(p: pointer): bool =
    var stackTop {.volatile.}: pointer
    stackTop = addr(stackTop)
    var b = cast[TAddress](gch.stackBottom)
    var a = cast[TAddress](stackTop)
    var x = cast[TAddress](p)
    result = a <=% x and x <=% b

  proc markStackAndRegisters(gch: var TGcHeap) {.noinline, cdecl.} =
    # We use a jmp_buf buffer that is in the C stack.
    # Used to traverse the stack and registers assuming
    # that 'setjmp' will save registers in the C stack.
    type PStackSlice = ptr array [0..7, pointer]
    var registers: C_JmpBuf
    if c_setjmp(registers) == 0'i32: # To fill the C stack with registers.
      var max = cast[TAddress](gch.stackBottom)
      var sp = cast[TAddress](addr(registers))
      # loop unrolled:
      while sp <% max - 8*sizeof(pointer):
        gcMark(gch, cast[PStackSlice](sp)[0])
        gcMark(gch, cast[PStackSlice](sp)[1])
        gcMark(gch, cast[PStackSlice](sp)[2])
        gcMark(gch, cast[PStackSlice](sp)[3])
        gcMark(gch, cast[PStackSlice](sp)[4])
        gcMark(gch, cast[PStackSlice](sp)[5])
        gcMark(gch, cast[PStackSlice](sp)[6])
        gcMark(gch, cast[PStackSlice](sp)[7])
        sp = sp +% sizeof(pointer)*8
      # last few entries:
      while sp <=% max:
        gcMark(gch, cast[ppointer](sp)[])
        sp = sp +% sizeof(pointer)

# ----------------------------------------------------------------------------
# end of non-portable code
# ----------------------------------------------------------------------------

proc CollectZCT(gch: var TGcHeap): bool =
  # Note: Freeing may add child objects to the ZCT! So essentially we do
  # deep freeing, which is bad for incremental operation. In order to
  # avoid a deep stack, we move objects to keep the ZCT small.
  # This is performance critical!
  #
  # Returns true if the ZCT has been emptied and false if the realtime GC
  # gave up early because the pause budget was nearly used up.
  const workPackage = 100
  var L = addr(gch.zct.len)

  when withRealtime:
    var steps = workPackage
    var t0: TTicks
    if gch.maxPause > 0: t0 = getticks()
  while L[] > 0:
    var c = gch.zct.d[0]
    sysAssert(isAllocatedPtr(gch.region, c), "CollectZCT: isAllocatedPtr")
    # remove from ZCT:
    sysAssert((c.refcount and rcZct) == rcZct, "collectZCT")

    c.refcount = c.refcount and not colorMask
    gch.zct.d[0] = gch.zct.d[L[] - 1]
    dec(L[])
    when withRealtime: dec steps
    if c.refcount <% rcIncrement:
      # It may have a RC > 0, if it is in the hardware stack or
      # it has not been removed yet from the ZCT. This is because
      # ``incref`` does not bother to remove the cell from the ZCT
      # as this might be too slow.
      # In any case, it should be removed from the ZCT. But not
      # freed. **KEEP THIS IN MIND WHEN MAKING THIS INCREMENTAL!**
      if canBeCycleRoot(c): excl(gch.cycleRoots, c)
      when logGC: writeCell("zct dealloc cell", c)
      gcTrace(c, csZctFreed)
      # We are about to free the object, call the finalizer BEFORE its
      # children are deleted as well, because otherwise the finalizer may
      # access invalid memory. This is done by prepareDealloc():
      prepareDealloc(c)
      forAllChildren(c, waZctDecRef)
      when reallyDealloc: rawDealloc(gch.region, c)
      else:
        sysAssert(c.typ != nil, "collectZCT 2")
        zeroMem(c, sizeof(TCell))
    when withRealtime:
      if steps == 0:
        steps = workPackage
        if gch.maxPause > 0:
          let duration = getticks() - t0
          # the GC's measuring is not accurate and needs some cleanup actions
          # (stack unmarking), so subtract some short amount of time in to
          # order to miss deadlines less often:
          if duration >= gch.maxPause - 50_000:
            return false
  result = true

proc unmarkStackAndRegisters(gch: var TGcHeap) =
  # takes back the references that ``gcMark`` added for the stack scan
  var d = gch.decStack.d
  for i in 0..gch.decStack.len-1:
    sysAssert isAllocatedPtr(gch.region, d[i]), "unmarkStackAndRegisters"
    # decRef(d[i]) inlined: cannot create a cycle and must not acquire lock
    var c = d[i]
    # XXX no need for an atomic dec here:
    if --c.refcount:
      addZCT(gch.zct, c)
    sysAssert c.typ != nil, "unmarkStackAndRegisters 2"
  gch.decStack.len = 0

proc collectCTBody(gch: var TGcHeap) =
  # one collection: mark the stack, process the ZCT, run the cycle collector
  # if the threshold is reached and finally unmark the stack again
  when withRealtime:
    let t0 = getticks()
  sysAssert(allocInv(gch.region), "collectCT: begin")

  gch.stat.maxStackSize = max(gch.stat.maxStackSize, stackSize())
  sysAssert(gch.decStack.len == 0, "collectCT")
  prepareForInteriorPointerChecking(gch.region)
  markStackAndRegisters(gch)
  markThreadStacks(gch)
  gch.stat.maxStackCells = max(gch.stat.maxStackCells, gch.decStack.len)
  inc(gch.stat.stackScans)
  if collectZCT(gch):
    when cycleGC:
      if getOccupiedMem(gch.region) >= gch.cycleThreshold or alwaysCycleGC:
        collectCycles(gch)
        discard collectZCT(gch)
        inc(gch.stat.cycleCollections)
        gch.cycleThreshold = max(InitialCycleThreshold, getOccupiedMem() *
                                 cycleIncrease)
        gch.stat.maxThreshold = max(gch.stat.maxThreshold, gch.cycleThreshold)
  unmarkStackAndRegisters(gch)
  sysAssert(allocInv(gch.region), "collectCT: end")

  when withRealtime:
    let duration = getticks() - t0
    gch.stat.maxPause = max(gch.stat.maxPause, duration)
    when defined(reportMissedDeadlines):
      if gch.maxPause > 0 and duration > gch.maxPause:
        c_fprintf(c_stdout, "[GC] missed deadline: %ld\n", duration)

proc collectCT(gch: var TGcHeap) =
  # collects if a threshold is reached and the GC is not locked
  if (gch.zct.len >= ZctThreshold or (cycleGC and
      getOccupiedMem(gch.region)>=gch.cycleThreshold) or alwaysGC) and
      gch.recGcLock == 0:
    collectCTBody(gch)

when withRealtime:
  proc toNano(x: int): TNanos {.inline.} =
    # microseconds to nanoseconds
    result = x * 1000

  proc GC_setMaxPause*(MaxPauseInUs: int) =
    gch.maxPause = MaxPauseInUs.toNano

  proc GC_step(gch: var TGcHeap, us: int, strongAdvice: bool) =
    # sets the pause budget to `us` microseconds and collects if a threshold
    # is reached or if `strongAdvice` is true
    acquire(gch)
    gch.maxPause = us.toNano
    if (gch.zct.len >= ZctThreshold or (cycleGC and
        getOccupiedMem(gch.region)>=gch.cycleThreshold) or alwaysGC) or
        strongAdvice:
      collectCTBody(gch)
    release(gch)

  proc GC_step*(us: int, strongAdvice = false) = GC_step(gch, us, strongAdvice)

when not defined(useNimRtl):
  proc GC_disable() =
    when hasThreadSupport and hasSharedHeap:
      discard atomicInc(gch.recGcLock, 1)
    else:
      inc(gch.recGcLock)
  proc GC_enable() =
    if gch.recGcLock > 0:
      when hasThreadSupport and hasSharedHeap:
        discard atomicDec(gch.recGcLock, 1)
      else:
        dec(gch.recGcLock)

  proc GC_setStrategy(strategy: TGC_Strategy) =
    # every strategy is accepted; none of them changes anything
    case strategy
    of gcThroughput: nil
    of gcResponsiveness: nil
    of gcOptimizeSpace: nil
    of gcOptimizeTime: nil

  proc GC_enableMarkAndSweep() =
    gch.cycleThreshold = InitialCycleThreshold

  proc GC_disableMarkAndSweep() =
    gch.cycleThreshold = high(gch.cycleThreshold)-1
    # set to the max value to suppress the cycle detector

  proc GC_fullCollect() =
    acquire(gch)
    var oldThreshold = gch.cycleThreshold
    gch.cycleThreshold = 0 # forces cycle collection
    collectCT(gch)
    gch.cycleThreshold = oldThreshold
    release(gch)

  proc GC_getStatistics(): string =
    # the GC is disabled while the report is built
    GC_disable()
    result = "[GC] total memory: " & $(getTotalMem()) & "\n" &
             "[GC] occupied memory: " & $(getOccupiedMem()) & "\n" &
             "[GC] stack scans: " & $gch.stat.stackScans & "\n" &
             "[GC] stack cells: " & $gch.stat.maxStackCells & "\n" &
             "[GC] cycle collections: " & $gch.stat.cycleCollections & "\n" &
             "[GC] max threshold: " & $gch.stat.maxThreshold & "\n" &
             "[GC] zct capacity: " & $gch.zct.cap & "\n" &
             "[GC] max cycle table size: " & $gch.stat.cycleTableSize & "\n" &
             "[GC] max stack size: " & $gch.stat.maxStackSize & "\n" &
             "[GC] max pause time [ms]: " & $(gch.stat.maxPause div 1000_000)
    when traceGC: writeLeakage()
    GC_enable()

{.pop.}