#
#
#            Nimrod's Runtime Library
#        (c) Copyright 2008 Andreas Rumpf
#
#    See the file "copying.txt", included in this
#    distribution, for details about the copyright.
#

#            Garbage Collector
#
# Current Features:
# * incremental
# * non-recursive
# * generational
# * excellent performance
#
# Future Improvements:
# * Both dlmalloc and TLSF lack zero-overhead object allocation. Thus, for
#   small objects we should use our own allocator.
# * Support for multi-threading. However, locks for the reference counting
#   might turn out to be too slow.

# ---------------------------------------------------------------------------
# Interface to TLSF:

const
  useTLSF = false # benchmarking showed that *dlmalloc* is faster than *TLSF*

when useTLSF:
  {.compile: "tlsf.c".}

  proc tlsfUsed: int {.importc: "TLSF_GET_USED_SIZE", noconv.}
  proc tlsfMax: int {.importc: "TLSF_GET_MAX_SIZE", noconv.}
  proc tlsf_malloc(size: int): pointer {.importc, noconv.}
  proc tlsf_free(p: pointer) {.importc, noconv.}
  proc tlsf_realloc(p: pointer, size: int): pointer {.importc, noconv.}
else: # use DL malloc
  {.compile: "dlmalloc.c".}

  proc tlsfUsed: int {.importc: "dlmalloc_footprint", noconv.}
  proc tlsfMax: int {.importc: "dlmalloc_max_footprint", noconv.}
  proc tlsf_malloc(size: int): pointer {.importc: "dlmalloc", noconv.}
  proc tlsf_free(p: pointer) {.importc: "dlfree", noconv.}
  proc tlsf_realloc(p: pointer, size: int): pointer {.
    importc: "dlrealloc", noconv.}

# ---------------------------------------------------------------------------

proc getOccupiedMem(): int = return tlsfUsed()
proc getFreeMem(): int = return tlsfMax() - tlsfUsed()
proc getTotalMem(): int = return tlsfMax()

# ---------------------------------------------------------------------------

const
  debugGC = false       # we wish to debug the GC...
  logGC = false
  traceGC = false       # extensive debugging
  reallyDealloc = true  # for debugging purposes this can be set to false
  cycleGC = true        # (de)activate the cycle GC
  stressGC = debugGC

# Guess the page size of the system; if it is the wrong value, performance
# may be worse (this is not for sure though), but the GC still works; it
# must be a power of two!
const
  PageShift = if sizeof(pointer) == 4: 12 else: 13
  PageSize = 1 shl PageShift # on 32 bit systems 4096

  CycleIncrease = 2 # is a multiplicative increase
  InitialCycleThreshold = 4*1024*1024 # X MB because cycle checking is slow
  ZctThreshold = 256  # we collect garbage if the ZCT's size
                      # reaches this threshold;
                      # this seems to be a good value

const
  MemAlignment = 8 # BUGFIX: on AMD64, dlmalloc aligns at an 8 byte boundary
  BitsPerUnit = sizeof(int)*8
    # a "unit" is a word, i.e. 4 bytes on a 32 bit system; I do not use the
    # term "word" because under 32-bit Windows it is sometimes only 16 bits
  BitsPerPage = PageSize div MemAlignment
  UnitsPerPage = BitsPerPage div BitsPerUnit
    # how many units do we need to describe a page:
    # on 32 bit systems this is only 16 (!)
  rcIncrement = 0b1000 # so that the lowest 3 bits are not touched
  # NOTE: Most colors are currently unused
  rcBlack = 0b000   # cell is colored black; in use or free
  rcGray = 0b001    # possible member of a cycle
  rcWhite = 0b010   # member of a garbage cycle
  rcPurple = 0b011  # possible root of a cycle
  rcZct = 0b100     # in ZCT
  rcRed = 0b101     # candidate cycle undergoing sigma-computation
  rcOrange = 0b110  # candidate cycle awaiting epoch boundary
  rcShift = 3       # shift by rcShift to get the reference counter
  colorMask = 0b111

type
  TWalkOp = enum
    waZctDecRef, waPush, waCycleDecRef

  TCell {.pure.} = object
    refcount: int  # the refcount and some flags
    typ: PNimType
    when debugGC:
      filename: cstring
      line: int

  PCell = ptr TCell

  TFinalizer {.compilerproc.} = proc (self: pointer)
    # A ref type can have a finalizer that is called before the object's
    # storage is freed.

  PPointer = ptr pointer
  TByteArray = array[0..1000_0000, byte]
  PByte = ptr TByteArray
  PString = ptr string

  PPageDesc = ptr TPageDesc
  TBitIndex = range[0..UnitsPerPage-1]
  TPageDesc {.final, pure.} = object
    next: PPageDesc # all nodes are connected with this pointer
    key: TAddress   # start address at bit 0
    bits: array[TBitIndex, int] # a bit vector

  PPageDescArray = ptr array[0..1000_000, PPageDesc]
  TCellSet {.final, pure.} = object
    counter, max: int
    head: PPageDesc
    data: PPageDescArray

  PCellArray = ptr array[0..100_000_000, PCell]
  TCellSeq {.final, pure.} = object
    len, cap: int
    d: PCellArray

  TGcHeap {.final, pure.} = object # this contains the zero count and
                                   # non-zero count table
    mask: TAddress          # mask for fast pointer detection
    zct: TCellSeq           # the zero count table
    stackCells: TCellSet    # cells (and addresses that merely look like
                            # cells) found on the hardware stack
    stackScans: int         # number of performed stack scans (for statistics)
    cycleCollections: int   # number of performed full collections
    maxThreshold: int       # max threshold that has been set
    maxStackSize: int       # max stack size
    maxStackPages: int      # max number of pages in stack
    cycleTableSize: int     # max entries in cycle table
    cycleRoots: TCellSet
    tempStack: TCellSeq     # temporary stack for recursion elimination

var
  gOutOfMem: ref EOutOfMemory
  stackBottom: pointer
  gch: TGcHeap
  cycleThreshold: int = InitialCycleThreshold
  recGcLock: int = 0
    # we use a lock to prevent the garbage collector from being triggered in
    # a finalizer; the collector should not call itself this way! Thus every
    # object allocated by a finalizer will not trigger a garbage collection.
    # This is wasteful but safe. This is a lock against recursive garbage
    # collection, not a lock for threads!

proc unsureAsgnRef(dest: ppointer, src: pointer) {.compilerproc.}
  # unsureAsgnRef updates the reference counters only if dest is not on the
  # stack. It is used by the code generator if it cannot decide whether a
  # reference is on the stack or not (this can happen for out/var parameters).
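
# A hedged sketch (not generated by this module; the variable names are
# illustrative only) of how the compiler's write barriers map onto the procs
# declared in this file: an assignment `g = x` to a heap or global location
# becomes roughly `asgnRef(addr(g), x)`, `asgnRefNoCycle` is chosen when it is
# known at compile time that no cycle is possible, and `unsureAsgnRef(addr(p),
# x)` is emitted for out/var parameters whose location (stack or heap) cannot
# be decided at compile time.
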
#proc growObj(old: pointer, newsize: int): pointer {.compilerproc.}
proc newObj(typ: PNimType, size: int): pointer {.compilerproc.}
proc newSeq(typ: PNimType, len: int): pointer {.compilerproc.}

proc raiseOutOfMem() {.noreturn.} =
  if gOutOfMem == nil:
    writeToStdErr("out of memory; cannot even throw an exception")
    quit(1)
  gOutOfMem.msg = "out of memory"
  raise gOutOfMem

proc cellToUsr(cell: PCell): pointer {.inline.} =
  # convert object (=pointer to refcount) to pointer to userdata
  result = cast[pointer](cast[TAddress](cell)+%TAddress(sizeof(TCell)))

proc usrToCell(usr: pointer): PCell {.inline.} =
  # convert pointer to userdata to object (=pointer to refcount)
  result = cast[PCell](cast[TAddress](usr)-%TAddress(sizeof(TCell)))

proc canbeCycleRoot(c: PCell): bool {.inline.} =
  result = ntfAcyclic notin c.typ.flags

proc extGetCellType(c: pointer): PNimType {.compilerproc.} =
  # used for code generation concerning debugging
  result = usrToCell(c).typ

proc internRefcount(p: pointer): int {.exportc: "getRefcount".} =
  result = int(usrToCell(p).refcount)
  if result > 0: result = result shr rcShift
  else: result = 0

proc gcAlloc(size: int): pointer =
  result = tlsf_malloc(size)
  if result == nil: raiseOutOfMem()
  zeroMem(result, size)

proc GC_disable() = inc(recGcLock)
proc GC_enable() =
  if recGcLock > 0: dec(recGcLock)

proc GC_setStrategy(strategy: TGC_Strategy) =
  case strategy
  of gcThroughput: nil
  of gcResponsiveness: nil
  of gcOptimizeSpace: nil
  of gcOptimizeTime: nil

proc GC_enableMarkAndSweep() =
  cycleThreshold = InitialCycleThreshold

proc GC_disableMarkAndSweep() =
  cycleThreshold = high(cycleThreshold)-1
  # set to the max value to suppress the cycle detector

proc nextTry(h, maxHash: int): int {.inline.} =
  result = ((5*h) + 1) and maxHash
  # For any initial h in range(maxHash), repeating that maxHash times
  # generates each int in range(maxHash) exactly once (see any text on
  # random-number generation for proof).

# this has to be zero, otherwise we would have to round up UnitsPerPage:
when BitsPerPage mod BitsPerUnit != 0:
  {.error: "(BitsPerPage mod BitsPerUnit) should be zero!".}

# ------------------- cell set handling ---------------------------------------

proc inOperator(s: TCellSeq, c: PCell): bool {.inline.} =
  for i in 0 .. s.len-1:
    if s.d[i] == c: return True
  return False

proc add(s: var TCellSeq, c: PCell) {.inline.} =
  if s.len >= s.cap:
    s.cap = s.cap * 3 div 2
    var d = cast[PCellArray](tlsf_malloc(s.cap * sizeof(PCell)))
    if d == nil: raiseOutOfMem()
    copyMem(d, s.d, s.len * sizeof(PCell))
    tlsf_free(s.d)
    s.d = d
    # BUGFIX: realloc fails on AMD64, sigh...
    #s.d = cast[PCellArray](tlsf_realloc(s.d, s.cap * sizeof(PCell)))
    #if s.d == nil: raiseOutOfMem()
  s.d[s.len] = c
  inc(s.len)

proc addZCT(s: var TCellSeq, c: PCell) =
  if (c.refcount and colorMask) != rcZct:
    c.refcount = c.refcount and not colorMask or rcZct
    add(s, c)

proc init(s: var TCellSeq, cap: int = 1024) =
  s.len = 0
  s.cap = cap
  s.d = cast[PCellArray](gcAlloc(cap * sizeof(PCell)))

const
  InitCellSetSize = 1024 # must be a power of two!
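
# A TCellSet is a two-level structure: a small open-addressing hash table
# (probed with nextTry) maps a page key to a TPageDesc, and each TPageDesc
# stores one bit per MemAlignment-sized slot of that page.  Worked example
# (illustrative only, using the 32-bit constants): for a cell at address
# 0x004012A8 the page key is 0x004012A8 shr PageShift = 0x401; within the
# page the slot number is (0x2A8 mod PageSize) div MemAlignment = 85, which
# lands in bit 21 of bits[2] because BitsPerUnit is 32.
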
proc CellSetInit(s: var TCellSet) =
  s.data = cast[PPageDescArray](gcAlloc(InitCellSetSize * sizeof(PPageDesc)))
  s.max = InitCellSetSize-1
  s.counter = 0
  s.head = nil

proc CellSetDeinit(s: var TCellSet) =
  var it = s.head
  while it != nil:
    var n = it.next
    tlsf_free(it)
    it = n
  s.head = nil # play it safe here
  tlsf_free(s.data)
  s.data = nil
  s.counter = 0

proc CellSetGet(t: TCellSet, key: TAddress): PPageDesc =
  var h = cast[int](key) and t.max
  while t.data[h] != nil:
    if t.data[h].key == key: return t.data[h]
    h = nextTry(h, t.max)
  return nil

proc CellSetRawInsert(t: TCellSet, data: PPageDescArray, desc: PPageDesc) =
  var h = cast[int](desc.key) and t.max
  while data[h] != nil:
    assert(data[h] != desc)
    h = nextTry(h, t.max)
  assert(data[h] == nil)
  data[h] = desc

proc CellSetEnlarge(t: var TCellSet) =
  var oldMax = t.max
  t.max = ((t.max+1)*2)-1
  var n = cast[PPageDescArray](gcAlloc((t.max + 1) * sizeof(PPageDesc)))
  for i in 0 .. oldmax:
    if t.data[i] != nil: CellSetRawInsert(t, n, t.data[i])
  tlsf_free(t.data)
  t.data = n

proc CellSetPut(t: var TCellSet, key: TAddress): PPageDesc =
  var h = cast[int](key) and t.max
  while true:
    var x = t.data[h]
    if x == nil: break
    if x.key == key: return x
    h = nextTry(h, t.max)

  if ((t.max+1)*2 < t.counter*3) or ((t.max+1)-t.counter < 4):
    CellSetEnlarge(t)
  inc(t.counter)
  h = cast[int](key) and t.max
  while t.data[h] != nil: h = nextTry(h, t.max)
  assert(t.data[h] == nil)
  # the new page descriptor goes into result
  result = cast[PPageDesc](gcAlloc(sizeof(TPageDesc)))
  result.next = t.head
  result.key = key
  t.head = result
  t.data[h] = result

# ---------- slightly higher level procs --------------------------------------

proc in_Operator(s: TCellSet, cell: PCell): bool =
  var u = cast[TAddress](cell)
  var t = CellSetGet(s, u shr PageShift)
  if t != nil:
    u = (u %% PageSize) /% MemAlignment
    result = (t.bits[u /% BitsPerUnit] and (1 shl (u %% BitsPerUnit))) != 0
  else:
    result = false

proc incl(s: var TCellSet, cell: PCell) =
  var u = cast[TAddress](cell)
  var t = CellSetPut(s, u shr PageShift)
  u = (u %% PageSize) /% MemAlignment
  t.bits[u /% BitsPerUnit] = t.bits[u /% BitsPerUnit] or
    (1 shl (u %% BitsPerUnit))

proc excl(s: var TCellSet, cell: PCell) =
  var u = cast[TAddress](cell)
  var t = CellSetGet(s, u shr PageShift)
  if t != nil:
    u = (u %% PageSize) /% MemAlignment
    t.bits[u /% BitsPerUnit] = (t.bits[u /% BitsPerUnit] and
                                not (1 shl (u %% BitsPerUnit)))

iterator elements(t: TCellSet): PCell {.inline.} =
  # while traversing it is forbidden to add pointers to the tree!
  var r = t.head
  while r != nil:
    var i = 0
    while i <= high(r.bits):
      var w = r.bits[i] # taking a copy of r.bits[i] here is correct, because
      # modifying operations are not allowed during traversal
      var j = 0
      while w != 0:         # test all remaining bits for zero
        if (w and 1) != 0:  # the bit is set!
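          # the cell address is rebuilt by reversing the incl() mapping:
          # the page key is shifted back into place and the slot number
          # (i*BitsPerUnit + j) is rescaled by MemAlignment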
          yield cast[PCell]((r.key shl PageShift) or # +%
                            (i*%BitsPerUnit+%j) *% MemAlignment)
        inc(j)
        w = w shr 1
      inc(i)
    r = r.next

# --------------- end of Cellset routines -------------------------------------

when debugGC:
  proc writeCell(msg: CString, c: PCell) =
    var kind = -1
    if c.typ != nil: kind = ord(c.typ.kind)
    when debugGC:
      c_fprintf(c_stdout, "[GC] %s: %p %d rc=%ld from %s(%ld)\n",
                msg, c, kind, c.refcount shr rcShift, c.filename, c.line)
    else:
      c_fprintf(c_stdout, "[GC] %s: %p %d rc=%ld\n",
                msg, c, kind, c.refcount shr rcShift)

when traceGC:
  # traceGC is a special switch to enable extensive debugging
  type
    TCellState = enum
      csAllocated, csZctFreed, csCycFreed
  var
    states: array[TCellState, TCellSet]

  proc traceCell(c: PCell, state: TCellState) =
    case state
    of csAllocated:
      if c in states[csAllocated]:
        writeCell("attempt to alloc an already allocated cell", c)
        assert(false)
      excl(states[csCycFreed], c)
      excl(states[csZctFreed], c)
    of csZctFreed:
      if c in states[csZctFreed]:
        writeCell("attempt to free zct cell twice", c)
        assert(false)
      if c in states[csCycFreed]:
        writeCell("attempt to free with zct, but already freed with cyc", c)
        assert(false)
      if c notin states[csAllocated]:
        writeCell("attempt to free not an allocated cell", c)
        assert(false)
      excl(states[csAllocated], c)
    of csCycFreed:
      if c notin states[csAllocated]:
        writeCell("attempt to free a not allocated cell", c)
        assert(false)
      if c in states[csCycFreed]:
        writeCell("attempt to free cyc cell twice", c)
        assert(false)
      if c in states[csZctFreed]:
        writeCell("attempt to free with cyc, but already freed with zct", c)
        assert(false)
      excl(states[csAllocated], c)
    incl(states[state], c)

  proc writeLeakage() =
    var z = 0
    var y = 0
    var e = 0
    for c in elements(states[csAllocated]):
      inc(e)
      if c in states[csZctFreed]: inc(z)
      elif c in states[csCycFreed]: inc(y)
      else: writeCell("leak", c)
    cfprintf(cstdout, "Allocations: %ld; ZCT freed: %ld; CYC freed: %ld\n",
             e, z, y)

template gcTrace(cell, state: expr): stmt =
  when traceGC: traceCell(cell, state)

# -----------------------------------------------------------------------------

# forward declarations:
proc updateZCT()
proc collectCT(gch: var TGcHeap, zctUpdated: bool)
proc IsOnStack(p: pointer): bool {.noinline.}
proc forAllChildren(cell: PCell, op: TWalkOp)
proc doOperation(p: pointer, op: TWalkOp)
proc forAllChildrenAux(dest: Pointer, mt: PNimType, op: TWalkOp)
proc reprAny(p: pointer, typ: PNimType): string {.compilerproc.}
  # we need the prototype here for debugging purposes

proc prepareDealloc(cell: PCell) =
  if cell.typ.finalizer != nil:
    # the finalizer could invoke something that allocates memory; this could
    # trigger a garbage collection. Since we are already collecting we
    # prevent recursive entry here with a lock.
    # XXX: we should set the cell's children to nil!
    inc(recGcLock)
    (cast[TFinalizer](cell.typ.finalizer))(cellToUsr(cell))
    dec(recGcLock)

proc setStackBottom(theStackBottom: pointer) {.compilerproc.} =
  stackBottom = theStackBottom

proc initGC() =
  when traceGC:
    for i in low(TCellState)..high(TCellState): CellSetInit(states[i])
  gch.stackScans = 0
  gch.cycleCollections = 0
  gch.maxThreshold = 0
  gch.maxStackSize = 0
  gch.maxStackPages = 0
  gch.cycleTableSize = 0
  # init the rt
  init(gch.zct)
  init(gch.tempStack)
  CellSetInit(gch.cycleRoots)
  CellSetInit(gch.stackCells)
  gch.mask = 0
  new(gOutOfMem) # reserve space for the EOutOfMemory exception here!
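
# A worked example of the refcount encoding used below (illustrative, derived
# from the rc* constants above): a cell with two live references that also
# sits in the ZCT stores 2*rcIncrement + rcZct = 0b10100 in `refcount`;
# `refcount shr rcShift` yields the counter 2 and `refcount and colorMask`
# yields rcZct.  Note that decRef never frees a cell directly: once the
# counter drops to zero the cell is merely queued in the ZCT, and collectZCT
# frees it later unless the conservative stack scan still finds a pointer
# to it.
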
proc PossibleRoot(gch: var TGcHeap, c: PCell) {.inline.} =
  if canbeCycleRoot(c): incl(gch.cycleRoots, c)

proc decRef(c: PCell) {.inline.} =
  when stressGC:
    if c.refcount <% rcIncrement:
      writeCell("broken cell", c)
  assert(c.refcount >% rcIncrement)
  c.refcount = c.refcount -% rcIncrement
  if c.refcount <% rcIncrement:
    addZCT(gch.zct, c)
  elif canBeCycleRoot(c):
    possibleRoot(gch, c)

proc incRef(c: PCell) {.inline.} =
  c.refcount = c.refcount +% rcIncrement
  if canBeCycleRoot(c):
    # OPT: the code generator should special case this
    possibleRoot(gch, c)

proc nimGCref(p: pointer) {.compilerproc, inline.} = incRef(usrToCell(p))
proc nimGCunref(p: pointer) {.compilerproc, inline.} = decRef(usrToCell(p))

proc asgnRef(dest: ppointer, src: pointer) {.compilerproc, inline.} =
  # the code generator calls this proc!
  assert(not isOnStack(dest))
  # BUGFIX: first incRef then decRef!
  if src != nil: incRef(usrToCell(src))
  if dest^ != nil: decRef(usrToCell(dest^))
  dest^ = src

proc asgnRefNoCycle(dest: ppointer, src: pointer) {.compilerproc, inline.} =
  # the code generator calls this proc if it is known at compile time that no
  # cycle is possible.
  if src != nil:
    var c = usrToCell(src)
    c.refcount = c.refcount +% rcIncrement
  if dest^ != nil:
    var c = usrToCell(dest^)
    c.refcount = c.refcount -% rcIncrement
    if c.refcount <% rcIncrement:
      addZCT(gch.zct, c)
  dest^ = src

proc unsureAsgnRef(dest: ppointer, src: pointer) =
  if not IsOnStack(dest):
    if src != nil: incRef(usrToCell(src))
    if dest^ != nil: decRef(usrToCell(dest^))
  dest^ = src

proc getDiscriminant(aa: Pointer, n: ptr TNimNode): int =
  assert(n.kind == nkCase)
  var d: int
  var a = cast[TAddress](aa)
  case n.typ.size
  of 1: d = ze(cast[ptr int8](a +% n.offset)^)
  of 2: d = ze(cast[ptr int16](a +% n.offset)^)
  of 4: d = int(cast[ptr int32](a +% n.offset)^)
  else: assert(false)
  return d

proc selectBranch(aa: Pointer, n: ptr TNimNode): ptr TNimNode =
  var discr = getDiscriminant(aa, n)
  if discr <% n.len:
    result = n.sons[discr]
    if result == nil: result = n.sons[n.len]
    # n.sons[n.len] contains the ``else`` part (but may be nil)
  else:
    result = n.sons[n.len]

proc forAllSlotsAux(dest: pointer, n: ptr TNimNode, op: TWalkOp) =
  var d = cast[TAddress](dest)
  case n.kind
  of nkNone: assert(false)
  of nkSlot: forAllChildrenAux(cast[pointer](d +% n.offset), n.typ, op)
  of nkList:
    for i in 0..n.len-1: forAllSlotsAux(dest, n.sons[i], op)
  of nkCase:
    var m = selectBranch(dest, n)
    if m != nil: forAllSlotsAux(dest, m, op)

proc forAllChildrenAux(dest: Pointer, mt: PNimType, op: TWalkOp) =
  var d = cast[TAddress](dest)
  if dest == nil: return # nothing to do
  if ntfNoRefs notin mt.flags:
    case mt.Kind
    of tyArray, tyArrayConstr, tyOpenArray:
      for i in 0..(mt.size div mt.base.size)-1:
        forAllChildrenAux(cast[pointer](d +% i *% mt.base.size), mt.base, op)
    of tyRef, tyString, tySequence: # leaf:
      doOperation(cast[ppointer](d)^, op)
    of tyObject, tyTuple, tyPureObject:
      forAllSlotsAux(dest, mt.node, op)
    else: nil

proc forAllChildren(cell: PCell, op: TWalkOp) =
  assert(cell != nil)
  assert(cell.typ != nil)
  case cell.typ.Kind
  of tyRef: # common case
    forAllChildrenAux(cellToUsr(cell), cell.typ.base, op)
  of tySequence:
    var d = cast[TAddress](cellToUsr(cell))
    var s = cast[PGenericSeq](d)
    if s != nil:
      for i in 0..s.len-1:
        forAllChildrenAux(cast[pointer](d +% i *% cell.typ.base.size +%
          GenericSeqSize), cell.typ.base, op)
  of tyString: nil
  else: assert(false)

proc checkCollection(zctUpdated: bool) {.inline.} =
  # checks if a collection should be done
  if recGcLock == 0:
    collectCT(gch, zctUpdated)
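
# Note on the allocation path below: every freshly allocated cell starts with
# refcount = rcZct and is pushed into the ZCT, so an object that never gains
# a reference is reclaimed by the next collectZCT run.  gch.mask accumulates
# the address bits of all allocated cells; gcMark later uses it as a cheap
# conservative filter, since a stack value whose bits are not a subset of the
# mask cannot be a cell address.
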
proc newObj(typ: PNimType, size: int): pointer =
  # generates a new object and sets its reference counter to 0
  assert(typ.kind in {tyRef, tyString, tySequence})
  var zctUpdated = false
  if gch.zct.len >= ZctThreshold:
    updateZCT()
    zctUpdated = true
  # check if we have to collect:
  checkCollection(zctUpdated)
  var res = cast[PCell](gcAlloc(size + sizeof(TCell)))
  when stressGC: assert((cast[TAddress](res) and (MemAlignment-1)) == 0)
  # now it is buffered in the ZCT
  res.typ = typ
  when debugGC:
    if framePtr != nil and framePtr.prev != nil:
      res.filename = framePtr.prev.filename
      res.line = framePtr.prev.line
  res.refcount = rcZct # refcount is zero, but mark it to be in the ZCT
  add(gch.zct, res) # its refcount is zero, so add it to the ZCT
  gch.mask = gch.mask or cast[TAddress](res)
  when logGC: writeCell("new cell", res)
  gcTrace(res, csAllocated)
  result = cellToUsr(res)

proc newSeq(typ: PNimType, len: int): pointer =
  # XXX: overflow checks!
  result = newObj(typ, len * typ.base.size + GenericSeqSize)
  cast[PGenericSeq](result).len = len
  cast[PGenericSeq](result).space = len

proc growObj(old: pointer, newsize: int): pointer =
  checkCollection(false)
  var ol = usrToCell(old)
  assert(ol.typ != nil)
  assert(ol.typ.kind in {tyString, tySequence})
  var res = cast[PCell](gcAlloc(newsize + sizeof(TCell)))
  var elemSize = 1
  if ol.typ.kind != tyString: elemSize = ol.typ.base.size
  copyMem(res, ol, cast[PGenericSeq](old).len*elemSize +
          GenericSeqSize + sizeof(TCell))
  assert((cast[TAddress](res) and (MemAlignment-1)) == 0)
  assert(res.refcount shr rcShift <=% 1)
  #if res.refcount <% rcIncrement:
  #  add(gch.zct, res)
  #else: # XXX: what to do here?
  #  decRef(ol)
  if (ol.refcount and colorMask) == rcZct:
    var j = gch.zct.len-1
    var d = gch.zct.d
    while j >= 0:
      if d[j] == ol:
        d[j] = res
        break
      dec(j)
  if canBeCycleRoot(ol): excl(gch.cycleRoots, ol)
  gch.mask = gch.mask or cast[TAddress](res)
  when logGC:
    writeCell("growObj old cell", ol)
    writeCell("growObj new cell", res)
  gcTrace(ol, csZctFreed)
  gcTrace(res, csAllocated)
  when reallyDealloc: tlsf_free(ol)
  else:
    assert(ol.typ != nil)
    zeroMem(ol, sizeof(TCell))
  result = cellToUsr(res)

# ---------------- cycle collector -------------------------------------------
# When collecting cycles, we have to consider the following:
# * there may still be references in the stack
# * some cells may still be in the ZCT, because they are referenced from
#   the stack (!), so their refcounts are zero
# the ZCT is a subset of stackCells here, so we only need to care
# for stackCells
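
# A tiny worked example of the trial-deletion scheme implemented below
# (illustrative only): suppose cells A and B reference each other and nothing
# else references them, and both ended up in cycleRoots.  The first pass
# (waCycleDecRef) removes the internal references, dropping both counters to
# zero.  Neither is reachable from the stack, so the restore pass leaves them
# at zero, and the removal pass then frees both; leaf children that are not
# cycle members are handed to the ZCT instead.
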
proc doOperation(p: pointer, op: TWalkOp) =
  if p == nil: return
  var c: PCell = usrToCell(p)
  assert(c != nil)
  case op # faster than function pointers because of easy prediction
  of waZctDecRef:
    assert(c.refcount >=% rcIncrement)
    c.refcount = c.refcount -% rcIncrement
    when logGC: writeCell("decref (from doOperation)", c)
    if c.refcount <% rcIncrement: addZCT(gch.zct, c)
  of waPush:
    add(gch.tempStack, c)
  of waCycleDecRef:
    assert(c.refcount >=% rcIncrement)
    c.refcount = c.refcount -% rcIncrement

# we now use a much simpler and non-recursive algorithm for cycle removal
proc collectCycles(gch: var TGcHeap) =
  var tabSize = 0
  for c in elements(gch.cycleRoots):
    inc(tabSize)
    forallChildren(c, waCycleDecRef)
  gch.cycleTableSize = max(gch.cycleTableSize, tabSize)

  # restore reference counts (a depth-first traversal is needed):
  var marker, newRoots: TCellSet
  CellSetInit(marker)
  CellSetInit(newRoots)
  for c in elements(gch.cycleRoots):
    var needsRestore = false
    if c in gch.stackCells:
      needsRestore = true
      incl(newRoots, c) # we need to scan this later again; maybe stack changes
      # NOTE: adding to ZCT here does NOT work
    elif c.refcount >=% rcIncrement:
      needsRestore = true
    if needsRestore:
      if c notin marker:
        incl(marker, c)
        gch.tempStack.len = 0
        forAllChildren(c, waPush)
        while gch.tempStack.len > 0:
          dec(gch.tempStack.len)
          var d = gch.tempStack.d[gch.tempStack.len]
          d.refcount = d.refcount +% rcIncrement
          if d notin marker and d in gch.cycleRoots:
            incl(marker, d)
            forAllChildren(d, waPush)
  # remove cycles:
  for c in elements(gch.cycleRoots):
    if c.refcount <% rcIncrement and c notin gch.stackCells:
      gch.tempStack.len = 0
      forAllChildren(c, waPush)
      while gch.tempStack.len > 0:
        dec(gch.tempStack.len)
        var d = gch.tempStack.d[gch.tempStack.len]
        if d.refcount <% rcIncrement:
          if d notin gch.cycleRoots: # d is leaf of c and not part of cycle
            addZCT(gch.zct, d)
            when logGC: writeCell("add to ZCT (from cycle collector)", d)
      prepareDealloc(c)
      gcTrace(c, csCycFreed)
      when logGC: writeCell("cycle collector dealloc cell", c)
      when reallyDealloc: tlsf_free(c)
      else:
        assert(c.typ != nil)
        zeroMem(c, sizeof(TCell))
  CellSetDeinit(gch.cycleRoots)
  gch.cycleRoots = newRoots

proc gcMark(p: pointer) = # {.fastcall.}
  # addresses on the stack point to user data, not to the cell header, so
  # convert them to cell pointers first:
  var cell = usrToCell(p)
  var c = cast[TAddress](cell)
  if ((c and gch.mask) == c) and c >% 1024:
    # fast check: does it look like a cell?
    when logGC: cfprintf(cstdout, "in stackcells %p\n", cell)
    incl(gch.stackCells, cell) # yes: mark it

# ----------------- stack management --------------------------------------
# inspired from Smart Eiffel (c)

proc stackSize(): int {.noinline.} =
  var stackTop: array[0..1, pointer]
  result = abs(cast[int](addr(stackTop[0])) - cast[int](stackBottom))

when defined(sparc): # For SPARC architecture.
  proc isOnStack(p: pointer): bool =
    var stackTop: array[0..1, pointer]
    result = p >= addr(stackTop[0]) and p <= stackBottom

  proc markStackAndRegisters(gch: var TGcHeap) {.noinline, cdecl.} =
    when defined(sparcv9):
      asm """"flushw \n" """
    else:
      asm """"ta      0x3   ! ST_FLUSH_WINDOWS\n" """

    var
      max = stackBottom
      sp: PPointer
      stackTop: array[0..1, pointer]
    sp = addr(stackTop[0])
    # Addresses decrease as the stack grows.
    while sp <= max:
      gcMark(sp^)
      sp = cast[ppointer](cast[TAddress](sp) +% sizeof(pointer))

elif defined(ELATE):
  {.error: "stack marking code is to be written for this architecture".}

elif defined(hppa) or defined(hp9000) or defined(hp9000s300) or
     defined(hp9000s700) or defined(hp9000s800) or defined(hp9000s820):
  # ---------------------------------------------------------------------------
  # Generic code for architectures where addresses increase as the stack grows.
  # ---------------------------------------------------------------------------
  proc isOnStack(p: pointer): bool =
    var stackTop: array[0..1, pointer]
    result = p <= addr(stackTop[0]) and p >= stackBottom

  var jmpbufSize {.importc: "sizeof(jmp_buf)", nodecl.}: int
    # a little hack to get the size of a TJmpBuf in the generated C code
    # in a platform independent way

  proc markStackAndRegisters(gch: var TGcHeap) {.noinline, cdecl.} =
    var
      max = stackBottom
      registers: C_JmpBuf # The jmp_buf buffer is in the C stack.
      sp: PPointer        # Used to traverse the stack and registers assuming
                          # that `setjmp' will save registers in the C stack.
    if c_setjmp(registers) == 0: # To fill the C stack with registers.
      sp = cast[ppointer](cast[TAddress](addr(registers)) +%
             jmpbufSize -% sizeof(pointer))
      # sp will traverse the JMP_BUF as well (jmp_buf size is added,
      # otherwise sp would be below the registers structure).
      while sp >= max:
        gcMark(sp^)
        sp = cast[ppointer](cast[TAddress](sp) -% sizeof(pointer))

elif defined(I386) and asmVersion:
  # addresses decrease as the stack grows:
  proc isOnStack(p: pointer): bool =
    var stackTop: array [0..1, pointer]
    result = p >= addr(stackTop[0]) and p <= stackBottom

  proc markStackAndRegisters(gch: var TGcHeap) {.noinline, cdecl.} =
    # This code should be safe even for aggressive optimizers. The try
    # statement saves all registers into the safepoint, which we
    # scan in addition to the stack.
    type
      TPtrArray = array[0..0xffffff, pointer]
    try:
      var pa = cast[ptr TPtrArray](excHandler)
      for i in 0 .. sizeof(TSafePoint) - 1:
        gcMark(pa[i])
    finally:
      # iterate over the stack:
      var max = cast[TAddress](stackBottom)
      var stackTop{.volatile.}: array [0..15, pointer]
      var sp {.volatile.} = cast[TAddress](addr(stackTop[0]))
      while sp <= max:
        gcMark(cast[ppointer](sp)^)
        sp = sp +% sizeof(pointer)

    when false:
      var counter = 0
      #mov ebx, OFFSET `stackBottom`
      #mov ebx, [ebx]
      asm """
        pusha
        mov edi, esp
        call `getStackBottom`
        mov ebx, eax
      L1:
        cmp edi, ebx
        ja L2
        mov eax, [edi]
        call `gcMark`
        add edi, 4
        inc [`counter`]
        jmp L1
      L2:
        popa
      """
      cfprintf(cstdout, "stack %ld\n", counter)

else:
  # ---------------------------------------------------------------------------
  # Generic code for architectures where addresses decrease as the stack grows.
  # ---------------------------------------------------------------------------
  proc isOnStack(p: pointer): bool =
    var stackTop: array [0..1, pointer]
    result = p >= addr(stackTop[0]) and p <= stackBottom

  var
    gRegisters: C_JmpBuf
    jmpbufSize {.importc: "sizeof(jmp_buf)", nodecl.}: int
      # a little hack to get the size of a TJmpBuf in the generated C code
      # in a platform independent way

  proc markStackAndRegisters(gch: var TGcHeap) {.noinline, cdecl.} =
    when true:
      # new version: several C compilers are too smart here
      var
        max = cast[TAddress](stackBottom)
        stackTop: array [0..15, pointer]
      if c_setjmp(gregisters) == 0'i32: # To fill the C stack with registers.
        # iterate over the registers:
        var sp = cast[TAddress](addr(gregisters))
        while sp < cast[TAddress](addr(gregisters))+%jmpbufSize:
          gcMark(cast[ppointer](sp)^)
          sp = sp +% sizeof(pointer)
        # iterate over the stack:
        sp = cast[TAddress](addr(stackTop[0]))
        while sp <= max:
          gcMark(cast[ppointer](sp)^)
          sp = sp +% sizeof(pointer)
      else:
        c_longjmp(gregisters, 42)
        # this can never happen, but should trick any compiler that is
        # not as smart as a human
    else:
      var
        max = stackBottom
        registers: C_JmpBuf # The jmp_buf buffer is in the C stack.
        sp: PPointer        # Used to traverse the stack and registers assuming
                            # that `setjmp' will save registers in the C stack.
      if c_setjmp(registers) == 0'i32: # To fill the C stack with registers.
        sp = cast[ppointer](addr(registers))
        while sp <= max:
          gcMark(sp^)
          sp = cast[ppointer](cast[TAddress](sp) +% sizeof(pointer))

# ----------------------------------------------------------------------------
# end of non-portable code
# ----------------------------------------------------------------------------

proc updateZCT() =
  # We have to make an additional pass over the ZCT unfortunately, because
  # the ZCT may be out of date, which means it contains cells with a
  # refcount > 0. The reason is that ``incRef`` does not bother to remove
  # the cell from the ZCT as this might be too slow.
  var j = 0
  var L = gch.zct.len # because globals make it hard for the optimizer
  var d = gch.zct.d
  while j < L:
    var c = d[j]
    if c.refcount >=% rcIncrement:
      when logGC: writeCell("remove from ZCT", c)
      # remove from ZCT:
      dec(L)
      d[j] = d[L]
      c.refcount = c.refcount and not colorMask
      # we have a new cell at position j, so don't increment j
    else:
      inc(j)
  gch.zct.len = L

proc CollectZCT(gch: var TGcHeap) =
  var i = 0
  while i < gch.zct.len:
    var c = gch.zct.d[i]
    assert(c.refcount <% rcIncrement)
    assert((c.refcount and colorMask) == rcZct)
    if canBeCycleRoot(c): excl(gch.cycleRoots, c)
    if c notin gch.stackCells:
      # remove from ZCT:
      c.refcount = c.refcount and not colorMask
      gch.zct.d[i] = gch.zct.d[gch.zct.len-1]
      # we have a new cell at position i, so don't increment i
      dec(gch.zct.len)
      when logGC: writeCell("zct dealloc cell", c)
      gcTrace(c, csZctFreed)
      # We are about to free the object, call the finalizer BEFORE its
      # children are deleted as well, because otherwise the finalizer may
      # access invalid memory. This is done by prepareDealloc():
      prepareDealloc(c)
      forAllChildren(c, waZctDecRef)
      when reallyDealloc: tlsf_free(c)
      else:
        assert(c.typ != nil)
        zeroMem(c, sizeof(TCell))
    else:
      inc(i)
  when stressGC:
    for j in 0..gch.zct.len-1: assert(gch.zct.d[j] in gch.stackCells)

proc collectCT(gch: var TGcHeap, zctUpdated: bool) =
  if gch.zct.len >= ZctThreshold or
      (cycleGC and getOccupiedMem() >= cycleThreshold) or stressGC:
    if not zctUpdated: updateZCT()
    gch.maxStackSize = max(gch.maxStackSize, stackSize())
    CellSetInit(gch.stackCells)
    markStackAndRegisters(gch)
    gch.maxStackPages = max(gch.maxStackPages, gch.stackCells.counter)
    inc(gch.stackScans)
    collectZCT(gch)
    when cycleGC:
      if getOccupiedMem() >= cycleThreshold or stressGC:
        collectCycles(gch)
        collectZCT(gch)
        inc(gch.cycleCollections)
        cycleThreshold = max(InitialCycleThreshold,
                             getOccupiedMem() * cycleIncrease)
        gch.maxThreshold = max(gch.maxThreshold, cycleThreshold)
    CellSetDeinit(gch.stackCells)

proc GC_fullCollect() =
  var oldThreshold = cycleThreshold
  cycleThreshold = 0 # forces cycle collection
  collectCT(gch, false)
  cycleThreshold = oldThreshold

proc GC_getStatistics(): string =
  GC_disable()
  result = "[GC] total memory: " & $(getTotalMem()) & "\n" &
           "[GC] occupied memory: " & $(getOccupiedMem()) & "\n" &
           "[GC] stack scans: " & $gch.stackScans & "\n" &
           "[GC] stack pages: " & $gch.maxStackPages & "\n" &
           "[GC] cycle collections: " & $gch.cycleCollections & "\n" &
           "[GC] max threshold: " & $gch.maxThreshold & "\n" &
           "[GC] zct capacity: " & $gch.zct.cap & "\n" &
           "[GC] max cycle table size: " & $gch.cycleTableSize & "\n" &
           "[GC] max stack size: " & $gch.maxStackSize
  when traceGC:
    writeLeakage()
  GC_enable()
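
# A hedged usage sketch (user code, not part of this module): the exported
# entry points defined above can be driven explicitly, e.g.
#
#   GC_disable()             # keep collections out of a critical section
#   # ... allocation-heavy work ...
#   GC_enable()
#   GC_fullCollect()         # force a full collection including cycles
#   echo GC_getStatistics()  # print the counters gathered by this module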