Diffstat (limited to 'lib/gc.nim')
-rw-r--r--  lib/gc.nim  1150
1 file changed, 578 insertions, 572 deletions
diff --git a/lib/gc.nim b/lib/gc.nim index 72a287064..680256a93 100644 --- a/lib/gc.nim +++ b/lib/gc.nim @@ -9,20 +9,71 @@ # Garbage Collector +# Current Features: +# * incremental +# * non-recursive +# * generational +# * excellent performance + +# Future Improvements: +# * Both dlmalloc and TLSF lack zero-overhead object allocation. Thus, for +# small objects we will should use our own allocator. +# * Support for multi-threading. However, locks for the reference counting +# might turn out to be too slow. + +# --------------------------------------------------------------------------- +# Interface to TLSF: +const + useTLSF = false # benchmarking showed that *dlmalloc* is faster than *TLSF* + +when useTLSF: + {.compile: "tlsf.c".} + + proc tlsfUsed: int {.importc: "TLSF_GET_USED_SIZE", noconv.} + proc tlsfMax: int {.importc: "TLSF_GET_MAX_SIZE", noconv.} + + proc tlsf_malloc(size: int): pointer {.importc, noconv.} + proc tlsf_free(p: pointer) {.importc, noconv.} + proc tlsf_realloc(p: pointer, size: int): pointer {.importc, noconv.} +else: + # use DL malloc + {.compile: "dlmalloc.c".} + proc tlsfUsed: int {.importc: "dlmalloc_footprint", noconv.} + proc tlsfMax: int {.importc: "dlmalloc_max_footprint", noconv.} + + proc tlsf_malloc(size: int): pointer {.importc: "dlmalloc", noconv.} + proc tlsf_free(p: pointer) {.importc: "dlfree", noconv.} + proc tlsf_realloc(p: pointer, size: int): pointer {. + importc: "dlrealloc", noconv.} -# For a description of the algorithms used here see: -# intern.html +# --------------------------------------------------------------------------- -{.define: debugGC.} # we wish to debug the GC... +proc getOccupiedMem(): int = return tlsfUsed() +proc getFreeMem(): int = return tlsfMax() - tlsfUsed() +proc getTotalMem(): int = return tlsfMax() -#when defined(debugGC): -# {.define: logGC.} # define if the GC should log some of its activities +# --------------------------------------------------------------------------- -{.define: cycleGC.} +# After several attempts, we now use a novel approach for cycle detection: +# increments/decrements of the reference counters are enqued into a buffer +# and not immediately performed. The reason is that increments may introduce +# new garbage cycles. The cycle detector only scans the changed subgraph. This +# provides superior performance. Of course only cells that may be part of +# a cycle are considered. However, reallocation does not work with this scheme! +# Because the queue may contain references to the old cell. +# The queue is thread-local storage, so that no synchronization is needed for +# reference counting. + +# With this scheme, the entire heap is never searched and there is no need for +# the AT. const + debugGC = false # we wish to debug the GC... + logGC = false traceGC = false # extensive debugging reallyDealloc = true # for debugging purposes this can be set to false + cycleGC = true # (de)activate the cycle GC + stressGC = debugGC # Guess the page size of the system; if it is the # wrong value, performance may be worse (this is not @@ -32,103 +83,107 @@ const PageSize = 1 shl PageShift # on 32 bit systems 4096 CycleIncrease = 2 # is a multiplicative increase - InitialCycleThreshold = 8*1024*1024 # X MB because cycle checking is slow - ZctThreshold = 512 # we collect garbage if the ZCT's size + InitialCycleThreshold = 4*1024*1024 # X MB because cycle checking is slow + ZctThreshold = 256 # we collect garbage if the ZCT's size # reaches this threshold - # this needs benchmarking... 
- -when defined(debugGC): - const stressGC = False -else: - const stressGC = False + # this seems to be a good value -# things the System module thinks should be available: -when defined(useDL) or defined(nativeDL): - type - TMallocInfo {.importc: "struct mallinfo", nodecl, final.} = object - arena: cint # non-mmapped space allocated from system - ordblks: cint # number of free chunks - smblks: cint # number of fastbin blocks - hblks: cint # number of mmapped regions - hblkhd: cint # space in mmapped regions - usmblks: cint # maximum total allocated space - fsmblks: cint # space available in freed fastbin blocks - uordblks: cint # total allocated space - fordblks: cint # total free space - keepcost: cint # top-most, releasable (via malloc_trim) space - -when defined(useDL): - proc mallinfo: TMallocInfo {.importc: "dlmallinfo", nodecl.} -elif defined(nativeDL): - proc mallinfo: TMallocInfo {.importc: "mallinfo", nodecl.} - -when defined(useDL) or defined(nativeDL): - proc getOccupiedMem(): int = return mallinfo().uordblks - proc getFreeMem(): int = return mallinfo().fordblks - proc getTotalMem(): int = - var m = mallinfo() - return int(m.hblkhd) + int(m.arena) -else: # not available: - proc getOccupiedMem(): int = return -1 - proc getFreeMem(): int = return -1 - proc getTotalMem(): int = return -1 +const + MemAlignment = sizeof(pointer)*2 # minimal memory block that can be allocated + BitsPerUnit = sizeof(int)*8 + # a "unit" is a word, i.e. 4 bytes + # on a 32 bit system; I do not use the term "word" because under 32-bit + # Windows it is sometimes only 16 bits -var - cycleThreshold: int = InitialCycleThreshold + BitsPerPage = PageSize div MemAlignment + UnitsPerPage = BitsPerPage div BitsPerUnit + # how many units do we need to describe a page: + # on 32 bit systems this is only 16 (!) - memUsed: int = 0 # we have to keep track how much we have allocated + rcIncrement = 0b1000 # so that lowest 3 bits are not touched + # NOTE: Most colors are currently unused + rcBlack = 0b000 # cell is colored black; in use or free + rcGray = 0b001 # possible member of a cycle + rcWhite = 0b010 # member of a garbage cycle + rcPurple = 0b011 # possible root of a cycle + rcZct = 0b100 # in ZCT + rcRed = 0b101 # Candidate cycle undergoing sigma-computation + rcOrange = 0b110 # Candidate cycle awaiting epoch boundary + rcShift = 3 # shift by rcShift to get the reference counter + colorMask = 0b111 +type + TWalkOp = enum + waZctDecRef, waPush, waCycleDecRef - recGcLock: int = 0 - # we use a lock to prevend the garbage collector to - # be triggered in a finalizer; the collector should not call - # itself this way! Thus every object allocated by a finalizer - # will not trigger a garbage collection. This is wasteful but safe. - # This is a lock against recursive garbage collection, not a lock for - # threads! - -when defined(useDL) and not defined(nativeDL): - {.compile: "dlmalloc.c".} + TCell {.pure.} = object + refcount: int # the refcount and some flags + typ: PNimType + when debugGC: + filename: cstring + line: int -type + PCell = ptr TCell TFinalizer {.compilerproc.} = proc (self: pointer) # A ref type can have a finalizer that is called before the object's # storage is freed. PPointer = ptr pointer + TByteArray = array[0..1000_0000, byte] + PByte = ptr TByteArray + PString = ptr string -proc asgnRef(dest: ppointer, src: pointer) {.compilerproc.} -proc unsureAsgnRef(dest: ppointer, src: pointer) {.compilerproc.} - # unsureAsgnRef updates the reference counters only if dest is not on the - # stack. 
It is used by the code generator if it cannot decide wether a - # reference is in the stack or not (this can happen for out/var parameters). -proc growObj(old: pointer, newsize: int): pointer {.compilerproc.} -proc newObj(typ: PNimType, size: int): pointer {.compilerproc.} -proc newSeq(typ: PNimType, len: int): pointer {.compilerproc.} - -# implementation: - -when defined(useDL): - proc nimSize(p: pointer): int {. - importc: "dlmalloc_usable_size", header: "dlmalloc.h".} -elif defined(nativeDL): - proc nimSize(p: pointer): int {. - importc: "malloc_usable_size", header: "<malloc.h>".} + PPageDesc = ptr TPageDesc + TBitIndex = range[0..UnitsPerPage-1] + TPageDesc {.final, pure.} = object + next: PPageDesc # all nodes are connected with this pointer + key: TAddress # start address at bit 0 + bits: array[TBitIndex, int] # a bit vector -type - TWalkOp = enum - waNone, waRelease, waZctDecRef, waCycleDecRef, waCycleIncRef, waDebugIncRef + PPageDescArray = ptr array[0..1000_000, PPageDesc] + TCellSet {.final, pure.} = object + counter, max: int + head: PPageDesc + data: PPageDescArray - TCollectorData = int - TCell {.final.} = object - refcount: TCollectorData # the refcount and bit flags - typ: PNimType - when stressGC: - stackcount: int # stack counter for debugging - drefc: int # real reference counter for debugging + PCellArray = ptr array[0..100_000_000, PCell] + TCellSeq {.final, pure.} = object + len, cap: int + d: PCellArray - PCell = ptr TCell + TGcHeap {.final, pure.} = object # this contains the zero count and + # non-zero count table + mask: TAddress # mask for fast pointer detection + zct: TCellSeq # the zero count table + stackCells: TCellSet # cells and addresses that look like a cell but + # aren't of the hardware stack + + stackScans: int # number of performed stack scans (for statistics) + cycleCollections: int # number of performed full collections + maxThreshold: int # max threshold that has been set + maxStackSize: int # max stack size + maxStackPages: int # max number of pages in stack + cycleTableSize: int # max entries in cycle table + cycleRoots: TCellSet + tempStack: TCellSeq # temporary stack for recursion elimination var gOutOfMem: ref EOutOfMemory + stackBottom: pointer + gch: TGcHeap + cycleThreshold: int = InitialCycleThreshold + recGcLock: int = 0 + # we use a lock to prevend the garbage collector to be triggered in a + # finalizer; the collector should not call itself this way! Thus every + # object allocated by a finalizer will not trigger a garbage collection. + # This is wasteful but safe. This is a lock against recursive garbage + # collection, not a lock for threads! + +proc unsureAsgnRef(dest: ppointer, src: pointer) {.compilerproc.} + # unsureAsgnRef updates the reference counters only if dest is not on the + # stack. It is used by the code generator if it cannot decide wether a + # reference is in the stack or not (this can happen for out/var parameters). 
+#proc growObj(old: pointer, newsize: int): pointer {.compilerproc.} +proc newObj(typ: PNimType, size: int): pointer {.compilerproc.} +proc newSeq(typ: PNimType, len: int): pointer {.compilerproc.} proc raiseOutOfMem() {.noreturn.} = if gOutOfMem == nil: @@ -145,17 +200,22 @@ proc usrToCell(usr: pointer): PCell {.inline.} = # convert pointer to userdata to object (=pointer to refcount) result = cast[PCell](cast[TAddress](usr)-%TAddress(sizeof(TCell))) +proc canbeCycleRoot(c: PCell): bool {.inline.} = + result = ntfAcyclic notin c.typ.flags + proc extGetCellType(c: pointer): PNimType {.compilerproc.} = # used for code generation concerning debugging result = usrToCell(c).typ proc internRefcount(p: pointer): int {.exportc: "getRefcount".} = result = int(usrToCell(p).refcount) - if result < 0: result = 0 + if result > 0: result = result shr rcShift + else: result = 0 proc gcAlloc(size: int): pointer = - result = alloc0(size) + result = tlsf_malloc(size) if result == nil: raiseOutOfMem() + zeroMem(result, size) proc GC_disable() = inc(recGcLock) proc GC_enable() = @@ -181,95 +241,29 @@ proc nextTry(h, maxHash: int): int {.inline.} = # generates each int in range(maxHash) exactly once (see any text on # random-number generation for proof). -# ------------------ Any table (AT) ------------- - -# these values are for DL-malloc known for sure (and other allocators -# can only be worse): -when defined(useDL) or not defined(bcc): - const MemAlignment = 8 # minimal memory block that can be allocated -else: - const MemAlignment = 4 # Borland's memory manager is terrible! - -const - BitsPerUnit = sizeof(int)*8 - # a "unit" is a word, i.e. 4 bytes - # on a 32 bit system; I do not use the term "word" because under 32-bit - # Windows it is sometimes only 16 bits - - BitsPerPage = PageSize div MemAlignment - UnitsPerPage = BitsPerPage div BitsPerUnit - # how many units do we need to describe a page: - # on 32 bit systems this is only 16 (!) - # this that has to equals zero, otherwise we have to round up UnitsPerPage: when BitsPerPage mod BitsPerUnit != 0: {.error: "(BitsPerPage mod BitsPerUnit) should be zero!".} -# ------------------- cell set handling ------------------------------ -# A cellset consists of a hash table of page descriptors. A page -# descriptor has a bit for every Memalignment'th byte in the page. -# However, only bits corresponding to addresses that start memory blocks -# are set. -# Page descriptors are also linked to a list; the list -# is used for easy traversing of all page descriptors; this allows a -# fast iterator. -# We use a specialized hashing scheme; the formula is : -# hash = Page bitand max -# We use linear probing with the formular: (5*h)+1 -# Thus we likely get no collisions at all if the pages are given us -# sequentially by the operating system! 
-type - PPageDesc = ptr TPageDesc - - TBitIndex = range[0..UnitsPerPage-1] - - TPageDesc {.final.} = object - next: PPageDesc # all nodes are connected with this pointer - key: TAddress # start address at bit 0 - bits: array[TBitIndex, int] # a bit vector - - PPageDescArray = ptr array[0..1000_000, PPageDesc] - TCellSet {.final.} = object - counter, max: int - head: PPageDesc - data: PPageDescArray - - PCellArray = ptr array[0..100_000_000, PCell] - TCellSeq {.final.} = object - len, cap: int - d: PCellArray - - TSlowSet {.final.} = object # used for debugging purposes only - L: int # current length - cap: int # capacity - d: PCellArray - - TGcHeap {.final.} = object # this contains the zero count and - # non-zero count table - mask: TAddress # mask for fast pointer detection - zct: TCellSeq # the zero count table - at: TCellSet # a table that contains all references - newAT: TCellSet - stackCells: TCellSeq # cells that need to be decremented because they - # are in the hardware stack; a cell may occur - # several times in this data structure +# ------------------- cell set handling --------------------------------------- -var - stackBottom: pointer - gch: TGcHeap +proc inOperator(s: TCellSeq, c: PCell): bool {.inline.} = + for i in 0 .. s.len-1: + if s.d[i] == c: return True + return False proc add(s: var TCellSeq, c: PCell) {.inline.} = if s.len >= s.cap: s.cap = s.cap * 3 div 2 - s.d = cast[PCellArray](realloc(s.d, s.cap * sizeof(PCell))) + s.d = cast[PCellArray](tlsf_realloc(s.d, s.cap * sizeof(PCell))) if s.d == nil: raiseOutOfMem() s.d[s.len] = c inc(s.len) -proc inOperator(s: TCellSeq, c: PCell): bool {.inline.} = - for i in 0 .. s.len-1: - if s.d[i] == c: return True - return False +proc addZCT(s: var TCellSeq, c: PCell) = + if (c.refcount and colorMask) != rcZct: + c.refcount = c.refcount and not colorMask or rcZct + add(s, c) proc init(s: var TCellSeq, cap: int = 1024) = s.len = 0 @@ -289,13 +283,13 @@ proc CellSetDeinit(s: var TCellSet) = var it = s.head while it != nil: var n = it.next - dealloc(it) + tlsf_free(it) it = n s.head = nil # play it safe here - dealloc(s.data) + tlsf_free(s.data) s.data = nil s.counter = 0 - + proc CellSetGet(t: TCellSet, key: TAddress): PPageDesc = var h = cast[int](key) and t.max while t.data[h] != nil: @@ -313,15 +307,13 @@ proc CellSetRawInsert(t: TCellSet, data: PPageDescArray, data[h] = desc proc CellSetEnlarge(t: var TCellSet) = - var - n: PPageDescArray - oldMax = t.max + var oldMax = t.max t.max = ((t.max+1)*2)-1 - n = cast[PPageDescArray](gcAlloc((t.max + 1) * sizeof(PPageDesc))) + var n = cast[PPageDescArray](gcAlloc((t.max + 1) * sizeof(PPageDesc))) for i in 0 .. 
oldmax: if t.data[i] != nil: CellSetRawInsert(t, n, t.data[i]) - dealloc(t.data) + tlsf_free(t.data) t.data = n proc CellSetPut(t: var TCellSet, key: TAddress): PPageDesc = @@ -345,14 +337,11 @@ proc CellSetPut(t: var TCellSet, key: TAddress): PPageDesc = t.head = result t.data[h] = result -# ---------- slightly higher level procs ---------------------------------- +# ---------- slightly higher level procs -------------------------------------- proc in_Operator(s: TCellSet, cell: PCell): bool = - var - u: TAddress - t: PPageDesc - u = cast[TAddress](cell) - t = CellSetGet(s, u shr PageShift) + var u = cast[TAddress](cell) + var t = CellSetGet(s, u shr PageShift) if t != nil: u = (u %% PageSize) /% MemAlignment result = (t.bits[u /% BitsPerUnit] and (1 shl (u %% BitsPerUnit))) != 0 @@ -360,21 +349,15 @@ proc in_Operator(s: TCellSet, cell: PCell): bool = result = false proc incl(s: var TCellSet, cell: PCell) = - var - u: TAddress - t: PPageDesc - u = cast[TAddress](cell) - t = CellSetPut(s, u shr PageShift) + var u = cast[TAddress](cell) + var t = CellSetPut(s, u shr PageShift) u = (u %% PageSize) /% MemAlignment t.bits[u /% BitsPerUnit] = t.bits[u /% BitsPerUnit] or (1 shl (u %% BitsPerUnit)) proc excl(s: var TCellSet, cell: PCell) = - var - u: TAddress - t: PPageDesc - u = cast[TAddress](cell) - t = CellSetGet(s, u shr PageShift) + var u = cast[TAddress](cell) + var t = CellSetGet(s, u shr PageShift) if t != nil: u = (u %% PageSize) /% MemAlignment t.bits[u /% BitsPerUnit] = (t.bits[u /% BitsPerUnit] and @@ -400,97 +383,43 @@ iterator elements(t: TCellSet): PCell {.inline.} = # --------------- end of Cellset routines ------------------------------------- -proc testPageDescs() = - var root: TCellSet - CellSetInit(root) - #var u = 10_000 - #while u <= 20_000: - # incl(root, cast[PCell](u)) - # inc(u, 8) - - incl(root, cast[PCell](0x81cdfb8)) - for cell in elements(root): - c_fprintf(c_stdout, "%p\n", cast[int](cell)) - -#testPageDescs() - -when defined(debugGC): +when logGC or traceGC: proc writeCell(msg: CString, c: PCell) = - if c.typ != nil: - if c.typ.kind == tyString: - c_fprintf(c_stdout, "%s\n", cast[TAddress](cellToUsr(c)) + sizeof(int)*2) - c_fprintf(c_stdout, "%s: %p %d\n", msg, c, c.typ.kind) - else: c_fprintf(c_stdout, "%s: %p (nil type)\n", msg, c) - proc writePtr(msg: CString, p: Pointer) = - c_fprintf(c_stdout, "%s: %p\n", msg, p) - + var kind = -1 + if c.typ != nil: kind = ord(c.typ.kind) + when debugGC: + c_fprintf(c_stdout, "[GC] %s: %p %d rc=%ld from %s(%ld)\n", + msg, c, kind, c.refcount shr rcShift, c.filename, c.line) + else: + c_fprintf(c_stdout, "[GC] %s: %p %d rc=%ld\n", + msg, c, kind, c.refcount shr rcShift) when traceGC: # traceGC is a special switch to enable extensive debugging type TCellState = enum csAllocated, csZctFreed, csCycFreed - - proc cellSetInit(s: var TSlowSet) = - s.L = 0 - s.cap = 4096 - s.d = cast[PCellArray](gcAlloc(s.cap * sizeof(PCell))) - - proc cellSetDeinit(s: var TSlowSet) = - s.L = 0 - s.cap = 0 - dealloc(s.d) - - proc incl(s: var TSlowSet, c: PCell) = - if s.L >= s.cap: - s.cap = s.cap * 3 div 2 - s.d = cast[PCellArray](realloc(s.d, s.cap * sizeof(PCell))) - if s.d == nil: raiseOutOfMem() - s.d[s.L] = c - inc(s.L) - - proc excl(s: var TSlowSet, c: PCell) = - var i = 0 - while i < s.L: - if s.d[i] == c: - s.d[i] = s.d[s.L-1] - dec(s.L) - break - inc(i) - - proc inOperator(s: TSlowSet, c: PCell): bool = - var i = 0 - while i < s.L: - if s.d[i] == c: return true - inc(i) - - iterator elements(s: TSlowSet): PCell = - var i = 0 - while i < 
s.L: - yield s.d[i] - inc(i) - var - states: array[TCellState, TSlowSet] # TCellSet] + states: array[TCellState, TCellSet] proc traceCell(c: PCell, state: TCellState) = case state of csAllocated: if c in states[csAllocated]: - writeCell("attempt to alloc a already allocated cell", c) + writeCell("attempt to alloc an already allocated cell", c) assert(false) excl(states[csCycFreed], c) excl(states[csZctFreed], c) of csZctFreed: - if c notin states[csAllocated]: - writeCell("attempt to free a not allocated cell", c) - assert(false) if c in states[csZctFreed]: writeCell("attempt to free zct cell twice", c) assert(false) if c in states[csCycFreed]: writeCell("attempt to free with zct, but already freed with cyc", c) assert(false) + if c notin states[csAllocated]: + writeCell("attempt to free not an allocated cell", c) + assert(false) excl(states[csAllocated], c) of csCycFreed: if c notin states[csAllocated]: @@ -505,17 +434,30 @@ when traceGC: excl(states[csAllocated], c) incl(states[state], c) + proc writeLeakage() = + var z = 0 + var y = 0 + var e = 0 + for c in elements(states[csAllocated]): + inc(e) + if c in states[csZctFreed]: inc(z) + elif c in states[csCycFreed]: inc(z) + else: writeCell("leak", c) + cfprintf(cstdout, "Allocations: %ld; ZCT freed: %ld; CYC freed: %ld\n", + e, z, y) + template gcTrace(cell, state: expr): stmt = when traceGC: traceCell(cell, state) -# ------------------------------------------------------------------------- +# ----------------------------------------------------------------------------- # forward declarations: -proc collectCT(gch: var TGcHeap) -proc IsOnStack(p: pointer): bool +proc updateZCT() +proc collectCT(gch: var TGcHeap, zctUpdated: bool) +proc IsOnStack(p: pointer): bool {.noinline.} proc forAllChildren(cell: PCell, op: TWalkOp) -proc collectCycles(gch: var TGcHeap) - +proc doOperation(p: pointer, op: TWalkOp) +proc forAllChildrenAux(dest: Pointer, mt: PNimType, op: TWalkOp) proc reprAny(p: pointer, typ: PNimType): string {.compilerproc.} # we need the prototype here for debugging purposes @@ -530,157 +472,86 @@ proc prepareDealloc(cell: PCell) = (cast[TFinalizer](cell.typ.finalizer))(cellToUsr(cell)) dec(recGcLock) - when defined(nimSize): - memUsed = memUsed - nimSize(cell) - else: - memUsed = memUsed - cell.typ.size - -proc checkZCT(): bool = - if recGcLock >= 1: return true # prevent endless recursion - inc(recGcLock) - result = true - for i in 0..gch.zct.len-1: - var c = gch.zct.d[i] - if c.refcount > 0: # should be in the ZCT! 
- writeCell("wrong ZCT entry", c) - result = false - elif gch.zct.d[-c.refcount] != c: - writeCell("wrong ZCT position", c) - result = false - dec(recGcLock) - -proc GC_invariant(): bool = - if recGcLock >= 1: return true # prevent endless recursion - inc(recGcLock) - result = True - block checks: - if not checkZCT(): - result = false - break checks - # set counters back to zero: - for c in elements(gch.AT): - var t = c.typ - if t == nil or t.kind notin {tySequence, tyString, tyRef}: - writeCell("corrupt cell?", c) - result = false - break checks - when stressGC: c.drefc = 0 - for c in elements(gch.AT): - forAllChildren(c, waDebugIncRef) - when stressGC: - for c in elements(gch.AT): - var rc = c.refcount - if rc < 0: rc = 0 - if c.drefc > rc + c.stackcount: - result = false # failed - c_fprintf(c_stdout, - "broken cell: %p, refc: %ld, stack: %ld, real: %ld\n", - c, c.refcount, c.stackcount, c.drefc) - break checks - dec(recGcLock) - -when stressGC: - proc GCdebugHook() = - if not GC_invariant(): - assert(false) - - dbgLineHook = GCdebugHook - proc setStackBottom(theStackBottom: pointer) {.compilerproc.} = stackBottom = theStackBottom proc initGC() = when traceGC: for i in low(TCellState)..high(TCellState): CellSetInit(states[i]) + gch.stackScans = 0 + gch.cycleCollections = 0 + gch.maxThreshold = 0 + gch.maxStackSize = 0 + gch.maxStackPages = 0 + gch.cycleTableSize = 0 # init the rt init(gch.zct) - CellSetInit(gch.at) - init(gch.stackCells) + init(gch.tempStack) + CellSetInit(gch.cycleRoots) + CellSetInit(gch.stackCells) gch.mask = 0 new(gOutOfMem) # reserve space for the EOutOfMemory exception here! - assert(GC_invariant()) - -proc decRef(cell: PCell) {.inline.} = - assert(cell.refcount > 0) # this should be the case! - when stressGC: assert(cell in gch.AT) - dec(cell.refcount) - if cell.refcount == 0: - cell.refcount = -gch.zct.len - when stressGC: assert(cell notin gch.zct) - add(gch.zct, cell) - when stressGC: assert(checkZCT()) - -proc incRef(cell: PCell) {.inline.} = - var rc = cell.refcount - if rc <= 0: - # remove from zero count table: - when stressGC: assert(gch.zct.len > -rc) - when stressGC: assert(gch.zct.d[-rc] == cell) - gch.zct.d[-rc] = gch.zct.d[gch.zct.len-1] - gch.zct.d[-rc].refcount = rc - dec(gch.zct.len) - cell.refcount = 1 - when stressGC: assert(checkZCT()) - else: - inc(cell.refcount) - when stressGC: assert(checkZCT()) -proc asgnRef(dest: ppointer, src: pointer) = +proc PossibleRoot(gch: var TGcHeap, c: PCell) {.inline.} = + if canbeCycleRoot(c): incl(gch.cycleRoots, c) + +proc decRef(c: PCell) {.inline.} = + when stressGC: + if c.refcount <% rcIncrement: + writeCell("broken cell", c) + assert(c.refcount >% rcIncrement) + c.refcount = c.refcount -% rcIncrement + if c.refcount <% rcIncrement: + addZCT(gch.zct, c) + elif canBeCycleRoot(c): + possibleRoot(gch, c) + +proc incRef(c: PCell) {.inline.} = + c.refcount = c.refcount +% rcIncrement + if canBeCycleRoot(c): + # OPT: the code generator should special case this + possibleRoot(gch, c) + +proc nimGCref(p: pointer) {.compilerproc, inline.} = incRef(usrToCell(p)) +proc nimGCunref(p: pointer) {.compilerproc, inline.} = decRef(usrToCell(p)) + +proc asgnRef(dest: ppointer, src: pointer) {.compilerproc, inline.} = # the code generator calls this proc! assert(not isOnStack(dest)) # BUGFIX: first incRef then decRef! 
if src != nil: incRef(usrToCell(src)) if dest^ != nil: decRef(usrToCell(dest^)) dest^ = src - when stressGC: assert(GC_invariant()) + +proc asgnRefNoCycle(dest: ppointer, src: pointer) {.compilerproc, inline.} = + # the code generator calls this proc if it is known at compile time that no + # cycle is possible. + if src != nil: + var c = usrToCell(src) + c.refcount = c.refcount +% rcIncrement + if dest^ != nil: + var c = usrToCell(dest^) + c.refcount = c.refcount -% rcIncrement + if c.refcount <% rcIncrement: + addZCT(gch.zct, c) + dest^ = src proc unsureAsgnRef(dest: ppointer, src: pointer) = if not IsOnStack(dest): if src != nil: incRef(usrToCell(src)) if dest^ != nil: decRef(usrToCell(dest^)) dest^ = src - when stressGC: assert(GC_invariant()) - -proc restore(cell: PCell) = - if cell notin gch.newAT: - incl(gch.newAT, Cell) - forAllChildren(cell, waCycleIncRef) - -proc doOperation(p: pointer, op: TWalkOp) = - if p == nil: return - var cell: PCell = usrToCell(p) - assert(cell != nil) - case op # faster than function pointers because of easy prediction - of waNone: assert(false) - of waRelease: decRef(cell) # DEAD CODE! - of waZctDecRef: - decRef(cell) - of waCycleDecRef: - assert(cell.refcount > 0) - dec(cell.refcount) - of waCycleIncRef: - inc(cell.refcount) # restore proper reference counts! - restore(cell) - of waDebugIncRef: - when stressGC: inc(cell.drefc) - -type - TByteArray = array[0..1000_0000, byte] - PByte = ptr TByteArray - PString = ptr string - -proc forAllChildrenAux(dest: Pointer, mt: PNimType, op: TWalkOp) proc getDiscriminant(aa: Pointer, n: ptr TNimNode): int = assert(n.kind == nkCase) - var d: int32 + var d: int var a = cast[TAddress](aa) case n.typ.size - of 1: d = toU32(cast[ptr int8](a +% n.offset)^) - of 2: d = toU32(cast[ptr int16](a +% n.offset)^) - of 4: d = toU32(cast[ptr int32](a +% n.offset)^) + of 1: d = ze(cast[ptr int8](a +% n.offset)^) + of 2: d = ze(cast[ptr int16](a +% n.offset)^) + of 4: d = int(cast[ptr int32](a +% n.offset)^) else: assert(false) - return int(d) + return d proc selectBranch(aa: Pointer, n: ptr TNimNode): ptr TNimNode = var discr = getDiscriminant(aa, n) @@ -692,8 +563,7 @@ proc selectBranch(aa: Pointer, n: ptr TNimNode): ptr TNimNode = result = n.sons[n.len] proc forAllSlotsAux(dest: pointer, n: ptr TNimNode, op: TWalkOp) = - var - d = cast[TAddress](dest) + var d = cast[TAddress](dest) case n.kind of nkNone: assert(false) of nkSlot: forAllChildrenAux(cast[pointer](d +% n.offset), n.typ, op) @@ -704,35 +574,21 @@ proc forAllSlotsAux(dest: pointer, n: ptr TNimNode, op: TWalkOp) = if m != nil: forAllSlotsAux(dest, m, op) proc forAllChildrenAux(dest: Pointer, mt: PNimType, op: TWalkOp) = - const - handledTypes = {tyArray, tyArrayConstr, tyOpenArray, tyRef, - tyString, tySequence, tyObject, tyPureObject, tyTuple} - var - d = cast[TAddress](dest) + var d = cast[TAddress](dest) if dest == nil: return # nothing to do - case mt.Kind - of tyArray, tyArrayConstr, tyOpenArray: - if mt.base.kind in handledTypes: + if ntfNoRefs notin mt.flags: + case mt.Kind + of tyArray, tyArrayConstr, tyOpenArray: for i in 0..(mt.size div mt.base.size)-1: forAllChildrenAux(cast[pointer](d +% i *% mt.base.size), mt.base, op) - of tyRef, tyString, tySequence: # leaf: - doOperation(cast[ppointer](d)^, op) - of tyObject, tyTuple, tyPureObject: - forAllSlotsAux(dest, mt.node, op) - else: nil + of tyRef, tyString, tySequence: # leaf: + doOperation(cast[ppointer](d)^, op) + of tyObject, tyTuple, tyPureObject: + forAllSlotsAux(dest, mt.node, op) + else: nil proc 
forAllChildren(cell: PCell, op: TWalkOp) = assert(cell != nil) - when defined(debugGC): - if cell.typ == nil: - writeCell("cell has no type descriptor", cell) - when traceGC: - if cell notin states[csAllocated]: - writeCell("cell has never been allocated!", cell) - if cell in states[csCycFreed]: - writeCell("cell has been freed by Cyc", cell) - if cell in states[csZctFreed]: - writeCell("cell has been freed by Zct", cell) assert(cell.typ != nil) case cell.typ.Kind of tyRef: # common case @@ -740,46 +596,41 @@ proc forAllChildren(cell: PCell, op: TWalkOp) = of tySequence: var d = cast[TAddress](cellToUsr(cell)) var s = cast[PGenericSeq](d) - if s != nil: # BUGFIX + if s != nil: for i in 0..s.len-1: forAllChildrenAux(cast[pointer](d +% i *% cell.typ.base.size +% GenericSeqSize), cell.typ.base, op) of tyString: nil else: assert(false) -proc checkCollection() {.inline.} = +proc checkCollection(zctUpdated: bool) {.inline.} = # checks if a collection should be done if recGcLock == 0: - collectCT(gch) + collectCT(gch, zctUpdated) proc newObj(typ: PNimType, size: int): pointer = # generates a new object and sets its reference counter to 0 - var - res: PCell - when stressGC: assert(checkZCT()) assert(typ.kind in {tyRef, tyString, tySequence}) + var zctUpdated = false + if gch.zct.len >= ZctThreshold: + updateZCT() + zctUpdated = true # check if we have to collect: - checkCollection() - res = cast[PCell](Alloc0(size + sizeof(TCell))) + checkCollection(zctUpdated) + var res = cast[PCell](gcAlloc(size + sizeof(TCell))) when stressGC: assert((cast[TAddress](res) and (MemAlignment-1)) == 0) - if res == nil: raiseOutOfMem() - when defined(nimSize): - memUsed = memUsed + nimSize(res) - else: - memUsed = memUsed + size - # now it is buffered in the ZCT res.typ = typ - res.refcount = -gch.zct.len - add(gch.zct, res) # its refcount is zero, so add it to the ZCT - incl(gch.at, res) # add it to the any table too + when debugGC: + if framePtr != nil and framePtr.prev != nil: + res.filename = framePtr.prev.filename + res.line = framePtr.prev.line + res.refcount = rcZct # refcount is zero, but mark it to be in the ZCT + add(gch.zct, res) # its refcount is zero, so add it to the ZCT gch.mask = gch.mask or cast[TAddress](res) - when defined(debugGC): - when defined(logGC): writeCell("new cell", res) + when logGC: writeCell("new cell", res) gcTrace(res, csAllocated) result = cellToUsr(res) - assert(res.typ == typ) - when stressGC: assert(checkZCT()) proc newSeq(typ: PNimType, len: int): pointer = # XXX: overflow checks! 
@@ -788,96 +639,138 @@ proc newSeq(typ: PNimType, len: int): pointer = cast[PGenericSeq](result).space = len proc growObj(old: pointer, newsize: int): pointer = - var - res, ol: PCell - when stressGC: assert(checkZCT()) - checkCollection() - ol = usrToCell(old) + checkCollection(false) + var ol = usrToCell(old) + assert(ol.typ != nil) assert(ol.typ.kind in {tyString, tySequence}) - when defined(nimSize): - memUsed = memUsed - nimSize(ol) - else: - memUsed = memUsed - ol.size # this is not exact - # pity that we don't know the old size - res = cast[PCell](realloc(ol, newsize + sizeof(TCell))) - #res = cast[PCell](gcAlloc(newsize + sizeof(TCell))) - #copyMem(res, ol, nimSize(ol)) + var res = cast[PCell](gcAlloc(newsize + sizeof(TCell))) + var elemSize = 1 + if ol.typ.kind != tyString: + elemSize = ol.typ.base.size + copyMem(res, ol, cast[PGenericSeq](old).len*elemSize + + GenericSeqSize + sizeof(TCell)) + assert((cast[TAddress](res) and (MemAlignment-1)) == 0) - when defined(nimSize): - memUsed = memUsed + nimSize(res) + assert(res.refcount shr rcShift <=% 1) + #if res.refcount <% rcIncrement: + # add(gch.zct, res) + #else: # XXX: what to do here? + # decRef(ol) + if (ol.refcount and colorMask) == rcZct: + var j = gch.zct.len-1 + var d = gch.zct.d + while j >= 0: + if d[j] == ol: + d[j] = res + break + dec(j) + if canBeCycleRoot(ol): excl(gch.cycleRoots, ol) + gch.mask = gch.mask or cast[TAddress](res) + when logGC: + writeCell("growObj old cell", ol) + writeCell("growObj new cell", res) + gcTrace(ol, csZctFreed) + gcTrace(res, csAllocated) + when reallyDealloc: tlsf_free(ol) else: - memUsed = memUsed + newsize - - if res != ol: - if res == nil: raiseOutOfMem() - if res.refcount <= 0: - assert(gch.zct.d[-res.refcount] == ol) - gch.zct.d[-res.refcount] = res - excl(gch.at, ol) - incl(gch.at, res) - gch.mask = gch.mask or cast[TAddress](res) - when defined(logGC): - writeCell("growObj old cell", ol) - writeCell("growObj new cell", res) - gcTrace(ol, csZctFreed) - gcTrace(res, csAllocated) + assert(ol.typ != nil) + zeroMem(ol, sizeof(TCell)) result = cellToUsr(res) - when stressGC: assert(checkZCT()) -proc collectCycles(gch: var TGcHeap) = - when defined(logGC): - c_fprintf(c_stdout, "collecting cycles!\n") +# ---------------- cycle collector ------------------------------------------- + +# When collecting cycles, we have to consider the following: +# * there may still be references in the stack +# * some cells may still be in the ZCT, because they are referenced from +# the stack (!), so their refcounts are zero +# the ZCT is a subset of stackCells here, so we only need to care +# for stackcells + +proc doOperation(p: pointer, op: TWalkOp) = + if p == nil: return + var c: PCell = usrToCell(p) + assert(c != nil) + case op # faster than function pointers because of easy prediction + of waZctDecRef: + assert(c.refcount >=% rcIncrement) + c.refcount = c.refcount -% rcIncrement + when logGC: writeCell("decref (from doOperation)", c) + if c.refcount <% rcIncrement: addZCT(gch.zct, c) + of waPush: + add(gch.tempStack, c) + of waCycleDecRef: + assert(c.refcount >=% rcIncrement) + c.refcount = c.refcount -% rcIncrement - # step 1: pretend that any node is dead - for c in elements(gch.at): +# we now use a much simpler and non-recursive algorithm for cycle removal +proc collectCycles(gch: var TGcHeap) = + var tabSize = 0 + for c in elements(gch.cycleRoots): + inc(tabSize) + assert(c.typ != nil) forallChildren(c, waCycleDecRef) - CellSetInit(gch.newAt) - # step 2: restore life cells - for c in 
elements(gch.at): - if c.refcount > 0: restore(c) - # step 3: free dead cells: - for cell in elements(gch.at): - if cell.refcount == 0: - # We free an object that is part of a cycle here. Its children - # may have been freed already. Thus the finalizer could access - # garbage. To handle this case properly we need two passes for - # freeing here which is too expensive. We just don't call the - # finalizer for now. YYY: Any better ideas? - prepareDealloc(cell) - gcTrace(cell, csCycFreed) - when defined(logGC): - writeCell("cycle collector dealloc cell", cell) - when reallyDealloc: dealloc(cell) - CellSetDeinit(gch.at) - gch.at = gch.newAt - -proc gcMark(gch: var TGcHeap, p: pointer) = + gch.cycleTableSize = max(gch.cycleTableSize, tabSize) + + # restore reference counts (a depth-first traversal is needed): + var marker, newRoots: TCellSet + CellSetInit(marker) + CellSetInit(newRoots) + for c in elements(gch.cycleRoots): + var needsRestore = false + if c in gch.stackCells: + needsRestore = true + incl(newRoots, c) + # we need to scan this later again; maybe stack changes + # NOTE: adding to ZCT here does NOT work + elif c.refcount >=% rcIncrement: + needsRestore = true + if needsRestore: + if c notin marker: + incl(marker, c) + gch.tempStack.len = 0 + forAllChildren(c, waPush) + while gch.tempStack.len > 0: + dec(gch.tempStack.len) + var d = gch.tempStack.d[gch.tempStack.len] + d.refcount = d.refcount +% rcIncrement + if d notin marker and d in gch.cycleRoots: + incl(marker, d) + forAllChildren(d, waPush) + # remove cycles: + for c in elements(gch.cycleRoots): + if c.refcount <% rcIncrement and c notin gch.stackCells: + gch.tempStack.len = 0 + forAllChildren(c, waPush) + while gch.tempStack.len > 0: + dec(gch.tempStack.len) + var d = gch.tempStack.d[gch.tempStack.len] + if d.refcount <% rcIncrement: + if d notin gch.cycleRoots: # d is leaf of c and not part of cycle + addZCT(gch.zct, d) + when logGC: writeCell("add to ZCT (from cycle collector)", d) + prepareDealloc(c) + gcTrace(c, csCycFreed) + when logGC: writeCell("cycle collector dealloc cell", c) + when reallyDealloc: tlsf_free(c) + else: + assert(c.typ != nil) + zeroMem(c, sizeof(TCell)) + CellSetDeinit(gch.cycleRoots) + gch.cycleRoots = newRoots + +proc gcMark(p: pointer) {.fastcall.} = # the addresses are not as objects on the stack, so turn them to objects: var cell = usrToCell(p) var c = cast[TAddress](cell) - if ((c and gch.mask) == c) and cell in gch.at: - # is the page that p "points to" in the AT? (All allocated pages are - # always in the AT) - incRef(cell) - when stressGC: inc(cell.stackcount) - add(gch.stackCells, cell) - -proc unmarkStackAndRegisters(gch: var TGcHeap) = - when stressGC: assert(checkZCT()) - for i in 0 .. gch.stackCells.len-1: - var cell = gch.stackCells.d[i] - assert(cell.refcount > 0) - when stressGC: - assert(cell.stackcount > 0) - dec(cell.stackcount) - decRef(cell) - gch.stackCells.len = 0 # reset to zero - when stressGC: assert(checkZCT()) + if ((c and gch.mask) == c) and c >% 1024: + # fast check: does it look like a cell? + when logGC: cfprintf(cstdout, "in stackcells %p\n", cell) + incl(gch.stackCells, cell) # yes: mark it # ----------------- stack management -------------------------------------- # inspired from Smart Eiffel (c) -proc stackSize(): int = +proc stackSize(): int {.noinline.} = var stackTop: array[0..1, pointer] result = abs(cast[int](addr(stackTop[0])) - cast[int](stackBottom)) @@ -888,26 +781,24 @@ when defined(sparc): # For SPARC architecture. 
stackTop: array[0..1, pointer] result = p >= addr(stackTop[0]) and p <= stackBottom - proc markStackAndRegisters(gch: var TGcHeap) = + proc markStackAndRegisters(gch: var TGcHeap) {.noinline, cdecl.} = when defined(sparcv9): - asm " flushw" + asm """"flushw \n" """ else: - asm " ta 0x3 ! ST_FLUSH_WINDOWS" + asm """"ta 0x3 ! ST_FLUSH_WINDOWS\n" """ var max = stackBottom sp: PPointer stackTop: array[0..1, pointer] - stackTop[0] = nil - stackTop[1] = nil sp = addr(stackTop[0]) # Addresses decrease as the stack grows. while sp <= max: - gcMark(gch, sp^) + gcMark(sp^) sp = cast[ppointer](cast[TAddress](sp) +% sizeof(pointer)) elif defined(ELATE): - {.error: "stack marking code has to be written for this architecture".} + {.error: "stack marking code is to be written for this architecture".} elif defined(hppa) or defined(hp9000) or defined(hp9000s300) or defined(hp9000s700) or defined(hp9000s800) or defined(hp9000s820): @@ -921,24 +812,71 @@ elif defined(hppa) or defined(hp9000) or defined(hp9000s300) or result = p <= addr(stackTop[0]) and p >= stackBottom var - jmpbufSize {.importc: "sizeof(jmp_buf)".}: int + jmpbufSize {.importc: "sizeof(jmp_buf)", nodecl.}: int # a little hack to get the size of a TJmpBuf in the generated C code # in a platform independant way - proc markStackAndRegisters(gch: var TGcHeap) = + proc markStackAndRegisters(gch: var TGcHeap) {.noinline, cdecl.} = var max = stackBottom registers: C_JmpBuf # The jmp_buf buffer is in the C stack. sp: PPointer # Used to traverse the stack and registers assuming # that `setjmp' will save registers in the C stack. - c_setjmp(registers) # To fill the C stack with registers. - sp = cast[ppointer](cast[TAddress](addr(registers)) +% - jmpbufSize -% sizeof(pointer)) - # sp will traverse the JMP_BUF as well (jmp_buf size is added, - # otherwise sp would be below the registers structure). - while sp >= max: - gcMark(gch, sp^) - sp = cast[ppointer](cast[TAddress](sp) -% sizeof(pointer)) + if c_setjmp(registers) == 0: # To fill the C stack with registers. + sp = cast[ppointer](cast[TAddress](addr(registers)) +% + jmpbufSize -% sizeof(pointer)) + # sp will traverse the JMP_BUF as well (jmp_buf size is added, + # otherwise sp would be below the registers structure). + while sp >= max: + gcMark(sp^) + sp = cast[ppointer](cast[TAddress](sp) -% sizeof(pointer)) + +elif defined(I386) and asmVersion: + # addresses decrease as the stack grows: + proc isOnStack(p: pointer): bool = + var + stackTop: array [0..1, pointer] + result = p >= addr(stackTop[0]) and p <= stackBottom + + proc markStackAndRegisters(gch: var TGcHeap) {.noinline, cdecl.} = + # This code should be safe even for aggressive optimizers. The try + # statement safes all registers into the safepoint, which we + # scan additionally to the stack. + type + TPtrArray = array[0..0xffffff, pointer] + try: + var pa = cast[ptr TPtrArray](excHandler) + for i in 0 .. 
sizeof(TSafePoint) - 1: + gcMark(pa[i]) + finally: + # iterate over the stack: + var max = cast[TAddress](stackBottom) + var stackTop{.volatile.}: array [0..15, pointer] + var sp {.volatile.} = cast[TAddress](addr(stackTop[0])) + while sp <= max: + gcMark(cast[ppointer](sp)^) + sp = sp +% sizeof(pointer) + when false: + var counter = 0 + #mov ebx, OFFSET `stackBottom` + #mov ebx, [ebx] + asm """ + pusha + mov edi, esp + call `getStackBottom` + mov ebx, eax + L1: + cmp edi, ebx + ja L2 + mov eax, [edi] + call `gcMark` + add edi, 4 + inc [`counter`] + jmp L1 + L2: + popa + """ + cfprintf(cstdout, "stack %ld\n", counter) else: # --------------------------------------------------------------------------- @@ -949,67 +887,135 @@ else: stackTop: array [0..1, pointer] result = p >= addr(stackTop[0]) and p <= stackBottom - proc markStackAndRegisters(gch: var TGcHeap) = - var - max = stackBottom - registers: C_JmpBuf # The jmp_buf buffer is in the C stack. - sp: PPointer # Used to traverse the stack and registers assuming - # that `setjmp' will save registers in the C stack. - c_setjmp(registers) # To fill the C stack with registers. - sp = cast[ppointer](addr(registers)) - while sp <= max: - gcMark(gch, sp^) - sp = cast[ppointer](cast[TAddress](sp) +% sizeof(pointer)) + var + gRegisters: C_JmpBuf + jmpbufSize {.importc: "sizeof(jmp_buf)", nodecl.}: int + # a little hack to get the size of a TJmpBuf in the generated C code + # in a platform independant way + + proc markStackAndRegisters(gch: var TGcHeap) {.noinline, cdecl.} = + when true: + # new version: several C compilers are too smart here + var + max = cast[TAddress](stackBottom) + stackTop: array [0..15, pointer] + if c_setjmp(gregisters) == 0'i32: # To fill the C stack with registers. + # iterate over the registers: + var sp = cast[TAddress](addr(gregisters)) + while sp < cast[TAddress](addr(gregisters))+%jmpbufSize: + gcMark(cast[ppointer](sp)^) + sp = sp +% sizeof(pointer) + # iterate over the stack: + sp = cast[TAddress](addr(stackTop[0])) + while sp <= max: + gcMark(cast[ppointer](sp)^) + sp = sp +% sizeof(pointer) + else: + c_longjmp(gregisters, 42) + # this can never happen, but should trick any compiler that is + # not as smart as a human + else: + var + max = stackBottom + registers: C_JmpBuf # The jmp_buf buffer is in the C stack. + sp: PPointer # Used to traverse the stack and registers assuming + # that `setjmp' will save registers in the C stack. + if c_setjmp(registers) == 0'i32: # To fill the C stack with registers. + sp = cast[ppointer](addr(registers)) + while sp <= max: + gcMark(sp^) + sp = cast[ppointer](cast[TAddress](sp) +% sizeof(pointer)) # ---------------------------------------------------------------------------- # end of non-portable code # ---------------------------------------------------------------------------- +proc updateZCT() = + # We have to make an additional pass over the ZCT unfortunately, because + # the ZCT may be out of date, which means it contains cells with a + # refcount > 0. The reason is that ``incRef`` does not bother to remove + # the cell from the ZCT as this might be too slow. 
+ var j = 0 + var L = gch.zct.len # because globals make it hard for the optimizer + var d = gch.zct.d + while j < L: + var c = d[j] + if c.refcount >=% rcIncrement: + when logGC: writeCell("remove from ZCT", c) + # remove from ZCT: + dec(L) + d[j] = d[L] + c.refcount = c.refcount and not colorMask + # we have a new cell at position i, so don't increment i + else: + inc(j) + gch.zct.len = L + proc CollectZCT(gch: var TGcHeap) = - while gch.zct.len > 0: - var c = gch.zct.d[0] - assert(c.refcount <= 0) - # remove from ZCT: - gch.zct.d[0] = gch.zct.d[gch.zct.len-1] - gch.zct.d[0].refcount = 0 - dec(gch.zct.len) - # We are about to free the object, call the finalizer BEFORE its - # children are deleted as well, because otherwise the finalizer may - # access invalid memory. This is done by prepareDealloc(): - gcTrace(c, csZctFreed) - prepareDealloc(c) - forAllChildren(c, waZctDecRef) - excl(gch.at, c) - when defined(logGC): - writeCell("zct dealloc cell", c) - #when defined(debugGC) and defined(nimSize): zeroMem(c, nimSize(c)) - when reallyDealloc: dealloc(c) - -proc collectCT(gch: var TGcHeap) = - when defined(logGC): - c_fprintf(c_stdout, "collecting zero count table; stack size: %ld\n", - stackSize()) - when stressGC: assert(checkZCT()) - if gch.zct.len >= ZctThreshold or memUsed >= cycleThreshold or stressGC: + var i = 0 + while i < gch.zct.len: + var c = gch.zct.d[i] + assert(c.refcount <% rcIncrement) + assert((c.refcount and colorMask) == rcZct) + if canBeCycleRoot(c): excl(gch.cycleRoots, c) + if c notin gch.stackCells: + # remove from ZCT: + c.refcount = c.refcount and not colorMask + gch.zct.d[i] = gch.zct.d[gch.zct.len-1] + # we have a new cell at position i, so don't increment i + dec(gch.zct.len) + when logGC: writeCell("zct dealloc cell", c) + gcTrace(c, csZctFreed) + # We are about to free the object, call the finalizer BEFORE its + # children are deleted as well, because otherwise the finalizer may + # access invalid memory. 
This is done by prepareDealloc(): + prepareDealloc(c) + forAllChildren(c, waZctDecRef) + when reallyDealloc: tlsf_free(c) + else: + assert(c.typ != nil) + zeroMem(c, sizeof(TCell)) + else: + inc(i) + when stressGC: + for j in 0..gch.zct.len-1: assert(gch.zct.d[j] in gch.stackCells) + +proc collectCT(gch: var TGcHeap, zctUpdated: bool) = + if gch.zct.len >= ZctThreshold or (cycleGC and + getOccupiedMem() >= cycleThreshold) or stressGC: + if not zctUpdated: updateZCT() + gch.maxStackSize = max(gch.maxStackSize, stackSize()) + CellSetInit(gch.stackCells) markStackAndRegisters(gch) - when stressGC: assert(GC_invariant()) + gch.maxStackPages = max(gch.maxStackPages, gch.stackCells.counter) + inc(gch.stackScans) collectZCT(gch) - when stressGC: assert(GC_invariant()) - assert(gch.zct.len == 0) - when defined(cycleGC): - if memUsed >= cycleThreshold or stressGC: - when defined(logGC): - c_fprintf(c_stdout, "collecting cycles; memory used: %ld\n", memUsed) + when cycleGC: + if getOccupiedMem() >= cycleThreshold or stressGC: collectCycles(gch) - cycleThreshold = max(InitialCycleThreshold, memUsed * cycleIncrease) - when defined(logGC): - c_fprintf(c_stdout, "now used: %ld; threshold: %ld\n", - memUsed, cycleThreshold) - unmarkStackAndRegisters(gch) - when stressGC: assert(GC_invariant()) + collectZCT(gch) + inc(gch.cycleCollections) + cycleThreshold = max(InitialCycleThreshold, getOccupiedMem() * + cycleIncrease) + gch.maxThreshold = max(gch.maxThreshold, cycleThreshold) + CellSetDeinit(gch.stackCells) proc GC_fullCollect() = var oldThreshold = cycleThreshold cycleThreshold = 0 # forces cycle collection - collectCT(gch) + collectCT(gch, false) cycleThreshold = oldThreshold + +proc GC_getStatistics(): string = + GC_disable() + result = "[GC] total memory: " & $(getTotalMem()) & "\n" & + "[GC] occupied memory: " & $(getOccupiedMem()) & "\n" & + "[GC] stack scans: " & $gch.stackScans & "\n" & + "[GC] stack pages: " & $gch.maxStackPages & "\n" & + "[GC] cycle collections: " & $gch.cycleCollections & "\n" & + "[GC] max threshold: " & $gch.maxThreshold & "\n" & + "[GC] zct capacity: " & $gch.zct.cap & "\n" & + "[GC] max cycle table size: " & $gch.cycleTableSize & "\n" & + "[GC] max stack size: " & $gch.maxStackSize + when traceGC: writeLeakage() + GC_enable() |
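
For readers new to the scheme described in the new header comments, the collector defers frees through a "zero count table" (ZCT): a cell whose counter drops to zero is only queued, and is reclaimed later if it is still unreferenced and not reachable from the stack. The standalone sketch below illustrates that idea in plain Nim; it is not the code from this commit, and the names (Cell, zct, collectZct) as well as the onStack stand-in for the conservative stack scan are invented for illustration.

type
  Cell = ref object
    name: string
    rc: int          # reference count maintained by the mutator
    inZct: bool      # mirrors the rcZct colour bit in the real collector

var zct: seq[Cell]   # the zero count table

proc incRef(c: Cell) = inc c.rc

proc decRef(c: Cell) =
  dec c.rc
  if c.rc == 0 and not c.inZct:
    c.inZct = true
    zct.add c        # deferred: nothing is freed here

# stand-in for the conservative stack scan; always false in this sketch
proc onStack(c: Cell): bool = false

proc collectZct() =
  var i = 0
  while i < zct.len:
    let c = zct[i]
    if onStack(c):
      inc i                      # possibly still referenced from the stack
    else:
      zct[i] = zct[^1]           # swap-remove; the next candidate lands at i
      zct.setLen(zct.len - 1)
      c.inZct = false
      if c.rc == 0:
        echo "freeing ", c.name  # a real GC runs the finalizer and decRefs
                                 # the children before releasing the memory

when isMainModule:
  var a = Cell(name: "a", rc: 1)
  decRef a        # drops to zero: enters the ZCT, but is not freed yet
  collectZct()    # only now is it actually reclaimed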
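
The cell sets used for cycleRoots and stackCells are hash tables of page descriptors: one entry per 4 KB page, holding one bit for every aligned address inside that page, so an arbitrary pointer can be tested for membership cheaply. Below is a rough sketch of that layout, not the commit's implementation: it uses a standard Table in place of the hand-rolled open-addressing table, and the constants (4096-byte pages, 8-byte alignment) are assumptions for the example.

import std/tables

const
  PageSize = 4096                    # assumed page size (PageShift = 12)
  Align = 8                          # assumed minimal allocation granularity
  BitsPerUnit = sizeof(int) * 8      # as in the GC: bits in one machine word
  BitsPerPage = PageSize div Align   # one bit per aligned address in a page

type
  PageBits = array[BitsPerPage div BitsPerUnit, int]
  CellSet = object
    pages: Table[int, PageBits]      # key: page number (address div PageSize)

proc incl(s: var CellSet, a: int) =
  let slot = (a mod PageSize) div Align
  discard s.pages.hasKeyOrPut(a div PageSize, default(PageBits))
  s.pages[a div PageSize][slot div BitsPerUnit] =
    s.pages[a div PageSize][slot div BitsPerUnit] or
    (1 shl (slot mod BitsPerUnit))

proc contains(s: CellSet, a: int): bool =
  if (a div PageSize) notin s.pages: return false
  let slot = (a mod PageSize) div Align
  result = (s.pages[a div PageSize][slot div BitsPerUnit] and
            (1 shl (slot mod BitsPerUnit))) != 0

when isMainModule:
  var s: CellSet
  s.incl(0x081cdfb8)                 # the address from the removed test code
  assert 0x081cdfb8 in s
  assert 0x081cdfc0 notin s          # same page, different bit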
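
The cycle collector itself works by trial deletion over the candidate roots recorded by incRef/decRef: withdraw the references the candidates hold on their children, restore everything that is still externally referenced, and free whatever remains at zero. The toy sketch below shows only that core idea on a small object graph; it leaves out the stack handling, the colour bits and the ZCT hand-off for leaves, and it assumes, as the real code does, that every possible member of a cycle is itself in the candidate set.

type
  Node = ref object
    name: string
    rc: int                # reference count
    children: seq[Node]    # outgoing references

proc trialDelete(roots: seq[Node]) =
  # phase 1: withdraw the references the candidates hold on their children
  for r in roots:
    for c in r.children: dec c.rc
  # phase 2: candidates that are still externally referenced hand their
  # references back, transitively through other candidates
  var restored: seq[Node]
  proc restore(r: Node) =
    if r in restored: return
    restored.add r
    for c in r.children:
      inc c.rc
      if c in roots: restore(c)
  for r in roots:
    if r.rc > 0: restore(r)
  # phase 3: candidates still at zero belong to dead cycles
  for r in roots:
    if r.rc == 0:
      echo "cycle garbage: ", r.name

when isMainModule:
  # a and b only reference each other; the cycle keeps both counts at 1
  let a = Node(name: "a", rc: 1)
  let b = Node(name: "b", rc: 1)
  a.children.add b
  b.children.add a
  trialDelete(@[a, b])     # reports both as cycle garbage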