author    | Andreas Rumpf <rumpf_a@web.de> | 2016-05-22 15:01:36 +0200
committer | Andreas Rumpf <rumpf_a@web.de> | 2016-05-22 15:01:36 +0200
commit    | 2ecf4633f441dfe654e51a5528baf6449f08a528 (patch)
tree      | 3b4db962c8512699afade57e8c059460bb4f5e60
parent    | ec7d556c208725f336ff626c7be299cc14638e92 (diff)
parent    | dcf830bba9f69676c9cb66f5b3654f1817ad68d1 (diff)
Merge branch 'prim-gc' into devel
-rw-r--r-- | compiler/ccgstmts.nim          |   2
-rw-r--r-- | lib/system/alloc.nim           |  61
-rw-r--r-- | lib/system/gc.nim              | 175
-rw-r--r-- | tests/gc/thavlak.nim           | 457
-rw-r--r-- | tests/gc/tlists.nim            |  37
-rw-r--r-- | tests/testament/categories.nim |   3

6 files changed, 549 insertions(+), 186 deletions(-)
diff --git a/compiler/ccgstmts.nim b/compiler/ccgstmts.nim
index 988c923c8..61412ad67 100644
--- a/compiler/ccgstmts.nim
+++ b/compiler/ccgstmts.nim
@@ -16,7 +16,7 @@ const
   # above X strings a hash-switch for strings is generated

 proc registerGcRoot(p: BProc, v: PSym) =
-  if gSelectedGC in {gcMarkAndSweep, gcGenerational, gcV2} and
+  if gSelectedGC in {gcMarkAndSweep, gcGenerational, gcV2, gcRefc} and
       containsGarbageCollectedRef(v.loc.t):
     # we register a specialized marked proc here; this has the advantage
     # that it works out of the box for thread local storage then :-)
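Note on the one compiler-side change: with the backup mark&sweep enabled below, even the default refcounting GC (gcRefc) must know every global that contains GC'd references, so it joins the set of collectors for which the codegen registers specialized marker procs. A minimal sketch of that registration pattern, with hypothetical names standing in for the generated code (not the compiler's actual output):

type Marker = proc () {.nimcall.}

var
  globalMarkers: array[64, Marker]   # hypothetical fixed-size registry
  globalMarkersLen: int

proc registerGlobalMarker(m: Marker) =
  globalMarkers[globalMarkersLen] = m
  inc globalMarkersLen

var someGlobal: ref int              # stands in for a user's global 'ref'

proc markSomeGlobal() {.nimcall.} =
  # the generated marker would mark the cell; here we just show the hook
  if someGlobal != nil:
    echo "marking a global holding ", someGlobal[]

registerGlobalMarker(markSomeGlobal)
new(someGlobal)
someGlobal[] = 42
for i in 0 .. globalMarkersLen-1:    # what markGlobals does in gc.nim
  globalMarkers[i]()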
magic: "Plugin", compileTime.} @@ -385,7 +385,7 @@ proc freeBigChunk(a: var MemRegion, c: PBigChunk) = excl(a.chunkStarts, pageIndex(c)) c = cast[PBigChunk](le) - if c.size < ChunkOsReturn or doNotUnmap: + if c.size < ChunkOsReturn or doNotUnmap or a.locked: incl(a, a.chunkStarts, pageIndex(c)) updatePrevSize(a, c, c.size) listAdd(a.freeChunksList, c) @@ -442,26 +442,29 @@ proc getSmallChunk(a: var MemRegion): PSmallChunk = # ----------------------------------------------------------------------------- proc isAllocatedPtr(a: MemRegion, p: pointer): bool {.benign.} -proc allocInv(a: MemRegion): bool = - ## checks some (not all yet) invariants of the allocator's data structures. - for s in low(a.freeSmallChunks)..high(a.freeSmallChunks): - var c = a.freeSmallChunks[s] - while not (c == nil): - if c.next == c: - echo "[SYSASSERT] c.next == c" - return false - if not (c.size == s * MemAlign): - echo "[SYSASSERT] c.size != s * MemAlign" - return false - var it = c.freeList - while not (it == nil): - if not (it.zeroField == 0): - echo "[SYSASSERT] it.zeroField != 0" - c_printf("%ld %p\n", it.zeroField, it) +when true: + template allocInv(a: MemRegion): bool = true +else: + proc allocInv(a: MemRegion): bool = + ## checks some (not all yet) invariants of the allocator's data structures. + for s in low(a.freeSmallChunks)..high(a.freeSmallChunks): + var c = a.freeSmallChunks[s] + while not (c == nil): + if c.next == c: + echo "[SYSASSERT] c.next == c" return false - it = it.next - c = c.next - result = true + if not (c.size == s * MemAlign): + echo "[SYSASSERT] c.size != s * MemAlign" + return false + var it = c.freeList + while not (it == nil): + if not (it.zeroField == 0): + echo "[SYSASSERT] it.zeroField != 0" + c_printf("%ld %p\n", it.zeroField, it) + return false + it = it.next + c = c.next + result = true proc rawAlloc(a: var MemRegion, requestedSize: int): pointer = sysAssert(allocInv(a), "rawAlloc: begin") diff --git a/lib/system/gc.nim b/lib/system/gc.nim index 5c2170a17..ed5b2d1e9 100644 --- a/lib/system/gc.nim +++ b/lib/system/gc.nim @@ -1,7 +1,7 @@ # # # Nim's Runtime Library -# (c) Copyright 2015 Andreas Rumpf +# (c) Copyright 2016 Andreas Rumpf # # See the file "copying.txt", included in this # distribution, for details about the copyright. @@ -9,13 +9,8 @@ # Garbage Collector # -# The basic algorithm is *Deferred Reference Counting* with cycle detection. -# This is achieved by combining a Deutsch-Bobrow garbage collector -# together with Christoper's partial mark-sweep garbage collector. -# -# Special care has been taken to avoid recursion as far as possible to avoid -# stack overflows when traversing deep datastructures. It is well-suited -# for soft real time applications (like games). +# Refcounting + Mark&Sweep. Complex algorithms avoided. +# Been there, done that, didn't work. 
diff --git a/lib/system/gc.nim b/lib/system/gc.nim
index 5c2170a17..ed5b2d1e9 100644
--- a/lib/system/gc.nim
+++ b/lib/system/gc.nim
@@ -1,7 +1,7 @@
 #
 #
 #            Nim's Runtime Library
-#        (c) Copyright 2015 Andreas Rumpf
+#        (c) Copyright 2016 Andreas Rumpf
 #
 #    See the file "copying.txt", included in this
 #    distribution, for details about the copyright.
@@ -9,13 +9,8 @@
 # Garbage Collector
 #
-# The basic algorithm is *Deferred Reference Counting* with cycle detection.
-# This is achieved by combining a Deutsch-Bobrow garbage collector
-# together with Christoper's partial mark-sweep garbage collector.
-#
-# Special care has been taken to avoid recursion as far as possible to avoid
-# stack overflows when traversing deep datastructures. It is well-suited
-# for soft real time applications (like games).
+# Refcounting + Mark&Sweep. Complex algorithms avoided.
+# Been there, done that, didn't work.

 when defined(nimCoroutines):
   import arch

@@ -30,7 +25,7 @@ const
   # this seems to be a good value
   withRealTime = defined(useRealtimeGC)
   useMarkForDebug = defined(gcGenerational)
-  useBackupGc = false # use a simple M&S GC to collect
+  useBackupGc = true  # use a simple M&S GC to collect
                       # cycles instead of the complex
                       # algorithm

@@ -55,8 +50,7 @@ type
   WalkOp = enum
     waMarkGlobal,  # part of the backup/debug mark&sweep
     waMarkPrecise, # part of the backup/debug mark&sweep
-    waZctDecRef, waPush, waCycleDecRef, waMarkGray, waScan, waScanBlack,
-    waCollectWhite #, waDebug
+    waZctDecRef, waPush

   Finalizer {.compilerproc.} = proc (self: pointer) {.nimcall, benign.}
     # A ref type can have a finalizer that is called before the object's
@@ -87,7 +81,6 @@ type
     idGenerator: int
     zct: CellSeq             # the zero count table
     decStack: CellSeq        # cells in the stack that are to decref again
-    cycleRoots: CellSet
     tempStack: CellSeq       # temporary stack for recursion elimination
     recGcLock: int           # prevent recursion via finalizers; no thread lock
     when withRealTime:
@@ -136,9 +129,6 @@ proc usrToCell(usr: pointer): PCell {.inline.} =
   # convert pointer to userdata to object (=pointer to refcount)
   result = cast[PCell](cast[ByteAddress](usr)-%ByteAddress(sizeof(Cell)))

-proc canBeCycleRoot(c: PCell): bool {.inline.} =
-  result = ntfAcyclic notin c.typ.flags
-
 proc extGetCellType(c: pointer): PNimType {.compilerproc.} =
   # used for code generation concerning debugging
   result = usrToCell(c).typ
@@ -200,14 +190,16 @@ proc prepareDealloc(cell: PCell) =
     (cast[Finalizer](cell.typ.finalizer))(cellToUsr(cell))
     dec(gch.recGcLock)

+template beforeDealloc(gch: var GcHeap; c: PCell; msg: typed) =
+  when false:
+    for i in 0..gch.decStack.len-1:
+      if gch.decStack.d[i] == c:
+        sysAssert(false, msg)
+
 proc rtlAddCycleRoot(c: PCell) {.rtl, inl.} =
   # we MUST access gch as a global here, because this crosses DLL boundaries!
   when hasThreadSupport and hasSharedHeap:
     acquireSys(HeapLock)
-  when cycleGC:
-    if c.color != rcPurple:
-      c.setColor(rcPurple)
-      incl(gch.cycleRoots, c)
   when hasThreadSupport and hasSharedHeap:
     releaseSys(HeapLock)

@@ -224,19 +216,12 @@ proc decRef(c: PCell) {.inline.} =
   gcAssert(c.refcount >=% rcIncrement, "decRef")
   if --c.refcount:
     rtlAddZCT(c)
-  elif canbeCycleRoot(c):
-    # unfortunately this is necessary here too, because a cycle might just
-    # have been broken up and we could recycle it.
-    rtlAddCycleRoot(c)
-    #writeCell("decRef", c)

 proc incRef(c: PCell) {.inline.} =
   gcAssert(isAllocatedPtr(gch.region, c), "incRef: interiorPtr")
   c.refcount = c.refcount +% rcIncrement
   # and not colorMask
   #writeCell("incRef", c)
-  if canbeCycleRoot(c):
-    rtlAddCycleRoot(c)

 proc nimGCref(p: pointer) {.compilerProc, inline.} = incRef(usrToCell(p))
 proc nimGCunref(p: pointer) {.compilerProc, inline.} = decRef(usrToCell(p))
@@ -306,7 +291,6 @@ proc initGC() =
     # init the rt
     init(gch.zct)
     init(gch.tempStack)
-    init(gch.cycleRoots)
     init(gch.decStack)
     when useMarkForDebug or useBackupGc:
       init(gch.marked)
@@ -563,7 +547,7 @@ proc growObj(old: pointer, newsize: int, gch: var GcHeap): pointer =
           d[j] = res
           break
         dec(j)
-    if canbeCycleRoot(ol): excl(gch.cycleRoots, ol)
+    beforeDealloc(gch, ol, "growObj stack trash")
     rawDealloc(gch.region, ol)
   else:
     # we split the old refcount in 2 parts. XXX This is still not entirely
@@ -597,54 +581,12 @@ proc freeCyclicCell(gch: var GcHeap, c: PCell) =
   when logGC: writeCell("cycle collector dealloc cell", c)
   when reallyDealloc:
     sysAssert(allocInv(gch.region), "free cyclic cell")
+    beforeDealloc(gch, c, "freeCyclicCell: stack trash")
     rawDealloc(gch.region, c)
   else:
     gcAssert(c.typ != nil, "freeCyclicCell")
     zeroMem(c, sizeof(Cell))

-proc markGray(s: PCell) =
-  if s.color != rcGray:
-    setColor(s, rcGray)
-    forAllChildren(s, waMarkGray)
-
-proc scanBlack(s: PCell) =
-  s.setColor(rcBlack)
-  forAllChildren(s, waScanBlack)
-
-proc scan(s: PCell) =
-  if s.color == rcGray:
-    if s.refcount >=% rcIncrement:
-      scanBlack(s)
-    else:
-      s.setColor(rcWhite)
-      forAllChildren(s, waScan)
-
-proc collectWhite(s: PCell) =
-  # This is a hacky way to deal with the following problem (bug #1796)
-  # Consider this content in cycleRoots:
-  #   x -> a; y -> a  where 'a' is an acyclic object so not included in
-  # cycleRoots itself. Then 'collectWhite' used to free 'a' twice. The
-  # 'isAllocatedPtr' check prevents this. This also means we do not need
-  # to query 's notin gch.cycleRoots' at all.
-  if isAllocatedPtr(gch.region, s) and s.color == rcWhite:
-    s.setColor(rcBlack)
-    forAllChildren(s, waCollectWhite)
-    freeCyclicCell(gch, s)
-
-proc markRoots(gch: var GcHeap) =
-  var tabSize = 0
-  for s in elements(gch.cycleRoots):
-    #writeCell("markRoot", s)
-    inc tabSize
-    if s.color == rcPurple and s.refcount >=% rcIncrement:
-      markGray(s)
-    else:
-      excl(gch.cycleRoots, s)
-      # (s.color == rcBlack and rc == 0) as 1 condition:
-      if s.refcount == 0:
-        freeCyclicCell(gch, s)
-  gch.stat.cycleTableSize = max(gch.stat.cycleTableSize, tabSize)
-
 when useBackupGc:
   proc sweep(gch: var GcHeap) =
     for x in allObjects(gch.region):
@@ -667,16 +609,6 @@ when useMarkForDebug or useBackupGc:
   proc markGlobals(gch: var GcHeap) =
     for i in 0 .. < globalMarkersLen: globalMarkers[i]()

-  proc stackMarkS(gch: var GcHeap, p: pointer) {.inline.} =
-    # the addresses are not as cells on the stack, so turn them to cells:
-    var cell = usrToCell(p)
-    var c = cast[ByteAddress](cell)
-    if c >% PageSize:
-      # fast check: does it look like a cell?
-      var objStart = cast[PCell](interiorAllocatedPtr(gch.region, cell))
-      if objStart != nil:
-        markS(gch, objStart)
-
 when logGC:
   var
     cycleCheckA: array[100, PCell]
@@ -717,19 +649,6 @@ proc doOperation(p: pointer, op: WalkOp) =
     #if c.refcount <% rcIncrement: addZCT(gch.zct, c)
   of waPush:
     add(gch.tempStack, c)
-  of waCycleDecRef:
-    gcAssert(c.refcount >=% rcIncrement, "doOperation 3")
-    c.refcount = c.refcount -% rcIncrement
-  of waMarkGray:
-    gcAssert(c.refcount >=% rcIncrement, "waMarkGray")
-    c.refcount = c.refcount -% rcIncrement
-    markGray(c)
-  of waScan: scan(c)
-  of waScanBlack:
-    c.refcount = c.refcount +% rcIncrement
-    if c.color != rcBlack:
-      scanBlack(c)
-  of waCollectWhite: collectWhite(c)
   of waMarkGlobal:
     when useMarkForDebug or useBackupGc:
       when hasThreadSupport:
@@ -748,14 +667,6 @@ proc nimGCvisit(d: pointer, op: int) {.compilerRtl.} =

 proc collectZCT(gch: var GcHeap): bool {.benign.}

-when useMarkForDebug or useBackupGc:
-  proc markStackAndRegistersForSweep(gch: var GcHeap) {.noinline, cdecl,
-                                                        benign.}
-
-proc collectRoots(gch: var GcHeap) =
-  for s in elements(gch.cycleRoots):
-    collectWhite(s)
-
 proc collectCycles(gch: var GcHeap) =
   when hasThreadSupport:
     for c in gch.toDispose:
@@ -764,33 +675,12 @@ proc collectCycles(gch: var GcHeap) =
   while gch.zct.len > 0: discard collectZCT(gch)
   when useBackupGc:
     cellsetReset(gch.marked)
-    markStackAndRegistersForSweep(gch)
-    markGlobals(gch)
-    sweep(gch)
-  else:
-    markRoots(gch)
-    # scanRoots:
-    for s in elements(gch.cycleRoots): scan(s)
-    collectRoots(gch)
-
-    cellsetReset(gch.cycleRoots)
-  # alive cycles need to be kept in 'cycleRoots' if they are referenced
-  # from the stack; otherwise the write barrier will add the cycle root again
-  # anyway:
-  when false:
   var d = gch.decStack.d
-    var cycleRootsLen = 0
   for i in 0..gch.decStack.len-1:
-      var c = d[i]
-      gcAssert isAllocatedPtr(gch.region, c), "addBackStackRoots"
-      gcAssert c.refcount >=% rcIncrement, "addBackStackRoots: dead cell"
-      if canBeCycleRoot(c):
-        #if c notin gch.cycleRoots:
-        inc cycleRootsLen
-        incl(gch.cycleRoots, c)
-      gcAssert c.typ != nil, "addBackStackRoots 2"
-    if cycleRootsLen != 0:
-      cfprintf(cstdout, "cycle roots: %ld\n", cycleRootsLen)
+    sysAssert isAllocatedPtr(gch.region, d[i]), "collectCycles"
+    markS(gch, d[i])
+  markGlobals(gch)
+  sweep(gch)

 proc gcMark(gch: var GcHeap, p: pointer) {.inline.} =
   # the addresses are not as cells on the stack, so turn them to cells:
@@ -812,31 +702,11 @@ proc gcMark(gch: var GcHeap, p: pointer) {.inline.} =
         add(gch.decStack, cell)
   sysAssert(allocInv(gch.region), "gcMark end")

-proc markThreadStacks(gch: var GcHeap) =
-  when hasThreadSupport and hasSharedHeap:
-    {.error: "not fully implemented".}
-    var it = threadList
-    while it != nil:
-      # mark registers:
-      for i in 0 .. high(it.registers): gcMark(gch, it.registers[i])
-      var sp = cast[ByteAddress](it.stackBottom)
-      var max = cast[ByteAddress](it.stackTop)
-      # XXX stack direction?
-      # XXX unroll this loop:
-      while sp <=% max:
-        gcMark(gch, cast[ppointer](sp)[])
-        sp = sp +% sizeof(pointer)
-      it = it.next
-
 include gc_common

 proc markStackAndRegisters(gch: var GcHeap) {.noinline, cdecl.} =
   forEachStackSlot(gch, gcMark)

-when useMarkForDebug or useBackupGc:
-  proc markStackAndRegistersForSweep(gch: var GcHeap) =
-    forEachStackSlot(gch, stackMarkS)
-
 proc collectZCT(gch: var GcHeap): bool =
   # Note: Freeing may add child objects to the ZCT! So essentially we do
   # deep freeing, which is bad for incremental operation. In order to
@@ -866,8 +736,6 @@ proc collectZCT(gch: var GcHeap): bool =
       #   as this might be too slow.
      #   In any case, it should be removed from the ZCT. But not
      #   freed. **KEEP THIS IN MIND WHEN MAKING THIS INCREMENTAL!**
-      when cycleGC:
-        if canbeCycleRoot(c): excl(gch.cycleRoots, c)
       when logGC: writeCell("zct dealloc cell", c)
       gcTrace(c, csZctFreed)
       # We are about to free the object, call the finalizer BEFORE its
@@ -877,6 +745,7 @@ proc collectZCT(gch: var GcHeap): bool =
       forAllChildren(c, waZctDecRef)
       when reallyDealloc:
         sysAssert(allocInv(gch.region), "collectZCT: rawDealloc")
+        beforeDealloc(gch, c, "collectZCT: stack trash")
         rawDealloc(gch.region, c)
       else:
         sysAssert(c.typ != nil, "collectZCT 2")
@@ -915,7 +784,6 @@ proc collectCTBody(gch: var GcHeap) =
   sysAssert(gch.decStack.len == 0, "collectCT")
   prepareForInteriorPointerChecking(gch.region)
   markStackAndRegisters(gch)
-  markThreadStacks(gch)
   gch.stat.maxStackCells = max(gch.stat.maxStackCells, gch.decStack.len)
   inc(gch.stat.stackScans)
   if collectZCT(gch):
@@ -937,11 +805,6 @@ proc collectCTBody(gch: var GcHeap) =
     if gch.maxPause > 0 and duration > gch.maxPause:
       c_fprintf(c_stdout, "[GC] missed deadline: %ld\n", duration)

-when useMarkForDebug or useBackupGc:
-  proc markForDebug(gch: var GcHeap) =
-    markStackAndRegistersForSweep(gch)
-    markGlobals(gch)
-
 when defined(nimCoroutines):
   proc currentStackSizes(): int =
     for stack in items(gch.stack):
@@ -1035,7 +898,7 @@ when not defined(useNimRtl):
         "[GC] max threshold: " & $gch.stat.maxThreshold & "\n" &
         "[GC] zct capacity: " & $gch.zct.cap & "\n" &
         "[GC] max cycle table size: " & $gch.stat.cycleTableSize & "\n" &
-        "[GC] max pause time [ms]: " & $(gch.stat.maxPause div 1000_000)
+        "[GC] max pause time [ms]: " & $(gch.stat.maxPause div 1000_000) & "\n"
     when defined(nimCoroutines):
       result = result & "[GC] number of stacks: " & $gch.stack.len & "\n"
       for stack in items(gch.stack):
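The net effect on gc.nim: the trial-deletion cycle collector (markGray/scan/scanBlack/collectWhite over cycleRoots) is removed, and collectCycles is now just the backup mark&sweep — flush the ZCT, mark everything reachable from stack cells and registered globals, sweep the rest. A toy version of that control flow over an explicit graph (a hedged sketch; the runtime works on raw cells, avoids recursion, and marks conservatively from the stack):

import sequtils

type
  Node = ref object
    marked: bool
    children: seq[Node]

proc markS(n: Node) =
  # mark phase: everything reachable from a root survives
  if n == nil or n.marked: return
  n.marked = true
  for c in n.children: markS(c)

proc collectCycles(heap: var seq[Node]; roots: seq[Node]) =
  for n in heap: n.marked = false   # cellsetReset(gch.marked)
  for r in roots: markS(r)          # decStack cells + markGlobals
  heap.keepItIf(it.marked)          # sweep: drop whatever stayed unmarked

var a = Node(children: @[])
var b = Node(children: @[a])
a.children.add b                    # a <-> b: a dead reference cycle
let root = Node(children: @[])
var heap = @[a, b, root]
collectCycles(heap, @[root])
echo heap.len                       # 1 -- the cycle is gone

Pure refcounting could never reclaim the a <-> b pair; the periodic mark&sweep pass handles exactly that case, which is what the two new tests below exercise.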
diff --git a/tests/gc/thavlak.nim b/tests/gc/thavlak.nim
new file mode 100644
index 000000000..efab49e36
--- /dev/null
+++ b/tests/gc/thavlak.nim
@@ -0,0 +1,457 @@
+discard """
+  output: '''Welcome to LoopTesterApp, Nim edition
+Constructing Simple CFG...
+15000 dummy loops
+Constructing CFG...
+Performing Loop Recognition
+1 Iteration
+Another 50 iterations...
+..................................................
+Found 1 loops (including artificial root node) (50)'''
+"""
+
+# bug #3184
+
+import tables
+import sequtils
+import sets
+
+type
+  BasicBlock = object
+    inEdges: seq[ref BasicBlock]
+    outEdges: seq[ref BasicBlock]
+    name: int
+
+proc newBasicBlock(name: int): ref BasicBlock =
+  new(result)
+  result.inEdges = newSeq[ref BasicBlock]()
+  result.outEdges = newSeq[ref BasicBlock]()
+  result.name = name
+
+proc hash(x: ref BasicBlock): int {.inline.} =
+  result = x.name
+
+type
+  BasicBlockEdge = object
+    fr: ref BasicBlock
+    to: ref BasicBlock
+
+  Cfg = object
+    basicBlockMap: Table[int, ref BasicBlock]
+    edgeList: seq[BasicBlockEdge]
+    startNode: ref BasicBlock
+
+proc newCfg(): Cfg =
+  result.basicBlockMap = initTable[int, ref BasicBlock]()
+  result.edgeList = newSeq[BasicBlockEdge]()
+
+proc createNode(self: var Cfg, name: int): ref BasicBlock =
+  result = self.basicBlockMap.getOrDefault(name)
+  if result == nil:
+    result = newBasicBlock(name)
+    self.basicBlockMap.add name, result
+
+  if self.startNode == nil:
+    self.startNode = result
+
+proc addEdge(self: var Cfg, edge: BasicBlockEdge) =
+  self.edgeList.add(edge)
+
+proc getNumNodes(self: Cfg): int =
+  self.basicBlockMap.len
+
+proc newBasicBlockEdge(cfg: var Cfg, fromName: int, toName: int): BasicBlockEdge =
+  result.fr = cfg.createNode(fromName)
+  result.to = cfg.createNode(toName)
+  result.fr.outEdges.add(result.to)
+  result.to.inEdges.add(result.fr)
+  cfg.addEdge(result)
+
+type
+  SimpleLoop = object
+    basicBlocks: seq[ref BasicBlock] # TODO: set here
+    children: seq[ref SimpleLoop]    # TODO: set here
+    parent: ref SimpleLoop
+    header: ref BasicBlock
+    isRoot: bool
+    isReducible: bool
+    counter: int
+    nestingLevel: int
+    depthLevel: int
+
+proc newSimpleLoop(): ref SimpleLoop =
+  new(result)
+  result.basicBlocks = newSeq[ref BasicBlock]()
+  result.children = newSeq[ref SimpleLoop]()
+  result.parent = nil
+  result.header = nil
+  result.isRoot = false
+  result.isReducible = true
+  result.counter = 0
+  result.nestingLevel = 0
+  result.depthLevel = 0
+
+proc addNode(self: ref SimpleLoop, bb: ref BasicBlock) =
+  self.basicBlocks.add bb
+
+proc addChildLoop(self: ref SimpleLoop, loop: ref SimpleLoop) =
+  self.children.add loop
+
+proc setParent(self: ref SimpleLoop, parent: ref SimpleLoop) =
+  self.parent = parent
+  self.parent.addChildLoop(self)
+
+proc setHeader(self: ref SimpleLoop, bb: ref BasicBlock) =
+  self.basicBlocks.add(bb)
+  self.header = bb
+
+proc setNestingLevel(self: ref SimpleLoop, level: int) =
+  self.nestingLevel = level
+  if level == 0: self.isRoot = true
+
+var loop_counter: int = 0
+
+type
+  Lsg = object
+    loops: seq[ref SimpleLoop]
+    root: ref SimpleLoop
+
+proc createNewLoop(self: var Lsg): ref SimpleLoop =
+  result = newSimpleLoop()
+  loop_counter += 1
+  result.counter = loop_counter
+
+proc addLoop(self: var Lsg, l: ref SimpleLoop) =
+  self.loops.add l
+
+proc newLsg(): Lsg =
+  result.loops = newSeq[ref SimpleLoop]()
+  result.root = result.createNewLoop()
+  result.root.setNestingLevel(0)
+  result.addLoop(result.root)
+
+proc getNumLoops(self: Lsg): int =
+  self.loops.len
+
+type
+  UnionFindNode = object
+    parent: ref UnionFindNode
+    bb: ref BasicBlock
+    l: ref SimpleLoop
+    dfsNumber: int
+
+proc newUnionFindNode(): ref UnionFindNode =
+  new(result)
+  when false:
+    result.parent = nil
+    result.bb = nil
+    result.l = nil
+    result.dfsNumber = 0
+
+proc initNode(self: ref UnionFindNode, bb: ref BasicBlock, dfsNumber: int) =
+  self.parent = self
+  self.bb = bb
+  self.dfsNumber = dfsNumber
+
+proc findSet(self: ref UnionFindNode): ref UnionFindNode =
+  var nodeList = newSeq[ref UnionFindNode]()
+  result = self
+
+  while result != result.parent:
+    var parent = result.parent
+    if parent != parent.parent: nodeList.add result
+    result = parent
+
+  for iter in nodeList: iter.parent = result.parent
+
+proc union(self: ref UnionFindNode, unionFindNode: ref UnionFindNode) =
+  self.parent = unionFindNode
+
+
+const
+  BB_TOP = 0          # uninitialized
+  BB_NONHEADER = 1    # a regular BB
+  BB_REDUCIBLE = 2    # reducible loop
+  BB_SELF = 3         # single BB loop
+  BB_IRREDUCIBLE = 4  # irreducible loop
+  BB_DEAD = 5         # a dead BB
+  BB_LAST = 6         # Sentinel
+
+  # Marker for uninitialized nodes.
+  UNVISITED = -1
+
+  # Safeguard against pathologic algorithm behavior.
+  MAXNONBACKPREDS = (32 * 1024)
+
+type
+  HavlakLoopFinder = object
+    cfg: Cfg
+    lsg: Lsg
+
+proc newHavlakLoopFinder(cfg: Cfg, lsg: Lsg): HavlakLoopFinder =
+  result.cfg = cfg
+  result.lsg = lsg
+
+proc isAncestor(w: int, v: int, last: seq[int]): bool =
+  w <= v and v <= last[w]
+
+proc dfs(currentNode: ref BasicBlock, nodes: var seq[ref UnionFindNode],
+         number: var Table[ref BasicBlock, int],
+         last: var seq[int], current: int): int =
+  var stack = @[(currentNode, current)]
+  while stack.len > 0:
+    let (currentNode, current) = stack.pop()
+    nodes[current].initNode(currentNode, current)
+    number[currentNode] = current
+
+    result = current
+    for target in currentNode.outEdges:
+      if number[target] == UNVISITED:
+        stack.add((target, result+1))
+        #result = dfs(target, nodes, number, last, result + 1)
+    last[number[currentNode]] = result
+
+proc findLoops(self: var HavlakLoopFinder): int =
+  var startNode = self.cfg.startNode
+  if startNode == nil: return 0
+  var size = self.cfg.getNumNodes
+
+  var nonBackPreds = newSeq[HashSet[int]]()
+  var backPreds = newSeq[seq[int]]()
+  var number = initTable[ref BasicBlock, int]()
+  var header = newSeq[int](size)
+  var types = newSeq[int](size)
+  var last = newSeq[int](size)
+  var nodes = newSeq[ref UnionFindNode]()
+
+  for i in 1..size:
+    nonBackPreds.add initSet[int](1)
+    backPreds.add newSeq[int]()
+    nodes.add newUnionFindNode()
+
+  # Step a:
+  #   - initialize all nodes as unvisited.
+  #   - depth-first traversal and numbering.
+  #   - unreached BB's are marked as dead.
+  #
+  for v in self.cfg.basicBlockMap.values: number[v] = UNVISITED
+  var res = dfs(startNode, nodes, number, last, 0)
+
+  # Step b:
+  #   - iterate over all nodes.
+  #
+  #   A backedge comes from a descendant in the DFS tree, and non-backedges
+  #   from non-descendants (following Tarjan).
+  #
+  #   - check incoming edges 'v' and add them to either
+  #     - the list of backedges (backPreds) or
+  #     - the list of non-backedges (nonBackPreds)
+  #
+  for w in 0 .. <size:
+    header[w] = 0
+    types[w] = BB_NONHEADER
+
+    var nodeW = nodes[w].bb
+    if nodeW != nil:
+      for nodeV in nodeW.inEdges:
+        var v = number[nodeV]
+        if v != UNVISITED:
+          if isAncestor(w, v, last):
+            backPreds[w].add v
+          else:
+            nonBackPreds[w].incl v
+    else:
+      types[w] = BB_DEAD
+
+  # Start node is root of all other loops.
+  header[0] = 0
+
+  # Step c:
+  #
+  # The outer loop, unchanged from Tarjan. It does nothing except
+  # for those nodes which are the destinations of backedges.
+  # For a header node w, we chase backward from the sources of the
+  # backedges adding nodes to the set P, representing the body of
+  # the loop headed by w.
+  #
+  # By running through the nodes in reverse of the DFST preorder,
+  # we ensure that inner loop headers will be processed before the
+  # headers for surrounding loops.
+
+  for w in countdown(size - 1, 0):
+    # this is 'P' in Havlak's paper
+    var nodePool = newSeq[ref UnionFindNode]()
+
+    var nodeW = nodes[w].bb
+    if nodeW != nil: # dead BB
+      # Step d:
+      for v in backPreds[w]:
+        if v != w:
+          nodePool.add nodes[v].findSet
+        else:
+          types[w] = BB_SELF
+
+      # Copy nodePool to workList.
+      #
+      var workList = newSeq[ref UnionFindNode]()
+      for x in nodePool: workList.add x
+
+      if nodePool.len != 0: types[w] = BB_REDUCIBLE
+
+      # work the list...
+      #
+      while workList.len > 0:
+        var x = workList[0]
+        workList.del(0)
+
+        # Step e:
+        #
+        # Step e represents the main difference from Tarjan's method.
+        # Chasing upwards from the sources of a node w's backedges. If
+        # there is a node y' that is not a descendant of w, w is marked
+        # the header of an irreducible loop, there is another entry
+        # into this loop that avoids w.
+        #
+
+        # The algorithm has degenerated. Break and
+        # return in this case.
+        #
+        var nonBackSize = nonBackPreds[x.dfsNumber].len
+        if nonBackSize > MAXNONBACKPREDS: return 0
+
+        for iter in nonBackPreds[x.dfsNumber]:
+          var y = nodes[iter]
+          var ydash = y.findSet
+
+          if not isAncestor(w, ydash.dfsNumber, last):
+            types[w] = BB_IRREDUCIBLE
+            nonBackPreds[w].incl ydash.dfsNumber
+          else:
+            if ydash.dfsNumber != w and not nodePool.contains(ydash):
+              workList.add ydash
+              nodePool.add ydash
+
+      # Collapse/Unionize nodes in a SCC to a single node
+      # For every SCC found, create a loop descriptor and link it in.
+      #
+      if (nodePool.len > 0) or (types[w] == BB_SELF):
+        var l = self.lsg.createNewLoop
+
+        l.setHeader(nodeW)
+        l.isReducible = types[w] != BB_IRREDUCIBLE
+
+        # At this point, one can set attributes to the loop, such as:
+        #
+        # the bottom node:
+        #    iter  = backPreds(w).begin();
+        #    loop bottom is: nodes(iter).node;
+        #
+        # the number of backedges:
+        #    backPreds(w).size()
+        #
+        # whether this loop is reducible:
+        #    types(w) != BB_IRREDUCIBLE
+        #
+        nodes[w].l = l
+
+        for node in nodePool:
+          # Add nodes to loop descriptor.
+          header[node.dfsNumber] = w
+          node.union(nodes[w])
+
+          # Nested loops are not added, but linked together.
+          var node_l = node.l
+          if node_l != nil:
+            node_l.setParent(l)
+          else:
+            l.addNode(node.bb)
+
+        self.lsg.addLoop(l)
+
+  result = self.lsg.getNumLoops
+
+
+type
+  LoopTesterApp = object
+    cfg: Cfg
+    lsg: Lsg
+
+proc newLoopTesterApp(): LoopTesterApp =
+  result.cfg = newCfg()
+  result.lsg = newLsg()
+
+proc buildDiamond(self: var LoopTesterApp, start: int): int =
+  var bb0 = start
+  var x1 = newBasicBlockEdge(self.cfg, bb0, bb0 + 1)
+  var x2 = newBasicBlockEdge(self.cfg, bb0, bb0 + 2)
+  var x3 = newBasicBlockEdge(self.cfg, bb0 + 1, bb0 + 3)
+  var x4 = newBasicBlockEdge(self.cfg, bb0 + 2, bb0 + 3)
+  result = bb0 + 3
+
+proc buildConnect(self: var LoopTesterApp, start1: int, end1: int) =
+  var x1 = newBasicBlockEdge(self.cfg, start1, end1)
+
+proc buildStraight(self: var LoopTesterApp, start: int, n: int): int =
+  for i in 0..n-1:
+    self.buildConnect(start + i, start + i + 1)
+  result = start + n
+
+proc buildBaseLoop(self: var LoopTesterApp, from1: int): int =
+  var header = self.buildStraight(from1, 1)
+  var diamond1 = self.buildDiamond(header)
+  var d11 = self.buildStraight(diamond1, 1)
+  var diamond2 = self.buildDiamond(d11)
+  var footer = self.buildStraight(diamond2, 1)
+
+  self.buildConnect(diamond2, d11)
+  self.buildConnect(diamond1, header)
+  self.buildConnect(footer, from1)
+  result = self.buildStraight(footer, 1)
+
+proc run(self: var LoopTesterApp) =
+  echo "Welcome to LoopTesterApp, Nim edition"
+  echo "Constructing Simple CFG..."
+
+  var x1 = self.cfg.createNode(0)
+  var x2 = self.buildBaseLoop(0)
+  var x3 = self.cfg.createNode(1)
+  self.buildConnect(0, 2)
+
+  echo "15000 dummy loops"
+
+  for i in 1..15000:
+    var h = newHavlakLoopFinder(self.cfg, newLsg())
+    var res = h.findLoops
+
+  echo "Constructing CFG..."
+  var n = 2
+
+  for parlooptrees in 1..10:
+    var x6 = self.cfg.createNode(n + 1)
+    self.buildConnect(2, n + 1)
+    n += 1
+    for i in 1..100:
+      var top = n
+      n = self.buildStraight(n, 1)
+      for j in 1..25: n = self.buildBaseLoop(n)
+      var bottom = self.buildStraight(n, 1)
+      self.buildConnect n, top
+      n = bottom
+    self.buildConnect(n, 1)
+
+  echo "Performing Loop Recognition\n1 Iteration"
+
+  var h = newHavlakLoopFinder(self.cfg, newLsg())
+  var loops = h.findLoops
+
+  echo "Another 50 iterations..."
+
+  var sum = 0
+  for i in 1..50:
+    write stdout, "."
+    flushFile(stdout)
+    var hlf = newHavlakLoopFinder(self.cfg, newLsg())
+    sum += hlf.findLoops
+    #echo getOccupiedMem()
+  echo "\nFound ", loops, " loops (including artificial root node) (", sum, ")"
+
+var l = newLoopTesterApp()
+l.run
diff --git a/tests/gc/tlists.nim b/tests/gc/tlists.nim
new file mode 100644
index 000000000..26b32396c
--- /dev/null
+++ b/tests/gc/tlists.nim
@@ -0,0 +1,37 @@
+discard """
+  output: '''Success'''
+"""
+
+# bug #3793
+
+import os
+import math
+import lists
+import strutils
+
+proc mkleak() =
+  # allocate 10 MB via linked lists
+  let numberOfLists = 100
+  for i in countUp(1, numberOfLists):
+    var leakList = initDoublyLinkedList[string]()
+    let numberOfLeaks = 50000
+    for j in countUp(1, numberOfLeaks):
+      let leakSize = 200
+      let leaked = newString(leakSize)
+      leakList.append(leaked)
+
+proc mkManyLeaks() =
+  for i in 0..0:
+    when false: echo getOccupiedMem()
+    mkleak()
+    when false: echo getOccupiedMem()
+    # Force a full collection. This should free all of the
+    # lists and bring the memory usage down to a few MB's.
+    GC_fullCollect()
+    when false: echo getOccupiedMem()
+    if getOccupiedMem() > 8 * 200 * 50_000 * 2:
+      echo GC_getStatistics()
+      quit "leaking"
+  echo "Success"
+
+mkManyLeaks()
diff --git a/tests/testament/categories.nim b/tests/testament/categories.nim
index 150c55edc..cf09aaae0 100644
--- a/tests/testament/categories.nim
+++ b/tests/testament/categories.nim
@@ -152,6 +152,9 @@ proc gcTests(r: var TResults, cat: Category, options: string) =
   testWithoutBoehm "closureleak"
   testWithoutMs "refarrayleak"

+  testWithoutBoehm "tlists"
+  testWithoutBoehm "thavlak"
+
   test "stackrefleak"
   test "cyclecollector"
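Both new tests follow the same leak-regression skeleton tlists spells out: allocate a known volume of garbage, force a full collection, then fail if getOccupiedMem() stays above a generous bound. A hedged sketch of that skeleton (sizes and the threshold here are arbitrary illustrations, not the test's numbers):

proc churn() =
  for i in 1 .. 1000:
    discard newString(1000)       # roughly 1 MB of short-lived strings

proc checkNoLeak() =
  churn()
  GC_fullCollect()
  # deliberately loose bound: exact figures depend on allocator bookkeeping
  if getOccupiedMem() > 10_000_000:
    echo GC_getStatistics()
    quit "leaking"
  echo "Success"

checkNoLeak()

The loose bound matters: getOccupiedMem() reports heap pages the allocator retains, not just live objects, so a tight threshold would make the test flaky across allocator versions.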