#
#
# Nim's Runtime Library
# (c) Copyright 2015 Andreas Rumpf
#
# See the file "copying.txt", included in this
# distribution, for details about the copyright.
#
# A simple mark&sweep garbage collector for Nim. Define the
# symbol ``gcUseBitvectors`` to generate a variant of this GC.
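#
# For example (assuming a Nim toolchain contemporary with this file), the
# mark&sweep collector and its bitvector variant can be selected when
# compiling a program:
#
#   nim c --gc:markAndSweep myprog.nim
#   nim c --gc:markAndSweep -d:gcUseBitvectors myprog.nim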
{.push profiler:off.}
const
InitialThreshold = 4*1024*1024 # 4 MB because marking&sweeping is slow
withBitvectors = defined(gcUseBitvectors)
# bitvectors are significantly faster for GC-bench, but slower for
# bootstrapping and use more memory
rcWhite = 0
rcGrey = 1 # unused
rcBlack = 2
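# Color scheme: in the default (non-bitvector) mode `mark` turns every
# reachable cell rcBlack, and `sweep` frees every cell that is still rcWhite,
# resetting the surviving black cells back to rcWhite for the next collection.
# With ``gcUseBitvectors`` the mark bits live in gch.marked instead of the
# refcount field. rcGrey is reserved but unused by this collector.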
template mulThreshold(x): expr {.immediate.} = x * 2
when defined(memProfiler):
proc nimProfile(requestedSize: int)
type
WalkOp = enum
waMarkGlobal, # we need to mark conservatively for global marker procs
# as these may refer to a global var and not to a thread
# local
waMarkPrecise # fast precise marking
Finalizer {.compilerproc.} = proc (self: pointer) {.nimcall, benign.}
# A ref type can have a finalizer that is called before the object's
# storage is freed.
GlobalMarkerProc = proc () {.nimcall, benign.}
GcStat = object
collections: int # number of performed full collections
maxThreshold: int # max threshold that has been set
maxStackSize: int # max stack size
freedObjects: int # number of objects freed by the sweep phase
GcHeap = object # the GC state for a thread: the managed memory region
# plus the bookkeeping needed for marking and sweeping
stackBottom: pointer
cycleThreshold: int
when useCellIds:
idGenerator: int
when withBitvectors:
allocated, marked: CellSet
tempStack: CellSeq # temporary stack for recursion elimination
recGcLock: int # prevent recursion via finalizers; no thread lock
region: MemRegion # garbage collected region
stat: GcStat
additionalRoots: CellSeq # dummy roots for GC_ref/unref
{.deprecated: [TWalkOp: WalkOp, TFinalizer: Finalizer, TGcStat: GcStat,
TGlobalMarkerProc: GlobalMarkerProc, TGcHeap: GcHeap].}
var
gch {.rtlThreadVar.}: GcHeap
when not defined(useNimRtl):
instantiateForRegion(gch.region)
template acquire(gch: GcHeap) =
when hasThreadSupport and hasSharedHeap:
acquireSys(HeapLock)
template release(gch: GcHeap) =
when hasThreadSupport and hasSharedHeap:
releaseSys(HeapLock)
template gcAssert(cond: bool, msg: string) =
when defined(useGcAssert):
if not cond:
echo "[GCASSERT] ", msg
quit 1
proc cellToUsr(cell: PCell): pointer {.inline.} =
# convert object (=pointer to refcount) to pointer to userdata
result = cast[pointer](cast[ByteAddress](cell)+%ByteAddress(sizeof(Cell)))
proc usrToCell(usr: pointer): PCell {.inline.} =
# convert pointer to userdata to object (=pointer to refcount)
result = cast[PCell](cast[ByteAddress](usr)-%ByteAddress(sizeof(Cell)))
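# A minimal sketch of the layout these two helpers rely on: every GC'd
# allocation starts with a Cell header that is immediately followed by the
# user data, so the conversions are exact inverses of each other:
#
#   +-------------+------------------+
#   | Cell header | user data ...    |
#   +-------------+------------------+
#   ^ PCell       ^ pointer handed out to user code
#
#   assert usrToCell(cellToUsr(c)) == c   # holds for any valid PCell c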
proc canbeCycleRoot(c: PCell): bool {.inline.} =
result = ntfAcyclic notin c.typ.flags
proc extGetCellType(c: pointer): PNimType {.compilerproc.} =
# used by the generated code for debugging support
result = usrToCell(c).typ
proc unsureAsgnRef(dest: PPointer, src: pointer) {.inline.} =
dest[] = src
proc internRefcount(p: pointer): int {.exportc: "getRefcount".} =
result = 0
var
globalMarkersLen: int
globalMarkers: array[0.. 7_000, GlobalMarkerProc]
proc nimRegisterGlobalMarker(markerProc: GlobalMarkerProc) {.compilerProc.} =
if globalMarkersLen <= high(globalMarkers):
globalMarkers[globalMarkersLen] = markerProc
inc globalMarkersLen
else:
echo "[GC] cannot register global variable; too many global variables"
quit 1
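# The code generator emits a marker proc for every module-level global that
# contains GC'd references and registers it here during program startup;
# markGlobals below simply invokes all of them on each collection.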
# this has to be zero, otherwise we have to round up UnitsPerPage:
when BitsPerPage mod (sizeof(int)*8) != 0:
{.error: "(BitsPerPage mod BitsPerUnit) should be zero!".}
# forward declarations:
proc collectCT(gch: var GcHeap) {.benign.}
proc isOnStack*(p: pointer): bool {.noinline, benign.}
proc forAllChildren(cell: PCell, op: WalkOp) {.benign.}
proc doOperation(p: pointer, op: WalkOp) {.benign.}
proc forAllChildrenAux(dest: pointer, mt: PNimType, op: WalkOp) {.benign.}
# we need the prototype here for debugging purposes
proc prepareDealloc(cell: PCell) =
if cell.typ.finalizer != nil:
# the finalizer could invoke something that
# allocates memory; this could trigger a garbage
# collection. Since we are already collecting, we
# prevent recursive entry here with a lock.
# XXX: we should set the cell's children to nil!
inc(gch.recGcLock)
(cast[Finalizer](cell.typ.finalizer))(cellToUsr(cell))
dec(gch.recGcLock)
proc nimGCref(p: pointer) {.compilerProc.} =
# keep the object alive by registering it as an additional GC root;
# the disabled variant below pretended it was never allocated:
when false:
when withBitvectors: excl(gch.allocated, usrToCell(p))
else: usrToCell(p).refcount = rcBlack
add(gch.additionalRoots, usrToCell(p))
proc nimGCunref(p: pointer) {.compilerProc.} =
let cell = usrToCell(p)
var L = gch.additionalRoots.len-1
var i = L
let d = gch.additionalRoots.d
while i >= 0:
if d[i] == cell:
d[i] = d[L]
dec gch.additionalRoots.len
break
dec(i)
when false:
when withBitvectors: incl(gch.allocated, usrToCell(p))
else: usrToCell(p).refcount = rcWhite
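# A minimal usage sketch (MyType and registerWithCLibrary are hypothetical;
# GC_ref/GC_unref are the user-level calls the compiler lowers to
# nimGCref/nimGCunref): keep an object alive while only untraced foreign code
# holds a pointer to it.
#
#   var obj: MyType
#   new(obj)
#   GC_ref(obj)                              # adds the cell to gch.additionalRoots
#   registerWithCLibrary(cast[pointer](obj)) # hypothetical foreign API
#   # ... later, once the C library has dropped the pointer:
#   GC_unref(obj)                            # removes the dummy root again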
proc initGC() =
when not defined(useNimRtl):
gch.cycleThreshold = InitialThreshold
gch.stat.collections = 0
gch.stat.maxThreshold = 0
gch.stat.maxStackSize = 0
init(gch.tempStack)
init(gch.additionalRoots)
when withBitvectors:
init(gch.allocated)
init(gch.marked)
var
localGcInitialized {.rtlThreadVar.}: bool
proc setupForeignThreadGc*() =
## Call this if you have registered a callback that will be run from a thread not
## under your control. This has a cheap thread-local guard, so the GC for
## this thread will only be initialized once per thread, no matter how often
## it is called.
if not localGcInitialized:
localGcInitialized = true
var stackTop {.volatile.}: pointer
setStackBottom(addr(stackTop))
initGC()
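# A minimal usage sketch (the callback and its registration are hypothetical):
# a callback that a C library runs on a thread Nim did not create must set up
# the GC before touching GC'd memory.
#
#   proc myCallback(data: pointer) {.cdecl.} =
#     setupForeignThreadGc()      # cheap no-op after the first call per thread
#     var buf = newSeq[int](128)  # GC'd allocations are now safe on this thread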
proc forAllSlotsAux(dest: pointer, n: ptr TNimNode, op: WalkOp) {.benign.} =
var d = cast[ByteAddress](dest)
case n.kind
of nkSlot: forAllChildrenAux(cast[pointer](d +% n.offset), n.typ, op)
of nkList:
for i in 0..n.len-1:
forAllSlotsAux(dest, n.sons[i], op)
of nkCase:
var m = selectBranch(dest, n)
if m != nil: forAllSlotsAux(dest, m, op)
of nkNone: sysAssert(false, "forAllSlotsAux")
proc forAllChildrenAux(dest: pointer, mt: PNimType, op: WalkOp) =
var d = cast[ByteAddress](dest)
if dest == nil: return # nothing to do
if ntfNoRefs notin mt.flags:
case mt.kind
of tyRef, tyString, tySequence: # leaf:
doOperation(cast[PPointer](d)[], op)
of tyObject, tyTuple:
forAllSlotsAux(dest, mt.node, op)
of tyArray, tyArrayConstr, tyOpenArray:
for i in 0..(mt.size div mt.base.size)-1:
forAllChildrenAux(cast[pointer](d +% i *% mt.base.size), mt.base, op)
else: discard
proc forAllChildren(cell: PCell, op: WalkOp) =
gcAssert(cell != nil, "forAllChildren: 1")
gcAssert(cell.typ != nil, "forAllChildren: 2")
gcAssert cell.typ.kind in {tyRef, tySequence, tyString}, "forAllChildren: 3"
let marker = cell.typ.marker
if marker != nil:
marker(cellToUsr(cell), op.int)
else:
case cell.typ.kind
of tyRef: # common case
forAllChildrenAux(cellToUsr(cell), cell.typ.base, op)
of tySequence:
var d = cast[ByteAddress](cellToUsr(cell))
var s = cast[PGenericSeq](d)
if s != nil:
for i in 0..s.len-1:
forAllChildrenAux(cast[pointer](d +% i *% cell.typ.base.size +%
GenericSeqSize), cell.typ.base, op)
else: discard
proc rawNewObj(typ: PNimType, size: int, gch: var GcHeap): pointer =
# generates a new object and sets its reference counter to 0
acquire(gch)
gcAssert(typ.kind in {tyRef, tyString, tySequence}, "newObj: 1")
collectCT(gch)
var res = cast[PCell](rawAlloc(gch.region, size + sizeof(Cell)))
gcAssert((cast[ByteAddress](res) and (MemAlign-1)) == 0, "newObj: 2")
# initialize the cell header:
res.typ = typ
when leakDetector and not hasThreadSupport:
if framePtr != nil and framePtr.prev != nil:
res.filename = framePtr.prev.filename
res.line = framePtr.prev.line
res.refcount = 0
release(gch)
when withBitvectors: incl(gch.allocated, res)
when useCellIds:
inc gch.idGenerator
res.id = gch.idGenerator
result = cellToUsr(res)
when useCellIds:
proc getCellId*[T](x: ref T): int =
let p = usrToCell(cast[pointer](x))
result = p.id
{.pop.}
proc newObj(typ: PNimType, size: int): pointer {.compilerRtl.} =
result = rawNewObj(typ, size, gch)
zeroMem(result, size)
when defined(memProfiler): nimProfile(size)
proc newObjNoInit(typ: PNimType, size: int): pointer {.compilerRtl.} =
result = rawNewObj(typ, size, gch)
when defined(memProfiler): nimProfile(size)
proc newSeq(typ: PNimType, len: int): pointer {.compilerRtl.} =
# `newObj` already uses locks, so no need for them here.
let size = addInt(mulInt(len, typ.base.size), GenericSeqSize)
result = newObj(typ, size)
cast[PGenericSeq](result).len = len
cast[PGenericSeq](result).reserved = len
when defined(memProfiler): nimProfile(size)
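# For example, newSeq for 3 elements whose base type has size 4 requests
# 3*4 + GenericSeqSize bytes of payload from newObj; rawNewObj then puts a
# Cell header of sizeof(Cell) bytes in front of that payload.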
proc newObjRC1(typ: PNimType, size: int): pointer {.compilerRtl.} =
result = rawNewObj(typ, size, gch)
zeroMem(result, size)
when defined(memProfiler): nimProfile(size)
proc newSeqRC1(typ: PNimType, len: int): pointer {.compilerRtl.} =
let size = addInt(mulInt(len, typ.base.size), GenericSeqSize)
result = newObj(typ, size)
cast[PGenericSeq](result).len = len
cast[PGenericSeq](result).reserved = len
when defined(memProfiler): nimProfile(size)
proc growObj(old: pointer, newsize: int, gch: var GcHeap): pointer =
acquire(gch)
collectCT(gch)
var ol = usrToCell(old)
sysAssert(ol.typ != nil, "growObj: 1")
gcAssert(ol.typ.kind in {tyString, tySequence}, "growObj: 2")
var res = cast[PCell](rawAlloc(gch.region, newsize + sizeof(Cell)))
var elemSize = 1
if ol.typ.kind != tyString: elemSize = ol.typ.base.size
var oldsize = cast[PGenericSeq](old).len*elemSize + GenericSeqSize
copyMem(res, ol, oldsize + sizeof(Cell))
zeroMem(cast[pointer](cast[ByteAddress](res)+% oldsize +% sizeof(Cell)),
newsize-oldsize)
sysAssert((cast[ByteAddress](res) and (MemAlign-1)) == 0, "growObj: 3")
when false:
# this is wrong since seqs can be shared via 'shallow':
when withBitvectors: excl(gch.allocated, ol)
when reallyDealloc: rawDealloc(gch.region, ol)
else:
zeroMem(ol, sizeof(Cell))
when withBitvectors: incl(gch.allocated, res)
when useCellIds:
inc gch.idGenerator
res.id = gch.idGenerator
release(gch)
result = cellToUsr(res)
when defined(memProfiler): nimProfile(newsize-oldsize)
proc growObj(old: pointer, newsize: int): pointer {.rtl.} =
result = growObj(old, newsize, gch)
{.push profiler:off.}
# ----------------- collector -----------------------------------------------
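# `mark` colors a cell and everything transitively reachable from it. Recursion
# is eliminated by having doOperation push children onto gch.tempStack (via
# waMarkPrecise) and draining that stack iteratively; only cells that were still
# white (or, with bitvectors, not yet in gch.marked) are expanded further.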
proc mark(gch: var GcHeap, c: PCell) =
when withBitvectors:
incl(gch.marked, c)
gcAssert gch.tempStack.len == 0, "stack not empty!"
forAllChildren(c, waMarkPrecise)
while gch.tempStack.len > 0:
dec gch.tempStack.len
var d = gch.tempStack.d[gch.tempStack.len]
if not containsOrIncl(gch.marked, d):
forAllChildren(d, waMarkPrecise)
else:
# XXX no 'if c.refCount != rcBlack' here?
c.refCount = rcBlack
gcAssert gch.tempStack.len == 0, "stack not empty!"
forAllChildren(c, waMarkPrecise)
while gch.tempStack.len > 0:
dec gch.tempStack.len
var d = gch.tempStack.d[gch.tempStack.len]
if d.refcount == rcWhite:
d.refCount = rcBlack
forAllChildren(d, waMarkPrecise)
proc doOperation(p: pointer, op: WalkOp) =
if p == nil: return
var c: PCell = usrToCell(p)
gcAssert(c != nil, "doOperation: 1")
case op
of waMarkGlobal:
when hasThreadSupport:
# could point to a cell which we don't own and don't want to touch/trace
if isAllocatedPtr(gch.region, c):
mark(gch, c)
else:
mark(gch, c)
of waMarkPrecise: add(gch.tempStack, c)
proc nimGCvisit(d: pointer, op: int) {.compilerRtl.} =
doOperation(d, WalkOp(op))
proc freeCyclicCell(gch: var GcHeap, c: PCell) =
inc gch.stat.freedObjects
prepareDealloc(c)
when reallyDealloc: rawDealloc(gch.region, c)
else:
gcAssert(c.typ != nil, "freeCyclicCell")
zeroMem(c, sizeof(Cell))
proc sweep(gch: var GcHeap) =
when withBitvectors:
for c in gch.allocated.elementsExcept(gch.marked):
gch.allocated.excl(c)
freeCyclicCell(gch, c)
else:
for x in allObjects(gch.region):
if isCell(x):
# cast to PCell is correct here:
var c = cast[PCell](x)
if c.refcount == rcBlack: c.refcount = rcWhite
else: freeCyclicCell(gch, c)
when false:
proc newGcInvariant*() =
for x in allObjects(gch.region):
if isCell(x):
var c = cast[PCell](x)
if c.typ == nil:
writeStackTrace()
quit 1
proc markGlobals(gch: var GcHeap) =
for i in 0 .. < globalMarkersLen: globalMarkers[i]()
let d = gch.additionalRoots.d
for i in 0 .. < gch.additionalRoots.len: mark(gch, d[i])
proc gcMark(gch: var GcHeap, p: pointer) {.inline.} =
# addresses on the stack point to user data, not to cells, so convert them:
var cell = usrToCell(p)
var c = cast[ByteAddress](cell)
if c >% PageSize:
# fast check: does it look like a cell?
var objStart = cast[PCell](interiorAllocatedPtr(gch.region, cell))
if objStart != nil:
mark(gch, objStart)
# ----------------- stack management --------------------------------------
# inspired by Smart Eiffel
when defined(sparc):
const stackIncreases = false
elif defined(hppa) or defined(hp9000) or defined(hp9000s300) or
defined(hp9000s700) or defined(hp9000s800) or defined(hp9000s820):
const stackIncreases = true
else:
const stackIncreases = false
when not defined(useNimRtl):
{.push stack_trace: off.}
proc setStackBottom(theStackBottom: pointer) =
#c_fprintf(c_stdout, "stack bottom: %p;\n", theStackBottom)
# the first init must be the one that defines the stack bottom:
if gch.stackBottom == nil: gch.stackBottom = theStackBottom
else:
var a = cast[ByteAddress](theStackBottom) # and not PageMask - PageSize*2
var b = cast[ByteAddress](gch.stackBottom)
#c_fprintf(c_stdout, "old: %p new: %p;\n",gch.stackBottom,theStackBottom)
when stackIncreases:
gch.stackBottom = cast[pointer](min(a, b))
else:
gch.stackBottom = cast[pointer](max(a, b))
{.pop.}
proc stackSize(): int {.noinline.} =
var stackTop {.volatile.}: pointer
result = abs(cast[int](addr(stackTop)) - cast[int](gch.stackBottom))
when defined(sparc): # For SPARC architecture.
proc isOnStack(p: pointer): bool =
var stackTop {.volatile.}: pointer
stackTop = addr(stackTop)
var b = cast[ByteAddress](gch.stackBottom)
var a = cast[ByteAddress](stackTop)
var x = cast[ByteAddress](p)
result = a <=% x and x <=% b
proc markStackAndRegisters(gch: var GcHeap) {.noinline, cdecl.} =
when defined(sparcv9):
asm """"flushw \n" """
else:
asm """"ta 0x3 ! ST_FLUSH_WINDOWS\n" """
var
max = gch.stackBottom
sp: PPointer
stackTop: array[0..1, pointer]
sp = addr(stackTop[0])
# Addresses decrease as the stack grows.
while sp <= max:
gcMark(gch, sp[])
sp = cast[ppointer](cast[ByteAddress](sp) +% sizeof(pointer))
elif defined(ELATE):
{.error: "stack marking code is to be written for this architecture".}
elif stackIncreases:
# ---------------------------------------------------------------------------
# Generic code for architectures where addresses increase as the stack grows.
# ---------------------------------------------------------------------------
proc isOnStack(p: pointer): bool =
var stackTop {.volatile.}: pointer
stackTop = addr(stackTop)
var a = cast[ByteAddress](gch.stackBottom)
var b = cast[ByteAddress](stackTop)
var x = cast[ByteAddress](p)
result = a <=% x and x <=% b
var
jmpbufSize {.importc: "sizeof(jmp_buf)", nodecl.}: int
# a little hack to get the size of a JmpBuf in the generated C code
# in a platform-independent way
proc markStackAndRegisters(gch: var GcHeap) {.noinline, cdecl.} =
var registers: C_JmpBuf
if c_setjmp(registers) == 0'i32: # To fill the C stack with registers.
var max = cast[ByteAddress](gch.stackBottom)
var sp = cast[ByteAddress](addr(registers)) +% jmpbufSize -% sizeof(pointer)
# sp will traverse the JMP_BUF as well (jmp_buf size is added,
# otherwise sp would be below the registers structure).
while sp >=% max:
gcMark(gch, cast[ppointer](sp)[])
sp = sp -% sizeof(pointer)
else:
# ---------------------------------------------------------------------------
# Generic code for architectures where addresses decrease as the stack grows.
# ---------------------------------------------------------------------------
proc isOnStack(p: pointer): bool =
var stackTop {.volatile.}: pointer
stackTop = addr(stackTop)
var b = cast[ByteAddress](gch.stackBottom)
var a = cast[ByteAddress](stackTop)
var x = cast[ByteAddress](p)
result = a <=% x and x <=% b
proc markStackAndRegisters(gch: var GcHeap) {.noinline, cdecl.} =
# We use a jmp_buf buffer that is in the C stack.
# Used to traverse the stack and registers assuming
# that 'setjmp' will save registers in the C stack.
type PStackSlice = ptr array [0..7, pointer]
var registers {.noinit.}: C_JmpBuf
if c_setjmp(registers) == 0'i32: # To fill the C stack with registers.
var max = cast[ByteAddress](gch.stackBottom)
var sp = cast[ByteAddress](addr(registers))
when defined(amd64):
# words within the jmp_buf structure may not be properly aligned.
let regEnd = sp +% sizeof(registers)
while sp <% regEnd:
gcMark(gch, cast[PPointer](sp)[])
gcMark(gch, cast[PPointer](sp +% sizeof(pointer) div 2)[])
sp = sp +% sizeof(pointer)
# Make sure sp is word-aligned
sp = sp and not (sizeof(pointer) - 1)
# loop unrolled:
while sp <% max - 8*sizeof(pointer):
gcMark(gch, cast[PStackSlice](sp)[0])
gcMark(gch, cast[PStackSlice](sp)[1])
gcMark(gch, cast[PStackSlice](sp)[2])
gcMark(gch, cast[PStackSlice](sp)[3])
gcMark(gch, cast[PStackSlice](sp)[4])
gcMark(gch, cast[PStackSlice](sp)[5])
gcMark(gch, cast[PStackSlice](sp)[6])
gcMark(gch, cast[PStackSlice](sp)[7])
sp = sp +% sizeof(pointer)*8
# last few entries:
while sp <=% max:
gcMark(gch, cast[PPointer](sp)[])
sp = sp +% sizeof(pointer)
# ----------------------------------------------------------------------------
# end of non-portable code
# ----------------------------------------------------------------------------
proc collectCTBody(gch: var GcHeap) =
gch.stat.maxStackSize = max(gch.stat.maxStackSize, stackSize())
prepareForInteriorPointerChecking(gch.region)
markStackAndRegisters(gch)
markGlobals(gch)
sweep(gch)
inc(gch.stat.collections)
when withBitvectors:
deinit(gch.marked)
init(gch.marked)
gch.cycleThreshold = max(InitialThreshold, getOccupiedMem().mulThreshold)
gch.stat.maxThreshold = max(gch.stat.maxThreshold, gch.cycleThreshold)
sysAssert(allocInv(gch.region), "collectCT: end")
proc collectCT(gch: var GcHeap) =
if getOccupiedMem(gch.region) >= gch.cycleThreshold and gch.recGcLock == 0:
collectCTBody(gch)
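# Collection policy: collectCT runs a full mark&sweep once the occupied memory
# of the region reaches cycleThreshold and the GC is not locked (recGcLock == 0);
# afterwards collectCTBody raises the threshold to
# max(InitialThreshold, getOccupiedMem().mulThreshold), i.e. twice the occupied
# memory, so the next collection only happens after the heap has grown again.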
when not defined(useNimRtl):
proc GC_disable() =
when hasThreadSupport and hasSharedHeap:
atomicInc(gch.recGcLock, 1)
else:
inc(gch.recGcLock)
proc GC_enable() =
if gch.recGcLock > 0:
when hasThreadSupport and hasSharedHeap:
atomicDec(gch.recGcLock, 1)
else:
dec(gch.recGcLock)
proc GC_setStrategy(strategy: GC_Strategy) = discard
proc GC_enableMarkAndSweep() =
gch.cycleThreshold = InitialThreshold
proc GC_disableMarkAndSweep() =
gch.cycleThreshold = high(gch.cycleThreshold)-1
# set to the max value to suppress the cycle detector
proc GC_fullCollect() =
acquire(gch)
var oldThreshold = gch.cycleThreshold
gch.cycleThreshold = 0 # forces cycle collection
collectCT(gch)
gch.cycleThreshold = oldThreshold
release(gch)
proc GC_getStatistics(): string =
GC_disable()
result = "[GC] total memory: " & $getTotalMem() & "\n" &
"[GC] occupied memory: " & $getOccupiedMem() & "\n" &
"[GC] collections: " & $gch.stat.collections & "\n" &
"[GC] max threshold: " & $gch.stat.maxThreshold & "\n" &
"[GC] freed objects: " & $gch.stat.freedObjects & "\n" &
"[GC] max stack size: " & $gch.stat.maxStackSize & "\n"
GC_enable()
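# A minimal usage sketch: force a collection and print the collector's counters.
#
#   GC_fullCollect()
#   echo GC_getStatistics()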
{.pop.}