Diffstat (limited to 'lib')
-rw-r--r-- | lib/pure/collections/queues.nim | 89
-rwxr-xr-x | lib/pure/marshal.nim | 33
-rwxr-xr-x | lib/pure/osproc.nim | 22
-rwxr-xr-x | lib/system.nim | 21
-rwxr-xr-x | lib/system/alloc.nim | 209
-rwxr-xr-x | lib/system/assign.nim | 14
-rw-r--r-- | lib/system/atomics.nim | 4
-rwxr-xr-x | lib/system/cellsets.nim | 6
-rwxr-xr-x | lib/system/ecmasys.nim | 2
-rwxr-xr-x | lib/system/gc.nim | 182
-rw-r--r-- | lib/system/inboxes.nim | 203
-rwxr-xr-x | lib/system/mmdisp.nim | 15
-rwxr-xr-x | lib/system/repr.nim | 4
-rw-r--r-- | lib/system/syslocks.nim | 101
-rwxr-xr-x | lib/system/threads.nim | 256
15 files changed, 798 insertions, 363 deletions
diff --git a/lib/pure/collections/queues.nim b/lib/pure/collections/queues.nim new file mode 100644 index 000000000..2130d9949 --- /dev/null +++ b/lib/pure/collections/queues.nim @@ -0,0 +1,89 @@ +# +# +# Nimrod's Runtime Library +# (c) Copyright 2011 Andreas Rumpf +# +# See the file "copying.txt", included in this +# distribution, for details about the copyright. +# + +## Implementation of a queue. The underlying implementation uses a ``seq``. + +import math + +type + TQueue* {.pure, final.}[T] = object ## a queue + data: seq[T] + rd, wr, count, mask: int + +proc initQueue*[T](initialSize=4): TQueue[T] = + ## creates a new queue. `initialSize` needs to be a power of 2. + assert IsPowerOfTwo(initialSize) + result.mask = initialSize-1 + newSeq(result.data, initialSize) + +proc len*[T](q: TQueue[T]): int = + ## returns the number of elements of `q`. + result = q.count + +iterator items*[T](q: TQueue[T]): T = + ## yields every element of `q`. + var i = q.rd + var c = q.count + while c > 0: + dec c + yield q.data[i] + i = (i + 1) and q.mask + +proc add*[T](q: var TQueue[T], item: T) = + ## adds an `item` to the end of the queue `q`. + var cap = q.mask+1 + if q.count >= cap: + var n: seq[T] + newSeq(n, cap*2) + var i = 0 + for x in items(q): + shallowCopy(n[i], x) + inc i + shallowCopy(q.data, n) + q.mask = cap*2 - 1 + q.wr = q.count + q.rd = 0 + inc q.count + q.data[q.wr] = item + q.wr = (q.wr + 1) and q.mask + +proc enqueue*[T](q: var TQueue[T], item: T) = + ## alias for the ``add`` operation. + add(q, item) + +proc dequeue*[T](q: var TQueue[T]): T = + ## removes and returns the first element of the queue `q`. + assert q.count > 0 + dec q.count + result = q.data[q.rd] + q.rd = (q.rd + 1) and q.mask + +proc `$`*[T](q: TQueue[T]): string = + ## turns a queue into its string representation. + result = "[" + for x in items(q): + if result.len > 1: result.add(", ") + result.add($x) + result.add("]") + +when isMainModule: + var q = initQueue[int]() + q.add(123) + q.add(9) + q.add(4) + var first = q.dequeue + q.add(56) + q.add(6) + var second = q.dequeue + q.add(789) + + assert first == 123 + assert second == 9 + assert($q == "[4, 56, 6, 789]") + diff --git a/lib/pure/marshal.nim b/lib/pure/marshal.nim index f96d177ae..354d70a71 100755 --- a/lib/pure/marshal.nim +++ b/lib/pure/marshal.nim @@ -8,7 +8,26 @@ # ## This module contains procs for serialization and deseralization of -## arbitrary Nimrod data structures. The serialization format uses JSON. +## arbitrary Nimrod data structures. The serialization format uses JSON. +## +## **Restriction**: For objects their type is **not** serialized. This means +## essentially that it does not work if the object has some other runtime +## type than its compiletime type: +## +## .. code-block:: nimrod +## +## type +## TA = object +## TB = object of TA +## f: int +## +## var +## a: ref TA +## b: ref TB +## +## new(b) +## a = b +## echo($$a[]) # produces "{}", not "{f: 0}" import streams, typeinfo, json, intsets, tables @@ -286,3 +305,15 @@ when isMainModule: echo($$test7) testit(test7) + type + TA = object + TB = object of TA + f: int + + var + a: ref TA + b: ref TB + new(b) + a = b + echo($$a[]) # produces "{}", not "{f: 0}" + diff --git a/lib/pure/osproc.nim b/lib/pure/osproc.nim index 60bef813d..2b7047143 100755 --- a/lib/pure/osproc.nim +++ b/lib/pure/osproc.nim @@ -77,11 +77,14 @@ proc startProcess*(command: string, ## If ``env == nil`` the environment is inherited of ## the parent process. 
`options` are additional flags that may be passed ## to `startProcess`. See the documentation of ``TProcessOption`` for the - ## meaning of these flags. + ## meaning of these flags. You need to `close` the process when done. ## ## Return value: The newly created process object. Nil is never returned, ## but ``EOS`` is raised in case of an error. +proc close*(p: PProcess) {.rtl, extern: "nosp$1".} + ## When the process has finished executing, cleanup related handles + proc suspend*(p: PProcess) {.rtl, extern: "nosp$1".} ## Suspends the process `p`. @@ -179,6 +182,7 @@ proc execProcesses*(cmds: openArray[string], err.add("\n") echo(err) result = max(waitForExit(q[r]), result) + if q[r] != nil: close(q[r]) q[r] = startProcessAux(cmds[i], options=options) r = (r + 1) mod n else: @@ -189,15 +193,18 @@ proc execProcesses*(cmds: openArray[string], if not running(q[r]): #echo(outputStream(q[r]).readLine()) result = max(waitForExit(q[r]), result) + if q[r] != nil: close(q[r]) q[r] = startProcessAux(cmds[i], options=options) inc(i) if i > high(cmds): break for i in 0..m-1: + if q[i] != nil: close(q[i]) result = max(waitForExit(q[i]), result) else: for i in 0..high(cmds): var p = startProcessAux(cmds[i], options=options) result = max(waitForExit(p), result) + close(p) proc select*(readfds: var seq[PProcess], timeout = 500): int ## `select` with a sensible Nimrod interface. `timeout` is in miliseconds. @@ -215,6 +222,8 @@ when not defined(useNimRtl): while running(p) or not outp.atEnd(outp): result.add(outp.readLine()) result.add("\n") + outp.close(outp) + close(p) when false: proc deallocCStringArray(a: cstringArray) = @@ -356,6 +365,12 @@ when defined(Windows) and not defined(useNimRtl): result.FProcessHandle = procInfo.hProcess result.id = procInfo.dwProcessID + proc close(p: PProcess) = + discard CloseHandle(p.inputHandle) + discard CloseHandle(p.outputHandle) + discard CloseHandle(p.errorHandle) + discard CloseHandle(p.FProcessHandle) + proc suspend(p: PProcess) = discard SuspendThread(p.FProcessHandle) @@ -523,6 +538,11 @@ elif not defined(useNimRtl): discard close(p_stdin[readIdx]) discard close(p_stdout[writeIdx]) + proc close(p: PProcess) = + discard close(p.inputHandle) + discard close(p.outputHandle) + discard close(p.errorHandle) + proc suspend(p: PProcess) = discard kill(p.id, SIGSTOP) diff --git a/lib/system.nim b/lib/system.nim index 5ece9375e..5c7102664 100755 --- a/lib/system.nim +++ b/lib/system.nim @@ -785,6 +785,10 @@ when hasThreadSupport and not hasSharedHeap: else: {.pragma: rtlThreadVar.} +template sysAssert(cond: expr) = + # change this to activate system asserts + nil + include "system/inclrtl" when not defined(ecmascript) and not defined(nimrodVm): @@ -1251,7 +1255,7 @@ proc each*[T](data: var openArray[T], op: proc (x: var T)) = for i in 0..data.len-1: op(data[i]) iterator fields*[T: tuple](x: T): expr {.magic: "Fields", noSideEffect.} - ## iterates over every field of `x`. Warning: This is really transforms + ## iterates over every field of `x`. Warning: This really transforms ## the 'for' and unrolls the loop. The current implementation also has a bug ## that affects symbol binding in the loop body. iterator fields*[S: tuple, T: tuple](x: S, y: T): tuple[a, b: expr] {. @@ -1261,13 +1265,13 @@ iterator fields*[S: tuple, T: tuple](x: S, y: T): tuple[a, b: expr] {. ## The current implementation also has a bug that affects symbol binding ## in the loop body. iterator fieldPairs*[T: tuple](x: T): expr {.magic: "FieldPairs", noSideEffect.} - ## iterates over every field of `x`. 
Warning: This is really transforms + ## iterates over every field of `x`. Warning: This really transforms ## the 'for' and unrolls the loop. The current implementation also has a bug ## that affects symbol binding in the loop body. iterator fieldPairs*[S: tuple, T: tuple](x: S, y: T): tuple[a, b: expr] {. magic: "FieldPairs", noSideEffect.} ## iterates over every field of `x` and `y`. - ## Warning: This is really transforms the 'for' and unrolls the loop. + ## Warning: This really transforms the 'for' and unrolls the loop. ## The current implementation also has a bug that affects symbol binding ## in the loop body. @@ -1703,10 +1707,10 @@ when not defined(EcmaScript) and not defined(NimrodVM): # ---------------------------------------------------------------------------- - proc atomicInc*(memLoc: var int, x: int): int {.inline.} + proc atomicInc*(memLoc: var int, x: int = 1): int {.inline.} ## atomic increment of `memLoc`. Returns the value after the operation. - proc atomicDec*(memLoc: var int, x: int): int {.inline.} + proc atomicDec*(memLoc: var int, x: int = 1): int {.inline.} ## atomic decrement of `memLoc`. Returns the value after the operation. include "system/atomics" @@ -1719,6 +1723,7 @@ when not defined(EcmaScript) and not defined(NimrodVM): context: C_JmpBuf when hasThreadSupport: + include "system/syslocks" include "system/threads" else: initStackBottom() @@ -1739,14 +1744,14 @@ when not defined(EcmaScript) and not defined(NimrodVM): proc reprAny(p: pointer, typ: PNimType): string {.compilerRtl.} proc getDiscriminant(aa: Pointer, n: ptr TNimNode): int = - assert(n.kind == nkCase) + sysAssert(n.kind == nkCase) var d: int var a = cast[TAddress](aa) case n.typ.size of 1: d = ze(cast[ptr int8](a +% n.offset)[]) of 2: d = ze(cast[ptr int16](a +% n.offset)[]) of 4: d = int(cast[ptr int32](a +% n.offset)[]) - else: assert(false) + else: sysAssert(false) return d proc selectBranch(aa: Pointer, n: ptr TNimNode): ptr TNimNode = @@ -1764,6 +1769,8 @@ when not defined(EcmaScript) and not defined(NimrodVM): {.pop.} include "system/sysio" + when hasThreadSupport: + include "system/inboxes" iterator lines*(filename: string): string = ## Iterate over any line in the file named `filename`. 
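The new queues module above keeps rd/wr as indices into a power-of-two sized seq, so wrap-around costs a single `and mask` instead of a modulo, and the grow path doubles the capacity while rebasing rd/wr. A minimal usage sketch of the API this commit adds, assuming the module is importable as ``queues`` (it mirrors the module's own ``isMainModule`` self-test):

  import queues

  var q = initQueue[int](4)    # initialSize must be a power of two
  for i in 1..4: q.add(i)      # ring is now full: count == mask+1 == 4
  q.add(5)                     # grow path: capacity doubles, rd/wr are rebased
  assert q.dequeue() == 1      # FIFO; rd advances via (rd + 1) and q.mask
  assert($q == "[2, 3, 4, 5]")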
diff --git a/lib/system/alloc.nim b/lib/system/alloc.nim index 3273242d6..8a54e0ddd 100755 --- a/lib/system/alloc.nim +++ b/lib/system/alloc.nim @@ -128,12 +128,12 @@ template bigChunkOverhead(): expr = sizeof(TBigChunk)-sizeof(TAlignType) proc roundup(x, v: int): int {.inline.} = result = (x + (v-1)) and not (v-1) - assert(result >= x) + sysAssert(result >= x) #return ((-x) and (v-1)) +% x -assert(roundup(14, PageSize) == PageSize) -assert(roundup(15, 8) == 16) -assert(roundup(65, 8) == 72) +sysAssert(roundup(14, PageSize) == PageSize) +sysAssert(roundup(15, 8) == 16) +sysAssert(roundup(65, 8) == 72) # ------------- chunk table --------------------------------------------------- # We use a PtrSet of chunk starts and a table[Page, chunksize] for chunk @@ -149,35 +149,35 @@ type acc: int # accumulator next: PLLChunk # next low-level chunk; only needed for dealloc - TAllocator {.final, pure.} = object + TMemRegion {.final, pure.} = object llmem: PLLChunk currMem, maxMem, freeMem: int # memory sizes (allocated from OS) lastSize: int # needed for the case that OS gives us pages linearly freeSmallChunks: array[0..SmallChunkSize div MemAlign-1, PSmallChunk] freeChunksList: PBigChunk # XXX make this a datastructure with O(1) access chunkStarts: TIntSet - -proc incCurrMem(a: var TAllocator, bytes: int) {.inline.} = + +proc incCurrMem(a: var TMemRegion, bytes: int) {.inline.} = inc(a.currMem, bytes) -proc decCurrMem(a: var TAllocator, bytes: int) {.inline.} = +proc decCurrMem(a: var TMemRegion, bytes: int) {.inline.} = a.maxMem = max(a.maxMem, a.currMem) dec(a.currMem, bytes) -proc getMaxMem(a: var TAllocator): int = +proc getMaxMem(a: var TMemRegion): int = # Since we update maxPagesCount only when freeing pages, # maxPagesCount may not be up to date. Thus we use the # maximum of these both values here: return max(a.currMem, a.maxMem) -proc llAlloc(a: var TAllocator, size: int): pointer = +proc llAlloc(a: var TMemRegion, size: int): pointer = # *low-level* alloc for the memory managers data structures. Deallocation # is done at he end of the allocator's life time. 
if a.llmem == nil or size > a.llmem.size: # the requested size is ``roundup(size+sizeof(TLLChunk), PageSize)``, but # since we know ``size`` is a (small) constant, we know the requested size # is one page: - assert roundup(size+sizeof(TLLChunk), PageSize) == PageSize + sysAssert roundup(size+sizeof(TLLChunk), PageSize) == PageSize var old = a.llmem # can be nil and is correct with nil a.llmem = cast[PLLChunk](osAllocPages(PageSize)) incCurrMem(a, PageSize) @@ -189,7 +189,7 @@ proc llAlloc(a: var TAllocator, size: int): pointer = inc(a.llmem.acc, size) zeroMem(result, size) -proc llDeallocAll(a: var TAllocator) = +proc llDeallocAll(a: var TMemRegion) = var it = a.llmem while it != nil: # we know each block in the list has the size of 1 page: @@ -204,7 +204,7 @@ proc IntSetGet(t: TIntSet, key: int): PTrunk = it = it.next result = nil -proc IntSetPut(a: var TAllocator, t: var TIntSet, key: int): PTrunk = +proc IntSetPut(a: var TMemRegion, t: var TIntSet, key: int): PTrunk = result = IntSetGet(t, key) if result == nil: result = cast[PTrunk](llAlloc(a, sizeof(result[]))) @@ -220,7 +220,7 @@ proc Contains(s: TIntSet, key: int): bool = else: result = false -proc Incl(a: var TAllocator, s: var TIntSet, key: int) = +proc Incl(a: var TMemRegion, s: var TIntSet, key: int) = var t = IntSetPut(a, s, key shr TrunkShift) var u = key and TrunkMask t.bits[u shr IntShift] = t.bits[u shr IntShift] or (1 shl (u and IntMask)) @@ -259,13 +259,13 @@ proc pageIndex(p: pointer): int {.inline.} = proc pageAddr(p: pointer): PChunk {.inline.} = result = cast[PChunk](cast[TAddress](p) and not PageMask) - #assert(Contains(allocator.chunkStarts, pageIndex(result))) + #sysAssert(Contains(allocator.chunkStarts, pageIndex(result))) -proc requestOsChunks(a: var TAllocator, size: int): PBigChunk = +proc requestOsChunks(a: var TMemRegion, size: int): PBigChunk = incCurrMem(a, size) inc(a.freeMem, size) result = cast[PBigChunk](osAllocPages(size)) - assert((cast[TAddress](result) and PageMask) == 0) + sysAssert((cast[TAddress](result) and PageMask) == 0) #zeroMem(result, size) result.next = nil result.prev = nil @@ -273,7 +273,7 @@ proc requestOsChunks(a: var TAllocator, size: int): PBigChunk = result.size = size # update next.prevSize: var nxt = cast[TAddress](result) +% size - assert((nxt and PageMask) == 0) + sysAssert((nxt and PageMask) == 0) var next = cast[PChunk](nxt) if pageIndex(next) in a.chunkStarts: #echo("Next already allocated!") @@ -281,7 +281,7 @@ proc requestOsChunks(a: var TAllocator, size: int): PBigChunk = # set result.prevSize: var lastSize = if a.lastSize != 0: a.lastSize else: PageSize var prv = cast[TAddress](result) -% lastSize - assert((nxt and PageMask) == 0) + sysAssert((nxt and PageMask) == 0) var prev = cast[PChunk](prv) if pageIndex(prev) in a.chunkStarts and prev.size == lastSize: #echo("Prev already allocated!") @@ -290,11 +290,11 @@ proc requestOsChunks(a: var TAllocator, size: int): PBigChunk = result.prevSize = 0 # unknown a.lastSize = size # for next request -proc freeOsChunks(a: var TAllocator, p: pointer, size: int) = +proc freeOsChunks(a: var TMemRegion, p: pointer, size: int) = # update next.prevSize: var c = cast[PChunk](p) var nxt = cast[TAddress](p) +% c.size - assert((nxt and PageMask) == 0) + sysAssert((nxt and PageMask) == 0) var next = cast[PChunk](nxt) if pageIndex(next) in a.chunkStarts: next.prevSize = 0 # XXX used @@ -304,7 +304,7 @@ proc freeOsChunks(a: var TAllocator, p: pointer, size: int) = dec(a.freeMem, size) #c_fprintf(c_stdout, "[Alloc] back to OS: %ld\n", size) -proc 
isAccessible(a: TAllocator, p: pointer): bool {.inline.} = +proc isAccessible(a: TMemRegion, p: pointer): bool {.inline.} = result = Contains(a.chunkStarts, pageIndex(p)) proc contains[T](list, x: T): bool = @@ -313,7 +313,7 @@ proc contains[T](list, x: T): bool = if it == x: return true it = it.next -proc writeFreeList(a: TAllocator) = +proc writeFreeList(a: TMemRegion) = var it = a.freeChunksList c_fprintf(c_stdout, "freeChunksList: %p\n", it) while it != nil: @@ -322,23 +322,23 @@ proc writeFreeList(a: TAllocator) = it = it.next proc ListAdd[T](head: var T, c: T) {.inline.} = - assert(c notin head) - assert c.prev == nil - assert c.next == nil + sysAssert(c notin head) + sysAssert c.prev == nil + sysAssert c.next == nil c.next = head if head != nil: - assert head.prev == nil + sysAssert head.prev == nil head.prev = c head = c proc ListRemove[T](head: var T, c: T) {.inline.} = - assert(c in head) + sysAssert(c in head) if c == head: head = c.next - assert c.prev == nil + sysAssert c.prev == nil if head != nil: head.prev = nil else: - assert c.prev != nil + sysAssert c.prev != nil c.prev.next = c.next if c.next != nil: c.next.prev = c.prev c.next = nil @@ -350,22 +350,22 @@ proc isSmallChunk(c: PChunk): bool {.inline.} = proc chunkUnused(c: PChunk): bool {.inline.} = result = not c.used -proc updatePrevSize(a: var TAllocator, c: PBigChunk, +proc updatePrevSize(a: var TMemRegion, c: PBigChunk, prevSize: int) {.inline.} = var ri = cast[PChunk](cast[TAddress](c) +% c.size) - assert((cast[TAddress](ri) and PageMask) == 0) + sysAssert((cast[TAddress](ri) and PageMask) == 0) if isAccessible(a, ri): ri.prevSize = prevSize -proc freeBigChunk(a: var TAllocator, c: PBigChunk) = +proc freeBigChunk(a: var TMemRegion, c: PBigChunk) = var c = c - assert(c.size >= PageSize) + sysAssert(c.size >= PageSize) inc(a.freeMem, c.size) when coalescRight: var ri = cast[PChunk](cast[TAddress](c) +% c.size) - assert((cast[TAddress](ri) and PageMask) == 0) + sysAssert((cast[TAddress](ri) and PageMask) == 0) if isAccessible(a, ri) and chunkUnused(ri): - assert(not isSmallChunk(ri)) + sysAssert(not isSmallChunk(ri)) if not isSmallChunk(ri): ListRemove(a.freeChunksList, cast[PBigChunk](ri)) inc(c.size, ri.size) @@ -373,9 +373,9 @@ proc freeBigChunk(a: var TAllocator, c: PBigChunk) = when coalescLeft: if c.prevSize != 0: var le = cast[PChunk](cast[TAddress](c) -% c.prevSize) - assert((cast[TAddress](le) and PageMask) == 0) + sysAssert((cast[TAddress](le) and PageMask) == 0) if isAccessible(a, le) and chunkUnused(le): - assert(not isSmallChunk(le)) + sysAssert(not isSmallChunk(le)) if not isSmallChunk(le): ListRemove(a.freeChunksList, cast[PBigChunk](le)) inc(le.size, c.size) @@ -390,9 +390,9 @@ proc freeBigChunk(a: var TAllocator, c: PBigChunk) = else: freeOsChunks(a, c, c.size) -proc splitChunk(a: var TAllocator, c: PBigChunk, size: int) = +proc splitChunk(a: var TMemRegion, c: PBigChunk, size: int) = var rest = cast[PBigChunk](cast[TAddress](c) +% size) - assert(rest notin a.freeChunksList) + sysAssert(rest notin a.freeChunksList) rest.size = c.size - size rest.used = false rest.next = nil @@ -403,14 +403,14 @@ proc splitChunk(a: var TAllocator, c: PBigChunk, size: int) = incl(a, a.chunkStarts, pageIndex(rest)) ListAdd(a.freeChunksList, rest) -proc getBigChunk(a: var TAllocator, size: int): PBigChunk = +proc getBigChunk(a: var TMemRegion, size: int): PBigChunk = # use first fit for now: - assert((size and PageMask) == 0) - assert(size > 0) + sysAssert((size and PageMask) == 0) + sysAssert(size > 0) result = 
a.freeChunksList block search: while result != nil: - assert chunkUnused(result) + sysAssert chunkUnused(result) if result.size == size: ListRemove(a.freeChunksList, result) break search @@ -419,7 +419,7 @@ proc getBigChunk(a: var TAllocator, size: int): PBigChunk = splitChunk(a, result, size) break search result = result.next - assert result != a.freeChunksList + sysAssert result != a.freeChunksList if size < InitialMemoryRequest: result = requestOsChunks(a, InitialMemoryRequest) splitChunk(a, result, size) @@ -430,10 +430,10 @@ proc getBigChunk(a: var TAllocator, size: int): PBigChunk = incl(a, a.chunkStarts, pageIndex(result)) dec(a.freeMem, size) -proc getSmallChunk(a: var TAllocator): PSmallChunk = +proc getSmallChunk(a: var TMemRegion): PSmallChunk = var res = getBigChunk(a, PageSize) - assert res.prev == nil - assert res.next == nil + sysAssert res.prev == nil + sysAssert res.next == nil result = cast[PSmallChunk](res) # ----------------------------------------------------------------------------- @@ -442,9 +442,13 @@ proc getCellSize(p: pointer): int {.inline.} = var c = pageAddr(p) result = c.size -proc rawAlloc(a: var TAllocator, requestedSize: int): pointer = - assert(roundup(65, 8) == 72) - assert requestedSize >= sizeof(TFreeCell) +proc memSize(a: TMemRegion, p: pointer): int {.inline.} = + var c = pageAddr(p) + result = c.size + +proc rawAlloc(a: var TMemRegion, requestedSize: int): pointer = + sysAssert(roundup(65, 8) == 72) + sysAssert requestedSize >= sizeof(TFreeCell) var size = roundup(requestedSize, MemAlign) #c_fprintf(c_stdout, "alloc; size: %ld; %ld\n", requestedSize, size) if size <= SmallChunkSize-smallChunkOverhead(): @@ -454,7 +458,7 @@ proc rawAlloc(a: var TAllocator, requestedSize: int): pointer = if c == nil: c = getSmallChunk(a) c.freeList = nil - assert c.size == PageSize + sysAssert c.size == PageSize c.size = size c.acc = size c.free = SmallChunkSize - smallChunkOverhead() - size @@ -462,36 +466,40 @@ proc rawAlloc(a: var TAllocator, requestedSize: int): pointer = c.prev = nil ListAdd(a.freeSmallChunks[s], c) result = addr(c.data) - assert((cast[TAddress](result) and (MemAlign-1)) == 0) + sysAssert((cast[TAddress](result) and (MemAlign-1)) == 0) else: - assert c.next != c + sysAssert c.next != c #if c.size != size: # c_fprintf(c_stdout, "csize: %lld; size %lld\n", c.size, size) - assert c.size == size + sysAssert c.size == size if c.freeList == nil: - assert(c.acc + smallChunkOverhead() + size <= SmallChunkSize) + sysAssert(c.acc + smallChunkOverhead() + size <= SmallChunkSize) result = cast[pointer](cast[TAddress](addr(c.data)) +% c.acc) inc(c.acc, size) else: result = c.freeList - assert(c.freeList.zeroField == 0) + sysAssert(c.freeList.zeroField == 0) c.freeList = c.freeList.next dec(c.free, size) - assert((cast[TAddress](result) and (MemAlign-1)) == 0) + sysAssert((cast[TAddress](result) and (MemAlign-1)) == 0) if c.free < size: ListRemove(a.freeSmallChunks[s], c) else: size = roundup(requestedSize+bigChunkOverhead(), PageSize) # allocate a large block var c = getBigChunk(a, size) - assert c.prev == nil - assert c.next == nil - assert c.size == size + sysAssert c.prev == nil + sysAssert c.next == nil + sysAssert c.size == size result = addr(c.data) - assert((cast[TAddress](result) and (MemAlign-1)) == 0) - assert(isAccessible(a, result)) + sysAssert((cast[TAddress](result) and (MemAlign-1)) == 0) + sysAssert(isAccessible(a, result)) + +proc rawAlloc0(a: var TMemRegion, requestedSize: int): pointer = + result = rawAlloc(a, requestedSize) + 
zeroMem(result, requestedSize) -proc rawDealloc(a: var TAllocator, p: pointer) = +proc rawDealloc(a: var TMemRegion, p: pointer) = var c = pageAddr(p) if isSmallChunk(c): # `p` is within a small chunk: @@ -499,7 +507,7 @@ proc rawDealloc(a: var TAllocator, p: pointer) = var s = c.size var f = cast[ptr TFreeCell](p) #echo("setting to nil: ", $cast[TAddress](addr(f.zeroField))) - assert(f.zeroField != 0) + sysAssert(f.zeroField != 0) f.zeroField = 0 f.next = c.freeList c.freeList = f @@ -509,7 +517,7 @@ proc rawDealloc(a: var TAllocator, p: pointer) = s -% sizeof(TFreeCell)) # check if it is not in the freeSmallChunks[s] list: if c.free < s: - assert c notin a.freeSmallChunks[s div memAlign] + sysAssert c notin a.freeSmallChunks[s div memAlign] # add it to the freeSmallChunks[s] array: ListAdd(a.freeSmallChunks[s div memAlign], c) inc(c.free, s) @@ -525,7 +533,7 @@ proc rawDealloc(a: var TAllocator, p: pointer) = # free big chunk freeBigChunk(a, cast[PBigChunk](c)) -proc isAllocatedPtr(a: TAllocator, p: pointer): bool = +proc isAllocatedPtr(a: TMemRegion, p: pointer): bool = if isAccessible(a, p): var c = pageAddr(p) if not chunkUnused(c): @@ -539,40 +547,40 @@ proc isAllocatedPtr(a: TAllocator, p: pointer): bool = var c = cast[PBigChunk](c) result = p == addr(c.data) and cast[ptr TFreeCell](p).zeroField >% 1 -proc deallocOsPages(a: var TAllocator) = - # we free every 'ordinarily' allocated page by iterating over the page - # bits: - for p in elements(a.chunkStarts): +proc deallocOsPages(a: var TMemRegion) = + # we free every 'ordinarily' allocated page by iterating over the page bits: + for p in elements(a.chunkStarts): var page = cast[PChunk](p shl pageShift) var size = if page.size < PageSize: PageSize else: page.size osDeallocPages(page, size) # And then we free the pages that are in use for the page bits: llDeallocAll(a) -var - allocator {.rtlThreadVar.}: TAllocator +proc getFreeMem(a: TMemRegion): int {.inline.} = result = a.freeMem +proc getTotalMem(a: TMemRegion): int {.inline.} = result = a.currMem +proc getOccupiedMem(a: TMemRegion): int {.inline.} = + result = a.currMem - a.freeMem -proc deallocOsPages = deallocOsPages(allocator) +# ---------------------- thread memory region ------------------------------- -# ---------------------- interface to programs ------------------------------- +template InstantiateForRegion(allocator: expr) = + proc deallocOsPages = deallocOsPages(allocator) -when not defined(useNimRtl): - - proc unlockedAlloc(size: int): pointer {.inline.} = + proc unlockedAlloc(size: int): pointer = result = rawAlloc(allocator, size+sizeof(TFreeCell)) cast[ptr TFreeCell](result).zeroField = 1 # mark it as used - assert(not isAllocatedPtr(allocator, result)) + sysAssert(not isAllocatedPtr(allocator, result)) result = cast[pointer](cast[TAddress](result) +% sizeof(TFreeCell)) - proc unlockedAlloc0(size: int): pointer {.inline.} = + proc unlockedAlloc0(size: int): pointer = result = unlockedAlloc(size) zeroMem(result, size) - proc unlockedDealloc(p: pointer) {.inline.} = + proc unlockedDealloc(p: pointer) = var x = cast[pointer](cast[TAddress](p) -% sizeof(TFreeCell)) - assert(cast[ptr TFreeCell](x).zeroField == 1) + sysAssert(cast[ptr TFreeCell](x).zeroField == 1) rawDealloc(allocator, x) - assert(not isAllocatedPtr(allocator, x)) + sysAssert(not isAllocatedPtr(allocator, x)) proc alloc(size: int): pointer = when hasThreadSupport and hasSharedHeap: AcquireSys(HeapLock) @@ -601,37 +609,18 @@ when not defined(useNimRtl): elif p != nil: dealloc(p) - proc countFreeMem(): int 
= - # only used for assertions - var it = allocator.freeChunksList - while it != nil: - inc(result, it.size) - it = it.next + when false: + proc countFreeMem(): int = + # only used for assertions + var it = allocator.freeChunksList + while it != nil: + inc(result, it.size) + it = it.next proc getFreeMem(): int = result = allocator.freeMem - #assert(result == countFreeMem()) + #sysAssert(result == countFreeMem()) proc getTotalMem(): int = return allocator.currMem proc getOccupiedMem(): int = return getTotalMem() - getFreeMem() -when isMainModule: - const iterations = 4000_000 - incl(allocator.chunkStarts, 11) - assert 11 in allocator.chunkStarts - excl(allocator.chunkStarts, 11) - assert 11 notin allocator.chunkStarts - var p: array [1..iterations, pointer] - for i in 7..7: - var x = i * 8 - for j in 1.. iterations: - p[j] = alloc(allocator, x) - for j in 1..iterations: - assert isAllocatedPtr(allocator, p[j]) - echo($i, " used memory: ", $(allocator.currMem)) - for j in countdown(iterations, 1): - #echo("j: ", $j) - dealloc(allocator, p[j]) - assert(not isAllocatedPtr(allocator, p[j])) - echo($i, " after freeing: ", $(allocator.currMem)) - diff --git a/lib/system/assign.nim b/lib/system/assign.nim index aa5cd3af3..33bfa15f3 100755 --- a/lib/system/assign.nim +++ b/lib/system/assign.nim @@ -24,7 +24,7 @@ proc genericAssignAux(dest, src: Pointer, n: ptr TNimNode, shallow: bool) = n.typ.size) var m = selectBranch(src, n) if m != nil: genericAssignAux(dest, src, m, shallow) - of nkNone: assert(false) + of nkNone: sysAssert(false) #else: # echo "ugh memory corruption! ", n.kind # quit 1 @@ -33,7 +33,7 @@ proc genericAssignAux(dest, src: Pointer, mt: PNimType, shallow: bool) = var d = cast[TAddress](dest) s = cast[TAddress](src) - assert(mt != nil) + sysAssert(mt != nil) case mt.Kind of tyString: var x = cast[ppointer](dest) @@ -50,7 +50,7 @@ proc genericAssignAux(dest, src: Pointer, mt: PNimType, shallow: bool) = # this can happen! 
nil sequences are allowed unsureAsgnRef(x, s2) return - assert(dest != nil) + sysAssert(dest != nil) unsureAsgnRef(x, newObj(mt, seq.len * mt.base.size + GenericSeqSize)) var dst = cast[taddress](cast[ppointer](dest)[]) for i in 0..seq.len-1: @@ -101,7 +101,7 @@ proc objectInit(dest: Pointer, typ: PNimType) {.compilerProc.} proc objectInitAux(dest: Pointer, n: ptr TNimNode) = var d = cast[TAddress](dest) case n.kind - of nkNone: assert(false) + of nkNone: sysAssert(false) of nkSLot: objectInit(cast[pointer](d +% n.offset), n.typ) of nkList: for i in 0..n.len-1: @@ -134,7 +134,7 @@ proc genericReset(dest: Pointer, mt: PNimType) {.compilerProc.} proc genericResetAux(dest: Pointer, n: ptr TNimNode) = var d = cast[TAddress](dest) case n.kind - of nkNone: assert(false) + of nkNone: sysAssert(false) of nkSlot: genericReset(cast[pointer](d +% n.offset), n.typ) of nkList: for i in 0..n.len-1: genericResetAux(dest, n.sons[i]) @@ -145,7 +145,7 @@ proc genericResetAux(dest: Pointer, n: ptr TNimNode) = proc genericReset(dest: Pointer, mt: PNimType) = var d = cast[TAddress](dest) - assert(mt != nil) + sysAssert(mt != nil) case mt.Kind of tyString, tyRef, tySequence: unsureAsgnRef(cast[ppointer](dest), nil) @@ -173,4 +173,4 @@ proc FieldDiscriminantCheck(oldDiscVal, newDiscVal: int, if newBranch != oldBranch and oldDiscVal != 0: raise newException(EInvalidField, "assignment to discriminant changes object branch") - + diff --git a/lib/system/atomics.nim b/lib/system/atomics.nim index 31c25c5af..64f8e03e0 100644 --- a/lib/system/atomics.nim +++ b/lib/system/atomics.nim @@ -22,14 +22,14 @@ else: inc(p, val) result = p -proc atomicInc(memLoc: var int, x: int): int = +proc atomicInc(memLoc: var int, x: int = 1): int = when hasThreadSupport: result = sync_add_and_fetch(memLoc, x) else: inc(memLoc, x) result = memLoc -proc atomicDec(memLoc: var int, x: int): int = +proc atomicDec(memLoc: var int, x: int = 1): int = when hasThreadSupport: when defined(sync_sub_and_fetch): result = sync_sub_and_fetch(memLoc, x) diff --git a/lib/system/cellsets.nim b/lib/system/cellsets.nim index e262d4b77..7502636fa 100755 --- a/lib/system/cellsets.nim +++ b/lib/system/cellsets.nim @@ -102,9 +102,9 @@ proc CellSetGet(t: TCellSet, key: TAddress): PPageDesc = proc CellSetRawInsert(t: TCellSet, data: PPageDescArray, desc: PPageDesc) = var h = cast[int](desc.key) and t.max while data[h] != nil: - assert(data[h] != desc) + sysAssert(data[h] != desc) h = nextTry(h, t.max) - assert(data[h] == nil) + sysAssert(data[h] == nil) data[h] = desc proc CellSetEnlarge(t: var TCellSet) = @@ -130,7 +130,7 @@ proc CellSetPut(t: var TCellSet, key: TAddress): PPageDesc = inc(t.counter) h = cast[int](key) and t.max while t.data[h] != nil: h = nextTry(h, t.max) - assert(t.data[h] == nil) + sysAssert(t.data[h] == nil) # the new page descriptor goes into result result = cast[PPageDesc](unlockedAlloc0(sizeof(TPageDesc))) result.next = t.head diff --git a/lib/system/ecmasys.nim b/lib/system/ecmasys.nim index e2ecb370a..7f91feb6b 100755 --- a/lib/system/ecmasys.nim +++ b/lib/system/ecmasys.nim @@ -408,7 +408,7 @@ proc NimCopy(x: pointer, ti: PNimType): pointer {.compilerproc.} proc NimCopyAux(dest, src: Pointer, n: ptr TNimNode) {.exportc.} = case n.kind - of nkNone: assert(false) + of nkNone: sysAssert(false) of nkSlot: asm "`dest`[`n`.offset] = NimCopy(`src`[`n`.offset], `n`.typ);" of nkList: diff --git a/lib/system/gc.nim b/lib/system/gc.nim index 29fd2eae5..d1fa98514 100755 --- a/lib/system/gc.nim +++ b/lib/system/gc.nim @@ -53,17 +53,20 @@ type 
TGcHeap {.final, pure.} = object # this contains the zero count and # non-zero count table + stackBottom: pointer + cycleThreshold: int zct: TCellSeq # the zero count table decStack: TCellSeq # cells in the stack that are to decref again cycleRoots: TCellSet tempStack: TCellSeq # temporary stack for recursion elimination recGcLock: int # prevent recursion via finalizers; no thread lock + region: TMemRegion # garbage collected region stat: TGcStat var - stackBottom {.rtlThreadVar.}: pointer gch {.rtlThreadVar.}: TGcHeap - cycleThreshold {.rtlThreadVar.}: int + +InstantiateForRegion(gch.region) proc acquire(gch: var TGcHeap) {.inline.} = when hasThreadSupport and hasSharedHeap: @@ -124,30 +127,30 @@ when traceGC: of csAllocated: if c in states[csAllocated]: writeCell("attempt to alloc an already allocated cell", c) - assert(false) + sysAssert(false) excl(states[csCycFreed], c) excl(states[csZctFreed], c) of csZctFreed: if c in states[csZctFreed]: writeCell("attempt to free zct cell twice", c) - assert(false) + sysAssert(false) if c in states[csCycFreed]: writeCell("attempt to free with zct, but already freed with cyc", c) - assert(false) + sysAssert(false) if c notin states[csAllocated]: writeCell("attempt to free not an allocated cell", c) - assert(false) + sysAssert(false) excl(states[csAllocated], c) of csCycFreed: if c notin states[csAllocated]: writeCell("attempt to free a not allocated cell", c) - assert(false) + sysAssert(false) if c in states[csCycFreed]: writeCell("attempt to free cyc cell twice", c) - assert(false) + sysAssert(false) if c in states[csZctFreed]: writeCell("attempt to free with cyc, but already freed with zct", c) - assert(false) + sysAssert(false) excl(states[csAllocated], c) incl(states[state], c) @@ -216,7 +219,7 @@ proc decRef(c: PCell) {.inline.} = when stressGC: if c.refcount <% rcIncrement: writeCell("broken cell", c) - assert(c.refcount >=% rcIncrement) + sysAssert(c.refcount >=% rcIncrement) #if c.refcount <% rcIncrement: quit("leck mich") if --c.refcount: rtlAddZCT(c) @@ -233,7 +236,7 @@ proc nimGCunref(p: pointer) {.compilerProc, inline.} = decRef(usrToCell(p)) proc asgnRef(dest: ppointer, src: pointer) {.compilerProc, inline.} = # the code generator calls this proc! - assert(not isOnStack(dest)) + sysAssert(not isOnStack(dest)) # BUGFIX: first incRef then decRef! 
if src != nil: incRef(usrToCell(src)) if dest[] != nil: decRef(usrToCell(dest[])) @@ -267,7 +270,7 @@ proc initGC() = when not defined(useNimRtl): when traceGC: for i in low(TCellState)..high(TCellState): Init(states[i]) - cycleThreshold = InitialCycleThreshold + gch.cycleThreshold = InitialCycleThreshold gch.stat.stackScans = 0 gch.stat.cycleCollections = 0 gch.stat.maxThreshold = 0 @@ -289,7 +292,7 @@ proc forAllSlotsAux(dest: pointer, n: ptr TNimNode, op: TWalkOp) = of nkCase: var m = selectBranch(dest, n) if m != nil: forAllSlotsAux(dest, m, op) - of nkNone: assert(false) + of nkNone: sysAssert(false) proc forAllChildrenAux(dest: Pointer, mt: PNimType, op: TWalkOp) = var d = cast[TAddress](dest) @@ -306,9 +309,9 @@ proc forAllChildrenAux(dest: Pointer, mt: PNimType, op: TWalkOp) = else: nil proc forAllChildren(cell: PCell, op: TWalkOp) = - assert(cell != nil) - assert(cell.typ != nil) - assert cell.typ.kind in {tyRef, tySequence, tyString} + sysAssert(cell != nil) + sysAssert(cell.typ != nil) + sysAssert cell.typ.kind in {tyRef, tySequence, tyString} case cell.typ.Kind of tyRef: # common case forAllChildrenAux(cellToUsr(cell), cell.typ.base, op) @@ -321,12 +324,7 @@ proc forAllChildren(cell: PCell, op: TWalkOp) = GenericSeqSize), cell.typ.base, op) else: nil -proc checkCollection {.inline.} = - # checks if a collection should be done - if gch.recGcLock == 0: - collectCT(gch) - -proc addNewObjToZCT(res: PCell) {.inline.} = +proc addNewObjToZCT(res: PCell, gch: var TGcHeap) {.inline.} = # we check the last 8 entries (cache line) for a slot that could be reused. # In 63% of all cases we succeed here! But we have to optimize the heck # out of this small linear search so that ``newObj`` is not slowed down. @@ -370,14 +368,14 @@ proc addNewObjToZCT(res: PCell) {.inline.} = return add(gch.zct, res) -proc newObj(typ: PNimType, size: int): pointer {.compilerRtl.} = +proc newObj(typ: PNimType, size: int, gch: var TGcHeap): pointer = # generates a new object and sets its reference counter to 0 acquire(gch) - assert(typ.kind in {tyRef, tyString, tySequence}) - checkCollection() - var res = cast[PCell](rawAlloc(allocator, size + sizeof(TCell))) + sysAssert(typ.kind in {tyRef, tyString, tySequence}) + collectCT(gch) + var res = cast[PCell](rawAlloc(gch.region, size + sizeof(TCell))) zeroMem(res, size+sizeof(TCell)) - assert((cast[TAddress](res) and (MemAlign-1)) == 0) + sysAssert((cast[TAddress](res) and (MemAlign-1)) == 0) # now it is buffered in the ZCT res.typ = typ when debugGC and not hasThreadSupport: @@ -385,13 +383,16 @@ proc newObj(typ: PNimType, size: int): pointer {.compilerRtl.} = res.filename = framePtr.prev.filename res.line = framePtr.prev.line res.refcount = rcZct # refcount is zero, but mark it to be in the ZCT - assert(isAllocatedPtr(allocator, res)) + sysAssert(isAllocatedPtr(gch.region, res)) # its refcount is zero, so add it to the ZCT: - addNewObjToZCT(res) + addNewObjToZCT(res, gch) when logGC: writeCell("new cell", res) gcTrace(res, csAllocated) release(gch) - result = cellToUsr(res) + result = cellToUsr(res) + +proc newObj(typ: PNimType, size: int): pointer {.compilerRtl.} = + result = newObj(typ, size, gch) proc newSeq(typ: PNimType, len: int): pointer {.compilerRtl.} = # `newObj` already uses locks, so no need for them here. 
@@ -399,23 +400,22 @@ proc newSeq(typ: PNimType, len: int): pointer {.compilerRtl.} = cast[PGenericSeq](result).len = len cast[PGenericSeq](result).space = len -proc growObj(old: pointer, newsize: int): pointer {.rtl.} = +proc growObj(old: pointer, newsize: int, gch: var TGcHeap): pointer = acquire(gch) - checkCollection() + collectCT(gch) var ol = usrToCell(old) - assert(ol.typ != nil) - assert(ol.typ.kind in {tyString, tySequence}) - var res = cast[PCell](rawAlloc(allocator, newsize + sizeof(TCell))) + sysAssert(ol.typ != nil) + sysAssert(ol.typ.kind in {tyString, tySequence}) + var res = cast[PCell](rawAlloc(gch.region, newsize + sizeof(TCell))) var elemSize = 1 - if ol.typ.kind != tyString: - elemSize = ol.typ.base.size + if ol.typ.kind != tyString: elemSize = ol.typ.base.size var oldsize = cast[PGenericSeq](old).len*elemSize + GenericSeqSize copyMem(res, ol, oldsize + sizeof(TCell)) zeroMem(cast[pointer](cast[TAddress](res)+% oldsize +% sizeof(TCell)), newsize-oldsize) - assert((cast[TAddress](res) and (MemAlign-1)) == 0) - assert(res.refcount shr rcShift <=% 1) + sysAssert((cast[TAddress](res) and (MemAlign-1)) == 0) + sysAssert(res.refcount shr rcShift <=% 1) #if res.refcount <% rcIncrement: # add(gch.zct, res) #else: # XXX: what to do here? @@ -434,29 +434,32 @@ proc growObj(old: pointer, newsize: int): pointer {.rtl.} = writeCell("growObj new cell", res) gcTrace(ol, csZctFreed) gcTrace(res, csAllocated) - when reallyDealloc: rawDealloc(allocator, ol) + when reallyDealloc: rawDealloc(gch.region, ol) else: - assert(ol.typ != nil) + sysAssert(ol.typ != nil) zeroMem(ol, sizeof(TCell)) release(gch) result = cellToUsr(res) +proc growObj(old: pointer, newsize: int): pointer {.rtl.} = + result = growObj(old, newsize, gch) + # ---------------- cycle collector ------------------------------------------- proc doOperation(p: pointer, op: TWalkOp) = if p == nil: return var c: PCell = usrToCell(p) - assert(c != nil) + sysAssert(c != nil) case op # faster than function pointers because of easy prediction of waZctDecRef: - assert(c.refcount >=% rcIncrement) + sysAssert(c.refcount >=% rcIncrement) c.refcount = c.refcount -% rcIncrement when logGC: writeCell("decref (from doOperation)", c) if c.refcount <% rcIncrement: addZCT(gch.zct, c) of waPush: add(gch.tempStack, c) of waCycleDecRef: - assert(c.refcount >=% rcIncrement) + sysAssert(c.refcount >=% rcIncrement) c.refcount = c.refcount -% rcIncrement # we now use a much simpler and non-recursive algorithm for cycle removal @@ -496,20 +499,20 @@ proc collectCycles(gch: var TGcHeap) = prepareDealloc(c) gcTrace(c, csCycFreed) when logGC: writeCell("cycle collector dealloc cell", c) - when reallyDealloc: rawDealloc(allocator, c) + when reallyDealloc: rawDealloc(gch.region, c) else: - assert(c.typ != nil) + sysAssert(c.typ != nil) zeroMem(c, sizeof(TCell)) Deinit(gch.cycleRoots) Init(gch.cycleRoots) -proc gcMark(p: pointer) {.inline.} = +proc gcMark(gch: var TGcHeap, p: pointer) {.inline.} = # the addresses are not as cells on the stack, so turn them to cells: var cell = usrToCell(p) var c = cast[TAddress](cell) if c >% PageSize and (c and (MemAlign-1)) == 0: # fast check: does it look like a cell? - if isAllocatedPtr(allocator, cell): + if isAllocatedPtr(gch.region, cell): # mark the cell: cell.refcount = cell.refcount +% rcIncrement add(gch.decStack, cell) @@ -520,13 +523,13 @@ proc markThreadStacks(gch: var TGcHeap) = var it = threadList while it != nil: # mark registers: - for i in 0 .. high(it.registers): gcMark(it.registers[i]) + for i in 0 .. 
high(it.registers): gcMark(gch, it.registers[i]) var sp = cast[TAddress](it.stackBottom) var max = cast[TAddress](it.stackTop) # XXX stack direction? # XXX unroll this loop: while sp <=% max: - gcMark(cast[ppointer](sp)[]) + gcMark(gch, cast[ppointer](sp)[]) sp = sp +% sizeof(pointer) it = it.next @@ -545,24 +548,24 @@ when not defined(useNimRtl): proc setStackBottom(theStackBottom: pointer) = #c_fprintf(c_stdout, "stack bottom: %p;\n", theStackBottom) # the first init must be the one that defines the stack bottom: - if stackBottom == nil: stackBottom = theStackBottom + if gch.stackBottom == nil: gch.stackBottom = theStackBottom else: var a = cast[TAddress](theStackBottom) # and not PageMask - PageSize*2 - var b = cast[TAddress](stackBottom) + var b = cast[TAddress](gch.stackBottom) when stackIncreases: - stackBottom = cast[pointer](min(a, b)) + gch.stackBottom = cast[pointer](min(a, b)) else: - stackBottom = cast[pointer](max(a, b)) + gch.stackBottom = cast[pointer](max(a, b)) proc stackSize(): int {.noinline.} = var stackTop {.volatile.}: pointer - result = abs(cast[int](addr(stackTop)) - cast[int](stackBottom)) + result = abs(cast[int](addr(stackTop)) - cast[int](gch.stackBottom)) when defined(sparc): # For SPARC architecture. proc isOnStack(p: pointer): bool = var stackTop {.volatile.}: pointer stackTop = addr(stackTop) - var b = cast[TAddress](stackBottom) + var b = cast[TAddress](gch.stackBottom) var a = cast[TAddress](stackTop) var x = cast[TAddress](p) result = a <=% x and x <=% b @@ -574,13 +577,13 @@ when defined(sparc): # For SPARC architecture. asm """"ta 0x3 ! ST_FLUSH_WINDOWS\n" """ var - max = stackBottom + max = gch.stackBottom sp: PPointer stackTop: array[0..1, pointer] sp = addr(stackTop[0]) # Addresses decrease as the stack grows. while sp <= max: - gcMark(sp[]) + gcMark(gch, sp[]) sp = cast[ppointer](cast[TAddress](sp) +% sizeof(pointer)) elif defined(ELATE): @@ -593,7 +596,7 @@ elif stackIncreases: proc isOnStack(p: pointer): bool = var stackTop {.volatile.}: pointer stackTop = addr(stackTop) - var a = cast[TAddress](stackBottom) + var a = cast[TAddress](gch.stackBottom) var b = cast[TAddress](stackTop) var x = cast[TAddress](p) result = a <=% x and x <=% b @@ -606,12 +609,12 @@ elif stackIncreases: proc markStackAndRegisters(gch: var TGcHeap) {.noinline, cdecl.} = var registers: C_JmpBuf if c_setjmp(registers) == 0'i32: # To fill the C stack with registers. - var max = cast[TAddress](stackBottom) + var max = cast[TAddress](gch.stackBottom) var sp = cast[TAddress](addr(registers)) +% jmpbufSize -% sizeof(pointer) # sp will traverse the JMP_BUF as well (jmp_buf size is added, # otherwise sp would be below the registers structure). while sp >=% max: - gcMark(cast[ppointer](sp)[]) + gcMark(gch, cast[ppointer](sp)[]) sp = sp -% sizeof(pointer) else: @@ -621,7 +624,7 @@ else: proc isOnStack(p: pointer): bool = var stackTop {.volatile.}: pointer stackTop = addr(stackTop) - var b = cast[TAddress](stackBottom) + var b = cast[TAddress](gch.stackBottom) var a = cast[TAddress](stackTop) var x = cast[TAddress](p) result = a <=% x and x <=% b @@ -633,22 +636,22 @@ else: type PStackSlice = ptr array [0..7, pointer] var registers: C_JmpBuf if c_setjmp(registers) == 0'i32: # To fill the C stack with registers. 
- var max = cast[TAddress](stackBottom) + var max = cast[TAddress](gch.stackBottom) var sp = cast[TAddress](addr(registers)) # loop unrolled: while sp <% max - 8*sizeof(pointer): - gcMark(cast[PStackSlice](sp)[0]) - gcMark(cast[PStackSlice](sp)[1]) - gcMark(cast[PStackSlice](sp)[2]) - gcMark(cast[PStackSlice](sp)[3]) - gcMark(cast[PStackSlice](sp)[4]) - gcMark(cast[PStackSlice](sp)[5]) - gcMark(cast[PStackSlice](sp)[6]) - gcMark(cast[PStackSlice](sp)[7]) + gcMark(gch, cast[PStackSlice](sp)[0]) + gcMark(gch, cast[PStackSlice](sp)[1]) + gcMark(gch, cast[PStackSlice](sp)[2]) + gcMark(gch, cast[PStackSlice](sp)[3]) + gcMark(gch, cast[PStackSlice](sp)[4]) + gcMark(gch, cast[PStackSlice](sp)[5]) + gcMark(gch, cast[PStackSlice](sp)[6]) + gcMark(gch, cast[PStackSlice](sp)[7]) sp = sp +% sizeof(pointer)*8 # last few entries: while sp <=% max: - gcMark(cast[ppointer](sp)[]) + gcMark(gch, cast[ppointer](sp)[]) sp = sp +% sizeof(pointer) # ---------------------------------------------------------------------------- @@ -664,7 +667,7 @@ proc CollectZCT(gch: var TGcHeap) = while L[] > 0: var c = gch.zct.d[0] # remove from ZCT: - assert((c.refcount and colorMask) == rcZct) + sysAssert((c.refcount and colorMask) == rcZct) c.refcount = c.refcount and not colorMask gch.zct.d[0] = gch.zct.d[L[] - 1] dec(L[]) @@ -683,41 +686,42 @@ proc CollectZCT(gch: var TGcHeap) = # access invalid memory. This is done by prepareDealloc(): prepareDealloc(c) forAllChildren(c, waZctDecRef) - when reallyDealloc: rawDealloc(allocator, c) + when reallyDealloc: rawDealloc(gch.region, c) else: - assert(c.typ != nil) + sysAssert(c.typ != nil) zeroMem(c, sizeof(TCell)) proc unmarkStackAndRegisters(gch: var TGcHeap) = var d = gch.decStack.d for i in 0..gch.decStack.len-1: - assert isAllocatedPtr(allocator, d[i]) + sysAssert isAllocatedPtr(allocator, d[i]) # decRef(d[i]) inlined: cannot create a cycle and must not acquire lock var c = d[i] # XXX no need for an atomic dec here: if --c.refcount: addZCT(gch.zct, c) - assert c.typ != nil + sysAssert c.typ != nil gch.decStack.len = 0 proc collectCT(gch: var TGcHeap) = - if gch.zct.len >= ZctThreshold or (cycleGC and - getOccupiedMem() >= cycleThreshold) or stressGC: + if (gch.zct.len >= ZctThreshold or (cycleGC and + getOccupiedMem(gch.region) >= gch.cycleThreshold) or stressGC) and + gch.recGcLock == 0: gch.stat.maxStackSize = max(gch.stat.maxStackSize, stackSize()) - assert(gch.decStack.len == 0) + sysAssert(gch.decStack.len == 0) markStackAndRegisters(gch) markThreadStacks(gch) gch.stat.maxStackCells = max(gch.stat.maxStackCells, gch.decStack.len) inc(gch.stat.stackScans) collectZCT(gch) when cycleGC: - if getOccupiedMem() >= cycleThreshold or stressGC: + if getOccupiedMem() >= gch.cycleThreshold or stressGC: collectCycles(gch) collectZCT(gch) inc(gch.stat.cycleCollections) - cycleThreshold = max(InitialCycleThreshold, getOccupiedMem() * - cycleIncrease) - gch.stat.maxThreshold = max(gch.stat.maxThreshold, cycleThreshold) + gch.cycleThreshold = max(InitialCycleThreshold, getOccupiedMem() * + cycleIncrease) + gch.stat.maxThreshold = max(gch.stat.maxThreshold, gch.cycleThreshold) unmarkStackAndRegisters(gch) when not defined(useNimRtl): @@ -741,18 +745,18 @@ when not defined(useNimRtl): of gcOptimizeTime: nil proc GC_enableMarkAndSweep() = - cycleThreshold = InitialCycleThreshold + gch.cycleThreshold = InitialCycleThreshold proc GC_disableMarkAndSweep() = - cycleThreshold = high(cycleThreshold)-1 + gch.cycleThreshold = high(gch.cycleThreshold)-1 # set to the max value to suppress the cycle 
detector proc GC_fullCollect() = acquire(gch) - var oldThreshold = cycleThreshold - cycleThreshold = 0 # forces cycle collection + var oldThreshold = gch.cycleThreshold + gch.cycleThreshold = 0 # forces cycle collection collectCT(gch) - cycleThreshold = oldThreshold + gch.cycleThreshold = oldThreshold release(gch) proc GC_getStatistics(): string = diff --git a/lib/system/inboxes.nim b/lib/system/inboxes.nim new file mode 100644 index 000000000..8f683f612 --- /dev/null +++ b/lib/system/inboxes.nim @@ -0,0 +1,203 @@ +# +# +# Nimrod's Runtime Library +# (c) Copyright 2011 Andreas Rumpf +# +# See the file "copying.txt", included in this +# distribution, for details about the copyright. +# + +## Message passing for threads. The current implementation is slow and does +## not work with cyclic data structures. But hey, it's better than nothing. + +type + pbytes = ptr array[0.. 0xffff, byte] + TInbox {.pure, final.} = object ## msg queue for a thread + rd, wr, count, mask: int + data: pbytes + lock: TSysLock + cond: TSysCond + elemType: PNimType + region: TMemRegion + PInbox = ptr TInbox + TLoadStoreMode = enum mStore, mLoad + +proc initInbox(p: pointer) = + var inbox = cast[PInbox](p) + initSysLock(inbox.lock) + initSysCond(inbox.cond) + inbox.mask = -1 + +proc freeInbox(p: pointer) = + var inbox = cast[PInbox](p) + deallocOsPages(inbox.region) + deinitSys(inbox.lock) + deinitSysCond(inbox.cond) + +proc storeAux(dest, src: Pointer, mt: PNimType, t: PInbox, mode: TLoadStoreMode) +proc storeAux(dest, src: Pointer, n: ptr TNimNode, t: PInbox, + mode: TLoadStoreMode) = + var + d = cast[TAddress](dest) + s = cast[TAddress](src) + case n.kind + of nkSlot: storeAux(cast[pointer](d +% n.offset), + cast[pointer](s +% n.offset), n.typ, t, mode) + of nkList: + for i in 0..n.len-1: storeAux(dest, src, n.sons[i], t, mode) + of nkCase: + copyMem(cast[pointer](d +% n.offset), cast[pointer](s +% n.offset), + n.typ.size) + var m = selectBranch(src, n) + if m != nil: storeAux(dest, src, m, t, mode) + of nkNone: sysAssert(false) + +proc storeAux(dest, src: Pointer, mt: PNimType, t: PInbox, + mode: TLoadStoreMode) = + var + d = cast[TAddress](dest) + s = cast[TAddress](src) + sysAssert(mt != nil) + case mt.Kind + of tyString: + if mode == mStore: + var x = cast[ppointer](dest) + var s2 = cast[ppointer](s)[] + if s2 == nil: + x[] = nil + else: + var ss = cast[NimString](s2) + var ns = cast[NimString](rawAlloc(t.region, ss.len+1 + GenericSeqSize)) + copyMem(ns, ss, ss.len+1 + GenericSeqSize) + x[] = ns + else: + var x = cast[ppointer](dest) + var s2 = cast[ppointer](s)[] + if s2 == nil: + unsureAsgnRef(x, s2) + else: + unsureAsgnRef(x, copyString(cast[NimString](s2))) + rawDealloc(t.region, s2) + of tySequence: + var s2 = cast[ppointer](src)[] + var seq = cast[PGenericSeq](s2) + var x = cast[ppointer](dest) + if s2 == nil: + if mode == mStore: + x[] = nil + else: + unsureAsgnRef(x, nil) + else: + sysAssert(dest != nil) + if mode == mStore: + x[] = rawAlloc(t.region, seq.len *% mt.base.size +% GenericSeqSize) + else: + unsureAsgnRef(x, newObj(mt, seq.len * mt.base.size + GenericSeqSize)) + var dst = cast[taddress](cast[ppointer](dest)[]) + for i in 0..seq.len-1: + storeAux( + cast[pointer](dst +% i*% mt.base.size +% GenericSeqSize), + cast[pointer](cast[TAddress](s2) +% i *% mt.base.size +% + GenericSeqSize), + mt.Base, t, mode) + var dstseq = cast[PGenericSeq](dst) + dstseq.len = seq.len + dstseq.space = seq.len + if mode != mStore: rawDealloc(t.region, s2) + of tyObject: + # copy type field: + var pint = cast[ptr 
PNimType](dest) + # XXX use dynamic type here! + pint[] = mt + storeAux(dest, src, mt.node, t, mode) + of tyTuple, tyPureObject: + storeAux(dest, src, mt.node, t, mode) + of tyArray, tyArrayConstr: + for i in 0..(mt.size div mt.base.size)-1: + storeAux(cast[pointer](d +% i*% mt.base.size), + cast[pointer](s +% i*% mt.base.size), mt.base, t, mode) + of tyRef: + var s = cast[ppointer](src)[] + var x = cast[ppointer](dest) + if s == nil: + if mode == mStore: + x[] = nil + else: + unsureAsgnRef(x, nil) + else: + if mode == mStore: + x[] = rawAlloc(t.region, mt.base.size) + else: + # XXX we should use the dynamic type here too, but that is not stored in + # the inbox at all --> use source[]'s object type? but how? we need a + # tyRef to the object! + var obj = newObj(mt.base, mt.base.size) + unsureAsgnRef(x, obj) + storeAux(x[], s, mt.base, t, mode) + if mode != mStore: rawDealloc(t.region, s) + else: + copyMem(dest, src, mt.size) # copy raw bits + +proc rawSend(q: PInbox, data: pointer, typ: PNimType) = + ## adds an `item` to the end of the queue `q`. + var cap = q.mask+1 + if q.count >= cap: + # start with capicity for 2 entries in the queue: + if cap == 0: cap = 1 + var n = cast[pbytes](rawAlloc0(q.region, cap*2*typ.size)) + var z = 0 + var i = q.rd + var c = q.count + while c > 0: + dec c + copyMem(addr(n[z*typ.size]), addr(q.data[i*typ.size]), typ.size) + i = (i + 1) and q.mask + inc z + if q.data != nil: rawDealloc(q.region, q.data) + q.data = n + q.mask = cap*2 - 1 + q.wr = q.count + q.rd = 0 + #echo "came here" + storeAux(addr(q.data[q.wr * typ.size]), data, typ, q, mStore) + inc q.count + q.wr = (q.wr + 1) and q.mask + +proc rawRecv(q: PInbox, data: pointer, typ: PNimType) = + assert q.count > 0 + dec q.count + storeAux(data, addr(q.data[q.rd * typ.size]), typ, q, mLoad) + q.rd = (q.rd + 1) and q.mask + +template lockInbox(q: expr, action: stmt) = + acquireSys(q.lock) + action + releaseSys(q.lock) + +proc send*[TMsg](receiver: var TThread[TMsg], msg: TMsg) = + ## sends a message to a thread. `msg` is deeply copied. + var q = cast[PInbox](getInBoxMem(receiver)) + acquireSys(q.lock) + var m: TMsg + shallowCopy(m, msg) + rawSend(q, addr(m), cast[PNimType](getTypeInfo(msg))) + releaseSys(q.lock) + SignalSysCond(q.cond) + +proc recv*[TMsg](): TMsg = + ## receives a message from its internal message queue. This blocks until + ## a message has arrived! You may use ``peek`` to avoid the blocking. + var q = cast[PInbox](getInBoxMem()) + acquireSys(q.lock) + while q.count <= 0: + WaitSysCond(q.cond, q.lock) + rawRecv(q, addr(result), cast[PNimType](getTypeInfo(result))) + releaseSys(q.lock) + +proc peek*(): int = + ## returns the current number of messages in the inbox. + var q = cast[PInbox](getInBoxMem()) + lockInbox(q): + result = q.count + + diff --git a/lib/system/mmdisp.nim b/lib/system/mmdisp.nim index d450c520e..e5efff615 100755 --- a/lib/system/mmdisp.nim +++ b/lib/system/mmdisp.nim @@ -62,11 +62,10 @@ when defined(boehmgc): const boehmLib = "boehmgc.dll" elif defined(macosx): const boehmLib = "libgc.dylib" - - proc boehmGCinit {.importc: "GC_init", dynlib: boehmLib.} else: const boehmLib = "/usr/lib/libgc.so.1" - + + proc boehmGCinit {.importc: "GC_init", dynlib: boehmLib.} proc boehmGC_disable {.importc: "GC_disable", dynlib: boehmLib.} proc boehmGC_enable {.importc: "GC_enable", dynlib: boehmLib.} proc boehmGCincremental {. 
@@ -177,12 +176,20 @@ elif defined(nogc): proc asgnRefNoCycle(dest: ppointer, src: pointer) {.compilerproc, inline.} = dest[] = src + var allocator {.rtlThreadVar.}: TMemRegion + InstantiateForRegion(allocator) + include "system/cellsets" else: include "system/alloc" + + proc unlockedAlloc(size: int): pointer {.inline.} + proc unlockedAlloc0(size: int): pointer {.inline.} + proc unlockedDealloc(p: pointer) {.inline.} + include "system/cellsets" - assert(sizeof(TCell) == sizeof(TFreeCell)) + sysAssert(sizeof(TCell) == sizeof(TFreeCell)) include "system/gc" {.pop.} diff --git a/lib/system/repr.nim b/lib/system/repr.nim index 256313ebd..6b940ccb4 100755 --- a/lib/system/repr.nim +++ b/lib/system/repr.nim @@ -158,7 +158,7 @@ when not defined(useNimRtl): proc reprRecordAux(result: var string, p: pointer, n: ptr TNimNode, cl: var TReprClosure) = case n.kind - of nkNone: assert(false) + of nkNone: sysAssert(false) of nkSlot: add result, $n.name add result, " = " @@ -206,7 +206,7 @@ when not defined(useNimRtl): var t = cast[ptr PNimType](p)[] reprRecord(result, p, t, cl) of tyRef, tyPtr: - assert(p != nil) + sysAssert(p != nil) if cast[ppointer](p)[] == nil: add result, "nil" else: reprRef(result, cast[ppointer](p)[], typ, cl) of tySequence: diff --git a/lib/system/syslocks.nim b/lib/system/syslocks.nim new file mode 100644 index 000000000..c91e83dcd --- /dev/null +++ b/lib/system/syslocks.nim @@ -0,0 +1,101 @@ +# +# +# Nimrod's Runtime Library +# (c) Copyright 2011 Andreas Rumpf +# +# See the file "copying.txt", included in this +# distribution, for details about the copyright. +# + +## Low level system locks and condition vars. + +when defined(Windows): + type + THandle = int + TSysLock {.final, pure.} = object # CRITICAL_SECTION in WinApi + DebugInfo: pointer + LockCount: int32 + RecursionCount: int32 + OwningThread: int + LockSemaphore: int + Reserved: int32 + + TSysCond = THandle + + proc InitSysLock(L: var TSysLock) {.stdcall, noSideEffect, + dynlib: "kernel32", importc: "InitializeCriticalSection".} + ## Initializes the lock `L`. + + proc TryAcquireSysAux(L: var TSysLock): int32 {.stdcall, noSideEffect, + dynlib: "kernel32", importc: "TryEnterCriticalSection".} + ## Tries to acquire the lock `L`. + + proc TryAcquireSys(L: var TSysLock): bool {.inline.} = + result = TryAcquireSysAux(L) != 0'i32 + + proc AcquireSys(L: var TSysLock) {.stdcall, noSideEffect, + dynlib: "kernel32", importc: "EnterCriticalSection".} + ## Acquires the lock `L`. + + proc ReleaseSys(L: var TSysLock) {.stdcall, noSideEffect, + dynlib: "kernel32", importc: "LeaveCriticalSection".} + ## Releases the lock `L`. + + proc DeinitSys(L: var TSysLock) {.stdcall, noSideEffect, + dynlib: "kernel32", importc: "DeleteCriticalSection".} + + proc CreateEvent(lpEventAttributes: pointer, + bManualReset, bInitialState: int32, + lpName: cstring): TSysCond {.stdcall, noSideEffect, + dynlib: "kernel32", importc: "CreateEvent".} + + proc CloseHandle(hObject: THandle) {.stdcall, noSideEffect, + dynlib: "kernel32", importc: "CloseHandle".} + proc WaitForSingleObject(hHandle: THandle, dwMilliseconds: int32): int32 {. 
diff --git a/lib/system/threads.nim b/lib/system/threads.nim
index 86a6a5691..9bb67863b 100755
--- a/lib/system/threads.nim
+++ b/lib/system/threads.nim
@@ -25,8 +25,8 @@
 ##    thr: array [0..4, TThread[tuple[a,b: int]]]
 ##    L: TLock
 ##
-##  proc threadFunc(interval: tuple[a,b: int]) {.procvar.} =
-##    for i in interval.a..interval.b:
+##  proc threadFunc(interval: tuple[a,b: int]) {.thread.} =
+##    for i in interval.a..interval.b:
 ##      Acquire(L) # lock stdout
 ##      echo i
 ##      Release(L)
@@ -41,38 +41,13 @@ const
   maxRegisters = 256 # don't think there is an arch with more registers
   maxLocksPerThread* = 10 ## max number of locks a thread can hold
                           ## at the same time
+  useStackMaskHack = false ## use the stack mask hack for better performance
+  StackGuardSize = 4096
+  ThreadStackMask = 1024*256*sizeof(int)-1
+  ThreadStackSize = ThreadStackMask+1 - StackGuardSize
 
-when defined(Windows):
-  type
-    TSysLock {.final, pure.} = object # CRITICAL_SECTION in WinApi
-      DebugInfo: pointer
-      LockCount: int32
-      RecursionCount: int32
-      OwningThread: int
-      LockSemaphore: int
-      Reserved: int32
-
-  proc InitSysLock(L: var TSysLock) {.stdcall, noSideEffect,
-    dynlib: "kernel32", importc: "InitializeCriticalSection".}
-    ## Initializes the lock `L`.
-
-  proc TryAcquireSysAux(L: var TSysLock): int32 {.stdcall, noSideEffect,
-    dynlib: "kernel32", importc: "TryEnterCriticalSection".}
-    ## Tries to acquire the lock `L`.
-
-  proc TryAcquireSys(L: var TSysLock): bool {.inline.} =
-    result = TryAcquireSysAux(L) != 0'i32
-
-  proc AcquireSys(L: var TSysLock) {.stdcall, noSideEffect,
-    dynlib: "kernel32", importc: "EnterCriticalSection".}
-    ## Acquires the lock `L`.
-
-  proc ReleaseSys(L: var TSysLock) {.stdcall, noSideEffect,
-    dynlib: "kernel32", importc: "LeaveCriticalSection".}
-    ## Releases the lock `L`.
-
+when defined(windows):
   type
-    THandle = int
     TSysThread = THandle
     TWinThreadProc = proc (x: pointer): int32 {.stdcall.}
 
@@ -95,9 +70,6 @@ when defined(Windows):
                             dwMilliseconds: int32): int32 {.
     stdcall, dynlib: "kernel32", importc: "WaitForMultipleObjects".}
 
-  proc WaitForSingleObject(hHandle: TSysThread, dwMilliseconds: int32): int32 {.
-    stdcall, dynlib: "kernel32", importc: "WaitForSingleObject".}
-
   proc TerminateThread(hThread: TSysThread, dwExitCode: int32): int32 {.
     stdcall, dynlib: "kernel32", importc: "TerminateThread".}
 
@@ -116,24 +88,6 @@ else:
   {.passC: "-pthread".}
 
   type
-    TSysLock {.importc: "pthread_mutex_t", pure, final,
-               header: "<sys/types.h>".} = object
-
-  proc InitSysLock(L: var TSysLock, attr: pointer = nil) {.
-    importc: "pthread_mutex_init", header: "<pthread.h>", noSideEffect.}
-
-  proc AcquireSys(L: var TSysLock) {.noSideEffect,
-    importc: "pthread_mutex_lock", header: "<pthread.h>".}
-  proc TryAcquireSysAux(L: var TSysLock): cint {.noSideEffect,
-    importc: "pthread_mutex_trylock", header: "<pthread.h>".}
-
-  proc TryAcquireSys(L: var TSysLock): bool {.inline.} =
-    result = TryAcquireSysAux(L) == 0'i32
-
-  proc ReleaseSys(L: var TSysLock) {.noSideEffect,
-    importc: "pthread_mutex_unlock", header: "<pthread.h>".}
-
-  type
     TSysThread {.importc: "pthread_t", header: "<sys/types.h>",
                  final, pure.} = object
     Tpthread_attr {.importc: "pthread_attr_t",
@@ -191,57 +145,71 @@ else:
   proc ThreadVarGetValue(s: TThreadVarSlot): pointer {.inline.} =
     result = pthread_getspecific(s)
 
-const emulatedThreadVars = defined(macosx)
+  when useStackMaskHack:
+    proc pthread_attr_setstack(attr: var TPthread_attr, stackaddr: pointer,
+                               size: int): cint {.
+      importc: "pthread_attr_setstack", header: "<pthread.h>".}
+
+const
+  emulatedThreadVars = true
 
 when emulatedThreadVars:
   # the compiler generates this proc for us, so that we can get the size of
-  # the thread local var block:
+  # the thread local var block; we use this only for sanity checking though
   proc NimThreadVarsSize(): int {.noconv, importc: "NimThreadVarsSize".}
 
-proc ThreadVarsAlloc(size: int): pointer =
-  result = c_malloc(size)
-  zeroMem(result, size)
-proc ThreadVarsDealloc(p: pointer) {.importc: "free", nodecl.}
-
+# we preallocate a fixed size for thread local storage, so that no heap
+# allocations are needed. Currently less than 7K are used on a 64bit machine.
+# We use ``float`` for proper alignment:
 type
+  TThreadLocalStorage = array [0..1_000, float]
+
   PGcThread = ptr TGcThread
   TGcThread {.pure.} = object
     sys: TSysThread
     next, prev: PGcThread
-    stackBottom, stackTop, threadLocalStorage: pointer
+    stackBottom, stackTop: pointer
     stackSize: int
-    locksLen: int
-    locks: array [0..MaxLocksPerThread-1, pointer]
-    registers: array[0..maxRegisters-1, pointer] # register contents for GC
+    inbox: TThreadLocalStorage
+    when emulatedThreadVars and not useStackMaskHack:
+      tls: TThreadLocalStorage
+    else:
+      nil
 
 # XXX it'd be more efficient to not use a global variable for the
 # thread storage slot, but to rely on the implementation to assign slot 0
 # for us... ;-)
 
 var globalsSlot = ThreadVarAlloc()
 #const globalsSlot = TThreadVarSlot(0)
-#assert checkSlot.int == globalsSlot.int
-
-proc ThisThread(): PGcThread {.compilerRtl, inl.} =
-  result = cast[PGcThread](ThreadVarGetValue(globalsSlot))
+#sysAssert checkSlot.int == globalsSlot.int
 
 proc GetThreadLocalVars(): pointer {.compilerRtl, inl.} =
-  result = cast[PGcThread](ThreadVarGetValue(globalsSlot)).threadLocalStorage
+  result = addr(cast[PGcThread](ThreadVarGetValue(globalsSlot)).tls)
+
+when useStackMaskHack:
+  proc MaskStackPointer(offset: int): pointer {.compilerRtl, inl.} =
+    var x {.volatile.}: pointer
+    x = addr(x)
+    result = cast[pointer]((cast[int](x) and not ThreadStackMask) +%
+                           (0) +% offset)
 
 # create for the main thread. Note: do not insert this data into the list
 # of all threads; it's not to be stopped etc.
 when not defined(useNimRtl):
-  var mainThread: TGcThread
-
-  ThreadVarSetValue(globalsSlot, addr(mainThread))
-  when emulatedThreadVars:
-    mainThread.threadLocalStorage = ThreadVarsAlloc(NimThreadVarsSize())
-
-  initStackBottom()
-  initGC()
+  when not useStackMaskHack:
+    var mainThread: TGcThread
+    ThreadVarSetValue(globalsSlot, addr(mainThread))
+    initStackBottom()
+    initGC()
 
   var heapLock: TSysLock
   InitSysLock(HeapLock)
 
+  when emulatedThreadVars:
+    if NimThreadVarsSize() > sizeof(TThreadLocalStorage):
+      echo "too large thread local storage size requested"
+      quit 1
+
   var
     threadList: PGcThread
 
@@ -251,11 +219,11 @@ when not defined(useNimRtl):
     t.prev = nil
     t.next = threadList
     if threadList != nil:
-      assert(threadList.prev == nil)
+      sysAssert(threadList.prev == nil)
       threadList.prev = t
     threadList = t
     ReleaseSys(HeapLock)
-
+
   proc unregisterThread(t: PGcThread) =
     # we need to use the GC global lock here!
     AcquireSys(HeapLock)
@@ -270,9 +238,7 @@ when not defined(useNimRtl):
 
 # on UNIX, the GC uses ``SIGFREEZE`` to tell every thread to stop so that
 # the GC can examine the stacks?
-
-proc stopTheWord() =
-  nil
+proc stopTheWord() = nil
 
 # We jump through some hoops here to ensure that Nimrod thread procs can have
 # the Nimrod calling convention. This is needed because thread procs are
@@ -286,26 +252,33 @@ type
     fn: proc (p: TParam)
     data: TParam
 
+proc initInbox(p: pointer)
+proc freeInbox(p: pointer)
 when not defined(boehmgc) and not hasSharedHeap:
   proc deallocOsPages()
 
 template ThreadProcWrapperBody(closure: expr) =
   ThreadVarSetValue(globalsSlot, closure)
   var t = cast[ptr TThread[TParam]](closure)
-  when emulatedThreadVars:
-    t.threadLocalStorage = ThreadVarsAlloc(NimThreadVarsSize())
+  when useStackMaskHack:
+    var tls: TThreadLocalStorage
   when not defined(boehmgc) and not hasSharedHeap:
     # init the GC for this thread:
     setStackBottom(addr(t))
     initGC()
   t.stackBottom = addr(t)
   registerThread(t)
+  initInbox(addr(t.inbox))
   try:
+    when false:
+      var a = addr(tls)
+      var b = MaskStackPointer(1293920-372736-303104-36864)
+      c_fprintf(c_stdout, "TLS: %p\nmasked: %p\ndiff: %ld\n",
+                a, b, cast[int](a) - cast[int](b))
     t.fn(t.data)
   finally:
    # XXX shut-down is not executed when the thread is forced down!
-    when emulatedThreadVars:
-      ThreadVarsDealloc(t.threadLocalStorage)
+    freeInbox(addr(t.inbox))
     unregisterThread(t)
     when defined(deallocOsPages): deallocOsPages()
 
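The wrapper above initializes the GC and the thread's inbox before the user's proc runs and tears both down afterwards; user code only sees the ``createThread``/``joinThread`` surface (note the new signature in the next hunk, which drops the ``stackSize`` parameter). A minimal sketch, assuming a program compiled with threads enabled:

.. code-block:: nimrod

  var counter: TThread[int]

  proc count(upTo: int) {.thread.} =
    # runs inside ThreadProcWrapperBody: GC and inbox are already set up
    for i in 1..upTo: echo i

  createThread(counter, count, 3)  # stack size is now fixed: ThreadStackSize
  joinThread(counter)
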
@@ -330,7 +303,7 @@ proc joinThreads*[TParam](t: openArray[TThread[TParam]]) =
   ## waits for every thread in `t` to finish.
   when hostOS == "windows":
     var a: array[0..255, TSysThread]
-    assert a.len >= t.len
+    sysAssert a.len >= t.len
     for i in 0..t.high: a[i] = t[i].sys
     discard WaitForMultipleObjects(t.len,
       cast[ptr TSysThread](addr(a)), 1, -1)
   else:
@@ -338,7 +311,7 @@ proc joinThreads*[TParam](t: openArray[TThread[TParam]]) =
 
 when false:
   # XXX a thread should really release its heap here somehow:
-  proc destroyThread*[TParam](t: var TThread[TParam]) {.inline.} =
+  proc destroyThread*[TParam](t: var TThread[TParam]) =
     ## forces the thread `t` to terminate. This is potentially dangerous if
     ## you don't have full control over `t` and its acquired resources.
     when hostOS == "windows":
@@ -348,28 +321,32 @@ when false:
     unregisterThread(addr(t))
 
 proc createThread*[TParam](t: var TThread[TParam],
-                           tp: proc (param: TParam),
-                           param: TParam,
-                           stackSize = 1024*256*sizeof(int)) {.
-  magic: "CreateThread".} =
+                           tp: proc (param: TParam) {.thread.},
+                           param: TParam) =
   ## creates a new thread `t` and starts its execution. Entry point is the
   ## proc `tp`. `param` is passed to `tp`.
   t.data = param
   t.fn = tp
-  t.stackSize = stackSize
+  t.stackSize = ThreadStackSize
   when hostOS == "windows":
     var dummyThreadId: int32
-    t.sys = CreateThread(nil, stackSize, threadProcWrapper[TParam],
+    t.sys = CreateThread(nil, ThreadStackSize, threadProcWrapper[TParam],
                          addr(t), 0'i32, dummyThreadId)
     if t.sys <= 0:
       raise newException(EResourceExhausted, "cannot create thread")
   else:
     var a: Tpthread_attr
     pthread_attr_init(a)
-    pthread_attr_setstacksize(a, stackSize)
+    pthread_attr_setstacksize(a, ThreadStackSize)
     if pthread_create(t.sys, a, threadProcWrapper[TParam], addr(t)) != 0:
       raise newException(EResourceExhausted, "cannot create thread")
 
+when useStackMaskHack:
+  proc runMain(tp: proc (dummy: pointer) {.thread.}) {.compilerproc.} =
+    var mainThread: TThread[pointer]
+    createThread(mainThread, tp, nil)
+    joinThread(mainThread)
+
 # --------------------------- lock handling ----------------------------------
 
 type
@@ -380,18 +357,20 @@ const
 
 when nodeadlocks:
   var
-    deadlocksPrevented* = 0  ## counts the number of times a
+    deadlocksPrevented*: int ## counts the number of times a
                              ## deadlock has been prevented
+    locksLen {.threadvar.}: int
+    locks {.threadvar.}: array [0..MaxLocksPerThread-1, pointer]
+
+  proc OrderedLocks(): bool =
+    for i in 0 .. locksLen-2:
+      if locks[i] >= locks[i+1]: return false
+    result = true
 
 proc InitLock*(lock: var TLock) {.inline.} =
   ## Initializes the lock `lock`.
   InitSysLock(lock)
 
-proc OrderedLocks(g: PGcThread): bool =
-  for i in 0 .. g.locksLen-2:
-    if g.locks[i] >= g.locks[i+1]: return false
-  result = true
-
 proc TryAcquire*(lock: var TLock): bool {.inline.} =
   ## Tries to acquire the lock `lock`. Returns `true` on success.
   result = TryAcquireSys(lock)
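When the module is compiled with deadlock prevention enabled (the ``nodeadlocks`` switch above, now backed by a ``{.threadvar.}`` lock list instead of the per-thread ``TGcThread`` fields), ``Acquire`` keeps every thread's held locks sorted by address, temporarily releasing and reacquiring locks to restore that order. The effect is sketched below with hypothetical thread procs ``f`` and ``g``, whose opposite acquisition order would normally risk a deadlock; this is illustrative only and assumes ``nodeadlocks`` is active:

.. code-block:: nimrod

  var a, b: TLock
  InitLock(a)
  InitLock(b)

  proc f(dummy: int) {.thread.} =
    Acquire(a)
    Acquire(b)
    Release(b)
    Release(a)

  proc g(dummy: int) {.thread.} =
    Acquire(b)  # under nodeadlocks, Acquire reorders so that both
    Acquire(a)  # threads effectively lock in ascending address order
    Release(a)
    Release(b)
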
@@ -399,88 +378,93 @@ proc TryAcquire*(lock: var TLock): bool {.inline.} =
     if not result: return
     # we have to add it to the ordered list. Oh, and we might fail if
     # there is no space in the array left ...
-    var g = ThisThread()
-    if g.locksLen >= len(g.locks):
+    if locksLen >= len(locks):
       ReleaseSys(lock)
       raise newException(EResourceExhausted, "cannot acquire additional lock")
     # find the position to add:
     var p = addr(lock)
-    var L = g.locksLen-1
+    var L = locksLen-1
     var i = 0
     while i <= L:
-      assert g.locks[i] != nil
-      if g.locks[i] < p: inc(i) # in correct order
-      elif g.locks[i] == p: return # thread already holds lock
+      sysAssert locks[i] != nil
+      if locks[i] < p: inc(i) # in correct order
+      elif locks[i] == p: return # thread already holds lock
       else:
         # do the crazy stuff here:
         while L >= i:
-          g.locks[L+1] = g.locks[L]
+          locks[L+1] = locks[L]
           dec L
-        g.locks[i] = p
-        inc(g.locksLen)
-        assert OrderedLocks(g)
+        locks[i] = p
+        inc(locksLen)
+        sysAssert OrderedLocks()
         return
     # simply add to the end:
-    g.locks[g.locksLen] = p
-    inc(g.locksLen)
-    assert OrderedLocks(g)
+    locks[locksLen] = p
+    inc(locksLen)
+    sysAssert OrderedLocks()
 
 proc Acquire*(lock: var TLock) =
   ## Acquires the lock `lock`.
   when nodeadlocks:
-    var g = ThisThread()
     var p = addr(lock)
-    var L = g.locksLen-1
+    var L = locksLen-1
     var i = 0
     while i <= L:
-      assert g.locks[i] != nil
-      if g.locks[i] < p: inc(i) # in correct order
-      elif g.locks[i] == p: return # thread already holds lock
+      sysAssert locks[i] != nil
+      if locks[i] < p: inc(i) # in correct order
+      elif locks[i] == p: return # thread already holds lock
       else:
         # do the crazy stuff here:
-        if g.locksLen >= len(g.locks):
+        if locksLen >= len(locks):
           raise newException(EResourceExhausted, "cannot acquire additional lock")
         while L >= i:
-          ReleaseSys(cast[ptr TSysLock](g.locks[L])[])
-          g.locks[L+1] = g.locks[L]
+          ReleaseSys(cast[ptr TSysLock](locks[L])[])
+          locks[L+1] = locks[L]
           dec L
         # acquire the current lock:
         AcquireSys(lock)
-        g.locks[i] = p
-        inc(g.locksLen)
+        locks[i] = p
+        inc(locksLen)
         # acquire old locks in proper order again:
-        L = g.locksLen-1
+        L = locksLen-1
         inc i
         while i <= L:
-          AcquireSys(cast[ptr TSysLock](g.locks[i])[])
+          AcquireSys(cast[ptr TSysLock](locks[i])[])
           inc(i)
         # DANGER: We can only modify this global var if we gained every lock!
         # NO! We need an atomic increment. Crap.
         discard system.atomicInc(deadlocksPrevented, 1)
-        assert OrderedLocks(g)
+        sysAssert OrderedLocks()
         return
     # simply add to the end:
-    if g.locksLen >= len(g.locks):
+    if locksLen >= len(locks):
       raise newException(EResourceExhausted, "cannot acquire additional lock")
     AcquireSys(lock)
-    g.locks[g.locksLen] = p
-    inc(g.locksLen)
-    assert OrderedLocks(g)
+    locks[locksLen] = p
+    inc(locksLen)
+    sysAssert OrderedLocks()
   else:
     AcquireSys(lock)
 
 proc Release*(lock: var TLock) =
   ## Releases the lock `lock`.
   when nodeadlocks:
-    var g = ThisThread()
     var p = addr(lock)
-    var L = g.locksLen
+    var L = locksLen
     for i in countdown(L-1, 0):
-      if g.locks[i] == p:
-        for j in i..L-2: g.locks[j] = g.locks[j+1]
-        dec g.locksLen
+      if locks[i] == p:
+        for j in i..L-2: locks[j] = locks[j+1]
+        dec locksLen
         break
     ReleaseSys(lock)
 
+# ------------------------ message passing support ---------------------------
+
+proc getInBoxMem*[TMsg](t: var TThread[TMsg]): pointer {.inline.} =
+  result = addr(t.inbox)
+
+proc getInBoxMem*(): pointer {.inline.} =
+  result = addr(cast[PGcThread](ThreadVarGetValue(globalsSlot)).inbox)
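Together with ``send``/``recv`` from ``inboxes.nim``, these accessors complete the message-passing glue. Since ``recv`` blocks, a worker that also has other duties can poll with ``peek`` first. A sketch, where ``process`` and ``doIdleWork`` are hypothetical helpers:

.. code-block:: nimrod

  proc worker(dummy: int) {.thread.} =
    while true:
      if peek() > 0:
        var task = recv[int]()
        if task < 0: break  # a negative task number stops the worker
        process(task)       # hypothetical
      else:
        doIdleWork()        # hypothetical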