Diffstat (limited to 'lib/system')
-rwxr-xr-x | lib/system/alloc.nim     |  71
-rw-r--r-- | lib/system/atomics.nim   |  41
-rwxr-xr-x | lib/system/excpt.nim     | 108
-rwxr-xr-x | lib/system/gc.nim        |  43
-rwxr-xr-x | lib/system/repr.nim      |   2
-rwxr-xr-x | lib/system/systhread.nim |  98
-rwxr-xr-x | lib/system/threads.nim   | 481
7 files changed, 583 insertions, 261 deletions
diff --git a/lib/system/alloc.nim b/lib/system/alloc.nim index 2280415e1..c12dc253a 100755 --- a/lib/system/alloc.nim +++ b/lib/system/alloc.nim @@ -1,7 +1,7 @@ # # # Nimrod's Runtime Library -# (c) Copyright 2009 Andreas Rumpf +# (c) Copyright 2011 Andreas Rumpf # # See the file "copying.txt", included in this # distribution, for details about the copyright. @@ -80,7 +80,7 @@ else: # system immediately. const - ChunkOsReturn = 256 * PageSize + ChunkOsReturn = 256 * PageSize # 1 MB InitialMemoryRequest = ChunkOsReturn div 2 # < ChunkOsReturn! SmallChunkSize = PageSize @@ -101,6 +101,7 @@ type next: ptr TFreeCell # next free cell in chunk (overlaid with refcount) zeroField: int # 0 means cell is not used (overlaid with typ field) # 1 means cell is manually managed pointer + # otherwise a PNimType is stored in there PChunk = ptr TBaseChunk PBigChunk = ptr TBigChunk @@ -151,6 +152,7 @@ type TAllocator {.final, pure.} = object llmem: PLLChunk currMem, maxMem, freeMem: int # memory sizes (allocated from OS) + lastSize: int # needed for the case that OS gives us pages linearly freeSmallChunks: array[0..SmallChunkSize div MemAlign-1, PSmallChunk] freeChunksList: PBigChunk # XXX make this a datastructure with O(1) access chunkStarts: TIntSet @@ -167,10 +169,7 @@ proc getMaxMem(a: var TAllocator): int = # maxPagesCount may not be up to date. Thus we use the # maximum of these both values here: return max(a.currMem, a.maxMem) - -var - allocator: TAllocator - + proc llAlloc(a: var TAllocator, size: int): pointer = # *low-level* alloc for the memory managers data structures. Deallocation # is never done. @@ -192,10 +191,10 @@ proc IntSetGet(t: TIntSet, key: int): PTrunk = it = it.next result = nil -proc IntSetPut(t: var TIntSet, key: int): PTrunk = +proc IntSetPut(a: var TAllocator, t: var TIntSet, key: int): PTrunk = result = IntSetGet(t, key) if result == nil: - result = cast[PTrunk](llAlloc(allocator, sizeof(result[]))) + result = cast[PTrunk](llAlloc(a, sizeof(result[]))) result.next = t.data[key and high(t.data)] t.data[key and high(t.data)] = result result.key = key @@ -208,8 +207,8 @@ proc Contains(s: TIntSet, key: int): bool = else: result = false -proc Incl(s: var TIntSet, key: int) = - var t = IntSetPut(s, key shr TrunkShift) +proc Incl(a: var TAllocator, s: var TIntSet, key: int) = + var t = IntSetPut(a, s, key shr TrunkShift) var u = key and TrunkMask t.bits[u shr IntShift] = t.bits[u shr IntShift] or (1 shl (u and IntMask)) @@ -219,18 +218,6 @@ proc Excl(s: var TIntSet, key: int) = var u = key and TrunkMask t.bits[u shr IntShift] = t.bits[u shr IntShift] and not (1 shl (u and IntMask)) - -proc ContainsOrIncl(s: var TIntSet, key: int): bool = - var t = IntSetGet(s, key shr TrunkShift) - if t != nil: - var u = key and TrunkMask - result = (t.bits[u shr IntShift] and (1 shl (u and IntMask))) != 0 - if not result: - t.bits[u shr IntShift] = t.bits[u shr IntShift] or - (1 shl (u and IntMask)) - else: - Incl(s, key) - result = false # ------------- chunk management ---------------------------------------------- proc pageIndex(c: PChunk): int {.inline.} = @@ -241,9 +228,7 @@ proc pageIndex(p: pointer): int {.inline.} = proc pageAddr(p: pointer): PChunk {.inline.} = result = cast[PChunk](cast[TAddress](p) and not PageMask) - assert(Contains(allocator.chunkStarts, pageIndex(result))) - -var lastSize = PageSize + #assert(Contains(allocator.chunkStarts, pageIndex(result))) proc requestOsChunks(a: var TAllocator, size: int): PBigChunk = incCurrMem(a, size) @@ -263,6 +248,7 @@ proc requestOsChunks(a: 
var TAllocator, size: int): PBigChunk = #echo("Next already allocated!") next.prevSize = size # set result.prevSize: + var lastSize = if a.lastSize != 0: a.lastSize else: PageSize var prv = cast[TAddress](result) -% lastSize assert((nxt and PageMask) == 0) var prev = cast[PChunk](prv) @@ -271,7 +257,7 @@ proc requestOsChunks(a: var TAllocator, size: int): PBigChunk = result.prevSize = lastSize else: result.prevSize = 0 # unknown - lastSize = size # for next request + a.lastSize = size # for next request proc freeOsChunks(a: var TAllocator, p: pointer, size: int) = # update next.prevSize: @@ -287,8 +273,8 @@ proc freeOsChunks(a: var TAllocator, p: pointer, size: int) = dec(a.freeMem, size) #c_fprintf(c_stdout, "[Alloc] back to OS: %ld\n", size) -proc isAccessible(p: pointer): bool {.inline.} = - result = Contains(allocator.chunkStarts, pageIndex(p)) +proc isAccessible(a: TAllocator, p: pointer): bool {.inline.} = + result = Contains(a.chunkStarts, pageIndex(p)) proc contains[T](list, x: T): bool = var it = list @@ -337,7 +323,7 @@ proc updatePrevSize(a: var TAllocator, c: PBigChunk, prevSize: int) {.inline.} = var ri = cast[PChunk](cast[TAddress](c) +% c.size) assert((cast[TAddress](ri) and PageMask) == 0) - if isAccessible(ri): + if isAccessible(a, ri): ri.prevSize = prevSize proc freeBigChunk(a: var TAllocator, c: PBigChunk) = @@ -347,7 +333,7 @@ proc freeBigChunk(a: var TAllocator, c: PBigChunk) = when coalescRight: var ri = cast[PChunk](cast[TAddress](c) +% c.size) assert((cast[TAddress](ri) and PageMask) == 0) - if isAccessible(ri) and chunkUnused(ri): + if isAccessible(a, ri) and chunkUnused(ri): assert(not isSmallChunk(ri)) if not isSmallChunk(ri): ListRemove(a.freeChunksList, cast[PBigChunk](ri)) @@ -357,7 +343,7 @@ proc freeBigChunk(a: var TAllocator, c: PBigChunk) = if c.prevSize != 0: var le = cast[PChunk](cast[TAddress](c) -% c.prevSize) assert((cast[TAddress](le) and PageMask) == 0) - if isAccessible(le) and chunkUnused(le): + if isAccessible(a, le) and chunkUnused(le): assert(not isSmallChunk(le)) if not isSmallChunk(le): ListRemove(a.freeChunksList, cast[PBigChunk](le)) @@ -366,7 +352,7 @@ proc freeBigChunk(a: var TAllocator, c: PBigChunk) = c = cast[PBigChunk](le) if c.size < ChunkOsReturn: - incl(a.chunkStarts, pageIndex(c)) + incl(a, a.chunkStarts, pageIndex(c)) updatePrevSize(a, c, c.size) ListAdd(a.freeChunksList, c) c.used = false @@ -383,7 +369,7 @@ proc splitChunk(a: var TAllocator, c: PBigChunk, size: int) = rest.prevSize = size updatePrevSize(a, c, rest.size) c.size = size - incl(a.chunkStarts, pageIndex(rest)) + incl(a, a.chunkStarts, pageIndex(rest)) ListAdd(a.freeChunksList, rest) proc getBigChunk(a: var TAllocator, size: int): PBigChunk = @@ -410,7 +396,7 @@ proc getBigChunk(a: var TAllocator, size: int): PBigChunk = result = requestOsChunks(a, size) result.prevSize = 0 # XXX why is this needed? 
result.used = true - incl(a.chunkStarts, pageIndex(result)) + incl(a, a.chunkStarts, pageIndex(result)) dec(a.freeMem, size) proc getSmallChunk(a: var TAllocator): PSmallChunk = @@ -472,7 +458,7 @@ proc rawAlloc(a: var TAllocator, requestedSize: int): pointer = assert c.size == size result = addr(c.data) assert((cast[TAddress](result) and (MemAlign-1)) == 0) - assert(isAccessible(result)) + assert(isAccessible(a, result)) proc rawDealloc(a: var TAllocator, p: pointer) = var c = pageAddr(p) @@ -509,7 +495,7 @@ proc rawDealloc(a: var TAllocator, p: pointer) = freeBigChunk(a, cast[PBigChunk](c)) proc isAllocatedPtr(a: TAllocator, p: pointer): bool = - if isAccessible(p): + if isAccessible(a, p): var c = pageAddr(p) if not chunkUnused(c): if isSmallChunk(c): @@ -522,11 +508,12 @@ proc isAllocatedPtr(a: TAllocator, p: pointer): bool = var c = cast[PBigChunk](c) result = p == addr(c.data) and cast[ptr TFreeCell](p).zeroField >% 1 +var + allocator {.rtlThreadVar.}: TAllocator + # ---------------------- interface to programs ------------------------------- when not defined(useNimRtl): - var heapLock: TSysLock - InitSysLock(HeapLock) proc unlockedAlloc(size: int): pointer {.inline.} = result = rawAlloc(allocator, size+sizeof(TFreeCell)) @@ -545,18 +532,18 @@ when not defined(useNimRtl): assert(not isAllocatedPtr(allocator, x)) proc alloc(size: int): pointer = - when hasThreadSupport: AquireSys(HeapLock) + when hasThreadSupport and hasSharedHeap: AquireSys(HeapLock) result = unlockedAlloc(size) - when hasThreadSupport: ReleaseSys(HeapLock) + when hasThreadSupport and hasSharedHeap: ReleaseSys(HeapLock) proc alloc0(size: int): pointer = result = alloc(size) zeroMem(result, size) proc dealloc(p: pointer) = - when hasThreadSupport: AquireSys(HeapLock) + when hasThreadSupport and hasSharedHeap: AquireSys(HeapLock) unlockedDealloc(p) - when hasThreadSupport: ReleaseSys(HeapLock) + when hasThreadSupport and hasSharedHeap: ReleaseSys(HeapLock) proc ptrSize(p: pointer): int = var x = cast[pointer](cast[TAddress](p) -% sizeof(TFreeCell)) diff --git a/lib/system/atomics.nim b/lib/system/atomics.nim new file mode 100644 index 000000000..31c25c5af --- /dev/null +++ b/lib/system/atomics.nim @@ -0,0 +1,41 @@ +# +# +# Nimrod's Runtime Library +# (c) Copyright 2011 Andreas Rumpf +# +# See the file "copying.txt", included in this +# distribution, for details about the copyright. +# + +## Atomic operations for Nimrod. + +when (defined(gcc) or defined(llvm_gcc)) and hasThreadSupport: + proc sync_add_and_fetch(p: var int, val: int): int {. + importc: "__sync_add_and_fetch", nodecl.} + proc sync_sub_and_fetch(p: var int, val: int): int {. + importc: "__sync_sub_and_fetch", nodecl.} +elif defined(vcc) and hasThreadSupport: + proc sync_add_and_fetch(p: var int, val: int): int {. + importc: "NimXadd", nodecl.} +else: + proc sync_add_and_fetch(p: var int, val: int): int {.inline.} = + inc(p, val) + result = p + +proc atomicInc(memLoc: var int, x: int): int = + when hasThreadSupport: + result = sync_add_and_fetch(memLoc, x) + else: + inc(memLoc, x) + result = memLoc + +proc atomicDec(memLoc: var int, x: int): int = + when hasThreadSupport: + when defined(sync_sub_and_fetch): + result = sync_sub_and_fetch(memLoc, x) + else: + result = sync_add_and_fetch(memLoc, -x) + else: + dec(memLoc, x) + result = memLoc + diff --git a/lib/system/excpt.nim b/lib/system/excpt.nim index bbe608fdc..3ef39902a 100755 --- a/lib/system/excpt.nim +++ b/lib/system/excpt.nim @@ -10,9 +10,6 @@ # Exception handling code. 
This is difficult because it has # to work if there is no more memory (but it doesn't yet!). -const - MaxLocksPerThread = 10 - var stackTraceNewLine* = "\n" ## undocumented feature; it is replaced by ``<br>`` ## for CGI applications @@ -35,111 +32,10 @@ proc chckRange(i, a, b: int): int {.inline, compilerproc.} proc chckRangeF(x, a, b: float): float {.inline, compilerproc.} proc chckNil(p: pointer) {.inline, compilerproc.} -type - PSafePoint = ptr TSafePoint - TSafePoint {.compilerproc, final.} = object - prev: PSafePoint # points to next safe point ON THE STACK - status: int - context: C_JmpBuf - -when hasThreadSupport: - # Support for thread local storage: - when defined(windows): - type - TThreadVarSlot {.compilerproc.} = distinct int32 - - proc TlsAlloc(): TThreadVarSlot {. - importc: "TlsAlloc", stdcall, dynlib: "kernel32".} - proc TlsSetValue(dwTlsIndex: TThreadVarSlot, lpTlsValue: pointer) {. - importc: "TlsSetValue", stdcall, dynlib: "kernel32".} - proc TlsGetValue(dwTlsIndex: TThreadVarSlot): pointer {. - importc: "TlsGetValue", stdcall, dynlib: "kernel32".} - - proc ThreadVarAlloc(): TThreadVarSlot {.compilerproc, inline.} = - result = TlsAlloc() - proc ThreadVarSetValue(s: TThreadVarSlot, value: pointer) {. - compilerproc, inline.} = - TlsSetValue(s, value) - proc ThreadVarGetValue(s: TThreadVarSlot): pointer {. - compilerproc, inline.} = - result = TlsGetValue(s) - - else: - {.passL: "-pthread".} - {.passC: "-pthread".} - type - TThreadVarSlot {.importc: "pthread_key_t", pure, final, - header: "<sys/types.h>".} = object - - proc pthread_getspecific(a1: TThreadVarSlot): pointer {. - importc: "pthread_getspecific", header: "<pthread.h>".} - proc pthread_key_create(a1: ptr TThreadVarSlot, - destruct: proc (x: pointer) {.noconv.}): int32 {. - importc: "pthread_key_create", header: "<pthread.h>".} - proc pthread_key_delete(a1: TThreadVarSlot): int32 {. - importc: "pthread_key_delete", header: "<pthread.h>".} - - proc pthread_setspecific(a1: TThreadVarSlot, a2: pointer): int32 {. - importc: "pthread_setspecific", header: "<pthread.h>".} - - proc specificDestroy(mem: pointer) {.noconv.} = - # we really need a thread-safe 'dealloc' here: - dealloc(mem) - - proc ThreadVarAlloc(): TThreadVarSlot {.compilerproc, inline.} = - discard pthread_key_create(addr(result), specificDestroy) - proc ThreadVarSetValue(s: TThreadVarSlot, value: pointer) {. - compilerproc, inline.} = - discard pthread_setspecific(s, value) - proc ThreadVarGetValue(s: TThreadVarSlot): pointer {.compilerproc, inline.} = - result = pthread_getspecific(s) - - type - TGlobals* {.final, pure.} = object - excHandler: PSafePoint - currException: ref E_Base - framePtr: PFrame - locksLen*: int - locks*: array [0..MaxLocksPerThread-1, pointer] - buf: string # cannot be allocated on the stack! - assertBuf: string # we need a different buffer for - # assert, as it raises an exception and - # exception handler needs the buffer too - gAssertionFailed: ref EAssertionFailed - tempFrames: array [0..127, PFrame] # cannot be allocated on the stack! - data: float # compiler should add thread local variables here! - PGlobals* = ptr TGlobals - - # XXX it'd be more efficient to not use a global variable for the - # thread storage slot, but to rely on the implementation to assign slot 0 - # for us... 
;-) - var globalsSlot = ThreadVarAlloc() - #const globalsSlot = TThreadVarSlot(0) - #assert checkSlot.int == globalsSlot.int - - proc NewGlobals(): PGlobals = - result = cast[PGlobals](alloc0(sizeof(TGlobals))) - new(result.gAssertionFailed) - result.buf = newStringOfCap(2000) - result.assertBuf = newStringOfCap(2000) - - proc AllocThreadLocalStorage*(): pointer {.inl.} = - isMultiThreaded = true - result = NewGlobals() - - proc SetThreadLocalStorage*(p: pointer) {.inl.} = - ThreadVarSetValue(globalsSlot, p) - - proc GetGlobals*(): PGlobals {.compilerRtl, inl.} = - result = cast[PGlobals](ThreadVarGetValue(globalsSlot)) - - # create for the main thread: - ThreadVarSetValue(globalsSlot, NewGlobals()) - when hasThreadSupport: template ThreadGlobals = - var globals = GetGlobals() - template `||`(varname: expr): expr = globals.varname + var currentThread = ThisThread() + template `||`(varname: expr): expr = currentThread.g.varname else: template ThreadGlobals = nil # nothing diff --git a/lib/system/gc.nim b/lib/system/gc.nim index ab8f19674..d276d6cec 100755 --- a/lib/system/gc.nim +++ b/lib/system/gc.nim @@ -15,10 +15,6 @@ # stack overflows when traversing deep datastructures. This is comparable to # an incremental and generational GC. It should be well-suited for soft real # time applications (like games). -# -# Future Improvements: -# * Support for multi-threading. However, locks for the reference counting -# might turn out to be too slow. const CycleIncrease = 2 # is a multiplicative increase @@ -64,10 +60,10 @@ type stat: TGcStat var - stackBottom: pointer - gch: TGcHeap - cycleThreshold: int = InitialCycleThreshold - recGcLock: int = 0 + stackBottom {.rtlThreadVar.}: pointer + gch {.rtlThreadVar.}: TGcHeap + cycleThreshold {.rtlThreadVar.}: int = InitialCycleThreshold + recGcLock {.rtlThreadVar.}: int = 0 # we use a lock to prevent the garbage collector to be triggered in a # finalizer; the collector should not call itself this way! Thus every # object allocated by a finalizer will not trigger a garbage collection. @@ -186,6 +182,15 @@ proc doOperation(p: pointer, op: TWalkOp) proc forAllChildrenAux(dest: Pointer, mt: PNimType, op: TWalkOp) # we need the prototype here for debugging purposes +when hasThreadSupport and hasSharedHeap: + template `--`(x: expr): expr = atomicDec(x, rcIncrement) <% rcIncrement + template `++`(x: expr): stmt = discard atomicInc(x, rcIncrement) +else: + template `--`(x: expr): expr = + Dec(x, rcIncrement) + x <% rcIncrement + template `++`(x: expr): stmt = Inc(x, rcIncrement) + proc prepareDealloc(cell: PCell) = if cell.typ.finalizer != nil: # the finalizer could invoke something that @@ -219,13 +224,13 @@ proc decRef(c: PCell) {.inline.} = writeCell("broken cell", c) assert(c.refcount >=% rcIncrement) #if c.refcount <% rcIncrement: quit("leck mich") - if atomicDec(c.refcount, rcIncrement) <% rcIncrement: + if --c.refcount: rtlAddZCT(c) elif canBeCycleRoot(c): rtlAddCycleRoot(c) proc incRef(c: PCell) {.inline.} = - discard atomicInc(c.refcount, rcIncrement) + ++c.refcount if canBeCycleRoot(c): rtlAddCycleRoot(c) @@ -245,10 +250,10 @@ proc asgnRefNoCycle(dest: ppointer, src: pointer) {.compilerProc, inline.} = # cycle is possible. 
if src != nil: var c = usrToCell(src) - discard atomicInc(c.refcount, rcIncrement) + ++c.refcount if dest[] != nil: var c = usrToCell(dest[]) - if atomicDec(c.refcount, rcIncrement) <% rcIncrement: + if --c.refcount: rtlAddZCT(c) dest[] = src @@ -517,7 +522,17 @@ proc gcMark(p: pointer) {.inline.} = proc markThreadStacks(gch: var TGcHeap) = when hasThreadSupport: - nil + var it = threadList + while it != nil: + # mark registers: + for i in 0 .. high(it.registers): gcMark(it.registers[i]) + var sp = cast[TAddress](it.stackBottom) + var max = cast[TAddress](it.stackTop) + # XXX unroll this loop: + while sp <=% max: + gcMark(cast[ppointer](sp)[]) + sp = sp +% sizeof(pointer) + it = it.next # ----------------- stack management -------------------------------------- # inspired from Smart Eiffel @@ -684,7 +699,7 @@ proc unmarkStackAndRegisters(gch: var TGcHeap) = # decRef(d[i]) inlined: cannot create a cycle and must not aquire lock var c = d[i] # XXX no need for an atomic dec here: - if atomicDec(c.refcount, rcIncrement) <% rcIncrement: + if --c.refcount: addZCT(gch.zct, c) assert c.typ != nil gch.decStack.len = 0 diff --git a/lib/system/repr.nim b/lib/system/repr.nim index 395adc2ca..d9997001d 100755 --- a/lib/system/repr.nim +++ b/lib/system/repr.nim @@ -97,7 +97,7 @@ proc reprSetAux(result: var string, p: pointer, typ: PNimType) = inc(elemCounter) if typ.size <= 8: for i in 0..sizeof(int64)*8-1: - if (u and (1 shl i)) != 0: + if (u and (1'i64 shl int64(i))) != 0'i64: if elemCounter > 0: add result, ", " addSetElem(result, i+typ.node.len, typ.base) inc(elemCounter) diff --git a/lib/system/systhread.nim b/lib/system/systhread.nim deleted file mode 100755 index c83062942..000000000 --- a/lib/system/systhread.nim +++ /dev/null @@ -1,98 +0,0 @@ -# -# -# Nimrod's Runtime Library -# (c) Copyright 2011 Andreas Rumpf -# -# See the file "copying.txt", included in this -# distribution, for details about the copyright. -# - -const - maxThreads = 256 - SystemInclude = defined(hasThreadSupport) - -when not SystemInclude: - # ugly hack: this file is then included from core/threads, so we have - # thread support: - const hasThreadSupport = true - - include "lib/system/ansi_c" - -when (defined(gcc) or defined(llvm_gcc)) and hasThreadSupport: - proc sync_add_and_fetch(p: var int, val: int): int {. - importc: "__sync_add_and_fetch", nodecl.} - proc sync_sub_and_fetch(p: var int, val: int): int {. - importc: "__sync_sub_and_fetch", nodecl.} -elif defined(vcc) and hasThreadSupport: - proc sync_add_and_fetch(p: var int, val: int): int {. - importc: "NimXadd", nodecl.} -else: - proc sync_add_and_fetch(p: var int, val: int): int {.inline.} = - inc(p, val) - result = p - -proc atomicInc(memLoc: var int, x: int): int = - when hasThreadSupport: - result = sync_add_and_fetch(memLoc, x) - else: - inc(memLoc, x) - result = memLoc - -proc atomicDec(memLoc: var int, x: int): int = - when hasThreadSupport: - when defined(sync_sub_and_fetch): - result = sync_sub_and_fetch(memLoc, x) - else: - result = sync_add_and_fetch(memLoc, -x) - else: - dec(memLoc, x) - result = memLoc - -when defined(Windows): - type - TSysLock {.final, pure.} = object # CRITICAL_SECTION in WinApi - DebugInfo: pointer - LockCount: int32 - RecursionCount: int32 - OwningThread: int - LockSemaphore: int - Reserved: int32 - - proc InitSysLock(L: var TSysLock) {.stdcall, - dynlib: "kernel32", importc: "InitializeCriticalSection".} - ## Initializes the lock `L`. 
- - proc TryAquireSysAux(L: var TSysLock): int32 {.stdcall, - dynlib: "kernel32", importc: "TryEnterCriticalSection".} - ## Tries to aquire the lock `L`. - - proc TryAquireSys(L: var TSysLock): bool {.inline.} = - result = TryAquireSysAux(L) != 0'i32 - - proc AquireSys(L: var TSysLock) {.stdcall, - dynlib: "kernel32", importc: "EnterCriticalSection".} - ## Aquires the lock `L`. - - proc ReleaseSys(L: var TSysLock) {.stdcall, - dynlib: "kernel32", importc: "LeaveCriticalSection".} - ## Releases the lock `L`. - -else: - type - TSysLock {.importc: "pthread_mutex_t", pure, final, - header: "<sys/types.h>".} = object - - proc InitSysLock(L: var TSysLock, attr: pointer = nil) {. - importc: "pthread_mutex_init", header: "<pthread.h>".} - - proc AquireSys(L: var TSysLock) {. - importc: "pthread_mutex_lock", header: "<pthread.h>".} - proc TryAquireSysAux(L: var TSysLock): cint {. - importc: "pthread_mutex_trylock", header: "<pthread.h>".} - - proc TryAquireSys(L: var TSysLock): bool {.inline.} = - result = TryAquireSysAux(L) == 0'i32 - - proc ReleaseSys(L: var TSysLock) {. - importc: "pthread_mutex_unlock", header: "<pthread.h>".} - diff --git a/lib/system/threads.nim b/lib/system/threads.nim new file mode 100755 index 000000000..052335baa --- /dev/null +++ b/lib/system/threads.nim @@ -0,0 +1,481 @@ +# +# +# Nimrod's Runtime Library +# (c) Copyright 2011 Andreas Rumpf +# +# See the file "copying.txt", included in this +# distribution, for details about the copyright. +# + +## Thread support for Nimrod. **Note**: This is part of the system module. +## Do not import it directly. To active thread support you need to compile +## with the ``--threads:on`` command line switch. +## +## Nimrod's memory model for threads is quite different from other common +## programming languages (C, Pascal): Each thread has its own +## (garbage collected) heap and sharing of memory is restricted. This helps +## to prevent race conditions and improves efficiency. See the manual for +## details of this memory model. +## +## Example: +## +## .. code-block:: nimrod +## +## var +## thr: array [0..4, TThread[tuple[a,b: int]]] +## L: TLock +## +## proc threadFunc(interval: tuple[a,b: int]) {.procvar.} = +## for i in interval.a..interval.b: +## Aquire(L) # lock stdout +## echo i +## Release(L) +## +## InitLock(L) +## +## for i in 0..high(thr): +## createThread(thr[i], threadFunc, (i*10, i*10+5)) +## joinThreads(thr) + +const + maxRegisters = 256 # don't think there is an arch with more registers + maxLocksPerThread* = 10 ## max number of locks a thread can hold + ## at the same time + +when defined(Windows): + type + TSysLock {.final, pure.} = object # CRITICAL_SECTION in WinApi + DebugInfo: pointer + LockCount: int32 + RecursionCount: int32 + OwningThread: int + LockSemaphore: int + Reserved: int32 + + proc InitSysLock(L: var TSysLock) {.stdcall, + dynlib: "kernel32", importc: "InitializeCriticalSection".} + ## Initializes the lock `L`. + + proc TryAquireSysAux(L: var TSysLock): int32 {.stdcall, + dynlib: "kernel32", importc: "TryEnterCriticalSection".} + ## Tries to aquire the lock `L`. + + proc TryAquireSys(L: var TSysLock): bool {.inline.} = + result = TryAquireSysAux(L) != 0'i32 + + proc AquireSys(L: var TSysLock) {.stdcall, + dynlib: "kernel32", importc: "EnterCriticalSection".} + ## Aquires the lock `L`. + + proc ReleaseSys(L: var TSysLock) {.stdcall, + dynlib: "kernel32", importc: "LeaveCriticalSection".} + ## Releases the lock `L`. 
+ + type + THandle = int + TSysThread = THandle + TWinThreadProc = proc (x: pointer): int32 {.stdcall.} + + proc CreateThread(lpThreadAttributes: Pointer, dwStackSize: int32, + lpStartAddress: TWinThreadProc, + lpParameter: Pointer, + dwCreationFlags: int32, + lpThreadId: var int32): TSysThread {. + stdcall, dynlib: "kernel32", importc: "CreateThread".} + + proc winSuspendThread(hThread: TSysThread): int32 {. + stdcall, dynlib: "kernel32", importc: "SuspendThread".} + + proc winResumeThread(hThread: TSysThread): int32 {. + stdcall, dynlib: "kernel32", importc: "ResumeThread".} + + proc WaitForMultipleObjects(nCount: int32, + lpHandles: ptr TSysThread, + bWaitAll: int32, + dwMilliseconds: int32): int32 {. + stdcall, dynlib: "kernel32", importc: "WaitForMultipleObjects".} + + proc WaitForSingleObject(hHandle: TSysThread, dwMilliseconds: int32): int32 {. + stdcall, dynlib: "kernel32", importc: "WaitForSingleObject".} + + proc TerminateThread(hThread: TSysThread, dwExitCode: int32): int32 {. + stdcall, dynlib: "kernel32", importc: "TerminateThread".} + + type + TThreadVarSlot {.compilerproc.} = distinct int32 + + proc TlsAlloc(): TThreadVarSlot {. + importc: "TlsAlloc", stdcall, dynlib: "kernel32".} + proc TlsSetValue(dwTlsIndex: TThreadVarSlot, lpTlsValue: pointer) {. + importc: "TlsSetValue", stdcall, dynlib: "kernel32".} + proc TlsGetValue(dwTlsIndex: TThreadVarSlot): pointer {. + importc: "TlsGetValue", stdcall, dynlib: "kernel32".} + + proc ThreadVarAlloc(): TThreadVarSlot {.compilerproc, inline.} = + result = TlsAlloc() + proc ThreadVarSetValue(s: TThreadVarSlot, value: pointer) {. + compilerproc, inline.} = + TlsSetValue(s, value) + proc ThreadVarGetValue(s: TThreadVarSlot): pointer {. + compilerproc, inline.} = + result = TlsGetValue(s) + +else: + {.passL: "-pthread".} + {.passC: "-pthread".} + + type + TSysLock {.importc: "pthread_mutex_t", pure, final, + header: "<sys/types.h>".} = object + + proc InitSysLock(L: var TSysLock, attr: pointer = nil) {. + importc: "pthread_mutex_init", header: "<pthread.h>".} + + proc AquireSys(L: var TSysLock) {. + importc: "pthread_mutex_lock", header: "<pthread.h>".} + proc TryAquireSysAux(L: var TSysLock): cint {. + importc: "pthread_mutex_trylock", header: "<pthread.h>".} + + proc TryAquireSys(L: var TSysLock): bool {.inline.} = + result = TryAquireSysAux(L) == 0'i32 + + proc ReleaseSys(L: var TSysLock) {. + importc: "pthread_mutex_unlock", header: "<pthread.h>".} + + type + TSysThread {.importc: "pthread_t", header: "<sys/types.h>", + final, pure.} = object + Tpthread_attr {.importc: "pthread_attr_t", + header: "<sys/types.h>", final, pure.} = object + + Ttimespec {.importc: "struct timespec", + header: "<time.h>", final, pure.} = object + tv_sec: int + tv_nsec: int + + proc pthread_attr_init(a1: var TPthread_attr) {. + importc, header: "<pthread.h>".} + proc pthread_attr_setstacksize(a1: var TPthread_attr, a2: int) {. + importc, header: "<pthread.h>".} + + proc pthread_create(a1: var TSysThread, a2: var TPthread_attr, + a3: proc (x: pointer) {.noconv.}, + a4: pointer): cint {.importc: "pthread_create", + header: "<pthread.h>".} + proc pthread_join(a1: TSysThread, a2: ptr pointer): cint {. + importc, header: "<pthread.h>".} + + proc pthread_cancel(a1: TSysThread): cint {. + importc: "pthread_cancel", header: "<pthread.h>".} + + proc AquireSysTimeoutAux(L: var TSysLock, timeout: var Ttimespec): cint {. 
+ importc: "pthread_mutex_timedlock", header: "<time.h>".} + + proc AquireSysTimeout(L: var TSysLock, msTimeout: int) {.inline.} = + var a: Ttimespec + a.tv_sec = msTimeout div 1000 + a.tv_nsec = (msTimeout mod 1000) * 1000 + var res = AquireSysTimeoutAux(L, a) + if res != 0'i32: raise newException(EResourceExhausted, $strerror(res)) + + type + TThreadVarSlot {.importc: "pthread_key_t", pure, final, + header: "<sys/types.h>".} = object + + proc pthread_getspecific(a1: TThreadVarSlot): pointer {. + importc: "pthread_getspecific", header: "<pthread.h>".} + proc pthread_key_create(a1: ptr TThreadVarSlot, + destruct: proc (x: pointer) {.noconv.}): int32 {. + importc: "pthread_key_create", header: "<pthread.h>".} + proc pthread_key_delete(a1: TThreadVarSlot): int32 {. + importc: "pthread_key_delete", header: "<pthread.h>".} + + proc pthread_setspecific(a1: TThreadVarSlot, a2: pointer): int32 {. + importc: "pthread_setspecific", header: "<pthread.h>".} + + proc ThreadVarAlloc(): TThreadVarSlot {.compilerproc, inline.} = + discard pthread_key_create(addr(result), nil) + proc ThreadVarSetValue(s: TThreadVarSlot, value: pointer) {. + compilerproc, inline.} = + discard pthread_setspecific(s, value) + proc ThreadVarGetValue(s: TThreadVarSlot): pointer {.compilerproc, inline.} = + result = pthread_getspecific(s) + +type + TGlobals {.final, pure.} = object + excHandler: PSafePoint + currException: ref E_Base + framePtr: PFrame + buf: string # cannot be allocated on the stack! + assertBuf: string # we need a different buffer for + # assert, as it raises an exception and + # exception handler needs the buffer too + gAssertionFailed: ref EAssertionFailed + tempFrames: array [0..127, PFrame] # cannot be allocated on the stack! + data: float # compiler should add thread local variables here! + +proc initGlobals(g: var TGlobals) = + new(g.gAssertionFailed) + g.buf = newStringOfCap(2000) + g.assertBuf = newStringOfCap(2000) + + +type + PGcThread = ptr TGcThread + TGcThread {.pure.} = object + sys: TSysThread + next, prev: PGcThread + stackBottom, stackTop: pointer + stackSize: int + g: TGlobals + locksLen: int + locks: array [0..MaxLocksPerThread-1, pointer] + registers: array[0..maxRegisters-1, pointer] # register contents for GC + +# XXX it'd be more efficient to not use a global variable for the +# thread storage slot, but to rely on the implementation to assign slot 0 +# for us... ;-) +var globalsSlot = ThreadVarAlloc() +#const globalsSlot = TThreadVarSlot(0) +#assert checkSlot.int == globalsSlot.int + +proc ThisThread(): PGcThread {.compilerRtl, inl.} = + result = cast[PGcThread](ThreadVarGetValue(globalsSlot)) + +# create for the main thread. Note: do not insert this data into the list +# of all threads; it's not to be stopped etc. +when not defined(useNimRtl): + var mainThread: TGcThread + initGlobals(mainThread.g) + ThreadVarSetValue(globalsSlot, addr(mainThread)) + + var heapLock: TSysLock + InitSysLock(HeapLock) + + var + threadList: PGcThread + + proc registerThread(t: PGcThread) = + # we need to use the GC global lock here! + AquireSys(HeapLock) + t.prev = nil + t.next = threadList + if threadList != nil: + assert(threadList.prev == nil) + threadList.prev = t + threadList = t + ReleaseSys(HeapLock) + + proc unregisterThread(t: PGcThread) = + # we need to use the GC global lock here! 
+ AquireSys(HeapLock) + if t == threadList: threadList = t.next + if t.next != nil: t.next.prev = t.prev + if t.prev != nil: t.prev.next = t.next + # so that a thread can be unregistered twice which might happen if the + # code executes `destroyThread`: + t.next = nil + t.prev = nil + ReleaseSys(HeapLock) + + # on UNIX, the GC uses ``SIGFREEZE`` to tell every thread to stop so that + # the GC can examine the stacks? + + proc stopTheWord() = + nil + +# We jump through some hops here to ensure that Nimrod thread procs can have +# the Nimrod calling convention. This is needed because thread procs are +# ``stdcall`` on Windows and ``noconv`` on UNIX. Alternative would be to just +# use ``stdcall`` since it is mapped to ``noconv`` on UNIX anyway. However, +# the current approach will likely result in less problems later when we have +# GC'ed closures in Nimrod. + +type + TThread* {.pure, final.}[TParam] = object of TGcThread ## Nimrod thread. + fn: proc (p: TParam) + data: TParam + +template ThreadProcWrapperBody(closure: expr) = + when not hasSharedHeap: initGC() # init the GC for this thread + ThreadVarSetValue(globalsSlot, closure) + var t = cast[ptr TThread[TParam]](closure) + when not hasSharedHeap: stackBottom = addr(t) + t.stackBottom = addr(t) + registerThread(t) + try: + t.fn(t.data) + finally: + unregisterThread(t) + +{.push stack_trace:off.} +when defined(windows): + proc threadProcWrapper[TParam](closure: pointer): int32 {.stdcall.} = + ThreadProcWrapperBody(closure) + # implicitely return 0 +else: + proc threadProcWrapper[TParam](closure: pointer) {.noconv.} = + ThreadProcWrapperBody(closure) +{.pop.} + +proc joinThread*[TParam](t: TThread[TParam]) {.inline.} = + ## waits for the thread `t` to finish. + when hostOS == "windows": + discard WaitForSingleObject(t.sys, -1'i32) + else: + discard pthread_join(t.sys, nil) + +proc joinThreads*[TParam](t: openArray[TThread[TParam]]) = + ## waits for every thread in `t` to finish. + when hostOS == "windows": + var a: array[0..255, TSysThread] + assert a.len >= t.len + for i in 0..t.high: a[i] = t[i].sys + discard WaitForMultipleObjects(t.len, cast[ptr TSysThread](addr(a)), 1, -1) + else: + for i in 0..t.high: joinThread(t[i]) + +proc destroyThread*[TParam](t: var TThread[TParam]) {.inline.} = + ## forces the thread `t` to terminate. This is potentially dangerous if + ## you don't have full control over `t` and its aquired resources. + when hostOS == "windows": + discard TerminateThread(t.sys, 1'i32) + else: + discard pthread_cancel(t.sys) + unregisterThread(addr(t.gcInfo)) + +proc createThread*[TParam](t: var TThread[TParam], + tp: proc (param: TParam), + param: TParam, + stackSize = 1024*256*sizeof(int)) = + ## creates a new thread `t` and starts its execution. Entry point is the + ## proc `tp`. `param` is passed to `tp`. 
+ t.data = param + t.fn = tp + t.stackSize = stackSize + when hostOS == "windows": + var dummyThreadId: int32 + t.sys = CreateThread(nil, stackSize, threadProcWrapper[TParam], + addr(t), 0'i32, dummyThreadId) + else: + var a: Tpthread_attr + pthread_attr_init(a) + pthread_attr_setstacksize(a, stackSize) + if pthread_create(t.sys, a, threadProcWrapper[TParam], addr(t)) != 0: + raise newException(EIO, "cannot create thread") + +# --------------------------- lock handling ---------------------------------- + +type + TLock* = TSysLock ## Nimrod lock + +const + noDeadlocks = false # compileOption("deadlockPrevention") + +when nodeadlocks: + var + deadlocksPrevented* = 0 ## counts the number of times a + ## deadlock has been prevented + +proc InitLock*(lock: var TLock) {.inline.} = + ## Initializes the lock `lock`. + InitSysLock(lock) + +proc OrderedLocks(g: PGcThread): bool = + for i in 0 .. g.locksLen-2: + if g.locks[i] >= g.locks[i+1]: return false + result = true + +proc TryAquire*(lock: var TLock): bool {.inline.} = + ## Try to aquires the lock `lock`. Returns `true` on success. + when noDeadlocks: + result = TryAquireSys(lock) + if not result: return + # we have to add it to the ordered list. Oh, and we might fail if + # there is no space in the array left ... + var g = ThisThread() + if g.locksLen >= len(g.locks): + ReleaseSys(lock) + raise newException(EResourceExhausted, "cannot aquire additional lock") + # find the position to add: + var p = addr(lock) + var L = g.locksLen-1 + var i = 0 + while i <= L: + assert g.locks[i] != nil + if g.locks[i] < p: inc(i) # in correct order + elif g.locks[i] == p: return # thread already holds lock + else: + # do the crazy stuff here: + while L >= i: + g.locks[L+1] = g.locks[L] + dec L + g.locks[i] = p + inc(g.locksLen) + assert OrderedLocks(g) + return + # simply add to the end: + g.locks[g.locksLen] = p + inc(g.locksLen) + assert OrderedLocks(g) + else: + result = TryAquireSys(lock) + +proc Aquire*(lock: var TLock) = + ## Aquires the lock `lock`. + when nodeadlocks: + var g = ThisThread() + var p = addr(lock) + var L = g.locksLen-1 + var i = 0 + while i <= L: + assert g.locks[i] != nil + if g.locks[i] < p: inc(i) # in correct order + elif g.locks[i] == p: return # thread already holds lock + else: + # do the crazy stuff here: + if g.locksLen >= len(g.locks): + raise newException(EResourceExhausted, "cannot aquire additional lock") + while L >= i: + ReleaseSys(cast[ptr TSysLock](g.locks[L])[]) + g.locks[L+1] = g.locks[L] + dec L + # aquire the current lock: + AquireSys(lock) + g.locks[i] = p + inc(g.locksLen) + # aquire old locks in proper order again: + L = g.locksLen-1 + inc i + while i <= L: + AquireSys(cast[ptr TSysLock](g.locks[i])[]) + inc(i) + # DANGER: We can only modify this global var if we gained every lock! + # NO! We need an atomic increment. Crap. + discard system.atomicInc(deadlocksPrevented, 1) + assert OrderedLocks(g) + return + + # simply add to the end: + if g.locksLen >= len(g.locks): + raise newException(EResourceExhausted, "cannot aquire additional lock") + AquireSys(lock) + g.locks[g.locksLen] = p + inc(g.locksLen) + assert OrderedLocks(g) + else: + AquireSys(lock) + +proc Release*(lock: var TLock) = + ## Releases the lock `lock`. + when nodeadlocks: + var g = ThisThread() + var p = addr(lock) + var L = g.locksLen + for i in countdown(L-1, 0): + if g.locks[i] == p: + for j in i..L-2: g.locks[j] = g.locks[j+1] + dec g.locksLen + break + ReleaseSys(lock) + |
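
Note on the new lib/system/atomics.nim (split out of the deleted systhread.nim): it wraps GCC's ``__sync_add_and_fetch``/``__sync_sub_and_fetch`` builtins and falls back to plain increments when thread support is off. A minimal usage sketch, relying only on the ``atomicInc``/``atomicDec`` procs defined in the diff; the ``refcount`` variable and the ``retain``/``release`` procs are made up for illustration:

.. code-block:: nimrod

  var refcount: int    # shared counter, e.g. a reference count

  proc retain() =
    # atomicInc returns the new value; only the side effect matters here
    discard atomicInc(refcount, 1)

  proc release(): bool =
    # true exactly once, when the last reference is dropped
    result = atomicDec(refcount, 1) == 0

This is the same pattern the GC now uses through its ``++``/``--`` templates, which expand to the atomic variants only ``when hasThreadSupport and hasSharedHeap`` and to plain ``Inc``/``Dec`` otherwise.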
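Note on the thread-local-storage plumbing: ``ThreadVarAlloc``, ``ThreadVarSetValue`` and ``ThreadVarGetValue`` moved from excpt.nim into threads.nim. Every thread stores its ``PGcThread`` record in ``globalsSlot`` and ``ThisThread()`` reads it back, so excpt.nim now reaches the per-thread globals through ``ThisThread().g.<field>`` instead of a separately allocated ``PGlobals``. A rough sketch of that access pattern, assuming the ``TGcThread``/``TGlobals`` declarations from the diff; the helper name ``currentFramePtr`` is hypothetical:

.. code-block:: nimrod

  proc currentFramePtr(): PFrame =
    # threadProcWrapper stored this thread's TGcThread into globalsSlot,
    # so this lookup is inherently thread local
    result = ThisThread().g.framePtr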
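Note on the ordered-locking scheme in threads.nim: ``Aquire``/``TryAquire`` keep each thread's held locks sorted by address in ``g.locks`` and, when a lock is requested out of order, temporarily release and re-take the later ones, so contending threads end up locking in one global (address) order. That prevention code sits behind ``noDeadlocks``, which this revision hard-codes to false, so the sketch below only illustrates the intended use; the lock names ``a``, ``b`` and the worker procs are hypothetical and use only ``TLock``, ``InitLock``, ``Aquire`` and ``Release`` from the diff:

.. code-block:: nimrod

  var a, b: TLock
  InitLock(a)
  InitLock(b)

  proc worker1() =
    Aquire(a)    # with the nodeadlocks branch enabled, both workers
    Aquire(b)    # effectively take the locks in address order ...
    Release(b)
    Release(a)

  proc worker2() =
    Aquire(b)    # ... even though the requested order is reversed here,
    Aquire(a)    # which would otherwise risk the classic A-B/B-A deadlock
    Release(a)
    Release(b)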