summary refs log tree commit diff stats
diff options
authorAraq <>2011-06-04 23:55:10 +0200
committerAraq <>2011-06-04 23:55:10 +0200
commit24ed9d560fb7a694c90dc0a378549a06600fcbd0 (patch)
parent5008b44467bf545287289087a13f7e53c3d242ff (diff)
threads clean up their heap
9 files changed, 93 insertions, 46 deletions
diff --git a/compiler/ccgexprs.nim b/compiler/ccgexprs.nim
index d02c20559..3f3a60b7a 100755
--- a/compiler/ccgexprs.nim
+++ b/compiler/ccgexprs.nim
@@ -765,11 +765,15 @@ proc genIfExpr(p: BProc, n: PNode, d: var TLoc) =
     genAssignment(p, d, tmp, {}) # no need for deep copying
 proc genEcho(p: BProc, n: PNode) =
+  # this unusual way of implementing it ensures that e.g. ``echo("hallo", 45)``
+  # is threadsafe.
+  var args: PRope = nil
   var a: TLoc
-  for i in countup(1, sonsLen(n) - 1):
+  for i in countup(1, n.len-1):
     initLocExpr(p, n.sons[i], a)
-    appcg(p, cpsStmts, "#rawEcho($1);$n", [rdLoc(a)])
-  appcg(p, cpsStmts, "#rawEchoNL();$n")
+    appf(args, ", ($1)->data", [rdLoc(a)])
+  appcg(p, cpsStmts, "printf(\"" & repeatStr(n.len-1, "%s") &
+        "\\n\"$1);$n", [args])
 proc genCall(p: BProc, t: PNode, d: var TLoc) =
   var op, a: TLoc
diff --git a/compiler/semexprs.nim b/compiler/semexprs.nim
index a7b35f08b..38db93988 100755
--- a/compiler/semexprs.nim
+++ b/compiler/semexprs.nim
@@ -527,6 +527,8 @@ proc LookUpForDefined(c: PContext, n: PNode, onlyCurrentScope: bool): PSym =
         GlobalError(n.sons[1].info, errIdentifierExpected, "")
   of nkAccQuoted:
     result = lookupForDefined(c, considerAcc(n), onlyCurrentScope)
+  of nkSym:
+    result = n.sym
     GlobalError(, errIdentifierExpected, renderTree(n))
     result = nil
diff --git a/lib/system.nim b/lib/system.nim
index ef9685574..5250a7ae7 100755
--- a/lib/system.nim
+++ b/lib/system.nim
@@ -778,7 +778,7 @@ proc compileOption*(option, arg: string): bool {.
   hasThreadSupport = compileOption("threads")
-  hasSharedHeap = false # don't share heaps, so every thread has its own heap
+  hasSharedHeap = defined(boehmgc) # don't share heaps; every thread has its own
 when hasThreadSupport and not hasSharedHeap:
   {.pragma: rtlThreadVar, threadvar.}
@@ -1040,8 +1040,6 @@ proc getRefcount*[T](x: ref T): int {.importc: "getRefcount", noSideEffect.}
   ## retrieves the reference count of an heap-allocated object. The
   ## value is implementation-dependent.
-#proc writeStackTrace() {.export: "writeStackTrace".}
 # new constants:
   inf* {.magic: "Inf".} = 1.0 / 0.0
@@ -1386,6 +1384,12 @@ var
     ## set this variable to provide a procedure that should be called before
     ## each executed instruction. This should only be used by debuggers!
     ## Only code compiled with the ``debugger:on`` switch calls this hook.
+  raiseHook*: proc (e: ref E_Base): bool
+    ## with this hook you can influence exception handling on a global level.
+    ## If not nil, every 'raise' statement ends up calling this hook. Ordinary
+    ## application code should never set this hook! You had better know what
+    ## you are doing when setting it. If ``raiseHook`` returns false, the
+    ## exception is caught and does not propagate further up the call stack.
   PFrame = ptr TFrame
@@ -1420,8 +1424,13 @@ else:
 proc echo*[Ty](x: openarray[Ty]) {.magic: "Echo".}
-  ## equivalent to ``writeln(stdout, x); flush(stdout)``. BUT: This is
-  ## available for the ECMAScript target too!
+  ## special built-in that takes a variable number of arguments. Each argument
+  ## is converted to a string via ``$``, so it works for user-defined
+  ## types that have an overloaded ``$`` operator.
+  ## It is roughly equivalent to ``writeln(stdout, x); flush(stdout)``, but
+  ## available for the ECMAScript target too.
+  ## Unlike other IO operations this is guaranteed to be thread-safe as
+  ## ``echo`` is very often used for debugging convenience.
 template newException*(exceptn, message: expr): expr = 
   ## creates an exception object of type ``exceptn`` and sets its ``msg`` field
diff --git a/lib/system/alloc.nim b/lib/system/alloc.nim
index 0218e3baa..3273242d6 100755
--- a/lib/system/alloc.nim
+++ b/lib/system/alloc.nim
@@ -91,7 +91,7 @@ type
     key: int             # start address at bit 0
     bits: array[0..IntsPerTrunk-1, int] # a bit vector
-  TTrunkBuckets = array[0..1023, PTrunk]
+  TTrunkBuckets = array[0..255, PTrunk]
   TIntSet {.final.} = object 
     data: TTrunkBuckets
@@ -119,8 +119,7 @@ type
     data: TAlignType     # start of usable memory
   TBigChunk = object of TBaseChunk # not necessarily > PageSize!
-    next: PBigChunk      # chunks of the same (or bigger) size
-    prev: PBigChunk
+    next, prev: PBigChunk    # chunks of the same (or bigger) size
     align: int
     data: TAlignType     # start of usable memory
@@ -148,6 +147,7 @@ type
   TLLChunk {.pure.} = object ## *low-level* chunk
     size: int                # remaining size
     acc: int                 # accumulator
+    next: PLLChunk           # next low-level chunk; only needed for dealloc
   TAllocator {.final, pure.} = object
     llmem: PLLChunk
@@ -172,18 +172,31 @@ proc getMaxMem(a: var TAllocator): int =
 proc llAlloc(a: var TAllocator, size: int): pointer =
   # *low-level* alloc for the memory managers data structures. Deallocation
-  # is never done.
+  # is done at the end of the allocator's lifetime.
   if a.llmem == nil or size > a.llmem.size:
-    var request = roundup(size+sizeof(TLLChunk), PageSize)
-    a.llmem = cast[PLLChunk](osAllocPages(request))
-    incCurrMem(a, request)
-    a.llmem.size = request - sizeof(TLLChunk)
+    # the requested size is ``roundup(size+sizeof(TLLChunk), PageSize)``, but
+    # since we know ``size`` is a (small) constant, we know the requested size
+    # is one page:
+    assert roundup(size+sizeof(TLLChunk), PageSize) == PageSize
+    var old = a.llmem # can be nil and is correct with nil
+    a.llmem = cast[PLLChunk](osAllocPages(PageSize))
+    incCurrMem(a, PageSize)
+    a.llmem.size = PageSize - sizeof(TLLChunk)
     a.llmem.acc = sizeof(TLLChunk)
+ = old
   result = cast[pointer](cast[TAddress](a.llmem) + a.llmem.acc)
   dec(a.llmem.size, size)
   inc(a.llmem.acc, size)
   zeroMem(result, size)
+proc llDeallocAll(a: var TAllocator) =
+  var it = a.llmem
+  while it != nil:
+    # we know each block in the list has the size of 1 page:
+    var next =
+    osDeallocPages(it, PageSize)
+    it = next
 proc IntSetGet(t: TIntSet, key: int): PTrunk = 
   var it =[key and high(]
   while it != nil: 
@@ -218,6 +231,24 @@ proc Excl(s: var TIntSet, key: int) =
     var u = key and TrunkMask
     t.bits[u shr IntShift] = t.bits[u shr IntShift] and not
         (1 shl (u and IntMask))
+iterator elements(t: TIntSet): int {.inline.} =
+  # while traversing it is forbidden to change the set!
+  for h in 0..high(
+    var r =[h]
+    while r != nil:
+      var i = 0
+      while i <= high(r.bits):
+        var w = r.bits[i] # taking a copy of r.bits[i] here is correct, because
+        # modifying operations are not allowed during traversal
+        var j = 0
+        while w != 0:         # test all remaining bits for zero
+          if (w and 1) != 0:  # the bit is set!
+            yield (r.key shl TrunkShift) or (i shl IntShift +% j)
+          inc(j)
+          w = w shr 1
+        inc(i)
+      r =
 # ------------- chunk management ----------------------------------------------
 proc pageIndex(c: PChunk): int {.inline.} = 
@@ -508,9 +539,21 @@ proc isAllocatedPtr(a: TAllocator, p: pointer): bool =
         var c = cast[PBigChunk](c)
         result = p == addr( and cast[ptr TFreeCell](p).zeroField >% 1
+proc deallocOsPages(a: var TAllocator) =
+  # we free every 'ordinarily' allocated page by iterating over the page
+  # bits:
+  for p in elements(a.chunkStarts): 
+    var page = cast[PChunk](p shl pageShift)
+    var size = if page.size < PageSize: PageSize else: page.size
+    osDeallocPages(page, size)
+  # And then we free the pages that are in use for the page bits:
+  llDeallocAll(a)
   allocator {.rtlThreadVar.}: TAllocator
+proc deallocOsPages = deallocOsPages(allocator)
 # ---------------------- interface to programs -------------------------------
 when not defined(useNimRtl):
diff --git a/lib/system/excpt.nim b/lib/system/excpt.nim
index 3ef39902a..75cac97ba 100755
--- a/lib/system/excpt.nim
+++ b/lib/system/excpt.nim
@@ -212,8 +212,10 @@ proc quitOrDebug() {.inline.} =
     endbStep() # call the debugger
 proc raiseException(e: ref E_Base, ename: CString) {.compilerRtl.} =
-  GC_disable() # a bad thing is an error in the GC while raising an exception = ename
+  if raiseHook != nil:
+    if not raiseHook(e): return
+  GC_disable() # a bad thing is an error in the GC while raising an exception
   if ||excHandler != nil:
diff --git a/lib/system/sysio.nim b/lib/system/sysio.nim
index 80d9b1495..a6d351799 100755
--- a/lib/system/sysio.nim
+++ b/lib/system/sysio.nim
@@ -107,9 +107,6 @@ proc writeln[Ty](f: TFile, x: openArray[Ty]) =
   for i in items(x): write(f, i)
   write(f, "\n")
-proc rawEcho(x: string) {.inline, compilerproc.} = write(stdout, x)
-proc rawEchoNL() {.inline, compilerproc.} = write(stdout, "\n")
 # interface to the C procs:
 proc fopen(filename, mode: CString): pointer {.importc: "fopen", noDecl.}
diff --git a/lib/system/threads.nim b/lib/system/threads.nim
index c5264e4e2..8e94e60f5 100755
--- a/lib/system/threads.nim
+++ b/lib/system/threads.nim
@@ -102,24 +102,15 @@ when defined(Windows):
     stdcall, dynlib: "kernel32", importc: "TerminateThread".}
-    TThreadVarSlot {.compilerproc.} = distinct int32
+    TThreadVarSlot = distinct int32
-  proc TlsAlloc(): TThreadVarSlot {.
+  proc ThreadVarAlloc(): TThreadVarSlot {.
     importc: "TlsAlloc", stdcall, dynlib: "kernel32".}
-  proc TlsSetValue(dwTlsIndex: TThreadVarSlot, lpTlsValue: pointer) {.
+  proc ThreadVarSetValue(dwTlsIndex: TThreadVarSlot, lpTlsValue: pointer) {.
     importc: "TlsSetValue", stdcall, dynlib: "kernel32".}
-  proc TlsGetValue(dwTlsIndex: TThreadVarSlot): pointer {.
+  proc ThreadVarGetValue(dwTlsIndex: TThreadVarSlot): pointer {.
     importc: "TlsGetValue", stdcall, dynlib: "kernel32".}
-  proc ThreadVarAlloc(): TThreadVarSlot {.compilerproc, inline.} =
-    result = TlsAlloc()
-  proc ThreadVarSetValue(s: TThreadVarSlot, value: pointer) {.
-                         compilerproc, inline.} =
-    TlsSetValue(s, value)
-  proc ThreadVarGetValue(s: TThreadVarSlot): pointer {.
-                         compilerproc, inline.} =
-    result = TlsGetValue(s)
   {.passL: "-pthread".}
   {.passC: "-pthread".}
@@ -225,7 +216,7 @@ type
   TGcThread {.pure.} = object
     sys: TSysThread
     next, prev: PGcThread
-    stackBottom, stackTop: pointer
+    stackBottom, stackTop, threadLocalStorage: pointer
     stackSize: int
     g: TGlobals
     locksLen: int
@@ -242,6 +233,9 @@ var globalsSlot = ThreadVarAlloc()
 proc ThisThread(): PGcThread {.compilerRtl, inl.} =
   result = cast[PGcThread](ThreadVarGetValue(globalsSlot))
+proc GetThreadLocalVars(): pointer {.compilerRtl, inl.} =
+  result = cast[PGcThread](ThreadVarGetValue(globalsSlot)).threadLocalStorage
 # create for the main thread. Note: do not insert this data into the list
 # of all threads; it's not to be stopped etc.
 when not defined(useNimRtl):
@@ -295,11 +289,14 @@ type
   TThread* {.pure, final.}[TParam] = object of TGcThread ## Nimrod thread.
     fn: proc (p: TParam)
     data: TParam
+when not defined(boehmgc) and not hasSharedHeap:
+  proc deallocOsPages()
 template ThreadProcWrapperBody(closure: expr) =
   ThreadVarSetValue(globalsSlot, closure)
   var t = cast[ptr TThread[TParam]](closure)
-  when not hasSharedHeap:
+  when not defined(boehmgc) and not hasSharedHeap:
     # init the GC for this thread:
@@ -309,6 +306,7 @@ template ThreadProcWrapperBody(closure: expr) =
+    when defined(deallocOsPages): deallocOsPages()
 {.push stack_trace:off.}
 when defined(windows):
diff --git a/todo.txt b/todo.txt
index 46ff7a3f2..d57f86f5d 100755
--- a/todo.txt
+++ b/todo.txt
@@ -1,19 +1,9 @@
-* improve ``echo`` code generation for multi-threading
+* codegen for threadvars
 * two issues for thread local heaps:
   - must prevent to construct a data structure that contains memory 
     from different heaps: = otherHeapPtr
-  - must prevent that GC cleans up memory that other threads can still read...
-    this can be prevented if the shared heap is simply uncollected (at least
-    for now)
 * add --deadlock_prevention:on|off switch? timeout for locks?
-* make GC fully thread-safe; needs:
-  - thread must store its stack boundaries
-  - GC must traverse these stacks: Even better each thread traverses its
-    stack! No need to stop if you can help the GC ;-)
-  - isOnStack() needs to take them into account (SLOW?)
-  - GC must stop the world
 * implicit ref/ptr->var conversion; the compiler may store an object
   implicitly on the heap for write barrier efficiency! (Especially
diff --git a/web/news.txt b/web/news.txt
index fe1d37487..c1477abbe 100755
--- a/web/news.txt
+++ b/web/news.txt
@@ -83,6 +83,8 @@ Additions
 - Added a wrapper for ``0mq`` via the ``zmq`` module.
 - The compiler now supports array, sequence and string slicing.
 - Added ``system.newStringOfCap``.
+- Added ``system.raiseHook``.
+- ``system.echo`` is guaranteed to be thread-safe.
 2010-10-20 Version 0.8.10 released