30 files changed, 1811 insertions, 580 deletions
diff --git a/lib/core/macros.nim b/lib/core/macros.nim
index 678982a52..2e3596d0f 100644
--- a/lib/core/macros.nim
+++ b/lib/core/macros.nim
@@ -818,6 +818,8 @@ proc cmpIgnoreStyle(a, b: cstring): int {.noSideEffect.} =
     else: result = c
   var i = 0
   var j = 0
+  # first char is case sensitive
+  if a[0] != b[0]: return 1
   while true:
     while a[i] == '_': inc(i)
     while b[j] == '_': inc(j) # BUGFIX: typo
@@ -828,9 +830,23 @@ proc cmpIgnoreStyle(a, b: cstring): int {.noSideEffect.} =
     inc(i)
     inc(j)
 
-proc eqIdent* (a, b: string): bool = cmpIgnoreStyle(a, b) == 0
+proc eqIdent*(a, b: string): bool = cmpIgnoreStyle(a, b) == 0
   ## Check if two idents are identical.
 
+proc eqIdent*(node: NimNode; s: string): bool {.compileTime.} =
+  ## Check if node is some identifier node (``nnkIdent``, ``nnkSym``, etc.)
+  ## is the same as ``s``. Note that this is the preferred way to check! Most
+  ## other ways like ``node.ident`` are much more error-prone, unfortunately.
+  case node.kind
+  of nnkIdent:
+    result = node.ident == !s
+  of nnkSym:
+    result = eqIdent($node.symbol, s)
+  of nnkOpenSymChoice, nnkClosedSymChoice:
+    result = eqIdent($node[0], s)
+  else:
+    result = false
+
 proc hasArgOfName* (params: NimNode; name: string): bool {.compiletime.}=
   ## Search nnkFormalParams for an argument.
   assert params.kind == nnkFormalParams
diff --git a/lib/impure/nre.nim b/lib/impure/nre.nim
index c8f690461..381b1c3fc 100644
--- a/lib/impure/nre.nim
+++ b/lib/impure/nre.nim
@@ -566,6 +566,16 @@ proc findAll*(str: string, pattern: Regex, start = 0, endpos = int.high): seq[st
   for match in str.findIter(pattern, start, endpos):
     result.add(match.match)
 
+proc contains*(str: string, pattern: Regex, start = 0, endpos = int.high): bool =
+  ## Determine if the string contains the given pattern between the end and
+  ## start positions:
+  ## -  "abc".contains(re"bc") == true
+  ## -  "abc".contains(re"cd") == false
+  ## -  "abc".contains(re"a", start = 1) == false
+  ##
+  ## Same as ``isSome(str.find(pattern, start, endpos))``.
+  return isSome(str.find(pattern, start, endpos))
+
 proc split*(str: string, pattern: Regex, maxSplit = -1, start = 0): seq[string] =
   ## Splits the string with the given regex. This works according to the
   ## rules that Perl and Javascript use:
diff --git a/lib/js/dom.nim b/lib/js/dom.nim
index 11df959d7..5104712e8 100644
--- a/lib/js/dom.nim
+++ b/lib/js/dom.nim
@@ -402,7 +402,9 @@ proc routeEvent*(w: Window, event: Event)
 proc scrollBy*(w: Window, x, y: int)
 proc scrollTo*(w: Window, x, y: int)
 proc setInterval*(w: Window, code: cstring, pause: int): ref TInterval
+proc setInterval*(w: Window, function: proc (), pause: int): ref TInterval
 proc setTimeout*(w: Window, code: cstring, pause: int): ref TTimeOut
+proc setTimeout*(w: Window, function: proc (), pause: int): ref TInterval
 proc stop*(w: Window)
 
 # Node "methods"
@@ -481,6 +483,9 @@ proc getAttribute*(s: Style, attr: cstring, caseSensitive=false): cstring
 proc removeAttribute*(s: Style, attr: cstring, caseSensitive=false)
 proc setAttribute*(s: Style, attr, value: cstring, caseSensitive=false)
 
+# Event "methods"
+proc preventDefault*(ev: Event)
+
 {.pop.}
 
 var
diff --git a/lib/nimbase.h b/lib/nimbase.h
index 5a4f403b6..f531f3c49 100644
--- a/lib/nimbase.h
+++ b/lib/nimbase.h
@@ -222,6 +222,8 @@ __clang__
 
 /* ----------------------------------------------------------------------- */
 
+#define COMMA ,
+
 #include <limits.h>
 #include <stddef.h>
 
diff --git a/lib/packages/docutils/highlite.nim b/lib/packages/docutils/highlite.nim
index 1bc0af1b6..9de25f82b 100644
--- a/lib/packages/docutils/highlite.nim
+++ b/lib/packages/docutils/highlite.nim
@@ -31,13 +31,14 @@ type
     state: TokenClass
 
   SourceLanguage* = enum
-    langNone, langNim, langNimrod, langCpp, langCsharp, langC, langJava
+    langNone, langNim, langNimrod, langCpp, langCsharp, langC, langJava,
+    langYaml
 {.deprecated: [TSourceLanguage: SourceLanguage, TTokenClass: TokenClass,
               TGeneralTokenizer: GeneralTokenizer].}
 
 const
   sourceLanguageToStr*: array[SourceLanguage, string] = ["none",
-    "Nim", "Nimrod", "C++", "C#", "C", "Java"]
+    "Nim", "Nimrod", "C++", "C#", "C", "Java", "Yaml"]
   tokenClassToStr*: array[TokenClass, string] = ["Eof", "None", "Whitespace",
     "DecNumber", "BinNumber", "HexNumber", "OctNumber", "FloatNumber",
     "Identifier", "Keyword", "StringLit", "LongStringLit", "CharLit",
@@ -578,6 +579,309 @@ proc javaNextToken(g: var GeneralTokenizer) =
       "try", "void", "volatile", "while"]
   clikeNextToken(g, keywords, {})
 
+proc yamlPlainStrLit(g: var GeneralTokenizer, pos: var int) =
+  g.kind = gtStringLit
+  while g.buf[pos] notin {'\0', '\x09'..'\x0D', ',', ']', '}'}:
+    if g.buf[pos] == ':' and
+        g.buf[pos + 1] in {'\0', '\x09'..'\x0D', ' '}:
+      break
+    inc(pos)
+
+proc yamlPossibleNumber(g: var GeneralTokenizer, pos: var int) =
+  g.kind = gtNone
+  if g.buf[pos] == '-': inc(pos)
+  if g.buf[pos] == '0': inc(pos)
+  elif g.buf[pos] in '1'..'9':
+    inc(pos)
+    while g.buf[pos] in {'0'..'9'}: inc(pos)
+  else: yamlPlainStrLit(g, pos)
+  if g.kind == gtNone:
+    if g.buf[pos] in {'\0', '\x09'..'\x0D', ' ', ',', ']', '}'}:
+      g.kind = gtDecNumber
+    elif g.buf[pos] == '.':
+      inc(pos)
+      if g.buf[pos] notin {'0'..'9'}: yamlPlainStrLit(g, pos)
+      else:
+        while g.buf[pos] in {'0'..'9'}: inc(pos)
+        if g.buf[pos] in {'\0', '\x09'..'\x0D', ' ', ',', ']', '}'}:
+          g.kind = gtFloatNumber
+    if g.kind == gtNone:
+      if g.buf[pos] in {'e', 'E'}:
+        inc(pos)
+        if g.buf[pos] in {'-', '+'}: inc(pos)
+        if g.buf[pos] notin {'0'..'9'}: yamlPlainStrLit(g, pos)
+        else:
+          while g.buf[pos] in {'0'..'9'}: inc(pos)
+          if g.buf[pos] in {'\0', '\x09'..'\x0D', ' ', ',', ']', '}'}:
+            g.kind = gtFloatNumber
+          else: yamlPlainStrLit(g, pos)
+      else: yamlPlainStrLit(g, pos)
+  while g.buf[pos] notin {'\0', ',', ']', '}', '\x0A', '\x0D'}:
+    inc(pos)
+    if g.buf[pos] notin {'\x09'..'\x0D', ' ', ',', ']', '}'}:
+      yamlPlainStrLit(g, pos)
+      break
+  # theoretically, we would need to parse indentation (like with block scalars)
+  # because of possible multiline flow scalars that start with number-like
+  # content, but that is far too troublesome. I think it is fine that the
+  # highlighter is sloppy here.
+
+proc yamlNextToken(g: var GeneralTokenizer) =
+  const
+    hexChars = {'0'..'9', 'A'..'F', 'a'..'f'}
+  var pos = g.pos
+  g.start = g.pos
+  if g.state == gtStringLit:
+    g.kind = gtStringLit
+    while true:
+      case g.buf[pos]
+      of '\\':
+        if pos != g.pos: break
+        g.kind = gtEscapeSequence
+        inc(pos)
+        case g.buf[pos]
+        of 'x':
+          inc(pos)
+          for i in 1..2:
+            {.unroll.}
+            if g.buf[pos] in hexChars: inc(pos)
+          break
+        of 'u':
+          inc(pos)
+          for i in 1..4:
+            {.unroll.}
+            if g.buf[pos] in hexChars: inc(pos)
+          break
+        of 'U':
+          inc(pos)
+          for i in 1..8:
+            {.unroll.}
+            if g.buf[pos] in hexChars: inc(pos)
+          break
+        else: inc(pos)
+        break
+      of '\0':
+        g.state = gtOther
+        break
+      of '\"':
+        inc(pos)
+        g.state = gtOther
+        break
+      else: inc(pos)
+  elif g.state == gtCharLit:
+    # abusing gtCharLit as single-quoted string lit
+    g.kind = gtStringLit
+    inc(pos) # skip the starting '
+    while true:
+      case g.buf[pos]
+      of '\'':
+        inc(pos)
+        if g.buf[pos] == '\'':
+          inc(pos)
+          g.kind = gtEscapeSequence
+        else: g.state = gtOther
+        break
+      else: inc(pos)
+  elif g.state == gtCommand:
+    # gtCommand means 'block scalar header'
+    case g.buf[pos]
+    of ' ', '\t':
+      g.kind = gtWhitespace
+      while g.buf[pos] in {' ', '\t'}: inc(pos)
+    of '#':
+      g.kind = gtComment
+      while g.buf[pos] notin {'\0', '\x0A', '\x0D'}: inc(pos)
+    of '\x0A', '\x0D': discard
+    else:
+      # illegal here. just don't parse a block scalar
+      g.kind = gtNone
+      g.state = gtOther
+    if g.buf[pos] in {'\x0A', '\x0D'} and g.state == gtCommand:
+      g.state = gtLongStringLit
+  elif g.state == gtLongStringLit:
+    # beware, this is the only token where we actually have to parse
+    # indentation. 
+    
+    g.kind = gtLongStringLit
+    # first, we have to find the parent indentation of the block scalar, so that
+    # we know when to stop
+    assert g.buf[pos] in {'\x0A', '\x0D'}
+    var lookbehind = pos - 1
+    var headerStart = -1
+    while lookbehind >= 0 and g.buf[lookbehind] notin {'\x0A', '\x0D'}:
+      if headerStart == -1 and g.buf[lookbehind] in {'|', '>'}:
+        headerStart = lookbehind
+      dec(lookbehind)
+    assert headerStart != -1
+    var indentation = 1
+    while g.buf[lookbehind + indentation] == ' ': inc(indentation)
+    if g.buf[lookbehind + indentation] in {'|', '>'}:
+      # when the header is alone in a line, this line does not show the parent's
+      # indentation, so we must go further. search the first previous line with
+      # non-whitespace content.
+      while lookbehind >= 0 and g.buf[lookbehind] in {'\x0A', '\x0D'}:
+        dec(lookbehind)
+        while lookbehind >= 0 and
+            g.buf[lookbehind] in {' ', '\t'}: dec(lookbehind)
+      # now, find the beginning of the line...
+      while lookbehind >= 0 and g.buf[lookbehind] notin {'\x0A', '\x0D'}:
+        dec(lookbehind)
+      # ... and its indentation
+      indentation = 1
+      while g.buf[lookbehind + indentation] == ' ': inc(indentation)
+    if lookbehind == -1: indentation = 0 # top level
+    elif g.buf[lookbehind + 1] == '-' and g.buf[lookbehind + 2] == '-' and
+        g.buf[lookbehind + 3] == '-' and
+        g.buf[lookbehind + 4] in {'\x09'..'\x0D', ' '}:
+      # this is a document start, therefore, we are at top level
+      indentation = 0
+    # because lookbehind was at newline char when calculating indentation, we're
+    # off by one. fix that. top level's parent will have indentation of -1.
+    let parentIndentation = indentation - 1
+    
+    # find first content
+    while g.buf[pos] in {' ', '\x0A', '\x0D'}:
+      if g.buf[pos] == ' ': inc(indentation)
+      else: indentation = 0
+      inc(pos)
+    var minIndentation = indentation
+    
+    # for stupid edge cases, we must check whether an explicit indentation depth
+    # is given at the header.
+    while g.buf[headerStart] in {'>', '|', '+', '-'}: inc(headerStart)
+    if g.buf[headerStart] in {'0'..'9'}:
+      minIndentation = min(minIndentation, ord(g.buf[headerStart]) - ord('0'))
+    
+    # process content lines
+    while indentation > parentIndentation and g.buf[pos] != '\0':
+      if (indentation < minIndentation and g.buf[pos] == '#') or
+          (indentation == 0 and g.buf[pos] == '.' and g.buf[pos + 1] == '.' and
+          g.buf[pos + 2] == '.' and
+          g.buf[pos + 3] in {'\0', '\x09'..'\x0D', ' '}):
+        # comment after end of block scalar, or end of document
+        break
+      minIndentation = min(indentation, minIndentation)
+      while g.buf[pos] notin {'\0', '\x0A', '\x0D'}: inc(pos)
+      while g.buf[pos] in {' ', '\x0A', '\x0D'}:
+        if g.buf[pos] == ' ': inc(indentation)
+        else: indentation = 0
+        inc(pos)
+    
+    g.state = gtOther
+  elif g.state == gtOther:
+    # gtOther means 'inside YAML document'
+    case g.buf[pos]
+    of ' ', '\x09'..'\x0D':
+      g.kind = gtWhitespace
+      while g.buf[pos] in {' ', '\x09'..'\x0D'}: inc(pos)
+    of '#':
+      g.kind = gtComment
+      inc(pos)
+      while g.buf[pos] notin {'\0', '\x0A', '\x0D'}: inc(pos)
+    of '-':
+      inc(pos)
+      if g.buf[pos] in {'\0', ' ', '\x09'..'\x0D'}:
+        g.kind = gtPunctuation
+      elif g.buf[pos] == '-' and
+          (pos == 1 or g.buf[pos - 2] in {'\x0A', '\x0D'}): # start of line
+        inc(pos)
+        if g.buf[pos] == '-' and g.buf[pos + 1] in {'\0', '\x09'..'\x0D', ' '}:
+          inc(pos)
+          g.kind = gtKeyword
+        else: yamlPossibleNumber(g, pos)
+      else: yamlPossibleNumber(g, pos)
+    of '.':
+      if pos == 0 or g.buf[pos - 1] in {'\x0A', '\x0D'}:
+        inc(pos)
+        for i in 1..2:
+          {.unroll.}
+          if g.buf[pos] != '.': break
+          inc(pos)
+        if pos == g.start + 3:
+          g.kind = gtKeyword
+          g.state = gtNone
+        else: yamlPlainStrLit(g, pos)
+      else: yamlPlainStrLit(g, pos)
+    of '?':
+      inc(pos)
+      if g.buf[pos] in {'\0', ' ', '\x09'..'\x0D'}:
+        g.kind = gtPunctuation
+      else: yamlPlainStrLit(g, pos)
+    of ':':
+      inc(pos)
+      if g.buf[pos] in {'\0', '\x09'..'\x0D', ' ', '\'', '\"'} or
+          (pos > 0 and g.buf[pos - 2] in {'}', ']', '\"', '\''}):
+        g.kind = gtPunctuation
+      else: yamlPlainStrLit(g, pos)
+    of '[', ']', '{', '}', ',':
+      inc(pos)
+      g.kind = gtPunctuation
+    of '\"':
+      inc(pos)
+      g.state = gtStringLit
+      g.kind = gtStringLit
+    of '\'':
+      g.state = gtCharLit
+      g.kind = gtNone
+    of '!':
+      g.kind = gtTagStart
+      inc(pos)
+      if g.buf[pos] == '<':
+        # literal tag (e.g. `!<tag:yaml.org,2002:str>`)
+        while g.buf[pos] notin {'\0', '>', '\x09'..'\x0D', ' '}: inc(pos)
+        if g.buf[pos] == '>': inc(pos)
+      else:
+        while g.buf[pos] in {'A'..'Z', 'a'..'z', '0'..'9', '-'}: inc(pos)
+        case g.buf[pos]
+        of '!':
+          # prefixed tag (e.g. `!!str`)
+          inc(pos)
+          while g.buf[pos] notin
+              {'\0', '\x09'..'\x0D', ' ', ',', '[', ']', '{', '}'}: inc(pos)
+        of '\0', '\x09'..'\x0D', ' ': discard
+        else:
+          # local tag (e.g. `!nim:system:int`)
+          while g.buf[pos] notin {'\0', '\x09'..'\x0D', ' '}: inc(pos)
+    of '&':
+      g.kind = gtLabel
+      while g.buf[pos] notin {'\0', '\x09'..'\x0D', ' '}: inc(pos)
+    of '*':
+      g.kind = gtReference
+      while g.buf[pos] notin {'\0', '\x09'..'\x0D', ' '}: inc(pos)
+    of '|', '>':
+      # this can lead to incorrect tokenization when | or > appear inside flow
+      # content. checking whether we're inside flow content is not
+      # chomsky type-3, so we won't do that here.
+      g.kind = gtCommand
+      g.state = gtCommand
+      inc(pos)
+      while g.buf[pos] in {'0'..'9', '+', '-'}: inc(pos)
+    of '0'..'9': yamlPossibleNumber(g, pos)
+    of '\0': g.kind = gtEOF
+    else: yamlPlainStrLit(g, pos)
+  else:
+    # outside document
+    case g.buf[pos]
+    of '%':
+      if pos == 0 or g.buf[pos - 1] in {'\x0A', '\x0D'}:
+        g.kind = gtDirective
+        while g.buf[pos] notin {'\0', '\x0A', '\x0D'}: inc(pos)
+      else:
+        g.state = gtOther
+        yamlPlainStrLit(g, pos)
+    of ' ', '\x09'..'\x0D':
+      g.kind = gtWhitespace
+      while g.buf[pos] in {' ', '\x09'..'\x0D'}: inc(pos)
+    of '#':
+      g.kind = gtComment
+      while g.buf[pos] notin {'\0', '\x0A', '\x0D'}: inc(pos)
+    of '\0': g.kind = gtEOF
+    else:
+      g.kind = gtNone
+      g.state = gtOther
+  g.length = pos - g.pos
+  g.pos = pos
+
 proc getNextToken*(g: var GeneralTokenizer, lang: SourceLanguage) =
   case lang
   of langNone: assert false
@@ -586,6 +890,7 @@ proc getNextToken*(g: var GeneralTokenizer, lang: SourceLanguage) =
   of langCsharp: csharpNextToken(g)
   of langC: cNextToken(g)
   of langJava: javaNextToken(g)
+  of langYaml: yamlNextToken(g)
 
 when isMainModule:
   var keywords: seq[string]
diff --git a/lib/pure/asyncdispatch.nim b/lib/pure/asyncdispatch.nim
index 01b53cb12..139492916 100644
--- a/lib/pure/asyncdispatch.nim
+++ b/lib/pure/asyncdispatch.nim
@@ -9,9 +9,9 @@
 
 include "system/inclrtl"
 
-import os, oids, tables, strutils, macros, times
+import os, oids, tables, strutils, macros, times, heapqueue
 
-import nativesockets, net
+import nativesockets, net, queues
 
 export Port, SocketFlag
 
@@ -155,6 +155,9 @@ type
 
 when not defined(release):
   var currentID = 0
+
+proc callSoon*(cbproc: proc ()) {.gcsafe.}
+
 proc newFuture*[T](fromProc: string = "unspecified"): Future[T] =
   ## Creates a new future.
   ##
@@ -257,7 +260,7 @@ proc `callback=`*(future: FutureBase, cb: proc () {.closure,gcsafe.}) =
   ## passes ``future`` as a param to the callback.
   future.cb = cb
   if future.finished:
-    future.cb()
+    callSoon(future.cb)
 
 proc `callback=`*[T](future: Future[T],
     cb: proc (future: Future[T]) {.closure,gcsafe.}) =
@@ -354,21 +357,33 @@ proc `or`*[T, Y](fut1: Future[T], fut2: Future[Y]): Future[void] =
 
 type
   PDispatcherBase = ref object of RootRef
-    timers: seq[tuple[finishAt: float, fut: Future[void]]]
-
-proc processTimers(p: PDispatcherBase) =
-  var oldTimers = p.timers
-  p.timers = @[]
-  for t in oldTimers:
-    if epochTime() >= t.finishAt:
-      t.fut.complete()
-    else:
-      p.timers.add(t)
+    timers: HeapQueue[tuple[finishAt: float, fut: Future[void]]]
+    callbacks: Queue[proc ()]
+
+proc processTimers(p: PDispatcherBase) {.inline.} =
+  while p.timers.len > 0 and epochTime() >= p.timers[0].finishAt:
+    p.timers.pop().fut.complete()
+
+proc processPendingCallbacks(p: PDispatcherBase) =
+  while p.callbacks.len > 0:
+    var cb = p.callbacks.dequeue()
+    cb()
+
+proc adjustedTimeout(p: PDispatcherBase, timeout: int): int {.inline.} =
+  # If dispatcher has active timers this proc returns the timeout
+  # of the nearest timer. Returns `timeout` otherwise.
+  result = timeout
+  if p.timers.len > 0:
+    let timerTimeout = p.timers[0].finishAt
+    let curTime = epochTime()
+    if timeout == -1 or (curTime + (timeout / 1000)) > timerTimeout:
+      result = int((timerTimeout - curTime) * 1000)
+      if result < 0: result = 0
 
 when defined(windows) or defined(nimdoc):
   import winlean, sets, hashes
   type
-    CompletionKey = Dword
+    CompletionKey = ULONG_PTR
 
     CompletionData* = object
       fd*: AsyncFD # TODO: Rename this.
@@ -396,7 +411,8 @@ when defined(windows) or defined(nimdoc):
     new result
     result.ioPort = createIoCompletionPort(INVALID_HANDLE_VALUE, 0, 0, 1)
     result.handles = initSet[AsyncFD]()
-    result.timers = @[]
+    result.timers.newHeapQueue()
+    result.callbacks = initQueue[proc ()](64)
 
   var gDisp{.threadvar.}: PDispatcher ## Global dispatcher
   proc getGlobalDispatcher*(): PDispatcher =
@@ -423,15 +439,17 @@ when defined(windows) or defined(nimdoc):
   proc poll*(timeout = 500) =
     ## Waits for completion events and processes them.
     let p = getGlobalDispatcher()
-    if p.handles.len == 0 and p.timers.len == 0:
+    if p.handles.len == 0 and p.timers.len == 0 and p.callbacks.len == 0:
       raise newException(ValueError,
         "No handles or timers registered in dispatcher.")
 
-    let llTimeout =
-      if timeout ==  -1: winlean.INFINITE
-      else: timeout.int32
+    let at = p.adjustedTimeout(timeout)
+    var llTimeout =
+      if at == -1: winlean.INFINITE
+      else: at.int32
+
     var lpNumberOfBytesTransferred: Dword
-    var lpCompletionKey: ULONG
+    var lpCompletionKey: ULONG_PTR
     var customOverlapped: PCustomOverlapped
     let res = getQueuedCompletionStatus(p.ioPort,
         addr lpNumberOfBytesTransferred, addr lpCompletionKey,
@@ -461,6 +479,8 @@ when defined(windows) or defined(nimdoc):
 
     # Timer processing.
     processTimers(p)
+    # Callback queue processing
+    processPendingCallbacks(p)
 
   var connectExPtr: pointer = nil
   var acceptExPtr: pointer = nil
@@ -651,34 +671,16 @@ when defined(windows) or defined(nimdoc):
           retFuture.complete("")
         else:
           retFuture.fail(newException(OSError, osErrorMsg(err)))
-    elif ret == 0 and bytesReceived == 0 and dataBuf.buf[0] == '\0':
-      # We have to ensure that the buffer is empty because WSARecv will tell
-      # us immediately when it was disconnected, even when there is still
-      # data in the buffer.
-      # We want to give the user as much data as we can. So we only return
-      # the empty string (which signals a disconnection) when there is
-      # nothing left to read.
-      retFuture.complete("")
-      # TODO: "For message-oriented sockets, where a zero byte message is often
-      # allowable, a failure with an error code of WSAEDISCON is used to
-      # indicate graceful closure."
-      # ~ http://msdn.microsoft.com/en-us/library/ms741688%28v=vs.85%29.aspx
-    else:
-      # Request to read completed immediately.
-      # From my tests bytesReceived isn't reliable.
-      let realSize =
-        if bytesReceived == 0:
-          size
-        else:
-          bytesReceived
-      var data = newString(realSize)
-      assert realSize <= size
-      copyMem(addr data[0], addr dataBuf.buf[0], realSize)
-      #dealloc dataBuf.buf
-      retFuture.complete($data)
-      # We don't deallocate ``ol`` here because even though this completed
-      # immediately poll will still be notified about its completion and it will
-      # free ``ol``.
+    elif ret == 0:
+      # Request completed immediately.
+      if bytesReceived != 0:
+        var data = newString(bytesReceived)
+        assert bytesReceived <= size
+        copyMem(addr data[0], addr dataBuf.buf[0], bytesReceived)
+        retFuture.complete($data)
+      else:
+        if hasOverlappedIoCompleted(cast[POVERLAPPED](ol)):
+          retFuture.complete("")
     return retFuture
 
   proc recvInto*(socket: AsyncFD, buf: cstring, size: int,
@@ -741,31 +743,14 @@ when defined(windows) or defined(nimdoc):
           retFuture.complete(0)
         else:
           retFuture.fail(newException(OSError, osErrorMsg(err)))
-    elif ret == 0 and bytesReceived == 0 and dataBuf.buf[0] == '\0':
-      # We have to ensure that the buffer is empty because WSARecv will tell
-      # us immediately when it was disconnected, even when there is still
-      # data in the buffer.
-      # We want to give the user as much data as we can. So we only return
-      # the empty string (which signals a disconnection) when there is
-      # nothing left to read.
-      retFuture.complete(0)
-      # TODO: "For message-oriented sockets, where a zero byte message is often
-      # allowable, a failure with an error code of WSAEDISCON is used to
-      # indicate graceful closure."
-      # ~ http://msdn.microsoft.com/en-us/library/ms741688%28v=vs.85%29.aspx
-    else:
-      # Request to read completed immediately.
-      # From my tests bytesReceived isn't reliable.
-      let realSize =
-        if bytesReceived == 0:
-          size
-        else:
-          bytesReceived
-      assert realSize <= size
-      retFuture.complete(realSize)
-      # We don't deallocate ``ol`` here because even though this completed
-      # immediately poll will still be notified about its completion and it will
-      # free ``ol``.
+    elif ret == 0:
+      # Request completed immediately.
+      if bytesReceived != 0:
+        assert bytesReceived <= size
+        retFuture.complete(bytesReceived)
+      else:
+        if hasOverlappedIoCompleted(cast[POVERLAPPED](ol)):
+          retFuture.complete(bytesReceived)
     return retFuture
 
   proc send*(socket: AsyncFD, data: string,
@@ -956,7 +941,8 @@ else:
   proc newDispatcher*(): PDispatcher =
     new result
     result.selector = newSelector()
-    result.timers = @[]
+    result.timers.newHeapQueue()
+    result.callbacks = initQueue[proc ()](64)
 
   var gDisp{.threadvar.}: PDispatcher ## Global dispatcher
   proc getGlobalDispatcher*(): PDispatcher =
@@ -1014,7 +1000,7 @@ else:
 
   proc poll*(timeout = 500) =
     let p = getGlobalDispatcher()
-    for info in p.selector.select(timeout):
+    for info in p.selector.select(p.adjustedTimeout(timeout)):
       let data = PData(info.key.data)
       assert data.fd == info.key.fd.AsyncFD
       #echo("In poll ", data.fd.cint)
@@ -1052,7 +1038,10 @@ else:
         # (e.g. socket disconnected).
         discard
 
+    # Timer processing.
     processTimers(p)
+    # Callback queue processing
+    processPendingCallbacks(p)
 
   proc connect*(socket: AsyncFD, address: string, port: Port,
     domain = AF_INET): Future[void] =
@@ -1215,7 +1204,7 @@ proc sleepAsync*(ms: int): Future[void] =
   ## ``ms`` milliseconds.
   var retFuture = newFuture[void]("sleepAsync")
   let p = getGlobalDispatcher()
-  p.timers.add((epochTime() + (ms / 1000), retFuture))
+  p.timers.push((epochTime() + (ms / 1000), retFuture))
   return retFuture
 
 proc accept*(socket: AsyncFD,
@@ -1631,6 +1620,11 @@ proc recvLine*(socket: AsyncFD): Future[string] {.async.} =
       return
     add(result, c)
 
+proc callSoon*(cbproc: proc ()) = 
+  ## Schedule `cbproc` to be called as soon as possible.
+  ## The callback is called when control returns to the event loop.
+  getGlobalDispatcher().callbacks.enqueue(cbproc)
+
 proc runForever*() =
   ## Begins a never ending global dispatcher poll loop.
   while true:
diff --git a/lib/pure/asyncfile.nim b/lib/pure/asyncfile.nim
index c7b9fac18..c91d833fc 100644
--- a/lib/pure/asyncfile.nim
+++ b/lib/pure/asyncfile.nim
@@ -162,7 +162,7 @@ proc read*(f: AsyncFile, size: int): Future[string] =
       # Request completed immediately.
       var bytesRead: DWord
       let overlappedRes = getOverlappedResult(f.fd.Handle,
-          cast[POverlapped](ol)[], bytesRead, false.WinBool)
+          cast[POverlapped](ol), bytesRead, false.WinBool)
       if not overlappedRes.bool:
         let err = osLastError()
         if err.int32 == ERROR_HANDLE_EOF:
@@ -282,7 +282,7 @@ proc write*(f: AsyncFile, data: string): Future[void] =
       # Request completed immediately.
       var bytesWritten: DWord
       let overlappedRes = getOverlappedResult(f.fd.Handle,
-          cast[POverlapped](ol)[], bytesWritten, false.WinBool)
+          cast[POverlapped](ol), bytesWritten, false.WinBool)
       if not overlappedRes.bool:
         retFuture.fail(newException(OSError, osErrorMsg(osLastError())))
       else:
diff --git a/lib/pure/collections/LockFreeHash.nim b/lib/pure/collections/LockFreeHash.nim
index a3ead81e3..954d62491 100644
--- a/lib/pure/collections/LockFreeHash.nim
+++ b/lib/pure/collections/LockFreeHash.nim
@@ -52,7 +52,7 @@ when sizeof(int) == 4: # 32bit
   {.deprecated: [TRaw: Raw].}
 elif sizeof(int) == 8: # 64bit
   type
-    Raw = range[0..4611686018427387903]
+    Raw = range[0'i64..4611686018427387903'i64]
     ## The range of uint values that can be stored directly in a value slot
     ## when on a 64 bit platform
   {.deprecated: [TRaw: Raw].}
@@ -74,7 +74,7 @@ type
     copyDone: int
     next: PConcTable[K,V]
     data: EntryArr
-{.deprecated: [TEntry: Entry, TEntryArr: EntryArr.}
+{.deprecated: [TEntry: Entry, TEntryArr: EntryArr].}
 
 proc setVal[K,V](table: var PConcTable[K,V], key: int, val: int,
   expVal: int, match: bool): int
@@ -244,9 +244,9 @@ proc copySlot[K,V](idx: int, oldTbl: var PConcTable[K,V], newTbl: var PConcTable
     echo("oldVal is Tomb!!!, should not happen")
   if pop(oldVal) != 0:
     result = setVal(newTbl, pop(oldKey), pop(oldVal), 0, true) == 0
-  if result:
+  #if result:
     #echo("Copied a Slot! idx= " & $idx & " key= " & $oldKey & " val= " & $oldVal)
-  else:
+  #else:
     #echo("copy slot failed")
   # Our copy is done so we disable the old slot
   while not ok:
diff --git a/lib/pure/collections/heapqueue.nim b/lib/pure/collections/heapqueue.nim
new file mode 100644
index 000000000..149a1c9fc
--- /dev/null
+++ b/lib/pure/collections/heapqueue.nim
@@ -0,0 +1,107 @@
+##[ Heap queue algorithm (a.k.a. priority queue). Ported from Python heapq.
+
+Heaps are arrays for which a[k] <= a[2*k+1] and a[k] <= a[2*k+2] for
+all k, counting elements from 0.  For the sake of comparison,
+non-existing elements are considered to be infinite.  The interesting
+property of a heap is that a[0] is always its smallest element.
+
+]##
+
+type HeapQueue*[T] = distinct seq[T]
+
+proc newHeapQueue*[T](): HeapQueue[T] {.inline.} = HeapQueue[T](newSeq[T]())
+proc newHeapQueue*[T](h: var HeapQueue[T]) {.inline.} = h = HeapQueue[T](newSeq[T]())
+
+proc len*[T](h: HeapQueue[T]): int {.inline.} = seq[T](h).len
+proc `[]`*[T](h: HeapQueue[T], i: int): T {.inline.} = seq[T](h)[i]
+proc `[]=`[T](h: var HeapQueue[T], i: int, v: T) {.inline.} = seq[T](h)[i] = v
+proc add[T](h: var HeapQueue[T], v: T) {.inline.} = seq[T](h).add(v)
+
+proc heapCmp[T](x, y: T): bool {.inline.} =
+  return (x < y)
+
+# 'heap' is a heap at all indices >= startpos, except possibly for pos.  pos
+# is the index of a leaf with a possibly out-of-order value.  Restore the
+# heap invariant.
+proc siftdown[T](heap: var HeapQueue[T], startpos, p: int) =
+  var pos = p
+  var newitem = heap[pos]
+  # Follow the path to the root, moving parents down until finding a place
+  # newitem fits.
+  while pos > startpos:
+    let parentpos = (pos - 1) shr 1
+    let parent = heap[parentpos]
+    if heapCmp(newitem, parent):
+      heap[pos] = parent
+      pos = parentpos
+    else:
+      break
+  heap[pos] = newitem
+
+proc siftup[T](heap: var HeapQueue[T], p: int) =
+  let endpos = len(heap)
+  var pos = p
+  let startpos = pos
+  let newitem = heap[pos]
+  # Bubble up the smaller child until hitting a leaf.
+  var childpos = 2*pos + 1    # leftmost child position
+  while childpos < endpos:
+    # Set childpos to index of smaller child.
+    let rightpos = childpos + 1
+    if rightpos < endpos and not heapCmp(heap[childpos], heap[rightpos]):
+      childpos = rightpos
+    # Move the smaller child up.
+    heap[pos] = heap[childpos]
+    pos = childpos
+    childpos = 2*pos + 1
+  # The leaf at pos is empty now.  Put newitem there, and bubble it up
+  # to its final resting place (by sifting its parents down).
+  heap[pos] = newitem
+  siftdown(heap, startpos, pos)
+    
+proc push*[T](heap: var HeapQueue[T], item: T) =
+  ## Push item onto heap, maintaining the heap invariant.
+  (seq[T](heap)).add(item)
+  siftdown(heap, 0, len(heap)-1)
+
+proc pop*[T](heap: var HeapQueue[T]): T =
+  ## Pop the smallest item off the heap, maintaining the heap invariant.
+  let lastelt = seq[T](heap).pop()
+  if heap.len > 0:
+    result = heap[0]
+    heap[0] = lastelt
+    siftup(heap, 0)
+  else:
+    result = lastelt
+
+proc replace*[T](heap: var HeapQueue[T], item: T): T =
+  ## Pop and return the current smallest value, and add the new item.
+  ## This is more efficient than pop() followed by push(), and can be
+  ## more appropriate when using a fixed-size heap.  Note that the value
+  ## returned may be larger than item!  That constrains reasonable uses of
+  ## this routine unless written as part of a conditional replacement:
+
+  ##    if item > heap[0]:
+  ##        item = replace(heap, item)
+  result = heap[0]
+  heap[0] = item
+  siftup(heap, 0)
+
+proc pushpop*[T](heap: var HeapQueue[T], item: T): T =
+  ## Fast version of a push followed by a pop.
+  if heap.len > 0 and heapCmp(heap[0], item):
+    swap(item, heap[0])
+    siftup(heap, 0)
+  return item
+
+when isMainModule:
+  # Simple sanity test
+  var heap = newHeapQueue[int]()
+  let data = [1, 3, 5, 7, 9, 2, 4, 6, 8, 0]
+  for item in data:
+    push(heap, item)
+  doAssert(heap[0] == 0)
+  var sort = newSeq[int]()
+  while heap.len > 0:
+    sort.add(pop(heap))
+  doAssert(sort == @[0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
diff --git a/lib/pure/collections/tables.nim b/lib/pure/collections/tables.nim
index 2ed0d2034..58a789e76 100644
--- a/lib/pure/collections/tables.nim
+++ b/lib/pure/collections/tables.nim
@@ -663,6 +663,27 @@ proc sort*[A, B](t: OrderedTableRef[A, B],
   ## contrast to the `sort` for count tables).
   t[].sort(cmp)
 
+proc del*[A, B](t: var OrderedTable[A, B], key: A) =
+  ## deletes `key` from ordered hash table `t`. O(n) comlexity.
+  var prev = -1
+  let hc = hash(key)
+  forAllOrderedPairs:
+    if t.data[h].hcode == hc:
+      if t.first == h:
+        t.first = t.data[h].next
+      else:
+        t.data[prev].next = t.data[h].next
+      var zeroValue : type(t.data[h])
+      t.data[h] = zeroValue
+      dec t.counter
+      break
+    else:
+      prev = h
+
+proc del*[A, B](t: var OrderedTableRef[A, B], key: A) =
+  ## deletes `key` from ordered hash table `t`. O(n) comlexity.
+  t[].del(key)
+
 # ------------------------------ count tables -------------------------------
 
 type
@@ -984,6 +1005,26 @@ when isMainModule:
   s3[p1] = 30_000
   s3[p2] = 45_000
 
+  block: # Ordered table should preserve order after deletion
+    var
+      s4 = initOrderedTable[int, int]()
+    s4[1] = 1
+    s4[2] = 2
+    s4[3] = 3
+
+    var prev = 0
+    for i in s4.values:
+      doAssert(prev < i)
+      prev = i
+
+    s4.del(2)
+    doAssert(2 notin s4)
+    doAssert(s4.len == 2)
+    prev = 0
+    for i in s4.values:
+      doAssert(prev < i)
+      prev = i
+
   var
     t1 = initCountTable[string]()
     t2 = initCountTable[string]()
diff --git a/lib/pure/json.nim b/lib/pure/json.nim
index b9da8a0dd..483804ed8 100644
--- a/lib/pure/json.nim
+++ b/lib/pure/json.nim
@@ -707,17 +707,27 @@ proc `%`*(b: bool): JsonNode =
 
 proc `%`*(keyVals: openArray[tuple[key: string, val: JsonNode]]): JsonNode =
   ## Generic constructor for JSON data. Creates a new `JObject JsonNode`
-  new(result)
-  result.kind = JObject
-  result.fields = initTable[string, JsonNode](4)
+  result = newJObject()
   for key, val in items(keyVals): result.fields[key] = val
 
-proc `%`*(elements: openArray[JsonNode]): JsonNode =
+template `%`*(j: JsonNode): JsonNode = j
+
+proc `%`*[T](elements: openArray[T]): JsonNode =
   ## Generic constructor for JSON data. Creates a new `JArray JsonNode`
-  new(result)
-  result.kind = JArray
-  newSeq(result.elems, elements.len)
-  for i, p in pairs(elements): result.elems[i] = p
+  result = newJArray()
+  for elem in elements: result.add(%elem)
+
+proc `%`*(o: object): JsonNode =
+  ## Generic constructor for JSON data. Creates a new `JObject JsonNode`
+  result = newJObject()
+  for k, v in o.fieldPairs: result[k] = %v
+
+proc `%`*(o: ref object): JsonNode =
+  ## Generic constructor for JSON data. Creates a new `JObject JsonNode`
+  if o.isNil:
+    result = newJNull()
+  else:
+    result = %(o[])
 
 proc toJson(x: NimNode): NimNode {.compiletime.} =
   case x.kind
@@ -736,6 +746,9 @@ proc toJson(x: NimNode): NimNode {.compiletime.} =
     result = newNimNode(nnkTableConstr)
     x.expectLen(0)
 
+  of nnkNilLit:
+    result = newCall("newJNull")
+
   else:
     result = x
 
@@ -1325,10 +1338,26 @@ when isMainModule:
   var j4 = %*{"test": nil}
   doAssert j4 == %{"test": newJNull()}
 
-  echo("99% of tests finished. Going to try loading file.")
+  let seqOfNodes = @[%1, %2]
+  let jSeqOfNodes = %seqOfNodes
+  doAssert(jSeqOfNodes[1].num == 2)
+
+  type MyObj = object
+    a, b: int
+    s: string
+    f32: float32
+    f64: float64
+    next: ref MyObj
+  var m: MyObj
+  m.s = "hi"
+  m.a = 5
+  let jMyObj = %m
+  doAssert(jMyObj["a"].num == 5)
+  doAssert(jMyObj["s"].str == "hi")
 
   # Test loading of file.
   when not defined(js):
+    echo("99% of tests finished. Going to try loading file.")
     var parsed = parseFile("tests/testdata/jsontest.json")
 
     try:
diff --git a/lib/pure/logging.nim b/lib/pure/logging.nim
index f602ce31d..8a0d2fedd 100644
--- a/lib/pure/logging.nim
+++ b/lib/pure/logging.nim
@@ -54,6 +54,7 @@ type
     lvlAll,       ## all levels active
     lvlDebug,     ## debug level (and any above) active
     lvlInfo,      ## info level (and any above) active
+    lvlNotice,    ## info notice (and any above) active
     lvlWarn,      ## warn level (and any above) active
     lvlError,     ## error level (and any above) active
     lvlFatal,     ## fatal level (and any above) active
@@ -61,7 +62,7 @@ type
 
 const
   LevelNames*: array [Level, string] = [
-    "DEBUG", "DEBUG", "INFO", "WARN", "ERROR", "FATAL", "NONE"
+    "DEBUG", "DEBUG", "INFO", "NOTICE", "WARN", "ERROR", "FATAL", "NONE"
   ]
 
   defaultFmtStr* = "$levelname " ## default format string
@@ -258,22 +259,47 @@ template log*(level: Level, args: varargs[string, `$`]) =
 
 template debug*(args: varargs[string, `$`]) =
   ## Logs a debug message to all registered handlers.
+  ##
+  ## Messages that are useful to the application developer only and are usually
+  ## turned off in release.
   log(lvlDebug, args)
 
 template info*(args: varargs[string, `$`]) =
   ## Logs an info message to all registered handlers.
+  ##
+  ## Messages that are generated during the normal operation of an application
+  ## and are of no particular importance. Useful to aggregate for potential
+  ## later analysis.
   log(lvlInfo, args)
 
+template notice*(args: varargs[string, `$`]) =
+  ## Logs an notice message to all registered handlers.
+  ##
+  ## Semantically very similar to `info`, but meant to be messages you want to
+  ## be actively notified about (depending on your application).
+  ## These could be, for example, grouped by hour and mailed out.
+  log(lvlNotice, args)
+
 template warn*(args: varargs[string, `$`]) =
   ## Logs a warning message to all registered handlers.
+  ##
+  ## A non-error message that may indicate a potential problem rising or
+  ## impacted performance.
   log(lvlWarn, args)
 
 template error*(args: varargs[string, `$`]) =
   ## Logs an error message to all registered handlers.
+  ##
+  ## A application-level error condition. For example, some user input generated
+  ## an exception. The application will continue to run, but functionality or
+  ## data was impacted, possibly visible to users.
   log(lvlError, args)
 
 template fatal*(args: varargs[string, `$`]) =
   ## Logs a fatal error message to all registered handlers.
+  ##
+  ## A application-level fatal condition. FATAL usually means that the application
+  ## cannot go on and will exit (but this logging event will not do that for you).
   log(lvlFatal, args)
 
 proc addHandler*(handler: Logger) =
diff --git a/lib/pure/parseutils.nim b/lib/pure/parseutils.nim
index 3e25eba22..fb7d72182 100644
--- a/lib/pure/parseutils.nim
+++ b/lib/pure/parseutils.nim
@@ -173,6 +173,22 @@ proc parseUntil*(s: string, token: var string, until: char,
   result = i-start
   token = substr(s, start, i-1)
 
+proc parseUntil*(s: string, token: var string, until: string,
+                 start = 0): int {.inline.} =
+  ## parses a token and stores it in ``token``. Returns
+  ## the number of the parsed characters or 0 in case of an error. A token
+  ## consists of any character that comes before the `until`  token.
+  var i = start
+  while i < s.len:
+    if s[i] == until[0]:
+      var u = 1
+      while i+u < s.len and u < until.len and s[i+u] == until[u]:
+        inc u
+      if u >= until.len: break
+    inc(i)
+  result = i-start
+  token = substr(s, start, i-1)
+
 proc parseWhile*(s: string, token: var string, validChars: set[char],
                  start = 0): int {.inline.} =
   ## parses a token and stores it in ``token``. Returns
@@ -240,7 +256,7 @@ proc rawParseUInt(s: string, b: var uint64, start = 0): int =
     res = 0'u64
     prev = 0'u64
     i = start
-  if s[i] == '+': inc(i) # Allow 
+  if s[i] == '+': inc(i) # Allow
   if s[i] in {'0'..'9'}:
     b = 0
     while s[i] in {'0'..'9'}:
@@ -255,8 +271,10 @@ proc rawParseUInt(s: string, b: var uint64, start = 0): int =
 
 proc parseBiggestUInt*(s: string, number: var uint64, start = 0): int {.
   rtl, extern: "npuParseBiggestUInt", noSideEffect.} =
-  ## parses an unsigned integer starting at `start` and stores the value into `number`.
-  ## Result is the number of processed chars or 0 if there is no integer or overflow detected.
+  ## parses an unsigned integer starting at `start` and stores the value
+  ## into `number`.
+  ## Result is the number of processed chars or 0 if there is no integer
+  ## or overflow detected.
   var res: uint64
   # use 'res' for exception safety (don't write to 'number' in case of an
   # overflow exception):
@@ -265,8 +283,10 @@ proc parseBiggestUInt*(s: string, number: var uint64, start = 0): int {.
 
 proc parseUInt*(s: string, number: var uint, start = 0): int {.
   rtl, extern: "npuParseUInt", noSideEffect.} =
-  ## parses an unsigned integer starting at `start` and stores the value into `number`.
-  ## Result is the number of processed chars or 0 if there is no integer or overflow detected.
+  ## parses an unsigned integer starting at `start` and stores the value
+  ## into `number`.
+  ## Result is the number of processed chars or 0 if there is no integer or
+  ## overflow detected.
   var res: uint64
   result = parseBiggestUInt(s, res, start)
   if (sizeof(uint) <= 4) and
diff --git a/lib/pure/pegs.nim b/lib/pure/pegs.nim
index fead66de2..eea20a62c 100644
--- a/lib/pure/pegs.nim
+++ b/lib/pure/pegs.nim
@@ -659,7 +659,7 @@ proc rawMatch*(s: string, p: Peg, start: int, c: var Captures): int {.
   of pkSearch:
     var oldMl = c.ml
     result = 0
-    while start+result < s.len:
+    while start+result <= s.len:
       var x = rawMatch(s, p.sons[0], start+result, c)
       if x >= 0:
         inc(result, x)
@@ -671,7 +671,7 @@ proc rawMatch*(s: string, p: Peg, start: int, c: var Captures): int {.
     var idx = c.ml # reserve a slot for the subpattern
     inc(c.ml)
     result = 0
-    while start+result < s.len:
+    while start+result <= s.len:
       var x = rawMatch(s, p.sons[0], start+result, c)
       if x >= 0:
         if idx < MaxSubpatterns:
@@ -962,6 +962,50 @@ proc parallelReplace*(s: string, subs: varargs[
   # copy the rest:
   add(result, substr(s, i))
 
+proc replace*(s: string, sub: Peg, cb: proc(
+              match: int, cnt: int, caps: openArray[string]): string): string {.
+              rtl, extern: "npegs$1cb".}=
+  ## Replaces `sub` in `s` by the resulting strings from the callback.
+  ## The callback proc receives the index of the current match (starting with 0),
+  ## the count of captures and an open array with the captures of each match. Examples:
+  ##
+  ## .. code-block:: nim
+  ##
+  ##   proc handleMatches*(m: int, n: int, c: openArray[string]): string =
+  ##     result = ""
+  ##     if m > 0:
+  ##       result.add ", "
+  ##     result.add case n:
+  ##       of 2: c[0].toLower & ": '" & c[1] & "'"
+  ##       of 1: c[0].toLower & ": ''"
+  ##       else: ""
+  ##
+  ##   let s = "Var1=key1;var2=Key2;   VAR3"
+  ##   echo s.replace(peg"{\ident}('='{\ident})* ';'* \s*", handleMatches)
+  ##
+  ## Results in:
+  ##
+  ## .. code-block:: nim
+  ##
+  ##   "var1: 'key1', var2: 'Key2', var3: ''"
+  result = ""
+  var i = 0
+  var caps: array[0..MaxSubpatterns-1, string]
+  var c: Captures
+  var m = 0
+  while i < s.len:
+    c.ml = 0
+    var x = rawMatch(s, sub, i, c)
+    if x <= 0:
+      add(result, s[i])
+      inc(i)
+    else:
+      fillMatches(s, caps, c)
+      add(result, cb(m, c.ml, caps))
+      inc(i, x)
+      inc(m)
+  add(result, substr(s, i))
+
 proc transformFile*(infile, outfile: string,
                     subs: varargs[tuple[pattern: Peg, repl: string]]) {.
                     rtl, extern: "npegs$1".} =
@@ -1789,3 +1833,22 @@ when isMainModule:
 
   assert(str.find(empty_test) == 0)
   assert(str.match(empty_test))
+
+  proc handleMatches*(m: int, n: int, c: openArray[string]): string =
+    result = ""
+
+    if m > 0:
+      result.add ", "
+
+    result.add case n:
+      of 2: c[0].toLower & ": '" & c[1] & "'"
+      of 1: c[0].toLower & ": ''"
+      else: ""
+
+  assert("Var1=key1;var2=Key2;   VAR3".
+         replace(peg"{\ident}('='{\ident})* ';'* \s*",
+         handleMatches)=="var1: 'key1', var2: 'Key2', var3: ''")
+
+
+  doAssert "test1".match(peg"""{@}$""")
+  doAssert "test2".match(peg"""{(!$ .)*} $""")
diff --git a/lib/pure/rationals.nim b/lib/pure/rationals.nim
index 6fd05dc4b..bf134f2ae 100644
--- a/lib/pure/rationals.nim
+++ b/lib/pure/rationals.nim
@@ -41,26 +41,26 @@ proc toRational*[T:SomeInteger](x: T): Rational[T] =
 
 proc toRationalSub(x: float, n: int): Rational[int] =
   var
-    a = 0
-    b, c, d = 1
+    a = 0'i64
+    b, c, d = 1'i64
   result = 0 // 1   # rational 0
   while b <= n and d <= n:
     let ac = (a+c)
     let bd = (b+d)
     # scale by 1000 so not overflow for high precision
-    let mediant = (ac/1000) / (bd/1000)
+    let mediant = (ac.float/1000) / (bd.float/1000)
     if x == mediant:
       if bd <= n:
-        result.num = ac
-        result.den = bd
+        result.num = ac.int
+        result.den = bd.int
         return result
       elif d > b:
-        result.num = c
-        result.den = d
+        result.num = c.int
+        result.den = d.int
         return result
       else:
-        result.num = a
-        result.den = b
+        result.num = a.int
+        result.den = b.int
         return result
     elif x > mediant:
       a = ac
@@ -69,8 +69,8 @@ proc toRationalSub(x: float, n: int): Rational[int] =
       c = ac
       d = bd
   if (b > n):
-    return initRational(c, d)
-  return initRational(a, b)
+    return initRational(c.int, d.int)
+  return initRational(a.int, b.int)
 
 proc toRational*(x: float, n: int = high(int)): Rational[int] =
   ## Calculate the best rational numerator and denominator
diff --git a/lib/pure/smtp.nim b/lib/pure/smtp.nim
index b2adac2f3..050712902 100644
--- a/lib/pure/smtp.nim
+++ b/lib/pure/smtp.nim
@@ -20,9 +20,9 @@
 ##   var msg = createMessage("Hello from Nim's SMTP",
 ##                           "Hello!.\n Is this awesome or what?",
 ##                           @["foo@gmail.com"])
-##   var smtp = connect("smtp.gmail.com", 465, true, true)
-##   smtp.auth("username", "password")
-##   smtp.sendmail("username@gmail.com", @["foo@gmail.com"], $msg)
+##   var smtpConn = connect("smtp.gmail.com", Port 465, true, true)
+##   smtpConn.auth("username", "password")
+##   smtpConn.sendmail("username@gmail.com", @["foo@gmail.com"], $msg)
 ##
 ##
 ## For SSL support this module relies on OpenSSL. If you want to
@@ -31,6 +31,8 @@
 import net, strutils, strtabs, base64, os
 import asyncnet, asyncdispatch
 
+export Port
+
 type
   Smtp* = object
     sock: Socket
@@ -258,8 +260,8 @@ when not defined(testing) and isMainModule:
   #     "Hello, my name is dom96.\n What\'s yours?", @["dominik@localhost"])
   #echo(msg)
 
-  #var smtp = connect("localhost", 25, False, True)
-  #smtp.sendmail("root@localhost", @["dominik@localhost"], $msg)
+  #var smtpConn = connect("localhost", Port 25, false, true)
+  #smtpConn.sendmail("root@localhost", @["dominik@localhost"], $msg)
 
   #echo(decode("a17sm3701420wbe.12"))
   proc main() {.async.} =
diff --git a/lib/pure/strscans.nim b/lib/pure/strscans.nim
new file mode 100644
index 000000000..f695c3e2a
--- /dev/null
+++ b/lib/pure/strscans.nim
@@ -0,0 +1,521 @@
+#
+#
+#            Nim's Runtime Library
+#        (c) Copyright 2016 Andreas Rumpf
+#
+#    See the file "copying.txt", included in this
+#    distribution, for details about the copyright.
+#
+
+##[
+This module contains a `scanf`:idx: macro that can be used for extracting
+substrings from an input string. This is often easier than regular expressions.
+Some examples as an apetizer:
+
+.. code-block:: nim
+  # check if input string matches a triple of integers:
+  const input = "(1,2,4)"
+  var x, y, z: int
+  if scanf(input, "($i,$i,$i)", x, y, z):
+    echo "matches and x is ", x, " y is ", y, " z is ", z
+
+  # check if input string matches an ISO date followed by an identifier followed
+  # by whitespace and a floating point number:
+  var year, month, day: int
+  var identifier: string
+  var myfloat: float
+  if scanf(input, "$i-$i-$i $w$s$f", year, month, day, identifier, myfloat):
+    echo "yes, we have a match!"
+
+As can be seen from the examples, strings are matched verbatim except for
+substrings starting with ``$``. These constructions are available:
+
+=================   ========================================================
+``$i``              Matches an integer. This uses ``parseutils.parseInt``.
+``$f``              Matches a floating pointer number. Uses ``parseFloat``.
+``$w``              Matches an ASCII identifier: ``[A-Z-a-z_][A-Za-z_0-9]*``.
+``$s``              Skips optional whitespace.
+``$$``              Matches a single dollar sign.
+``$.``              Matches if the end of the input string has been reached.
+``$*``              Matches until the token following the ``$*`` was found.
+                    The match is allowed to be of 0 length.
+``$+``              Matches until the token following the ``$+`` was found.
+                    The match must consist of at least one char.
+``${foo}``          User defined matcher. Uses the proc ``foo`` to perform
+                    the match. See below for more details.
+``$[foo]``          Call user defined proc ``foo`` to **skip** some optional
+                    parts in the input string. See below for more details.
+=================   ========================================================
+
+Even though ``$*`` and ``$+`` look similar to the regular expressions ``.*``
+and ``.+`` they work quite differently, there is no non-deterministic
+state machine involved and the matches are non-greedy. ``[$*]``
+matches ``[xyz]`` via ``parseutils.parseUntil``.
+
+Furthermore no backtracking is performed, if parsing fails after a value
+has already been bound to a matched subexpression this value is not restored
+to its original value. This rarely causes problems in practice and if it does
+for you, it's easy enough to bind to a temporary variable first.
+
+
+Startswith vs full match
+========================
+
+``scanf`` returns true if the input string **starts with** the specified
+pattern. If instead it should only return true if theres is also nothing
+left in the input, append ``$.`` to your pattern.
+
+
+User definable matchers
+=======================
+
+One very nice advantage over regular expressions is that ``scanf`` is
+extensible with ordinary Nim procs. The proc is either enclosed in ``${}``
+or in ``$[]``. ``${}`` matches and binds the result
+to a variable (that was passed to the ``scanf`` macro) while ``$[]`` merely
+optional tokens.
+
+
+In this example, we define a helper proc ``skipSep`` that skips some separators
+which we then use in our scanf pattern to help us in the matching process:
+
+.. code-block:: nim
+
+  proc someSep(input: string; start: int; seps: set[char] = {':','-','.'}): int =
+    # Note: The parameters and return value must match to what ``scanf`` requires
+    result = 0
+    while input[start+result] in seps: inc result
+
+  if scanf(input, "$w${someSep}$w", key, value):
+    ...
+
+It also possible to pass arguments to a user definable matcher:
+
+.. code-block:: nim
+
+  proc ndigits(input: string; start: int; intVal: var int; n: int): int =
+    # matches exactly ``n`` digits. Matchers need to return 0 if nothing
+    # matched or otherwise the number of processed chars.
+    var x = 0
+    var i = 0
+    while i < n and i+start < input.len and input[i+start] in {'0'..'9'}:
+      x = x * 10 + input[i+start].ord - '0'.ord
+      inc i
+    # only overwrite if we had a match
+    if i == n:
+      result = n
+      intVal = x
+
+  # match an ISO date extracting year, month, day at the same time.
+  # Also ensure the input ends after the ISO date:
+  var year, month, day: int
+  if scanf("2013-01-03", "${ndigits(4)}-${ndigits(2)}-${ndigits(2)}$.", year, month, day):
+    ...
+
+]##
+
+
+import macros, parseutils
+
+proc conditionsToIfChain(n, idx, res: NimNode; start: int): NimNode =
+  assert n.kind == nnkStmtList
+  if start >= n.len: return newAssignment(res, newLit true)
+  var ifs: NimNode = nil
+  if n[start+1].kind == nnkEmpty:
+    ifs = conditionsToIfChain(n, idx, res, start+3)
+  else:
+    ifs = newIfStmt((n[start+1],
+                    newTree(nnkStmtList, newCall(bindSym"inc", idx, n[start+2]),
+                                     conditionsToIfChain(n, idx, res, start+3))))
+  result = newTree(nnkStmtList, n[start], ifs)
+
+proc notZero(x: NimNode): NimNode = newCall(bindSym"!=", x, newLit 0)
+
+proc buildUserCall(x: string; args: varargs[NimNode]): NimNode =
+  let y = parseExpr(x)
+  result = newTree(nnkCall)
+  if y.kind in nnkCallKinds: result.add y[0]
+  else: result.add y
+  for a in args: result.add a
+  if y.kind in nnkCallKinds:
+    for i in 1..<y.len: result.add y[i]
+
+macro scanf*(input: string; pattern: static[string]; results: varargs[typed]): bool =
+  ## See top level documentation of his module of how ``scanf`` works.
+  template matchBind(parser) {.dirty.} =
+    var resLen = genSym(nskLet, "resLen")
+    conds.add newLetStmt(resLen, newCall(bindSym(parser), input, results[i], idx))
+    conds.add resLen.notZero
+    conds.add resLen
+
+  var i = 0
+  var p = 0
+  var idx = genSym(nskVar, "idx")
+  var res = genSym(nskVar, "res")
+  result = newTree(nnkStmtListExpr, newVarStmt(idx, newLit 0), newVarStmt(res, newLit false))
+  var conds = newTree(nnkStmtList)
+  var fullMatch = false
+  while p < pattern.len:
+    if pattern[p] == '$':
+      inc p
+      case pattern[p]
+      of '$':
+        var resLen = genSym(nskLet, "resLen")
+        conds.add newLetStmt(resLen, newCall(bindSym"skip", input, newLit($pattern[p]), idx))
+        conds.add resLen.notZero
+        conds.add resLen
+      of 'w':
+        if i < results.len or getType(results[i]).typeKind != ntyString:
+          matchBind "parseIdent"
+        else:
+          error("no string var given for $w")
+        inc i
+      of 'i':
+        if i < results.len or getType(results[i]).typeKind != ntyInt:
+          matchBind "parseInt"
+        else:
+          error("no int var given for $d")
+        inc i
+      of 'f':
+        if i < results.len or getType(results[i]).typeKind != ntyFloat:
+          matchBind "parseFloat"
+        else:
+          error("no float var given for $f")
+        inc i
+      of 's':
+        conds.add newCall(bindSym"inc", idx, newCall(bindSym"skipWhitespace", input, idx))
+        conds.add newEmptyNode()
+        conds.add newEmptyNode()
+      of '.':
+        if p == pattern.len-1:
+          fullMatch = true
+        else:
+          error("invalid format string")
+      of '*', '+':
+        if i < results.len or getType(results[i]).typeKind != ntyString:
+          var min = ord(pattern[p] == '+')
+          var q=p+1
+          var token = ""
+          while q < pattern.len and pattern[q] != '$':
+            token.add pattern[q]
+            inc q
+          var resLen = genSym(nskLet, "resLen")
+          conds.add newLetStmt(resLen, newCall(bindSym"parseUntil", input, results[i], newLit(token), idx))
+          conds.add newCall(bindSym"!=", resLen, newLit min)
+          conds.add resLen
+        else:
+          error("no string var given for $" & pattern[p])
+        inc i
+      of '{':
+        inc p
+        var nesting = 0
+        let start = p
+        while true:
+          case pattern[p]
+          of '{': inc nesting
+          of '}':
+            if nesting == 0: break
+            dec nesting
+          of '\0': error("expected closing '}'")
+          else: discard
+          inc p
+        let expr = pattern.substr(start, p-1)
+        if i < results.len:
+          var resLen = genSym(nskLet, "resLen")
+          conds.add newLetStmt(resLen, buildUserCall(expr, input, results[i], idx))
+          conds.add newCall(bindSym"!=", resLen, newLit 0)
+          conds.add resLen
+        else:
+          error("no var given for $" & expr)
+        inc i
+      of '[':
+        inc p
+        var nesting = 0
+        let start = p
+        while true:
+          case pattern[p]
+          of '[': inc nesting
+          of ']':
+            if nesting == 0: break
+            dec nesting
+          of '\0': error("expected closing ']'")
+          else: discard
+          inc p
+        let expr = pattern.substr(start, p-1)
+        conds.add newCall(bindSym"inc", idx, buildUserCall(expr, input, idx))
+        conds.add newEmptyNode()
+        conds.add newEmptyNode()
+      else: error("invalid format string")
+      inc p
+    else:
+      var token = ""
+      while p < pattern.len and pattern[p] != '$':
+        token.add pattern[p]
+        inc p
+      var resLen = genSym(nskLet, "resLen")
+      conds.add newLetStmt(resLen, newCall(bindSym"skip", input, newLit(token), idx))
+      conds.add resLen.notZero
+      conds.add resLen
+  result.add conditionsToIfChain(conds, idx, res, 0)
+  if fullMatch:
+    result.add newCall(bindSym">=", idx, newCall(bindSym"len", input))
+  else:
+    result.add res
+
+template atom*(input: string; idx: int; c: char): bool =
+  ## Used in scanp for the matching of atoms (usually chars).
+  input[idx] == c
+
+template atom*(input: string; idx: int; s: set[char]): bool =
+  input[idx] in s
+
+#template prepare*(input: string): int = 0
+template success*(x: int): bool = x != 0
+
+template nxt*(input: string; idx, step: int = 1) = inc(idx, step)
+
+macro scanp*(input, idx: typed; pattern: varargs[untyped]): bool =
+  ## See top level documentation of his module of how ``scanp`` works.
+  type StmtTriple = tuple[init, cond, action: NimNode]
+
+  template interf(x): untyped = bindSym(x, brForceOpen)
+
+  proc toIfChain(n: seq[StmtTriple]; idx, res: NimNode; start: int): NimNode =
+    if start >= n.len: return newAssignment(res, newLit true)
+    var ifs: NimNode = nil
+    if n[start].cond.kind == nnkEmpty:
+      ifs = toIfChain(n, idx, res, start+1)
+    else:
+      ifs = newIfStmt((n[start].cond,
+                      newTree(nnkStmtList, n[start].action,
+                              toIfChain(n, idx, res, start+1))))
+    result = newTree(nnkStmtList, n[start].init, ifs)
+
+  proc attach(x, attached: NimNode): NimNode =
+    if attached == nil: x
+    else: newStmtList(attached, x)
+
+  proc placeholder(n, x, j: NimNode): NimNode =
+    if n.kind == nnkPrefix and n[0].eqIdent("$"):
+      let n1 = n[1]
+      if n1.eqIdent"_" or n1.eqIdent"current":
+        result = newTree(nnkBracketExpr, x, j)
+      elif n1.eqIdent"input":
+        result = x
+      elif n1.eqIdent"i" or n1.eqIdent"index":
+        result = j
+      else:
+        error("unknown pattern " & repr(n))
+    else:
+      result = copyNimNode(n)
+      for i in 0 ..< n.len:
+        result.add placeholder(n[i], x, j)
+
+  proc atm(it, input, idx, attached: NimNode): StmtTriple =
+    template `!!`(x): untyped = attach(x, attached)
+    case it.kind
+    of nnkIdent:
+      var resLen = genSym(nskLet, "resLen")
+      result = (newLetStmt(resLen, newCall(it, input, idx)),
+                newCall(interf"success", resLen),
+                !!newCall(interf"nxt", input, idx, resLen))
+    of nnkCallKinds:
+      # *{'A'..'Z'} !! s.add(!_)
+      template buildWhile(init, cond, action): untyped =
+        while true:
+          init
+          if not cond: break
+          action
+
+      # (x) a  # bind action a to (x)
+      if it[0].kind == nnkPar and it.len == 2:
+        result = atm(it[0], input, idx, placeholder(it[1], input, idx))
+      elif it.kind == nnkInfix and it[0].eqIdent"->":
+        # bind matching to some action:
+        result = atm(it[1], input, idx, placeholder(it[2], input, idx))
+      elif it.kind == nnkInfix and it[0].eqIdent"as":
+        let cond = if it[1].kind in nnkCallKinds: placeholder(it[1], input, idx)
+                   else: newCall(it[1], input, idx)
+        result = (newLetStmt(it[2], cond),
+                  newCall(interf"success", it[2]),
+                  !!newCall(interf"nxt", input, idx, it[2]))
+      elif it.kind == nnkPrefix and it[0].eqIdent"*":
+        let (init, cond, action) = atm(it[1], input, idx, attached)
+        result = (getAst(buildWhile(init, cond, action)),
+                  newEmptyNode(), newEmptyNode())
+      elif it.kind == nnkPrefix and it[0].eqIdent"+":
+        # x+  is the same as  xx*
+        result = atm(newTree(nnkPar, it[1], newTree(nnkPrefix, ident"*", it[1])),
+                      input, idx, attached)
+      elif it.kind == nnkPrefix and it[0].eqIdent"?":
+        # optional.
+        let (init, cond, action) = atm(it[1], input, idx, attached)
+        if cond.kind == nnkEmpty:
+          error("'?' operator applied to a non-condition")
+        else:
+          result = (newTree(nnkStmtList, init, newIfStmt((cond, action))),
+                    newEmptyNode(), newEmptyNode())
+      elif it.kind == nnkPrefix and it[0].eqIdent"~":
+        # not operator
+        let (init, cond, action) = atm(it[1], input, idx, attached)
+        if cond.kind == nnkEmpty:
+          error("'~' operator applied to a non-condition")
+        else:
+          result = (init, newCall(bindSym"not", cond), action)
+      elif it.kind == nnkInfix and it[0].eqIdent"|":
+        let a = atm(it[1], input, idx, attached)
+        let b = atm(it[2], input, idx, attached)
+        if a.cond.kind == nnkEmpty or b.cond.kind == nnkEmpty:
+          error("'|' operator applied to a non-condition")
+        else:
+          result = (newStmtList(a.init,
+                newIfStmt((a.cond, a.action), (newTree(nnkStmtListExpr, b.init, b.cond), b.action))),
+              newEmptyNode(), newEmptyNode())
+      elif it.kind == nnkInfix and it[0].eqIdent"^*":
+        # a ^* b  is rewritten to:  (a *(b a))?
+        #exprList = expr ^+ comma
+        template tmp(a, b): untyped = ?(a, *(b, a))
+        result = atm(getAst(tmp(it[1], it[2])), input, idx, attached)
+
+      elif it.kind == nnkInfix and it[0].eqIdent"^+":
+        # a ^* b  is rewritten to:  (a +(b a))?
+        template tmp(a, b): untyped = (a, *(b, a))
+        result = atm(getAst(tmp(it[1], it[2])), input, idx, attached)
+      elif it.kind == nnkCommand and it.len == 2 and it[0].eqIdent"pred":
+        # enforce that the wrapped call is interpreted as a predicate, not a non-terminal:
+        result = (newEmptyNode(), placeholder(it[1], input, idx), newEmptyNode())
+      else:
+        var resLen = genSym(nskLet, "resLen")
+        result = (newLetStmt(resLen, placeholder(it, input, idx)),
+                  newCall(interf"success", resLen), !!newCall(interf"nxt", input, idx, resLen))
+    of nnkStrLit..nnkTripleStrLit:
+      var resLen = genSym(nskLet, "resLen")
+      result = (newLetStmt(resLen, newCall(interf"skip", input, it, idx)),
+                newCall(interf"success", resLen), !!newCall(interf"nxt", input, idx, resLen))
+    of nnkCurly, nnkAccQuoted, nnkCharLit:
+      result = (newEmptyNode(), newCall(interf"atom", input, idx, it), !!newCall(interf"nxt", input, idx))
+    of nnkCurlyExpr:
+      if it.len == 3 and it[1].kind == nnkIntLit and it[2].kind == nnkIntLit:
+        var h = newTree(nnkPar, it[0])
+        for count in 2..it[1].intVal: h.add(it[0])
+        for count in it[1].intVal .. it[2].intVal-1: h.add(newTree(nnkPrefix, ident"?", it[0]))
+        result = atm(h, input, idx, attached)
+      elif it.len == 2 and it[1].kind == nnkIntLit:
+        var h = newTree(nnkPar, it[0])
+        for count in 2..it[1].intVal: h.add(it[0])
+        result = atm(h, input, idx, attached)
+      else:
+        error("invalid pattern")
+    of nnkPar:
+      if it.len == 1:
+        result = atm(it[0], input, idx, attached)
+      else:
+        # concatenation:
+        var conds: seq[StmtTriple] = @[]
+        for x in it: conds.add atm(x, input, idx, attached)
+        var res = genSym(nskVar, "res")
+        result = (newStmtList(newVarStmt(res, newLit false),
+            toIfChain(conds, idx, res, 0)), res, newEmptyNode())
+    else:
+      error("invalid pattern")
+
+  #var idx = genSym(nskVar, "idx")
+  var res = genSym(nskVar, "res")
+  result = newTree(nnkStmtListExpr, #newVarStmt(idx, newCall(interf"prepare", input)),
+                                    newVarStmt(res, newLit false))
+  var conds: seq[StmtTriple] = @[]
+  for it in pattern:
+    conds.add atm(it, input, idx, nil)
+  result.add toIfChain(conds, idx, res, 0)
+  result.add res
+  when defined(debugScanp):
+    echo repr result
+
+
+when isMainModule:
+  proc twoDigits(input: string; x: var int; start: int): int =
+    if input[start] == '0' and input[start+1] == '0':
+      result = 2
+      x = 13
+    else:
+      result = 0
+
+  proc someSep(input: string; start: int; seps: set[char] = {';',',','-','.'}): int =
+    result = 0
+    while input[start+result] in seps: inc result
+
+  proc demangle(s: string; res: var string; start: int): int =
+    while s[result+start] in {'_', '@'}: inc result
+    res = ""
+    while result+start < s.len and s[result+start] > ' ' and s[result+start] != '_':
+      res.add s[result+start]
+      inc result
+    while result+start < s.len and s[result+start] > ' ':
+      inc result
+
+  proc parseGDB(resp: string): seq[string] =
+    const
+      digits = {'0'..'9'}
+      hexdigits = digits + {'a'..'f', 'A'..'F'}
+      whites = {' ', '\t', '\C', '\L'}
+    result = @[]
+    var idx = 0
+    while true:
+      var prc = ""
+      var info = ""
+      if scanp(resp, idx, *`whites`, '#', *`digits`, +`whites`, ?("0x", *`hexdigits`, " in "),
+               demangle($input, prc, $index), *`whites`, '(', * ~ ')', ')',
+                *`whites`, "at ", +(~{'\C', '\L', '\0'} -> info.add($_)) ):
+        result.add prc & " " & info
+      else:
+        break
+
+  var key, val: string
+  var intval: int
+  var floatval: float
+  doAssert scanf("abc:: xyz 89  33.25", "$w$s::$s$w$s$i  $f", key, val, intval, floatVal)
+  doAssert key == "abc"
+  doAssert val == "xyz"
+  doAssert intval == 89
+  doAssert floatVal == 33.25
+
+  let xx = scanf("$abc", "$$$i", intval)
+  doAssert xx == false
+
+
+  let xx2 = scanf("$1234", "$$$i", intval)
+  doAssert xx2
+
+  let yy = scanf(";.--Breakpoint00 [output]", "$[someSep]Breakpoint${twoDigits}$[someSep({';','.','-'})] [$+]$.", intVal, key)
+  doAssert yy
+  doAssert key == "output"
+  doAssert intVal == 13
+
+  var ident = ""
+  var idx = 0
+  let zz = scanp("foobar x x  x   xWZ", idx, +{'a'..'z'} -> add(ident, $_), *(*{' ', '\t'}, "x"), ~'U', "Z")
+  doAssert zz
+  doAssert ident == "foobar"
+
+  const digits = {'0'..'9'}
+  var year = 0
+  var idx2 = 0
+  if scanp("201655-8-9", idx2, `digits`{4,6} -> (year = year * 10 + ord($_) - ord('0')), "-8", "-9"):
+    doAssert year == 201655
+
+  const gdbOut = """
+      #0  @foo_96013_1208911747@8 (x0=...)
+          at c:/users/anwender/projects/nim/temp.nim:11
+      #1  0x00417754 in tempInit000 () at c:/users/anwender/projects/nim/temp.nim:13
+      #2  0x0041768d in NimMainInner ()
+          at c:/users/anwender/projects/nim/lib/system.nim:2605
+      #3  0x004176b1 in NimMain ()
+          at c:/users/anwender/projects/nim/lib/system.nim:2613
+      #4  0x004176db in main (argc=1, args=0x712cc8, env=0x711ca8)
+          at c:/users/anwender/projects/nim/lib/system.nim:2620"""
+  const result = @["foo c:/users/anwender/projects/nim/temp.nim:11",
+          "tempInit000 c:/users/anwender/projects/nim/temp.nim:13",
+          "NimMainInner c:/users/anwender/projects/nim/lib/system.nim:2605",
+          "NimMain c:/users/anwender/projects/nim/lib/system.nim:2613",
+          "main c:/users/anwender/projects/nim/lib/system.nim:2620"]
+  doAssert parseGDB(gdbOut) == result
diff --git a/lib/pure/strutils.nim b/lib/pure/strutils.nim
index a5a4ee509..c3d6d75bd 100644
--- a/lib/pure/strutils.nim
+++ b/lib/pure/strutils.nim
@@ -669,7 +669,7 @@ proc repeat*(s: string, n: Natural): string {.noSideEffect,
   result = newStringOfCap(n * s.len)
   for i in 1..n: result.add(s)
 
-template spaces*(n: Natural): string =  repeat(' ',n)
+template spaces*(n: Natural): string = repeat(' ', n)
   ## Returns a String with `n` space characters. You can use this proc
   ## to left align strings. Example:
   ##
diff --git a/lib/system.nim b/lib/system.nim
index 231866e90..604992969 100644
--- a/lib/system.nim
+++ b/lib/system.nim
@@ -588,6 +588,9 @@ proc sizeof*[T](x: T): int {.magic: "SizeOf", noSideEffect.}
   ## that one never needs to know ``x``'s size. As a special semantic rule,
   ## ``x`` may also be a type identifier (``sizeof(int)`` is valid).
   ##
+  ## Limitations: If used within nim VM context ``sizeof`` will only work
+  ## for simple types.
+  ##
   ## .. code-block:: nim
   ##  sizeof('A') #=> 1
   ##  sizeof(2) #=> 8
@@ -1291,7 +1294,7 @@ const
 when hasThreadSupport and defined(tcc) and not compileOption("tlsEmulation"):
   # tcc doesn't support TLS
   {.error: "``--tlsEmulation:on`` must be used when using threads with tcc backend".}
-  
+
 when defined(boehmgc):
   when defined(windows):
     const boehmLib = "boehmgc.dll"
@@ -2163,7 +2166,7 @@ proc `&` *[T](x: T, y: seq[T]): seq[T] {.noSideEffect.} =
     result[i+1] = y[i]
 
 when not defined(nimscript):
-  when not defined(JS):
+  when not defined(JS) or defined(nimphp):
     proc seqToPtr[T](x: seq[T]): pointer {.inline, nosideeffect.} =
       result = cast[pointer](x)
   else:
@@ -2565,8 +2568,9 @@ when not defined(JS): #and not defined(nimscript):
   {.push stack_trace: off, profiler:off.}
 
   when not defined(nimscript) and not defined(nogc):
-    proc initGC()
-    when not defined(boehmgc) and not defined(useMalloc) and not defined(gogc):
+    when not defined(gcStack):
+      proc initGC()
+    when not defined(boehmgc) and not defined(useMalloc) and not defined(gogc) and not defined(gcStack):
       proc initAllocator() {.inline.}
 
     proc initStackBottom() {.inline, compilerproc.} =
diff --git a/lib/system/alloc.nim b/lib/system/alloc.nim
index e0fd53b7b..00a16e2bb 100644
--- a/lib/system/alloc.nim
+++ b/lib/system/alloc.nim
@@ -27,15 +27,14 @@ const
 
 type
   PTrunk = ptr Trunk
-  Trunk {.final.} = object
+  Trunk = object
     next: PTrunk         # all nodes are connected with this pointer
     key: int             # start address at bit 0
     bits: array[0..IntsPerTrunk-1, int] # a bit vector
 
   TrunkBuckets = array[0..255, PTrunk]
-  IntSet {.final.} = object
+  IntSet = object
     data: TrunkBuckets
-{.deprecated: [TIntSet: IntSet, TTrunk: Trunk, TTrunkBuckets: TrunkBuckets].}
 
 type
   AlignType = BiggestFloat
@@ -64,8 +63,6 @@ type
     next, prev: PBigChunk    # chunks of the same (or bigger) size
     align: int
     data: AlignType      # start of usable memory
-{.deprecated: [TAlignType: AlignType, TFreeCell: FreeCell, TBaseChunk: BaseChunk,
-              TBigChunk: BigChunk, TSmallChunk: SmallChunk].}
 
 template smallChunkOverhead(): expr = sizeof(SmallChunk)-sizeof(AlignType)
 template bigChunkOverhead(): expr = sizeof(BigChunk)-sizeof(AlignType)
@@ -79,18 +76,18 @@ template bigChunkOverhead(): expr = sizeof(BigChunk)-sizeof(AlignType)
 
 type
   PLLChunk = ptr LLChunk
-  LLChunk {.pure.} = object ## *low-level* chunk
+  LLChunk = object ## *low-level* chunk
     size: int                # remaining size
     acc: int                 # accumulator
     next: PLLChunk           # next low-level chunk; only needed for dealloc
 
   PAvlNode = ptr AvlNode
-  AvlNode {.pure, final.} = object
+  AvlNode = object
     link: array[0..1, PAvlNode] # Left (0) and right (1) links
     key, upperBound: int
     level: int
 
-  MemRegion {.final, pure.} = object
+  MemRegion = object
     minLargeObj, maxLargeObj: int
     freeSmallChunks: array[0..SmallChunkSize div MemAlign-1, PSmallChunk]
     llmem: PLLChunk
@@ -99,6 +96,7 @@ type
     freeChunksList: PBigChunk # XXX make this a datastructure with O(1) access
     chunkStarts: IntSet
     root, deleted, last, freeAvlNodes: PAvlNode
+    locked: bool # if locked, we cannot free pages.
 {.deprecated: [TLLChunk: LLChunk, TAvlNode: AvlNode, TMemRegion: MemRegion].}
 
 # shared:
@@ -234,7 +232,8 @@ proc isSmallChunk(c: PChunk): bool {.inline.} =
 proc chunkUnused(c: PChunk): bool {.inline.} =
   result = not c.used
 
-iterator allObjects(m: MemRegion): pointer {.inline.} =
+iterator allObjects(m: var MemRegion): pointer {.inline.} =
+  m.locked = true
   for s in elements(m.chunkStarts):
     # we need to check here again as it could have been modified:
     if s in m.chunkStarts:
@@ -252,6 +251,7 @@ iterator allObjects(m: MemRegion): pointer {.inline.} =
         else:
           let c = cast[PBigChunk](c)
           yield addr(c.data)
+  m.locked = false
 
 proc iterToProc*(iter: typed, envType: typedesc; procName: untyped) {.
                       magic: "Plugin", compileTime.}
@@ -385,7 +385,7 @@ proc freeBigChunk(a: var MemRegion, c: PBigChunk) =
           excl(a.chunkStarts, pageIndex(c))
           c = cast[PBigChunk](le)
 
-  if c.size < ChunkOsReturn or doNotUnmap:
+  if c.size < ChunkOsReturn or doNotUnmap or a.locked:
     incl(a, a.chunkStarts, pageIndex(c))
     updatePrevSize(a, c, c.size)
     listAdd(a.freeChunksList, c)
@@ -442,26 +442,29 @@ proc getSmallChunk(a: var MemRegion): PSmallChunk =
 # -----------------------------------------------------------------------------
 proc isAllocatedPtr(a: MemRegion, p: pointer): bool {.benign.}
 
-proc allocInv(a: MemRegion): bool =
-  ## checks some (not all yet) invariants of the allocator's data structures.
-  for s in low(a.freeSmallChunks)..high(a.freeSmallChunks):
-    var c = a.freeSmallChunks[s]
-    while not (c == nil):
-      if c.next == c:
-        echo "[SYSASSERT] c.next == c"
-        return false
-      if not (c.size == s * MemAlign):
-        echo "[SYSASSERT] c.size != s * MemAlign"
-        return false
-      var it = c.freeList
-      while not (it == nil):
-        if not (it.zeroField == 0):
-          echo "[SYSASSERT] it.zeroField != 0"
-          c_printf("%ld %p\n", it.zeroField, it)
+when true:
+  template allocInv(a: MemRegion): bool = true
+else:
+  proc allocInv(a: MemRegion): bool =
+    ## checks some (not all yet) invariants of the allocator's data structures.
+    for s in low(a.freeSmallChunks)..high(a.freeSmallChunks):
+      var c = a.freeSmallChunks[s]
+      while not (c == nil):
+        if c.next == c:
+          echo "[SYSASSERT] c.next == c"
           return false
-        it = it.next
-      c = c.next
-  result = true
+        if not (c.size == s * MemAlign):
+          echo "[SYSASSERT] c.size != s * MemAlign"
+          return false
+        var it = c.freeList
+        while not (it == nil):
+          if not (it.zeroField == 0):
+            echo "[SYSASSERT] it.zeroField != 0"
+            c_printf("%ld %p\n", it.zeroField, it)
+            return false
+          it = it.next
+        c = c.next
+    result = true
 
 proc rawAlloc(a: var MemRegion, requestedSize: int): pointer =
   sysAssert(allocInv(a), "rawAlloc: begin")
diff --git a/lib/system/gc.nim b/lib/system/gc.nim
index 4f461b5c3..ee5eec30b 100644
--- a/lib/system/gc.nim
+++ b/lib/system/gc.nim
@@ -1,7 +1,7 @@
 #
 #
 #            Nim's Runtime Library
-#        (c) Copyright 2015 Andreas Rumpf
+#        (c) Copyright 2016 Andreas Rumpf
 #
 #    See the file "copying.txt", included in this
 #    distribution, for details about the copyright.
@@ -9,13 +9,8 @@
 
 #            Garbage Collector
 #
-# The basic algorithm is *Deferred Reference Counting* with cycle detection.
-# This is achieved by combining a Deutsch-Bobrow garbage collector
-# together with Christoper's partial mark-sweep garbage collector.
-#
-# Special care has been taken to avoid recursion as far as possible to avoid
-# stack overflows when traversing deep datastructures. It is well-suited
-# for soft real time applications (like games).
+# Refcounting + Mark&Sweep. Complex algorithms avoided.
+# Been there, done that, didn't work.
 
 when defined(nimCoroutines):
   import arch
@@ -30,7 +25,7 @@ const
                       # this seems to be a good value
   withRealTime = defined(useRealtimeGC)
   useMarkForDebug = defined(gcGenerational)
-  useBackupGc = false                     # use a simple M&S GC to collect
+  useBackupGc = true                      # use a simple M&S GC to collect
                                           # cycles instead of the complex
                                           # algorithm
 
@@ -55,8 +50,7 @@ type
   WalkOp = enum
     waMarkGlobal,    # part of the backup/debug mark&sweep
     waMarkPrecise,   # part of the backup/debug mark&sweep
-    waZctDecRef, waPush, waCycleDecRef, waMarkGray, waScan, waScanBlack,
-    waCollectWhite #, waDebug
+    waZctDecRef, waPush
 
   Finalizer {.compilerproc.} = proc (self: pointer) {.nimcall, benign.}
     # A ref type can have a finalizer that is called before the object's
@@ -87,7 +81,6 @@ type
       idGenerator: int
     zct: CellSeq             # the zero count table
     decStack: CellSeq        # cells in the stack that are to decref again
-    cycleRoots: CellSet
     tempStack: CellSeq       # temporary stack for recursion elimination
     recGcLock: int           # prevent recursion via finalizers; no thread lock
     when withRealTime:
@@ -96,6 +89,7 @@ type
     stat: GcStat
     when useMarkForDebug or useBackupGc:
       marked: CellSet
+      additionalRoots: CellSeq # dummy roots for GC_ref/unref
     when hasThreadSupport:
       toDispose: SharedList[pointer]
 
@@ -136,9 +130,6 @@ proc usrToCell(usr: pointer): PCell {.inline.} =
   # convert pointer to userdata to object (=pointer to refcount)
   result = cast[PCell](cast[ByteAddress](usr)-%ByteAddress(sizeof(Cell)))
 
-proc canBeCycleRoot(c: PCell): bool {.inline.} =
-  result = ntfAcyclic notin c.typ.flags
-
 proc extGetCellType(c: pointer): PNimType {.compilerproc.} =
   # used for code generation concerning debugging
   result = usrToCell(c).typ
@@ -200,14 +191,16 @@ proc prepareDealloc(cell: PCell) =
     (cast[Finalizer](cell.typ.finalizer))(cellToUsr(cell))
     dec(gch.recGcLock)
 
+template beforeDealloc(gch: var GcHeap; c: PCell; msg: typed) =
+  when false:
+    for i in 0..gch.decStack.len-1:
+      if gch.decStack.d[i] == c:
+        sysAssert(false, msg)
+
 proc rtlAddCycleRoot(c: PCell) {.rtl, inl.} =
   # we MUST access gch as a global here, because this crosses DLL boundaries!
   when hasThreadSupport and hasSharedHeap:
     acquireSys(HeapLock)
-  when cycleGC:
-    if c.color != rcPurple:
-      c.setColor(rcPurple)
-      incl(gch.cycleRoots, c)
   when hasThreadSupport and hasSharedHeap:
     releaseSys(HeapLock)
 
@@ -224,22 +217,30 @@ proc decRef(c: PCell) {.inline.} =
   gcAssert(c.refcount >=% rcIncrement, "decRef")
   if --c.refcount:
     rtlAddZCT(c)
-  elif canbeCycleRoot(c):
-    # unfortunately this is necessary here too, because a cycle might just
-    # have been broken up and we could recycle it.
-    rtlAddCycleRoot(c)
-    #writeCell("decRef", c)
 
 proc incRef(c: PCell) {.inline.} =
   gcAssert(isAllocatedPtr(gch.region, c), "incRef: interiorPtr")
   c.refcount = c.refcount +% rcIncrement
   # and not colorMask
   #writeCell("incRef", c)
-  if canbeCycleRoot(c):
-    rtlAddCycleRoot(c)
 
-proc nimGCref(p: pointer) {.compilerProc, inline.} = incRef(usrToCell(p))
-proc nimGCunref(p: pointer) {.compilerProc, inline.} = decRef(usrToCell(p))
+proc nimGCref(p: pointer) {.compilerProc.} =
+  # we keep it from being collected by pretending it's not even allocated:
+  add(gch.additionalRoots, usrToCell(p))
+  incRef(usrToCell(p))
+
+proc nimGCunref(p: pointer) {.compilerProc.} =
+  let cell = usrToCell(p)
+  var L = gch.additionalRoots.len-1
+  var i = L
+  let d = gch.additionalRoots.d
+  while i >= 0:
+    if d[i] == cell:
+      d[i] = d[L]
+      dec gch.additionalRoots.len
+      break
+    dec(i)
+  decRef(usrToCell(p))
 
 proc GC_addCycleRoot*[T](p: ref T) {.inline.} =
   ## adds 'p' to the cycle candidate set for the cycle collector. It is
@@ -306,10 +307,10 @@ proc initGC() =
     # init the rt
     init(gch.zct)
     init(gch.tempStack)
-    init(gch.cycleRoots)
     init(gch.decStack)
     when useMarkForDebug or useBackupGc:
       init(gch.marked)
+      init(gch.additionalRoots)
     when hasThreadSupport:
       gch.toDispose = initSharedList[pointer]()
 
@@ -563,7 +564,7 @@ proc growObj(old: pointer, newsize: int, gch: var GcHeap): pointer =
             d[j] = res
             break
           dec(j)
-      if canbeCycleRoot(ol): excl(gch.cycleRoots, ol)
+      beforeDealloc(gch, ol, "growObj stack trash")
       rawDealloc(gch.region, ol)
     else:
       # we split the old refcount in 2 parts. XXX This is still not entirely
@@ -597,54 +598,12 @@ proc freeCyclicCell(gch: var GcHeap, c: PCell) =
   when logGC: writeCell("cycle collector dealloc cell", c)
   when reallyDealloc:
     sysAssert(allocInv(gch.region), "free cyclic cell")
+    beforeDealloc(gch, c, "freeCyclicCell: stack trash")
     rawDealloc(gch.region, c)
   else:
     gcAssert(c.typ != nil, "freeCyclicCell")
     zeroMem(c, sizeof(Cell))
 
-proc markGray(s: PCell) =
-  if s.color != rcGray:
-    setColor(s, rcGray)
-    forAllChildren(s, waMarkGray)
-
-proc scanBlack(s: PCell) =
-  s.setColor(rcBlack)
-  forAllChildren(s, waScanBlack)
-
-proc scan(s: PCell) =
-  if s.color == rcGray:
-    if s.refcount >=% rcIncrement:
-      scanBlack(s)
-    else:
-      s.setColor(rcWhite)
-      forAllChildren(s, waScan)
-
-proc collectWhite(s: PCell) =
-  # This is a hacky way to deal with the following problem (bug #1796)
-  # Consider this content in cycleRoots:
-  #   x -> a; y -> a  where 'a' is an acyclic object so not included in
-  # cycleRoots itself. Then 'collectWhite' used to free 'a' twice. The
-  # 'isAllocatedPtr' check prevents this. This also means we do not need
-  # to query 's notin gch.cycleRoots' at all.
-  if isAllocatedPtr(gch.region, s) and s.color == rcWhite:
-    s.setColor(rcBlack)
-    forAllChildren(s, waCollectWhite)
-    freeCyclicCell(gch, s)
-
-proc markRoots(gch: var GcHeap) =
-  var tabSize = 0
-  for s in elements(gch.cycleRoots):
-    #writeCell("markRoot", s)
-    inc tabSize
-    if s.color == rcPurple and s.refcount >=% rcIncrement:
-      markGray(s)
-    else:
-      excl(gch.cycleRoots, s)
-      # (s.color == rcBlack and rc == 0) as 1 condition:
-      if s.refcount == 0:
-        freeCyclicCell(gch, s)
-  gch.stat.cycleTableSize = max(gch.stat.cycleTableSize, tabSize)
-
 when useBackupGc:
   proc sweep(gch: var GcHeap) =
     for x in allObjects(gch.region):
@@ -666,16 +625,8 @@ when useMarkForDebug or useBackupGc:
 
   proc markGlobals(gch: var GcHeap) =
     for i in 0 .. < globalMarkersLen: globalMarkers[i]()
-
-  proc stackMarkS(gch: var GcHeap, p: pointer) {.inline.} =
-    # the addresses are not as cells on the stack, so turn them to cells:
-    var cell = usrToCell(p)
-    var c = cast[ByteAddress](cell)
-    if c >% PageSize:
-      # fast check: does it look like a cell?
-      var objStart = cast[PCell](interiorAllocatedPtr(gch.region, cell))
-      if objStart != nil:
-        markS(gch, objStart)
+    let d = gch.additionalRoots.d
+    for i in 0 .. < gch.additionalRoots.len: markS(gch, d[i])
 
 when logGC:
   var
@@ -717,19 +668,6 @@ proc doOperation(p: pointer, op: WalkOp) =
     #if c.refcount <% rcIncrement: addZCT(gch.zct, c)
   of waPush:
     add(gch.tempStack, c)
-  of waCycleDecRef:
-    gcAssert(c.refcount >=% rcIncrement, "doOperation 3")
-    c.refcount = c.refcount -% rcIncrement
-  of waMarkGray:
-    gcAssert(c.refcount >=% rcIncrement, "waMarkGray")
-    c.refcount = c.refcount -% rcIncrement
-    markGray(c)
-  of waScan: scan(c)
-  of waScanBlack:
-    c.refcount = c.refcount +% rcIncrement
-    if c.color != rcBlack:
-      scanBlack(c)
-  of waCollectWhite: collectWhite(c)
   of waMarkGlobal:
     when useMarkForDebug or useBackupGc:
       when hasThreadSupport:
@@ -748,14 +686,6 @@ proc nimGCvisit(d: pointer, op: int) {.compilerRtl.} =
 
 proc collectZCT(gch: var GcHeap): bool {.benign.}
 
-when useMarkForDebug or useBackupGc:
-  proc markStackAndRegistersForSweep(gch: var GcHeap) {.noinline, cdecl,
-                                                         benign.}
-
-proc collectRoots(gch: var GcHeap) =
-  for s in elements(gch.cycleRoots):
-    collectWhite(s)
-
 proc collectCycles(gch: var GcHeap) =
   when hasThreadSupport:
     for c in gch.toDispose:
@@ -764,33 +694,12 @@ proc collectCycles(gch: var GcHeap) =
   while gch.zct.len > 0: discard collectZCT(gch)
   when useBackupGc:
     cellsetReset(gch.marked)
-    markStackAndRegistersForSweep(gch)
-    markGlobals(gch)
-    sweep(gch)
-  else:
-    markRoots(gch)
-    # scanRoots:
-    for s in elements(gch.cycleRoots): scan(s)
-    collectRoots(gch)
-
-    cellsetReset(gch.cycleRoots)
-  # alive cycles need to be kept in 'cycleRoots' if they are referenced
-  # from the stack; otherwise the write barrier will add the cycle root again
-  # anyway:
-  when false:
     var d = gch.decStack.d
-    var cycleRootsLen = 0
     for i in 0..gch.decStack.len-1:
-      var c = d[i]
-      gcAssert isAllocatedPtr(gch.region, c), "addBackStackRoots"
-      gcAssert c.refcount >=% rcIncrement, "addBackStackRoots: dead cell"
-      if canBeCycleRoot(c):
-        #if c notin gch.cycleRoots:
-        inc cycleRootsLen
-        incl(gch.cycleRoots, c)
-      gcAssert c.typ != nil, "addBackStackRoots 2"
-    if cycleRootsLen != 0:
-      cfprintf(cstdout, "cycle roots: %ld\n", cycleRootsLen)
+      sysAssert isAllocatedPtr(gch.region, d[i]), "collectCycles"
+      markS(gch, d[i])
+    markGlobals(gch)
+    sweep(gch)
 
 proc gcMark(gch: var GcHeap, p: pointer) {.inline.} =
   # the addresses are not as cells on the stack, so turn them to cells:
@@ -812,31 +721,11 @@ proc gcMark(gch: var GcHeap, p: pointer) {.inline.} =
         add(gch.decStack, cell)
   sysAssert(allocInv(gch.region), "gcMark end")
 
-proc markThreadStacks(gch: var GcHeap) =
-  when hasThreadSupport and hasSharedHeap:
-    {.error: "not fully implemented".}
-    var it = threadList
-    while it != nil:
-      # mark registers:
-      for i in 0 .. high(it.registers): gcMark(gch, it.registers[i])
-      var sp = cast[ByteAddress](it.stackBottom)
-      var max = cast[ByteAddress](it.stackTop)
-      # XXX stack direction?
-      # XXX unroll this loop:
-      while sp <=% max:
-        gcMark(gch, cast[ppointer](sp)[])
-        sp = sp +% sizeof(pointer)
-      it = it.next
-
 include gc_common
 
 proc markStackAndRegisters(gch: var GcHeap) {.noinline, cdecl.} =
   forEachStackSlot(gch, gcMark)
 
-when useMarkForDebug or useBackupGc:
-  proc markStackAndRegistersForSweep(gch: var GcHeap) =
-    forEachStackSlot(gch, stackMarkS)
-
 proc collectZCT(gch: var GcHeap): bool =
   # Note: Freeing may add child objects to the ZCT! So essentially we do
   # deep freeing, which is bad for incremental operation. In order to
@@ -866,8 +755,6 @@ proc collectZCT(gch: var GcHeap): bool =
       # as this might be too slow.
       # In any case, it should be removed from the ZCT. But not
       # freed. **KEEP THIS IN MIND WHEN MAKING THIS INCREMENTAL!**
-      when cycleGC:
-        if canbeCycleRoot(c): excl(gch.cycleRoots, c)
       when logGC: writeCell("zct dealloc cell", c)
       gcTrace(c, csZctFreed)
       # We are about to free the object, call the finalizer BEFORE its
@@ -877,6 +764,7 @@ proc collectZCT(gch: var GcHeap): bool =
       forAllChildren(c, waZctDecRef)
       when reallyDealloc:
         sysAssert(allocInv(gch.region), "collectZCT: rawDealloc")
+        beforeDealloc(gch, c, "collectZCT: stack trash")
         rawDealloc(gch.region, c)
       else:
         sysAssert(c.typ != nil, "collectZCT 2")
@@ -915,7 +803,6 @@ proc collectCTBody(gch: var GcHeap) =
   sysAssert(gch.decStack.len == 0, "collectCT")
   prepareForInteriorPointerChecking(gch.region)
   markStackAndRegisters(gch)
-  markThreadStacks(gch)
   gch.stat.maxStackCells = max(gch.stat.maxStackCells, gch.decStack.len)
   inc(gch.stat.stackScans)
   if collectZCT(gch):
@@ -937,11 +824,6 @@ proc collectCTBody(gch: var GcHeap) =
       if gch.maxPause > 0 and duration > gch.maxPause:
         c_fprintf(c_stdout, "[GC] missed deadline: %ld\n", duration)
 
-when useMarkForDebug or useBackupGc:
-  proc markForDebug(gch: var GcHeap) =
-    markStackAndRegistersForSweep(gch)
-    markGlobals(gch)
-
 when defined(nimCoroutines):
   proc currentStackSizes(): int =
     for stack in items(gch.stack):
@@ -980,7 +862,19 @@ when withRealTime:
       collectCTBody(gch)
     release(gch)
 
-  proc GC_step*(us: int, strongAdvice = false) = GC_step(gch, us, strongAdvice)
+  proc GC_step*(us: int, strongAdvice = false, stackSize = -1) {.noinline.} =
+    var stackTop {.volatile.}: pointer
+    let prevStackBottom = gch.stackBottom
+    if stackSize >= 0:
+      stackTop = addr(stackTop)
+      when stackIncreases:
+        gch.stackBottom = cast[pointer](
+          cast[ByteAddress](stackTop) - sizeof(pointer) * 6 - stackSize)
+      else:
+        gch.stackBottom = cast[pointer](
+          cast[ByteAddress](stackTop) + sizeof(pointer) * 6 + stackSize)
+    GC_step(gch, us, strongAdvice)
+    gch.stackBottom = prevStackBottom
 
 when not defined(useNimRtl):
   proc GC_disable() =
@@ -1023,7 +917,7 @@ when not defined(useNimRtl):
              "[GC] max threshold: " & $gch.stat.maxThreshold & "\n" &
              "[GC] zct capacity: " & $gch.zct.cap & "\n" &
              "[GC] max cycle table size: " & $gch.stat.cycleTableSize & "\n" &
-             "[GC] max pause time [ms]: " & $(gch.stat.maxPause div 1000_000)
+             "[GC] max pause time [ms]: " & $(gch.stat.maxPause div 1000_000) & "\n"
     when defined(nimCoroutines):
       result = result & "[GC] number of stacks: " & $gch.stack.len & "\n"
       for stack in items(gch.stack):
diff --git a/lib/system/gc2.nim b/lib/system/gc2.nim
index 6c44d509e..7f8e466b5 100644
--- a/lib/system/gc2.nim
+++ b/lib/system/gc2.nim
@@ -97,6 +97,8 @@ type
     additionalRoots: CellSeq # dummy roots for GC_ref/unref
     spaceIter: ObjectSpaceIter
     dumpHeapFile: File # File that is used for GC_dumpHeap
+    when hasThreadSupport:
+      toDispose: SharedList[pointer]
 
 var
   gch {.rtlThreadVar.}: GcHeap
@@ -119,6 +121,8 @@ proc initGC() =
     init(gch.decStack)
     init(gch.additionalRoots)
     init(gch.greyStack)
+    when hasThreadSupport:
+      gch.toDispose = initSharedList[pointer]()
 
 # Which color to use for new objects is tricky: When we're marking,
 # they have to be *white* so that everything is marked that is only
@@ -800,6 +804,10 @@ proc nimGCvisit(d: pointer, op: int) {.compilerRtl.} =
 proc collectZCT(gch: var GcHeap): bool {.benign.}
 
 proc collectCycles(gch: var GcHeap): bool =
+  when hasThreadSupport:
+    for c in gch.toDispose:
+      nimGCunref(c)
+
   # ensure the ZCT 'color' is not used:
   while gch.zct.len > 0: discard collectZCT(gch)
 
@@ -956,7 +964,19 @@ when withRealTime:
         strongAdvice:
       collectCTBody(gch)
 
-  proc GC_step*(us: int, strongAdvice = false) = GC_step(gch, us, strongAdvice)
+  proc GC_step*(us: int, strongAdvice = false, stackSize = -1) {.noinline.} =
+    var stackTop {.volatile.}: pointer
+    let prevStackBottom = gch.stackBottom
+    if stackSize >= 0:
+      stackTop = addr(stackTop)
+      when stackIncreases:
+        gch.stackBottom = cast[pointer](
+          cast[ByteAddress](stackTop) - sizeof(pointer) * 6 - stackSize)
+      else:
+        gch.stackBottom = cast[pointer](
+          cast[ByteAddress](stackTop) + sizeof(pointer) * 6 + stackSize)
+    GC_step(gch, us, strongAdvice)
+    gch.stackBottom = prevStackBottom
 
 when not defined(useNimRtl):
   proc GC_disable() =
diff --git a/lib/system/gc_stack.nim b/lib/system/gc_stack.nim
index 3a5c5594a..c251a4d0b 100644
--- a/lib/system/gc_stack.nim
+++ b/lib/system/gc_stack.nim
@@ -36,33 +36,34 @@ type
   BaseChunk = object
     next: Chunk
     size: int
-    head, last: ptr ObjHeader # first and last object in chunk that
+    head, tail: ptr ObjHeader # first and last object in chunk that
                               # has a finalizer attached to it
 
 type
   StackPtr = object
-    chunk: pointer
+    bump: pointer
     remaining: int
     current: Chunk
 
   MemRegion* = object
     remaining: int
-    chunk: pointer
-    head, last: Chunk
+    bump: pointer
+    head, tail: Chunk
     nextChunkSize, totalSize: int
     hole: ptr Hole # we support individual freeing
-    lock: SysLock
+    when hasThreadSupport:
+      lock: SysLock
 
 var
-  region {.threadVar.}: MemRegion
+  tlRegion {.threadVar.}: MemRegion
 
 template withRegion*(r: MemRegion; body: untyped) =
-  let oldRegion = region
-  region = r
+  let oldRegion = tlRegion
+  tlRegion = r
   try:
     body
   finally:
-    region = oldRegion
+    tlRegion = oldRegion
 
 template inc(p: pointer, s: int) =
   p = cast[pointer](cast[int](p) +% s)
@@ -71,7 +72,7 @@ template `+!`(p: pointer, s: int): pointer =
   cast[pointer](cast[int](p) +% s)
 
 template `-!`(p: pointer, s: int): pointer =
-  cast[pointer](cast[int](p) +% s)
+  cast[pointer](cast[int](p) -% s)
 
 proc allocSlowPath(r: var MemRegion; size: int) =
   # we need to ensure that the underlying linked list
@@ -84,7 +85,7 @@ proc allocSlowPath(r: var MemRegion; size: int) =
     r.nextChunkSize =
       if r.totalSize < 64 * 1024: PageSize*4
       else: r.nextChunkSize*2
-  var s = align(size+sizeof(BaseChunk), PageSize)
+  var s = roundup(size+sizeof(BaseChunk), PageSize)
   var fresh: Chunk
   if s > r.nextChunkSize:
     fresh = cast[Chunk](osAllocPages(s))
@@ -97,22 +98,25 @@ proc allocSlowPath(r: var MemRegion; size: int) =
     else:
       s = r.nextChunkSize
   fresh.size = s
-  fresh.final = nil
-  r.totalSize += s
-  let old = r.last
+  fresh.head = nil
+  fresh.tail = nil
+  inc r.totalSize, s
+  let old = r.tail
   if old == nil:
     r.head = fresh
   else:
-    r.last.next = fresh
-  r.chunk = fresh +! sizeof(BaseChunk)
-  r.last = fresh
+    r.tail.next = fresh
+  r.bump = fresh +! sizeof(BaseChunk)
+  r.tail = fresh
   r.remaining = s - sizeof(BaseChunk)
 
 proc alloc(r: var MemRegion; size: int): pointer {.inline.} =
-  if unlikely(r.remaining < size): allocSlowPath(r, size)
+  if size > r.remaining:
+    allocSlowPath(r, size)
+  sysAssert(size <= r.remaining, "size <= r.remaining")
   dec(r.remaining, size)
-  result = r.chunk
-  inc r.chunk, size
+  result = r.bump
+  inc r.bump, size
 
 proc runFinalizers(c: Chunk) =
   var it = c.head
@@ -120,228 +124,241 @@ proc runFinalizers(c: Chunk) =
     # indivually freed objects with finalizer stay in the list, but
     # their typ is nil then:
     if it.typ != nil and it.typ.finalizer != nil:
-      (cast[Finalizer](cell.typ.finalizer))(cell+!sizeof(ObjHeader))
-    it = it.next
+      (cast[Finalizer](it.typ.finalizer))(it+!sizeof(ObjHeader))
+    it = it.nextFinal
 
 proc dealloc(r: var MemRegion; p: pointer) =
-  let it = p-!sizeof(ObjHeader)
+  let it = cast[ptr ObjHeader](p-!sizeof(ObjHeader))
   if it.typ != nil and it.typ.finalizer != nil:
-    (cast[Finalizer](cell.typ.finalizer))(p)
+    (cast[Finalizer](it.typ.finalizer))(p)
   it.typ = nil
 
-proc deallocAll(head: Chunk) =
+proc deallocAll(r: var MemRegion; head: Chunk) =
   var it = head
   while it != nil:
+    let nxt = it.next
     runFinalizers(it)
+    dec r.totalSize, it.size
     osDeallocPages(it, it.size)
-    it = it.next
+    it = nxt
 
 proc deallocAll*(r: var MemRegion) =
-  deallocAll(r.head)
+  deallocAll(r, r.head)
   zeroMem(addr r, sizeof r)
 
 proc obstackPtr*(r: MemRegion): StackPtr =
-  result.chunk = r.chunk
+  result.bump = r.bump
   result.remaining = r.remaining
-  result.current = r.last
+  result.current = r.tail
 
-proc setObstackPtr*(r: MemRegion; sp: StackPtr) =
+template computeRemaining(r): untyped =
+  r.tail.size -% (cast[int](r.bump) -% cast[int](r.tail))
+
+proc setObstackPtr*(r: var MemRegion; sp: StackPtr) =
   # free everything after 'sp':
   if sp.current != nil:
-    deallocAll(sp.current.next)
-  r.chunk = sp.chunk
+    deallocAll(r, sp.current.next)
+    sp.current.next = nil
+  else:
+    deallocAll(r, r.head)
+    r.head = nil
+  r.bump = sp.bump
+  r.tail = sp.current
   r.remaining = sp.remaining
-  r.last = sp.current
+
+proc obstackPtr*(): StackPtr = tlRegion.obstackPtr()
+proc setObstackPtr*(sp: StackPtr) = tlRegion.setObstackPtr(sp)
 
 proc joinRegion*(dest: var MemRegion; src: MemRegion) =
   # merging is not hard.
   if dest.head.isNil:
     dest.head = src.head
   else:
-    dest.last.next = src.head
-  dest.last = src.last
-  dest.chunk = src.chunk
+    dest.tail.next = src.head
+  dest.tail = src.tail
+  dest.bump = src.bump
   dest.remaining = src.remaining
   dest.nextChunkSize = max(dest.nextChunkSize, src.nextChunkSize)
-  dest.totalSize += src.totalSize
-  if dest.hole.size < src.hole.size:
-    dest.hole = src.hole
+  inc dest.totalSize, src.totalSize
 
 proc isOnHeap*(r: MemRegion; p: pointer): bool =
-  # the last chunk is the largest, so check it first. It's also special
+  # the tail chunk is the largest, so check it first. It's also special
   # in that contains the current bump pointer:
-  if r.last >= p and p < r.chunk:
+  if r.tail >= p and p < r.bump:
     return true
   var it = r.head
-  while it != r.last:
+  while it != r.tail:
     if it >= p and p <= it+!it.size: return true
     it = it.next
 
-proc isInteriorPointer(r: MemRegion; p: pointer): pointer =
-  discard " we cannot patch stack pointers anyway!"
+when false:
+  # essential feature for later: copy data over from one region to another
 
-type
-  PointerStackChunk = object
-    next, prev: ptr PointerStackChunk
-    len: int
-    data: array[128, pointer]
+  proc isInteriorPointer(r: MemRegion; p: pointer): pointer =
+    discard " we cannot patch stack pointers anyway!"
 
-template head(s: PointerStackChunk): untyped = s.prev
-template tail(s: PointerStackChunk): untyped = s.next
+  type
+    PointerStackChunk = object
+      next, prev: ptr PointerStackChunk
+      len: int
+      data: array[128, pointer]
 
-include chains
+  template head(s: PointerStackChunk): untyped = s.prev
+  template tail(s: PointerStackChunk): untyped = s.next
 
-proc push(r: var MemRegion; s: var PointerStackChunk; x: pointer) =
-  if s.len < high(s.data):
-    s.data[s.len] = x
-    inc s.len
-  else:
-    let fresh = cast[ptr PointerStackChunk](alloc(r, sizeof(PointerStackChunk)))
-    fresh.len = 1
-    fresh.data[0] = x
-    fresh.next = nil
-    fresh.prev = nil
-    append(s, fresh)
-
-
-proc genericDeepCopyAux(dr: var MemRegion; stack: var PointerStackChunk;
-                        dest, src: pointer, mt: PNimType) {.benign.}
-proc genericDeepCopyAux(dr: var MemRegion; stack: var PointerStackChunk;
-                        dest, src: pointer, n: ptr TNimNode) {.benign.} =
-  var
-    d = cast[ByteAddress](dest)
-    s = cast[ByteAddress](src)
-  case n.kind
-  of nkSlot:
-    genericDeepCopyAux(cast[pointer](d +% n.offset),
-                       cast[pointer](s +% n.offset), n.typ)
-  of nkList:
-    for i in 0..n.len-1:
-      genericDeepCopyAux(dest, src, n.sons[i])
-  of nkCase:
-    var dd = selectBranch(dest, n)
-    var m = selectBranch(src, n)
-    # reset if different branches are in use; note different branches also
-    # imply that's not self-assignment (``x = x``)!
-    if m != dd and dd != nil:
-      genericResetAux(dest, dd)
-    copyMem(cast[pointer](d +% n.offset), cast[pointer](s +% n.offset),
-            n.typ.size)
-    if m != nil:
-      genericDeepCopyAux(dest, src, m)
-  of nkNone: sysAssert(false, "genericDeepCopyAux")
-
-proc copyDeepString(dr: var MemRegion; stack: var PointerStackChunk; src: NimString): NimString {.inline.} =
-  result = rawNewStringNoInit(dr, src.len)
-  result.len = src.len
-  c_memcpy(result.data, src.data, src.len + 1)
-
-proc genericDeepCopyAux(dr: var MemRegion; stack: var PointerStackChunk;
-                        dest, src: pointer, mt: PNimType) =
-  var
-    d = cast[ByteAddress](dest)
-    s = cast[ByteAddress](src)
-  sysAssert(mt != nil, "genericDeepCopyAux 2")
-  case mt.kind
-  of tyString:
-    var x = cast[PPointer](dest)
-    var s2 = cast[PPointer](s)[]
-    if s2 == nil:
-      x[] = nil
-    else:
-      x[] = copyDeepString(cast[NimString](s2))
-  of tySequence:
-    var s2 = cast[PPointer](src)[]
-    var seq = cast[PGenericSeq](s2)
-    var x = cast[PPointer](dest)
-    if s2 == nil:
-      x[] = nil
-      return
-    sysAssert(dest != nil, "genericDeepCopyAux 3")
-    x[] = newSeq(mt, seq.len)
-    var dst = cast[ByteAddress](cast[PPointer](dest)[])
-    for i in 0..seq.len-1:
-      genericDeepCopyAux(dr, stack,
-        cast[pointer](dst +% i*% mt.base.size +% GenericSeqSize),
-        cast[pointer](cast[ByteAddress](s2) +% i *% mt.base.size +%
-                     GenericSeqSize),
-        mt.base)
-  of tyObject:
-    # we need to copy m_type field for tyObject, as it could be empty for
-    # sequence reallocations:
-    var pint = cast[ptr PNimType](dest)
-    pint[] = cast[ptr PNimType](src)[]
-    if mt.base != nil:
-      genericDeepCopyAux(dr, stack, dest, src, mt.base)
-    genericDeepCopyAux(dr, stack, dest, src, mt.node)
-  of tyTuple:
-    genericDeepCopyAux(dr, stack, dest, src, mt.node)
-  of tyArray, tyArrayConstr:
-    for i in 0..(mt.size div mt.base.size)-1:
-      genericDeepCopyAux(dr, stack,
-                         cast[pointer](d +% i*% mt.base.size),
-                         cast[pointer](s +% i*% mt.base.size), mt.base)
-  of tyRef:
-    let s2 = cast[PPointer](src)[]
-    if s2 == nil:
-      cast[PPointer](dest)[] = nil
+  include chains
+
+  proc push(r: var MemRegion; s: var PointerStackChunk; x: pointer) =
+    if s.len < high(s.data):
+      s.data[s.len] = x
+      inc s.len
     else:
-      # we modify the header of the cell temporarily; instead of the type
-      # field we store a forwarding pointer. XXX This is bad when the cloning
-      # fails due to OOM etc.
-      let x = usrToCell(s2)
-      let forw = cast[int](x.typ)
-      if (forw and 1) == 1:
-        # we stored a forwarding pointer, so let's use that:
-        let z = cast[pointer](forw and not 1)
-        unsureAsgnRef(cast[PPointer](dest), z)
+      let fresh = cast[ptr PointerStackChunk](alloc(r, sizeof(PointerStackChunk)))
+      fresh.len = 1
+      fresh.data[0] = x
+      fresh.next = nil
+      fresh.prev = nil
+      append(s, fresh)
+
+
+  proc genericDeepCopyAux(dr: var MemRegion; stack: var PointerStackChunk;
+                          dest, src: pointer, mt: PNimType) {.benign.}
+  proc genericDeepCopyAux(dr: var MemRegion; stack: var PointerStackChunk;
+                          dest, src: pointer, n: ptr TNimNode) {.benign.} =
+    var
+      d = cast[ByteAddress](dest)
+      s = cast[ByteAddress](src)
+    case n.kind
+    of nkSlot:
+      genericDeepCopyAux(cast[pointer](d +% n.offset),
+                         cast[pointer](s +% n.offset), n.typ)
+    of nkList:
+      for i in 0..n.len-1:
+        genericDeepCopyAux(dest, src, n.sons[i])
+    of nkCase:
+      var dd = selectBranch(dest, n)
+      var m = selectBranch(src, n)
+      # reset if different branches are in use; note different branches also
+      # imply that's not self-assignment (``x = x``)!
+      if m != dd and dd != nil:
+        genericResetAux(dest, dd)
+      copyMem(cast[pointer](d +% n.offset), cast[pointer](s +% n.offset),
+              n.typ.size)
+      if m != nil:
+        genericDeepCopyAux(dest, src, m)
+    of nkNone: sysAssert(false, "genericDeepCopyAux")
+
+  proc copyDeepString(dr: var MemRegion; stack: var PointerStackChunk; src: NimString): NimString {.inline.} =
+    result = rawNewStringNoInit(dr, src.len)
+    result.len = src.len
+    c_memcpy(result.data, src.data, src.len + 1)
+
+  proc genericDeepCopyAux(dr: var MemRegion; stack: var PointerStackChunk;
+                          dest, src: pointer, mt: PNimType) =
+    var
+      d = cast[ByteAddress](dest)
+      s = cast[ByteAddress](src)
+    sysAssert(mt != nil, "genericDeepCopyAux 2")
+    case mt.kind
+    of tyString:
+      var x = cast[PPointer](dest)
+      var s2 = cast[PPointer](s)[]
+      if s2 == nil:
+        x[] = nil
       else:
-        let realType = x.typ
-        let z = newObj(realType, realType.base.size)
-
-        unsureAsgnRef(cast[PPointer](dest), z)
-        x.typ = cast[PNimType](cast[int](z) or 1)
-        genericDeepCopyAux(dr, stack, z, s2, realType.base)
-        x.typ = realType
-  else:
-    copyMem(dest, src, mt.size)
-
-proc joinAliveDataFromRegion*(dest: var MemRegion; src: var MemRegion;
-                              root: pointer): pointer =
-  # we mark the alive data and copy only alive data over to 'dest'.
-  # This is O(liveset) but it nicely compacts memory, so it's fine.
-  # We use the 'typ' field as a forwarding pointer. The forwarding
-  # pointers have bit 0 set, so we can disambiguate them.
-  # We allocate a temporary stack in 'src' that we later free:
-  var s: PointerStackChunk
-  s.len = 1
-  s.data[0] = root
-  while s.len > 0:
-    var p: pointer
-    if s.tail == nil:
-      p = s.data[s.len-1]
-      dec s.len
+        x[] = copyDeepString(cast[NimString](s2))
+    of tySequence:
+      var s2 = cast[PPointer](src)[]
+      var seq = cast[PGenericSeq](s2)
+      var x = cast[PPointer](dest)
+      if s2 == nil:
+        x[] = nil
+        return
+      sysAssert(dest != nil, "genericDeepCopyAux 3")
+      x[] = newSeq(mt, seq.len)
+      var dst = cast[ByteAddress](cast[PPointer](dest)[])
+      for i in 0..seq.len-1:
+        genericDeepCopyAux(dr, stack,
+          cast[pointer](dst +% i*% mt.base.size +% GenericSeqSize),
+          cast[pointer](cast[ByteAddress](s2) +% i *% mt.base.size +%
+                       GenericSeqSize),
+          mt.base)
+    of tyObject:
+      # we need to copy m_type field for tyObject, as it could be empty for
+      # sequence reallocations:
+      var pint = cast[ptr PNimType](dest)
+      pint[] = cast[ptr PNimType](src)[]
+      if mt.base != nil:
+        genericDeepCopyAux(dr, stack, dest, src, mt.base)
+      genericDeepCopyAux(dr, stack, dest, src, mt.node)
+    of tyTuple:
+      genericDeepCopyAux(dr, stack, dest, src, mt.node)
+    of tyArray, tyArrayConstr:
+      for i in 0..(mt.size div mt.base.size)-1:
+        genericDeepCopyAux(dr, stack,
+                           cast[pointer](d +% i*% mt.base.size),
+                           cast[pointer](s +% i*% mt.base.size), mt.base)
+    of tyRef:
+      let s2 = cast[PPointer](src)[]
+      if s2 == nil:
+        cast[PPointer](dest)[] = nil
+      else:
+        # we modify the header of the cell temporarily; instead of the type
+        # field we store a forwarding pointer. XXX This is bad when the cloning
+        # fails due to OOM etc.
+        let x = usrToCell(s2)
+        let forw = cast[int](x.typ)
+        if (forw and 1) == 1:
+          # we stored a forwarding pointer, so let's use that:
+          let z = cast[pointer](forw and not 1)
+          unsureAsgnRef(cast[PPointer](dest), z)
+        else:
+          let realType = x.typ
+          let z = newObj(realType, realType.base.size)
+
+          unsureAsgnRef(cast[PPointer](dest), z)
+          x.typ = cast[PNimType](cast[int](z) or 1)
+          genericDeepCopyAux(dr, stack, z, s2, realType.base)
+          x.typ = realType
     else:
-      p = s.tail.data[s.tail.len-1]
-      dec s.tail.len
-      if s.tail.len == 0:
-        unlink(s, s.tail)
+      copyMem(dest, src, mt.size)
+
+  proc joinAliveDataFromRegion*(dest: var MemRegion; src: var MemRegion;
+                                root: pointer): pointer =
+    # we mark the alive data and copy only alive data over to 'dest'.
+    # This is O(liveset) but it nicely compacts memory, so it's fine.
+    # We use the 'typ' field as a forwarding pointer. The forwarding
+    # pointers have bit 0 set, so we can disambiguate them.
+    # We allocate a temporary stack in 'src' that we later free:
+    var s: PointerStackChunk
+    s.len = 1
+    s.data[0] = root
+    while s.len > 0:
+      var p: pointer
+      if s.tail == nil:
+        p = s.data[s.len-1]
+        dec s.len
+      else:
+        p = s.tail.data[s.tail.len-1]
+        dec s.tail.len
+        if s.tail.len == 0:
+          unlink(s, s.tail)
 
 proc rawNewObj(r: var MemRegion, typ: PNimType, size: int): pointer =
   var res = cast[ptr ObjHeader](alloc(r, size + sizeof(ObjHeader)))
   res.typ = typ
   if typ.finalizer != nil:
-    res.nextFinal = r.chunk.head
-    r.chunk.head = res
+    res.nextFinal = r.head.head
+    r.head.head = res
   result = res +! sizeof(ObjHeader)
 
 proc newObj(typ: PNimType, size: int): pointer {.compilerRtl.} =
-  result = rawNewObj(typ, size, region)
+  result = rawNewObj(tlRegion, typ, size)
   zeroMem(result, size)
   when defined(memProfiler): nimProfile(size)
 
 proc newObjNoInit(typ: PNimType, size: int): pointer {.compilerRtl.} =
-  result = rawNewObj(typ, size, region)
+  result = rawNewObj(tlRegion, typ, size)
   when defined(memProfiler): nimProfile(size)
 
 proc newSeq(typ: PNimType, len: int): pointer {.compilerRtl.} =
@@ -351,7 +368,7 @@ proc newSeq(typ: PNimType, len: int): pointer {.compilerRtl.} =
   cast[PGenericSeq](result).reserved = len
 
 proc newObjRC1(typ: PNimType, size: int): pointer {.compilerRtl.} =
-  result = rawNewObj(typ, size, gch)
+  result = rawNewObj(tlRegion, typ, size)
   zeroMem(result, size)
 
 proc newSeqRC1(typ: PNimType, len: int): pointer {.compilerRtl.} =
@@ -360,23 +377,63 @@ proc newSeqRC1(typ: PNimType, len: int): pointer {.compilerRtl.} =
   cast[PGenericSeq](result).len = len
   cast[PGenericSeq](result).reserved = len
 
-proc growObj(old: pointer, newsize: int, gch: var GcHeap): pointer =
-  collectCT(gch)
-  var ol = usrToCell(old)
-  sysAssert(ol.typ != nil, "growObj: 1")
-  gcAssert(ol.typ.kind in {tyString, tySequence}, "growObj: 2")
-
-  var res = cast[PCell](rawAlloc(gch.region, newsize + sizeof(Cell)))
-  var elemSize = 1
-  if ol.typ.kind != tyString: elemSize = ol.typ.base.size
-
-  var oldsize = cast[PGenericSeq](old).len*elemSize + GenericSeqSize
-  copyMem(res, ol, oldsize + sizeof(Cell))
-  zeroMem(cast[pointer](cast[ByteAddress](res)+% oldsize +% sizeof(Cell)),
-          newsize-oldsize)
-  sysAssert((cast[ByteAddress](res) and (MemAlign-1)) == 0, "growObj: 3")
-  result = cellToUsr(res)
+proc growObj(region: var MemRegion; old: pointer, newsize: int): pointer =
+  let typ = cast[ptr ObjHeader](old -! sizeof(ObjHeader)).typ
+  result = rawNewObj(region, typ, newsize)
+  let elemSize = if typ.kind == tyString: 1 else: typ.base.size
+  let oldsize = cast[PGenericSeq](old).len*elemSize + GenericSeqSize
+  copyMem(result, old, oldsize)
+  zeroMem(result +! oldsize, newsize-oldsize)
 
 proc growObj(old: pointer, newsize: int): pointer {.rtl.} =
-  result = growObj(old, newsize, region)
-
+  result = growObj(tlRegion, old, newsize)
+
+proc unsureAsgnRef(dest: PPointer, src: pointer) {.compilerproc, inline.} =
+  dest[] = src
+proc asgnRef(dest: PPointer, src: pointer) {.compilerproc, inline.} =
+  dest[] = src
+proc asgnRefNoCycle(dest: PPointer, src: pointer) {.compilerproc, inline.} =
+  dest[] = src
+
+proc alloc(size: Natural): pointer =
+  result = cmalloc(size)
+  if result == nil: raiseOutOfMem()
+proc alloc0(size: Natural): pointer =
+  result = alloc(size)
+  zeroMem(result, size)
+proc realloc(p: pointer, newsize: Natural): pointer =
+  result = crealloc(p, newsize)
+  if result == nil: raiseOutOfMem()
+proc dealloc(p: pointer) = cfree(p)
+
+proc allocShared(size: Natural): pointer =
+  result = cmalloc(size)
+  if result == nil: raiseOutOfMem()
+proc allocShared0(size: Natural): pointer =
+  result = alloc(size)
+  zeroMem(result, size)
+proc reallocShared(p: pointer, newsize: Natural): pointer =
+  result = crealloc(p, newsize)
+  if result == nil: raiseOutOfMem()
+proc deallocShared(p: pointer) = cfree(p)
+
+when hasThreadSupport:
+  proc getFreeSharedMem(): int = 0
+  proc getTotalSharedMem(): int = 0
+  proc getOccupiedSharedMem(): int = 0
+
+proc GC_disable() = discard
+proc GC_enable() = discard
+proc GC_fullCollect() = discard
+proc GC_setStrategy(strategy: GC_Strategy) = discard
+proc GC_enableMarkAndSweep() = discard
+proc GC_disableMarkAndSweep() = discard
+proc GC_getStatistics(): string = return ""
+
+proc getOccupiedMem(): int =
+  result = tlRegion.totalSize - tlRegion.remaining
+proc getFreeMem(): int = tlRegion.remaining
+proc getTotalMem(): int =
+  result = tlRegion.totalSize
+
+proc setStackBottom(theStackBottom: pointer) = discard
diff --git a/lib/system/jssys.nim b/lib/system/jssys.nim
index 1b98883b9..ce67373bc 100644
--- a/lib/system/jssys.nim
+++ b/lib/system/jssys.nim
@@ -97,6 +97,8 @@ proc rawWriteStackTrace(): string =
   else:
     result = "No stack traceback available\n"
 
+proc getStackTrace*(): string = rawWriteStackTrace()
+
 proc unhandledException(e: ref Exception) {.
     compilerproc, asmNoStackFrame.} =
   when NimStackTrace:
@@ -119,7 +121,10 @@ proc raiseException(e: ref Exception, ename: cstring) {.
   when not defined(noUnhandledHandler):
     if excHandler == 0:
       unhandledException(e)
-  asm "throw `e`;"
+  when defined(nimphp):
+    asm """throw new Exception($`e`["message"]);"""
+  else:
+    asm "throw `e`;"
 
 proc reraiseException() {.compilerproc, asmNoStackFrame.} =
   if lastJSError == nil:
@@ -243,8 +248,12 @@ proc toJSStr(s: string): cstring {.asmNoStackFrame, compilerproc.} =
     for (var i = 0; i < len; ++i) {
       if (nonAsciiPart !== null) {
         var offset = (i - nonAsciiOffset) * 2;
+        var code = `s`[i].toString(16);
+        if (code.length == 1) {
+          code = "0"+code;
+        }
         nonAsciiPart[offset] = "%";
-        nonAsciiPart[offset + 1] = `s`[i].toString(16);
+        nonAsciiPart[offset + 1] = code;
       }
       else if (`s`[i] < 128)
         asciiPart[i] = fcc(`s`[i]);
@@ -729,16 +738,19 @@ proc genericReset(x: JSRef, ti: PNimType): JSRef {.compilerproc.} =
   else:
     discard
 
-proc arrayConstr(len: int, value: JSRef, typ: PNimType): JSRef {.
-                 asmNoStackFrame, compilerproc.} =
-  # types are fake
-  when defined(nimphp):
+when defined(nimphp):
+  proc arrayConstr(len: int, value: string, typ: string): JSRef {.
+                  asmNoStackFrame, compilerproc.} =
+    # types are fake
     asm """
       $result = array();
       for ($i = 0; $i < `len`; $i++) $result[] = `value`;
       return $result;
     """
-  else:
+else:
+  proc arrayConstr(len: int, value: JSRef, typ: PNimType): JSRef {.
+                  asmNoStackFrame, compilerproc.} =
+  # types are fake
     asm """
       var result = new Array(`len`);
       for (var i = 0; i < `len`; ++i) result[i] = nimCopy(null, `value`, `typ`);
diff --git a/lib/system/mmdisp.nim b/lib/system/mmdisp.nim
index 5e576f0a3..d7010a1a3 100644
--- a/lib/system/mmdisp.nim
+++ b/lib/system/mmdisp.nim
@@ -389,15 +389,30 @@ elif defined(nogc) and defined(useMalloc):
 
   when not defined(useNimRtl):
     proc alloc(size: Natural): pointer =
-      result = cmalloc(size)
-      if result == nil: raiseOutOfMem()
+      var x = cmalloc(size + sizeof(size))
+      if x == nil: raiseOutOfMem()
+
+      cast[ptr int](x)[] = size
+      result = cast[pointer](cast[int](x) + sizeof(size))
+
     proc alloc0(size: Natural): pointer =
       result = alloc(size)
       zeroMem(result, size)
     proc realloc(p: pointer, newsize: Natural): pointer =
-      result = crealloc(p, newsize)
-      if result == nil: raiseOutOfMem()
-    proc dealloc(p: pointer) = cfree(p)
+      var x = cast[pointer](cast[int](p) - sizeof(newsize))
+      let oldsize = cast[ptr int](x)[]
+
+      x = crealloc(x, newsize + sizeof(newsize))
+
+      if x == nil: raiseOutOfMem()
+
+      cast[ptr int](x)[] = newsize
+      result = cast[pointer](cast[int](x) + sizeof(newsize))
+
+      if newsize > oldsize:
+        zeroMem(cast[pointer](cast[int](result) + oldsize), newsize - oldsize)
+
+    proc dealloc(p: pointer) = cfree(cast[pointer](cast[int](p) - sizeof(int)))
 
     proc allocShared(size: Natural): pointer =
       result = cmalloc(size)
@@ -511,11 +526,12 @@ elif defined(nogc):
   include "system/cellsets"
 
 else:
-  include "system/alloc"
+  when not defined(gcStack):
+    include "system/alloc"
 
-  include "system/cellsets"
-  when not leakDetector and not useCellIds:
-    sysAssert(sizeof(Cell) == sizeof(FreeCell), "sizeof FreeCell")
+    include "system/cellsets"
+    when not leakDetector and not useCellIds:
+      sysAssert(sizeof(Cell) == sizeof(FreeCell), "sizeof FreeCell")
   when compileOption("gc", "v2"):
     include "system/gc2"
   elif defined(gcStack):
diff --git a/lib/system/osalloc.nim b/lib/system/osalloc.nim
index 78410d716..8b83e194b 100644
--- a/lib/system/osalloc.nim
+++ b/lib/system/osalloc.nim
@@ -72,7 +72,7 @@ when defined(emscripten):
 
   proc osTryAllocPages(size: int): pointer = osAllocPages(size)
 
-  proc osDeallocPages(p: pointer, size: int) {.inline} =
+  proc osDeallocPages(p: pointer, size: int) {.inline.} =
     var mmapDescrPos = cast[ByteAddress](p) -% sizeof(EmscriptenMMapBlock)
     var mmapDescr = cast[EmscriptenMMapBlock](mmapDescrPos)
     munmap(mmapDescr.realPointer, mmapDescr.realSize)
@@ -107,7 +107,7 @@ elif defined(posix):
                              MAP_PRIVATE or MAP_ANONYMOUS, -1, 0)
     if result == cast[pointer](-1): result = nil
 
-  proc osDeallocPages(p: pointer, size: int) {.inline} =
+  proc osDeallocPages(p: pointer, size: int) {.inline.} =
     when reallyOsDealloc: discard munmap(p, size)
 
 elif defined(windows):
diff --git a/lib/system/syslocks.nim b/lib/system/syslocks.nim
index 1695deca1..c3e23052b 100644
--- a/lib/system/syslocks.nim
+++ b/lib/system/syslocks.nim
@@ -121,3 +121,5 @@ else:
       importc: "pthread_cond_signal", header: "<pthread.h>", noSideEffect.}
     proc deinitSysCond(cond: var SysCond) {.noSideEffect,
       importc: "pthread_cond_destroy", header: "<pthread.h>".}
+
+{.pop.}
diff --git a/lib/system/sysstr.nim b/lib/system/sysstr.nim
index e2137e8f4..64d6634d2 100644
--- a/lib/system/sysstr.nim
+++ b/lib/system/sysstr.nim
@@ -228,7 +228,8 @@ proc setLengthSeq(seq: PGenericSeq, elemSize, newLen: int): PGenericSeq {.
   elif newLen < result.len:
     # we need to decref here, otherwise the GC leaks!
     when not defined(boehmGC) and not defined(nogc) and
-         not defined(gcMarkAndSweep) and not defined(gogc):
+         not defined(gcMarkAndSweep) and not defined(gogc) and
+         not defined(gcStack):
       when false: # compileOption("gc", "v2"):
         for i in newLen..result.len-1:
           let len0 = gch.tempStack.len
@@ -303,43 +304,39 @@ proc nimFloatToStr(f: float): string {.compilerproc.} =
 proc strtod(buf: cstring, endptr: ptr cstring): float64 {.importc,
   header: "<stdlib.h>", noSideEffect.}
 
-var decimalPoint: char
-
-proc getDecimalPoint(): char =
-  result = decimalPoint
-  if result == '\0':
-    if strtod("0,5", nil) == 0.5: result = ','
-    else: result = '.'
-    # yes this is threadsafe in practice, spare me:
-    decimalPoint = result
-
 const
   IdentChars = {'a'..'z', 'A'..'Z', '0'..'9', '_'}
+  powtens =   [ 1e0,   1e1,  1e2,  1e3,  1e4,  1e5,  1e6,  1e7,  1e8,  1e9,
+                1e10, 1e11, 1e12, 1e13, 1e14, 1e15, 1e16, 1e17, 1e18, 1e19,
+                1e20, 1e21, 1e22]
 
 proc nimParseBiggestFloat(s: string, number: var BiggestFloat,
                           start = 0): int {.compilerProc.} =
-  # This routine leverages `strtod()` for the non-trivial task of
-  # parsing floating point numbers correctly. Because `strtod()` is
-  # locale-dependent with respect to the radix character, we create
-  # a copy where the decimal point is replaced with the locale's
-  # radix character.
+  # This routine attempt to parse float that can parsed quickly.
+  # ie whose integer part can fit inside a 53bits integer.
+  # their real exponent must also be <= 22. If the float doesn't follow
+  # these restrictions, transform the float into this form:
+  #  INTEGER * 10 ^ exponent and leave the work to standard `strtod()`.
+  # This avoid the problems of decimal character portability.
+  # see: http://www.exploringbinary.com/fast-path-decimal-to-floating-point-conversion/
   var
     i = start
     sign = 1.0
-    t: array[500, char] # flaviu says: 325 is the longest reasonable literal
-    ti = 0
-    hasdigits = false
-
-  template addToBuf(c) =
-    if ti < t.high:
-      t[ti] = c; inc(ti)
+    kdigits, fdigits = 0
+    exponent: int
+    integer: uint64
+    fraction: uint64
+    frac_exponent= 0
+    exp_sign = 1
+    first_digit = -1
+    has_sign = false
 
   # Sign?
   if s[i] == '+' or s[i] == '-':
+    has_sign = true
     if s[i] == '-':
       sign = -1.0
-    t[ti] = s[i]
-    inc(i); inc(ti)
+    inc(i)
 
   # NaN?
   if s[i] == 'N' or s[i] == 'n':
@@ -359,40 +356,116 @@ proc nimParseBiggestFloat(s: string, number: var BiggestFloat,
           return i+3 - start
     return 0
 
+  # Skip leading zero
+  while s[i] == '0':
+    inc(i)
+    while s[i] == '_': inc(i)
+
+  if s[i] in {'0'..'9'}:
+      first_digit = (s[i].ord - '0'.ord)
   # Integer part?
   while s[i] in {'0'..'9'}:
-    hasdigits = true
-    addToBuf(s[i])
-    inc(i);
+    inc(kdigits)
+    integer = integer * 10'u64 + (s[i].ord - '0'.ord).uint64
+    inc(i)
     while s[i] == '_': inc(i)
 
   # Fractional part?
   if s[i] == '.':
-    addToBuf(getDecimalPoint())
     inc(i)
+    # if no integer part, Skip leading zeros
+    if kdigits <= 0:
+      while s[i] == '0':
+        inc(frac_exponent)
+        inc(i)
+        while s[i] == '_': inc(i)
+
+    if first_digit == -1 and s[i] in {'0'..'9'}:
+      first_digit = (s[i].ord - '0'.ord)
+    # get fractional part
     while s[i] in {'0'..'9'}:
-      hasdigits = true
-      addToBuf(s[i])
+      inc(fdigits)
+      inc(frac_exponent)
+      integer = integer * 10'u64 + (s[i].ord - '0'.ord).uint64
       inc(i)
       while s[i] == '_': inc(i)
-  if not hasdigits:
+
+  # if has no digits: return error
+  if kdigits + fdigits <= 0 and
+     (i == start or # was only zero
+      has_sign) :   # or only '+' or '-
     return 0
 
-  # Exponent?
   if s[i] in {'e', 'E'}:
-    addToBuf(s[i])
     inc(i)
-    if s[i] in {'+', '-'}:
-      addToBuf(s[i])
+    if s[i] == '+' or s[i] == '-':
+      if s[i] == '-':
+        exp_sign = -1
+
       inc(i)
     if s[i] notin {'0'..'9'}:
       return 0
     while s[i] in {'0'..'9'}:
-      addToBuf(s[i])
+      exponent = exponent * 10 + (ord(s[i]) - ord('0'))
       inc(i)
-      while s[i] == '_': inc(i)
-  number = strtod(t, nil)
+      while s[i] == '_': inc(i) # underscores are allowed and ignored
+
+  var real_exponent = exp_sign*exponent - frac_exponent
+  let exp_negative = real_exponent < 0
+  var abs_exponent = abs(real_exponent)
+
+  # if exponent greater than can be represented: +/- zero or infinity
+  if abs_exponent > 999:
+    if exp_negative:
+      number = 0.0*sign
+    else:
+      number = Inf*sign
+    return i - start
+
+  # if integer is representable in 53 bits:  fast path
+  # max fast path integer is  1<<53 - 1 or  8999999999999999 (16 digits)
+  if kdigits + fdigits <= 16 and first_digit <= 8:
+    # max float power of ten with set bits above the 53th bit is 10^22
+    if abs_exponent <= 22:
+      if exp_negative:
+        number = sign * integer.float / powtens[abs_exponent]
+      else:
+        number = sign * integer.float * powtens[abs_exponent]
+      return i - start
+
+    # if exponent is greater try to fit extra exponent above 22 by multiplying
+    # integer part is there is space left.
+    let slop = 15 - kdigits - fdigits
+    if  abs_exponent <= 22 + slop and not exp_negative:
+      number = sign * integer.float * powtens[slop] * powtens[abs_exponent-slop]
+      return i - start
+
+  # if failed: slow path with strtod.
+  var t: array[500, char] # flaviu says: 325 is the longest reasonable literal
+  var ti = 0
+  let maxlen = t.high - "e+000".len # reserve enough space for exponent
+
   result = i - start
+  i = start
+  # re-parse without error checking, any error should be handled by the code above.
+  while s[i] in {'0'..'9','+','-'}:
+    if ti < maxlen:
+      t[ti] = s[i]; inc(ti)
+    inc(i)
+    while s[i] in {'.', '_'}: # skip underscore and decimal point
+      inc(i)
+
+  # insert exponent
+  t[ti] = 'E'; inc(ti)
+  t[ti] = if exp_negative: '-' else: '+'; inc(ti)
+  inc(ti, 3)
+
+  # insert adjusted exponent
+  t[ti-1] = ('0'.ord + abs_exponent mod 10).char; abs_exponent = abs_exponent div 10
+  t[ti-2] = ('0'.ord + abs_exponent mod 10).char; abs_exponent = abs_exponent div 10
+  t[ti-3] = ('0'.ord + abs_exponent mod 10).char
+
+  number = strtod(t, nil)
 
 proc nimInt64ToStr(x: int64): string {.compilerRtl.} =
   result = newString(sizeof(x)*4)
diff --git a/lib/system/timers.nim b/lib/system/timers.nim
index ac8418824..8aa4505c4 100644
--- a/lib/system/timers.nim
+++ b/lib/system/timers.nim
@@ -61,7 +61,7 @@ elif defined(posixRealtime):
                final, pure.} = object ## struct timespec
       tv_sec: int  ## Seconds.
       tv_nsec: int ## Nanoseconds.
-  {.deprecated: [TClockid: Clickid, TTimeSpec: TimeSpec].}
+  {.deprecated: [TClockid: Clockid, TTimeSpec: TimeSpec].}
 
   var
     CLOCK_REALTIME {.importc: "CLOCK_REALTIME", header: "<time.h>".}: Clockid
diff --git a/lib/windows/winlean.nim b/lib/windows/winlean.nim
index a08a067fa..750c7b4d1 100644
--- a/lib/windows/winlean.nim
+++ b/lib/windows/winlean.nim
@@ -36,6 +36,8 @@ type
   DWORD* = int32
   PDWORD* = ptr DWORD
   LPINT* = ptr int32
+  ULONG_PTR* = uint
+  PULONG_PTR* = ptr uint
   HDC* = Handle
   HGLRC* = Handle
 
@@ -92,7 +94,7 @@ type
     dwMinorVersion*: DWORD
     dwBuildNumber*: DWORD
     dwPlatformId*: DWORD
-    szCSDVersion*: array[0..127, WinChar];
+    szCSDVersion*: array[0..127, WinChar]
 
 {.deprecated: [THandle: Handle, TSECURITY_ATTRIBUTES: SECURITY_ATTRIBUTES,
     TSTARTUPINFO: STARTUPINFO, TPROCESS_INFORMATION: PROCESS_INFORMATION,
@@ -757,14 +759,15 @@ const
   WSAENETRESET* = 10052
   WSAETIMEDOUT* = 10060
   ERROR_NETNAME_DELETED* = 64
+  STATUS_PENDING* = 0x103
 
 proc createIoCompletionPort*(FileHandle: Handle, ExistingCompletionPort: Handle,
-                             CompletionKey: DWORD,
+                             CompletionKey: ULONG_PTR,
                              NumberOfConcurrentThreads: DWORD): Handle{.stdcall,
     dynlib: "kernel32", importc: "CreateIoCompletionPort".}
 
 proc getQueuedCompletionStatus*(CompletionPort: Handle,
-    lpNumberOfBytesTransferred: PDWORD, lpCompletionKey: PULONG,
+    lpNumberOfBytesTransferred: PDWORD, lpCompletionKey: PULONG_PTR,
                                 lpOverlapped: ptr POVERLAPPED,
                                 dwMilliseconds: DWORD): WINBOOL{.stdcall,
     dynlib: "kernel32", importc: "GetQueuedCompletionStatus".}
@@ -773,6 +776,12 @@ proc getOverlappedResult*(hFile: Handle, lpOverlapped: POVERLAPPED,
               lpNumberOfBytesTransferred: var DWORD, bWait: WINBOOL): WINBOOL{.
     stdcall, dynlib: "kernel32", importc: "GetOverlappedResult".}
 
+# this is copy of HasOverlappedIoCompleted() macro from <winbase.h>
+# because we have declared own OVERLAPPED structure with member names not
+# compatible with original names.
+template hasOverlappedIoCompleted*(lpOverlapped): bool =
+  (cast[uint](lpOverlapped.internal) != STATUS_PENDING)
+
 const
  IOC_OUT* = 0x40000000
  IOC_IN*  = 0x80000000