summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Araq <rumpf_a@web.de> 2012-10-10 00:41:34 +0200
committer Araq <rumpf_a@web.de> 2012-10-10 00:41:34 +0200
commit d43febf81e78ac79894ab136717c6100a5492b08 (patch)
tree 451569132c56fb42c39313f81e7a8e895fff6376
parent cf06131decb2d46304874bd243c29267876e0076 (diff)
download Nim-d43febf81e78ac79894ab136717c6100a5492b08.tar.gz
first version of a memory profiler
-rwxr-xr-x compiler/nimrod.nim 2
-rw-r--r-- doc/estp.txt 16
-rwxr-xr-x doc/nimrodc.txt 1
-rw-r--r-- lib/pure/nimprof.nim 69
-rwxr-xr-x lib/system.nim 2
-rwxr-xr-x lib/system/gc.nim 13
-rwxr-xr-x lib/system/profiler.nim 69
-rwxr-xr-x todo.txt 1
8 files changed, 117 insertions, 56 deletions
diff --git a/compiler/nimrod.nim b/compiler/nimrod.nim
index 31ae355f6..1a73bcca0 100755
--- a/compiler/nimrod.nim
+++ b/compiler/nimrod.nim
@@ -20,7 +20,7 @@ import
 when hasTinyCBackend:
   import tccgen
 
-when defined(profiler):
+when defined(profiler) or defined(memProfiler):
   {.hint: "Profiling support is turned on!".}
   import nimprof
   
diff --git a/doc/estp.txt b/doc/estp.txt
index 48df19739..58b788aa0 100644
--- a/doc/estp.txt
+++ b/doc/estp.txt
@@ -28,6 +28,22 @@ the option ``--stackTrace:on`` is active! Unfortunately this means that a
 profiling build is much slower than a release build.
 
 
+Memory profiler
+===============
+
+You can also use ESTP as a memory profiler to see which stack traces allocate
+the most memory and thus create the most GC pressure. It may also help to 
+find memory leaks. To activate the memory profiler you need to do:
+
+* compile your program with the ``--profiler:off --stackTrace:on -d:memProfiler``
+  command line options
+* import the ``nimprof`` module
+* run your program as usual.
+
+Define the symbol ``ignoreAllocationSize`` so that only the number of 
+allocations is counted and the sizes of the memory allocations do not matter.
+
+
 Example results file
 ====================
 
diff --git a/doc/nimrodc.txt b/doc/nimrodc.txt
index 117d226a6..8e43ce32c 100755
--- a/doc/nimrodc.txt
+++ b/doc/nimrodc.txt
@@ -160,6 +160,7 @@ Define               Effect
                      for further information.
 ``nodejs``           The EcmaScript target is actually ``node.js``.
 ``ssl``              Enables OpenSSL support for the sockets module.
+``memProfiler``      Enables memory profiling for the native GC.
 ==================   =========================================================
 
 

diff --git a/lib/pure/nimprof.nim b/lib/pure/nimprof.nim
index 725a9d0f6..02f0366cd 100644
--- a/lib/pure/nimprof.nim
+++ b/lib/pure/nimprof.nim
@@ -11,14 +11,16 @@
 ## ``--profiler:on``. You only need to import this module to get a profiling
 ## report at program exit.
 
-when not defined(profiler):
+when not defined(profiler) and not defined(memProfiler):
   {.warning: "Profiling support is turned off!".}
 
 # We don't want to profile the profiling code ...
 {.push profiler: off.}
 
 import hashes, algorithm, strutils, tables, sets
-include "system/timers"
+
+when not defined(memProfiler):
+  include "system/timers"
 
 const
   withThreads = compileOption("threads")
@@ -47,15 +49,15 @@ var
   maxChainLen = 0
   totalCalls = 0
 
-var
-  interval: TNanos = 5_000_000 - tickCountCorrection # 5ms
-
-proc setSamplingFrequency*(intervalInUs: int) =
-  ## set this to change the sampling frequency. Default value is 5ms.
-  ## Set it to 0 to disable time based profiling; it uses an imprecise
-  ## instruction count measure instead then.
-  if intervalInUs <= 0: interval = 0
-  else: interval = intervalInUs * 1000 - tickCountCorrection
+when not defined(memProfiler):
+  var interval: TNanos = 5_000_000 - tickCountCorrection # 5ms
+
+  proc setSamplingFrequency*(intervalInUs: int) =
+    ## set this to change the sampling frequency. Default value is 5ms.
+    ## Set it to 0 to disable time based profiling; it uses an imprecise
+    ## instruction count measure instead then.
+    if intervalInUs <= 0: interval = 0
+    else: interval = intervalInUs * 1000 - tickCountCorrection
   
 when withThreads:
   var
@@ -63,7 +65,7 @@ when withThreads:
 
   InitLock profilingLock
 
-proc hookAux(st: TStackTrace) =
+proc hookAux(st: TStackTrace, costs: int) =
   # this is quite performance sensitive!
   when withThreads: Acquire profilingLock
   inc totalCalls
@@ -79,13 +81,13 @@ proc hookAux(st: TStackTrace) =
     while probes >= 0:
       if profileData[h].st == st:
         # wow, same entry found:
-        inc profileData[h].total
+        inc profileData[h].total, costs
         return
       if profileData[minIdx].total < profileData[h].total:
         minIdx = h
       h = ((5 * h) + 1) and high(profileData)
       dec probes
-    profileData[minIdx].total = 1
+    profileData[minIdx].total = costs
     profileData[minIdx].st = st
   else:
     var chain = 0
@@ -93,28 +95,45 @@ proc hookAux(st: TStackTrace) =
       if profileData[h] == nil:
         profileData[h] = cast[ptr TProfileEntry](
                              allocShared0(sizeof(TProfileEntry)))
-        profileData[h].total = 1
+        profileData[h].total = costs
         profileData[h].st = st
         dec emptySlots
         break
       if profileData[h].st == st:
         # wow, same entry found:
-        inc profileData[h].total
+        inc profileData[h].total, costs
         break
       h = ((5 * h) + 1) and high(profileData)
       inc chain
     maxChainLen = max(maxChainLen, chain)
   when withThreads: Release profilingLock
 
-var
-  t0 {.threadvar.}: TTicks
-
-proc hook(st: TStackTrace) {.nimcall.} =
-  if interval == 0:
-    hookAux(st)
-  elif getticks() - t0 > interval:
-    hookAux(st)
-    t0 = getticks()  
+when defined(memProfiler):
+  const
+    SamplingInterval = 50_000
+  var
+    gTicker {.threadvar.}: int
+
+  proc hook(st: TStackTrace, size: int) {.nimcall.} =
+    if gTicker == 0:
+      gTicker = -1
+      when defined(ignoreAllocationSize):
+        hookAux(st, 1)
+      else:
+        hookAux(st, size)
+      gTicker = SamplingInterval
+    dec gTicker
+
+else:
+  var
+    t0 {.threadvar.}: TTicks
+
+  proc hook(st: TStackTrace) {.nimcall.} =
+    if interval == 0:
+      hookAux(st, 1)
+    elif getticks() - t0 > interval:
+      hookAux(st, 1)
+      t0 = getticks()  
 
 proc getTotal(x: ptr TProfileEntry): int =
   result = if isNil(x): 0 else: x.total
diff --git a/lib/system.nim b/lib/system.nim
index 9d7652c94..e35d2914f 100755
--- a/lib/system.nim
+++ b/lib/system.nim
@@ -2045,7 +2045,7 @@ when not defined(EcmaScript) and not defined(NimrodVM):
   when defined(endb):
     include "system/debugger"
 
-  when defined(profiler):
+  when defined(profiler) or defined(memProfiler):
     include "system/profiler"
   {.pop.} # stacktrace
 
diff --git a/lib/system/gc.nim b/lib/system/gc.nim
index a95319e51..ec656e0ef 100755
--- a/lib/system/gc.nim
+++ b/lib/system/gc.nim
@@ -28,6 +28,8 @@ const
 
 when withRealTime and not defined(getTicks):
   include "system/timers"
+when defined(memProfiler):
+  proc nimProfile(requestedSize: int)
 
 const
   rcIncrement = 0b1000 # so that lowest 3 bits are not touched
@@ -431,12 +433,15 @@ proc rawNewObj(typ: PNimType, size: int, gch: var TGcHeap): pointer =
 proc newObj(typ: PNimType, size: int): pointer {.compilerRtl.} =
   result = rawNewObj(typ, size, gch)
   zeroMem(result, size)
+  when defined(memProfiler): nimProfile(size)
 
 proc newSeq(typ: PNimType, len: int): pointer {.compilerRtl.} =
   # `newObj` already uses locks, so no need for them here.
-  result = newObj(typ, addInt(mulInt(len, typ.base.size), GenericSeqSize))
+  let size = addInt(mulInt(len, typ.base.size), GenericSeqSize)
+  result = newObj(typ, size)
   cast[PGenericSeq](result).len = len
   cast[PGenericSeq](result).reserved = len
+  when defined(memProfiler): nimProfile(size)
 
 proc newObjRC1(typ: PNimType, size: int): pointer {.compilerRtl.} =
   # generates a new object and sets its reference counter to 1
@@ -463,11 +468,14 @@ proc newObjRC1(typ: PNimType, size: int): pointer {.compilerRtl.} =
   result = cellToUsr(res)
   zeroMem(result, size)
   sysAssert(allocInv(gch.region), "newObjRC1 end")
+  when defined(memProfiler): nimProfile(size)
 
 proc newSeqRC1(typ: PNimType, len: int): pointer {.compilerRtl.} =
-  result = newObjRC1(typ, addInt(mulInt(len, typ.base.size), GenericSeqSize))
+  let size = addInt(mulInt(len, typ.base.size), GenericSeqSize)
+  result = newObjRC1(typ, size)
   cast[PGenericSeq](result).len = len
   cast[PGenericSeq](result).reserved = len
+  when defined(memProfiler): nimProfile(size)
   
 proc growObj(old: pointer, newsize: int, gch: var TGcHeap): pointer =
   acquire(gch)
@@ -512,6 +520,7 @@ proc growObj(old: pointer, newsize: int, gch: var TGcHeap): pointer =
   release(gch)
   result = cellToUsr(res)
   sysAssert(allocInv(gch.region), "growObj end")
+  when defined(memProfiler): nimProfile(newsize-oldsize)
 
 proc growObj(old: pointer, newsize: int): pointer {.rtl.} =
   result = growObj(old, newsize, gch)
diff --git a/lib/system/profiler.nim b/lib/system/profiler.nim
index eafa010ef..8e4c51dd9 100755
--- a/lib/system/profiler.nim
+++ b/lib/system/profiler.nim
@@ -49,34 +49,51 @@ proc captureStackTrace(f: PFrame, st: var TStackTrace) =
     inc(i)
     b = b.prev
 
-const
-  SamplingInterval = 50_000
-    # set this to change the default sampling interval
-var
-  profilerHook*: TProfilerHook
-    ## set this variable to provide a procedure that implements a profiler in
-    ## user space. See the `nimprof` module for a reference implementation.
-  gTicker {.threadvar.}: int
+when defined(memProfiler):
+  type
+    TMemProfilerHook* = proc (st: TStackTrace, requestedSize: int) {.nimcall.}
+  var
+    profilerHook*: TMemProfilerHook
+      ## set this variable to provide a procedure that implements a profiler in
+      ## user space. See the `nimprof` module for a reference implementation.
 
-proc callProfilerHook(hook: TProfilerHook) {.noinline.} =
-  # 'noinline' so that 'nimProfile' does not perform the stack allocation
-  # in the common case.
-  var st: TStackTrace
-  captureStackTrace(framePtr, st)
-  hook(st)
+  proc callProfilerHook(hook: TMemProfilerHook, requestedSize: int) =
+    var st: TStackTrace
+    captureStackTrace(framePtr, st)
+    hook(st, requestedSize)
 
-proc nimProfile() =
-  ## This is invoked by the compiler in every loop and on every proc entry!
-  if gTicker == 0:
-    gTicker = -1
+  proc nimProfile(requestedSize: int) =
     if not isNil(profilerHook):
-      # disable recursive calls: XXX should use try..finally,
-      # but that's too expensive!
-      let oldHook = profilerHook
-      profilerHook = nil
-      callProfilerHook(oldHook)
-      profilerHook = oldHook
-    gTicker = SamplingInterval
-  dec gTicker
+      callProfilerHook(profilerHook, requestedSize)
+else:
+  const
+    SamplingInterval = 50_000
+      # set this to change the default sampling interval
+  var
+    profilerHook*: TProfilerHook
+      ## set this variable to provide a procedure that implements a profiler in
+      ## user space. See the `nimprof` module for a reference implementation.
+    gTicker {.threadvar.}: int
+
+  proc callProfilerHook(hook: TProfilerHook) {.noinline.} =
+    # 'noinline' so that 'nimProfile' does not perform the stack allocation
+    # in the common case.
+    var st: TStackTrace
+    captureStackTrace(framePtr, st)
+    hook(st)
+
+  proc nimProfile() =
+    ## This is invoked by the compiler in every loop and on every proc entry!
+    if gTicker == 0:
+      gTicker = -1
+      if not isNil(profilerHook):
+        # disable recursive calls: XXX should use try..finally,
+        # but that's too expensive!
+        let oldHook = profilerHook
+        profilerHook = nil
+        callProfilerHook(oldHook)
+        profilerHook = oldHook
+      gTicker = SamplingInterval
+    dec gTicker
 
 {.pop.}
diff --git a/todo.txt b/todo.txt
index 687838346..605782f2c 100755
--- a/todo.txt
+++ b/todo.txt
@@ -1,7 +1,6 @@
 version 0.9.2
 =============
 
-- memory profiler
 - implement the compiler as a service
 - implement for loop transformation for first class iterators
 - ``=`` should be overloadable; requires specialization for ``=``