diff options
author | Araq <rumpf_a@web.de> | 2012-10-10 00:41:34 +0200 |
---|---|---|
committer | Araq <rumpf_a@web.de> | 2012-10-10 00:41:34 +0200 |
commit | d43febf81e78ac79894ab136717c6100a5492b08 (patch) | |
tree | 451569132c56fb42c39313f81e7a8e895fff6376 | |
parent | cf06131decb2d46304874bd243c29267876e0076 (diff) | |
download | Nim-d43febf81e78ac79894ab136717c6100a5492b08.tar.gz |
first version of a memory profiler
-rwxr-xr-x | compiler/nimrod.nim | 2 | ||||
-rw-r--r-- | doc/estp.txt | 16 | ||||
-rwxr-xr-x | doc/nimrodc.txt | 1 | ||||
-rw-r--r-- | lib/pure/nimprof.nim | 69 | ||||
-rwxr-xr-x | lib/system.nim | 2 | ||||
-rwxr-xr-x | lib/system/gc.nim | 13 | ||||
-rwxr-xr-x | lib/system/profiler.nim | 69 | ||||
-rwxr-xr-x | todo.txt | 1 |
8 files changed, 117 insertions, 56 deletions
diff --git a/compiler/nimrod.nim b/compiler/nimrod.nim index 31ae355f6..1a73bcca0 100755 --- a/compiler/nimrod.nim +++ b/compiler/nimrod.nim @@ -20,7 +20,7 @@ import when hasTinyCBackend: import tccgen -when defined(profiler): +when defined(profiler) or defined(memProfiler): {.hint: "Profiling support is turned on!".} import nimprof diff --git a/doc/estp.txt b/doc/estp.txt index 48df19739..58b788aa0 100644 --- a/doc/estp.txt +++ b/doc/estp.txt @@ -28,6 +28,22 @@ the option ``--stackTrace:on`` is active! Unfortunately this means that a profiling build is much slower than a release build. +Memory profiler +=============== + +You can also use ESTP as a memory profiler to see which stack traces allocate +the most memory and thus create the most GC pressure. It may also help to +find memory leaks. To activate the memory profiler you need to do: + +* compile your program with the ``--profiler:off --stackTrace:on -d:memProfiler`` + command line options +* import the ``nimprof`` module +* run your program as usual. + +Define the symbol ``ignoreAllocationSize`` so that only the number of +allocations is counted and the sizes of the memory allocations do not matter. + + Example results file ==================== diff --git a/doc/nimrodc.txt b/doc/nimrodc.txt index 117d226a6..8e43ce32c 100755 --- a/doc/nimrodc.txt +++ b/doc/nimrodc.txt @@ -160,6 +160,7 @@ Define Effect for further information. ``nodejs`` The EcmaScript target is actually ``node.js``. ``ssl`` Enables OpenSSL support for the sockets module. +``memProfiler`` Enables memory profiling for the native GC. ================== ========================================================= diff --git a/lib/pure/nimprof.nim b/lib/pure/nimprof.nim index 725a9d0f6..02f0366cd 100644 --- a/lib/pure/nimprof.nim +++ b/lib/pure/nimprof.nim @@ -11,14 +11,16 @@ ## ``--profiler:on``. You only need to import this module to get a profiling ## report at program exit. -when not defined(profiler): +when not defined(profiler) and not defined(memProfiler): {.warning: "Profiling support is turned off!".} # We don't want to profile the profiling code ... {.push profiler: off.} import hashes, algorithm, strutils, tables, sets -include "system/timers" + +when not defined(memProfiler): + include "system/timers" const withThreads = compileOption("threads") @@ -47,15 +49,15 @@ var maxChainLen = 0 totalCalls = 0 -var - interval: TNanos = 5_000_000 - tickCountCorrection # 5ms - -proc setSamplingFrequency*(intervalInUs: int) = - ## set this to change the sampling frequency. Default value is 5ms. - ## Set it to 0 to disable time based profiling; it uses an imprecise - ## instruction count measure instead then. - if intervalInUs <= 0: interval = 0 - else: interval = intervalInUs * 1000 - tickCountCorrection +when not defined(memProfiler): + var interval: TNanos = 5_000_000 - tickCountCorrection # 5ms + + proc setSamplingFrequency*(intervalInUs: int) = + ## set this to change the sampling frequency. Default value is 5ms. + ## Set it to 0 to disable time based profiling; it uses an imprecise + ## instruction count measure instead then. + if intervalInUs <= 0: interval = 0 + else: interval = intervalInUs * 1000 - tickCountCorrection when withThreads: var @@ -63,7 +65,7 @@ when withThreads: InitLock profilingLock -proc hookAux(st: TStackTrace) = +proc hookAux(st: TStackTrace, costs: int) = # this is quite performance sensitive! when withThreads: Acquire profilingLock inc totalCalls @@ -79,13 +81,13 @@ proc hookAux(st: TStackTrace) = while probes >= 0: if profileData[h].st == st: # wow, same entry found: - inc profileData[h].total + inc profileData[h].total, costs return if profileData[minIdx].total < profileData[h].total: minIdx = h h = ((5 * h) + 1) and high(profileData) dec probes - profileData[minIdx].total = 1 + profileData[minIdx].total = costs profileData[minIdx].st = st else: var chain = 0 @@ -93,28 +95,45 @@ proc hookAux(st: TStackTrace) = if profileData[h] == nil: profileData[h] = cast[ptr TProfileEntry]( allocShared0(sizeof(TProfileEntry))) - profileData[h].total = 1 + profileData[h].total = costs profileData[h].st = st dec emptySlots break if profileData[h].st == st: # wow, same entry found: - inc profileData[h].total + inc profileData[h].total, costs break h = ((5 * h) + 1) and high(profileData) inc chain maxChainLen = max(maxChainLen, chain) when withThreads: Release profilingLock -var - t0 {.threadvar.}: TTicks - -proc hook(st: TStackTrace) {.nimcall.} = - if interval == 0: - hookAux(st) - elif getticks() - t0 > interval: - hookAux(st) - t0 = getticks() +when defined(memProfiler): + const + SamplingInterval = 50_000 + var + gTicker {.threadvar.}: int + + proc hook(st: TStackTrace, size: int) {.nimcall.} = + if gTicker == 0: + gTicker = -1 + when defined(ignoreAllocationSize): + hookAux(st, 1) + else: + hookAux(st, size) + gTicker = SamplingInterval + dec gTicker + +else: + var + t0 {.threadvar.}: TTicks + + proc hook(st: TStackTrace) {.nimcall.} = + if interval == 0: + hookAux(st, 1) + elif getticks() - t0 > interval: + hookAux(st, 1) + t0 = getticks() proc getTotal(x: ptr TProfileEntry): int = result = if isNil(x): 0 else: x.total diff --git a/lib/system.nim b/lib/system.nim index 9d7652c94..e35d2914f 100755 --- a/lib/system.nim +++ b/lib/system.nim @@ -2045,7 +2045,7 @@ when not defined(EcmaScript) and not defined(NimrodVM): when defined(endb): include "system/debugger" - when defined(profiler): + when defined(profiler) or defined(memProfiler): include "system/profiler" {.pop.} # stacktrace diff --git a/lib/system/gc.nim b/lib/system/gc.nim index a95319e51..ec656e0ef 100755 --- a/lib/system/gc.nim +++ b/lib/system/gc.nim @@ -28,6 +28,8 @@ const when withRealTime and not defined(getTicks): include "system/timers" +when defined(memProfiler): + proc nimProfile(requestedSize: int) const rcIncrement = 0b1000 # so that lowest 3 bits are not touched @@ -431,12 +433,15 @@ proc rawNewObj(typ: PNimType, size: int, gch: var TGcHeap): pointer = proc newObj(typ: PNimType, size: int): pointer {.compilerRtl.} = result = rawNewObj(typ, size, gch) zeroMem(result, size) + when defined(memProfiler): nimProfile(size) proc newSeq(typ: PNimType, len: int): pointer {.compilerRtl.} = # `newObj` already uses locks, so no need for them here. - result = newObj(typ, addInt(mulInt(len, typ.base.size), GenericSeqSize)) + let size = addInt(mulInt(len, typ.base.size), GenericSeqSize) + result = newObj(typ, size) cast[PGenericSeq](result).len = len cast[PGenericSeq](result).reserved = len + when defined(memProfiler): nimProfile(size) proc newObjRC1(typ: PNimType, size: int): pointer {.compilerRtl.} = # generates a new object and sets its reference counter to 1 @@ -463,11 +468,14 @@ proc newObjRC1(typ: PNimType, size: int): pointer {.compilerRtl.} = result = cellToUsr(res) zeroMem(result, size) sysAssert(allocInv(gch.region), "newObjRC1 end") + when defined(memProfiler): nimProfile(size) proc newSeqRC1(typ: PNimType, len: int): pointer {.compilerRtl.} = - result = newObjRC1(typ, addInt(mulInt(len, typ.base.size), GenericSeqSize)) + let size = addInt(mulInt(len, typ.base.size), GenericSeqSize) + result = newObjRC1(typ, size) cast[PGenericSeq](result).len = len cast[PGenericSeq](result).reserved = len + when defined(memProfiler): nimProfile(size) proc growObj(old: pointer, newsize: int, gch: var TGcHeap): pointer = acquire(gch) @@ -512,6 +520,7 @@ proc growObj(old: pointer, newsize: int, gch: var TGcHeap): pointer = release(gch) result = cellToUsr(res) sysAssert(allocInv(gch.region), "growObj end") + when defined(memProfiler): nimProfile(newsize-oldsize) proc growObj(old: pointer, newsize: int): pointer {.rtl.} = result = growObj(old, newsize, gch) diff --git a/lib/system/profiler.nim b/lib/system/profiler.nim index eafa010ef..8e4c51dd9 100755 --- a/lib/system/profiler.nim +++ b/lib/system/profiler.nim @@ -49,34 +49,51 @@ proc captureStackTrace(f: PFrame, st: var TStackTrace) = inc(i) b = b.prev -const - SamplingInterval = 50_000 - # set this to change the default sampling interval -var - profilerHook*: TProfilerHook - ## set this variable to provide a procedure that implements a profiler in - ## user space. See the `nimprof` module for a reference implementation. - gTicker {.threadvar.}: int +when defined(memProfiler): + type + TMemProfilerHook* = proc (st: TStackTrace, requestedSize: int) {.nimcall.} + var + profilerHook*: TMemProfilerHook + ## set this variable to provide a procedure that implements a profiler in + ## user space. See the `nimprof` module for a reference implementation. -proc callProfilerHook(hook: TProfilerHook) {.noinline.} = - # 'noinline' so that 'nimProfile' does not perform the stack allocation - # in the common case. - var st: TStackTrace - captureStackTrace(framePtr, st) - hook(st) + proc callProfilerHook(hook: TMemProfilerHook, requestedSize: int) = + var st: TStackTrace + captureStackTrace(framePtr, st) + hook(st, requestedSize) -proc nimProfile() = - ## This is invoked by the compiler in every loop and on every proc entry! - if gTicker == 0: - gTicker = -1 + proc nimProfile(requestedSize: int) = if not isNil(profilerHook): - # disable recursive calls: XXX should use try..finally, - # but that's too expensive! - let oldHook = profilerHook - profilerHook = nil - callProfilerHook(oldHook) - profilerHook = oldHook - gTicker = SamplingInterval - dec gTicker + callProfilerHook(profilerHook, requestedSize) +else: + const + SamplingInterval = 50_000 + # set this to change the default sampling interval + var + profilerHook*: TProfilerHook + ## set this variable to provide a procedure that implements a profiler in + ## user space. See the `nimprof` module for a reference implementation. + gTicker {.threadvar.}: int + + proc callProfilerHook(hook: TProfilerHook) {.noinline.} = + # 'noinline' so that 'nimProfile' does not perform the stack allocation + # in the common case. + var st: TStackTrace + captureStackTrace(framePtr, st) + hook(st) + + proc nimProfile() = + ## This is invoked by the compiler in every loop and on every proc entry! + if gTicker == 0: + gTicker = -1 + if not isNil(profilerHook): + # disable recursive calls: XXX should use try..finally, + # but that's too expensive! + let oldHook = profilerHook + profilerHook = nil + callProfilerHook(oldHook) + profilerHook = oldHook + gTicker = SamplingInterval + dec gTicker {.pop.} diff --git a/todo.txt b/todo.txt index 687838346..605782f2c 100755 --- a/todo.txt +++ b/todo.txt @@ -1,7 +1,6 @@ version 0.9.2 ============= -- memory profiler - implement the compiler as a service - implement for loop transformation for first class iterators - ``=`` should be overloadable; requires specialization for ``=`` |