diff options
author | Araq <rumpf_a@web.de> | 2012-09-17 00:31:41 +0200 |
---|---|---|
committer | Araq <rumpf_a@web.de> | 2012-09-17 00:31:41 +0200 |
commit | 75abf7250325d90a74189ea7e4852d36c3fdd67c (patch) | |
tree | 8f58e79a9330420722f6712fb27eeae59ef4fc4b | |
parent | ad6ee9e85723a742fe76e6903a63f2796136fe98 (diff) | |
download | Nim-75abf7250325d90a74189ea7e4852d36c3fdd67c.tar.gz |
profiler improvements
-rwxr-xr-x | config/nimrod.cfg | 1 | ||||
-rw-r--r-- | doc/estp.txt | 5 | ||||
-rw-r--r-- | lib/pure/nimprof.nim | 115 | ||||
-rwxr-xr-x | lib/system.nim | 10 | ||||
-rwxr-xr-x | lib/system/ecmasys.nim | 2 | ||||
-rwxr-xr-x | lib/system/gc.nim | 4 | ||||
-rwxr-xr-x | lib/system/profiler.nim | 42 | ||||
-rwxr-xr-x | lib/system/sysio.nim | 2 |
8 files changed, 110 insertions, 71 deletions
diff --git a/config/nimrod.cfg b/config/nimrod.cfg index 947054ab4..564a0d4a6 100755 --- a/config/nimrod.cfg +++ b/config/nimrod.cfg @@ -46,6 +46,7 @@ path="$lib/pure/unidecode" overflow_checks:off assertions:off stacktrace:off + linetrace:off debugger:off line_dir:off dead_code_elim:on diff --git a/doc/estp.txt b/doc/estp.txt index a3cbee516..48df19739 100644 --- a/doc/estp.txt +++ b/doc/estp.txt @@ -21,8 +21,7 @@ your own profiler. The setting ``--profiler:on`` defines the conditional symbol ``profiler``. After your program has finished the profiler will create a -file ``profile_resultsX.txt``(where ``X`` is a previously unused number) -containing the profiling results. +file ``profile_results.txt`` containing the profiling results. Since the profiler works by examining stack traces, it's essential that the option ``--stackTrace:on`` is active! Unfortunately this means that a @@ -35,7 +34,7 @@ Example results file The results file lists stack traces ordered by significance. The following example file has been generated by profiling the Nimrod compiler -itself: It shows that in total 4.1% of the runtime has been spent +itself: It shows that in total 5.4% of the runtime has been spent in ``crcFromRope`` or its children. In general the stack traces show you immediately where the problem is because diff --git a/lib/pure/nimprof.nim b/lib/pure/nimprof.nim index 1d3c57ed0..8cf6ba683 100644 --- a/lib/pure/nimprof.nim +++ b/lib/pure/nimprof.nim @@ -12,19 +12,27 @@ ## report at program exit. when not defined(profiler): - {.error: "Profiling support is turned off!".} + {.warning: "Profiling support is turned off!".} # We don't want to profile the profiling code ... {.push profiler: off.} -import hashes, algorithm, strutils +import hashes, algorithm, strutils, tables, sets +include "system/timers" + +const + withThreads = compileOption("threads") + tickCountCorrection = 50_000 + +when not defined(system.TStackTrace): + type TStackTrace = array [0..20, cstring] # We use a simple hash table of bounded size to keep track of the stack traces: type TProfileEntry = object total: int - st: system.TStackTrace - TProfileData = array [0..64*1024-1, ref TProfileEntry] + st: TStackTrace + TProfileData = array [0..64*1024-1, ptr TProfileEntry] proc `==`(a, b: TStackTrace): bool = for i in 0 .. high(a): @@ -32,14 +40,32 @@ proc `==`(a, b: TStackTrace): bool = result = true # XXX extract this data structure; it is generally useful ;-) +# However a chain length of over 3000 is suspicious... var profileData: TProfileData emptySlots = profileData.len * 3 div 2 maxChainLen = 0 totalCalls = 0 -proc hook(st: system.TStackTrace) {.nimcall.} = +var + interval: TNanos = 1_000_000 - tickCountCorrection # 5ms + +proc setSamplingFrequency*(intervalInUs: int) = + ## set this to change the sampling frequency. Default value is 5ms. + ## Set it to 0 to disable time based profiling; it uses an imprecise + ## instruction count measure instead then. + if intervalInUs <= 0: interval = 0 + else: interval = intervalInUs * 1000 - tickCountCorrection + +when withThreads: + var + profilingLock: TLock + + InitLock profilingLock + +proc hookAux(st: TStackTrace) = # this is quite performance sensitive! + when withThreads: Acquire profilingLock inc totalCalls var last = high(st) while last > 0 and isNil(st[last]): dec last @@ -65,64 +91,93 @@ proc hook(st: system.TStackTrace) {.nimcall.} = var chain = 0 while true: if profileData[h] == nil: - GC_disable() - new profileData[h] - GC_enable() + profileData[h] = cast[ptr TProfileEntry]( + allocShared0(sizeof(TProfileEntry))) profileData[h].total = 1 profileData[h].st = st dec emptySlots - maxChainLen = max(maxChainLen, chain) - return + break if profileData[h].st == st: # wow, same entry found: inc profileData[h].total - maxChainLen = max(maxChainLen, chain) - return + break h = ((5 * h) + 1) and high(profileData) inc chain + maxChainLen = max(maxChainLen, chain) + when withThreads: Release profilingLock -proc getTotal(x: ref TProfileEntry): int = +var + t0 {.threadvar.}: TTicks + +proc hook(st: TStackTrace) {.nimcall.} = + if interval == 0: + hookAux(st) + elif getticks() - t0 > interval: + hookAux(st) + t0 = getticks() + +proc getTotal(x: ptr TProfileEntry): int = result = if isNil(x): 0 else: x.total -proc cmpEntries(a, b: ref TProfileEntry): int = +proc cmpEntries(a, b: ptr TProfileEntry): int = result = b.getTotal - a.getTotal proc `//`(a, b: int): string = result = format("$1/$2 = $3%", a, b, formatFloat(a / b * 100.0, ffDefault, 2)) proc writeProfile() {.noconv.} = - stopProfiling() - const filename = "profile_results" + when defined(system.TStackTrace): + system.profilerHook = nil + const filename = "profile_results.txt" + echo "writing " & filename & "..." var f: TFile - var j = 1 - while open(f, filename & $j & ".txt"): - close(f) - inc(j) - let filen = filename & $j & ".txt" - echo "writing ", filen, "..." - if open(f, filen, fmWrite): + if open(f, filename, fmWrite): sort(profileData, cmpEntries) writeln(f, "total executions of each stack trace:") var entries = 0 for i in 0..high(profileData): if profileData[i] != nil: inc entries + var perProc = initCountTable[string]() + for i in 0..entries-1: + var dups = initSet[string]() + for ii in 0..high(TStackTrace): + let procname = profileData[i].st[ii] + if isNil(procname): break + let p = $procname + if not containsOrIncl(dups, p): + perProc.inc(p, profileData[i].total) + var sum = 0 # only write the first 100 entries: - for i in 0..min(100, high(profileData)): - if profileData[i] != nil and profileData[i].total > 1: + for i in 0..min(100, entries-1): + if profileData[i].total > 1: inc sum, profileData[i].total - writeln(f, "Entry: ", i, "/", entries, " Calls: ", + writeln(f, "Entry: ", i+1, "/", entries, " Calls: ", profileData[i].total // totalCalls, " [sum: ", sum, "; ", sum // totalCalls, "]") for ii in 0..high(TStackTrace): let procname = profileData[i].st[ii] if isNil(procname): break - writeln(f, " ", procname) + writeln(f, " ", procname, " ", perProc[$procname] // totalCalls) close(f) - echo "... done" + echo "... done" + else: + echo "... failed" + +var + disabled: int + +proc disableProfiling*() = + atomicDec disabled + system.profilerHook = nil + +proc enableProfiling*() = + if atomicInc(disabled) >= 0: + system.profilerHook = hook -system.profilerHook = hook -addQuitProc(writeProfile) +when defined(system.TStackTrace): + system.profilerHook = hook + addQuitProc(writeProfile) {.pop.} diff --git a/lib/system.nim b/lib/system.nim index 7b129bf46..e56a7916e 100755 --- a/lib/system.nim +++ b/lib/system.nim @@ -1629,7 +1629,7 @@ type len: int # length of slots (when not debugging always zero) when not defined(ECMAScript): - {.push stack_trace:off.} + {.push stack_trace:off, profiler:off.} proc add*(x: var string, y: cstring) {.noStackFrame.} = var i = 0 while y[i] != '\0': @@ -1674,7 +1674,7 @@ template newException*(exceptn: typeDesc, message: string): expr = e when not defined(EcmaScript) and not defined(NimrodVM): - {.push stack_trace: off.} + {.push stack_trace: off, profiler:off.} proc initGC() when not defined(boehmgc) and not defined(useMalloc): @@ -1958,7 +1958,7 @@ when not defined(EcmaScript) and not defined(NimrodVM): ## gets the stack trace associated with `e`, which is the stack that ## lead to the ``raise`` statement. This is only works for debug builds. - {.push stack_trace: off.} + {.push stack_trace: off, profiler:off.} when hostOS == "standalone": include "system/embedded" else: @@ -1997,7 +1997,7 @@ when not defined(EcmaScript) and not defined(NimrodVM): result = n.sons[n.len] include "system/mmdisp" - {.push stack_trace: off.} + {.push stack_trace: off, profiler:off.} when hostOS != "standalone": include "system/sysstr" {.pop.} @@ -2032,7 +2032,7 @@ when not defined(EcmaScript) and not defined(NimrodVM): var e = getCurrentException() return if e == nil: "" else: e.msg - {.push stack_trace: off.} + {.push stack_trace: off, profiler:off.} when defined(endb): include "system/debugger" diff --git a/lib/system/ecmasys.nim b/lib/system/ecmasys.nim index 0c9db2206..e6a7d7057 100755 --- a/lib/system/ecmasys.nim +++ b/lib/system/ecmasys.nim @@ -33,7 +33,7 @@ var # list of exception handlers # a global variable for the root of all try blocks -{.push stacktrace: off.} +{.push stacktrace: off, profiler:off.} proc nimBoolToStr(x: bool): string {.compilerproc.} = if x: result = "true" else: result = "false" diff --git a/lib/system/gc.nim b/lib/system/gc.nim index a5937efe5..a95319e51 100755 --- a/lib/system/gc.nim +++ b/lib/system/gc.nim @@ -78,11 +78,11 @@ var when not defined(useNimRtl): InstantiateForRegion(gch.region) -proc acquire(gch: var TGcHeap) {.inline.} = +template acquire(gch: TGcHeap) = when hasThreadSupport and hasSharedHeap: AcquireSys(HeapLock) -proc release(gch: var TGcHeap) {.inline.} = +template release(gch: TGcHeap) = when hasThreadSupport and hasSharedHeap: releaseSys(HeapLock) diff --git a/lib/system/profiler.nim b/lib/system/profiler.nim index b7b8d8375..28edeff7a 100755 --- a/lib/system/profiler.nim +++ b/lib/system/profiler.nim @@ -18,9 +18,6 @@ {.push profiler: off.} -when not defined(getTicks): - include "system/timers" - const MaxTraceLen = 20 # tracking the last 20 calls is enough @@ -36,7 +33,7 @@ proc captureStackTrace(f: PFrame, st: var TStackTrace) = i = 0 total = 0 while it != nil and i <= high(st)-(firstCalls-1): - # the (-1) is for a nil entry that marks where the '...' should occur + # the (-1) is for the "..." entry st[i] = it.procname inc(i) inc(total) @@ -55,14 +52,14 @@ proc captureStackTrace(f: PFrame, st: var TStackTrace) = inc(i) b = b.prev +const + SamplingInterval = 50_000 + # set this to change the default sampling interval var profilerHook*: TProfilerHook ## set this variable to provide a procedure that implements a profiler in ## user space. See the `nimprof` module for a reference implementation. - SamplingInterval = 50_000 - # set this to change the default sampling interval - gTicker = SamplingInterval - interval: TNanos = 5_000_000 # 5ms + gTicker {.threadvar.}: int proc callProfilerHook(hook: TProfilerHook) {.noinline.} = # 'noinline' so that 'nimProfile' does not perform the stack allocation @@ -71,31 +68,18 @@ proc callProfilerHook(hook: TProfilerHook) {.noinline.} = captureStackTrace(framePtr, st) hook(st) -proc setProfilingInterval*(intervalInUs: int): TNanos = - ## set this to change the sampling interval. Default value is 5ms. - interval = intervalInUs * 1000 - -var t0: TTicks - proc nimProfile() = ## This is invoked by the compiler in every loop and on every proc entry! - dec gTicker if gTicker == 0: gTicker = -1 - let t1 = getticks() - if getticks() - t0 > interval: - if not isNil(profilerHook): - # disable recursive calls: XXX should use try..finally, - # but that's too expensive! - let oldHook = profilerHook - profilerHook = nil - callProfilerHook(oldHook) - profilerHook = oldHook - t0 = getticks() + if not isNil(profilerHook): + # disable recursive calls: XXX should use try..finally, + # but that's too expensive! + let oldHook = profilerHook + profilerHook = nil + callProfilerHook(oldHook) + profilerHook = oldHook gTicker = SamplingInterval - -proc stopProfiling*() = - ## call this to stop profiling; should be called by the client profiler. - profilerHook = nil + dec gTicker {.pop.} diff --git a/lib/system/sysio.nim b/lib/system/sysio.nim index 9f7866fdc..02a5893d3 100755 --- a/lib/system/sysio.nim +++ b/lib/system/sysio.nim @@ -224,7 +224,7 @@ proc ReadBytes(f: TFile, a: var openarray[int8], start, len: int): int = proc ReadChars(f: TFile, a: var openarray[char], start, len: int): int = result = readBuffer(f, addr(a[start]), len) -{.push stackTrace:off.} +{.push stackTrace:off, profiler:off.} proc writeBytes(f: TFile, a: openarray[int8], start, len: int): int = var x = cast[ptr array[0..1000_000_000, int8]](a) result = writeBuffer(f, addr(x[start]), len) |