diff options
author | Araq <rumpf_a@web.de> | 2012-09-15 00:21:10 +0200 |
---|---|---|
committer | Araq <rumpf_a@web.de> | 2012-09-15 00:21:10 +0200 |
commit | 214c7a2ebdd15b623dff7e537007c99e66f256cc (patch) | |
tree | a37304594d96e841d96be39bf53fd89969a5ac66 /lib | |
parent | 4a435a8fb4551b03bdcbbbd9b074a51fa46928dd (diff) | |
download | Nim-214c7a2ebdd15b623dff7e537007c99e66f256cc.tar.gz |
implemented a stack trace profiler
Diffstat (limited to 'lib')
-rw-r--r-- | lib/pure/nimprof.nim | 125 | ||||
-rwxr-xr-x | lib/system.nim | 3 | ||||
-rwxr-xr-x | lib/system/alloc.nim | 2 | ||||
-rwxr-xr-x | lib/system/gc.nim | 10 | ||||
-rwxr-xr-x | lib/system/profiler.nim | 140 |
5 files changed, 230 insertions, 50 deletions
diff --git a/lib/pure/nimprof.nim b/lib/pure/nimprof.nim new file mode 100644 index 000000000..99af71f90 --- /dev/null +++ b/lib/pure/nimprof.nim @@ -0,0 +1,125 @@ +# +# +# Nimrod's Runtime Library +# (c) Copyright 2012 Andreas Rumpf +# +# See the file "copying.txt", included in this +# distribution, for details about the copyright. +# + +## Profiling support for Nimrod. This is an embedded profiler that requires +## ``--profiler:on``. You only need to import this module to get a profiling +## report at program exit. + +when not defined(profiler): + {.error: "Profiling support is turned off!".} + +# We don't want to profile the profiling code ... +{.push profiler: off.} + +import hashes, algorithm, strutils + +# We use a simple hash table of bounded size to keep track of the stack traces: +type + TProfileEntry = object + total: int + st: system.TStackTrace + TProfileData = array [0..64*1024-1, ref TProfileEntry] + +proc `==`(a, b: TStackTrace): bool = + for i in 0 .. high(a): + if a[i] != b[i]: return false + result = true + +# XXX extract this data structure; it is generally useful ;-) +var + profileData: TProfileData + emptySlots = profileData.len * 3 div 2 + maxChainLen = 0 + totalCalls = 0 + +proc hook(st: system.TStackTrace) {.nimcall.} = + # this is quite performance sensitive! + inc totalCalls + var last = high(st) + while last > 0 and isNil(st[last]): dec last + var h = hash(pointer(st[last])) and high(profileData) + + # we use probing for maxChainLen entries and replace the encountered entry + # with the minimal 'total' value: + if emptySlots == 0: + var minIdx = h + var probes = maxChainLen + while probes >= 0: + if profileData[h].st == st: + # wow, same entry found: + inc profileData[h].total + return + if profileData[minIdx].total < profileData[h].total: + minIdx = h + h = ((5 * h) + 1) and high(profileData) + dec probes + profileData[minIdx].total = 1 + profileData[minIdx].st = st + else: + var chain = 0 + while true: + if profileData[h] == nil: + GC_disable() + new profileData[h] + GC_enable() + profileData[h].total = 1 + profileData[h].st = st + dec emptySlots + maxChainLen = max(maxChainLen, chain) + return + if profileData[h].st == st: + # wow, same entry found: + inc profileData[h].total + maxChainLen = max(maxChainLen, chain) + return + h = ((5 * h) + 1) and high(profileData) + inc chain + +proc getTotal(x: ref TProfileEntry): int = + result = if isNil(x): 0 else: x.total + +proc cmpEntries(a, b: ref TProfileEntry): int = + result = b.getTotal - a.getTotal + +proc writeProfile() {.noconv.} = + stopProfiling() + const filename = "profile_results" + var f: TFile + var j = 1 + while open(f, filename & $j & ".txt"): + close(f) + inc(j) + let filen = filename & $j & ".txt" + echo "writing ", filen, "..." + if open(f, filen, fmWrite): + sort(profileData, cmpEntries) + writeln(f, "total executions of each stack trace:") + var entries = 0 + for i in 0..high(profileData): + if profileData[i] != nil: inc entries + + var sum = 0 + # only write the first 100 entries: + for i in 0..min(100, high(profileData)): + if profileData[i] != nil and profileData[i].total > 1: + inc sum, profileData[i].total + writeln(f, "Entry: ", i, "/", entries, " Calls: ", + profileData[i].total, "/", totalCalls, " [sum: ", sum, "; ", + formatFloat(sum / totalCalls * 100.0, ffDefault, 2), "%]") + for ii in 0..high(TStackTrace): + let procname = profileData[i].st[ii] + if isNil(procname): break + writeln(f, " ", procname) + close(f) + echo "... done" + +system.profilerHook = hook +addQuitProc(writeProfile) + +{.pop.} diff --git a/lib/system.nim b/lib/system.nim index 911b134e2..7b129bf46 100755 --- a/lib/system.nim +++ b/lib/system.nim @@ -829,7 +829,8 @@ when taintMode: else: type TaintedString* = string - +when defined(profiler): + proc nimProfile() {.compilerProc, noinline.} when hasThreadSupport: {.pragma: rtlThreadVar, threadvar.} else: diff --git a/lib/system/alloc.nim b/lib/system/alloc.nim index 5182d5ccf..82755ca86 100755 --- a/lib/system/alloc.nim +++ b/lib/system/alloc.nim @@ -11,6 +11,7 @@ # TODO: # - eliminate "used" field # - make searching for block O(1) +{.push profiler:off.} # ------------ platform specific chunk allocation code ----------------------- @@ -794,3 +795,4 @@ template InstantiateForRegion(allocator: expr) = else: result = realloc(p, newsize) +{.pop.} diff --git a/lib/system/gc.nim b/lib/system/gc.nim index 470064c5f..a5937efe5 100755 --- a/lib/system/gc.nim +++ b/lib/system/gc.nim @@ -7,7 +7,6 @@ # distribution, for details about the copyright. # - # Garbage Collector # # The basic algorithm is *Deferrent Reference Counting* with cycle detection. @@ -17,6 +16,7 @@ # Special care has been taken to avoid recursion as far as possible to avoid # stack overflows when traversing deep datastructures. It is well-suited # for soft real time applications (like games). +{.push profiler:off.} const CycleIncrease = 2 # is a multiplicative increase @@ -26,7 +26,7 @@ const # this seems to be a good value withRealTime = defined(useRealtimeGC) -when withRealTime: +when withRealTime and not defined(getTicks): include "system/timers" const @@ -426,6 +426,8 @@ proc rawNewObj(typ: PNimType, size: int, gch: var TGcHeap): pointer = result = cellToUsr(res) sysAssert(allocInv(gch.region), "rawNewObj end") +{.pop.} + proc newObj(typ: PNimType, size: int): pointer {.compilerRtl.} = result = rawNewObj(typ, size, gch) zeroMem(result, size) @@ -514,6 +516,8 @@ proc growObj(old: pointer, newsize: int, gch: var TGcHeap): pointer = proc growObj(old: pointer, newsize: int): pointer {.rtl.} = result = growObj(old, newsize, gch) +{.push profiler:off.} + # ---------------- cycle collector ------------------------------------------- proc doOperation(p: pointer, op: TWalkOp) = @@ -917,3 +921,5 @@ when not defined(useNimRtl): "[GC] max pause time [ms]: " & $(gch.stat.maxPause div 1000_000) when traceGC: writeLeakage() GC_enable() + +{.pop.} diff --git a/lib/system/profiler.nim b/lib/system/profiler.nim index 295cb43c7..aef75d39d 100755 --- a/lib/system/profiler.nim +++ b/lib/system/profiler.nim @@ -8,54 +8,100 @@ # # This file implements the Nimrod profiler. The profiler needs support by the -# code generator. +# code generator. The idea is to inject the instruction stream +# with 'nimProfile()' calls. These calls are injected at every loop end +# (except perhaps loops that have no side-effects). At every Nth call a +# stack trace is taken. A stack tace is a list of cstrings. We have a count +# table of those. +# +# The nice thing about this profiler is that it's completely time independent! + +{.push profiler: off.} + +when not defined(getTicks): + include "system/timers" + +const + MaxTraceLen = 20 # tracking the last 20 calls is enough type - TProfileData {.compilerproc, final.} = object - procname: cstring - total: float + TStackTrace* = array [0..MaxTraceLen-1, cstring] + TProfilerHook* = proc (st: TStackTrace) {.nimcall.} + +proc captureStackTrace(f: PFrame, st: var TStackTrace) = + const + firstCalls = 5 + var + it = f + i = 0 + total = 0 + while it != nil and i <= high(st)-(firstCalls-1): + # the (-1) is for a nil entry that marks where the '...' should occur + st[i] = it.procname + inc(i) + inc(total) + it = it.prev + var b = it + while it != nil: + inc(total) + it = it.prev + for j in 1..total-i-(firstCalls-1): + if b != nil: b = b.prev + if total != i: + st[i] = "..." + inc(i) + while b != nil and i <= high(st): + st[i] = b.procname + inc(i) + b = b.prev var - profileData {.compilerproc.}: array [0..64*1024-1, TProfileData] - -proc sortProfile(a: var array[0..64*1024-1, TProfileData], N: int) = - # we use shellsort here; fast enough and simple - var h = 1 - while true: - h = 3 * h + 1 - if h > N: break - while true: - h = h div 3 - for i in countup(h, N - 1): - var v = a[i] - var j = i - while a[j-h].total <= v.total: - a[j] = a[j-h] - j = j-h - if j < h: break - a[j] = v - if h == 1: break - -proc writeProfile() {.noconv.} = - const filename = "profile_results" - var i = 0 - var f: TFile - var j = 1 - while open(f, filename & $j & ".txt"): - close(f) - inc(j) - if open(f, filename & $j & ".txt", fmWrite): - var N = 0 - # we have to compute the actual length of the array: - while profileData[N].procname != nil: inc(N) - sortProfile(profileData, N) - writeln(f, "total running time of each proc" & - " (interpret these numbers relatively)") - while profileData[i].procname != nil: - write(f, profileData[i].procname) - write(f, ": ") - writeln(f, profileData[i].total) - inc(i) - close(f) - -addQuitProc(writeProfile) + profilerHook*: TProfilerHook + ## set this variable to provide a procedure that implements a profiler in + ## user space. See the `nimprof` module for a reference implementation. + SamplingInterval = 50_000 + # set this to change the default sampling interval + gTicker = SamplingInterval + interval: TNanos = 5_000_000 # 5ms + +proc callProfilerHook(hook: TProfilerHook) {.noinline.} = + # 'noinline' so that 'nimProfile' does not perform the stack allocation + # in the common case. + var st: TStackTrace + captureStackTrace(framePtr, st) + hook(st) + +proc setProfilingInterval*(intervalInUs: int): TNanos = + ## set this to change the sampling interval. Default value is 5ms. + interval = intervalInUs * 1000 + +var t0: TTicks + +var usefulCall* = 0 +var uselessCall* = 0 + +proc nimProfile() = + ## This is invoked by the compiler in every loop and on every proc entry! + dec gTicker + if gTicker == 0: + gTicker = -1 + let t1 = getticks() + if getticks() - t0 > interval: + inc usefulCall + if not isNil(profilerHook): + # disable recursive calls: XXX should use try..finally, + # but that's too expensive! + let oldHook = profilerHook + profilerHook = nil + callProfilerHook(oldHook) + profilerHook = oldHook + t0 = getticks() + else: + inc uselessCall + gTicker = SamplingInterval + +proc stopProfiling*() = + ## call this to stop profiling; should be called by the client profiler. + profilerHook = nil + +{.pop.} |