summary refs log tree commit diff stats
path: root/lib
diff options
context:
space:
mode:
authorAraq <rumpf_a@web.de>2012-09-15 00:21:10 +0200
committerAraq <rumpf_a@web.de>2012-09-15 00:21:10 +0200
commit214c7a2ebdd15b623dff7e537007c99e66f256cc (patch)
treea37304594d96e841d96be39bf53fd89969a5ac66 /lib
parent4a435a8fb4551b03bdcbbbd9b074a51fa46928dd (diff)
downloadNim-214c7a2ebdd15b623dff7e537007c99e66f256cc.tar.gz
implemented a stack trace profiler
Diffstat (limited to 'lib')
-rw-r--r--lib/pure/nimprof.nim125
-rwxr-xr-xlib/system.nim3
-rwxr-xr-xlib/system/alloc.nim2
-rwxr-xr-xlib/system/gc.nim10
-rwxr-xr-xlib/system/profiler.nim140
5 files changed, 230 insertions, 50 deletions
diff --git a/lib/pure/nimprof.nim b/lib/pure/nimprof.nim
new file mode 100644
index 000000000..99af71f90
--- /dev/null
+++ b/lib/pure/nimprof.nim
@@ -0,0 +1,125 @@
+#
+#
+#            Nimrod's Runtime Library
+#        (c) Copyright 2012 Andreas Rumpf
+#
+#    See the file "copying.txt", included in this
+#    distribution, for details about the copyright.
+#
+
+## Profiling support for Nimrod. This is an embedded profiler that requires
+## ``--profiler:on``. You only need to import this module to get a profiling
+## report at program exit.
+
+when not defined(profiler):
+  {.error: "Profiling support is turned off!".}
+
+# We don't want to profile the profiling code ...
+{.push profiler: off.}
+
+import hashes, algorithm, strutils
+
+# We use a simple hash table of bounded size to keep track of the stack traces:
+type
+  TProfileEntry = object
+    total: int
+    st: system.TStackTrace
+  TProfileData = array [0..64*1024-1, ref TProfileEntry]
+
+proc `==`(a, b: TStackTrace): bool =
+  for i in 0 .. high(a):
+    if a[i] != b[i]: return false
+  result = true
+
+# XXX extract this data structure; it is generally useful ;-)
+var
+  profileData: TProfileData
+  emptySlots = profileData.len * 3 div 2
+  maxChainLen = 0
+  totalCalls = 0
+
+proc hook(st: system.TStackTrace) {.nimcall.} =
+  # this is quite performance sensitive!
+  inc totalCalls
+  var last = high(st)
+  while last > 0 and isNil(st[last]): dec last
+  var h = hash(pointer(st[last])) and high(profileData)
+  
+  # we use probing for maxChainLen entries and replace the encountered entry
+  # with the minimal 'total' value:
+  if emptySlots == 0:
+    var minIdx = h
+    var probes = maxChainLen
+    while probes >= 0:
+      if profileData[h].st == st:
+        # wow, same entry found:
+        inc profileData[h].total
+        return
+      if profileData[minIdx].total < profileData[h].total:
+        minIdx = h
+      h = ((5 * h) + 1) and high(profileData)
+      dec probes
+    profileData[minIdx].total = 1
+    profileData[minIdx].st = st
+  else:
+    var chain = 0
+    while true:
+      if profileData[h] == nil:
+        GC_disable()
+        new profileData[h]
+        GC_enable()
+        profileData[h].total = 1
+        profileData[h].st = st
+        dec emptySlots
+        maxChainLen = max(maxChainLen, chain)
+        return
+      if profileData[h].st == st:
+        # wow, same entry found:
+        inc profileData[h].total
+        maxChainLen = max(maxChainLen, chain)
+        return
+      h = ((5 * h) + 1) and high(profileData)
+      inc chain
+
+proc getTotal(x: ref TProfileEntry): int =
+  result = if isNil(x): 0 else: x.total
+
+proc cmpEntries(a, b: ref TProfileEntry): int =
+  result = b.getTotal - a.getTotal
+
+proc writeProfile() {.noconv.} =
+  stopProfiling()
+  const filename = "profile_results"
+  var f: TFile
+  var j = 1
+  while open(f, filename & $j & ".txt"):
+    close(f)
+    inc(j)
+  let filen = filename & $j & ".txt"
+  echo "writing ", filen, "..."
+  if open(f, filen, fmWrite):
+    sort(profileData, cmpEntries)
+    writeln(f, "total executions of each stack trace:")
+    var entries = 0
+    for i in 0..high(profileData):
+      if profileData[i] != nil: inc entries
+    
+    var sum = 0
+    # only write the first 100 entries:
+    for i in 0..min(100, high(profileData)):
+      if profileData[i] != nil and profileData[i].total > 1:
+        inc sum, profileData[i].total
+        writeln(f, "Entry: ", i, "/", entries, " Calls: ",
+          profileData[i].total, "/", totalCalls, " [sum: ", sum, "; ",
+          formatFloat(sum / totalCalls * 100.0, ffDefault, 2), "%]")
+        for ii in 0..high(TStackTrace):
+          let procname = profileData[i].st[ii]
+          if isNil(procname): break
+          writeln(f, "  ", procname)
+    close(f)
+  echo "... done"
+
+system.profilerHook = hook
+addQuitProc(writeProfile)
+
+{.pop.}
diff --git a/lib/system.nim b/lib/system.nim
index 911b134e2..7b129bf46 100755
--- a/lib/system.nim
+++ b/lib/system.nim
@@ -829,7 +829,8 @@ when taintMode:
 else:
   type TaintedString* = string
 
-
+when defined(profiler):
+  proc nimProfile() {.compilerProc, noinline.}
 when hasThreadSupport:
   {.pragma: rtlThreadVar, threadvar.}
 else:
diff --git a/lib/system/alloc.nim b/lib/system/alloc.nim
index 5182d5ccf..82755ca86 100755
--- a/lib/system/alloc.nim
+++ b/lib/system/alloc.nim
@@ -11,6 +11,7 @@
 # TODO: 
 # - eliminate "used" field
 # - make searching for block O(1)
+{.push profiler:off.}
 
 # ------------ platform specific chunk allocation code -----------------------
 
@@ -794,3 +795,4 @@ template InstantiateForRegion(allocator: expr) =
     else:
       result = realloc(p, newsize)
 
+{.pop.}
diff --git a/lib/system/gc.nim b/lib/system/gc.nim
index 470064c5f..a5937efe5 100755
--- a/lib/system/gc.nim
+++ b/lib/system/gc.nim
@@ -7,7 +7,6 @@
 #    distribution, for details about the copyright.
 #
 
-
 #            Garbage Collector
 #
 # The basic algorithm is *Deferrent Reference Counting* with cycle detection.
@@ -17,6 +16,7 @@
 # Special care has been taken to avoid recursion as far as possible to avoid
 # stack overflows when traversing deep datastructures. It is well-suited
 # for soft real time applications (like games).
+{.push profiler:off.}
 
 const
   CycleIncrease = 2 # is a multiplicative increase
@@ -26,7 +26,7 @@ const
                       # this seems to be a good value
   withRealTime = defined(useRealtimeGC)
 
-when withRealTime:
+when withRealTime and not defined(getTicks):
   include "system/timers"
 
 const
@@ -426,6 +426,8 @@ proc rawNewObj(typ: PNimType, size: int, gch: var TGcHeap): pointer =
   result = cellToUsr(res)
   sysAssert(allocInv(gch.region), "rawNewObj end")
 
+{.pop.}
+
 proc newObj(typ: PNimType, size: int): pointer {.compilerRtl.} =
   result = rawNewObj(typ, size, gch)
   zeroMem(result, size)
@@ -514,6 +516,8 @@ proc growObj(old: pointer, newsize: int, gch: var TGcHeap): pointer =
 proc growObj(old: pointer, newsize: int): pointer {.rtl.} =
   result = growObj(old, newsize, gch)
 
+{.push profiler:off.}
+
 # ---------------- cycle collector -------------------------------------------
 
 proc doOperation(p: pointer, op: TWalkOp) =
@@ -917,3 +921,5 @@ when not defined(useNimRtl):
              "[GC] max pause time [ms]: " & $(gch.stat.maxPause div 1000_000)
     when traceGC: writeLeakage()
     GC_enable()
+
+{.pop.}
diff --git a/lib/system/profiler.nim b/lib/system/profiler.nim
index 295cb43c7..aef75d39d 100755
--- a/lib/system/profiler.nim
+++ b/lib/system/profiler.nim
@@ -8,54 +8,100 @@
 #
 
 # This file implements the Nimrod profiler. The profiler needs support by the
-# code generator. 
+# code generator. The idea is to inject the instruction stream
+# with 'nimProfile()' calls. These calls are injected at every loop end
+# (except perhaps loops that have no side-effects). At every Nth call a
+# stack trace is taken. A stack tace is a list of cstrings. We have a count
+# table of those.
+# 
+# The nice thing about this profiler is that it's completely time independent!
+
+{.push profiler: off.}
+
+when not defined(getTicks):
+  include "system/timers"
+
+const
+  MaxTraceLen = 20 # tracking the last 20 calls is enough
 
 type
-  TProfileData {.compilerproc, final.} = object
-    procname: cstring
-    total: float
+  TStackTrace* = array [0..MaxTraceLen-1, cstring]
+  TProfilerHook* = proc (st: TStackTrace) {.nimcall.}
+
+proc captureStackTrace(f: PFrame, st: var TStackTrace) =
+  const
+    firstCalls = 5
+  var
+    it = f
+    i = 0
+    total = 0
+  while it != nil and i <= high(st)-(firstCalls-1):
+    # the (-1) is for a nil entry that marks where the '...' should occur
+    st[i] = it.procname
+    inc(i)
+    inc(total)
+    it = it.prev
+  var b = it
+  while it != nil:
+    inc(total)
+    it = it.prev
+  for j in 1..total-i-(firstCalls-1): 
+    if b != nil: b = b.prev
+  if total != i:
+    st[i] = "..."
+    inc(i)
+  while b != nil and i <= high(st):
+    st[i] = b.procname
+    inc(i)
+    b = b.prev
 
 var
-  profileData {.compilerproc.}: array [0..64*1024-1, TProfileData]
-
-proc sortProfile(a: var array[0..64*1024-1, TProfileData], N: int) = 
-  # we use shellsort here; fast enough and simple
-  var h = 1
-  while true: 
-    h = 3 * h + 1
-    if h > N: break 
-  while true: 
-    h = h div 3
-    for i in countup(h, N - 1): 
-      var v = a[i]
-      var j = i
-      while a[j-h].total <= v.total: 
-        a[j] = a[j-h]
-        j = j-h
-        if j < h: break 
-      a[j] = v
-    if h == 1: break
-
-proc writeProfile() {.noconv.} =
-  const filename = "profile_results"
-  var i = 0
-  var f: TFile
-  var j = 1
-  while open(f, filename & $j & ".txt"):
-    close(f)
-    inc(j)
-  if open(f, filename & $j & ".txt", fmWrite):
-    var N = 0
-    # we have to compute the actual length of the array:
-    while profileData[N].procname != nil: inc(N)
-    sortProfile(profileData, N)
-    writeln(f, "total running time of each proc" &
-               " (interpret these numbers relatively)")
-    while profileData[i].procname != nil:
-      write(f, profileData[i].procname)
-      write(f, ": ")
-      writeln(f, profileData[i].total)
-      inc(i)
-    close(f)
-
-addQuitProc(writeProfile)
+  profilerHook*: TProfilerHook
+    ## set this variable to provide a procedure that implements a profiler in
+    ## user space. See the `nimprof` module for a reference implementation.
+  SamplingInterval = 50_000
+    # set this to change the default sampling interval
+  gTicker = SamplingInterval
+  interval: TNanos = 5_000_000 # 5ms
+
+proc callProfilerHook(hook: TProfilerHook) {.noinline.} =
+  # 'noinline' so that 'nimProfile' does not perform the stack allocation
+  # in the common case.
+  var st: TStackTrace
+  captureStackTrace(framePtr, st)
+  hook(st)
+
+proc setProfilingInterval*(intervalInUs: int): TNanos =
+  ## set this to change the sampling interval. Default value is 5ms.
+  interval = intervalInUs * 1000
+
+var t0: TTicks
+
+var usefulCall* = 0
+var uselessCall* = 0
+
+proc nimProfile() =
+  ## This is invoked by the compiler in every loop and on every proc entry!
+  dec gTicker
+  if gTicker == 0:
+    gTicker = -1
+    let t1 = getticks()
+    if getticks() - t0 > interval:
+      inc usefulCall
+      if not isNil(profilerHook):
+        # disable recursive calls: XXX should use try..finally,
+        # but that's too expensive!
+        let oldHook = profilerHook
+        profilerHook = nil
+        callProfilerHook(oldHook)
+        profilerHook = oldHook
+      t0 = getticks()
+    else:
+      inc uselessCall
+    gTicker = SamplingInterval
+
+proc stopProfiling*() =
+  ## call this to stop profiling; should be called by the client profiler.
+  profilerHook = nil
+
+{.pop.}