summary refs log tree commit diff stats
diff options
context:
space:
mode:
authorAraq <rumpf_a@web.de>2012-09-17 00:31:41 +0200
committerAraq <rumpf_a@web.de>2012-09-17 00:31:41 +0200
commit75abf7250325d90a74189ea7e4852d36c3fdd67c (patch)
tree8f58e79a9330420722f6712fb27eeae59ef4fc4b
parentad6ee9e85723a742fe76e6903a63f2796136fe98 (diff)
downloadNim-75abf7250325d90a74189ea7e4852d36c3fdd67c.tar.gz
profiler improvements
-rwxr-xr-xconfig/nimrod.cfg1
-rw-r--r--doc/estp.txt5
-rw-r--r--lib/pure/nimprof.nim115
-rwxr-xr-xlib/system.nim10
-rwxr-xr-xlib/system/ecmasys.nim2
-rwxr-xr-xlib/system/gc.nim4
-rwxr-xr-xlib/system/profiler.nim42
-rwxr-xr-xlib/system/sysio.nim2
8 files changed, 110 insertions, 71 deletions
diff --git a/config/nimrod.cfg b/config/nimrod.cfg
index 947054ab4..564a0d4a6 100755
--- a/config/nimrod.cfg
+++ b/config/nimrod.cfg
@@ -46,6 +46,7 @@ path="$lib/pure/unidecode"
   overflow_checks:off
   assertions:off
   stacktrace:off
+  linetrace:off
   debugger:off
   line_dir:off
   dead_code_elim:on
diff --git a/doc/estp.txt b/doc/estp.txt
index a3cbee516..48df19739 100644
--- a/doc/estp.txt
+++ b/doc/estp.txt
@@ -21,8 +21,7 @@ your own profiler.
 The setting ``--profiler:on`` defines the conditional symbol ``profiler``.

 

 After your program has finished the profiler will create a 
-file ``profile_resultsX.txt``(where ``X`` is a previously unused number) 
-containing the profiling results.
+file ``profile_results.txt`` containing the profiling results.
 
 Since the profiler works by examining stack traces, it's essential that
 the option ``--stackTrace:on`` is active! Unfortunately this means that a 
@@ -35,7 +34,7 @@ Example results file
 The results file lists stack traces ordered by significance. 
 
 The following example file has been generated by profiling the Nimrod compiler
-itself: It shows that in total 4.1% of the runtime has been spent 
+itself: It shows that in total 5.4% of the runtime has been spent 
 in ``crcFromRope`` or its children.
 
 In general the stack traces show you immediately where the problem is because
diff --git a/lib/pure/nimprof.nim b/lib/pure/nimprof.nim
index 1d3c57ed0..8cf6ba683 100644
--- a/lib/pure/nimprof.nim
+++ b/lib/pure/nimprof.nim
@@ -12,19 +12,27 @@
 ## report at program exit.
 
 when not defined(profiler):
-  {.error: "Profiling support is turned off!".}
+  {.warning: "Profiling support is turned off!".}
 
 # We don't want to profile the profiling code ...
 {.push profiler: off.}
 
-import hashes, algorithm, strutils
+import hashes, algorithm, strutils, tables, sets
+include "system/timers"
+
+const
+  withThreads = compileOption("threads")
+  tickCountCorrection = 50_000
+
+when not defined(system.TStackTrace):
+  type TStackTrace = array [0..20, cstring]
 
 # We use a simple hash table of bounded size to keep track of the stack traces:
 type
   TProfileEntry = object
     total: int
-    st: system.TStackTrace
-  TProfileData = array [0..64*1024-1, ref TProfileEntry]
+    st: TStackTrace
+  TProfileData = array [0..64*1024-1, ptr TProfileEntry]
 
 proc `==`(a, b: TStackTrace): bool =
   for i in 0 .. high(a):
@@ -32,14 +40,32 @@ proc `==`(a, b: TStackTrace): bool =
   result = true
 
 # XXX extract this data structure; it is generally useful ;-)
+# However a chain length of over 3000 is suspicious...
 var
   profileData: TProfileData
   emptySlots = profileData.len * 3 div 2
   maxChainLen = 0
   totalCalls = 0
 
-proc hook(st: system.TStackTrace) {.nimcall.} =
+var
+  interval: TNanos = 1_000_000 - tickCountCorrection # 5ms
+
+proc setSamplingFrequency*(intervalInUs: int) =
+  ## set this to change the sampling frequency. Default value is 5ms.
+  ## Set it to 0 to disable time based profiling; it uses an imprecise
+  ## instruction count measure instead then.
+  if intervalInUs <= 0: interval = 0
+  else: interval = intervalInUs * 1000 - tickCountCorrection
+  
+when withThreads:
+  var
+    profilingLock: TLock
+
+  InitLock profilingLock
+
+proc hookAux(st: TStackTrace) =
   # this is quite performance sensitive!
+  when withThreads: Acquire profilingLock
   inc totalCalls
   var last = high(st)
   while last > 0 and isNil(st[last]): dec last
@@ -65,64 +91,93 @@ proc hook(st: system.TStackTrace) {.nimcall.} =
     var chain = 0
     while true:
       if profileData[h] == nil:
-        GC_disable()
-        new profileData[h]
-        GC_enable()
+        profileData[h] = cast[ptr TProfileEntry](
+                             allocShared0(sizeof(TProfileEntry)))
         profileData[h].total = 1
         profileData[h].st = st
         dec emptySlots
-        maxChainLen = max(maxChainLen, chain)
-        return
+        break
       if profileData[h].st == st:
         # wow, same entry found:
         inc profileData[h].total
-        maxChainLen = max(maxChainLen, chain)
-        return
+        break
       h = ((5 * h) + 1) and high(profileData)
       inc chain
+    maxChainLen = max(maxChainLen, chain)
+  when withThreads: Release profilingLock
 
-proc getTotal(x: ref TProfileEntry): int =
+var
+  t0 {.threadvar.}: TTicks
+
+proc hook(st: TStackTrace) {.nimcall.} =
+  if interval == 0:
+    hookAux(st)
+  elif getticks() - t0 > interval:
+    hookAux(st)
+    t0 = getticks()  
+
+proc getTotal(x: ptr TProfileEntry): int =
   result = if isNil(x): 0 else: x.total
 
-proc cmpEntries(a, b: ref TProfileEntry): int =
+proc cmpEntries(a, b: ptr TProfileEntry): int =
   result = b.getTotal - a.getTotal
 
 proc `//`(a, b: int): string =
   result = format("$1/$2 = $3%", a, b, formatFloat(a / b * 100.0, ffDefault, 2))
 
 proc writeProfile() {.noconv.} =
-  stopProfiling()
-  const filename = "profile_results"
+  when defined(system.TStackTrace): 
+    system.profilerHook = nil
+  const filename = "profile_results.txt"
+  echo "writing " & filename & "..."
   var f: TFile
-  var j = 1
-  while open(f, filename & $j & ".txt"):
-    close(f)
-    inc(j)
-  let filen = filename & $j & ".txt"
-  echo "writing ", filen, "..."
-  if open(f, filen, fmWrite):
+  if open(f, filename, fmWrite):
     sort(profileData, cmpEntries)
     writeln(f, "total executions of each stack trace:")
     var entries = 0
     for i in 0..high(profileData):
       if profileData[i] != nil: inc entries
     
+    var perProc = initCountTable[string]()
+    for i in 0..entries-1:
+      var dups = initSet[string]()
+      for ii in 0..high(TStackTrace):
+        let procname = profileData[i].st[ii]
+        if isNil(procname): break
+        let p = $procname
+        if not containsOrIncl(dups, p):
+          perProc.inc(p, profileData[i].total)
+    
     var sum = 0
     # only write the first 100 entries:
-    for i in 0..min(100, high(profileData)):
-      if profileData[i] != nil and profileData[i].total > 1:
+    for i in 0..min(100, entries-1):
+      if profileData[i].total > 1:
         inc sum, profileData[i].total
-        writeln(f, "Entry: ", i, "/", entries, " Calls: ",
+        writeln(f, "Entry: ", i+1, "/", entries, " Calls: ",
           profileData[i].total // totalCalls, " [sum: ", sum, "; ",
           sum // totalCalls, "]")
         for ii in 0..high(TStackTrace):
           let procname = profileData[i].st[ii]
           if isNil(procname): break
-          writeln(f, "  ", procname)
+          writeln(f, "  ", procname, " ", perProc[$procname] // totalCalls)
     close(f)
-  echo "... done"
+    echo "... done"
+  else:
+    echo "... failed"
+
+var
+  disabled: int
+
+proc disableProfiling*() =
+  atomicDec disabled
+  system.profilerHook = nil
+
+proc enableProfiling*() =
+  if atomicInc(disabled) >= 0:
+    system.profilerHook = hook
 
-system.profilerHook = hook
-addQuitProc(writeProfile)
+when defined(system.TStackTrace):
+  system.profilerHook = hook
+  addQuitProc(writeProfile)
 
 {.pop.}
diff --git a/lib/system.nim b/lib/system.nim
index 7b129bf46..e56a7916e 100755
--- a/lib/system.nim
+++ b/lib/system.nim
@@ -1629,7 +1629,7 @@ type
     len: int  # length of slots (when not debugging always zero)
 
 when not defined(ECMAScript):
-  {.push stack_trace:off.}
+  {.push stack_trace:off, profiler:off.}
   proc add*(x: var string, y: cstring) {.noStackFrame.} =
     var i = 0
     while y[i] != '\0':
@@ -1674,7 +1674,7 @@ template newException*(exceptn: typeDesc, message: string): expr =
   e
 
 when not defined(EcmaScript) and not defined(NimrodVM):
-  {.push stack_trace: off.}
+  {.push stack_trace: off, profiler:off.}
 
   proc initGC()
   when not defined(boehmgc) and not defined(useMalloc):
@@ -1958,7 +1958,7 @@ when not defined(EcmaScript) and not defined(NimrodVM):
       ## gets the stack trace associated with `e`, which is the stack that
       ## lead to the ``raise`` statement. This is only works for debug builds.
       
-  {.push stack_trace: off.}
+  {.push stack_trace: off, profiler:off.}
   when hostOS == "standalone":
     include "system/embedded"
   else:
@@ -1997,7 +1997,7 @@ when not defined(EcmaScript) and not defined(NimrodVM):
       result = n.sons[n.len]
 
   include "system/mmdisp"
-  {.push stack_trace: off.}
+  {.push stack_trace: off, profiler:off.}
   when hostOS != "standalone": include "system/sysstr"
   {.pop.}
 
@@ -2032,7 +2032,7 @@ when not defined(EcmaScript) and not defined(NimrodVM):
       var e = getCurrentException()
       return if e == nil: "" else: e.msg
 
-  {.push stack_trace: off.}
+  {.push stack_trace: off, profiler:off.}
   when defined(endb):
     include "system/debugger"
 
diff --git a/lib/system/ecmasys.nim b/lib/system/ecmasys.nim
index 0c9db2206..e6a7d7057 100755
--- a/lib/system/ecmasys.nim
+++ b/lib/system/ecmasys.nim
@@ -33,7 +33,7 @@ var
     # list of exception handlers
     # a global variable for the root of all try blocks
 
-{.push stacktrace: off.}
+{.push stacktrace: off, profiler:off.}
 proc nimBoolToStr(x: bool): string {.compilerproc.} =
   if x: result = "true"
   else: result = "false"
diff --git a/lib/system/gc.nim b/lib/system/gc.nim
index a5937efe5..a95319e51 100755
--- a/lib/system/gc.nim
+++ b/lib/system/gc.nim
@@ -78,11 +78,11 @@ var
 when not defined(useNimRtl):
   InstantiateForRegion(gch.region)
 
-proc acquire(gch: var TGcHeap) {.inline.} = 
+template acquire(gch: TGcHeap) = 
   when hasThreadSupport and hasSharedHeap:
     AcquireSys(HeapLock)
 
-proc release(gch: var TGcHeap) {.inline.} = 
+template release(gch: TGcHeap) = 
   when hasThreadSupport and hasSharedHeap:
     releaseSys(HeapLock)
 
diff --git a/lib/system/profiler.nim b/lib/system/profiler.nim
index b7b8d8375..28edeff7a 100755
--- a/lib/system/profiler.nim
+++ b/lib/system/profiler.nim
@@ -18,9 +18,6 @@
 
 {.push profiler: off.}
 
-when not defined(getTicks):
-  include "system/timers"
-
 const
   MaxTraceLen = 20 # tracking the last 20 calls is enough
 
@@ -36,7 +33,7 @@ proc captureStackTrace(f: PFrame, st: var TStackTrace) =
     i = 0
     total = 0
   while it != nil and i <= high(st)-(firstCalls-1):
-    # the (-1) is for a nil entry that marks where the '...' should occur
+    # the (-1) is for the "..." entry
     st[i] = it.procname
     inc(i)
     inc(total)
@@ -55,14 +52,14 @@ proc captureStackTrace(f: PFrame, st: var TStackTrace) =
     inc(i)
     b = b.prev
 
+const
+  SamplingInterval = 50_000
+    # set this to change the default sampling interval
 var
   profilerHook*: TProfilerHook
     ## set this variable to provide a procedure that implements a profiler in
     ## user space. See the `nimprof` module for a reference implementation.
-  SamplingInterval = 50_000
-    # set this to change the default sampling interval
-  gTicker = SamplingInterval
-  interval: TNanos = 5_000_000 # 5ms
+  gTicker {.threadvar.}: int
 
 proc callProfilerHook(hook: TProfilerHook) {.noinline.} =
   # 'noinline' so that 'nimProfile' does not perform the stack allocation
@@ -71,31 +68,18 @@ proc callProfilerHook(hook: TProfilerHook) {.noinline.} =
   captureStackTrace(framePtr, st)
   hook(st)
 
-proc setProfilingInterval*(intervalInUs: int): TNanos =
-  ## set this to change the sampling interval. Default value is 5ms.
-  interval = intervalInUs * 1000
-
-var t0: TTicks
-
 proc nimProfile() =
   ## This is invoked by the compiler in every loop and on every proc entry!
-  dec gTicker
   if gTicker == 0:
     gTicker = -1
-    let t1 = getticks()
-    if getticks() - t0 > interval:
-      if not isNil(profilerHook):
-        # disable recursive calls: XXX should use try..finally,
-        # but that's too expensive!
-        let oldHook = profilerHook
-        profilerHook = nil
-        callProfilerHook(oldHook)
-        profilerHook = oldHook
-      t0 = getticks()
+    if not isNil(profilerHook):
+      # disable recursive calls: XXX should use try..finally,
+      # but that's too expensive!
+      let oldHook = profilerHook
+      profilerHook = nil
+      callProfilerHook(oldHook)
+      profilerHook = oldHook
     gTicker = SamplingInterval
-
-proc stopProfiling*() =
-  ## call this to stop profiling; should be called by the client profiler.
-  profilerHook = nil
+  dec gTicker
 
 {.pop.}
diff --git a/lib/system/sysio.nim b/lib/system/sysio.nim
index 9f7866fdc..02a5893d3 100755
--- a/lib/system/sysio.nim
+++ b/lib/system/sysio.nim
@@ -224,7 +224,7 @@ proc ReadBytes(f: TFile, a: var openarray[int8], start, len: int): int =
 proc ReadChars(f: TFile, a: var openarray[char], start, len: int): int =
   result = readBuffer(f, addr(a[start]), len)
 
-{.push stackTrace:off.}
+{.push stackTrace:off, profiler:off.}
 proc writeBytes(f: TFile, a: openarray[int8], start, len: int): int =
   var x = cast[ptr array[0..1000_000_000, int8]](a)
   result = writeBuffer(f, addr(x[start]), len)