summary refs log tree commit diff stats
path: root/compiler/sighashes.nim
diff options
context:
space:
mode:
Diffstat (limited to 'compiler/sighashes.nim')
-rw-r--r--compiler/sighashes.nim420
1 files changed, 272 insertions, 148 deletions
diff --git a/compiler/sighashes.nim b/compiler/sighashes.nim
index 6043eb4a7..d8dfe1828 100644
--- a/compiler/sighashes.nim
+++ b/compiler/sighashes.nim
@@ -9,87 +9,43 @@
 
 ## Computes hash values for routine (proc, method etc) signatures.
 
-import ast, md5
-from hashes import Hash
-from astalgo import debug
-from types import typeToString, preferDesc
-from strutils import startsWith, contains
-
-when false:
-  type
-    SigHash* = uint32  ## a hash good enough for a filename or a proc signature
-
-  proc sdbmHash(hash: SigHash, c: char): SigHash {.inline.} =
-    return SigHash(c) + (hash shl 6) + (hash shl 16) - hash
-
-  template `&=`*(x: var SigHash, c: char) = x = sdbmHash(x, c)
-  template `&=`*(x: var SigHash, s: string) =
-    for c in s: x = sdbmHash(x, c)
-
-else:
-  type
-    SigHash* = distinct Md5Digest
-
-  const
-    cb64 = [
-      "A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K", "L", "M", "N",
-      "O", "P", "Q", "R", "S", "T", "U", "V", "W", "X", "Y", "Z",
-      "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m", "n",
-      "o", "p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z",
-      "0", "1", "2", "3", "4", "5", "6", "7", "8", "9a",
-      "9b", "9c"]
-
-  proc toBase64a(s: cstring, len: int): string =
-    ## encodes `s` into base64 representation.
-    result = newStringOfCap(((len + 2) div 3) * 4)
-    result.add '_'
-    var i = 0
-    while i < len - 2:
-      let a = ord(s[i])
-      let b = ord(s[i+1])
-      let c = ord(s[i+2])
-      result.add cb64[a shr 2]
-      result.add cb64[((a and 3) shl 4) or ((b and 0xF0) shr 4)]
-      result.add cb64[((b and 0x0F) shl 2) or ((c and 0xC0) shr 6)]
-      result.add cb64[c and 0x3F]
-      inc(i, 3)
-    if i < len-1:
-      let a = ord(s[i])
-      let b = ord(s[i+1])
-      result.add cb64[a shr 2]
-      result.add cb64[((a and 3) shl 4) or ((b and 0xF0) shr 4)]
-      result.add cb64[((b and 0x0F) shl 2)]
-    elif i < len:
-      let a = ord(s[i])
-      result.add cb64[a shr 2]
-      result.add cb64[(a and 3) shl 4]
-
-  proc `$`*(u: SigHash): string =
-    toBase64a(cast[cstring](unsafeAddr u), sizeof(u))
-  proc `&=`(c: var MD5Context, s: string) = md5Update(c, s, s.len)
-  proc `&=`(c: var MD5Context, ch: char) = md5Update(c, unsafeAddr ch, 1)
-  proc `&=`(c: var MD5Context, i: BiggestInt) =
-    md5Update(c, cast[cstring](unsafeAddr i), sizeof(i))
-
-  template lowlevel(v) =
-    md5Update(c, cast[cstring](unsafeAddr(v)), sizeof(v))
-
-  proc `==`*(a, b: SigHash): bool =
-    # {.borrow.}
-    result = equalMem(unsafeAddr a, unsafeAddr b, sizeof(a))
-
-  proc hash*(u: SigHash): Hash =
-    result = 0
-    for x in 0..3:
-      result = (result shl 8) or u.MD5Digest[x].int
+import ast, ropes, modulegraphs, options, msgs, pathutils
+from std/hashes import Hash
+import std/tables
+import types
+import ../dist/checksums/src/checksums/md5
+
+
+when defined(nimPreviewSlimSystem):
+  import std/assertions
+
+
+proc `&=`(c: var MD5Context, s: string) = md5Update(c, s, s.len)
+proc `&=`(c: var MD5Context, ch: char) =
+  # XXX suspicious code here; relies on ch being zero terminated?
+  md5Update(c, cast[cstring](unsafeAddr ch), 1)
+
+proc `&=`(c: var MD5Context, i: BiggestInt) =
+  md5Update(c, cast[cstring](unsafeAddr i), sizeof(i))
+proc `&=`(c: var MD5Context, f: BiggestFloat) =
+  md5Update(c, cast[cstring](unsafeAddr f), sizeof(f))
+proc `&=`(c: var MD5Context, s: SigHash) =
+  md5Update(c, cast[cstring](unsafeAddr s), sizeof(s))
+template lowlevel(v) =
+  md5Update(c, cast[cstring](unsafeAddr(v)), sizeof(v))
+
+
 type
   ConsiderFlag* = enum
     CoProc
     CoType
     CoOwnerSig
+    CoIgnoreRange
+    CoConsiderOwned
+    CoDistinct
+    CoHashTypeInsideNode
 
-proc hashType(c: var MD5Context, t: PType; flags: set[ConsiderFlag])
-
+proc hashType(c: var MD5Context, t: PType; flags: set[ConsiderFlag]; conf: ConfigRef)
 proc hashSym(c: var MD5Context, s: PSym) =
   if sfAnon in s.flags or s.kind == skGenericParam:
     c &= ":anon"
@@ -99,21 +55,26 @@ proc hashSym(c: var MD5Context, s: PSym) =
       c &= it.name.s
       c &= "."
       it = it.owner
+    c &= "#"
+    c &= s.disamb
 
-proc hashTypeSym(c: var MD5Context, s: PSym) =
+proc hashTypeSym(c: var MD5Context, s: PSym; conf: ConfigRef) =
   if sfAnon in s.flags or s.kind == skGenericParam:
     c &= ":anon"
   else:
     var it = s
+    c &= customPath(conf.toFullPath(s.info))
     while it != nil:
       if sfFromGeneric in it.flags and it.kind in routineKinds and
           it.typ != nil:
-        hashType c, it.typ, {CoProc}
+        hashType c, it.typ, {CoProc}, conf
       c &= it.name.s
       c &= "."
       it = it.owner
+    c &= "#"
+    c &= s.disamb
 
-proc hashTree(c: var MD5Context, n: PNode) =
+proc hashTree(c: var MD5Context, n: PNode; flags: set[ConsiderFlag]; conf: ConfigRef) =
   if n == nil:
     c &= "\255"
     return
@@ -127,6 +88,8 @@ proc hashTree(c: var MD5Context, n: PNode) =
     c &= n.ident.s
   of nkSym:
     hashSym(c, n.sym)
+    if CoHashTypeInsideNode in flags and n.sym.typ != nil:
+      hashType(c, n.sym.typ, flags, conf)
   of nkCharLit..nkUInt64Lit:
     let v = n.intVal
     lowlevel v
@@ -136,89 +99,133 @@ proc hashTree(c: var MD5Context, n: PNode) =
   of nkStrLit..nkTripleStrLit:
     c &= n.strVal
   else:
-    for i in 0.. <n.len: hashTree(c, n.sons[i])
+    for i in 0..<n.len: hashTree(c, n[i], flags, conf)
 
-proc hashType(c: var MD5Context, t: PType; flags: set[ConsiderFlag]) =
+proc hashType(c: var MD5Context, t: PType; flags: set[ConsiderFlag]; conf: ConfigRef) =
   if t == nil:
     c &= "\254"
     return
 
   case t.kind
   of tyGenericInvocation:
-    for i in countup(0, sonsLen(t) - 1):
-      c.hashType t.sons[i], flags
-    return
+    for a in t.kids:
+      c.hashType a, flags, conf
   of tyDistinct:
-    if CoType in flags:
-      c.hashType t.lastSon, flags
+    if CoDistinct in flags:
+      if t.sym != nil: c.hashSym(t.sym)
+      if t.sym == nil or tfFromGeneric in t.flags:
+        c.hashType t.elementType, flags, conf
+    elif CoType in flags or t.sym == nil:
+      c.hashType t.elementType, flags, conf
     else:
       c.hashSym(t.sym)
-    return
-  of tyAlias, tyGenericInst, tyUserTypeClasses:
-    c.hashType t.lastSon, flags
-    return
-  else:
-    discard
-  c &= char(t.kind)
-  case t.kind
+  of tyGenericInst:
+    if sfInfixCall in t.base.sym.flags:
+      # This is an imported C++ generic type.
+      # We cannot trust the `lastSon` to hold a properly populated and unique
+      # value for each instantiation, so we hash the generic parameters here:
+      let normalizedType = t.skipGenericAlias
+      c.hashType normalizedType.genericHead, flags, conf
+      for _, a in normalizedType.genericInstParams:
+        c.hashType a, flags, conf
+    else:
+      c.hashType t.skipModifier, flags, conf
+  of tyAlias, tySink, tyUserTypeClasses, tyInferred:
+    c.hashType t.skipModifier, flags, conf
+  of tyOwned:
+    if CoConsiderOwned in flags:
+      c &= char(t.kind)
+    c.hashType t.skipModifier, flags, conf
   of tyBool, tyChar, tyInt..tyUInt64:
     # no canonicalization for integral types, so that e.g. ``pid_t`` is
     # produced instead of ``NI``:
+    c &= char(t.kind)
     if t.sym != nil and {sfImportc, sfExportc} * t.sym.flags != {}:
       c.hashSym(t.sym)
   of tyObject, tyEnum:
     if t.typeInst != nil:
-      assert t.typeInst.kind == tyGenericInst
-      for i in countup(1, sonsLen(t.typeInst) - 2):
-        c.hashType t.typeInst.sons[i], flags
+      # prevent against infinite recursions here, see bug #8883:
+      let inst = t.typeInst
+      t.typeInst = nil
+      assert inst.kind == tyGenericInst
+      c.hashType inst.genericHead, flags, conf
+      for _, a in inst.genericInstParams:
+        c.hashType a, flags, conf
+      t.typeInst = inst
+      return
+    c &= char(t.kind)
     # Every cyclic type in Nim need to be constructed via some 't.sym', so this
     # is actually safe without an infinite recursion check:
     if t.sym != nil:
-      #if "Future:" in t.sym.name.s and t.typeInst == nil:
-      #  writeStackTrace()
-      #  echo "yes ", t.sym.name.s
-      #  #quit 1
-      if CoOwnerSig in flags:
-        c.hashTypeSym(t.sym)
+      if {sfCompilerProc} * t.sym.flags != {}:
+        doAssert t.sym.loc.snippet != ""
+        # The user has set a specific name for this type
+        c &= t.sym.loc.snippet
+      elif CoOwnerSig in flags:
+        c.hashTypeSym(t.sym, conf)
       else:
         c.hashSym(t.sym)
-      if sfAnon in t.sym.flags:
-        # generated object names can be identical, so we need to
-        # disambiguate furthermore by hashing the field types and names:
-        # mild hack to prevent endless recursions (makes nimforum compile again):
-        excl t.sym.flags, sfAnon
-        let n = t.n
-        for i in 0 ..< n.len:
-          assert n[i].kind == nkSym
-          let s = n[i].sym
-          c.hashSym s
-          c.hashType s.typ, flags
-        incl t.sym.flags, sfAnon
+
+      var symWithFlags: PSym = nil
+      template hasFlag(sym): bool =
+        let ret = {sfAnon, sfGenSym} * sym.flags != {}
+        if ret: symWithFlags = sym
+        ret
+      if hasFlag(t.sym) or (t.kind == tyObject and t.owner.kind == skType and t.owner.typ.kind == tyRef and hasFlag(t.owner)):
+        # for `PFoo:ObjectType`, arising from `type PFoo = ref object`
+        # Generated object names can be identical, so we need to
+        # disambiguate furthermore by hashing the field types and names.
+        if t.n.len > 0:
+          let oldFlags = symWithFlags.flags
+          # Hack to prevent endless recursion
+          # xxx instead, use a hash table to indicate we've already visited a type, which
+          # would also be more efficient.
+          symWithFlags.flags.excl {sfAnon, sfGenSym}
+          hashTree(c, t.n, flags + {CoHashTypeInsideNode}, conf)
+          symWithFlags.flags = oldFlags
+        else:
+          # The object has no fields: we _must_ add something here in order to
+          # make the hash different from the one we produce by hashing only the
+          # type name.
+          c &= ".empty"
     else:
       c &= t.id
-    if t.len > 0 and t.sons[0] != nil:
-      hashType c, t.sons[0], flags
-  of tyRef, tyPtr, tyGenericBody, tyVar:
-    c.hashType t.lastSon, flags
+    if t.hasElementType and t.baseClass != nil:
+      hashType c, t.baseClass, flags, conf
+  of tyRef, tyPtr, tyVar:
+    c &= char(t.kind)
+    if t.hasElementType:
+      c.hashType t.elementType, flags, conf
     if tfVarIsPtr in t.flags: c &= ".varisptr"
+  of tyGenericBody:
+    c &= char(t.kind)
+    if t.hasElementType:
+      c.hashType t.typeBodyImpl, flags, conf
   of tyFromExpr:
-    c.hashTree(t.n)
+    c &= char(t.kind)
+    c.hashTree(t.n, {}, conf)
   of tyTuple:
+    c &= char(t.kind)
     if t.n != nil and CoType notin flags:
-      assert(sonsLen(t.n) == sonsLen(t))
-      for i in countup(0, sonsLen(t.n) - 1):
-        assert(t.n.sons[i].kind == nkSym)
-        c &= t.n.sons[i].sym.name.s
+      for i in 0..<t.n.len:
+        assert(t.n[i].kind == nkSym)
+        c &= t.n[i].sym.name.s
         c &= ':'
-        c.hashType(t.sons[i], flags)
+        c.hashType(t.n[i].sym.typ, flags+{CoIgnoreRange}, conf)
         c &= ','
     else:
-      for i in countup(0, sonsLen(t) - 1): c.hashType t.sons[i], flags
-  of tyRange, tyStatic:
-    #if CoType notin flags:
-    c.hashTree(t.n)
-    c.hashType(t.sons[0], flags)
+      for a in t.kids: c.hashType a, flags+{CoIgnoreRange}, conf
+  of tyRange:
+    if CoIgnoreRange notin flags:
+      c &= char(t.kind)
+      c.hashTree(t.n, {}, conf)
+    c.hashType(t.elementType, flags, conf)
+  of tyStatic:
+    c &= char(t.kind)
+    c.hashTree(t.n, {}, conf)
+    c.hashType(t.skipModifier, flags, conf)
   of tyProc:
+    c &= char(t.kind)
     c &= (if tfIterator in t.flags: "iterator " else: "proc ")
     if CoProc in flags and t.n != nil:
       let params = t.n
@@ -226,18 +233,27 @@ proc hashType(c: var MD5Context, t: PType; flags: set[ConsiderFlag]) =
         let param = params[i].sym
         c &= param.name.s
         c &= ':'
-        c.hashType(param.typ, flags)
+        c.hashType(param.typ, flags, conf)
         c &= ','
-      c.hashType(t.sons[0], flags)
+      c.hashType(t.returnType, flags, conf)
     else:
-      for i in 0.. <t.len: c.hashType(t.sons[i], flags)
+      for a in t.signature: c.hashType(a, flags, conf)
     c &= char(t.callConv)
-    if CoType notin flags:
-      if tfNoSideEffect in t.flags: c &= ".noSideEffect"
-      if tfThread in t.flags: c &= ".thread"
+    # purity of functions doesn't have to affect the mangling (which is in fact
+    # problematic for HCR - someone could have cached a pointer to another
+    # function which changes its purity and suddenly the cached pointer is danglign)
+    # IMHO anything that doesn't affect the overload resolution shouldn't be part of the mangling...
+    # if CoType notin flags:
+    #   if tfNoSideEffect in t.flags: c &= ".noSideEffect"
+    #   if tfThread in t.flags: c &= ".thread"
     if tfVarargs in t.flags: c &= ".varargs"
+  of tyArray:
+    c &= char(t.kind)
+    c.hashType(t.indexType, flags-{CoIgnoreRange}, conf)
+    c.hashType(t.elementType, flags-{CoIgnoreRange}, conf)
   else:
-    for i in 0.. <t.len: c.hashType(t.sons[i], flags)
+    c &= char(t.kind)
+    for a in t.kids: c.hashType(a, flags, conf)
   if tfNotNil in t.flags and CoType notin flags: c &= "not nil"
 
 when defined(debugSigHashes):
@@ -254,19 +270,21 @@ when defined(debugSigHashes):
   #  select hash, type from sighashes where hash in
   # (select hash from sighashes group by hash having count(*) > 1) order by hash;
 
-proc hashType*(t: PType; flags: set[ConsiderFlag] = {CoType}): SigHash =
-  var c: MD5Context
+proc hashType*(t: PType; conf: ConfigRef; flags: set[ConsiderFlag] = {CoType}): SigHash =
+  result = default(SigHash)
+  var c: MD5Context = default(MD5Context)
   md5Init c
-  hashType c, t, flags+{CoOwnerSig}
-  md5Final c, result.Md5Digest
+  hashType c, t, flags+{CoOwnerSig}, conf
+  md5Final c, result.MD5Digest
   when defined(debugSigHashes):
     db.exec(sql"INSERT OR IGNORE INTO sighashes(type, hash) VALUES (?, ?)",
             typeToString(t), $result)
 
-proc hashProc*(s: PSym): SigHash =
-  var c: MD5Context
+proc hashProc(s: PSym; conf: ConfigRef): SigHash =
+  result = default(SigHash)
+  var c: MD5Context = default(MD5Context)
   md5Init c
-  hashType c, s.typ, {CoProc}
+  hashType c, s.typ, {CoProc}, conf
 
   var m = s
   while m.kind != skModule: m = m.owner
@@ -281,10 +299,11 @@ proc hashProc*(s: PSym): SigHash =
   # hash, we also hash the line information. This is pretty bad, but the best
   # solution for now:
   #c &= s.info.line
-  md5Final c, result.Md5Digest
+  md5Final c, result.MD5Digest
 
 proc hashNonProc*(s: PSym): SigHash =
-  var c: MD5Context
+  result = default(SigHash)
+  var c: MD5Context = default(MD5Context)
   md5Init c
   hashSym(c, s)
   var it = s
@@ -297,10 +316,11 @@ proc hashNonProc*(s: PSym): SigHash =
   # might cause:
   if s.kind == skParam:
     c &= s.position
-  md5Final c, result.Md5Digest
+  md5Final c, result.MD5Digest
 
 proc hashOwner*(s: PSym): SigHash =
-  var c: MD5Context
+  result = default(SigHash)
+  var c: MD5Context = default(MD5Context)
   md5Init c
   var m = s
   while m.kind != skModule: m = m.owner
@@ -310,4 +330,108 @@ proc hashOwner*(s: PSym): SigHash =
   c &= "."
   c &= m.name.s
 
-  md5Final c, result.Md5Digest
+  md5Final c, result.MD5Digest
+
+proc sigHash*(s: PSym; conf: ConfigRef): SigHash =
+  if s.kind in routineKinds and s.typ != nil:
+    result = hashProc(s, conf)
+  else:
+    result = hashNonProc(s)
+
+proc symBodyDigest*(graph: ModuleGraph, sym: PSym): SigHash
+
+proc hashBodyTree(graph: ModuleGraph, c: var MD5Context, n: PNode)
+
+proc hashVarSymBody(graph: ModuleGraph, c: var MD5Context, s: PSym) =
+  assert: s.kind in {skParam, skResult, skVar, skLet, skConst, skForVar}
+  if sfGlobal notin s.flags:
+    c &= char(s.kind)
+    c &= s.name.s
+  else:
+    c &= hashNonProc(s)
+    # this one works for let and const but not for var. True variables can change value
+    # later on. it is user resposibility to hash his global state if required
+    if s.ast != nil and s.ast.kind in {nkIdentDefs, nkConstDef}:
+      hashBodyTree(graph, c, s.ast[^1])
+    else:
+      hashBodyTree(graph, c, s.ast)
+
+proc hashBodyTree(graph: ModuleGraph, c: var MD5Context, n: PNode) =
+  # hash Nim tree recursing into simply
+  if n == nil:
+    c &= "nil"
+    return
+  c &= char(n.kind)
+  case n.kind
+  of nkEmpty, nkNilLit, nkType: discard
+  of nkIdent:
+    c &= n.ident.s
+  of nkSym:
+    if n.sym.kind in skProcKinds:
+      c &= symBodyDigest(graph, n.sym)
+    elif n.sym.kind in {skParam, skResult, skVar, skLet, skConst, skForVar}:
+      hashVarSymBody(graph, c, n.sym)
+    else:
+      c &= hashNonProc(n.sym)
+  of nkProcDef, nkFuncDef, nkTemplateDef, nkMacroDef:
+    discard # we track usage of proc symbols not their definition
+  of nkCharLit..nkUInt64Lit:
+    c &= n.intVal
+  of nkFloatLit..nkFloat64Lit:
+    c &= n.floatVal
+  of nkStrLit..nkTripleStrLit:
+    c &= n.strVal
+  else:
+    for i in 0..<n.len:
+      hashBodyTree(graph, c, n[i])
+
+proc symBodyDigest*(graph: ModuleGraph, sym: PSym): SigHash =
+  ## compute unique digest of the proc/func/method symbols
+  ## recursing into invoked symbols as well
+  assert(sym.kind in skProcKinds, $sym.kind)
+  result = default(SigHash)
+  graph.symBodyHashes.withValue(sym.id, value):
+    return value[]
+
+  var c: MD5Context = default(MD5Context)
+  md5Init(c)
+  c.hashType(sym.typ, {CoProc}, graph.config)
+  c &= char(sym.kind)
+  c.md5Final(result.MD5Digest)
+  graph.symBodyHashes[sym.id] = result # protect from recursion in the body
+
+  if sym.ast != nil:
+    md5Init(c)
+    c.md5Update(cast[cstring](result.addr), sizeof(result))
+    hashBodyTree(graph, c, getBody(graph, sym))
+    c.md5Final(result.MD5Digest)
+    graph.symBodyHashes[sym.id] = result
+
+proc idOrSig*(s: PSym, currentModule: string,
+              sigCollisions: var CountTable[SigHash]; conf: ConfigRef): Rope =
+  if s.kind in routineKinds and s.typ != nil:
+    # signatures for exported routines are reliable enough to
+    # produce a unique name and this means produced C++ is more stable regarding
+    # Nim changes:
+    let sig = hashProc(s, conf)
+    result = rope($sig)
+    #let m = if s.typ.callConv != ccInline: findPendingModule(m, s) else: m
+    let counter = sigCollisions.getOrDefault(sig)
+    #if sigs == "_jckmNePK3i2MFnWwZlp6Lg" and s.name.s == "contains":
+    #  echo "counter ", counter, " ", s.id
+    if counter != 0:
+      result.add "_" & rope(counter+1)
+    # this minor hack is necessary to make tests/collections/thashes compile.
+    # The inlined hash function's original module is ambiguous so we end up
+    # generating duplicate names otherwise:
+    if s.typ.callConv == ccInline:
+      result.add rope(currentModule)
+    sigCollisions.inc(sig)
+  else:
+    let sig = hashNonProc(s)
+    result = rope($sig)
+    let counter = sigCollisions.getOrDefault(sig)
+    if counter != 0:
+      result.add "_" & rope(counter+1)
+    sigCollisions.inc(sig)
+