summary refs log tree commit diff stats
path: root/lib/pure/hashes.nim
diff options
context:
space:
mode:
Diffstat (limited to 'lib/pure/hashes.nim')
-rw-r--r--lib/pure/hashes.nim281
1 files changed, 226 insertions, 55 deletions
diff --git a/lib/pure/hashes.nim b/lib/pure/hashes.nim
index e88210757..1038d55a1 100644
--- a/lib/pure/hashes.nim
+++ b/lib/pure/hashes.nim
@@ -62,7 +62,7 @@ runnableExamples:
 ## ========
 ## * `md5 module <md5.html>`_ for the MD5 checksum algorithm
 ## * `base64 module <base64.html>`_ for a Base64 encoder and decoder
-## * `std/sha1 module <sha1.html>`_ for the SHA-1 checksum algorithm
+## * `sha1 module <sha1.html>`_ for the SHA-1 checksum algorithm
 ## * `tables module <tables.html>`_ for hash tables
 
 import std/private/since
@@ -190,7 +190,7 @@ proc hashData*(data: pointer, size: int): Hash =
   var h: Hash = 0
   when defined(js):
     var p: cstring
-    asm """`p` = `Data`"""
+    {.emit: """`p` = `Data`;""".}
   else:
     var p = cast[cstring](data)
   var i = 0
@@ -217,7 +217,7 @@ else:
 when defined(js):
   var objectID = 0
   proc getObjectId(x: pointer): int =
-    asm """
+    {.emit: """
       if (typeof `x` == "object") {
         if ("_NimID" in `x`)
           `result` = `x`["_NimID"];
@@ -226,7 +226,7 @@ when defined(js):
           `x`["_NimID"] = `result`;
         }
       }
-    """
+    """.}
 
 proc hash*(x: pointer): Hash {.inline.} =
   ## Efficient `hash` overload.
@@ -247,7 +247,7 @@ proc hash*[T](x: ptr[T]): Hash {.inline.} =
 when defined(nimPreviewHashRef) or defined(nimdoc):
   proc hash*[T](x: ref[T]): Hash {.inline.} =
     ## Efficient `hash` overload.
-    ## 
+    ##
     ## .. important:: Use `-d:nimPreviewHashRef` to
     ##    enable hashing `ref`s. It is expected that this behavior
     ##    becomes the new default in upcoming versions.
@@ -319,16 +319,24 @@ proc murmurHash(x: openArray[byte]): Hash =
     h1: uint32
     i = 0
 
+
+  template impl =
+    var j = stepSize
+    while j > 0:
+      dec j
+      k1 = (k1 shl 8) or (ord(x[i+j])).uint32
+
   # body
   while i < n * stepSize:
     var k1: uint32
-    when defined(js) or defined(sparc) or defined(sparc64):
-      var j = stepSize
-      while j > 0:
-        dec j
-        k1 = (k1 shl 8) or (ord(x[i+j])).uint32
+
+    when nimvm:
+      impl()
     else:
-      k1 = cast[ptr uint32](unsafeAddr x[i])[]
+      when declared(copyMem):
+        copyMem(addr k1, addr x[i], 4)
+      else:
+        impl()
     inc i, stepSize
 
     k1 = imul(k1, c1)
@@ -360,16 +368,168 @@ proc murmurHash(x: openArray[byte]): Hash =
   return cast[Hash](h1)
 
 proc hashVmImpl(x: cstring, sPos, ePos: int): Hash =
-  doAssert false, "implementation override in compiler/vmops.nim"
+  raiseAssert "implementation override in compiler/vmops.nim"
 
 proc hashVmImpl(x: string, sPos, ePos: int): Hash =
-  doAssert false, "implementation override in compiler/vmops.nim"
+  raiseAssert "implementation override in compiler/vmops.nim"
 
 proc hashVmImplChar(x: openArray[char], sPos, ePos: int): Hash =
-  doAssert false, "implementation override in compiler/vmops.nim"
+  raiseAssert "implementation override in compiler/vmops.nim"
 
 proc hashVmImplByte(x: openArray[byte], sPos, ePos: int): Hash =
-  doAssert false, "implementation override in compiler/vmops.nim"
+  raiseAssert "implementation override in compiler/vmops.nim"
+
+const k0 = 0xc3a5c85c97cb3127u64 # Primes on (2^63, 2^64) for various uses
+const k1 = 0xb492b66fbe98f273u64
+const k2 = 0x9ae16a3b2f90404fu64
+
+proc load4e(s: openArray[byte], o=0): uint32 {.inline.} =
+  uint32(s[o + 3]) shl 24 or uint32(s[o + 2]) shl 16 or
+  uint32(s[o + 1]) shl  8 or uint32(s[o + 0])
+
+proc load8e(s: openArray[byte], o=0): uint64 {.inline.} =
+  uint64(s[o + 7]) shl 56 or uint64(s[o + 6]) shl 48 or
+  uint64(s[o + 5]) shl 40 or uint64(s[o + 4]) shl 32 or
+  uint64(s[o + 3]) shl 24 or uint64(s[o + 2]) shl 16 or
+  uint64(s[o + 1]) shl  8 or uint64(s[o + 0])
+
+proc load4(s: openArray[byte], o=0): uint32 {.inline.} =
+  when nimvm: result = load4e(s, o)
+  else:
+    when declared copyMem: copyMem result.addr, s[o].addr, result.sizeof
+    else: result = load4e(s, o)
+
+proc load8(s: openArray[byte], o=0): uint64 {.inline.} =
+  when nimvm: result = load8e(s, o)
+  else:
+    when declared copyMem: copyMem result.addr, s[o].addr, result.sizeof
+    else: result = load8e(s, o)
+
+proc lenU(s: openArray[byte]): uint64 {.inline.} = s.len.uint64
+
+proc shiftMix(v: uint64): uint64 {.inline.} = v xor (v shr 47)
+
+proc rotR(v: uint64; bits: cint): uint64 {.inline.} =
+  (v shr bits) or (v shl (64 - bits))
+
+proc len16(u: uint64; v: uint64; mul: uint64): uint64 {.inline.} =
+  var a = (u xor v)*mul
+  a = a xor (a shr 47)
+  var b = (v xor a)*mul
+  b = b xor (b shr 47)
+  b*mul
+
+proc len0_16(s: openArray[byte]): uint64 {.inline.} =
+  if s.len >= 8:
+    let mul = k2 + 2*s.lenU
+    let a   = load8(s) + k2
+    let b   = load8(s, s.len - 8)
+    let c   = rotR(b, 37)*mul + a
+    let d   = (rotR(a, 25) + b)*mul
+    len16 c, d, mul
+  elif s.len >= 4:
+    let mul = k2 + 2*s.lenU
+    let a   = load4(s).uint64
+    len16 s.lenU + (a shl 3), load4(s, s.len - 4), mul
+  elif s.len > 0:
+    let a = uint32(s[0])
+    let b = uint32(s[s.len shr 1])
+    let c = uint32(s[s.len - 1])
+    let y = a      + (b shl 8)
+    let z = s.lenU + (c shl 2)
+    shiftMix(y*k2 xor z*k0)*k2
+  else: k2      # s.len == 0
+
+proc len17_32(s: openArray[byte]): uint64 {.inline.} =
+  let mul = k2 + 2*s.lenU
+  let a = load8(s)*k1
+  let b = load8(s, 8)
+  let c = load8(s, s.len - 8)*mul
+  let d = load8(s, s.len - 16)*k2
+  len16 rotR(a + b, 43) + rotR(c, 30) + d, a + rotR(b + k2, 18) + c, mul
+
+proc len33_64(s: openArray[byte]): uint64 {.inline.} =
+  let mul = k2 + 2*s.lenU
+  let a = load8(s)*k2
+  let b = load8(s, 8)
+  let c = load8(s, s.len - 8)*mul
+  let d = load8(s, s.len - 16)*k2
+  let y = rotR(a + b, 43) + rotR(c, 30) + d
+  let z = len16(y, a + rotR(b + k2, 18) + c, mul)
+  let e = load8(s, 16)*mul
+  let f = load8(s, 24)
+  let g = (y + load8(s, s.len - 32))*mul
+  let h = (z + load8(s, s.len - 24))*mul
+  len16 rotR(e + f, 43) + rotR(g, 30) + h, e + rotR(f + a, 18) + g, mul
+
+type Pair = tuple[first, second: uint64]
+
+proc weakLen32withSeeds2(w, x, y, z, a, b: uint64): Pair {.inline.} =
+  var a = a + w
+  var b = rotR(b + a + z, 21)
+  let c = a
+  a += x
+  a += y
+  b += rotR(a, 44)
+  result[0] = a + z
+  result[1] = b + c
+
+proc weakLen32withSeeds(s: openArray[byte]; o: int; a,b: uint64): Pair {.inline.} =
+  weakLen32withSeeds2 load8(s, o     ), load8(s, o + 8),
+                      load8(s, o + 16), load8(s, o + 24), a, b
+
+proc hashFarm(s: openArray[byte]): uint64 {.inline.} =
+  if s.len <= 16: return len0_16(s)
+  if s.len <= 32: return len17_32(s)
+  if s.len <= 64: return len33_64(s)
+  const seed = 81u64 # not const to use input `h`
+  var
+    o = 0         # s[] ptr arith -> variable origin variable `o`
+    x = seed
+    y = seed*k1 + 113
+    z = shiftMix(y*k2 + 113)*k2
+    v, w: Pair
+  x = x*k2 + load8(s)
+  let eos    = ((s.len - 1) div 64)*64
+  let last64 = eos + ((s.len - 1) and 63) - 63
+  while true:
+    x = rotR(x + y + v[0] + load8(s, o+8), 37)*k1
+    y = rotR(y + v[1] + load8(s, o+48), 42)*k1
+    x = x xor w[1]
+    y += v[0] + load8(s, o+40)
+    z = rotR(z + w[0], 33)*k1
+    v = weakLen32withSeeds(s, o+0 , v[1]*k1, x + w[0])
+    w = weakLen32withSeeds(s, o+32, z + w[1], y + load8(s, o+16))
+    swap z, x
+    inc o, 64
+    if o == eos: break
+  let mul = k1 + ((z and 0xff) shl 1)
+  o = last64
+  w[0] += (s.lenU - 1) and 63
+  v[0] += w[0]
+  w[0] += v[0]
+  x = rotR(x + y + v[0] + load8(s, o+8), 37)*mul
+  y = rotR(y + v[1] + load8(s, o+48), 42)*mul
+  x = x xor w[1]*9
+  y += v[0]*9 + load8(s, o+40)
+  z = rotR(z + w[0], 33)*mul
+  v = weakLen32withSeeds(s, o+0 , v[1]*mul, x + w[0])
+  w = weakLen32withSeeds(s, o+32, z + w[1], y + load8(s, o+16))
+  swap z, x
+  len16 len16(v[0],w[0],mul) + shiftMix(y)*k0 + z, len16(v[1],w[1],mul) + x, mul
+
+template jsNoInt64: untyped =
+  when defined js:
+    when compiles(compileOption("jsbigint64")):
+      when not compileOption("jsbigint64"): true
+      else: false
+    else: false
+  else: false
+const sHash2 = (when defined(nimStringHash2) or jsNoInt64(): true else: false)
+
+template maybeFailJS_Number =
+  when jsNoInt64() and not defined(nimStringHash2):
+    {.error: "Must use `-d:nimStringHash2` when using `--jsbigint64:off`".}
 
 proc hash*(x: string): Hash =
   ## Efficient hashing of strings.
@@ -379,16 +539,13 @@ proc hash*(x: string): Hash =
   ## * `hashIgnoreCase <#hashIgnoreCase,string>`_
   runnableExamples:
     doAssert hash("abracadabra") != hash("AbracadabrA")
-
-  when not defined(nimToOpenArrayCString):
-    result = 0
-    for c in x:
-      result = result !& ord(c)
-    result = !$result
+  maybeFailJS_Number()
+  when not sHash2:
+    result = cast[Hash](hashFarm(toOpenArrayByte(x, 0, x.high)))
   else:
-    when nimvm:
-      result = hashVmImpl(x, 0, high(x))
-    else:
+    #when nimvm:
+    #  result = hashVmImpl(x, 0, high(x))
+    when true:
       result = murmurHash(toOpenArrayByte(x, 0, high(x)))
 
 proc hash*(x: cstring): Hash =
@@ -398,22 +555,22 @@ proc hash*(x: cstring): Hash =
     doAssert hash(cstring"AbracadabrA") == hash("AbracadabrA")
     doAssert hash(cstring"abracadabra") != hash(cstring"AbracadabrA")
 
-  when not defined(nimToOpenArrayCString):
-    result = 0
-    var i = 0
-    while x[i] != '\0':
-      result = result !& ord(x[i])
-      inc i
-    result = !$result
-  else:
-    when nimvm:
-      hashVmImpl(x, 0, high(x))
+  maybeFailJS_Number()
+  when not sHash2:
+    when defined js:
+      let xx = $x
+      result = cast[Hash](hashFarm(toOpenArrayByte(xx, 0, xx.high)))
     else:
-      when not defined(js) and defined(nimToOpenArrayCString):
-        murmurHash(toOpenArrayByte(x, 0, x.high))
+      result = cast[Hash](hashFarm(toOpenArrayByte(x, 0, x.high)))
+  else:
+    #when nimvm:
+    #  result = hashVmImpl(x, 0, high(x))
+    when true:
+      when not defined(js):
+        result = murmurHash(toOpenArrayByte(x, 0, x.high))
       else:
         let xx = $x
-        murmurHash(toOpenArrayByte(xx, 0, high(xx)))
+        result = murmurHash(toOpenArrayByte(xx, 0, high(xx)))
 
 proc hash*(sBuf: string, sPos, ePos: int): Hash =
   ## Efficient hashing of a string buffer, from starting
@@ -424,11 +581,9 @@ proc hash*(sBuf: string, sPos, ePos: int): Hash =
     var a = "abracadabra"
     doAssert hash(a, 0, 3) == hash(a, 7, 10)
 
-  when not defined(nimToOpenArrayCString):
-    result = 0
-    for i in sPos..ePos:
-      result = result !& ord(sBuf[i])
-    result = !$result
+  maybeFailJS_Number()
+  when not sHash2:
+    result = cast[Hash](hashFarm(toOpenArrayByte(sBuf, sPos, ePos)))
   else:
     murmurHash(toOpenArrayByte(sBuf, sPos, ePos))
 
@@ -521,7 +676,7 @@ proc hashIgnoreCase*(sBuf: string, sPos, ePos: int): Hash =
     h = h !& ord(c)
   result = !$h
 
-proc hash*[T: tuple | object | proc](x: T): Hash =
+proc hash*[T: tuple | object | proc | iterator {.closure.}](x: T): Hash =
   ## Efficient `hash` overload.
   runnableExamples:
     # for `tuple|object`, `hash` must be defined for each component of `x`.
@@ -554,8 +709,9 @@ proc hash*[T: tuple | object | proc](x: T): Hash =
   when T is "closure":
     result = hash((rawProc(x), rawEnv(x)))
   elif T is (proc):
-    result = hash(pointer(x))
+    result = hash(cast[pointer](x))
   else:
+    result = 0
     for f in fields(x):
       result = result !& hash(f)
     result = !$result
@@ -564,13 +720,20 @@ proc hash*[A](x: openArray[A]): Hash =
   ## Efficient hashing of arrays and sequences.
   ## There must be a `hash` proc defined for the element type `A`.
   when A is byte:
-    result = murmurHash(x)
+    when not sHash2:
+      result = cast[Hash](hashFarm(x))
+    else:
+      result = murmurHash(x)
   elif A is char:
-    when nimvm:
-      result = hashVmImplChar(x, 0, x.high)
+    when not sHash2:
+      result = cast[Hash](hashFarm(toOpenArrayByte(x, 0, x.high)))
     else:
-      result = murmurHash(toOpenArrayByte(x, 0, x.high))
+      #when nimvm:
+      #  result = hashVmImplChar(x, 0, x.high)
+      when true:
+        result = murmurHash(toOpenArrayByte(x, 0, x.high))
   else:
+    result = 0
     for a in x:
       result = result !& hash(a)
     result = !$result
@@ -584,17 +747,24 @@ proc hash*[A](aBuf: openArray[A], sPos, ePos: int): Hash =
   runnableExamples:
     let a = [1, 2, 5, 1, 2, 6]
     doAssert hash(a, 0, 1) == hash(a, 3, 4)
-
   when A is byte:
-    when nimvm:
-      result = hashVmImplByte(aBuf, sPos, ePos)
+    maybeFailJS_Number()
+    when not sHash2:
+      result = cast[Hash](hashFarm(toOpenArray(aBuf, sPos, ePos)))
     else:
-      result = murmurHash(toOpenArray(aBuf, sPos, ePos))
+      #when nimvm:
+      #  result = hashVmImplByte(aBuf, sPos, ePos)
+      when true:
+        result = murmurHash(toOpenArray(aBuf, sPos, ePos))
   elif A is char:
-    when nimvm:
-      result = hashVmImplChar(aBuf, sPos, ePos)
+    maybeFailJS_Number()
+    when not sHash2:
+      result = cast[Hash](hashFarm(toOpenArrayByte(aBuf, sPos, ePos)))
     else:
-      result = murmurHash(toOpenArrayByte(aBuf, sPos, ePos))
+      #when nimvm:
+      #  result = hashVmImplChar(aBuf, sPos, ePos)
+      when true:
+        result = murmurHash(toOpenArrayByte(aBuf, sPos, ePos))
   else:
     for i in sPos .. ePos:
       result = result !& hash(aBuf[i])
@@ -603,6 +773,7 @@ proc hash*[A](aBuf: openArray[A], sPos, ePos: int): Hash =
 proc hash*[A](x: set[A]): Hash =
   ## Efficient hashing of sets.
   ## There must be a `hash` proc defined for the element type `A`.
+  result = 0
   for it in items(x):
     result = result !& hash(it)
   result = !$result