summary refs log tree commit diff stats
path: root/lib/pure/hashes.nim
diff options
context:
space:
mode:
Diffstat (limited to 'lib/pure/hashes.nim')
-rw-r--r--lib/pure/hashes.nim232
1 files changed, 206 insertions, 26 deletions
diff --git a/lib/pure/hashes.nim b/lib/pure/hashes.nim
index 6d246d5b9..1038d55a1 100644
--- a/lib/pure/hashes.nim
+++ b/lib/pure/hashes.nim
@@ -190,7 +190,7 @@ proc hashData*(data: pointer, size: int): Hash =
   var h: Hash = 0
   when defined(js):
     var p: cstring
-    asm """`p` = `Data`"""
+    {.emit: """`p` = `Data`;""".}
   else:
     var p = cast[cstring](data)
   var i = 0
@@ -217,7 +217,7 @@ else:
 when defined(js):
   var objectID = 0
   proc getObjectId(x: pointer): int =
-    asm """
+    {.emit: """
       if (typeof `x` == "object") {
         if ("_NimID" in `x`)
           `result` = `x`["_NimID"];
@@ -226,7 +226,7 @@ when defined(js):
           `x`["_NimID"] = `result`;
         }
       }
-    """
+    """.}
 
 proc hash*(x: pointer): Hash {.inline.} =
   ## Efficient `hash` overload.
@@ -379,6 +379,158 @@ proc hashVmImplChar(x: openArray[char], sPos, ePos: int): Hash =
 proc hashVmImplByte(x: openArray[byte], sPos, ePos: int): Hash =
   raiseAssert "implementation override in compiler/vmops.nim"
 
+const k0 = 0xc3a5c85c97cb3127u64 # Primes on (2^63, 2^64) for various uses
+const k1 = 0xb492b66fbe98f273u64
+const k2 = 0x9ae16a3b2f90404fu64
+
+proc load4e(s: openArray[byte], o=0): uint32 {.inline.} =
+  uint32(s[o + 3]) shl 24 or uint32(s[o + 2]) shl 16 or
+  uint32(s[o + 1]) shl  8 or uint32(s[o + 0])
+
+proc load8e(s: openArray[byte], o=0): uint64 {.inline.} =
+  uint64(s[o + 7]) shl 56 or uint64(s[o + 6]) shl 48 or
+  uint64(s[o + 5]) shl 40 or uint64(s[o + 4]) shl 32 or
+  uint64(s[o + 3]) shl 24 or uint64(s[o + 2]) shl 16 or
+  uint64(s[o + 1]) shl  8 or uint64(s[o + 0])
+
+proc load4(s: openArray[byte], o=0): uint32 {.inline.} =
+  when nimvm: result = load4e(s, o)
+  else:
+    when declared copyMem: copyMem result.addr, s[o].addr, result.sizeof
+    else: result = load4e(s, o)
+
+proc load8(s: openArray[byte], o=0): uint64 {.inline.} =
+  when nimvm: result = load8e(s, o)
+  else:
+    when declared copyMem: copyMem result.addr, s[o].addr, result.sizeof
+    else: result = load8e(s, o)
+
+proc lenU(s: openArray[byte]): uint64 {.inline.} = s.len.uint64
+
+proc shiftMix(v: uint64): uint64 {.inline.} = v xor (v shr 47)
+
+proc rotR(v: uint64; bits: cint): uint64 {.inline.} =
+  (v shr bits) or (v shl (64 - bits))
+
+proc len16(u: uint64; v: uint64; mul: uint64): uint64 {.inline.} =
+  var a = (u xor v)*mul
+  a = a xor (a shr 47)
+  var b = (v xor a)*mul
+  b = b xor (b shr 47)
+  b*mul
+
+proc len0_16(s: openArray[byte]): uint64 {.inline.} =
+  if s.len >= 8:
+    let mul = k2 + 2*s.lenU
+    let a   = load8(s) + k2
+    let b   = load8(s, s.len - 8)
+    let c   = rotR(b, 37)*mul + a
+    let d   = (rotR(a, 25) + b)*mul
+    len16 c, d, mul
+  elif s.len >= 4:
+    let mul = k2 + 2*s.lenU
+    let a   = load4(s).uint64
+    len16 s.lenU + (a shl 3), load4(s, s.len - 4), mul
+  elif s.len > 0:
+    let a = uint32(s[0])
+    let b = uint32(s[s.len shr 1])
+    let c = uint32(s[s.len - 1])
+    let y = a      + (b shl 8)
+    let z = s.lenU + (c shl 2)
+    shiftMix(y*k2 xor z*k0)*k2
+  else: k2      # s.len == 0
+
+proc len17_32(s: openArray[byte]): uint64 {.inline.} =
+  let mul = k2 + 2*s.lenU
+  let a = load8(s)*k1
+  let b = load8(s, 8)
+  let c = load8(s, s.len - 8)*mul
+  let d = load8(s, s.len - 16)*k2
+  len16 rotR(a + b, 43) + rotR(c, 30) + d, a + rotR(b + k2, 18) + c, mul
+
+proc len33_64(s: openArray[byte]): uint64 {.inline.} =
+  let mul = k2 + 2*s.lenU
+  let a = load8(s)*k2
+  let b = load8(s, 8)
+  let c = load8(s, s.len - 8)*mul
+  let d = load8(s, s.len - 16)*k2
+  let y = rotR(a + b, 43) + rotR(c, 30) + d
+  let z = len16(y, a + rotR(b + k2, 18) + c, mul)
+  let e = load8(s, 16)*mul
+  let f = load8(s, 24)
+  let g = (y + load8(s, s.len - 32))*mul
+  let h = (z + load8(s, s.len - 24))*mul
+  len16 rotR(e + f, 43) + rotR(g, 30) + h, e + rotR(f + a, 18) + g, mul
+
+type Pair = tuple[first, second: uint64]
+
+proc weakLen32withSeeds2(w, x, y, z, a, b: uint64): Pair {.inline.} =
+  var a = a + w
+  var b = rotR(b + a + z, 21)
+  let c = a
+  a += x
+  a += y
+  b += rotR(a, 44)
+  result[0] = a + z
+  result[1] = b + c
+
+proc weakLen32withSeeds(s: openArray[byte]; o: int; a,b: uint64): Pair {.inline.} =
+  weakLen32withSeeds2 load8(s, o     ), load8(s, o + 8),
+                      load8(s, o + 16), load8(s, o + 24), a, b
+
+proc hashFarm(s: openArray[byte]): uint64 {.inline.} =
+  if s.len <= 16: return len0_16(s)
+  if s.len <= 32: return len17_32(s)
+  if s.len <= 64: return len33_64(s)
+  const seed = 81u64 # not const to use input `h`
+  var
+    o = 0         # s[] ptr arith -> variable origin variable `o`
+    x = seed
+    y = seed*k1 + 113
+    z = shiftMix(y*k2 + 113)*k2
+    v, w: Pair
+  x = x*k2 + load8(s)
+  let eos    = ((s.len - 1) div 64)*64
+  let last64 = eos + ((s.len - 1) and 63) - 63
+  while true:
+    x = rotR(x + y + v[0] + load8(s, o+8), 37)*k1
+    y = rotR(y + v[1] + load8(s, o+48), 42)*k1
+    x = x xor w[1]
+    y += v[0] + load8(s, o+40)
+    z = rotR(z + w[0], 33)*k1
+    v = weakLen32withSeeds(s, o+0 , v[1]*k1, x + w[0])
+    w = weakLen32withSeeds(s, o+32, z + w[1], y + load8(s, o+16))
+    swap z, x
+    inc o, 64
+    if o == eos: break
+  let mul = k1 + ((z and 0xff) shl 1)
+  o = last64
+  w[0] += (s.lenU - 1) and 63
+  v[0] += w[0]
+  w[0] += v[0]
+  x = rotR(x + y + v[0] + load8(s, o+8), 37)*mul
+  y = rotR(y + v[1] + load8(s, o+48), 42)*mul
+  x = x xor w[1]*9
+  y += v[0]*9 + load8(s, o+40)
+  z = rotR(z + w[0], 33)*mul
+  v = weakLen32withSeeds(s, o+0 , v[1]*mul, x + w[0])
+  w = weakLen32withSeeds(s, o+32, z + w[1], y + load8(s, o+16))
+  swap z, x
+  len16 len16(v[0],w[0],mul) + shiftMix(y)*k0 + z, len16(v[1],w[1],mul) + x, mul
+
+template jsNoInt64: untyped =
+  when defined js:
+    when compiles(compileOption("jsbigint64")):
+      when not compileOption("jsbigint64"): true
+      else: false
+    else: false
+  else: false
+const sHash2 = (when defined(nimStringHash2) or jsNoInt64(): true else: false)
+
+template maybeFailJS_Number =
+  when jsNoInt64() and not defined(nimStringHash2):
+    {.error: "Must use `-d:nimStringHash2` when using `--jsbigint64:off`".}
+
 proc hash*(x: string): Hash =
   ## Efficient hashing of strings.
   ##
@@ -387,11 +539,14 @@ proc hash*(x: string): Hash =
   ## * `hashIgnoreCase <#hashIgnoreCase,string>`_
   runnableExamples:
     doAssert hash("abracadabra") != hash("AbracadabrA")
-
-  when nimvm:
-    result = hashVmImpl(x, 0, high(x))
+  maybeFailJS_Number()
+  when not sHash2:
+    result = cast[Hash](hashFarm(toOpenArrayByte(x, 0, x.high)))
   else:
-    result = murmurHash(toOpenArrayByte(x, 0, high(x)))
+    #when nimvm:
+    #  result = hashVmImpl(x, 0, high(x))
+    when true:
+      result = murmurHash(toOpenArrayByte(x, 0, high(x)))
 
 proc hash*(x: cstring): Hash =
   ## Efficient hashing of null-terminated strings.
@@ -400,14 +555,22 @@ proc hash*(x: cstring): Hash =
     doAssert hash(cstring"AbracadabrA") == hash("AbracadabrA")
     doAssert hash(cstring"abracadabra") != hash(cstring"AbracadabrA")
 
-  when nimvm:
-    hashVmImpl(x, 0, high(x))
-  else:
-    when not defined(js):
-      murmurHash(toOpenArrayByte(x, 0, x.high))
-    else:
+  maybeFailJS_Number()
+  when not sHash2:
+    when defined js:
       let xx = $x
-      murmurHash(toOpenArrayByte(xx, 0, high(xx)))
+      result = cast[Hash](hashFarm(toOpenArrayByte(xx, 0, xx.high)))
+    else:
+      result = cast[Hash](hashFarm(toOpenArrayByte(x, 0, x.high)))
+  else:
+    #when nimvm:
+    #  result = hashVmImpl(x, 0, high(x))
+    when true:
+      when not defined(js):
+        result = murmurHash(toOpenArrayByte(x, 0, x.high))
+      else:
+        let xx = $x
+        result = murmurHash(toOpenArrayByte(xx, 0, high(xx)))
 
 proc hash*(sBuf: string, sPos, ePos: int): Hash =
   ## Efficient hashing of a string buffer, from starting
@@ -418,7 +581,11 @@ proc hash*(sBuf: string, sPos, ePos: int): Hash =
     var a = "abracadabra"
     doAssert hash(a, 0, 3) == hash(a, 7, 10)
 
-  murmurHash(toOpenArrayByte(sBuf, sPos, ePos))
+  maybeFailJS_Number()
+  when not sHash2:
+    result = cast[Hash](hashFarm(toOpenArrayByte(sBuf, sPos, ePos)))
+  else:
+    murmurHash(toOpenArrayByte(sBuf, sPos, ePos))
 
 proc hashIgnoreStyle*(x: string): Hash =
   ## Efficient hashing of strings; style is ignored.
@@ -553,12 +720,18 @@ proc hash*[A](x: openArray[A]): Hash =
   ## Efficient hashing of arrays and sequences.
   ## There must be a `hash` proc defined for the element type `A`.
   when A is byte:
-    result = murmurHash(x)
+    when not sHash2:
+      result = cast[Hash](hashFarm(x))
+    else:
+      result = murmurHash(x)
   elif A is char:
-    when nimvm:
-      result = hashVmImplChar(x, 0, x.high)
+    when not sHash2:
+      result = cast[Hash](hashFarm(toOpenArrayByte(x, 0, x.high)))
     else:
-      result = murmurHash(toOpenArrayByte(x, 0, x.high))
+      #when nimvm:
+      #  result = hashVmImplChar(x, 0, x.high)
+      when true:
+        result = murmurHash(toOpenArrayByte(x, 0, x.high))
   else:
     result = 0
     for a in x:
@@ -574,17 +747,24 @@ proc hash*[A](aBuf: openArray[A], sPos, ePos: int): Hash =
   runnableExamples:
     let a = [1, 2, 5, 1, 2, 6]
     doAssert hash(a, 0, 1) == hash(a, 3, 4)
-
   when A is byte:
-    when nimvm:
-      result = hashVmImplByte(aBuf, sPos, ePos)
+    maybeFailJS_Number()
+    when not sHash2:
+      result = cast[Hash](hashFarm(toOpenArray(aBuf, sPos, ePos)))
     else:
-      result = murmurHash(toOpenArray(aBuf, sPos, ePos))
+      #when nimvm:
+      #  result = hashVmImplByte(aBuf, sPos, ePos)
+      when true:
+        result = murmurHash(toOpenArray(aBuf, sPos, ePos))
   elif A is char:
-    when nimvm:
-      result = hashVmImplChar(aBuf, sPos, ePos)
+    maybeFailJS_Number()
+    when not sHash2:
+      result = cast[Hash](hashFarm(toOpenArrayByte(aBuf, sPos, ePos)))
     else:
-      result = murmurHash(toOpenArrayByte(aBuf, sPos, ePos))
+      #when nimvm:
+      #  result = hashVmImplChar(aBuf, sPos, ePos)
+      when true:
+        result = murmurHash(toOpenArrayByte(aBuf, sPos, ePos))
   else:
     for i in sPos .. ePos:
       result = result !& hash(aBuf[i])