summary refs log tree commit diff stats
diff options
context:
space:
mode:
authorJamesP <jlp765@gmail.com>2015-10-07 13:03:31 +1000
committerJamesP <jlp765@gmail.com>2015-10-07 13:03:31 +1000
commit07eaafca698def254fff1f3ed843526441dc91b7 (patch)
treef143c7e51d28aba8d58b49aee2392dfe6b494008
parent80ee72956af12b54853bfe05c1bd3fd3e47ff052 (diff)
downloadNim-07eaafca698def254fff1f3ed843526441dc91b7.tar.gz
added hash procs for handling portions of strings/arrays/seqs.
added tests at bottom of file
changed some doco layout

Makes hashing iteratively through buffers faster when you
don't have to pass copied portions of the buffer to the
hash function
-rw-r--r--lib/pure/hashes.nim74
1 files changed, 70 insertions, 4 deletions
diff --git a/lib/pure/hashes.nim b/lib/pure/hashes.nim
index 61c16129b..11af81149 100644
--- a/lib/pure/hashes.nim
+++ b/lib/pure/hashes.nim
@@ -8,9 +8,10 @@
 #
 
 ## This module implements efficient computations of hash values for diverse
-## Nim types. All the procs are based on these two building blocks: the `!&
-## proc <#!&>`_ used to start or mix a hash value, and the `!$ proc <#!$>`_
-## used to *finish* the hash value.  If you want to implement hash procs for
+## Nim types. All the procs are based on these two building blocks:
+## - `!& proc <#!&>`_ used to start or mix a hash value, and
+## - `!$ proc <#!$>`_ used to *finish* the hash value.
+## If you want to implement hash procs for
 ## your custom types you will end up writing the following kind of skeleton of
 ## code:
 ##
@@ -108,7 +109,7 @@ proc hash*(x: int): Hash {.inline.} =
   result = x
 
 proc hash*(x: int64): Hash {.inline.} =
-  ## efficient hashing of integers
+  ## efficient hashing of int64 integers
   result = toU32(x)
 
 proc hash*(x: char): Hash {.inline.} =
@@ -126,6 +127,16 @@ proc hash*(x: string): Hash =
     h = h !& ord(x[i])
   result = !$h
 
+proc hash*(sBuf: string, sPos, ePos: int): Hash =
+  ## efficient hashing of a string buffer, from starting
+  ## position `sPos` to ending position `ePos`
+  ##
+  ## ``hash(myStr, 0, myStr.high)`` is equivalent to ``hash(myStr)``
+  var h: Hash = 0
+  for i in sPos..ePos:
+    h = h !& ord(sBuf[i])
+  result = !$h
+
 proc hashIgnoreStyle*(x: string): Hash =
   ## efficient hashing of strings; style is ignored
   var h: Hash = 0
@@ -145,6 +156,27 @@ proc hashIgnoreStyle*(x: string): Hash =
 
   result = !$h
 
+proc hashIgnoreStyle*(sBuf: string, sPos, ePos: int): Hash =
+  ## efficient hashing of a string buffer, from starting
+  ## position `sPos` to ending position `ePos`; style is ignored
+  ##
+  ## ``hashIgnoreStyle(myBuf, 0, myBuf.high)`` is equivalent
+  ## to ``hashIgnoreStyle(myBuf)``
+  var h: Hash = 0
+  var i = sPos
+  while i <= ePos:
+    var c = sBuf[i]
+    if c == '_':
+      inc(i)
+    elif isMagicIdentSeparatorRune(cstring(sBuf), i):
+      inc(i, magicIdentSeparatorRuneByteWidth)
+    else:
+      if c in {'A'..'Z'}:
+        c = chr(ord(c) + (ord('a') - ord('A'))) # toLower()
+      h = h !& ord(c)
+      inc(i)
+  result = !$h
+
 proc hashIgnoreCase*(x: string): Hash =
   ## efficient hashing of strings; case is ignored
   var h: Hash = 0
@@ -155,7 +187,22 @@ proc hashIgnoreCase*(x: string): Hash =
     h = h !& ord(c)
   result = !$h
 
+proc hashIgnoreCase*(sBuf: string, sPos, ePos: int): Hash =
+  ## efficient hashing of a string buffer, from starting
+  ## position `sPos` to ending position `ePos`; case is ignored
+  ##
+  ## ``hashIgnoreCase(myBuf, 0, myBuf.high)`` is equivalent
+  ## to ``hashIgnoreCase(myBuf)``
+  var h: Hash = 0
+  for i in sPos..ePos:
+    var c = sBuf[i]
+    if c in {'A'..'Z'}:
+      c = chr(ord(c) + (ord('a') - ord('A'))) # toLower()
+    h = h !& ord(c)
+  result = !$h
+
 proc hash*(x: float): Hash {.inline.} =
+  ## efficient hashing of floats.
   var y = x + 1.0
   result = cast[ptr Hash](addr(y))[]
 
@@ -173,10 +220,29 @@ proc hash*[T: tuple](x: T): Hash =
   result = !$result
 
 proc hash*[A](x: openArray[A]): Hash =
+  ## efficient hashing of arrays and sequences.
   for it in items(x): result = result !& hash(it)
   result = !$result
 
+proc hash*[A](aBuf: openArray[A], sPos, ePos: int): Hash =
+  ## efficient hashing of portions of arrays and sequences.
+  ##
+  ## ``hash(myBuf, 0, myBuf.high)`` is equivalent to ``hash(myBuf)``
+  for i in sPos..ePos:
+    result = result !& hash(aBuf[i])
+  result = !$result
+
 proc hash*[A](x: set[A]): Hash =
+  ## efficient hashing of sets.
   for it in items(x): result = result !& hash(it)
   result = !$result
 
+when isMainModule:
+  doAssert( hash("aa bb aaaa1234") == hash("aa bb aaaa1234", 0, 13) )
+  doAssert( hashIgnoreCase("aa bb aaaa1234") == hash("aa bb aaaa1234") )
+  doAssert( hashIgnoreStyle("aa bb aaaa1234") == hashIgnoreCase("aa bb aaaa1234") )
+  let xx = @['H','e','l','l','o']
+  let ss = "Hello"
+  doAssert( hash(xx) == hash(ss) )
+  doAssert( hash(xx) == hash(xx, 0, xx.high) )
+  doAssert( hash(ss) == hash(ss, 0, ss.high) )