added hash procs for handling portions of strings/arrays/seqs.

added tests at bottom of file changed some doco layout Makes hashing iteratively through buffers faster when you don't have to pass copied portions of the buffer to the hash function
author: JamesP <jlp765@gmail.com> 2015-10-07 13:03:31 +1000
committer: JamesP <jlp765@gmail.com> 2015-10-07 13:03:31 +1000
commit: 07eaafca698def254fff1f3ed843526441dc91b7 (patch)
tree: f143c7e51d28aba8d58b49aee2392dfe6b494008
parent: 80ee72956af12b54853bfe05c1bd3fd3e47ff052 (diff)
download: Nim-07eaafca698def254fff1f3ed843526441dc91b7.tar.gz
1 files changed, 70 insertions, 4 deletions
diff --git a/lib/pure/hashes.nim b/lib/pure/hashes.nim
index 61c16129b..11af81149 100644
--- a/lib/pure/hashes.nim
+++ b/lib/pure/hashes.nim
@@ -8,9 +8,10 @@
 #
 
 ## This module implements efficient computations of hash values for diverse
-## Nim types. All the procs are based on these two building blocks: the `!&
-## proc <#!&>`_ used to start or mix a hash value, and the `!$ proc <#!$>`_
-## used to *finish* the hash value.  If you want to implement hash procs for
+## Nim types. All the procs are based on these two building blocks:
+## - `!& proc <#!&>`_ used to start or mix a hash value, and
+## - `!$ proc <#!$>`_ used to *finish* the hash value.
+## If you want to implement hash procs for
 ## your custom types you will end up writing the following kind of skeleton of
 ## code:
 ##
@@ -108,7 +109,7 @@ proc hash*(x: int): Hash {.inline.} =
   result = x
 
 proc hash*(x: int64): Hash {.inline.} =
-  ## efficient hashing of integers
+  ## efficient hashing of int64 integers
   result = toU32(x)
 
 proc hash*(x: char): Hash {.inline.} =
@@ -126,6 +127,16 @@ proc hash*(x: string): Hash =
     h = h !& ord(x[i])
   result = !$h
 
+proc hash*(sBuf: string, sPos, ePos: int): Hash =
+  ## efficient hashing of a string buffer, from starting
+  ## position `sPos` to ending position `ePos`
+  ##
+  ## ``hash(myStr, 0, myStr.high)`` is equivalent to ``hash(myStr)``
+  var h: Hash = 0
+  for i in sPos..ePos:
+    h = h !& ord(sBuf[i])
+  result = !$h
+
 proc hashIgnoreStyle*(x: string): Hash =
   ## efficient hashing of strings; style is ignored
   var h: Hash = 0
@@ -145,6 +156,27 @@ proc hashIgnoreStyle*(x: string): Hash =
 
   result = !$h
 
+proc hashIgnoreStyle*(sBuf: string, sPos, ePos: int): Hash =
+  ## efficient hashing of a string buffer, from starting
+  ## position `sPos` to ending position `ePos`; style is ignored
+  ##
+  ## ``hashIgnoreStyle(myBuf, 0, myBuf.high)`` is equivalent
+  ## to ``hashIgnoreStyle(myBuf)``
+  var h: Hash = 0
+  var i = sPos
+  while i <= ePos:
+    var c = sBuf[i]
+    if c == '_':
+      inc(i)
+    elif isMagicIdentSeparatorRune(cstring(sBuf), i):
+      inc(i, magicIdentSeparatorRuneByteWidth)
+    else:
+      if c in {'A'..'Z'}:
+        c = chr(ord(c) + (ord('a') - ord('A'))) # toLower()
+      h = h !& ord(c)
+      inc(i)
+  result = !$h
+
 proc hashIgnoreCase*(x: string): Hash =
   ## efficient hashing of strings; case is ignored
   var h: Hash = 0
@@ -155,7 +187,22 @@ proc hashIgnoreCase*(x: string): Hash =
     h = h !& ord(c)
   result = !$h
 
+proc hashIgnoreCase*(sBuf: string, sPos, ePos: int): Hash =
+  ## efficient hashing of a string buffer, from starting
+  ## position `sPos` to ending position `ePos`; case is ignored
+  ##
+  ## ``hashIgnoreCase(myBuf, 0, myBuf.high)`` is equivalent
+  ## to ``hashIgnoreCase(myBuf)``
+  var h: Hash = 0
+  for i in sPos..ePos:
+    var c = sBuf[i]
+    if c in {'A'..'Z'}:
+      c = chr(ord(c) + (ord('a') - ord('A'))) # toLower()
+    h = h !& ord(c)
+  result = !$h
+
 proc hash*(x: float): Hash {.inline.} =
+  ## efficient hashing of floats.
   var y = x + 1.0
   result = cast[ptr Hash](addr(y))[]
 
@@ -173,10 +220,29 @@ proc hash*[T: tuple](x: T): Hash =
   result = !$result
 
 proc hash*[A](x: openArray[A]): Hash =
+  ## efficient hashing of arrays and sequences.
   for it in items(x): result = result !& hash(it)
   result = !$result
 
+proc hash*[A](aBuf: openArray[A], sPos, ePos: int): Hash =
+  ## efficient hashing of portions of arrays and sequences.
+  ##
+  ## ``hash(myBuf, 0, myBuf.high)`` is equivalent to ``hash(myBuf)``
+  for i in sPos..ePos:
+    result = result !& hash(aBuf[i])
+  result = !$result
+
 proc hash*[A](x: set[A]): Hash =
+  ## efficient hashing of sets.
   for it in items(x): result = result !& hash(it)
   result = !$result
 
+when isMainModule:
+  doAssert( hash("aa bb aaaa1234") == hash("aa bb aaaa1234", 0, 13) )
+  doAssert( hashIgnoreCase("aa bb aaaa1234") == hash("aa bb aaaa1234") )
+  doAssert( hashIgnoreStyle("aa bb aaaa1234") == hashIgnoreCase("aa bb aaaa1234") )
+  let xx = @['H','e','l','l','o']
+  let ss = "Hello"
+  doAssert( hash(xx) == hash(ss) )
+  doAssert( hash(xx) == hash(xx, 0, xx.high) )
+  doAssert( hash(ss) == hash(ss, 0, ss.high) )
author	JamesP <jlp765@gmail.com>	2015-10-07 13:03:31 +1000
committer	JamesP <jlp765@gmail.com>	2015-10-07 13:03:31 +1000
commit	07eaafca698def254fff1f3ed843526441dc91b7 (patch)
tree	f143c7e51d28aba8d58b49aee2392dfe6b494008
parent	80ee72956af12b54853bfe05c1bd3fd3e47ff052 (diff)
download	Nim-07eaafca698def254fff1f3ed843526441dc91b7.tar.gz