Implement #2811 - Unicode en-dash (U+2013) as hump/snake alt.

author: Oscar Campbell <oscar@campbell.nu> 2015-05-31 01:31:06 +0200
committer: Oscar Campbell <oscar@campbell.nu> 2015-05-31 01:31:06 +0200
commit: 1b4db5a34caf301995b3c08eb9a1e3b75c54a9ba (patch)
tree: b9bcf3444852500625b06d5b83fee615189a8c5c
parent: 6820b2fea919c033405e7e204343fddd947c2ef3 (diff)
download: Nim-1b4db5a34caf301995b3c08eb9a1e3b75c54a9ba.tar.gz
3 files changed, 29 insertions, 7 deletions
diff --git a/compiler/idents.nim b/compiler/idents.nim
index 0cca18929..5216b109f 100644
--- a/compiler/idents.nim
+++ b/compiler/idents.nim
@@ -12,7 +12,7 @@
 # id. This module is essential for the compiler's performance.
 
 import 
-  hashes, strutils
+  hashes, strutils, etcpriv
 
 type 
   TIdObj* = object of RootObj
@@ -37,6 +37,8 @@ proc cmpIgnoreStyle(a, b: cstring, blen: int): int =
   while j < blen:
     while a[i] == '_': inc(i)
     while b[j] == '_': inc(j)
+    while isMagicIdentSeparatorRune(a, i): inc(i, magicIdentSeparatorRuneByteWidth)
+    while isMagicIdentSeparatorRune(b, j): inc(j, magicIdentSeparatorRuneByteWidth)
     # tolower inlined:
     var aa = a[i]
     var bb = b[j]
diff --git a/compiler/lexer.nim b/compiler/lexer.nim
index 8080e0e8c..c37b4deba 100644
--- a/compiler/lexer.nim
+++ b/compiler/lexer.nim
@@ -17,7 +17,7 @@
 
 import
   hashes, options, msgs, strutils, platform, idents, nimlexbase, llstream,
-  wordrecg
+  wordrecg, etcpriv
 
 const
   MaxLineLength* = 80         # lines longer than this lead to a warning
@@ -143,7 +143,8 @@ proc isNimIdentifier*(s: string): bool =
     while i < s.len:
       if s[i] == '_':
         inc(i)
-        if s[i] notin SymChars: return
+      elif isMagicIdentSeparatorRune(cstring s, i):
+        inc(i, magicIdentSeparatorRuneByteWidth)
       if s[i] notin SymChars: return
       inc(i)
     result = true
@@ -632,16 +633,27 @@ proc getSymbol(L: var TLexer, tok: var TToken) =
     var c = buf[pos]
     case c
     of 'a'..'z', '0'..'9', '\x80'..'\xFF':
-      h = h !& ord(c)
+      if  c == '\226' and
+          buf[pos+1] == '\128' and
+          buf[pos+2] == '\147':  # It's a 'magic separator' en-dash Unicode
+        if buf[pos + magicIdentSeparatorRuneByteWidth] notin SymChars:
+          lexMessage(L, errInvalidToken, "·")
+          break
+        inc(pos, magicIdentSeparatorRuneByteWidth)
+      else:
+        h = h !& ord(c)
+        inc(pos)
     of 'A'..'Z':
       c = chr(ord(c) + (ord('a') - ord('A'))) # toLower()
       h = h !& ord(c)
+      inc(pos)
     of '_':
       if buf[pos+1] notin SymChars:
         lexMessage(L, errInvalidToken, "_")
         break
+      inc(pos)
+
     else: break
-    inc(pos)
   h = !$h
   tok.ident = getIdent(addr(L.buf[L.bufpos]), pos - L.bufpos, h)
   L.bufpos = pos
diff --git a/lib/pure/hashes.nim b/lib/pure/hashes.nim
index 2ce8ac796..c6af8f918 100644
--- a/lib/pure/hashes.nim
+++ b/lib/pure/hashes.nim
@@ -38,7 +38,7 @@
 ##    result = !$h
 
 import
-  strutils
+  strutils, etcpriv
 
 type
   THash* = int ## a hash value; hash tables using these values should
@@ -124,13 +124,21 @@ proc hash*(x: string): THash =
 proc hashIgnoreStyle*(x: string): THash =
   ## efficient hashing of strings; style is ignored
   var h: THash = 0
-  for i in 0..x.len-1:
+  var i = 0
+  let xLen = x.len
+  while i < xLen:
     var c = x[i]
     if c == '_':
+      inc(i)
       continue                # skip _
+    if isMagicIdentSeparatorRune(cstring(x), i):
+      inc(i, magicIdentSeparatorRuneByteWidth)
+      continue                # skip the magic separator (Unicode en-dash, U+2013)
     if c in {'A'..'Z'}:
       c = chr(ord(c) + (ord('a') - ord('A'))) # toLower()
     h = h !& ord(c)
+    inc(i)
+
   result = !$h
 
 proc hashIgnoreCase*(x: string): THash =
author	Oscar Campbell <oscar@campbell.nu>	2015-05-31 01:31:06 +0200
committer	Oscar Campbell <oscar@campbell.nu>	2015-05-31 01:31:06 +0200
commit	1b4db5a34caf301995b3c08eb9a1e3b75c54a9ba (patch)
tree	b9bcf3444852500625b06d5b83fee615189a8c5c
parent	6820b2fea919c033405e7e204343fddd947c2ef3 (diff)
download	Nim-1b4db5a34caf301995b3c08eb9a1e3b75c54a9ba.tar.gz