summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Andreas Rumpf <rumpf_a@web.de> 2015-06-04 11:37:26 +0200
committer Andreas Rumpf <rumpf_a@web.de> 2015-06-04 11:37:26 +0200
commit 8264c3cbeef6a4b86f7613c05d036658cbd9a24d (patch)
tree 3e62ce8c9ae45b85b3d2d96a752a9f99c7ad8905
parent 874637be32e4dcecb1301a5c436013945fc90cd0 (diff)
parent dd30bab480f59e4bb4ab8fad5aabd13c08aa1b11 (diff)
download Nim-8264c3cbeef6a4b86f7613c05d036658cbd9a24d.tar.gz
Merge pull request #2849 from ozra/feature-2811-hump-snake-dash
Feature #2811 hump, snake and now dash
-rw-r--r-- compiler/idents.nim 4
-rw-r--r-- compiler/lexer.nim 23
-rw-r--r-- lib/pure/etcpriv.nim 23
-rw-r--r-- lib/pure/hashes.nim 19
4 files changed, 57 insertions, 12 deletions
diff --git a/compiler/idents.nim b/compiler/idents.nim
index 0cca18929..5216b109f 100644
--- a/compiler/idents.nim
+++ b/compiler/idents.nim
@@ -12,7 +12,7 @@
 # id. This module is essential for the compiler's performance.
 
 import 
-  hashes, strutils
+  hashes, strutils, etcpriv
 
 type 
   TIdObj* = object of RootObj
@@ -37,6 +37,8 @@ proc cmpIgnoreStyle(a, b: cstring, blen: int): int =
   while j < blen:
     while a[i] == '_': inc(i)
     while b[j] == '_': inc(j)
+    while isMagicIdentSeparatorRune(a, i): inc(i, magicIdentSeparatorRuneByteWidth)
+    while isMagicIdentSeparatorRune(b, j): inc(j, magicIdentSeparatorRuneByteWidth)
     # tolower inlined:
     var aa = a[i]
     var bb = b[j]
diff --git a/compiler/lexer.nim b/compiler/lexer.nim
index 8080e0e8c..eaabe05e2 100644
--- a/compiler/lexer.nim
+++ b/compiler/lexer.nim
@@ -17,7 +17,7 @@
 
 import
   hashes, options, msgs, strutils, platform, idents, nimlexbase, llstream,
-  wordrecg
+  wordrecg, etcpriv
 
 const
   MaxLineLength* = 80         # lines longer than this lead to a warning
@@ -140,10 +140,12 @@ proc isKeyword*(kind: TTokType): bool =
 proc isNimIdentifier*(s: string): bool =
   if s[0] in SymStartChars:
     var i = 1
-    while i < s.len:
+    var sLen = s.len
+    while i < sLen:
       if s[i] == '_':
         inc(i)
-        if s[i] notin SymChars: return
+      elif isMagicIdentSeparatorRune(cstring s, i):
+        inc(i, magicIdentSeparatorRuneByteWidth)
       if s[i] notin SymChars: return
       inc(i)
     result = true
@@ -632,16 +634,27 @@ proc getSymbol(L: var TLexer, tok: var TToken) =
     var c = buf[pos]
     case c
     of 'a'..'z', '0'..'9', '\x80'..'\xFF':
-      h = h !& ord(c)
+      if  c == '\226' and
+          buf[pos+1] == '\128' and
+          buf[pos+2] == '\147':  # It's a 'magic separator' en-dash Unicode
+        if buf[pos + magicIdentSeparatorRuneByteWidth] notin SymChars:
+          lexMessage(L, errInvalidToken, "–")
+          break
+        inc(pos, magicIdentSeparatorRuneByteWidth)
+      else:
+        h = h !& ord(c)
+        inc(pos)
     of 'A'..'Z':
       c = chr(ord(c) + (ord('a') - ord('A'))) # toLower()
       h = h !& ord(c)
+      inc(pos)
     of '_':
       if buf[pos+1] notin SymChars:
         lexMessage(L, errInvalidToken, "_")
         break
+      inc(pos)
+
     else: break
-    inc(pos)
   h = !$h
   tok.ident = getIdent(addr(L.buf[L.bufpos]), pos - L.bufpos, h)
   L.bufpos = pos
diff --git a/lib/pure/etcpriv.nim b/lib/pure/etcpriv.nim
new file mode 100644
index 000000000..e7a525e4d
--- /dev/null
+++ b/lib/pure/etcpriv.nim
@@ -0,0 +1,23 @@
+#
+#
+#            Nim's Runtime Library
+#        (c) Copyright 2015 Nim Authors
+#
+#    See the file "copying.txt", included in this
+#    distribution, for details about the copyright.
+#
+
+## This module contains utils that are less than easy to categorize and
+## don't really warrant a specific module. They are private to compiler
+## and stdlib usage, and should not be used outside of that - they may
+## change or disappear at any time.
+
+
+# Used by pure/hashes.nim, and the compiler parsing
+const magicIdentSeparatorRuneByteWidth* = 3
+
+# Used by pure/hashes.nim, and the compiler parsing
+proc isMagicIdentSeparatorRune*(cs: cstring, i: int): bool  {. inline } =
+  result =  cs[i] == '\226' and 
+            cs[i + 1] == '\128' and
+            cs[i + 2] == '\147'     # en-dash  # 145 = nb-hyphen
diff --git a/lib/pure/hashes.nim b/lib/pure/hashes.nim
index 2ce8ac796..132264e4a 100644
--- a/lib/pure/hashes.nim
+++ b/lib/pure/hashes.nim
@@ -38,7 +38,7 @@
 ##    result = !$h
 
 import
-  strutils
+  strutils, etcpriv
 
 type
   THash* = int ## a hash value; hash tables using these values should
@@ -124,13 +124,20 @@ proc hash*(x: string): THash =
 proc hashIgnoreStyle*(x: string): THash =
   ## efficient hashing of strings; style is ignored
   var h: THash = 0
-  for i in 0..x.len-1:
+  var i = 0
+  let xLen = x.len
+  while i < xLen:
     var c = x[i]
     if c == '_':
-      continue                # skip _
-    if c in {'A'..'Z'}:
-      c = chr(ord(c) + (ord('a') - ord('A'))) # toLower()
-    h = h !& ord(c)
+      inc(i)
+    elif isMagicIdentSeparatorRune(cstring(x), i):
+      inc(i, magicIdentSeparatorRuneByteWidth)
+    else:
+      if c in {'A'..'Z'}:
+        c = chr(ord(c) + (ord('a') - ord('A'))) # toLower()
+      h = h !& ord(c)
+      inc(i)
+
   result = !$h
 
 proc hashIgnoreCase*(x: string): THash =