remove en-dash from the language

author: Andreas Rumpf <rumpf_a@web.de> 2017-04-02 15:21:10 +0200
committer: Andreas Rumpf <rumpf_a@web.de> 2017-04-02 23:41:29 +0200
commit: f520dfbfabe1134d92214c66f2e1fcd222053771 (patch)
tree: 0f08a0c85075f9db127b51704f959b30d6ad089e
parent: d587b6a25f9976abad9bf4b7039dd0c1f31b2913 (diff)
download: Nim-f520dfbfabe1134d92214c66f2e1fcd222053771.tar.gz
6 files changed, 10 insertions, 51 deletions
diff --git a/compiler/idents.nim b/compiler/idents.nim
index eecfa60a1..2cce4710e 100644
--- a/compiler/idents.nim
+++ b/compiler/idents.nim
@@ -12,7 +12,7 @@
 # id. This module is essential for the compiler's performance.
 
 import
-  hashes, strutils, etcpriv, wordrecg
+  hashes, strutils, wordrecg
 
 type
   TIdObj* = object of RootObj
@@ -45,8 +45,6 @@ proc cmpIgnoreStyle(a, b: cstring, blen: int): int =
   while j < blen:
     while a[i] == '_': inc(i)
     while b[j] == '_': inc(j)
-    while isMagicIdentSeparatorRune(a, i): inc(i, magicIdentSeparatorRuneByteWidth)
-    while isMagicIdentSeparatorRune(b, j): inc(j, magicIdentSeparatorRuneByteWidth)
     # tolower inlined:
     var aa = a[i]
     var bb = b[j]
diff --git a/compiler/lexer.nim b/compiler/lexer.nim
index 2bb228f41..e0875a118 100644
--- a/compiler/lexer.nim
+++ b/compiler/lexer.nim
@@ -17,7 +17,7 @@
 
 import
   hashes, options, msgs, strutils, platform, idents, nimlexbase, llstream,
-  wordrecg, etcpriv
+  wordrecg
 
 const
   MaxLineLength* = 80         # lines longer than this lead to a warning
@@ -158,8 +158,6 @@ proc isNimIdentifier*(s: string): bool =
     while i < sLen:
       if s[i] == '_':
         inc(i)
-      elif isMagicIdentSeparatorRune(cstring s, i):
-        inc(i, magicIdentSeparatorRuneByteWidth)
       if s[i] notin SymChars: return
       inc(i)
     result = true
@@ -782,27 +780,17 @@ proc getSymbol(L: var TLexer, tok: var TToken) =
     var c = buf[pos]
     case c
     of 'a'..'z', '0'..'9', '\x80'..'\xFF':
-      if  c == '\226' and
-          buf[pos+1] == '\128' and
-          buf[pos+2] == '\147':  # It's a 'magic separator' en-dash Unicode
-        if buf[pos + magicIdentSeparatorRuneByteWidth] notin SymChars or
-            isMagicIdentSeparatorRune(buf, pos+magicIdentSeparatorRuneByteWidth) or pos == L.bufpos:
-          lexMessage(L, errInvalidToken, "–")
-          break
-        inc(pos, magicIdentSeparatorRuneByteWidth)
-      else:
-        h = h !& ord(c)
-        inc(pos)
+      h = h !& ord(c)
+      inc(pos)
     of 'A'..'Z':
       c = chr(ord(c) + (ord('a') - ord('A'))) # toLower()
       h = h !& ord(c)
       inc(pos)
     of '_':
-      if buf[pos+1] notin SymChars or isMagicIdentSeparatorRune(buf, pos+1):
+      if buf[pos+1] notin SymChars:
         lexMessage(L, errInvalidToken, "_")
         break
       inc(pos)
-
     else: break
   tokenEnd(pos-1)
   h = !$h
@@ -1117,8 +1105,7 @@ proc rawGetTok*(L: var TLexer, tok: var TToken) =
       inc(L.bufpos)
     of '_':
       inc(L.bufpos)
-      if L.buf[L.bufpos] notin SymChars+{'_'} and not
-          isMagicIdentSeparatorRune(L.buf, L.bufpos):
+      if L.buf[L.bufpos] notin SymChars+{'_'}:
         tok.tokType = tkSymbol
         tok.ident = L.cache.getIdent("_")
       else:
diff --git a/doc/manual/lexing.txt b/doc/manual/lexing.txt
index 7ffd5eb1c..d4c11adf7 100644
--- a/doc/manual/lexing.txt
+++ b/doc/manual/lexing.txt
@@ -133,8 +133,7 @@ Two identifiers are considered equal if the following algorithm returns true:
       a.replace(re"_|–", "").toLower == b.replace(re"_|–", "").toLower
 
 That means only the first letters are compared in a case sensitive manner. Other
-letters are compared case insensitively and underscores and en-dash (Unicode
-point U+2013) are ignored.
+letters are compared case insensitively and underscores are ignored.
 
 This rather unorthodox way to do identifier comparisons is called
 `partial case insensitivity`:idx: and has some advantages over the conventional
diff --git a/lib/pure/etcpriv.nim b/lib/pure/etcpriv.nim
deleted file mode 100644
index 5b785b051..000000000
--- a/lib/pure/etcpriv.nim
+++ /dev/null
@@ -1,23 +0,0 @@
-#
-#
-#            Nim's Runtime Library
-#        (c) Copyright 2015 Nim Authors
-#
-#    See the file "copying.txt", included in this
-#    distribution, for details about the copyright.
-#
-
-## This module contains utils that are less then easy to categorize and
-## don't really warrant a specific module. They are private to compiler
-## and stdlib usage, and should not be used outside of that - they may
-## change or disappear at any time.
-
-
-# Used by pure/hashes.nim, and the compiler parsing
-const magicIdentSeparatorRuneByteWidth* = 3
-
-# Used by pure/hashes.nim, and the compiler parsing
-proc isMagicIdentSeparatorRune*(cs: cstring, i: int): bool  {. inline } =
-  result =  cs[i] == '\226' and
-            cs[i + 1] == '\128' and
-            cs[i + 2] == '\147'     # en-dash  # 145 = nb-hyphen
diff --git a/lib/pure/hashes.nim b/lib/pure/hashes.nim
index d5759e507..e8c8776c6 100644
--- a/lib/pure/hashes.nim
+++ b/lib/pure/hashes.nim
@@ -39,7 +39,7 @@
 ##    result = !$h
 
 import
-  strutils, etcpriv
+  strutils
 
 type
   Hash* = int ## a hash value; hash tables using these values should
@@ -163,8 +163,6 @@ proc hashIgnoreStyle*(x: string): Hash =
     var c = x[i]
     if c == '_':
       inc(i)
-    elif isMagicIdentSeparatorRune(cstring(x), i):
-      inc(i, magicIdentSeparatorRuneByteWidth)
     else:
       if c in {'A'..'Z'}:
         c = chr(ord(c) + (ord('a') - ord('A'))) # toLower()
@@ -185,8 +183,6 @@ proc hashIgnoreStyle*(sBuf: string, sPos, ePos: int): Hash =
     var c = sBuf[i]
     if c == '_':
       inc(i)
-    elif isMagicIdentSeparatorRune(cstring(sBuf), i):
-      inc(i, magicIdentSeparatorRuneByteWidth)
     else:
       if c in {'A'..'Z'}:
         c = chr(ord(c) + (ord('a') - ord('A'))) # toLower()
diff --git a/web/news/e031_version_0_16_2.rst b/web/news/e031_version_0_16_2.rst
index 4c4cac129..d34c7de53 100644
--- a/web/news/e031_version_0_16_2.rst
+++ b/web/news/e031_version_0_16_2.rst
@@ -47,6 +47,8 @@ Changes affecting backwards compatibility
   would be a ``nop`` then.
 - ``posix.nim``: the family of ``ntohs`` procs now takes unsigned integers
   instead of signed integers.
+- In Nim identifiers, the en-dash (Unicode U+2013) is no longer an alias for
+  the underscore. Use underscores and fix your programming font instead.
 
 
 Library Additions
author	Andreas Rumpf <rumpf_a@web.de>	2017-04-02 15:21:10 +0200
committer	Andreas Rumpf <rumpf_a@web.de>	2017-04-02 23:41:29 +0200
commit	f520dfbfabe1134d92214c66f2e1fcd222053771 (patch)
tree	0f08a0c85075f9db127b51704f959b30d6ad089e
parent	d587b6a25f9976abad9bf4b7039dd0c1f31b2913 (diff)
download	Nim-f520dfbfabe1134d92214c66f2e1fcd222053771.tar.gz