diff options
-rw-r--r-- | compiler/idents.nim | 4 | ||||
-rw-r--r-- | compiler/lexer.nim | 23 | ||||
-rw-r--r-- | doc/manual/lexing.txt | 3 | ||||
-rw-r--r-- | lib/pure/etcpriv.nim | 23 | ||||
-rw-r--r-- | lib/pure/hashes.nim | 6 | ||||
-rw-r--r-- | web/news/e031_version_0_16_2.rst | 2 |
6 files changed, 10 insertions, 51 deletions
diff --git a/compiler/idents.nim b/compiler/idents.nim index eecfa60a1..2cce4710e 100644 --- a/compiler/idents.nim +++ b/compiler/idents.nim @@ -12,7 +12,7 @@ # id. This module is essential for the compiler's performance. import - hashes, strutils, etcpriv, wordrecg + hashes, strutils, wordrecg type TIdObj* = object of RootObj @@ -45,8 +45,6 @@ proc cmpIgnoreStyle(a, b: cstring, blen: int): int = while j < blen: while a[i] == '_': inc(i) while b[j] == '_': inc(j) - while isMagicIdentSeparatorRune(a, i): inc(i, magicIdentSeparatorRuneByteWidth) - while isMagicIdentSeparatorRune(b, j): inc(j, magicIdentSeparatorRuneByteWidth) # tolower inlined: var aa = a[i] var bb = b[j] diff --git a/compiler/lexer.nim b/compiler/lexer.nim index 2bb228f41..e0875a118 100644 --- a/compiler/lexer.nim +++ b/compiler/lexer.nim @@ -17,7 +17,7 @@ import hashes, options, msgs, strutils, platform, idents, nimlexbase, llstream, - wordrecg, etcpriv + wordrecg const MaxLineLength* = 80 # lines longer than this lead to a warning @@ -158,8 +158,6 @@ proc isNimIdentifier*(s: string): bool = while i < sLen: if s[i] == '_': inc(i) - elif isMagicIdentSeparatorRune(cstring s, i): - inc(i, magicIdentSeparatorRuneByteWidth) if s[i] notin SymChars: return inc(i) result = true @@ -782,27 +780,17 @@ proc getSymbol(L: var TLexer, tok: var TToken) = var c = buf[pos] case c of 'a'..'z', '0'..'9', '\x80'..'\xFF': - if c == '\226' and - buf[pos+1] == '\128' and - buf[pos+2] == '\147': # It's a 'magic separator' en-dash Unicode - if buf[pos + magicIdentSeparatorRuneByteWidth] notin SymChars or - isMagicIdentSeparatorRune(buf, pos+magicIdentSeparatorRuneByteWidth) or pos == L.bufpos: - lexMessage(L, errInvalidToken, "–") - break - inc(pos, magicIdentSeparatorRuneByteWidth) - else: - h = h !& ord(c) - inc(pos) + h = h !& ord(c) + inc(pos) of 'A'..'Z': c = chr(ord(c) + (ord('a') - ord('A'))) # toLower() h = h !& ord(c) inc(pos) of '_': - if buf[pos+1] notin SymChars or isMagicIdentSeparatorRune(buf, pos+1): + if buf[pos+1] notin SymChars: lexMessage(L, errInvalidToken, "_") break inc(pos) - else: break tokenEnd(pos-1) h = !$h @@ -1117,8 +1105,7 @@ proc rawGetTok*(L: var TLexer, tok: var TToken) = inc(L.bufpos) of '_': inc(L.bufpos) - if L.buf[L.bufpos] notin SymChars+{'_'} and not - isMagicIdentSeparatorRune(L.buf, L.bufpos): + if L.buf[L.bufpos] notin SymChars+{'_'}: tok.tokType = tkSymbol tok.ident = L.cache.getIdent("_") else: diff --git a/doc/manual/lexing.txt b/doc/manual/lexing.txt index 7ffd5eb1c..d4c11adf7 100644 --- a/doc/manual/lexing.txt +++ b/doc/manual/lexing.txt @@ -133,8 +133,7 @@ Two identifiers are considered equal if the following algorithm returns true: a.replace(re"_|–", "").toLower == b.replace(re"_|–", "").toLower That means only the first letters are compared in a case sensitive manner. Other -letters are compared case insensitively and underscores and en-dash (Unicode -point U+2013) are ignored. +letters are compared case insensitively and underscores are ignored. This rather unorthodox way to do identifier comparisons is called `partial case insensitivity`:idx: and has some advantages over the conventional diff --git a/lib/pure/etcpriv.nim b/lib/pure/etcpriv.nim deleted file mode 100644 index 5b785b051..000000000 --- a/lib/pure/etcpriv.nim +++ /dev/null @@ -1,23 +0,0 @@ -# -# -# Nim's Runtime Library -# (c) Copyright 2015 Nim Authors -# -# See the file "copying.txt", included in this -# distribution, for details about the copyright. -# - -## This module contains utils that are less then easy to categorize and -## don't really warrant a specific module. They are private to compiler -## and stdlib usage, and should not be used outside of that - they may -## change or disappear at any time. - - -# Used by pure/hashes.nim, and the compiler parsing -const magicIdentSeparatorRuneByteWidth* = 3 - -# Used by pure/hashes.nim, and the compiler parsing -proc isMagicIdentSeparatorRune*(cs: cstring, i: int): bool {. inline } = - result = cs[i] == '\226' and - cs[i + 1] == '\128' and - cs[i + 2] == '\147' # en-dash # 145 = nb-hyphen diff --git a/lib/pure/hashes.nim b/lib/pure/hashes.nim index d5759e507..e8c8776c6 100644 --- a/lib/pure/hashes.nim +++ b/lib/pure/hashes.nim @@ -39,7 +39,7 @@ ## result = !$h import - strutils, etcpriv + strutils type Hash* = int ## a hash value; hash tables using these values should @@ -163,8 +163,6 @@ proc hashIgnoreStyle*(x: string): Hash = var c = x[i] if c == '_': inc(i) - elif isMagicIdentSeparatorRune(cstring(x), i): - inc(i, magicIdentSeparatorRuneByteWidth) else: if c in {'A'..'Z'}: c = chr(ord(c) + (ord('a') - ord('A'))) # toLower() @@ -185,8 +183,6 @@ proc hashIgnoreStyle*(sBuf: string, sPos, ePos: int): Hash = var c = sBuf[i] if c == '_': inc(i) - elif isMagicIdentSeparatorRune(cstring(sBuf), i): - inc(i, magicIdentSeparatorRuneByteWidth) else: if c in {'A'..'Z'}: c = chr(ord(c) + (ord('a') - ord('A'))) # toLower() diff --git a/web/news/e031_version_0_16_2.rst b/web/news/e031_version_0_16_2.rst index 4c4cac129..d34c7de53 100644 --- a/web/news/e031_version_0_16_2.rst +++ b/web/news/e031_version_0_16_2.rst @@ -47,6 +47,8 @@ Changes affecting backwards compatibility would be a ``nop`` then. - ``posix.nim``: the family of ``ntohs`` procs now takes unsigned integers instead of signed integers. +- In Nim identifiers en-dash (Unicode point U+2013) is not an alias for the + underscore anymore. Use underscores and fix your programming font instead. Library Additions |