diff options
author | Andreas Rumpf <rumpf_a@web.de> | 2015-06-04 11:37:26 +0200 |
---|---|---|
committer | Andreas Rumpf <rumpf_a@web.de> | 2015-06-04 11:37:26 +0200 |
commit | 8264c3cbeef6a4b86f7613c05d036658cbd9a24d (patch) | |
tree | 3e62ce8c9ae45b85b3d2d96a752a9f99c7ad8905 | |
parent | 874637be32e4dcecb1301a5c436013945fc90cd0 (diff) | |
parent | dd30bab480f59e4bb4ab8fad5aabd13c08aa1b11 (diff) | |
download | Nim-8264c3cbeef6a4b86f7613c05d036658cbd9a24d.tar.gz |
Merge pull request #2849 from ozra/feature-2811-hump-snake-dash
Feature #2811 hump, snake and now dash
-rw-r--r-- | compiler/idents.nim | 4 | ||||
-rw-r--r-- | compiler/lexer.nim | 23 | ||||
-rw-r--r-- | lib/pure/etcpriv.nim | 23 | ||||
-rw-r--r-- | lib/pure/hashes.nim | 19 |
4 files changed, 57 insertions, 12 deletions
diff --git a/compiler/idents.nim b/compiler/idents.nim index 0cca18929..5216b109f 100644 --- a/compiler/idents.nim +++ b/compiler/idents.nim @@ -12,7 +12,7 @@ # id. This module is essential for the compiler's performance. import - hashes, strutils + hashes, strutils, etcpriv type TIdObj* = object of RootObj @@ -37,6 +37,8 @@ proc cmpIgnoreStyle(a, b: cstring, blen: int): int = while j < blen: while a[i] == '_': inc(i) while b[j] == '_': inc(j) + while isMagicIdentSeparatorRune(a, i): inc(i, magicIdentSeparatorRuneByteWidth) + while isMagicIdentSeparatorRune(b, j): inc(j, magicIdentSeparatorRuneByteWidth) # tolower inlined: var aa = a[i] var bb = b[j] diff --git a/compiler/lexer.nim b/compiler/lexer.nim index 8080e0e8c..eaabe05e2 100644 --- a/compiler/lexer.nim +++ b/compiler/lexer.nim @@ -17,7 +17,7 @@ import hashes, options, msgs, strutils, platform, idents, nimlexbase, llstream, - wordrecg + wordrecg, etcpriv const MaxLineLength* = 80 # lines longer than this lead to a warning @@ -140,10 +140,12 @@ proc isKeyword*(kind: TTokType): bool = proc isNimIdentifier*(s: string): bool = if s[0] in SymStartChars: var i = 1 - while i < s.len: + var sLen = s.len + while i < sLen: if s[i] == '_': inc(i) - if s[i] notin SymChars: return + elif isMagicIdentSeparatorRune(cstring s, i): + inc(i, magicIdentSeparatorRuneByteWidth) if s[i] notin SymChars: return inc(i) result = true @@ -632,16 +634,27 @@ proc getSymbol(L: var TLexer, tok: var TToken) = var c = buf[pos] case c of 'a'..'z', '0'..'9', '\x80'..'\xFF': - h = h !& ord(c) + if c == '\226' and + buf[pos+1] == '\128' and + buf[pos+2] == '\147': # It's a 'magic separator' en-dash Unicode + if buf[pos + magicIdentSeparatorRuneByteWidth] notin SymChars: + lexMessage(L, errInvalidToken, "–") + break + inc(pos, magicIdentSeparatorRuneByteWidth) + else: + h = h !& ord(c) + inc(pos) of 'A'..'Z': c = chr(ord(c) + (ord('a') - ord('A'))) # toLower() h = h !& ord(c) + inc(pos) of '_': if buf[pos+1] notin SymChars: lexMessage(L, errInvalidToken, "_") break + inc(pos) + else: break - inc(pos) h = !$h tok.ident = getIdent(addr(L.buf[L.bufpos]), pos - L.bufpos, h) L.bufpos = pos diff --git a/lib/pure/etcpriv.nim b/lib/pure/etcpriv.nim new file mode 100644 index 000000000..e7a525e4d --- /dev/null +++ b/lib/pure/etcpriv.nim @@ -0,0 +1,23 @@ +# +# +# Nim's Runtime Library +# (c) Copyright 2015 Nim Authors +# +# See the file "copying.txt", included in this +# distribution, for details about the copyright. +# + +## This module contains utils that are less then easy to categorize and +## don't really warrant a specific module. They are private to compiler +## and stdlib usage, and should not be used outside of that - they may +## change or disappear at any time. + + +# Used by pure/hashes.nim, and the compiler parsing +const magicIdentSeparatorRuneByteWidth* = 3 + +# Used by pure/hashes.nim, and the compiler parsing +proc isMagicIdentSeparatorRune*(cs: cstring, i: int): bool {. inline } = + result = cs[i] == '\226' and + cs[i + 1] == '\128' and + cs[i + 2] == '\147' # en-dash # 145 = nb-hyphen diff --git a/lib/pure/hashes.nim b/lib/pure/hashes.nim index 2ce8ac796..132264e4a 100644 --- a/lib/pure/hashes.nim +++ b/lib/pure/hashes.nim @@ -38,7 +38,7 @@ ## result = !$h import - strutils + strutils, etcpriv type THash* = int ## a hash value; hash tables using these values should @@ -124,13 +124,20 @@ proc hash*(x: string): THash = proc hashIgnoreStyle*(x: string): THash = ## efficient hashing of strings; style is ignored var h: THash = 0 - for i in 0..x.len-1: + var i = 0 + let xLen = x.len + while i < xLen: var c = x[i] if c == '_': - continue # skip _ - if c in {'A'..'Z'}: - c = chr(ord(c) + (ord('a') - ord('A'))) # toLower() - h = h !& ord(c) + inc(i) + elif isMagicIdentSeparatorRune(cstring(x), i): + inc(i, magicIdentSeparatorRuneByteWidth) + else: + if c in {'A'..'Z'}: + c = chr(ord(c) + (ord('a') - ord('A'))) # toLower() + h = h !& ord(c) + inc(i) + result = !$h proc hashIgnoreCase*(x: string): THash = |