diff options
author | Oscar Campbell <oscar@campbell.nu> | 2015-05-31 01:31:06 +0200 |
---|---|---|
committer | Oscar Campbell <oscar@campbell.nu> | 2015-05-31 01:31:06 +0200 |
commit | 1b4db5a34caf301995b3c08eb9a1e3b75c54a9ba (patch) | |
tree | b9bcf3444852500625b06d5b83fee615189a8c5c | |
parent | 6820b2fea919c033405e7e204343fddd947c2ef3 (diff) | |
download | Nim-1b4db5a34caf301995b3c08eb9a1e3b75c54a9ba.tar.gz |
Implement #2811 - accept the Unicode en-dash (U+2013) as an identifier word separator, as an alternative to hump (camelCase) and snake (snake_case) styles.
-rw-r--r-- | compiler/idents.nim | 4 |
-rw-r--r-- | compiler/lexer.nim | 20 |
-rw-r--r-- | lib/pure/hashes.nim | 12 |
3 files changed, 29 insertions, 7 deletions
diff --git a/compiler/idents.nim b/compiler/idents.nim index 0cca18929..5216b109f 100644 --- a/compiler/idents.nim +++ b/compiler/idents.nim @@ -12,7 +12,7 @@ # id. This module is essential for the compiler's performance. import - hashes, strutils + hashes, strutils, etcpriv type TIdObj* = object of RootObj @@ -37,6 +37,8 @@ proc cmpIgnoreStyle(a, b: cstring, blen: int): int = while j < blen: while a[i] == '_': inc(i) while b[j] == '_': inc(j) + while isMagicIdentSeparatorRune(a, i): inc(i, magicIdentSeparatorRuneByteWidth) + while isMagicIdentSeparatorRune(b, j): inc(j, magicIdentSeparatorRuneByteWidth) # tolower inlined: var aa = a[i] var bb = b[j] diff --git a/compiler/lexer.nim b/compiler/lexer.nim index 8080e0e8c..c37b4deba 100644 --- a/compiler/lexer.nim +++ b/compiler/lexer.nim @@ -17,7 +17,7 @@ import hashes, options, msgs, strutils, platform, idents, nimlexbase, llstream, - wordrecg + wordrecg, etcpriv const MaxLineLength* = 80 # lines longer than this lead to a warning @@ -143,7 +143,8 @@ proc isNimIdentifier*(s: string): bool = while i < s.len: if s[i] == '_': inc(i) - if s[i] notin SymChars: return + elif isMagicIdentSeparatorRune(cstring s, i): + inc(i, magicIdentSeparatorRuneByteWidth) if s[i] notin SymChars: return inc(i) result = true @@ -632,16 +633,27 @@ proc getSymbol(L: var TLexer, tok: var TToken) = var c = buf[pos] case c of 'a'..'z', '0'..'9', '\x80'..'\xFF': - h = h !& ord(c) + if c == '\226' and + buf[pos+1] == '\128' and + buf[pos+2] == '\147': # It's a 'magic separator' en-dash Unicode + if buf[pos + magicIdentSeparatorRuneByteWidth] notin SymChars: + lexMessage(L, errInvalidToken, "·") + break + inc(pos, magicIdentSeparatorRuneByteWidth) + else: + h = h !& ord(c) + inc(pos) of 'A'..'Z': c = chr(ord(c) + (ord('a') - ord('A'))) # toLower() h = h !& ord(c) + inc(pos) of '_': if buf[pos+1] notin SymChars: lexMessage(L, errInvalidToken, "_") break + inc(pos) + else: break - inc(pos) h = !$h tok.ident = getIdent(addr(L.buf[L.bufpos]), pos - 
L.bufpos, h) L.bufpos = pos diff --git a/lib/pure/hashes.nim b/lib/pure/hashes.nim index 2ce8ac796..c6af8f918 100644 --- a/lib/pure/hashes.nim +++ b/lib/pure/hashes.nim @@ -38,7 +38,7 @@ ## result = !$h import - strutils + strutils, etcpriv type THash* = int ## a hash value; hash tables using these values should @@ -124,13 +124,21 @@ proc hash*(x: string): THash = proc hashIgnoreStyle*(x: string): THash = ## efficient hashing of strings; style is ignored var h: THash = 0 - for i in 0..x.len-1: + var i = 0 + let xLen = x.len + while i < xLen: var c = x[i] if c == '_': + inc(i) continue # skip _ + if isMagicIdentSeparatorRune(cstring(x), i): + inc(i, magicIdentSeparatorRuneByteWidth) + continue # skip '·' (unicode middle dot) if c in {'A'..'Z'}: c = chr(ord(c) + (ord('a') - ord('A'))) # toLower() h = h !& ord(c) + inc(i) + result = !$h proc hashIgnoreCase*(x: string): THash = |