diff options
Diffstat (limited to 'compiler')
-rw-r--r-- | compiler/idents.nim | 4 | ||||
-rw-r--r-- | compiler/lexer.nim | 20 |
2 files changed, 19 insertions, 5 deletions
diff --git a/compiler/idents.nim b/compiler/idents.nim index 0cca18929..5216b109f 100644 --- a/compiler/idents.nim +++ b/compiler/idents.nim @@ -12,7 +12,7 @@ # id. This module is essential for the compiler's performance. import - hashes, strutils + hashes, strutils, etcpriv type TIdObj* = object of RootObj @@ -37,6 +37,8 @@ proc cmpIgnoreStyle(a, b: cstring, blen: int): int = while j < blen: while a[i] == '_': inc(i) while b[j] == '_': inc(j) + while isMagicIdentSeparatorRune(a, i): inc(i, magicIdentSeparatorRuneByteWidth) + while isMagicIdentSeparatorRune(b, j): inc(j, magicIdentSeparatorRuneByteWidth) # tolower inlined: var aa = a[i] var bb = b[j] diff --git a/compiler/lexer.nim b/compiler/lexer.nim index 8080e0e8c..c37b4deba 100644 --- a/compiler/lexer.nim +++ b/compiler/lexer.nim @@ -17,7 +17,7 @@ import hashes, options, msgs, strutils, platform, idents, nimlexbase, llstream, - wordrecg + wordrecg, etcpriv const MaxLineLength* = 80 # lines longer than this lead to a warning @@ -143,7 +143,8 @@ proc isNimIdentifier*(s: string): bool = while i < s.len: if s[i] == '_': inc(i) - if s[i] notin SymChars: return + elif isMagicIdentSeparatorRune(cstring s, i): + inc(i, magicIdentSeparatorRuneByteWidth) if s[i] notin SymChars: return inc(i) result = true @@ -632,16 +633,27 @@ proc getSymbol(L: var TLexer, tok: var TToken) = var c = buf[pos] case c of 'a'..'z', '0'..'9', '\x80'..'\xFF': - h = h !& ord(c) + if c == '\226' and + buf[pos+1] == '\128' and + buf[pos+2] == '\147': # It's a 'magic separator' en-dash Unicode + if buf[pos + magicIdentSeparatorRuneByteWidth] notin SymChars: + lexMessage(L, errInvalidToken, "ยท") + break + inc(pos, magicIdentSeparatorRuneByteWidth) + else: + h = h !& ord(c) + inc(pos) of 'A'..'Z': c = chr(ord(c) + (ord('a') - ord('A'))) # toLower() h = h !& ord(c) + inc(pos) of '_': if buf[pos+1] notin SymChars: lexMessage(L, errInvalidToken, "_") break + inc(pos) + else: break - inc(pos) h = !$h tok.ident = getIdent(addr(L.buf[L.bufpos]), pos - L.bufpos, h) L.bufpos = pos |