summary refs log tree commit diff stats
path: root/compiler/lexer.nim
diff options
context:
space:
mode:
author Oscar Campbell <oscar@campbell.nu> 2015-05-31 01:31:06 +0200
committer Oscar Campbell <oscar@campbell.nu> 2015-05-31 01:31:06 +0200
commit 1b4db5a34caf301995b3c08eb9a1e3b75c54a9ba (patch)
tree b9bcf3444852500625b06d5b83fee615189a8c5c /compiler/lexer.nim
parent 6820b2fea919c033405e7e204343fddd947c2ef3 (diff)
download Nim-1b4db5a34caf301995b3c08eb9a1e3b75c54a9ba.tar.gz
Implement #2811 - Unicode en-dash (U+2013) as hump/snake alt.
Diffstat (limited to 'compiler/lexer.nim')
-rw-r--r-- compiler/lexer.nim 20
1 files changed, 16 insertions, 4 deletions
diff --git a/compiler/lexer.nim b/compiler/lexer.nim
index 8080e0e8c..c37b4deba 100644
--- a/compiler/lexer.nim
+++ b/compiler/lexer.nim
@@ -17,7 +17,7 @@
 
 import
   hashes, options, msgs, strutils, platform, idents, nimlexbase, llstream,
-  wordrecg
+  wordrecg, etcpriv
 
 const
   MaxLineLength* = 80         # lines longer than this lead to a warning
@@ -143,7 +143,8 @@ proc isNimIdentifier*(s: string): bool =
     while i < s.len:
       if s[i] == '_':
         inc(i)
-        if s[i] notin SymChars: return
+      elif isMagicIdentSeparatorRune(cstring s, i):
+        inc(i, magicIdentSeparatorRuneByteWidth)
       if s[i] notin SymChars: return
       inc(i)
     result = true
@@ -632,16 +633,27 @@ proc getSymbol(L: var TLexer, tok: var TToken) =
     var c = buf[pos]
     case c
     of 'a'..'z', '0'..'9', '\x80'..'\xFF':
-      h = h !& ord(c)
+      if  c == '\226' and
+          buf[pos+1] == '\128' and
+          buf[pos+2] == '\147':  # It's a 'magic separator' en-dash Unicode
+        if buf[pos + magicIdentSeparatorRuneByteWidth] notin SymChars:
+          lexMessage(L, errInvalidToken, "·")
+          break
+        inc(pos, magicIdentSeparatorRuneByteWidth)
+      else:
+        h = h !& ord(c)
+        inc(pos)
     of 'A'..'Z':
       c = chr(ord(c) + (ord('a') - ord('A'))) # toLower()
       h = h !& ord(c)
+      inc(pos)
     of '_':
       if buf[pos+1] notin SymChars:
         lexMessage(L, errInvalidToken, "_")
         break
+      inc(pos)
+
     else: break
-    inc(pos)
   h = !$h
   tok.ident = getIdent(addr(L.buf[L.bufpos]), pos - L.bufpos, h)
   L.bufpos = pos