Merge branch 'devel' into bugfix-2858-testament-sources-system-wide

author: Oscar Campbell <oscar@campbell.nu> 2015-06-04 22:43:18 +0200
committer: Oscar Campbell <oscar@campbell.nu> 2015-06-04 22:43:18 +0200
commit: cdf631c508d4e7d1968ec3b238e29de17df670d7 (patch)
tree: 3b5b7408c5b54ad26f578c583edcdb07101551bd /compiler
parent: 071ccf3d026b03135dbc1e413a5312ac296191ac (diff)
parent: ccb9ae2d52cc42f2489a7bbd758c924a4dbbe872 (diff)
download: Nim-cdf631c508d4e7d1968ec3b238e29de17df670d7.tar.gz
8 files changed, 240 insertions, 174 deletions
diff --git a/compiler/ast.nim b/compiler/ast.nim
index 64cb1b1bc..044f21341 100644
--- a/compiler/ast.nim
+++ b/compiler/ast.nim
@@ -842,7 +842,7 @@ type
     data*: TIdNodePairSeq
 
   TNodePair* = object
-    h*: THash                 # because it is expensive to compute!
+    h*: Hash                 # because it is expensive to compute!
     key*: PNode
     val*: int
 
diff --git a/compiler/astalgo.nim b/compiler/astalgo.nim
index 1707718d7..5980edb27 100644
--- a/compiler/astalgo.nim
+++ b/compiler/astalgo.nim
@@ -14,7 +14,7 @@
 import
   ast, hashes, intsets, strutils, options, msgs, ropes, idents, rodutils
 
-proc hashNode*(p: RootRef): THash
+proc hashNode*(p: RootRef): Hash
 proc treeToYaml*(n: PNode, indent: int = 0, maxRecDepth: int = - 1): Rope
   # Convert a tree into its YAML representation; this is used by the
   # YAML code generator and it is invaluable for debugging purposes.
@@ -49,7 +49,7 @@ proc strTableGet*(t: TStrTable, name: PIdent): PSym
 
 type
   TTabIter*{.final.} = object # consider all fields here private
-    h*: THash                 # current hash
+    h*: Hash                  # current hash
 
 proc initTabIter*(ti: var TTabIter, tab: TStrTable): PSym
 proc nextIter*(ti: var TTabIter, tab: TStrTable): PSym
@@ -65,7 +65,7 @@ proc nextIter*(ti: var TTabIter, tab: TStrTable): PSym
 
 type
   TIdentIter*{.final.} = object # iterator over all syms with same identifier
-    h*: THash                   # current hash
+    h*: Hash                    # current hash
     name*: PIdent
 
 
@@ -94,7 +94,7 @@ proc getSymFromList*(list: PNode, ident: PIdent, start: int = 0): PSym
 proc lookupInRecord*(n: PNode, field: PIdent): PSym
 proc getModule*(s: PSym): PSym
 proc mustRehash*(length, counter: int): bool
-proc nextTry*(h, maxHash: THash): THash {.inline.}
+proc nextTry*(h, maxHash: Hash): Hash {.inline.}
 
 # ------------- table[int, int] ---------------------------------------------
 const
@@ -196,7 +196,7 @@ proc getSymFromList(list: PNode, ident: PIdent, start: int = 0): PSym =
     else: internalError(list.info, "getSymFromList")
   result = nil
 
-proc hashNode(p: RootRef): THash =
+proc hashNode(p: RootRef): Hash =
   result = hash(cast[pointer](p))
 
 proc mustRehash(length, counter: int): bool =
@@ -466,7 +466,7 @@ proc debug(n: PNode) =
 const
   EmptySeq = @[]
 
-proc nextTry(h, maxHash: THash): THash =
+proc nextTry(h, maxHash: Hash): Hash =
   result = ((5 * h) + 1) and maxHash
   # For any initial h in range(maxHash), repeating that maxHash times
   # generates each int in range(maxHash) exactly once (see any text on
@@ -474,7 +474,7 @@ proc nextTry(h, maxHash: THash): THash =
 
 proc objectSetContains(t: TObjectSet, obj: RootRef): bool =
   # returns true whether n is in t
-  var h: THash = hashNode(obj) and high(t.data) # start with real hash value
+  var h: Hash = hashNode(obj) and high(t.data) # start with real hash value
   while t.data[h] != nil:
     if t.data[h] == obj:
       return true
@@ -482,7 +482,7 @@ proc objectSetContains(t: TObjectSet, obj: RootRef): bool =
   result = false
 
 proc objectSetRawInsert(data: var TObjectSeq, obj: RootRef) =
-  var h: THash = hashNode(obj) and high(data)
+  var h: Hash = hashNode(obj) and high(data)
   while data[h] != nil:
     assert(data[h] != obj)
     h = nextTry(h, high(data))
@@ -503,7 +503,7 @@ proc objectSetIncl(t: var TObjectSet, obj: RootRef) =
 
 proc objectSetContainsOrIncl(t: var TObjectSet, obj: RootRef): bool =
   # returns true if obj is already in the string table:
-  var h: THash = hashNode(obj) and high(t.data)
+  var h: Hash = hashNode(obj) and high(t.data)
   while true:
     var it = t.data[h]
     if it == nil: break
@@ -520,7 +520,7 @@ proc objectSetContainsOrIncl(t: var TObjectSet, obj: RootRef): bool =
   result = false
 
 proc tableRawGet(t: TTable, key: RootRef): int =
-  var h: THash = hashNode(key) and high(t.data) # start with real hash value
+  var h: Hash = hashNode(key) and high(t.data) # start with real hash value
   while t.data[h].key != nil:
     if t.data[h].key == key:
       return h
@@ -529,7 +529,7 @@ proc tableRawGet(t: TTable, key: RootRef): int =
 
 proc tableSearch(t: TTable, key, closure: RootRef,
                  comparator: TCmpProc): RootRef =
-  var h: THash = hashNode(key) and high(t.data) # start with real hash value
+  var h: Hash = hashNode(key) and high(t.data) # start with real hash value
   while t.data[h].key != nil:
     if t.data[h].key == key:
       if comparator(t.data[h].val, closure):
@@ -544,7 +544,7 @@ proc tableGet(t: TTable, key: RootRef): RootRef =
   else: result = nil
 
 proc tableRawInsert(data: var TPairSeq, key, val: RootRef) =
-  var h: THash = hashNode(key) and high(data)
+  var h: Hash = hashNode(key) and high(data)
   while data[h].key != nil:
     assert(data[h].key != key)
     h = nextTry(h, high(data))
@@ -569,7 +569,7 @@ proc tablePut(t: var TTable, key, val: RootRef) =
     inc(t.counter)
 
 proc strTableContains(t: TStrTable, n: PSym): bool =
-  var h: THash = n.name.h and high(t.data) # start with real hash value
+  var h: Hash = n.name.h and high(t.data) # start with real hash value
   while t.data[h] != nil:
     if (t.data[h] == n):
       return true
@@ -577,7 +577,7 @@ proc strTableContains(t: TStrTable, n: PSym): bool =
   result = false
 
 proc strTableRawInsert(data: var TSymSeq, n: PSym) =
-  var h: THash = n.name.h and high(data)
+  var h: Hash = n.name.h and high(data)
   if sfImmediate notin n.flags:
     # fast path:
     while data[h] != nil:
@@ -606,7 +606,7 @@ proc strTableRawInsert(data: var TSymSeq, n: PSym) =
 
 proc symTabReplaceRaw(data: var TSymSeq, prevSym: PSym, newSym: PSym) =
   assert prevSym.name.h == newSym.name.h
-  var h: THash = prevSym.name.h and high(data)
+  var h: Hash = prevSym.name.h and high(data)
   while data[h] != nil:
     if data[h] == prevSym:
       data[h] = newSym
@@ -640,7 +640,7 @@ proc strTableIncl*(t: var TStrTable, n: PSym): bool {.discardable.} =
   # It is essential that `n` is written nevertheless!
   # This way the newest redefinition is picked by the semantic analyses!
   assert n.name != nil
-  var h: THash = n.name.h and high(t.data)
+  var h: Hash = n.name.h and high(t.data)
   var replaceSlot = -1
   while true:
     var it = t.data[h]
@@ -666,7 +666,7 @@ proc strTableIncl*(t: var TStrTable, n: PSym): bool {.discardable.} =
   result = false
 
 proc strTableGet(t: TStrTable, name: PIdent): PSym =
-  var h: THash = name.h and high(t.data)
+  var h: Hash = name.h and high(t.data)
   while true:
     result = t.data[h]
     if result == nil: break
@@ -694,7 +694,7 @@ proc nextIdentIter(ti: var TIdentIter, tab: TStrTable): PSym =
 
 proc nextIdentExcluding*(ti: var TIdentIter, tab: TStrTable,
                          excluding: IntSet): PSym =
-  var h: THash = ti.h and high(tab.data)
+  var h: Hash = ti.h and high(tab.data)
   var start = h
   result = tab.data[h]
   while result != nil:
@@ -743,7 +743,7 @@ proc hasEmptySlot(data: TIdPairSeq): bool =
   result = false
 
 proc idTableRawGet(t: TIdTable, key: int): int =
-  var h: THash
+  var h: Hash
   h = key and high(t.data)    # start with real hash value
   while t.data[h].key != nil:
     if t.data[h].key.id == key:
@@ -772,7 +772,7 @@ iterator pairs*(t: TIdTable): tuple[key: int, value: RootRef] =
       yield (t.data[i].key.id, t.data[i].val)
 
 proc idTableRawInsert(data: var TIdPairSeq, key: PIdObj, val: RootRef) =
-  var h: THash
+  var h: Hash
   h = key.id and high(data)
   while data[h].key != nil:
     assert(data[h].key.id != key.id)
@@ -805,7 +805,7 @@ iterator idTablePairs*(t: TIdTable): tuple[key: PIdObj, val: RootRef] =
     if not isNil(t.data[i].key): yield (t.data[i].key, t.data[i].val)
 
 proc idNodeTableRawGet(t: TIdNodeTable, key: PIdObj): int =
-  var h: THash
+  var h: Hash
   h = key.id and high(t.data) # start with real hash value
   while t.data[h].key != nil:
     if t.data[h].key.id == key.id:
@@ -824,7 +824,7 @@ proc idNodeTableGetLazy*(t: TIdNodeTable, key: PIdObj): PNode =
     result = idNodeTableGet(t, key)
 
 proc idNodeTableRawInsert(data: var TIdNodePairSeq, key: PIdObj, val: PNode) =
-  var h: THash
+  var h: Hash
   h = key.id and high(data)
   while data[h].key != nil:
     assert(data[h].key.id != key.id)
@@ -863,7 +863,7 @@ proc initIITable(x: var TIITable) =
   for i in countup(0, StartSize - 1): x.data[i].key = InvalidKey
 
 proc iiTableRawGet(t: TIITable, key: int): int =
-  var h: THash
+  var h: Hash
   h = key and high(t.data)    # start with real hash value
   while t.data[h].key != InvalidKey:
     if t.data[h].key == key: return h
@@ -876,7 +876,7 @@ proc iiTableGet(t: TIITable, key: int): int =
   else: result = InvalidKey
 
 proc iiTableRawInsert(data: var TIIPairSeq, key, val: int) =
-  var h: THash
+  var h: Hash
   h = key and high(data)
   while data[h].key != InvalidKey:
     assert(data[h].key != key)
diff --git a/compiler/docgen.nim b/compiler/docgen.nim
index f8489d825..4b52b1c92 100644
--- a/compiler/docgen.nim
+++ b/compiler/docgen.nim
@@ -18,7 +18,7 @@ import
 
 type
   TSections = array[TSymKind, Rope]
-  TDocumentor = object of rstgen.TRstGenerator
+  TDocumentor = object of rstgen.RstGenerator
     modDesc: Rope           # module description
     id: int                  # for generating IDs
     toc, section: TSections
@@ -29,7 +29,7 @@ type
   PDoc* = ref TDocumentor ## Alias to type less.
 
 proc compilerMsgHandler(filename: string, line, col: int,
-                        msgKind: rst.TMsgKind, arg: string) {.procvar.} =
+                        msgKind: rst.MsgKind, arg: string) {.procvar.} =
   # translate msg kind:
   var k: msgs.TMsgKind
   case msgKind
@@ -53,7 +53,7 @@ proc docgenFindFile(s: string): string {.procvar.} =
 
 proc parseRst(text, filename: string,
               line, column: int, hasToc: var bool,
-              rstOptions: TRstParseOptions): PRstNode =
+              rstOptions: RstParseOptions): PRstNode =
   result = rstParse(text, filename, line, column, hasToc, rstOptions,
                     docgenFindFile, compilerMsgHandler)
 
diff --git a/compiler/idents.nim b/compiler/idents.nim
index 0cca18929..6986800cf 100644
--- a/compiler/idents.nim
+++ b/compiler/idents.nim
@@ -12,7 +12,7 @@
 # id. This module is essential for the compiler's performance.
 
 import 
-  hashes, strutils
+  hashes, strutils, etcpriv
 
 type 
   TIdObj* = object of RootObj
@@ -23,7 +23,7 @@ type
   TIdent*{.acyclic.} = object of TIdObj
     s*: string
     next*: PIdent             # for hash-table chaining
-    h*: THash                 # hash value of s
+    h*: Hash                 # hash value of s
 
 var firstCharIsCS*: bool = true
 var buckets*: array[0..4096 * 2 - 1, PIdent]
@@ -37,6 +37,8 @@ proc cmpIgnoreStyle(a, b: cstring, blen: int): int =
   while j < blen:
     while a[i] == '_': inc(i)
     while b[j] == '_': inc(j)
+    while isMagicIdentSeparatorRune(a, i): inc(i, magicIdentSeparatorRuneByteWidth)
+    while isMagicIdentSeparatorRune(b, j): inc(j, magicIdentSeparatorRuneByteWidth)
     # tolower inlined:
     var aa = a[i]
     var bb = b[j]
@@ -65,7 +67,7 @@ proc cmpExact(a, b: cstring, blen: int): int =
 
 var wordCounter = 1
 
-proc getIdent*(identifier: cstring, length: int, h: THash): PIdent =
+proc getIdent*(identifier: cstring, length: int, h: Hash): PIdent =
   var idx = h and high(buckets)
   result = buckets[idx]
   var last: PIdent = nil
@@ -99,7 +101,7 @@ proc getIdent*(identifier: string): PIdent =
   result = getIdent(cstring(identifier), len(identifier), 
                     hashIgnoreStyle(identifier))
 
-proc getIdent*(identifier: string, h: THash): PIdent = 
+proc getIdent*(identifier: string, h: Hash): PIdent = 
   result = getIdent(cstring(identifier), len(identifier), h)
 
 proc identEq*(id: PIdent, name: string): bool = 
diff --git a/compiler/jsgen.nim b/compiler/jsgen.nim
index 0f6323abc..2fdf14b76 100644
--- a/compiler/jsgen.nim
+++ b/compiler/jsgen.nim
@@ -986,6 +986,15 @@ proc genAddr(p: PProc, n: PNode, r: var TCompRes) =
     else: internalError(n.sons[0].info, "expr(nkBracketExpr, " & $ty.kind & ')')
   else: internalError(n.sons[0].info, "genAddr")
 
+proc genProcForSymIfNeeded(p: PProc, s: PSym) =
+  # Generate `s` at most once and attach it to the PProc whose `prc` owns it.
+  if p.g.generatedSyms.containsOrIncl(s.id): return
+  let procCode = genProc(p, s)
+  var scope = p
+  while scope != nil and scope.prc != s.owner: scope = scope.up
+  if scope == nil: add(p.g.code, procCode)   # no owning PProc on the chain
+  else: add(scope.locals, procCode)
+
 proc genSym(p: PProc, n: PNode, r: var TCompRes) =
   var s = n.sym
   case s.kind
@@ -1021,13 +1030,8 @@ proc genSym(p: PProc, n: PNode, r: var TCompRes) =
       discard
     elif sfForward in s.flags:
       p.g.forwarded.add(s)
-    elif not p.g.generatedSyms.containsOrIncl(s.id):
-      let newp = genProc(p, s)
-      var owner = p
-      while owner != nil and owner.prc != s.owner:
-        owner = owner.up
-      if owner != nil: add(owner.locals, newp)
-      else: add(p.g.code, newp)
+    else:
+      genProcForSymIfNeeded(p, s)
   else:
     if s.loc.r == nil:
       internalError(n.info, "symbol has no generated name: " & s.name.s)
@@ -1394,6 +1398,9 @@ proc genMagic(p: PProc, n: PNode, r: var TCompRes) =
   of mCopyStrLast: ternaryExpr(p, n, r, "", "($1.slice($2, ($3)+1).concat(0))")
   of mNewString: unaryExpr(p, n, r, "mnewString", "mnewString($1)")
   of mNewStringOfCap: unaryExpr(p, n, r, "mnewString", "mnewString(0)")
+  of mDotDot:
+    genProcForSymIfNeeded(p, n.sons[0].sym)
+    genCall(p, n, r)
   else:
     genCall(p, n, r)
     #else internalError(e.info, 'genMagic: ' + magicToStr[op]);
diff --git a/compiler/lexer.nim b/compiler/lexer.nim
index 8080e0e8c..6b38ee062 100644
--- a/compiler/lexer.nim
+++ b/compiler/lexer.nim
@@ -17,7 +17,7 @@
 
 import
   hashes, options, msgs, strutils, platform, idents, nimlexbase, llstream,
-  wordrecg
+  wordrecg, etcpriv
 
 const
   MaxLineLength* = 80         # lines longer than this lead to a warning
@@ -140,10 +140,12 @@ proc isKeyword*(kind: TTokType): bool =
 proc isNimIdentifier*(s: string): bool =
   if s[0] in SymStartChars:
     var i = 1
-    while i < s.len:
+    var sLen = s.len
+    while i < sLen:
       if s[i] == '_':
         inc(i)
-        if s[i] notin SymChars: return
+      elif isMagicIdentSeparatorRune(cstring s, i):
+        inc(i, magicIdentSeparatorRuneByteWidth)
       if s[i] notin SymChars: return
       inc(i)
     result = true
@@ -229,23 +231,6 @@ proc lexMessagePos(L: var TLexer, msg: TMsgKind, pos: int, arg = "") =
   var info = newLineInfo(L.fileIdx, L.lineNumber, pos - L.lineStart)
   L.dispMessage(info, msg, arg)
 
-proc matchUnderscoreChars(L: var TLexer, tok: var TToken, chars: set[char]) =
-  var pos = L.bufpos              # use registers for pos, buf
-  var buf = L.buf
-  while true:
-    if buf[pos] in chars:
-      add(tok.literal, buf[pos])
-      inc(pos)
-    else:
-      break
-    if buf[pos] == '_':
-      if buf[pos+1] notin chars:
-        lexMessage(L, errInvalidToken, "_")
-        break
-      add(tok.literal, '_')
-      inc(pos)
-  L.bufpos = pos
-
 proc matchTwoChars(L: TLexer, first: char, second: set[char]): bool =
   result = (L.buf[L.bufpos] == first) and (L.buf[L.bufpos + 1] in second)
 
@@ -268,136 +253,195 @@ proc unsafeParseUInt(s: string, b: var BiggestInt, start = 0): int =
     result = i - start
 {.pop.} # overflowChecks
 
+
+template eatChar(L: var TLexer, t: var TToken, replacementChar: char) =
+  t.literal.add(replacementChar)  # store the given char, not the buffer's
+  inc L.bufpos
+
+template eatChar(L: var TLexer, t: var TToken) =
+  t.literal.add(L.buf[L.bufpos])  # store the buffer's char unchanged
+  inc L.bufpos
+
 proc getNumber(L: var TLexer): TToken =
   var
-    pos, endpos: int
+    startpos, endpos: int
     xi: BiggestInt
-  # get the base:
+  const literalishChars = {   'A'..'F', 'a'..'f', '0'..'9', 'X', 'x', 'o', 'c',
+    'C', 'b', 'B', '_', '.', '\''}
+  const literalishCharsNoDot = literalishChars - {'.'}
+
+  proc matchUnderscoreChars(L: var TLexer, tok: var TToken, chars: set[char]) =
+    # Appends a run of `chars` to tok.literal; a single '_' may separate two
+    # members of `chars`. A '_' not followed by one is reported and left
+    # unconsumed. L.bufpos is only advanced once scanning has finished.
+    var cur = L.bufpos
+    let src = L.buf
+    while src[cur] in chars:
+      add(tok.literal, src[cur])
+      inc(cur)
+      if src[cur] != '_': continue
+      if src[cur + 1] notin chars:
+        lexMessage(L, errInvalidToken, "_")
+        break
+      add(tok.literal, '_')
+      inc(cur)
+    L.bufpos = cur
+
+  proc matchChars(L: var TLexer, tok: var TToken, chars: set[char]) =
+    # Appends the run of `chars` starting at L.bufpos to tok.literal and
+    # leaves L.bufpos on the first char outside `chars`.
+    var cur = L.bufpos
+    while L.buf[cur] in chars:
+      add(tok.literal, L.buf[cur]); inc(cur)
+    L.bufpos = cur
+
+  proc lexMessageLitNum(L: var TLexer, msg: TMsgKind, startpos: int) =
+    # Reports `msg`, quoting the whole literal re-scanned from `startpos`.
+    # Note: the erroneous 'O' char in the character set is intentional
+    const literalishChars = {'A'..'F', 'a'..'f', '0'..'9', 'X', 'x', 'o', 'O',
+      'c', 'C', 'b', 'B', '_', '.', '\'', 'd', 'i', 'u'}
+    let msgPos = L.bufpos       # saved here, restored before reporting
+    var t: TToken
+    t.literal = ""
+    L.bufpos = startpos # Use L.bufpos as pos because of matchChars
+    matchChars(L, t, literalishChars)
+    # We must verify +/- specifically so that we're not past the literal
+    if  L.buf[L.bufpos] in {'+', '-'} and
+        L.buf[L.bufpos - 1] in {'e', 'E'}:
+      add(t.literal, L.buf[L.bufpos])
+      inc(L.bufpos)
+      matchChars(L, t, literalishChars)
+    if L.buf[L.bufpos] in {'\'', 'f', 'F', 'd', 'D', 'i', 'I', 'u', 'U'}:
+      add(t.literal, L.buf[L.bufpos])   # keep the suffix char, then step past it
+      inc(L.bufpos)
+      matchChars(L, t, {'0'..'9'})
+    L.bufpos = msgPos
+    lexMessage(L, msg, t.literal)
+
   result.tokType = tkIntLit   # int literal until we know better
   result.literal = ""
-  result.base = base10        # BUGFIX
-  pos = L.bufpos     # make sure the literal is correct for error messages:
-  var eallowed = false
-  if L.buf[pos] == '0' and L.buf[pos+1] in {'X', 'x'}:
-    matchUnderscoreChars(L, result, {'A'..'F', 'a'..'f', '0'..'9', 'X', 'x'})
+  result.base = base10
+  startpos = L.bufpos
+  var isAFloatLiteral = false
+  # First stage: find out base, make verifications, build token literal string
+  if  L.buf[L.bufpos] == '0' and 
+      L.buf[L.bufpos + 1] in {'X', 'x', 'o', 'O', 'c', 'C', 'b', 'B'}:
+    eatChar(L, result, '0')
+    case L.buf[L.bufpos]
+    of 'O':
+      lexMessageLitNum(L, errInvalidNumberOctalCode, startpos)
+    of 'x', 'X':
+      eatChar(L, result, 'x')
+      matchUnderscoreChars(L, result, {'0'..'9', 'a'..'f', 'A'..'F'})
+    of 'o', 'c', 'C':
+      eatChar(L, result, 'c')
+      matchUnderscoreChars(L, result, {'0'..'7'})
+    of 'b', 'B':
+      eatChar(L, result, 'b')
+      matchUnderscoreChars(L, result, {'0'..'1'})
+    else:
+      internalError(getLineInfo(L), "getNumber")
   else:
-    matchUnderscoreChars(L, result, {'0'..'9', 'b', 'B', 'o', 'c', 'C'})
-    eallowed = true
-  if (L.buf[L.bufpos] == '.') and (L.buf[L.bufpos + 1] in {'0'..'9'}):
-    add(result.literal, '.')
-    inc(L.bufpos)
-    matchUnderscoreChars(L, result, {'0'..'9'})
-    eallowed = true
-  if eallowed and L.buf[L.bufpos] in {'e', 'E'}:
-    add(result.literal, 'e')
-    inc(L.bufpos)
-    if L.buf[L.bufpos] in {'+', '-'}:
-      add(result.literal, L.buf[L.bufpos])
-      inc(L.bufpos)
     matchUnderscoreChars(L, result, {'0'..'9'})
+    if (L.buf[L.bufpos] == '.') and (L.buf[L.bufpos + 1] in {'0'..'9'}):
+      isAFloatLiteral = true
+      eatChar(L, result, '.')
+      matchUnderscoreChars(L, result, {'0'..'9'})
+    if L.buf[L.bufpos] in {'e', 'E'}:
+      isAFloatLiteral = true
+      eatChar(L, result, 'e')
+      if L.buf[L.bufpos] in {'+', '-'}:
+        eatChar(L, result)
+      matchUnderscoreChars(L, result, {'0'..'9'})
   endpos = L.bufpos
-  if L.buf[endpos] in {'\'', 'f', 'F', 'i', 'I', 'u', 'U'}:
-    if L.buf[endpos] == '\'': inc(endpos)
-    L.bufpos = pos            # restore position
-    case L.buf[endpos]
+  # Second stage, find out if there's a datatype postfix and handle it
+  var postPos = endpos
+  if L.buf[postPos] in {'\'', 'f', 'F', 'd', 'D', 'i', 'I', 'u', 'U'}:
+    if L.buf[postPos] == '\'': 
+      inc(postPos)
+    case L.buf[postPos]
     of 'f', 'F':
-      inc(endpos)
-      if (L.buf[endpos] == '3') and (L.buf[endpos + 1] == '2'):
+      inc(postPos)
+      if (L.buf[postPos] == '3') and (L.buf[postPos + 1] == '2'):
         result.tokType = tkFloat32Lit
-        inc(endpos, 2)
-      elif (L.buf[endpos] == '6') and (L.buf[endpos + 1] == '4'):
+        inc(postPos, 2)
+      elif (L.buf[postPos] == '6') and (L.buf[postPos + 1] == '4'):
         result.tokType = tkFloat64Lit
-        inc(endpos, 2)
-      elif (L.buf[endpos] == '1') and
-           (L.buf[endpos + 1] == '2') and
-           (L.buf[endpos + 2] == '8'):
+        inc(postPos, 2)
+      elif (L.buf[postPos] == '1') and
+           (L.buf[postPos + 1] == '2') and
+           (L.buf[postPos + 2] == '8'):
         result.tokType = tkFloat128Lit
-        inc(endpos, 3)
-      else:
-        lexMessage(L, errInvalidNumber, result.literal & "'f" & L.buf[endpos])
+        inc(postPos, 3)
+      else:   # "f" alone defaults to float32
+        result.tokType = tkFloat32Lit
+    of 'd', 'D':  # ad hoc convenience shortcut for f64
+      inc(postPos)
+      result.tokType = tkFloat64Lit
     of 'i', 'I':
-      inc(endpos)
-      if (L.buf[endpos] == '6') and (L.buf[endpos + 1] == '4'):
+      inc(postPos)
+      if (L.buf[postPos] == '6') and (L.buf[postPos + 1] == '4'):
         result.tokType = tkInt64Lit
-        inc(endpos, 2)
-      elif (L.buf[endpos] == '3') and (L.buf[endpos + 1] == '2'):
+        inc(postPos, 2)
+      elif (L.buf[postPos] == '3') and (L.buf[postPos + 1] == '2'):
         result.tokType = tkInt32Lit
-        inc(endpos, 2)
-      elif (L.buf[endpos] == '1') and (L.buf[endpos + 1] == '6'):
+        inc(postPos, 2)
+      elif (L.buf[postPos] == '1') and (L.buf[postPos + 1] == '6'):
         result.tokType = tkInt16Lit
-        inc(endpos, 2)
-      elif (L.buf[endpos] == '8'):
+        inc(postPos, 2)
+      elif (L.buf[postPos] == '8'):
         result.tokType = tkInt8Lit
-        inc(endpos)
+        inc(postPos)
       else:
-        lexMessage(L, errInvalidNumber, result.literal & "'i" & L.buf[endpos])
+        lexMessageLitNum(L, errInvalidNumber, startpos)
     of 'u', 'U':
-      inc(endpos)
-      if (L.buf[endpos] == '6') and (L.buf[endpos + 1] == '4'):
+      inc(postPos)
+      if (L.buf[postPos] == '6') and (L.buf[postPos + 1] == '4'):
         result.tokType = tkUInt64Lit
-        inc(endpos, 2)
-      elif (L.buf[endpos] == '3') and (L.buf[endpos + 1] == '2'):
+        inc(postPos, 2)
+      elif (L.buf[postPos] == '3') and (L.buf[postPos + 1] == '2'):
         result.tokType = tkUInt32Lit
-        inc(endpos, 2)
-      elif (L.buf[endpos] == '1') and (L.buf[endpos + 1] == '6'):
+        inc(postPos, 2)
+      elif (L.buf[postPos] == '1') and (L.buf[postPos + 1] == '6'):
         result.tokType = tkUInt16Lit
-        inc(endpos, 2)
-      elif (L.buf[endpos] == '8'):
+        inc(postPos, 2)
+      elif (L.buf[postPos] == '8'):
         result.tokType = tkUInt8Lit
-        inc(endpos)
+        inc(postPos)
       else:
         result.tokType = tkUIntLit
-    else: lexMessage(L, errInvalidNumber, result.literal & "'" & L.buf[endpos])
-  else:
-    L.bufpos = pos            # restore position
+    else: 
+      lexMessageLitNum(L, errInvalidNumber, startpos)
+  # Is there still a literalish char awaiting? Then it's an error!
+  if  L.buf[postPos] in literalishCharsNoDot or 
+     (L.buf[postPos] == '.' and L.buf[postPos + 1] in {'0'..'9'}):
+    lexMessageLitNum(L, errInvalidNumber, startpos)
+  # Third stage, extract actual number
+  L.bufpos = startpos            # restore position
+  var pos: int = startpos
   try:
     if (L.buf[pos] == '0') and
         (L.buf[pos + 1] in {'x', 'X', 'b', 'B', 'o', 'O', 'c', 'C'}):
       inc(pos, 2)
-      xi = 0                  # it may be a base prefix
+      xi = 0                  # it is a base prefix
       case L.buf[pos - 1]     # now look at the optional type suffix:
       of 'b', 'B':
         result.base = base2
-        while true:
-          case L.buf[pos]
-          of '2'..'9', '.':
-            lexMessage(L, errInvalidNumber, result.literal)
-            inc(pos)
-          of '_':
-            if L.buf[pos+1] notin {'0'..'1'}:
-              lexMessage(L, errInvalidToken, "_")
-              break
-            inc(pos)
-          of '0', '1':
+        while pos < endpos:
+          if L.buf[pos] != '_':
             xi = `shl`(xi, 1) or (ord(L.buf[pos]) - ord('0'))
-            inc(pos)
-          else: break
+          inc(pos)
       of 'o', 'c', 'C':
         result.base = base8
-        while true:
-          case L.buf[pos]
-          of '8'..'9', '.':
-            lexMessage(L, errInvalidNumber, result.literal)
-            inc(pos)
-          of '_':
-            if L.buf[pos+1] notin {'0'..'7'}:
-              lexMessage(L, errInvalidToken, "_")
-              break
-            inc(pos)
-          of '0'..'7':
+        while pos < endpos:
+          if L.buf[pos] != '_':
             xi = `shl`(xi, 3) or (ord(L.buf[pos]) - ord('0'))
-            inc(pos)
-          else: break
-      of 'O':
-        lexMessage(L, errInvalidNumber, result.literal)
+          inc(pos)
       of 'x', 'X':
         result.base = base16
-        while true:
+        while pos < endpos:
           case L.buf[pos]
           of '_':
-            if L.buf[pos+1] notin {'0'..'9', 'a'..'f', 'A'..'F'}:
-              lexMessage(L, errInvalidToken, "_")
-              break
             inc(pos)
           of '0'..'9':
             xi = `shl`(xi, 4) or (ord(L.buf[pos]) - ord('0'))
@@ -408,8 +452,10 @@ proc getNumber(L: var TLexer): TToken =
           of 'A'..'F':
             xi = `shl`(xi, 4) or (ord(L.buf[pos]) - ord('A') + 10)
             inc(pos)
-          else: break
-      else: internalError(getLineInfo(L), "getNumber")
+          else: 
+            break
+      else: 
+        internalError(getLineInfo(L), "getNumber")
       case result.tokType
       of tkIntLit, tkInt64Lit: result.iNumber = xi
       of tkInt8Lit: result.iNumber = BiggestInt(int8(toU8(int(xi))))
@@ -425,7 +471,7 @@ proc getNumber(L: var TLexer): TToken =
         # XXX: Test this on big endian machine!
       of tkFloat64Lit: result.fNumber = (cast[PFloat64](addr(xi)))[]
       else: internalError(getLineInfo(L), "getNumber")
-    elif isFloatLiteral(result.literal) or (result.tokType == tkFloat32Lit) or
+    elif isAFloatLiteral or (result.tokType == tkFloat32Lit) or
         (result.tokType == tkFloat64Lit):
       result.fNumber = parseFloat(result.literal)
       if result.tokType == tkIntLit: result.tokType = tkFloatLit
@@ -441,18 +487,18 @@ proc getNumber(L: var TLexer): TToken =
         if result.tokType == tkIntLit:
           result.tokType = tkInt64Lit
         elif result.tokType in {tkInt8Lit, tkInt16Lit, tkInt32Lit}:
-          lexMessage(L, errNumberOutOfRange, result.literal)
+          lexMessageLitNum(L, errNumberOutOfRange, startpos)
       elif result.tokType == tkInt8Lit and
           (result.iNumber < int8.low or result.iNumber > int8.high):
-        lexMessage(L, errNumberOutOfRange, result.literal)
+        lexMessageLitNum(L, errNumberOutOfRange, startpos)
       elif result.tokType == tkInt16Lit and
           (result.iNumber < int16.low or result.iNumber > int16.high):
-        lexMessage(L, errNumberOutOfRange, result.literal)
+        lexMessageLitNum(L, errNumberOutOfRange, startpos)
   except ValueError:
-    lexMessage(L, errInvalidNumber, result.literal)
+    lexMessageLitNum(L, errInvalidNumber, startpos)
   except OverflowError, RangeError:
-    lexMessage(L, errNumberOutOfRange, result.literal)
-  L.bufpos = endpos
+    lexMessageLitNum(L, errNumberOutOfRange, startpos)
+  L.bufpos = postPos
 
 proc handleHexChar(L: var TLexer, xi: var int) =
   case L.buf[L.bufpos]
@@ -625,23 +671,34 @@ proc getCharacter(L: var TLexer, tok: var TToken) =
   inc(L.bufpos)               # skip '
 
 proc getSymbol(L: var TLexer, tok: var TToken) =
-  var h: THash = 0
+  var h: Hash = 0
   var pos = L.bufpos
   var buf = L.buf
   while true:
     var c = buf[pos]
     case c
     of 'a'..'z', '0'..'9', '\x80'..'\xFF':
-      h = h !& ord(c)
+      if  c == '\226' and
+          buf[pos+1] == '\128' and
+          buf[pos+2] == '\147':  # It's a 'magic separator' en-dash Unicode
+        if buf[pos + magicIdentSeparatorRuneByteWidth] notin SymChars:
+          lexMessage(L, errInvalidToken, "–")
+          break
+        inc(pos, magicIdentSeparatorRuneByteWidth)
+      else:
+        h = h !& ord(c)
+        inc(pos)
     of 'A'..'Z':
       c = chr(ord(c) + (ord('a') - ord('A'))) # toLower()
       h = h !& ord(c)
+      inc(pos)
     of '_':
       if buf[pos+1] notin SymChars:
         lexMessage(L, errInvalidToken, "_")
         break
+      inc(pos)
+
     else: break
-    inc(pos)
   h = !$h
   tok.ident = getIdent(addr(L.buf[L.bufpos]), pos - L.bufpos, h)
   L.bufpos = pos
@@ -652,7 +709,7 @@ proc getSymbol(L: var TLexer, tok: var TToken) =
     tok.tokType = TTokType(tok.ident.id + ord(tkSymbol))
 
 proc endOperator(L: var TLexer, tok: var TToken, pos: int,
-                 hash: THash) {.inline.} =
+                 hash: Hash) {.inline.} =
   var h = !$hash
   tok.ident = getIdent(addr(L.buf[L.bufpos]), pos - L.bufpos, h)
   if (tok.ident.id < oprLow) or (tok.ident.id > oprHigh): tok.tokType = tkOpr
@@ -662,7 +719,7 @@ proc endOperator(L: var TLexer, tok: var TToken, pos: int,
 proc getOperator(L: var TLexer, tok: var TToken) =
   var pos = L.bufpos
   var buf = L.buf
-  var h: THash = 0
+  var h: Hash = 0
   while true:
     var c = buf[pos]
     if c notin OpChars: break
diff --git a/compiler/msgs.nim b/compiler/msgs.nim
index 041a181be..e97032db4 100644
--- a/compiler/msgs.nim
+++ b/compiler/msgs.nim
@@ -17,10 +17,9 @@ type
     errIntLiteralExpected, errInvalidCharacterConstant,
     errClosingTripleQuoteExpected, errClosingQuoteExpected,
     errTabulatorsAreNotAllowed, errInvalidToken, errLineTooLong,
-    errInvalidNumber, errNumberOutOfRange, errNnotAllowedInCharacter,
-    errClosingBracketExpected, errMissingFinalQuote, errIdentifierExpected,
-    errNewlineExpected,
-    errInvalidModuleName,
+    errInvalidNumber, errInvalidNumberOctalCode, errNumberOutOfRange,
+    errNnotAllowedInCharacter, errClosingBracketExpected, errMissingFinalQuote,
+    errIdentifierExpected, errNewlineExpected, errInvalidModuleName,
     errOperatorExpected, errTokenExpected, errStringAfterIncludeExpected,
     errRecursiveDependencyX, errOnOrOffExpected, errNoneSpeedOrSizeExpected,
     errInvalidPragma, errUnknownPragma, errInvalidDirectiveX,
@@ -143,6 +142,7 @@ const
     errInvalidToken: "invalid token: $1",
     errLineTooLong: "line too long",
     errInvalidNumber: "$1 is not a valid number",
+    errInvalidNumberOctalCode: "$1 is not a valid number; did you mean octal? Then use one of '0o', '0c' or '0C'.",
     errNumberOutOfRange: "number $1 out of valid range",
     errNnotAllowedInCharacter: "\\n not allowed in character literal",
     errClosingBracketExpected: "closing ']' expected, but end of file reached",
diff --git a/compiler/treetab.nim b/compiler/treetab.nim
index 8d66d56c7..adfc7b2ce 100644
--- a/compiler/treetab.nim
+++ b/compiler/treetab.nim
@@ -12,7 +12,7 @@
 import 
   hashes, ast, astalgo, types
 
-proc hashTree(n: PNode): THash = 
+proc hashTree(n: PNode): Hash = 
   if n == nil: return 
   result = ord(n.kind)
   case n.kind
@@ -53,8 +53,8 @@ proc treesEquivalent(a, b: PNode): bool =
         result = true
     if result: result = sameTypeOrNil(a.typ, b.typ)
   
-proc nodeTableRawGet(t: TNodeTable, k: THash, key: PNode): int = 
-  var h: THash = k and high(t.data)
+proc nodeTableRawGet(t: TNodeTable, k: Hash, key: PNode): int = 
+  var h: Hash = k and high(t.data)
   while t.data[h].key != nil: 
     if (t.data[h].h == k) and treesEquivalent(t.data[h].key, key): 
       return h
@@ -66,9 +66,9 @@ proc nodeTableGet*(t: TNodeTable, key: PNode): int =
   if index >= 0: result = t.data[index].val
   else: result = low(int)
   
-proc nodeTableRawInsert(data: var TNodePairSeq, k: THash, key: PNode, 
+proc nodeTableRawInsert(data: var TNodePairSeq, k: Hash, key: PNode, 
                         val: int) = 
-  var h: THash = k and high(data)
+  var h: Hash = k and high(data)
   while data[h].key != nil: h = nextTry(h, high(data))
   assert(data[h].key == nil)
   data[h].h = k
@@ -77,7 +77,7 @@ proc nodeTableRawInsert(data: var TNodePairSeq, k: THash, key: PNode,
 
 proc nodeTablePut*(t: var TNodeTable, key: PNode, val: int) = 
   var n: TNodePairSeq
-  var k: THash = hashTree(key)
+  var k: Hash = hashTree(key)
   var index = nodeTableRawGet(t, k, key)
   if index >= 0: 
     assert(t.data[index].key != nil)
@@ -94,7 +94,7 @@ proc nodeTablePut*(t: var TNodeTable, key: PNode, val: int) =
 
 proc nodeTableTestOrSet*(t: var TNodeTable, key: PNode, val: int): int = 
   var n: TNodePairSeq
-  var k: THash = hashTree(key)
+  var k: Hash = hashTree(key)
   var index = nodeTableRawGet(t, k, key)
   if index >= 0: 
     assert(t.data[index].key != nil)
author	Oscar Campbell <oscar@campbell.nu>	2015-06-04 22:43:18 +0200
committer	Oscar Campbell <oscar@campbell.nu>	2015-06-04 22:43:18 +0200
commit	cdf631c508d4e7d1968ec3b238e29de17df670d7 (patch)
tree	3b5b7408c5b54ad26f578c583edcdb07101551bd /compiler
parent	071ccf3d026b03135dbc1e413a5312ac296191ac (diff)
parent	ccb9ae2d52cc42f2489a7bbd758c924a4dbbe872 (diff)
download	Nim-cdf631c508d4e7d1968ec3b238e29de17df670d7.tar.gz