diff options
author | Oscar Campbell <oscar@campbell.nu> | 2015-06-04 22:43:18 +0200 |
---|---|---|
committer | Oscar Campbell <oscar@campbell.nu> | 2015-06-04 22:43:18 +0200 |
commit | cdf631c508d4e7d1968ec3b238e29de17df670d7 (patch) | |
tree | 3b5b7408c5b54ad26f578c583edcdb07101551bd /compiler | |
parent | 071ccf3d026b03135dbc1e413a5312ac296191ac (diff) | |
parent | ccb9ae2d52cc42f2489a7bbd758c924a4dbbe872 (diff) | |
download | Nim-cdf631c508d4e7d1968ec3b238e29de17df670d7.tar.gz |
Merge branch 'devel' into bugfix-2858-testament-sources-system-wide
Diffstat (limited to 'compiler')
-rw-r--r-- | compiler/ast.nim | 2 | ||||
-rw-r--r-- | compiler/astalgo.nim | 48 | ||||
-rw-r--r-- | compiler/docgen.nim | 6 | ||||
-rw-r--r-- | compiler/idents.nim | 10 | ||||
-rw-r--r-- | compiler/jsgen.nim | 21 | ||||
-rw-r--r-- | compiler/lexer.nim | 305 | ||||
-rw-r--r-- | compiler/msgs.nim | 8 | ||||
-rw-r--r-- | compiler/treetab.nim | 14 |
8 files changed, 240 insertions, 174 deletions
diff --git a/compiler/ast.nim b/compiler/ast.nim index 64cb1b1bc..044f21341 100644 --- a/compiler/ast.nim +++ b/compiler/ast.nim @@ -842,7 +842,7 @@ type data*: TIdNodePairSeq TNodePair* = object - h*: THash # because it is expensive to compute! + h*: Hash # because it is expensive to compute! key*: PNode val*: int diff --git a/compiler/astalgo.nim b/compiler/astalgo.nim index 1707718d7..5980edb27 100644 --- a/compiler/astalgo.nim +++ b/compiler/astalgo.nim @@ -14,7 +14,7 @@ import ast, hashes, intsets, strutils, options, msgs, ropes, idents, rodutils -proc hashNode*(p: RootRef): THash +proc hashNode*(p: RootRef): Hash proc treeToYaml*(n: PNode, indent: int = 0, maxRecDepth: int = - 1): Rope # Convert a tree into its YAML representation; this is used by the # YAML code generator and it is invaluable for debugging purposes. @@ -49,7 +49,7 @@ proc strTableGet*(t: TStrTable, name: PIdent): PSym type TTabIter*{.final.} = object # consider all fields here private - h*: THash # current hash + h*: Hash # current hash proc initTabIter*(ti: var TTabIter, tab: TStrTable): PSym proc nextIter*(ti: var TTabIter, tab: TStrTable): PSym @@ -65,7 +65,7 @@ proc nextIter*(ti: var TTabIter, tab: TStrTable): PSym type TIdentIter*{.final.} = object # iterator over all syms with same identifier - h*: THash # current hash + h*: Hash # current hash name*: PIdent @@ -94,7 +94,7 @@ proc getSymFromList*(list: PNode, ident: PIdent, start: int = 0): PSym proc lookupInRecord*(n: PNode, field: PIdent): PSym proc getModule*(s: PSym): PSym proc mustRehash*(length, counter: int): bool -proc nextTry*(h, maxHash: THash): THash {.inline.} +proc nextTry*(h, maxHash: Hash): Hash {.inline.} # ------------- table[int, int] --------------------------------------------- const @@ -196,7 +196,7 @@ proc getSymFromList(list: PNode, ident: PIdent, start: int = 0): PSym = else: internalError(list.info, "getSymFromList") result = nil -proc hashNode(p: RootRef): THash = +proc hashNode(p: RootRef): Hash = result = hash(cast[pointer](p)) proc mustRehash(length, counter: int): bool = @@ -466,7 +466,7 @@ proc debug(n: PNode) = const EmptySeq = @[] -proc nextTry(h, maxHash: THash): THash = +proc nextTry(h, maxHash: Hash): Hash = result = ((5 * h) + 1) and maxHash # For any initial h in range(maxHash), repeating that maxHash times # generates each int in range(maxHash) exactly once (see any text on @@ -474,7 +474,7 @@ proc nextTry(h, maxHash: THash): THash = proc objectSetContains(t: TObjectSet, obj: RootRef): bool = # returns true whether n is in t - var h: THash = hashNode(obj) and high(t.data) # start with real hash value + var h: Hash = hashNode(obj) and high(t.data) # start with real hash value while t.data[h] != nil: if t.data[h] == obj: return true @@ -482,7 +482,7 @@ proc objectSetContains(t: TObjectSet, obj: RootRef): bool = result = false proc objectSetRawInsert(data: var TObjectSeq, obj: RootRef) = - var h: THash = hashNode(obj) and high(data) + var h: Hash = hashNode(obj) and high(data) while data[h] != nil: assert(data[h] != obj) h = nextTry(h, high(data)) @@ -503,7 +503,7 @@ proc objectSetIncl(t: var TObjectSet, obj: RootRef) = proc objectSetContainsOrIncl(t: var TObjectSet, obj: RootRef): bool = # returns true if obj is already in the string table: - var h: THash = hashNode(obj) and high(t.data) + var h: Hash = hashNode(obj) and high(t.data) while true: var it = t.data[h] if it == nil: break @@ -520,7 +520,7 @@ proc objectSetContainsOrIncl(t: var TObjectSet, obj: RootRef): bool = result = false proc tableRawGet(t: TTable, key: RootRef): int = - var h: THash = hashNode(key) and high(t.data) # start with real hash value + var h: Hash = hashNode(key) and high(t.data) # start with real hash value while t.data[h].key != nil: if t.data[h].key == key: return h @@ -529,7 +529,7 @@ proc tableRawGet(t: TTable, key: RootRef): int = proc tableSearch(t: TTable, key, closure: RootRef, comparator: TCmpProc): RootRef = - var h: THash = hashNode(key) and high(t.data) # start with real hash value + var h: Hash = hashNode(key) and high(t.data) # start with real hash value while t.data[h].key != nil: if t.data[h].key == key: if comparator(t.data[h].val, closure): @@ -544,7 +544,7 @@ proc tableGet(t: TTable, key: RootRef): RootRef = else: result = nil proc tableRawInsert(data: var TPairSeq, key, val: RootRef) = - var h: THash = hashNode(key) and high(data) + var h: Hash = hashNode(key) and high(data) while data[h].key != nil: assert(data[h].key != key) h = nextTry(h, high(data)) @@ -569,7 +569,7 @@ proc tablePut(t: var TTable, key, val: RootRef) = inc(t.counter) proc strTableContains(t: TStrTable, n: PSym): bool = - var h: THash = n.name.h and high(t.data) # start with real hash value + var h: Hash = n.name.h and high(t.data) # start with real hash value while t.data[h] != nil: if (t.data[h] == n): return true @@ -577,7 +577,7 @@ proc strTableContains(t: TStrTable, n: PSym): bool = result = false proc strTableRawInsert(data: var TSymSeq, n: PSym) = - var h: THash = n.name.h and high(data) + var h: Hash = n.name.h and high(data) if sfImmediate notin n.flags: # fast path: while data[h] != nil: @@ -606,7 +606,7 @@ proc strTableRawInsert(data: var TSymSeq, n: PSym) = proc symTabReplaceRaw(data: var TSymSeq, prevSym: PSym, newSym: PSym) = assert prevSym.name.h == newSym.name.h - var h: THash = prevSym.name.h and high(data) + var h: Hash = prevSym.name.h and high(data) while data[h] != nil: if data[h] == prevSym: data[h] = newSym @@ -640,7 +640,7 @@ proc strTableIncl*(t: var TStrTable, n: PSym): bool {.discardable.} = # It is essential that `n` is written nevertheless! # This way the newest redefinition is picked by the semantic analyses! assert n.name != nil - var h: THash = n.name.h and high(t.data) + var h: Hash = n.name.h and high(t.data) var replaceSlot = -1 while true: var it = t.data[h] @@ -666,7 +666,7 @@ proc strTableIncl*(t: var TStrTable, n: PSym): bool {.discardable.} = result = false proc strTableGet(t: TStrTable, name: PIdent): PSym = - var h: THash = name.h and high(t.data) + var h: Hash = name.h and high(t.data) while true: result = t.data[h] if result == nil: break @@ -694,7 +694,7 @@ proc nextIdentIter(ti: var TIdentIter, tab: TStrTable): PSym = proc nextIdentExcluding*(ti: var TIdentIter, tab: TStrTable, excluding: IntSet): PSym = - var h: THash = ti.h and high(tab.data) + var h: Hash = ti.h and high(tab.data) var start = h result = tab.data[h] while result != nil: @@ -743,7 +743,7 @@ proc hasEmptySlot(data: TIdPairSeq): bool = result = false proc idTableRawGet(t: TIdTable, key: int): int = - var h: THash + var h: Hash h = key and high(t.data) # start with real hash value while t.data[h].key != nil: if t.data[h].key.id == key: @@ -772,7 +772,7 @@ iterator pairs*(t: TIdTable): tuple[key: int, value: RootRef] = yield (t.data[i].key.id, t.data[i].val) proc idTableRawInsert(data: var TIdPairSeq, key: PIdObj, val: RootRef) = - var h: THash + var h: Hash h = key.id and high(data) while data[h].key != nil: assert(data[h].key.id != key.id) @@ -805,7 +805,7 @@ iterator idTablePairs*(t: TIdTable): tuple[key: PIdObj, val: RootRef] = if not isNil(t.data[i].key): yield (t.data[i].key, t.data[i].val) proc idNodeTableRawGet(t: TIdNodeTable, key: PIdObj): int = - var h: THash + var h: Hash h = key.id and high(t.data) # start with real hash value while t.data[h].key != nil: if t.data[h].key.id == key.id: @@ -824,7 +824,7 @@ proc idNodeTableGetLazy*(t: TIdNodeTable, key: PIdObj): PNode = result = idNodeTableGet(t, key) proc idNodeTableRawInsert(data: var TIdNodePairSeq, key: PIdObj, val: PNode) = - var h: THash + var h: Hash h = key.id and high(data) while data[h].key != nil: assert(data[h].key.id != key.id) @@ -863,7 +863,7 @@ proc initIITable(x: var TIITable) = for i in countup(0, StartSize - 1): x.data[i].key = InvalidKey proc iiTableRawGet(t: TIITable, key: int): int = - var h: THash + var h: Hash h = key and high(t.data) # start with real hash value while t.data[h].key != InvalidKey: if t.data[h].key == key: return h @@ -876,7 +876,7 @@ proc iiTableGet(t: TIITable, key: int): int = else: result = InvalidKey proc iiTableRawInsert(data: var TIIPairSeq, key, val: int) = - var h: THash + var h: Hash h = key and high(data) while data[h].key != InvalidKey: assert(data[h].key != key) diff --git a/compiler/docgen.nim b/compiler/docgen.nim index f8489d825..4b52b1c92 100644 --- a/compiler/docgen.nim +++ b/compiler/docgen.nim @@ -18,7 +18,7 @@ import type TSections = array[TSymKind, Rope] - TDocumentor = object of rstgen.TRstGenerator + TDocumentor = object of rstgen.RstGenerator modDesc: Rope # module description id: int # for generating IDs toc, section: TSections @@ -29,7 +29,7 @@ type PDoc* = ref TDocumentor ## Alias to type less. proc compilerMsgHandler(filename: string, line, col: int, - msgKind: rst.TMsgKind, arg: string) {.procvar.} = + msgKind: rst.MsgKind, arg: string) {.procvar.} = # translate msg kind: var k: msgs.TMsgKind case msgKind @@ -53,7 +53,7 @@ proc docgenFindFile(s: string): string {.procvar.} = proc parseRst(text, filename: string, line, column: int, hasToc: var bool, - rstOptions: TRstParseOptions): PRstNode = + rstOptions: RstParseOptions): PRstNode = result = rstParse(text, filename, line, column, hasToc, rstOptions, docgenFindFile, compilerMsgHandler) diff --git a/compiler/idents.nim b/compiler/idents.nim index 0cca18929..6986800cf 100644 --- a/compiler/idents.nim +++ b/compiler/idents.nim @@ -12,7 +12,7 @@ # id. This module is essential for the compiler's performance. import - hashes, strutils + hashes, strutils, etcpriv type TIdObj* = object of RootObj @@ -23,7 +23,7 @@ type TIdent*{.acyclic.} = object of TIdObj s*: string next*: PIdent # for hash-table chaining - h*: THash # hash value of s + h*: Hash # hash value of s var firstCharIsCS*: bool = true var buckets*: array[0..4096 * 2 - 1, PIdent] @@ -37,6 +37,8 @@ proc cmpIgnoreStyle(a, b: cstring, blen: int): int = while j < blen: while a[i] == '_': inc(i) while b[j] == '_': inc(j) + while isMagicIdentSeparatorRune(a, i): inc(i, magicIdentSeparatorRuneByteWidth) + while isMagicIdentSeparatorRune(b, j): inc(j, magicIdentSeparatorRuneByteWidth) # tolower inlined: var aa = a[i] var bb = b[j] @@ -65,7 +67,7 @@ proc cmpExact(a, b: cstring, blen: int): int = var wordCounter = 1 -proc getIdent*(identifier: cstring, length: int, h: THash): PIdent = +proc getIdent*(identifier: cstring, length: int, h: Hash): PIdent = var idx = h and high(buckets) result = buckets[idx] var last: PIdent = nil @@ -99,7 +101,7 @@ proc getIdent*(identifier: string): PIdent = result = getIdent(cstring(identifier), len(identifier), hashIgnoreStyle(identifier)) -proc getIdent*(identifier: string, h: THash): PIdent = +proc getIdent*(identifier: string, h: Hash): PIdent = result = getIdent(cstring(identifier), len(identifier), h) proc identEq*(id: PIdent, name: string): bool = diff --git a/compiler/jsgen.nim b/compiler/jsgen.nim index 0f6323abc..2fdf14b76 100644 --- a/compiler/jsgen.nim +++ b/compiler/jsgen.nim @@ -986,6 +986,15 @@ proc genAddr(p: PProc, n: PNode, r: var TCompRes) = else: internalError(n.sons[0].info, "expr(nkBracketExpr, " & $ty.kind & ')') else: internalError(n.sons[0].info, "genAddr") +proc genProcForSymIfNeeded(p: PProc, s: PSym) = + if not p.g.generatedSyms.containsOrIncl(s.id): + let newp = genProc(p, s) + var owner = p + while owner != nil and owner.prc != s.owner: + owner = owner.up + if owner != nil: add(owner.locals, newp) + else: add(p.g.code, newp) + proc genSym(p: PProc, n: PNode, r: var TCompRes) = var s = n.sym case s.kind @@ -1021,13 +1030,8 @@ proc genSym(p: PProc, n: PNode, r: var TCompRes) = discard elif sfForward in s.flags: p.g.forwarded.add(s) - elif not p.g.generatedSyms.containsOrIncl(s.id): - let newp = genProc(p, s) - var owner = p - while owner != nil and owner.prc != s.owner: - owner = owner.up - if owner != nil: add(owner.locals, newp) - else: add(p.g.code, newp) + else: + genProcForSymIfNeeded(p, s) else: if s.loc.r == nil: internalError(n.info, "symbol has no generated name: " & s.name.s) @@ -1394,6 +1398,9 @@ proc genMagic(p: PProc, n: PNode, r: var TCompRes) = of mCopyStrLast: ternaryExpr(p, n, r, "", "($1.slice($2, ($3)+1).concat(0))") of mNewString: unaryExpr(p, n, r, "mnewString", "mnewString($1)") of mNewStringOfCap: unaryExpr(p, n, r, "mnewString", "mnewString(0)") + of mDotDot: + genProcForSymIfNeeded(p, n.sons[0].sym) + genCall(p, n, r) else: genCall(p, n, r) #else internalError(e.info, 'genMagic: ' + magicToStr[op]); diff --git a/compiler/lexer.nim b/compiler/lexer.nim index 8080e0e8c..6b38ee062 100644 --- a/compiler/lexer.nim +++ b/compiler/lexer.nim @@ -17,7 +17,7 @@ import hashes, options, msgs, strutils, platform, idents, nimlexbase, llstream, - wordrecg + wordrecg, etcpriv const MaxLineLength* = 80 # lines longer than this lead to a warning @@ -140,10 +140,12 @@ proc isKeyword*(kind: TTokType): bool = proc isNimIdentifier*(s: string): bool = if s[0] in SymStartChars: var i = 1 - while i < s.len: + var sLen = s.len + while i < sLen: if s[i] == '_': inc(i) - if s[i] notin SymChars: return + elif isMagicIdentSeparatorRune(cstring s, i): + inc(i, magicIdentSeparatorRuneByteWidth) if s[i] notin SymChars: return inc(i) result = true @@ -229,23 +231,6 @@ proc lexMessagePos(L: var TLexer, msg: TMsgKind, pos: int, arg = "") = var info = newLineInfo(L.fileIdx, L.lineNumber, pos - L.lineStart) L.dispMessage(info, msg, arg) -proc matchUnderscoreChars(L: var TLexer, tok: var TToken, chars: set[char]) = - var pos = L.bufpos # use registers for pos, buf - var buf = L.buf - while true: - if buf[pos] in chars: - add(tok.literal, buf[pos]) - inc(pos) - else: - break - if buf[pos] == '_': - if buf[pos+1] notin chars: - lexMessage(L, errInvalidToken, "_") - break - add(tok.literal, '_') - inc(pos) - L.bufpos = pos - proc matchTwoChars(L: TLexer, first: char, second: set[char]): bool = result = (L.buf[L.bufpos] == first) and (L.buf[L.bufpos + 1] in second) @@ -268,136 +253,195 @@ proc unsafeParseUInt(s: string, b: var BiggestInt, start = 0): int = result = i - start {.pop.} # overflowChecks + +template eatChar(L: var TLexer, t: var TToken, replacementChar: char) = + add(t.literal, replacementChar) + inc(L.bufpos) + +template eatChar(L: var TLexer, t: var TToken) = + add(t.literal, L.buf[L.bufpos]) + inc(L.bufpos) + proc getNumber(L: var TLexer): TToken = var - pos, endpos: int + startpos, endpos: int xi: BiggestInt - # get the base: + const literalishChars = { 'A'..'F', 'a'..'f', '0'..'9', 'X', 'x', 'o', 'c', + 'C', 'b', 'B', '_', '.', '\''} + const literalishCharsNoDot = literalishChars - {'.'} + + proc matchUnderscoreChars(L: var TLexer, tok: var TToken, chars: set[char]) = + var pos = L.bufpos # use registers for pos, buf + var buf = L.buf + while true: + if buf[pos] in chars: + add(tok.literal, buf[pos]) + inc(pos) + else: + break + if buf[pos] == '_': + if buf[pos+1] notin chars: + lexMessage(L, errInvalidToken, "_") + break + add(tok.literal, '_') + inc(pos) + L.bufpos = pos + + proc matchChars(L: var TLexer, tok: var TToken, chars: set[char]) = + var pos = L.bufpos # use registers for pos, buf + var buf = L.buf + while buf[pos] in chars: + add(tok.literal, buf[pos]) + inc(pos) + L.bufpos = pos + + proc lexMessageLitNum(L: var TLexer, msg: TMsgKind, startpos: int) = + # Used to get slightly human friendlier err messages. + # Note: the erroneous 'O' char in the character set is intentional + const literalishChars = {'A'..'F', 'a'..'f', '0'..'9', 'X', 'x', 'o', 'O', + 'c', 'C', 'b', 'B', '_', '.', '\'', 'd', 'i', 'u'} + var msgPos = L.bufpos + var t: TToken + t.literal = "" + L.bufpos = startpos # Use L.bufpos as pos because of matchChars + matchChars(L, t, literalishChars) + # We must verify +/- specifically so that we're not past the literal + if L.buf[L.bufpos] in {'+', '-'} and + L.buf[L.bufpos - 1] in {'e', 'E'}: + add(t.literal, L.buf[L.bufpos]) + inc(L.bufpos) + matchChars(L, t, literalishChars) + if L.buf[L.bufpos] in {'\'', 'f', 'F', 'd', 'D', 'i', 'I', 'u', 'U'}: + inc(L.bufpos) + add(t.literal, L.buf[L.bufpos]) + matchChars(L, t, {'0'..'9'}) + L.bufpos = msgPos + lexMessage(L, msg, t.literal) + result.tokType = tkIntLit # int literal until we know better result.literal = "" - result.base = base10 # BUGFIX - pos = L.bufpos # make sure the literal is correct for error messages: - var eallowed = false - if L.buf[pos] == '0' and L.buf[pos+1] in {'X', 'x'}: - matchUnderscoreChars(L, result, {'A'..'F', 'a'..'f', '0'..'9', 'X', 'x'}) + result.base = base10 + startpos = L.bufpos + var isAFloatLiteral = false + # First stage: find out base, make verifications, build token literal string + if L.buf[L.bufpos] == '0' and + L.buf[L.bufpos + 1] in {'X', 'x', 'o', 'O', 'c', 'C', 'b', 'B'}: + eatChar(L, result, '0') + case L.buf[L.bufpos] + of 'O': + lexMessageLitNum(L, errInvalidNumberOctalCode, startpos) + of 'x', 'X': + eatChar(L, result, 'x') + matchUnderscoreChars(L, result, {'0'..'9', 'a'..'f', 'A'..'F'}) + of 'o', 'c', 'C': + eatChar(L, result, 'c') + matchUnderscoreChars(L, result, {'0'..'7'}) + of 'b', 'B': + eatChar(L, result, 'b') + matchUnderscoreChars(L, result, {'0'..'1'}) + else: + internalError(getLineInfo(L), "getNumber") else: - matchUnderscoreChars(L, result, {'0'..'9', 'b', 'B', 'o', 'c', 'C'}) - eallowed = true - if (L.buf[L.bufpos] == '.') and (L.buf[L.bufpos + 1] in {'0'..'9'}): - add(result.literal, '.') - inc(L.bufpos) - matchUnderscoreChars(L, result, {'0'..'9'}) - eallowed = true - if eallowed and L.buf[L.bufpos] in {'e', 'E'}: - add(result.literal, 'e') - inc(L.bufpos) - if L.buf[L.bufpos] in {'+', '-'}: - add(result.literal, L.buf[L.bufpos]) - inc(L.bufpos) matchUnderscoreChars(L, result, {'0'..'9'}) + if (L.buf[L.bufpos] == '.') and (L.buf[L.bufpos + 1] in {'0'..'9'}): + isAFloatLiteral = true + eatChar(L, result, '.') + matchUnderscoreChars(L, result, {'0'..'9'}) + if L.buf[L.bufpos] in {'e', 'E'}: + isAFloatLiteral = true + eatChar(L, result, 'e') + if L.buf[L.bufpos] in {'+', '-'}: + eatChar(L, result) + matchUnderscoreChars(L, result, {'0'..'9'}) endpos = L.bufpos - if L.buf[endpos] in {'\'', 'f', 'F', 'i', 'I', 'u', 'U'}: - if L.buf[endpos] == '\'': inc(endpos) - L.bufpos = pos # restore position - case L.buf[endpos] + # Second stage, find out if there's a datatype postfix and handle it + var postPos = endpos + if L.buf[postPos] in {'\'', 'f', 'F', 'd', 'D', 'i', 'I', 'u', 'U'}: + if L.buf[postPos] == '\'': + inc(postPos) + case L.buf[postPos] of 'f', 'F': - inc(endpos) - if (L.buf[endpos] == '3') and (L.buf[endpos + 1] == '2'): + inc(postPos) + if (L.buf[postPos] == '3') and (L.buf[postPos + 1] == '2'): result.tokType = tkFloat32Lit - inc(endpos, 2) - elif (L.buf[endpos] == '6') and (L.buf[endpos + 1] == '4'): + inc(postPos, 2) + elif (L.buf[postPos] == '6') and (L.buf[postPos + 1] == '4'): result.tokType = tkFloat64Lit - inc(endpos, 2) - elif (L.buf[endpos] == '1') and - (L.buf[endpos + 1] == '2') and - (L.buf[endpos + 2] == '8'): + inc(postPos, 2) + elif (L.buf[postPos] == '1') and + (L.buf[postPos + 1] == '2') and + (L.buf[postPos + 2] == '8'): result.tokType = tkFloat128Lit - inc(endpos, 3) - else: - lexMessage(L, errInvalidNumber, result.literal & "'f" & L.buf[endpos]) + inc(postPos, 3) + else: # "f" alone defaults to float32 + result.tokType = tkFloat32Lit + of 'd', 'D': # ad hoc convenience shortcut for f64 + inc(postPos) + result.tokType = tkFloat64Lit of 'i', 'I': - inc(endpos) - if (L.buf[endpos] == '6') and (L.buf[endpos + 1] == '4'): + inc(postPos) + if (L.buf[postPos] == '6') and (L.buf[postPos + 1] == '4'): result.tokType = tkInt64Lit - inc(endpos, 2) - elif (L.buf[endpos] == '3') and (L.buf[endpos + 1] == '2'): + inc(postPos, 2) + elif (L.buf[postPos] == '3') and (L.buf[postPos + 1] == '2'): result.tokType = tkInt32Lit - inc(endpos, 2) - elif (L.buf[endpos] == '1') and (L.buf[endpos + 1] == '6'): + inc(postPos, 2) + elif (L.buf[postPos] == '1') and (L.buf[postPos + 1] == '6'): result.tokType = tkInt16Lit - inc(endpos, 2) - elif (L.buf[endpos] == '8'): + inc(postPos, 2) + elif (L.buf[postPos] == '8'): result.tokType = tkInt8Lit - inc(endpos) + inc(postPos) else: - lexMessage(L, errInvalidNumber, result.literal & "'i" & L.buf[endpos]) + lexMessageLitNum(L, errInvalidNumber, startpos) of 'u', 'U': - inc(endpos) - if (L.buf[endpos] == '6') and (L.buf[endpos + 1] == '4'): + inc(postPos) + if (L.buf[postPos] == '6') and (L.buf[postPos + 1] == '4'): result.tokType = tkUInt64Lit - inc(endpos, 2) - elif (L.buf[endpos] == '3') and (L.buf[endpos + 1] == '2'): + inc(postPos, 2) + elif (L.buf[postPos] == '3') and (L.buf[postPos + 1] == '2'): result.tokType = tkUInt32Lit - inc(endpos, 2) - elif (L.buf[endpos] == '1') and (L.buf[endpos + 1] == '6'): + inc(postPos, 2) + elif (L.buf[postPos] == '1') and (L.buf[postPos + 1] == '6'): result.tokType = tkUInt16Lit - inc(endpos, 2) - elif (L.buf[endpos] == '8'): + inc(postPos, 2) + elif (L.buf[postPos] == '8'): result.tokType = tkUInt8Lit - inc(endpos) + inc(postPos) else: result.tokType = tkUIntLit - else: lexMessage(L, errInvalidNumber, result.literal & "'" & L.buf[endpos]) - else: - L.bufpos = pos # restore position + else: + lexMessageLitNum(L, errInvalidNumber, startpos) + # Is there still a literalish char awaiting? Then it's an error! + if L.buf[postPos] in literalishCharsNoDot or + (L.buf[postPos] == '.' and L.buf[postPos + 1] in {'0'..'9'}): + lexMessageLitNum(L, errInvalidNumber, startpos) + # Third stage, extract actual number + L.bufpos = startpos # restore position + var pos: int = startpos try: if (L.buf[pos] == '0') and (L.buf[pos + 1] in {'x', 'X', 'b', 'B', 'o', 'O', 'c', 'C'}): inc(pos, 2) - xi = 0 # it may be a base prefix + xi = 0 # it is a base prefix case L.buf[pos - 1] # now look at the optional type suffix: of 'b', 'B': result.base = base2 - while true: - case L.buf[pos] - of '2'..'9', '.': - lexMessage(L, errInvalidNumber, result.literal) - inc(pos) - of '_': - if L.buf[pos+1] notin {'0'..'1'}: - lexMessage(L, errInvalidToken, "_") - break - inc(pos) - of '0', '1': + while pos < endpos: + if L.buf[pos] != '_': xi = `shl`(xi, 1) or (ord(L.buf[pos]) - ord('0')) - inc(pos) - else: break + inc(pos) of 'o', 'c', 'C': result.base = base8 - while true: - case L.buf[pos] - of '8'..'9', '.': - lexMessage(L, errInvalidNumber, result.literal) - inc(pos) - of '_': - if L.buf[pos+1] notin {'0'..'7'}: - lexMessage(L, errInvalidToken, "_") - break - inc(pos) - of '0'..'7': + while pos < endpos: + if L.buf[pos] != '_': xi = `shl`(xi, 3) or (ord(L.buf[pos]) - ord('0')) - inc(pos) - else: break - of 'O': - lexMessage(L, errInvalidNumber, result.literal) + inc(pos) of 'x', 'X': result.base = base16 - while true: + while pos < endpos: case L.buf[pos] of '_': - if L.buf[pos+1] notin {'0'..'9', 'a'..'f', 'A'..'F'}: - lexMessage(L, errInvalidToken, "_") - break inc(pos) of '0'..'9': xi = `shl`(xi, 4) or (ord(L.buf[pos]) - ord('0')) @@ -408,8 +452,10 @@ proc getNumber(L: var TLexer): TToken = of 'A'..'F': xi = `shl`(xi, 4) or (ord(L.buf[pos]) - ord('A') + 10) inc(pos) - else: break - else: internalError(getLineInfo(L), "getNumber") + else: + break + else: + internalError(getLineInfo(L), "getNumber") case result.tokType of tkIntLit, tkInt64Lit: result.iNumber = xi of tkInt8Lit: result.iNumber = BiggestInt(int8(toU8(int(xi)))) @@ -425,7 +471,7 @@ proc getNumber(L: var TLexer): TToken = # XXX: Test this on big endian machine! of tkFloat64Lit: result.fNumber = (cast[PFloat64](addr(xi)))[] else: internalError(getLineInfo(L), "getNumber") - elif isFloatLiteral(result.literal) or (result.tokType == tkFloat32Lit) or + elif isAFloatLiteral or (result.tokType == tkFloat32Lit) or (result.tokType == tkFloat64Lit): result.fNumber = parseFloat(result.literal) if result.tokType == tkIntLit: result.tokType = tkFloatLit @@ -441,18 +487,18 @@ proc getNumber(L: var TLexer): TToken = if result.tokType == tkIntLit: result.tokType = tkInt64Lit elif result.tokType in {tkInt8Lit, tkInt16Lit, tkInt32Lit}: - lexMessage(L, errNumberOutOfRange, result.literal) + lexMessageLitNum(L, errNumberOutOfRange, startpos) elif result.tokType == tkInt8Lit and (result.iNumber < int8.low or result.iNumber > int8.high): - lexMessage(L, errNumberOutOfRange, result.literal) + lexMessageLitNum(L, errNumberOutOfRange, startpos) elif result.tokType == tkInt16Lit and (result.iNumber < int16.low or result.iNumber > int16.high): - lexMessage(L, errNumberOutOfRange, result.literal) + lexMessageLitNum(L, errNumberOutOfRange, startpos) except ValueError: - lexMessage(L, errInvalidNumber, result.literal) + lexMessageLitNum(L, errInvalidNumber, startpos) except OverflowError, RangeError: - lexMessage(L, errNumberOutOfRange, result.literal) - L.bufpos = endpos + lexMessageLitNum(L, errNumberOutOfRange, startpos) + L.bufpos = postPos proc handleHexChar(L: var TLexer, xi: var int) = case L.buf[L.bufpos] @@ -625,23 +671,34 @@ proc getCharacter(L: var TLexer, tok: var TToken) = inc(L.bufpos) # skip ' proc getSymbol(L: var TLexer, tok: var TToken) = - var h: THash = 0 + var h: Hash = 0 var pos = L.bufpos var buf = L.buf while true: var c = buf[pos] case c of 'a'..'z', '0'..'9', '\x80'..'\xFF': - h = h !& ord(c) + if c == '\226' and + buf[pos+1] == '\128' and + buf[pos+2] == '\147': # It's a 'magic separator' en-dash Unicode + if buf[pos + magicIdentSeparatorRuneByteWidth] notin SymChars: + lexMessage(L, errInvalidToken, "–") + break + inc(pos, magicIdentSeparatorRuneByteWidth) + else: + h = h !& ord(c) + inc(pos) of 'A'..'Z': c = chr(ord(c) + (ord('a') - ord('A'))) # toLower() h = h !& ord(c) + inc(pos) of '_': if buf[pos+1] notin SymChars: lexMessage(L, errInvalidToken, "_") break + inc(pos) + else: break - inc(pos) h = !$h tok.ident = getIdent(addr(L.buf[L.bufpos]), pos - L.bufpos, h) L.bufpos = pos @@ -652,7 +709,7 @@ proc getSymbol(L: var TLexer, tok: var TToken) = tok.tokType = TTokType(tok.ident.id + ord(tkSymbol)) proc endOperator(L: var TLexer, tok: var TToken, pos: int, - hash: THash) {.inline.} = + hash: Hash) {.inline.} = var h = !$hash tok.ident = getIdent(addr(L.buf[L.bufpos]), pos - L.bufpos, h) if (tok.ident.id < oprLow) or (tok.ident.id > oprHigh): tok.tokType = tkOpr @@ -662,7 +719,7 @@ proc endOperator(L: var TLexer, tok: var TToken, pos: int, proc getOperator(L: var TLexer, tok: var TToken) = var pos = L.bufpos var buf = L.buf - var h: THash = 0 + var h: Hash = 0 while true: var c = buf[pos] if c notin OpChars: break diff --git a/compiler/msgs.nim b/compiler/msgs.nim index 041a181be..e97032db4 100644 --- a/compiler/msgs.nim +++ b/compiler/msgs.nim @@ -17,10 +17,9 @@ type errIntLiteralExpected, errInvalidCharacterConstant, errClosingTripleQuoteExpected, errClosingQuoteExpected, errTabulatorsAreNotAllowed, errInvalidToken, errLineTooLong, - errInvalidNumber, errNumberOutOfRange, errNnotAllowedInCharacter, - errClosingBracketExpected, errMissingFinalQuote, errIdentifierExpected, - errNewlineExpected, - errInvalidModuleName, + errInvalidNumber, errInvalidNumberOctalCode, errNumberOutOfRange, + errNnotAllowedInCharacter, errClosingBracketExpected, errMissingFinalQuote, + errIdentifierExpected, errNewlineExpected, errInvalidModuleName, errOperatorExpected, errTokenExpected, errStringAfterIncludeExpected, errRecursiveDependencyX, errOnOrOffExpected, errNoneSpeedOrSizeExpected, errInvalidPragma, errUnknownPragma, errInvalidDirectiveX, @@ -143,6 +142,7 @@ const errInvalidToken: "invalid token: $1", errLineTooLong: "line too long", errInvalidNumber: "$1 is not a valid number", + errInvalidNumberOctalCode: "$1 is not a valid number; did you mean octal? Then use one of '0o', '0c' or '0C'.", errNumberOutOfRange: "number $1 out of valid range", errNnotAllowedInCharacter: "\\n not allowed in character literal", errClosingBracketExpected: "closing ']' expected, but end of file reached", diff --git a/compiler/treetab.nim b/compiler/treetab.nim index 8d66d56c7..adfc7b2ce 100644 --- a/compiler/treetab.nim +++ b/compiler/treetab.nim @@ -12,7 +12,7 @@ import hashes, ast, astalgo, types -proc hashTree(n: PNode): THash = +proc hashTree(n: PNode): Hash = if n == nil: return result = ord(n.kind) case n.kind @@ -53,8 +53,8 @@ proc treesEquivalent(a, b: PNode): bool = result = true if result: result = sameTypeOrNil(a.typ, b.typ) -proc nodeTableRawGet(t: TNodeTable, k: THash, key: PNode): int = - var h: THash = k and high(t.data) +proc nodeTableRawGet(t: TNodeTable, k: Hash, key: PNode): int = + var h: Hash = k and high(t.data) while t.data[h].key != nil: if (t.data[h].h == k) and treesEquivalent(t.data[h].key, key): return h @@ -66,9 +66,9 @@ proc nodeTableGet*(t: TNodeTable, key: PNode): int = if index >= 0: result = t.data[index].val else: result = low(int) -proc nodeTableRawInsert(data: var TNodePairSeq, k: THash, key: PNode, +proc nodeTableRawInsert(data: var TNodePairSeq, k: Hash, key: PNode, val: int) = - var h: THash = k and high(data) + var h: Hash = k and high(data) while data[h].key != nil: h = nextTry(h, high(data)) assert(data[h].key == nil) data[h].h = k @@ -77,7 +77,7 @@ proc nodeTableRawInsert(data: var TNodePairSeq, k: THash, key: PNode, proc nodeTablePut*(t: var TNodeTable, key: PNode, val: int) = var n: TNodePairSeq - var k: THash = hashTree(key) + var k: Hash = hashTree(key) var index = nodeTableRawGet(t, k, key) if index >= 0: assert(t.data[index].key != nil) @@ -94,7 +94,7 @@ proc nodeTablePut*(t: var TNodeTable, key: PNode, val: int) = proc nodeTableTestOrSet*(t: var TNodeTable, key: PNode, val: int): int = var n: TNodePairSeq - var k: THash = hashTree(key) + var k: Hash = hashTree(key) var index = nodeTableRawGet(t, k, key) if index >= 0: assert(t.data[index].key != nil) |