From 263cabd1c27977aa32c849ffb334984e8d476b97 Mon Sep 17 00:00:00 2001 From: Araq Date: Tue, 25 Feb 2014 01:02:10 +0100 Subject: added canonizer --- compiler/canonicalizer.nim | 288 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 288 insertions(+) create mode 100644 compiler/canonicalizer.nim (limited to 'compiler/canonicalizer.nim') diff --git a/compiler/canonicalizer.nim b/compiler/canonicalizer.nim new file mode 100644 index 000000000..fb5b3b9ce --- /dev/null +++ b/compiler/canonicalizer.nim @@ -0,0 +1,288 @@ +# +# +# The Nimrod Compiler +# (c) Copyright 2014 Andreas Rumpf +# +# See the file "copying.txt", included in this +# distribution, for details about the copyright. +# + +## This module implements the canonalization for the various caching mechanisms. + +import strutils, db_sqlite, md5 + +var db: TDbConn + +# We *hash* the relevant information into 128 bit hashes. This should be good enough +# to prevent any collisions. + +type + TUid = distinct MD5Digest + +# For name mangling we encode these hashes via a variant of base64 (called +# 'base64a') and prepend the *primary* identifier to ease the debugging pain. +# So a signature like: +# +# proc gABI(c: PCtx; n: PNode; opc: TOpcode; a, b: TRegister; imm: BiggestInt) +# +# is mangled into: +# gABI_MTdmOWY5MTQ1MDcyNGQ3ZA +# +# This is a good compromise between correctness and brevity. ;-) + +const + cb64 = [ + "A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K", "L", "M", "N", + "O", "P", "Q", "R", "S", "T" "U", "V", "W", "X", "Y", "Z", + "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m", "n", + "o", "p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z", + "0", "1", "2", "3", "4", "5", "6", "7", "8", "9", + "_A", "_B"] + +proc toBase64a(s: cstring, len: int): string = + ## encodes `s` into base64 representation. After `lineLen` characters, a + ## `newline` is added. + var total = ((len + 2) div 3) * 4 + result = newStringOfCap(total) + var i = 0 + while i < s.len - 2: + let a = ord(s[i]) + let b = ord(s[i+1]) + let c = ord(s[i+2]) + result.add cb64[a shr 2] + result.add cb64[((a and 3) shl 4) or ((b and 0xF0) shr 4)] + result.add cb64[((b and 0x0F) shl 2) or ((c and 0xC0) shr 6)] + result.add cb64[c and 0x3F] + inc(i, 3) + if i < s.len-1: + let a = ord(s[i]) + let b = ord(s[i+1]) + result.add cb64[a shr 2] + result.add cb64[((a and 3) shl 4) or ((b and 0xF0) shr 4)] + result.add cb64[((b and 0x0F) shl 2)] + elif i < s.len: + let a = ord(s[i]) + result.add cb64[a shr 2] + result.add cb64[(a and 3) shl 4] + +proc toBase64a(u: TUid): string = toBase64a(cast[cstring](u), sizeof(u)) + +proc `&=`(c: var MD5Context, s: string) = md5Update(c, s, s.len) + +proc hashSym(c: var MD5Context, s: PSym) = + if sfAnon in s.flags or s.kind == skGenericParam: + c &= ":anon" + else: + var it = s.owner + while it != nil: + hashSym(c, it) + c &= "." + it = s.owner + c &= s.name.s + +proc hashTree(c: var MD5Context, n: PNode) = + if n == nil: + c &= "null" + return + var k = n.kind + md5Update(c, cast[cstring](addr(k)), 1) + # we really must not hash line information. 'n.typ' is debatable but + # shouldn't be necessary for now and avoids potential infinite recursions. 
+ case n.kind + of nkEmpty, nkNilLit, nkType: discard + of nkIdent: + c &= n.ident.s + of nkSym: + hashSym(c, n.sym) + of nkCharLit..nkUInt64Lit: + var v = n.intVal + md5Update(c, cast[cstring](addr(v)), sizeof(v)) + of nkFloatLit..nkFloat64Lit: + var v = n.floatVal + md5Update(c, cast[cstring](addr(v)), sizeof(v)) + of nkStrLit..nkTripleStrLit: + c &= n.strVal + else: + for i in 0.. 1: add(result, ", ") + add(result, typeToString(t.sons[i])) + add(result, ']') + of tyTypeDesc: + if t.base.kind == tyNone: result = "typedesc" + else: result = "typedesc[" & typeToString(t.base) & "]" + of tyStatic: + internalAssert t.len > 0 + result = "static[" & typeToString(t.sons[0]) & "]" + of tyUserTypeClass: + internalAssert t.sym != nil and t.sym.owner != nil + return t.sym.owner.name.s + of tyBuiltInTypeClass: + result = case t.base.kind: + of tyVar: "var" + of tyRef: "ref" + of tyPtr: "ptr" + of tySequence: "seq" + of tyArray: "array" + of tySet: "set" + of tyRange: "range" + of tyDistinct: "distinct" + of tyProc: "proc" + of tyObject: "object" + of tyTuple: "tuple" + else: (internalAssert(false); "") + of tyUserTypeClassInst: + let body = t.base + result = body.sym.name.s & "[" + for i in countup(1, sonsLen(t) - 2): + if i > 1: add(result, ", ") + add(result, typeToString(t.sons[i])) + result.add "]" + of tyAnd: + result = typeToString(t.sons[0]) & " and " & typeToString(t.sons[1]) + of tyOr: + result = typeToString(t.sons[0]) & " or " & typeToString(t.sons[1]) + of tyNot: + result = "not " & typeToString(t.sons[0]) + of tyExpr: + internalAssert t.len == 0 + result = "expr" + of tyFromExpr, tyFieldAccessor: + result = renderTree(t.n) + of tyArray: + if t.sons[0].kind == tyRange: + result = "array[" & hashTree(t.sons[0].n) & ", " & + typeToString(t.sons[1]) & ']' + else: + result = "array[" & typeToString(t.sons[0]) & ", " & + typeToString(t.sons[1]) & ']' + of tyArrayConstr: + result = "Array constructor[" & hashTree(t.sons[0].n) & ", " & + typeToString(t.sons[1]) & ']' + of tySequence: + result = "seq[" & typeToString(t.sons[0]) & ']' + of tyOrdinal: + result = "ordinal[" & typeToString(t.sons[0]) & ']' + of tySet: + result = "set[" & typeToString(t.sons[0]) & ']' + of tyOpenArray: + result = "openarray[" & typeToString(t.sons[0]) & ']' + of tyDistinct: + result = "distinct " & typeToString(t.sons[0], preferName) + of tyTuple: + # we iterate over t.sons here, because t.n may be nil + result = "tuple[" + if t.n != nil: + assert(sonsLen(t.n) == sonsLen(t)) + for i in countup(0, sonsLen(t.n) - 1): + assert(t.n.sons[i].kind == nkSym) + add(result, t.n.sons[i].sym.name.s & ": " & typeToString(t.sons[i])) + if i < sonsLen(t.n) - 1: add(result, ", ") + else: + for i in countup(0, sonsLen(t) - 1): + add(result, typeToString(t.sons[i])) + if i < sonsLen(t) - 1: add(result, ", ") + add(result, ']') + of tyPtr, tyRef, tyVar, tyMutable, tyConst: + result = typeToStr[t.kind] & typeToString(t.sons[0]) + of tyRange: + result = "range " & hashTree(t.n) + if prefer != preferExported: + result.add("(" & typeToString(t.sons[0]) & ")") + of tyProc: + result = if tfIterator in t.flags: "iterator (" else: "proc (" + for i in countup(1, sonsLen(t) - 1): + add(result, typeToString(t.sons[i])) + if i < sonsLen(t) - 1: add(result, ", ") + add(result, ')') + if t.sons[0] != nil: add(result, ": " & typeToString(t.sons[0])) + var prag: string + if t.callConv != ccDefault: prag = CallingConvToStr[t.callConv] + else: prag = "" + if tfNoSideEffect in t.flags: + addSep(prag) + add(prag, "noSideEffect") + if tfThread in t.flags: 
+ addSep(prag) + add(prag, "thread") + if len(prag) != 0: add(result, "{." & prag & ".}") + of tyVarargs, tyIter: + result = typeToStr[t.kind] % typeToString(t.sons[0]) + else: + result = typeToStr[t.kind] + if tfShared in t.flags: result = "shared " & result + if tfNotNil in t.flags: result.add(" not nil") + + +proc createDb() = + db.exec(sql""" + create table if not exists Module( + id integer primary key, + name varchar(256) not null, + fullpath varchar(256) not null, + interfHash varchar(256) not null, + fullHash varchar(256) not null, + + created timestamp not null default (DATETIME('now')), + );""") + + db.exec(sql""" + create table if not exists Symbol( + id integer primary key, + module integer not null, + name varchar(max) not null, + data varchar(max) not null, + created timestamp not null default (DATETIME('now')), + + foreign key (module) references module(id) + );""") + + db.exec(sql""" + create table if not exists Type( + id integer primary key, + module integer not null, + name varchar(max) not null, + data varchar(max) not null, + created timestamp not null default (DATETIME('now')), + + foreign key (module) references module(id) + );""") + + + #db.exec(sql""" + # --create unique index if not exists TsstNameIx on TestResult(name); + # """, []) + -- cgit 1.4.1-2-gfad0 From d828b0b96eef8fd77a57a5b71f65e2ae9f6b1f30 Mon Sep 17 00:00:00 2001 From: Araq Date: Thu, 27 Feb 2014 20:43:10 +0100 Subject: some progress on the new name mangler --- compiler/canonicalizer.nim | 381 +++++++++++++++++++++++++++++---------------- 1 file changed, 251 insertions(+), 130 deletions(-) (limited to 'compiler/canonicalizer.nim') diff --git a/compiler/canonicalizer.nim b/compiler/canonicalizer.nim index fb5b3b9ce..94cb8e355 100644 --- a/compiler/canonicalizer.nim +++ b/compiler/canonicalizer.nim @@ -13,8 +13,8 @@ import strutils, db_sqlite, md5 var db: TDbConn -# We *hash* the relevant information into 128 bit hashes. This should be good enough -# to prevent any collisions. +# We *hash* the relevant information into 128 bit hashes. This should be good +# enough to prevent any collisions. type TUid = distinct MD5Digest @@ -42,8 +42,7 @@ const proc toBase64a(s: cstring, len: int): string = ## encodes `s` into base64 representation. After `lineLen` characters, a ## `newline` is added. - var total = ((len + 2) div 3) * 4 - result = newStringOfCap(total) + result = newStringOfCap(((len + 2) div 3) * 4) var i = 0 while i < s.len - 2: let a = ord(s[i]) @@ -82,7 +81,7 @@ proc hashSym(c: var MD5Context, s: PSym) = proc hashTree(c: var MD5Context, n: PNode) = if n == nil: - c &= "null" + c &= "noTreeKind" return var k = n.kind md5Update(c, cast[cstring](addr(k)), 1) @@ -105,146 +104,268 @@ proc hashTree(c: var MD5Context, n: PNode) = else: for i in 0.. 
1: add(result, ", ") - add(result, typeToString(t.sons[i])) - add(result, ']') - of tyTypeDesc: - if t.base.kind == tyNone: result = "typedesc" - else: result = "typedesc[" & typeToString(t.base) & "]" - of tyStatic: - internalAssert t.len > 0 - result = "static[" & typeToString(t.sons[0]) & "]" + for i in countup(0, sonsLen(t) -1 -ord(t.kind != tyGenericInvokation)): + c.hashType t.sons[i] of tyUserTypeClass: internalAssert t.sym != nil and t.sym.owner != nil - return t.sym.owner.name.s - of tyBuiltInTypeClass: - result = case t.base.kind: - of tyVar: "var" - of tyRef: "ref" - of tyPtr: "ptr" - of tySequence: "seq" - of tyArray: "array" - of tySet: "set" - of tyRange: "range" - of tyDistinct: "distinct" - of tyProc: "proc" - of tyObject: "object" - of tyTuple: "tuple" - else: (internalAssert(false); "") + c &= t.sym.owner.name.s of tyUserTypeClassInst: let body = t.base - result = body.sym.name.s & "[" + c.hashSym body.sym for i in countup(1, sonsLen(t) - 2): - if i > 1: add(result, ", ") - add(result, typeToString(t.sons[i])) - result.add "]" - of tyAnd: - result = typeToString(t.sons[0]) & " and " & typeToString(t.sons[1]) - of tyOr: - result = typeToString(t.sons[0]) & " or " & typeToString(t.sons[1]) - of tyNot: - result = "not " & typeToString(t.sons[0]) - of tyExpr: - internalAssert t.len == 0 - result = "expr" + c.hashType t.sons[i] of tyFromExpr, tyFieldAccessor: - result = renderTree(t.n) - of tyArray: - if t.sons[0].kind == tyRange: - result = "array[" & hashTree(t.sons[0].n) & ", " & - typeToString(t.sons[1]) & ']' - else: - result = "array[" & typeToString(t.sons[0]) & ", " & - typeToString(t.sons[1]) & ']' - of tyArrayConstr: - result = "Array constructor[" & hashTree(t.sons[0].n) & ", " & - typeToString(t.sons[1]) & ']' - of tySequence: - result = "seq[" & typeToString(t.sons[0]) & ']' - of tyOrdinal: - result = "ordinal[" & typeToString(t.sons[0]) & ']' - of tySet: - result = "set[" & typeToString(t.sons[0]) & ']' - of tyOpenArray: - result = "openarray[" & typeToString(t.sons[0]) & ']' - of tyDistinct: - result = "distinct " & typeToString(t.sons[0], preferName) + c.hashTree(t.n) + of tyArrayConstr: + c.hashTree(t.sons[0].n) + c.hashType(t.sons[1]) of tyTuple: - # we iterate over t.sons here, because t.n may be nil - result = "tuple[" - if t.n != nil: + if t.n != nil: assert(sonsLen(t.n) == sonsLen(t)) for i in countup(0, sonsLen(t.n) - 1): assert(t.n.sons[i].kind == nkSym) - add(result, t.n.sons[i].sym.name.s & ": " & typeToString(t.sons[i])) - if i < sonsLen(t.n) - 1: add(result, ", ") - else: - for i in countup(0, sonsLen(t) - 1): - add(result, typeToString(t.sons[i])) - if i < sonsLen(t) - 1: add(result, ", ") - add(result, ']') - of tyPtr, tyRef, tyVar, tyMutable, tyConst: - result = typeToStr[t.kind] & typeToString(t.sons[0]) + c &= t.n.sons[i].sym.name.s + c &= ":" + c.hashType(t.sons[i]) + c &= "," + else: + for i in countup(0, sonsLen(t) - 1): c.hashType t.sons[i] of tyRange: - result = "range " & hashTree(t.n) - if prefer != preferExported: - result.add("(" & typeToString(t.sons[0]) & ")") + c.hashTree(t.n) + c.hashType(t.sons[0]) of tyProc: - result = if tfIterator in t.flags: "iterator (" else: "proc (" - for i in countup(1, sonsLen(t) - 1): - add(result, typeToString(t.sons[i])) - if i < sonsLen(t) - 1: add(result, ", ") - add(result, ')') - if t.sons[0] != nil: add(result, ": " & typeToString(t.sons[0])) - var prag: string - if t.callConv != ccDefault: prag = CallingConvToStr[t.callConv] - else: prag = "" - if tfNoSideEffect in t.flags: - addSep(prag) - 
add(prag, "noSideEffect") - if tfThread in t.flags: - addSep(prag) - add(prag, "thread") - if len(prag) != 0: add(result, "{." & prag & ".}") - of tyVarargs, tyIter: - result = typeToStr[t.kind] % typeToString(t.sons[0]) - else: - result = typeToStr[t.kind] - if tfShared in t.flags: result = "shared " & result - if tfNotNil in t.flags: result.add(" not nil") + c &= (if tfIterator in t.flags: "iterator " else: "proc ") + for i in 0.. ') + +proc encodeType(w: PRodWriter, t: PType, result: var string) = + if t == nil: + # nil nodes have to be stored too: + result.add("[]") + return + # we need no surrounding [] here because the type is in a line of its own + if t.kind == tyForward: internalError("encodeType: tyForward") + # for the new rodfile viewer we use a preceeding [ so that the data section + # can easily be disambiguated: + add(result, '[') + encodeVInt(ord(t.kind), result) + add(result, '+') + encodeVInt(t.id, result) + if t.n != nil: + encodeNode(w, unknownLineInfo(), t.n, result) + if t.flags != {}: + add(result, '$') + encodeVInt(cast[int32](t.flags), result) + if t.callConv != low(t.callConv): + add(result, '?') + encodeVInt(ord(t.callConv), result) + if t.owner != nil: + add(result, '*') + encodeVInt(t.owner.id, result) + pushSym(w, t.owner) + if t.sym != nil: + add(result, '&') + encodeVInt(t.sym.id, result) + pushSym(w, t.sym) + if t.size != - 1: + add(result, '/') + encodeVBiggestInt(t.size, result) + if t.align != 2: + add(result, '=') + encodeVInt(t.align, result) + encodeLoc(w, t.loc, result) + for i in countup(0, sonsLen(t) - 1): + if t.sons[i] == nil: + add(result, "^()") + else: + add(result, '^') + encodeVInt(t.sons[i].id, result) + pushType(w, t.sons[i]) + +proc encodeLib(w: PRodWriter, lib: PLib, info: TLineInfo, result: var string) = + add(result, '|') + encodeVInt(ord(lib.kind), result) + add(result, '|') + encodeStr(ropeToStr(lib.name), result) + add(result, '|') + encodeNode(w, info, lib.path, result) + +proc encodeSym(w: PRodWriter, s: PSym, result: var string) = + if s == nil: + # nil nodes have to be stored too: + result.add("{}") + return + # we need no surrounding {} here because the symbol is in a line of its own + encodeVInt(ord(s.kind), result) + result.add('+') + encodeVInt(s.id, result) + result.add('&') + encodeStr(s.name.s, result) + if s.typ != nil: + result.add('^') + encodeVInt(s.typ.id, result) + pushType(w, s.typ) + result.add('?') + if s.info.col != -1'i16: encodeVInt(s.info.col, result) + result.add(',') + if s.info.line != -1'i16: encodeVInt(s.info.line, result) + result.add(',') + encodeVInt(fileIdx(w, toFilename(s.info)), result) + if s.owner != nil: + result.add('*') + encodeVInt(s.owner.id, result) + pushSym(w, s.owner) + if s.flags != {}: + result.add('$') + encodeVInt(cast[int32](s.flags), result) + if s.magic != mNone: + result.add('@') + encodeVInt(ord(s.magic), result) + if s.options != w.options: + result.add('!') + encodeVInt(cast[int32](s.options), result) + if s.position != 0: + result.add('%') + encodeVInt(s.position, result) + if s.offset != - 1: + result.add('`') + encodeVInt(s.offset, result) + encodeLoc(w, s.loc, result) + if s.annex != nil: encodeLib(w, s.annex, s.info, result) + if s.constraint != nil: + add(result, '#') + encodeNode(w, unknownLineInfo(), s.constraint, result) + # lazy loading will soon reload the ast lazily, so the ast needs to be + # the last entry of a symbol: + if s.ast != nil: + # we used to attempt to save space here by only storing a dummy AST if + # it is not necessary, but Nimrod's heavy 
compile-time evaluation features + # make that unfeasible nowadays: + encodeNode(w, s.info, s.ast, result) proc createDb() = -- cgit 1.4.1-2-gfad0 From aa92669d9219816f9cd9036b4977ddbdd16d3c5d Mon Sep 17 00:00:00 2001 From: Araq Date: Wed, 5 Mar 2014 02:14:53 +0100 Subject: fixes #937 --- compiler/canonicalizer.nim | 7 +++++++ compiler/vmdef.nim | 1 + compiler/vmgen.nim | 31 +++++++++++++++++++++++++------ tests/vm/twrongwhen.nim | 13 +++++++++++++ todo.txt | 1 + 5 files changed, 47 insertions(+), 6 deletions(-) create mode 100644 tests/vm/twrongwhen.nim (limited to 'compiler/canonicalizer.nim') diff --git a/compiler/canonicalizer.nim b/compiler/canonicalizer.nim index 94cb8e355..07e932b28 100644 --- a/compiler/canonicalizer.nim +++ b/compiler/canonicalizer.nim @@ -161,6 +161,13 @@ proc hashType(c: var MD5Context, t: PType) = if tfShared in t.flags: c &= "shared" if tfNotNil in t.flags: c &= "not nil" +proc canonConst(n: PNode): TUid = + var c: MD5Context + md5Init(c) + c.hashTree(n) + c.hashType(n.typ) + md5Final(c, MD5Digest(result)) + proc canonSym(s: PSym): TUid var c: MD5Context md5Init(c) diff --git a/compiler/vmdef.nim b/compiler/vmdef.nim index 30beea29c..102fc3024 100644 --- a/compiler/vmdef.nim +++ b/compiler/vmdef.nim @@ -161,6 +161,7 @@ type PProc* = ref object blocks*: seq[TBlock] # blocks; temp data structure + sym*: PSym slots*: array[TRegister, tuple[inUse: bool, kind: TSlotKind]] maxSlots*: int diff --git a/compiler/vmgen.nim b/compiler/vmgen.nim index 591c5ade8..123394436 100644 --- a/compiler/vmgen.nim +++ b/compiler/vmgen.nim @@ -980,6 +980,25 @@ proc setSlot(c: PCtx; v: PSym) = kind: if v.kind == skLet: slotFixedLet else: slotFixedVar) inc c.prc.maxSlots +proc cannotEval(n: PNode) {.noinline.} = + globalError(n.info, errGenerated, "cannot evaluate at compile time: " & + n.renderTree) + +proc isOwnedBy(a, b: PSym): bool = + var a = a.owner + while a != nil and a.kind != skModule: + if a == b: return true + a = a.owner + +proc checkCanEval(c: PCtx; n: PNode) = + # we need to ensure that we don't evaluate 'x' here: + # proc foo() = var x ... + let s = n.sym + if s.position == 0: + if s.kind in {skVar, skTemp, skLet, skParam, skResult} and + not s.isOwnedBy(c.prc.sym) and s.owner != c.module: + cannotEval(n) + proc genAsgn(c: PCtx; le, ri: PNode; requiresCopy: bool) = case le.kind of nkBracketExpr: @@ -1007,6 +1026,7 @@ proc genAsgn(c: PCtx; le, ri: PNode; requiresCopy: bool) = c.freeTemp(tmp) of nkSym: let s = le.sym + checkCanEval(c, le) if s.isGlobal: withTemp(tmp, le.typ): c.gen(le, tmp, {gfAddrOf}) @@ -1014,7 +1034,7 @@ proc genAsgn(c: PCtx; le, ri: PNode; requiresCopy: bool) = c.gABC(le, opcWrDeref, tmp, val) c.freeTemp(val) else: - if s.kind == skForVar and c.mode == emRepl: c.setSlot s + if s.kind == skForVar: c.setSlot s internalAssert s.position > 0 or (s.position == 0 and s.kind in {skParam,skResult}) var dest: TRegister = s.position + ord(s.kind == skParam) @@ -1046,10 +1066,6 @@ proc importcSym(c: PCtx; info: TLineInfo; s: PSym) = localError(info, errGenerated, "cannot 'importc' variable at compile time") -proc cannotEval(n: PNode) {.noinline.} = - globalError(n.info, errGenerated, "cannot evaluate at compile time: " & - n.renderTree) - proc getNullValue*(typ: PType, info: TLineInfo): PNode proc genGlobalInit(c: PCtx; n: PNode; s: PSym) = @@ -1190,12 +1206,14 @@ proc genVarSection(c: PCtx; n: PNode) = setSlot(c, a[i].sym) # v = t[i] var v: TDest = -1 + checkCanEval(c, a[i]) genRdVar(c, a[i], v, {gfAddrOf}) c.gABC(n, opcWrObj, v, tmp, i) # XXX globals? 
c.freeTemp(tmp) elif a.sons[0].kind == nkSym: let s = a.sons[0].sym + checkCanEval(c, a.sons[0]) if s.isGlobal: if s.position == 0: if sfImportc in s.flags: c.importcSym(a.info, s) @@ -1308,6 +1326,7 @@ proc gen(c: PCtx; n: PNode; dest: var TDest; flags: TGenFlags = {}) = case n.kind of nkSym: let s = n.sym + checkCanEval(c, n) case s.kind of skVar, skForVar, skTemp, skLet, skParam, skResult: genRdVar(c, n, dest, flags) @@ -1525,7 +1544,7 @@ proc genProc(c: PCtx; s: PSym): int = # procs easily: let body = s.getBody let procStart = c.xjmp(body, opcJmp, 0) - var p = PProc(blocks: @[]) + var p = PProc(blocks: @[], sym: s) let oldPrc = c.prc c.prc = p # iterate over the parameters and allocate space for them: diff --git a/tests/vm/twrongwhen.nim b/tests/vm/twrongwhen.nim new file mode 100644 index 000000000..085bb6fb6 --- /dev/null +++ b/tests/vm/twrongwhen.nim @@ -0,0 +1,13 @@ +discard """ + output: "Error: cannot evaluate at compile time: x" + line: 7 +""" + +proc bla(x:int) = + when x == 0: + echo "oops" + else: + echo "good" + +bla(2) # echos "oops" + diff --git a/todo.txt b/todo.txt index ba062a949..a13c86b8f 100644 --- a/todo.txt +++ b/todo.txt @@ -1,6 +1,7 @@ version 0.9.4 ============= +- implement unchecked arrays - make testament produce full JSON information - fix gensym capture bug - vm -- cgit 1.4.1-2-gfad0 From 91d842e1ec070a9ab7f883820bd6244526f5d622 Mon Sep 17 00:00:00 2001 From: Araq Date: Fri, 7 Mar 2014 22:25:05 +0100 Subject: implements strongSpaces parsing mode --- compiler/canonicalizer.nim | 6 ++--- compiler/lexer.nim | 16 ++++++++++++ compiler/parser.nim | 65 ++++++++++++++++++++++++++++++++-------------- compiler/pragmas.nim | 10 +++---- compiler/syntaxes.nim | 21 ++++++++------- doc/manual.txt | 46 +++++++++++++++++++++++++++++--- todo.txt | 1 - web/news.txt | 1 + 8 files changed, 123 insertions(+), 43 deletions(-) (limited to 'compiler/canonicalizer.nim') diff --git a/compiler/canonicalizer.nim b/compiler/canonicalizer.nim index 07e932b28..3bc4eb029 100644 --- a/compiler/canonicalizer.nim +++ b/compiler/canonicalizer.nim @@ -81,7 +81,7 @@ proc hashSym(c: var MD5Context, s: PSym) = proc hashTree(c: var MD5Context, n: PNode) = if n == nil: - c &= "noTreeKind" + c &= "\255" return var k = n.kind md5Update(c, cast[cstring](addr(k)), 1) @@ -107,7 +107,7 @@ proc hashTree(c: var MD5Context, n: PNode) = proc hashType(c: var MD5Context, t: PType) = # modelled after 'typeToString' if t == nil: - c &= "noTypeKind" + c &= "\254" return var k = t.kind @@ -168,7 +168,7 @@ proc canonConst(n: PNode): TUid = c.hashType(n.typ) md5Final(c, MD5Digest(result)) -proc canonSym(s: PSym): TUid +proc canonSym(s: PSym): TUid = var c: MD5Context md5Init(c) c.hashSym(s) diff --git a/compiler/lexer.nim b/compiler/lexer.nim index 0e7df13cd..9c6c5e22f 100644 --- a/compiler/lexer.nim +++ b/compiler/lexer.nim @@ -110,6 +110,8 @@ type fNumber*: BiggestFloat # the parsed floating point literal base*: TNumericalBase # the numerical base; only valid for int # or float literals + strongSpaceA*: int8 # leading spaces of an operator + strongSpaceB*: int8 # trailing spaces of an operator literal*: string # the parsed (string) literal; and # documentation comments are here too line*, col*: int @@ -119,6 +121,7 @@ type indentAhead*: int # if > 0 an indendation has already been read # this is needed because scanning comments # needs so much look-ahead + strongSpaces*: bool var gLinesCompiled*: int # all lines that have been compiled @@ -183,6 +186,7 @@ proc initToken*(L: var TToken) = L.tokType = tkInvalid 
L.iNumber = 0 L.indent = 0 + L.strongSpaceA = 0 L.literal = "" L.fNumber = 0.0 L.base = base10 @@ -192,6 +196,7 @@ proc fillToken(L: var TToken) = L.tokType = tkInvalid L.iNumber = 0 L.indent = 0 + L.strongSpaceA = 0 setLen(L.literal, 0) L.fNumber = 0.0 L.base = base10 @@ -634,6 +639,14 @@ proc getOperator(L: var TLexer, tok: var TToken) = h = h !& ord(c) inc(pos) endOperator(L, tok, pos, h) + # advance pos but don't store it in L.bufpos so the next token (which might + # be an operator too) gets the preceeding spaces: + tok.strongSpaceB = 0 + while buf[pos] == ' ': + inc pos + inc tok.strongSpaceB + if buf[pos] in {CR, LF, nimlexbase.EndOfFile}: + tok.strongSpaceB = -1 proc scanComment(L: var TLexer, tok: var TToken) = var pos = L.bufpos @@ -677,10 +690,12 @@ proc scanComment(L: var TLexer, tok: var TToken) = proc skip(L: var TLexer, tok: var TToken) = var pos = L.bufpos var buf = L.buf + tok.strongSpaceA = 0 while true: case buf[pos] of ' ': inc(pos) + inc(tok.strongSpaceA) of Tabulator: lexMessagePos(L, errTabulatorsAreNotAllowed, pos) inc(pos) @@ -691,6 +706,7 @@ proc skip(L: var TLexer, tok: var TToken) = while buf[pos] == ' ': inc(pos) inc(indent) + tok.strongSpaceA = 0 if buf[pos] > ' ': tok.indent = indent break diff --git a/compiler/parser.nim b/compiler/parser.nim index 5a5bfb574..cfba89f4a 100644 --- a/compiler/parser.nim +++ b/compiler/parser.nim @@ -38,7 +38,6 @@ type inSemiStmtList: int proc parseAll*(p: var TParser): PNode -proc openParser*(p: var TParser, filename: string, inputstream: PLLStream) proc closeParser*(p: var TParser) proc parseTopLevelStmt*(p: var TParser): PNode # implements an iterator. Returns the next top-level statement or @@ -50,7 +49,6 @@ proc parseString*(s: string, filename: string = "", line: int = 0): PNode # correct error messages referring to the original source. 
# helpers for the other parsers -proc getPrecedence*(tok: TToken): int proc isOperator*(tok: TToken): bool proc getTok*(p: var TParser) proc parMessage*(p: TParser, msg: TMsgKind, arg: string = "") @@ -77,14 +75,17 @@ proc parseCase(p: var TParser): PNode proc getTok(p: var TParser) = rawGetTok(p.lex, p.tok) -proc openParser*(p: var TParser, fileIdx: int32, inputStream: PLLStream) = +proc openParser*(p: var TParser, fileIdx: int32, inputStream: PLLStream, + strongSpaces=false) = initToken(p.tok) openLexer(p.lex, fileIdx, inputStream) getTok(p) # read the first token p.firstTok = true + p.strongSpaces = strongSpaces -proc openParser*(p: var TParser, filename: string, inputStream: PLLStream) = - openParser(p, filename.fileInfoIdx, inputstream) +proc openParser*(p: var TParser, filename: string, inputStream: PLLStream, + strongSpaces=false) = + openParser(p, filename.fileInfoIdx, inputstream, strongSpaces) proc closeParser(p: var TParser) = closeLexer(p.lex) @@ -193,34 +194,52 @@ proc isSigilLike(tok: TToken): bool {.inline.} = proc isLeftAssociative(tok: TToken): bool {.inline.} = result = tok.tokType != tkOpr or relevantOprChar(tok.ident) != '^' -proc getPrecedence(tok: TToken): int = +proc getPrecedence(tok: TToken, strongSpaces: bool): int = + template considerStrongSpaces(x): expr = + x + (if strongSpaces: 100 - tok.strongSpaceA.int*10 else: 0) + case tok.tokType of tkOpr: let L = tok.ident.s.len let relevantChar = relevantOprChar(tok.ident) - template considerAsgn(value: expr) = - result = if tok.ident.s[L-1] == '=': 1 else: value + template considerAsgn(value: expr) = + result = if tok.ident.s[L-1] == '=': 1 else: considerStrongSpaces(value) case relevantChar of '$', '^': considerAsgn(10) of '*', '%', '/', '\\': considerAsgn(9) - of '~': result = 8 + of '~': result = considerStrongSpaces(8) of '+', '-', '|': considerAsgn(8) of '&': considerAsgn(7) - of '=', '<', '>', '!': result = 5 + of '=', '<', '>', '!': result = considerStrongSpaces(5) of '.': considerAsgn(6) - of '?': result = 2 + of '?': result = considerStrongSpaces(2) else: considerAsgn(2) of tkDiv, tkMod, tkShl, tkShr: result = 9 of tkIn, tkNotin, tkIs, tkIsnot, tkNot, tkOf, tkAs: result = 5 - of tkDotDot: result = 6 + of tkDotDot: result = considerStrongSpaces(6) of tkAnd: result = 4 of tkOr, tkXor: result = 3 - else: result = - 10 - -proc isOperator(tok: TToken): bool = - result = getPrecedence(tok) >= 0 + else: result = -10 + +proc isOperator(tok: TToken): bool = + tok.tokType in {tkOpr, tkDiv, tkMod, tkShl, tkShr, tkIn, tkNotin, tkIs, + tkIsnot, tkNot, tkOf, tkAs, tkDotDot, tkAnd, tkOr, tkXor} + +proc isUnary(p: TParser): bool = + p.strongSpaces and p.tok.tokType in {tkOpr, tkDotDot} and + p.tok.strongSpaceB == 0 and + p.tok.strongSpaceA > 0 + +proc checkBinary(p: TParser) {.inline.} = + # we don't check '..' 
here as that's too annoying + if p.strongSpaces and p.tok.tokType == tkOpr: + if p.tok.strongSpaceB > 0 and p.tok.strongSpaceA != p.tok.strongSpaceB: + parMessage(p, errGenerated, "number of spaces around '$#' not consistent"% + prettyTok(p.tok)) + elif p.tok.strongSpaceA notin {0,1,2,4,8}: + parMessage(p, errGenerated, "number of spaces must be 0,1,2,4 or 8") #| module = stmt ^* (';' / IND{=}) #| @@ -650,6 +669,7 @@ proc primarySuffix(p: var TParser, r: PNode): PNode = while p.tok.indent < 0: case p.tok.tokType of tkParLe: + if p.strongSpaces and p.tok.strongSpaceA > 0: break result = namedParams(p, result, nkCall, tkParRi) if result.len > 1 and result.sons[1].kind == nkExprColonExpr: result.kind = nkObjConstr @@ -664,8 +684,10 @@ proc primarySuffix(p: var TParser, r: PNode): PNode = result = dotExpr(p, result) result = parseGStrLit(p, result) of tkBracketLe: + if p.strongSpaces and p.tok.strongSpaceA > 0: break result = namedParams(p, result, nkBracketExpr, tkBracketRi) of tkCurlyLe: + if p.strongSpaces and p.tok.strongSpaceA > 0: break result = namedParams(p, result, nkCurlyExpr, tkCurlyRi) of tkSymbol, tkAccent, tkIntLit..tkCharLit, tkNil, tkCast: if p.inPragma == 0: @@ -695,10 +717,11 @@ proc primary(p: var TParser, mode: TPrimaryMode): PNode proc simpleExprAux(p: var TParser, limit: int, mode: TPrimaryMode): PNode = result = primary(p, mode) # expand while operators have priorities higher than 'limit' - var opPrec = getPrecedence(p.tok) + var opPrec = getPrecedence(p.tok, p.strongSpaces) let modeB = if mode == pmTypeDef: pmTypeDesc else: mode # the operator itself must not start on a new line: - while opPrec >= limit and p.tok.indent < 0: + while opPrec >= limit and p.tok.indent < 0 and not isUnary(p): + checkBinary(p) var leftAssoc = ord(isLeftAssociative(p.tok)) var a = newNodeP(nkInfix, p) var opNode = newIdentNodeP(p.tok.ident, p) # skip operator: @@ -710,7 +733,7 @@ proc simpleExprAux(p: var TParser, limit: int, mode: TPrimaryMode): PNode = addSon(a, result) addSon(a, b) result = a - opPrec = getPrecedence(p.tok) + opPrec = getPrecedence(p.tok, p.strongSpaces) proc simpleExpr(p: var TParser, mode = pmNormal): PNode = result = simpleExprAux(p, -1, mode) @@ -1933,7 +1956,9 @@ proc parseString(s: string, filename: string = "", line: int = 0): PNode = stream.lineOffset = line var parser: TParser - openParser(parser, filename, stream) + # XXX for now the builtin 'parseStmt/Expr' functions do not know about strong + # spaces... + openParser(parser, filename, stream, false) result = parser.parseAll closeParser(parser) diff --git a/compiler/pragmas.nim b/compiler/pragmas.nim index bf3564016..f5d69a01c 100644 --- a/compiler/pragmas.nim +++ b/compiler/pragmas.nim @@ -97,8 +97,6 @@ proc makeExternImport(s: PSym, extname: string) = incl(s.flags, sfImportc) excl(s.flags, sfForward) -const invalidIdentChars = AllChars - IdentChars - proc validateExternCName(s: PSym, info: TLineInfo) = ## Validates that the symbol name in s.loc.r is a valid C identifier. ## @@ -106,16 +104,14 @@ proc validateExternCName(s: PSym, info: TLineInfo) = ## starting with a number. If the check fails, a generic error will be ## displayed to the user. 
let target = ropeToStr(s.loc.r) - if target.len < 1 or (not (target[0] in IdentStartChars)) or - (not target.allCharsInSet(IdentChars)): + if target.len < 1 or target[0] notin IdentStartChars or + not target.allCharsInSet(IdentChars): localError(info, errGenerated, "invalid exported symbol") proc makeExternExport(s: PSym, extname: string, info: TLineInfo) = setExternName(s, extname) - case gCmd - of cmdCompileToC, cmdCompileToCpp, cmdCompileToOC: + if gCmd in {cmdCompileToC, cmdCompileToCpp, cmdCompileToOC}: validateExternCName(s, info) - else: discard incl(s.flags, sfExportc) proc processImportCompilerProc(s: PSym, extname: string) = diff --git a/compiler/syntaxes.nim b/compiler/syntaxes.nim index 7c44ec0b4..478c2a837 100644 --- a/compiler/syntaxes.nim +++ b/compiler/syntaxes.nim @@ -17,14 +17,15 @@ type TFilterKind* = enum filtNone, filtTemplate, filtReplace, filtStrip TParserKind* = enum - skinStandard, skinBraces, skinEndX + skinStandard, skinStrongSpaces, skinBraces, skinEndX const - parserNames*: array[TParserKind, string] = ["standard", "braces", "endx"] - filterNames*: array[TFilterKind, string] = ["none", "stdtmpl", "replace", - "strip"] + parserNames*: array[TParserKind, string] = ["standard", "strongspaces", + "braces", "endx"] + filterNames*: array[TFilterKind, string] = ["none", "stdtmpl", "replace", + "strip"] -type +type TParsers*{.final.} = object skin*: TParserKind parser*: TParser @@ -54,7 +55,7 @@ proc parseFile(fileIdx: int32): PNode = proc parseAll(p: var TParsers): PNode = case p.skin - of skinStandard: + of skinStandard, skinStrongSpaces: result = parser.parseAll(p.parser) of skinBraces: result = pbraces.parseAll(p.parser) @@ -65,7 +66,7 @@ proc parseAll(p: var TParsers): PNode = proc parseTopLevelStmt(p: var TParsers): PNode = case p.skin - of skinStandard: + of skinStandard, skinStrongSpaces: result = parser.parseTopLevelStmt(p.parser) of skinBraces: result = pbraces.parseTopLevelStmt(p.parser) @@ -170,7 +171,9 @@ proc openParsers(p: var TParsers, fileIdx: int32, inputstream: PLLStream) = else: s = inputstream case p.skin of skinStandard, skinBraces, skinEndX: - parser.openParser(p.parser, fileIdx, s) + parser.openParser(p.parser, fileIdx, s, false) + of skinStrongSpaces: + parser.openParser(p.parser, fileIdx, s, true) -proc closeParsers(p: var TParsers) = +proc closeParsers(p: var TParsers) = parser.closeParser(p.parser) diff --git a/doc/manual.txt b/doc/manual.txt index 98219360e..1c6cf6c1d 100644 --- a/doc/manual.txt +++ b/doc/manual.txt @@ -480,8 +480,8 @@ precedence and associativity; this is useful for meta programming. Associativity ------------- -All binary operators are left-associative, except binary operators whose -relevant char is ``^``. +Binary operators whose relevant character is ``^`` are right-associative, all +other binary operators are left-associative. Precedence ---------- @@ -508,7 +508,7 @@ Precedence level Operators Relevant char 7 ``+ -`` ``+ ~ |`` OP7 6 ``&`` ``&`` OP6 5 ``..`` ``.`` OP5 - 4 ``== <= < >= > != in not_in is isnot not of`` ``= < > !`` OP4 + 4 ``== <= < >= > != in notin is isnot not of`` ``= < > !`` OP4 3 ``and`` OP3 2 ``or xor`` OP2 1 ``@ : ?`` OP1 @@ -516,6 +516,46 @@ Precedence level Operators Relevant char ================ =============================================== ================== =============== +Strong spaces +------------- + +The number of spaces preceeding a non-keyword operator affects precedence +if the experimental parser directive ``#!strongSpaces`` is used. 
Indentation +is not used to determine the number of spaces. If 2 or more operators have the +same number of preceeding spaces the precedence table applies, so ``1 + 3 * 4`` +is still parsed as ``1 + (3 * 4)``, but ``1+3 * 4`` is parsed as ``(1+3) * 4``: + +.. code-block:: nimrod + #! strongSpaces + if foo+4 * 4 == 8 and b&c | 9 ++ + bar: + echo "" + # is parsed as + if ((foo+4)*4 == 8) and (((b&c) | 9) ++ bar): echo "" + + +Furthermore whether an operator is used a prefix operator is affected by the +number of spaces: + +.. code-block:: nimrod + #! strongSpaces + echo $foo + # is parsed as + echo($foo) + +This also affects whether ``[]``, ``{}``, ``()`` are parsed as constructors +or as accessors: + +.. code-block:: nimrod + #! strongSpaces + echo (1,2) + # is parsed as + echo((1,2)) + + +Grammar +------- + The grammar's start symbol is ``module``. .. include:: grammar.txt diff --git a/todo.txt b/todo.txt index a67ff5172..51f883d1d 100644 --- a/todo.txt +++ b/todo.txt @@ -32,7 +32,6 @@ version 0.9.x - ensure (ref T)(a, b) works as a type conversion and type constructor - optimize 'genericReset'; 'newException' leads to code bloat - stack-less GC -- implement strongSpaces:on - make '--implicitStatic:on' the default - implicit deref for parameter matching diff --git a/web/news.txt b/web/news.txt index 0001cece7..83863cdd9 100644 --- a/web/news.txt +++ b/web/news.txt @@ -82,6 +82,7 @@ News - The *command syntax* is supported in a lot more contexts. - Anonymous iterators are now supported and iterators can capture variables of an outer proc. + - The experimental ``strongSpaces`` parsing mode has been implemented. Tools improvements -- cgit 1.4.1-2-gfad0
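
To make the precedence tweak in the last patch concrete: under ``#!strongSpaces``, ``getPrecedence`` boosts an operator's base precedence by ``100 - 10*spaces`` (the ``considerStrongSpaces`` template in parser.nim), so an operator written with fewer leading spaces binds tighter. The toy sketch below is only an illustration of that arithmetic; ``effectivePrec`` is an invented helper, not a proc from the compiler. It shows why ``1+3 * 4`` parses as ``(1+3) * 4`` while the normal table still applies when the spacing is uniform:

.. code-block:: nimrod
  # Toy model of the strong-spaces precedence boost; effectivePrec is an
  # illustrative name, not part of the compiler.
  proc effectivePrec(basePrec, leadingSpaces: int, strongSpaces: bool): int =
    basePrec + (if strongSpaces: 100 - leadingSpaces * 10 else: 0)

  when isMainModule:
    # ``1+3 * 4`` under #!strongSpaces: '+' (base 8) written with 0 spaces,
    # '*' (base 9) written with 1 space, so '+' ends up binding tighter:
    assert effectivePrec(8, 0, true) > effectivePrec(9, 1, true)
    # without strong spaces the usual table applies and '*' binds tighter:
    assert effectivePrec(8, 0, false) < effectivePrec(9, 1, false)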
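
Tying the series back to the module's stated purpose, here is a minimal, self-contained sketch of the name-mangling scheme described in the first patch: the canonical signature is hashed with MD5 and the 128-bit digest is rendered with the 'base64a' alphabet, prefixed by the primary identifier. The signature string passed in below is a hypothetical stand-in (the compiler hashes symbols and types structurally via ``hashSym``, ``hashTree`` and ``hashType`` rather than hashing a pre-built string), ``digestToBase64a`` and ``mangle`` are names invented for this sketch, and the ``cb64`` table is written with a comma between ``"T"`` and ``"U"``, which the patch above appears to omit:

.. code-block:: nimrod
  import md5

  const
    cb64 = [
      "A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K", "L", "M", "N",
      "O", "P", "Q", "R", "S", "T", "U", "V", "W", "X", "Y", "Z",
      "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m", "n",
      "o", "p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z",
      "0", "1", "2", "3", "4", "5", "6", "7", "8", "9",
      "_A", "_B"]

  proc digestToBase64a(d: MD5Digest): string =
    ## same byte grouping as the compiler's toBase64a, specialised to the
    ## 16-byte MD5 digest
    result = newStringOfCap(24)
    var i = 0
    while i < d.len - 2:
      let a = d[i].int and 0xFF
      let b = d[i+1].int and 0xFF
      let c = d[i+2].int and 0xFF
      result.add cb64[a shr 2]
      result.add cb64[((a and 3) shl 4) or ((b and 0xF0) shr 4)]
      result.add cb64[((b and 0x0F) shl 2) or ((c and 0xC0) shr 6)]
      result.add cb64[c and 0x3F]
      inc(i, 3)
    # 16 = 5*3 + 1, so exactly one byte is left over:
    let a = d[i].int and 0xFF
    result.add cb64[a shr 2]
    result.add cb64[(a and 3) shl 4]

  proc mangle(primaryIdent, canonicalSig: string): string =
    primaryIdent & "_" & digestToBase64a(toMD5(canonicalSig))

  when isMainModule:
    # prints a mangled name of the same shape as gABI_MTdmOWY5MTQ1MDcyNGQ3ZA
    echo mangle("gABI", "gABI(PCtx;PNode;TOpcode;TRegister;TRegister;BiggestInt)")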