diff options
author | Andreas Rumpf <ar@kimeta.de> | 2014-04-10 01:47:20 +0200 |
---|---|---|
committer | Andreas Rumpf <ar@kimeta.de> | 2014-04-10 01:47:20 +0200 |
commit | dcfc7a8896166563f6fd80fbab81bc50e0b5a217 (patch) | |
tree | 4a247ec2d64bafec53e5ab24b0fda89642908a07 /compiler/canonicalizer.nim | |
parent | a86c774932afd8b6782df1925837df9df04ef381 (diff) | |
parent | 8b82004359b8d852fa0107d79cc78b21eb35c028 (diff) | |
download | Nim-dcfc7a8896166563f6fd80fbab81bc50e0b5a217.tar.gz |
resolved conflict
Diffstat (limited to 'compiler/canonicalizer.nim')
-rw-r--r-- | compiler/canonicalizer.nim | 416 |
1 files changed, 416 insertions, 0 deletions
diff --git a/compiler/canonicalizer.nim b/compiler/canonicalizer.nim new file mode 100644 index 000000000..3bc4eb029 --- /dev/null +++ b/compiler/canonicalizer.nim @@ -0,0 +1,416 @@ +# +# +# The Nimrod Compiler +# (c) Copyright 2014 Andreas Rumpf +# +# See the file "copying.txt", included in this +# distribution, for details about the copyright. +# + +## This module implements the canonalization for the various caching mechanisms. + +import strutils, db_sqlite, md5 + +var db: TDbConn + +# We *hash* the relevant information into 128 bit hashes. This should be good +# enough to prevent any collisions. + +type + TUid = distinct MD5Digest + +# For name mangling we encode these hashes via a variant of base64 (called +# 'base64a') and prepend the *primary* identifier to ease the debugging pain. +# So a signature like: +# +# proc gABI(c: PCtx; n: PNode; opc: TOpcode; a, b: TRegister; imm: BiggestInt) +# +# is mangled into: +# gABI_MTdmOWY5MTQ1MDcyNGQ3ZA +# +# This is a good compromise between correctness and brevity. ;-) + +const + cb64 = [ + "A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K", "L", "M", "N", + "O", "P", "Q", "R", "S", "T" "U", "V", "W", "X", "Y", "Z", + "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m", "n", + "o", "p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z", + "0", "1", "2", "3", "4", "5", "6", "7", "8", "9", + "_A", "_B"] + +proc toBase64a(s: cstring, len: int): string = + ## encodes `s` into base64 representation. After `lineLen` characters, a + ## `newline` is added. + result = newStringOfCap(((len + 2) div 3) * 4) + var i = 0 + while i < s.len - 2: + let a = ord(s[i]) + let b = ord(s[i+1]) + let c = ord(s[i+2]) + result.add cb64[a shr 2] + result.add cb64[((a and 3) shl 4) or ((b and 0xF0) shr 4)] + result.add cb64[((b and 0x0F) shl 2) or ((c and 0xC0) shr 6)] + result.add cb64[c and 0x3F] + inc(i, 3) + if i < s.len-1: + let a = ord(s[i]) + let b = ord(s[i+1]) + result.add cb64[a shr 2] + result.add cb64[((a and 3) shl 4) or ((b and 0xF0) shr 4)] + result.add cb64[((b and 0x0F) shl 2)] + elif i < s.len: + let a = ord(s[i]) + result.add cb64[a shr 2] + result.add cb64[(a and 3) shl 4] + +proc toBase64a(u: TUid): string = toBase64a(cast[cstring](u), sizeof(u)) + +proc `&=`(c: var MD5Context, s: string) = md5Update(c, s, s.len) + +proc hashSym(c: var MD5Context, s: PSym) = + if sfAnon in s.flags or s.kind == skGenericParam: + c &= ":anon" + else: + var it = s.owner + while it != nil: + hashSym(c, it) + c &= "." + it = s.owner + c &= s.name.s + +proc hashTree(c: var MD5Context, n: PNode) = + if n == nil: + c &= "\255" + return + var k = n.kind + md5Update(c, cast[cstring](addr(k)), 1) + # we really must not hash line information. 'n.typ' is debatable but + # shouldn't be necessary for now and avoids potential infinite recursions. + case n.kind + of nkEmpty, nkNilLit, nkType: discard + of nkIdent: + c &= n.ident.s + of nkSym: + hashSym(c, n.sym) + of nkCharLit..nkUInt64Lit: + var v = n.intVal + md5Update(c, cast[cstring](addr(v)), sizeof(v)) + of nkFloatLit..nkFloat64Lit: + var v = n.floatVal + md5Update(c, cast[cstring](addr(v)), sizeof(v)) + of nkStrLit..nkTripleStrLit: + c &= n.strVal + else: + for i in 0.. <n.len: hashTree(c, n.sons[i]) + +proc hashType(c: var MD5Context, t: PType) = + # modelled after 'typeToString' + if t == nil: + c &= "\254" + return + + var k = t.kind + md5Update(c, cast[cstring](addr(k)), 1) + + if t.sym != nil and sfAnon notin t.sym.flags: + # t.n for literals, but not for e.g. objects! + if t.kind in {tyFloat, tyInt}: c.hashNode(t.n) + c.hashSym(t.sym) + + case t.kind + of tyGenericBody, tyGenericInst, tyGenericInvokation: + for i in countup(0, sonsLen(t) -1 -ord(t.kind != tyGenericInvokation)): + c.hashType t.sons[i] + of tyUserTypeClass: + internalAssert t.sym != nil and t.sym.owner != nil + c &= t.sym.owner.name.s + of tyUserTypeClassInst: + let body = t.base + c.hashSym body.sym + for i in countup(1, sonsLen(t) - 2): + c.hashType t.sons[i] + of tyFromExpr, tyFieldAccessor: + c.hashTree(t.n) + of tyArrayConstr: + c.hashTree(t.sons[0].n) + c.hashType(t.sons[1]) + of tyTuple: + if t.n != nil: + assert(sonsLen(t.n) == sonsLen(t)) + for i in countup(0, sonsLen(t.n) - 1): + assert(t.n.sons[i].kind == nkSym) + c &= t.n.sons[i].sym.name.s + c &= ":" + c.hashType(t.sons[i]) + c &= "," + else: + for i in countup(0, sonsLen(t) - 1): c.hashType t.sons[i] + of tyRange: + c.hashTree(t.n) + c.hashType(t.sons[0]) + of tyProc: + c &= (if tfIterator in t.flags: "iterator " else: "proc ") + for i in 0.. <t.len: c.hashType(t.sons[i]) + md5Update(c, cast[cstring](addr(t.callConv)), 1) + + if tfNoSideEffect in t.flags: c &= ".noSideEffect" + if tfThread in t.flags: c &= ".thread" + else: + for i in 0.. <t.len: c.hashType(t.sons[i]) + if tfShared in t.flags: c &= "shared" + if tfNotNil in t.flags: c &= "not nil" + +proc canonConst(n: PNode): TUid = + var c: MD5Context + md5Init(c) + c.hashTree(n) + c.hashType(n.typ) + md5Final(c, MD5Digest(result)) + +proc canonSym(s: PSym): TUid = + var c: MD5Context + md5Init(c) + c.hashSym(s) + md5Final(c, MD5Digest(result)) + +proc pushType(w: PRodWriter, t: PType) = + # check so that the stack does not grow too large: + if iiTableGet(w.index.tab, t.id) == InvalidKey: + w.tstack.add(t) + +proc pushSym(w: PRodWriter, s: PSym) = + # check so that the stack does not grow too large: + if iiTableGet(w.index.tab, s.id) == InvalidKey: + w.sstack.add(s) + +proc encodeNode(w: PRodWriter, fInfo: TLineInfo, n: PNode, + result: var string) = + if n == nil: + # nil nodes have to be stored too: + result.add("()") + return + result.add('(') + encodeVInt(ord(n.kind), result) + # we do not write comments for now + # Line information takes easily 20% or more of the filesize! Therefore we + # omit line information if it is the same as the father's line information: + if fInfo.fileIndex != n.info.fileIndex: + result.add('?') + encodeVInt(n.info.col, result) + result.add(',') + encodeVInt(n.info.line, result) + result.add(',') + encodeVInt(fileIdx(w, toFilename(n.info)), result) + elif fInfo.line != n.info.line: + result.add('?') + encodeVInt(n.info.col, result) + result.add(',') + encodeVInt(n.info.line, result) + elif fInfo.col != n.info.col: + result.add('?') + encodeVInt(n.info.col, result) + var f = n.flags * PersistentNodeFlags + if f != {}: + result.add('$') + encodeVInt(cast[int32](f), result) + if n.typ != nil: + result.add('^') + encodeVInt(n.typ.id, result) + pushType(w, n.typ) + case n.kind + of nkCharLit..nkInt64Lit: + if n.intVal != 0: + result.add('!') + encodeVBiggestInt(n.intVal, result) + of nkFloatLit..nkFloat64Lit: + if n.floatVal != 0.0: + result.add('!') + encodeStr($n.floatVal, result) + of nkStrLit..nkTripleStrLit: + if n.strVal != "": + result.add('!') + encodeStr(n.strVal, result) + of nkIdent: + result.add('!') + encodeStr(n.ident.s, result) + of nkSym: + result.add('!') + encodeVInt(n.sym.id, result) + pushSym(w, n.sym) + else: + for i in countup(0, sonsLen(n) - 1): + encodeNode(w, n.info, n.sons[i], result) + add(result, ')') + +proc encodeLoc(w: PRodWriter, loc: TLoc, result: var string) = + var oldLen = result.len + result.add('<') + if loc.k != low(loc.k): encodeVInt(ord(loc.k), result) + if loc.s != low(loc.s): + add(result, '*') + encodeVInt(ord(loc.s), result) + if loc.flags != {}: + add(result, '$') + encodeVInt(cast[int32](loc.flags), result) + if loc.t != nil: + add(result, '^') + encodeVInt(cast[int32](loc.t.id), result) + pushType(w, loc.t) + if loc.r != nil: + add(result, '!') + encodeStr(ropeToStr(loc.r), result) + if loc.a != 0: + add(result, '?') + encodeVInt(loc.a, result) + if oldLen + 1 == result.len: + # no data was necessary, so remove the '<' again: + setLen(result, oldLen) + else: + add(result, '>') + +proc encodeType(w: PRodWriter, t: PType, result: var string) = + if t == nil: + # nil nodes have to be stored too: + result.add("[]") + return + # we need no surrounding [] here because the type is in a line of its own + if t.kind == tyForward: internalError("encodeType: tyForward") + # for the new rodfile viewer we use a preceeding [ so that the data section + # can easily be disambiguated: + add(result, '[') + encodeVInt(ord(t.kind), result) + add(result, '+') + encodeVInt(t.id, result) + if t.n != nil: + encodeNode(w, unknownLineInfo(), t.n, result) + if t.flags != {}: + add(result, '$') + encodeVInt(cast[int32](t.flags), result) + if t.callConv != low(t.callConv): + add(result, '?') + encodeVInt(ord(t.callConv), result) + if t.owner != nil: + add(result, '*') + encodeVInt(t.owner.id, result) + pushSym(w, t.owner) + if t.sym != nil: + add(result, '&') + encodeVInt(t.sym.id, result) + pushSym(w, t.sym) + if t.size != - 1: + add(result, '/') + encodeVBiggestInt(t.size, result) + if t.align != 2: + add(result, '=') + encodeVInt(t.align, result) + encodeLoc(w, t.loc, result) + for i in countup(0, sonsLen(t) - 1): + if t.sons[i] == nil: + add(result, "^()") + else: + add(result, '^') + encodeVInt(t.sons[i].id, result) + pushType(w, t.sons[i]) + +proc encodeLib(w: PRodWriter, lib: PLib, info: TLineInfo, result: var string) = + add(result, '|') + encodeVInt(ord(lib.kind), result) + add(result, '|') + encodeStr(ropeToStr(lib.name), result) + add(result, '|') + encodeNode(w, info, lib.path, result) + +proc encodeSym(w: PRodWriter, s: PSym, result: var string) = + if s == nil: + # nil nodes have to be stored too: + result.add("{}") + return + # we need no surrounding {} here because the symbol is in a line of its own + encodeVInt(ord(s.kind), result) + result.add('+') + encodeVInt(s.id, result) + result.add('&') + encodeStr(s.name.s, result) + if s.typ != nil: + result.add('^') + encodeVInt(s.typ.id, result) + pushType(w, s.typ) + result.add('?') + if s.info.col != -1'i16: encodeVInt(s.info.col, result) + result.add(',') + if s.info.line != -1'i16: encodeVInt(s.info.line, result) + result.add(',') + encodeVInt(fileIdx(w, toFilename(s.info)), result) + if s.owner != nil: + result.add('*') + encodeVInt(s.owner.id, result) + pushSym(w, s.owner) + if s.flags != {}: + result.add('$') + encodeVInt(cast[int32](s.flags), result) + if s.magic != mNone: + result.add('@') + encodeVInt(ord(s.magic), result) + if s.options != w.options: + result.add('!') + encodeVInt(cast[int32](s.options), result) + if s.position != 0: + result.add('%') + encodeVInt(s.position, result) + if s.offset != - 1: + result.add('`') + encodeVInt(s.offset, result) + encodeLoc(w, s.loc, result) + if s.annex != nil: encodeLib(w, s.annex, s.info, result) + if s.constraint != nil: + add(result, '#') + encodeNode(w, unknownLineInfo(), s.constraint, result) + # lazy loading will soon reload the ast lazily, so the ast needs to be + # the last entry of a symbol: + if s.ast != nil: + # we used to attempt to save space here by only storing a dummy AST if + # it is not necessary, but Nimrod's heavy compile-time evaluation features + # make that unfeasible nowadays: + encodeNode(w, s.info, s.ast, result) + + +proc createDb() = + db.exec(sql""" + create table if not exists Module( + id integer primary key, + name varchar(256) not null, + fullpath varchar(256) not null, + interfHash varchar(256) not null, + fullHash varchar(256) not null, + + created timestamp not null default (DATETIME('now')), + );""") + + db.exec(sql""" + create table if not exists Symbol( + id integer primary key, + module integer not null, + name varchar(max) not null, + data varchar(max) not null, + created timestamp not null default (DATETIME('now')), + + foreign key (module) references module(id) + );""") + + db.exec(sql""" + create table if not exists Type( + id integer primary key, + module integer not null, + name varchar(max) not null, + data varchar(max) not null, + created timestamp not null default (DATETIME('now')), + + foreign key (module) references module(id) + );""") + + + #db.exec(sql""" + # --create unique index if not exists TsstNameIx on TestResult(name); + # """, []) + |