#
#
#           The Nimrod Compiler
#        (c) Copyright 2013 Andreas Rumpf
#
#    See the file "copying.txt", included in this
#    distribution, for details about the copyright.
#

## This module implements the 'implies' relation for guards.

import ast, astalgo, msgs, magicsys, nimsets, trees, types, renderer, idents

# Sets of magics grouped by operator family. The procs below dispatch on
# these sets instead of listing every type-specific magic.
const
  someEq = {mEqI, mEqI64, mEqF64, mEqEnum, mEqCh, mEqB, mEqRef, mEqProc,
    mEqUntracedRef, mEqStr, mEqSet, mEqCString}

  # set excluded here as the semantics are vastly different:
  someLe = {mLeI, mLeI64, mLeF64, mLeU, mLeU64, mLeEnum,
            mLeCh, mLeB, mLePtr, mLeStr}
  someLt = {mLtI, mLtI64, mLtF64, mLtU, mLtU64, mLtEnum,
            mLtCh, mLtB, mLtPtr, mLtStr}

  someLen = {mLengthOpenArray, mLengthStr, mLengthArray, mLengthSeq}

  someIn = {mInRange, mInSet}

# True if `n` is a literal node (its kind lies in nkCharLit..nkNilLit).
proc isValue(n: PNode): bool = n.kind in {nkCharLit..nkNilLit}
# True if `n` is anything other than a literal node.
proc isLocation(n: PNode): bool = not n.isValue

proc isLet(n: PNode): bool =
  ## True if `n` is a symbol node whose symbol is a 'let', a temporary, a
  ## 'for' loop variable, or a parameter whose type (after skipping
  ## `abstractInst`) is not a 'var' type. Otherwise the result stays false.
  if n.kind == nkSym:
    if n.sym.kind in {skLet, skTemp, skForVar}:
      result = true
    elif n.sym.kind == skParam and skipTypes(n.sym.typ,
                                             abstractInst).kind != tyVar:
      result = true

# True if `n` is a symbol node for a 'result' or 'var' symbol that carries
# neither the `sfGlobal` nor the `sfAddrTaken` flag.
proc isVar(n: PNode): bool =
  n.kind == nkSym and n.sym.kind in {skResult, skVar} and
      {sfGlobal, sfAddrTaken} * n.sym.flags == {}

proc isLetLocation(m: PNode, isApprox: bool): bool =
  ## Walks from `m` down to its root expression and decides whether that
  ## root can be tracked. With `isApprox` false, the root must satisfy
  ## `isLet` and no dereference may occur on the way. With `isApprox` true,
  ## one dereference is tolerated and a root satisfying `isVar` is also
  ## accepted.
  # consider: 'n[].kind' --> we really need to support 1 deref op even if this
  # is technically wrong due to aliasing :-( We could introduce "soft" facts
  # for this; this would still be very useful for warnings and also nicely
  # solves the 'var' problems. For now we fix this by requiring much more
  # restrictive expressions for the 'not nil' checking.
  var n = m
  var derefs = 0
  while true:
    case n.kind
    of nkDotExpr, nkCheckedFieldExpr, nkObjUpConv, nkObjDownConv:
      n = n.sons[0]
    of nkDerefExpr, nkHiddenDeref:
      n = n.sons[0]
      inc derefs
    of nkBracketExpr:
      # indexing is only followed when the index is constant or a 'let';
      # otherwise we give up (result stays false)
      if isConstExpr(n.sons[1]) or isLet(n.sons[1]):
        n = n.sons[0]
      else: return
    of nkHiddenStdConv, nkHiddenSubConv, nkConv:
      n = n.sons[1]
    else:
      break
  # ord(isApprox) is 0 or 1: the number of dereferences we tolerate
  result = n.isLet and derefs <= ord(isApprox)
  if not result and isApprox:
    result = isVar(n)

# Exported shorthand for `isLetLocation(m, true)`.
proc interestingCaseExpr*(m: PNode): bool = isLetLocation(m, true)

proc getMagicOp(name: string, m: TMagic): PSym =
  ## Creates a fresh `skProc` symbol named `name` whose magic is `m`.
  result = newSym(skProc, getIdent(name), nil, unknownLineInfo())
  result.magic = m

# Operator symbols used when this module builds new fact nodes.
let
  opLe = getMagicOp("<=", mLeI)
  opLt = getMagicOp("<", mLtI)
  opAnd = getMagicOp("and", mAnd)
  opOr = getMagicOp("or", mOr)
  opNot = getMagicOp("not", mNot)
  opIsNil = getMagicOp("isnil", mIsNil)
  opContains = getMagicOp("contains", mInSet)
  opEq = getMagicOp("==", mEqI)

proc swapArgs(fact: PNode, newOp: PSym): PNode =
  ## Builds a new binary call node that applies `newOp` to the two operands
  ## of `fact` in swapped order.
  result = newNodeI(nkCall, fact.info, 3)
  result.sons[0] = newSymNode(newOp)
  result.sons[1] = fact.sons[2]
  result.sons[2] = fact.sons[1]

proc neg(n: PNode): PNode =
  ## Returns a node that expresses the negation of `n`. Returns nil if `n`
  ## is nil or if no negated form can be built (see the `mInSet` and `mOr`
  ## branches).
  if n == nil: return nil
  case n.getMagic
  of mNot:
    # double negation: just unwrap the operand
    result = n.sons[1]
  of someLt:
    # not (a < b)  ==  a >= b  ==  b <= a
    result = swapArgs(n, opLe)
  of someLe:
    # not (a <= b)  ==  b < a
    result = swapArgs(n, opLt)
  of mInSet:
    # only literal set constructors can be complemented
    if n.sons[1].kind != nkCurly: return nil
    let t = n.sons[2].typ.skipTypes(abstractInst)
    result = newNodeI(nkCall, n.info, 3)
    result.sons[0] = n.sons[0]
    result.sons[2] = n.sons[2]
    if t.kind == tyEnum:
      # collect every enum field that is not in the original set
      var s = newNodeIT(nkCurly, n.info, n.sons[1].typ)
      for e in t.n:
        let eAsNode = newIntNode(nkIntLit, e.sym.position)
        if not inSet(n.sons[1], eAsNode): s.add eAsNode
      result.sons[1] = s
    elif lengthOrd(t) < 1000:
      result.sons[1] = complement(n.sons[1])
    else:
      # not ({2, 3, 4}.contains(x))   x != 2 and x != 3 and x != 4
      # XXX todo
      result = nil
  of mOr:
    # not (a or b) --> not a and not b
    let
      a = n.sons[1].neg
      b = n.sons[2].neg
    if a != nil and b != nil:
      result = newNodeI(nkCall, n.info, 3)
      result.sons[0] = newSymNode(opAnd)
      result.sons[1] = a
      result.sons[2] = b
    elif a != nil:
      # only one side could be negated; keep that weaker fact
      result = a
    elif b != nil:
      result = b
  else:
    # leave  not (a == 4)  as it is
    result = newNodeI(nkCall, n.info, 2)
    result.sons[0] = newSymNode(opNot)
    result.sons[1] = n

proc buildIsNil(arg: PNode): PNode =
  ## Builds the call node `isnil(arg)`.
  result = newNodeI(nkCall, arg.info, 2)
  result.sons[0] = newSymNode(opIsNil)
  result.sons[1] = arg

proc usefulFact(n: PNode): PNode =
  ## Extracts from condition `n` the part that can be stored as a fact.
  ## Returns nil (the unset result) if no branch below accepts `n`.
  case n.getMagic
  of someEq:
    # 'x == nil' is normalized to 'isnil(x)'
    if skipConv(n.sons[2]).kind == nkNilLit and (
        isLetLocation(n.sons[1], false) or isVar(n.sons[1])):
      result = buildIsNil(n.sons[1])
    else:
      if isLetLocation(n.sons[1], true) or isLetLocation(n.sons[2], true):
        # XXX algebraic simplifications!  'i-1 < a.len' --> 'i < a.len+1'
        result = n
  of someLe+someLt:
    if isLetLocation(n.sons[1], true) or isLetLocation(n.sons[2], true):
      # XXX algebraic simplifications!  'i-1 < a.len' --> 'i < a.len+1'
      result = n
  of mIsNil:
    if isLetLocation(n.sons[1], false) or isVar(n.sons[1]):
      result = n
  of someIn:
    if isLetLocation(n.sons[1], true):
      result = n
  of mAnd:
    # keep whichever conjuncts are useful on their own
    let
      a = usefulFact(n.sons[1])
      b = usefulFact(n.sons[2])
    if a != nil and b != nil:
      result = newNodeI(nkCall, n.info, 3)
      result.sons[0] = newSymNode(opAnd)
      result.sons[1] = a
      result.sons[2] = b
    elif a != nil:
      result = a
    elif b != nil:
      result = b
  of mNot:
    let a = usefulFact(n.sons[1])
    if a != nil:
      result = a.neg
  of mOr:
    # 'or' sucks! (p.isNil or q.isNil) --> hard to do anything
    # with that knowledge...
    # DeMorgan helps a little though:
    #   not a or not b --> not (a and b)
    #  (x == 3) or (y == 2)  ---> not ( not (x==3) and not (y == 2))
    #  not (x != 3 and y != 2)
    let
      a = usefulFact(n.sons[1]).neg
      b = usefulFact(n.sons[2]).neg
    # both sides are required here; otherwise the result stays nil
    if a != nil and b != nil:
      result = newNodeI(nkCall, n.info, 3)
      result.sons[0] = newSymNode(opAnd)
      result.sons[1] = a
      result.sons[2] = b
      result = result.neg
  elif n.kind == nkSym and n.sym.kind == skLet:
    # consider:
    #   let a = 2 < x
    #   if a:
    #     ...
    # We can easily replace 'a' by '2 < x' here:
    if n.sym.ast != nil:
      result = usefulFact(n.sym.ast)
  elif n.kind == nkStmtListExpr:
    # only the last expression of a statement list expression is examined
    result = usefulFact(n.lastSon)

type
  TModel* = seq[PNode] # the "knowledge base"

proc addFact*(m: var TModel, n: PNode) =
  ## Appends the useful part of `n` (see `usefulFact`) to `m`; does nothing
  ## if there is none.
  let n = usefulFact(n)
  if n != nil: m.add n

proc addFactNeg*(m: var TModel, n: PNode) =
  ## Adds the negation of `n` to `m` via `addFact`; does nothing if `neg`
  ## returns nil.
  let n = n.neg
  if n != nil: addFact(m, n)

proc sameTree(a, b: PNode): bool =
  ## Structural equality of two trees. Identical references (including two
  ## nils) are equal. Otherwise both nodes must be non-nil and of the same
  ## kind; leaves are compared by symbol, identifier id, literal value or
  ## type, and all other nodes are compared son by son.
  result = false
  if a == b:
    result = true
  elif (a != nil) and (b != nil) and (a.kind == b.kind):
    case a.kind
    of nkSym: result = a.sym == b.sym
    of nkIdent: result = a.ident.id == b.ident.id
    of nkCharLit..nkInt64Lit: result = a.intVal == b.intVal
    of nkFloatLit..nkFloat64Lit: result = a.floatVal == b.floatVal
    of nkStrLit..nkTripleStrLit: result = a.strVal == b.strVal
    of nkType: result = a.typ == b.typ
    of nkEmpty, nkNilLit: result = true
    else:
      if sonsLen(a) == sonsLen(b):
        for i in countup(0, sonsLen(a) - 1):
          # the first mismatch returns with result still false
          if not sameTree(a.sons[i], b.sons[i]): return
        result = true

proc hasSubTree(n, x: PNode): bool =
  ## True if `n` itself or any of its descendants is `sameTree`-equal to `x`.
  if n.sameTree(x): result = true
  else:
    for i in 0..safeLen(n)-1:
      if hasSubTree(n.sons[i], x): return true

proc invalidateFacts*(m: var TModel, n: PNode) =
  ## Sets every fact in `m` that contains `n` as a subtree to nil. The
  ## slots are kept, so the length of `m` does not change.
  # We are able to guard local vars (as opposed to 'let' variables)!
  # 'while p != nil: f(p); p = p.next'
  # This is actually quite easy to do:
  # Re-assignments (incl. pass to a 'var' param) trigger an invalidation
  # of every fact that contains 'v'.
  #
  #   if x < 4:
  #     if y < 5
  #       x = unknown()
  #       # we invalidate 'x' here but it's known that x >= 4
  #       # for the else anyway
  #   else:
  #     echo x
  #
  # The same mechanism could be used for more complex data stored on the heap;
  # procs that 'write: []' cannot invalidate 'n.kind' for instance. In fact, we
  # could CSE these expressions then and help C's optimizer.
  for i in 0..high(m):
    if m[i] != nil and m[i].hasSubTree(n): m[i] = nil

proc valuesUnequal(a, b: PNode): bool =
  ## True only if both nodes are literals and `sameValue` reports them as
  ## different. If either node is not a literal the result stays false.
  if a.isValue and b.isValue:
    result = not sameValue(a, b)

proc pred(n: PNode): PNode =
  ## For a node of kind nkCharLit..nkUInt64Lit, returns a copy whose
  ## `intVal` is one less. Any other node, and a literal already at
  ## `low(BiggestInt)`, is returned unchanged.
  if n.kind in {nkCharLit..nkUInt64Lit} and n.intVal != low(BiggestInt):
    result = copyNode(n)
    dec result.intVal
  else:
    result = n

type
  # Three-valued answer of the implication checks. `impUnknown` comes first,
  # so it is the value of an unassigned `result`.
  TImplication* = enum
    impUnknown, impNo, impYes

proc impliesEq(fact, eq: PNode): TImplication =
  ## Decides whether `fact` implies the equality `eq`. Any case not handled
  ## below yields `impUnknown`.
  # `loc` is the index of the location operand of `eq`, `val` the index of
  # the other operand
  let (loc, val) = if isLocation(eq.sons[1]): (1, 2) else: (2, 1)

  case fact.sons[0].sym.magic
  of someEq:
    if sameTree(fact.sons[1], eq.sons[loc]):
      # this is not correct; consider:  a == b;  a == 1 --> unknown!
      if sameTree(fact.sons[2], eq.sons[val]): result = impYes
      elif valuesUnequal(fact.sons[2], eq.sons[val]): result = impNo
    elif sameTree(fact.sons[2], eq.sons[loc]):
      if sameTree(fact.sons[1], eq.sons[val]): result = impYes
      elif valuesUnequal(fact.sons[1], eq.sons[val]): result = impNo
  of mInSet:
    # remember: mInSet is 'contains' so the set comes first!
    if sameTree(fact.sons[2], eq.sons[loc]) and isValue(eq.sons[val]):
      if inSet(fact.sons[1], eq.sons[val]): result = impYes
      else: result = impNo
  of mNot, mOr, mAnd: internalError(eq.info, "impliesEq")
  else: discard

proc leImpliesIn(x, c, aSet: PNode): TImplication =
  ## Given the fact `x <= c`, decides whether `x in aSet` follows. It walks
  ## every ordinal from the first value of x's type up to `c`. The result is
  ## `impYes` if all of them are in `aSet` and `impNo` if none are. It stays
  ## `impUnknown` for a mixed outcome, a non-integer `c`, or a range of
  ## 1000 or more values.
  if c.kind in {nkCharLit..nkUInt64Lit}:
    # fact:  x <= 4;  question x in {56}?
    # --> true if every value <= 4 is in the set {56}
    #
    var value = newIntNode(c.kind, firstOrd(x.typ))
    # don't iterate too often:
    if c.intVal - value.intVal < 1000:
      var i, pos, neg: int
      while value.intVal <= c.intVal:
        if inSet(aSet, value): inc pos
        else: inc neg
        inc i; inc value.intVal
      if pos == i: result = impYes
      elif neg == i: result = impNo

proc geImpliesIn(x, c, aSet: PNode): TImplication =
  ## Given the fact `x >= c`, decides whether `x in aSet` follows. It walks
  ## every ordinal from `c` up to the last value of x's type. The result is
  ## `impYes` if all of them are in `aSet` and `impNo` if none are. It stays
  ## `impUnknown` for a mixed outcome, a non-integer `c`, or a range of
  ## 1000 or more values.
  if c.kind in {nkCharLit..nkUInt64Lit}:
    # fact:  x >= 4;  question x in {56}?
    # --> true iff every value >= 4 is in the set {56}
    #
    var value = newIntNode(c.kind, c.intVal)
    let max = lastOrd(x.typ)
    # don't iterate too often:
    if max - value.intVal < 1000:
      var i, pos, neg: int
      while value.intVal <= max:
        if inSet(aSet, value): inc pos
        else: inc neg
        inc i; inc value.intVal
      if pos == i: result = impYes
      elif neg == i: result = impNo

proc compareSets(a, b: PNode): TImplication =
  ## `impYes` if the two sets are equal, `impNo` if their intersection is
  ## empty, otherwise `impUnknown`.
  # NOTE(review): a proper subset (`a` within `b`) is reported as unknown.
  if equalSets(a, b): result = impYes
  elif intersectSets(a, b).len == 0: result = impNo

proc impliesIn(fact, loc, aSet: PNode): TImplication =
  ## Decides whether `fact` implies `loc in aSet`. Any case not handled
  ## below yields `impUnknown`.
  case fact.sons[0].sym.magic
  of someEq:
    # the operand on the other side of the equality is tested for membership
    if sameTree(fact.sons[1], loc):
      if inSet(aSet, fact.sons[2]): result = impYes
      else: result = impNo
    elif sameTree(fact.sons[2], loc):
      if inSet(aSet, fact.sons[1]): result = impYes
      else: result = impNo
  of mInSet:
    if sameTree(fact.sons[2], loc):
      result = compareSets(fact.sons[1], aSet)
  of someLe:
    if sameTree(fact.sons[1], loc):
      result = leImpliesIn(fact.sons[1], fact.sons[2], aSet)
    elif sameTree(fact.sons[2], loc):
      result = geImpliesIn(fact.sons[2], fact.sons[1], aSet)
  of someLt:
    if sameTree(fact.sons[1], loc):
      # x < c  -->  x <= c-1
      result = leImpliesIn(fact.sons[1], fact.sons[2].pred, aSet)
    elif sameTree(fact.sons[2], loc):
      # 4 < x  -->  3 <= x
      # NOTE(review): '4 < x' is equivalent to '5 <= x'; `pred` gives the
      # weaker bound '3 <= x' -- confirm this is intended.
      result = geImpliesIn(fact.sons[2], fact.sons[1].pred, aSet)
  of mNot, mOr, mAnd: internalError(loc.info, "impliesIn")
  else: discard

# Classifies `n`: a nil literal gives `impYes`; a string literal, bracket or
# object constructor gives `impNo`; anything else gives `impUnknown`.
proc valueIsNil(n: PNode): TImplication =
  if n.kind == nkNilLit: impYes
  elif n.kind in {nkStrLit..nkTripleStrLit, nkBracket, nkObjConstr}: impNo
  else: impUnknown

proc impliesIsNil(fact, eq: PNode): TImplication =
  ## Decides whether `fact` implies the nil check `eq`; the checked
  ## expression is `eq.sons[1]`. Any case not handled below yields
  ## `impUnknown`.
  case fact.sons[0].sym.magic
  of mIsNil:
    if sameTree(fact.sons[1], eq.sons[1]):
      result = impYes
  of someEq:
    # the expression equals some value: classify that value
    if sameTree(fact.sons[1], eq.sons[1]):
      result = valueIsNil(fact.sons[2].skipConv)
    elif sameTree(fact.sons[2], eq.sons[1]):
      result = valueIsNil(fact.sons[1].skipConv)
  of mNot, mOr, mAnd: internalError(eq.info, "impliesIsNil")
  else: discard

proc impliesGe(fact, x, c: PNode): TImplication =
  internalAssert isLocation(x)
  case fact.sons[0].sym.magic
  of someEq:
    if sameTree(fact.sons[1], x):
      if isValue(fact.sons[2]) and isValue(c):
        # fact:  x = 4;  question x >= 56? --> true iff 4 >= 56
        if leValue(c, fact.sons[2]): result = impYes
        else: result = impNo
    elif sameTree(fact.sons[2], x):
      if isValue(fact.sons[1]) and isValue(c):
        if leValue(c, fact.sons[1]): result = impYes
        else: result = impNo
  of someLt:
    if sameTree(fact.sons[1], x):
      if isValue(fact.sons[2]) and isValue(c):
        # fact: x < 4;  question N <= x? --> false iff N <= 4
=== Goal

A memory-safe language with a simple translator to x86 that can be feasibly
written without itself needing a translator.

Memory-safe: it should be impossible to:
  a) create a pointer out of arbitrary data, or
  b) access heap memory after it's been freed.

Simple: do all the work in a 2-pass translator:
  Pass 1: check each instruction's types in isolation.
  Pass 2: emit code for each instruction in isolation.

=== Overview of the language

A program consists of a series of type, function and global variable declarations.
(Also constants and tests, but let's focus on these.)

Type declarations basically follow Hindley-Milner with product and (tagged) sum
types. Types are written in s-expression form. There's a `ref` type that's a
type-safe fat pointer, with an alloc id that gets incremented after each
allocation. Memory allocation and reclamation are manual. Dereferencing a ref
after its underlying memory is reclaimed (pointer alloc id no longer matches
payload alloc id) is guaranteed to immediately kill the program (like a
segfault).

  # product type
  type foo [
    x : int
    y : (ref int)
    z : bar
  ]

  # sum type
  choice bar [
    x : int
    y : point
  ]

Functions have a header and a series of instructions in the body:

  fn f a : int -> b : int [
    ...
  ]

Instructions have the following format:

  io1, io2, ... <- operat